|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9994242947610823, |
|
"eval_steps": 100, |
|
"global_step": 868, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 146.8957421194674, |
|
"learning_rate": 5.747126436781609e-09, |
|
"logits/chosen": -1.8686045408248901, |
|
"logits/rejected": -1.7644572257995605, |
|
"logps/chosen": -235.48362731933594, |
|
"logps/rejected": -183.77415466308594, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 237.04909600464902, |
|
"learning_rate": 5.747126436781609e-08, |
|
"logits/chosen": -1.9218311309814453, |
|
"logits/rejected": -1.8686226606369019, |
|
"logps/chosen": -240.50628662109375, |
|
"logps/rejected": -216.8230438232422, |
|
"loss": 0.6941, |
|
"rewards/accuracies": 0.4444444477558136, |
|
"rewards/chosen": -0.002868607407435775, |
|
"rewards/margins": -0.0005126786418259144, |
|
"rewards/rejected": -0.002355928998440504, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 211.82620683349913, |
|
"learning_rate": 1.1494252873563217e-07, |
|
"logits/chosen": -2.010253429412842, |
|
"logits/rejected": -1.8642921447753906, |
|
"logps/chosen": -283.1783752441406, |
|
"logps/rejected": -215.68887329101562, |
|
"loss": 0.6864, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.008166970685124397, |
|
"rewards/margins": 0.013039084151387215, |
|
"rewards/rejected": -0.004872114397585392, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 112.97267570781544, |
|
"learning_rate": 1.7241379310344828e-07, |
|
"logits/chosen": -1.9509646892547607, |
|
"logits/rejected": -1.8835735321044922, |
|
"logps/chosen": -240.29074096679688, |
|
"logps/rejected": -221.15274047851562, |
|
"loss": 0.666, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": 0.06300461292266846, |
|
"rewards/margins": 0.06308884918689728, |
|
"rewards/rejected": -8.423496183240786e-05, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 95.74241597637995, |
|
"learning_rate": 2.2988505747126435e-07, |
|
"logits/chosen": -1.9107061624526978, |
|
"logits/rejected": -1.8901317119598389, |
|
"logps/chosen": -237.59036254882812, |
|
"logps/rejected": -216.1823272705078, |
|
"loss": 0.6254, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.1618741899728775, |
|
"rewards/margins": 0.1895591914653778, |
|
"rewards/rejected": -0.027684981003403664, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 99.41131976874209, |
|
"learning_rate": 2.873563218390804e-07, |
|
"logits/chosen": -1.9803855419158936, |
|
"logits/rejected": -1.9117343425750732, |
|
"logps/chosen": -222.8354949951172, |
|
"logps/rejected": -207.8356170654297, |
|
"loss": 0.587, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": 0.23140163719654083, |
|
"rewards/margins": 0.3971676230430603, |
|
"rewards/rejected": -0.16576598584651947, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 85.607755330573, |
|
"learning_rate": 3.4482758620689656e-07, |
|
"logits/chosen": -2.0026185512542725, |
|
"logits/rejected": -1.9393552541732788, |
|
"logps/chosen": -283.61199951171875, |
|
"logps/rejected": -245.7741241455078, |
|
"loss": 0.5351, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": 0.2226170301437378, |
|
"rewards/margins": 0.6314491629600525, |
|
"rewards/rejected": -0.4088321626186371, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 105.09596276450131, |
|
"learning_rate": 4.0229885057471266e-07, |
|
"logits/chosen": -1.9401233196258545, |
|
"logits/rejected": -1.9097837209701538, |
|
"logps/chosen": -223.41983032226562, |
|
"logps/rejected": -222.15283203125, |
|
"loss": 0.5181, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.10973484814167023, |
|
"rewards/margins": 0.7763268351554871, |
|
"rewards/rejected": -0.8860616683959961, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 103.35591967882027, |
|
"learning_rate": 4.597701149425287e-07, |
|
"logits/chosen": -1.8168106079101562, |
|
"logits/rejected": -1.7877649068832397, |
|
"logps/chosen": -227.1862030029297, |
|
"logps/rejected": -217.2149200439453, |
|
"loss": 0.487, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1610104888677597, |
|
"rewards/margins": 0.8937110900878906, |
|
"rewards/rejected": -1.0547215938568115, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 96.75574881093117, |
|
"learning_rate": 4.999817969178237e-07, |
|
"logits/chosen": -1.8949458599090576, |
|
"logits/rejected": -1.8469880819320679, |
|
"logps/chosen": -249.9053192138672, |
|
"logps/rejected": -233.94631958007812, |
|
"loss": 0.4415, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -0.24459132552146912, |
|
"rewards/margins": 1.046515703201294, |
|
"rewards/rejected": -1.291106939315796, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 92.71145741103783, |
|
"learning_rate": 4.996582603056428e-07, |
|
"logits/chosen": -1.8467813730239868, |
|
"logits/rejected": -1.7593021392822266, |
|
"logps/chosen": -251.5709686279297, |
|
"logps/rejected": -255.50814819335938, |
|
"loss": 0.4898, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.3932832181453705, |
|
"rewards/margins": 1.1354423761367798, |
|
"rewards/rejected": -1.5287256240844727, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_logits/chosen": -1.8469043970108032, |
|
"eval_logits/rejected": -1.7659016847610474, |
|
"eval_logps/chosen": -339.4445495605469, |
|
"eval_logps/rejected": -353.2087707519531, |
|
"eval_loss": 0.5505225658416748, |
|
"eval_rewards/accuracies": 0.6875, |
|
"eval_rewards/chosen": -0.19665074348449707, |
|
"eval_rewards/margins": 0.8084924817085266, |
|
"eval_rewards/rejected": -1.005143165588379, |
|
"eval_runtime": 98.3147, |
|
"eval_samples_per_second": 20.343, |
|
"eval_steps_per_second": 0.325, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 108.17707201404545, |
|
"learning_rate": 4.989308132738126e-07, |
|
"logits/chosen": -1.8153671026229858, |
|
"logits/rejected": -1.6921818256378174, |
|
"logps/chosen": -244.46237182617188, |
|
"logps/rejected": -222.83145141601562, |
|
"loss": 0.4368, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.10259479284286499, |
|
"rewards/margins": 1.188932180404663, |
|
"rewards/rejected": -1.2915267944335938, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 100.40422750465044, |
|
"learning_rate": 4.978006327248536e-07, |
|
"logits/chosen": -1.7503843307495117, |
|
"logits/rejected": -1.6768817901611328, |
|
"logps/chosen": -247.3853759765625, |
|
"logps/rejected": -240.05496215820312, |
|
"loss": 0.4237, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.34009289741516113, |
|
"rewards/margins": 1.1859456300735474, |
|
"rewards/rejected": -1.526038408279419, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 91.1070676572417, |
|
"learning_rate": 4.962695471250032e-07, |
|
"logits/chosen": -1.654911756515503, |
|
"logits/rejected": -1.62875235080719, |
|
"logps/chosen": -254.4684600830078, |
|
"logps/rejected": -245.8209686279297, |
|
"loss": 0.4249, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.5070122480392456, |
|
"rewards/margins": 1.3276703357696533, |
|
"rewards/rejected": -1.8346824645996094, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 97.14051346245, |
|
"learning_rate": 4.94340033546025e-07, |
|
"logits/chosen": -1.6183685064315796, |
|
"logits/rejected": -1.6242666244506836, |
|
"logps/chosen": -220.20852661132812, |
|
"logps/rejected": -242.1243133544922, |
|
"loss": 0.4218, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -0.6856725811958313, |
|
"rewards/margins": 1.2277655601501465, |
|
"rewards/rejected": -1.913438081741333, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 94.8408241800107, |
|
"learning_rate": 4.920152136576705e-07, |
|
"logits/chosen": -1.4978981018066406, |
|
"logits/rejected": -1.5163871049880981, |
|
"logps/chosen": -251.6667938232422, |
|
"logps/rejected": -254.0281219482422, |
|
"loss": 0.4261, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.6180266737937927, |
|
"rewards/margins": 1.381075143814087, |
|
"rewards/rejected": -1.9991016387939453, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 108.14894780962342, |
|
"learning_rate": 4.892988486772756e-07, |
|
"logits/chosen": -1.579886794090271, |
|
"logits/rejected": -1.5718797445297241, |
|
"logps/chosen": -250.04495239257812, |
|
"logps/rejected": -262.3863830566406, |
|
"loss": 0.4246, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.6856819987297058, |
|
"rewards/margins": 1.5999362468719482, |
|
"rewards/rejected": -2.285618305206299, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 70.9507948612333, |
|
"learning_rate": 4.861953332846629e-07, |
|
"logits/chosen": -1.7066303491592407, |
|
"logits/rejected": -1.6715686321258545, |
|
"logps/chosen": -270.17132568359375, |
|
"logps/rejected": -260.03350830078125, |
|
"loss": 0.4316, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.8634172677993774, |
|
"rewards/margins": 1.3165854215621948, |
|
"rewards/rejected": -2.1800026893615723, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 114.85174445864124, |
|
"learning_rate": 4.827096885121953e-07, |
|
"logits/chosen": -1.817678689956665, |
|
"logits/rejected": -1.7374699115753174, |
|
"logps/chosen": -286.0107116699219, |
|
"logps/rejected": -279.7050476074219, |
|
"loss": 0.4309, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.0613857507705688, |
|
"rewards/margins": 1.1405597925186157, |
|
"rewards/rejected": -2.2019453048706055, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 90.50728576001431, |
|
"learning_rate": 4.788475536214821e-07, |
|
"logits/chosen": -1.7829034328460693, |
|
"logits/rejected": -1.7400600910186768, |
|
"logps/chosen": -236.38650512695312, |
|
"logps/rejected": -245.64291381835938, |
|
"loss": 0.3873, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.8258365392684937, |
|
"rewards/margins": 1.4504865407943726, |
|
"rewards/rejected": -2.276322841644287, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 100.41476084088063, |
|
"learning_rate": 4.746151769798818e-07, |
|
"logits/chosen": -1.7743650674819946, |
|
"logits/rejected": -1.7203725576400757, |
|
"logps/chosen": -279.8271789550781, |
|
"logps/rejected": -269.1463317871094, |
|
"loss": 0.4277, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -0.8149574398994446, |
|
"rewards/margins": 1.5317128896713257, |
|
"rewards/rejected": -2.346670389175415, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_logits/chosen": -1.8008995056152344, |
|
"eval_logits/rejected": -1.7248116731643677, |
|
"eval_logps/chosen": -345.1794738769531, |
|
"eval_logps/rejected": -370.77880859375, |
|
"eval_loss": 0.46549829840660095, |
|
"eval_rewards/accuracies": 0.73828125, |
|
"eval_rewards/chosen": -0.4833980202674866, |
|
"eval_rewards/margins": 1.4002480506896973, |
|
"eval_rewards/rejected": -1.8836462497711182, |
|
"eval_runtime": 98.0211, |
|
"eval_samples_per_second": 20.404, |
|
"eval_steps_per_second": 0.326, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 99.20354803224151, |
|
"learning_rate": 4.7001940595156055e-07, |
|
"logits/chosen": -1.7606436014175415, |
|
"logits/rejected": -1.6915203332901, |
|
"logps/chosen": -232.93832397460938, |
|
"logps/rejected": -246.24072265625, |
|
"loss": 0.441, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -1.0429778099060059, |
|
"rewards/margins": 1.6135696172714233, |
|
"rewards/rejected": -2.6565470695495605, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 80.49166768353011, |
|
"learning_rate": 4.650676758194623e-07, |
|
"logits/chosen": -1.820059061050415, |
|
"logits/rejected": -1.741408348083496, |
|
"logps/chosen": -265.69769287109375, |
|
"logps/rejected": -259.860595703125, |
|
"loss": 0.4003, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -0.915117621421814, |
|
"rewards/margins": 1.692671537399292, |
|
"rewards/rejected": -2.6077892780303955, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 103.96889607439884, |
|
"learning_rate": 4.5976799775611215e-07, |
|
"logits/chosen": -1.84983229637146, |
|
"logits/rejected": -1.7714653015136719, |
|
"logps/chosen": -260.39678955078125, |
|
"logps/rejected": -254.94235229492188, |
|
"loss": 0.4118, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -1.0704491138458252, |
|
"rewards/margins": 1.982627511024475, |
|
"rewards/rejected": -3.05307674407959, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 87.59607388393054, |
|
"learning_rate": 4.5412894586271543e-07, |
|
"logits/chosen": -1.8541374206542969, |
|
"logits/rejected": -1.7867300510406494, |
|
"logps/chosen": -259.6327819824219, |
|
"logps/rejected": -233.4642333984375, |
|
"loss": 0.3961, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -0.8504000902175903, |
|
"rewards/margins": 1.6259987354278564, |
|
"rewards/rejected": -2.4763987064361572, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 87.76098428515671, |
|
"learning_rate": 4.481596432975201e-07, |
|
"logits/chosen": -1.8068411350250244, |
|
"logits/rejected": -1.7720015048980713, |
|
"logps/chosen": -218.69546508789062, |
|
"logps/rejected": -231.09292602539062, |
|
"loss": 0.4236, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.8816181421279907, |
|
"rewards/margins": 1.4680168628692627, |
|
"rewards/rejected": -2.349634885787964, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 99.04429789846779, |
|
"learning_rate": 4.41869747515886e-07, |
|
"logits/chosen": -1.7596534490585327, |
|
"logits/rejected": -1.7420539855957031, |
|
"logps/chosen": -272.49273681640625, |
|
"logps/rejected": -301.8647766113281, |
|
"loss": 0.3824, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.0995886325836182, |
|
"rewards/margins": 1.7163059711456299, |
|
"rewards/rejected": -2.815894603729248, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 91.17380836071179, |
|
"learning_rate": 4.352694346459396e-07, |
|
"logits/chosen": -1.7119308710098267, |
|
"logits/rejected": -1.713330626487732, |
|
"logps/chosen": -245.8417205810547, |
|
"logps/rejected": -257.9246826171875, |
|
"loss": 0.3928, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -0.6780133843421936, |
|
"rewards/margins": 1.5433403253555298, |
|
"rewards/rejected": -2.221353769302368, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 85.89753055245001, |
|
"learning_rate": 4.2836938302509256e-07, |
|
"logits/chosen": -1.7531852722167969, |
|
"logits/rejected": -1.6354246139526367, |
|
"logps/chosen": -243.12454223632812, |
|
"logps/rejected": -249.15576171875, |
|
"loss": 0.4123, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.8161875605583191, |
|
"rewards/margins": 1.92093825340271, |
|
"rewards/rejected": -2.737125873565674, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 93.29457537220665, |
|
"learning_rate": 4.2118075592405874e-07, |
|
"logits/chosen": -1.6761655807495117, |
|
"logits/rejected": -1.668341040611267, |
|
"logps/chosen": -263.28924560546875, |
|
"logps/rejected": -276.89190673828125, |
|
"loss": 0.3899, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -0.7706686854362488, |
|
"rewards/margins": 1.6867666244506836, |
|
"rewards/rejected": -2.457435131072998, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 104.22663391151875, |
|
"learning_rate": 4.137151834863213e-07, |
|
"logits/chosen": -1.5810635089874268, |
|
"logits/rejected": -1.56969153881073, |
|
"logps/chosen": -215.5782012939453, |
|
"logps/rejected": -242.8097381591797, |
|
"loss": 0.4188, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.0694911479949951, |
|
"rewards/margins": 1.6282224655151367, |
|
"rewards/rejected": -2.6977133750915527, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_logits/chosen": -1.6898695230484009, |
|
"eval_logits/rejected": -1.6142553091049194, |
|
"eval_logps/chosen": -336.9513244628906, |
|
"eval_logps/rejected": -373.6328430175781, |
|
"eval_loss": 0.39220622181892395, |
|
"eval_rewards/accuracies": 0.796875, |
|
"eval_rewards/chosen": -0.07198946177959442, |
|
"eval_rewards/margins": 1.9543578624725342, |
|
"eval_rewards/rejected": -2.0263473987579346, |
|
"eval_runtime": 98.0591, |
|
"eval_samples_per_second": 20.396, |
|
"eval_steps_per_second": 0.326, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 83.12181184953562, |
|
"learning_rate": 4.059847439122671e-07, |
|
"logits/chosen": -1.7229493856430054, |
|
"logits/rejected": -1.590850591659546, |
|
"logps/chosen": -260.96539306640625, |
|
"logps/rejected": -262.43865966796875, |
|
"loss": 0.4106, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.0447856187820435, |
|
"rewards/margins": 1.545778512954712, |
|
"rewards/rejected": -2.590564250946045, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 101.57242075352875, |
|
"learning_rate": 3.98001943918432e-07, |
|
"logits/chosen": -1.6267013549804688, |
|
"logits/rejected": -1.5495989322662354, |
|
"logps/chosen": -246.096923828125, |
|
"logps/rejected": -268.87274169921875, |
|
"loss": 0.3937, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.1101688146591187, |
|
"rewards/margins": 1.6341445446014404, |
|
"rewards/rejected": -2.7443130016326904, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 95.29775137930592, |
|
"learning_rate": 3.8977969850346866e-07, |
|
"logits/chosen": -1.5687649250030518, |
|
"logits/rejected": -1.5476423501968384, |
|
"logps/chosen": -262.67156982421875, |
|
"logps/rejected": -260.74725341796875, |
|
"loss": 0.3887, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.9716947674751282, |
|
"rewards/margins": 1.6578747034072876, |
|
"rewards/rejected": -2.6295692920684814, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 105.61150036189296, |
|
"learning_rate": 3.8133131005357465e-07, |
|
"logits/chosen": -1.662001371383667, |
|
"logits/rejected": -1.5881233215332031, |
|
"logps/chosen": -249.8246612548828, |
|
"logps/rejected": -272.1719970703125, |
|
"loss": 0.38, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.9675191044807434, |
|
"rewards/margins": 2.143432140350342, |
|
"rewards/rejected": -3.1109511852264404, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 90.41304599394526, |
|
"learning_rate": 3.7267044682118435e-07, |
|
"logits/chosen": -1.6518446207046509, |
|
"logits/rejected": -1.6180702447891235, |
|
"logps/chosen": -237.4104461669922, |
|
"logps/rejected": -247.13473510742188, |
|
"loss": 0.3749, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.1348434686660767, |
|
"rewards/margins": 1.6873624324798584, |
|
"rewards/rejected": -2.8222060203552246, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 95.16076600849088, |
|
"learning_rate": 3.638111208117425e-07, |
|
"logits/chosen": -1.7075703144073486, |
|
"logits/rejected": -1.6682260036468506, |
|
"logps/chosen": -247.7568817138672, |
|
"logps/rejected": -263.2861633300781, |
|
"loss": 0.3914, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.0359086990356445, |
|
"rewards/margins": 1.2954949140548706, |
|
"rewards/rejected": -2.3314034938812256, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 96.37188736705814, |
|
"learning_rate": 3.5476766511433605e-07, |
|
"logits/chosen": -1.7550392150878906, |
|
"logits/rejected": -1.6907631158828735, |
|
"logps/chosen": -281.13836669921875, |
|
"logps/rejected": -266.0453796386719, |
|
"loss": 0.4097, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.0189838409423828, |
|
"rewards/margins": 1.6438014507293701, |
|
"rewards/rejected": -2.662785291671753, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 101.71059089383448, |
|
"learning_rate": 3.455547107128602e-07, |
|
"logits/chosen": -1.7938178777694702, |
|
"logits/rejected": -1.787418007850647, |
|
"logps/chosen": -298.5751647949219, |
|
"logps/rejected": -281.03118896484375, |
|
"loss": 0.3611, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -1.0122178792953491, |
|
"rewards/margins": 2.0528714656829834, |
|
"rewards/rejected": -3.065088987350464, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 109.30188938122785, |
|
"learning_rate": 3.361871628152338e-07, |
|
"logits/chosen": -1.8471622467041016, |
|
"logits/rejected": -1.820481300354004, |
|
"logps/chosen": -251.8716278076172, |
|
"logps/rejected": -283.156982421875, |
|
"loss": 0.4006, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.0065103769302368, |
|
"rewards/margins": 1.774526834487915, |
|
"rewards/rejected": -2.7810370922088623, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 89.15033820155391, |
|
"learning_rate": 3.2668017673896077e-07, |
|
"logits/chosen": -1.8275989294052124, |
|
"logits/rejected": -1.8090099096298218, |
|
"logps/chosen": -244.0500946044922, |
|
"logps/rejected": -245.302490234375, |
|
"loss": 0.3506, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.62747722864151, |
|
"rewards/margins": 2.005857467651367, |
|
"rewards/rejected": -2.6333346366882324, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_logits/chosen": -2.029554843902588, |
|
"eval_logits/rejected": -1.9793704748153687, |
|
"eval_logps/chosen": -331.169189453125, |
|
"eval_logps/rejected": -374.0495300292969, |
|
"eval_loss": 0.3456653654575348, |
|
"eval_rewards/accuracies": 0.8203125, |
|
"eval_rewards/chosen": 0.217118039727211, |
|
"eval_rewards/margins": 2.2643015384674072, |
|
"eval_rewards/rejected": -2.0471832752227783, |
|
"eval_runtime": 97.9966, |
|
"eval_samples_per_second": 20.409, |
|
"eval_steps_per_second": 0.327, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 95.54284470696203, |
|
"learning_rate": 3.1704913339205103e-07, |
|
"logits/chosen": -1.9682222604751587, |
|
"logits/rejected": -1.9323310852050781, |
|
"logps/chosen": -251.9891357421875, |
|
"logps/rejected": -276.3076477050781, |
|
"loss": 0.3841, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -0.7251254320144653, |
|
"rewards/margins": 1.8693323135375977, |
|
"rewards/rejected": -2.5944573879241943, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 84.89194807368709, |
|
"learning_rate": 3.0730961438896885e-07, |
|
"logits/chosen": -1.973587989807129, |
|
"logits/rejected": -1.9758260250091553, |
|
"logps/chosen": -326.0084533691406, |
|
"logps/rejected": -316.46380615234375, |
|
"loss": 0.3676, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -1.2638908624649048, |
|
"rewards/margins": 1.681646704673767, |
|
"rewards/rejected": -2.945537805557251, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 79.45330669787697, |
|
"learning_rate": 2.9747737684186795e-07, |
|
"logits/chosen": -1.9824804067611694, |
|
"logits/rejected": -2.0262296199798584, |
|
"logps/chosen": -253.5684814453125, |
|
"logps/rejected": -258.55352783203125, |
|
"loss": 0.3861, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.8729881048202515, |
|
"rewards/margins": 1.7805559635162354, |
|
"rewards/rejected": -2.6535439491271973, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 92.0142521630137, |
|
"learning_rate": 2.8756832786789663e-07, |
|
"logits/chosen": -2.008882761001587, |
|
"logits/rejected": -1.9781932830810547, |
|
"logps/chosen": -269.37042236328125, |
|
"logps/rejected": -264.4306335449219, |
|
"loss": 0.382, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -0.5206736326217651, |
|
"rewards/margins": 1.916638970375061, |
|
"rewards/rejected": -2.437312364578247, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 81.53843237959488, |
|
"learning_rate": 2.7759849885381747e-07, |
|
"logits/chosen": -1.951061487197876, |
|
"logits/rejected": -1.9269872903823853, |
|
"logps/chosen": -282.7742614746094, |
|
"logps/rejected": -261.77130126953125, |
|
"loss": 0.3613, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.7152455449104309, |
|
"rewards/margins": 2.0386595726013184, |
|
"rewards/rejected": -2.7539050579071045, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 81.29719600574327, |
|
"learning_rate": 2.675840195195762e-07, |
|
"logits/chosen": -1.935346007347107, |
|
"logits/rejected": -1.8713289499282837, |
|
"logps/chosen": -237.9385528564453, |
|
"logps/rejected": -262.90496826171875, |
|
"loss": 0.3768, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.7526956796646118, |
|
"rewards/margins": 2.036323070526123, |
|
"rewards/rejected": -2.7890188694000244, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 79.41069847067702, |
|
"learning_rate": 2.575410918227829e-07, |
|
"logits/chosen": -1.9017337560653687, |
|
"logits/rejected": -1.8612467050552368, |
|
"logps/chosen": -286.1165771484375, |
|
"logps/rejected": -286.47576904296875, |
|
"loss": 0.3801, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.7328876256942749, |
|
"rewards/margins": 1.7589473724365234, |
|
"rewards/rejected": -2.491835117340088, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 84.31202005756423, |
|
"learning_rate": 2.474859637463226e-07, |
|
"logits/chosen": -1.7930266857147217, |
|
"logits/rejected": -1.6920406818389893, |
|
"logps/chosen": -261.0568542480469, |
|
"logps/rejected": -249.1828155517578, |
|
"loss": 0.373, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.6825451254844666, |
|
"rewards/margins": 2.1065242290496826, |
|
"rewards/rejected": -2.789069414138794, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 85.77456711320595, |
|
"learning_rate": 2.3743490301150355e-07, |
|
"logits/chosen": -1.8167390823364258, |
|
"logits/rejected": -1.6751165390014648, |
|
"logps/chosen": -254.97879028320312, |
|
"logps/rejected": -258.82794189453125, |
|
"loss": 0.3847, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.421345055103302, |
|
"rewards/margins": 2.164630174636841, |
|
"rewards/rejected": -2.585975408554077, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 80.63419673971578, |
|
"learning_rate": 2.274041707592724e-07, |
|
"logits/chosen": -1.7511818408966064, |
|
"logits/rejected": -1.744807481765747, |
|
"logps/chosen": -238.7528839111328, |
|
"logps/rejected": -277.8020935058594, |
|
"loss": 0.3611, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -1.161329984664917, |
|
"rewards/margins": 2.127523422241211, |
|
"rewards/rejected": -3.288853406906128, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_logits/chosen": -1.8592296838760376, |
|
"eval_logits/rejected": -1.818342685699463, |
|
"eval_logps/chosen": -330.5163879394531, |
|
"eval_logps/rejected": -381.7996520996094, |
|
"eval_loss": 0.29585352540016174, |
|
"eval_rewards/accuracies": 0.8515625, |
|
"eval_rewards/chosen": 0.24975742399692535, |
|
"eval_rewards/margins": 2.68444561958313, |
|
"eval_rewards/rejected": -2.4346883296966553, |
|
"eval_runtime": 97.917, |
|
"eval_samples_per_second": 20.425, |
|
"eval_steps_per_second": 0.327, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 93.97823403064545, |
|
"learning_rate": 2.17409995242075e-07, |
|
"logits/chosen": -1.8619616031646729, |
|
"logits/rejected": -1.8197988271713257, |
|
"logps/chosen": -246.95120239257812, |
|
"logps/rejected": -251.1341552734375, |
|
"loss": 0.3787, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -0.9560788869857788, |
|
"rewards/margins": 2.1973986625671387, |
|
"rewards/rejected": -3.153477668762207, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 81.29931903371055, |
|
"learning_rate": 2.0746854556892544e-07, |
|
"logits/chosen": -1.8456933498382568, |
|
"logits/rejected": -1.8626302480697632, |
|
"logps/chosen": -228.947265625, |
|
"logps/rejected": -255.99887084960938, |
|
"loss": 0.383, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.850503146648407, |
|
"rewards/margins": 1.7254650592803955, |
|
"rewards/rejected": -2.5759682655334473, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 82.34962963997694, |
|
"learning_rate": 1.9759590554616173e-07, |
|
"logits/chosen": -1.9397910833358765, |
|
"logits/rejected": -1.9602453708648682, |
|
"logps/chosen": -259.236328125, |
|
"logps/rejected": -268.33233642578125, |
|
"loss": 0.3812, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.5990003347396851, |
|
"rewards/margins": 1.7735519409179688, |
|
"rewards/rejected": -2.3725523948669434, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 106.09742448074628, |
|
"learning_rate": 1.8780804765620746e-07, |
|
"logits/chosen": -1.8515198230743408, |
|
"logits/rejected": -1.8524954319000244, |
|
"logps/chosen": -268.37164306640625, |
|
"logps/rejected": -304.6097106933594, |
|
"loss": 0.3726, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.32395535707473755, |
|
"rewards/margins": 1.7947556972503662, |
|
"rewards/rejected": -2.118710994720459, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 89.06639468347461, |
|
"learning_rate": 1.7812080721643973e-07, |
|
"logits/chosen": -1.8575937747955322, |
|
"logits/rejected": -1.761614441871643, |
|
"logps/chosen": -263.5863037109375, |
|
"logps/rejected": -246.20455932617188, |
|
"loss": 0.3972, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.7359617352485657, |
|
"rewards/margins": 1.8330894708633423, |
|
"rewards/rejected": -2.5690512657165527, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 91.51640548835455, |
|
"learning_rate": 1.6854985675997063e-07, |
|
"logits/chosen": -1.8521220684051514, |
|
"logits/rejected": -1.8151109218597412, |
|
"logps/chosen": -261.0481872558594, |
|
"logps/rejected": -272.9272155761719, |
|
"loss": 0.3641, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.9162181615829468, |
|
"rewards/margins": 1.6873579025268555, |
|
"rewards/rejected": -2.603576183319092, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 83.89007997657542, |
|
"learning_rate": 1.5911068067978818e-07, |
|
"logits/chosen": -1.7626062631607056, |
|
"logits/rejected": -1.7946131229400635, |
|
"logps/chosen": -240.85189819335938, |
|
"logps/rejected": -273.8741149902344, |
|
"loss": 0.3514, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.9277374148368835, |
|
"rewards/margins": 1.9961919784545898, |
|
"rewards/rejected": -2.923929214477539, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 103.64429212789244, |
|
"learning_rate": 1.4981855017728197e-07, |
|
"logits/chosen": -1.8487884998321533, |
|
"logits/rejected": -1.8991634845733643, |
|
"logps/chosen": -267.7337341308594, |
|
"logps/rejected": -304.248291015625, |
|
"loss": 0.37, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.1558853387832642, |
|
"rewards/margins": 1.9654242992401123, |
|
"rewards/rejected": -3.121309757232666, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 87.97513457883551, |
|
"learning_rate": 1.406884985556804e-07, |
|
"logits/chosen": -1.8694698810577393, |
|
"logits/rejected": -1.844276785850525, |
|
"logps/chosen": -257.92413330078125, |
|
"logps/rejected": -268.24462890625, |
|
"loss": 0.3586, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.2031538486480713, |
|
"rewards/margins": 2.1714892387390137, |
|
"rewards/rejected": -3.374642848968506, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 85.79715679551722, |
|
"learning_rate": 1.3173529689837354e-07, |
|
"logits/chosen": -1.8827781677246094, |
|
"logits/rejected": -1.8290605545043945, |
|
"logps/chosen": -256.6871032714844, |
|
"logps/rejected": -256.0238952636719, |
|
"loss": 0.3562, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.1103084087371826, |
|
"rewards/margins": 1.9665857553482056, |
|
"rewards/rejected": -3.0768942832946777, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_logits/chosen": -1.9736415147781372, |
|
"eval_logits/rejected": -1.9216868877410889, |
|
"eval_logps/chosen": -327.7752685546875, |
|
"eval_logps/rejected": -382.5696105957031, |
|
"eval_loss": 0.2513369023799896, |
|
"eval_rewards/accuracies": 0.87109375, |
|
"eval_rewards/chosen": 0.386812299489975, |
|
"eval_rewards/margins": 2.8599982261657715, |
|
"eval_rewards/rejected": -2.4731857776641846, |
|
"eval_runtime": 97.9427, |
|
"eval_samples_per_second": 20.42, |
|
"eval_steps_per_second": 0.327, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 91.70310566548932, |
|
"learning_rate": 1.2297343017146726e-07, |
|
"logits/chosen": -1.8489186763763428, |
|
"logits/rejected": -1.8288800716400146, |
|
"logps/chosen": -259.6686096191406, |
|
"logps/rejected": -267.1070861816406, |
|
"loss": 0.3693, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.0914928913116455, |
|
"rewards/margins": 1.885284662246704, |
|
"rewards/rejected": -2.9767773151397705, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 83.02991328479807, |
|
"learning_rate": 1.1441707378923474e-07, |
|
"logits/chosen": -1.950595498085022, |
|
"logits/rejected": -1.8974205255508423, |
|
"logps/chosen": -237.6828155517578, |
|
"logps/rejected": -257.06005859375, |
|
"loss": 0.3625, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.4481216371059418, |
|
"rewards/margins": 2.152703046798706, |
|
"rewards/rejected": -2.600825071334839, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 97.69736464926112, |
|
"learning_rate": 1.06080070680377e-07, |
|
"logits/chosen": -1.874447226524353, |
|
"logits/rejected": -1.8658256530761719, |
|
"logps/chosen": -269.42034912109375, |
|
"logps/rejected": -275.75946044921875, |
|
"loss": 0.3679, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.6408880949020386, |
|
"rewards/margins": 2.110172986984253, |
|
"rewards/rejected": -2.751060962677002, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 89.55100518815718, |
|
"learning_rate": 9.797590889219587e-08, |
|
"logits/chosen": -1.927781343460083, |
|
"logits/rejected": -1.915203332901001, |
|
"logps/chosen": -262.8731384277344, |
|
"logps/rejected": -265.70404052734375, |
|
"loss": 0.3725, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.5387133359909058, |
|
"rewards/margins": 2.1157500743865967, |
|
"rewards/rejected": -2.654463529586792, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 88.47283263403602, |
|
"learning_rate": 9.011769976891367e-08, |
|
"logits/chosen": -1.9129142761230469, |
|
"logits/rejected": -1.9037758111953735, |
|
"logps/chosen": -253.7337188720703, |
|
"logps/rejected": -273.1281433105469, |
|
"loss": 0.3654, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -0.8051580190658569, |
|
"rewards/margins": 1.8512630462646484, |
|
"rewards/rejected": -2.656421184539795, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 101.63563998101947, |
|
"learning_rate": 8.251815673944218e-08, |
|
"logits/chosen": -1.8455785512924194, |
|
"logits/rejected": -1.862540602684021, |
|
"logps/chosen": -269.90655517578125, |
|
"logps/rejected": -266.5269470214844, |
|
"loss": 0.3636, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -1.0431994199752808, |
|
"rewards/margins": 1.969745397567749, |
|
"rewards/rejected": -3.0129449367523193, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 101.36570058059344, |
|
"learning_rate": 7.518957474892148e-08, |
|
"logits/chosen": -1.8844772577285767, |
|
"logits/rejected": -1.8293778896331787, |
|
"logps/chosen": -262.8396301269531, |
|
"logps/rejected": -270.7688293457031, |
|
"loss": 0.3566, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -1.0009379386901855, |
|
"rewards/margins": 2.288020133972168, |
|
"rewards/rejected": -3.2889580726623535, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 108.29800746794625, |
|
"learning_rate": 6.814381036730274e-08, |
|
"logits/chosen": -1.9113209247589111, |
|
"logits/rejected": -1.8669430017471313, |
|
"logps/chosen": -248.9401092529297, |
|
"logps/rejected": -263.89459228515625, |
|
"loss": 0.3789, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -0.7384175062179565, |
|
"rewards/margins": 2.038583993911743, |
|
"rewards/rejected": -2.77700138092041, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 81.30832233061167, |
|
"learning_rate": 6.139226260715872e-08, |
|
"logits/chosen": -1.936092734336853, |
|
"logits/rejected": -1.950042724609375, |
|
"logps/chosen": -261.84283447265625, |
|
"logps/rejected": -281.1523132324219, |
|
"loss": 0.3622, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -0.8801490068435669, |
|
"rewards/margins": 2.052794933319092, |
|
"rewards/rejected": -2.932943820953369, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 81.87543968348014, |
|
"learning_rate": 5.4945854481754734e-08, |
|
"logits/chosen": -1.8324077129364014, |
|
"logits/rejected": -1.7888282537460327, |
|
"logps/chosen": -245.5817413330078, |
|
"logps/rejected": -259.8585510253906, |
|
"loss": 0.3624, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.9253555536270142, |
|
"rewards/margins": 2.031710624694824, |
|
"rewards/rejected": -2.957066059112549, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_logits/chosen": -1.9716989994049072, |
|
"eval_logits/rejected": -1.930106282234192, |
|
"eval_logps/chosen": -322.60308837890625, |
|
"eval_logps/rejected": -380.21783447265625, |
|
"eval_loss": 0.21937939524650574, |
|
"eval_rewards/accuracies": 0.90625, |
|
"eval_rewards/chosen": 0.6454216241836548, |
|
"eval_rewards/margins": 3.001021385192871, |
|
"eval_rewards/rejected": -2.3555996417999268, |
|
"eval_runtime": 98.0586, |
|
"eval_samples_per_second": 20.396, |
|
"eval_steps_per_second": 0.326, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 104.29775989655523, |
|
"learning_rate": 4.881501533321605e-08, |
|
"logits/chosen": -1.7864516973495483, |
|
"logits/rejected": -1.7958072423934937, |
|
"logps/chosen": -228.2520294189453, |
|
"logps/rejected": -255.38577270507812, |
|
"loss": 0.3399, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.952102780342102, |
|
"rewards/margins": 2.169015884399414, |
|
"rewards/rejected": -3.1211180686950684, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 87.51056493700892, |
|
"learning_rate": 4.300966395938377e-08, |
|
"logits/chosen": -1.91278076171875, |
|
"logits/rejected": -1.887738823890686, |
|
"logps/chosen": -269.040283203125, |
|
"logps/rejected": -277.3271484375, |
|
"loss": 0.3737, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.9400026202201843, |
|
"rewards/margins": 2.1387667655944824, |
|
"rewards/rejected": -3.0787696838378906, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 82.85641235575972, |
|
"learning_rate": 3.7539192566655246e-08, |
|
"logits/chosen": -1.8706934452056885, |
|
"logits/rejected": -1.8632476329803467, |
|
"logps/chosen": -258.32257080078125, |
|
"logps/rejected": -262.11151123046875, |
|
"loss": 0.3615, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.5498217344284058, |
|
"rewards/margins": 1.930101752281189, |
|
"rewards/rejected": -2.479923725128174, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 86.13162915966026, |
|
"learning_rate": 3.24124515747731e-08, |
|
"logits/chosen": -1.827125906944275, |
|
"logits/rejected": -1.8333660364151, |
|
"logps/chosen": -245.1622314453125, |
|
"logps/rejected": -269.59857177734375, |
|
"loss": 0.376, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.0357568264007568, |
|
"rewards/margins": 1.9602575302124023, |
|
"rewards/rejected": -2.996014356613159, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 86.92913330390785, |
|
"learning_rate": 2.763773529814506e-08, |
|
"logits/chosen": -1.940159559249878, |
|
"logits/rejected": -1.9158337116241455, |
|
"logps/chosen": -282.51397705078125, |
|
"logps/rejected": -276.21331787109375, |
|
"loss": 0.372, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.7603832483291626, |
|
"rewards/margins": 2.119266986846924, |
|
"rewards/rejected": -2.879650354385376, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 95.56287400260709, |
|
"learning_rate": 2.3222768526860698e-08, |
|
"logits/chosen": -1.863567590713501, |
|
"logits/rejected": -1.788558006286621, |
|
"logps/chosen": -249.87838745117188, |
|
"logps/rejected": -264.7607727050781, |
|
"loss": 0.3676, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.6970678567886353, |
|
"rewards/margins": 2.2210423946380615, |
|
"rewards/rejected": -2.9181103706359863, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 82.6081133840389, |
|
"learning_rate": 1.9174694029115146e-08, |
|
"logits/chosen": -1.9369175434112549, |
|
"logits/rejected": -1.9106756448745728, |
|
"logps/chosen": -287.55975341796875, |
|
"logps/rejected": -268.37493896484375, |
|
"loss": 0.3621, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.7068324685096741, |
|
"rewards/margins": 1.9774510860443115, |
|
"rewards/rejected": -2.684283494949341, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 84.83905388303022, |
|
"learning_rate": 1.5500060995258134e-08, |
|
"logits/chosen": -1.8579909801483154, |
|
"logits/rejected": -1.8068830966949463, |
|
"logps/chosen": -258.0039367675781, |
|
"logps/rejected": -259.26202392578125, |
|
"loss": 0.3456, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.8524407148361206, |
|
"rewards/margins": 2.122145414352417, |
|
"rewards/rejected": -2.974586248397827, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 94.72717106858794, |
|
"learning_rate": 1.2204814442165812e-08, |
|
"logits/chosen": -1.8840267658233643, |
|
"logits/rejected": -1.8291581869125366, |
|
"logps/chosen": -252.4437713623047, |
|
"logps/rejected": -252.0703582763672, |
|
"loss": 0.3741, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.8425480127334595, |
|
"rewards/margins": 2.2477831840515137, |
|
"rewards/rejected": -3.0903308391571045, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 102.52909854244074, |
|
"learning_rate": 9.294285595075669e-09, |
|
"logits/chosen": -1.9619266986846924, |
|
"logits/rejected": -1.904088020324707, |
|
"logps/chosen": -277.6653747558594, |
|
"logps/rejected": -272.3161315917969, |
|
"loss": 0.4069, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.8278266787528992, |
|
"rewards/margins": 2.0403804779052734, |
|
"rewards/rejected": -2.868206739425659, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_logits/chosen": -1.9591288566589355, |
|
"eval_logits/rejected": -1.9204463958740234, |
|
"eval_logps/chosen": -322.0539245605469, |
|
"eval_logps/rejected": -380.2658386230469, |
|
"eval_loss": 0.20271854102611542, |
|
"eval_rewards/accuracies": 0.9140625, |
|
"eval_rewards/chosen": 0.6728801131248474, |
|
"eval_rewards/margins": 3.0308780670166016, |
|
"eval_rewards/rejected": -2.3579981327056885, |
|
"eval_runtime": 97.802, |
|
"eval_samples_per_second": 20.449, |
|
"eval_steps_per_second": 0.327, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 85.0418535767566, |
|
"learning_rate": 6.773183262446914e-09, |
|
"logits/chosen": -1.8587850332260132, |
|
"logits/rejected": -1.7795337438583374, |
|
"logps/chosen": -248.3004913330078, |
|
"logps/rejected": -261.2222595214844, |
|
"loss": 0.3824, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.7576299905776978, |
|
"rewards/margins": 2.073674440383911, |
|
"rewards/rejected": -2.8313040733337402, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 88.3275931540908, |
|
"learning_rate": 4.645586217799452e-09, |
|
"logits/chosen": -1.8911056518554688, |
|
"logits/rejected": -1.940203309059143, |
|
"logps/chosen": -265.9781799316406, |
|
"logps/rejected": -290.08770751953125, |
|
"loss": 0.3902, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.839026927947998, |
|
"rewards/margins": 2.1166014671325684, |
|
"rewards/rejected": -2.9556286334991455, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 108.33767167817186, |
|
"learning_rate": 2.9149366008568987e-09, |
|
"logits/chosen": -1.8930469751358032, |
|
"logits/rejected": -1.9079952239990234, |
|
"logps/chosen": -263.65264892578125, |
|
"logps/rejected": -278.3803405761719, |
|
"loss": 0.3825, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -0.6759049296379089, |
|
"rewards/margins": 2.146210193634033, |
|
"rewards/rejected": -2.822114944458008, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 86.51713108380295, |
|
"learning_rate": 1.5840343486700215e-09, |
|
"logits/chosen": -1.9469448328018188, |
|
"logits/rejected": -1.939854383468628, |
|
"logps/chosen": -281.2010498046875, |
|
"logps/rejected": -275.5614318847656, |
|
"loss": 0.367, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.5235612392425537, |
|
"rewards/margins": 2.1837284564971924, |
|
"rewards/rejected": -2.707289457321167, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 82.94960696998083, |
|
"learning_rate": 6.550326657293881e-10, |
|
"logits/chosen": -1.9341312646865845, |
|
"logits/rejected": -1.901523232460022, |
|
"logps/chosen": -257.5312805175781, |
|
"logps/rejected": -268.84619140625, |
|
"loss": 0.346, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -0.7536182999610901, |
|
"rewards/margins": 2.499413251876831, |
|
"rewards/rejected": -3.2530312538146973, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 100.59142277641878, |
|
"learning_rate": 1.2943454039654467e-10, |
|
"logits/chosen": -1.821735143661499, |
|
"logits/rejected": -1.835100531578064, |
|
"logps/chosen": -244.0499267578125, |
|
"logps/rejected": -261.69805908203125, |
|
"loss": 0.3624, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.0147624015808105, |
|
"rewards/margins": 1.7580705881118774, |
|
"rewards/rejected": -2.7728328704833984, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 868, |
|
"total_flos": 0.0, |
|
"train_loss": 0.40559157427005504, |
|
"train_runtime": 13777.3263, |
|
"train_samples_per_second": 8.066, |
|
"train_steps_per_second": 0.063 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 868, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|