|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9991111111111111, |
|
"eval_steps": 100, |
|
"global_step": 562, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0017777777777777779, |
|
"grad_norm": 0.22311078073034826, |
|
"learning_rate": 8.771929824561404e-08, |
|
"logits/chosen": 14.539060592651367, |
|
"logits/rejected": 15.870795249938965, |
|
"logps/chosen": -470.04345703125, |
|
"logps/rejected": -509.49163818359375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.017777777777777778, |
|
"grad_norm": 0.1976540584216542, |
|
"learning_rate": 8.771929824561404e-07, |
|
"logits/chosen": 12.96641731262207, |
|
"logits/rejected": 13.155448913574219, |
|
"logps/chosen": -400.9219665527344, |
|
"logps/rejected": -399.60699462890625, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.4513888955116272, |
|
"rewards/chosen": -0.00211474671959877, |
|
"rewards/margins": -0.001011626678518951, |
|
"rewards/rejected": -0.0011031196918338537, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.035555555555555556, |
|
"grad_norm": 0.21429385122960512, |
|
"learning_rate": 1.7543859649122807e-06, |
|
"logits/chosen": 13.817936897277832, |
|
"logits/rejected": 13.85405158996582, |
|
"logps/chosen": -420.9461975097656, |
|
"logps/rejected": -404.7037658691406, |
|
"loss": 0.6938, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -6.537516310345381e-05, |
|
"rewards/margins": -0.0014555875677615404, |
|
"rewards/rejected": 0.0013902120990678668, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.05333333333333334, |
|
"grad_norm": 0.1898289423532557, |
|
"learning_rate": 2.631578947368421e-06, |
|
"logits/chosen": 13.255971908569336, |
|
"logits/rejected": 13.399693489074707, |
|
"logps/chosen": -402.4463806152344, |
|
"logps/rejected": -412.36480712890625, |
|
"loss": 0.6937, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.0020085538271814585, |
|
"rewards/margins": -0.0013788806973025203, |
|
"rewards/rejected": -0.00062967324629426, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.07111111111111111, |
|
"grad_norm": 0.2162185808879369, |
|
"learning_rate": 3.5087719298245615e-06, |
|
"logits/chosen": 13.550105094909668, |
|
"logits/rejected": 13.091300964355469, |
|
"logps/chosen": -379.90777587890625, |
|
"logps/rejected": -373.92138671875, |
|
"loss": 0.6935, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": 0.0008380211656913161, |
|
"rewards/margins": -0.00024208976537920535, |
|
"rewards/rejected": 0.0010801110183820128, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.08888888888888889, |
|
"grad_norm": 0.22810063623939997, |
|
"learning_rate": 4.385964912280702e-06, |
|
"logits/chosen": 13.239973068237305, |
|
"logits/rejected": 13.241312980651855, |
|
"logps/chosen": -396.4941101074219, |
|
"logps/rejected": -430.57049560546875, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.0004129047447349876, |
|
"rewards/margins": 4.834262654185295e-05, |
|
"rewards/rejected": -0.00046124737127684057, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.10666666666666667, |
|
"grad_norm": 0.2512545993725567, |
|
"learning_rate": 4.999564631597802e-06, |
|
"logits/chosen": 13.0819673538208, |
|
"logits/rejected": 13.337594985961914, |
|
"logps/chosen": -388.62628173828125, |
|
"logps/rejected": -414.85382080078125, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.002572793047875166, |
|
"rewards/margins": 0.0024341598618775606, |
|
"rewards/rejected": -0.005006952676922083, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.12444444444444444, |
|
"grad_norm": 0.22761734037588918, |
|
"learning_rate": 4.991828966534002e-06, |
|
"logits/chosen": 13.887951850891113, |
|
"logits/rejected": 13.595651626586914, |
|
"logps/chosen": -450.8296813964844, |
|
"logps/rejected": -444.7730407714844, |
|
"loss": 0.6936, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.006924263201653957, |
|
"rewards/margins": -0.001168354763649404, |
|
"rewards/rejected": -0.005755907855927944, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.14222222222222222, |
|
"grad_norm": 0.21722031386603705, |
|
"learning_rate": 4.974452899279292e-06, |
|
"logits/chosen": 13.351213455200195, |
|
"logits/rejected": 12.537083625793457, |
|
"logps/chosen": -425.934814453125, |
|
"logps/rejected": -383.2563781738281, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.015285441651940346, |
|
"rewards/margins": -0.0011091658379882574, |
|
"rewards/rejected": -0.014176277443766594, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.27601757784168, |
|
"learning_rate": 4.947503654462277e-06, |
|
"logits/chosen": 13.753236770629883, |
|
"logits/rejected": 13.5679292678833, |
|
"logps/chosen": -435.3922424316406, |
|
"logps/rejected": -426.5663146972656, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": -0.03359255567193031, |
|
"rewards/margins": -0.0016514979070052505, |
|
"rewards/rejected": -0.03194105625152588, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.17777777777777778, |
|
"grad_norm": 0.23435867570612137, |
|
"learning_rate": 4.911085493475802e-06, |
|
"logits/chosen": 12.843754768371582, |
|
"logits/rejected": 12.69409465789795, |
|
"logps/chosen": -412.6481018066406, |
|
"logps/rejected": -412.96405029296875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.0513535737991333, |
|
"rewards/margins": 0.002654131967574358, |
|
"rewards/rejected": -0.05400770157575607, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.17777777777777778, |
|
"eval_logits/chosen": 14.180258750915527, |
|
"eval_logits/rejected": 12.376383781433105, |
|
"eval_logps/chosen": -413.67803955078125, |
|
"eval_logps/rejected": -391.0186462402344, |
|
"eval_loss": 0.6835460066795349, |
|
"eval_rewards/accuracies": 0.6904761791229248, |
|
"eval_rewards/chosen": -0.05108103156089783, |
|
"eval_rewards/margins": 0.021760080009698868, |
|
"eval_rewards/rejected": -0.072841115295887, |
|
"eval_runtime": 90.1605, |
|
"eval_samples_per_second": 11.091, |
|
"eval_steps_per_second": 0.699, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.19555555555555557, |
|
"grad_norm": 0.2803874830493494, |
|
"learning_rate": 4.86533931110987e-06, |
|
"logits/chosen": 13.455083847045898, |
|
"logits/rejected": 13.341041564941406, |
|
"logps/chosen": -415.30401611328125, |
|
"logps/rejected": -425.7816467285156, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.06350487470626831, |
|
"rewards/margins": 0.00028126564575359225, |
|
"rewards/rejected": -0.06378613412380219, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.21333333333333335, |
|
"grad_norm": 0.35378680708306215, |
|
"learning_rate": 4.810442090457072e-06, |
|
"logits/chosen": 12.907583236694336, |
|
"logits/rejected": 13.067194938659668, |
|
"logps/chosen": -389.30303955078125, |
|
"logps/rejected": -398.48333740234375, |
|
"loss": 0.6925, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.06600853055715561, |
|
"rewards/margins": -0.003265662584453821, |
|
"rewards/rejected": -0.06274287402629852, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.2311111111111111, |
|
"grad_norm": 0.2869743496766919, |
|
"learning_rate": 4.7466062181993855e-06, |
|
"logits/chosen": 13.347814559936523, |
|
"logits/rejected": 13.254618644714355, |
|
"logps/chosen": -396.9159851074219, |
|
"logps/rejected": -428.38861083984375, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.08628938347101212, |
|
"rewards/margins": 0.005706036929041147, |
|
"rewards/rejected": -0.09199541807174683, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.24888888888888888, |
|
"grad_norm": 0.3379629862483891, |
|
"learning_rate": 4.6740786629253595e-06, |
|
"logits/chosen": 13.342312812805176, |
|
"logits/rejected": 13.019983291625977, |
|
"logps/chosen": -384.1279602050781, |
|
"logps/rejected": -393.2745666503906, |
|
"loss": 0.6912, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.0947018712759018, |
|
"rewards/margins": 0.0077432007528841496, |
|
"rewards/rejected": -0.10244506597518921, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.26666666666666666, |
|
"grad_norm": 0.350219971213616, |
|
"learning_rate": 4.5931400196566256e-06, |
|
"logits/chosen": 13.074136734008789, |
|
"logits/rejected": 13.148015022277832, |
|
"logps/chosen": -425.1592712402344, |
|
"logps/rejected": -443.909423828125, |
|
"loss": 0.6903, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.11653944104909897, |
|
"rewards/margins": 0.003865143982693553, |
|
"rewards/rejected": -0.12040458619594574, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.28444444444444444, |
|
"grad_norm": 0.37627729798456006, |
|
"learning_rate": 4.504103424280267e-06, |
|
"logits/chosen": 13.34211540222168, |
|
"logits/rejected": 13.010579109191895, |
|
"logps/chosen": -439.13916015625, |
|
"logps/rejected": -433.8727111816406, |
|
"loss": 0.6908, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.16217225790023804, |
|
"rewards/margins": 0.007592835463583469, |
|
"rewards/rejected": -0.16976511478424072, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.3022222222222222, |
|
"grad_norm": 0.3180727458224856, |
|
"learning_rate": 4.407313342086906e-06, |
|
"logits/chosen": 13.196157455444336, |
|
"logits/rejected": 13.456674575805664, |
|
"logps/chosen": -426.7943420410156, |
|
"logps/rejected": -437.723388671875, |
|
"loss": 0.6937, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.16415511071681976, |
|
"rewards/margins": 0.009541703388094902, |
|
"rewards/rejected": -0.17369681596755981, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.31672781627835284, |
|
"learning_rate": 4.303144235101412e-06, |
|
"logits/chosen": 13.912513732910156, |
|
"logits/rejected": 13.92699146270752, |
|
"logps/chosen": -400.5416564941406, |
|
"logps/rejected": -418.99554443359375, |
|
"loss": 0.6907, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.18776951730251312, |
|
"rewards/margins": 0.00795517023652792, |
|
"rewards/rejected": -0.1957246959209442, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.3377777777777778, |
|
"grad_norm": 0.3187670525496018, |
|
"learning_rate": 4.1919991133620705e-06, |
|
"logits/chosen": 12.957951545715332, |
|
"logits/rejected": 12.835040092468262, |
|
"logps/chosen": -428.75921630859375, |
|
"logps/rejected": -429.77801513671875, |
|
"loss": 0.6943, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": -0.18785884976387024, |
|
"rewards/margins": -0.00687809195369482, |
|
"rewards/rejected": -0.18098074197769165, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.35555555555555557, |
|
"grad_norm": 0.31966290081913296, |
|
"learning_rate": 4.074307975753044e-06, |
|
"logits/chosen": 13.527238845825195, |
|
"logits/rejected": 13.133882522583008, |
|
"logps/chosen": -411.04052734375, |
|
"logps/rejected": -405.96820068359375, |
|
"loss": 0.689, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.1447705328464508, |
|
"rewards/margins": 0.013265645131468773, |
|
"rewards/rejected": -0.15803618729114532, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.35555555555555557, |
|
"eval_logits/chosen": 13.984086036682129, |
|
"eval_logits/rejected": 12.180322647094727, |
|
"eval_logps/chosen": -422.97613525390625, |
|
"eval_logps/rejected": -403.87432861328125, |
|
"eval_loss": 0.6681899428367615, |
|
"eval_rewards/accuracies": 0.7460317611694336, |
|
"eval_rewards/chosen": -0.1440620720386505, |
|
"eval_rewards/margins": 0.05733573064208031, |
|
"eval_rewards/rejected": -0.20139780640602112, |
|
"eval_runtime": 90.195, |
|
"eval_samples_per_second": 11.087, |
|
"eval_steps_per_second": 0.698, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.37333333333333335, |
|
"grad_norm": 0.2899334647869614, |
|
"learning_rate": 3.950526146422213e-06, |
|
"logits/chosen": 13.182365417480469, |
|
"logits/rejected": 13.274526596069336, |
|
"logps/chosen": -412.0303649902344, |
|
"logps/rejected": -435.66754150390625, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.1916799396276474, |
|
"rewards/margins": -0.0004939109203405678, |
|
"rewards/rejected": -0.19118604063987732, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.39111111111111113, |
|
"grad_norm": 0.3113343433252108, |
|
"learning_rate": 3.821132513220511e-06, |
|
"logits/chosen": 13.472271919250488, |
|
"logits/rejected": 12.98419189453125, |
|
"logps/chosen": -407.59857177734375, |
|
"logps/rejected": -413.1893615722656, |
|
"loss": 0.6852, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.18176871538162231, |
|
"rewards/margins": 0.02464126981794834, |
|
"rewards/rejected": -0.2064099758863449, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.4088888888888889, |
|
"grad_norm": 0.37212373445120756, |
|
"learning_rate": 3.686627674977858e-06, |
|
"logits/chosen": 12.922555923461914, |
|
"logits/rejected": 13.052223205566406, |
|
"logps/chosen": -420.0254821777344, |
|
"logps/rejected": -438.8111267089844, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.2267444133758545, |
|
"rewards/margins": 0.00632152333855629, |
|
"rewards/rejected": -0.23306593298912048, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.4266666666666667, |
|
"grad_norm": 0.4194286554865179, |
|
"learning_rate": 3.547532004783539e-06, |
|
"logits/chosen": 13.442713737487793, |
|
"logits/rejected": 12.382542610168457, |
|
"logps/chosen": -448.96044921875, |
|
"logps/rejected": -424.40631103515625, |
|
"loss": 0.6883, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.2144048511981964, |
|
"rewards/margins": 0.04138387367129326, |
|
"rewards/rejected": -0.2557887136936188, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.4444444444444444, |
|
"grad_norm": 0.2831313864511856, |
|
"learning_rate": 3.404383636763809e-06, |
|
"logits/chosen": 13.222406387329102, |
|
"logits/rejected": 12.874628067016602, |
|
"logps/chosen": -460.45782470703125, |
|
"logps/rejected": -457.3854064941406, |
|
"loss": 0.6915, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.22632627189159393, |
|
"rewards/margins": 0.018462661653757095, |
|
"rewards/rejected": -0.24478892982006073, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.4622222222222222, |
|
"grad_norm": 0.3260671824484102, |
|
"learning_rate": 3.2577363841455063e-06, |
|
"logits/chosen": 13.391107559204102, |
|
"logits/rejected": 13.3804931640625, |
|
"logps/chosen": -380.6063537597656, |
|
"logps/rejected": -394.7472839355469, |
|
"loss": 0.6954, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.20835450291633606, |
|
"rewards/margins": 0.013799709267914295, |
|
"rewards/rejected": -0.22215421497821808, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.3777555315785377, |
|
"learning_rate": 3.1081575966602627e-06, |
|
"logits/chosen": 12.585054397583008, |
|
"logits/rejected": 12.490537643432617, |
|
"logps/chosen": -439.46844482421875, |
|
"logps/rejected": -421.29962158203125, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.4124999940395355, |
|
"rewards/chosen": -0.19497910141944885, |
|
"rewards/margins": -0.00843017641454935, |
|
"rewards/rejected": -0.18654890358448029, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.49777777777777776, |
|
"grad_norm": 0.2554633592042652, |
|
"learning_rate": 2.9562259655786067e-06, |
|
"logits/chosen": 13.010714530944824, |
|
"logits/rejected": 12.371983528137207, |
|
"logps/chosen": -389.00665283203125, |
|
"logps/rejected": -389.6372985839844, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.14177796244621277, |
|
"rewards/margins": 0.008302886970341206, |
|
"rewards/rejected": -0.1500808447599411, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.5155555555555555, |
|
"grad_norm": 0.27965221608690755, |
|
"learning_rate": 2.802529284865863e-06, |
|
"logits/chosen": 12.02336597442627, |
|
"logits/rejected": 12.071900367736816, |
|
"logps/chosen": -402.3577575683594, |
|
"logps/rejected": -393.9205017089844, |
|
"loss": 0.6895, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.13531939685344696, |
|
"rewards/margins": 0.004990140907466412, |
|
"rewards/rejected": -0.14030954241752625, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.5333333333333333, |
|
"grad_norm": 0.2913462405762709, |
|
"learning_rate": 2.6476621771214865e-06, |
|
"logits/chosen": 13.3645601272583, |
|
"logits/rejected": 13.140283584594727, |
|
"logps/chosen": -407.8977966308594, |
|
"logps/rejected": -416.1576232910156, |
|
"loss": 0.6923, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.1288231909275055, |
|
"rewards/margins": 0.010681845247745514, |
|
"rewards/rejected": -0.1395050287246704, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.5333333333333333, |
|
"eval_logits/chosen": 13.974791526794434, |
|
"eval_logits/rejected": 12.176884651184082, |
|
"eval_logps/chosen": -419.9747009277344, |
|
"eval_logps/rejected": -401.2295227050781, |
|
"eval_loss": 0.6673460006713867, |
|
"eval_rewards/accuracies": 0.7896825671195984, |
|
"eval_rewards/chosen": -0.11404754966497421, |
|
"eval_rewards/margins": 0.06090213730931282, |
|
"eval_rewards/rejected": -0.17494967579841614, |
|
"eval_runtime": 90.2773, |
|
"eval_samples_per_second": 11.077, |
|
"eval_steps_per_second": 0.698, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.5511111111111111, |
|
"grad_norm": 0.340661457280574, |
|
"learning_rate": 2.4922237930997435e-06, |
|
"logits/chosen": 12.930778503417969, |
|
"logits/rejected": 12.75629711151123, |
|
"logps/chosen": -408.3033142089844, |
|
"logps/rejected": -429.09954833984375, |
|
"loss": 0.6891, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.1441982239484787, |
|
"rewards/margins": 0.008970534428954124, |
|
"rewards/rejected": -0.15316873788833618, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.5688888888888889, |
|
"grad_norm": 0.3045715209107194, |
|
"learning_rate": 2.3368154937118355e-06, |
|
"logits/chosen": 12.338762283325195, |
|
"logits/rejected": 12.408100128173828, |
|
"logps/chosen": -417.6708984375, |
|
"logps/rejected": -436.1878356933594, |
|
"loss": 0.6906, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.1610310822725296, |
|
"rewards/margins": 0.003636928740888834, |
|
"rewards/rejected": -0.164668008685112, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.5866666666666667, |
|
"grad_norm": 0.3261096004039151, |
|
"learning_rate": 2.1820385234773604e-06, |
|
"logits/chosen": 12.432132720947266, |
|
"logits/rejected": 12.333971977233887, |
|
"logps/chosen": -371.9541015625, |
|
"logps/rejected": -377.29327392578125, |
|
"loss": 0.6938, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.14137257635593414, |
|
"rewards/margins": 0.012616041116416454, |
|
"rewards/rejected": -0.15398862957954407, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.6044444444444445, |
|
"grad_norm": 0.3167841738260838, |
|
"learning_rate": 2.02849168442607e-06, |
|
"logits/chosen": 13.544441223144531, |
|
"logits/rejected": 13.638467788696289, |
|
"logps/chosen": -407.0322265625, |
|
"logps/rejected": -412.5428771972656, |
|
"loss": 0.6934, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.1435524821281433, |
|
"rewards/margins": 0.008516514673829079, |
|
"rewards/rejected": -0.15206900238990784, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.6222222222222222, |
|
"grad_norm": 0.30772925863397405, |
|
"learning_rate": 1.876769019449141e-06, |
|
"logits/chosen": 12.576923370361328, |
|
"logits/rejected": 12.56715202331543, |
|
"logps/chosen": -379.2666015625, |
|
"logps/rejected": -408.4591979980469, |
|
"loss": 0.6904, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.12999220192432404, |
|
"rewards/margins": 0.009667792357504368, |
|
"rewards/rejected": -0.13966000080108643, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.3510251282815541, |
|
"learning_rate": 1.7274575140626318e-06, |
|
"logits/chosen": 12.569269180297852, |
|
"logits/rejected": 12.474992752075195, |
|
"logps/chosen": -426.98651123046875, |
|
"logps/rejected": -420.2184143066406, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.14840544760227203, |
|
"rewards/margins": -0.0038787845987826586, |
|
"rewards/rejected": -0.1445266604423523, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.6577777777777778, |
|
"grad_norm": 0.29359728465406504, |
|
"learning_rate": 1.5811348254745574e-06, |
|
"logits/chosen": 13.278701782226562, |
|
"logits/rejected": 13.138870239257812, |
|
"logps/chosen": -419.22552490234375, |
|
"logps/rejected": -429.7049865722656, |
|
"loss": 0.6907, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.1437017023563385, |
|
"rewards/margins": 0.006204875651746988, |
|
"rewards/rejected": -0.14990659058094025, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.6755555555555556, |
|
"grad_norm": 0.3041311221496272, |
|
"learning_rate": 1.4383670477413676e-06, |
|
"logits/chosen": 12.825994491577148, |
|
"logits/rejected": 12.180424690246582, |
|
"logps/chosen": -393.3042297363281, |
|
"logps/rejected": -379.05279541015625, |
|
"loss": 0.6881, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.1392853856086731, |
|
"rewards/margins": 0.007530958391726017, |
|
"rewards/rejected": -0.14681634306907654, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.6933333333333334, |
|
"grad_norm": 0.3072757460893545, |
|
"learning_rate": 1.2997065216600179e-06, |
|
"logits/chosen": 12.89039134979248, |
|
"logits/rejected": 13.162782669067383, |
|
"logps/chosen": -427.39910888671875, |
|
"logps/rejected": -431.7848205566406, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.14786246418952942, |
|
"rewards/margins": 0.0003430729848332703, |
|
"rewards/rejected": -0.14820551872253418, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.7111111111111111, |
|
"grad_norm": 0.27986512697276633, |
|
"learning_rate": 1.165689697868726e-06, |
|
"logits/chosen": 12.779977798461914, |
|
"logits/rejected": 12.16929817199707, |
|
"logps/chosen": -420.75653076171875, |
|
"logps/rejected": -415.25244140625, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.14449290931224823, |
|
"rewards/margins": 0.02024017833173275, |
|
"rewards/rejected": -0.16473311185836792, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.7111111111111111, |
|
"eval_logits/chosen": 13.931650161743164, |
|
"eval_logits/rejected": 12.126729011535645, |
|
"eval_logps/chosen": -420.5239562988281, |
|
"eval_logps/rejected": -402.1236267089844, |
|
"eval_loss": 0.665495753288269, |
|
"eval_rewards/accuracies": 0.7698412537574768, |
|
"eval_rewards/chosen": -0.11954014003276825, |
|
"eval_rewards/margins": 0.06435071676969528, |
|
"eval_rewards/rejected": -0.18389087915420532, |
|
"eval_runtime": 90.069, |
|
"eval_samples_per_second": 11.103, |
|
"eval_steps_per_second": 0.699, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.7288888888888889, |
|
"grad_norm": 0.3450004634992278, |
|
"learning_rate": 1.0368350614236685e-06, |
|
"logits/chosen": 12.588386535644531, |
|
"logits/rejected": 12.996353149414062, |
|
"logps/chosen": -403.56207275390625, |
|
"logps/rejected": -424.3435974121094, |
|
"loss": 0.6911, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.15337641537189484, |
|
"rewards/margins": 0.0022048167884349823, |
|
"rewards/rejected": -0.15558123588562012, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.7466666666666667, |
|
"grad_norm": 0.2947231406284332, |
|
"learning_rate": 9.136411258810229e-07, |
|
"logits/chosen": 13.508552551269531, |
|
"logits/rejected": 13.856636047363281, |
|
"logps/chosen": -397.31103515625, |
|
"logps/rejected": -415.14794921875, |
|
"loss": 0.6916, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.14091812074184418, |
|
"rewards/margins": 0.009603964164853096, |
|
"rewards/rejected": -0.15052208304405212, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.7644444444444445, |
|
"grad_norm": 0.2949706409885573, |
|
"learning_rate": 7.965845046448659e-07, |
|
"logits/chosen": 12.716641426086426, |
|
"logits/rejected": 12.739425659179688, |
|
"logps/chosen": -413.5958557128906, |
|
"logps/rejected": -418.07379150390625, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.13801425695419312, |
|
"rewards/margins": 0.0007665277225896716, |
|
"rewards/rejected": -0.13878078758716583, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.7822222222222223, |
|
"grad_norm": 0.29714148256405104, |
|
"learning_rate": 6.861180670424983e-07, |
|
"logits/chosen": 13.228073120117188, |
|
"logits/rejected": 12.513433456420898, |
|
"logps/chosen": -436.03326416015625, |
|
"logps/rejected": -432.7162170410156, |
|
"loss": 0.6882, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.14261861145496368, |
|
"rewards/margins": 0.018722299486398697, |
|
"rewards/rejected": -0.16134092211723328, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.35583565190086214, |
|
"learning_rate": 5.826691862609987e-07, |
|
"logits/chosen": 13.008562088012695, |
|
"logits/rejected": 12.5421724319458, |
|
"logps/chosen": -393.10614013671875, |
|
"logps/rejected": -396.7547607421875, |
|
"loss": 0.6858, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.14102980494499207, |
|
"rewards/margins": 0.006347469985485077, |
|
"rewards/rejected": -0.14737728238105774, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.8177777777777778, |
|
"grad_norm": 1.2849378777119373, |
|
"learning_rate": 4.866380859233891e-07, |
|
"logits/chosen": 12.925387382507324, |
|
"logits/rejected": 13.122329711914062, |
|
"logps/chosen": -406.62255859375, |
|
"logps/rejected": -432.58575439453125, |
|
"loss": 0.6911, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.13780517876148224, |
|
"rewards/margins": 0.009602868929505348, |
|
"rewards/rejected": -0.14740803837776184, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.8355555555555556, |
|
"grad_norm": 0.3135496868277975, |
|
"learning_rate": 3.98396291701183e-07, |
|
"logits/chosen": 13.169825553894043, |
|
"logits/rejected": 12.919093132019043, |
|
"logps/chosen": -420.40594482421875, |
|
"logps/rejected": -424.626220703125, |
|
"loss": 0.6887, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.14023754000663757, |
|
"rewards/margins": 0.012833138927817345, |
|
"rewards/rejected": -0.15307065844535828, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.8533333333333334, |
|
"grad_norm": 0.3340559430991029, |
|
"learning_rate": 3.1828519395374095e-07, |
|
"logits/chosen": 13.121709823608398, |
|
"logits/rejected": 13.285209655761719, |
|
"logps/chosen": -428.4640197753906, |
|
"logps/rejected": -445.868408203125, |
|
"loss": 0.6934, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.1603536158800125, |
|
"rewards/margins": 0.00828765518963337, |
|
"rewards/rejected": -0.16864125430583954, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.8711111111111111, |
|
"grad_norm": 0.32372602686992025, |
|
"learning_rate": 2.466147269552893e-07, |
|
"logits/chosen": 13.549275398254395, |
|
"logits/rejected": 13.303945541381836, |
|
"logps/chosen": -398.70245361328125, |
|
"logps/rejected": -403.9294128417969, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.1653539538383484, |
|
"rewards/margins": 0.015345364809036255, |
|
"rewards/rejected": -0.18069931864738464, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.8888888888888888, |
|
"grad_norm": 0.28909373758975987, |
|
"learning_rate": 1.8366216981942632e-07, |
|
"logits/chosen": 12.993377685546875, |
|
"logits/rejected": 13.172945976257324, |
|
"logps/chosen": -441.27288818359375, |
|
"logps/rejected": -444.38983154296875, |
|
"loss": 0.696, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.17415288090705872, |
|
"rewards/margins": -0.0011280607432126999, |
|
"rewards/rejected": -0.17302480340003967, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.8888888888888888, |
|
"eval_logits/chosen": 13.899726867675781, |
|
"eval_logits/rejected": 12.095231056213379, |
|
"eval_logps/chosen": -421.1900939941406, |
|
"eval_logps/rejected": -403.3279724121094, |
|
"eval_loss": 0.6633419990539551, |
|
"eval_rewards/accuracies": 0.7539682388305664, |
|
"eval_rewards/chosen": -0.1262015700340271, |
|
"eval_rewards/margins": 0.06973244994878769, |
|
"eval_rewards/rejected": -0.19593402743339539, |
|
"eval_runtime": 90.1435, |
|
"eval_samples_per_second": 11.093, |
|
"eval_steps_per_second": 0.699, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.9066666666666666, |
|
"grad_norm": 0.44905140288995754, |
|
"learning_rate": 1.296710737600934e-07, |
|
"logits/chosen": 12.890707015991211, |
|
"logits/rejected": 12.480443000793457, |
|
"logps/chosen": -397.1539611816406, |
|
"logps/rejected": -404.51470947265625, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.14948078989982605, |
|
"rewards/margins": 0.0065328641794621944, |
|
"rewards/rejected": -0.1560136377811432, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.9244444444444444, |
|
"grad_norm": 0.3012451953425647, |
|
"learning_rate": 8.485031983924558e-08, |
|
"logits/chosen": 13.72374153137207, |
|
"logits/rejected": 14.290632247924805, |
|
"logps/chosen": -409.4125061035156, |
|
"logps/rejected": -427.1560974121094, |
|
"loss": 0.6908, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.15699708461761475, |
|
"rewards/margins": -0.00222613662481308, |
|
"rewards/rejected": -0.15477094054222107, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.9422222222222222, |
|
"grad_norm": 0.3269177206871498, |
|
"learning_rate": 4.93733108466013e-08, |
|
"logits/chosen": 12.78927993774414, |
|
"logits/rejected": 13.388028144836426, |
|
"logps/chosen": -428.2759704589844, |
|
"logps/rejected": -456.5814514160156, |
|
"loss": 0.6895, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.15250369906425476, |
|
"rewards/margins": 0.005445868708193302, |
|
"rewards/rejected": -0.15794958174228668, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.33934624748600184, |
|
"learning_rate": 2.3377300437934236e-08, |
|
"logits/chosen": 13.640138626098633, |
|
"logits/rejected": 13.247881889343262, |
|
"logps/chosen": -384.22723388671875, |
|
"logps/rejected": -377.16546630859375, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.14129753410816193, |
|
"rewards/margins": 0.019017567858099937, |
|
"rewards/rejected": -0.16031508147716522, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.9777777777777777, |
|
"grad_norm": 0.31794002037631625, |
|
"learning_rate": 6.962862127343206e-09, |
|
"logits/chosen": 13.490945816040039, |
|
"logits/rejected": 13.803362846374512, |
|
"logps/chosen": -422.62939453125, |
|
"logps/rejected": -427.26959228515625, |
|
"loss": 0.6894, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.167506605386734, |
|
"rewards/margins": 0.010436911135911942, |
|
"rewards/rejected": -0.17794351279735565, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.9955555555555555, |
|
"grad_norm": 0.29176173477304573, |
|
"learning_rate": 1.9350018786556956e-10, |
|
"logits/chosen": 13.073209762573242, |
|
"logits/rejected": 13.310659408569336, |
|
"logps/chosen": -431.8447265625, |
|
"logps/rejected": -418.0799865722656, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.1620911806821823, |
|
"rewards/margins": -0.006803811527788639, |
|
"rewards/rejected": -0.15528738498687744, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.9991111111111111, |
|
"step": 562, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6916975294143703, |
|
"train_runtime": 7520.1237, |
|
"train_samples_per_second": 4.787, |
|
"train_steps_per_second": 0.075 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 562, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|