|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9997382884061764, |
|
"eval_steps": 100, |
|
"global_step": 1910, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 0.58203125, |
|
"learning_rate": 2.617801047120419e-08, |
|
"logits/chosen": -3.1532161235809326, |
|
"logits/rejected": -3.1690337657928467, |
|
"logps/chosen": -305.45306396484375, |
|
"logps/rejected": -294.4603576660156, |
|
"loss": 0.5, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.0007838421151973307, |
|
"rewards/margins": -0.00040248289587907493, |
|
"rewards/rejected": -0.000381359423045069, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.5390625, |
|
"learning_rate": 2.617801047120419e-07, |
|
"logits/chosen": -3.177987813949585, |
|
"logits/rejected": -3.2059593200683594, |
|
"logps/chosen": -299.1102294921875, |
|
"logps/rejected": -249.10623168945312, |
|
"loss": 0.5001, |
|
"rewards/accuracies": 0.4444444477558136, |
|
"rewards/chosen": -0.0008526805322617292, |
|
"rewards/margins": -0.00045007685548625886, |
|
"rewards/rejected": -0.0004026036476716399, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.59765625, |
|
"learning_rate": 5.235602094240838e-07, |
|
"logits/chosen": -3.1716275215148926, |
|
"logits/rejected": -3.166067123413086, |
|
"logps/chosen": -238.83120727539062, |
|
"logps/rejected": -244.2283935546875, |
|
"loss": 0.5, |
|
"rewards/accuracies": 0.503125011920929, |
|
"rewards/chosen": 0.0001240858546225354, |
|
"rewards/margins": 5.3543342801276594e-05, |
|
"rewards/rejected": 7.054249726934358e-05, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.765625, |
|
"learning_rate": 7.853403141361258e-07, |
|
"logits/chosen": -3.194286823272705, |
|
"logits/rejected": -3.2046267986297607, |
|
"logps/chosen": -268.1184387207031, |
|
"logps/rejected": -239.86087036132812, |
|
"loss": 0.4997, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": 0.002198445377871394, |
|
"rewards/margins": 0.0013555358164012432, |
|
"rewards/rejected": 0.0008429096196778119, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.72265625, |
|
"learning_rate": 1.0471204188481676e-06, |
|
"logits/chosen": -3.1798417568206787, |
|
"logits/rejected": -3.185044765472412, |
|
"logps/chosen": -273.47900390625, |
|
"logps/rejected": -255.7032928466797, |
|
"loss": 0.4993, |
|
"rewards/accuracies": 0.6343749761581421, |
|
"rewards/chosen": 0.005988434888422489, |
|
"rewards/margins": 0.0028830617666244507, |
|
"rewards/rejected": 0.003105373587459326, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.7578125, |
|
"learning_rate": 1.3089005235602096e-06, |
|
"logits/chosen": -3.162355899810791, |
|
"logits/rejected": -3.1799404621124268, |
|
"logps/chosen": -256.9862060546875, |
|
"logps/rejected": -239.87069702148438, |
|
"loss": 0.4985, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": 0.012596851214766502, |
|
"rewards/margins": 0.006152496673166752, |
|
"rewards/rejected": 0.0064443545415997505, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.73046875, |
|
"learning_rate": 1.5706806282722515e-06, |
|
"logits/chosen": -3.1871049404144287, |
|
"logits/rejected": -3.200637102127075, |
|
"logps/chosen": -294.3240661621094, |
|
"logps/rejected": -262.1870422363281, |
|
"loss": 0.4969, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.022759366780519485, |
|
"rewards/margins": 0.012819233350455761, |
|
"rewards/rejected": 0.009940135292708874, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.75390625, |
|
"learning_rate": 1.8324607329842933e-06, |
|
"logits/chosen": -3.1636972427368164, |
|
"logits/rejected": -3.161069869995117, |
|
"logps/chosen": -266.68853759765625, |
|
"logps/rejected": -243.20263671875, |
|
"loss": 0.496, |
|
"rewards/accuracies": 0.6968749761581421, |
|
"rewards/chosen": 0.031995244324207306, |
|
"rewards/margins": 0.016239028424024582, |
|
"rewards/rejected": 0.015756219625473022, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.8359375, |
|
"learning_rate": 2.094240837696335e-06, |
|
"logits/chosen": -3.1705234050750732, |
|
"logits/rejected": -3.1865649223327637, |
|
"logps/chosen": -271.360595703125, |
|
"logps/rejected": -252.78170776367188, |
|
"loss": 0.4962, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.038576819002628326, |
|
"rewards/margins": 0.015578309074044228, |
|
"rewards/rejected": 0.022998513653874397, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.72265625, |
|
"learning_rate": 2.356020942408377e-06, |
|
"logits/chosen": -3.166350841522217, |
|
"logits/rejected": -3.1725335121154785, |
|
"logps/chosen": -240.39999389648438, |
|
"logps/rejected": -236.23782348632812, |
|
"loss": 0.4956, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.035779114812612534, |
|
"rewards/margins": 0.018319377675652504, |
|
"rewards/rejected": 0.01745973899960518, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.7109375, |
|
"learning_rate": 2.617801047120419e-06, |
|
"logits/chosen": -3.169689655303955, |
|
"logits/rejected": -3.2062766551971436, |
|
"logps/chosen": -260.24462890625, |
|
"logps/rejected": -230.7229766845703, |
|
"loss": 0.4929, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": 0.04880410060286522, |
|
"rewards/margins": 0.029587719589471817, |
|
"rewards/rejected": 0.0192163847386837, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.65625, |
|
"learning_rate": 2.8795811518324613e-06, |
|
"logits/chosen": -3.1644022464752197, |
|
"logits/rejected": -3.178515672683716, |
|
"logps/chosen": -257.0642395019531, |
|
"logps/rejected": -233.0090789794922, |
|
"loss": 0.4904, |
|
"rewards/accuracies": 0.715624988079071, |
|
"rewards/chosen": 0.05435089394450188, |
|
"rewards/margins": 0.03999961167573929, |
|
"rewards/rejected": 0.014351281337440014, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.7265625, |
|
"learning_rate": 3.141361256544503e-06, |
|
"logits/chosen": -3.1647660732269287, |
|
"logits/rejected": -3.181644916534424, |
|
"logps/chosen": -300.6939392089844, |
|
"logps/rejected": -279.0010070800781, |
|
"loss": 0.4918, |
|
"rewards/accuracies": 0.621874988079071, |
|
"rewards/chosen": 0.0594431571662426, |
|
"rewards/margins": 0.035575076937675476, |
|
"rewards/rejected": 0.023868080228567123, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.6171875, |
|
"learning_rate": 3.403141361256545e-06, |
|
"logits/chosen": -3.1200623512268066, |
|
"logits/rejected": -3.1410274505615234, |
|
"logps/chosen": -265.76513671875, |
|
"logps/rejected": -246.6106414794922, |
|
"loss": 0.4896, |
|
"rewards/accuracies": 0.6968749761581421, |
|
"rewards/chosen": 0.05789119750261307, |
|
"rewards/margins": 0.045422304421663284, |
|
"rewards/rejected": 0.012468896806240082, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.671875, |
|
"learning_rate": 3.6649214659685865e-06, |
|
"logits/chosen": -3.1760947704315186, |
|
"logits/rejected": -3.1799349784851074, |
|
"logps/chosen": -258.2982482910156, |
|
"logps/rejected": -239.6028289794922, |
|
"loss": 0.4864, |
|
"rewards/accuracies": 0.684374988079071, |
|
"rewards/chosen": 0.06110318750143051, |
|
"rewards/margins": 0.06362718343734741, |
|
"rewards/rejected": -0.00252399779856205, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.625, |
|
"learning_rate": 3.926701570680629e-06, |
|
"logits/chosen": -3.1222329139709473, |
|
"logits/rejected": -3.133145332336426, |
|
"logps/chosen": -265.1998291015625, |
|
"logps/rejected": -251.78952026367188, |
|
"loss": 0.4875, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.05429766699671745, |
|
"rewards/margins": 0.05967814847826958, |
|
"rewards/rejected": -0.005380480550229549, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.703125, |
|
"learning_rate": 4.18848167539267e-06, |
|
"logits/chosen": -3.176487445831299, |
|
"logits/rejected": -3.190802812576294, |
|
"logps/chosen": -272.86651611328125, |
|
"logps/rejected": -244.38687133789062, |
|
"loss": 0.4852, |
|
"rewards/accuracies": 0.684374988079071, |
|
"rewards/chosen": 0.05848199874162674, |
|
"rewards/margins": 0.07154129445552826, |
|
"rewards/rejected": -0.013059285469353199, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.71484375, |
|
"learning_rate": 4.450261780104713e-06, |
|
"logits/chosen": -3.153256893157959, |
|
"logits/rejected": -3.1715409755706787, |
|
"logps/chosen": -274.42901611328125, |
|
"logps/rejected": -259.0591125488281, |
|
"loss": 0.4835, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": 0.06613625586032867, |
|
"rewards/margins": 0.0808890238404274, |
|
"rewards/rejected": -0.014752751216292381, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.51953125, |
|
"learning_rate": 4.712041884816754e-06, |
|
"logits/chosen": -3.116530179977417, |
|
"logits/rejected": -3.125654697418213, |
|
"logps/chosen": -264.8692932128906, |
|
"logps/rejected": -256.16748046875, |
|
"loss": 0.486, |
|
"rewards/accuracies": 0.715624988079071, |
|
"rewards/chosen": 0.05057697370648384, |
|
"rewards/margins": 0.0704292505979538, |
|
"rewards/rejected": -0.019852278754115105, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.6171875, |
|
"learning_rate": 4.9738219895287965e-06, |
|
"logits/chosen": -3.123274564743042, |
|
"logits/rejected": -3.1312222480773926, |
|
"logps/chosen": -288.4281311035156, |
|
"logps/rejected": -258.3103332519531, |
|
"loss": 0.4814, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.05620621517300606, |
|
"rewards/margins": 0.09435133635997772, |
|
"rewards/rejected": -0.038145121186971664, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.703125, |
|
"learning_rate": 4.999661831436499e-06, |
|
"logits/chosen": -3.126032590866089, |
|
"logits/rejected": -3.1409640312194824, |
|
"logps/chosen": -271.5264587402344, |
|
"logps/rejected": -251.412109375, |
|
"loss": 0.4787, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": 0.0683365911245346, |
|
"rewards/margins": 0.10697062313556671, |
|
"rewards/rejected": -0.038634032011032104, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.6953125, |
|
"learning_rate": 4.9984929711403395e-06, |
|
"logits/chosen": -3.0909173488616943, |
|
"logits/rejected": -3.0889172554016113, |
|
"logps/chosen": -235.9461212158203, |
|
"logps/rejected": -240.8140106201172, |
|
"loss": 0.4855, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.03789529204368591, |
|
"rewards/margins": 0.07996337115764618, |
|
"rewards/rejected": -0.04206807166337967, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.609375, |
|
"learning_rate": 4.996489634487865e-06, |
|
"logits/chosen": -3.149304151535034, |
|
"logits/rejected": -3.1437649726867676, |
|
"logps/chosen": -276.66497802734375, |
|
"logps/rejected": -261.4449157714844, |
|
"loss": 0.4814, |
|
"rewards/accuracies": 0.6656249761581421, |
|
"rewards/chosen": 0.030237609520554543, |
|
"rewards/margins": 0.1089547872543335, |
|
"rewards/rejected": -0.0787171721458435, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.69921875, |
|
"learning_rate": 4.9936524905772466e-06, |
|
"logits/chosen": -3.092778444290161, |
|
"logits/rejected": -3.1226553916931152, |
|
"logps/chosen": -265.37457275390625, |
|
"logps/rejected": -260.417724609375, |
|
"loss": 0.4759, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": 0.06305978447198868, |
|
"rewards/margins": 0.13326093554496765, |
|
"rewards/rejected": -0.07020114362239838, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.76953125, |
|
"learning_rate": 4.9899824869915e-06, |
|
"logits/chosen": -3.1257612705230713, |
|
"logits/rejected": -3.142120838165283, |
|
"logps/chosen": -251.7377166748047, |
|
"logps/rejected": -258.2868957519531, |
|
"loss": 0.4783, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.034979041665792465, |
|
"rewards/margins": 0.12733003497123718, |
|
"rewards/rejected": -0.09235100448131561, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.75, |
|
"learning_rate": 4.985480849482012e-06, |
|
"logits/chosen": -3.1416521072387695, |
|
"logits/rejected": -3.1663966178894043, |
|
"logps/chosen": -280.97442626953125, |
|
"logps/rejected": -259.81915283203125, |
|
"loss": 0.4749, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": 0.03899794816970825, |
|
"rewards/margins": 0.14351439476013184, |
|
"rewards/rejected": -0.10451646894216537, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.78125, |
|
"learning_rate": 4.980149081559142e-06, |
|
"logits/chosen": -3.166862964630127, |
|
"logits/rejected": -3.1826682090759277, |
|
"logps/chosen": -277.7224426269531, |
|
"logps/rejected": -264.73248291015625, |
|
"loss": 0.4769, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.02976931631565094, |
|
"rewards/margins": 0.13806195557117462, |
|
"rewards/rejected": -0.10829265415668488, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.6171875, |
|
"learning_rate": 4.9739889639900655e-06, |
|
"logits/chosen": -3.1009061336517334, |
|
"logits/rejected": -3.1220781803131104, |
|
"logps/chosen": -271.1175842285156, |
|
"logps/rejected": -270.35601806640625, |
|
"loss": 0.4696, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.041884977370500565, |
|
"rewards/margins": 0.18558195233345032, |
|
"rewards/rejected": -0.14369697868824005, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.77734375, |
|
"learning_rate": 4.967002554204009e-06, |
|
"logits/chosen": -3.1314620971679688, |
|
"logits/rejected": -3.147207021713257, |
|
"logps/chosen": -272.50836181640625, |
|
"logps/rejected": -278.05487060546875, |
|
"loss": 0.4785, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.0026828604750335217, |
|
"rewards/margins": 0.14521858096122742, |
|
"rewards/rejected": -0.1425357311964035, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.62109375, |
|
"learning_rate": 4.959192185605089e-06, |
|
"logits/chosen": -3.1541976928710938, |
|
"logits/rejected": -3.1754889488220215, |
|
"logps/chosen": -304.40740966796875, |
|
"logps/rejected": -284.8604736328125, |
|
"loss": 0.4718, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": 0.0017113524954766035, |
|
"rewards/margins": 0.18049772083759308, |
|
"rewards/rejected": -0.17878638207912445, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.859375, |
|
"learning_rate": 4.950560466792969e-06, |
|
"logits/chosen": -3.1485819816589355, |
|
"logits/rejected": -3.1677544116973877, |
|
"logps/chosen": -263.881103515625, |
|
"logps/rejected": -263.1119689941406, |
|
"loss": 0.4741, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.045438483357429504, |
|
"rewards/margins": 0.1866464614868164, |
|
"rewards/rejected": -0.23208491504192352, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.80078125, |
|
"learning_rate": 4.9411102806916185e-06, |
|
"logits/chosen": -3.122454881668091, |
|
"logits/rejected": -3.144866943359375, |
|
"logps/chosen": -278.1111145019531, |
|
"logps/rejected": -262.2369689941406, |
|
"loss": 0.4695, |
|
"rewards/accuracies": 0.715624988079071, |
|
"rewards/chosen": -0.052451539784669876, |
|
"rewards/margins": 0.22689659893512726, |
|
"rewards/rejected": -0.2793481647968292, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.71484375, |
|
"learning_rate": 4.930844783586424e-06, |
|
"logits/chosen": -3.116986036300659, |
|
"logits/rejected": -3.1314234733581543, |
|
"logps/chosen": -280.05523681640625, |
|
"logps/rejected": -292.8658447265625, |
|
"loss": 0.4642, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.18633142113685608, |
|
"rewards/margins": 0.2993203103542328, |
|
"rewards/rejected": -0.4856516718864441, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.8828125, |
|
"learning_rate": 4.919767404070033e-06, |
|
"logits/chosen": -3.0919606685638428, |
|
"logits/rejected": -3.1069421768188477, |
|
"logps/chosen": -290.76690673828125, |
|
"logps/rejected": -299.6058044433594, |
|
"loss": 0.4644, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.24150581657886505, |
|
"rewards/margins": 0.32079803943634033, |
|
"rewards/rejected": -0.562303900718689, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.8984375, |
|
"learning_rate": 4.907881841897216e-06, |
|
"logits/chosen": -3.113529920578003, |
|
"logits/rejected": -3.13189697265625, |
|
"logps/chosen": -316.0239562988281, |
|
"logps/rejected": -312.1597900390625, |
|
"loss": 0.4576, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.3316032886505127, |
|
"rewards/margins": 0.40208154916763306, |
|
"rewards/rejected": -0.733684778213501, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 1.03125, |
|
"learning_rate": 4.89519206674919e-06, |
|
"logits/chosen": -3.0299558639526367, |
|
"logits/rejected": -3.0673482418060303, |
|
"logps/chosen": -298.59539794921875, |
|
"logps/rejected": -356.8745422363281, |
|
"loss": 0.4522, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.496961772441864, |
|
"rewards/margins": 0.5310899615287781, |
|
"rewards/rejected": -1.028051733970642, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.984375, |
|
"learning_rate": 4.881702316907769e-06, |
|
"logits/chosen": -3.04780912399292, |
|
"logits/rejected": -3.0520381927490234, |
|
"logps/chosen": -342.5206604003906, |
|
"logps/rejected": -376.9095153808594, |
|
"loss": 0.4588, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": -0.7044845819473267, |
|
"rewards/margins": 0.4986873269081116, |
|
"rewards/rejected": -1.203171968460083, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 1.515625, |
|
"learning_rate": 4.86741709783982e-06, |
|
"logits/chosen": -3.0080177783966064, |
|
"logits/rejected": -3.0334441661834717, |
|
"logps/chosen": -322.6118469238281, |
|
"logps/rejected": -343.72320556640625, |
|
"loss": 0.4541, |
|
"rewards/accuracies": 0.6656249761581421, |
|
"rewards/chosen": -0.7056992053985596, |
|
"rewards/margins": 0.5343005061149597, |
|
"rewards/rejected": -1.239999771118164, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.88671875, |
|
"learning_rate": 4.852341180692471e-06, |
|
"logits/chosen": -2.9617342948913574, |
|
"logits/rejected": -2.9721832275390625, |
|
"logps/chosen": -318.089111328125, |
|
"logps/rejected": -369.661865234375, |
|
"loss": 0.4526, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.5964111089706421, |
|
"rewards/margins": 0.5622240900993347, |
|
"rewards/rejected": -1.1586352586746216, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.9375, |
|
"learning_rate": 4.836479600699579e-06, |
|
"logits/chosen": -3.004973888397217, |
|
"logits/rejected": -3.015404224395752, |
|
"logps/chosen": -322.59222412109375, |
|
"logps/rejected": -350.57952880859375, |
|
"loss": 0.4555, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.4821473956108093, |
|
"rewards/margins": 0.5106935501098633, |
|
"rewards/rejected": -0.9928409457206726, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 1.359375, |
|
"learning_rate": 4.819837655500014e-06, |
|
"logits/chosen": -2.982062816619873, |
|
"logits/rejected": -3.0214340686798096, |
|
"logps/chosen": -328.14764404296875, |
|
"logps/rejected": -358.10198974609375, |
|
"loss": 0.4518, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.6790810823440552, |
|
"rewards/margins": 0.5925682783126831, |
|
"rewards/rejected": -1.2716493606567383, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 1.1953125, |
|
"learning_rate": 4.802420903368286e-06, |
|
"logits/chosen": -2.996058225631714, |
|
"logits/rejected": -3.0428948402404785, |
|
"logps/chosen": -311.92181396484375, |
|
"logps/rejected": -364.3048095703125, |
|
"loss": 0.4564, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.5532656908035278, |
|
"rewards/margins": 0.49352067708969116, |
|
"rewards/rejected": -1.0467865467071533, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 1.140625, |
|
"learning_rate": 4.784235161358124e-06, |
|
"logits/chosen": -2.960181951522827, |
|
"logits/rejected": -2.9744322299957275, |
|
"logps/chosen": -318.44744873046875, |
|
"logps/rejected": -352.4103088378906, |
|
"loss": 0.4597, |
|
"rewards/accuracies": 0.659375011920929, |
|
"rewards/chosen": -0.5166889429092407, |
|
"rewards/margins": 0.5014506578445435, |
|
"rewards/rejected": -1.0181396007537842, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 1.09375, |
|
"learning_rate": 4.765286503359632e-06, |
|
"logits/chosen": -2.887781858444214, |
|
"logits/rejected": -2.90468430519104, |
|
"logps/chosen": -332.93792724609375, |
|
"logps/rejected": -379.0559997558594, |
|
"loss": 0.4516, |
|
"rewards/accuracies": 0.690625011920929, |
|
"rewards/chosen": -0.6485487222671509, |
|
"rewards/margins": 0.5632439255714417, |
|
"rewards/rejected": -1.2117927074432373, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.9921875, |
|
"learning_rate": 4.745581258070654e-06, |
|
"logits/chosen": -2.8706612586975098, |
|
"logits/rejected": -2.8921449184417725, |
|
"logps/chosen": -356.7577209472656, |
|
"logps/rejected": -414.74224853515625, |
|
"loss": 0.4523, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.9310439229011536, |
|
"rewards/margins": 0.6457870602607727, |
|
"rewards/rejected": -1.5768309831619263, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 1.4765625, |
|
"learning_rate": 4.725126006883047e-06, |
|
"logits/chosen": -2.8595480918884277, |
|
"logits/rejected": -2.884725332260132, |
|
"logps/chosen": -306.8425598144531, |
|
"logps/rejected": -388.7693786621094, |
|
"loss": 0.4424, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.637574315071106, |
|
"rewards/margins": 0.7818979620933533, |
|
"rewards/rejected": -1.4194722175598145, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 1.0546875, |
|
"learning_rate": 4.70392758168454e-06, |
|
"logits/chosen": -2.8378663063049316, |
|
"logits/rejected": -2.8632078170776367, |
|
"logps/chosen": -362.0655822753906, |
|
"logps/rejected": -393.5525817871094, |
|
"loss": 0.456, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.843133807182312, |
|
"rewards/margins": 0.6695644855499268, |
|
"rewards/rejected": -1.5126984119415283, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 1.59375, |
|
"learning_rate": 4.68199306257695e-06, |
|
"logits/chosen": -2.90700101852417, |
|
"logits/rejected": -2.930222511291504, |
|
"logps/chosen": -368.11419677734375, |
|
"logps/rejected": -419.2084045410156, |
|
"loss": 0.4412, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": -0.8187941312789917, |
|
"rewards/margins": 0.7371198534965515, |
|
"rewards/rejected": -1.5559141635894775, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.8984375, |
|
"learning_rate": 4.659329775511478e-06, |
|
"logits/chosen": -2.8993983268737793, |
|
"logits/rejected": -2.930690050125122, |
|
"logps/chosen": -331.52081298828125, |
|
"logps/rejected": -389.1390686035156, |
|
"loss": 0.4494, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.6849466562271118, |
|
"rewards/margins": 0.7577627301216125, |
|
"rewards/rejected": -1.4427093267440796, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 1.140625, |
|
"learning_rate": 4.635945289841902e-06, |
|
"logits/chosen": -2.8397905826568604, |
|
"logits/rejected": -2.8895552158355713, |
|
"logps/chosen": -351.8777770996094, |
|
"logps/rejected": -399.30126953125, |
|
"loss": 0.4437, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6446736454963684, |
|
"rewards/margins": 0.8137520551681519, |
|
"rewards/rejected": -1.458425760269165, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 2.265625, |
|
"learning_rate": 4.611847415796476e-06, |
|
"logits/chosen": -2.8411877155303955, |
|
"logits/rejected": -2.8607966899871826, |
|
"logps/chosen": -360.9768371582031, |
|
"logps/rejected": -416.825927734375, |
|
"loss": 0.4382, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.8380700945854187, |
|
"rewards/margins": 0.860516369342804, |
|
"rewards/rejected": -1.6985862255096436, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 1.75, |
|
"learning_rate": 4.587044201869378e-06, |
|
"logits/chosen": -2.848315715789795, |
|
"logits/rejected": -2.8671722412109375, |
|
"logps/chosen": -326.2365417480469, |
|
"logps/rejected": -389.13720703125, |
|
"loss": 0.4425, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.7817949056625366, |
|
"rewards/margins": 0.8195638656616211, |
|
"rewards/rejected": -1.6013587713241577, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 1.4453125, |
|
"learning_rate": 4.561543932132574e-06, |
|
"logits/chosen": -2.7974531650543213, |
|
"logits/rejected": -2.828226327896118, |
|
"logps/chosen": -348.9537658691406, |
|
"logps/rejected": -422.35186767578125, |
|
"loss": 0.4319, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.7357637286186218, |
|
"rewards/margins": 0.9210460782051086, |
|
"rewards/rejected": -1.6568095684051514, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 1.6015625, |
|
"learning_rate": 4.535355123469009e-06, |
|
"logits/chosen": -2.8311352729797363, |
|
"logits/rejected": -2.845012664794922, |
|
"logps/chosen": -350.1925964355469, |
|
"logps/rejected": -428.8135681152344, |
|
"loss": 0.4413, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.9110676050186157, |
|
"rewards/margins": 0.8541079759597778, |
|
"rewards/rejected": -1.765175461769104, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 2.546875, |
|
"learning_rate": 4.508486522728037e-06, |
|
"logits/chosen": -2.7914469242095947, |
|
"logits/rejected": -2.821166515350342, |
|
"logps/chosen": -362.1348876953125, |
|
"logps/rejected": -431.3775329589844, |
|
"loss": 0.425, |
|
"rewards/accuracies": 0.715624988079071, |
|
"rewards/chosen": -0.72906094789505, |
|
"rewards/margins": 1.1009550094604492, |
|
"rewards/rejected": -1.8300158977508545, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 2.953125, |
|
"learning_rate": 4.480947103804044e-06, |
|
"logits/chosen": -2.7665412425994873, |
|
"logits/rejected": -2.7718183994293213, |
|
"logps/chosen": -364.27362060546875, |
|
"logps/rejected": -409.62042236328125, |
|
"loss": 0.4476, |
|
"rewards/accuracies": 0.6781250238418579, |
|
"rewards/chosen": -0.9586559534072876, |
|
"rewards/margins": 0.7505531311035156, |
|
"rewards/rejected": -1.7092090845108032, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 2.21875, |
|
"learning_rate": 4.452746064639239e-06, |
|
"logits/chosen": -2.7609431743621826, |
|
"logits/rejected": -2.7761027812957764, |
|
"logps/chosen": -350.8570556640625, |
|
"logps/rejected": -438.7533264160156, |
|
"loss": 0.4401, |
|
"rewards/accuracies": 0.6781250238418579, |
|
"rewards/chosen": -0.9020726084709167, |
|
"rewards/margins": 0.9013819694519043, |
|
"rewards/rejected": -1.8034546375274658, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 1.6171875, |
|
"learning_rate": 4.423892824151617e-06, |
|
"logits/chosen": -2.724292755126953, |
|
"logits/rejected": -2.7128217220306396, |
|
"logps/chosen": -369.49822998046875, |
|
"logps/rejected": -421.8377380371094, |
|
"loss": 0.4407, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.9183570742607117, |
|
"rewards/margins": 0.7936559319496155, |
|
"rewards/rejected": -1.7120128870010376, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 2.296875, |
|
"learning_rate": 4.3943970190891164e-06, |
|
"logits/chosen": -2.767193555831909, |
|
"logits/rejected": -2.7671852111816406, |
|
"logps/chosen": -338.62347412109375, |
|
"logps/rejected": -402.12713623046875, |
|
"loss": 0.4297, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.6558475494384766, |
|
"rewards/margins": 0.883314311504364, |
|
"rewards/rejected": -1.5391619205474854, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 1.9765625, |
|
"learning_rate": 4.364268500811025e-06, |
|
"logits/chosen": -2.71061635017395, |
|
"logits/rejected": -2.7537200450897217, |
|
"logps/chosen": -356.565185546875, |
|
"logps/rejected": -418.8211975097656, |
|
"loss": 0.4445, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.9843032956123352, |
|
"rewards/margins": 0.7892019152641296, |
|
"rewards/rejected": -1.773505449295044, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 1.7421875, |
|
"learning_rate": 4.333517331997704e-06, |
|
"logits/chosen": -2.7916760444641113, |
|
"logits/rejected": -2.771669864654541, |
|
"logps/chosen": -304.84649658203125, |
|
"logps/rejected": -379.7848815917969, |
|
"loss": 0.4453, |
|
"rewards/accuracies": 0.684374988079071, |
|
"rewards/chosen": -0.6202788949012756, |
|
"rewards/margins": 0.7137486338615417, |
|
"rewards/rejected": -1.3340275287628174, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 1.34375, |
|
"learning_rate": 4.302153783289737e-06, |
|
"logits/chosen": -2.771042823791504, |
|
"logits/rejected": -2.7721784114837646, |
|
"logps/chosen": -304.8459167480469, |
|
"logps/rejected": -382.05029296875, |
|
"loss": 0.4336, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.48120832443237305, |
|
"rewards/margins": 0.8254661560058594, |
|
"rewards/rejected": -1.3066743612289429, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 1.171875, |
|
"learning_rate": 4.270188329857613e-06, |
|
"logits/chosen": -2.6971840858459473, |
|
"logits/rejected": -2.7405362129211426, |
|
"logps/chosen": -365.59869384765625, |
|
"logps/rejected": -422.66082763671875, |
|
"loss": 0.4337, |
|
"rewards/accuracies": 0.7406250238418579, |
|
"rewards/chosen": -0.7066925168037415, |
|
"rewards/margins": 0.9992557764053345, |
|
"rewards/rejected": -1.7059482336044312, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 2.546875, |
|
"learning_rate": 4.237631647903115e-06, |
|
"logits/chosen": -2.729823589324951, |
|
"logits/rejected": -2.7406442165374756, |
|
"logps/chosen": -343.63861083984375, |
|
"logps/rejected": -398.4572448730469, |
|
"loss": 0.4474, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.834626317024231, |
|
"rewards/margins": 0.7315307855606079, |
|
"rewards/rejected": -1.5661571025848389, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 3.15625, |
|
"learning_rate": 4.204494611093548e-06, |
|
"logits/chosen": -2.6876988410949707, |
|
"logits/rejected": -2.710921049118042, |
|
"logps/chosen": -344.4961242675781, |
|
"logps/rejected": -413.8218688964844, |
|
"loss": 0.4355, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.9162249565124512, |
|
"rewards/margins": 0.9599907994270325, |
|
"rewards/rejected": -1.8762153387069702, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 3.3125, |
|
"learning_rate": 4.170788286930024e-06, |
|
"logits/chosen": -2.720813751220703, |
|
"logits/rejected": -2.7221953868865967, |
|
"logps/chosen": -396.78424072265625, |
|
"logps/rejected": -472.07293701171875, |
|
"loss": 0.4488, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.4064452648162842, |
|
"rewards/margins": 0.8613995313644409, |
|
"rewards/rejected": -2.2678446769714355, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 1.578125, |
|
"learning_rate": 4.136523933051005e-06, |
|
"logits/chosen": -2.759519338607788, |
|
"logits/rejected": -2.7709243297576904, |
|
"logps/chosen": -343.8990173339844, |
|
"logps/rejected": -425.5016174316406, |
|
"loss": 0.4433, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.881717324256897, |
|
"rewards/margins": 0.9298511743545532, |
|
"rewards/rejected": -1.8115684986114502, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 2.390625, |
|
"learning_rate": 4.101712993472348e-06, |
|
"logits/chosen": -2.7461307048797607, |
|
"logits/rejected": -2.7893545627593994, |
|
"logps/chosen": -306.47528076171875, |
|
"logps/rejected": -357.6148986816406, |
|
"loss": 0.4434, |
|
"rewards/accuracies": 0.684374988079071, |
|
"rewards/chosen": -0.500246524810791, |
|
"rewards/margins": 0.6672911643981934, |
|
"rewards/rejected": -1.1675376892089844, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 1.53125, |
|
"learning_rate": 4.066367094765091e-06, |
|
"logits/chosen": -2.6456458568573, |
|
"logits/rejected": -2.6387665271759033, |
|
"logps/chosen": -346.2305603027344, |
|
"logps/rejected": -411.1793518066406, |
|
"loss": 0.4375, |
|
"rewards/accuracies": 0.659375011920929, |
|
"rewards/chosen": -0.803577721118927, |
|
"rewards/margins": 0.8857296109199524, |
|
"rewards/rejected": -1.689307451248169, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 1.875, |
|
"learning_rate": 4.030498042172277e-06, |
|
"logits/chosen": -2.729139804840088, |
|
"logits/rejected": -2.7350516319274902, |
|
"logps/chosen": -343.1637268066406, |
|
"logps/rejected": -424.0751037597656, |
|
"loss": 0.4338, |
|
"rewards/accuracies": 0.684374988079071, |
|
"rewards/chosen": -0.6723712086677551, |
|
"rewards/margins": 0.953707218170166, |
|
"rewards/rejected": -1.6260782480239868, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 1.5, |
|
"learning_rate": 3.994117815666095e-06, |
|
"logits/chosen": -2.737205743789673, |
|
"logits/rejected": -2.756513833999634, |
|
"logps/chosen": -358.9730529785156, |
|
"logps/rejected": -435.59930419921875, |
|
"loss": 0.4352, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.9257775545120239, |
|
"rewards/margins": 0.9314867258071899, |
|
"rewards/rejected": -1.8572641611099243, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 2.59375, |
|
"learning_rate": 3.957238565946672e-06, |
|
"logits/chosen": -2.7129783630371094, |
|
"logits/rejected": -2.740182399749756, |
|
"logps/chosen": -382.54144287109375, |
|
"logps/rejected": -479.25079345703125, |
|
"loss": 0.425, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.0284990072250366, |
|
"rewards/margins": 1.2022464275360107, |
|
"rewards/rejected": -2.230745315551758, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 3.109375, |
|
"learning_rate": 3.919872610383831e-06, |
|
"logits/chosen": -2.700688600540161, |
|
"logits/rejected": -2.715548038482666, |
|
"logps/chosen": -346.7179870605469, |
|
"logps/rejected": -442.24468994140625, |
|
"loss": 0.429, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.9038440585136414, |
|
"rewards/margins": 1.1126606464385986, |
|
"rewards/rejected": -2.0165047645568848, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 5.4375, |
|
"learning_rate": 3.882032428903195e-06, |
|
"logits/chosen": -2.732431650161743, |
|
"logits/rejected": -2.741513252258301, |
|
"logps/chosen": -341.4494323730469, |
|
"logps/rejected": -418.03485107421875, |
|
"loss": 0.4393, |
|
"rewards/accuracies": 0.6656249761581421, |
|
"rewards/chosen": -0.9002019166946411, |
|
"rewards/margins": 0.9896795153617859, |
|
"rewards/rejected": -1.8898814916610718, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 2.828125, |
|
"learning_rate": 3.84373065981799e-06, |
|
"logits/chosen": -2.6866860389709473, |
|
"logits/rejected": -2.7075419425964355, |
|
"logps/chosen": -335.77630615234375, |
|
"logps/rejected": -392.2721252441406, |
|
"loss": 0.4315, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.7698886394500732, |
|
"rewards/margins": 0.8988865613937378, |
|
"rewards/rejected": -1.668775200843811, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 3.03125, |
|
"learning_rate": 3.8049800956079552e-06, |
|
"logits/chosen": -2.728248357772827, |
|
"logits/rejected": -2.719029426574707, |
|
"logps/chosen": -333.34747314453125, |
|
"logps/rejected": -403.9107971191406, |
|
"loss": 0.4329, |
|
"rewards/accuracies": 0.6968749761581421, |
|
"rewards/chosen": -0.688866376876831, |
|
"rewards/margins": 0.9195898771286011, |
|
"rewards/rejected": -1.6084562540054321, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 2.640625, |
|
"learning_rate": 3.765793678646753e-06, |
|
"logits/chosen": -2.7588818073272705, |
|
"logits/rejected": -2.764564037322998, |
|
"logps/chosen": -327.6832580566406, |
|
"logps/rejected": -397.34967041015625, |
|
"loss": 0.4445, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -0.7366557121276855, |
|
"rewards/margins": 0.8244975805282593, |
|
"rewards/rejected": -1.5611531734466553, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 3.40625, |
|
"learning_rate": 3.726184496879323e-06, |
|
"logits/chosen": -2.7015931606292725, |
|
"logits/rejected": -2.7093541622161865, |
|
"logps/chosen": -328.6782531738281, |
|
"logps/rejected": -392.6251525878906, |
|
"loss": 0.4434, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": -0.6729280948638916, |
|
"rewards/margins": 0.7624879479408264, |
|
"rewards/rejected": -1.4354161024093628, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 2.046875, |
|
"learning_rate": 3.686165779450619e-06, |
|
"logits/chosen": -2.7212226390838623, |
|
"logits/rejected": -2.7281336784362793, |
|
"logps/chosen": -332.78985595703125, |
|
"logps/rejected": -407.54339599609375, |
|
"loss": 0.4226, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.5121452808380127, |
|
"rewards/margins": 1.0209487676620483, |
|
"rewards/rejected": -1.5330939292907715, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 2.0625, |
|
"learning_rate": 3.645750892287178e-06, |
|
"logits/chosen": -2.6992287635803223, |
|
"logits/rejected": -2.7106306552886963, |
|
"logps/chosen": -349.5966491699219, |
|
"logps/rejected": -479.349609375, |
|
"loss": 0.4291, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.9312782287597656, |
|
"rewards/margins": 1.2202502489089966, |
|
"rewards/rejected": -2.1515283584594727, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 4.4375, |
|
"learning_rate": 3.604953333633009e-06, |
|
"logits/chosen": -2.6964669227600098, |
|
"logits/rejected": -2.718437671661377, |
|
"logps/chosen": -368.614990234375, |
|
"logps/rejected": -455.652099609375, |
|
"loss": 0.444, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.1283608675003052, |
|
"rewards/margins": 0.9377814531326294, |
|
"rewards/rejected": -2.0661423206329346, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 1.796875, |
|
"learning_rate": 3.56378672954129e-06, |
|
"logits/chosen": -2.734504222869873, |
|
"logits/rejected": -2.741596221923828, |
|
"logps/chosen": -345.35894775390625, |
|
"logps/rejected": -441.3074645996094, |
|
"loss": 0.4339, |
|
"rewards/accuracies": 0.6968749761581421, |
|
"rewards/chosen": -0.8633454442024231, |
|
"rewards/margins": 1.0725480318069458, |
|
"rewards/rejected": -1.9358936548233032, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 4.34375, |
|
"learning_rate": 3.5222648293233806e-06, |
|
"logits/chosen": -2.726121425628662, |
|
"logits/rejected": -2.757293701171875, |
|
"logps/chosen": -328.99981689453125, |
|
"logps/rejected": -409.4175720214844, |
|
"loss": 0.4433, |
|
"rewards/accuracies": 0.6781250238418579, |
|
"rewards/chosen": -0.8074792623519897, |
|
"rewards/margins": 0.893712043762207, |
|
"rewards/rejected": -1.7011913061141968, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 2.015625, |
|
"learning_rate": 3.4804015009566573e-06, |
|
"logits/chosen": -2.7143707275390625, |
|
"logits/rejected": -2.7266762256622314, |
|
"logps/chosen": -343.6038513183594, |
|
"logps/rejected": -408.5287170410156, |
|
"loss": 0.4446, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.0202709436416626, |
|
"rewards/margins": 0.8006850481033325, |
|
"rewards/rejected": -1.8209559917449951, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 8.25, |
|
"learning_rate": 3.4382107264527244e-06, |
|
"logits/chosen": -2.731356143951416, |
|
"logits/rejected": -2.749807357788086, |
|
"logps/chosen": -387.00372314453125, |
|
"logps/rejected": -461.8328552246094, |
|
"loss": 0.4274, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.057356834411621, |
|
"rewards/margins": 1.1233993768692017, |
|
"rewards/rejected": -2.180756092071533, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 1.328125, |
|
"learning_rate": 3.3957065971875387e-06, |
|
"logits/chosen": -2.736109972000122, |
|
"logits/rejected": -2.761101245880127, |
|
"logps/chosen": -378.2186279296875, |
|
"logps/rejected": -433.41986083984375, |
|
"loss": 0.4517, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.291465401649475, |
|
"rewards/margins": 0.7508509159088135, |
|
"rewards/rejected": -2.042316198348999, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 3.15625, |
|
"learning_rate": 3.352903309194999e-06, |
|
"logits/chosen": -2.7496652603149414, |
|
"logits/rejected": -2.766449451446533, |
|
"logps/chosen": -347.00531005859375, |
|
"logps/rejected": -450.0355529785156, |
|
"loss": 0.4337, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.9623804092407227, |
|
"rewards/margins": 1.0263848304748535, |
|
"rewards/rejected": -1.9887651205062866, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.9765625, |
|
"learning_rate": 3.309815158425591e-06, |
|
"logits/chosen": -2.6927943229675293, |
|
"logits/rejected": -2.7125327587127686, |
|
"logps/chosen": -312.2998046875, |
|
"logps/rejected": -379.62017822265625, |
|
"loss": 0.43, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.39604613184928894, |
|
"rewards/margins": 0.9399534463882446, |
|
"rewards/rejected": -1.335999608039856, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 4.53125, |
|
"learning_rate": 3.266456535971654e-06, |
|
"logits/chosen": -2.790156602859497, |
|
"logits/rejected": -2.7961854934692383, |
|
"logps/chosen": -305.0512390136719, |
|
"logps/rejected": -366.155029296875, |
|
"loss": 0.4325, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.48138341307640076, |
|
"rewards/margins": 0.8310259580612183, |
|
"rewards/rejected": -1.3124094009399414, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 1.0625, |
|
"learning_rate": 3.2228419232608692e-06, |
|
"logits/chosen": -2.701418399810791, |
|
"logits/rejected": -2.688974380493164, |
|
"logps/chosen": -316.68133544921875, |
|
"logps/rejected": -413.67156982421875, |
|
"loss": 0.4314, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.6776655316352844, |
|
"rewards/margins": 0.9983049631118774, |
|
"rewards/rejected": -1.6759703159332275, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 2.21875, |
|
"learning_rate": 3.1789858872195888e-06, |
|
"logits/chosen": -2.6642849445343018, |
|
"logits/rejected": -2.647975444793701, |
|
"logps/chosen": -365.28936767578125, |
|
"logps/rejected": -515.3648681640625, |
|
"loss": 0.438, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.0754855871200562, |
|
"rewards/margins": 1.3716323375701904, |
|
"rewards/rejected": -2.447117567062378, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 3.625, |
|
"learning_rate": 3.1349030754075945e-06, |
|
"logits/chosen": -2.668508291244507, |
|
"logits/rejected": -2.6765646934509277, |
|
"logps/chosen": -357.05712890625, |
|
"logps/rejected": -427.643798828125, |
|
"loss": 0.4391, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.8638874888420105, |
|
"rewards/margins": 0.9187320470809937, |
|
"rewards/rejected": -1.7826197147369385, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 4.15625, |
|
"learning_rate": 3.0906082111259313e-06, |
|
"logits/chosen": -2.721989154815674, |
|
"logits/rejected": -2.7402305603027344, |
|
"logps/chosen": -341.566650390625, |
|
"logps/rejected": -425.6533203125, |
|
"loss": 0.4271, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.6936538815498352, |
|
"rewards/margins": 1.0446574687957764, |
|
"rewards/rejected": -1.7383114099502563, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 2.25, |
|
"learning_rate": 3.046116088499449e-06, |
|
"logits/chosen": -2.732478380203247, |
|
"logits/rejected": -2.7298645973205566, |
|
"logps/chosen": -366.4847717285156, |
|
"logps/rejected": -460.4178161621094, |
|
"loss": 0.4317, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.7452308535575867, |
|
"rewards/margins": 1.1475111246109009, |
|
"rewards/rejected": -1.8927419185638428, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 1.46875, |
|
"learning_rate": 3.0014415675356813e-06, |
|
"logits/chosen": -2.7274584770202637, |
|
"logits/rejected": -2.7278664112091064, |
|
"logps/chosen": -354.26751708984375, |
|
"logps/rejected": -469.7132873535156, |
|
"loss": 0.4211, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.7849363088607788, |
|
"rewards/margins": 1.4399199485778809, |
|
"rewards/rejected": -2.224856376647949, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 2.765625, |
|
"learning_rate": 2.9565995691617242e-06, |
|
"logits/chosen": -2.7352840900421143, |
|
"logits/rejected": -2.737842321395874, |
|
"logps/chosen": -358.52020263671875, |
|
"logps/rejected": -454.4913024902344, |
|
"loss": 0.4305, |
|
"rewards/accuracies": 0.7281249761581421, |
|
"rewards/chosen": -0.7539668083190918, |
|
"rewards/margins": 1.1879950761795044, |
|
"rewards/rejected": -1.941961646080017, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.9765625, |
|
"learning_rate": 2.9116050702407706e-06, |
|
"logits/chosen": -2.7479987144470215, |
|
"logits/rejected": -2.7785484790802, |
|
"logps/chosen": -313.64837646484375, |
|
"logps/rejected": -373.56689453125, |
|
"loss": 0.4343, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": -0.4597587585449219, |
|
"rewards/margins": 0.8521866798400879, |
|
"rewards/rejected": -1.3119454383850098, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 2.171875, |
|
"learning_rate": 2.8664730985699537e-06, |
|
"logits/chosen": -2.7080225944519043, |
|
"logits/rejected": -2.717515707015991, |
|
"logps/chosen": -313.6962890625, |
|
"logps/rejected": -389.29205322265625, |
|
"loss": 0.4272, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.46188563108444214, |
|
"rewards/margins": 0.994129478931427, |
|
"rewards/rejected": -1.4560149908065796, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 2.5, |
|
"learning_rate": 2.8212187278611907e-06, |
|
"logits/chosen": -2.7235171794891357, |
|
"logits/rejected": -2.736121416091919, |
|
"logps/chosen": -342.36273193359375, |
|
"logps/rejected": -417.322509765625, |
|
"loss": 0.4259, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.6042460203170776, |
|
"rewards/margins": 1.0531026124954224, |
|
"rewards/rejected": -1.6573486328125, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 1.7265625, |
|
"learning_rate": 2.7758570727066843e-06, |
|
"logits/chosen": -2.690983533859253, |
|
"logits/rejected": -2.6975481510162354, |
|
"logps/chosen": -342.63079833984375, |
|
"logps/rejected": -407.81231689453125, |
|
"loss": 0.4475, |
|
"rewards/accuracies": 0.653124988079071, |
|
"rewards/chosen": -0.8637169599533081, |
|
"rewards/margins": 0.7765380144119263, |
|
"rewards/rejected": -1.6402549743652344, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 1.7109375, |
|
"learning_rate": 2.730403283530767e-06, |
|
"logits/chosen": -2.6636836528778076, |
|
"logits/rejected": -2.661403179168701, |
|
"logps/chosen": -344.44744873046875, |
|
"logps/rejected": -413.1937561035156, |
|
"loss": 0.4248, |
|
"rewards/accuracies": 0.715624988079071, |
|
"rewards/chosen": -0.7091799974441528, |
|
"rewards/margins": 1.0423099994659424, |
|
"rewards/rejected": -1.7514899969100952, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 4.21875, |
|
"learning_rate": 2.6848725415297888e-06, |
|
"logits/chosen": -2.6942477226257324, |
|
"logits/rejected": -2.7120838165283203, |
|
"logps/chosen": -336.60760498046875, |
|
"logps/rejected": -421.9783630371094, |
|
"loss": 0.4339, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": -0.7389696836471558, |
|
"rewards/margins": 0.9657734036445618, |
|
"rewards/rejected": -1.7047427892684937, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 1.140625, |
|
"learning_rate": 2.639280053601719e-06, |
|
"logits/chosen": -2.700098991394043, |
|
"logits/rejected": -2.7320022583007812, |
|
"logps/chosen": -346.1884460449219, |
|
"logps/rejected": -410.5491638183594, |
|
"loss": 0.4324, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.5768822431564331, |
|
"rewards/margins": 0.9592208862304688, |
|
"rewards/rejected": -1.5361031293869019, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 3.046875, |
|
"learning_rate": 2.59364104726716e-06, |
|
"logits/chosen": -2.7298641204833984, |
|
"logits/rejected": -2.73411226272583, |
|
"logps/chosen": -346.91912841796875, |
|
"logps/rejected": -417.67034912109375, |
|
"loss": 0.43, |
|
"rewards/accuracies": 0.7281249761581421, |
|
"rewards/chosen": -0.7697745561599731, |
|
"rewards/margins": 1.0892785787582397, |
|
"rewards/rejected": -1.8590532541275024, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 2.609375, |
|
"learning_rate": 2.547970765583491e-06, |
|
"logits/chosen": -2.716035842895508, |
|
"logits/rejected": -2.702650547027588, |
|
"logps/chosen": -330.34124755859375, |
|
"logps/rejected": -416.9476623535156, |
|
"loss": 0.4345, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.7259209156036377, |
|
"rewards/margins": 0.974997878074646, |
|
"rewards/rejected": -1.7009187936782837, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 1.390625, |
|
"learning_rate": 2.502284462053799e-06, |
|
"logits/chosen": -2.656066656112671, |
|
"logits/rejected": -2.6666572093963623, |
|
"logps/chosen": -331.7149658203125, |
|
"logps/rejected": -423.1683654785156, |
|
"loss": 0.4287, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.5932595729827881, |
|
"rewards/margins": 1.035936713218689, |
|
"rewards/rejected": -1.6291964054107666, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 1.9296875, |
|
"learning_rate": 2.456597395532338e-06, |
|
"logits/chosen": -2.7209274768829346, |
|
"logits/rejected": -2.735020399093628, |
|
"logps/chosen": -328.7770080566406, |
|
"logps/rejected": -391.19354248046875, |
|
"loss": 0.4294, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.6377438306808472, |
|
"rewards/margins": 0.9876799583435059, |
|
"rewards/rejected": -1.625423789024353, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 3.515625, |
|
"learning_rate": 2.4109248251281953e-06, |
|
"logits/chosen": -2.7338156700134277, |
|
"logits/rejected": -2.7391562461853027, |
|
"logps/chosen": -343.69390869140625, |
|
"logps/rejected": -414.94805908203125, |
|
"loss": 0.432, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.6750501990318298, |
|
"rewards/margins": 1.0820457935333252, |
|
"rewards/rejected": -1.7570960521697998, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.99609375, |
|
"learning_rate": 2.365282005108875e-06, |
|
"logits/chosen": -2.7210652828216553, |
|
"logits/rejected": -2.7339107990264893, |
|
"logps/chosen": -335.1302490234375, |
|
"logps/rejected": -390.1886901855469, |
|
"loss": 0.4303, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.6980403661727905, |
|
"rewards/margins": 1.0391963720321655, |
|
"rewards/rejected": -1.737236738204956, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 2.921875, |
|
"learning_rate": 2.319684179805491e-06, |
|
"logits/chosen": -2.6966023445129395, |
|
"logits/rejected": -2.7194454669952393, |
|
"logps/chosen": -343.26934814453125, |
|
"logps/rejected": -400.3363037109375, |
|
"loss": 0.4381, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.8400837779045105, |
|
"rewards/margins": 0.9520319104194641, |
|
"rewards/rejected": -1.7921158075332642, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 2.0, |
|
"learning_rate": 2.2741465785212905e-06, |
|
"logits/chosen": -2.7013275623321533, |
|
"logits/rejected": -2.7162578105926514, |
|
"logps/chosen": -336.082763671875, |
|
"logps/rejected": -424.86895751953125, |
|
"loss": 0.4383, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": -0.7391124963760376, |
|
"rewards/margins": 0.9916101694107056, |
|
"rewards/rejected": -1.7307227849960327, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 1.359375, |
|
"learning_rate": 2.2286844104451848e-06, |
|
"logits/chosen": -2.7336266040802, |
|
"logits/rejected": -2.7385802268981934, |
|
"logps/chosen": -368.2712707519531, |
|
"logps/rejected": -442.3502502441406, |
|
"loss": 0.4393, |
|
"rewards/accuracies": 0.6781250238418579, |
|
"rewards/chosen": -0.8786946535110474, |
|
"rewards/margins": 0.9058005213737488, |
|
"rewards/rejected": -1.7844951152801514, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 1.1328125, |
|
"learning_rate": 2.183312859572008e-06, |
|
"logits/chosen": -2.6661019325256348, |
|
"logits/rejected": -2.687495708465576, |
|
"logps/chosen": -360.59503173828125, |
|
"logps/rejected": -445.728759765625, |
|
"loss": 0.4259, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.6851642727851868, |
|
"rewards/margins": 1.106838345527649, |
|
"rewards/rejected": -1.7920026779174805, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 3.015625, |
|
"learning_rate": 2.1380470796311843e-06, |
|
"logits/chosen": -2.6705803871154785, |
|
"logits/rejected": -2.668128252029419, |
|
"logps/chosen": -339.4599609375, |
|
"logps/rejected": -421.2923278808594, |
|
"loss": 0.4272, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.642924964427948, |
|
"rewards/margins": 1.065707802772522, |
|
"rewards/rejected": -1.7086328268051147, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 3.96875, |
|
"learning_rate": 2.092902189025507e-06, |
|
"logits/chosen": -2.6446332931518555, |
|
"logits/rejected": -2.655050754547119, |
|
"logps/chosen": -355.1614074707031, |
|
"logps/rejected": -427.96636962890625, |
|
"loss": 0.4265, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.7822728157043457, |
|
"rewards/margins": 1.1439166069030762, |
|
"rewards/rejected": -1.9261894226074219, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.98828125, |
|
"learning_rate": 2.0478932657817105e-06, |
|
"logits/chosen": -2.737699031829834, |
|
"logits/rejected": -2.732815742492676, |
|
"logps/chosen": -356.5775451660156, |
|
"logps/rejected": -431.93609619140625, |
|
"loss": 0.4282, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.8207361102104187, |
|
"rewards/margins": 1.0940120220184326, |
|
"rewards/rejected": -1.914747953414917, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 2.546875, |
|
"learning_rate": 2.0030353425145376e-06, |
|
"logits/chosen": -2.676217555999756, |
|
"logits/rejected": -2.6970784664154053, |
|
"logps/chosen": -318.20355224609375, |
|
"logps/rejected": -386.79388427734375, |
|
"loss": 0.4289, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.741962730884552, |
|
"rewards/margins": 1.0068390369415283, |
|
"rewards/rejected": -1.748801589012146, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.7890625, |
|
"learning_rate": 1.958343401405964e-06, |
|
"logits/chosen": -2.6743831634521484, |
|
"logits/rejected": -2.6914896965026855, |
|
"logps/chosen": -323.74053955078125, |
|
"logps/rejected": -417.9308166503906, |
|
"loss": 0.4318, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.6001055240631104, |
|
"rewards/margins": 1.030330777168274, |
|
"rewards/rejected": -1.6304363012313843, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 5.53125, |
|
"learning_rate": 1.9138323692012734e-06, |
|
"logits/chosen": -2.719978094100952, |
|
"logits/rejected": -2.7362709045410156, |
|
"logps/chosen": -316.5953063964844, |
|
"logps/rejected": -410.26593017578125, |
|
"loss": 0.4226, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.5119751691818237, |
|
"rewards/margins": 1.1661722660064697, |
|
"rewards/rejected": -1.678147554397583, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 1.7890625, |
|
"learning_rate": 1.8695171122236443e-06, |
|
"logits/chosen": -2.6844494342803955, |
|
"logits/rejected": -2.6935813426971436, |
|
"logps/chosen": -319.3164367675781, |
|
"logps/rejected": -416.75616455078125, |
|
"loss": 0.4208, |
|
"rewards/accuracies": 0.7718750238418579, |
|
"rewards/chosen": -0.5857919454574585, |
|
"rewards/margins": 1.1580404043197632, |
|
"rewards/rejected": -1.7438323497772217, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 3.0625, |
|
"learning_rate": 1.8254124314089225e-06, |
|
"logits/chosen": -2.755115509033203, |
|
"logits/rejected": -2.7330613136291504, |
|
"logps/chosen": -322.7129821777344, |
|
"logps/rejected": -407.7967529296875, |
|
"loss": 0.4345, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.4961959719657898, |
|
"rewards/margins": 1.078574776649475, |
|
"rewards/rejected": -1.5747709274291992, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 1.9375, |
|
"learning_rate": 1.781533057362221e-06, |
|
"logits/chosen": -2.746798038482666, |
|
"logits/rejected": -2.7739720344543457, |
|
"logps/chosen": -299.58160400390625, |
|
"logps/rejected": -374.47930908203125, |
|
"loss": 0.4296, |
|
"rewards/accuracies": 0.7281249761581421, |
|
"rewards/chosen": -0.4686378538608551, |
|
"rewards/margins": 0.9811908602714539, |
|
"rewards/rejected": -1.4498287439346313, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 3.296875, |
|
"learning_rate": 1.7378936454380277e-06, |
|
"logits/chosen": -2.7310540676116943, |
|
"logits/rejected": -2.7376887798309326, |
|
"logps/chosen": -327.89410400390625, |
|
"logps/rejected": -404.711669921875, |
|
"loss": 0.4279, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.5759900808334351, |
|
"rewards/margins": 1.1363575458526611, |
|
"rewards/rejected": -1.7123476266860962, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 1.5390625, |
|
"learning_rate": 1.6945087708454273e-06, |
|
"logits/chosen": -2.6734468936920166, |
|
"logits/rejected": -2.714264392852783, |
|
"logps/chosen": -343.795654296875, |
|
"logps/rejected": -427.343505859375, |
|
"loss": 0.4219, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.5730609893798828, |
|
"rewards/margins": 1.2771522998809814, |
|
"rewards/rejected": -1.8502132892608643, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 1.8125, |
|
"learning_rate": 1.651392923780105e-06, |
|
"logits/chosen": -2.6670920848846436, |
|
"logits/rejected": -2.6546778678894043, |
|
"logps/chosen": -322.3703918457031, |
|
"logps/rejected": -434.3629455566406, |
|
"loss": 0.4254, |
|
"rewards/accuracies": 0.7406250238418579, |
|
"rewards/chosen": -0.6311038136482239, |
|
"rewards/margins": 1.2766520977020264, |
|
"rewards/rejected": -1.9077558517456055, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 2.59375, |
|
"learning_rate": 1.608560504584737e-06, |
|
"logits/chosen": -2.737743377685547, |
|
"logits/rejected": -2.720360517501831, |
|
"logps/chosen": -334.63275146484375, |
|
"logps/rejected": -431.16827392578125, |
|
"loss": 0.4287, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.8308721780776978, |
|
"rewards/margins": 1.0826470851898193, |
|
"rewards/rejected": -1.913519263267517, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 5.21875, |
|
"learning_rate": 1.5660258189393945e-06, |
|
"logits/chosen": -2.7399215698242188, |
|
"logits/rejected": -2.750929117202759, |
|
"logps/chosen": -338.90924072265625, |
|
"logps/rejected": -417.3045959472656, |
|
"loss": 0.4214, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": -0.5204821825027466, |
|
"rewards/margins": 1.254575252532959, |
|
"rewards/rejected": -1.7750571966171265, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 5.96875, |
|
"learning_rate": 1.5238030730835578e-06, |
|
"logits/chosen": -2.716184616088867, |
|
"logits/rejected": -2.690531015396118, |
|
"logps/chosen": -311.8260192871094, |
|
"logps/rejected": -440.0191345214844, |
|
"loss": 0.4252, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.6099028587341309, |
|
"rewards/margins": 1.3696248531341553, |
|
"rewards/rejected": -1.979527473449707, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 8.5625, |
|
"learning_rate": 1.4819063690713565e-06, |
|
"logits/chosen": -2.723191022872925, |
|
"logits/rejected": -2.699627637863159, |
|
"logps/chosen": -343.42608642578125, |
|
"logps/rejected": -442.84033203125, |
|
"loss": 0.424, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": -0.7352994680404663, |
|
"rewards/margins": 1.2973926067352295, |
|
"rewards/rejected": -2.0326919555664062, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 1.3359375, |
|
"learning_rate": 1.4403497000615885e-06, |
|
"logits/chosen": -2.709167242050171, |
|
"logits/rejected": -2.711165189743042, |
|
"logps/chosen": -346.2407531738281, |
|
"logps/rejected": -453.6136779785156, |
|
"loss": 0.4365, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.9276860952377319, |
|
"rewards/margins": 1.1097049713134766, |
|
"rewards/rejected": -2.037391185760498, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 3.109375, |
|
"learning_rate": 1.3991469456441273e-06, |
|
"logits/chosen": -2.6968870162963867, |
|
"logits/rejected": -2.7066521644592285, |
|
"logps/chosen": -349.227294921875, |
|
"logps/rejected": -449.37274169921875, |
|
"loss": 0.4264, |
|
"rewards/accuracies": 0.715624988079071, |
|
"rewards/chosen": -0.8388339281082153, |
|
"rewards/margins": 1.244845986366272, |
|
"rewards/rejected": -2.083679676055908, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 1.90625, |
|
"learning_rate": 1.3583118672042441e-06, |
|
"logits/chosen": -2.682274580001831, |
|
"logits/rejected": -2.689542055130005, |
|
"logps/chosen": -358.6896057128906, |
|
"logps/rejected": -461.7701721191406, |
|
"loss": 0.4298, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.8582962155342102, |
|
"rewards/margins": 1.2384599447250366, |
|
"rewards/rejected": -2.0967559814453125, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 2.734375, |
|
"learning_rate": 1.3178581033264218e-06, |
|
"logits/chosen": -2.7299036979675293, |
|
"logits/rejected": -2.725554943084717, |
|
"logps/chosen": -337.6006774902344, |
|
"logps/rejected": -427.9097595214844, |
|
"loss": 0.4254, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.8014458417892456, |
|
"rewards/margins": 1.1019701957702637, |
|
"rewards/rejected": -1.9034160375595093, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 5.71875, |
|
"learning_rate": 1.2777991652391757e-06, |
|
"logits/chosen": -2.7372395992279053, |
|
"logits/rejected": -2.713787078857422, |
|
"logps/chosen": -349.9215087890625, |
|
"logps/rejected": -458.30218505859375, |
|
"loss": 0.4146, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.605326235294342, |
|
"rewards/margins": 1.2629462480545044, |
|
"rewards/rejected": -1.8682724237442017, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 2.796875, |
|
"learning_rate": 1.2381484323024178e-06, |
|
"logits/chosen": -2.710744619369507, |
|
"logits/rejected": -2.7276930809020996, |
|
"logps/chosen": -340.2386779785156, |
|
"logps/rejected": -410.47406005859375, |
|
"loss": 0.4286, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": -0.6984527111053467, |
|
"rewards/margins": 0.98872309923172, |
|
"rewards/rejected": -1.687175989151001, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 4.375, |
|
"learning_rate": 1.1989191475388518e-06, |
|
"logits/chosen": -2.7345008850097656, |
|
"logits/rejected": -2.737495183944702, |
|
"logps/chosen": -341.25408935546875, |
|
"logps/rejected": -463.845703125, |
|
"loss": 0.423, |
|
"rewards/accuracies": 0.7281249761581421, |
|
"rewards/chosen": -0.720673680305481, |
|
"rewards/margins": 1.3686655759811401, |
|
"rewards/rejected": -2.089339256286621, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 1.484375, |
|
"learning_rate": 1.160124413210918e-06, |
|
"logits/chosen": -2.710549831390381, |
|
"logits/rejected": -2.7327733039855957, |
|
"logps/chosen": -357.73516845703125, |
|
"logps/rejected": -460.0575256347656, |
|
"loss": 0.4351, |
|
"rewards/accuracies": 0.6781250238418579, |
|
"rewards/chosen": -0.7213457822799683, |
|
"rewards/margins": 1.1925950050354004, |
|
"rewards/rejected": -1.9139407873153687, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 5.8125, |
|
"learning_rate": 1.1217771864447396e-06, |
|
"logits/chosen": -2.745375871658325, |
|
"logits/rejected": -2.734910488128662, |
|
"logps/chosen": -334.57159423828125, |
|
"logps/rejected": -431.4033203125, |
|
"loss": 0.4065, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.6131495833396912, |
|
"rewards/margins": 1.3364530801773071, |
|
"rewards/rejected": -1.949602484703064, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 3.5, |
|
"learning_rate": 1.08389027490255e-06, |
|
"logits/chosen": -2.7352352142333984, |
|
"logits/rejected": -2.731990098953247, |
|
"logps/chosen": -318.87939453125, |
|
"logps/rejected": -447.78167724609375, |
|
"loss": 0.4207, |
|
"rewards/accuracies": 0.746874988079071, |
|
"rewards/chosen": -0.6276998519897461, |
|
"rewards/margins": 1.426599383354187, |
|
"rewards/rejected": -2.0542993545532227, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 3.765625, |
|
"learning_rate": 1.046476332505036e-06, |
|
"logits/chosen": -2.7105917930603027, |
|
"logits/rejected": -2.7000508308410645, |
|
"logps/chosen": -351.5965881347656, |
|
"logps/rejected": -464.24041748046875, |
|
"loss": 0.4229, |
|
"rewards/accuracies": 0.715624988079071, |
|
"rewards/chosen": -0.666134238243103, |
|
"rewards/margins": 1.3925716876983643, |
|
"rewards/rejected": -2.0587058067321777, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 1.78125, |
|
"learning_rate": 1.0095478552050348e-06, |
|
"logits/chosen": -2.7293667793273926, |
|
"logits/rejected": -2.748169422149658, |
|
"logps/chosen": -347.258056640625, |
|
"logps/rejected": -454.30126953125, |
|
"loss": 0.428, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.7874716520309448, |
|
"rewards/margins": 1.2901605367660522, |
|
"rewards/rejected": -2.077632188796997, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 3.6875, |
|
"learning_rate": 9.731171768139808e-07, |
|
"logits/chosen": -2.747467279434204, |
|
"logits/rejected": -2.7329678535461426, |
|
"logps/chosen": -328.9295959472656, |
|
"logps/rejected": -426.88812255859375, |
|
"loss": 0.4343, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.7400668859481812, |
|
"rewards/margins": 1.056199073791504, |
|
"rewards/rejected": -1.796265959739685, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 2.40625, |
|
"learning_rate": 9.371964648825221e-07, |
|
"logits/chosen": -2.7056431770324707, |
|
"logits/rejected": -2.711988925933838, |
|
"logps/chosen": -358.60186767578125, |
|
"logps/rejected": -416.0870056152344, |
|
"loss": 0.4388, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.9142545461654663, |
|
"rewards/margins": 0.8788741827011108, |
|
"rewards/rejected": -1.7931289672851562, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 3.09375, |
|
"learning_rate": 9.017977166366445e-07, |
|
"logits/chosen": -2.712580919265747, |
|
"logits/rejected": -2.7144558429718018, |
|
"logps/chosen": -340.9812316894531, |
|
"logps/rejected": -484.244873046875, |
|
"loss": 0.4154, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": -0.7170324921607971, |
|
"rewards/margins": 1.5092626810073853, |
|
"rewards/rejected": -2.2262954711914062, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 4.5625, |
|
"learning_rate": 8.669327549707096e-07, |
|
"logits/chosen": -2.7783093452453613, |
|
"logits/rejected": -2.782752513885498, |
|
"logps/chosen": -336.6181335449219, |
|
"logps/rejected": -427.4580078125, |
|
"loss": 0.4337, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.7095115184783936, |
|
"rewards/margins": 1.0761725902557373, |
|
"rewards/rejected": -1.7856842279434204, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 6.625, |
|
"learning_rate": 8.326132244986932e-07, |
|
"logits/chosen": -2.7186310291290283, |
|
"logits/rejected": -2.7277238368988037, |
|
"logps/chosen": -330.8462219238281, |
|
"logps/rejected": -445.0597229003906, |
|
"loss": 0.4328, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.7316503524780273, |
|
"rewards/margins": 1.2994225025177002, |
|
"rewards/rejected": -2.0310728549957275, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 6.40625, |
|
"learning_rate": 7.988505876649863e-07, |
|
"logits/chosen": -2.673827648162842, |
|
"logits/rejected": -2.6909382343292236, |
|
"logps/chosen": -328.7157287597656, |
|
"logps/rejected": -426.40997314453125, |
|
"loss": 0.4208, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.6509988307952881, |
|
"rewards/margins": 1.171143651008606, |
|
"rewards/rejected": -1.8221423625946045, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 5.03125, |
|
"learning_rate": 7.656561209160248e-07, |
|
"logits/chosen": -2.688586711883545, |
|
"logits/rejected": -2.6996278762817383, |
|
"logps/chosen": -335.6667785644531, |
|
"logps/rejected": -412.533447265625, |
|
"loss": 0.4229, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.6491155624389648, |
|
"rewards/margins": 1.1151740550994873, |
|
"rewards/rejected": -1.7642898559570312, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 5.71875, |
|
"learning_rate": 7.330409109340563e-07, |
|
"logits/chosen": -2.7187418937683105, |
|
"logits/rejected": -2.713865041732788, |
|
"logps/chosen": -330.7655334472656, |
|
"logps/rejected": -445.03485107421875, |
|
"loss": 0.421, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.6798223257064819, |
|
"rewards/margins": 1.387519121170044, |
|
"rewards/rejected": -2.0673413276672363, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 4.21875, |
|
"learning_rate": 7.010158509342682e-07, |
|
"logits/chosen": -2.705104351043701, |
|
"logits/rejected": -2.7204995155334473, |
|
"logps/chosen": -348.8349304199219, |
|
"logps/rejected": -465.5157775878906, |
|
"loss": 0.4239, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7643205523490906, |
|
"rewards/margins": 1.3054136037826538, |
|
"rewards/rejected": -2.0697340965270996, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 3.25, |
|
"learning_rate": 6.695916370265529e-07, |
|
"logits/chosen": -2.7456018924713135, |
|
"logits/rejected": -2.75111722946167, |
|
"logps/chosen": -298.90582275390625, |
|
"logps/rejected": -406.1940002441406, |
|
"loss": 0.4243, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.6092133522033691, |
|
"rewards/margins": 1.119093894958496, |
|
"rewards/rejected": -1.7283073663711548, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 3.75, |
|
"learning_rate": 6.387787646430854e-07, |
|
"logits/chosen": -2.7123939990997314, |
|
"logits/rejected": -2.7206320762634277, |
|
"logps/chosen": -311.45892333984375, |
|
"logps/rejected": -402.0213317871094, |
|
"loss": 0.4307, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.5885223746299744, |
|
"rewards/margins": 1.1126749515533447, |
|
"rewards/rejected": -1.7011972665786743, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 2.28125, |
|
"learning_rate": 6.085875250329401e-07, |
|
"logits/chosen": -2.7589831352233887, |
|
"logits/rejected": -2.747678756713867, |
|
"logps/chosen": -326.8139953613281, |
|
"logps/rejected": -408.4124450683594, |
|
"loss": 0.4281, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.6763142347335815, |
|
"rewards/margins": 1.2141085863113403, |
|
"rewards/rejected": -1.8904228210449219, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 4.4375, |
|
"learning_rate": 5.79028001824894e-07, |
|
"logits/chosen": -2.676466941833496, |
|
"logits/rejected": -2.6653060913085938, |
|
"logps/chosen": -302.88861083984375, |
|
"logps/rejected": -453.6151428222656, |
|
"loss": 0.4205, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.5426191687583923, |
|
"rewards/margins": 1.233906626701355, |
|
"rewards/rejected": -1.776525855064392, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 2.265625, |
|
"learning_rate": 5.501100676595761e-07, |
|
"logits/chosen": -2.7586090564727783, |
|
"logits/rejected": -2.7702584266662598, |
|
"logps/chosen": -345.1263732910156, |
|
"logps/rejected": -439.22808837890625, |
|
"loss": 0.4249, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.6815992593765259, |
|
"rewards/margins": 1.174318790435791, |
|
"rewards/rejected": -1.8559181690216064, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 3.3125, |
|
"learning_rate": 5.218433808920884e-07, |
|
"logits/chosen": -2.741339921951294, |
|
"logits/rejected": -2.7451634407043457, |
|
"logps/chosen": -329.6106262207031, |
|
"logps/rejected": -418.102294921875, |
|
"loss": 0.418, |
|
"rewards/accuracies": 0.778124988079071, |
|
"rewards/chosen": -0.48116618394851685, |
|
"rewards/margins": 1.2408123016357422, |
|
"rewards/rejected": -1.7219784259796143, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 2.78125, |
|
"learning_rate": 4.942373823661928e-07, |
|
"logits/chosen": -2.7156219482421875, |
|
"logits/rejected": -2.7195382118225098, |
|
"logps/chosen": -338.1180114746094, |
|
"logps/rejected": -429.4137268066406, |
|
"loss": 0.4242, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.5551623702049255, |
|
"rewards/margins": 1.1304242610931396, |
|
"rewards/rejected": -1.6855865716934204, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.64453125, |
|
"learning_rate": 4.6730129226114363e-07, |
|
"logits/chosen": -2.74899959564209, |
|
"logits/rejected": -2.7066941261291504, |
|
"logps/chosen": -316.0484619140625, |
|
"logps/rejected": -443.41156005859375, |
|
"loss": 0.4244, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": -0.6658861637115479, |
|
"rewards/margins": 1.3104979991912842, |
|
"rewards/rejected": -1.976383924484253, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 3.03125, |
|
"learning_rate": 4.4104410701222703e-07, |
|
"logits/chosen": -2.764725923538208, |
|
"logits/rejected": -2.7511062622070312, |
|
"logps/chosen": -345.6468200683594, |
|
"logps/rejected": -458.3433532714844, |
|
"loss": 0.4297, |
|
"rewards/accuracies": 0.684374988079071, |
|
"rewards/chosen": -0.7146095037460327, |
|
"rewards/margins": 1.2348105907440186, |
|
"rewards/rejected": -1.9494202136993408, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 2.609375, |
|
"learning_rate": 4.154745963060197e-07, |
|
"logits/chosen": -2.690899610519409, |
|
"logits/rejected": -2.68369460105896, |
|
"logps/chosen": -327.4097900390625, |
|
"logps/rejected": -445.50457763671875, |
|
"loss": 0.4267, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": -0.7736159563064575, |
|
"rewards/margins": 1.229644775390625, |
|
"rewards/rejected": -2.003260850906372, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 2.640625, |
|
"learning_rate": 3.9060130015138863e-07, |
|
"logits/chosen": -2.7254629135131836, |
|
"logits/rejected": -2.7183403968811035, |
|
"logps/chosen": -338.1430358886719, |
|
"logps/rejected": -464.69049072265625, |
|
"loss": 0.4245, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": -0.7131890058517456, |
|
"rewards/margins": 1.2739613056182861, |
|
"rewards/rejected": -1.987149953842163, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 1.015625, |
|
"learning_rate": 3.664325260271953e-07, |
|
"logits/chosen": -2.7088608741760254, |
|
"logits/rejected": -2.730421304702759, |
|
"logps/chosen": -357.98126220703125, |
|
"logps/rejected": -414.66668701171875, |
|
"loss": 0.4309, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.7439313530921936, |
|
"rewards/margins": 0.9089797139167786, |
|
"rewards/rejected": -1.652910828590393, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 4.28125, |
|
"learning_rate": 3.429763461076677e-07, |
|
"logits/chosen": -2.7392077445983887, |
|
"logits/rejected": -2.7532036304473877, |
|
"logps/chosen": -340.41436767578125, |
|
"logps/rejected": -411.6546936035156, |
|
"loss": 0.4351, |
|
"rewards/accuracies": 0.6781250238418579, |
|
"rewards/chosen": -0.7221731543540955, |
|
"rewards/margins": 0.9596914052963257, |
|
"rewards/rejected": -1.6818645000457764, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 6.1875, |
|
"learning_rate": 3.202405945663556e-07, |
|
"logits/chosen": -2.739431858062744, |
|
"logits/rejected": -2.7153592109680176, |
|
"logps/chosen": -351.1690368652344, |
|
"logps/rejected": -460.6399841308594, |
|
"loss": 0.4206, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.7035288214683533, |
|
"rewards/margins": 1.3128876686096191, |
|
"rewards/rejected": -2.016416549682617, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 4.34375, |
|
"learning_rate": 2.982328649595856e-07, |
|
"logits/chosen": -2.7347230911254883, |
|
"logits/rejected": -2.7274715900421143, |
|
"logps/chosen": -338.5975646972656, |
|
"logps/rejected": -435.02972412109375, |
|
"loss": 0.4329, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": -0.722823441028595, |
|
"rewards/margins": 1.025390625, |
|
"rewards/rejected": -1.7482140064239502, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 2.75, |
|
"learning_rate": 2.7696050769026954e-07, |
|
"logits/chosen": -2.7211785316467285, |
|
"logits/rejected": -2.684309720993042, |
|
"logps/chosen": -344.1480407714844, |
|
"logps/rejected": -481.3314514160156, |
|
"loss": 0.4299, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.7990375757217407, |
|
"rewards/margins": 1.399259328842163, |
|
"rewards/rejected": -2.1982970237731934, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 2.015625, |
|
"learning_rate": 2.564306275529341e-07, |
|
"logits/chosen": -2.7370145320892334, |
|
"logits/rejected": -2.7407755851745605, |
|
"logps/chosen": -321.891357421875, |
|
"logps/rejected": -401.0546569824219, |
|
"loss": 0.4248, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.7101179957389832, |
|
"rewards/margins": 1.1242733001708984, |
|
"rewards/rejected": -1.8343912363052368, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 3.671875, |
|
"learning_rate": 2.3665008136077332e-07, |
|
"logits/chosen": -2.7449889183044434, |
|
"logits/rejected": -2.7408945560455322, |
|
"logps/chosen": -360.5602722167969, |
|
"logps/rejected": -467.7608337402344, |
|
"loss": 0.4302, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.823918342590332, |
|
"rewards/margins": 1.2585489749908447, |
|
"rewards/rejected": -2.0824673175811768, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 4.25, |
|
"learning_rate": 2.1762547565553293e-07, |
|
"logits/chosen": -2.6576642990112305, |
|
"logits/rejected": -2.6262636184692383, |
|
"logps/chosen": -352.53857421875, |
|
"logps/rejected": -485.3980407714844, |
|
"loss": 0.419, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.7759647965431213, |
|
"rewards/margins": 1.4105063676834106, |
|
"rewards/rejected": -2.1864712238311768, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 1.2578125, |
|
"learning_rate": 1.993631645009747e-07, |
|
"logits/chosen": -2.70914888381958, |
|
"logits/rejected": -2.6992902755737305, |
|
"logps/chosen": -348.275390625, |
|
"logps/rejected": -432.3793029785156, |
|
"loss": 0.4294, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.9621282815933228, |
|
"rewards/margins": 1.0199404954910278, |
|
"rewards/rejected": -1.982068657875061, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 4.125, |
|
"learning_rate": 1.818692473606748e-07, |
|
"logits/chosen": -2.749687671661377, |
|
"logits/rejected": -2.743114948272705, |
|
"logps/chosen": -356.88311767578125, |
|
"logps/rejected": -437.4881896972656, |
|
"loss": 0.4101, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.5851279497146606, |
|
"rewards/margins": 1.345589280128479, |
|
"rewards/rejected": -1.93071711063385, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 3.015625, |
|
"learning_rate": 1.6514956706084885e-07, |
|
"logits/chosen": -2.731550931930542, |
|
"logits/rejected": -2.7247581481933594, |
|
"logps/chosen": -367.1907653808594, |
|
"logps/rejected": -460.9814453125, |
|
"loss": 0.4151, |
|
"rewards/accuracies": 0.746874988079071, |
|
"rewards/chosen": -0.8159070014953613, |
|
"rewards/margins": 1.3104612827301025, |
|
"rewards/rejected": -2.1263680458068848, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 1.921875, |
|
"learning_rate": 1.4920970783889737e-07, |
|
"logits/chosen": -2.728755235671997, |
|
"logits/rejected": -2.716214895248413, |
|
"logps/chosen": -326.6175842285156, |
|
"logps/rejected": -425.10528564453125, |
|
"loss": 0.4179, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.5755528807640076, |
|
"rewards/margins": 1.2382352352142334, |
|
"rewards/rejected": -1.8137880563735962, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 1.921875, |
|
"learning_rate": 1.340549934783164e-07, |
|
"logits/chosen": -2.7430858612060547, |
|
"logits/rejected": -2.7155654430389404, |
|
"logps/chosen": -319.4033508300781, |
|
"logps/rejected": -460.38494873046875, |
|
"loss": 0.4219, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": -0.5762430429458618, |
|
"rewards/margins": 1.5360796451568604, |
|
"rewards/rejected": -2.1123225688934326, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.8046875, |
|
"learning_rate": 1.196904855305961e-07, |
|
"logits/chosen": -2.7446229457855225, |
|
"logits/rejected": -2.741819143295288, |
|
"logps/chosen": -347.061279296875, |
|
"logps/rejected": -454.8154296875, |
|
"loss": 0.4227, |
|
"rewards/accuracies": 0.690625011920929, |
|
"rewards/chosen": -0.7210251688957214, |
|
"rewards/margins": 1.3714605569839478, |
|
"rewards/rejected": -2.0924859046936035, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 6.78125, |
|
"learning_rate": 1.0612098162470302e-07, |
|
"logits/chosen": -2.754495143890381, |
|
"logits/rejected": -2.770519733428955, |
|
"logps/chosen": -356.2037658691406, |
|
"logps/rejected": -428.56915283203125, |
|
"loss": 0.4204, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6976330876350403, |
|
"rewards/margins": 1.1086335182189941, |
|
"rewards/rejected": -1.8062665462493896, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 1.703125, |
|
"learning_rate": 9.335101386471285e-08, |
|
"logits/chosen": -2.7438502311706543, |
|
"logits/rejected": -2.732393741607666, |
|
"logps/chosen": -325.47747802734375, |
|
"logps/rejected": -466.6300354003906, |
|
"loss": 0.4096, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.5821502208709717, |
|
"rewards/margins": 1.489473819732666, |
|
"rewards/rejected": -2.0716240406036377, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 1.125, |
|
"learning_rate": 8.138484731612273e-08, |
|
"logits/chosen": -2.6938061714172363, |
|
"logits/rejected": -2.6772360801696777, |
|
"logps/chosen": -339.33856201171875, |
|
"logps/rejected": -459.6874084472656, |
|
"loss": 0.4259, |
|
"rewards/accuracies": 0.715624988079071, |
|
"rewards/chosen": -0.7721551060676575, |
|
"rewards/margins": 1.2288380861282349, |
|
"rewards/rejected": -2.000993251800537, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 5.15625, |
|
"learning_rate": 7.022647858135501e-08, |
|
"logits/chosen": -2.744938850402832, |
|
"logits/rejected": -2.7732322216033936, |
|
"logps/chosen": -332.72509765625, |
|
"logps/rejected": -421.79949951171875, |
|
"loss": 0.4295, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.7302526235580444, |
|
"rewards/margins": 1.0942634344100952, |
|
"rewards/rejected": -1.82451593875885, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 1.7734375, |
|
"learning_rate": 5.987963446492384e-08, |
|
"logits/chosen": -2.6715445518493652, |
|
"logits/rejected": -2.6765220165252686, |
|
"logps/chosen": -348.5739440917969, |
|
"logps/rejected": -482.542236328125, |
|
"loss": 0.4117, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.6485568284988403, |
|
"rewards/margins": 1.4271323680877686, |
|
"rewards/rejected": -2.0756890773773193, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 4.46875, |
|
"learning_rate": 5.034777072871394e-08, |
|
"logits/chosen": -2.7057948112487793, |
|
"logits/rejected": -2.7085628509521484, |
|
"logps/chosen": -318.53497314453125, |
|
"logps/rejected": -424.2813415527344, |
|
"loss": 0.4289, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7039138674736023, |
|
"rewards/margins": 1.1032226085662842, |
|
"rewards/rejected": -1.8071365356445312, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 2.5625, |
|
"learning_rate": 4.163407093778243e-08, |
|
"logits/chosen": -2.6782615184783936, |
|
"logits/rejected": -2.6741080284118652, |
|
"logps/chosen": -333.125732421875, |
|
"logps/rejected": -420.4185485839844, |
|
"loss": 0.4345, |
|
"rewards/accuracies": 0.6968749761581421, |
|
"rewards/chosen": -0.8739233016967773, |
|
"rewards/margins": 1.0754514932632446, |
|
"rewards/rejected": -1.949374794960022, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 2.3125, |
|
"learning_rate": 3.37414453970758e-08, |
|
"logits/chosen": -2.742910146713257, |
|
"logits/rejected": -2.7536118030548096, |
|
"logps/chosen": -337.20489501953125, |
|
"logps/rejected": -413.38238525390625, |
|
"loss": 0.4246, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.6338103413581848, |
|
"rewards/margins": 1.2298805713653564, |
|
"rewards/rejected": -1.863690972328186, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 7.125, |
|
"learning_rate": 2.6672530179410183e-08, |
|
"logits/chosen": -2.7278337478637695, |
|
"logits/rejected": -2.7276864051818848, |
|
"logps/chosen": -324.9429626464844, |
|
"logps/rejected": -436.3682556152344, |
|
"loss": 0.4287, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": -0.6213011145591736, |
|
"rewards/margins": 1.2622824907302856, |
|
"rewards/rejected": -1.883583426475525, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 1.5859375, |
|
"learning_rate": 2.04296862450451e-08, |
|
"logits/chosen": -2.7341113090515137, |
|
"logits/rejected": -2.7228341102600098, |
|
"logps/chosen": -359.82659912109375, |
|
"logps/rejected": -492.15771484375, |
|
"loss": 0.4246, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.8840007781982422, |
|
"rewards/margins": 1.547300100326538, |
|
"rewards/rejected": -2.431300640106201, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 1.3046875, |
|
"learning_rate": 1.501499865314171e-08, |
|
"logits/chosen": -2.655301809310913, |
|
"logits/rejected": -2.6794886589050293, |
|
"logps/chosen": -379.09075927734375, |
|
"logps/rejected": -465.2655334472656, |
|
"loss": 0.4153, |
|
"rewards/accuracies": 0.7593749761581421, |
|
"rewards/chosen": -0.6638426184654236, |
|
"rewards/margins": 1.2455824613571167, |
|
"rewards/rejected": -1.909425139427185, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 1.8046875, |
|
"learning_rate": 1.0430275865371265e-08, |
|
"logits/chosen": -2.700707197189331, |
|
"logits/rejected": -2.6735973358154297, |
|
"logps/chosen": -324.67864990234375, |
|
"logps/rejected": -426.3045959472656, |
|
"loss": 0.4269, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.7671259641647339, |
|
"rewards/margins": 1.2393693923950195, |
|
"rewards/rejected": -2.006495475769043, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 2.96875, |
|
"learning_rate": 6.677049141901315e-09, |
|
"logits/chosen": -2.714787006378174, |
|
"logits/rejected": -2.729104518890381, |
|
"logps/chosen": -332.42156982421875, |
|
"logps/rejected": -465.78497314453125, |
|
"loss": 0.4213, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.6763681769371033, |
|
"rewards/margins": 1.4941723346710205, |
|
"rewards/rejected": -2.1705403327941895, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 1.4140625, |
|
"learning_rate": 3.756572029968708e-09, |
|
"logits/chosen": -2.7444651126861572, |
|
"logits/rejected": -2.756309986114502, |
|
"logps/chosen": -348.5865783691406, |
|
"logps/rejected": -453.53350830078125, |
|
"loss": 0.4206, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.7736285328865051, |
|
"rewards/margins": 1.2819445133209229, |
|
"rewards/rejected": -2.055572986602783, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 3.234375, |
|
"learning_rate": 1.6698199452053199e-09, |
|
"logits/chosen": -2.7279868125915527, |
|
"logits/rejected": -2.7107343673706055, |
|
"logps/chosen": -339.8231506347656, |
|
"logps/rejected": -435.91436767578125, |
|
"loss": 0.4382, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.6851938962936401, |
|
"rewards/margins": 0.9626883268356323, |
|
"rewards/rejected": -1.6478822231292725, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 1.25, |
|
"learning_rate": 4.1748984585560094e-10, |
|
"logits/chosen": -2.7459967136383057, |
|
"logits/rejected": -2.7115063667297363, |
|
"logps/chosen": -353.6007385253906, |
|
"logps/rejected": -478.2798767089844, |
|
"loss": 0.4255, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.7070942521095276, |
|
"rewards/margins": 1.4682670831680298, |
|
"rewards/rejected": -2.175361156463623, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.84765625, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -2.7471261024475098, |
|
"logits/rejected": -2.7326393127441406, |
|
"logps/chosen": -346.9029846191406, |
|
"logps/rejected": -450.1078186035156, |
|
"loss": 0.4197, |
|
"rewards/accuracies": 0.7281249761581421, |
|
"rewards/chosen": -0.7211757302284241, |
|
"rewards/margins": 1.336022973060608, |
|
"rewards/rejected": -2.0571985244750977, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 1910, |
|
"total_flos": 0.0, |
|
"train_loss": 0.4411179514611579, |
|
"train_runtime": 83930.2059, |
|
"train_samples_per_second": 0.728, |
|
"train_steps_per_second": 0.023 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1910, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|