|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 2550, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.9607843137254902e-08, |
|
"logits/chosen": -0.505158543586731, |
|
"logits/rejected": 1.1344256401062012, |
|
"logps/chosen": -534.2272338867188, |
|
"logps/rejected": -995.0223388671875, |
|
"loss": 0.21, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.9607843137254904e-07, |
|
"logits/chosen": -1.4771511554718018, |
|
"logits/rejected": -0.7203052043914795, |
|
"logps/chosen": -653.9701538085938, |
|
"logps/rejected": -1290.11083984375, |
|
"loss": 0.2983, |
|
"rewards/accuracies": 0.3055555522441864, |
|
"rewards/chosen": -0.00023890436568763107, |
|
"rewards/margins": -0.0006189702544361353, |
|
"rewards/rejected": 0.00038006596150808036, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.921568627450981e-07, |
|
"logits/chosen": -1.5881028175354004, |
|
"logits/rejected": -0.847257137298584, |
|
"logps/chosen": -677.5276489257812, |
|
"logps/rejected": -1343.302978515625, |
|
"loss": 0.34, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.0005764259840361774, |
|
"rewards/margins": 0.0008251671679317951, |
|
"rewards/rejected": -0.0002487411838956177, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.882352941176471e-07, |
|
"logits/chosen": -1.5565259456634521, |
|
"logits/rejected": -0.9040892720222473, |
|
"logps/chosen": -587.6061401367188, |
|
"logps/rejected": -1259.46630859375, |
|
"loss": 0.3992, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.0015199712943285704, |
|
"rewards/margins": 0.002795459469780326, |
|
"rewards/rejected": -0.004315430298447609, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 7.843137254901962e-07, |
|
"logits/chosen": -1.3543564081192017, |
|
"logits/rejected": -0.5594847798347473, |
|
"logps/chosen": -660.8809814453125, |
|
"logps/rejected": -1349.8839111328125, |
|
"loss": 0.3377, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.007950540632009506, |
|
"rewards/margins": 0.009673960506916046, |
|
"rewards/rejected": -0.017624501138925552, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.80392156862745e-07, |
|
"logits/chosen": -1.4439340829849243, |
|
"logits/rejected": -0.9004542231559753, |
|
"logps/chosen": -625.8778076171875, |
|
"logps/rejected": -1303.6329345703125, |
|
"loss": 0.3665, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.01639598235487938, |
|
"rewards/margins": 0.029322799295186996, |
|
"rewards/rejected": -0.04571877792477608, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.1764705882352942e-06, |
|
"logits/chosen": -1.5793389081954956, |
|
"logits/rejected": -0.6903096437454224, |
|
"logps/chosen": -691.1597290039062, |
|
"logps/rejected": -1354.8695068359375, |
|
"loss": 0.3259, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.04962822049856186, |
|
"rewards/margins": 0.04500482603907585, |
|
"rewards/rejected": -0.0946330577135086, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.3725490196078434e-06, |
|
"logits/chosen": -1.2960580587387085, |
|
"logits/rejected": -0.5226901173591614, |
|
"logps/chosen": -677.5730590820312, |
|
"logps/rejected": -1611.273681640625, |
|
"loss": 0.2328, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.09385339170694351, |
|
"rewards/margins": 0.11672432720661163, |
|
"rewards/rejected": -0.21057769656181335, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.5686274509803923e-06, |
|
"logits/chosen": -1.0945719480514526, |
|
"logits/rejected": -0.5267337560653687, |
|
"logps/chosen": -776.620849609375, |
|
"logps/rejected": -1658.595703125, |
|
"loss": 0.1974, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.19967763125896454, |
|
"rewards/margins": 0.2444140613079071, |
|
"rewards/rejected": -0.44409170746803284, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.7647058823529414e-06, |
|
"logits/chosen": -1.4510087966918945, |
|
"logits/rejected": -0.023749172687530518, |
|
"logps/chosen": -911.9953002929688, |
|
"logps/rejected": -1725.72265625, |
|
"loss": 0.2263, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.2247885912656784, |
|
"rewards/margins": 0.1656641662120819, |
|
"rewards/rejected": -0.3904527723789215, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.96078431372549e-06, |
|
"logits/chosen": -1.3619906902313232, |
|
"logits/rejected": -0.15897789597511292, |
|
"logps/chosen": -822.1832275390625, |
|
"logps/rejected": -1571.0025634765625, |
|
"loss": 0.2765, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.16276445984840393, |
|
"rewards/margins": 0.13095514476299286, |
|
"rewards/rejected": -0.2937195897102356, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.1568627450980393e-06, |
|
"logits/chosen": -1.1530998945236206, |
|
"logits/rejected": -0.40491175651550293, |
|
"logps/chosen": -854.7205200195312, |
|
"logps/rejected": -1822.2965087890625, |
|
"loss": 0.2099, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.15811415016651154, |
|
"rewards/margins": 0.23178577423095703, |
|
"rewards/rejected": -0.3898999094963074, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.3529411764705885e-06, |
|
"logits/chosen": -1.5039284229278564, |
|
"logits/rejected": -0.5590807199478149, |
|
"logps/chosen": -726.2496337890625, |
|
"logps/rejected": -1728.7135009765625, |
|
"loss": 0.2438, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.13852688670158386, |
|
"rewards/margins": 0.27724406123161316, |
|
"rewards/rejected": -0.415770947933197, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.549019607843137e-06, |
|
"logits/chosen": -1.47978937625885, |
|
"logits/rejected": -0.7583194971084595, |
|
"logps/chosen": -777.576416015625, |
|
"logps/rejected": -1722.5013427734375, |
|
"loss": 0.169, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.19047263264656067, |
|
"rewards/margins": 0.221228688955307, |
|
"rewards/rejected": -0.41170138120651245, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.7450980392156867e-06, |
|
"logits/chosen": -1.3619725704193115, |
|
"logits/rejected": -0.45514482259750366, |
|
"logps/chosen": -859.7752685546875, |
|
"logps/rejected": -1761.045654296875, |
|
"loss": 0.2, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.2766234278678894, |
|
"rewards/margins": 0.1959143877029419, |
|
"rewards/rejected": -0.4725378155708313, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.9411764705882355e-06, |
|
"logits/chosen": -1.2937225103378296, |
|
"logits/rejected": -0.18269118666648865, |
|
"logps/chosen": -1000.1593627929688, |
|
"logps/rejected": -1983.649169921875, |
|
"loss": 0.2365, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.3481788635253906, |
|
"rewards/margins": 0.32042697072029114, |
|
"rewards/rejected": -0.6686058640480042, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.1372549019607846e-06, |
|
"logits/chosen": -1.3573691844940186, |
|
"logits/rejected": -0.8902850151062012, |
|
"logps/chosen": -908.5567626953125, |
|
"logps/rejected": -1647.058349609375, |
|
"loss": 0.2157, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.2951143980026245, |
|
"rewards/margins": 0.15847407281398773, |
|
"rewards/rejected": -0.45358848571777344, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"logits/chosen": -1.657151222229004, |
|
"logits/rejected": -0.9709945917129517, |
|
"logps/chosen": -817.062744140625, |
|
"logps/rejected": -1781.638671875, |
|
"loss": 0.1573, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.21228685975074768, |
|
"rewards/margins": 0.23603840172290802, |
|
"rewards/rejected": -0.4483252465724945, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.529411764705883e-06, |
|
"logits/chosen": -1.4512741565704346, |
|
"logits/rejected": -0.1740313172340393, |
|
"logps/chosen": -889.5105590820312, |
|
"logps/rejected": -2051.538818359375, |
|
"loss": 0.2172, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.25935059785842896, |
|
"rewards/margins": 0.3617965579032898, |
|
"rewards/rejected": -0.6211471557617188, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.7254901960784316e-06, |
|
"logits/chosen": -1.1388862133026123, |
|
"logits/rejected": -0.18607623875141144, |
|
"logps/chosen": -821.3484497070312, |
|
"logps/rejected": -1919.426513671875, |
|
"loss": 0.1605, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.21933992207050323, |
|
"rewards/margins": 0.3276643455028534, |
|
"rewards/rejected": -0.547004222869873, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.92156862745098e-06, |
|
"logits/chosen": -1.348503828048706, |
|
"logits/rejected": -0.5367448925971985, |
|
"logps/chosen": -625.4483032226562, |
|
"logps/rejected": -1496.484130859375, |
|
"loss": 0.2089, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.10934285074472427, |
|
"rewards/margins": 0.23809942603111267, |
|
"rewards/rejected": -0.34744226932525635, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.11764705882353e-06, |
|
"logits/chosen": -1.3590004444122314, |
|
"logits/rejected": -0.8172636032104492, |
|
"logps/chosen": -815.4562377929688, |
|
"logps/rejected": -1780.2926025390625, |
|
"loss": 0.2143, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1335686445236206, |
|
"rewards/margins": 0.2463621348142624, |
|
"rewards/rejected": -0.3799307644367218, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.313725490196079e-06, |
|
"logits/chosen": -1.5793583393096924, |
|
"logits/rejected": -0.32534486055374146, |
|
"logps/chosen": -928.5148315429688, |
|
"logps/rejected": -1738.1536865234375, |
|
"loss": 0.2501, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.20052528381347656, |
|
"rewards/margins": 0.21043157577514648, |
|
"rewards/rejected": -0.41095685958862305, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.509803921568628e-06, |
|
"logits/chosen": -0.9972221255302429, |
|
"logits/rejected": -0.37468206882476807, |
|
"logps/chosen": -708.7088623046875, |
|
"logps/rejected": -1586.076416015625, |
|
"loss": 0.205, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.14892444014549255, |
|
"rewards/margins": 0.24091584980487823, |
|
"rewards/rejected": -0.3898402750492096, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.705882352941177e-06, |
|
"logits/chosen": -1.3743550777435303, |
|
"logits/rejected": -0.13277244567871094, |
|
"logps/chosen": -719.425537109375, |
|
"logps/rejected": -1675.90234375, |
|
"loss": 0.2009, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.1083696037530899, |
|
"rewards/margins": 0.3259289562702179, |
|
"rewards/rejected": -0.4342985153198242, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.901960784313726e-06, |
|
"logits/chosen": -1.1646835803985596, |
|
"logits/rejected": -0.5943381786346436, |
|
"logps/chosen": -621.46630859375, |
|
"logps/rejected": -1612.871826171875, |
|
"loss": 0.1643, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.17562244832515717, |
|
"rewards/margins": 0.2645077705383301, |
|
"rewards/rejected": -0.44013017416000366, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.999941442477777e-06, |
|
"logits/chosen": -1.2978475093841553, |
|
"logits/rejected": -0.576497495174408, |
|
"logps/chosen": -937.4520263671875, |
|
"logps/rejected": -1737.780029296875, |
|
"loss": 0.2432, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.25836849212646484, |
|
"rewards/margins": 0.241961270570755, |
|
"rewards/rejected": -0.5003297924995422, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.999472998758979e-06, |
|
"logits/chosen": -1.4330791234970093, |
|
"logits/rejected": -0.8838942646980286, |
|
"logps/chosen": -877.1728515625, |
|
"logps/rejected": -1793.1947021484375, |
|
"loss": 0.1393, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.25408753752708435, |
|
"rewards/margins": 0.2761463522911072, |
|
"rewards/rejected": -0.5302339792251587, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.998536199099246e-06, |
|
"logits/chosen": -1.3899977207183838, |
|
"logits/rejected": 0.03836112096905708, |
|
"logps/chosen": -923.8590087890625, |
|
"logps/rejected": -1724.1558837890625, |
|
"loss": 0.1851, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.1945658177137375, |
|
"rewards/margins": 0.2358773946762085, |
|
"rewards/rejected": -0.4304431974887848, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.997131219037856e-06, |
|
"logits/chosen": -1.186488389968872, |
|
"logits/rejected": -0.389091819524765, |
|
"logps/chosen": -757.4147338867188, |
|
"logps/rejected": -1886.984130859375, |
|
"loss": 0.1841, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.21733467280864716, |
|
"rewards/margins": 0.3353338837623596, |
|
"rewards/rejected": -0.5526684522628784, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.995258321842611e-06, |
|
"logits/chosen": -1.1964404582977295, |
|
"logits/rejected": -0.06750938296318054, |
|
"logps/chosen": -907.2109375, |
|
"logps/rejected": -1809.0191650390625, |
|
"loss": 0.1834, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.2906198799610138, |
|
"rewards/margins": 0.279925137758255, |
|
"rewards/rejected": -0.5705450177192688, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.9929178584605e-06, |
|
"logits/chosen": -1.649431586265564, |
|
"logits/rejected": -0.20804986357688904, |
|
"logps/chosen": -891.9801635742188, |
|
"logps/rejected": -1733.181884765625, |
|
"loss": 0.1278, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.22755351662635803, |
|
"rewards/margins": 0.2664097547531128, |
|
"rewards/rejected": -0.49396324157714844, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.9901102674519446e-06, |
|
"logits/chosen": -1.4958832263946533, |
|
"logits/rejected": -0.3006078004837036, |
|
"logps/chosen": -951.6578369140625, |
|
"logps/rejected": -1706.25390625, |
|
"loss": 0.2295, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.2715206742286682, |
|
"rewards/margins": 0.25203150510787964, |
|
"rewards/rejected": -0.5235521793365479, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.986836074908616e-06, |
|
"logits/chosen": -1.3995481729507446, |
|
"logits/rejected": 0.009560632519423962, |
|
"logps/chosen": -718.5650634765625, |
|
"logps/rejected": -1350.846923828125, |
|
"loss": 0.2471, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.20702452957630157, |
|
"rewards/margins": 0.14036989212036133, |
|
"rewards/rejected": -0.3473944067955017, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.983095894354858e-06, |
|
"logits/chosen": -1.5904731750488281, |
|
"logits/rejected": -0.14893893897533417, |
|
"logps/chosen": -855.9501953125, |
|
"logps/rejected": -1916.079345703125, |
|
"loss": 0.2131, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.24469470977783203, |
|
"rewards/margins": 0.22492530941963196, |
|
"rewards/rejected": -0.4696199893951416, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.9788904266327206e-06, |
|
"logits/chosen": -1.6823375225067139, |
|
"logits/rejected": -0.4657576084136963, |
|
"logps/chosen": -784.65234375, |
|
"logps/rejected": -1751.244873046875, |
|
"loss": 0.1888, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.1359667181968689, |
|
"rewards/margins": 0.29217660427093506, |
|
"rewards/rejected": -0.42814335227012634, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.9742204597706386e-06, |
|
"logits/chosen": -1.5003750324249268, |
|
"logits/rejected": -0.001354557229205966, |
|
"logps/chosen": -755.9137573242188, |
|
"logps/rejected": -1653.0166015625, |
|
"loss": 0.1933, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.12234246730804443, |
|
"rewards/margins": 0.2765265107154846, |
|
"rewards/rejected": -0.39886897802352905, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.9690868688357655e-06, |
|
"logits/chosen": -1.3799958229064941, |
|
"logits/rejected": -0.4311766028404236, |
|
"logps/chosen": -724.7586059570312, |
|
"logps/rejected": -1667.642822265625, |
|
"loss": 0.1828, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.1624334752559662, |
|
"rewards/margins": 0.2481921911239624, |
|
"rewards/rejected": -0.4106256365776062, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.963490615770003e-06, |
|
"logits/chosen": -1.295836329460144, |
|
"logits/rejected": -0.5849100947380066, |
|
"logps/chosen": -835.3861083984375, |
|
"logps/rejected": -1846.414794921875, |
|
"loss": 0.2211, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.23319277167320251, |
|
"rewards/margins": 0.3248142898082733, |
|
"rewards/rejected": -0.5580071210861206, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.957432749209755e-06, |
|
"logits/chosen": -1.4312934875488281, |
|
"logits/rejected": 0.31627362966537476, |
|
"logps/chosen": -939.7803955078125, |
|
"logps/rejected": -1674.4808349609375, |
|
"loss": 0.2533, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.2971717119216919, |
|
"rewards/margins": 0.19919905066490173, |
|
"rewards/rejected": -0.496370792388916, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.950914404289423e-06, |
|
"logits/chosen": -1.3529198169708252, |
|
"logits/rejected": -0.19551090896129608, |
|
"logps/chosen": -940.6759643554688, |
|
"logps/rejected": -1822.1956787109375, |
|
"loss": 0.2262, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.3031768500804901, |
|
"rewards/margins": 0.22445103526115417, |
|
"rewards/rejected": -0.5276279449462891, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.943936802428712e-06, |
|
"logits/chosen": -1.1721961498260498, |
|
"logits/rejected": 0.37075644731521606, |
|
"logps/chosen": -702.531005859375, |
|
"logps/rejected": -1698.3720703125, |
|
"loss": 0.1711, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.18371441960334778, |
|
"rewards/margins": 0.269645094871521, |
|
"rewards/rejected": -0.4533595144748688, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.936501251103751e-06, |
|
"logits/chosen": -1.1501245498657227, |
|
"logits/rejected": -0.04669635370373726, |
|
"logps/chosen": -934.7687377929688, |
|
"logps/rejected": -1762.8375244140625, |
|
"loss": 0.2049, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.27317720651626587, |
|
"rewards/margins": 0.24144259095191956, |
|
"rewards/rejected": -0.5146198272705078, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.928609143602102e-06, |
|
"logits/chosen": -1.3455841541290283, |
|
"logits/rejected": -0.689312219619751, |
|
"logps/chosen": -953.3030395507812, |
|
"logps/rejected": -2143.519775390625, |
|
"loss": 0.1132, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.3812543451786041, |
|
"rewards/margins": 0.47345447540283203, |
|
"rewards/rejected": -0.8547086715698242, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.920261958761677e-06, |
|
"logits/chosen": -1.1954295635223389, |
|
"logits/rejected": 0.1524878442287445, |
|
"logps/chosen": -988.5673828125, |
|
"logps/rejected": -1907.625, |
|
"loss": 0.2181, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.3235063850879669, |
|
"rewards/margins": 0.30497947335243225, |
|
"rewards/rejected": -0.6284858584403992, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.911461260693639e-06, |
|
"logits/chosen": -1.384975552558899, |
|
"logits/rejected": -0.3957231938838959, |
|
"logps/chosen": -864.88623046875, |
|
"logps/rejected": -1796.1107177734375, |
|
"loss": 0.1692, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.24604813754558563, |
|
"rewards/margins": 0.25146228075027466, |
|
"rewards/rejected": -0.4975104331970215, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.902208698489302e-06, |
|
"logits/chosen": -1.0432078838348389, |
|
"logits/rejected": -0.16131794452667236, |
|
"logps/chosen": -885.232421875, |
|
"logps/rejected": -1651.9114990234375, |
|
"loss": 0.2494, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.22546634078025818, |
|
"rewards/margins": 0.1930330991744995, |
|
"rewards/rejected": -0.4184994697570801, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.89250600591114e-06, |
|
"logits/chosen": -1.3176567554473877, |
|
"logits/rejected": -0.0033722042571753263, |
|
"logps/chosen": -723.5933837890625, |
|
"logps/rejected": -1598.0091552734375, |
|
"loss": 0.2398, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.15554097294807434, |
|
"rewards/margins": 0.26405271887779236, |
|
"rewards/rejected": -0.4195936620235443, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.882355001067892e-06, |
|
"logits/chosen": -1.188307523727417, |
|
"logits/rejected": 0.14929169416427612, |
|
"logps/chosen": -815.7213134765625, |
|
"logps/rejected": -1634.1407470703125, |
|
"loss": 0.2558, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.16603827476501465, |
|
"rewards/margins": 0.21612891554832458, |
|
"rewards/rejected": -0.38216716051101685, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.871757586073897e-06, |
|
"logits/chosen": -1.3035974502563477, |
|
"logits/rejected": 0.26524829864501953, |
|
"logps/chosen": -763.2244262695312, |
|
"logps/rejected": -1522.682861328125, |
|
"loss": 0.2258, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.1318461149930954, |
|
"rewards/margins": 0.23059546947479248, |
|
"rewards/rejected": -0.3624415993690491, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.860715746692661e-06, |
|
"logits/chosen": -1.1487717628479004, |
|
"logits/rejected": 0.05942107364535332, |
|
"logps/chosen": -886.2254638671875, |
|
"logps/rejected": -1841.0814208984375, |
|
"loss": 0.1885, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.18671779334545135, |
|
"rewards/margins": 0.27246180176734924, |
|
"rewards/rejected": -0.4591795802116394, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.849231551964771e-06, |
|
"logits/chosen": -1.2474385499954224, |
|
"logits/rejected": -0.14498676359653473, |
|
"logps/chosen": -778.3880615234375, |
|
"logps/rejected": -1752.142333984375, |
|
"loss": 0.1754, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.18546968698501587, |
|
"rewards/margins": 0.3222576379776001, |
|
"rewards/rejected": -0.5077272653579712, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.837307153820184e-06, |
|
"logits/chosen": -1.1251775026321411, |
|
"logits/rejected": 0.15637345612049103, |
|
"logps/chosen": -924.3635864257812, |
|
"logps/rejected": -2070.327392578125, |
|
"loss": 0.1343, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.3112488090991974, |
|
"rewards/margins": 0.38895484805107117, |
|
"rewards/rejected": -0.7002035975456238, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.824944786675003e-06, |
|
"logits/chosen": -1.3947086334228516, |
|
"logits/rejected": 0.045419882982969284, |
|
"logps/chosen": -856.5111083984375, |
|
"logps/rejected": -1587.355712890625, |
|
"loss": 0.1704, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.2732272148132324, |
|
"rewards/margins": 0.24021320044994354, |
|
"rewards/rejected": -0.5134404301643372, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.81214676701278e-06, |
|
"logits/chosen": -1.2445639371871948, |
|
"logits/rejected": 0.1435929536819458, |
|
"logps/chosen": -935.2590942382812, |
|
"logps/rejected": -1872.558349609375, |
|
"loss": 0.1603, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.2728428244590759, |
|
"rewards/margins": 0.303517609834671, |
|
"rewards/rejected": -0.5763604044914246, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.798915492950456e-06, |
|
"logits/chosen": -1.3926843404769897, |
|
"logits/rejected": -0.8224552273750305, |
|
"logps/chosen": -930.3948364257812, |
|
"logps/rejected": -1831.987060546875, |
|
"loss": 0.2094, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.22408756613731384, |
|
"rewards/margins": 0.306917279958725, |
|
"rewards/rejected": -0.5310048460960388, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.785253443788997e-06, |
|
"logits/chosen": -1.452789306640625, |
|
"logits/rejected": -0.08553876727819443, |
|
"logps/chosen": -834.9271240234375, |
|
"logps/rejected": -1715.3486328125, |
|
"loss": 0.2066, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1931687295436859, |
|
"rewards/margins": 0.24109697341918945, |
|
"rewards/rejected": -0.43426570296287537, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.771163179548809e-06, |
|
"logits/chosen": -1.2075916528701782, |
|
"logits/rejected": -0.4084923267364502, |
|
"logps/chosen": -895.1989135742188, |
|
"logps/rejected": -1892.2545166015625, |
|
"loss": 0.1562, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.27637767791748047, |
|
"rewards/margins": 0.3559107184410095, |
|
"rewards/rejected": -0.6322883367538452, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.75664734049005e-06, |
|
"logits/chosen": -1.4612247943878174, |
|
"logits/rejected": -0.4000505805015564, |
|
"logps/chosen": -838.0030517578125, |
|
"logps/rejected": -1836.1865234375, |
|
"loss": 0.1866, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.27399009466171265, |
|
"rewards/margins": 0.3403601050376892, |
|
"rewards/rejected": -0.6143501996994019, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.741708646617879e-06, |
|
"logits/chosen": -1.4533047676086426, |
|
"logits/rejected": -0.44210928678512573, |
|
"logps/chosen": -826.44921875, |
|
"logps/rejected": -1627.9482421875, |
|
"loss": 0.1568, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.18495787680149078, |
|
"rewards/margins": 0.24494799971580505, |
|
"rewards/rejected": -0.42990580201148987, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.726349897172791e-06, |
|
"logits/chosen": -1.2161755561828613, |
|
"logits/rejected": -0.4458787441253662, |
|
"logps/chosen": -677.1725463867188, |
|
"logps/rejected": -1372.3172607421875, |
|
"loss": 0.2348, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.11152330785989761, |
|
"rewards/margins": 0.17953188717365265, |
|
"rewards/rejected": -0.29105520248413086, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.710573970106076e-06, |
|
"logits/chosen": -1.2787022590637207, |
|
"logits/rejected": -0.5003519654273987, |
|
"logps/chosen": -937.7862548828125, |
|
"logps/rejected": -1879.7720947265625, |
|
"loss": 0.2216, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.27286097407341003, |
|
"rewards/margins": 0.2761802077293396, |
|
"rewards/rejected": -0.5490411520004272, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.694383821540554e-06, |
|
"logits/chosen": -1.4234240055084229, |
|
"logits/rejected": -0.529420793056488, |
|
"logps/chosen": -879.75830078125, |
|
"logps/rejected": -1886.7099609375, |
|
"loss": 0.1449, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.23544082045555115, |
|
"rewards/margins": 0.3297392725944519, |
|
"rewards/rejected": -0.5651801824569702, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.677782485216644e-06, |
|
"logits/chosen": -1.5074328184127808, |
|
"logits/rejected": 0.13324348628520966, |
|
"logps/chosen": -894.3519287109375, |
|
"logps/rejected": -1656.986328125, |
|
"loss": 0.2306, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.2585051953792572, |
|
"rewards/margins": 0.17639592289924622, |
|
"rewards/rejected": -0.4349011480808258, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.660773071923901e-06, |
|
"logits/chosen": -1.254246473312378, |
|
"logits/rejected": -0.4503572881221771, |
|
"logps/chosen": -743.8980712890625, |
|
"logps/rejected": -1586.318603515625, |
|
"loss": 0.2306, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.20321564376354218, |
|
"rewards/margins": 0.28816673159599304, |
|
"rewards/rejected": -0.49138230085372925, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.643358768918106e-06, |
|
"logits/chosen": -1.2100741863250732, |
|
"logits/rejected": -0.6602537631988525, |
|
"logps/chosen": -866.4385986328125, |
|
"logps/rejected": -1698.565185546875, |
|
"loss": 0.2341, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.24518051743507385, |
|
"rewards/margins": 0.21809275448322296, |
|
"rewards/rejected": -0.46327322721481323, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.625542839324036e-06, |
|
"logits/chosen": -1.2801318168640137, |
|
"logits/rejected": -0.20570655167102814, |
|
"logps/chosen": -696.7703857421875, |
|
"logps/rejected": -1810.203857421875, |
|
"loss": 0.1443, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.1753673553466797, |
|
"rewards/margins": 0.33784395456314087, |
|
"rewards/rejected": -0.5132113099098206, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.6073286215240105e-06, |
|
"logits/chosen": -1.573704719543457, |
|
"logits/rejected": -0.5480459928512573, |
|
"logps/chosen": -698.1392211914062, |
|
"logps/rejected": -1760.8834228515625, |
|
"loss": 3.1142, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.11380796134471893, |
|
"rewards/margins": 0.48678064346313477, |
|
"rewards/rejected": -0.6005885601043701, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.588719528532342e-06, |
|
"logits/chosen": -1.5839512348175049, |
|
"logits/rejected": -0.7513319253921509, |
|
"logps/chosen": -620.78955078125, |
|
"logps/rejected": -1401.8199462890625, |
|
"loss": 0.2422, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.02382536605000496, |
|
"rewards/margins": 0.09586119651794434, |
|
"rewards/rejected": -0.119686558842659, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.569719047355795e-06, |
|
"logits/chosen": -1.5924733877182007, |
|
"logits/rejected": -0.816574215888977, |
|
"logps/chosen": -557.929931640625, |
|
"logps/rejected": -1159.7681884765625, |
|
"loss": 0.292, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.010283837094902992, |
|
"rewards/margins": 0.04927302524447441, |
|
"rewards/rejected": -0.059556860476732254, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.550330738340189e-06, |
|
"logits/chosen": -1.4926470518112183, |
|
"logits/rejected": -0.8066496849060059, |
|
"logps/chosen": -669.9822387695312, |
|
"logps/rejected": -1387.30419921875, |
|
"loss": 0.2635, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.06443636119365692, |
|
"rewards/margins": 0.10144983232021332, |
|
"rewards/rejected": -0.16588619351387024, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.530558234503252e-06, |
|
"logits/chosen": -1.504148244857788, |
|
"logits/rejected": -0.710750937461853, |
|
"logps/chosen": -563.5753173828125, |
|
"logps/rejected": -1385.9373779296875, |
|
"loss": 0.1933, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.02186558023095131, |
|
"rewards/margins": 0.15772321820259094, |
|
"rewards/rejected": -0.17958880960941315, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.5104052408538545e-06, |
|
"logits/chosen": -1.3532848358154297, |
|
"logits/rejected": -0.17277280986309052, |
|
"logps/chosen": -665.1290893554688, |
|
"logps/rejected": -1473.070068359375, |
|
"loss": 0.2188, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.0683923214673996, |
|
"rewards/margins": 0.23080816864967346, |
|
"rewards/rejected": -0.29920047521591187, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.489875533697767e-06, |
|
"logits/chosen": -1.2411041259765625, |
|
"logits/rejected": -0.6769916415214539, |
|
"logps/chosen": -796.1107177734375, |
|
"logps/rejected": -1779.8375244140625, |
|
"loss": 0.2014, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.11370061337947845, |
|
"rewards/margins": 0.2886132001876831, |
|
"rewards/rejected": -0.40231385827064514, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.468972959930043e-06, |
|
"logits/chosen": -1.4062107801437378, |
|
"logits/rejected": -0.11251994222402573, |
|
"logps/chosen": -810.1907958984375, |
|
"logps/rejected": -1755.5439453125, |
|
"loss": 0.207, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.138728067278862, |
|
"rewards/margins": 0.25112494826316833, |
|
"rewards/rejected": -0.38985303044319153, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.447701436314176e-06, |
|
"logits/chosen": -1.1295002698898315, |
|
"logits/rejected": -0.491716206073761, |
|
"logps/chosen": -665.5704345703125, |
|
"logps/rejected": -1605.226318359375, |
|
"loss": 0.2432, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.09421003609895706, |
|
"rewards/margins": 0.22589227557182312, |
|
"rewards/rejected": -0.3201023042201996, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.4260649487481835e-06, |
|
"logits/chosen": -1.3528281450271606, |
|
"logits/rejected": -0.8653984069824219, |
|
"logps/chosen": -560.7476806640625, |
|
"logps/rejected": -1564.6998291015625, |
|
"loss": 0.1747, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.06951048225164413, |
|
"rewards/margins": 0.29366621375083923, |
|
"rewards/rejected": -0.36317676305770874, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.404067551517704e-06, |
|
"logits/chosen": -1.496765375137329, |
|
"logits/rejected": -0.7339566349983215, |
|
"logps/chosen": -559.6861572265625, |
|
"logps/rejected": -1561.775634765625, |
|
"loss": 0.1495, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.0916418582201004, |
|
"rewards/margins": 0.28826963901519775, |
|
"rewards/rejected": -0.37991148233413696, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.381713366536312e-06, |
|
"logits/chosen": -1.2229559421539307, |
|
"logits/rejected": -0.3822958469390869, |
|
"logps/chosen": -795.2717895507812, |
|
"logps/rejected": -1646.298583984375, |
|
"loss": 0.2385, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.16480056941509247, |
|
"rewards/margins": 0.25636088848114014, |
|
"rewards/rejected": -0.4211614727973938, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.359006582573138e-06, |
|
"logits/chosen": -1.3127458095550537, |
|
"logits/rejected": -0.6002156138420105, |
|
"logps/chosen": -731.8434448242188, |
|
"logps/rejected": -1655.339599609375, |
|
"loss": 0.2386, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.1232454776763916, |
|
"rewards/margins": 0.260085791349411, |
|
"rewards/rejected": -0.3833312392234802, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.335951454467971e-06, |
|
"logits/chosen": -1.4491212368011475, |
|
"logits/rejected": -0.4968988299369812, |
|
"logps/chosen": -708.8034057617188, |
|
"logps/rejected": -1689.820068359375, |
|
"loss": 0.1514, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.11892116069793701, |
|
"rewards/margins": 0.311443567276001, |
|
"rewards/rejected": -0.4303646981716156, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.3125523023339825e-06, |
|
"logits/chosen": -1.532845377922058, |
|
"logits/rejected": -0.5454439520835876, |
|
"logps/chosen": -708.6060791015625, |
|
"logps/rejected": -1473.7392578125, |
|
"loss": 0.2365, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.0950343906879425, |
|
"rewards/margins": 0.2247859686613083, |
|
"rewards/rejected": -0.3198204040527344, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.288813510748207e-06, |
|
"logits/chosen": -1.3746122121810913, |
|
"logits/rejected": -0.3929213881492615, |
|
"logps/chosen": -709.5933837890625, |
|
"logps/rejected": -1493.141357421875, |
|
"loss": 0.1891, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.05677127093076706, |
|
"rewards/margins": 0.22434012591838837, |
|
"rewards/rejected": -0.28111138939857483, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.264739527929959e-06, |
|
"logits/chosen": -1.6062724590301514, |
|
"logits/rejected": -0.8062151074409485, |
|
"logps/chosen": -672.033447265625, |
|
"logps/rejected": -1605.8253173828125, |
|
"loss": 0.2076, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.04862620308995247, |
|
"rewards/margins": 0.277982234954834, |
|
"rewards/rejected": -0.32660841941833496, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.240334864907317e-06, |
|
"logits/chosen": -1.429529070854187, |
|
"logits/rejected": -0.1541730761528015, |
|
"logps/chosen": -751.5721435546875, |
|
"logps/rejected": -1614.796875, |
|
"loss": 0.1689, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.07653092592954636, |
|
"rewards/margins": 0.23011043667793274, |
|
"rewards/rejected": -0.3066413402557373, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.215604094671835e-06, |
|
"logits/chosen": -1.4942229986190796, |
|
"logits/rejected": -0.5664646029472351, |
|
"logps/chosen": -683.9749755859375, |
|
"logps/rejected": -1751.165771484375, |
|
"loss": 0.1305, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.07548153400421143, |
|
"rewards/margins": 0.3088182806968689, |
|
"rewards/rejected": -0.3842998147010803, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.190551851321647e-06, |
|
"logits/chosen": -1.5068459510803223, |
|
"logits/rejected": -0.3654994070529938, |
|
"logps/chosen": -753.12060546875, |
|
"logps/rejected": -1841.876220703125, |
|
"loss": 0.1256, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.10451909154653549, |
|
"rewards/margins": 0.35891246795654297, |
|
"rewards/rejected": -0.46343153715133667, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.165182829193126e-06, |
|
"logits/chosen": -1.4504587650299072, |
|
"logits/rejected": 0.0904449075460434, |
|
"logps/chosen": -773.3833618164062, |
|
"logps/rejected": -1582.4493408203125, |
|
"loss": 0.2156, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.06579799205064774, |
|
"rewards/margins": 0.241295725107193, |
|
"rewards/rejected": -0.3070937395095825, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.139501781981245e-06, |
|
"logits/chosen": -1.5094424486160278, |
|
"logits/rejected": -0.5480602383613586, |
|
"logps/chosen": -672.755126953125, |
|
"logps/rejected": -1651.116943359375, |
|
"loss": 0.1111, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.07722672820091248, |
|
"rewards/margins": 0.26155346632003784, |
|
"rewards/rejected": -0.33878016471862793, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.113513521848821e-06, |
|
"logits/chosen": -1.594499111175537, |
|
"logits/rejected": -0.5706368684768677, |
|
"logps/chosen": -772.4927978515625, |
|
"logps/rejected": -1745.507080078125, |
|
"loss": 0.1475, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.10902180522680283, |
|
"rewards/margins": 0.3192656934261322, |
|
"rewards/rejected": -0.4282875061035156, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.087222918524807e-06, |
|
"logits/chosen": -1.297629952430725, |
|
"logits/rejected": -0.6775213479995728, |
|
"logps/chosen": -705.9368896484375, |
|
"logps/rejected": -1540.4775390625, |
|
"loss": 0.2268, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.1348380744457245, |
|
"rewards/margins": 0.2274044305086136, |
|
"rewards/rejected": -0.36224251985549927, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.0606348983917924e-06, |
|
"logits/chosen": -1.3503175973892212, |
|
"logits/rejected": -0.9185010194778442, |
|
"logps/chosen": -610.7164306640625, |
|
"logps/rejected": -1734.915771484375, |
|
"loss": 0.1352, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.10167312622070312, |
|
"rewards/margins": 0.36634570360183716, |
|
"rewards/rejected": -0.4680188298225403, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.03375444356288e-06, |
|
"logits/chosen": -1.4071118831634521, |
|
"logits/rejected": -0.8690752983093262, |
|
"logps/chosen": -836.24169921875, |
|
"logps/rejected": -1863.6539306640625, |
|
"loss": 0.2307, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.1224076971411705, |
|
"rewards/margins": 0.3151033818721771, |
|
"rewards/rejected": -0.43751105666160583, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.006586590948141e-06, |
|
"logits/chosen": -1.3949382305145264, |
|
"logits/rejected": -0.680055558681488, |
|
"logps/chosen": -666.8121948242188, |
|
"logps/rejected": -1796.386474609375, |
|
"loss": 0.199, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.07509482651948929, |
|
"rewards/margins": 0.31507402658462524, |
|
"rewards/rejected": -0.39016884565353394, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.979136431310781e-06, |
|
"logits/chosen": -1.4007041454315186, |
|
"logits/rejected": -0.44923824071884155, |
|
"logps/chosen": -629.3880615234375, |
|
"logps/rejected": -1281.4881591796875, |
|
"loss": 0.27, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.06840632110834122, |
|
"rewards/margins": 0.14729034900665283, |
|
"rewards/rejected": -0.21569669246673584, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.951409108313223e-06, |
|
"logits/chosen": -1.3141412734985352, |
|
"logits/rejected": -0.3359532654285431, |
|
"logps/chosen": -682.4598999023438, |
|
"logps/rejected": -1479.57763671875, |
|
"loss": 0.2002, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.06678664684295654, |
|
"rewards/margins": 0.18387752771377563, |
|
"rewards/rejected": -0.2506641745567322, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.923409817553284e-06, |
|
"logits/chosen": -1.26377534866333, |
|
"logits/rejected": -0.5578689575195312, |
|
"logps/chosen": -753.383056640625, |
|
"logps/rejected": -1470.470458984375, |
|
"loss": 0.1909, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.08720171451568604, |
|
"rewards/margins": 0.24566006660461426, |
|
"rewards/rejected": -0.3328618109226227, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.895143805590609e-06, |
|
"logits/chosen": -1.5301742553710938, |
|
"logits/rejected": -0.33912280201911926, |
|
"logps/chosen": -788.5135498046875, |
|
"logps/rejected": -1906.780029296875, |
|
"loss": 0.2098, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.10219583660364151, |
|
"rewards/margins": 0.3592928946018219, |
|
"rewards/rejected": -0.4614887833595276, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.8666163689635614e-06, |
|
"logits/chosen": -1.4293967485427856, |
|
"logits/rejected": -0.766064465045929, |
|
"logps/chosen": -697.79443359375, |
|
"logps/rejected": -1692.085693359375, |
|
"loss": 0.2074, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.10171394050121307, |
|
"rewards/margins": 0.3115464448928833, |
|
"rewards/rejected": -0.41326045989990234, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.837832853196751e-06, |
|
"logits/chosen": -1.4031484127044678, |
|
"logits/rejected": -0.46277111768722534, |
|
"logps/chosen": -741.0556030273438, |
|
"logps/rejected": -1712.839111328125, |
|
"loss": 0.1786, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.10311299562454224, |
|
"rewards/margins": 0.2798925042152405, |
|
"rewards/rejected": -0.3830054700374603, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.808798651799377e-06, |
|
"logits/chosen": -1.4064973592758179, |
|
"logits/rejected": -0.5826825499534607, |
|
"logps/chosen": -687.228271484375, |
|
"logps/rejected": -1728.9072265625, |
|
"loss": 0.1515, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.10456766188144684, |
|
"rewards/margins": 0.31050539016723633, |
|
"rewards/rejected": -0.4150730073451996, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.7795192052545805e-06, |
|
"logits/chosen": -1.3606574535369873, |
|
"logits/rejected": -0.26507607102394104, |
|
"logps/chosen": -657.6034545898438, |
|
"logps/rejected": -1711.599365234375, |
|
"loss": 0.1027, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.11261602491140366, |
|
"rewards/margins": 0.3513622283935547, |
|
"rewards/rejected": -0.46397823095321655, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.7500000000000005e-06, |
|
"logits/chosen": -1.3821978569030762, |
|
"logits/rejected": -0.846422016620636, |
|
"logps/chosen": -700.0028076171875, |
|
"logps/rejected": -1700.7777099609375, |
|
"loss": 0.188, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.1201629489660263, |
|
"rewards/margins": 0.2851884663105011, |
|
"rewards/rejected": -0.4053514003753662, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.7202465673997123e-06, |
|
"logits/chosen": -1.327423334121704, |
|
"logits/rejected": -0.4249703884124756, |
|
"logps/chosen": -733.533935546875, |
|
"logps/rejected": -1811.9857177734375, |
|
"loss": 0.2335, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.15838567912578583, |
|
"rewards/margins": 0.3160237669944763, |
|
"rewards/rejected": -0.47440940141677856, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.6902644827077504e-06, |
|
"logits/chosen": -1.163883924484253, |
|
"logits/rejected": -0.564578652381897, |
|
"logps/chosen": -714.31591796875, |
|
"logps/rejected": -1658.974609375, |
|
"loss": 0.204, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1494341641664505, |
|
"rewards/margins": 0.2751534581184387, |
|
"rewards/rejected": -0.4245876669883728, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.660059364023409e-06, |
|
"logits/chosen": -1.1056033372879028, |
|
"logits/rejected": -0.6749047040939331, |
|
"logps/chosen": -836.0635986328125, |
|
"logps/rejected": -1795.9320068359375, |
|
"loss": 0.1381, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.13029779493808746, |
|
"rewards/margins": 0.3451148271560669, |
|
"rewards/rejected": -0.47541260719299316, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.6296368712385084e-06, |
|
"logits/chosen": -1.2282450199127197, |
|
"logits/rejected": 0.033928144723176956, |
|
"logps/chosen": -668.1098022460938, |
|
"logps/rejected": -1750.6011962890625, |
|
"loss": 0.187, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.12075567245483398, |
|
"rewards/margins": 0.3676701486110687, |
|
"rewards/rejected": -0.4884257912635803, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.599002704976835e-06, |
|
"logits/chosen": -1.513203501701355, |
|
"logits/rejected": -0.3770269453525543, |
|
"logps/chosen": -774.125244140625, |
|
"logps/rejected": -1470.924072265625, |
|
"loss": 0.2331, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.0825313851237297, |
|
"rewards/margins": 0.2151786983013153, |
|
"rewards/rejected": -0.297710120677948, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.5681626055259526e-06, |
|
"logits/chosen": -1.351539134979248, |
|
"logits/rejected": 0.01821332611143589, |
|
"logps/chosen": -615.5689086914062, |
|
"logps/rejected": -1394.30859375, |
|
"loss": 0.1882, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.04903438687324524, |
|
"rewards/margins": 0.17228753864765167, |
|
"rewards/rejected": -0.22132191061973572, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.5371223517615684e-06, |
|
"logits/chosen": -1.1955583095550537, |
|
"logits/rejected": -0.7964296340942383, |
|
"logps/chosen": -650.0599365234375, |
|
"logps/rejected": -1640.6591796875, |
|
"loss": 0.1666, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.061278946697711945, |
|
"rewards/margins": 0.23827362060546875, |
|
"rewards/rejected": -0.2995525896549225, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.5058877600646814e-06, |
|
"logits/chosen": -1.5846580266952515, |
|
"logits/rejected": -0.4390091896057129, |
|
"logps/chosen": -774.6456298828125, |
|
"logps/rejected": -1672.4420166015625, |
|
"loss": 0.1899, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.09766945987939835, |
|
"rewards/margins": 0.26369303464889526, |
|
"rewards/rejected": -0.3613625466823578, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4744646832316985e-06, |
|
"logits/chosen": -1.1662776470184326, |
|
"logits/rejected": -0.2102310210466385, |
|
"logps/chosen": -793.6665649414062, |
|
"logps/rejected": -1921.721923828125, |
|
"loss": 0.1516, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.128018319606781, |
|
"rewards/margins": 0.35574427247047424, |
|
"rewards/rejected": -0.48376256227493286, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.442859009377724e-06, |
|
"logits/chosen": -1.2999095916748047, |
|
"logits/rejected": -0.5450000762939453, |
|
"logps/chosen": -756.6891479492188, |
|
"logps/rejected": -1727.3140869140625, |
|
"loss": 0.2095, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.12410111725330353, |
|
"rewards/margins": 0.2954896092414856, |
|
"rewards/rejected": -0.41959071159362793, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4110766608332347e-06, |
|
"logits/chosen": -1.3748492002487183, |
|
"logits/rejected": -0.4282529950141907, |
|
"logps/chosen": -715.91064453125, |
|
"logps/rejected": -1581.970703125, |
|
"loss": 0.2029, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.10205087810754776, |
|
"rewards/margins": 0.2136324942111969, |
|
"rewards/rejected": -0.31568339467048645, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.379123593034342e-06, |
|
"logits/chosen": -1.4860260486602783, |
|
"logits/rejected": -0.33013448119163513, |
|
"logps/chosen": -715.021240234375, |
|
"logps/rejected": -1671.137939453125, |
|
"loss": 0.1657, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.0988413542509079, |
|
"rewards/margins": 0.2489662617444992, |
|
"rewards/rejected": -0.3478075861930847, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3470057934068533e-06, |
|
"logits/chosen": -1.4496772289276123, |
|
"logits/rejected": -0.6596914529800415, |
|
"logps/chosen": -673.6126098632812, |
|
"logps/rejected": -1665.568603515625, |
|
"loss": 0.1832, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.08717682957649231, |
|
"rewards/margins": 0.2865816652774811, |
|
"rewards/rejected": -0.3737585246562958, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.314729280244332e-06, |
|
"logits/chosen": -1.5033951997756958, |
|
"logits/rejected": -0.4424918591976166, |
|
"logps/chosen": -715.0887451171875, |
|
"logps/rejected": -1384.922119140625, |
|
"loss": 0.2064, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.12197653949260712, |
|
"rewards/margins": 0.2576510012149811, |
|
"rewards/rejected": -0.3796275556087494, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.2823001015803863e-06, |
|
"logits/chosen": -1.3551867008209229, |
|
"logits/rejected": -0.6100107431411743, |
|
"logps/chosen": -750.599853515625, |
|
"logps/rejected": -1853.4273681640625, |
|
"loss": 0.1589, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.09711600840091705, |
|
"rewards/margins": 0.3520352840423584, |
|
"rewards/rejected": -0.44915127754211426, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.2497243340553675e-06, |
|
"logits/chosen": -1.0115400552749634, |
|
"logits/rejected": -0.17798957228660583, |
|
"logps/chosen": -745.58984375, |
|
"logps/rejected": -1906.7685546875, |
|
"loss": 0.2539, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.14974217116832733, |
|
"rewards/margins": 0.3422713875770569, |
|
"rewards/rejected": -0.4920136332511902, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.217008081777726e-06, |
|
"logits/chosen": -1.1727737188339233, |
|
"logits/rejected": -0.37460917234420776, |
|
"logps/chosen": -709.9483642578125, |
|
"logps/rejected": -1686.753173828125, |
|
"loss": 0.1683, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.10941555351018906, |
|
"rewards/margins": 0.2822516858577728, |
|
"rewards/rejected": -0.3916672468185425, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.184157475180208e-06, |
|
"logits/chosen": -1.3031466007232666, |
|
"logits/rejected": -0.5970622301101685, |
|
"logps/chosen": -697.8651123046875, |
|
"logps/rejected": -1595.6754150390625, |
|
"loss": 0.2328, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.10812550783157349, |
|
"rewards/margins": 0.23647412657737732, |
|
"rewards/rejected": -0.3445996046066284, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.1511786698711226e-06, |
|
"logits/chosen": -1.3314238786697388, |
|
"logits/rejected": 0.48418712615966797, |
|
"logps/chosen": -731.9833984375, |
|
"logps/rejected": -1517.853271484375, |
|
"loss": 0.2287, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.12708911299705505, |
|
"rewards/margins": 0.23969343304634094, |
|
"rewards/rejected": -0.3667825162410736, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.1180778454808973e-06, |
|
"logits/chosen": -1.289541244506836, |
|
"logits/rejected": -0.4609376788139343, |
|
"logps/chosen": -746.2857666015625, |
|
"logps/rejected": -1523.1092529296875, |
|
"loss": 0.1886, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.09962339699268341, |
|
"rewards/margins": 0.28612619638442993, |
|
"rewards/rejected": -0.38574957847595215, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.084861204504122e-06, |
|
"logits/chosen": -1.0148189067840576, |
|
"logits/rejected": -0.48453038930892944, |
|
"logps/chosen": -778.4666748046875, |
|
"logps/rejected": -1931.9429931640625, |
|
"loss": 0.1131, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.08722411096096039, |
|
"rewards/margins": 0.36971360445022583, |
|
"rewards/rejected": -0.4569377303123474, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.051534971137315e-06, |
|
"logits/chosen": -1.2210582494735718, |
|
"logits/rejected": -0.43022990226745605, |
|
"logps/chosen": -752.8408813476562, |
|
"logps/rejected": -1476.504638671875, |
|
"loss": 0.2269, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.09663649648427963, |
|
"rewards/margins": 0.20909292995929718, |
|
"rewards/rejected": -0.3057294487953186, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.0181053901126243e-06, |
|
"logits/chosen": -1.1169403791427612, |
|
"logits/rejected": 0.2767347991466522, |
|
"logps/chosen": -749.15673828125, |
|
"logps/rejected": -1505.369140625, |
|
"loss": 0.1992, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.09570419043302536, |
|
"rewards/margins": 0.19496819376945496, |
|
"rewards/rejected": -0.2906723916530609, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.9845787255276753e-06, |
|
"logits/chosen": -1.5088775157928467, |
|
"logits/rejected": -0.9695127606391907, |
|
"logps/chosen": -588.0244750976562, |
|
"logps/rejected": -1467.4212646484375, |
|
"loss": 0.1487, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.04604783654212952, |
|
"rewards/margins": 0.28641366958618164, |
|
"rewards/rejected": -0.33246147632598877, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.950961259671793e-06, |
|
"logits/chosen": -1.50933837890625, |
|
"logits/rejected": -0.6869689226150513, |
|
"logps/chosen": -710.8389892578125, |
|
"logps/rejected": -1601.9041748046875, |
|
"loss": 0.2032, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.0675523430109024, |
|
"rewards/margins": 0.27207452058792114, |
|
"rewards/rejected": -0.33962687849998474, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.917259291848814e-06, |
|
"logits/chosen": -1.4775984287261963, |
|
"logits/rejected": -0.3601114749908447, |
|
"logps/chosen": -680.5808715820312, |
|
"logps/rejected": -1640.981689453125, |
|
"loss": 0.2072, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.06015778332948685, |
|
"rewards/margins": 0.2646317481994629, |
|
"rewards/rejected": -0.32478955388069153, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.883479137196714e-06, |
|
"logits/chosen": -1.826909065246582, |
|
"logits/rejected": -0.6638845801353455, |
|
"logps/chosen": -696.27734375, |
|
"logps/rejected": -1482.72119140625, |
|
"loss": 0.184, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.05563250929117203, |
|
"rewards/margins": 0.2452922761440277, |
|
"rewards/rejected": -0.30092480778694153, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.849627125504262e-06, |
|
"logits/chosen": -1.374955415725708, |
|
"logits/rejected": -0.20216119289398193, |
|
"logps/chosen": -578.1390380859375, |
|
"logps/rejected": -1516.6820068359375, |
|
"loss": 0.1879, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.0654110461473465, |
|
"rewards/margins": 0.27813297510147095, |
|
"rewards/rejected": -0.34354403614997864, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.8157096000249334e-06, |
|
"logits/chosen": -1.5065643787384033, |
|
"logits/rejected": -0.7829849123954773, |
|
"logps/chosen": -630.3825073242188, |
|
"logps/rejected": -1537.6138916015625, |
|
"loss": 0.2042, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.061698682606220245, |
|
"rewards/margins": 0.2721042037010193, |
|
"rewards/rejected": -0.33380287885665894, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.7817329162883033e-06, |
|
"logits/chosen": -1.471840500831604, |
|
"logits/rejected": -0.21121864020824432, |
|
"logps/chosen": -743.4503173828125, |
|
"logps/rejected": -1590.5904541015625, |
|
"loss": 0.1418, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.055032916367053986, |
|
"rewards/margins": 0.24513819813728333, |
|
"rewards/rejected": -0.3001710772514343, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.747703440909128e-06, |
|
"logits/chosen": -1.6148380041122437, |
|
"logits/rejected": -0.6764585375785828, |
|
"logps/chosen": -709.3273315429688, |
|
"logps/rejected": -1805.447021484375, |
|
"loss": 0.1334, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.030007129535079002, |
|
"rewards/margins": 0.361659973859787, |
|
"rewards/rejected": -0.39166706800460815, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.713627550394363e-06, |
|
"logits/chosen": -1.3852078914642334, |
|
"logits/rejected": -0.6749362945556641, |
|
"logps/chosen": -686.713623046875, |
|
"logps/rejected": -1515.2998046875, |
|
"loss": 0.1806, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.05257093161344528, |
|
"rewards/margins": 0.26767003536224365, |
|
"rewards/rejected": -0.3202410042285919, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.679511629948319e-06, |
|
"logits/chosen": -1.352468729019165, |
|
"logits/rejected": -0.6524327993392944, |
|
"logps/chosen": -796.8145751953125, |
|
"logps/rejected": -1669.538330078125, |
|
"loss": 0.2095, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.10906052589416504, |
|
"rewards/margins": 0.23847489058971405, |
|
"rewards/rejected": -0.3475354313850403, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.6453620722761897e-06, |
|
"logits/chosen": -1.554595708847046, |
|
"logits/rejected": 0.08318161964416504, |
|
"logps/chosen": -606.7230224609375, |
|
"logps/rejected": -1430.286865234375, |
|
"loss": 0.2034, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.04838673770427704, |
|
"rewards/margins": 0.2375943958759308, |
|
"rewards/rejected": -0.28598111867904663, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.6111852763861763e-06, |
|
"logits/chosen": -1.3457515239715576, |
|
"logits/rejected": -0.39270055294036865, |
|
"logps/chosen": -752.8702392578125, |
|
"logps/rejected": -1860.8333740234375, |
|
"loss": 0.1234, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.04639287292957306, |
|
"rewards/margins": 0.38252198696136475, |
|
"rewards/rejected": -0.4289148449897766, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.576987646390426e-06, |
|
"logits/chosen": -1.5459932088851929, |
|
"logits/rejected": -0.5794991254806519, |
|
"logps/chosen": -691.588134765625, |
|
"logps/rejected": -1757.112548828125, |
|
"loss": 0.1192, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.04728538915514946, |
|
"rewards/margins": 0.32668638229370117, |
|
"rewards/rejected": -0.3739717900753021, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.542775590305023e-06, |
|
"logits/chosen": -1.304917573928833, |
|
"logits/rejected": -0.4121823310852051, |
|
"logps/chosen": -630.0661010742188, |
|
"logps/rejected": -1441.1773681640625, |
|
"loss": 0.2289, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.03756406903266907, |
|
"rewards/margins": 0.20802605152130127, |
|
"rewards/rejected": -0.24559013545513153, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.5085555188492384e-06, |
|
"logits/chosen": -1.2159336805343628, |
|
"logits/rejected": -0.3775702118873596, |
|
"logps/chosen": -709.61376953125, |
|
"logps/rejected": -1723.700927734375, |
|
"loss": 0.1568, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.10398067533969879, |
|
"rewards/margins": 0.2780481278896332, |
|
"rewards/rejected": -0.38202884793281555, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.474333844244276e-06, |
|
"logits/chosen": -1.2202876806259155, |
|
"logits/rejected": -0.35152697563171387, |
|
"logps/chosen": -818.2611083984375, |
|
"logps/rejected": -1743.5579833984375, |
|
"loss": 0.1788, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.09500784426927567, |
|
"rewards/margins": 0.300513356924057, |
|
"rewards/rejected": -0.39552122354507446, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.440116979011743e-06, |
|
"logits/chosen": -1.4342302083969116, |
|
"logits/rejected": -0.45796999335289, |
|
"logps/chosen": -718.6922607421875, |
|
"logps/rejected": -1725.5560302734375, |
|
"loss": 0.197, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.05935473367571831, |
|
"rewards/margins": 0.324557363986969, |
|
"rewards/rejected": -0.383912056684494, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.4059113347720573e-06, |
|
"logits/chosen": -1.5391137599945068, |
|
"logits/rejected": -0.13381418585777283, |
|
"logps/chosen": -690.8306884765625, |
|
"logps/rejected": -1534.461181640625, |
|
"loss": 0.1946, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.10408179461956024, |
|
"rewards/margins": 0.27237391471862793, |
|
"rewards/rejected": -0.376455694437027, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.3717233210430258e-06, |
|
"logits/chosen": -1.308176875114441, |
|
"logits/rejected": -0.5252507925033569, |
|
"logps/chosen": -736.7550659179688, |
|
"logps/rejected": -1814.572021484375, |
|
"loss": 0.1699, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.1053951233625412, |
|
"rewards/margins": 0.3335246741771698, |
|
"rewards/rejected": -0.4389197826385498, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.337559344038817e-06, |
|
"logits/chosen": -1.2826203107833862, |
|
"logits/rejected": 0.2594057321548462, |
|
"logps/chosen": -654.9820556640625, |
|
"logps/rejected": -1548.369384765625, |
|
"loss": 0.1628, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.10001038014888763, |
|
"rewards/margins": 0.2573556900024414, |
|
"rewards/rejected": -0.35736608505249023, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.303425805469554e-06, |
|
"logits/chosen": -1.2893702983856201, |
|
"logits/rejected": -0.615670382976532, |
|
"logps/chosen": -686.9696044921875, |
|
"logps/rejected": -1765.0921630859375, |
|
"loss": 0.1341, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.06719444692134857, |
|
"rewards/margins": 0.3570956885814667, |
|
"rewards/rejected": -0.42429018020629883, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.269329101341745e-06, |
|
"logits/chosen": -1.5257200002670288, |
|
"logits/rejected": -0.8465067744255066, |
|
"logps/chosen": -722.9954833984375, |
|
"logps/rejected": -1763.6884765625, |
|
"loss": 0.1296, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.0741962268948555, |
|
"rewards/margins": 0.36958009004592896, |
|
"rewards/rejected": -0.44377630949020386, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.235275620759797e-06, |
|
"logits/chosen": -1.3611409664154053, |
|
"logits/rejected": 0.612551748752594, |
|
"logps/chosen": -703.1578979492188, |
|
"logps/rejected": -1591.042236328125, |
|
"loss": 0.1764, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.10201771557331085, |
|
"rewards/margins": 0.2424260824918747, |
|
"rewards/rejected": -0.34444376826286316, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2012717447288037e-06, |
|
"logits/chosen": -1.3054463863372803, |
|
"logits/rejected": -0.7033378481864929, |
|
"logps/chosen": -731.6030883789062, |
|
"logps/rejected": -1814.713134765625, |
|
"loss": 0.1576, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.0751316249370575, |
|
"rewards/margins": 0.3522658348083496, |
|
"rewards/rejected": -0.42739754915237427, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.167323844958867e-06, |
|
"logits/chosen": -1.524957299232483, |
|
"logits/rejected": -0.6119885444641113, |
|
"logps/chosen": -701.2098388671875, |
|
"logps/rejected": -1545.368896484375, |
|
"loss": 0.14, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.10369672626256943, |
|
"rewards/margins": 0.28280869126319885, |
|
"rewards/rejected": -0.3865054249763489, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.133438282671149e-06, |
|
"logits/chosen": -1.2132611274719238, |
|
"logits/rejected": -0.7082799673080444, |
|
"logps/chosen": -762.6727294921875, |
|
"logps/rejected": -1658.924072265625, |
|
"loss": 0.1803, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.1439850628376007, |
|
"rewards/margins": 0.27077409625053406, |
|
"rewards/rejected": -0.41475915908813477, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0996214074059033e-06, |
|
"logits/chosen": -1.6239715814590454, |
|
"logits/rejected": -0.5037415623664856, |
|
"logps/chosen": -786.1912841796875, |
|
"logps/rejected": -1638.0843505859375, |
|
"loss": 0.2179, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.07825516164302826, |
|
"rewards/margins": 0.2860822379589081, |
|
"rewards/rejected": -0.36433738470077515, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0658795558326745e-06, |
|
"logits/chosen": -1.3029212951660156, |
|
"logits/rejected": -0.10125327110290527, |
|
"logps/chosen": -724.8988647460938, |
|
"logps/rejected": -1521.581787109375, |
|
"loss": 0.1988, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.05552230030298233, |
|
"rewards/margins": 0.29605624079704285, |
|
"rewards/rejected": -0.3515785336494446, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0322190505629297e-06, |
|
"logits/chosen": -1.1891577243804932, |
|
"logits/rejected": -0.263233482837677, |
|
"logps/chosen": -726.5543212890625, |
|
"logps/rejected": -1851.503662109375, |
|
"loss": 0.1454, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.10788372904062271, |
|
"rewards/margins": 0.327489972114563, |
|
"rewards/rejected": -0.4353737235069275, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.998646198965312e-06, |
|
"logits/chosen": -1.376450777053833, |
|
"logits/rejected": -0.22948014736175537, |
|
"logps/chosen": -596.0374755859375, |
|
"logps/rejected": -1520.2818603515625, |
|
"loss": 0.2496, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.062327928841114044, |
|
"rewards/margins": 0.3048885762691498, |
|
"rewards/rejected": -0.3672165274620056, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.965167291983757e-06, |
|
"logits/chosen": -1.6274656057357788, |
|
"logits/rejected": -0.2617906928062439, |
|
"logps/chosen": -786.1827392578125, |
|
"logps/rejected": -1801.614990234375, |
|
"loss": 0.1203, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.10416553169488907, |
|
"rewards/margins": 0.327767550945282, |
|
"rewards/rejected": -0.43193307518959045, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.931788602958678e-06, |
|
"logits/chosen": -0.9874919652938843, |
|
"logits/rejected": 0.055336445569992065, |
|
"logps/chosen": -801.8827514648438, |
|
"logps/rejected": -1887.7252197265625, |
|
"loss": 0.1647, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.1263236552476883, |
|
"rewards/margins": 0.3318944573402405, |
|
"rewards/rejected": -0.4582180976867676, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.8985163864514644e-06, |
|
"logits/chosen": -1.4952738285064697, |
|
"logits/rejected": -0.03670965135097504, |
|
"logps/chosen": -776.7321166992188, |
|
"logps/rejected": -1846.3646240234375, |
|
"loss": 0.1433, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.11145295947790146, |
|
"rewards/margins": 0.3160027265548706, |
|
"rewards/rejected": -0.42745572328567505, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.8653568770724805e-06, |
|
"logits/chosen": -1.352738618850708, |
|
"logits/rejected": -0.2683241367340088, |
|
"logps/chosen": -648.5192260742188, |
|
"logps/rejected": -1464.099365234375, |
|
"loss": 0.185, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.08493933826684952, |
|
"rewards/margins": 0.2524186968803406, |
|
"rewards/rejected": -0.3373579978942871, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.8323162883128211e-06, |
|
"logits/chosen": -1.419662356376648, |
|
"logits/rejected": -0.4111382067203522, |
|
"logps/chosen": -699.5247802734375, |
|
"logps/rejected": -1743.6064453125, |
|
"loss": 0.1541, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.08453786373138428, |
|
"rewards/margins": 0.296464741230011, |
|
"rewards/rejected": -0.38100260496139526, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.7994008113800105e-06, |
|
"logits/chosen": -1.5189629793167114, |
|
"logits/rejected": -0.9077790975570679, |
|
"logps/chosen": -701.3331298828125, |
|
"logps/rejected": -1603.5174560546875, |
|
"loss": 0.1429, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.08178045600652695, |
|
"rewards/margins": 0.3028547167778015, |
|
"rewards/rejected": -0.38463518023490906, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7666166140378853e-06, |
|
"logits/chosen": -1.169510841369629, |
|
"logits/rejected": 0.19725301861763, |
|
"logps/chosen": -734.7293090820312, |
|
"logps/rejected": -1564.332763671875, |
|
"loss": 0.1528, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.09660240262746811, |
|
"rewards/margins": 0.28595981001853943, |
|
"rewards/rejected": -0.38256219029426575, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7339698394508632e-06, |
|
"logits/chosen": -1.266775369644165, |
|
"logits/rejected": -0.6185767650604248, |
|
"logps/chosen": -627.6648559570312, |
|
"logps/rejected": -1780.268310546875, |
|
"loss": 0.1694, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.0766761377453804, |
|
"rewards/margins": 0.3623715043067932, |
|
"rewards/rejected": -0.43904757499694824, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7014666050328325e-06, |
|
"logits/chosen": -1.5317351818084717, |
|
"logits/rejected": -0.46623557806015015, |
|
"logps/chosen": -639.0328369140625, |
|
"logps/rejected": -1635.7354736328125, |
|
"loss": 0.126, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.07408356666564941, |
|
"rewards/margins": 0.34189721941947937, |
|
"rewards/rejected": -0.41598081588745117, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6691130013008514e-06, |
|
"logits/chosen": -1.421917200088501, |
|
"logits/rejected": -0.19839780032634735, |
|
"logps/chosen": -837.2825317382812, |
|
"logps/rejected": -1678.8179931640625, |
|
"loss": 0.1956, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.08556106686592102, |
|
"rewards/margins": 0.2512792646884918, |
|
"rewards/rejected": -0.33684033155441284, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6369150907339007e-06, |
|
"logits/chosen": -1.195821762084961, |
|
"logits/rejected": -0.20372645556926727, |
|
"logps/chosen": -709.2095336914062, |
|
"logps/rejected": -1652.1871337890625, |
|
"loss": 0.1906, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.07075698673725128, |
|
"rewards/margins": 0.289537250995636, |
|
"rewards/rejected": -0.36029425263404846, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6048789066368858e-06, |
|
"logits/chosen": -1.354961633682251, |
|
"logits/rejected": -0.20124280452728271, |
|
"logps/chosen": -728.2799072265625, |
|
"logps/rejected": -1569.3551025390625, |
|
"loss": 0.1916, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.08767645061016083, |
|
"rewards/margins": 0.2705709636211395, |
|
"rewards/rejected": -0.35824739933013916, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5730104520100984e-06, |
|
"logits/chosen": -1.496524453163147, |
|
"logits/rejected": -0.8575867414474487, |
|
"logps/chosen": -612.16650390625, |
|
"logps/rejected": -1632.1365966796875, |
|
"loss": 0.1279, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.06737435609102249, |
|
"rewards/margins": 0.3229941725730896, |
|
"rewards/rejected": -0.3903685212135315, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5413156984243715e-06, |
|
"logits/chosen": -1.3209052085876465, |
|
"logits/rejected": -0.12577922642230988, |
|
"logps/chosen": -759.8672485351562, |
|
"logps/rejected": -1498.656494140625, |
|
"loss": 0.1552, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.10295001417398453, |
|
"rewards/margins": 0.21431489288806915, |
|
"rewards/rejected": -0.3172649145126343, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.509800584902108e-06, |
|
"logits/chosen": -1.1863139867782593, |
|
"logits/rejected": -0.08450505882501602, |
|
"logps/chosen": -838.8494873046875, |
|
"logps/rejected": -1536.5277099609375, |
|
"loss": 0.167, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.12472305446863174, |
|
"rewards/margins": 0.24718424677848816, |
|
"rewards/rejected": -0.3719072937965393, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.4784710168044215e-06, |
|
"logits/chosen": -1.369985818862915, |
|
"logits/rejected": -0.5248149037361145, |
|
"logps/chosen": -883.6121826171875, |
|
"logps/rejected": -1617.128662109375, |
|
"loss": 0.1984, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.1338253617286682, |
|
"rewards/margins": 0.2511526942253113, |
|
"rewards/rejected": -0.3849780857563019, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.4473328647245726e-06, |
|
"logits/chosen": -1.624087929725647, |
|
"logits/rejected": -0.42871198058128357, |
|
"logps/chosen": -694.0233764648438, |
|
"logps/rejected": -1572.922119140625, |
|
"loss": 0.2198, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.1324300318956375, |
|
"rewards/margins": 0.2687898874282837, |
|
"rewards/rejected": -0.4012199342250824, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.4163919633879325e-06, |
|
"logits/chosen": -1.4249976873397827, |
|
"logits/rejected": -0.46216440200805664, |
|
"logps/chosen": -831.1329956054688, |
|
"logps/rejected": -1623.590087890625, |
|
"loss": 0.2073, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.09118635952472687, |
|
"rewards/margins": 0.26622968912124634, |
|
"rewards/rejected": -0.357416033744812, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.3856541105586545e-06, |
|
"logits/chosen": -1.5596380233764648, |
|
"logits/rejected": -0.4608355462551117, |
|
"logps/chosen": -826.0984497070312, |
|
"logps/rejected": -1898.5416259765625, |
|
"loss": 0.1421, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1307828575372696, |
|
"rewards/margins": 0.34290483593940735, |
|
"rewards/rejected": -0.47368764877319336, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3551250659532853e-06, |
|
"logits/chosen": -1.492356300354004, |
|
"logits/rejected": -0.7112780809402466, |
|
"logps/chosen": -699.1672973632812, |
|
"logps/rejected": -1537.228271484375, |
|
"loss": 0.1776, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.06596329063177109, |
|
"rewards/margins": 0.2567977011203766, |
|
"rewards/rejected": -0.32276099920272827, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3248105501614897e-06, |
|
"logits/chosen": -1.2990128993988037, |
|
"logits/rejected": -0.7208808660507202, |
|
"logps/chosen": -714.08544921875, |
|
"logps/rejected": -1732.0875244140625, |
|
"loss": 0.2147, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.05500803142786026, |
|
"rewards/margins": 0.26819437742233276, |
|
"rewards/rejected": -0.32320234179496765, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.2947162435741278e-06, |
|
"logits/chosen": -1.1586157083511353, |
|
"logits/rejected": 0.03688998147845268, |
|
"logps/chosen": -734.365966796875, |
|
"logps/rejected": -1622.6265869140625, |
|
"loss": 0.2471, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.1007305383682251, |
|
"rewards/margins": 0.197604700922966, |
|
"rewards/rejected": -0.2983352243900299, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2648477853188395e-06, |
|
"logits/chosen": -1.412379503250122, |
|
"logits/rejected": -0.5264952778816223, |
|
"logps/chosen": -698.6842651367188, |
|
"logps/rejected": -1511.8642578125, |
|
"loss": 0.1865, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.042982958257198334, |
|
"rewards/margins": 0.26162266731262207, |
|
"rewards/rejected": -0.304605633020401, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2352107722033842e-06, |
|
"logits/chosen": -1.2586696147918701, |
|
"logits/rejected": -0.15170638263225555, |
|
"logps/chosen": -653.3026123046875, |
|
"logps/rejected": -1529.8104248046875, |
|
"loss": 0.1549, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.0475340336561203, |
|
"rewards/margins": 0.275061696767807, |
|
"rewards/rejected": -0.3225957453250885, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.205810757666894e-06, |
|
"logits/chosen": -1.3673145771026611, |
|
"logits/rejected": -0.4642263948917389, |
|
"logps/chosen": -588.0513916015625, |
|
"logps/rejected": -1447.431396484375, |
|
"loss": 0.1613, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.07587876915931702, |
|
"rewards/margins": 0.24527780711650848, |
|
"rewards/rejected": -0.3211565613746643, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.176653250739265e-06, |
|
"logits/chosen": -1.4524450302124023, |
|
"logits/rejected": -0.21896734833717346, |
|
"logps/chosen": -831.2824096679688, |
|
"logps/rejected": -1819.2064208984375, |
|
"loss": 0.1362, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.09457580000162125, |
|
"rewards/margins": 0.29631510376930237, |
|
"rewards/rejected": -0.390890896320343, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1477437150088599e-06, |
|
"logits/chosen": -1.112823247909546, |
|
"logits/rejected": -0.731514573097229, |
|
"logps/chosen": -659.6626586914062, |
|
"logps/rejected": -1812.48828125, |
|
"loss": 0.1304, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.05299247428774834, |
|
"rewards/margins": 0.3928179442882538, |
|
"rewards/rejected": -0.4458104074001312, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.1190875675987355e-06, |
|
"logits/chosen": -1.3094470500946045, |
|
"logits/rejected": -0.5637291073799133, |
|
"logps/chosen": -753.520263671875, |
|
"logps/rejected": -1700.703857421875, |
|
"loss": 0.1502, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.09490607678890228, |
|
"rewards/margins": 0.31419411301612854, |
|
"rewards/rejected": -0.409100204706192, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.0906901781515695e-06, |
|
"logits/chosen": -1.550244927406311, |
|
"logits/rejected": -0.08849823474884033, |
|
"logps/chosen": -724.5099487304688, |
|
"logps/rejected": -1681.033447265625, |
|
"loss": 0.1606, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.08625416457653046, |
|
"rewards/margins": 0.31124037504196167, |
|
"rewards/rejected": -0.39749449491500854, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0625568678234839e-06, |
|
"logits/chosen": -1.0879476070404053, |
|
"logits/rejected": -0.13099336624145508, |
|
"logps/chosen": -671.8837280273438, |
|
"logps/rejected": -1590.70068359375, |
|
"loss": 0.1721, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.06376481801271439, |
|
"rewards/margins": 0.2916422486305237, |
|
"rewards/rejected": -0.35540705919265747, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.034692908286964e-06, |
|
"logits/chosen": -1.3455946445465088, |
|
"logits/rejected": -0.2840282917022705, |
|
"logps/chosen": -611.4814453125, |
|
"logps/rejected": -1663.345703125, |
|
"loss": 0.2039, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.07838527858257294, |
|
"rewards/margins": 0.31080400943756104, |
|
"rewards/rejected": -0.3891892731189728, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0071035207430352e-06, |
|
"logits/chosen": -1.2556473016738892, |
|
"logits/rejected": -0.011271673254668713, |
|
"logps/chosen": -753.8445434570312, |
|
"logps/rejected": -1636.611083984375, |
|
"loss": 0.2112, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.11006224155426025, |
|
"rewards/margins": 0.24791212379932404, |
|
"rewards/rejected": -0.3579743504524231, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.797938749429088e-07, |
|
"logits/chosen": -1.2267249822616577, |
|
"logits/rejected": -0.35565489530563354, |
|
"logps/chosen": -690.4405517578125, |
|
"logps/rejected": -1600.0665283203125, |
|
"loss": 0.1862, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.11180742084980011, |
|
"rewards/margins": 0.24852195382118225, |
|
"rewards/rejected": -0.36032935976982117, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.527690882192636e-07, |
|
"logits/chosen": -1.2072794437408447, |
|
"logits/rejected": 0.457929790019989, |
|
"logps/chosen": -697.0407104492188, |
|
"logps/rejected": -1490.8367919921875, |
|
"loss": 0.1672, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.08405301719903946, |
|
"rewards/margins": 0.30805063247680664, |
|
"rewards/rejected": -0.3921036422252655, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.260342245273507e-07, |
|
"logits/chosen": -1.3990890979766846, |
|
"logits/rejected": -0.6794065237045288, |
|
"logps/chosen": -618.4937744140625, |
|
"logps/rejected": -1800.4622802734375, |
|
"loss": 0.1376, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.07932893931865692, |
|
"rewards/margins": 0.37783947587013245, |
|
"rewards/rejected": -0.4571684002876282, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.995942934960964e-07, |
|
"logits/chosen": -1.4945213794708252, |
|
"logits/rejected": -0.18756787478923798, |
|
"logps/chosen": -803.509521484375, |
|
"logps/rejected": -1819.3349609375, |
|
"loss": 0.1544, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.09666319191455841, |
|
"rewards/margins": 0.36414963006973267, |
|
"rewards/rejected": -0.4608128070831299, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.734542494893955e-07, |
|
"logits/chosen": -1.431398868560791, |
|
"logits/rejected": -0.4752410352230072, |
|
"logps/chosen": -792.5185546875, |
|
"logps/rejected": -1632.630126953125, |
|
"loss": 0.2053, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.09532758593559265, |
|
"rewards/margins": 0.30739787220954895, |
|
"rewards/rejected": -0.4027254581451416, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.476189906777457e-07, |
|
"logits/chosen": -1.3982821702957153, |
|
"logits/rejected": -0.08427709341049194, |
|
"logps/chosen": -703.8153076171875, |
|
"logps/rejected": -1600.6046142578125, |
|
"loss": 0.1632, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.08243191242218018, |
|
"rewards/margins": 0.2651790678501129, |
|
"rewards/rejected": -0.3476109802722931, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.220933581204257e-07, |
|
"logits/chosen": -1.2576748132705688, |
|
"logits/rejected": 0.40268439054489136, |
|
"logps/chosen": -528.5084228515625, |
|
"logps/rejected": -1385.6802978515625, |
|
"loss": 0.1183, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.042114533483982086, |
|
"rewards/margins": 0.2899821698665619, |
|
"rewards/rejected": -0.3320966958999634, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 7.968821348583644e-07, |
|
"logits/chosen": -1.3039714097976685, |
|
"logits/rejected": -0.34471797943115234, |
|
"logps/chosen": -695.2639770507812, |
|
"logps/rejected": -1490.329345703125, |
|
"loss": 0.1969, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.08580182492733002, |
|
"rewards/margins": 0.252492755651474, |
|
"rewards/rejected": -0.3382945656776428, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.719900450178882e-07, |
|
"logits/chosen": -1.2936707735061646, |
|
"logits/rejected": 0.12274640798568726, |
|
"logps/chosen": -856.3453979492188, |
|
"logps/rejected": -1843.365966796875, |
|
"loss": 0.1424, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.11615820974111557, |
|
"rewards/margins": 0.33368679881095886, |
|
"rewards/rejected": -0.44984501600265503, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.474217529255018e-07, |
|
"logits/chosen": -1.611425757408142, |
|
"logits/rejected": -0.11960859596729279, |
|
"logps/chosen": -636.3781127929688, |
|
"logps/rejected": -1416.641357421875, |
|
"loss": 0.1836, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.03691656142473221, |
|
"rewards/margins": 0.26324373483657837, |
|
"rewards/rejected": -0.30016031861305237, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.231818622338824e-07, |
|
"logits/chosen": -1.616742730140686, |
|
"logits/rejected": -0.024957846850156784, |
|
"logps/chosen": -676.9722900390625, |
|
"logps/rejected": -1823.395751953125, |
|
"loss": 0.1225, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.06905169785022736, |
|
"rewards/margins": 0.3410964906215668, |
|
"rewards/rejected": -0.4101482033729553, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.992749150592343e-07, |
|
"logits/chosen": -1.2690980434417725, |
|
"logits/rejected": -0.1918954849243164, |
|
"logps/chosen": -866.05029296875, |
|
"logps/rejected": -1606.396240234375, |
|
"loss": 0.1865, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.102320097386837, |
|
"rewards/margins": 0.27621665596961975, |
|
"rewards/rejected": -0.37853676080703735, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.75705391130183e-07, |
|
"logits/chosen": -1.2711069583892822, |
|
"logits/rejected": -0.00027151108952239156, |
|
"logps/chosen": -804.7188720703125, |
|
"logps/rejected": -1668.5374755859375, |
|
"loss": 0.166, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.07086384296417236, |
|
"rewards/margins": 0.2957269251346588, |
|
"rewards/rejected": -0.3665907680988312, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.524777069483526e-07, |
|
"logits/chosen": -1.225556492805481, |
|
"logits/rejected": 0.41769227385520935, |
|
"logps/chosen": -634.6071166992188, |
|
"logps/rejected": -1566.803466796875, |
|
"loss": 0.1916, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.020447371527552605, |
|
"rewards/margins": 0.31025153398513794, |
|
"rewards/rejected": -0.3306989073753357, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.29596214960792e-07, |
|
"logits/chosen": -1.3543643951416016, |
|
"logits/rejected": -0.1612066775560379, |
|
"logps/chosen": -731.138671875, |
|
"logps/rejected": -1658.898193359375, |
|
"loss": 0.1954, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.078438401222229, |
|
"rewards/margins": 0.2861797511577606, |
|
"rewards/rejected": -0.36461812257766724, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 6.070652027444102e-07, |
|
"logits/chosen": -1.5058627128601074, |
|
"logits/rejected": -0.940344512462616, |
|
"logps/chosen": -629.819580078125, |
|
"logps/rejected": -1781.6654052734375, |
|
"loss": 0.1992, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.061135418713092804, |
|
"rewards/margins": 0.3336263597011566, |
|
"rewards/rejected": -0.39476174116134644, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.848888922025553e-07, |
|
"logits/chosen": -1.524287462234497, |
|
"logits/rejected": -0.8633726239204407, |
|
"logps/chosen": -602.5631103515625, |
|
"logps/rejected": -1604.6434326171875, |
|
"loss": 0.1721, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.05612843483686447, |
|
"rewards/margins": 0.33471354842185974, |
|
"rewards/rejected": -0.3908420205116272, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.63071438773913e-07, |
|
"logits/chosen": -1.4894258975982666, |
|
"logits/rejected": -0.14880971610546112, |
|
"logps/chosen": -642.1497802734375, |
|
"logps/rejected": -1459.4459228515625, |
|
"loss": 0.2064, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.0682179257273674, |
|
"rewards/margins": 0.21762903034687042, |
|
"rewards/rejected": -0.2858469486236572, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.416169306538485e-07, |
|
"logits/chosen": -1.3140041828155518, |
|
"logits/rejected": 0.3596586287021637, |
|
"logps/chosen": -820.9474487304688, |
|
"logps/rejected": -1682.409912109375, |
|
"loss": 0.2355, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.09827397763729095, |
|
"rewards/margins": 0.281690388917923, |
|
"rewards/rejected": -0.3799643814563751, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.205293880283552e-07, |
|
"logits/chosen": -1.5573115348815918, |
|
"logits/rejected": -0.13623039424419403, |
|
"logps/chosen": -671.4677124023438, |
|
"logps/rejected": -1707.608642578125, |
|
"loss": 0.1752, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.05831771343946457, |
|
"rewards/margins": 0.3186204433441162, |
|
"rewards/rejected": -0.37693825364112854, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.998127623207404e-07, |
|
"logits/chosen": -1.2270171642303467, |
|
"logits/rejected": -0.16427640616893768, |
|
"logps/chosen": -636.1573486328125, |
|
"logps/rejected": -1320.9652099609375, |
|
"loss": 0.1501, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.04943504184484482, |
|
"rewards/margins": 0.23794174194335938, |
|
"rewards/rejected": -0.2873767912387848, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.794709354512073e-07, |
|
"logits/chosen": -1.4142221212387085, |
|
"logits/rejected": -0.6630762219429016, |
|
"logps/chosen": -694.4979858398438, |
|
"logps/rejected": -1861.2236328125, |
|
"loss": 0.1027, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.0681043490767479, |
|
"rewards/margins": 0.33745378255844116, |
|
"rewards/rejected": -0.40555816888809204, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.5950771910944603e-07, |
|
"logits/chosen": -1.386041522026062, |
|
"logits/rejected": -0.4771800637245178, |
|
"logps/chosen": -552.6729736328125, |
|
"logps/rejected": -1493.256103515625, |
|
"loss": 0.1758, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.06818665564060211, |
|
"rewards/margins": 0.2598266899585724, |
|
"rewards/rejected": -0.3280133306980133, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.399268540403975e-07, |
|
"logits/chosen": -1.6429307460784912, |
|
"logits/rejected": -0.7215126752853394, |
|
"logps/chosen": -692.6094970703125, |
|
"logps/rejected": -1617.4793701171875, |
|
"loss": 0.1561, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.04670856520533562, |
|
"rewards/margins": 0.3174007534980774, |
|
"rewards/rejected": -0.3641093373298645, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.2073200934330316e-07, |
|
"logits/chosen": -1.318565011024475, |
|
"logits/rejected": 0.31595462560653687, |
|
"logps/chosen": -688.9269409179688, |
|
"logps/rejected": -1576.12939453125, |
|
"loss": 0.1494, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.05915086343884468, |
|
"rewards/margins": 0.2843713164329529, |
|
"rewards/rejected": -0.34352222084999084, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.019267817841835e-07, |
|
"logits/chosen": -1.4014190435409546, |
|
"logits/rejected": 0.06803856045007706, |
|
"logps/chosen": -661.041015625, |
|
"logps/rejected": -1782.5843505859375, |
|
"loss": 0.1339, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.060147546231746674, |
|
"rewards/margins": 0.367543488740921, |
|
"rewards/rejected": -0.4276910424232483, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.8351469512186656e-07, |
|
"logits/chosen": -1.293666124343872, |
|
"logits/rejected": 0.01516579370945692, |
|
"logps/chosen": -703.8981323242188, |
|
"logps/rejected": -1585.350830078125, |
|
"loss": 0.2612, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.07257186621427536, |
|
"rewards/margins": 0.24214370548725128, |
|
"rewards/rejected": -0.31471556425094604, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.654991994477039e-07, |
|
"logits/chosen": -1.4482967853546143, |
|
"logits/rejected": -0.5136088132858276, |
|
"logps/chosen": -739.101318359375, |
|
"logps/rejected": -1636.627685546875, |
|
"loss": 0.2446, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.08362185955047607, |
|
"rewards/margins": 0.2510630488395691, |
|
"rewards/rejected": -0.33468490839004517, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.4788367053908087e-07, |
|
"logits/chosen": -1.464727520942688, |
|
"logits/rejected": -0.6895856261253357, |
|
"logps/chosen": -649.2825927734375, |
|
"logps/rejected": -1706.675048828125, |
|
"loss": 0.1222, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.0619744174182415, |
|
"rewards/margins": 0.3182070851325989, |
|
"rewards/rejected": -0.3801814913749695, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.3067140922686175e-07, |
|
"logits/chosen": -1.2893580198287964, |
|
"logits/rejected": -0.02746570110321045, |
|
"logps/chosen": -637.2613525390625, |
|
"logps/rejected": -1635.1025390625, |
|
"loss": 0.1475, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.06607834994792938, |
|
"rewards/margins": 0.306417852640152, |
|
"rewards/rejected": -0.37249621748924255, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.1386564077687115e-07, |
|
"logits/chosen": -1.2429146766662598, |
|
"logits/rejected": -0.5083945989608765, |
|
"logps/chosen": -689.4899291992188, |
|
"logps/rejected": -1385.622802734375, |
|
"loss": 0.2019, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.08055596053600311, |
|
"rewards/margins": 0.19417758285999298, |
|
"rewards/rejected": -0.2747335135936737, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.9746951428553884e-07, |
|
"logits/chosen": -1.2200576066970825, |
|
"logits/rejected": 0.4126719534397125, |
|
"logps/chosen": -697.4050903320312, |
|
"logps/rejected": -1761.609375, |
|
"loss": 0.1621, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.057256706058979034, |
|
"rewards/margins": 0.3544352650642395, |
|
"rewards/rejected": -0.41169196367263794, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.814861020898146e-07, |
|
"logits/chosen": -1.5707600116729736, |
|
"logits/rejected": -0.5523526668548584, |
|
"logps/chosen": -807.9168090820312, |
|
"logps/rejected": -1893.062255859375, |
|
"loss": 0.12, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.036843474954366684, |
|
"rewards/margins": 0.38658010959625244, |
|
"rewards/rejected": -0.4234235882759094, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.6591839919146963e-07, |
|
"logits/chosen": -1.3549106121063232, |
|
"logits/rejected": -0.0543874129652977, |
|
"logps/chosen": -659.9330444335938, |
|
"logps/rejected": -1517.608642578125, |
|
"loss": 0.1865, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.08612764626741409, |
|
"rewards/margins": 0.2554120123386383, |
|
"rewards/rejected": -0.3415396809577942, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.507693226958871e-07, |
|
"logits/chosen": -1.5055897235870361, |
|
"logits/rejected": -0.7960633635520935, |
|
"logps/chosen": -594.6507568359375, |
|
"logps/rejected": -1544.100341796875, |
|
"loss": 0.1835, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.05378856509923935, |
|
"rewards/margins": 0.26992538571357727, |
|
"rewards/rejected": -0.3237139582633972, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.360417112654481e-07, |
|
"logits/chosen": -1.3403241634368896, |
|
"logits/rejected": -0.036334630101919174, |
|
"logps/chosen": -747.6497802734375, |
|
"logps/rejected": -1497.166748046875, |
|
"loss": 0.2369, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.09375442564487457, |
|
"rewards/margins": 0.2080894410610199, |
|
"rewards/rejected": -0.30184388160705566, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.2173832458762146e-07, |
|
"logits/chosen": -1.3305310010910034, |
|
"logits/rejected": 0.5647405385971069, |
|
"logps/chosen": -708.2887573242188, |
|
"logps/rejected": -1672.27734375, |
|
"loss": 0.1525, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.0794595405459404, |
|
"rewards/margins": 0.27278995513916016, |
|
"rewards/rejected": -0.35224950313568115, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.07861842857843e-07, |
|
"logits/chosen": -1.3758533000946045, |
|
"logits/rejected": -0.3346394896507263, |
|
"logps/chosen": -641.7481689453125, |
|
"logps/rejected": -1659.6363525390625, |
|
"loss": 0.1304, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.05183352157473564, |
|
"rewards/margins": 0.3061942458152771, |
|
"rewards/rejected": -0.35802772641181946, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.9441486627729987e-07, |
|
"logits/chosen": -1.2939542531967163, |
|
"logits/rejected": -0.2226782590150833, |
|
"logps/chosen": -574.517822265625, |
|
"logps/rejected": -1345.4554443359375, |
|
"loss": 0.2427, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.044371530413627625, |
|
"rewards/margins": 0.25636622309684753, |
|
"rewards/rejected": -0.30073776841163635, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.8139991456569694e-07, |
|
"logits/chosen": -1.5377174615859985, |
|
"logits/rejected": -0.4445236623287201, |
|
"logps/chosen": -666.4637451171875, |
|
"logps/rejected": -1826.706298828125, |
|
"loss": 0.1407, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.04511731117963791, |
|
"rewards/margins": 0.3437032103538513, |
|
"rewards/rejected": -0.3888205587863922, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.6881942648911077e-07, |
|
"logits/chosen": -1.1588428020477295, |
|
"logits/rejected": -0.33162426948547363, |
|
"logps/chosen": -692.0267333984375, |
|
"logps/rejected": -1666.332763671875, |
|
"loss": 0.1417, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.08260687440633774, |
|
"rewards/margins": 0.27366960048675537, |
|
"rewards/rejected": -0.3562764525413513, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.5667575940300384e-07, |
|
"logits/chosen": -1.2564775943756104, |
|
"logits/rejected": 0.01690312661230564, |
|
"logps/chosen": -673.8323974609375, |
|
"logps/rejected": -1666.777099609375, |
|
"loss": 0.1716, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.0681915134191513, |
|
"rewards/margins": 0.31038275361061096, |
|
"rewards/rejected": -0.3785742521286011, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.449711888105046e-07, |
|
"logits/chosen": -1.518640160560608, |
|
"logits/rejected": -0.6442452669143677, |
|
"logps/chosen": -570.983154296875, |
|
"logps/rejected": -1282.307373046875, |
|
"loss": 0.2478, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.0646091103553772, |
|
"rewards/margins": 0.1985008716583252, |
|
"rewards/rejected": -0.2631099820137024, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.3370790793601373e-07, |
|
"logits/chosen": -1.3143935203552246, |
|
"logits/rejected": -0.862291157245636, |
|
"logps/chosen": -554.1536254882812, |
|
"logps/rejected": -1571.299560546875, |
|
"loss": 0.1953, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.04557369276881218, |
|
"rewards/margins": 0.30963796377182007, |
|
"rewards/rejected": -0.35521167516708374, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.2288802731423882e-07, |
|
"logits/chosen": -1.0400464534759521, |
|
"logits/rejected": -0.2425573766231537, |
|
"logps/chosen": -563.1722412109375, |
|
"logps/rejected": -1669.356201171875, |
|
"loss": 0.1733, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.032249629497528076, |
|
"rewards/margins": 0.35338449478149414, |
|
"rewards/rejected": -0.38563409447669983, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.125135743947145e-07, |
|
"logits/chosen": -1.392665982246399, |
|
"logits/rejected": -0.2731800079345703, |
|
"logps/chosen": -636.9364013671875, |
|
"logps/rejected": -1649.939453125, |
|
"loss": 0.1732, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.05704592913389206, |
|
"rewards/margins": 0.2943916916847229, |
|
"rewards/rejected": -0.35143759846687317, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.0258649316189722e-07, |
|
"logits/chosen": -1.448233723640442, |
|
"logits/rejected": -0.09116245806217194, |
|
"logps/chosen": -595.1334838867188, |
|
"logps/rejected": -1466.4798583984375, |
|
"loss": 0.1563, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.029435504227876663, |
|
"rewards/margins": 0.2879069745540619, |
|
"rewards/rejected": -0.31734246015548706, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 9.310864377089696e-08, |
|
"logits/chosen": -1.298662543296814, |
|
"logits/rejected": 0.7022291421890259, |
|
"logps/chosen": -692.7830810546875, |
|
"logps/rejected": -1592.398193359375, |
|
"loss": 0.1976, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.06722499430179596, |
|
"rewards/margins": 0.26029545068740845, |
|
"rewards/rejected": -0.327520489692688, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 8.408180219891899e-08, |
|
"logits/chosen": -1.0684707164764404, |
|
"logits/rejected": -0.727800726890564, |
|
"logps/chosen": -588.0787353515625, |
|
"logps/rejected": -1701.8707275390625, |
|
"loss": 0.1205, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.026762153953313828, |
|
"rewards/margins": 0.3723045885562897, |
|
"rewards/rejected": -0.3990667462348938, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.550765991247655e-08, |
|
"logits/chosen": -1.3413885831832886, |
|
"logits/rejected": -0.5428125262260437, |
|
"logps/chosen": -576.0411376953125, |
|
"logps/rejected": -1852.245361328125, |
|
"loss": 0.1253, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.046153269708156586, |
|
"rewards/margins": 0.3955245614051819, |
|
"rewards/rejected": -0.44167786836624146, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 6.738782355044048e-08, |
|
"logits/chosen": -1.3843357563018799, |
|
"logits/rejected": -0.3473323881626129, |
|
"logps/chosen": -653.3553466796875, |
|
"logps/rejected": -1551.5245361328125, |
|
"loss": 0.2134, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.07392759621143341, |
|
"rewards/margins": 0.27120834589004517, |
|
"rewards/rejected": -0.3451359272003174, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.972381462298643e-08, |
|
"logits/chosen": -1.446597695350647, |
|
"logits/rejected": -0.7707004547119141, |
|
"logps/chosen": -587.423095703125, |
|
"logps/rejected": -1533.5751953125, |
|
"loss": 0.1378, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.048751670867204666, |
|
"rewards/margins": 0.2718280255794525, |
|
"rewards/rejected": -0.3205797076225281, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.2517069226488694e-08, |
|
"logits/chosen": -1.3285058736801147, |
|
"logits/rejected": 0.6017956733703613, |
|
"logps/chosen": -635.4934692382812, |
|
"logps/rejected": -1658.809326171875, |
|
"loss": 0.1218, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.054670076817274094, |
|
"rewards/margins": 0.33853715658187866, |
|
"rewards/rejected": -0.39320722222328186, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 4.576893777442415e-08, |
|
"logits/chosen": -1.455540418624878, |
|
"logits/rejected": -0.42580240964889526, |
|
"logps/chosen": -567.0203857421875, |
|
"logps/rejected": -1439.871826171875, |
|
"loss": 0.1791, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.04314614459872246, |
|
"rewards/margins": 0.26481324434280396, |
|
"rewards/rejected": -0.3079594075679779, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.9480684744327145e-08, |
|
"logits/chosen": -0.8030007481575012, |
|
"logits/rejected": -0.6547081470489502, |
|
"logps/chosen": -714.3633422851562, |
|
"logps/rejected": -1776.2916259765625, |
|
"loss": 0.135, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.07487257570028305, |
|
"rewards/margins": 0.3637450039386749, |
|
"rewards/rejected": -0.4386175274848938, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.3653488440851255e-08, |
|
"logits/chosen": -1.4338642358779907, |
|
"logits/rejected": -0.2775370478630066, |
|
"logps/chosen": -522.8925170898438, |
|
"logps/rejected": -1403.964599609375, |
|
"loss": 0.1429, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.028084104880690575, |
|
"rewards/margins": 0.2945864796638489, |
|
"rewards/rejected": -0.322670578956604, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.82884407749745e-08, |
|
"logits/chosen": -1.548905611038208, |
|
"logits/rejected": -0.1580895483493805, |
|
"logps/chosen": -721.1185913085938, |
|
"logps/rejected": -1815.273681640625, |
|
"loss": 0.168, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.058462172746658325, |
|
"rewards/margins": 0.334224134683609, |
|
"rewards/rejected": -0.39268630743026733, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.3386547059396634e-08, |
|
"logits/chosen": -1.3936518430709839, |
|
"logits/rejected": -0.42037662863731384, |
|
"logps/chosen": -727.2462768554688, |
|
"logps/rejected": -1849.8060302734375, |
|
"loss": 0.1504, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.053724952042102814, |
|
"rewards/margins": 0.34565088152885437, |
|
"rewards/rejected": -0.3993757963180542, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.8948725820160663e-08, |
|
"logits/chosen": -1.5373561382293701, |
|
"logits/rejected": -0.5124548673629761, |
|
"logps/chosen": -707.9256591796875, |
|
"logps/rejected": -1602.8634033203125, |
|
"loss": 0.152, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.06985752284526825, |
|
"rewards/margins": 0.3106308579444885, |
|
"rewards/rejected": -0.3804883658885956, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.497580862453829e-08, |
|
"logits/chosen": -1.3587336540222168, |
|
"logits/rejected": 0.12183968722820282, |
|
"logps/chosen": -682.3876342773438, |
|
"logps/rejected": -1501.242431640625, |
|
"loss": 0.179, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.07504203170537949, |
|
"rewards/margins": 0.25899559259414673, |
|
"rewards/rejected": -0.3340376317501068, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.14685399252093e-08, |
|
"logits/chosen": -1.2843676805496216, |
|
"logits/rejected": -0.37819939851760864, |
|
"logps/chosen": -639.9012451171875, |
|
"logps/rejected": -1667.867431640625, |
|
"loss": 0.1334, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.05129992961883545, |
|
"rewards/margins": 0.30902066826820374, |
|
"rewards/rejected": -0.3603206276893616, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 8.427576920763957e-09, |
|
"logits/chosen": -1.2090356349945068, |
|
"logits/rejected": -0.08205322176218033, |
|
"logps/chosen": -759.0253295898438, |
|
"logps/rejected": -1694.987060546875, |
|
"loss": 0.2816, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.13451895117759705, |
|
"rewards/margins": 0.25275808572769165, |
|
"rewards/rejected": -0.3872770071029663, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 5.853489432556536e-09, |
|
"logits/chosen": -1.5106983184814453, |
|
"logits/rejected": -0.8639631271362305, |
|
"logps/chosen": -654.6641845703125, |
|
"logps/rejected": -1713.3929443359375, |
|
"loss": 0.1831, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.06343318521976471, |
|
"rewards/margins": 0.31247463822364807, |
|
"rewards/rejected": -0.3759078085422516, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.746759797931265e-09, |
|
"logits/chosen": -1.4619848728179932, |
|
"logits/rejected": 0.3511095643043518, |
|
"logps/chosen": -736.3690795898438, |
|
"logps/rejected": -1626.9466552734375, |
|
"loss": 0.1604, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.06985476613044739, |
|
"rewards/margins": 0.2860848307609558, |
|
"rewards/rejected": -0.3559395968914032, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.1077827798404728e-09, |
|
"logits/chosen": -1.3730641603469849, |
|
"logits/rejected": -0.6747050881385803, |
|
"logps/chosen": -546.4849853515625, |
|
"logps/rejected": -1590.315673828125, |
|
"loss": 0.1671, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.036424748599529266, |
|
"rewards/margins": 0.34394755959510803, |
|
"rewards/rejected": -0.3803723454475403, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 9.368654928731958e-10, |
|
"logits/chosen": -1.3955776691436768, |
|
"logits/rejected": -0.6365998983383179, |
|
"logps/chosen": -608.5187377929688, |
|
"logps/rejected": -1592.2816162109375, |
|
"loss": 0.1843, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.08722618967294693, |
|
"rewards/margins": 0.30947092175483704, |
|
"rewards/rejected": -0.39669710397720337, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 2.3422734570816006e-10, |
|
"logits/chosen": -1.4981211423873901, |
|
"logits/rejected": -0.8375118374824524, |
|
"logps/chosen": -656.091796875, |
|
"logps/rejected": -1573.434814453125, |
|
"loss": 0.1836, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.05932006239891052, |
|
"rewards/margins": 0.28673693537712097, |
|
"rewards/rejected": -0.3460569679737091, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -1.4274616241455078, |
|
"logits/rejected": 0.46426883339881897, |
|
"logps/chosen": -776.08154296875, |
|
"logps/rejected": -1525.145751953125, |
|
"loss": 0.1924, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.08221141993999481, |
|
"rewards/margins": 0.24572968482971191, |
|
"rewards/rejected": -0.32794108986854553, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 2550, |
|
"total_flos": 0.0, |
|
"train_loss": 0.19951762257837782, |
|
"train_runtime": 10798.5669, |
|
"train_samples_per_second": 0.945, |
|
"train_steps_per_second": 0.236 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 2550, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|