|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9996190476190476, |
|
"eval_steps": 500, |
|
"global_step": 656, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 7.575757575757576e-08, |
|
"logits/chosen": 0.04974596947431564, |
|
"logits/rejected": 0.2963363230228424, |
|
"logps/chosen": -446.36370849609375, |
|
"logps/rejected": -275.23162841796875, |
|
"loss": 0.3488, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 7.575757575757576e-07, |
|
"logits/chosen": 0.07917162775993347, |
|
"logits/rejected": 0.2545989155769348, |
|
"logps/chosen": -351.4217529296875, |
|
"logps/rejected": -305.8712463378906, |
|
"loss": 0.3403, |
|
"rewards/accuracies": 0.3472222089767456, |
|
"rewards/chosen": -1.3770493296760833e-06, |
|
"rewards/margins": -5.5950724345166236e-05, |
|
"rewards/rejected": 5.4573672969127074e-05, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.5151515151515152e-06, |
|
"logits/chosen": 0.08743862062692642, |
|
"logits/rejected": 0.2663661539554596, |
|
"logps/chosen": -356.5379333496094, |
|
"logps/rejected": -272.34771728515625, |
|
"loss": 0.3613, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 8.02798240329139e-05, |
|
"rewards/margins": 0.0002525355666875839, |
|
"rewards/rejected": -0.00017225573537871242, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.2727272727272728e-06, |
|
"logits/chosen": 0.10032109916210175, |
|
"logits/rejected": 0.2204272300004959, |
|
"logps/chosen": -320.36749267578125, |
|
"logps/rejected": -265.65338134765625, |
|
"loss": 0.3542, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": 0.00015481823356822133, |
|
"rewards/margins": 7.609631575178355e-05, |
|
"rewards/rejected": 7.872191054048017e-05, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.0303030303030305e-06, |
|
"logits/chosen": 0.1070769876241684, |
|
"logits/rejected": 0.2916509211063385, |
|
"logps/chosen": -365.18646240234375, |
|
"logps/rejected": -253.8933868408203, |
|
"loss": 0.3552, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.0006457852432504296, |
|
"rewards/margins": 0.0006201790529303253, |
|
"rewards/rejected": 2.560619941505138e-05, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.7878787878787882e-06, |
|
"logits/chosen": 0.08771614730358124, |
|
"logits/rejected": 0.28780585527420044, |
|
"logps/chosen": -370.36212158203125, |
|
"logps/rejected": -294.9586181640625, |
|
"loss": 0.3338, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.0016288382466882467, |
|
"rewards/margins": 0.0014908717712387443, |
|
"rewards/rejected": 0.000137966446345672, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.5454545454545455e-06, |
|
"logits/chosen": 0.06850675493478775, |
|
"logits/rejected": 0.276347815990448, |
|
"logps/chosen": -375.48883056640625, |
|
"logps/rejected": -298.6650695800781, |
|
"loss": 0.3422, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": 0.004066063556820154, |
|
"rewards/margins": 0.004035579971969128, |
|
"rewards/rejected": 3.0483581213047728e-05, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.999432965739786e-06, |
|
"logits/chosen": 0.1320158988237381, |
|
"logits/rejected": 0.3291288912296295, |
|
"logps/chosen": -330.6249084472656, |
|
"logps/rejected": -303.25128173828125, |
|
"loss": 0.3197, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": 0.007968532852828503, |
|
"rewards/margins": 0.007708103861659765, |
|
"rewards/rejected": 0.0002604298642836511, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.9930567839810125e-06, |
|
"logits/chosen": 0.09887860715389252, |
|
"logits/rejected": 0.31491416692733765, |
|
"logps/chosen": -353.16217041015625, |
|
"logps/rejected": -297.11651611328125, |
|
"loss": 0.3248, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.01291077770292759, |
|
"rewards/margins": 0.018067900091409683, |
|
"rewards/rejected": -0.005157124251127243, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.979613761906212e-06, |
|
"logits/chosen": 0.17029765248298645, |
|
"logits/rejected": 0.260185182094574, |
|
"logps/chosen": -352.32379150390625, |
|
"logps/rejected": -288.6719665527344, |
|
"loss": 0.2939, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.007807808928191662, |
|
"rewards/margins": 0.023743372410535812, |
|
"rewards/rejected": -0.015935566276311874, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.959142005221991e-06, |
|
"logits/chosen": 0.1353963315486908, |
|
"logits/rejected": 0.2721942365169525, |
|
"logps/chosen": -363.1653137207031, |
|
"logps/rejected": -354.89312744140625, |
|
"loss": 0.2981, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.01541908085346222, |
|
"rewards/margins": 0.04708936810493469, |
|
"rewards/rejected": -0.06250844895839691, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.931699543346854e-06, |
|
"logits/chosen": 0.20096346735954285, |
|
"logits/rejected": 0.3208036422729492, |
|
"logps/chosen": -412.08306884765625, |
|
"logps/rejected": -410.16162109375, |
|
"loss": 0.2759, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.06705033034086227, |
|
"rewards/margins": 0.07482309639453888, |
|
"rewards/rejected": -0.14187341928482056, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.897364164920515e-06, |
|
"logits/chosen": 0.15477022528648376, |
|
"logits/rejected": 0.24951288104057312, |
|
"logps/chosen": -500.6244201660156, |
|
"logps/rejected": -552.2005004882812, |
|
"loss": 0.284, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.15765079855918884, |
|
"rewards/margins": 0.10079488903284073, |
|
"rewards/rejected": -0.258445680141449, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.8562331973035396e-06, |
|
"logits/chosen": 0.15251179039478302, |
|
"logits/rejected": 0.2451750487089157, |
|
"logps/chosen": -500.58416748046875, |
|
"logps/rejected": -580.9321899414062, |
|
"loss": 0.2784, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.15634217858314514, |
|
"rewards/margins": 0.12097392231225967, |
|
"rewards/rejected": -0.27731606364250183, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.808423230692374e-06, |
|
"logits/chosen": 0.14251364767551422, |
|
"logits/rejected": 0.3138013184070587, |
|
"logps/chosen": -508.4844665527344, |
|
"logps/rejected": -568.8987426757812, |
|
"loss": 0.2638, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.15639568865299225, |
|
"rewards/margins": 0.1208159551024437, |
|
"rewards/rejected": -0.27721160650253296, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.754069787631761e-06, |
|
"logits/chosen": 0.2015986144542694, |
|
"logits/rejected": 0.28302785754203796, |
|
"logps/chosen": -529.3695068359375, |
|
"logps/rejected": -604.30859375, |
|
"loss": 0.251, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.18331554532051086, |
|
"rewards/margins": 0.1465958058834076, |
|
"rewards/rejected": -0.32991132140159607, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.693326938861367e-06, |
|
"logits/chosen": 0.19963108003139496, |
|
"logits/rejected": 0.2948302626609802, |
|
"logps/chosen": -566.9232788085938, |
|
"logps/rejected": -621.8565673828125, |
|
"loss": 0.2514, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.2004656344652176, |
|
"rewards/margins": 0.12107620388269424, |
|
"rewards/rejected": -0.32154184579849243, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.626366866585528e-06, |
|
"logits/chosen": 0.20774176716804504, |
|
"logits/rejected": 0.31210097670555115, |
|
"logps/chosen": -525.9526977539062, |
|
"logps/rejected": -598.7525634765625, |
|
"loss": 0.2523, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.2060777246952057, |
|
"rewards/margins": 0.1383783519268036, |
|
"rewards/rejected": -0.3444560766220093, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.553379376404085e-06, |
|
"logits/chosen": 0.1882271021604538, |
|
"logits/rejected": 0.2884698808193207, |
|
"logps/chosen": -622.5642700195312, |
|
"logps/rejected": -703.97998046875, |
|
"loss": 0.2686, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.2503646910190582, |
|
"rewards/margins": 0.14028559625148773, |
|
"rewards/rejected": -0.39065021276474, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.474571359287791e-06, |
|
"logits/chosen": 0.1948501169681549, |
|
"logits/rejected": 0.3517269492149353, |
|
"logps/chosen": -605.8263549804688, |
|
"logps/rejected": -685.9619750976562, |
|
"loss": 0.2324, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.21189594268798828, |
|
"rewards/margins": 0.19469039142131805, |
|
"rewards/rejected": -0.40658634901046753, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.3901662051233755e-06, |
|
"logits/chosen": 0.1501813530921936, |
|
"logits/rejected": 0.29967957735061646, |
|
"logps/chosen": -588.287841796875, |
|
"logps/rejected": -692.4317626953125, |
|
"loss": 0.232, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.21489660441875458, |
|
"rewards/margins": 0.20667514204978943, |
|
"rewards/rejected": -0.4215717315673828, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.30040316949064e-06, |
|
"logits/chosen": 0.1845458298921585, |
|
"logits/rejected": 0.25728127360343933, |
|
"logps/chosen": -593.4507446289062, |
|
"logps/rejected": -647.9249267578125, |
|
"loss": 0.268, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.2259581983089447, |
|
"rewards/margins": 0.1573750227689743, |
|
"rewards/rejected": -0.3833332657814026, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.205536695466524e-06, |
|
"logits/chosen": 0.19619445502758026, |
|
"logits/rejected": 0.289485901594162, |
|
"logps/chosen": -596.2908325195312, |
|
"logps/rejected": -673.9340209960938, |
|
"loss": 0.2279, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.20733025670051575, |
|
"rewards/margins": 0.1769087016582489, |
|
"rewards/rejected": -0.38423892855644226, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.105835692378557e-06, |
|
"logits/chosen": 0.18007412552833557, |
|
"logits/rejected": 0.23454514145851135, |
|
"logps/chosen": -605.221923828125, |
|
"logps/rejected": -682.0065307617188, |
|
"loss": 0.2477, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.2464592009782791, |
|
"rewards/margins": 0.1562257707118988, |
|
"rewards/rejected": -0.4026849865913391, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.001582773552153e-06, |
|
"logits/chosen": 0.17503593862056732, |
|
"logits/rejected": 0.26662883162498474, |
|
"logps/chosen": -545.9107666015625, |
|
"logps/rejected": -640.1486206054688, |
|
"loss": 0.2225, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.23691585659980774, |
|
"rewards/margins": 0.16106419265270233, |
|
"rewards/rejected": -0.39798006415367126, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.893073455212438e-06, |
|
"logits/chosen": 0.200395867228508, |
|
"logits/rejected": 0.3584834933280945, |
|
"logps/chosen": -608.1827392578125, |
|
"logps/rejected": -740.0325317382812, |
|
"loss": 0.2478, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.26550978422164917, |
|
"rewards/margins": 0.1945229470729828, |
|
"rewards/rejected": -0.46003276109695435, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.7806153188114027e-06, |
|
"logits/chosen": 0.1786949336528778, |
|
"logits/rejected": 0.2547721266746521, |
|
"logps/chosen": -599.2467041015625, |
|
"logps/rejected": -727.934326171875, |
|
"loss": 0.2601, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.22916042804718018, |
|
"rewards/margins": 0.17522968351840973, |
|
"rewards/rejected": -0.40439003705978394, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.6645271391548542e-06, |
|
"logits/chosen": 0.18216630816459656, |
|
"logits/rejected": 0.24754111468791962, |
|
"logps/chosen": -618.3128051757812, |
|
"logps/rejected": -640.1773681640625, |
|
"loss": 0.2761, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.22649279236793518, |
|
"rewards/margins": 0.14542898535728455, |
|
"rewards/rejected": -0.37192174792289734, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.5451379808006014e-06, |
|
"logits/chosen": 0.19754137098789215, |
|
"logits/rejected": 0.27186593413352966, |
|
"logps/chosen": -573.2451171875, |
|
"logps/rejected": -707.2796020507812, |
|
"loss": 0.2209, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.2302563190460205, |
|
"rewards/margins": 0.19838322699069977, |
|
"rewards/rejected": -0.42863956093788147, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4227862652892106e-06, |
|
"logits/chosen": 0.16008315980434418, |
|
"logits/rejected": 0.27827057242393494, |
|
"logps/chosen": -662.4371948242188, |
|
"logps/rejected": -759.597900390625, |
|
"loss": 0.2408, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.2687516212463379, |
|
"rewards/margins": 0.17680145800113678, |
|
"rewards/rejected": -0.4455530643463135, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.2978188118513814e-06, |
|
"logits/chosen": 0.17593641579151154, |
|
"logits/rejected": 0.2789141535758972, |
|
"logps/chosen": -642.6029052734375, |
|
"logps/rejected": -753.4479370117188, |
|
"loss": 0.2304, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.27796030044555664, |
|
"rewards/margins": 0.1859740912914276, |
|
"rewards/rejected": -0.46393436193466187, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.1705898543111576e-06, |
|
"logits/chosen": 0.13969933986663818, |
|
"logits/rejected": 0.3079048693180084, |
|
"logps/chosen": -630.0859375, |
|
"logps/rejected": -765.4630737304688, |
|
"loss": 0.2384, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.2862771153450012, |
|
"rewards/margins": 0.19564750790596008, |
|
"rewards/rejected": -0.4819245934486389, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.041460036971664e-06, |
|
"logits/chosen": 0.14904941618442535, |
|
"logits/rejected": 0.27918586134910583, |
|
"logps/chosen": -619.8759765625, |
|
"logps/rejected": -759.2853393554688, |
|
"loss": 0.2347, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.27361705899238586, |
|
"rewards/margins": 0.19538012146949768, |
|
"rewards/rejected": -0.4689972400665283, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.910795392329649e-06, |
|
"logits/chosen": 0.1377524435520172, |
|
"logits/rejected": 0.2504701614379883, |
|
"logps/chosen": -647.4981689453125, |
|
"logps/rejected": -789.5301513671875, |
|
"loss": 0.2352, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.2706158757209778, |
|
"rewards/margins": 0.18975581228733063, |
|
"rewards/rejected": -0.460371732711792, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.7789663035166035e-06, |
|
"logits/chosen": 0.18703623116016388, |
|
"logits/rejected": 0.3294333815574646, |
|
"logps/chosen": -656.440673828125, |
|
"logps/rejected": -793.1935424804688, |
|
"loss": 0.2072, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.2833554744720459, |
|
"rewards/margins": 0.21432232856750488, |
|
"rewards/rejected": -0.4976778030395508, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.6463464544075344e-06, |
|
"logits/chosen": 0.15668293833732605, |
|
"logits/rejected": 0.2470695674419403, |
|
"logps/chosen": -603.0676879882812, |
|
"logps/rejected": -736.9429931640625, |
|
"loss": 0.227, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.27748292684555054, |
|
"rewards/margins": 0.1968688815832138, |
|
"rewards/rejected": -0.4743518829345703, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.513311770373421e-06, |
|
"logits/chosen": 0.09050649404525757, |
|
"logits/rejected": 0.27548736333847046, |
|
"logps/chosen": -645.054443359375, |
|
"logps/rejected": -780.7376708984375, |
|
"loss": 0.2277, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.27279263734817505, |
|
"rewards/margins": 0.2085564136505127, |
|
"rewards/rejected": -0.48134905099868774, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.380239352679908e-06, |
|
"logits/chosen": 0.11263048648834229, |
|
"logits/rejected": 0.23795035481452942, |
|
"logps/chosen": -635.0528564453125, |
|
"logps/rejected": -752.4202270507812, |
|
"loss": 0.2295, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.28419169783592224, |
|
"rewards/margins": 0.18686431646347046, |
|
"rewards/rejected": -0.4710559844970703, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.247506409552795e-06, |
|
"logits/chosen": 0.18179914355278015, |
|
"logits/rejected": 0.2870942950248718, |
|
"logps/chosen": -632.6707153320312, |
|
"logps/rejected": -745.6337890625, |
|
"loss": 0.21, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.2822064459323883, |
|
"rewards/margins": 0.18581680953502655, |
|
"rewards/rejected": -0.4680232107639313, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.1154891869403436e-06, |
|
"logits/chosen": 0.1858624517917633, |
|
"logits/rejected": 0.24371235072612762, |
|
"logps/chosen": -612.2597045898438, |
|
"logps/rejected": -773.7772216796875, |
|
"loss": 0.24, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.2878335416316986, |
|
"rewards/margins": 0.1961011290550232, |
|
"rewards/rejected": -0.4839346408843994, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9845619020032552e-06, |
|
"logits/chosen": 0.11868009716272354, |
|
"logits/rejected": 0.2442179173231125, |
|
"logps/chosen": -628.7079467773438, |
|
"logps/rejected": -782.9298095703125, |
|
"loss": 0.2128, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.2861320376396179, |
|
"rewards/margins": 0.2013685703277588, |
|
"rewards/rejected": -0.4875006079673767, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.8550956823554708e-06, |
|
"logits/chosen": 0.15941022336483002, |
|
"logits/rejected": 0.26665717363357544, |
|
"logps/chosen": -611.2957763671875, |
|
"logps/rejected": -745.6875610351562, |
|
"loss": 0.2419, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.26627546548843384, |
|
"rewards/margins": 0.20409245789051056, |
|
"rewards/rejected": -0.4703678488731384, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7274575140626318e-06, |
|
"logits/chosen": 0.12057618796825409, |
|
"logits/rejected": 0.2439500391483307, |
|
"logps/chosen": -563.8010864257812, |
|
"logps/rejected": -753.3555908203125, |
|
"loss": 0.2241, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.22670452296733856, |
|
"rewards/margins": 0.22902043163776398, |
|
"rewards/rejected": -0.45572495460510254, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.6020092013802002e-06, |
|
"logits/chosen": 0.1292915642261505, |
|
"logits/rejected": 0.27418118715286255, |
|
"logps/chosen": -623.4757080078125, |
|
"logps/rejected": -712.2957153320312, |
|
"loss": 0.2196, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.24320180714130402, |
|
"rewards/margins": 0.19605949521064758, |
|
"rewards/rejected": -0.4392613470554352, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.4791063411799938e-06, |
|
"logits/chosen": 0.10819476842880249, |
|
"logits/rejected": 0.2511535882949829, |
|
"logps/chosen": -570.8446044921875, |
|
"logps/rejected": -685.5501708984375, |
|
"loss": 0.2124, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.22663471102714539, |
|
"rewards/margins": 0.20201578736305237, |
|
"rewards/rejected": -0.42865046858787537, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3590973149722103e-06, |
|
"logits/chosen": 0.15888772904872894, |
|
"logits/rejected": 0.2368732988834381, |
|
"logps/chosen": -589.4254150390625, |
|
"logps/rejected": -735.7716674804688, |
|
"loss": 0.2038, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.26341313123703003, |
|
"rewards/margins": 0.1875002086162567, |
|
"rewards/rejected": -0.45091336965560913, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2423223013801946e-06, |
|
"logits/chosen": 0.14813843369483948, |
|
"logits/rejected": 0.21993982791900635, |
|
"logps/chosen": -589.8951416015625, |
|
"logps/rejected": -729.8585205078125, |
|
"loss": 0.2504, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.2774823307991028, |
|
"rewards/margins": 0.1887308657169342, |
|
"rewards/rejected": -0.4662131667137146, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.1291123118671665e-06, |
|
"logits/chosen": 0.09890522062778473, |
|
"logits/rejected": 0.2421140968799591, |
|
"logps/chosen": -639.2291870117188, |
|
"logps/rejected": -729.2130126953125, |
|
"loss": 0.1871, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.2574080526828766, |
|
"rewards/margins": 0.20436222851276398, |
|
"rewards/rejected": -0.46177029609680176, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.019788252448267e-06, |
|
"logits/chosen": 0.1626007854938507, |
|
"logits/rejected": 0.2534940242767334, |
|
"logps/chosen": -614.7600708007812, |
|
"logps/rejected": -789.6011352539062, |
|
"loss": 0.202, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.27494579553604126, |
|
"rewards/margins": 0.21227261424064636, |
|
"rewards/rejected": -0.48721843957901, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.146600140475945e-07, |
|
"logits/chosen": 0.18194663524627686, |
|
"logits/rejected": 0.2571627199649811, |
|
"logps/chosen": -695.4674682617188, |
|
"logps/rejected": -824.3109130859375, |
|
"loss": 0.2242, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.31008249521255493, |
|
"rewards/margins": 0.20844268798828125, |
|
"rewards/rejected": -0.518525242805481, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.140255940787059e-07, |
|
"logits/chosen": 0.1600276529788971, |
|
"logits/rejected": 0.24592892825603485, |
|
"logps/chosen": -621.3486328125, |
|
"logps/rejected": -787.8779296875, |
|
"loss": 0.2379, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.2887078523635864, |
|
"rewards/margins": 0.21262690424919128, |
|
"rewards/rejected": -0.5013347864151001, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.181702517385789e-07, |
|
"logits/chosen": 0.11802725493907928, |
|
"logits/rejected": 0.24163658916950226, |
|
"logps/chosen": -626.5403442382812, |
|
"logps/rejected": -774.00830078125, |
|
"loss": 0.219, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.27267104387283325, |
|
"rewards/margins": 0.21761374175548553, |
|
"rewards/rejected": -0.4902847409248352, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.273656994094232e-07, |
|
"logits/chosen": 0.1450013965368271, |
|
"logits/rejected": 0.23478934168815613, |
|
"logps/chosen": -608.7362060546875, |
|
"logps/rejected": -739.3218994140625, |
|
"loss": 0.2294, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.2396283596754074, |
|
"rewards/margins": 0.21917715668678284, |
|
"rewards/rejected": -0.45880550146102905, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.418693324604082e-07, |
|
"logits/chosen": 0.16014720499515533, |
|
"logits/rejected": 0.25679388642311096, |
|
"logps/chosen": -598.508056640625, |
|
"logps/rejected": -752.1389770507812, |
|
"loss": 0.2315, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.26550406217575073, |
|
"rewards/margins": 0.20078308880329132, |
|
"rewards/rejected": -0.46628713607788086, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.619234996325314e-07, |
|
"logits/chosen": 0.10893194377422333, |
|
"logits/rejected": 0.18960845470428467, |
|
"logps/chosen": -589.2452392578125, |
|
"logps/rejected": -712.2008666992188, |
|
"loss": 0.2139, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.2257637232542038, |
|
"rewards/margins": 0.20952418446540833, |
|
"rewards/rejected": -0.4352878928184509, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.877548160747768e-07, |
|
"logits/chosen": 0.1536407321691513, |
|
"logits/rejected": 0.25428491830825806, |
|
"logps/chosen": -667.2605590820312, |
|
"logps/rejected": -712.8155517578125, |
|
"loss": 0.2543, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.26264914870262146, |
|
"rewards/margins": 0.1710439920425415, |
|
"rewards/rejected": -0.4336931109428406, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.195735209788528e-07, |
|
"logits/chosen": 0.1331530511379242, |
|
"logits/rejected": 0.25019171833992004, |
|
"logps/chosen": -614.9050903320312, |
|
"logps/rejected": -703.8809814453125, |
|
"loss": 0.2492, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.27054914832115173, |
|
"rewards/margins": 0.14634455740451813, |
|
"rewards/rejected": -0.4168936610221863, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.5757288163336806e-07, |
|
"logits/chosen": 0.15042927861213684, |
|
"logits/rejected": 0.2752520442008972, |
|
"logps/chosen": -605.6216430664062, |
|
"logps/rejected": -749.6766357421875, |
|
"loss": 0.2349, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.2648725211620331, |
|
"rewards/margins": 0.18051207065582275, |
|
"rewards/rejected": -0.44538459181785583, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.019286455866981e-07, |
|
"logits/chosen": 0.09744317829608917, |
|
"logits/rejected": 0.30391809344291687, |
|
"logps/chosen": -656.5165405273438, |
|
"logps/rejected": -782.3225708007812, |
|
"loss": 0.2347, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.26627975702285767, |
|
"rewards/margins": 0.2162231206893921, |
|
"rewards/rejected": -0.48250293731689453, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.5279854247146703e-07, |
|
"logits/chosen": 0.10827809572219849, |
|
"logits/rejected": 0.23128509521484375, |
|
"logps/chosen": -597.4005126953125, |
|
"logps/rejected": -727.5601196289062, |
|
"loss": 0.2356, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.2508590817451477, |
|
"rewards/margins": 0.19355928897857666, |
|
"rewards/rejected": -0.44441837072372437, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.1032183690276754e-07, |
|
"logits/chosen": 0.1284581571817398, |
|
"logits/rejected": 0.22490856051445007, |
|
"logps/chosen": -608.4317016601562, |
|
"logps/rejected": -756.5374755859375, |
|
"loss": 0.2282, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.27711886167526245, |
|
"rewards/margins": 0.19119976460933685, |
|
"rewards/rejected": -0.4683186411857605, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.46189337174788e-08, |
|
"logits/chosen": 0.1757223904132843, |
|
"logits/rejected": 0.26904186606407166, |
|
"logps/chosen": -603.6378173828125, |
|
"logps/rejected": -660.9766235351562, |
|
"loss": 0.2443, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.24105675518512726, |
|
"rewards/margins": 0.1722828447818756, |
|
"rewards/rejected": -0.4133395552635193, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.579103667367385e-08, |
|
"logits/chosen": 0.10849970579147339, |
|
"logits/rejected": 0.19708193838596344, |
|
"logps/chosen": -565.9713134765625, |
|
"logps/rejected": -679.5427856445312, |
|
"loss": 0.2211, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.2512280344963074, |
|
"rewards/margins": 0.17560149729251862, |
|
"rewards/rejected": -0.4268294870853424, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.3919861577572924e-08, |
|
"logits/chosen": 0.13952800631523132, |
|
"logits/rejected": 0.20136961340904236, |
|
"logps/chosen": -621.4708862304688, |
|
"logps/rejected": -748.71240234375, |
|
"loss": 0.2234, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.2641890048980713, |
|
"rewards/margins": 0.19575798511505127, |
|
"rewards/rejected": -0.45994701981544495, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 9.067404651211808e-09, |
|
"logits/chosen": 0.18144121766090393, |
|
"logits/rejected": 0.23935365676879883, |
|
"logps/chosen": -601.0186767578125, |
|
"logps/rejected": -719.6766357421875, |
|
"loss": 0.2458, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.26298612356185913, |
|
"rewards/margins": 0.16590112447738647, |
|
"rewards/rejected": -0.428887277841568, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.2757667974155896e-09, |
|
"logits/chosen": 0.13537321984767914, |
|
"logits/rejected": 0.2640915811061859, |
|
"logps/chosen": -576.1346435546875, |
|
"logps/rejected": -705.7242431640625, |
|
"loss": 0.2243, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.22819241881370544, |
|
"rewards/margins": 0.1957322061061859, |
|
"rewards/rejected": -0.42392459511756897, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 656, |
|
"total_flos": 0.0, |
|
"train_loss": 0.19985946376876132, |
|
"train_runtime": 6634.0696, |
|
"train_samples_per_second": 3.165, |
|
"train_steps_per_second": 0.099 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 656, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|