{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9996190476190476,
  "eval_steps": 500,
  "global_step": 656,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "learning_rate": 7.575757575757576e-08,
      "logits/chosen": 0.040165986865758896,
      "logits/rejected": 0.1715753823518753,
      "logps/chosen": -294.844482421875,
      "logps/rejected": -361.2099914550781,
      "loss": 0.3581,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.02,
      "learning_rate": 7.575757575757576e-07,
      "logits/chosen": 0.08047256618738174,
      "logits/rejected": 0.3207971751689911,
      "logps/chosen": -393.2153625488281,
      "logps/rejected": -318.74615478515625,
      "loss": 0.3404,
      "rewards/accuracies": 0.4652777910232544,
      "rewards/chosen": -0.00022995664039626718,
      "rewards/margins": 2.2277235984802246e-05,
      "rewards/rejected": -0.000252234167419374,
      "step": 10
    },
    {
      "epoch": 0.03,
      "learning_rate": 1.5151515151515152e-06,
      "logits/chosen": 0.07466734945774078,
      "logits/rejected": 0.23236870765686035,
      "logps/chosen": -332.4886169433594,
      "logps/rejected": -281.853515625,
      "loss": 0.3468,
      "rewards/accuracies": 0.41874998807907104,
      "rewards/chosen": 0.00011478399392217398,
      "rewards/margins": -0.0012641319772228599,
      "rewards/rejected": 0.0013789159711450338,
      "step": 20
    },
    {
      "epoch": 0.05,
      "learning_rate": 2.2727272727272728e-06,
      "logits/chosen": 0.07185273617506027,
      "logits/rejected": 0.2604687213897705,
      "logps/chosen": -358.33782958984375,
      "logps/rejected": -292.1524963378906,
      "loss": 0.3502,
      "rewards/accuracies": 0.543749988079071,
      "rewards/chosen": 0.0017305829096585512,
      "rewards/margins": 0.001660021604038775,
      "rewards/rejected": 7.056114554870874e-05,
      "step": 30
    },
    {
      "epoch": 0.06,
      "learning_rate": 3.0303030303030305e-06,
      "logits/chosen": 0.1199118122458458,
      "logits/rejected": 0.2392597496509552,
      "logps/chosen": -347.05926513671875,
      "logps/rejected": -286.26080322265625,
      "loss": 0.3353,
      "rewards/accuracies": 0.6499999761581421,
      "rewards/chosen": 0.005740107037127018,
      "rewards/margins": 0.006530737970024347,
      "rewards/rejected": -0.0007906301179900765,
      "step": 40
    },
    {
      "epoch": 0.08,
      "learning_rate": 3.7878787878787882e-06,
      "logits/chosen": 0.0741933211684227,
      "logits/rejected": 0.3117237091064453,
      "logps/chosen": -343.31170654296875,
      "logps/rejected": -287.1304931640625,
      "loss": 0.3286,
      "rewards/accuracies": 0.6875,
      "rewards/chosen": 0.016505183652043343,
      "rewards/margins": 0.016020886600017548,
      "rewards/rejected": 0.0004842969647143036,
      "step": 50
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.5454545454545455e-06,
      "logits/chosen": 0.0762203261256218,
      "logits/rejected": 0.2750852704048157,
      "logps/chosen": -370.8611755371094,
      "logps/rejected": -302.7222595214844,
      "loss": 0.2924,
      "rewards/accuracies": 0.768750011920929,
      "rewards/chosen": 0.032092947512865067,
      "rewards/margins": 0.04518315941095352,
      "rewards/rejected": -0.013090210035443306,
      "step": 60
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.999432965739786e-06,
      "logits/chosen": 0.07090188562870026,
      "logits/rejected": 0.25236833095550537,
      "logps/chosen": -321.5715026855469,
      "logps/rejected": -301.0986022949219,
      "loss": 0.2803,
      "rewards/accuracies": 0.731249988079071,
      "rewards/chosen": 0.05498770996928215,
      "rewards/margins": 0.0691528171300888,
      "rewards/rejected": -0.014165110886096954,
      "step": 70
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.9930567839810125e-06,
      "logits/chosen": 0.10318852961063385,
      "logits/rejected": 0.2712559401988983,
      "logps/chosen": -358.31500244140625,
      "logps/rejected": -302.6133117675781,
      "loss": 0.2346,
      "rewards/accuracies": 0.8062499761581421,
      "rewards/chosen": 0.08596866577863693,
      "rewards/margins": 0.1306326687335968,
      "rewards/rejected": -0.044663988053798676,
      "step": 80
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.979613761906212e-06,
      "logits/chosen": 0.1258704960346222,
      "logits/rejected": 0.2653108239173889,
      "logps/chosen": -316.4566345214844,
      "logps/rejected": -285.59442138671875,
      "loss": 0.2181,
      "rewards/accuracies": 0.7437499761581421,
      "rewards/chosen": 0.10472643375396729,
      "rewards/margins": 0.16386187076568604,
      "rewards/rejected": -0.05913544446229935,
      "step": 90
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.959142005221991e-06,
      "logits/chosen": 0.14865969121456146,
      "logits/rejected": 0.2514886260032654,
      "logps/chosen": -316.22650146484375,
      "logps/rejected": -298.5392150878906,
      "loss": 0.2379,
      "rewards/accuracies": 0.7124999761581421,
      "rewards/chosen": 0.09984966367483139,
      "rewards/margins": 0.20773954689502716,
      "rewards/rejected": -0.10788986831903458,
      "step": 100
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.931699543346854e-06,
      "logits/chosen": 0.1114228144288063,
      "logits/rejected": 0.2948494553565979,
      "logps/chosen": -338.43450927734375,
      "logps/rejected": -285.25494384765625,
      "loss": 0.2098,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": 0.1445818692445755,
      "rewards/margins": 0.19616642594337463,
      "rewards/rejected": -0.05158457159996033,
      "step": 110
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.897364164920515e-06,
      "logits/chosen": 0.10053505003452301,
      "logits/rejected": 0.27514562010765076,
      "logps/chosen": -338.99151611328125,
      "logps/rejected": -297.2850646972656,
      "loss": 0.2161,
      "rewards/accuracies": 0.699999988079071,
      "rewards/chosen": 0.14088504016399384,
      "rewards/margins": 0.18905261158943176,
      "rewards/rejected": -0.048167549073696136,
      "step": 120
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.8562331973035396e-06,
      "logits/chosen": 0.08438269048929214,
      "logits/rejected": 0.23364977538585663,
      "logps/chosen": -322.02117919921875,
      "logps/rejected": -303.7391357421875,
      "loss": 0.2438,
      "rewards/accuracies": 0.6937500238418579,
      "rewards/chosen": 0.146646648645401,
      "rewards/margins": 0.1478952169418335,
      "rewards/rejected": -0.0012485686456784606,
      "step": 130
    },
    {
      "epoch": 0.21,
      "learning_rate": 4.808423230692374e-06,
      "logits/chosen": 0.09936638176441193,
      "logits/rejected": 0.24736297130584717,
      "logps/chosen": -310.3603515625,
      "logps/rejected": -278.47320556640625,
      "loss": 0.222,
      "rewards/accuracies": 0.7437499761581421,
      "rewards/chosen": 0.18732748925685883,
      "rewards/margins": 0.1890837401151657,
      "rewards/rejected": -0.0017562557477504015,
      "step": 140
    },
    {
      "epoch": 0.23,
      "learning_rate": 4.754069787631761e-06,
      "logits/chosen": 0.13391128182411194,
      "logits/rejected": 0.2741778492927551,
      "logps/chosen": -366.7373046875,
      "logps/rejected": -295.2054138183594,
      "loss": 0.2071,
      "rewards/accuracies": 0.768750011920929,
      "rewards/chosen": 0.23995013535022736,
      "rewards/margins": 0.24667362868785858,
      "rewards/rejected": -0.006723466329276562,
      "step": 150
    },
    {
      "epoch": 0.24,
      "learning_rate": 4.693326938861367e-06,
      "logits/chosen": 0.08330532908439636,
      "logits/rejected": 0.23301962018013,
      "logps/chosen": -295.6157531738281,
      "logps/rejected": -277.6016540527344,
      "loss": 0.2514,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": 0.19748535752296448,
      "rewards/margins": 0.15948796272277832,
      "rewards/rejected": 0.03799740970134735,
      "step": 160
    },
    {
      "epoch": 0.26,
      "learning_rate": 4.626366866585528e-06,
      "logits/chosen": 0.17037127912044525,
      "logits/rejected": 0.3279545307159424,
      "logps/chosen": -371.6747131347656,
      "logps/rejected": -300.25250244140625,
      "loss": 0.226,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": 0.2174214869737625,
      "rewards/margins": 0.1945430338382721,
      "rewards/rejected": 0.022878441959619522,
      "step": 170
    },
    {
      "epoch": 0.27,
      "learning_rate": 4.553379376404085e-06,
      "logits/chosen": 0.12852030992507935,
      "logits/rejected": 0.23063895106315613,
      "logps/chosen": -339.1563720703125,
      "logps/rejected": -277.93841552734375,
      "loss": 0.1992,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": 0.20981720089912415,
      "rewards/margins": 0.22972619533538818,
      "rewards/rejected": -0.019909001886844635,
      "step": 180
    },
    {
      "epoch": 0.29,
      "learning_rate": 4.474571359287791e-06,
      "logits/chosen": 0.08759725093841553,
      "logits/rejected": 0.26223450899124146,
      "logps/chosen": -315.0005187988281,
      "logps/rejected": -266.51715087890625,
      "loss": 0.2166,
      "rewards/accuracies": 0.7124999761581421,
      "rewards/chosen": 0.19832256436347961,
      "rewards/margins": 0.2093769758939743,
      "rewards/rejected": -0.011054400354623795,
      "step": 190
    },
    {
      "epoch": 0.3,
      "learning_rate": 4.3901662051233755e-06,
      "logits/chosen": 0.1616448611021042,
      "logits/rejected": 0.30854135751724243,
      "logps/chosen": -382.2667236328125,
      "logps/rejected": -286.1771240234375,
      "loss": 0.2198,
      "rewards/accuracies": 0.731249988079071,
      "rewards/chosen": 0.19347664713859558,
      "rewards/margins": 0.20408186316490173,
      "rewards/rejected": -0.010605214163661003,
      "step": 200
    },
    {
      "epoch": 0.32,
      "learning_rate": 4.30040316949064e-06,
      "logits/chosen": 0.14471155405044556,
      "logits/rejected": 0.26751500368118286,
      "logps/chosen": -300.742919921875,
      "logps/rejected": -251.2882080078125,
      "loss": 0.23,
      "rewards/accuracies": 0.675000011920929,
      "rewards/chosen": 0.1704670488834381,
      "rewards/margins": 0.18817836046218872,
      "rewards/rejected": -0.0177113339304924,
      "step": 210
    },
    {
      "epoch": 0.34,
      "learning_rate": 4.205536695466524e-06,
      "logits/chosen": 0.16468700766563416,
      "logits/rejected": 0.24206213653087616,
      "logps/chosen": -294.42767333984375,
      "logps/rejected": -272.92010498046875,
      "loss": 0.2529,
      "rewards/accuracies": 0.643750011920929,
      "rewards/chosen": 0.17376992106437683,
      "rewards/margins": 0.16116927564144135,
      "rewards/rejected": 0.012600669637322426,
      "step": 220
    },
    {
      "epoch": 0.35,
      "learning_rate": 4.105835692378557e-06,
      "logits/chosen": 0.10714814811944962,
      "logits/rejected": 0.2699413597583771,
      "logps/chosen": -304.6961364746094,
      "logps/rejected": -285.6314392089844,
      "loss": 0.1973,
      "rewards/accuracies": 0.731249988079071,
      "rewards/chosen": 0.1876644492149353,
      "rewards/margins": 0.20867836475372314,
      "rewards/rejected": -0.02101389318704605,
      "step": 230
    },
    {
      "epoch": 0.37,
      "learning_rate": 4.001582773552153e-06,
      "logits/chosen": 0.06082786247134209,
      "logits/rejected": 0.29382848739624023,
      "logps/chosen": -378.81732177734375,
      "logps/rejected": -309.0159912109375,
      "loss": 0.1942,
      "rewards/accuracies": 0.800000011920929,
      "rewards/chosen": 0.16840167343616486,
      "rewards/margins": 0.24110493063926697,
      "rewards/rejected": -0.0727032721042633,
      "step": 240
    },
    {
      "epoch": 0.38,
      "learning_rate": 3.893073455212438e-06,
      "logits/chosen": 0.1116786003112793,
      "logits/rejected": 0.2517862915992737,
      "logps/chosen": -320.56365966796875,
      "logps/rejected": -278.5521545410156,
      "loss": 0.2346,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": 0.17214366793632507,
      "rewards/margins": 0.2073363959789276,
      "rewards/rejected": -0.03519275039434433,
      "step": 250
    },
    {
      "epoch": 0.4,
      "learning_rate": 3.7806153188114027e-06,
      "logits/chosen": 0.11448470503091812,
      "logits/rejected": 0.22813239693641663,
      "logps/chosen": -279.91162109375,
      "logps/rejected": -260.7430114746094,
      "loss": 0.2402,
      "rewards/accuracies": 0.699999988079071,
      "rewards/chosen": 0.1783895492553711,
      "rewards/margins": 0.17652130126953125,
      "rewards/rejected": 0.001868226332589984,
      "step": 260
    },
    {
      "epoch": 0.41,
      "learning_rate": 3.6645271391548542e-06,
      "logits/chosen": 0.06586723029613495,
      "logits/rejected": 0.21862812340259552,
      "logps/chosen": -302.5594787597656,
      "logps/rejected": -270.1592712402344,
      "loss": 0.212,
      "rewards/accuracies": 0.6812499761581421,
      "rewards/chosen": 0.1828652024269104,
      "rewards/margins": 0.2052091658115387,
      "rewards/rejected": -0.022343963384628296,
      "step": 270
    },
    {
      "epoch": 0.43,
      "learning_rate": 3.5451379808006014e-06,
      "logits/chosen": 0.1570790708065033,
      "logits/rejected": 0.288757860660553,
      "logps/chosen": -347.58013916015625,
      "logps/rejected": -294.15716552734375,
      "loss": 0.1946,
      "rewards/accuracies": 0.762499988079071,
      "rewards/chosen": 0.21899041533470154,
      "rewards/margins": 0.2574598491191864,
      "rewards/rejected": -0.03846944123506546,
      "step": 280
    },
    {
      "epoch": 0.44,
      "learning_rate": 3.4227862652892106e-06,
      "logits/chosen": 0.10818709433078766,
      "logits/rejected": 0.23891910910606384,
      "logps/chosen": -348.48370361328125,
      "logps/rejected": -307.1477966308594,
      "loss": 0.201,
      "rewards/accuracies": 0.7749999761581421,
      "rewards/chosen": 0.1834907829761505,
      "rewards/margins": 0.22308149933815002,
      "rewards/rejected": -0.03959069401025772,
      "step": 290
    },
    {
      "epoch": 0.46,
      "learning_rate": 3.2978188118513814e-06,
      "logits/chosen": 0.11096982657909393,
      "logits/rejected": 0.24753287434577942,
      "logps/chosen": -296.8543701171875,
      "logps/rejected": -281.43310546875,
      "loss": 0.2306,
      "rewards/accuracies": 0.706250011920929,
      "rewards/chosen": 0.19204005599021912,
      "rewards/margins": 0.18743662536144257,
      "rewards/rejected": 0.0046034445986151695,
      "step": 300
    },
    {
      "epoch": 0.47,
      "learning_rate": 3.1705898543111576e-06,
      "logits/chosen": 0.07924026995897293,
      "logits/rejected": 0.2654581665992737,
      "logps/chosen": -327.4444274902344,
      "logps/rejected": -319.9097595214844,
      "loss": 0.2183,
      "rewards/accuracies": 0.8062499761581421,
      "rewards/chosen": 0.1851603388786316,
      "rewards/margins": 0.23894396424293518,
      "rewards/rejected": -0.05378361791372299,
      "step": 310
    },
    {
      "epoch": 0.49,
      "learning_rate": 3.041460036971664e-06,
      "logits/chosen": 0.09540507942438126,
      "logits/rejected": 0.22888918220996857,
      "logps/chosen": -334.81689453125,
      "logps/rejected": -270.1705017089844,
      "loss": 0.2204,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": 0.15298177301883698,
      "rewards/margins": 0.2034742832183838,
      "rewards/rejected": -0.05049251392483711,
      "step": 320
    },
    {
      "epoch": 0.5,
      "learning_rate": 2.910795392329649e-06,
      "logits/chosen": 0.11527317762374878,
      "logits/rejected": 0.21196472644805908,
      "logps/chosen": -315.2773132324219,
      "logps/rejected": -283.607421875,
      "loss": 0.1907,
      "rewards/accuracies": 0.706250011920929,
      "rewards/chosen": 0.19066214561462402,
      "rewards/margins": 0.22805961966514587,
      "rewards/rejected": -0.037397462874650955,
      "step": 330
    },
    {
      "epoch": 0.52,
      "learning_rate": 2.7789663035166035e-06,
      "logits/chosen": 0.054509587585926056,
      "logits/rejected": 0.21440072357654572,
      "logps/chosen": -327.4574279785156,
      "logps/rejected": -277.25958251953125,
      "loss": 0.201,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": 0.16702668368816376,
      "rewards/margins": 0.22345618903636932,
      "rewards/rejected": -0.05642951279878616,
      "step": 340
    },
    {
      "epoch": 0.53,
      "learning_rate": 2.6463464544075344e-06,
      "logits/chosen": 0.0780135840177536,
      "logits/rejected": 0.2536531686782837,
      "logps/chosen": -305.2668151855469,
      "logps/rejected": -291.6106872558594,
      "loss": 0.1948,
      "rewards/accuracies": 0.793749988079071,
      "rewards/chosen": 0.21040305495262146,
      "rewards/margins": 0.27535635232925415,
      "rewards/rejected": -0.06495330482721329,
      "step": 350
    },
    {
      "epoch": 0.55,
      "learning_rate": 2.513311770373421e-06,
      "logits/chosen": 0.12684503197669983,
      "logits/rejected": 0.23648087680339813,
      "logps/chosen": -299.1307067871094,
      "logps/rejected": -288.98614501953125,
      "loss": 0.2112,
      "rewards/accuracies": 0.8062499761581421,
      "rewards/chosen": 0.18101270496845245,
      "rewards/margins": 0.24970810115337372,
      "rewards/rejected": -0.06869538873434067,
      "step": 360
    },
    {
      "epoch": 0.56,
      "learning_rate": 2.380239352679908e-06,
      "logits/chosen": 0.050291478633880615,
      "logits/rejected": 0.2234155833721161,
      "logps/chosen": -296.54595947265625,
      "logps/rejected": -269.51702880859375,
      "loss": 0.2095,
      "rewards/accuracies": 0.6875,
      "rewards/chosen": 0.17090751230716705,
      "rewards/margins": 0.19086746871471405,
      "rewards/rejected": -0.019959963858127594,
      "step": 370
    },
    {
      "epoch": 0.58,
      "learning_rate": 2.247506409552795e-06,
      "logits/chosen": 0.10957841575145721,
      "logits/rejected": 0.21787157654762268,
      "logps/chosen": -305.3380432128906,
      "logps/rejected": -283.4737854003906,
      "loss": 0.2027,
      "rewards/accuracies": 0.7749999761581421,
      "rewards/chosen": 0.17482581734657288,
      "rewards/margins": 0.22671441733837128,
      "rewards/rejected": -0.0518886037170887,
      "step": 380
    },
    {
      "epoch": 0.59,
      "learning_rate": 2.1154891869403436e-06,
      "logits/chosen": 0.1384754180908203,
      "logits/rejected": 0.25176170468330383,
      "logps/chosen": -354.0063171386719,
      "logps/rejected": -307.35455322265625,
      "loss": 0.2073,
      "rewards/accuracies": 0.793749988079071,
      "rewards/chosen": 0.2005975991487503,
      "rewards/margins": 0.27450570464134216,
      "rewards/rejected": -0.07390810549259186,
      "step": 390
    },
    {
      "epoch": 0.61,
      "learning_rate": 1.9845619020032552e-06,
      "logits/chosen": 0.09188776463270187,
      "logits/rejected": 0.2664358913898468,
      "logps/chosen": -328.11083984375,
      "logps/rejected": -286.30419921875,
      "loss": 0.1895,
      "rewards/accuracies": 0.8062499761581421,
      "rewards/chosen": 0.1963554322719574,
      "rewards/margins": 0.23746831715106964,
      "rewards/rejected": -0.04111289232969284,
      "step": 400
    },
    {
      "epoch": 0.62,
      "learning_rate": 1.8550956823554708e-06,
      "logits/chosen": 0.055462319403886795,
      "logits/rejected": 0.18411260843276978,
      "logps/chosen": -326.14703369140625,
      "logps/rejected": -275.77069091796875,
      "loss": 0.235,
      "rewards/accuracies": 0.768750011920929,
      "rewards/chosen": 0.1863834410905838,
      "rewards/margins": 0.22951778769493103,
      "rewards/rejected": -0.043134383857250214,
      "step": 410
    },
    {
      "epoch": 0.64,
      "learning_rate": 1.7274575140626318e-06,
      "logits/chosen": 0.09742958843708038,
      "logits/rejected": 0.23133966326713562,
      "logps/chosen": -334.5967712402344,
      "logps/rejected": -302.8537902832031,
      "loss": 0.2048,
      "rewards/accuracies": 0.800000011920929,
      "rewards/chosen": 0.21493402123451233,
      "rewards/margins": 0.26205217838287354,
      "rewards/rejected": -0.047118157148361206,
      "step": 420
    },
    {
      "epoch": 0.66,
      "learning_rate": 1.6020092013802002e-06,
      "logits/chosen": 0.061180900782346725,
      "logits/rejected": 0.22411946952342987,
      "logps/chosen": -326.1264953613281,
      "logps/rejected": -270.6374206542969,
      "loss": 0.2211,
      "rewards/accuracies": 0.768750011920929,
      "rewards/chosen": 0.19443106651306152,
      "rewards/margins": 0.2181394100189209,
      "rewards/rejected": -0.023708324879407883,
      "step": 430
    },
    {
      "epoch": 0.67,
      "learning_rate": 1.4791063411799938e-06,
      "logits/chosen": 0.11679482460021973,
      "logits/rejected": 0.20075193047523499,
      "logps/chosen": -313.3033752441406,
      "logps/rejected": -301.5423278808594,
      "loss": 0.1941,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": 0.18806949257850647,
      "rewards/margins": 0.22217002511024475,
      "rewards/rejected": -0.03410057723522186,
      "step": 440
    },
    {
      "epoch": 0.69,
      "learning_rate": 1.3590973149722103e-06,
      "logits/chosen": 0.11450199782848358,
      "logits/rejected": 0.2375846803188324,
      "logps/chosen": -324.9442443847656,
      "logps/rejected": -282.5760498046875,
      "loss": 0.2164,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": 0.1744813621044159,
      "rewards/margins": 0.21334946155548096,
      "rewards/rejected": -0.03886810690164566,
      "step": 450
    },
    {
      "epoch": 0.7,
      "learning_rate": 1.2423223013801946e-06,
      "logits/chosen": 0.10633231699466705,
      "logits/rejected": 0.2275623381137848,
      "logps/chosen": -359.95428466796875,
      "logps/rejected": -296.4113464355469,
      "loss": 0.1969,
      "rewards/accuracies": 0.762499988079071,
      "rewards/chosen": 0.21253260970115662,
      "rewards/margins": 0.24374982714653015,
      "rewards/rejected": -0.03121720813214779,
      "step": 460
    },
    {
      "epoch": 0.72,
      "learning_rate": 1.1291123118671665e-06,
      "logits/chosen": 0.03388429060578346,
      "logits/rejected": 0.17446021735668182,
      "logps/chosen": -308.4808349609375,
      "logps/rejected": -263.65283203125,
      "loss": 0.2128,
      "rewards/accuracies": 0.800000011920929,
      "rewards/chosen": 0.21648752689361572,
      "rewards/margins": 0.22755059599876404,
      "rewards/rejected": -0.011063081212341785,
      "step": 470
    },
    {
      "epoch": 0.73,
      "learning_rate": 1.019788252448267e-06,
      "logits/chosen": 0.12791678309440613,
      "logits/rejected": 0.28256458044052124,
      "logps/chosen": -379.8475341796875,
      "logps/rejected": -303.40545654296875,
      "loss": 0.182,
      "rewards/accuracies": 0.8062499761581421,
      "rewards/chosen": 0.22206160426139832,
      "rewards/margins": 0.2580808699131012,
      "rewards/rejected": -0.036019258201122284,
      "step": 480
    },
    {
      "epoch": 0.75,
      "learning_rate": 9.146600140475945e-07,
      "logits/chosen": 0.11331765353679657,
      "logits/rejected": 0.2163075953722,
      "logps/chosen": -366.92926025390625,
      "logps/rejected": -308.08148193359375,
      "loss": 0.2333,
      "rewards/accuracies": 0.71875,
      "rewards/chosen": 0.1822303682565689,
      "rewards/margins": 0.232683464884758,
      "rewards/rejected": -0.050453104078769684,
      "step": 490
    },
    {
      "epoch": 0.76,
      "learning_rate": 8.140255940787059e-07,
      "logits/chosen": 0.1049843281507492,
      "logits/rejected": 0.20622961223125458,
      "logps/chosen": -283.49822998046875,
      "logps/rejected": -262.69561767578125,
      "loss": 0.2244,
      "rewards/accuracies": 0.6937500238418579,
      "rewards/chosen": 0.19038459658622742,
      "rewards/margins": 0.19227764010429382,
      "rewards/rejected": -0.001893045729957521,
      "step": 500
    },
    {
      "epoch": 0.78,
      "learning_rate": 7.181702517385789e-07,
      "logits/chosen": 0.06920811533927917,
      "logits/rejected": 0.22420334815979004,
      "logps/chosen": -313.43670654296875,
      "logps/rejected": -280.72442626953125,
      "loss": 0.2521,
      "rewards/accuracies": 0.7124999761581421,
      "rewards/chosen": 0.1772725135087967,
      "rewards/margins": 0.1995859146118164,
      "rewards/rejected": -0.022313417866826057,
      "step": 510
    },
    {
      "epoch": 0.79,
      "learning_rate": 6.273656994094232e-07,
      "logits/chosen": 0.0857834741473198,
      "logits/rejected": 0.1759118139743805,
      "logps/chosen": -308.9149475097656,
      "logps/rejected": -283.34747314453125,
      "loss": 0.1955,
      "rewards/accuracies": 0.762499988079071,
      "rewards/chosen": 0.2155129462480545,
      "rewards/margins": 0.22527408599853516,
      "rewards/rejected": -0.009761162102222443,
      "step": 520
    },
    {
      "epoch": 0.81,
      "learning_rate": 5.418693324604082e-07,
      "logits/chosen": 0.057937733829021454,
      "logits/rejected": 0.24161191284656525,
      "logps/chosen": -333.49603271484375,
      "logps/rejected": -280.8927917480469,
      "loss": 0.2266,
      "rewards/accuracies": 0.793749988079071,
      "rewards/chosen": 0.21429701149463654,
      "rewards/margins": 0.22269944846630096,
      "rewards/rejected": -0.008402440696954727,
      "step": 530
    },
    {
      "epoch": 0.82,
      "learning_rate": 4.619234996325314e-07,
      "logits/chosen": 0.07015545666217804,
      "logits/rejected": 0.2190779447555542,
      "logps/chosen": -344.3935852050781,
      "logps/rejected": -303.2052917480469,
      "loss": 0.2045,
      "rewards/accuracies": 0.824999988079071,
      "rewards/chosen": 0.21076282858848572,
      "rewards/margins": 0.25355157256126404,
      "rewards/rejected": -0.04278876259922981,
      "step": 540
    },
    {
      "epoch": 0.84,
      "learning_rate": 3.877548160747768e-07,
      "logits/chosen": 0.08561773598194122,
      "logits/rejected": 0.2999951243400574,
      "logps/chosen": -330.68475341796875,
      "logps/rejected": -281.82275390625,
      "loss": 0.2013,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": 0.20333750545978546,
      "rewards/margins": 0.23657293617725372,
      "rewards/rejected": -0.033235400915145874,
      "step": 550
    },
    {
      "epoch": 0.85,
      "learning_rate": 3.195735209788528e-07,
      "logits/chosen": 0.13458076119422913,
      "logits/rejected": 0.27361050248146057,
      "logps/chosen": -318.90203857421875,
      "logps/rejected": -284.5428466796875,
      "loss": 0.2178,
      "rewards/accuracies": 0.7124999761581421,
      "rewards/chosen": 0.19831877946853638,
      "rewards/margins": 0.23214980959892273,
      "rewards/rejected": -0.03383101895451546,
      "step": 560
    },
    {
      "epoch": 0.87,
      "learning_rate": 2.5757288163336806e-07,
      "logits/chosen": 0.11606297641992569,
      "logits/rejected": 0.22765600681304932,
      "logps/chosen": -313.04278564453125,
      "logps/rejected": -281.8597717285156,
      "loss": 0.1959,
      "rewards/accuracies": 0.78125,
      "rewards/chosen": 0.21330790221691132,
      "rewards/margins": 0.25364676117897034,
      "rewards/rejected": -0.04033887758851051,
      "step": 570
    },
    {
      "epoch": 0.88,
      "learning_rate": 2.019286455866981e-07,
      "logits/chosen": 0.057523488998413086,
      "logits/rejected": 0.19763953983783722,
      "logps/chosen": -299.98626708984375,
      "logps/rejected": -252.2355194091797,
      "loss": 0.2448,
      "rewards/accuracies": 0.7124999761581421,
      "rewards/chosen": 0.2040693461894989,
      "rewards/margins": 0.18340806663036346,
      "rewards/rejected": 0.020661287009716034,
      "step": 580
    },
    {
      "epoch": 0.9,
      "learning_rate": 1.5279854247146703e-07,
      "logits/chosen": 0.10874730348587036,
      "logits/rejected": 0.26722806692123413,
      "logps/chosen": -326.7654724121094,
      "logps/rejected": -274.41162109375,
      "loss": 0.2041,
      "rewards/accuracies": 0.7562500238418579,
      "rewards/chosen": 0.2091568410396576,
      "rewards/margins": 0.2396632879972458,
      "rewards/rejected": -0.030506467446684837,
      "step": 590
    },
    {
      "epoch": 0.91,
      "learning_rate": 1.1032183690276754e-07,
      "logits/chosen": 0.12037558853626251,
      "logits/rejected": 0.19973725080490112,
      "logps/chosen": -314.14910888671875,
      "logps/rejected": -270.01043701171875,
      "loss": 0.2167,
      "rewards/accuracies": 0.7749999761581421,
      "rewards/chosen": 0.21347804367542267,
      "rewards/margins": 0.22940710186958313,
      "rewards/rejected": -0.015929043292999268,
      "step": 600
    },
    {
      "epoch": 0.93,
      "learning_rate": 7.46189337174788e-08,
      "logits/chosen": 0.08730605989694595,
      "logits/rejected": 0.2364271879196167,
      "logps/chosen": -289.814453125,
      "logps/rejected": -251.5435791015625,
      "loss": 0.2221,
      "rewards/accuracies": 0.71875,
      "rewards/chosen": 0.19059757888317108,
      "rewards/margins": 0.19377049803733826,
      "rewards/rejected": -0.0031729289330542088,
      "step": 610
    },
    {
      "epoch": 0.94,
      "learning_rate": 4.579103667367385e-08,
      "logits/chosen": 0.13279291987419128,
      "logits/rejected": 0.2206091433763504,
      "logps/chosen": -342.04498291015625,
      "logps/rejected": -276.22613525390625,
      "loss": 0.1984,
      "rewards/accuracies": 0.71875,
      "rewards/chosen": 0.19695451855659485,
      "rewards/margins": 0.2266121655702591,
      "rewards/rejected": -0.029657626524567604,
      "step": 620
    },
    {
      "epoch": 0.96,
      "learning_rate": 2.3919861577572924e-08,
      "logits/chosen": 0.0992569848895073,
      "logits/rejected": 0.26812419295310974,
      "logps/chosen": -336.68817138671875,
      "logps/rejected": -261.6818542480469,
      "loss": 0.2111,
      "rewards/accuracies": 0.762499988079071,
      "rewards/chosen": 0.21192236244678497,
      "rewards/margins": 0.2388623207807541,
      "rewards/rejected": -0.026939954608678818,
      "step": 630
    },
    {
      "epoch": 0.98,
      "learning_rate": 9.067404651211808e-09,
      "logits/chosen": 0.04493387043476105,
      "logits/rejected": 0.1637609452009201,
      "logps/chosen": -332.1258544921875,
      "logps/rejected": -283.83709716796875,
      "loss": 0.1939,
      "rewards/accuracies": 0.800000011920929,
      "rewards/chosen": 0.20134110748767853,
      "rewards/margins": 0.23969101905822754,
      "rewards/rejected": -0.03834990784525871,
      "step": 640
    },
    {
      "epoch": 0.99,
      "learning_rate": 1.2757667974155896e-09,
      "logits/chosen": 0.1145804151892662,
      "logits/rejected": 0.23529252409934998,
      "logps/chosen": -351.08197021484375,
      "logps/rejected": -293.947998046875,
      "loss": 0.2055,
      "rewards/accuracies": 0.7437499761581421,
      "rewards/chosen": 0.2066545933485031,
      "rewards/margins": 0.22757765650749207,
      "rewards/rejected": -0.020923063158988953,
      "step": 650
    },
    {
      "epoch": 1.0,
      "step": 656,
      "total_flos": 0.0,
      "train_loss": 0.22703545004492853,
      "train_runtime": 7888.8763,
      "train_samples_per_second": 2.662,
      "train_steps_per_second": 0.083
    }
  ],
  "logging_steps": 10,
  "max_steps": 656,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}