{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9996190476190476,
  "eval_steps": 500,
  "global_step": 656,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "learning_rate": 7.575757575757576e-08,
      "logits/chosen": 0.07398031651973724,
      "logits/rejected": 0.059482574462890625,
      "logps/chosen": -279.7221984863281,
      "logps/rejected": -295.30865478515625,
      "loss": 2.4106,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.02,
      "learning_rate": 7.575757575757576e-07,
      "logits/chosen": 0.08179842680692673,
      "logits/rejected": 0.2137567102909088,
      "logps/chosen": -371.2894287109375,
      "logps/rejected": -378.87701416015625,
      "loss": 2.1369,
      "rewards/accuracies": 0.3958333432674408,
      "rewards/chosen": 0.000596717931330204,
      "rewards/margins": 0.0007703733863309026,
      "rewards/rejected": -0.00017365541134495288,
      "step": 10
    },
    {
      "epoch": 0.03,
      "learning_rate": 1.5151515151515152e-06,
      "logits/chosen": 0.13426382839679718,
      "logits/rejected": 0.17069879174232483,
      "logps/chosen": -337.7759704589844,
      "logps/rejected": -351.1375427246094,
      "loss": 2.1857,
      "rewards/accuracies": 0.40625,
      "rewards/chosen": -0.0013722162693738937,
      "rewards/margins": -0.0006242281524464488,
      "rewards/rejected": -0.0007479880005121231,
      "step": 20
    },
    {
      "epoch": 0.05,
      "learning_rate": 2.2727272727272728e-06,
      "logits/chosen": 0.11453332751989365,
      "logits/rejected": 0.1672835648059845,
      "logps/chosen": -343.336181640625,
      "logps/rejected": -351.83966064453125,
      "loss": 2.2006,
      "rewards/accuracies": 0.543749988079071,
      "rewards/chosen": 0.0007104009273461998,
      "rewards/margins": 0.0031625095289200544,
      "rewards/rejected": -0.0024521087761968374,
      "step": 30
    },
    {
      "epoch": 0.06,
      "learning_rate": 3.0303030303030305e-06,
      "logits/chosen": 0.14377865195274353,
      "logits/rejected": 0.23349857330322266,
      "logps/chosen": -338.24847412109375,
      "logps/rejected": -321.5999450683594,
      "loss": 2.0523,
      "rewards/accuracies": 0.5687500238418579,
      "rewards/chosen": -0.00015345169231295586,
      "rewards/margins": 0.0033258639741688967,
      "rewards/rejected": -0.003479315433651209,
      "step": 40
    },
    {
      "epoch": 0.08,
      "learning_rate": 3.7878787878787882e-06,
      "logits/chosen": 0.12487177550792694,
      "logits/rejected": 0.23440325260162354,
      "logps/chosen": -385.6036682128906,
      "logps/rejected": -353.2607727050781,
      "loss": 2.0721,
      "rewards/accuracies": 0.5562499761581421,
      "rewards/chosen": -0.0014706759247928858,
      "rewards/margins": 0.005332515574991703,
      "rewards/rejected": -0.006803191266953945,
      "step": 50
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.5454545454545455e-06,
      "logits/chosen": 0.1113414317369461,
      "logits/rejected": 0.1453917920589447,
      "logps/chosen": -375.4793701171875,
      "logps/rejected": -355.79571533203125,
      "loss": 2.0509,
      "rewards/accuracies": 0.625,
      "rewards/chosen": -0.002245596144348383,
      "rewards/margins": 0.019777730107307434,
      "rewards/rejected": -0.02202332578599453,
      "step": 60
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.999432965739786e-06,
      "logits/chosen": 0.13350918889045715,
      "logits/rejected": 0.1631946861743927,
      "logps/chosen": -323.2510986328125,
      "logps/rejected": -328.84039306640625,
      "loss": 2.0626,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": -0.0012016391847282648,
      "rewards/margins": 0.03956783190369606,
      "rewards/rejected": -0.040769465267658234,
      "step": 70
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.9930567839810125e-06,
      "logits/chosen": 0.0718630701303482,
      "logits/rejected": 0.19179414212703705,
      "logps/chosen": -378.56396484375,
      "logps/rejected": -368.79949951171875,
      "loss": 2.0501,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": 0.003595351707190275,
      "rewards/margins": 0.06342312693595886,
      "rewards/rejected": -0.0598277822136879,
      "step": 80
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.979613761906212e-06,
      "logits/chosen": 0.10020647943019867,
      "logits/rejected": 0.21944165229797363,
      "logps/chosen": -358.9504089355469,
      "logps/rejected": -345.926513671875,
      "loss": 1.8966,
      "rewards/accuracies": 0.6312500238418579,
      "rewards/chosen": -0.010454339906573296,
      "rewards/margins": 0.09268515557050705,
      "rewards/rejected": -0.1031394973397255,
      "step": 90
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.959142005221991e-06,
      "logits/chosen": 0.1388009488582611,
      "logits/rejected": 0.26329106092453003,
      "logps/chosen": -337.170166015625,
      "logps/rejected": -351.45172119140625,
      "loss": 1.9484,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": 0.009544052183628082,
      "rewards/margins": 0.14409229159355164,
      "rewards/rejected": -0.13454824686050415,
      "step": 100
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.931699543346854e-06,
      "logits/chosen": 0.10444238036870956,
      "logits/rejected": 0.20885030925273895,
      "logps/chosen": -329.65020751953125,
      "logps/rejected": -367.345458984375,
      "loss": 1.7584,
      "rewards/accuracies": 0.6499999761581421,
      "rewards/chosen": -0.03416838496923447,
      "rewards/margins": 0.19879209995269775,
      "rewards/rejected": -0.23296049237251282,
      "step": 110
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.897364164920515e-06,
      "logits/chosen": 0.10559381544589996,
      "logits/rejected": 0.1961037516593933,
      "logps/chosen": -354.2994079589844,
      "logps/rejected": -344.41925048828125,
      "loss": 1.7929,
      "rewards/accuracies": 0.612500011920929,
      "rewards/chosen": -0.05705786868929863,
      "rewards/margins": 0.17845068871974945,
      "rewards/rejected": -0.23550856113433838,
      "step": 120
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.8562331973035396e-06,
      "logits/chosen": 0.12288101017475128,
      "logits/rejected": 0.22050254046916962,
      "logps/chosen": -327.88311767578125,
      "logps/rejected": -356.74407958984375,
      "loss": 1.8191,
      "rewards/accuracies": 0.6499999761581421,
      "rewards/chosen": -0.07013808190822601,
      "rewards/margins": 0.22160351276397705,
      "rewards/rejected": -0.29174157977104187,
      "step": 130
    },
    {
      "epoch": 0.21,
      "learning_rate": 4.808423230692374e-06,
      "logits/chosen": 0.1860750913619995,
      "logits/rejected": 0.18267032504081726,
      "logps/chosen": -338.1869812011719,
      "logps/rejected": -380.5123596191406,
      "loss": 1.7142,
      "rewards/accuracies": 0.6812499761581421,
      "rewards/chosen": -0.07972263544797897,
      "rewards/margins": 0.2994547486305237,
      "rewards/rejected": -0.37917739152908325,
      "step": 140
    },
    {
      "epoch": 0.23,
      "learning_rate": 4.754069787631761e-06,
      "logits/chosen": 0.15666987001895905,
      "logits/rejected": 0.24864494800567627,
      "logps/chosen": -409.4656677246094,
      "logps/rejected": -393.28466796875,
      "loss": 1.6771,
      "rewards/accuracies": 0.7124999761581421,
      "rewards/chosen": -0.06894813477993011,
      "rewards/margins": 0.3388601243495941,
      "rewards/rejected": -0.40780824422836304,
      "step": 150
    },
    {
      "epoch": 0.24,
      "learning_rate": 4.693326938861367e-06,
      "logits/chosen": 0.12029329687356949,
      "logits/rejected": 0.17621104419231415,
      "logps/chosen": -326.62701416015625,
      "logps/rejected": -357.37164306640625,
      "loss": 1.74,
      "rewards/accuracies": 0.6312500238418579,
      "rewards/chosen": -0.045461155474185944,
      "rewards/margins": 0.287504643201828,
      "rewards/rejected": -0.33296579122543335,
      "step": 160
    },
    {
      "epoch": 0.26,
      "learning_rate": 4.626366866585528e-06,
      "logits/chosen": 0.17087192833423615,
      "logits/rejected": 0.25556960701942444,
      "logps/chosen": -338.80230712890625,
      "logps/rejected": -350.237060546875,
      "loss": 1.6582,
      "rewards/accuracies": 0.643750011920929,
      "rewards/chosen": -0.02211242914199829,
      "rewards/margins": 0.2152978479862213,
      "rewards/rejected": -0.237410306930542,
      "step": 170
    },
    {
      "epoch": 0.27,
      "learning_rate": 4.553379376404085e-06,
      "logits/chosen": 0.14991971850395203,
      "logits/rejected": 0.1636931598186493,
      "logps/chosen": -308.0624084472656,
      "logps/rejected": -344.2478942871094,
      "loss": 1.6719,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": 0.004089848138391972,
      "rewards/margins": 0.2675096392631531,
      "rewards/rejected": -0.2634198069572449,
      "step": 180
    },
    {
      "epoch": 0.29,
      "learning_rate": 4.474571359287791e-06,
      "logits/chosen": 0.15207740664482117,
      "logits/rejected": 0.211543008685112,
      "logps/chosen": -336.4064636230469,
      "logps/rejected": -340.141357421875,
      "loss": 1.6014,
      "rewards/accuracies": 0.6875,
      "rewards/chosen": -0.01036889385432005,
      "rewards/margins": 0.2858952283859253,
      "rewards/rejected": -0.296264111995697,
      "step": 190
    },
    {
      "epoch": 0.3,
      "learning_rate": 4.3901662051233755e-06,
      "logits/chosen": 0.1840183436870575,
      "logits/rejected": 0.22152027487754822,
      "logps/chosen": -404.16021728515625,
      "logps/rejected": -356.364013671875,
      "loss": 1.7463,
      "rewards/accuracies": 0.675000011920929,
      "rewards/chosen": 0.03463779762387276,
      "rewards/margins": 0.25136780738830566,
      "rewards/rejected": -0.21672996878623962,
      "step": 200
    },
    {
      "epoch": 0.32,
      "learning_rate": 4.30040316949064e-06,
      "logits/chosen": 0.1487782895565033,
      "logits/rejected": 0.20272579789161682,
      "logps/chosen": -347.5115966796875,
      "logps/rejected": -344.53753662109375,
      "loss": 1.6848,
      "rewards/accuracies": 0.6812499761581421,
      "rewards/chosen": 0.019732611253857613,
      "rewards/margins": 0.21993084251880646,
      "rewards/rejected": -0.2001982480287552,
      "step": 210
    },
    {
      "epoch": 0.34,
      "learning_rate": 4.205536695466524e-06,
      "logits/chosen": 0.11921755224466324,
      "logits/rejected": 0.16543138027191162,
      "logps/chosen": -302.01861572265625,
      "logps/rejected": -345.60491943359375,
      "loss": 1.7084,
      "rewards/accuracies": 0.699999988079071,
      "rewards/chosen": -0.0038991228211671114,
      "rewards/margins": 0.31049156188964844,
      "rewards/rejected": -0.3143906593322754,
      "step": 220
    },
    {
      "epoch": 0.35,
      "learning_rate": 4.105835692378557e-06,
      "logits/chosen": 0.13227376341819763,
      "logits/rejected": 0.17412447929382324,
      "logps/chosen": -337.3625793457031,
      "logps/rejected": -365.3255615234375,
      "loss": 1.6995,
      "rewards/accuracies": 0.699999988079071,
      "rewards/chosen": -0.05614470690488815,
      "rewards/margins": 0.28102201223373413,
      "rewards/rejected": -0.3371667265892029,
      "step": 230
    },
    {
      "epoch": 0.37,
      "learning_rate": 4.001582773552153e-06,
      "logits/chosen": 0.13456036150455475,
      "logits/rejected": 0.22667856514453888,
      "logps/chosen": -403.4490966796875,
      "logps/rejected": -408.80413818359375,
      "loss": 1.4862,
      "rewards/accuracies": 0.699999988079071,
      "rewards/chosen": -0.025508727878332138,
      "rewards/margins": 0.3425619602203369,
      "rewards/rejected": -0.36807072162628174,
      "step": 240
    },
    {
      "epoch": 0.38,
      "learning_rate": 3.893073455212438e-06,
      "logits/chosen": 0.13273295760154724,
      "logits/rejected": 0.21381357312202454,
      "logps/chosen": -335.8673095703125,
      "logps/rejected": -351.50103759765625,
      "loss": 1.5854,
      "rewards/accuracies": 0.699999988079071,
      "rewards/chosen": -0.09447745978832245,
      "rewards/margins": 0.27237147092819214,
      "rewards/rejected": -0.3668489158153534,
      "step": 250
    },
    {
      "epoch": 0.4,
      "learning_rate": 3.7806153188114027e-06,
      "logits/chosen": 0.18772640824317932,
      "logits/rejected": 0.20650401711463928,
      "logps/chosen": -320.12945556640625,
      "logps/rejected": -362.57281494140625,
      "loss": 1.7323,
      "rewards/accuracies": 0.668749988079071,
      "rewards/chosen": -0.056845515966415405,
      "rewards/margins": 0.2855423092842102,
      "rewards/rejected": -0.342387855052948,
      "step": 260
    },
    {
      "epoch": 0.41,
      "learning_rate": 3.6645271391548542e-06,
      "logits/chosen": 0.154958575963974,
      "logits/rejected": 0.19303588569164276,
      "logps/chosen": -360.35443115234375,
      "logps/rejected": -359.91497802734375,
      "loss": 1.6388,
      "rewards/accuracies": 0.706250011920929,
      "rewards/chosen": 0.005552726797759533,
      "rewards/margins": 0.27676287293434143,
      "rewards/rejected": -0.2712101340293884,
      "step": 270
    },
    {
      "epoch": 0.43,
      "learning_rate": 3.5451379808006014e-06,
      "logits/chosen": 0.1470947563648224,
      "logits/rejected": 0.19554203748703003,
      "logps/chosen": -343.96466064453125,
      "logps/rejected": -351.0820007324219,
      "loss": 1.6004,
      "rewards/accuracies": 0.75,
      "rewards/chosen": 0.018333502113819122,
      "rewards/margins": 0.3293381631374359,
      "rewards/rejected": -0.3110046684741974,
      "step": 280
    },
    {
      "epoch": 0.44,
      "learning_rate": 3.4227862652892106e-06,
      "logits/chosen": 0.18207962810993195,
      "logits/rejected": 0.25772327184677124,
      "logps/chosen": -379.73248291015625,
      "logps/rejected": -392.091552734375,
      "loss": 1.631,
      "rewards/accuracies": 0.6812499761581421,
      "rewards/chosen": -0.0037728759925812483,
      "rewards/margins": 0.3134722113609314,
      "rewards/rejected": -0.3172450661659241,
      "step": 290
    },
    {
      "epoch": 0.46,
      "learning_rate": 3.2978188118513814e-06,
      "logits/chosen": 0.18880879878997803,
      "logits/rejected": 0.22835353016853333,
      "logps/chosen": -318.1977844238281,
      "logps/rejected": -360.68096923828125,
      "loss": 1.6509,
      "rewards/accuracies": 0.668749988079071,
      "rewards/chosen": -0.051277369260787964,
      "rewards/margins": 0.28354349732398987,
      "rewards/rejected": -0.33482086658477783,
      "step": 300
    },
    {
      "epoch": 0.47,
      "learning_rate": 3.1705898543111576e-06,
      "logits/chosen": 0.1640356034040451,
      "logits/rejected": 0.2013256549835205,
      "logps/chosen": -345.88494873046875,
      "logps/rejected": -396.95489501953125,
      "loss": 1.5511,
      "rewards/accuracies": 0.699999988079071,
      "rewards/chosen": -0.03583758696913719,
      "rewards/margins": 0.3143077492713928,
      "rewards/rejected": -0.35014528036117554,
      "step": 310
    },
    {
      "epoch": 0.49,
      "learning_rate": 3.041460036971664e-06,
      "logits/chosen": 0.10814084857702255,
      "logits/rejected": 0.17361339926719666,
      "logps/chosen": -331.90240478515625,
      "logps/rejected": -345.795166015625,
      "loss": 1.6032,
      "rewards/accuracies": 0.6499999761581421,
      "rewards/chosen": -0.06420620530843735,
      "rewards/margins": 0.19957861304283142,
      "rewards/rejected": -0.26378482580184937,
      "step": 320
    },
    {
      "epoch": 0.5,
      "learning_rate": 2.910795392329649e-06,
      "logits/chosen": 0.13951388001441956,
      "logits/rejected": 0.19447830319404602,
      "logps/chosen": -364.3550720214844,
      "logps/rejected": -359.9488830566406,
      "loss": 1.595,
      "rewards/accuracies": 0.706250011920929,
      "rewards/chosen": -0.010126419365406036,
      "rewards/margins": 0.31530141830444336,
      "rewards/rejected": -0.3254278302192688,
      "step": 330
    },
    {
      "epoch": 0.52,
      "learning_rate": 2.7789663035166035e-06,
      "logits/chosen": 0.1637967824935913,
      "logits/rejected": 0.15295840799808502,
      "logps/chosen": -340.40386962890625,
      "logps/rejected": -370.7852478027344,
      "loss": 1.5882,
      "rewards/accuracies": 0.6875,
      "rewards/chosen": -0.09541453421115875,
      "rewards/margins": 0.31104880571365356,
      "rewards/rejected": -0.4064633250236511,
      "step": 340
    },
    {
      "epoch": 0.53,
      "learning_rate": 2.6463464544075344e-06,
      "logits/chosen": 0.14287754893302917,
      "logits/rejected": 0.21446409821510315,
      "logps/chosen": -355.376220703125,
      "logps/rejected": -388.11944580078125,
      "loss": 1.4669,
      "rewards/accuracies": 0.706250011920929,
      "rewards/chosen": -0.019069373607635498,
      "rewards/margins": 0.37106311321258545,
      "rewards/rejected": -0.39013251662254333,
      "step": 350
    },
    {
      "epoch": 0.55,
      "learning_rate": 2.513311770373421e-06,
      "logits/chosen": 0.13659325242042542,
      "logits/rejected": 0.22452709078788757,
      "logps/chosen": -303.7241516113281,
      "logps/rejected": -364.5718688964844,
      "loss": 1.5243,
      "rewards/accuracies": 0.706250011920929,
      "rewards/chosen": -0.022251833230257034,
      "rewards/margins": 0.3609643578529358,
      "rewards/rejected": -0.38321617245674133,
      "step": 360
    },
    {
      "epoch": 0.56,
      "learning_rate": 2.380239352679908e-06,
      "logits/chosen": 0.13927368819713593,
      "logits/rejected": 0.2216307818889618,
      "logps/chosen": -325.48150634765625,
      "logps/rejected": -382.0296325683594,
      "loss": 1.5408,
      "rewards/accuracies": 0.71875,
      "rewards/chosen": 0.0005920141702517867,
      "rewards/margins": 0.3364425599575043,
      "rewards/rejected": -0.3358505666255951,
      "step": 370
    },
    {
      "epoch": 0.58,
      "learning_rate": 2.247506409552795e-06,
      "logits/chosen": 0.15144166350364685,
      "logits/rejected": 0.20430748164653778,
      "logps/chosen": -369.327880859375,
      "logps/rejected": -383.3594055175781,
      "loss": 1.6408,
      "rewards/accuracies": 0.706250011920929,
      "rewards/chosen": -0.05453786998987198,
      "rewards/margins": 0.41130223870277405,
      "rewards/rejected": -0.46584004163742065,
      "step": 380
    },
    {
      "epoch": 0.59,
      "learning_rate": 2.1154891869403436e-06,
      "logits/chosen": 0.11367179453372955,
      "logits/rejected": 0.20442676544189453,
      "logps/chosen": -361.3442077636719,
      "logps/rejected": -390.0636291503906,
      "loss": 1.641,
      "rewards/accuracies": 0.71875,
      "rewards/chosen": -0.013566520996391773,
      "rewards/margins": 0.45338043570518494,
      "rewards/rejected": -0.4669469892978668,
      "step": 390
    },
    {
      "epoch": 0.61,
      "learning_rate": 1.9845619020032552e-06,
      "logits/chosen": 0.15614674985408783,
      "logits/rejected": 0.20679621398448944,
      "logps/chosen": -328.5157165527344,
      "logps/rejected": -368.5887451171875,
      "loss": 1.6521,
      "rewards/accuracies": 0.6875,
      "rewards/chosen": -0.036636289209127426,
      "rewards/margins": 0.31485632061958313,
      "rewards/rejected": -0.35149258375167847,
      "step": 400
    },
    {
      "epoch": 0.62,
      "learning_rate": 1.8550956823554708e-06,
      "logits/chosen": 0.12708225846290588,
      "logits/rejected": 0.21543464064598083,
      "logps/chosen": -377.3260498046875,
      "logps/rejected": -364.48870849609375,
      "loss": 1.5909,
      "rewards/accuracies": 0.699999988079071,
      "rewards/chosen": -0.0568900890648365,
      "rewards/margins": 0.30653566122055054,
      "rewards/rejected": -0.36342576146125793,
      "step": 410
    },
    {
      "epoch": 0.64,
      "learning_rate": 1.7274575140626318e-06,
      "logits/chosen": 0.20352402329444885,
      "logits/rejected": 0.27957600355148315,
      "logps/chosen": -347.7279357910156,
      "logps/rejected": -355.5521545410156,
      "loss": 1.6226,
      "rewards/accuracies": 0.643750011920929,
      "rewards/chosen": -0.06915120780467987,
      "rewards/margins": 0.2660463750362396,
      "rewards/rejected": -0.3351975977420807,
      "step": 420
    },
    {
      "epoch": 0.66,
      "learning_rate": 1.6020092013802002e-06,
      "logits/chosen": 0.14161694049835205,
      "logits/rejected": 0.22023169696331024,
      "logps/chosen": -323.6744689941406,
      "logps/rejected": -365.5609130859375,
      "loss": 1.5258,
      "rewards/accuracies": 0.71875,
      "rewards/chosen": -0.026104014366865158,
      "rewards/margins": 0.40274888277053833,
      "rewards/rejected": -0.4288528859615326,
      "step": 430
    },
    {
      "epoch": 0.67,
      "learning_rate": 1.4791063411799938e-06,
      "logits/chosen": 0.20196688175201416,
      "logits/rejected": 0.22374701499938965,
      "logps/chosen": -346.626220703125,
      "logps/rejected": -398.343994140625,
      "loss": 1.6026,
      "rewards/accuracies": 0.6875,
      "rewards/chosen": -0.14315533638000488,
      "rewards/margins": 0.2796045243740082,
      "rewards/rejected": -0.42275986075401306,
      "step": 440
    },
    {
      "epoch": 0.69,
      "learning_rate": 1.3590973149722103e-06,
      "logits/chosen": 0.16043411195278168,
      "logits/rejected": 0.24400117993354797,
      "logps/chosen": -350.2712097167969,
      "logps/rejected": -377.76129150390625,
      "loss": 1.6442,
      "rewards/accuracies": 0.65625,
      "rewards/chosen": -0.05976473540067673,
      "rewards/margins": 0.2989664673805237,
      "rewards/rejected": -0.3587311804294586,
      "step": 450
    },
    {
      "epoch": 0.7,
      "learning_rate": 1.2423223013801946e-06,
      "logits/chosen": 0.14352941513061523,
      "logits/rejected": 0.24110408127307892,
      "logps/chosen": -367.91851806640625,
      "logps/rejected": -397.81072998046875,
      "loss": 1.6837,
      "rewards/accuracies": 0.6812499761581421,
      "rewards/chosen": -0.08046244829893112,
      "rewards/margins": 0.36456337571144104,
      "rewards/rejected": -0.44502583146095276,
      "step": 460
    },
    {
      "epoch": 0.72,
      "learning_rate": 1.1291123118671665e-06,
      "logits/chosen": 0.0973966121673584,
      "logits/rejected": 0.18068069219589233,
      "logps/chosen": -339.65704345703125,
      "logps/rejected": -340.48236083984375,
      "loss": 1.6314,
      "rewards/accuracies": 0.675000011920929,
      "rewards/chosen": -0.06265803426504135,
      "rewards/margins": 0.30512723326683044,
      "rewards/rejected": -0.3677853047847748,
      "step": 470
    },
    {
      "epoch": 0.73,
      "learning_rate": 1.019788252448267e-06,
      "logits/chosen": 0.17376969754695892,
      "logits/rejected": 0.21862807869911194,
      "logps/chosen": -355.0315856933594,
      "logps/rejected": -376.3943786621094,
      "loss": 1.5767,
      "rewards/accuracies": 0.6937500238418579,
      "rewards/chosen": -0.02587694302201271,
      "rewards/margins": 0.3213956654071808,
      "rewards/rejected": -0.3472725749015808,
      "step": 480
    },
    {
      "epoch": 0.75,
      "learning_rate": 9.146600140475945e-07,
      "logits/chosen": 0.1421867460012436,
      "logits/rejected": 0.23107881844043732,
      "logps/chosen": -391.0975341796875,
      "logps/rejected": -383.6336975097656,
      "loss": 1.7495,
      "rewards/accuracies": 0.6499999761581421,
      "rewards/chosen": -0.10866842418909073,
      "rewards/margins": 0.2634437382221222,
      "rewards/rejected": -0.3721121549606323,
      "step": 490
    },
    {
      "epoch": 0.76,
      "learning_rate": 8.140255940787059e-07,
      "logits/chosen": 0.13602428138256073,
      "logits/rejected": 0.23974844813346863,
      "logps/chosen": -341.78582763671875,
      "logps/rejected": -399.82904052734375,
      "loss": 1.5854,
      "rewards/accuracies": 0.706250011920929,
      "rewards/chosen": -0.04695357754826546,
      "rewards/margins": 0.3549908697605133,
      "rewards/rejected": -0.40194445848464966,
      "step": 500
    },
    {
      "epoch": 0.78,
      "learning_rate": 7.181702517385789e-07,
      "logits/chosen": 0.170148104429245,
      "logits/rejected": 0.21931186318397522,
      "logps/chosen": -323.8975524902344,
      "logps/rejected": -348.66766357421875,
      "loss": 1.7339,
      "rewards/accuracies": 0.637499988079071,
      "rewards/chosen": -0.07490874826908112,
      "rewards/margins": 0.3289056718349457,
      "rewards/rejected": -0.4038144052028656,
      "step": 510
    },
    {
      "epoch": 0.79,
      "learning_rate": 6.273656994094232e-07,
      "logits/chosen": 0.17631427943706512,
      "logits/rejected": 0.23277851939201355,
      "logps/chosen": -345.8653259277344,
      "logps/rejected": -342.0807800292969,
      "loss": 1.6504,
      "rewards/accuracies": 0.675000011920929,
      "rewards/chosen": -0.02272309735417366,
      "rewards/margins": 0.33489790558815,
      "rewards/rejected": -0.3576210141181946,
      "step": 520
    },
    {
      "epoch": 0.81,
      "learning_rate": 5.418693324604082e-07,
      "logits/chosen": 0.1863461136817932,
      "logits/rejected": 0.25381818413734436,
      "logps/chosen": -358.6033630371094,
      "logps/rejected": -392.04302978515625,
      "loss": 1.542,
      "rewards/accuracies": 0.731249988079071,
      "rewards/chosen": 0.001642666757106781,
      "rewards/margins": 0.41695213317871094,
      "rewards/rejected": -0.41530942916870117,
      "step": 530
    },
    {
      "epoch": 0.82,
      "learning_rate": 4.619234996325314e-07,
      "logits/chosen": 0.11545145511627197,
      "logits/rejected": 0.20592764019966125,
      "logps/chosen": -349.9122619628906,
      "logps/rejected": -408.61590576171875,
      "loss": 1.5374,
      "rewards/accuracies": 0.75,
      "rewards/chosen": 8.928254101192579e-05,
      "rewards/margins": 0.40838712453842163,
      "rewards/rejected": -0.4082978367805481,
      "step": 540
    },
    {
      "epoch": 0.84,
      "learning_rate": 3.877548160747768e-07,
      "logits/chosen": 0.12814117968082428,
      "logits/rejected": 0.19134709239006042,
      "logps/chosen": -337.3287658691406,
      "logps/rejected": -354.94415283203125,
      "loss": 1.6835,
      "rewards/accuracies": 0.6812499761581421,
      "rewards/chosen": 0.006277731154114008,
      "rewards/margins": 0.3102794587612152,
      "rewards/rejected": -0.30400174856185913,
      "step": 550
    },
    {
      "epoch": 0.85,
      "learning_rate": 3.195735209788528e-07,
      "logits/chosen": 0.1329401135444641,
      "logits/rejected": 0.2162102907896042,
      "logps/chosen": -341.5213928222656,
      "logps/rejected": -338.03179931640625,
      "loss": 1.6469,
      "rewards/accuracies": 0.668749988079071,
      "rewards/chosen": -0.0008542388677597046,
      "rewards/margins": 0.3375299572944641,
      "rewards/rejected": -0.338384211063385,
      "step": 560
    },
    {
      "epoch": 0.87,
      "learning_rate": 2.5757288163336806e-07,
      "logits/chosen": 0.1493878811597824,
      "logits/rejected": 0.20596106350421906,
      "logps/chosen": -352.513916015625,
      "logps/rejected": -391.4380798339844,
      "loss": 1.6831,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": -0.06621219962835312,
      "rewards/margins": 0.2802005708217621,
      "rewards/rejected": -0.3464128077030182,
      "step": 570
    },
    {
      "epoch": 0.88,
      "learning_rate": 2.019286455866981e-07,
      "logits/chosen": 0.1281604915857315,
      "logits/rejected": 0.19645507633686066,
      "logps/chosen": -302.35040283203125,
      "logps/rejected": -354.00372314453125,
      "loss": 1.6607,
      "rewards/accuracies": 0.675000011920929,
      "rewards/chosen": -0.044999100267887115,
      "rewards/margins": 0.3108140826225281,
      "rewards/rejected": -0.3558131754398346,
      "step": 580
    },
    {
      "epoch": 0.9,
      "learning_rate": 1.5279854247146703e-07,
      "logits/chosen": 0.1600816547870636,
      "logits/rejected": 0.2620231509208679,
      "logps/chosen": -363.3172607421875,
      "logps/rejected": -383.7359619140625,
      "loss": 1.5077,
      "rewards/accuracies": 0.699999988079071,
      "rewards/chosen": -0.035707950592041016,
      "rewards/margins": 0.3358200192451477,
      "rewards/rejected": -0.3715279698371887,
      "step": 590
    },
    {
      "epoch": 0.91,
      "learning_rate": 1.1032183690276754e-07,
      "logits/chosen": 0.1881883442401886,
      "logits/rejected": 0.23025290668010712,
      "logps/chosen": -348.2078552246094,
      "logps/rejected": -356.3308410644531,
      "loss": 1.4724,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": -0.01992412842810154,
      "rewards/margins": 0.35621514916419983,
      "rewards/rejected": -0.37613925337791443,
      "step": 600
    },
    {
      "epoch": 0.93,
      "learning_rate": 7.46189337174788e-08,
      "logits/chosen": 0.16047361493110657,
      "logits/rejected": 0.21806029975414276,
      "logps/chosen": -338.9239196777344,
      "logps/rejected": -370.13238525390625,
      "loss": 1.5501,
      "rewards/accuracies": 0.6937500238418579,
      "rewards/chosen": -0.0008598908898420632,
      "rewards/margins": 0.35586023330688477,
      "rewards/rejected": -0.35672011971473694,
      "step": 610
    },
    {
      "epoch": 0.94,
      "learning_rate": 4.579103667367385e-08,
      "logits/chosen": 0.1737244576215744,
      "logits/rejected": 0.2040444165468216,
      "logps/chosen": -367.3244323730469,
      "logps/rejected": -375.1661071777344,
      "loss": 1.6325,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": -0.08697754144668579,
      "rewards/margins": 0.260633647441864,
      "rewards/rejected": -0.3476111590862274,
      "step": 620
    },
    {
      "epoch": 0.96,
      "learning_rate": 2.3919861577572924e-08,
      "logits/chosen": 0.17082975804805756,
      "logits/rejected": 0.2609696090221405,
      "logps/chosen": -356.7315979003906,
      "logps/rejected": -364.6842041015625,
      "loss": 1.6992,
      "rewards/accuracies": 0.699999988079071,
      "rewards/chosen": -0.03417596220970154,
      "rewards/margins": 0.30746400356292725,
      "rewards/rejected": -0.34163999557495117,
      "step": 630
    },
    {
      "epoch": 0.98,
      "learning_rate": 9.067404651211808e-09,
      "logits/chosen": 0.07360972464084625,
      "logits/rejected": 0.17394272983074188,
      "logps/chosen": -343.9101867675781,
      "logps/rejected": -367.1744079589844,
      "loss": 1.4701,
      "rewards/accuracies": 0.731249988079071,
      "rewards/chosen": -0.053998060524463654,
      "rewards/margins": 0.355236679315567,
      "rewards/rejected": -0.4092347025871277,
      "step": 640
    },
    {
      "epoch": 0.99,
      "learning_rate": 1.2757667974155896e-09,
      "logits/chosen": 0.16294406354427338,
      "logits/rejected": 0.23806321620941162,
      "logps/chosen": -380.12554931640625,
      "logps/rejected": -385.9973449707031,
      "loss": 1.6559,
      "rewards/accuracies": 0.6812499761581421,
      "rewards/chosen": -0.054873187094926834,
      "rewards/margins": 0.2882222533226013,
      "rewards/rejected": -0.34309545159339905,
      "step": 650
    },
    {
      "epoch": 1.0,
      "step": 656,
      "total_flos": 0.0,
      "train_loss": 1.6983561014256827,
      "train_runtime": 7833.1099,
      "train_samples_per_second": 2.681,
      "train_steps_per_second": 0.084
    }
  ],
  "logging_steps": 10,
  "max_steps": 656,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}