|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9982631930527722, |
|
"eval_steps": 400, |
|
"global_step": 467, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01068804275217101, |
|
"grad_norm": 48.927791324930695, |
|
"learning_rate": 1.0638297872340425e-07, |
|
"logits/chosen": -1.0180829763412476, |
|
"logits/rejected": -0.9883173704147339, |
|
"logps/chosen": -0.2738715410232544, |
|
"logps/rejected": -0.2716783285140991, |
|
"loss": 3.0574, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -2.738715648651123, |
|
"rewards/margins": -0.021932203322649002, |
|
"rewards/rejected": -2.716783046722412, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.02137608550434202, |
|
"grad_norm": 39.813279548661036, |
|
"learning_rate": 2.127659574468085e-07, |
|
"logits/chosen": -1.0492197275161743, |
|
"logits/rejected": -0.9815438985824585, |
|
"logps/chosen": -0.2942040264606476, |
|
"logps/rejected": -0.29975026845932007, |
|
"loss": 3.0033, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -2.942039966583252, |
|
"rewards/margins": 0.055462419986724854, |
|
"rewards/rejected": -2.997502326965332, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03206412825651302, |
|
"grad_norm": 54.64580630838249, |
|
"learning_rate": 3.1914893617021275e-07, |
|
"logits/chosen": -0.9780637621879578, |
|
"logits/rejected": -0.9978879690170288, |
|
"logps/chosen": -0.2642993927001953, |
|
"logps/rejected": -0.3006458878517151, |
|
"loss": 2.9877, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.642993688583374, |
|
"rewards/margins": 0.363465279340744, |
|
"rewards/rejected": -3.0064589977264404, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.04275217100868404, |
|
"grad_norm": 78.63474777212464, |
|
"learning_rate": 4.25531914893617e-07, |
|
"logits/chosen": -0.9655851125717163, |
|
"logits/rejected": -0.9391099810600281, |
|
"logps/chosen": -0.2776910662651062, |
|
"logps/rejected": -0.291360080242157, |
|
"loss": 2.9252, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -2.7769107818603516, |
|
"rewards/margins": 0.13669000566005707, |
|
"rewards/rejected": -2.9136006832122803, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.053440213760855046, |
|
"grad_norm": 53.858972431024775, |
|
"learning_rate": 5.319148936170212e-07, |
|
"logits/chosen": -1.0097562074661255, |
|
"logits/rejected": -0.9812997579574585, |
|
"logps/chosen": -0.2714676260948181, |
|
"logps/rejected": -0.27822521328926086, |
|
"loss": 3.0821, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -2.7146763801574707, |
|
"rewards/margins": 0.06757592409849167, |
|
"rewards/rejected": -2.782252073287964, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.06412825651302605, |
|
"grad_norm": 44.312475927746796, |
|
"learning_rate": 6.382978723404255e-07, |
|
"logits/chosen": -0.9986146688461304, |
|
"logits/rejected": -0.9536568522453308, |
|
"logps/chosen": -0.27314493060112, |
|
"logps/rejected": -0.27925461530685425, |
|
"loss": 2.937, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": -2.731449604034424, |
|
"rewards/margins": 0.06109660863876343, |
|
"rewards/rejected": -2.792546033859253, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.07481629926519706, |
|
"grad_norm": 55.321940182511284, |
|
"learning_rate": 7.446808510638297e-07, |
|
"logits/chosen": -1.0669300556182861, |
|
"logits/rejected": -0.9896968603134155, |
|
"logps/chosen": -0.29428571462631226, |
|
"logps/rejected": -0.3205253481864929, |
|
"loss": 2.905, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -2.942857265472412, |
|
"rewards/margins": 0.26239633560180664, |
|
"rewards/rejected": -3.2052536010742188, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.08550434201736808, |
|
"grad_norm": 53.68098989474069, |
|
"learning_rate": 8.51063829787234e-07, |
|
"logits/chosen": -1.0166269540786743, |
|
"logits/rejected": -0.9719806909561157, |
|
"logps/chosen": -0.2796934247016907, |
|
"logps/rejected": -0.32216984033584595, |
|
"loss": 2.916, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.796934127807617, |
|
"rewards/margins": 0.42476367950439453, |
|
"rewards/rejected": -3.221698045730591, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.09619238476953908, |
|
"grad_norm": 36.765236755711314, |
|
"learning_rate": 9.574468085106384e-07, |
|
"logits/chosen": -1.0554900169372559, |
|
"logits/rejected": -1.0124839544296265, |
|
"logps/chosen": -0.3013826012611389, |
|
"logps/rejected": -0.3502373695373535, |
|
"loss": 2.9447, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -3.0138256549835205, |
|
"rewards/margins": 0.4885478913784027, |
|
"rewards/rejected": -3.5023739337921143, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.10688042752171009, |
|
"grad_norm": 72.12342853911701, |
|
"learning_rate": 9.998741174712533e-07, |
|
"logits/chosen": -1.033050298690796, |
|
"logits/rejected": -0.9839521646499634, |
|
"logps/chosen": -0.3049773573875427, |
|
"logps/rejected": -0.3382193446159363, |
|
"loss": 2.976, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -3.0497734546661377, |
|
"rewards/margins": 0.33241981267929077, |
|
"rewards/rejected": -3.382193088531494, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.11756847027388109, |
|
"grad_norm": 67.04896260966717, |
|
"learning_rate": 9.991050648838675e-07, |
|
"logits/chosen": -1.0543005466461182, |
|
"logits/rejected": -1.0193541049957275, |
|
"logps/chosen": -0.2847168445587158, |
|
"logps/rejected": -0.34575051069259644, |
|
"loss": 2.7924, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -2.847168445587158, |
|
"rewards/margins": 0.6103365421295166, |
|
"rewards/rejected": -3.457504987716675, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.1282565130260521, |
|
"grad_norm": 53.20515583895435, |
|
"learning_rate": 9.97637968732563e-07, |
|
"logits/chosen": -1.1007188558578491, |
|
"logits/rejected": -1.066847801208496, |
|
"logps/chosen": -0.32495683431625366, |
|
"logps/rejected": -0.3465155363082886, |
|
"loss": 2.8738, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -3.249568462371826, |
|
"rewards/margins": 0.215586856007576, |
|
"rewards/rejected": -3.4651551246643066, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.13894455577822312, |
|
"grad_norm": 54.54015013992033, |
|
"learning_rate": 9.954748808839674e-07, |
|
"logits/chosen": -1.0070468187332153, |
|
"logits/rejected": -0.9784091711044312, |
|
"logps/chosen": -0.37832310795783997, |
|
"logps/rejected": -0.43590840697288513, |
|
"loss": 2.7895, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -3.783231258392334, |
|
"rewards/margins": 0.5758528113365173, |
|
"rewards/rejected": -4.359084129333496, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.14963259853039412, |
|
"grad_norm": 38.242775225934125, |
|
"learning_rate": 9.926188266120295e-07, |
|
"logits/chosen": -1.0234776735305786, |
|
"logits/rejected": -0.9988471269607544, |
|
"logps/chosen": -0.3544539511203766, |
|
"logps/rejected": -0.4332161545753479, |
|
"loss": 2.8516, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -3.5445396900177, |
|
"rewards/margins": 0.7876222729682922, |
|
"rewards/rejected": -4.332161903381348, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.16032064128256512, |
|
"grad_norm": 70.74640041136536, |
|
"learning_rate": 9.890738003669027e-07, |
|
"logits/chosen": -0.9775687456130981, |
|
"logits/rejected": -0.9074035882949829, |
|
"logps/chosen": -0.3704521059989929, |
|
"logps/rejected": -0.42546525597572327, |
|
"loss": 2.7815, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -3.704521656036377, |
|
"rewards/margins": 0.5501310229301453, |
|
"rewards/rejected": -4.254652500152588, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.17100868403473615, |
|
"grad_norm": 46.9909884312478, |
|
"learning_rate": 9.848447601883433e-07, |
|
"logits/chosen": -0.9548114538192749, |
|
"logits/rejected": -0.94190514087677, |
|
"logps/chosen": -0.35945671796798706, |
|
"logps/rejected": -0.4592272639274597, |
|
"loss": 2.7108, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -3.594567060470581, |
|
"rewards/margins": 0.9977054595947266, |
|
"rewards/rejected": -4.592272758483887, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.18169672678690715, |
|
"grad_norm": 54.79418392154241, |
|
"learning_rate": 9.799376207714444e-07, |
|
"logits/chosen": -0.9647032618522644, |
|
"logits/rejected": -0.9432573318481445, |
|
"logps/chosen": -0.3421172797679901, |
|
"logps/rejected": -0.4004732072353363, |
|
"loss": 2.6569, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -3.421172618865967, |
|
"rewards/margins": 0.5835592746734619, |
|
"rewards/rejected": -4.00473165512085, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.19238476953907815, |
|
"grad_norm": 63.87918692389446, |
|
"learning_rate": 9.743592451943998e-07, |
|
"logits/chosen": -1.0345466136932373, |
|
"logits/rejected": -0.9992335438728333, |
|
"logps/chosen": -0.4216434061527252, |
|
"logps/rejected": -0.5047457218170166, |
|
"loss": 2.8483, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -4.216434001922607, |
|
"rewards/margins": 0.8310235142707825, |
|
"rewards/rejected": -5.047457695007324, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.20307281229124916, |
|
"grad_norm": 53.84055400604519, |
|
"learning_rate": 9.681174353198686e-07, |
|
"logits/chosen": -1.0933572053909302, |
|
"logits/rejected": -1.012095332145691, |
|
"logps/chosen": -0.4486677050590515, |
|
"logps/rejected": -0.4948577880859375, |
|
"loss": 2.7206, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -4.4866766929626465, |
|
"rewards/margins": 0.46190088987350464, |
|
"rewards/rejected": -4.948577404022217, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.21376085504342018, |
|
"grad_norm": 69.39656295840837, |
|
"learning_rate": 9.612209208833646e-07, |
|
"logits/chosen": -0.9949450492858887, |
|
"logits/rejected": -0.9710448384284973, |
|
"logps/chosen": -0.42737340927124023, |
|
"logps/rejected": -0.511344850063324, |
|
"loss": 2.7353, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -4.273734092712402, |
|
"rewards/margins": 0.8397142291069031, |
|
"rewards/rejected": -5.113448143005371, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.22444889779559118, |
|
"grad_norm": 63.49627205534197, |
|
"learning_rate": 9.536793472839324e-07, |
|
"logits/chosen": -1.00840425491333, |
|
"logits/rejected": -0.9560264348983765, |
|
"logps/chosen": -0.4261465072631836, |
|
"logps/rejected": -0.5318101644515991, |
|
"loss": 2.6988, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -4.261464595794678, |
|
"rewards/margins": 1.0566365718841553, |
|
"rewards/rejected": -5.318101406097412, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.23513694054776219, |
|
"grad_norm": 67.07988857179406, |
|
"learning_rate": 9.455032620941839e-07, |
|
"logits/chosen": -0.9696318507194519, |
|
"logits/rejected": -0.9108623266220093, |
|
"logps/chosen": -0.48374947905540466, |
|
"logps/rejected": -0.6151714324951172, |
|
"loss": 2.6096, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -4.837494373321533, |
|
"rewards/margins": 1.3142198324203491, |
|
"rewards/rejected": -6.151714324951172, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.2458249832999332, |
|
"grad_norm": 80.4417839343177, |
|
"learning_rate": 9.367041003085648e-07, |
|
"logits/chosen": -1.033552646636963, |
|
"logits/rejected": -0.9741662740707397, |
|
"logps/chosen": -0.5227991938591003, |
|
"logps/rejected": -0.5981119275093079, |
|
"loss": 2.4723, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -5.227993011474609, |
|
"rewards/margins": 0.7531263828277588, |
|
"rewards/rejected": -5.981118202209473, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.2565130260521042, |
|
"grad_norm": 67.69889462049662, |
|
"learning_rate": 9.272941683504808e-07, |
|
"logits/chosen": -0.9916391372680664, |
|
"logits/rejected": -0.9028812646865845, |
|
"logps/chosen": -0.5420633554458618, |
|
"logps/rejected": -0.7466092705726624, |
|
"loss": 2.377, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -5.420632839202881, |
|
"rewards/margins": 2.0454587936401367, |
|
"rewards/rejected": -7.466092109680176, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.26720106880427524, |
|
"grad_norm": 64.90166370238528, |
|
"learning_rate": 9.172866268606513e-07, |
|
"logits/chosen": -1.0659786462783813, |
|
"logits/rejected": -1.0236841440200806, |
|
"logps/chosen": -0.6124440431594849, |
|
"logps/rejected": -0.7124758958816528, |
|
"loss": 2.2955, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -6.1244401931762695, |
|
"rewards/margins": 1.0003182888031006, |
|
"rewards/rejected": -7.124758720397949, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.27788911155644624, |
|
"grad_norm": 75.48258438787046, |
|
"learning_rate": 9.066954722907638e-07, |
|
"logits/chosen": -1.0887296199798584, |
|
"logits/rejected": -1.0823543071746826, |
|
"logps/chosen": -0.6110976934432983, |
|
"logps/rejected": -0.8805627822875977, |
|
"loss": 2.1296, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -6.110977649688721, |
|
"rewards/margins": 2.694650888442993, |
|
"rewards/rejected": -8.805627822875977, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.28857715430861725, |
|
"grad_norm": 62.13046213587147, |
|
"learning_rate": 8.955355173281707e-07, |
|
"logits/chosen": -1.061156153678894, |
|
"logits/rejected": -1.0147919654846191, |
|
"logps/chosen": -0.7112447023391724, |
|
"logps/rejected": -0.8724945783615112, |
|
"loss": 2.1133, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -7.1124467849731445, |
|
"rewards/margins": 1.6124988794326782, |
|
"rewards/rejected": -8.724946975708008, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.29926519706078825, |
|
"grad_norm": 79.80676489486827, |
|
"learning_rate": 8.838223701790055e-07, |
|
"logits/chosen": -1.1300795078277588, |
|
"logits/rejected": -1.1087987422943115, |
|
"logps/chosen": -0.8216513395309448, |
|
"logps/rejected": -0.9944013357162476, |
|
"loss": 2.0323, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -8.216513633728027, |
|
"rewards/margins": 1.7274997234344482, |
|
"rewards/rejected": -9.944013595581055, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.30995323981295925, |
|
"grad_norm": 119.11117858285472, |
|
"learning_rate": 8.71572412738697e-07, |
|
"logits/chosen": -1.0456678867340088, |
|
"logits/rejected": -1.0206925868988037, |
|
"logps/chosen": -0.8874173164367676, |
|
"logps/rejected": -1.1297991275787354, |
|
"loss": 2.0077, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -8.874174118041992, |
|
"rewards/margins": 2.423818588256836, |
|
"rewards/rejected": -11.297992706298828, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.32064128256513025, |
|
"grad_norm": 80.60289814144, |
|
"learning_rate": 8.588027776804058e-07, |
|
"logits/chosen": -1.0754765272140503, |
|
"logits/rejected": -1.0576502084732056, |
|
"logps/chosen": -0.9953246116638184, |
|
"logps/rejected": -1.2399874925613403, |
|
"loss": 1.968, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -9.953246116638184, |
|
"rewards/margins": 2.4466278553009033, |
|
"rewards/rejected": -12.399874687194824, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.33132932531730125, |
|
"grad_norm": 79.65950829440058, |
|
"learning_rate": 8.455313244934324e-07, |
|
"logits/chosen": -1.080444097518921, |
|
"logits/rejected": -1.0592705011367798, |
|
"logps/chosen": -1.0582973957061768, |
|
"logps/rejected": -1.3756240606307983, |
|
"loss": 1.9981, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -10.582974433898926, |
|
"rewards/margins": 3.173267364501953, |
|
"rewards/rejected": -13.756240844726562, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.3420173680694723, |
|
"grad_norm": 81.44098785800907, |
|
"learning_rate": 8.317766145051057e-07, |
|
"logits/chosen": -1.0870612859725952, |
|
"logits/rejected": -1.069802165031433, |
|
"logps/chosen": -1.1801505088806152, |
|
"logps/rejected": -1.5819157361984253, |
|
"loss": 1.9469, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -11.801506042480469, |
|
"rewards/margins": 4.017651557922363, |
|
"rewards/rejected": -15.819157600402832, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.3527054108216433, |
|
"grad_norm": 61.9394419875011, |
|
"learning_rate": 8.175578849210894e-07, |
|
"logits/chosen": -1.0850841999053955, |
|
"logits/rejected": -1.061554193496704, |
|
"logps/chosen": -1.1361093521118164, |
|
"logps/rejected": -1.5122711658477783, |
|
"loss": 1.8308, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -11.361093521118164, |
|
"rewards/margins": 3.761617660522461, |
|
"rewards/rejected": -15.122709274291992, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.3633934535738143, |
|
"grad_norm": 86.89706327407258, |
|
"learning_rate": 8.028950219204099e-07, |
|
"logits/chosen": -1.0892133712768555, |
|
"logits/rejected": -1.0675928592681885, |
|
"logps/chosen": -1.1062101125717163, |
|
"logps/rejected": -1.4951918125152588, |
|
"loss": 1.7802, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -11.062100410461426, |
|
"rewards/margins": 3.8898162841796875, |
|
"rewards/rejected": -14.951919555664062, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.3740814963259853, |
|
"grad_norm": 96.87652305461658, |
|
"learning_rate": 7.878085328428368e-07, |
|
"logits/chosen": -1.1229137182235718, |
|
"logits/rejected": -1.0774867534637451, |
|
"logps/chosen": -1.1681886911392212, |
|
"logps/rejected": -1.4487732648849487, |
|
"loss": 1.6772, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -11.681886672973633, |
|
"rewards/margins": 2.805846691131592, |
|
"rewards/rejected": -14.487733840942383, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.3847695390781563, |
|
"grad_norm": 80.44938362402195, |
|
"learning_rate": 7.723195175075135e-07, |
|
"logits/chosen": -1.0816049575805664, |
|
"logits/rejected": -1.0617396831512451, |
|
"logps/chosen": -1.1198861598968506, |
|
"logps/rejected": -1.4944720268249512, |
|
"loss": 1.5945, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -11.198859214782715, |
|
"rewards/margins": 3.7458598613739014, |
|
"rewards/rejected": -14.944720268249512, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.3954575818303273, |
|
"grad_norm": 89.9964846943623, |
|
"learning_rate": 7.564496387029531e-07, |
|
"logits/chosen": -1.1142748594284058, |
|
"logits/rejected": -1.061927080154419, |
|
"logps/chosen": -1.1488279104232788, |
|
"logps/rejected": -1.5771600008010864, |
|
"loss": 1.6746, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -11.488279342651367, |
|
"rewards/margins": 4.283320426940918, |
|
"rewards/rejected": -15.771600723266602, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.4061456245824983, |
|
"grad_norm": 91.4567322928116, |
|
"learning_rate": 7.402210918896689e-07, |
|
"logits/chosen": -1.113872766494751, |
|
"logits/rejected": -1.1223859786987305, |
|
"logps/chosen": -1.2559322118759155, |
|
"logps/rejected": -1.7311124801635742, |
|
"loss": 1.5468, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -12.55932331085205, |
|
"rewards/margins": 4.751800060272217, |
|
"rewards/rejected": -17.31112289428711, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.4168336673346693, |
|
"grad_norm": 71.15679417803156, |
|
"learning_rate": 7.236565741578162e-07, |
|
"logits/chosen": -1.0601518154144287, |
|
"logits/rejected": -1.043198823928833, |
|
"logps/chosen": -1.2675104141235352, |
|
"logps/rejected": -1.6440922021865845, |
|
"loss": 1.6056, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -12.675103187561035, |
|
"rewards/margins": 3.7658183574676514, |
|
"rewards/rejected": -16.440921783447266, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.42752171008684037, |
|
"grad_norm": 88.98069899942548, |
|
"learning_rate": 7.067792524832603e-07, |
|
"logits/chosen": -1.0721577405929565, |
|
"logits/rejected": -1.0621263980865479, |
|
"logps/chosen": -1.3113422393798828, |
|
"logps/rejected": -1.726875901222229, |
|
"loss": 1.5055, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -13.113421440124512, |
|
"rewards/margins": 4.155338287353516, |
|
"rewards/rejected": -17.268760681152344, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.43820975283901137, |
|
"grad_norm": 96.85728294484134, |
|
"learning_rate": 6.896127313264642e-07, |
|
"logits/chosen": -1.10856032371521, |
|
"logits/rejected": -1.059822916984558, |
|
"logps/chosen": -1.3952258825302124, |
|
"logps/rejected": -1.8415533304214478, |
|
"loss": 1.716, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -13.952260971069336, |
|
"rewards/margins": 4.4632720947265625, |
|
"rewards/rejected": -18.415531158447266, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.44889779559118237, |
|
"grad_norm": 98.7584341258845, |
|
"learning_rate": 6.721810196195174e-07, |
|
"logits/chosen": -1.134487271308899, |
|
"logits/rejected": -1.1236417293548584, |
|
"logps/chosen": -1.4038760662078857, |
|
"logps/rejected": -1.8213703632354736, |
|
"loss": 1.5993, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -14.0387601852417, |
|
"rewards/margins": 4.174942970275879, |
|
"rewards/rejected": -18.213703155517578, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.45958583834335337, |
|
"grad_norm": 116.36934325190856, |
|
"learning_rate": 6.545084971874736e-07, |
|
"logits/chosen": -1.083676815032959, |
|
"logits/rejected": -1.0672903060913086, |
|
"logps/chosen": -1.3948618173599243, |
|
"logps/rejected": -1.87642502784729, |
|
"loss": 1.4766, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -13.948617935180664, |
|
"rewards/margins": 4.8156328201293945, |
|
"rewards/rejected": -18.76424789428711, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.47027388109552437, |
|
"grad_norm": 96.57054428988462, |
|
"learning_rate": 6.3661988065096e-07, |
|
"logits/chosen": -1.1386303901672363, |
|
"logits/rejected": -1.1223524808883667, |
|
"logps/chosen": -1.477141261100769, |
|
"logps/rejected": -1.971549391746521, |
|
"loss": 1.431, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -14.77141284942627, |
|
"rewards/margins": 4.9440813064575195, |
|
"rewards/rejected": -19.71549415588379, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.48096192384769537, |
|
"grad_norm": 72.99627339556893, |
|
"learning_rate": 6.185401888577487e-07, |
|
"logits/chosen": -1.1213773488998413, |
|
"logits/rejected": -1.0908575057983398, |
|
"logps/chosen": -1.5149943828582764, |
|
"logps/rejected": -1.969143271446228, |
|
"loss": 1.4658, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -15.149943351745605, |
|
"rewards/margins": 4.541489601135254, |
|
"rewards/rejected": -19.69143295288086, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.4916499665998664, |
|
"grad_norm": 75.07337643391894, |
|
"learning_rate": 6.002947078916364e-07, |
|
"logits/chosen": -1.1983073949813843, |
|
"logits/rejected": -1.150994896888733, |
|
"logps/chosen": -1.4561713933944702, |
|
"logps/rejected": -1.9137779474258423, |
|
"loss": 1.3907, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -14.561712265014648, |
|
"rewards/margins": 4.5760674476623535, |
|
"rewards/rejected": -19.137781143188477, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5023380093520374, |
|
"grad_norm": 89.06305062801928, |
|
"learning_rate": 5.819089557075688e-07, |
|
"logits/chosen": -1.227797031402588, |
|
"logits/rejected": -1.2002477645874023, |
|
"logps/chosen": -1.4925800561904907, |
|
"logps/rejected": -1.9937610626220703, |
|
"loss": 1.4023, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -14.925801277160645, |
|
"rewards/margins": 5.011811256408691, |
|
"rewards/rejected": -19.937610626220703, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.5130260521042084, |
|
"grad_norm": 89.0732695289788, |
|
"learning_rate": 5.634086464424742e-07, |
|
"logits/chosen": -1.189410924911499, |
|
"logits/rejected": -1.1908595561981201, |
|
"logps/chosen": -1.4204081296920776, |
|
"logps/rejected": -1.9320650100708008, |
|
"loss": 1.4327, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -14.204083442687988, |
|
"rewards/margins": 5.116568088531494, |
|
"rewards/rejected": -19.320650100708008, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.5237140948563794, |
|
"grad_norm": 99.61038425380444, |
|
"learning_rate": 5.448196544517167e-07, |
|
"logits/chosen": -1.2636008262634277, |
|
"logits/rejected": -1.2102385759353638, |
|
"logps/chosen": -1.492004156112671, |
|
"logps/rejected": -2.0921199321746826, |
|
"loss": 1.3755, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -14.920039176940918, |
|
"rewards/margins": 6.001158714294434, |
|
"rewards/rejected": -20.921199798583984, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.5344021376085505, |
|
"grad_norm": 143.41066987990183, |
|
"learning_rate": 5.26167978121472e-07, |
|
"logits/chosen": -1.2205616235733032, |
|
"logits/rejected": -1.2053756713867188, |
|
"logps/chosen": -1.5569369792938232, |
|
"logps/rejected": -2.1403331756591797, |
|
"loss": 1.3485, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -15.569369316101074, |
|
"rewards/margins": 5.833963394165039, |
|
"rewards/rejected": -21.403331756591797, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.5450901803607214, |
|
"grad_norm": 71.93551703878607, |
|
"learning_rate": 5.074797035076318e-07, |
|
"logits/chosen": -1.2752165794372559, |
|
"logits/rejected": -1.2500503063201904, |
|
"logps/chosen": -1.6620187759399414, |
|
"logps/rejected": -2.134455442428589, |
|
"loss": 1.4857, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -16.620187759399414, |
|
"rewards/margins": 4.724367141723633, |
|
"rewards/rejected": -21.344552993774414, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.5557782231128925, |
|
"grad_norm": 85.67142749873541, |
|
"learning_rate": 4.887809678520975e-07, |
|
"logits/chosen": -1.2445859909057617, |
|
"logits/rejected": -1.216204047203064, |
|
"logps/chosen": -1.5793449878692627, |
|
"logps/rejected": -2.078167676925659, |
|
"loss": 1.4255, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -15.793449401855469, |
|
"rewards/margins": 4.988225936889648, |
|
"rewards/rejected": -20.781675338745117, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.5664662658650634, |
|
"grad_norm": 74.44253878678798, |
|
"learning_rate": 4.700979230274829e-07, |
|
"logits/chosen": -1.1956579685211182, |
|
"logits/rejected": -1.1797969341278076, |
|
"logps/chosen": -1.6723514795303345, |
|
"logps/rejected": -2.195023536682129, |
|
"loss": 1.3414, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -16.723514556884766, |
|
"rewards/margins": 5.22672176361084, |
|
"rewards/rejected": -21.95023536682129, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.5771543086172345, |
|
"grad_norm": 114.96460787224315, |
|
"learning_rate": 4.514566989613559e-07, |
|
"logits/chosen": -1.1912486553192139, |
|
"logits/rejected": -1.1648938655853271, |
|
"logps/chosen": -1.463266134262085, |
|
"logps/rejected": -2.001335620880127, |
|
"loss": 1.3473, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -14.632661819458008, |
|
"rewards/margins": 5.3806915283203125, |
|
"rewards/rejected": -20.013355255126953, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.5878423513694054, |
|
"grad_norm": 75.32343278326546, |
|
"learning_rate": 4.328833670911724e-07, |
|
"logits/chosen": -1.1563775539398193, |
|
"logits/rejected": -1.1241414546966553, |
|
"logps/chosen": -1.4626271724700928, |
|
"logps/rejected": -1.924564003944397, |
|
"loss": 1.4306, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -14.626272201538086, |
|
"rewards/margins": 4.619367599487305, |
|
"rewards/rejected": -19.24563980102539, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.5985303941215765, |
|
"grad_norm": 112.30854407154642, |
|
"learning_rate": 4.144039039010124e-07, |
|
"logits/chosen": -1.2617356777191162, |
|
"logits/rejected": -1.2384282350540161, |
|
"logps/chosen": -1.5061413049697876, |
|
"logps/rejected": -2.0556976795196533, |
|
"loss": 1.3243, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -15.06141185760498, |
|
"rewards/margins": 5.495565891265869, |
|
"rewards/rejected": -20.556978225708008, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.6092184368737475, |
|
"grad_norm": 102.49061452491978, |
|
"learning_rate": 3.960441545911204e-07, |
|
"logits/chosen": -1.2426598072052002, |
|
"logits/rejected": -1.2112630605697632, |
|
"logps/chosen": -1.5387237071990967, |
|
"logps/rejected": -2.120283842086792, |
|
"loss": 1.0685, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -15.387234687805176, |
|
"rewards/margins": 5.815601825714111, |
|
"rewards/rejected": -21.202838897705078, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.6199064796259185, |
|
"grad_norm": 86.79253258499234, |
|
"learning_rate": 3.778297969310529e-07, |
|
"logits/chosen": -1.2638859748840332, |
|
"logits/rejected": -1.2220103740692139, |
|
"logps/chosen": -1.564584493637085, |
|
"logps/rejected": -2.053191661834717, |
|
"loss": 1.3472, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -15.645845413208008, |
|
"rewards/margins": 4.886073589324951, |
|
"rewards/rejected": -20.531917572021484, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.6305945223780896, |
|
"grad_norm": 93.44397121318542, |
|
"learning_rate": 3.5978630534699865e-07, |
|
"logits/chosen": -1.2045689821243286, |
|
"logits/rejected": -1.191235899925232, |
|
"logps/chosen": -1.5795795917510986, |
|
"logps/rejected": -2.093400239944458, |
|
"loss": 1.1752, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -15.795794486999512, |
|
"rewards/margins": 5.138205528259277, |
|
"rewards/rejected": -20.934001922607422, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.6412825651302605, |
|
"grad_norm": 83.42376671175532, |
|
"learning_rate": 3.4193891529348795e-07, |
|
"logits/chosen": -1.128404974937439, |
|
"logits/rejected": -1.1020969152450562, |
|
"logps/chosen": -1.6557916402816772, |
|
"logps/rejected": -2.1081161499023438, |
|
"loss": 1.574, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -16.557918548583984, |
|
"rewards/margins": 4.523244857788086, |
|
"rewards/rejected": -21.081159591674805, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6519706078824316, |
|
"grad_norm": 87.28007107027204, |
|
"learning_rate": 3.243125879593286e-07, |
|
"logits/chosen": -1.235114574432373, |
|
"logits/rejected": -1.19254469871521, |
|
"logps/chosen": -1.6206077337265015, |
|
"logps/rejected": -2.079169750213623, |
|
"loss": 1.3167, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -16.206077575683594, |
|
"rewards/margins": 4.585621356964111, |
|
"rewards/rejected": -20.791696548461914, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.6626586506346025, |
|
"grad_norm": 105.45685254547827, |
|
"learning_rate": 3.069319753571269e-07, |
|
"logits/chosen": -1.267155408859253, |
|
"logits/rejected": -1.2484853267669678, |
|
"logps/chosen": -1.6359084844589233, |
|
"logps/rejected": -2.1494529247283936, |
|
"loss": 1.3629, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -16.359085083007812, |
|
"rewards/margins": 5.135441780090332, |
|
"rewards/rejected": -21.49452781677246, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.6733466933867736, |
|
"grad_norm": 87.29974596975983, |
|
"learning_rate": 2.898213858452173e-07, |
|
"logits/chosen": -1.2641007900238037, |
|
"logits/rejected": -1.2104285955429077, |
|
"logps/chosen": -1.6273491382598877, |
|
"logps/rejected": -2.1474812030792236, |
|
"loss": 1.3491, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -16.273488998413086, |
|
"rewards/margins": 5.201323509216309, |
|
"rewards/rejected": -21.47481346130371, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.6840347361389446, |
|
"grad_norm": 93.70048699997521, |
|
"learning_rate": 2.730047501302266e-07, |
|
"logits/chosen": -1.247004747390747, |
|
"logits/rejected": -1.2437224388122559, |
|
"logps/chosen": -1.6495912075042725, |
|
"logps/rejected": -2.273390293121338, |
|
"loss": 1.2651, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -16.495912551879883, |
|
"rewards/margins": 6.237987518310547, |
|
"rewards/rejected": -22.73390007019043, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.6947227788911156, |
|
"grad_norm": 76.81018981722117, |
|
"learning_rate": 2.5650558779781635e-07, |
|
"logits/chosen": -1.26289701461792, |
|
"logits/rejected": -1.2167103290557861, |
|
"logps/chosen": -1.7046712636947632, |
|
"logps/rejected": -2.372957944869995, |
|
"loss": 1.3105, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -17.046714782714844, |
|
"rewards/margins": 6.682864189147949, |
|
"rewards/rejected": -23.72957992553711, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.7054108216432866, |
|
"grad_norm": 73.64401812634293, |
|
"learning_rate": 2.403469744184154e-07, |
|
"logits/chosen": -1.178143858909607, |
|
"logits/rejected": -1.1377698183059692, |
|
"logps/chosen": -1.6760982275009155, |
|
"logps/rejected": -2.156362533569336, |
|
"loss": 1.3515, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -16.760982513427734, |
|
"rewards/margins": 4.802641868591309, |
|
"rewards/rejected": -21.56362533569336, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.7160988643954576, |
|
"grad_norm": 100.69110505698991, |
|
"learning_rate": 2.2455150927394878e-07, |
|
"logits/chosen": -1.2217228412628174, |
|
"logits/rejected": -1.20427405834198, |
|
"logps/chosen": -1.6592464447021484, |
|
"logps/rejected": -2.2141623497009277, |
|
"loss": 1.1861, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -16.592464447021484, |
|
"rewards/margins": 5.549159049987793, |
|
"rewards/rejected": -22.141624450683594, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.7267869071476286, |
|
"grad_norm": 96.51234191429023, |
|
"learning_rate": 2.0914128375069722e-07, |
|
"logits/chosen": -1.2305556535720825, |
|
"logits/rejected": -1.1986171007156372, |
|
"logps/chosen": -1.5974411964416504, |
|
"logps/rejected": -2.150116443634033, |
|
"loss": 1.3127, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -15.974411010742188, |
|
"rewards/margins": 5.526752948760986, |
|
"rewards/rejected": -21.501163482666016, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.7374749498997996, |
|
"grad_norm": 82.01592774884807, |
|
"learning_rate": 1.9413785044249676e-07, |
|
"logits/chosen": -1.2591183185577393, |
|
"logits/rejected": -1.2364073991775513, |
|
"logps/chosen": -1.667109727859497, |
|
"logps/rejected": -2.311634063720703, |
|
"loss": 1.3624, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -16.671098709106445, |
|
"rewards/margins": 6.445242404937744, |
|
"rewards/rejected": -23.1163387298584, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.7481629926519706, |
|
"grad_norm": 122.79704197237824, |
|
"learning_rate": 1.7956219300748792e-07, |
|
"logits/chosen": -1.2474735975265503, |
|
"logits/rejected": -1.2506452798843384, |
|
"logps/chosen": -1.5353481769561768, |
|
"logps/rejected": -2.0822863578796387, |
|
"loss": 1.2838, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -15.353483200073242, |
|
"rewards/margins": 5.4693803787231445, |
|
"rewards/rejected": -20.822864532470703, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.7588510354041417, |
|
"grad_norm": 96.25560337558127, |
|
"learning_rate": 1.6543469682057104e-07, |
|
"logits/chosen": -1.1812379360198975, |
|
"logits/rejected": -1.1956241130828857, |
|
"logps/chosen": -1.5455963611602783, |
|
"logps/rejected": -2.1094608306884766, |
|
"loss": 1.1903, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -15.455963134765625, |
|
"rewards/margins": 5.638647079467773, |
|
"rewards/rejected": -21.0946102142334, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.7695390781563126, |
|
"grad_norm": 79.54472628433167, |
|
"learning_rate": 1.5177512046261666e-07, |
|
"logits/chosen": -1.2227225303649902, |
|
"logits/rejected": -1.223512053489685, |
|
"logps/chosen": -1.562652349472046, |
|
"logps/rejected": -2.2054429054260254, |
|
"loss": 1.3011, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -15.626523971557617, |
|
"rewards/margins": 6.4279046058654785, |
|
"rewards/rejected": -22.054428100585938, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.7802271209084837, |
|
"grad_norm": 67.31957818166626, |
|
"learning_rate": 1.3860256808630427e-07, |
|
"logits/chosen": -1.2667648792266846, |
|
"logits/rejected": -1.204973816871643, |
|
"logps/chosen": -1.611985206604004, |
|
"logps/rejected": -2.2405993938446045, |
|
"loss": 1.2638, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -16.119850158691406, |
|
"rewards/margins": 6.286141872406006, |
|
"rewards/rejected": -22.405994415283203, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.7909151636606546, |
|
"grad_norm": 96.4652631691847, |
|
"learning_rate": 1.2593546269723647e-07, |
|
"logits/chosen": -1.1908237934112549, |
|
"logits/rejected": -1.1797075271606445, |
|
"logps/chosen": -1.5888497829437256, |
|
"logps/rejected": -2.0819642543792725, |
|
"loss": 1.2368, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -15.888498306274414, |
|
"rewards/margins": 4.931147575378418, |
|
"rewards/rejected": -20.819644927978516, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.8016032064128257, |
|
"grad_norm": 77.32657538767864, |
|
"learning_rate": 1.1379152038770029e-07, |
|
"logits/chosen": -1.22549307346344, |
|
"logits/rejected": -1.2306774854660034, |
|
"logps/chosen": -1.7054897546768188, |
|
"logps/rejected": -2.2909984588623047, |
|
"loss": 1.2394, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -17.05489730834961, |
|
"rewards/margins": 5.855085372924805, |
|
"rewards/rejected": -22.909982681274414, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.8122912491649966, |
|
"grad_norm": 122.77103138361475, |
|
"learning_rate": 1.0218772555910954e-07, |
|
"logits/chosen": -1.2489427328109741, |
|
"logits/rejected": -1.2302041053771973, |
|
"logps/chosen": -1.59738028049469, |
|
"logps/rejected": -2.1246509552001953, |
|
"loss": 1.3954, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -15.973803520202637, |
|
"rewards/margins": 5.272706031799316, |
|
"rewards/rejected": -21.246509552001953, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.8229792919171677, |
|
"grad_norm": 73.11130573539627, |
|
"learning_rate": 9.114030716778432e-08, |
|
"logits/chosen": -1.231930136680603, |
|
"logits/rejected": -1.2102787494659424, |
|
"logps/chosen": -1.6145036220550537, |
|
"logps/rejected": -2.3103325366973877, |
|
"loss": 1.0955, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -16.145038604736328, |
|
"rewards/margins": 6.958285331726074, |
|
"rewards/rejected": -23.103322982788086, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.8336673346693386, |
|
"grad_norm": 80.68579596437256, |
|
"learning_rate": 8.066471602728803e-08, |
|
"logits/chosen": -1.2316021919250488, |
|
"logits/rejected": -1.2188332080841064, |
|
"logps/chosen": -1.6731784343719482, |
|
"logps/rejected": -2.2686033248901367, |
|
"loss": 1.2377, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -16.73178482055664, |
|
"rewards/margins": 5.95424747467041, |
|
"rewards/rejected": -22.686031341552734, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.8443553774215097, |
|
"grad_norm": 77.88673283635482, |
|
"learning_rate": 7.077560319906694e-08, |
|
"logits/chosen": -1.2400703430175781, |
|
"logits/rejected": -1.2209936380386353, |
|
"logps/chosen": -1.5812984704971313, |
|
"logps/rejected": -2.1461308002471924, |
|
"loss": 1.2574, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -15.812983512878418, |
|
"rewards/margins": 5.648324012756348, |
|
"rewards/rejected": -21.461307525634766, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.8550434201736807, |
|
"grad_norm": 64.27634143705052, |
|
"learning_rate": 6.148679950161672e-08, |
|
"logits/chosen": -1.2457908391952515, |
|
"logits/rejected": -1.2307510375976562, |
|
"logps/chosen": -1.6303634643554688, |
|
"logps/rejected": -2.144191265106201, |
|
"loss": 1.2352, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -16.303632736206055, |
|
"rewards/margins": 5.138282775878906, |
|
"rewards/rejected": -21.441913604736328, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8550434201736807, |
|
"eval_logits/chosen": -1.4050133228302002, |
|
"eval_logits/rejected": -1.4148539304733276, |
|
"eval_logps/chosen": -1.6315457820892334, |
|
"eval_logps/rejected": -2.184220314025879, |
|
"eval_loss": 1.3035991191864014, |
|
"eval_rewards/accuracies": 0.8313007950782776, |
|
"eval_rewards/chosen": -16.315458297729492, |
|
"eval_rewards/margins": 5.526745319366455, |
|
"eval_rewards/rejected": -21.842201232910156, |
|
"eval_runtime": 114.1272, |
|
"eval_samples_per_second": 17.183, |
|
"eval_steps_per_second": 1.078, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8657314629258517, |
|
"grad_norm": 93.3112085508996, |
|
"learning_rate": 5.2811296166831666e-08, |
|
"logits/chosen": -1.2086267471313477, |
|
"logits/rejected": -1.2275283336639404, |
|
"logps/chosen": -1.705394983291626, |
|
"logps/rejected": -2.2604918479919434, |
|
"loss": 1.2335, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -17.053951263427734, |
|
"rewards/margins": 5.550968647003174, |
|
"rewards/rejected": -22.604917526245117, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.8764195056780227, |
|
"grad_norm": 147.49347048623574, |
|
"learning_rate": 4.4761226670592066e-08, |
|
"logits/chosen": -1.224875569343567, |
|
"logits/rejected": -1.2125729322433472, |
|
"logps/chosen": -1.6484178304672241, |
|
"logps/rejected": -2.173166036605835, |
|
"loss": 1.3786, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -16.48417854309082, |
|
"rewards/margins": 5.247479438781738, |
|
"rewards/rejected": -21.731660842895508, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.8871075484301937, |
|
"grad_norm": 72.56853127664434, |
|
"learning_rate": 3.734784976300165e-08, |
|
"logits/chosen": -1.216326355934143, |
|
"logits/rejected": -1.1681609153747559, |
|
"logps/chosen": -1.584081768989563, |
|
"logps/rejected": -2.2398409843444824, |
|
"loss": 1.3224, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -15.84081745147705, |
|
"rewards/margins": 6.557589530944824, |
|
"rewards/rejected": -22.398406982421875, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.8977955911823647, |
|
"grad_norm": 96.60767749787689, |
|
"learning_rate": 3.058153372200695e-08, |
|
"logits/chosen": -1.252618432044983, |
|
"logits/rejected": -1.206176996231079, |
|
"logps/chosen": -1.533342719078064, |
|
"logps/rejected": -2.1350560188293457, |
|
"loss": 1.2256, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -15.333427429199219, |
|
"rewards/margins": 6.0171332359313965, |
|
"rewards/rejected": -21.350561141967773, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.9084836339345357, |
|
"grad_norm": 102.43117197696006, |
|
"learning_rate": 2.4471741852423233e-08, |
|
"logits/chosen": -1.2422844171524048, |
|
"logits/rejected": -1.2342640161514282, |
|
"logps/chosen": -1.7160053253173828, |
|
"logps/rejected": -2.2498655319213867, |
|
"loss": 1.4539, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -17.160053253173828, |
|
"rewards/margins": 5.338602542877197, |
|
"rewards/rejected": -22.498653411865234, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.9191716766867067, |
|
"grad_norm": 95.6241453357728, |
|
"learning_rate": 1.9027019250647036e-08, |
|
"logits/chosen": -1.232668161392212, |
|
"logits/rejected": -1.2184712886810303, |
|
"logps/chosen": -1.7022215127944946, |
|
"logps/rejected": -2.2985284328460693, |
|
"loss": 1.267, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -17.022212982177734, |
|
"rewards/margins": 5.963072299957275, |
|
"rewards/rejected": -22.985288619995117, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.9298597194388778, |
|
"grad_norm": 76.99966381399814, |
|
"learning_rate": 1.4254980853566246e-08, |
|
"logits/chosen": -1.182472825050354, |
|
"logits/rejected": -1.1473052501678467, |
|
"logps/chosen": -1.5595757961273193, |
|
"logps/rejected": -2.154953956604004, |
|
"loss": 1.2015, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -15.595758438110352, |
|
"rewards/margins": 5.953780174255371, |
|
"rewards/rejected": -21.54953956604004, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.9405477621910487, |
|
"grad_norm": 84.23154902337001, |
|
"learning_rate": 1.016230078838226e-08, |
|
"logits/chosen": -1.2160289287567139, |
|
"logits/rejected": -1.1602892875671387, |
|
"logps/chosen": -1.6690679788589478, |
|
"logps/rejected": -2.205056667327881, |
|
"loss": 1.245, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -16.690677642822266, |
|
"rewards/margins": 5.359889984130859, |
|
"rewards/rejected": -22.050569534301758, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.9512358049432198, |
|
"grad_norm": 75.27496517042923, |
|
"learning_rate": 6.754703038239329e-09, |
|
"logits/chosen": -1.1662180423736572, |
|
"logits/rejected": -1.1511404514312744, |
|
"logps/chosen": -1.6556246280670166, |
|
"logps/rejected": -2.3011534214019775, |
|
"loss": 1.1055, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -16.556243896484375, |
|
"rewards/margins": 6.4552903175354, |
|
"rewards/rejected": -23.011533737182617, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.9619238476953907, |
|
"grad_norm": 92.06659067628235, |
|
"learning_rate": 4.036953436716895e-09, |
|
"logits/chosen": -1.278028130531311, |
|
"logits/rejected": -1.2591049671173096, |
|
"logps/chosen": -1.6093896627426147, |
|
"logps/rejected": -2.1693015098571777, |
|
"loss": 1.3206, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -16.093896865844727, |
|
"rewards/margins": 5.599120140075684, |
|
"rewards/rejected": -21.693017959594727, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.9726118904475618, |
|
"grad_norm": 100.7331017689662, |
|
"learning_rate": 2.0128530023804656e-09, |
|
"logits/chosen": -1.2293764352798462, |
|
"logits/rejected": -1.1971036195755005, |
|
"logps/chosen": -1.6129881143569946, |
|
"logps/rejected": -2.264960765838623, |
|
"loss": 1.0669, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -16.129878997802734, |
|
"rewards/margins": 6.5197248458862305, |
|
"rewards/rejected": -22.649606704711914, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.9832999331997327, |
|
"grad_norm": 88.7868280064186, |
|
"learning_rate": 6.852326227130833e-10, |
|
"logits/chosen": -1.241369366645813, |
|
"logits/rejected": -1.2309256792068481, |
|
"logps/chosen": -1.6784422397613525, |
|
"logps/rejected": -2.295506000518799, |
|
"loss": 1.1912, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -16.784420013427734, |
|
"rewards/margins": 6.170637607574463, |
|
"rewards/rejected": -22.95505714416504, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.9939879759519038, |
|
"grad_norm": 89.2084840240269, |
|
"learning_rate": 5.594909486328348e-11, |
|
"logits/chosen": -1.2126357555389404, |
|
"logits/rejected": -1.2189154624938965, |
|
"logps/chosen": -1.7046855688095093, |
|
"logps/rejected": -2.3294055461883545, |
|
"loss": 1.3123, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -17.046857833862305, |
|
"rewards/margins": 6.247200965881348, |
|
"rewards/rejected": -23.294055938720703, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.9982631930527722, |
|
"step": 467, |
|
"total_flos": 0.0, |
|
"train_loss": 1.7982526555561662, |
|
"train_runtime": 17001.7268, |
|
"train_samples_per_second": 3.522, |
|
"train_steps_per_second": 0.027 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 467, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|