|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9982631930527722, |
|
"eval_steps": 100, |
|
"global_step": 467, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0021376085504342017, |
|
"grad_norm": 4.503899550790205, |
|
"learning_rate": 2.127659574468085e-08, |
|
"logits/chosen": -0.8003637194633484, |
|
"logits/rejected": -0.8448871970176697, |
|
"logps/chosen": -212.04685974121094, |
|
"logps/rejected": -206.4463348388672, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0042752171008684035, |
|
"grad_norm": 4.89256031461174, |
|
"learning_rate": 4.25531914893617e-08, |
|
"logits/chosen": -0.750135064125061, |
|
"logits/rejected": -0.7247368097305298, |
|
"logps/chosen": -271.5355529785156, |
|
"logps/rejected": -260.5343322753906, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.006412825651302605, |
|
"grad_norm": 4.511049028695194, |
|
"learning_rate": 6.382978723404254e-08, |
|
"logits/chosen": -0.9132480621337891, |
|
"logits/rejected": -0.9213609099388123, |
|
"logps/chosen": -259.10791015625, |
|
"logps/rejected": -262.6512756347656, |
|
"loss": 0.6935, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.0005805277032777667, |
|
"rewards/margins": -0.001751818461343646, |
|
"rewards/rejected": 0.0011712908744812012, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.008550434201736807, |
|
"grad_norm": 5.0258481504448485, |
|
"learning_rate": 8.51063829787234e-08, |
|
"logits/chosen": -0.8424134850502014, |
|
"logits/rejected": -0.8080853223800659, |
|
"logps/chosen": -251.00387573242188, |
|
"logps/rejected": -255.1189422607422, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.0018655203748494387, |
|
"rewards/margins": 0.0010831927647814155, |
|
"rewards/rejected": 0.0007823276100680232, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.01068804275217101, |
|
"grad_norm": 4.75851133644133, |
|
"learning_rate": 1.0638297872340425e-07, |
|
"logits/chosen": -0.9411681294441223, |
|
"logits/rejected": -0.9376619458198547, |
|
"logps/chosen": -289.8980407714844, |
|
"logps/rejected": -274.7005615234375, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.0026531934272497892, |
|
"rewards/margins": 0.0023759508039802313, |
|
"rewards/rejected": 0.00027724262326955795, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.01282565130260521, |
|
"grad_norm": 4.443327602655402, |
|
"learning_rate": 1.2765957446808508e-07, |
|
"logits/chosen": -0.7161233425140381, |
|
"logits/rejected": -0.6978777647018433, |
|
"logps/chosen": -223.0089569091797, |
|
"logps/rejected": -222.1771240234375, |
|
"loss": 0.6934, |
|
"rewards/accuracies": 0.34375, |
|
"rewards/chosen": -0.0012396120000630617, |
|
"rewards/margins": -0.0016972327139228582, |
|
"rewards/rejected": 0.00045762062654830515, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.014963259853039413, |
|
"grad_norm": 5.506063836746189, |
|
"learning_rate": 1.4893617021276595e-07, |
|
"logits/chosen": -0.9607124924659729, |
|
"logits/rejected": -0.9491544961929321, |
|
"logps/chosen": -310.2432556152344, |
|
"logps/rejected": -305.9755554199219, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.0012061572633683681, |
|
"rewards/margins": -0.0002907347516156733, |
|
"rewards/rejected": 0.0014968919567763805, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.017100868403473614, |
|
"grad_norm": 4.851635423100062, |
|
"learning_rate": 1.702127659574468e-07, |
|
"logits/chosen": -0.8928542137145996, |
|
"logits/rejected": -0.8853560090065002, |
|
"logps/chosen": -247.1142120361328, |
|
"logps/rejected": -244.08663940429688, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.0002220773312728852, |
|
"rewards/margins": 0.0013035106239840388, |
|
"rewards/rejected": -0.0010814334964379668, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.019238476953907815, |
|
"grad_norm": 4.87939101936585, |
|
"learning_rate": 1.9148936170212765e-07, |
|
"logits/chosen": -0.8140461444854736, |
|
"logits/rejected": -0.8076512813568115, |
|
"logps/chosen": -272.2711486816406, |
|
"logps/rejected": -284.1283264160156, |
|
"loss": 0.6935, |
|
"rewards/accuracies": 0.40625, |
|
"rewards/chosen": -0.0010817217407748103, |
|
"rewards/margins": -0.001508195186033845, |
|
"rewards/rejected": 0.0004264736198820174, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.02137608550434202, |
|
"grad_norm": 4.562355516566984, |
|
"learning_rate": 2.127659574468085e-07, |
|
"logits/chosen": -0.8849949836730957, |
|
"logits/rejected": -0.8811756372451782, |
|
"logps/chosen": -229.57052612304688, |
|
"logps/rejected": -231.6889190673828, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.00039355267654173076, |
|
"rewards/margins": 0.0002220940077677369, |
|
"rewards/rejected": 0.0001714585960144177, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02351369405477622, |
|
"grad_norm": 4.67288441235731, |
|
"learning_rate": 2.3404255319148937e-07, |
|
"logits/chosen": -0.8189717531204224, |
|
"logits/rejected": -0.8200615644454956, |
|
"logps/chosen": -273.552734375, |
|
"logps/rejected": -277.36859130859375, |
|
"loss": 0.6934, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.0009033679380081594, |
|
"rewards/margins": -0.0013172316830605268, |
|
"rewards/rejected": 0.0004138636286370456, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.02565130260521042, |
|
"grad_norm": 4.805681101367893, |
|
"learning_rate": 2.5531914893617016e-07, |
|
"logits/chosen": -0.9043698906898499, |
|
"logits/rejected": -0.8993241190910339, |
|
"logps/chosen": -273.664306640625, |
|
"logps/rejected": -268.0246887207031, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.0001288223429583013, |
|
"rewards/margins": -3.8141035474836826e-05, |
|
"rewards/rejected": 0.00016696332022547722, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.02778891115564462, |
|
"grad_norm": 4.91733558840618, |
|
"learning_rate": 2.7659574468085106e-07, |
|
"logits/chosen": -0.8745774626731873, |
|
"logits/rejected": -0.8446710705757141, |
|
"logps/chosen": -243.00827026367188, |
|
"logps/rejected": -229.5283203125, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.002203774405643344, |
|
"rewards/margins": -0.00030036212410777807, |
|
"rewards/rejected": -0.0019034123979508877, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.029926519706078826, |
|
"grad_norm": 5.299324976103458, |
|
"learning_rate": 2.978723404255319e-07, |
|
"logits/chosen": -0.7348307967185974, |
|
"logits/rejected": -0.7354189157485962, |
|
"logps/chosen": -186.85391235351562, |
|
"logps/rejected": -199.67623901367188, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.0016946769319474697, |
|
"rewards/margins": -0.0006936597637832165, |
|
"rewards/rejected": -0.001001017284579575, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.03206412825651302, |
|
"grad_norm": 4.755602904170831, |
|
"learning_rate": 3.1914893617021275e-07, |
|
"logits/chosen": -0.7406636476516724, |
|
"logits/rejected": -0.7166301608085632, |
|
"logps/chosen": -199.6678466796875, |
|
"logps/rejected": -194.37559509277344, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": 0.0003121185291092843, |
|
"rewards/margins": 0.0012607788667082787, |
|
"rewards/rejected": -0.0009486603084951639, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.03420173680694723, |
|
"grad_norm": 4.853620806434979, |
|
"learning_rate": 3.404255319148936e-07, |
|
"logits/chosen": -0.78841632604599, |
|
"logits/rejected": -0.7843498587608337, |
|
"logps/chosen": -266.4180908203125, |
|
"logps/rejected": -271.6226806640625, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.0009592341957613826, |
|
"rewards/margins": 0.0019549226853996515, |
|
"rewards/rejected": -0.002914156997576356, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.03633934535738143, |
|
"grad_norm": 5.423827110862174, |
|
"learning_rate": 3.617021276595745e-07, |
|
"logits/chosen": -0.9736945629119873, |
|
"logits/rejected": -0.9769234657287598, |
|
"logps/chosen": -258.8900146484375, |
|
"logps/rejected": -264.2679748535156, |
|
"loss": 0.6935, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": -0.0061743613332509995, |
|
"rewards/margins": -0.001890932791866362, |
|
"rewards/rejected": -0.0042834291234612465, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.03847695390781563, |
|
"grad_norm": 4.824497254280432, |
|
"learning_rate": 3.829787234042553e-07, |
|
"logits/chosen": -0.851763904094696, |
|
"logits/rejected": -0.8533320426940918, |
|
"logps/chosen": -273.1241760253906, |
|
"logps/rejected": -269.42315673828125, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.0023198507260531187, |
|
"rewards/margins": -0.0010572766186669469, |
|
"rewards/rejected": -0.0012625741073861718, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.040614562458249834, |
|
"grad_norm": 4.885682499438778, |
|
"learning_rate": 4.0425531914893614e-07, |
|
"logits/chosen": -0.9122135043144226, |
|
"logits/rejected": -0.9140520095825195, |
|
"logps/chosen": -336.9332580566406, |
|
"logps/rejected": -327.79571533203125, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.007231764495372772, |
|
"rewards/margins": -0.00179797422606498, |
|
"rewards/rejected": -0.005433791317045689, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.04275217100868404, |
|
"grad_norm": 4.403742601709981, |
|
"learning_rate": 4.25531914893617e-07, |
|
"logits/chosen": -0.8458749055862427, |
|
"logits/rejected": -0.8761993646621704, |
|
"logps/chosen": -258.8704833984375, |
|
"logps/rejected": -263.5494079589844, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.005775632336735725, |
|
"rewards/margins": 0.00025997147895395756, |
|
"rewards/rejected": -0.006035604514181614, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.044889779559118236, |
|
"grad_norm": 5.1980600006783195, |
|
"learning_rate": 4.4680851063829783e-07, |
|
"logits/chosen": -0.7707018852233887, |
|
"logits/rejected": -0.7247700691223145, |
|
"logps/chosen": -233.66183471679688, |
|
"logps/rejected": -255.91018676757812, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.006643190514296293, |
|
"rewards/margins": 0.0013489744160324335, |
|
"rewards/rejected": -0.00799216516315937, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.04702738810955244, |
|
"grad_norm": 4.514553831312047, |
|
"learning_rate": 4.6808510638297873e-07, |
|
"logits/chosen": -0.8653970956802368, |
|
"logits/rejected": -0.8456276059150696, |
|
"logps/chosen": -245.4098663330078, |
|
"logps/rejected": -248.41461181640625, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.005286152008920908, |
|
"rewards/margins": 0.000985494116321206, |
|
"rewards/rejected": -0.006271645426750183, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.04916499665998664, |
|
"grad_norm": 4.80080663754473, |
|
"learning_rate": 4.893617021276595e-07, |
|
"logits/chosen": -0.8655314445495605, |
|
"logits/rejected": -0.8451917171478271, |
|
"logps/chosen": -252.33546447753906, |
|
"logps/rejected": -260.81475830078125, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.006377603858709335, |
|
"rewards/margins": 0.00297079561278224, |
|
"rewards/rejected": -0.009348399937152863, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.05130260521042084, |
|
"grad_norm": 5.481285264708149, |
|
"learning_rate": 5.106382978723403e-07, |
|
"logits/chosen": -0.7310451865196228, |
|
"logits/rejected": -0.7366085648536682, |
|
"logps/chosen": -238.02166748046875, |
|
"logps/rejected": -245.17308044433594, |
|
"loss": 0.6918, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.010805780068039894, |
|
"rewards/margins": 0.0019894172437489033, |
|
"rewards/rejected": -0.01279519684612751, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.053440213760855046, |
|
"grad_norm": 4.561792775392447, |
|
"learning_rate": 5.319148936170212e-07, |
|
"logits/chosen": -0.9254141449928284, |
|
"logits/rejected": -0.939468502998352, |
|
"logps/chosen": -269.6241455078125, |
|
"logps/rejected": -282.4432067871094, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.01286405324935913, |
|
"rewards/margins": 0.001102518755942583, |
|
"rewards/rejected": -0.013966571539640427, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.05557782231128924, |
|
"grad_norm": 4.85781011184185, |
|
"learning_rate": 5.531914893617021e-07, |
|
"logits/chosen": -0.8391819000244141, |
|
"logits/rejected": -0.8546662330627441, |
|
"logps/chosen": -271.26068115234375, |
|
"logps/rejected": -267.31024169921875, |
|
"loss": 0.6925, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.008641035296022892, |
|
"rewards/margins": 0.0034573455341160297, |
|
"rewards/rejected": -0.012098381295800209, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.05771543086172345, |
|
"grad_norm": 5.072033355975492, |
|
"learning_rate": 5.74468085106383e-07, |
|
"logits/chosen": -0.8844251036643982, |
|
"logits/rejected": -0.8849300742149353, |
|
"logps/chosen": -243.93980407714844, |
|
"logps/rejected": -248.54537963867188, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.015005933120846748, |
|
"rewards/margins": 0.0034025944769382477, |
|
"rewards/rejected": -0.018408527597784996, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.05985303941215765, |
|
"grad_norm": 4.905934366826652, |
|
"learning_rate": 5.957446808510638e-07, |
|
"logits/chosen": -0.724337637424469, |
|
"logits/rejected": -0.7232470512390137, |
|
"logps/chosen": -262.2066345214844, |
|
"logps/rejected": -267.26116943359375, |
|
"loss": 0.6919, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.014386076480150223, |
|
"rewards/margins": -0.0017046784050762653, |
|
"rewards/rejected": -0.012681398540735245, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.06199064796259185, |
|
"grad_norm": 4.7342802483142705, |
|
"learning_rate": 6.170212765957446e-07, |
|
"logits/chosen": -0.8244236707687378, |
|
"logits/rejected": -0.8045285940170288, |
|
"logps/chosen": -218.7688751220703, |
|
"logps/rejected": -219.35711669921875, |
|
"loss": 0.6898, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.014018207788467407, |
|
"rewards/margins": 0.00474612507969141, |
|
"rewards/rejected": -0.018764331936836243, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.06412825651302605, |
|
"grad_norm": 5.185028135772882, |
|
"learning_rate": 6.382978723404255e-07, |
|
"logits/chosen": -0.7685129642486572, |
|
"logits/rejected": -0.7588883638381958, |
|
"logps/chosen": -265.58447265625, |
|
"logps/rejected": -271.6627502441406, |
|
"loss": 0.6911, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.023013589903712273, |
|
"rewards/margins": -6.0059886891394854e-05, |
|
"rewards/rejected": -0.02295352704823017, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.06626586506346026, |
|
"grad_norm": 5.174402492219036, |
|
"learning_rate": 6.595744680851063e-07, |
|
"logits/chosen": -0.8060805797576904, |
|
"logits/rejected": -0.8104574084281921, |
|
"logps/chosen": -253.12918090820312, |
|
"logps/rejected": -262.47772216796875, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.027180161327123642, |
|
"rewards/margins": 0.0009983510244637728, |
|
"rewards/rejected": -0.028178514912724495, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.06840347361389446, |
|
"grad_norm": 4.839677584710031, |
|
"learning_rate": 6.808510638297872e-07, |
|
"logits/chosen": -0.8107847571372986, |
|
"logits/rejected": -0.8056558966636658, |
|
"logps/chosen": -247.47384643554688, |
|
"logps/rejected": -259.930419921875, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.03201708570122719, |
|
"rewards/margins": 0.0023030471056699753, |
|
"rewards/rejected": -0.03432013466954231, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.07054108216432865, |
|
"grad_norm": 4.418696566904475, |
|
"learning_rate": 7.021276595744681e-07, |
|
"logits/chosen": -0.8691257834434509, |
|
"logits/rejected": -0.891472339630127, |
|
"logps/chosen": -229.89974975585938, |
|
"logps/rejected": -220.62893676757812, |
|
"loss": 0.6925, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.023654459044337273, |
|
"rewards/margins": 0.0017885996494442225, |
|
"rewards/rejected": -0.025443056598305702, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.07267869071476286, |
|
"grad_norm": 5.281949481266581, |
|
"learning_rate": 7.23404255319149e-07, |
|
"logits/chosen": -0.7926970720291138, |
|
"logits/rejected": -0.7971447706222534, |
|
"logps/chosen": -201.50173950195312, |
|
"logps/rejected": -209.24432373046875, |
|
"loss": 0.6883, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.0300833098590374, |
|
"rewards/margins": 0.011433225125074387, |
|
"rewards/rejected": -0.041516534984111786, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.07481629926519706, |
|
"grad_norm": 5.310361096502114, |
|
"learning_rate": 7.446808510638297e-07, |
|
"logits/chosen": -0.910358726978302, |
|
"logits/rejected": -0.8681845664978027, |
|
"logps/chosen": -293.49481201171875, |
|
"logps/rejected": -264.9764709472656, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.042106445878744125, |
|
"rewards/margins": -0.00045869359746575356, |
|
"rewards/rejected": -0.04164774715900421, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.07695390781563126, |
|
"grad_norm": 4.880148293966411, |
|
"learning_rate": 7.659574468085106e-07, |
|
"logits/chosen": -0.9195268154144287, |
|
"logits/rejected": -0.9358838796615601, |
|
"logps/chosen": -219.29908752441406, |
|
"logps/rejected": -223.91160583496094, |
|
"loss": 0.6905, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.03970751538872719, |
|
"rewards/margins": 0.009188718162477016, |
|
"rewards/rejected": -0.04889623448252678, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.07909151636606547, |
|
"grad_norm": 4.918837324305735, |
|
"learning_rate": 7.872340425531915e-07, |
|
"logits/chosen": -0.7983888387680054, |
|
"logits/rejected": -0.7829576134681702, |
|
"logps/chosen": -236.22479248046875, |
|
"logps/rejected": -230.52279663085938, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.03082606941461563, |
|
"rewards/margins": 0.007684895768761635, |
|
"rewards/rejected": -0.038510967046022415, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.08122912491649967, |
|
"grad_norm": 4.697759235789417, |
|
"learning_rate": 8.085106382978723e-07, |
|
"logits/chosen": -0.9536780118942261, |
|
"logits/rejected": -0.9445628523826599, |
|
"logps/chosen": -239.7415771484375, |
|
"logps/rejected": -250.46978759765625, |
|
"loss": 0.6915, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.04406914860010147, |
|
"rewards/margins": 0.007817601785063744, |
|
"rewards/rejected": -0.05188675597310066, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.08336673346693386, |
|
"grad_norm": 4.942849749477713, |
|
"learning_rate": 8.297872340425532e-07, |
|
"logits/chosen": -0.8406745195388794, |
|
"logits/rejected": -0.8202511668205261, |
|
"logps/chosen": -283.8332824707031, |
|
"logps/rejected": -289.7784729003906, |
|
"loss": 0.6883, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.0502498485147953, |
|
"rewards/margins": 0.014361884444952011, |
|
"rewards/rejected": -0.06461173295974731, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.08550434201736808, |
|
"grad_norm": 5.117709083830907, |
|
"learning_rate": 8.51063829787234e-07, |
|
"logits/chosen": -0.8214735984802246, |
|
"logits/rejected": -0.811273992061615, |
|
"logps/chosen": -210.29600524902344, |
|
"logps/rejected": -199.48020935058594, |
|
"loss": 0.6884, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.04497796297073364, |
|
"rewards/margins": 0.01133386418223381, |
|
"rewards/rejected": -0.05631183087825775, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.08764195056780227, |
|
"grad_norm": 5.136196664411302, |
|
"learning_rate": 8.723404255319149e-07, |
|
"logits/chosen": -0.969085693359375, |
|
"logits/rejected": -0.9578003287315369, |
|
"logps/chosen": -252.95278930664062, |
|
"logps/rejected": -256.9606018066406, |
|
"loss": 0.6848, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.07370009273290634, |
|
"rewards/margins": 0.004004061222076416, |
|
"rewards/rejected": -0.07770414650440216, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.08977955911823647, |
|
"grad_norm": 4.838693140519435, |
|
"learning_rate": 8.936170212765957e-07, |
|
"logits/chosen": -0.8661520481109619, |
|
"logits/rejected": -0.8457835912704468, |
|
"logps/chosen": -304.5137634277344, |
|
"logps/rejected": -289.595947265625, |
|
"loss": 0.6883, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.07082202285528183, |
|
"rewards/margins": 0.015582293272018433, |
|
"rewards/rejected": -0.08640430867671967, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.09191716766867067, |
|
"grad_norm": 4.957200914658608, |
|
"learning_rate": 9.148936170212766e-07, |
|
"logits/chosen": -0.8786011338233948, |
|
"logits/rejected": -0.8692121505737305, |
|
"logps/chosen": -241.05532836914062, |
|
"logps/rejected": -243.45684814453125, |
|
"loss": 0.6919, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.07279819995164871, |
|
"rewards/margins": 0.020279541611671448, |
|
"rewards/rejected": -0.09307773411273956, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.09405477621910488, |
|
"grad_norm": 5.332532100522966, |
|
"learning_rate": 9.361702127659575e-07, |
|
"logits/chosen": -0.714208722114563, |
|
"logits/rejected": -0.7126749157905579, |
|
"logps/chosen": -319.6092834472656, |
|
"logps/rejected": -301.8595886230469, |
|
"loss": 0.6873, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.07854731380939484, |
|
"rewards/margins": 0.005709480959922075, |
|
"rewards/rejected": -0.08425679802894592, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.09619238476953908, |
|
"grad_norm": 5.165598994277126, |
|
"learning_rate": 9.574468085106384e-07, |
|
"logits/chosen": -0.8318406343460083, |
|
"logits/rejected": -0.849963903427124, |
|
"logps/chosen": -255.63446044921875, |
|
"logps/rejected": -259.7432556152344, |
|
"loss": 0.6883, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.09484049677848816, |
|
"rewards/margins": 0.010637722909450531, |
|
"rewards/rejected": -0.10547823458909988, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.09832999331997327, |
|
"grad_norm": 4.871720241221463, |
|
"learning_rate": 9.78723404255319e-07, |
|
"logits/chosen": -0.8702428936958313, |
|
"logits/rejected": -0.8339990377426147, |
|
"logps/chosen": -316.18670654296875, |
|
"logps/rejected": -329.9319152832031, |
|
"loss": 0.6892, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.14160332083702087, |
|
"rewards/margins": 0.013123790733516216, |
|
"rewards/rejected": -0.15472710132598877, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.10046760187040749, |
|
"grad_norm": 5.158837089218199, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -0.8626521229743958, |
|
"logits/rejected": -0.8603638410568237, |
|
"logps/chosen": -247.8237762451172, |
|
"logps/rejected": -249.759033203125, |
|
"loss": 0.6913, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.09033364802598953, |
|
"rewards/margins": 0.0012807990424335003, |
|
"rewards/rejected": -0.09161444753408432, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.10260521042084168, |
|
"grad_norm": 5.2967028714823785, |
|
"learning_rate": 9.999860125306348e-07, |
|
"logits/chosen": -0.8659788370132446, |
|
"logits/rejected": -0.8618423342704773, |
|
"logps/chosen": -272.1561279296875, |
|
"logps/rejected": -280.98040771484375, |
|
"loss": 0.6893, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.11396947503089905, |
|
"rewards/margins": -0.0073202308267354965, |
|
"rewards/rejected": -0.106649249792099, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.10474281897127588, |
|
"grad_norm": 5.51515197326478, |
|
"learning_rate": 9.999440509051367e-07, |
|
"logits/chosen": -0.7946774363517761, |
|
"logits/rejected": -0.8100728988647461, |
|
"logps/chosen": -302.84283447265625, |
|
"logps/rejected": -298.60955810546875, |
|
"loss": 0.6849, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.1010870561003685, |
|
"rewards/margins": 0.017633313313126564, |
|
"rewards/rejected": -0.11872036755084991, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.10688042752171009, |
|
"grad_norm": 5.870386943751237, |
|
"learning_rate": 9.998741174712533e-07, |
|
"logits/chosen": -0.90606290102005, |
|
"logits/rejected": -0.9065860509872437, |
|
"logps/chosen": -257.7372741699219, |
|
"logps/rejected": -241.87298583984375, |
|
"loss": 0.6821, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.12090699374675751, |
|
"rewards/margins": 0.024912692606449127, |
|
"rewards/rejected": -0.14581969380378723, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.10901803607214429, |
|
"grad_norm": 5.544085731964276, |
|
"learning_rate": 9.997762161417517e-07, |
|
"logits/chosen": -0.8597516417503357, |
|
"logits/rejected": -0.8242354393005371, |
|
"logps/chosen": -244.0271759033203, |
|
"logps/rejected": -262.000732421875, |
|
"loss": 0.6771, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.11861881613731384, |
|
"rewards/margins": 0.04306299239397049, |
|
"rewards/rejected": -0.16168181598186493, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.11115564462257849, |
|
"grad_norm": 5.08779280072292, |
|
"learning_rate": 9.996503523941992e-07, |
|
"logits/chosen": -0.8984640836715698, |
|
"logits/rejected": -0.8927853107452393, |
|
"logps/chosen": -292.3353576660156, |
|
"logps/rejected": -283.715576171875, |
|
"loss": 0.6878, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.1378980576992035, |
|
"rewards/margins": -0.0063457973301410675, |
|
"rewards/rejected": -0.13155226409435272, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.1132932531730127, |
|
"grad_norm": 5.528861132333211, |
|
"learning_rate": 9.994965332706572e-07, |
|
"logits/chosen": -0.7924266457557678, |
|
"logits/rejected": -0.7879197597503662, |
|
"logps/chosen": -299.14617919921875, |
|
"logps/rejected": -305.268798828125, |
|
"loss": 0.6822, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.14223326742649078, |
|
"rewards/margins": 0.029797088354825974, |
|
"rewards/rejected": -0.17203034460544586, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.1154308617234469, |
|
"grad_norm": 5.45602380596692, |
|
"learning_rate": 9.99314767377287e-07, |
|
"logits/chosen": -0.9068719744682312, |
|
"logits/rejected": -0.8776203393936157, |
|
"logps/chosen": -288.920166015625, |
|
"logps/rejected": -288.0073547363281, |
|
"loss": 0.6782, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.12053351104259491, |
|
"rewards/margins": 0.04062645137310028, |
|
"rewards/rejected": -0.161159947514534, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.11756847027388109, |
|
"grad_norm": 5.381814409133637, |
|
"learning_rate": 9.991050648838675e-07, |
|
"logits/chosen": -0.8684936165809631, |
|
"logits/rejected": -0.8693514466285706, |
|
"logps/chosen": -300.5417175292969, |
|
"logps/rejected": -297.93609619140625, |
|
"loss": 0.6852, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.08863644301891327, |
|
"rewards/margins": 0.03236062452197075, |
|
"rewards/rejected": -0.12099706381559372, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.1197060788243153, |
|
"grad_norm": 4.981246770478922, |
|
"learning_rate": 9.98867437523228e-07, |
|
"logits/chosen": -0.7902661561965942, |
|
"logits/rejected": -0.7963244915008545, |
|
"logps/chosen": -302.9090576171875, |
|
"logps/rejected": -296.0736389160156, |
|
"loss": 0.6823, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.10501404106616974, |
|
"rewards/margins": 0.045005738735198975, |
|
"rewards/rejected": -0.15001976490020752, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.1218436873747495, |
|
"grad_norm": 5.95244558017509, |
|
"learning_rate": 9.986018985905899e-07, |
|
"logits/chosen": -0.933331310749054, |
|
"logits/rejected": -0.9271438121795654, |
|
"logps/chosen": -257.21197509765625, |
|
"logps/rejected": -258.4394226074219, |
|
"loss": 0.6847, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.1376655399799347, |
|
"rewards/margins": 0.020258434116840363, |
|
"rewards/rejected": -0.15792396664619446, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.1239812959251837, |
|
"grad_norm": 5.625394184294828, |
|
"learning_rate": 9.983084629428244e-07, |
|
"logits/chosen": -0.790676474571228, |
|
"logits/rejected": -0.7989400625228882, |
|
"logps/chosen": -216.31825256347656, |
|
"logps/rejected": -239.0472869873047, |
|
"loss": 0.6822, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.12253975123167038, |
|
"rewards/margins": 0.032085709273815155, |
|
"rewards/rejected": -0.15462547540664673, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.1261189044756179, |
|
"grad_norm": 5.3018065912112835, |
|
"learning_rate": 9.979871469976195e-07, |
|
"logits/chosen": -0.7393543720245361, |
|
"logits/rejected": -0.7129000425338745, |
|
"logps/chosen": -311.56878662109375, |
|
"logps/rejected": -291.1382751464844, |
|
"loss": 0.6848, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.24215912818908691, |
|
"rewards/margins": -0.015711378306150436, |
|
"rewards/rejected": -0.22644776105880737, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.1282565130260521, |
|
"grad_norm": 5.89246728884038, |
|
"learning_rate": 9.97637968732563e-07, |
|
"logits/chosen": -0.8696700930595398, |
|
"logits/rejected": -0.8711199760437012, |
|
"logps/chosen": -246.91502380371094, |
|
"logps/rejected": -262.1573791503906, |
|
"loss": 0.6851, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.1573953628540039, |
|
"rewards/margins": 0.03878934681415558, |
|
"rewards/rejected": -0.1961846947669983, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.1303941215764863, |
|
"grad_norm": 5.3302706046399555, |
|
"learning_rate": 9.972609476841365e-07, |
|
"logits/chosen": -0.915327787399292, |
|
"logits/rejected": -0.8959137201309204, |
|
"logps/chosen": -273.5627136230469, |
|
"logps/rejected": -297.04962158203125, |
|
"loss": 0.6851, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.21575269103050232, |
|
"rewards/margins": 0.07058089226484299, |
|
"rewards/rejected": -0.2863335907459259, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.13253173012692052, |
|
"grad_norm": 5.298125253303342, |
|
"learning_rate": 9.968561049466213e-07, |
|
"logits/chosen": -0.8035833239555359, |
|
"logits/rejected": -0.8177482485771179, |
|
"logps/chosen": -258.7190246582031, |
|
"logps/rejected": -260.00408935546875, |
|
"loss": 0.6761, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.1654270738363266, |
|
"rewards/margins": 0.03356565535068512, |
|
"rewards/rejected": -0.19899272918701172, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.1346693386773547, |
|
"grad_norm": 5.644014199691322, |
|
"learning_rate": 9.964234631709185e-07, |
|
"logits/chosen": -0.8946092128753662, |
|
"logits/rejected": -0.8983243703842163, |
|
"logps/chosen": -272.2535095214844, |
|
"logps/rejected": -278.0460205078125, |
|
"loss": 0.6812, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.18145883083343506, |
|
"rewards/margins": 0.05900725722312927, |
|
"rewards/rejected": -0.24046610295772552, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.1368069472277889, |
|
"grad_norm": 6.088482546530936, |
|
"learning_rate": 9.959630465632831e-07, |
|
"logits/chosen": -0.8606098890304565, |
|
"logits/rejected": -0.8623652458190918, |
|
"logps/chosen": -256.6067199707031, |
|
"logps/rejected": -273.53668212890625, |
|
"loss": 0.6753, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.17427769303321838, |
|
"rewards/margins": 0.05003924295306206, |
|
"rewards/rejected": -0.22431692481040955, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.13894455577822312, |
|
"grad_norm": 5.611060962151761, |
|
"learning_rate": 9.954748808839674e-07, |
|
"logits/chosen": -0.8806792497634888, |
|
"logits/rejected": -0.8958165645599365, |
|
"logps/chosen": -275.52301025390625, |
|
"logps/rejected": -273.21563720703125, |
|
"loss": 0.6823, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.2500859200954437, |
|
"rewards/margins": 0.017448339611291885, |
|
"rewards/rejected": -0.2675342261791229, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.1410821643286573, |
|
"grad_norm": 5.918149017741809, |
|
"learning_rate": 9.949589934457814e-07, |
|
"logits/chosen": -0.8888027667999268, |
|
"logits/rejected": -0.871585488319397, |
|
"logps/chosen": -248.55703735351562, |
|
"logps/rejected": -258.9693603515625, |
|
"loss": 0.6824, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.1963491439819336, |
|
"rewards/margins": 0.04409556835889816, |
|
"rewards/rejected": -0.24044471979141235, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.14321977287909152, |
|
"grad_norm": 6.698179177771139, |
|
"learning_rate": 9.944154131125642e-07, |
|
"logits/chosen": -0.853302001953125, |
|
"logits/rejected": -0.848848819732666, |
|
"logps/chosen": -277.59442138671875, |
|
"logps/rejected": -297.14141845703125, |
|
"loss": 0.6639, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.24216346442699432, |
|
"rewards/margins": 0.10889790952205658, |
|
"rewards/rejected": -0.3510614037513733, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.14535738142952573, |
|
"grad_norm": 5.596769283806181, |
|
"learning_rate": 9.938441702975689e-07, |
|
"logits/chosen": -0.7764022350311279, |
|
"logits/rejected": -0.7560886144638062, |
|
"logps/chosen": -250.94287109375, |
|
"logps/rejected": -250.5952606201172, |
|
"loss": 0.6732, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.2156543880701065, |
|
"rewards/margins": 0.03958458825945854, |
|
"rewards/rejected": -0.25523898005485535, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.1474949899799599, |
|
"grad_norm": 5.913968144404886, |
|
"learning_rate": 9.932452969617607e-07, |
|
"logits/chosen": -0.7237470746040344, |
|
"logits/rejected": -0.7399138808250427, |
|
"logps/chosen": -244.21449279785156, |
|
"logps/rejected": -254.1151123046875, |
|
"loss": 0.6695, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.1943284124135971, |
|
"rewards/margins": 0.060313306748867035, |
|
"rewards/rejected": -0.25464171171188354, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.14963259853039412, |
|
"grad_norm": 5.940310444508497, |
|
"learning_rate": 9.926188266120295e-07, |
|
"logits/chosen": -0.8615679144859314, |
|
"logits/rejected": -0.8436312675476074, |
|
"logps/chosen": -256.257080078125, |
|
"logps/rejected": -262.7105712890625, |
|
"loss": 0.679, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.17094658315181732, |
|
"rewards/margins": 0.055061645805835724, |
|
"rewards/rejected": -0.22600823640823364, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.15177020708082833, |
|
"grad_norm": 5.928886998788439, |
|
"learning_rate": 9.919647942993147e-07, |
|
"logits/chosen": -0.8513661623001099, |
|
"logits/rejected": -0.8609136343002319, |
|
"logps/chosen": -299.2288818359375, |
|
"logps/rejected": -326.5621032714844, |
|
"loss": 0.6711, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.22696439921855927, |
|
"rewards/margins": 0.04341430217027664, |
|
"rewards/rejected": -0.2703787088394165, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.15390781563126252, |
|
"grad_norm": 5.791936546761731, |
|
"learning_rate": 9.912832366166441e-07, |
|
"logits/chosen": -0.756388783454895, |
|
"logits/rejected": -0.734666109085083, |
|
"logps/chosen": -299.2653503417969, |
|
"logps/rejected": -307.1020202636719, |
|
"loss": 0.6727, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.3383556604385376, |
|
"rewards/margins": 0.01706152781844139, |
|
"rewards/rejected": -0.3554171919822693, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.15604542418169673, |
|
"grad_norm": 6.05455714563325, |
|
"learning_rate": 9.905741916970863e-07, |
|
"logits/chosen": -0.9010551571846008, |
|
"logits/rejected": -0.8836992383003235, |
|
"logps/chosen": -339.32806396484375, |
|
"logps/rejected": -335.24285888671875, |
|
"loss": 0.6703, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.40806159377098083, |
|
"rewards/margins": -0.019612746313214302, |
|
"rewards/rejected": -0.38844889402389526, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.15818303273213094, |
|
"grad_norm": 6.2106979275919025, |
|
"learning_rate": 9.898376992116177e-07, |
|
"logits/chosen": -0.9612334370613098, |
|
"logits/rejected": -0.9398088455200195, |
|
"logps/chosen": -282.431640625, |
|
"logps/rejected": -281.66558837890625, |
|
"loss": 0.6763, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.3227473497390747, |
|
"rewards/margins": 0.047923244535923004, |
|
"rewards/rejected": -0.3706705868244171, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.16032064128256512, |
|
"grad_norm": 5.916909013866816, |
|
"learning_rate": 9.890738003669027e-07, |
|
"logits/chosen": -0.8319401741027832, |
|
"logits/rejected": -0.815265953540802, |
|
"logps/chosen": -281.00518798828125, |
|
"logps/rejected": -273.9776306152344, |
|
"loss": 0.6605, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.3827057480812073, |
|
"rewards/margins": 0.0613156333565712, |
|
"rewards/rejected": -0.4440213441848755, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.16245824983299934, |
|
"grad_norm": 6.67755131316461, |
|
"learning_rate": 9.882825379029882e-07, |
|
"logits/chosen": -0.8953054547309875, |
|
"logits/rejected": -0.894780695438385, |
|
"logps/chosen": -312.055908203125, |
|
"logps/rejected": -330.704833984375, |
|
"loss": 0.6602, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.45433858036994934, |
|
"rewards/margins": 0.07449479401111603, |
|
"rewards/rejected": -0.5288333892822266, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.16459585838343355, |
|
"grad_norm": 6.2117918809225765, |
|
"learning_rate": 9.874639560909118e-07, |
|
"logits/chosen": -0.9046330451965332, |
|
"logits/rejected": -0.898413360118866, |
|
"logps/chosen": -294.0129089355469, |
|
"logps/rejected": -299.68560791015625, |
|
"loss": 0.6753, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.4101888835430145, |
|
"rewards/margins": 0.12080243229866028, |
|
"rewards/rejected": -0.5309913158416748, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.16673346693386773, |
|
"grad_norm": 5.632634022928361, |
|
"learning_rate": 9.866181007302256e-07, |
|
"logits/chosen": -0.6335713267326355, |
|
"logits/rejected": -0.6313363313674927, |
|
"logps/chosen": -281.41400146484375, |
|
"logps/rejected": -291.63800048828125, |
|
"loss": 0.6659, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.36711931228637695, |
|
"rewards/margins": 0.1256605088710785, |
|
"rewards/rejected": -0.49277979135513306, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.16887107548430194, |
|
"grad_norm": 6.069106827042514, |
|
"learning_rate": 9.857450191464337e-07, |
|
"logits/chosen": -0.7797252535820007, |
|
"logits/rejected": -0.7820223569869995, |
|
"logps/chosen": -256.88421630859375, |
|
"logps/rejected": -279.4145812988281, |
|
"loss": 0.653, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.3781868815422058, |
|
"rewards/margins": 0.07347656786441803, |
|
"rewards/rejected": -0.45166343450546265, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.17100868403473615, |
|
"grad_norm": 6.074173522598461, |
|
"learning_rate": 9.848447601883433e-07, |
|
"logits/chosen": -0.8622775673866272, |
|
"logits/rejected": -0.8331011533737183, |
|
"logps/chosen": -309.4617614746094, |
|
"logps/rejected": -330.5566101074219, |
|
"loss": 0.6552, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.49035871028900146, |
|
"rewards/margins": 0.17824454605579376, |
|
"rewards/rejected": -0.6686033010482788, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.17314629258517034, |
|
"grad_norm": 6.5133260441754395, |
|
"learning_rate": 9.839173742253334e-07, |
|
"logits/chosen": -0.7489383816719055, |
|
"logits/rejected": -0.781232476234436, |
|
"logps/chosen": -296.9482116699219, |
|
"logps/rejected": -327.5967712402344, |
|
"loss": 0.6688, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.5791828036308289, |
|
"rewards/margins": 0.188523530960083, |
|
"rewards/rejected": -0.7677063345909119, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.17528390113560455, |
|
"grad_norm": 5.74672853077721, |
|
"learning_rate": 9.82962913144534e-07, |
|
"logits/chosen": -0.8432500958442688, |
|
"logits/rejected": -0.8211543560028076, |
|
"logps/chosen": -293.7790222167969, |
|
"logps/rejected": -304.9800720214844, |
|
"loss": 0.6522, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.47243934869766235, |
|
"rewards/margins": 0.12751685082912445, |
|
"rewards/rejected": -0.599956214427948, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.17742150968603873, |
|
"grad_norm": 6.3990299421699675, |
|
"learning_rate": 9.819814303479267e-07, |
|
"logits/chosen": -0.9426258206367493, |
|
"logits/rejected": -0.9214622378349304, |
|
"logps/chosen": -290.99407958984375, |
|
"logps/rejected": -301.18212890625, |
|
"loss": 0.652, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.49375712871551514, |
|
"rewards/margins": 0.1566104143857956, |
|
"rewards/rejected": -0.6503674983978271, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.17955911823647294, |
|
"grad_norm": 6.534280177132367, |
|
"learning_rate": 9.80972980749353e-07, |
|
"logits/chosen": -0.8522071838378906, |
|
"logits/rejected": -0.8386092185974121, |
|
"logps/chosen": -345.668212890625, |
|
"logps/rejected": -346.40960693359375, |
|
"loss": 0.67, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.6266711950302124, |
|
"rewards/margins": 0.09361431002616882, |
|
"rewards/rejected": -0.7202855348587036, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.18169672678690715, |
|
"grad_norm": 6.649073031684906, |
|
"learning_rate": 9.799376207714444e-07, |
|
"logits/chosen": -0.7474217414855957, |
|
"logits/rejected": -0.7404229044914246, |
|
"logps/chosen": -275.940673828125, |
|
"logps/rejected": -290.2484130859375, |
|
"loss": 0.6365, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.41143321990966797, |
|
"rewards/margins": 0.08391736447811127, |
|
"rewards/rejected": -0.49535059928894043, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.18383433533734134, |
|
"grad_norm": 6.963291541132159, |
|
"learning_rate": 9.788754083424652e-07, |
|
"logits/chosen": -0.824079692363739, |
|
"logits/rejected": -0.8041766285896301, |
|
"logps/chosen": -321.2813720703125, |
|
"logps/rejected": -339.7249450683594, |
|
"loss": 0.6636, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.5758030414581299, |
|
"rewards/margins": 0.19611942768096924, |
|
"rewards/rejected": -0.7719224095344543, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.18597194388777555, |
|
"grad_norm": 6.945463717696004, |
|
"learning_rate": 9.777864028930705e-07, |
|
"logits/chosen": -0.7686063647270203, |
|
"logits/rejected": -0.7663296461105347, |
|
"logps/chosen": -349.73004150390625, |
|
"logps/rejected": -375.2843017578125, |
|
"loss": 0.626, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.6469497680664062, |
|
"rewards/margins": 0.2732096314430237, |
|
"rewards/rejected": -0.9201593399047852, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.18810955243820976, |
|
"grad_norm": 6.714366991925423, |
|
"learning_rate": 9.766706653529812e-07, |
|
"logits/chosen": -0.782423734664917, |
|
"logits/rejected": -0.7881312966346741, |
|
"logps/chosen": -301.2457275390625, |
|
"logps/rejected": -310.0863037109375, |
|
"loss": 0.6652, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.5998435616493225, |
|
"rewards/margins": 0.09575268626213074, |
|
"rewards/rejected": -0.6955962777137756, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.19024716098864394, |
|
"grad_norm": 7.241214530195881, |
|
"learning_rate": 9.755282581475767e-07, |
|
"logits/chosen": -0.8655251860618591, |
|
"logits/rejected": -0.8472452163696289, |
|
"logps/chosen": -398.3143310546875, |
|
"logps/rejected": -434.9195556640625, |
|
"loss": 0.6159, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.8902552723884583, |
|
"rewards/margins": 0.29991066455841064, |
|
"rewards/rejected": -1.1901659965515137, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.19238476953907815, |
|
"grad_norm": 7.90505927903396, |
|
"learning_rate": 9.743592451943998e-07, |
|
"logits/chosen": -0.8578193783760071, |
|
"logits/rejected": -0.8561904430389404, |
|
"logps/chosen": -281.01824951171875, |
|
"logps/rejected": -304.5150146484375, |
|
"loss": 0.6939, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.7082527875900269, |
|
"rewards/margins": 0.08812718093395233, |
|
"rewards/rejected": -0.7963800430297852, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.19452237808951237, |
|
"grad_norm": 7.5921079251944, |
|
"learning_rate": 9.73163691899582e-07, |
|
"logits/chosen": -0.678159236907959, |
|
"logits/rejected": -0.6668828725814819, |
|
"logps/chosen": -300.15338134765625, |
|
"logps/rejected": -306.63525390625, |
|
"loss": 0.6812, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.6471429467201233, |
|
"rewards/margins": 0.08188958466053009, |
|
"rewards/rejected": -0.729032576084137, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.19665998663994655, |
|
"grad_norm": 7.137628936459269, |
|
"learning_rate": 9.719416651541837e-07, |
|
"logits/chosen": -0.8150886297225952, |
|
"logits/rejected": -0.8088028430938721, |
|
"logps/chosen": -431.6229248046875, |
|
"logps/rejected": -458.9399108886719, |
|
"loss": 0.643, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.0566003322601318, |
|
"rewards/margins": 0.2619227468967438, |
|
"rewards/rejected": -1.3185230493545532, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.19879759519038076, |
|
"grad_norm": 6.729473146383851, |
|
"learning_rate": 9.706932333304517e-07, |
|
"logits/chosen": -0.8243950605392456, |
|
"logits/rejected": -0.838744580745697, |
|
"logps/chosen": -312.406494140625, |
|
"logps/rejected": -335.4088134765625, |
|
"loss": 0.6498, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.7556511759757996, |
|
"rewards/margins": 0.031043091788887978, |
|
"rewards/rejected": -0.7866942882537842, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.20093520374081497, |
|
"grad_norm": 6.624154045427617, |
|
"learning_rate": 9.694184662779929e-07, |
|
"logits/chosen": -0.783348560333252, |
|
"logits/rejected": -0.7991134524345398, |
|
"logps/chosen": -289.2900695800781, |
|
"logps/rejected": -290.5962829589844, |
|
"loss": 0.6525, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.6890867352485657, |
|
"rewards/margins": 0.08654731512069702, |
|
"rewards/rejected": -0.7756341099739075, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.20307281229124916, |
|
"grad_norm": 7.588312119029146, |
|
"learning_rate": 9.681174353198686e-07, |
|
"logits/chosen": -0.8928613066673279, |
|
"logits/rejected": -0.9167020916938782, |
|
"logps/chosen": -263.0621032714844, |
|
"logps/rejected": -291.3228759765625, |
|
"loss": 0.6785, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.6083306670188904, |
|
"rewards/margins": 0.10731954127550125, |
|
"rewards/rejected": -0.715650200843811, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.20521042084168337, |
|
"grad_norm": 7.93175048638323, |
|
"learning_rate": 9.667902132486008e-07, |
|
"logits/chosen": -0.7266509532928467, |
|
"logits/rejected": -0.7005448341369629, |
|
"logps/chosen": -355.4562072753906, |
|
"logps/rejected": -368.688232421875, |
|
"loss": 0.6808, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.9357940554618835, |
|
"rewards/margins": 0.1980743259191513, |
|
"rewards/rejected": -1.1338684558868408, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.20734802939211758, |
|
"grad_norm": 7.282370392547328, |
|
"learning_rate": 9.65436874322102e-07, |
|
"logits/chosen": -0.7565743327140808, |
|
"logits/rejected": -0.765534520149231, |
|
"logps/chosen": -360.4274597167969, |
|
"logps/rejected": -397.3307189941406, |
|
"loss": 0.6365, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.9580825567245483, |
|
"rewards/margins": 0.27880433201789856, |
|
"rewards/rejected": -1.2368868589401245, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.20948563794255176, |
|
"grad_norm": 7.307890632023091, |
|
"learning_rate": 9.640574942595194e-07, |
|
"logits/chosen": -0.6865275502204895, |
|
"logits/rejected": -0.6510294079780579, |
|
"logps/chosen": -299.5666198730469, |
|
"logps/rejected": -315.7306823730469, |
|
"loss": 0.637, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.6532863974571228, |
|
"rewards/margins": 0.1549152433872223, |
|
"rewards/rejected": -0.8082016706466675, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.21162324649298597, |
|
"grad_norm": 7.447581281931192, |
|
"learning_rate": 9.626521502369983e-07, |
|
"logits/chosen": -0.6352126598358154, |
|
"logits/rejected": -0.6191614866256714, |
|
"logps/chosen": -293.2029113769531, |
|
"logps/rejected": -306.13330078125, |
|
"loss": 0.6658, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.7388824224472046, |
|
"rewards/margins": 0.15435971319675446, |
|
"rewards/rejected": -0.8932421803474426, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.21376085504342018, |
|
"grad_norm": 6.648161187751906, |
|
"learning_rate": 9.612209208833646e-07, |
|
"logits/chosen": -0.7408478856086731, |
|
"logits/rejected": -0.7513828277587891, |
|
"logps/chosen": -301.5423583984375, |
|
"logps/rejected": -345.68682861328125, |
|
"loss": 0.628, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.7927481532096863, |
|
"rewards/margins": 0.24078083038330078, |
|
"rewards/rejected": -1.0335289239883423, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.21376085504342018, |
|
"eval_logits/chosen": -0.7527643442153931, |
|
"eval_logits/rejected": -0.7538674473762512, |
|
"eval_logps/chosen": -343.6059875488281, |
|
"eval_logps/rejected": -362.7133483886719, |
|
"eval_loss": 0.6641345024108887, |
|
"eval_rewards/accuracies": 0.6239837408065796, |
|
"eval_rewards/chosen": -0.8805798888206482, |
|
"eval_rewards/margins": 0.1340140700340271, |
|
"eval_rewards/rejected": -1.0145939588546753, |
|
"eval_runtime": 372.3126, |
|
"eval_samples_per_second": 5.267, |
|
"eval_steps_per_second": 0.33, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.21589846359385437, |
|
"grad_norm": 7.778718674441958, |
|
"learning_rate": 9.597638862757253e-07, |
|
"logits/chosen": -0.8201433420181274, |
|
"logits/rejected": -0.8069182634353638, |
|
"logps/chosen": -256.0120849609375, |
|
"logps/rejected": -269.8443603515625, |
|
"loss": 0.6831, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.656363844871521, |
|
"rewards/margins": 0.05751778930425644, |
|
"rewards/rejected": -0.7138815522193909, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.21803607214428858, |
|
"grad_norm": 7.5706021854045185, |
|
"learning_rate": 9.58281127934988e-07, |
|
"logits/chosen": -0.6860804557800293, |
|
"logits/rejected": -0.7110453844070435, |
|
"logps/chosen": -368.2939453125, |
|
"logps/rejected": -393.86029052734375, |
|
"loss": 0.6576, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.020776391029358, |
|
"rewards/margins": 0.1516759693622589, |
|
"rewards/rejected": -1.172452449798584, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.2201736806947228, |
|
"grad_norm": 8.607842129213472, |
|
"learning_rate": 9.567727288213004e-07, |
|
"logits/chosen": -0.7699592113494873, |
|
"logits/rejected": -0.7589491605758667, |
|
"logps/chosen": -324.6326904296875, |
|
"logps/rejected": -358.59820556640625, |
|
"loss": 0.7094, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -1.0056777000427246, |
|
"rewards/margins": 0.18924392759799957, |
|
"rewards/rejected": -1.1949217319488525, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.22231128924515697, |
|
"grad_norm": 7.291755560282041, |
|
"learning_rate": 9.552387733294078e-07, |
|
"logits/chosen": -0.6555180549621582, |
|
"logits/rejected": -0.6659807562828064, |
|
"logps/chosen": -330.6410827636719, |
|
"logps/rejected": -359.6870422363281, |
|
"loss": 0.6453, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.8741835355758667, |
|
"rewards/margins": 0.2183988094329834, |
|
"rewards/rejected": -1.09258234500885, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.22444889779559118, |
|
"grad_norm": 7.775554579983475, |
|
"learning_rate": 9.536793472839324e-07, |
|
"logits/chosen": -0.6701323986053467, |
|
"logits/rejected": -0.6589778661727905, |
|
"logps/chosen": -285.3506164550781, |
|
"logps/rejected": -288.3831481933594, |
|
"loss": 0.6488, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.7929357290267944, |
|
"rewards/margins": 0.09136777371168137, |
|
"rewards/rejected": -0.8843034505844116, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.2265865063460254, |
|
"grad_norm": 7.266567693754681, |
|
"learning_rate": 9.520945379345699e-07, |
|
"logits/chosen": -0.8183209300041199, |
|
"logits/rejected": -0.8361554741859436, |
|
"logps/chosen": -397.4153747558594, |
|
"logps/rejected": -423.17333984375, |
|
"loss": 0.6383, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.1003904342651367, |
|
"rewards/margins": 0.1266530454158783, |
|
"rewards/rejected": -1.2270435094833374, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.22872411489645958, |
|
"grad_norm": 7.518282958403423, |
|
"learning_rate": 9.504844339512094e-07, |
|
"logits/chosen": -0.8879948854446411, |
|
"logits/rejected": -0.8571330904960632, |
|
"logps/chosen": -287.59051513671875, |
|
"logps/rejected": -297.351318359375, |
|
"loss": 0.6476, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.7065630555152893, |
|
"rewards/margins": 0.15939508378505707, |
|
"rewards/rejected": -0.8659580945968628, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.2308617234468938, |
|
"grad_norm": 7.824671981101, |
|
"learning_rate": 9.488491254189716e-07, |
|
"logits/chosen": -0.8066489696502686, |
|
"logits/rejected": -0.8055952191352844, |
|
"logps/chosen": -404.3518981933594, |
|
"logps/rejected": -442.6438903808594, |
|
"loss": 0.6404, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.0566518306732178, |
|
"rewards/margins": 0.3284587264060974, |
|
"rewards/rejected": -1.3851103782653809, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.232999331997328, |
|
"grad_norm": 8.83083617083813, |
|
"learning_rate": 9.471887038331684e-07, |
|
"logits/chosen": -0.7246598601341248, |
|
"logits/rejected": -0.7441533207893372, |
|
"logps/chosen": -354.1577453613281, |
|
"logps/rejected": -366.261962890625, |
|
"loss": 0.6873, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.9086767435073853, |
|
"rewards/margins": 0.14375557005405426, |
|
"rewards/rejected": -1.0524324178695679, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.23513694054776219, |
|
"grad_norm": 6.762910416252425, |
|
"learning_rate": 9.455032620941839e-07, |
|
"logits/chosen": -0.7163547277450562, |
|
"logits/rejected": -0.7031821608543396, |
|
"logps/chosen": -281.1316833496094, |
|
"logps/rejected": -283.372314453125, |
|
"loss": 0.6652, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.6051456332206726, |
|
"rewards/margins": 0.09137356281280518, |
|
"rewards/rejected": -0.6965191960334778, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.2372745490981964, |
|
"grad_norm": 7.354791673779593, |
|
"learning_rate": 9.43792894502277e-07, |
|
"logits/chosen": -0.6413677334785461, |
|
"logits/rejected": -0.6314007043838501, |
|
"logps/chosen": -341.87396240234375, |
|
"logps/rejected": -356.4854736328125, |
|
"loss": 0.6642, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.8350028395652771, |
|
"rewards/margins": 0.21570800244808197, |
|
"rewards/rejected": -1.050710916519165, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.2394121576486306, |
|
"grad_norm": 7.625646719699033, |
|
"learning_rate": 9.420576967523048e-07, |
|
"logits/chosen": -0.7540197968482971, |
|
"logits/rejected": -0.7288798093795776, |
|
"logps/chosen": -290.5899963378906, |
|
"logps/rejected": -294.30804443359375, |
|
"loss": 0.6563, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.6742240786552429, |
|
"rewards/margins": 0.1976398229598999, |
|
"rewards/rejected": -0.8718639612197876, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.2415497661990648, |
|
"grad_norm": 7.749312449639858, |
|
"learning_rate": 9.402977659283689e-07, |
|
"logits/chosen": -0.773981511592865, |
|
"logits/rejected": -0.7674249410629272, |
|
"logps/chosen": -323.57000732421875, |
|
"logps/rejected": -349.71990966796875, |
|
"loss": 0.6365, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.7962589859962463, |
|
"rewards/margins": 0.18393486738204956, |
|
"rewards/rejected": -0.9801937937736511, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.243687374749499, |
|
"grad_norm": 7.4503816098925055, |
|
"learning_rate": 9.385132004983832e-07, |
|
"logits/chosen": -0.7875250577926636, |
|
"logits/rejected": -0.7886217832565308, |
|
"logps/chosen": -289.820068359375, |
|
"logps/rejected": -307.18914794921875, |
|
"loss": 0.6351, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.6762690544128418, |
|
"rewards/margins": 0.16321714222431183, |
|
"rewards/rejected": -0.8394861817359924, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.2458249832999332, |
|
"grad_norm": 7.383473143295883, |
|
"learning_rate": 9.367041003085648e-07, |
|
"logits/chosen": -0.811254620552063, |
|
"logits/rejected": -0.8413273692131042, |
|
"logps/chosen": -328.42877197265625, |
|
"logps/rejected": -360.35430908203125, |
|
"loss": 0.6373, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.6836004257202148, |
|
"rewards/margins": 0.1819692850112915, |
|
"rewards/rejected": -0.8655696511268616, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.2479625918503674, |
|
"grad_norm": 6.933138308165148, |
|
"learning_rate": 9.348705665778477e-07, |
|
"logits/chosen": -0.7606134414672852, |
|
"logits/rejected": -0.7490028142929077, |
|
"logps/chosen": -342.7862548828125, |
|
"logps/rejected": -355.22943115234375, |
|
"loss": 0.6449, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.9342496991157532, |
|
"rewards/margins": 0.09783473610877991, |
|
"rewards/rejected": -1.0320844650268555, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.2501002004008016, |
|
"grad_norm": 6.9991891498789744, |
|
"learning_rate": 9.330127018922193e-07, |
|
"logits/chosen": -0.7081186771392822, |
|
"logits/rejected": -0.7329989075660706, |
|
"logps/chosen": -361.0794372558594, |
|
"logps/rejected": -369.33721923828125, |
|
"loss": 0.6483, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.8992618322372437, |
|
"rewards/margins": 0.12093706429004669, |
|
"rewards/rejected": -1.020198941230774, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.2522378089512358, |
|
"grad_norm": 6.891968237145087, |
|
"learning_rate": 9.311306101989812e-07, |
|
"logits/chosen": -0.7707226872444153, |
|
"logits/rejected": -0.775468111038208, |
|
"logps/chosen": -328.4278869628906, |
|
"logps/rejected": -375.73712158203125, |
|
"loss": 0.6215, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.7947558164596558, |
|
"rewards/margins": 0.2816506326198578, |
|
"rewards/rejected": -1.076406478881836, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.25437541750167003, |
|
"grad_norm": 7.78238688784484, |
|
"learning_rate": 9.29224396800933e-07, |
|
"logits/chosen": -0.8061501383781433, |
|
"logits/rejected": -0.7823886275291443, |
|
"logps/chosen": -322.4601135253906, |
|
"logps/rejected": -329.06744384765625, |
|
"loss": 0.6551, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.8091481924057007, |
|
"rewards/margins": -0.014971929602324963, |
|
"rewards/rejected": -0.7941762208938599, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.2565130260521042, |
|
"grad_norm": 7.538013946361546, |
|
"learning_rate": 9.272941683504808e-07, |
|
"logits/chosen": -0.7215307950973511, |
|
"logits/rejected": -0.7078826427459717, |
|
"logps/chosen": -356.06005859375, |
|
"logps/rejected": -357.49774169921875, |
|
"loss": 0.6384, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.8885184526443481, |
|
"rewards/margins": 0.18230639398097992, |
|
"rewards/rejected": -1.0708248615264893, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.2586506346025384, |
|
"grad_norm": 7.545420813102953, |
|
"learning_rate": 9.253400328436698e-07, |
|
"logits/chosen": -0.7346601486206055, |
|
"logits/rejected": -0.7335522174835205, |
|
"logps/chosen": -344.805419921875, |
|
"logps/rejected": -350.87725830078125, |
|
"loss": 0.6594, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.8253999948501587, |
|
"rewards/margins": 0.0806780755519867, |
|
"rewards/rejected": -0.9060779809951782, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.2607882431529726, |
|
"grad_norm": 7.7074461457899535, |
|
"learning_rate": 9.233620996141421e-07, |
|
"logits/chosen": -0.8815721273422241, |
|
"logits/rejected": -0.8621220588684082, |
|
"logps/chosen": -336.6763610839844, |
|
"logps/rejected": -341.74798583984375, |
|
"loss": 0.6341, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.7817858457565308, |
|
"rewards/margins": 0.07886642962694168, |
|
"rewards/rejected": -0.8606522083282471, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.2629258517034068, |
|
"grad_norm": 7.761484323525846, |
|
"learning_rate": 9.213604793270196e-07, |
|
"logits/chosen": -0.8222033977508545, |
|
"logits/rejected": -0.8148404955863953, |
|
"logps/chosen": -303.2247009277344, |
|
"logps/rejected": -315.91888427734375, |
|
"loss": 0.6419, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.7392472624778748, |
|
"rewards/margins": 0.13012456893920898, |
|
"rewards/rejected": -0.869371771812439, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.26506346025384103, |
|
"grad_norm": 8.151352928349633, |
|
"learning_rate": 9.19335283972712e-07, |
|
"logits/chosen": -0.7656688690185547, |
|
"logits/rejected": -0.7709140181541443, |
|
"logps/chosen": -374.8747253417969, |
|
"logps/rejected": -376.098876953125, |
|
"loss": 0.6754, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.005488634109497, |
|
"rewards/margins": 0.06390834599733353, |
|
"rewards/rejected": -1.06939697265625, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.26720106880427524, |
|
"grad_norm": 7.63262703375028, |
|
"learning_rate": 9.172866268606513e-07, |
|
"logits/chosen": -0.755223274230957, |
|
"logits/rejected": -0.7677374482154846, |
|
"logps/chosen": -372.7818603515625, |
|
"logps/rejected": -385.9354248046875, |
|
"loss": 0.6662, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.8667163848876953, |
|
"rewards/margins": 0.20413543283939362, |
|
"rewards/rejected": -1.0708518028259277, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.2693386773547094, |
|
"grad_norm": 7.478834701874977, |
|
"learning_rate": 9.152146226129518e-07, |
|
"logits/chosen": -0.7996259927749634, |
|
"logits/rejected": -0.7835624814033508, |
|
"logps/chosen": -292.76129150390625, |
|
"logps/rejected": -333.20477294921875, |
|
"loss": 0.6172, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.7281415462493896, |
|
"rewards/margins": 0.3209742605686188, |
|
"rewards/rejected": -1.049115777015686, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.2714762859051436, |
|
"grad_norm": 7.082169803011623, |
|
"learning_rate": 9.131193871579974e-07, |
|
"logits/chosen": -0.8138784766197205, |
|
"logits/rejected": -0.829187273979187, |
|
"logps/chosen": -353.7518615722656, |
|
"logps/rejected": -404.1153564453125, |
|
"loss": 0.6436, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.8756864070892334, |
|
"rewards/margins": 0.2420441061258316, |
|
"rewards/rejected": -1.1177304983139038, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.2736138944555778, |
|
"grad_norm": 7.237862112577957, |
|
"learning_rate": 9.11001037723955e-07, |
|
"logits/chosen": -0.7936111688613892, |
|
"logits/rejected": -0.8008431196212769, |
|
"logps/chosen": -332.17718505859375, |
|
"logps/rejected": -352.5616760253906, |
|
"loss": 0.6689, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.7826520800590515, |
|
"rewards/margins": 0.20596377551555634, |
|
"rewards/rejected": -0.988615870475769, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.27575150300601203, |
|
"grad_norm": 8.604847241866956, |
|
"learning_rate": 9.088596928322157e-07, |
|
"logits/chosen": -0.8067824840545654, |
|
"logits/rejected": -0.8039845824241638, |
|
"logps/chosen": -333.2156982421875, |
|
"logps/rejected": -357.6597595214844, |
|
"loss": 0.6587, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.787762463092804, |
|
"rewards/margins": 0.017246991395950317, |
|
"rewards/rejected": -0.8050093650817871, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.27788911155644624, |
|
"grad_norm": 8.324207089057085, |
|
"learning_rate": 9.066954722907638e-07, |
|
"logits/chosen": -0.6775297522544861, |
|
"logits/rejected": -0.7070217132568359, |
|
"logps/chosen": -324.43402099609375, |
|
"logps/rejected": -350.53851318359375, |
|
"loss": 0.645, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.7566977739334106, |
|
"rewards/margins": 0.24282482266426086, |
|
"rewards/rejected": -0.9995224475860596, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.2800267201068804, |
|
"grad_norm": 7.364170729863742, |
|
"learning_rate": 9.045084971874737e-07, |
|
"logits/chosen": -0.7260534167289734, |
|
"logits/rejected": -0.7187973260879517, |
|
"logps/chosen": -294.4010925292969, |
|
"logps/rejected": -310.21136474609375, |
|
"loss": 0.6398, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.7721171379089355, |
|
"rewards/margins": 0.15222765505313873, |
|
"rewards/rejected": -0.9243447184562683, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.2821643286573146, |
|
"grad_norm": 7.35116325693309, |
|
"learning_rate": 9.022988898833342e-07, |
|
"logits/chosen": -0.7463628053665161, |
|
"logits/rejected": -0.7459514141082764, |
|
"logps/chosen": -329.623779296875, |
|
"logps/rejected": -356.4615783691406, |
|
"loss": 0.5991, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.8810457587242126, |
|
"rewards/margins": 0.1869642734527588, |
|
"rewards/rejected": -1.0680099725723267, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.2843019372077488, |
|
"grad_norm": 7.43517337943034, |
|
"learning_rate": 9.000667740056032e-07, |
|
"logits/chosen": -0.7253285646438599, |
|
"logits/rejected": -0.7020008563995361, |
|
"logps/chosen": -341.2428894042969, |
|
"logps/rejected": -399.8907470703125, |
|
"loss": 0.6251, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.9917829036712646, |
|
"rewards/margins": 0.3243829011917114, |
|
"rewards/rejected": -1.3161659240722656, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.28643954575818303, |
|
"grad_norm": 8.02042016596172, |
|
"learning_rate": 8.978122744408905e-07, |
|
"logits/chosen": -0.6935924887657166, |
|
"logits/rejected": -0.6478650569915771, |
|
"logps/chosen": -383.7857971191406, |
|
"logps/rejected": -403.4067077636719, |
|
"loss": 0.6472, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.0208508968353271, |
|
"rewards/margins": 0.2477567493915558, |
|
"rewards/rejected": -1.2686076164245605, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.28857715430861725, |
|
"grad_norm": 7.085674453391065, |
|
"learning_rate": 8.955355173281707e-07, |
|
"logits/chosen": -0.7271559238433838, |
|
"logits/rejected": -0.7309106588363647, |
|
"logps/chosen": -310.3777160644531, |
|
"logps/rejected": -329.25323486328125, |
|
"loss": 0.6007, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.7959171533584595, |
|
"rewards/margins": 0.21840126812458038, |
|
"rewards/rejected": -1.0143184661865234, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.29071476285905146, |
|
"grad_norm": 7.837723075107936, |
|
"learning_rate": 8.932366300517249e-07, |
|
"logits/chosen": -0.771674633026123, |
|
"logits/rejected": -0.7675716280937195, |
|
"logps/chosen": -381.0829772949219, |
|
"logps/rejected": -408.50616455078125, |
|
"loss": 0.6332, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.0345312356948853, |
|
"rewards/margins": 0.18286427855491638, |
|
"rewards/rejected": -1.217395544052124, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.2928523714094856, |
|
"grad_norm": 9.181399284387098, |
|
"learning_rate": 8.909157412340149e-07, |
|
"logits/chosen": -0.837311863899231, |
|
"logits/rejected": -0.8280692100524902, |
|
"logps/chosen": -368.6721496582031, |
|
"logps/rejected": -397.39056396484375, |
|
"loss": 0.672, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -1.0871038436889648, |
|
"rewards/margins": 0.1502176821231842, |
|
"rewards/rejected": -1.2373214960098267, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.2949899799599198, |
|
"grad_norm": 8.547486747659995, |
|
"learning_rate": 8.885729807284854e-07, |
|
"logits/chosen": -0.6511350274085999, |
|
"logits/rejected": -0.6316956877708435, |
|
"logps/chosen": -367.9530029296875, |
|
"logps/rejected": -376.24652099609375, |
|
"loss": 0.6666, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.1004064083099365, |
|
"rewards/margins": 0.1894245594739914, |
|
"rewards/rejected": -1.2898309230804443, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.29712758851035403, |
|
"grad_norm": 7.209713050210504, |
|
"learning_rate": 8.862084796122997e-07, |
|
"logits/chosen": -0.7271043658256531, |
|
"logits/rejected": -0.7313827276229858, |
|
"logps/chosen": -305.42919921875, |
|
"logps/rejected": -366.8320007324219, |
|
"loss": 0.6285, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.8596370816230774, |
|
"rewards/margins": 0.34605735540390015, |
|
"rewards/rejected": -1.205694556236267, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.29926519706078825, |
|
"grad_norm": 8.777840373189521, |
|
"learning_rate": 8.838223701790055e-07, |
|
"logits/chosen": -0.8329405188560486, |
|
"logits/rejected": -0.8425594568252563, |
|
"logps/chosen": -334.819580078125, |
|
"logps/rejected": -353.1991882324219, |
|
"loss": 0.6789, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.9176943898200989, |
|
"rewards/margins": 0.08534470945596695, |
|
"rewards/rejected": -1.0030391216278076, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.30140280561122246, |
|
"grad_norm": 8.349695032017713, |
|
"learning_rate": 8.814147859311332e-07, |
|
"logits/chosen": -0.7287541031837463, |
|
"logits/rejected": -0.747150182723999, |
|
"logps/chosen": -338.96990966796875, |
|
"logps/rejected": -393.1916809082031, |
|
"loss": 0.6085, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.868696928024292, |
|
"rewards/margins": 0.3027462959289551, |
|
"rewards/rejected": -1.1714433431625366, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.30354041416165667, |
|
"grad_norm": 8.507916003943253, |
|
"learning_rate": 8.789858615727264e-07, |
|
"logits/chosen": -0.6775808930397034, |
|
"logits/rejected": -0.6213993430137634, |
|
"logps/chosen": -374.7777099609375, |
|
"logps/rejected": -441.28265380859375, |
|
"loss": 0.5921, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.076945424079895, |
|
"rewards/margins": 0.4260719418525696, |
|
"rewards/rejected": -1.5030174255371094, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.3056780227120908, |
|
"grad_norm": 8.266582578388473, |
|
"learning_rate": 8.765357330018055e-07, |
|
"logits/chosen": -0.7523927092552185, |
|
"logits/rejected": -0.7748714685440063, |
|
"logps/chosen": -353.6466064453125, |
|
"logps/rejected": -402.60662841796875, |
|
"loss": 0.625, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.0103652477264404, |
|
"rewards/margins": 0.3086761236190796, |
|
"rewards/rejected": -1.31904137134552, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.30781563126252504, |
|
"grad_norm": 8.078736110217639, |
|
"learning_rate": 8.740645373027634e-07, |
|
"logits/chosen": -0.72418212890625, |
|
"logits/rejected": -0.7301138639450073, |
|
"logps/chosen": -414.23004150390625, |
|
"logps/rejected": -465.2354736328125, |
|
"loss": 0.6034, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.1818435192108154, |
|
"rewards/margins": 0.26852208375930786, |
|
"rewards/rejected": -1.450365662574768, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.30995323981295925, |
|
"grad_norm": 8.551096723015775, |
|
"learning_rate": 8.71572412738697e-07, |
|
"logits/chosen": -0.7613222599029541, |
|
"logits/rejected": -0.7519202828407288, |
|
"logps/chosen": -376.8845520019531, |
|
"logps/rejected": -391.2483215332031, |
|
"loss": 0.6422, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.2536505460739136, |
|
"rewards/margins": 0.05302443727850914, |
|
"rewards/rejected": -1.3066749572753906, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.31209084836339346, |
|
"grad_norm": 10.264598567431593, |
|
"learning_rate": 8.690594987436704e-07, |
|
"logits/chosen": -0.6667072772979736, |
|
"logits/rejected": -0.651785135269165, |
|
"logps/chosen": -407.5121765136719, |
|
"logps/rejected": -414.15325927734375, |
|
"loss": 0.7022, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.3455419540405273, |
|
"rewards/margins": 0.1597069501876831, |
|
"rewards/rejected": -1.5052489042282104, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.31422845691382767, |
|
"grad_norm": 8.003275854261016, |
|
"learning_rate": 8.66525935914913e-07, |
|
"logits/chosen": -0.70644611120224, |
|
"logits/rejected": -0.7072776556015015, |
|
"logps/chosen": -298.8578186035156, |
|
"logps/rejected": -352.6321105957031, |
|
"loss": 0.6026, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.7530163526535034, |
|
"rewards/margins": 0.3677568733692169, |
|
"rewards/rejected": -1.1207730770111084, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.3163660654642619, |
|
"grad_norm": 9.622881147148561, |
|
"learning_rate": 8.639718660049554e-07, |
|
"logits/chosen": -0.7758994102478027, |
|
"logits/rejected": -0.7696230411529541, |
|
"logps/chosen": -305.4625549316406, |
|
"logps/rejected": -307.0013732910156, |
|
"loss": 0.6654, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.9157548546791077, |
|
"rewards/margins": 0.10377232730388641, |
|
"rewards/rejected": -1.0195271968841553, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.31850367401469604, |
|
"grad_norm": 9.830442606055694, |
|
"learning_rate": 8.613974319136957e-07, |
|
"logits/chosen": -0.6808797121047974, |
|
"logits/rejected": -0.6591075658798218, |
|
"logps/chosen": -328.95526123046875, |
|
"logps/rejected": -344.4721374511719, |
|
"loss": 0.653, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.1110159158706665, |
|
"rewards/margins": 0.16696244478225708, |
|
"rewards/rejected": -1.2779783010482788, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.32064128256513025, |
|
"grad_norm": 8.745762248320213, |
|
"learning_rate": 8.588027776804058e-07, |
|
"logits/chosen": -0.7875892519950867, |
|
"logits/rejected": -0.7677904963493347, |
|
"logps/chosen": -357.419677734375, |
|
"logps/rejected": -373.40460205078125, |
|
"loss": 0.6461, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.0266187191009521, |
|
"rewards/margins": 0.21186724305152893, |
|
"rewards/rejected": -1.2384859323501587, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.32277889111556446, |
|
"grad_norm": 8.312609091320738, |
|
"learning_rate": 8.561880484756724e-07, |
|
"logits/chosen": -0.7948569059371948, |
|
"logits/rejected": -0.7845500707626343, |
|
"logps/chosen": -341.4780578613281, |
|
"logps/rejected": -384.87615966796875, |
|
"loss": 0.622, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.9681000113487244, |
|
"rewards/margins": 0.3343212306499481, |
|
"rewards/rejected": -1.30242121219635, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.32491649966599867, |
|
"grad_norm": 9.180312248349901, |
|
"learning_rate": 8.535533905932737e-07, |
|
"logits/chosen": -0.7717313170433044, |
|
"logits/rejected": -0.7632758617401123, |
|
"logps/chosen": -349.5531921386719, |
|
"logps/rejected": -348.3159484863281, |
|
"loss": 0.6628, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.1458628177642822, |
|
"rewards/margins": 0.07357059419155121, |
|
"rewards/rejected": -1.2194334268569946, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.3270541082164329, |
|
"grad_norm": 8.40986643995496, |
|
"learning_rate": 8.508989514419958e-07, |
|
"logits/chosen": -0.6287474036216736, |
|
"logits/rejected": -0.5992534160614014, |
|
"logps/chosen": -327.4925842285156, |
|
"logps/rejected": -357.3055725097656, |
|
"loss": 0.6299, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.0919466018676758, |
|
"rewards/margins": 0.2580554485321045, |
|
"rewards/rejected": -1.3500020503997803, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.3291917167668671, |
|
"grad_norm": 9.217933303499299, |
|
"learning_rate": 8.482248795373835e-07, |
|
"logits/chosen": -0.7915253639221191, |
|
"logits/rejected": -0.7664984464645386, |
|
"logps/chosen": -368.6262512207031, |
|
"logps/rejected": -391.03564453125, |
|
"loss": 0.6426, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.0133848190307617, |
|
"rewards/margins": 0.11703261733055115, |
|
"rewards/rejected": -1.1304173469543457, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.33132932531730125, |
|
"grad_norm": 8.472153097719477, |
|
"learning_rate": 8.455313244934324e-07, |
|
"logits/chosen": -0.8312329649925232, |
|
"logits/rejected": -0.8426264524459839, |
|
"logps/chosen": -341.4083251953125, |
|
"logps/rejected": -377.6736145019531, |
|
"loss": 0.6149, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.0413603782653809, |
|
"rewards/margins": 0.24394717812538147, |
|
"rewards/rejected": -1.2853076457977295, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.33346693386773546, |
|
"grad_norm": 8.186342714745207, |
|
"learning_rate": 8.428184370142171e-07, |
|
"logits/chosen": -0.6921215653419495, |
|
"logits/rejected": -0.7096705436706543, |
|
"logps/chosen": -363.21539306640625, |
|
"logps/rejected": -384.2535400390625, |
|
"loss": 0.6144, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.9978117346763611, |
|
"rewards/margins": 0.18856188654899597, |
|
"rewards/rejected": -1.1863737106323242, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.33560454241816967, |
|
"grad_norm": 8.626047256669759, |
|
"learning_rate": 8.400863688854596e-07, |
|
"logits/chosen": -0.8120739459991455, |
|
"logits/rejected": -0.8196284770965576, |
|
"logps/chosen": -347.4595947265625, |
|
"logps/rejected": -357.397705078125, |
|
"loss": 0.6446, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.0381972789764404, |
|
"rewards/margins": 0.1279696524143219, |
|
"rewards/rejected": -1.1661670207977295, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.3377421509686039, |
|
"grad_norm": 11.860996272985476, |
|
"learning_rate": 8.373352729660372e-07, |
|
"logits/chosen": -0.7756985425949097, |
|
"logits/rejected": -0.7191120386123657, |
|
"logps/chosen": -395.55401611328125, |
|
"logps/rejected": -403.5904541015625, |
|
"loss": 0.6526, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.2536171674728394, |
|
"rewards/margins": 0.026868807151913643, |
|
"rewards/rejected": -1.280485987663269, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.3398797595190381, |
|
"grad_norm": 9.122376865267006, |
|
"learning_rate": 8.34565303179429e-07, |
|
"logits/chosen": -0.8109874725341797, |
|
"logits/rejected": -0.784782886505127, |
|
"logps/chosen": -349.2673645019531, |
|
"logps/rejected": -355.31427001953125, |
|
"loss": 0.6482, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.1418871879577637, |
|
"rewards/margins": 0.0541604682803154, |
|
"rewards/rejected": -1.1960475444793701, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.3420173680694723, |
|
"grad_norm": 10.446536418824028, |
|
"learning_rate": 8.317766145051057e-07, |
|
"logits/chosen": -0.8555909395217896, |
|
"logits/rejected": -0.8299651145935059, |
|
"logps/chosen": -393.7392272949219, |
|
"logps/rejected": -433.85565185546875, |
|
"loss": 0.68, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.2185660600662231, |
|
"rewards/margins": 0.22462578117847443, |
|
"rewards/rejected": -1.4431917667388916, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.34415497661990646, |
|
"grad_norm": 7.825411706225049, |
|
"learning_rate": 8.289693629698563e-07, |
|
"logits/chosen": -0.7958833575248718, |
|
"logits/rejected": -0.8027774095535278, |
|
"logps/chosen": -402.79913330078125, |
|
"logps/rejected": -437.49591064453125, |
|
"loss": 0.6203, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.1367416381835938, |
|
"rewards/margins": 0.3376201391220093, |
|
"rewards/rejected": -1.4743616580963135, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.34629258517034067, |
|
"grad_norm": 9.034553362218846, |
|
"learning_rate": 8.261437056390606e-07, |
|
"logits/chosen": -0.697302520275116, |
|
"logits/rejected": -0.6625763773918152, |
|
"logps/chosen": -349.05950927734375, |
|
"logps/rejected": -353.0817565917969, |
|
"loss": 0.6857, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.9678500294685364, |
|
"rewards/margins": 0.10605783760547638, |
|
"rewards/rejected": -1.0739078521728516, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.3484301937207749, |
|
"grad_norm": 8.737777630064887, |
|
"learning_rate": 8.232998006078997e-07, |
|
"logits/chosen": -0.674803614616394, |
|
"logits/rejected": -0.6823403835296631, |
|
"logps/chosen": -358.0148620605469, |
|
"logps/rejected": -384.6661071777344, |
|
"loss": 0.6235, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.1485981941223145, |
|
"rewards/margins": 0.25151392817497253, |
|
"rewards/rejected": -1.4001121520996094, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.3505678022712091, |
|
"grad_norm": 9.343701031382219, |
|
"learning_rate": 8.20437806992512e-07, |
|
"logits/chosen": -0.7436198592185974, |
|
"logits/rejected": -0.7431969046592712, |
|
"logps/chosen": -316.6277770996094, |
|
"logps/rejected": -367.4931335449219, |
|
"loss": 0.6664, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.0274879932403564, |
|
"rewards/margins": 0.1997983604669571, |
|
"rewards/rejected": -1.2272862195968628, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.3527054108216433, |
|
"grad_norm": 8.418205426089232, |
|
"learning_rate": 8.175578849210894e-07, |
|
"logits/chosen": -0.7993863224983215, |
|
"logits/rejected": -0.7827702164649963, |
|
"logps/chosen": -393.60791015625, |
|
"logps/rejected": -424.71124267578125, |
|
"loss": 0.6392, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.1842068433761597, |
|
"rewards/margins": 0.3191227316856384, |
|
"rewards/rejected": -1.5033295154571533, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.35484301937207746, |
|
"grad_norm": 8.984910438057955, |
|
"learning_rate": 8.146601955249187e-07, |
|
"logits/chosen": -0.7122502326965332, |
|
"logits/rejected": -0.7099603414535522, |
|
"logps/chosen": -365.7021179199219, |
|
"logps/rejected": -365.78912353515625, |
|
"loss": 0.6637, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.2422032356262207, |
|
"rewards/margins": -0.04617507755756378, |
|
"rewards/rejected": -1.196028232574463, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.3569806279225117, |
|
"grad_norm": 7.926871474687121, |
|
"learning_rate": 8.117449009293668e-07, |
|
"logits/chosen": -0.7609111666679382, |
|
"logits/rejected": -0.7424649000167847, |
|
"logps/chosen": -367.951416015625, |
|
"logps/rejected": -388.4793395996094, |
|
"loss": 0.6288, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.1471714973449707, |
|
"rewards/margins": 0.1946582943201065, |
|
"rewards/rejected": -1.341829776763916, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.3591182364729459, |
|
"grad_norm": 8.370915442021108, |
|
"learning_rate": 8.088121642448089e-07, |
|
"logits/chosen": -0.7230314016342163, |
|
"logits/rejected": -0.7338634729385376, |
|
"logps/chosen": -383.22216796875, |
|
"logps/rejected": -422.01068115234375, |
|
"loss": 0.6387, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.0255863666534424, |
|
"rewards/margins": 0.5024391412734985, |
|
"rewards/rejected": -1.528025507926941, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.3612558450233801, |
|
"grad_norm": 8.13714962488371, |
|
"learning_rate": 8.058621495575031e-07, |
|
"logits/chosen": -0.6844447255134583, |
|
"logits/rejected": -0.6487768888473511, |
|
"logps/chosen": -350.7132568359375, |
|
"logps/rejected": -367.8271484375, |
|
"loss": 0.6105, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.101859450340271, |
|
"rewards/margins": 0.21480971574783325, |
|
"rewards/rejected": -1.316669225692749, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.3633934535738143, |
|
"grad_norm": 9.892000882662467, |
|
"learning_rate": 8.028950219204099e-07, |
|
"logits/chosen": -0.5773683190345764, |
|
"logits/rejected": -0.5765209794044495, |
|
"logps/chosen": -370.47796630859375, |
|
"logps/rejected": -415.53167724609375, |
|
"loss": 0.5997, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.1410350799560547, |
|
"rewards/margins": 0.3918205499649048, |
|
"rewards/rejected": -1.532855749130249, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.3655310621242485, |
|
"grad_norm": 8.106890489682133, |
|
"learning_rate": 7.999109473439569e-07, |
|
"logits/chosen": -0.6529942154884338, |
|
"logits/rejected": -0.6343085169792175, |
|
"logps/chosen": -358.777099609375, |
|
"logps/rejected": -388.01995849609375, |
|
"loss": 0.6249, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.1139953136444092, |
|
"rewards/margins": 0.19699575006961823, |
|
"rewards/rejected": -1.3109909296035767, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.3676686706746827, |
|
"grad_norm": 7.818768691894028, |
|
"learning_rate": 7.969100927867507e-07, |
|
"logits/chosen": -0.7647715210914612, |
|
"logits/rejected": -0.768187940120697, |
|
"logps/chosen": -315.3676452636719, |
|
"logps/rejected": -344.0660095214844, |
|
"loss": 0.6095, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.9741207361221313, |
|
"rewards/margins": 0.2204282581806183, |
|
"rewards/rejected": -1.1945490837097168, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.3698062792251169, |
|
"grad_norm": 8.22923262916057, |
|
"learning_rate": 7.938926261462365e-07, |
|
"logits/chosen": -0.7851884961128235, |
|
"logits/rejected": -0.8041540384292603, |
|
"logps/chosen": -318.61712646484375, |
|
"logps/rejected": -398.345458984375, |
|
"loss": 0.5961, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.9148141741752625, |
|
"rewards/margins": 0.5934224128723145, |
|
"rewards/rejected": -1.5082364082336426, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.3719438877755511, |
|
"grad_norm": 8.000153942367305, |
|
"learning_rate": 7.908587162493028e-07, |
|
"logits/chosen": -0.6852933168411255, |
|
"logits/rejected": -0.6849787831306458, |
|
"logps/chosen": -406.8155212402344, |
|
"logps/rejected": -447.3205261230469, |
|
"loss": 0.6301, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.2369593381881714, |
|
"rewards/margins": 0.23439320921897888, |
|
"rewards/rejected": -1.4713525772094727, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.3740814963259853, |
|
"grad_norm": 9.316195243095242, |
|
"learning_rate": 7.878085328428368e-07, |
|
"logits/chosen": -0.7409089803695679, |
|
"logits/rejected": -0.7157390713691711, |
|
"logps/chosen": -338.6581115722656, |
|
"logps/rejected": -357.2071838378906, |
|
"loss": 0.646, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.051304817199707, |
|
"rewards/margins": 0.1540244072675705, |
|
"rewards/rejected": -1.2053292989730835, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.3762191048764195, |
|
"grad_norm": 9.548217777892644, |
|
"learning_rate": 7.84742246584226e-07, |
|
"logits/chosen": -0.644868016242981, |
|
"logits/rejected": -0.6358535885810852, |
|
"logps/chosen": -280.54644775390625, |
|
"logps/rejected": -320.94866943359375, |
|
"loss": 0.6304, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.9811806082725525, |
|
"rewards/margins": 0.36237311363220215, |
|
"rewards/rejected": -1.3435536623001099, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.37835671342685373, |
|
"grad_norm": 8.213256746442323, |
|
"learning_rate": 7.81660029031811e-07, |
|
"logits/chosen": -0.7351135015487671, |
|
"logits/rejected": -0.7099937796592712, |
|
"logps/chosen": -403.18609619140625, |
|
"logps/rejected": -427.598876953125, |
|
"loss": 0.6286, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.3992477655410767, |
|
"rewards/margins": 0.21843519806861877, |
|
"rewards/rejected": -1.6176831722259521, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.3804943219772879, |
|
"grad_norm": 9.877087816575154, |
|
"learning_rate": 7.785620526352861e-07, |
|
"logits/chosen": -0.6065413355827332, |
|
"logits/rejected": -0.6187620759010315, |
|
"logps/chosen": -417.3489074707031, |
|
"logps/rejected": -418.964599609375, |
|
"loss": 0.6396, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.4968540668487549, |
|
"rewards/margins": 0.15608513355255127, |
|
"rewards/rejected": -1.6529392004013062, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.3826319305277221, |
|
"grad_norm": 9.518985768520599, |
|
"learning_rate": 7.754484907260512e-07, |
|
"logits/chosen": -0.6335625648498535, |
|
"logits/rejected": -0.6501979231834412, |
|
"logps/chosen": -320.66973876953125, |
|
"logps/rejected": -377.0364990234375, |
|
"loss": 0.6203, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.9643224477767944, |
|
"rewards/margins": 0.4927278459072113, |
|
"rewards/rejected": -1.4570502042770386, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.3847695390781563, |
|
"grad_norm": 8.327661579909856, |
|
"learning_rate": 7.723195175075135e-07, |
|
"logits/chosen": -0.8008890748023987, |
|
"logits/rejected": -0.8070433735847473, |
|
"logps/chosen": -385.35711669921875, |
|
"logps/rejected": -417.4620056152344, |
|
"loss": 0.63, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.1224894523620605, |
|
"rewards/margins": 0.2396533489227295, |
|
"rewards/rejected": -1.36214280128479, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.3869071476285905, |
|
"grad_norm": 8.177222796415196, |
|
"learning_rate": 7.691753080453411e-07, |
|
"logits/chosen": -0.7654060125350952, |
|
"logits/rejected": -0.7563324570655823, |
|
"logps/chosen": -372.6927185058594, |
|
"logps/rejected": -392.412841796875, |
|
"loss": 0.6178, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.2625975608825684, |
|
"rewards/margins": 0.1758994460105896, |
|
"rewards/rejected": -1.4384969472885132, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.38904475617902473, |
|
"grad_norm": 8.268029500490163, |
|
"learning_rate": 7.660160382576683e-07, |
|
"logits/chosen": -0.8044633865356445, |
|
"logits/rejected": -0.8295111060142517, |
|
"logps/chosen": -387.167724609375, |
|
"logps/rejected": -421.73223876953125, |
|
"loss": 0.6057, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.2140891551971436, |
|
"rewards/margins": 0.26494261622428894, |
|
"rewards/rejected": -1.4790318012237549, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.39118236472945894, |
|
"grad_norm": 8.205646872076233, |
|
"learning_rate": 7.628418849052523e-07, |
|
"logits/chosen": -0.7259032726287842, |
|
"logits/rejected": -0.7147877812385559, |
|
"logps/chosen": -332.19952392578125, |
|
"logps/rejected": -358.916748046875, |
|
"loss": 0.6335, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.2255278825759888, |
|
"rewards/margins": 0.18311724066734314, |
|
"rewards/rejected": -1.4086451530456543, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.3933199732798931, |
|
"grad_norm": 9.273642724003066, |
|
"learning_rate": 7.596530255815845e-07, |
|
"logits/chosen": -0.6111272573471069, |
|
"logits/rejected": -0.6174825429916382, |
|
"logps/chosen": -431.119140625, |
|
"logps/rejected": -474.2237854003906, |
|
"loss": 0.5914, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.340221643447876, |
|
"rewards/margins": 0.47385597229003906, |
|
"rewards/rejected": -1.814077615737915, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.3954575818303273, |
|
"grad_norm": 9.03001535554559, |
|
"learning_rate": 7.564496387029531e-07, |
|
"logits/chosen": -0.5710060000419617, |
|
"logits/rejected": -0.6027272343635559, |
|
"logps/chosen": -402.5880432128906, |
|
"logps/rejected": -461.81390380859375, |
|
"loss": 0.5815, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.2832955121994019, |
|
"rewards/margins": 0.44037097692489624, |
|
"rewards/rejected": -1.7236665487289429, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.3975951903807615, |
|
"grad_norm": 10.65885585737553, |
|
"learning_rate": 7.532319034984614e-07, |
|
"logits/chosen": -0.6792325377464294, |
|
"logits/rejected": -0.7070844769477844, |
|
"logps/chosen": -345.3462219238281, |
|
"logps/rejected": -380.2834167480469, |
|
"loss": 0.606, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.0898866653442383, |
|
"rewards/margins": 0.2850308120250702, |
|
"rewards/rejected": -1.3749175071716309, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.39973279893119573, |
|
"grad_norm": 10.832365903487103, |
|
"learning_rate": 7.5e-07, |
|
"logits/chosen": -0.6209002733230591, |
|
"logits/rejected": -0.5837200880050659, |
|
"logps/chosen": -448.4324035644531, |
|
"logps/rejected": -443.7789306640625, |
|
"loss": 0.6723, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -1.5713595151901245, |
|
"rewards/margins": 0.1559199094772339, |
|
"rewards/rejected": -1.7272793054580688, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.40187040748162994, |
|
"grad_norm": 8.78650176694853, |
|
"learning_rate": 7.467541090321733e-07, |
|
"logits/chosen": -0.6626260876655579, |
|
"logits/rejected": -0.6681480407714844, |
|
"logps/chosen": -357.17535400390625, |
|
"logps/rejected": -392.3631591796875, |
|
"loss": 0.6327, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.0751440525054932, |
|
"rewards/margins": 0.28642958402633667, |
|
"rewards/rejected": -1.361573576927185, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.40400801603206415, |
|
"grad_norm": 9.268270452493919, |
|
"learning_rate": 7.434944122021836e-07, |
|
"logits/chosen": -0.7080458402633667, |
|
"logits/rejected": -0.6918138861656189, |
|
"logps/chosen": -428.0231628417969, |
|
"logps/rejected": -447.84588623046875, |
|
"loss": 0.5866, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.2396382093429565, |
|
"rewards/margins": 0.2672892212867737, |
|
"rewards/rejected": -1.506927490234375, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.4061456245824983, |
|
"grad_norm": 9.083796097067385, |
|
"learning_rate": 7.402210918896689e-07, |
|
"logits/chosen": -0.6990772485733032, |
|
"logits/rejected": -0.6821334362030029, |
|
"logps/chosen": -330.4400329589844, |
|
"logps/rejected": -351.8786926269531, |
|
"loss": 0.5944, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.078837275505066, |
|
"rewards/margins": 0.31049594283103943, |
|
"rewards/rejected": -1.3893331289291382, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.4082832331329325, |
|
"grad_norm": 8.342069569715447, |
|
"learning_rate": 7.369343312364993e-07, |
|
"logits/chosen": -0.6898236870765686, |
|
"logits/rejected": -0.7303708791732788, |
|
"logps/chosen": -365.74688720703125, |
|
"logps/rejected": -406.60125732421875, |
|
"loss": 0.5822, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.3478041887283325, |
|
"rewards/margins": 0.3217250108718872, |
|
"rewards/rejected": -1.6695290803909302, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.41042084168336673, |
|
"grad_norm": 11.11112446563951, |
|
"learning_rate": 7.33634314136531e-07, |
|
"logits/chosen": -0.567010223865509, |
|
"logits/rejected": -0.5823702812194824, |
|
"logps/chosen": -351.3297119140625, |
|
"logps/rejected": -352.91400146484375, |
|
"loss": 0.6731, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.202454924583435, |
|
"rewards/margins": 0.1386478990316391, |
|
"rewards/rejected": -1.3411028385162354, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.41255845023380094, |
|
"grad_norm": 10.094586281846308, |
|
"learning_rate": 7.303212252253161e-07, |
|
"logits/chosen": -0.6867839694023132, |
|
"logits/rejected": -0.631986677646637, |
|
"logps/chosen": -446.3096008300781, |
|
"logps/rejected": -481.1722717285156, |
|
"loss": 0.5804, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.420650601387024, |
|
"rewards/margins": 0.3154638409614563, |
|
"rewards/rejected": -1.7361143827438354, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.41469605878423516, |
|
"grad_norm": 9.22745603420781, |
|
"learning_rate": 7.269952498697734e-07, |
|
"logits/chosen": -0.6122913360595703, |
|
"logits/rejected": -0.5846338868141174, |
|
"logps/chosen": -404.2279052734375, |
|
"logps/rejected": -479.53546142578125, |
|
"loss": 0.5926, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.4300614595413208, |
|
"rewards/margins": 0.682074785232544, |
|
"rewards/rejected": -2.112136125564575, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.4168336673346693, |
|
"grad_norm": 8.922810963128454, |
|
"learning_rate": 7.236565741578162e-07, |
|
"logits/chosen": -0.7965989112854004, |
|
"logits/rejected": -0.8105958104133606, |
|
"logps/chosen": -412.70068359375, |
|
"logps/rejected": -459.9786376953125, |
|
"loss": 0.6098, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.1933541297912598, |
|
"rewards/margins": 0.4532015025615692, |
|
"rewards/rejected": -1.6465556621551514, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.4189712758851035, |
|
"grad_norm": 9.70951721797377, |
|
"learning_rate": 7.203053848879418e-07, |
|
"logits/chosen": -0.66545569896698, |
|
"logits/rejected": -0.6426224708557129, |
|
"logps/chosen": -417.62750244140625, |
|
"logps/rejected": -446.0390625, |
|
"loss": 0.6345, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.4516615867614746, |
|
"rewards/margins": 0.234949991106987, |
|
"rewards/rejected": -1.6866116523742676, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.42110888443553773, |
|
"grad_norm": 10.834268818449964, |
|
"learning_rate": 7.16941869558779e-07, |
|
"logits/chosen": -0.6952583193778992, |
|
"logits/rejected": -0.6965677738189697, |
|
"logps/chosen": -447.6587829589844, |
|
"logps/rejected": -491.00872802734375, |
|
"loss": 0.6368, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.6263762712478638, |
|
"rewards/margins": 0.3232609033584595, |
|
"rewards/rejected": -1.9496371746063232, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.42324649298597194, |
|
"grad_norm": 10.734655546374897, |
|
"learning_rate": 7.135662163585984e-07, |
|
"logits/chosen": -0.7219685316085815, |
|
"logits/rejected": -0.7239058613777161, |
|
"logps/chosen": -379.8273620605469, |
|
"logps/rejected": -416.1621398925781, |
|
"loss": 0.6473, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.4015758037567139, |
|
"rewards/margins": 0.279184490442276, |
|
"rewards/rejected": -1.6807602643966675, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.42538410153640616, |
|
"grad_norm": 10.226763136881486, |
|
"learning_rate": 7.101786141547828e-07, |
|
"logits/chosen": -0.6653244495391846, |
|
"logits/rejected": -0.6480982303619385, |
|
"logps/chosen": -388.545166015625, |
|
"logps/rejected": -400.65447998046875, |
|
"loss": 0.6346, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.3625783920288086, |
|
"rewards/margins": 0.17150306701660156, |
|
"rewards/rejected": -1.5340813398361206, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.42752171008684037, |
|
"grad_norm": 11.800159452188982, |
|
"learning_rate": 7.067792524832603e-07, |
|
"logits/chosen": -0.802920401096344, |
|
"logits/rejected": -0.7953581213951111, |
|
"logps/chosen": -469.48583984375, |
|
"logps/rejected": -514.489501953125, |
|
"loss": 0.6935, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.7296805381774902, |
|
"rewards/margins": 0.4207393229007721, |
|
"rewards/rejected": -2.1504197120666504, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.42752171008684037, |
|
"eval_logits/chosen": -0.6628897190093994, |
|
"eval_logits/rejected": -0.6649256348609924, |
|
"eval_logps/chosen": -392.1436767578125, |
|
"eval_logps/rejected": -424.3627624511719, |
|
"eval_loss": 0.635185182094574, |
|
"eval_rewards/accuracies": 0.6544715166091919, |
|
"eval_rewards/chosen": -1.3659569025039673, |
|
"eval_rewards/margins": 0.2651316225528717, |
|
"eval_rewards/rejected": -1.6310884952545166, |
|
"eval_runtime": 376.3857, |
|
"eval_samples_per_second": 5.21, |
|
"eval_steps_per_second": 0.327, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.4296593186372745, |
|
"grad_norm": 10.509050348979823, |
|
"learning_rate": 7.033683215379002e-07, |
|
"logits/chosen": -0.7490158081054688, |
|
"logits/rejected": -0.7795702219009399, |
|
"logps/chosen": -444.27264404296875, |
|
"logps/rejected": -450.5096435546875, |
|
"loss": 0.6259, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.6222877502441406, |
|
"rewards/margins": 0.0773380845785141, |
|
"rewards/rejected": -1.699625849723816, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.43179692718770873, |
|
"grad_norm": 9.361779380994284, |
|
"learning_rate": 6.999460121598704e-07, |
|
"logits/chosen": -0.8867595195770264, |
|
"logits/rejected": -0.8778724074363708, |
|
"logps/chosen": -395.88262939453125, |
|
"logps/rejected": -424.9254455566406, |
|
"loss": 0.6199, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.3475773334503174, |
|
"rewards/margins": 0.2797107696533203, |
|
"rewards/rejected": -1.6272879838943481, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.43393453573814295, |
|
"grad_norm": 10.465713404951545, |
|
"learning_rate": 6.965125158269618e-07, |
|
"logits/chosen": -0.7478022575378418, |
|
"logits/rejected": -0.7213735580444336, |
|
"logps/chosen": -375.4535217285156, |
|
"logps/rejected": -400.4565734863281, |
|
"loss": 0.6452, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.3242757320404053, |
|
"rewards/margins": 0.18743321299552917, |
|
"rewards/rejected": -1.5117088556289673, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.43607214428857716, |
|
"grad_norm": 9.542403717436502, |
|
"learning_rate": 6.93068024642873e-07, |
|
"logits/chosen": -0.7434294819831848, |
|
"logits/rejected": -0.7202074527740479, |
|
"logps/chosen": -367.4134216308594, |
|
"logps/rejected": -395.12396240234375, |
|
"loss": 0.6408, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.321439266204834, |
|
"rewards/margins": 0.3816969692707062, |
|
"rewards/rejected": -1.7031362056732178, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.43820975283901137, |
|
"grad_norm": 11.318421364351005, |
|
"learning_rate": 6.896127313264642e-07, |
|
"logits/chosen": -0.6576538681983948, |
|
"logits/rejected": -0.6434054374694824, |
|
"logps/chosen": -381.1850280761719, |
|
"logps/rejected": -385.73736572265625, |
|
"loss": 0.6267, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.294406771659851, |
|
"rewards/margins": 0.12336639314889908, |
|
"rewards/rejected": -1.4177730083465576, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.4403473613894456, |
|
"grad_norm": 8.514452329680676, |
|
"learning_rate": 6.861468292009726e-07, |
|
"logits/chosen": -0.652076780796051, |
|
"logits/rejected": -0.6382969617843628, |
|
"logps/chosen": -392.5809326171875, |
|
"logps/rejected": -430.596923828125, |
|
"loss": 0.6304, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.44416344165802, |
|
"rewards/margins": 0.37998878955841064, |
|
"rewards/rejected": -1.8241522312164307, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.44248496993987974, |
|
"grad_norm": 9.679027742003948, |
|
"learning_rate": 6.826705121831976e-07, |
|
"logits/chosen": -0.7171617746353149, |
|
"logits/rejected": -0.7156708240509033, |
|
"logps/chosen": -378.35528564453125, |
|
"logps/rejected": -411.4539489746094, |
|
"loss": 0.6376, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.3946869373321533, |
|
"rewards/margins": 0.37037399411201477, |
|
"rewards/rejected": -1.7650609016418457, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.44462257849031395, |
|
"grad_norm": 10.610288706227443, |
|
"learning_rate": 6.7918397477265e-07, |
|
"logits/chosen": -0.6665509939193726, |
|
"logits/rejected": -0.6577183604240417, |
|
"logps/chosen": -365.9376525878906, |
|
"logps/rejected": -362.8846740722656, |
|
"loss": 0.6653, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.1479219198226929, |
|
"rewards/margins": 0.01849663257598877, |
|
"rewards/rejected": -1.1664186716079712, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.44676018704074816, |
|
"grad_norm": 9.112597710939323, |
|
"learning_rate": 6.756874120406714e-07, |
|
"logits/chosen": -0.6265541315078735, |
|
"logits/rejected": -0.61783766746521, |
|
"logps/chosen": -381.3807067871094, |
|
"logps/rejected": -425.4930725097656, |
|
"loss": 0.6119, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.280937671661377, |
|
"rewards/margins": 0.3031711280345917, |
|
"rewards/rejected": -1.584108829498291, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.44889779559118237, |
|
"grad_norm": 9.390736537982283, |
|
"learning_rate": 6.721810196195174e-07, |
|
"logits/chosen": -0.7654869556427002, |
|
"logits/rejected": -0.7667275071144104, |
|
"logps/chosen": -417.27069091796875, |
|
"logps/rejected": -449.1034851074219, |
|
"loss": 0.6217, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.3596787452697754, |
|
"rewards/margins": 0.3042774796485901, |
|
"rewards/rejected": -1.6639561653137207, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.4510354041416166, |
|
"grad_norm": 9.394078254466548, |
|
"learning_rate": 6.68664993691415e-07, |
|
"logits/chosen": -0.6547084450721741, |
|
"logits/rejected": -0.647241473197937, |
|
"logps/chosen": -336.94915771484375, |
|
"logps/rejected": -371.03515625, |
|
"loss": 0.6312, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.9777745604515076, |
|
"rewards/margins": 0.2829311490058899, |
|
"rewards/rejected": -1.2607057094573975, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.4531730126920508, |
|
"grad_norm": 8.760414806290829, |
|
"learning_rate": 6.651395309775836e-07, |
|
"logits/chosen": -0.6064110398292542, |
|
"logits/rejected": -0.5819242000579834, |
|
"logps/chosen": -353.7124938964844, |
|
"logps/rejected": -384.0793151855469, |
|
"loss": 0.5966, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.1342616081237793, |
|
"rewards/margins": 0.3782859742641449, |
|
"rewards/rejected": -1.512547492980957, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.45531062124248495, |
|
"grad_norm": 9.331952485323354, |
|
"learning_rate": 6.6160482872723e-07, |
|
"logits/chosen": -0.6409544944763184, |
|
"logits/rejected": -0.6478085517883301, |
|
"logps/chosen": -374.2773742675781, |
|
"logps/rejected": -397.2945861816406, |
|
"loss": 0.6342, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.2687841653823853, |
|
"rewards/margins": 0.11245452612638474, |
|
"rewards/rejected": -1.3812386989593506, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.45744822979291916, |
|
"grad_norm": 9.844190008748196, |
|
"learning_rate": 6.580610847065123e-07, |
|
"logits/chosen": -0.6078667640686035, |
|
"logits/rejected": -0.60109543800354, |
|
"logps/chosen": -357.74810791015625, |
|
"logps/rejected": -391.06268310546875, |
|
"loss": 0.614, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.1224141120910645, |
|
"rewards/margins": 0.23654705286026, |
|
"rewards/rejected": -1.3589611053466797, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.45958583834335337, |
|
"grad_norm": 9.317047438854233, |
|
"learning_rate": 6.545084971874736e-07, |
|
"logits/chosen": -0.608707845211029, |
|
"logits/rejected": -0.6254767775535583, |
|
"logps/chosen": -340.4634094238281, |
|
"logps/rejected": -377.37152099609375, |
|
"loss": 0.655, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.0063505172729492, |
|
"rewards/margins": 0.29992133378982544, |
|
"rewards/rejected": -1.3062719106674194, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.4617234468937876, |
|
"grad_norm": 9.52121536372048, |
|
"learning_rate": 6.509472649369509e-07, |
|
"logits/chosen": -0.642886221408844, |
|
"logits/rejected": -0.6272940039634705, |
|
"logps/chosen": -324.8238525390625, |
|
"logps/rejected": -367.4193115234375, |
|
"loss": 0.5939, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.9792121052742004, |
|
"rewards/margins": 0.3688339293003082, |
|
"rewards/rejected": -1.3480459451675415, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.4638610554442218, |
|
"grad_norm": 10.890742309360663, |
|
"learning_rate": 6.473775872054521e-07, |
|
"logits/chosen": -0.6968441009521484, |
|
"logits/rejected": -0.6998182535171509, |
|
"logps/chosen": -425.0888977050781, |
|
"logps/rejected": -457.17889404296875, |
|
"loss": 0.6358, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.394580364227295, |
|
"rewards/margins": 0.2801092267036438, |
|
"rewards/rejected": -1.6746896505355835, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.465998663994656, |
|
"grad_norm": 8.973438938365845, |
|
"learning_rate": 6.437996637160086e-07, |
|
"logits/chosen": -0.6339977979660034, |
|
"logits/rejected": -0.605747401714325, |
|
"logps/chosen": -359.0996398925781, |
|
"logps/rejected": -398.10284423828125, |
|
"loss": 0.6284, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.1331509351730347, |
|
"rewards/margins": 0.29103800654411316, |
|
"rewards/rejected": -1.4241892099380493, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.46813627254509016, |
|
"grad_norm": 9.024594454350343, |
|
"learning_rate": 6.402136946530014e-07, |
|
"logits/chosen": -0.6726840734481812, |
|
"logits/rejected": -0.6727656722068787, |
|
"logps/chosen": -411.4464111328125, |
|
"logps/rejected": -438.1039733886719, |
|
"loss": 0.6074, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.137448787689209, |
|
"rewards/margins": 0.2949088513851166, |
|
"rewards/rejected": -1.4323575496673584, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.47027388109552437, |
|
"grad_norm": 9.318954884923675, |
|
"learning_rate": 6.3661988065096e-07, |
|
"logits/chosen": -0.5828653573989868, |
|
"logits/rejected": -0.5879778861999512, |
|
"logps/chosen": -416.6065673828125, |
|
"logps/rejected": -447.0008544921875, |
|
"loss": 0.6169, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.4899258613586426, |
|
"rewards/margins": 0.2851335406303406, |
|
"rewards/rejected": -1.7750593423843384, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.4724114896459586, |
|
"grad_norm": 10.91663306855251, |
|
"learning_rate": 6.330184227833375e-07, |
|
"logits/chosen": -0.6656166315078735, |
|
"logits/rejected": -0.654589056968689, |
|
"logps/chosen": -380.12811279296875, |
|
"logps/rejected": -417.3984069824219, |
|
"loss": 0.5782, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.2443169355392456, |
|
"rewards/margins": 0.4823899269104004, |
|
"rewards/rejected": -1.726706862449646, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.4745490981963928, |
|
"grad_norm": 9.66304655835996, |
|
"learning_rate": 6.294095225512604e-07, |
|
"logits/chosen": -0.6804403066635132, |
|
"logits/rejected": -0.6730751395225525, |
|
"logps/chosen": -391.51995849609375, |
|
"logps/rejected": -434.95928955078125, |
|
"loss": 0.6007, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.164958119392395, |
|
"rewards/margins": 0.4750928580760956, |
|
"rewards/rejected": -1.640051007270813, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.476686706746827, |
|
"grad_norm": 11.307540321918372, |
|
"learning_rate": 6.257933818722542e-07, |
|
"logits/chosen": -0.6279383301734924, |
|
"logits/rejected": -0.6163449883460999, |
|
"logps/chosen": -376.4117736816406, |
|
"logps/rejected": -397.33917236328125, |
|
"loss": 0.6871, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -1.320064663887024, |
|
"rewards/margins": 0.12475023418664932, |
|
"rewards/rejected": -1.444814920425415, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.4788243152972612, |
|
"grad_norm": 9.673767465041793, |
|
"learning_rate": 6.22170203068947e-07, |
|
"logits/chosen": -0.711574912071228, |
|
"logits/rejected": -0.6971991062164307, |
|
"logps/chosen": -370.3948059082031, |
|
"logps/rejected": -394.70379638671875, |
|
"loss": 0.594, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.2323672771453857, |
|
"rewards/margins": 0.2632126808166504, |
|
"rewards/rejected": -1.4955798387527466, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.48096192384769537, |
|
"grad_norm": 14.301565196390225, |
|
"learning_rate": 6.185401888577487e-07, |
|
"logits/chosen": -0.7201038599014282, |
|
"logits/rejected": -0.713502049446106, |
|
"logps/chosen": -411.15997314453125, |
|
"logps/rejected": -440.4834289550781, |
|
"loss": 0.6286, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.4549378156661987, |
|
"rewards/margins": 0.26348626613616943, |
|
"rewards/rejected": -1.7184242010116577, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.4830995323981296, |
|
"grad_norm": 10.44824838559519, |
|
"learning_rate": 6.149035423375098e-07, |
|
"logits/chosen": -0.7044095993041992, |
|
"logits/rejected": -0.7011440992355347, |
|
"logps/chosen": -394.7225341796875, |
|
"logps/rejected": -418.9303283691406, |
|
"loss": 0.6385, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.2195916175842285, |
|
"rewards/margins": 0.20536328852176666, |
|
"rewards/rejected": -1.424954891204834, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.4852371409485638, |
|
"grad_norm": 11.00631388790137, |
|
"learning_rate": 6.112604669781572e-07, |
|
"logits/chosen": -0.735901951789856, |
|
"logits/rejected": -0.6977694034576416, |
|
"logps/chosen": -438.9553527832031, |
|
"logps/rejected": -447.6878662109375, |
|
"loss": 0.6141, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -1.478360891342163, |
|
"rewards/margins": 0.119898721575737, |
|
"rewards/rejected": -1.5982595682144165, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.487374749498998, |
|
"grad_norm": 10.507160088155747, |
|
"learning_rate": 6.07611166609311e-07, |
|
"logits/chosen": -0.7429340481758118, |
|
"logits/rejected": -0.7295467257499695, |
|
"logps/chosen": -430.9995422363281, |
|
"logps/rejected": -448.747314453125, |
|
"loss": 0.6533, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.4200453758239746, |
|
"rewards/margins": 0.17433959245681763, |
|
"rewards/rejected": -1.594385027885437, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.4895123580494322, |
|
"grad_norm": 8.307584591306474, |
|
"learning_rate": 6.039558454088795e-07, |
|
"logits/chosen": -0.6406713128089905, |
|
"logits/rejected": -0.6399562358856201, |
|
"logps/chosen": -332.7983703613281, |
|
"logps/rejected": -353.19384765625, |
|
"loss": 0.5913, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.1042307615280151, |
|
"rewards/margins": 0.19572903215885162, |
|
"rewards/rejected": -1.2999597787857056, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.4916499665998664, |
|
"grad_norm": 11.994876856372567, |
|
"learning_rate": 6.002947078916364e-07, |
|
"logits/chosen": -0.6426191926002502, |
|
"logits/rejected": -0.6602756977081299, |
|
"logps/chosen": -344.1719665527344, |
|
"logps/rejected": -356.783935546875, |
|
"loss": 0.69, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.2262554168701172, |
|
"rewards/margins": 0.10306321084499359, |
|
"rewards/rejected": -1.329318642616272, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.4937875751503006, |
|
"grad_norm": 10.289938408873015, |
|
"learning_rate": 5.966279588977766e-07, |
|
"logits/chosen": -0.7598620653152466, |
|
"logits/rejected": -0.7735162377357483, |
|
"logps/chosen": -382.27630615234375, |
|
"logps/rejected": -393.7611083984375, |
|
"loss": 0.6243, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.1877317428588867, |
|
"rewards/margins": 0.17938965559005737, |
|
"rewards/rejected": -1.3671213388442993, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.4959251837007348, |
|
"grad_norm": 10.32285872025184, |
|
"learning_rate": 5.929558035814574e-07, |
|
"logits/chosen": -0.5800771713256836, |
|
"logits/rejected": -0.5892568826675415, |
|
"logps/chosen": -364.911376953125, |
|
"logps/rejected": -363.5468444824219, |
|
"loss": 0.6196, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.3333511352539062, |
|
"rewards/margins": 0.1056426540017128, |
|
"rewards/rejected": -1.4389936923980713, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.498062792251169, |
|
"grad_norm": 10.3934909690253, |
|
"learning_rate": 5.892784473993183e-07, |
|
"logits/chosen": -0.6197159290313721, |
|
"logits/rejected": -0.6411285400390625, |
|
"logps/chosen": -372.03424072265625, |
|
"logps/rejected": -401.731201171875, |
|
"loss": 0.5626, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.235286831855774, |
|
"rewards/margins": 0.3155067563056946, |
|
"rewards/rejected": -1.5507938861846924, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.5002004008016032, |
|
"grad_norm": 10.257979100996899, |
|
"learning_rate": 5.855960960989876e-07, |
|
"logits/chosen": -0.7090120911598206, |
|
"logits/rejected": -0.6980421543121338, |
|
"logps/chosen": -328.9789123535156, |
|
"logps/rejected": -349.19036865234375, |
|
"loss": 0.6148, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.2482969760894775, |
|
"rewards/margins": 0.14759615063667297, |
|
"rewards/rejected": -1.3958930969238281, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.5023380093520374, |
|
"grad_norm": 8.317098893301642, |
|
"learning_rate": 5.819089557075688e-07, |
|
"logits/chosen": -0.7996770739555359, |
|
"logits/rejected": -0.7929503917694092, |
|
"logps/chosen": -331.7429504394531, |
|
"logps/rejected": -369.83746337890625, |
|
"loss": 0.6066, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.9831193089485168, |
|
"rewards/margins": 0.4169498383998871, |
|
"rewards/rejected": -1.400068998336792, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.5044756179024716, |
|
"grad_norm": 10.217065084123991, |
|
"learning_rate": 5.782172325201155e-07, |
|
"logits/chosen": -0.6208564043045044, |
|
"logits/rejected": -0.635725200176239, |
|
"logps/chosen": -344.1796875, |
|
"logps/rejected": -389.52923583984375, |
|
"loss": 0.6368, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.1277637481689453, |
|
"rewards/margins": 0.37036919593811035, |
|
"rewards/rejected": -1.4981330633163452, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.5066132264529059, |
|
"grad_norm": 10.269160973282458, |
|
"learning_rate": 5.745211330880872e-07, |
|
"logits/chosen": -0.7708931565284729, |
|
"logits/rejected": -0.76704341173172, |
|
"logps/chosen": -433.10064697265625, |
|
"logps/rejected": -450.0901184082031, |
|
"loss": 0.6314, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.3207013607025146, |
|
"rewards/margins": 0.2793017327785492, |
|
"rewards/rejected": -1.6000031232833862, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.5087508350033401, |
|
"grad_norm": 10.399405697134318, |
|
"learning_rate": 5.708208642077945e-07, |
|
"logits/chosen": -0.6624871492385864, |
|
"logits/rejected": -0.6546816825866699, |
|
"logps/chosen": -333.7100524902344, |
|
"logps/rejected": -369.9173278808594, |
|
"loss": 0.6294, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.1885484457015991, |
|
"rewards/margins": 0.3089646100997925, |
|
"rewards/rejected": -1.4975128173828125, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.5108884435537742, |
|
"grad_norm": 9.80964346078248, |
|
"learning_rate": 5.671166329088277e-07, |
|
"logits/chosen": -0.7182386517524719, |
|
"logits/rejected": -0.7258840203285217, |
|
"logps/chosen": -356.9850769042969, |
|
"logps/rejected": -380.86285400390625, |
|
"loss": 0.6235, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.3426018953323364, |
|
"rewards/margins": 0.24537137150764465, |
|
"rewards/rejected": -1.5879731178283691, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.5130260521042084, |
|
"grad_norm": 10.018454517912645, |
|
"learning_rate": 5.634086464424742e-07, |
|
"logits/chosen": -0.6738543510437012, |
|
"logits/rejected": -0.6593906283378601, |
|
"logps/chosen": -359.072021484375, |
|
"logps/rejected": -400.2255554199219, |
|
"loss": 0.6674, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.3493218421936035, |
|
"rewards/margins": 0.24295195937156677, |
|
"rewards/rejected": -1.5922737121582031, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.5151636606546426, |
|
"grad_norm": 9.883809912496002, |
|
"learning_rate": 5.596971122701221e-07, |
|
"logits/chosen": -0.8064689636230469, |
|
"logits/rejected": -0.777323305606842, |
|
"logps/chosen": -383.2707214355469, |
|
"logps/rejected": -394.1632995605469, |
|
"loss": 0.6133, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.2935426235198975, |
|
"rewards/margins": 0.17417016625404358, |
|
"rewards/rejected": -1.4677127599716187, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.5173012692050768, |
|
"grad_norm": 9.835505308989157, |
|
"learning_rate": 5.559822380516539e-07, |
|
"logits/chosen": -0.74181067943573, |
|
"logits/rejected": -0.76103276014328, |
|
"logps/chosen": -413.28607177734375, |
|
"logps/rejected": -432.421142578125, |
|
"loss": 0.6135, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.6156772375106812, |
|
"rewards/margins": 0.052678730338811874, |
|
"rewards/rejected": -1.668355941772461, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.519438877755511, |
|
"grad_norm": 10.044057697511136, |
|
"learning_rate": 5.522642316338268e-07, |
|
"logits/chosen": -0.7109071016311646, |
|
"logits/rejected": -0.738073468208313, |
|
"logps/chosen": -371.0440673828125, |
|
"logps/rejected": -417.84588623046875, |
|
"loss": 0.6108, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.2593053579330444, |
|
"rewards/margins": 0.29793858528137207, |
|
"rewards/rejected": -1.557244062423706, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.5215764863059452, |
|
"grad_norm": 9.78032272573038, |
|
"learning_rate": 5.48543301038644e-07, |
|
"logits/chosen": -0.8035364747047424, |
|
"logits/rejected": -0.817245364189148, |
|
"logps/chosen": -408.1662292480469, |
|
"logps/rejected": -431.6890869140625, |
|
"loss": 0.657, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.3148820400238037, |
|
"rewards/margins": 0.27137643098831177, |
|
"rewards/rejected": -1.5862585306167603, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.5237140948563794, |
|
"grad_norm": 11.286066879084709, |
|
"learning_rate": 5.448196544517167e-07, |
|
"logits/chosen": -0.8000929355621338, |
|
"logits/rejected": -0.7960721254348755, |
|
"logps/chosen": -348.828369140625, |
|
"logps/rejected": -370.3882751464844, |
|
"loss": 0.6377, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.3299049139022827, |
|
"rewards/margins": 0.21843267977237701, |
|
"rewards/rejected": -1.5483375787734985, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.5258517034068136, |
|
"grad_norm": 9.544361896918447, |
|
"learning_rate": 5.410935002106152e-07, |
|
"logits/chosen": -0.7660020589828491, |
|
"logits/rejected": -0.7475563883781433, |
|
"logps/chosen": -402.910400390625, |
|
"logps/rejected": -406.4446105957031, |
|
"loss": 0.6237, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.283268690109253, |
|
"rewards/margins": 0.24972115457057953, |
|
"rewards/rejected": -1.5329898595809937, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.5279893119572479, |
|
"grad_norm": 9.1575605917451, |
|
"learning_rate": 5.373650467932121e-07, |
|
"logits/chosen": -0.741169273853302, |
|
"logits/rejected": -0.7101236581802368, |
|
"logps/chosen": -353.3587951660156, |
|
"logps/rejected": -394.3509521484375, |
|
"loss": 0.5927, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.2977474927902222, |
|
"rewards/margins": 0.445009708404541, |
|
"rewards/rejected": -1.7427570819854736, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.5301269205076821, |
|
"grad_norm": 10.605879178884328, |
|
"learning_rate": 5.336345028060199e-07, |
|
"logits/chosen": -0.735455334186554, |
|
"logits/rejected": -0.7146904468536377, |
|
"logps/chosen": -415.8868103027344, |
|
"logps/rejected": -471.6510925292969, |
|
"loss": 0.6285, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.4680719375610352, |
|
"rewards/margins": 0.37900200486183167, |
|
"rewards/rejected": -1.8470739126205444, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.5322645290581163, |
|
"grad_norm": 9.468418276322426, |
|
"learning_rate": 5.299020769725171e-07, |
|
"logits/chosen": -0.6703728437423706, |
|
"logits/rejected": -0.6554571986198425, |
|
"logps/chosen": -414.7881164550781, |
|
"logps/rejected": -444.9212646484375, |
|
"loss": 0.6305, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.5948419570922852, |
|
"rewards/margins": 0.24465849995613098, |
|
"rewards/rejected": -1.8395004272460938, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.5344021376085505, |
|
"grad_norm": 10.105402066971774, |
|
"learning_rate": 5.26167978121472e-07, |
|
"logits/chosen": -0.6142255663871765, |
|
"logits/rejected": -0.5848169922828674, |
|
"logps/chosen": -390.851806640625, |
|
"logps/rejected": -430.1876220703125, |
|
"loss": 0.6119, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.5544801950454712, |
|
"rewards/margins": 0.30869632959365845, |
|
"rewards/rejected": -1.8631765842437744, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.5365397461589846, |
|
"grad_norm": 9.810217626805535, |
|
"learning_rate": 5.224324151752575e-07, |
|
"logits/chosen": -0.6183363795280457, |
|
"logits/rejected": -0.6150676608085632, |
|
"logps/chosen": -367.5179443359375, |
|
"logps/rejected": -416.97332763671875, |
|
"loss": 0.6083, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.2182056903839111, |
|
"rewards/margins": 0.5702115893363953, |
|
"rewards/rejected": -1.7884173393249512, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.5386773547094188, |
|
"grad_norm": 11.63997005510131, |
|
"learning_rate": 5.18695597138163e-07, |
|
"logits/chosen": -0.7786095142364502, |
|
"logits/rejected": -0.7649445533752441, |
|
"logps/chosen": -406.415771484375, |
|
"logps/rejected": -438.0752868652344, |
|
"loss": 0.6444, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.4087908267974854, |
|
"rewards/margins": 0.3740866780281067, |
|
"rewards/rejected": -1.7828774452209473, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.540814963259853, |
|
"grad_norm": 8.971335597918381, |
|
"learning_rate": 5.149577330846992e-07, |
|
"logits/chosen": -0.722287118434906, |
|
"logits/rejected": -0.7298377752304077, |
|
"logps/chosen": -385.11602783203125, |
|
"logps/rejected": -462.1266784667969, |
|
"loss": 0.5901, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.3030569553375244, |
|
"rewards/margins": 0.4734255373477936, |
|
"rewards/rejected": -1.7764827013015747, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.5429525718102872, |
|
"grad_norm": 10.226248494849832, |
|
"learning_rate": 5.112190321479025e-07, |
|
"logits/chosen": -0.7946709990501404, |
|
"logits/rejected": -0.7953794598579407, |
|
"logps/chosen": -365.50604248046875, |
|
"logps/rejected": -393.0849914550781, |
|
"loss": 0.6099, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.3220914602279663, |
|
"rewards/margins": 0.16996119916439056, |
|
"rewards/rejected": -1.492052674293518, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.5450901803607214, |
|
"grad_norm": 9.70395445393845, |
|
"learning_rate": 5.074797035076318e-07, |
|
"logits/chosen": -0.8279726505279541, |
|
"logits/rejected": -0.8029213547706604, |
|
"logps/chosen": -363.36956787109375, |
|
"logps/rejected": -353.39300537109375, |
|
"loss": 0.5921, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -1.4433786869049072, |
|
"rewards/margins": 0.08160518109798431, |
|
"rewards/rejected": -1.5249840021133423, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.5472277889111556, |
|
"grad_norm": 10.09145149251664, |
|
"learning_rate": 5.037399563788664e-07, |
|
"logits/chosen": -0.6333373785018921, |
|
"logits/rejected": -0.6277045011520386, |
|
"logps/chosen": -363.3057861328125, |
|
"logps/rejected": -414.02874755859375, |
|
"loss": 0.5775, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.2467057704925537, |
|
"rewards/margins": 0.4376518726348877, |
|
"rewards/rejected": -1.6843575239181519, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.5493653974615899, |
|
"grad_norm": 10.227202697175395, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": -0.7193889021873474, |
|
"logits/rejected": -0.7205474376678467, |
|
"logps/chosen": -384.7895812988281, |
|
"logps/rejected": -409.3086242675781, |
|
"loss": 0.6474, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.4086096286773682, |
|
"rewards/margins": 0.14859981834888458, |
|
"rewards/rejected": -1.5572093725204468, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.5515030060120241, |
|
"grad_norm": 10.455321411037115, |
|
"learning_rate": 4.962600436211335e-07, |
|
"logits/chosen": -0.7665015459060669, |
|
"logits/rejected": -0.751805305480957, |
|
"logps/chosen": -353.6752624511719, |
|
"logps/rejected": -387.00494384765625, |
|
"loss": 0.6357, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.2000305652618408, |
|
"rewards/margins": 0.3206770420074463, |
|
"rewards/rejected": -1.5207074880599976, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.5536406145624583, |
|
"grad_norm": 10.67800131716867, |
|
"learning_rate": 4.925202964923683e-07, |
|
"logits/chosen": -0.67658931016922, |
|
"logits/rejected": -0.6737143397331238, |
|
"logps/chosen": -357.2424011230469, |
|
"logps/rejected": -380.38238525390625, |
|
"loss": 0.5989, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.1178100109100342, |
|
"rewards/margins": 0.21349495649337769, |
|
"rewards/rejected": -1.3313050270080566, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.5557782231128925, |
|
"grad_norm": 10.755754910243839, |
|
"learning_rate": 4.887809678520975e-07, |
|
"logits/chosen": -0.7121912240982056, |
|
"logits/rejected": -0.6941719055175781, |
|
"logps/chosen": -311.152587890625, |
|
"logps/rejected": -341.55682373046875, |
|
"loss": 0.579, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.0979379415512085, |
|
"rewards/margins": 0.32470834255218506, |
|
"rewards/rejected": -1.4226462841033936, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.5579158316633267, |
|
"grad_norm": 10.029694987313983, |
|
"learning_rate": 4.850422669153009e-07, |
|
"logits/chosen": -0.7704156041145325, |
|
"logits/rejected": -0.7731869220733643, |
|
"logps/chosen": -433.1751403808594, |
|
"logps/rejected": -475.2624206542969, |
|
"loss": 0.6159, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.5628148317337036, |
|
"rewards/margins": 0.27969640493392944, |
|
"rewards/rejected": -1.842511534690857, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.5600534402137608, |
|
"grad_norm": 9.985077304371496, |
|
"learning_rate": 4.813044028618372e-07, |
|
"logits/chosen": -0.655546247959137, |
|
"logits/rejected": -0.5991637110710144, |
|
"logps/chosen": -311.8508605957031, |
|
"logps/rejected": -352.53912353515625, |
|
"loss": 0.6149, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.1894161701202393, |
|
"rewards/margins": 0.4629126489162445, |
|
"rewards/rejected": -1.6523289680480957, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.562191048764195, |
|
"grad_norm": 11.42063256185086, |
|
"learning_rate": 4.775675848247427e-07, |
|
"logits/chosen": -0.7124533653259277, |
|
"logits/rejected": -0.7007814645767212, |
|
"logps/chosen": -349.7750549316406, |
|
"logps/rejected": -395.9293518066406, |
|
"loss": 0.606, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.2669329643249512, |
|
"rewards/margins": 0.35066768527030945, |
|
"rewards/rejected": -1.617600679397583, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.5643286573146292, |
|
"grad_norm": 10.025460802753425, |
|
"learning_rate": 4.7383202187852804e-07, |
|
"logits/chosen": -0.6652883887290955, |
|
"logits/rejected": -0.6626304388046265, |
|
"logps/chosen": -350.379150390625, |
|
"logps/rejected": -388.03955078125, |
|
"loss": 0.5997, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.4017623662948608, |
|
"rewards/margins": 0.309231698513031, |
|
"rewards/rejected": -1.710994005203247, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.5664662658650634, |
|
"grad_norm": 10.653930828086093, |
|
"learning_rate": 4.700979230274829e-07, |
|
"logits/chosen": -0.7248793244361877, |
|
"logits/rejected": -0.750960648059845, |
|
"logps/chosen": -394.911865234375, |
|
"logps/rejected": -443.53643798828125, |
|
"loss": 0.6179, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.381069540977478, |
|
"rewards/margins": 0.36748361587524414, |
|
"rewards/rejected": -1.7485532760620117, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.5686038744154976, |
|
"grad_norm": 9.87511145308802, |
|
"learning_rate": 4.6636549719398016e-07, |
|
"logits/chosen": -0.7590113878250122, |
|
"logits/rejected": -0.7530328035354614, |
|
"logps/chosen": -422.1754150390625, |
|
"logps/rejected": -463.882080078125, |
|
"loss": 0.5906, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.4037176370620728, |
|
"rewards/margins": 0.3301146626472473, |
|
"rewards/rejected": -1.7338322401046753, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.5707414829659319, |
|
"grad_norm": 10.40705399730886, |
|
"learning_rate": 4.626349532067879e-07, |
|
"logits/chosen": -0.5113621950149536, |
|
"logits/rejected": -0.4636048972606659, |
|
"logps/chosen": -402.9996337890625, |
|
"logps/rejected": -432.8628845214844, |
|
"loss": 0.6439, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.4809210300445557, |
|
"rewards/margins": 0.3719174861907959, |
|
"rewards/rejected": -1.8528385162353516, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.5728790915163661, |
|
"grad_norm": 9.883415102033252, |
|
"learning_rate": 4.5890649978938487e-07, |
|
"logits/chosen": -0.7086624503135681, |
|
"logits/rejected": -0.6735981702804565, |
|
"logps/chosen": -396.3412170410156, |
|
"logps/rejected": -393.7690734863281, |
|
"loss": 0.5721, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.459097146987915, |
|
"rewards/margins": 0.10202471911907196, |
|
"rewards/rejected": -1.561121940612793, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.5750167000668003, |
|
"grad_norm": 11.674476580668774, |
|
"learning_rate": 4.5518034554828327e-07, |
|
"logits/chosen": -0.7449507117271423, |
|
"logits/rejected": -0.722856879234314, |
|
"logps/chosen": -426.8054504394531, |
|
"logps/rejected": -444.621337890625, |
|
"loss": 0.6042, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.3702492713928223, |
|
"rewards/margins": 0.2847437858581543, |
|
"rewards/rejected": -1.6549930572509766, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.5771543086172345, |
|
"grad_norm": 10.79085847307073, |
|
"learning_rate": 4.514566989613559e-07, |
|
"logits/chosen": -0.7816205024719238, |
|
"logits/rejected": -0.7831264734268188, |
|
"logps/chosen": -380.7841796875, |
|
"logps/rejected": -405.67108154296875, |
|
"loss": 0.6769, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.4601352214813232, |
|
"rewards/margins": 0.22311216592788696, |
|
"rewards/rejected": -1.6832473278045654, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.5792919171676687, |
|
"grad_norm": 13.754123888989874, |
|
"learning_rate": 4.477357683661733e-07, |
|
"logits/chosen": -0.6621173620223999, |
|
"logits/rejected": -0.6234359741210938, |
|
"logps/chosen": -376.8826599121094, |
|
"logps/rejected": -421.74981689453125, |
|
"loss": 0.6674, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.3359074592590332, |
|
"rewards/margins": 0.37606099247932434, |
|
"rewards/rejected": -1.7119684219360352, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.5814295257181029, |
|
"grad_norm": 10.684496603394274, |
|
"learning_rate": 4.4401776194834603e-07, |
|
"logits/chosen": -0.7525122761726379, |
|
"logits/rejected": -0.6963589787483215, |
|
"logps/chosen": -329.6082458496094, |
|
"logps/rejected": -376.0239562988281, |
|
"loss": 0.6287, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.1713266372680664, |
|
"rewards/margins": 0.28129494190216064, |
|
"rewards/rejected": -1.452621579170227, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.5835671342685371, |
|
"grad_norm": 9.352339379386558, |
|
"learning_rate": 4.403028877298779e-07, |
|
"logits/chosen": -0.6548051238059998, |
|
"logits/rejected": -0.632011890411377, |
|
"logps/chosen": -384.2966003417969, |
|
"logps/rejected": -421.78839111328125, |
|
"loss": 0.622, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.369025707244873, |
|
"rewards/margins": 0.33843424916267395, |
|
"rewards/rejected": -1.707459807395935, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.5857047428189712, |
|
"grad_norm": 9.078233343454654, |
|
"learning_rate": 4.3659135355752593e-07, |
|
"logits/chosen": -0.6783146858215332, |
|
"logits/rejected": -0.6960130929946899, |
|
"logps/chosen": -353.8924560546875, |
|
"logps/rejected": -399.84918212890625, |
|
"loss": 0.5956, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.3400187492370605, |
|
"rewards/margins": 0.32464563846588135, |
|
"rewards/rejected": -1.664664387702942, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.5878423513694054, |
|
"grad_norm": 15.480458345502447, |
|
"learning_rate": 4.328833670911724e-07, |
|
"logits/chosen": -0.651485025882721, |
|
"logits/rejected": -0.6426280736923218, |
|
"logps/chosen": -407.6331481933594, |
|
"logps/rejected": -407.97271728515625, |
|
"loss": 0.6394, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.497727394104004, |
|
"rewards/margins": 0.0166710764169693, |
|
"rewards/rejected": -1.514398455619812, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.5899799599198396, |
|
"grad_norm": 9.317803722726895, |
|
"learning_rate": 4.2917913579220553e-07, |
|
"logits/chosen": -0.7354484796524048, |
|
"logits/rejected": -0.7279876470565796, |
|
"logps/chosen": -336.7724914550781, |
|
"logps/rejected": -337.7842712402344, |
|
"loss": 0.6297, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.3154979944229126, |
|
"rewards/margins": 0.18782049417495728, |
|
"rewards/rejected": -1.5033185482025146, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.5921175684702739, |
|
"grad_norm": 10.981393496040226, |
|
"learning_rate": 4.254788669119127e-07, |
|
"logits/chosen": -0.6517477631568909, |
|
"logits/rejected": -0.6439751386642456, |
|
"logps/chosen": -398.1854553222656, |
|
"logps/rejected": -394.73992919921875, |
|
"loss": 0.6151, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.5254887342453003, |
|
"rewards/margins": 0.09052658081054688, |
|
"rewards/rejected": -1.6160151958465576, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.5942551770207081, |
|
"grad_norm": 9.363953745062531, |
|
"learning_rate": 4.2178276747988444e-07, |
|
"logits/chosen": -0.7151267528533936, |
|
"logits/rejected": -0.6989988088607788, |
|
"logps/chosen": -399.211669921875, |
|
"logps/rejected": -472.8134765625, |
|
"loss": 0.6081, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.5025238990783691, |
|
"rewards/margins": 0.6458090543746948, |
|
"rewards/rejected": -2.1483330726623535, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.5963927855711423, |
|
"grad_norm": 10.996768453375289, |
|
"learning_rate": 4.180910442924311e-07, |
|
"logits/chosen": -0.6743846535682678, |
|
"logits/rejected": -0.6869890093803406, |
|
"logps/chosen": -349.3891296386719, |
|
"logps/rejected": -385.591064453125, |
|
"loss": 0.6559, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.1990691423416138, |
|
"rewards/margins": 0.24665698409080505, |
|
"rewards/rejected": -1.4457261562347412, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.5985303941215765, |
|
"grad_norm": 12.076546659241137, |
|
"learning_rate": 4.144039039010124e-07, |
|
"logits/chosen": -0.7634164094924927, |
|
"logits/rejected": -0.7913932204246521, |
|
"logps/chosen": -363.1887512207031, |
|
"logps/rejected": -416.5007019042969, |
|
"loss": 0.599, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.0668609142303467, |
|
"rewards/margins": 0.44077447056770325, |
|
"rewards/rejected": -1.507635474205017, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.6006680026720107, |
|
"grad_norm": 11.14137973799164, |
|
"learning_rate": 4.107215526006817e-07, |
|
"logits/chosen": -0.7002226114273071, |
|
"logits/rejected": -0.7134915590286255, |
|
"logps/chosen": -370.8570556640625, |
|
"logps/rejected": -408.5722961425781, |
|
"loss": 0.664, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.3522167205810547, |
|
"rewards/margins": 0.2257714569568634, |
|
"rewards/rejected": -1.5779881477355957, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.6028056112224449, |
|
"grad_norm": 10.43225088057876, |
|
"learning_rate": 4.070441964185427e-07, |
|
"logits/chosen": -0.6937713623046875, |
|
"logits/rejected": -0.6445334553718567, |
|
"logps/chosen": -320.21636962890625, |
|
"logps/rejected": -386.27337646484375, |
|
"loss": 0.6365, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.0563011169433594, |
|
"rewards/margins": 0.5464246869087219, |
|
"rewards/rejected": -1.6027257442474365, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.6049432197728791, |
|
"grad_norm": 9.579570581028333, |
|
"learning_rate": 4.0337204110222347e-07, |
|
"logits/chosen": -0.7348592281341553, |
|
"logits/rejected": -0.7190099954605103, |
|
"logps/chosen": -368.09918212890625, |
|
"logps/rejected": -410.2267761230469, |
|
"loss": 0.6029, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.1717023849487305, |
|
"rewards/margins": 0.4082415699958801, |
|
"rewards/rejected": -1.5799440145492554, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.6070808283233133, |
|
"grad_norm": 10.220765953116597, |
|
"learning_rate": 3.997052921083636e-07, |
|
"logits/chosen": -0.6168830394744873, |
|
"logits/rejected": -0.6260079145431519, |
|
"logps/chosen": -374.4775695800781, |
|
"logps/rejected": -405.28546142578125, |
|
"loss": 0.6056, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.2549736499786377, |
|
"rewards/margins": 0.39645275473594666, |
|
"rewards/rejected": -1.6514263153076172, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.6092184368737475, |
|
"grad_norm": 11.754142515548534, |
|
"learning_rate": 3.960441545911204e-07, |
|
"logits/chosen": -0.7724018096923828, |
|
"logits/rejected": -0.8003143668174744, |
|
"logps/chosen": -411.28765869140625, |
|
"logps/rejected": -446.0718688964844, |
|
"loss": 0.6254, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.3750414848327637, |
|
"rewards/margins": 0.19460612535476685, |
|
"rewards/rejected": -1.5696475505828857, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.6113560454241816, |
|
"grad_norm": 11.71195693335506, |
|
"learning_rate": 3.92388833390689e-07, |
|
"logits/chosen": -0.6072220206260681, |
|
"logits/rejected": -0.5882732272148132, |
|
"logps/chosen": -362.8934020996094, |
|
"logps/rejected": -384.33685302734375, |
|
"loss": 0.6421, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.4378963708877563, |
|
"rewards/margins": 0.3072332739830017, |
|
"rewards/rejected": -1.7451298236846924, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.6134936539746159, |
|
"grad_norm": 10.423314767059496, |
|
"learning_rate": 3.8873953302184283e-07, |
|
"logits/chosen": -0.6478594541549683, |
|
"logits/rejected": -0.6148996949195862, |
|
"logps/chosen": -402.24993896484375, |
|
"logps/rejected": -419.41510009765625, |
|
"loss": 0.6184, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.573075532913208, |
|
"rewards/margins": 0.27030453085899353, |
|
"rewards/rejected": -1.8433799743652344, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.6156312625250501, |
|
"grad_norm": 11.589829757981947, |
|
"learning_rate": 3.8509645766249034e-07, |
|
"logits/chosen": -0.7512708902359009, |
|
"logits/rejected": -0.7593178749084473, |
|
"logps/chosen": -430.9858093261719, |
|
"logps/rejected": -473.86578369140625, |
|
"loss": 0.6066, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.5662776231765747, |
|
"rewards/margins": 0.3962094187736511, |
|
"rewards/rejected": -1.9624871015548706, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.6177688710754843, |
|
"grad_norm": 38.79630588602357, |
|
"learning_rate": 3.814598111422513e-07, |
|
"logits/chosen": -0.7107813358306885, |
|
"logits/rejected": -0.7043961882591248, |
|
"logps/chosen": -359.62713623046875, |
|
"logps/rejected": -373.19805908203125, |
|
"loss": 0.6213, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -1.3326873779296875, |
|
"rewards/margins": 0.10174018889665604, |
|
"rewards/rejected": -1.4344274997711182, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.6199064796259185, |
|
"grad_norm": 9.198178073702046, |
|
"learning_rate": 3.778297969310529e-07, |
|
"logits/chosen": -0.7122032046318054, |
|
"logits/rejected": -0.7226367592811584, |
|
"logps/chosen": -360.8456726074219, |
|
"logps/rejected": -395.7742614746094, |
|
"loss": 0.6057, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.3359217643737793, |
|
"rewards/margins": 0.3005616068840027, |
|
"rewards/rejected": -1.6364833116531372, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.6220440881763527, |
|
"grad_norm": 11.585726416356431, |
|
"learning_rate": 3.742066181277457e-07, |
|
"logits/chosen": -0.6904798150062561, |
|
"logits/rejected": -0.6984922885894775, |
|
"logps/chosen": -385.777587890625, |
|
"logps/rejected": -417.1224670410156, |
|
"loss": 0.602, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.4644272327423096, |
|
"rewards/margins": 0.26233839988708496, |
|
"rewards/rejected": -1.7267656326293945, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.6241816967267869, |
|
"grad_norm": 10.98477533754328, |
|
"learning_rate": 3.7059047744873955e-07, |
|
"logits/chosen": -0.6717097759246826, |
|
"logits/rejected": -0.6137974262237549, |
|
"logps/chosen": -388.8412780761719, |
|
"logps/rejected": -408.81964111328125, |
|
"loss": 0.6424, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.4904993772506714, |
|
"rewards/margins": 0.2106323540210724, |
|
"rewards/rejected": -1.701131820678711, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.6263193052772211, |
|
"grad_norm": 11.098357379348672, |
|
"learning_rate": 3.669815772166625e-07, |
|
"logits/chosen": -0.7643608450889587, |
|
"logits/rejected": -0.7616855502128601, |
|
"logps/chosen": -399.7235412597656, |
|
"logps/rejected": -444.3169250488281, |
|
"loss": 0.5882, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.1846121549606323, |
|
"rewards/margins": 0.3367740213871002, |
|
"rewards/rejected": -1.5213862657546997, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.6284569138276553, |
|
"grad_norm": 9.823709075894158, |
|
"learning_rate": 3.6338011934904e-07, |
|
"logits/chosen": -0.7340261936187744, |
|
"logits/rejected": -0.7253273129463196, |
|
"logps/chosen": -415.0310974121094, |
|
"logps/rejected": -479.2626953125, |
|
"loss": 0.5839, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.4078618288040161, |
|
"rewards/margins": 0.7003488540649414, |
|
"rewards/rejected": -2.108210802078247, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.6305945223780896, |
|
"grad_norm": 10.87697682105901, |
|
"learning_rate": 3.5978630534699865e-07, |
|
"logits/chosen": -0.6595284342765808, |
|
"logits/rejected": -0.6863126754760742, |
|
"logps/chosen": -365.39013671875, |
|
"logps/rejected": -408.86541748046875, |
|
"loss": 0.652, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.497340202331543, |
|
"rewards/margins": 0.30657070875167847, |
|
"rewards/rejected": -1.803910732269287, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.6327321309285238, |
|
"grad_norm": 11.045318981796374, |
|
"learning_rate": 3.562003362839914e-07, |
|
"logits/chosen": -0.7206366062164307, |
|
"logits/rejected": -0.7295577526092529, |
|
"logps/chosen": -461.62225341796875, |
|
"logps/rejected": -458.39703369140625, |
|
"loss": 0.6665, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.7048362493515015, |
|
"rewards/margins": 0.04338730126619339, |
|
"rewards/rejected": -1.7482235431671143, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.6348697394789579, |
|
"grad_norm": 10.855323994177997, |
|
"learning_rate": 3.526224127945478e-07, |
|
"logits/chosen": -0.6919922828674316, |
|
"logits/rejected": -0.6954550743103027, |
|
"logps/chosen": -336.01556396484375, |
|
"logps/rejected": -376.9835205078125, |
|
"loss": 0.6407, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.3392897844314575, |
|
"rewards/margins": 0.2275466024875641, |
|
"rewards/rejected": -1.5668363571166992, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.6370073480293921, |
|
"grad_norm": 9.58788965705626, |
|
"learning_rate": 3.49052735063049e-07, |
|
"logits/chosen": -0.8874866962432861, |
|
"logits/rejected": -0.8917239904403687, |
|
"logps/chosen": -403.54693603515625, |
|
"logps/rejected": -442.32305908203125, |
|
"loss": 0.5722, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -1.3754301071166992, |
|
"rewards/margins": 0.39617669582366943, |
|
"rewards/rejected": -1.771606683731079, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.6391449565798263, |
|
"grad_norm": 10.56963230174736, |
|
"learning_rate": 3.454915028125263e-07, |
|
"logits/chosen": -0.6784321665763855, |
|
"logits/rejected": -0.6550740003585815, |
|
"logps/chosen": -406.8092041015625, |
|
"logps/rejected": -409.7593688964844, |
|
"loss": 0.6048, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.4954036474227905, |
|
"rewards/margins": 0.18573154509067535, |
|
"rewards/rejected": -1.6811351776123047, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.6412825651302605, |
|
"grad_norm": 10.005595422402594, |
|
"learning_rate": 3.4193891529348795e-07, |
|
"logits/chosen": -0.7489890456199646, |
|
"logits/rejected": -0.7623311281204224, |
|
"logps/chosen": -440.9601135253906, |
|
"logps/rejected": -441.59466552734375, |
|
"loss": 0.6376, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.6095386743545532, |
|
"rewards/margins": 0.23035281896591187, |
|
"rewards/rejected": -1.8398916721343994, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6412825651302605, |
|
"eval_logits/chosen": -0.6758147478103638, |
|
"eval_logits/rejected": -0.6752761006355286, |
|
"eval_logps/chosen": -390.88177490234375, |
|
"eval_logps/rejected": -425.3858947753906, |
|
"eval_loss": 0.6177628040313721, |
|
"eval_rewards/accuracies": 0.6747967600822449, |
|
"eval_rewards/chosen": -1.3533374071121216, |
|
"eval_rewards/margins": 0.28798195719718933, |
|
"eval_rewards/rejected": -1.6413193941116333, |
|
"eval_runtime": 377.299, |
|
"eval_samples_per_second": 5.197, |
|
"eval_steps_per_second": 0.326, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6434201736806947, |
|
"grad_norm": 10.329473531690297, |
|
"learning_rate": 3.3839517127277004e-07, |
|
"logits/chosen": -0.7601391673088074, |
|
"logits/rejected": -0.7844873070716858, |
|
"logps/chosen": -393.47540283203125, |
|
"logps/rejected": -435.7286071777344, |
|
"loss": 0.6387, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.259373426437378, |
|
"rewards/margins": 0.349065899848938, |
|
"rewards/rejected": -1.608439326286316, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.6455577822311289, |
|
"grad_norm": 8.767557926350584, |
|
"learning_rate": 3.348604690224166e-07, |
|
"logits/chosen": -0.8301680088043213, |
|
"logits/rejected": -0.8203250169754028, |
|
"logps/chosen": -425.7115478515625, |
|
"logps/rejected": -468.8160095214844, |
|
"loss": 0.6113, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.3414942026138306, |
|
"rewards/margins": 0.44216763973236084, |
|
"rewards/rejected": -1.7836618423461914, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.6476953907815631, |
|
"grad_norm": 11.252600228651138, |
|
"learning_rate": 3.31335006308585e-07, |
|
"logits/chosen": -0.7533825635910034, |
|
"logits/rejected": -0.732757031917572, |
|
"logps/chosen": -393.85040283203125, |
|
"logps/rejected": -415.14080810546875, |
|
"loss": 0.6301, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.3789258003234863, |
|
"rewards/margins": 0.23148328065872192, |
|
"rewards/rejected": -1.6104090213775635, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.6498329993319973, |
|
"grad_norm": 10.257975436900558, |
|
"learning_rate": 3.2781898038048237e-07, |
|
"logits/chosen": -0.6510428786277771, |
|
"logits/rejected": -0.6685248613357544, |
|
"logps/chosen": -390.3652038574219, |
|
"logps/rejected": -393.8785400390625, |
|
"loss": 0.6683, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.5787415504455566, |
|
"rewards/margins": 0.11641066521406174, |
|
"rewards/rejected": -1.6951522827148438, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.6519706078824316, |
|
"grad_norm": 9.860462354210497, |
|
"learning_rate": 3.243125879593286e-07, |
|
"logits/chosen": -0.7366013526916504, |
|
"logits/rejected": -0.7264673709869385, |
|
"logps/chosen": -361.6595458984375, |
|
"logps/rejected": -408.7252197265625, |
|
"loss": 0.6058, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.1662776470184326, |
|
"rewards/margins": 0.39646124839782715, |
|
"rewards/rejected": -1.5627388954162598, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.6541082164328658, |
|
"grad_norm": 11.932221586274338, |
|
"learning_rate": 3.2081602522734985e-07, |
|
"logits/chosen": -0.7773129343986511, |
|
"logits/rejected": -0.7762659788131714, |
|
"logps/chosen": -384.003662109375, |
|
"logps/rejected": -423.2783203125, |
|
"loss": 0.5892, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.3898005485534668, |
|
"rewards/margins": 0.3622281551361084, |
|
"rewards/rejected": -1.7520288228988647, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.6562458249833, |
|
"grad_norm": 9.369038313539917, |
|
"learning_rate": 3.173294878168025e-07, |
|
"logits/chosen": -0.6643047332763672, |
|
"logits/rejected": -0.6601549386978149, |
|
"logps/chosen": -372.4691162109375, |
|
"logps/rejected": -406.27996826171875, |
|
"loss": 0.6158, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.4243800640106201, |
|
"rewards/margins": 0.29933756589889526, |
|
"rewards/rejected": -1.7237175703048706, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.6583834335337342, |
|
"grad_norm": 10.15702054511366, |
|
"learning_rate": 3.138531707990274e-07, |
|
"logits/chosen": -0.6945326328277588, |
|
"logits/rejected": -0.6813417673110962, |
|
"logps/chosen": -367.9193115234375, |
|
"logps/rejected": -422.1386413574219, |
|
"loss": 0.5835, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.1640138626098633, |
|
"rewards/margins": 0.5344864726066589, |
|
"rewards/rejected": -1.6985002756118774, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.6605210420841683, |
|
"grad_norm": 10.774899794292791, |
|
"learning_rate": 3.1038726867353583e-07, |
|
"logits/chosen": -0.678726315498352, |
|
"logits/rejected": -0.706427276134491, |
|
"logps/chosen": -402.2789001464844, |
|
"logps/rejected": -475.16436767578125, |
|
"loss": 0.5877, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.296879529953003, |
|
"rewards/margins": 0.6074644327163696, |
|
"rewards/rejected": -1.904344081878662, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.6626586506346025, |
|
"grad_norm": 9.326333800621224, |
|
"learning_rate": 3.069319753571269e-07, |
|
"logits/chosen": -0.7166895866394043, |
|
"logits/rejected": -0.7176540493965149, |
|
"logps/chosen": -386.4005432128906, |
|
"logps/rejected": -395.38970947265625, |
|
"loss": 0.6111, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.5514951944351196, |
|
"rewards/margins": 0.11902564764022827, |
|
"rewards/rejected": -1.6705207824707031, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.6647962591850367, |
|
"grad_norm": 11.49416505541279, |
|
"learning_rate": 3.034874841730382e-07, |
|
"logits/chosen": -0.7580830454826355, |
|
"logits/rejected": -0.7336598634719849, |
|
"logps/chosen": -402.9891052246094, |
|
"logps/rejected": -430.2671813964844, |
|
"loss": 0.6368, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.3294634819030762, |
|
"rewards/margins": 0.3222670555114746, |
|
"rewards/rejected": -1.6517305374145508, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.6669338677354709, |
|
"grad_norm": 10.887373926899288, |
|
"learning_rate": 3.000539878401296e-07, |
|
"logits/chosen": -0.6197298765182495, |
|
"logits/rejected": -0.5989848375320435, |
|
"logps/chosen": -391.74951171875, |
|
"logps/rejected": -449.1798400878906, |
|
"loss": 0.6082, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.4147597551345825, |
|
"rewards/margins": 0.5554874539375305, |
|
"rewards/rejected": -1.9702472686767578, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.6690714762859051, |
|
"grad_norm": 11.171229600938071, |
|
"learning_rate": 2.9663167846209996e-07, |
|
"logits/chosen": -0.6838382482528687, |
|
"logits/rejected": -0.6743027567863464, |
|
"logps/chosen": -368.8251037597656, |
|
"logps/rejected": -415.6241149902344, |
|
"loss": 0.6372, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.388074278831482, |
|
"rewards/margins": 0.4560723900794983, |
|
"rewards/rejected": -1.844146490097046, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.6712090848363393, |
|
"grad_norm": 10.489570114197578, |
|
"learning_rate": 2.9322074751673974e-07, |
|
"logits/chosen": -0.6488001346588135, |
|
"logits/rejected": -0.6053016781806946, |
|
"logps/chosen": -422.6211853027344, |
|
"logps/rejected": -449.6383972167969, |
|
"loss": 0.6577, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.6949717998504639, |
|
"rewards/margins": 0.3523138463497162, |
|
"rewards/rejected": -2.047285556793213, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.6733466933867736, |
|
"grad_norm": 9.335833798624803, |
|
"learning_rate": 2.898213858452173e-07, |
|
"logits/chosen": -0.7407481670379639, |
|
"logits/rejected": -0.6984574794769287, |
|
"logps/chosen": -426.7627868652344, |
|
"logps/rejected": -433.4222106933594, |
|
"loss": 0.6038, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.5585707426071167, |
|
"rewards/margins": 0.30426639318466187, |
|
"rewards/rejected": -1.8628369569778442, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.6754843019372078, |
|
"grad_norm": 9.82583152531341, |
|
"learning_rate": 2.864337836414018e-07, |
|
"logits/chosen": -0.7897535562515259, |
|
"logits/rejected": -0.7509832382202148, |
|
"logps/chosen": -440.0413818359375, |
|
"logps/rejected": -473.6156311035156, |
|
"loss": 0.5877, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.778262972831726, |
|
"rewards/margins": 0.3270663917064667, |
|
"rewards/rejected": -2.1053295135498047, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.677621910487642, |
|
"grad_norm": 10.397708781784715, |
|
"learning_rate": 2.8305813044122093e-07, |
|
"logits/chosen": -0.5974478125572205, |
|
"logits/rejected": -0.5807868242263794, |
|
"logps/chosen": -366.0530090332031, |
|
"logps/rejected": -355.85882568359375, |
|
"loss": 0.6565, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.3970279693603516, |
|
"rewards/margins": 0.0002168789505958557, |
|
"rewards/rejected": -1.3972446918487549, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.6797595190380762, |
|
"grad_norm": 10.464645314526035, |
|
"learning_rate": 2.7969461511205806e-07, |
|
"logits/chosen": -0.626457691192627, |
|
"logits/rejected": -0.5530537366867065, |
|
"logps/chosen": -330.521240234375, |
|
"logps/rejected": -358.5421142578125, |
|
"loss": 0.6146, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.4030221700668335, |
|
"rewards/margins": 0.22237975895404816, |
|
"rewards/rejected": -1.6254019737243652, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.6818971275885104, |
|
"grad_norm": 9.874403173091292, |
|
"learning_rate": 2.763434258421836e-07, |
|
"logits/chosen": -0.7100091576576233, |
|
"logits/rejected": -0.6709161996841431, |
|
"logps/chosen": -342.3360595703125, |
|
"logps/rejected": -356.4312744140625, |
|
"loss": 0.6294, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.2349555492401123, |
|
"rewards/margins": 0.18816961348056793, |
|
"rewards/rejected": -1.4231250286102295, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.6840347361389446, |
|
"grad_norm": 10.416140198085172, |
|
"learning_rate": 2.730047501302266e-07, |
|
"logits/chosen": -0.7924367785453796, |
|
"logits/rejected": -0.7890709638595581, |
|
"logps/chosen": -402.750244140625, |
|
"logps/rejected": -433.9951171875, |
|
"loss": 0.5975, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.3218122720718384, |
|
"rewards/margins": 0.32942885160446167, |
|
"rewards/rejected": -1.6512411832809448, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.6861723446893787, |
|
"grad_norm": 10.264424479929405, |
|
"learning_rate": 2.696787747746839e-07, |
|
"logits/chosen": -0.7326480150222778, |
|
"logits/rejected": -0.727679967880249, |
|
"logps/chosen": -335.9344177246094, |
|
"logps/rejected": -376.9026794433594, |
|
"loss": 0.6166, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.2939304113388062, |
|
"rewards/margins": 0.3992197811603546, |
|
"rewards/rejected": -1.6931501626968384, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.6883099532398129, |
|
"grad_norm": 9.672418793392822, |
|
"learning_rate": 2.6636568586346897e-07, |
|
"logits/chosen": -0.7330962419509888, |
|
"logits/rejected": -0.7231791615486145, |
|
"logps/chosen": -344.6290588378906, |
|
"logps/rejected": -368.3528137207031, |
|
"loss": 0.6236, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.1829473972320557, |
|
"rewards/margins": 0.23583151400089264, |
|
"rewards/rejected": -1.4187790155410767, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.6904475617902471, |
|
"grad_norm": 12.909094410970068, |
|
"learning_rate": 2.6306566876350067e-07, |
|
"logits/chosen": -0.7223283648490906, |
|
"logits/rejected": -0.6862327456474304, |
|
"logps/chosen": -427.14727783203125, |
|
"logps/rejected": -453.6234436035156, |
|
"loss": 0.5843, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.5469043254852295, |
|
"rewards/margins": 0.278840035200119, |
|
"rewards/rejected": -1.825744390487671, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.6925851703406813, |
|
"grad_norm": 13.707807531422917, |
|
"learning_rate": 2.597789081103313e-07, |
|
"logits/chosen": -0.7629610300064087, |
|
"logits/rejected": -0.727975070476532, |
|
"logps/chosen": -382.91278076171875, |
|
"logps/rejected": -421.9703369140625, |
|
"loss": 0.5563, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.4632904529571533, |
|
"rewards/margins": 0.4552845358848572, |
|
"rewards/rejected": -1.9185751676559448, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.6947227788911156, |
|
"grad_norm": 10.588462296925226, |
|
"learning_rate": 2.5650558779781635e-07, |
|
"logits/chosen": -0.621019184589386, |
|
"logits/rejected": -0.5743827223777771, |
|
"logps/chosen": -433.55267333984375, |
|
"logps/rejected": -461.3088073730469, |
|
"loss": 0.6159, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.6322648525238037, |
|
"rewards/margins": 0.2886176109313965, |
|
"rewards/rejected": -1.9208825826644897, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.6968603874415498, |
|
"grad_norm": 9.6025165386732, |
|
"learning_rate": 2.5324589096782656e-07, |
|
"logits/chosen": -0.6759508848190308, |
|
"logits/rejected": -0.6631283760070801, |
|
"logps/chosen": -414.1610107421875, |
|
"logps/rejected": -420.1568603515625, |
|
"loss": 0.6298, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.416655421257019, |
|
"rewards/margins": 0.17679718136787415, |
|
"rewards/rejected": -1.5934526920318604, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.698997995991984, |
|
"grad_norm": 12.80932336769188, |
|
"learning_rate": 2.500000000000001e-07, |
|
"logits/chosen": -0.6499335765838623, |
|
"logits/rejected": -0.662979245185852, |
|
"logps/chosen": -405.96063232421875, |
|
"logps/rejected": -447.20172119140625, |
|
"loss": 0.6288, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.4347225427627563, |
|
"rewards/margins": 0.31777456402778625, |
|
"rewards/rejected": -1.7524970769882202, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.7011356045424182, |
|
"grad_norm": 12.08919140781653, |
|
"learning_rate": 2.467680965015387e-07, |
|
"logits/chosen": -0.7271804213523865, |
|
"logits/rejected": -0.7305589914321899, |
|
"logps/chosen": -362.54632568359375, |
|
"logps/rejected": -384.875, |
|
"loss": 0.634, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.2877211570739746, |
|
"rewards/margins": 0.24846753478050232, |
|
"rewards/rejected": -1.5361886024475098, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.7032732130928524, |
|
"grad_norm": 10.873874736167313, |
|
"learning_rate": 2.4355036129704696e-07, |
|
"logits/chosen": -0.6805239915847778, |
|
"logits/rejected": -0.6776773929595947, |
|
"logps/chosen": -472.7155456542969, |
|
"logps/rejected": -522.7994384765625, |
|
"loss": 0.6205, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.6964683532714844, |
|
"rewards/margins": 0.3618759214878082, |
|
"rewards/rejected": -2.0583443641662598, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.7054108216432866, |
|
"grad_norm": 16.081627749911508, |
|
"learning_rate": 2.403469744184154e-07, |
|
"logits/chosen": -0.7133939266204834, |
|
"logits/rejected": -0.7143837809562683, |
|
"logps/chosen": -382.49078369140625, |
|
"logps/rejected": -437.5693664550781, |
|
"loss": 0.6097, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.266930341720581, |
|
"rewards/margins": 0.49213629961013794, |
|
"rewards/rejected": -1.7590665817260742, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.7075484301937208, |
|
"grad_norm": 10.322962661870067, |
|
"learning_rate": 2.371581150947476e-07, |
|
"logits/chosen": -0.8041883707046509, |
|
"logits/rejected": -0.8093154430389404, |
|
"logps/chosen": -430.856689453125, |
|
"logps/rejected": -477.88787841796875, |
|
"loss": 0.6063, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.3982198238372803, |
|
"rewards/margins": 0.3465649485588074, |
|
"rewards/rejected": -1.7447847127914429, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.7096860387441549, |
|
"grad_norm": 10.446630163251449, |
|
"learning_rate": 2.3398396174233176e-07, |
|
"logits/chosen": -0.6520624160766602, |
|
"logits/rejected": -0.6437772512435913, |
|
"logps/chosen": -422.0386047363281, |
|
"logps/rejected": -486.0862731933594, |
|
"loss": 0.6174, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.6643366813659668, |
|
"rewards/margins": 0.3572143316268921, |
|
"rewards/rejected": -2.0215511322021484, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.7118236472945891, |
|
"grad_norm": 11.809866444989042, |
|
"learning_rate": 2.3082469195465893e-07, |
|
"logits/chosen": -0.7520323395729065, |
|
"logits/rejected": -0.7196107506752014, |
|
"logps/chosen": -411.33251953125, |
|
"logps/rejected": -455.638916015625, |
|
"loss": 0.5696, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.6863501071929932, |
|
"rewards/margins": 0.3991457223892212, |
|
"rewards/rejected": -2.085495710372925, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.7139612558450233, |
|
"grad_norm": 11.4706777134489, |
|
"learning_rate": 2.2768048249248644e-07, |
|
"logits/chosen": -0.6395952105522156, |
|
"logits/rejected": -0.612390398979187, |
|
"logps/chosen": -408.4999084472656, |
|
"logps/rejected": -444.7389831542969, |
|
"loss": 0.6339, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.7306082248687744, |
|
"rewards/margins": 0.3128102421760559, |
|
"rewards/rejected": -2.0434184074401855, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.7160988643954576, |
|
"grad_norm": 10.253605586246742, |
|
"learning_rate": 2.2455150927394878e-07, |
|
"logits/chosen": -0.6910028457641602, |
|
"logits/rejected": -0.6887121200561523, |
|
"logps/chosen": -373.79571533203125, |
|
"logps/rejected": -457.9923095703125, |
|
"loss": 0.6146, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.5869219303131104, |
|
"rewards/margins": 0.5055859088897705, |
|
"rewards/rejected": -2.0925076007843018, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.7182364729458918, |
|
"grad_norm": 10.09520238328347, |
|
"learning_rate": 2.2143794736471388e-07, |
|
"logits/chosen": -0.7225451469421387, |
|
"logits/rejected": -0.7483439445495605, |
|
"logps/chosen": -484.85748291015625, |
|
"logps/rejected": -529.5263061523438, |
|
"loss": 0.6224, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.8494200706481934, |
|
"rewards/margins": 0.2027570605278015, |
|
"rewards/rejected": -2.0521771907806396, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.720374081496326, |
|
"grad_norm": 10.071232143800623, |
|
"learning_rate": 2.1833997096818895e-07, |
|
"logits/chosen": -0.5754382610321045, |
|
"logits/rejected": -0.5392119288444519, |
|
"logps/chosen": -344.8245544433594, |
|
"logps/rejected": -379.4691162109375, |
|
"loss": 0.6219, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.2857069969177246, |
|
"rewards/margins": 0.34328341484069824, |
|
"rewards/rejected": -1.6289904117584229, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.7225116900467602, |
|
"grad_norm": 10.65729584502251, |
|
"learning_rate": 2.1525775341577402e-07, |
|
"logits/chosen": -0.6606283187866211, |
|
"logits/rejected": -0.6608355045318604, |
|
"logps/chosen": -414.2405700683594, |
|
"logps/rejected": -429.601806640625, |
|
"loss": 0.5947, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.3188872337341309, |
|
"rewards/margins": 0.19126060605049133, |
|
"rewards/rejected": -1.5101479291915894, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.7246492985971944, |
|
"grad_norm": 10.538063937615522, |
|
"learning_rate": 2.121914671571633e-07, |
|
"logits/chosen": -0.7743428945541382, |
|
"logits/rejected": -0.7525985836982727, |
|
"logps/chosen": -367.4284973144531, |
|
"logps/rejected": -441.0301818847656, |
|
"loss": 0.6129, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -1.3432848453521729, |
|
"rewards/margins": 0.6236953735351562, |
|
"rewards/rejected": -1.9669800996780396, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.7267869071476286, |
|
"grad_norm": 9.94512421358411, |
|
"learning_rate": 2.0914128375069722e-07, |
|
"logits/chosen": -0.7715132236480713, |
|
"logits/rejected": -0.7799488306045532, |
|
"logps/chosen": -431.17169189453125, |
|
"logps/rejected": -494.21197509765625, |
|
"loss": 0.5912, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.5074207782745361, |
|
"rewards/margins": 0.48499661684036255, |
|
"rewards/rejected": -1.9924174547195435, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.7289245156980628, |
|
"grad_norm": 12.36515548240608, |
|
"learning_rate": 2.0610737385376348e-07, |
|
"logits/chosen": -0.7136672139167786, |
|
"logits/rejected": -0.6798695921897888, |
|
"logps/chosen": -405.33905029296875, |
|
"logps/rejected": -437.63330078125, |
|
"loss": 0.5978, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.4325660467147827, |
|
"rewards/margins": 0.23246146738529205, |
|
"rewards/rejected": -1.6650276184082031, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.731062124248497, |
|
"grad_norm": 10.594170284983056, |
|
"learning_rate": 2.0308990721324926e-07, |
|
"logits/chosen": -0.6517391800880432, |
|
"logits/rejected": -0.6472780108451843, |
|
"logps/chosen": -456.90570068359375, |
|
"logps/rejected": -480.61138916015625, |
|
"loss": 0.5951, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -1.691187858581543, |
|
"rewards/margins": 0.42632347345352173, |
|
"rewards/rejected": -2.117511034011841, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.7331997327989312, |
|
"grad_norm": 11.483481596708172, |
|
"learning_rate": 2.0008905265604315e-07, |
|
"logits/chosen": -0.7073544263839722, |
|
"logits/rejected": -0.6990326642990112, |
|
"logps/chosen": -409.7100524902344, |
|
"logps/rejected": -450.9366455078125, |
|
"loss": 0.5441, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.6152515411376953, |
|
"rewards/margins": 0.3995630741119385, |
|
"rewards/rejected": -2.014814853668213, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.7353373413493653, |
|
"grad_norm": 13.346456381620781, |
|
"learning_rate": 1.971049780795901e-07, |
|
"logits/chosen": -0.7003156542778015, |
|
"logits/rejected": -0.6687884330749512, |
|
"logps/chosen": -310.5570373535156, |
|
"logps/rejected": -344.05859375, |
|
"loss": 0.6145, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.0297322273254395, |
|
"rewards/margins": 0.3454705476760864, |
|
"rewards/rejected": -1.3752026557922363, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.7374749498997996, |
|
"grad_norm": 10.139219194086207, |
|
"learning_rate": 1.9413785044249676e-07, |
|
"logits/chosen": -0.6944881677627563, |
|
"logits/rejected": -0.6632839441299438, |
|
"logps/chosen": -381.2460632324219, |
|
"logps/rejected": -414.730712890625, |
|
"loss": 0.5746, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.378089427947998, |
|
"rewards/margins": 0.35800689458847046, |
|
"rewards/rejected": -1.7360961437225342, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.7396125584502338, |
|
"grad_norm": 9.84020912855359, |
|
"learning_rate": 1.9118783575519109e-07, |
|
"logits/chosen": -0.7444390058517456, |
|
"logits/rejected": -0.7687693238258362, |
|
"logps/chosen": -441.13104248046875, |
|
"logps/rejected": -471.73797607421875, |
|
"loss": 0.6159, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.675041675567627, |
|
"rewards/margins": 0.1502073109149933, |
|
"rewards/rejected": -1.8252489566802979, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.741750167000668, |
|
"grad_norm": 11.291373399374436, |
|
"learning_rate": 1.8825509907063326e-07, |
|
"logits/chosen": -0.7405213117599487, |
|
"logits/rejected": -0.7411251068115234, |
|
"logps/chosen": -346.1521301269531, |
|
"logps/rejected": -372.3758850097656, |
|
"loss": 0.6207, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.4540354013442993, |
|
"rewards/margins": 0.28074249625205994, |
|
"rewards/rejected": -1.7347780466079712, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.7438877755511022, |
|
"grad_norm": 9.478462210282277, |
|
"learning_rate": 1.8533980447508135e-07, |
|
"logits/chosen": -0.7745504975318909, |
|
"logits/rejected": -0.7580114603042603, |
|
"logps/chosen": -364.4132995605469, |
|
"logps/rejected": -376.5552978515625, |
|
"loss": 0.6103, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.342382550239563, |
|
"rewards/margins": 0.21273840963840485, |
|
"rewards/rejected": -1.5551210641860962, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.7460253841015364, |
|
"grad_norm": 11.376031296146607, |
|
"learning_rate": 1.824421150789106e-07, |
|
"logits/chosen": -0.588141918182373, |
|
"logits/rejected": -0.6058573126792908, |
|
"logps/chosen": -402.21026611328125, |
|
"logps/rejected": -441.880615234375, |
|
"loss": 0.6202, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.4959633350372314, |
|
"rewards/margins": 0.359602689743042, |
|
"rewards/rejected": -1.8555659055709839, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.7481629926519706, |
|
"grad_norm": 9.3095000239465, |
|
"learning_rate": 1.7956219300748792e-07, |
|
"logits/chosen": -0.7804590463638306, |
|
"logits/rejected": -0.768570601940155, |
|
"logps/chosen": -395.44970703125, |
|
"logps/rejected": -442.4303894042969, |
|
"loss": 0.5622, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.3593695163726807, |
|
"rewards/margins": 0.4249880313873291, |
|
"rewards/rejected": -1.7843575477600098, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.7503006012024048, |
|
"grad_norm": 10.971792168406296, |
|
"learning_rate": 1.7670019939210023e-07, |
|
"logits/chosen": -0.6696098446846008, |
|
"logits/rejected": -0.6669338941574097, |
|
"logps/chosen": -451.68768310546875, |
|
"logps/rejected": -497.6464538574219, |
|
"loss": 0.5897, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.7285881042480469, |
|
"rewards/margins": 0.4477997124195099, |
|
"rewards/rejected": -2.1763877868652344, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.752438209752839, |
|
"grad_norm": 11.419971323161318, |
|
"learning_rate": 1.7385629436093956e-07, |
|
"logits/chosen": -0.6907357573509216, |
|
"logits/rejected": -0.637013852596283, |
|
"logps/chosen": -432.1939392089844, |
|
"logps/rejected": -469.10162353515625, |
|
"loss": 0.6008, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.7298922538757324, |
|
"rewards/margins": 0.38807937502861023, |
|
"rewards/rejected": -2.117971420288086, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.7545758183032732, |
|
"grad_norm": 12.388342839443734, |
|
"learning_rate": 1.710306370301437e-07, |
|
"logits/chosen": -0.7042302489280701, |
|
"logits/rejected": -0.7210839986801147, |
|
"logps/chosen": -481.449951171875, |
|
"logps/rejected": -541.4148559570312, |
|
"loss": 0.6228, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.7111109495162964, |
|
"rewards/margins": 0.5329866409301758, |
|
"rewards/rejected": -2.2440977096557617, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.7567134268537075, |
|
"grad_norm": 10.900263207759233, |
|
"learning_rate": 1.6822338549489446e-07, |
|
"logits/chosen": -0.6276527047157288, |
|
"logits/rejected": -0.6185672879219055, |
|
"logps/chosen": -353.99462890625, |
|
"logps/rejected": -390.6813659667969, |
|
"loss": 0.5823, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.318265438079834, |
|
"rewards/margins": 0.3258642554283142, |
|
"rewards/rejected": -1.6441295146942139, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.7588510354041417, |
|
"grad_norm": 11.630075473409493, |
|
"learning_rate": 1.6543469682057104e-07, |
|
"logits/chosen": -0.7092128992080688, |
|
"logits/rejected": -0.6994844079017639, |
|
"logps/chosen": -449.5421142578125, |
|
"logps/rejected": -491.48406982421875, |
|
"loss": 0.5833, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.727628231048584, |
|
"rewards/margins": 0.27954497933387756, |
|
"rewards/rejected": -2.0071730613708496, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.7609886439545758, |
|
"grad_norm": 11.035433473738337, |
|
"learning_rate": 1.6266472703396284e-07, |
|
"logits/chosen": -0.801999568939209, |
|
"logits/rejected": -0.7807914614677429, |
|
"logps/chosen": -436.8575439453125, |
|
"logps/rejected": -460.9193115234375, |
|
"loss": 0.5715, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.5792278051376343, |
|
"rewards/margins": 0.3968886435031891, |
|
"rewards/rejected": -1.976116418838501, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.76312625250501, |
|
"grad_norm": 11.722081233691071, |
|
"learning_rate": 1.599136311145402e-07, |
|
"logits/chosen": -0.6747885942459106, |
|
"logits/rejected": -0.618495523929596, |
|
"logps/chosen": -422.78729248046875, |
|
"logps/rejected": -472.6419372558594, |
|
"loss": 0.6287, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.606202244758606, |
|
"rewards/margins": 0.4832748472690582, |
|
"rewards/rejected": -2.089477062225342, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.7652638610554442, |
|
"grad_norm": 12.64820349502746, |
|
"learning_rate": 1.5718156298578288e-07, |
|
"logits/chosen": -0.7273571491241455, |
|
"logits/rejected": -0.6881564855575562, |
|
"logps/chosen": -425.4215087890625, |
|
"logps/rejected": -444.314208984375, |
|
"loss": 0.628, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.6894898414611816, |
|
"rewards/margins": 0.1857471764087677, |
|
"rewards/rejected": -1.875237226486206, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.7674014696058784, |
|
"grad_norm": 10.96328639456088, |
|
"learning_rate": 1.5446867550656767e-07, |
|
"logits/chosen": -0.6399669647216797, |
|
"logits/rejected": -0.6358177661895752, |
|
"logps/chosen": -372.887939453125, |
|
"logps/rejected": -401.3673095703125, |
|
"loss": 0.5816, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.543872356414795, |
|
"rewards/margins": 0.2215932011604309, |
|
"rewards/rejected": -1.7654657363891602, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.7695390781563126, |
|
"grad_norm": 11.515812302503546, |
|
"learning_rate": 1.5177512046261666e-07, |
|
"logits/chosen": -0.7418455481529236, |
|
"logits/rejected": -0.6992334127426147, |
|
"logps/chosen": -440.9914245605469, |
|
"logps/rejected": -484.72882080078125, |
|
"loss": 0.5918, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.6546094417572021, |
|
"rewards/margins": 0.40860795974731445, |
|
"rewards/rejected": -2.0632174015045166, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.7716766867067468, |
|
"grad_norm": 11.301203400803265, |
|
"learning_rate": 1.4910104855800426e-07, |
|
"logits/chosen": -0.5830298066139221, |
|
"logits/rejected": -0.541452944278717, |
|
"logps/chosen": -428.9151611328125, |
|
"logps/rejected": -450.665283203125, |
|
"loss": 0.6244, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.6960718631744385, |
|
"rewards/margins": 0.26461082696914673, |
|
"rewards/rejected": -1.96068274974823, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.773814295257181, |
|
"grad_norm": 10.448871439187576, |
|
"learning_rate": 1.4644660940672627e-07, |
|
"logits/chosen": -0.643266499042511, |
|
"logits/rejected": -0.6516848802566528, |
|
"logps/chosen": -382.15582275390625, |
|
"logps/rejected": -422.58001708984375, |
|
"loss": 0.6302, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.693763017654419, |
|
"rewards/margins": 0.31615814566612244, |
|
"rewards/rejected": -2.0099213123321533, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.7759519038076153, |
|
"grad_norm": 12.065289362336179, |
|
"learning_rate": 1.4381195152432769e-07, |
|
"logits/chosen": -0.7809977531433105, |
|
"logits/rejected": -0.7569341659545898, |
|
"logps/chosen": -402.4347229003906, |
|
"logps/rejected": -426.5815124511719, |
|
"loss": 0.6014, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.4668489694595337, |
|
"rewards/margins": 0.27162590622901917, |
|
"rewards/rejected": -1.7384748458862305, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.7780895123580495, |
|
"grad_norm": 11.450406850059426, |
|
"learning_rate": 1.4119722231959403e-07, |
|
"logits/chosen": -0.7261683940887451, |
|
"logits/rejected": -0.7380213737487793, |
|
"logps/chosen": -320.5738830566406, |
|
"logps/rejected": -376.2132568359375, |
|
"loss": 0.6148, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.1737643480300903, |
|
"rewards/margins": 0.4349837005138397, |
|
"rewards/rejected": -1.608747959136963, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.7802271209084837, |
|
"grad_norm": 10.17075140486933, |
|
"learning_rate": 1.3860256808630427e-07, |
|
"logits/chosen": -0.6793671250343323, |
|
"logits/rejected": -0.6769421100616455, |
|
"logps/chosen": -396.4522705078125, |
|
"logps/rejected": -426.5015869140625, |
|
"loss": 0.5761, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.5801575183868408, |
|
"rewards/margins": 0.23419132828712463, |
|
"rewards/rejected": -1.8143486976623535, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.7823647294589179, |
|
"grad_norm": 9.496259172803326, |
|
"learning_rate": 1.3602813399504458e-07, |
|
"logits/chosen": -0.7178226113319397, |
|
"logits/rejected": -0.7088046073913574, |
|
"logps/chosen": -362.4988098144531, |
|
"logps/rejected": -413.68255615234375, |
|
"loss": 0.5697, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.4096518754959106, |
|
"rewards/margins": 0.4560312330722809, |
|
"rewards/rejected": -1.8656830787658691, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.784502338009352, |
|
"grad_norm": 11.467183173889156, |
|
"learning_rate": 1.3347406408508694e-07, |
|
"logits/chosen": -0.58012455701828, |
|
"logits/rejected": -0.6086165308952332, |
|
"logps/chosen": -381.5002746582031, |
|
"logps/rejected": -446.1846618652344, |
|
"loss": 0.5768, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.546051263809204, |
|
"rewards/margins": 0.5468287467956543, |
|
"rewards/rejected": -2.0928800106048584, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.7866399465597862, |
|
"grad_norm": 11.770361743077546, |
|
"learning_rate": 1.3094050125632972e-07, |
|
"logits/chosen": -0.665503978729248, |
|
"logits/rejected": -0.6807020902633667, |
|
"logps/chosen": -339.297119140625, |
|
"logps/rejected": -378.72283935546875, |
|
"loss": 0.6007, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.239609718322754, |
|
"rewards/margins": 0.311847984790802, |
|
"rewards/rejected": -1.5514576435089111, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.7887775551102204, |
|
"grad_norm": 11.239777792633861, |
|
"learning_rate": 1.284275872613028e-07, |
|
"logits/chosen": -0.7516641020774841, |
|
"logits/rejected": -0.7523844242095947, |
|
"logps/chosen": -465.70562744140625, |
|
"logps/rejected": -494.3858642578125, |
|
"loss": 0.5955, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.708259105682373, |
|
"rewards/margins": 0.19142737984657288, |
|
"rewards/rejected": -1.899686336517334, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.7909151636606546, |
|
"grad_norm": 12.24207530779827, |
|
"learning_rate": 1.2593546269723647e-07, |
|
"logits/chosen": -0.7178503274917603, |
|
"logits/rejected": -0.7465229630470276, |
|
"logps/chosen": -350.14300537109375, |
|
"logps/rejected": -426.7923583984375, |
|
"loss": 0.5556, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -1.3200982809066772, |
|
"rewards/margins": 0.5981042981147766, |
|
"rewards/rejected": -1.918202519416809, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.7930527722110888, |
|
"grad_norm": 11.476654461821495, |
|
"learning_rate": 1.2346426699819456e-07, |
|
"logits/chosen": -0.6654431223869324, |
|
"logits/rejected": -0.6413010954856873, |
|
"logps/chosen": -432.3926086425781, |
|
"logps/rejected": -445.0782165527344, |
|
"loss": 0.6153, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.8410680294036865, |
|
"rewards/margins": 0.26301610469818115, |
|
"rewards/rejected": -2.104084014892578, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.795190380761523, |
|
"grad_norm": 11.015669950808952, |
|
"learning_rate": 1.2101413842727343e-07, |
|
"logits/chosen": -0.748419463634491, |
|
"logits/rejected": -0.7465101480484009, |
|
"logps/chosen": -404.2447204589844, |
|
"logps/rejected": -458.7890625, |
|
"loss": 0.6227, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.4635306596755981, |
|
"rewards/margins": 0.4813007712364197, |
|
"rewards/rejected": -1.9448314905166626, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.7973279893119573, |
|
"grad_norm": 11.968874819239444, |
|
"learning_rate": 1.1858521406886674e-07, |
|
"logits/chosen": -0.6935529112815857, |
|
"logits/rejected": -0.6768806576728821, |
|
"logps/chosen": -479.6001892089844, |
|
"logps/rejected": -526.9801025390625, |
|
"loss": 0.5949, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.146116018295288, |
|
"rewards/margins": 0.5019779205322266, |
|
"rewards/rejected": -2.6480939388275146, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.7994655978623915, |
|
"grad_norm": 11.31673592574301, |
|
"learning_rate": 1.1617762982099444e-07, |
|
"logits/chosen": -0.7199594974517822, |
|
"logits/rejected": -0.7195298671722412, |
|
"logps/chosen": -390.56695556640625, |
|
"logps/rejected": -437.9982604980469, |
|
"loss": 0.6259, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.6399167776107788, |
|
"rewards/margins": 0.4196929931640625, |
|
"rewards/rejected": -2.0596096515655518, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.8016032064128257, |
|
"grad_norm": 11.65245860510705, |
|
"learning_rate": 1.1379152038770029e-07, |
|
"logits/chosen": -0.6417936086654663, |
|
"logits/rejected": -0.5881288051605225, |
|
"logps/chosen": -462.2901611328125, |
|
"logps/rejected": -533.3080444335938, |
|
"loss": 0.639, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.8019856214523315, |
|
"rewards/margins": 0.778126060962677, |
|
"rewards/rejected": -2.580111503601074, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.8037408149632599, |
|
"grad_norm": 11.237123066893254, |
|
"learning_rate": 1.1142701927151454e-07, |
|
"logits/chosen": -0.742131233215332, |
|
"logits/rejected": -0.7236477136611938, |
|
"logps/chosen": -440.7339782714844, |
|
"logps/rejected": -468.85723876953125, |
|
"loss": 0.622, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.7191107273101807, |
|
"rewards/margins": 0.3171979784965515, |
|
"rewards/rejected": -2.036308765411377, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.8058784235136941, |
|
"grad_norm": 12.182574925046193, |
|
"learning_rate": 1.090842587659851e-07, |
|
"logits/chosen": -0.6230757832527161, |
|
"logits/rejected": -0.6275469064712524, |
|
"logps/chosen": -345.8181457519531, |
|
"logps/rejected": -382.3629150390625, |
|
"loss": 0.6094, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.315040111541748, |
|
"rewards/margins": 0.3147667646408081, |
|
"rewards/rejected": -1.6298067569732666, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.8080160320641283, |
|
"grad_norm": 12.095497229280761, |
|
"learning_rate": 1.0676336994827512e-07, |
|
"logits/chosen": -0.8505545258522034, |
|
"logits/rejected": -0.8231047987937927, |
|
"logps/chosen": -439.4098205566406, |
|
"logps/rejected": -450.57861328125, |
|
"loss": 0.5882, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.8718527555465698, |
|
"rewards/margins": 0.0610598549246788, |
|
"rewards/rejected": -1.9329125881195068, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.8101536406145624, |
|
"grad_norm": 12.306182802408912, |
|
"learning_rate": 1.044644826718295e-07, |
|
"logits/chosen": -0.6553314924240112, |
|
"logits/rejected": -0.6298251152038574, |
|
"logps/chosen": -428.9188537597656, |
|
"logps/rejected": -464.73126220703125, |
|
"loss": 0.5659, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.740647554397583, |
|
"rewards/margins": 0.34435731172561646, |
|
"rewards/rejected": -2.0850048065185547, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.8122912491649966, |
|
"grad_norm": 11.84427292451934, |
|
"learning_rate": 1.0218772555910954e-07, |
|
"logits/chosen": -0.6922661662101746, |
|
"logits/rejected": -0.7002755999565125, |
|
"logps/chosen": -382.30987548828125, |
|
"logps/rejected": -423.1776123046875, |
|
"loss": 0.6365, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.37299644947052, |
|
"rewards/margins": 0.28076884150505066, |
|
"rewards/rejected": -1.6537654399871826, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.8144288577154308, |
|
"grad_norm": 11.204366574978794, |
|
"learning_rate": 9.99332259943969e-08, |
|
"logits/chosen": -0.7378983497619629, |
|
"logits/rejected": -0.7215259075164795, |
|
"logps/chosen": -465.00885009765625, |
|
"logps/rejected": -522.8477783203125, |
|
"loss": 0.6099, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.6207081079483032, |
|
"rewards/margins": 0.5316731333732605, |
|
"rewards/rejected": -2.152381420135498, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.816566466265865, |
|
"grad_norm": 10.577264704091782, |
|
"learning_rate": 9.770111011666582e-08, |
|
"logits/chosen": -0.7259981632232666, |
|
"logits/rejected": -0.7045480012893677, |
|
"logps/chosen": -428.8095703125, |
|
"logps/rejected": -492.239013671875, |
|
"loss": 0.6087, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.5366865396499634, |
|
"rewards/margins": 0.7509114146232605, |
|
"rewards/rejected": -2.287597894668579, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.8187040748162993, |
|
"grad_norm": 12.483677889539976, |
|
"learning_rate": 9.549150281252632e-08, |
|
"logits/chosen": -0.6887928247451782, |
|
"logits/rejected": -0.6907156705856323, |
|
"logps/chosen": -352.9273681640625, |
|
"logps/rejected": -383.6487121582031, |
|
"loss": 0.6259, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.5845268964767456, |
|
"rewards/margins": 0.28942757844924927, |
|
"rewards/rejected": -1.8739545345306396, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.8208416833667335, |
|
"grad_norm": 19.23190107186564, |
|
"learning_rate": 9.330452770923603e-08, |
|
"logits/chosen": -0.762394905090332, |
|
"logits/rejected": -0.7647604942321777, |
|
"logps/chosen": -451.6494140625, |
|
"logps/rejected": -534.89892578125, |
|
"loss": 0.5934, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.8478323221206665, |
|
"rewards/margins": 0.6826062798500061, |
|
"rewards/rejected": -2.5304384231567383, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.8229792919171677, |
|
"grad_norm": 12.247364252908152, |
|
"learning_rate": 9.114030716778432e-08, |
|
"logits/chosen": -0.7505077123641968, |
|
"logits/rejected": -0.7758923768997192, |
|
"logps/chosen": -470.6575927734375, |
|
"logps/rejected": -503.64556884765625, |
|
"loss": 0.6397, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.8584275245666504, |
|
"rewards/margins": 0.3208252787590027, |
|
"rewards/rejected": -2.179252862930298, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.8251169004676019, |
|
"grad_norm": 11.383815632835855, |
|
"learning_rate": 8.899896227604508e-08, |
|
"logits/chosen": -0.6819490194320679, |
|
"logits/rejected": -0.6731836199760437, |
|
"logps/chosen": -433.1306457519531, |
|
"logps/rejected": -487.12646484375, |
|
"loss": 0.6317, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.6253031492233276, |
|
"rewards/margins": 0.4985421299934387, |
|
"rewards/rejected": -2.123845100402832, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.8272545090180361, |
|
"grad_norm": 13.189195026919496, |
|
"learning_rate": 8.688061284200265e-08, |
|
"logits/chosen": -0.6536362171173096, |
|
"logits/rejected": -0.6316641569137573, |
|
"logps/chosen": -447.10577392578125, |
|
"logps/rejected": -500.36700439453125, |
|
"loss": 0.6544, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.6854324340820312, |
|
"rewards/margins": 0.3739916980266571, |
|
"rewards/rejected": -2.0594239234924316, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.8293921175684703, |
|
"grad_norm": 12.05654473393893, |
|
"learning_rate": 8.478537738704811e-08, |
|
"logits/chosen": -0.7113953232765198, |
|
"logits/rejected": -0.6980003118515015, |
|
"logps/chosen": -437.1040344238281, |
|
"logps/rejected": -477.0093078613281, |
|
"loss": 0.5797, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.7935041189193726, |
|
"rewards/margins": 0.35914352536201477, |
|
"rewards/rejected": -2.1526474952697754, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.8315297261189045, |
|
"grad_norm": 11.698252029580289, |
|
"learning_rate": 8.271337313934867e-08, |
|
"logits/chosen": -0.624556839466095, |
|
"logits/rejected": -0.6502059698104858, |
|
"logps/chosen": -414.85882568359375, |
|
"logps/rejected": -456.1212158203125, |
|
"loss": 0.6072, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.6804428100585938, |
|
"rewards/margins": 0.3041376769542694, |
|
"rewards/rejected": -1.9845805168151855, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.8336673346693386, |
|
"grad_norm": 12.14771475451631, |
|
"learning_rate": 8.066471602728803e-08, |
|
"logits/chosen": -0.6798664331436157, |
|
"logits/rejected": -0.6738008260726929, |
|
"logps/chosen": -411.55047607421875, |
|
"logps/rejected": -457.5155334472656, |
|
"loss": 0.5922, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.4717459678649902, |
|
"rewards/margins": 0.5215471386909485, |
|
"rewards/rejected": -1.993293046951294, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.8358049432197728, |
|
"grad_norm": 12.796578845217505, |
|
"learning_rate": 7.863952067298041e-08, |
|
"logits/chosen": -0.5822688937187195, |
|
"logits/rejected": -0.564083993434906, |
|
"logps/chosen": -431.5522155761719, |
|
"logps/rejected": -450.26739501953125, |
|
"loss": 0.628, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.9067468643188477, |
|
"rewards/margins": 0.21568900346755981, |
|
"rewards/rejected": -2.1224358081817627, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.837942551770207, |
|
"grad_norm": 11.159935748642301, |
|
"learning_rate": 7.663790038585794e-08, |
|
"logits/chosen": -0.662575364112854, |
|
"logits/rejected": -0.6590286493301392, |
|
"logps/chosen": -444.98162841796875, |
|
"logps/rejected": -497.9795227050781, |
|
"loss": 0.5731, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.7177234888076782, |
|
"rewards/margins": 0.6272789239883423, |
|
"rewards/rejected": -2.3450024127960205, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.8400801603206413, |
|
"grad_norm": 14.31633694385083, |
|
"learning_rate": 7.465996715633027e-08, |
|
"logits/chosen": -0.6459007263183594, |
|
"logits/rejected": -0.6448737382888794, |
|
"logps/chosen": -397.7703552246094, |
|
"logps/rejected": -440.38238525390625, |
|
"loss": 0.5945, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.7266194820404053, |
|
"rewards/margins": 0.44395869970321655, |
|
"rewards/rejected": -2.1705780029296875, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.8422177688710755, |
|
"grad_norm": 11.224103010133572, |
|
"learning_rate": 7.270583164951926e-08, |
|
"logits/chosen": -0.6865531206130981, |
|
"logits/rejected": -0.6968246698379517, |
|
"logps/chosen": -354.6371154785156, |
|
"logps/rejected": -424.811279296875, |
|
"loss": 0.6334, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.536527156829834, |
|
"rewards/margins": 0.5009466409683228, |
|
"rewards/rejected": -2.0374739170074463, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.8443553774215097, |
|
"grad_norm": 10.826285808287984, |
|
"learning_rate": 7.077560319906694e-08, |
|
"logits/chosen": -0.6569056510925293, |
|
"logits/rejected": -0.6044581532478333, |
|
"logps/chosen": -360.92681884765625, |
|
"logps/rejected": -372.06512451171875, |
|
"loss": 0.6061, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.6241035461425781, |
|
"rewards/margins": 0.13333997130393982, |
|
"rewards/rejected": -1.7574434280395508, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.8464929859719439, |
|
"grad_norm": 11.121731952204106, |
|
"learning_rate": 6.886938980101869e-08, |
|
"logits/chosen": -0.6959440112113953, |
|
"logits/rejected": -0.6976322531700134, |
|
"logps/chosen": -481.72747802734375, |
|
"logps/rejected": -528.6837768554688, |
|
"loss": 0.5492, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -1.7375783920288086, |
|
"rewards/margins": 0.5362969040870667, |
|
"rewards/rejected": -2.2738752365112305, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.8486305945223781, |
|
"grad_norm": 11.676543714442664, |
|
"learning_rate": 6.698729810778064e-08, |
|
"logits/chosen": -0.7131574153900146, |
|
"logits/rejected": -0.6955525875091553, |
|
"logps/chosen": -399.06610107421875, |
|
"logps/rejected": -414.4498291015625, |
|
"loss": 0.5949, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.6074295043945312, |
|
"rewards/margins": 0.191550150513649, |
|
"rewards/rejected": -1.798979640007019, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.8507682030728123, |
|
"grad_norm": 11.148999020679877, |
|
"learning_rate": 6.512943342215232e-08, |
|
"logits/chosen": -0.7562680244445801, |
|
"logits/rejected": -0.779510498046875, |
|
"logps/chosen": -484.00506591796875, |
|
"logps/rejected": -511.72882080078125, |
|
"loss": 0.5846, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.8436009883880615, |
|
"rewards/margins": 0.35308870673179626, |
|
"rewards/rejected": -2.1966898441314697, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.8529058116232465, |
|
"grad_norm": 11.997120047866387, |
|
"learning_rate": 6.329589969143517e-08, |
|
"logits/chosen": -0.6792132258415222, |
|
"logits/rejected": -0.6694210171699524, |
|
"logps/chosen": -424.2513427734375, |
|
"logps/rejected": -454.1995849609375, |
|
"loss": 0.5749, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -1.7970068454742432, |
|
"rewards/margins": 0.36387020349502563, |
|
"rewards/rejected": -2.160876750946045, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.8550434201736807, |
|
"grad_norm": 11.238178437853232, |
|
"learning_rate": 6.148679950161672e-08, |
|
"logits/chosen": -0.6610137820243835, |
|
"logits/rejected": -0.6665123105049133, |
|
"logps/chosen": -446.12451171875, |
|
"logps/rejected": -491.7059326171875, |
|
"loss": 0.5888, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.106739044189453, |
|
"rewards/margins": 0.3179362714290619, |
|
"rewards/rejected": -2.4246749877929688, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8550434201736807, |
|
"eval_logits/chosen": -0.6434622406959534, |
|
"eval_logits/rejected": -0.6439588069915771, |
|
"eval_logps/chosen": -418.7559814453125, |
|
"eval_logps/rejected": -459.1051330566406, |
|
"eval_loss": 0.6088488698005676, |
|
"eval_rewards/accuracies": 0.6829268336296082, |
|
"eval_rewards/chosen": -1.6320796012878418, |
|
"eval_rewards/margins": 0.34643232822418213, |
|
"eval_rewards/rejected": -1.9785118103027344, |
|
"eval_runtime": 373.8135, |
|
"eval_samples_per_second": 5.246, |
|
"eval_steps_per_second": 0.329, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.857181028724115, |
|
"grad_norm": 13.938068351492317, |
|
"learning_rate": 5.9702234071631e-08, |
|
"logits/chosen": -0.6074206233024597, |
|
"logits/rejected": -0.57494056224823, |
|
"logps/chosen": -432.34869384765625, |
|
"logps/rejected": -481.2635498046875, |
|
"loss": 0.624, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.724292278289795, |
|
"rewards/margins": 0.546868622303009, |
|
"rewards/rejected": -2.2711610794067383, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.859318637274549, |
|
"grad_norm": 14.889197610496133, |
|
"learning_rate": 5.794230324769517e-08, |
|
"logits/chosen": -0.6924403309822083, |
|
"logits/rejected": -0.695598304271698, |
|
"logps/chosen": -430.3138732910156, |
|
"logps/rejected": -472.5992431640625, |
|
"loss": 0.6165, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.809744119644165, |
|
"rewards/margins": 0.3945625424385071, |
|
"rewards/rejected": -2.2043066024780273, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.8614562458249833, |
|
"grad_norm": 10.24820423132373, |
|
"learning_rate": 5.620710549772295e-08, |
|
"logits/chosen": -0.6588191390037537, |
|
"logits/rejected": -0.6449538469314575, |
|
"logps/chosen": -391.6925354003906, |
|
"logps/rejected": -442.3234558105469, |
|
"loss": 0.6086, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.5062448978424072, |
|
"rewards/margins": 0.3859245777130127, |
|
"rewards/rejected": -1.89216947555542, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.8635938543754175, |
|
"grad_norm": 11.857754301901029, |
|
"learning_rate": 5.44967379058161e-08, |
|
"logits/chosen": -0.7503631114959717, |
|
"logits/rejected": -0.7300340533256531, |
|
"logps/chosen": -386.56072998046875, |
|
"logps/rejected": -396.8390808105469, |
|
"loss": 0.5982, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -1.6606768369674683, |
|
"rewards/margins": 0.09765629470348358, |
|
"rewards/rejected": -1.7583332061767578, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.8657314629258517, |
|
"grad_norm": 10.569416708562631, |
|
"learning_rate": 5.2811296166831666e-08, |
|
"logits/chosen": -0.7464025020599365, |
|
"logits/rejected": -0.7271702885627747, |
|
"logps/chosen": -413.1631774902344, |
|
"logps/rejected": -470.1753234863281, |
|
"loss": 0.581, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.5785975456237793, |
|
"rewards/margins": 0.4657081961631775, |
|
"rewards/rejected": -2.0443055629730225, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.8678690714762859, |
|
"grad_norm": 11.592126458293672, |
|
"learning_rate": 5.11508745810284e-08, |
|
"logits/chosen": -0.667618453502655, |
|
"logits/rejected": -0.6744921207427979, |
|
"logps/chosen": -404.13824462890625, |
|
"logps/rejected": -412.8828430175781, |
|
"loss": 0.6282, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.6986844539642334, |
|
"rewards/margins": 0.08317200094461441, |
|
"rewards/rejected": -1.7818565368652344, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.8700066800267201, |
|
"grad_norm": 12.223879763686206, |
|
"learning_rate": 4.951556604879048e-08, |
|
"logits/chosen": -0.6467772126197815, |
|
"logits/rejected": -0.6247937679290771, |
|
"logps/chosen": -442.8312683105469, |
|
"logps/rejected": -498.9490051269531, |
|
"loss": 0.6112, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.812768816947937, |
|
"rewards/margins": 0.4102476239204407, |
|
"rewards/rejected": -2.2230165004730225, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.8721442885771543, |
|
"grad_norm": 13.609539677706314, |
|
"learning_rate": 4.7905462065429946e-08, |
|
"logits/chosen": -0.838919997215271, |
|
"logits/rejected": -0.8245532512664795, |
|
"logps/chosen": -415.890869140625, |
|
"logps/rejected": -435.3166198730469, |
|
"loss": 0.6788, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.5928852558135986, |
|
"rewards/margins": 0.25430333614349365, |
|
"rewards/rejected": -1.8471887111663818, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.8742818971275885, |
|
"grad_norm": 10.454604993981434, |
|
"learning_rate": 4.6320652716067555e-08, |
|
"logits/chosen": -0.7226736545562744, |
|
"logits/rejected": -0.7249311208724976, |
|
"logps/chosen": -406.7791748046875, |
|
"logps/rejected": -448.5416259765625, |
|
"loss": 0.609, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.5601541996002197, |
|
"rewards/margins": 0.37674978375434875, |
|
"rewards/rejected": -1.936903953552246, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.8764195056780227, |
|
"grad_norm": 10.922415025272509, |
|
"learning_rate": 4.4761226670592066e-08, |
|
"logits/chosen": -0.7072638869285583, |
|
"logits/rejected": -0.6469031572341919, |
|
"logps/chosen": -437.16253662109375, |
|
"logps/rejected": -464.5462951660156, |
|
"loss": 0.6033, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.673638105392456, |
|
"rewards/margins": 0.27518972754478455, |
|
"rewards/rejected": -1.948827862739563, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.878557114228457, |
|
"grad_norm": 11.15308213585594, |
|
"learning_rate": 4.322727117869951e-08, |
|
"logits/chosen": -0.5786024332046509, |
|
"logits/rejected": -0.5698223114013672, |
|
"logps/chosen": -387.2678527832031, |
|
"logps/rejected": -420.85101318359375, |
|
"loss": 0.6038, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.5769679546356201, |
|
"rewards/margins": 0.32589417695999146, |
|
"rewards/rejected": -1.9028621912002563, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.8806947227788912, |
|
"grad_norm": 14.488282375387797, |
|
"learning_rate": 4.17188720650119e-08, |
|
"logits/chosen": -0.7604373097419739, |
|
"logits/rejected": -0.7526075839996338, |
|
"logps/chosen": -510.45159912109375, |
|
"logps/rejected": -509.4095458984375, |
|
"loss": 0.6893, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.090540885925293, |
|
"rewards/margins": 0.033110879361629486, |
|
"rewards/rejected": -2.1236515045166016, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.8828323313293254, |
|
"grad_norm": 11.024440296301012, |
|
"learning_rate": 4.023611372427471e-08, |
|
"logits/chosen": -0.7349828481674194, |
|
"logits/rejected": -0.7459964156150818, |
|
"logps/chosen": -388.6877746582031, |
|
"logps/rejected": -420.792236328125, |
|
"loss": 0.5967, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.7269947528839111, |
|
"rewards/margins": 0.28061679005622864, |
|
"rewards/rejected": -2.0076115131378174, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.8849699398797595, |
|
"grad_norm": 11.21797390523024, |
|
"learning_rate": 3.877907911663542e-08, |
|
"logits/chosen": -0.6687692403793335, |
|
"logits/rejected": -0.6710121631622314, |
|
"logps/chosen": -361.7718200683594, |
|
"logps/rejected": -406.8067321777344, |
|
"loss": 0.5766, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.2828166484832764, |
|
"rewards/margins": 0.4076838493347168, |
|
"rewards/rejected": -1.6905003786087036, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.8871075484301937, |
|
"grad_norm": 11.310104114342186, |
|
"learning_rate": 3.734784976300165e-08, |
|
"logits/chosen": -0.7112718820571899, |
|
"logits/rejected": -0.6793174743652344, |
|
"logps/chosen": -395.9449768066406, |
|
"logps/rejected": -415.42108154296875, |
|
"loss": 0.6427, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.5930345058441162, |
|
"rewards/margins": 0.189566969871521, |
|
"rewards/rejected": -1.7826014757156372, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.8892451569806279, |
|
"grad_norm": 11.184536783628738, |
|
"learning_rate": 3.594250574048058e-08, |
|
"logits/chosen": -0.6613335609436035, |
|
"logits/rejected": -0.6428050994873047, |
|
"logps/chosen": -367.02874755859375, |
|
"logps/rejected": -389.9844970703125, |
|
"loss": 0.6174, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.537019968032837, |
|
"rewards/margins": 0.1499500423669815, |
|
"rewards/rejected": -1.6869698762893677, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.8913827655310621, |
|
"grad_norm": 12.205433845637979, |
|
"learning_rate": 3.456312567789793e-08, |
|
"logits/chosen": -0.7070876955986023, |
|
"logits/rejected": -0.7160503268241882, |
|
"logps/chosen": -469.0753173828125, |
|
"logps/rejected": -494.7930908203125, |
|
"loss": 0.6228, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.9284939765930176, |
|
"rewards/margins": 0.23351570963859558, |
|
"rewards/rejected": -2.1620097160339355, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.8935203740814963, |
|
"grad_norm": 11.816949999049964, |
|
"learning_rate": 3.3209786751399184e-08, |
|
"logits/chosen": -0.6653708815574646, |
|
"logits/rejected": -0.6532600522041321, |
|
"logps/chosen": -464.9654541015625, |
|
"logps/rejected": -504.6747741699219, |
|
"loss": 0.5449, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.9524656534194946, |
|
"rewards/margins": 0.3868550658226013, |
|
"rewards/rejected": -2.339320659637451, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.8956579826319305, |
|
"grad_norm": 10.879487484866441, |
|
"learning_rate": 3.188256468013139e-08, |
|
"logits/chosen": -0.6497898101806641, |
|
"logits/rejected": -0.6454100608825684, |
|
"logps/chosen": -478.6482238769531, |
|
"logps/rejected": -530.9612426757812, |
|
"loss": 0.5632, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.8596200942993164, |
|
"rewards/margins": 0.5317977070808411, |
|
"rewards/rejected": -2.3914177417755127, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.8977955911823647, |
|
"grad_norm": 11.880007198303698, |
|
"learning_rate": 3.058153372200695e-08, |
|
"logits/chosen": -0.6183308959007263, |
|
"logits/rejected": -0.6003840565681458, |
|
"logps/chosen": -459.9405212402344, |
|
"logps/rejected": -505.4646911621094, |
|
"loss": 0.6181, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.9765161275863647, |
|
"rewards/margins": 0.4471958875656128, |
|
"rewards/rejected": -2.4237117767333984, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.899933199732799, |
|
"grad_norm": 13.426952468351809, |
|
"learning_rate": 2.9306766669548457e-08, |
|
"logits/chosen": -0.7094901204109192, |
|
"logits/rejected": -0.6653531193733215, |
|
"logps/chosen": -466.24029541015625, |
|
"logps/rejected": -487.9982604980469, |
|
"loss": 0.5993, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.9488886594772339, |
|
"rewards/margins": 0.351326048374176, |
|
"rewards/rejected": -2.3002147674560547, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.9020708082832332, |
|
"grad_norm": 11.556511924801233, |
|
"learning_rate": 2.805833484581621e-08, |
|
"logits/chosen": -0.8073502779006958, |
|
"logits/rejected": -0.7438942790031433, |
|
"logps/chosen": -459.5665588378906, |
|
"logps/rejected": -462.51519775390625, |
|
"loss": 0.5975, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -1.806351900100708, |
|
"rewards/margins": 0.16977502405643463, |
|
"rewards/rejected": -1.976126790046692, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.9042084168336674, |
|
"grad_norm": 12.210595464753169, |
|
"learning_rate": 2.6836308100417872e-08, |
|
"logits/chosen": -0.6977376341819763, |
|
"logits/rejected": -0.6720814108848572, |
|
"logps/chosen": -427.6357727050781, |
|
"logps/rejected": -460.1345520019531, |
|
"loss": 0.5831, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.711733102798462, |
|
"rewards/margins": 0.44918665289878845, |
|
"rewards/rejected": -2.1609199047088623, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.9063460253841016, |
|
"grad_norm": 9.940416325858004, |
|
"learning_rate": 2.5640754805600128e-08, |
|
"logits/chosen": -0.7047473788261414, |
|
"logits/rejected": -0.7050879597663879, |
|
"logps/chosen": -355.5130615234375, |
|
"logps/rejected": -383.0091552734375, |
|
"loss": 0.6143, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.397302508354187, |
|
"rewards/margins": 0.2378145009279251, |
|
"rewards/rejected": -1.6351170539855957, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.9084836339345357, |
|
"grad_norm": 12.20154656454581, |
|
"learning_rate": 2.4471741852423233e-08, |
|
"logits/chosen": -0.7828183174133301, |
|
"logits/rejected": -0.7872889041900635, |
|
"logps/chosen": -358.20751953125, |
|
"logps/rejected": -394.5291748046875, |
|
"loss": 0.5956, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.3747402429580688, |
|
"rewards/margins": 0.26737523078918457, |
|
"rewards/rejected": -1.6421154737472534, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.9106212424849699, |
|
"grad_norm": 10.941418508752523, |
|
"learning_rate": 2.3329334647018694e-08, |
|
"logits/chosen": -0.6170888543128967, |
|
"logits/rejected": -0.5692444443702698, |
|
"logps/chosen": -472.42864990234375, |
|
"logps/rejected": -516.1029052734375, |
|
"loss": 0.5838, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.0783095359802246, |
|
"rewards/margins": 0.4643981158733368, |
|
"rewards/rejected": -2.542707681655884, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.9127588510354041, |
|
"grad_norm": 11.303024362125633, |
|
"learning_rate": 2.2213597106929605e-08, |
|
"logits/chosen": -0.5531542301177979, |
|
"logits/rejected": -0.5305842161178589, |
|
"logps/chosen": -422.59100341796875, |
|
"logps/rejected": -460.8222961425781, |
|
"loss": 0.6129, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.7473095655441284, |
|
"rewards/margins": 0.3811667263507843, |
|
"rewards/rejected": -2.12847638130188, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.9148964595858383, |
|
"grad_norm": 14.94800877117672, |
|
"learning_rate": 2.1124591657534774e-08, |
|
"logits/chosen": -0.6627920866012573, |
|
"logits/rejected": -0.6768360733985901, |
|
"logps/chosen": -437.7267150878906, |
|
"logps/rejected": -494.59075927734375, |
|
"loss": 0.6108, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.8751461505889893, |
|
"rewards/margins": 0.4180339574813843, |
|
"rewards/rejected": -2.293180227279663, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.9170340681362725, |
|
"grad_norm": 11.706241248792244, |
|
"learning_rate": 2.0062379228555525e-08, |
|
"logits/chosen": -0.6479263305664062, |
|
"logits/rejected": -0.6203778386116028, |
|
"logps/chosen": -371.62310791015625, |
|
"logps/rejected": -380.03436279296875, |
|
"loss": 0.6172, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.4537248611450195, |
|
"rewards/margins": 0.1625545471906662, |
|
"rewards/rejected": -1.6162794828414917, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.9191716766867067, |
|
"grad_norm": 11.343840156206866, |
|
"learning_rate": 1.9027019250647036e-08, |
|
"logits/chosen": -0.7142120003700256, |
|
"logits/rejected": -0.7347142696380615, |
|
"logps/chosen": -411.76312255859375, |
|
"logps/rejected": -476.20697021484375, |
|
"loss": 0.5984, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.6978678703308105, |
|
"rewards/margins": 0.46169888973236084, |
|
"rewards/rejected": -2.159566640853882, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.921309285237141, |
|
"grad_norm": 10.191297953603781, |
|
"learning_rate": 1.8018569652073378e-08, |
|
"logits/chosen": -0.5895026922225952, |
|
"logits/rejected": -0.5850787162780762, |
|
"logps/chosen": -406.5594482421875, |
|
"logps/rejected": -485.04217529296875, |
|
"loss": 0.593, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -1.616127371788025, |
|
"rewards/margins": 0.5671988129615784, |
|
"rewards/rejected": -2.183326244354248, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.9234468937875752, |
|
"grad_norm": 12.93643748441664, |
|
"learning_rate": 1.7037086855465898e-08, |
|
"logits/chosen": -0.7007228136062622, |
|
"logits/rejected": -0.6858587265014648, |
|
"logps/chosen": -412.77496337890625, |
|
"logps/rejected": -458.0037841796875, |
|
"loss": 0.6264, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.7415974140167236, |
|
"rewards/margins": 0.3250362277030945, |
|
"rewards/rejected": -2.066633701324463, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.9255845023380094, |
|
"grad_norm": 12.402383758119484, |
|
"learning_rate": 1.6082625774666792e-08, |
|
"logits/chosen": -0.6870225667953491, |
|
"logits/rejected": -0.6988283395767212, |
|
"logps/chosen": -401.1902770996094, |
|
"logps/rejected": -415.27093505859375, |
|
"loss": 0.5975, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.706886649131775, |
|
"rewards/margins": 0.06286803632974625, |
|
"rewards/rejected": -1.7697547674179077, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.9277221108884436, |
|
"grad_norm": 11.598555340742855, |
|
"learning_rate": 1.5155239811656562e-08, |
|
"logits/chosen": -0.7391936182975769, |
|
"logits/rejected": -0.7346464395523071, |
|
"logps/chosen": -362.8863525390625, |
|
"logps/rejected": -407.58392333984375, |
|
"loss": 0.5696, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.4570672512054443, |
|
"rewards/margins": 0.35567349195480347, |
|
"rewards/rejected": -1.8127408027648926, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.9298597194388778, |
|
"grad_norm": 12.334039197101353, |
|
"learning_rate": 1.4254980853566246e-08, |
|
"logits/chosen": -0.7256093621253967, |
|
"logits/rejected": -0.7030697464942932, |
|
"logps/chosen": -498.5211181640625, |
|
"logps/rejected": -495.9085693359375, |
|
"loss": 0.6381, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -2.0069806575775146, |
|
"rewards/margins": 0.12008260935544968, |
|
"rewards/rejected": -2.127063274383545, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.931997327989312, |
|
"grad_norm": 10.831899946588043, |
|
"learning_rate": 1.3381899269774289e-08, |
|
"logits/chosen": -0.7507193088531494, |
|
"logits/rejected": -0.7519603967666626, |
|
"logps/chosen": -359.82000732421875, |
|
"logps/rejected": -395.96771240234375, |
|
"loss": 0.5773, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -1.372948408126831, |
|
"rewards/margins": 0.3809196352958679, |
|
"rewards/rejected": -1.7538681030273438, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.9341349365397461, |
|
"grad_norm": 14.210280355388763, |
|
"learning_rate": 1.253604390908819e-08, |
|
"logits/chosen": -0.5923041701316833, |
|
"logits/rejected": -0.6011568307876587, |
|
"logps/chosen": -345.30633544921875, |
|
"logps/rejected": -392.51751708984375, |
|
"loss": 0.6674, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.585827350616455, |
|
"rewards/margins": 0.3826131224632263, |
|
"rewards/rejected": -1.968440294265747, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.9362725450901803, |
|
"grad_norm": 9.851284631791968, |
|
"learning_rate": 1.1717462097011855e-08, |
|
"logits/chosen": -0.6331924796104431, |
|
"logits/rejected": -0.6489231586456299, |
|
"logps/chosen": -429.216796875, |
|
"logps/rejected": -477.13836669921875, |
|
"loss": 0.5764, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.8334829807281494, |
|
"rewards/margins": 0.3744773864746094, |
|
"rewards/rejected": -2.207960605621338, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.9384101536406145, |
|
"grad_norm": 11.272588511911392, |
|
"learning_rate": 1.0926199633097154e-08, |
|
"logits/chosen": -0.5822413563728333, |
|
"logits/rejected": -0.5487803220748901, |
|
"logps/chosen": -428.51934814453125, |
|
"logps/rejected": -487.8909606933594, |
|
"loss": 0.6055, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.6083500385284424, |
|
"rewards/margins": 0.4286819398403168, |
|
"rewards/rejected": -2.037031888961792, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.9405477621910487, |
|
"grad_norm": 10.801295437898501, |
|
"learning_rate": 1.016230078838226e-08, |
|
"logits/chosen": -0.7505050897598267, |
|
"logits/rejected": -0.7208874225616455, |
|
"logps/chosen": -511.32110595703125, |
|
"logps/rejected": -563.9736938476562, |
|
"loss": 0.5874, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -2.1491966247558594, |
|
"rewards/margins": 0.5160467624664307, |
|
"rewards/rejected": -2.665243625640869, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.942685370741483, |
|
"grad_norm": 12.022377343895434, |
|
"learning_rate": 9.425808302913728e-09, |
|
"logits/chosen": -0.6826910972595215, |
|
"logits/rejected": -0.7009281516075134, |
|
"logps/chosen": -396.803466796875, |
|
"logps/rejected": -475.8189697265625, |
|
"loss": 0.5696, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.5288541316986084, |
|
"rewards/margins": 0.5459466576576233, |
|
"rewards/rejected": -2.074800968170166, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.9448229792919172, |
|
"grad_norm": 11.529829218714097, |
|
"learning_rate": 8.716763383355862e-09, |
|
"logits/chosen": -0.6541940569877625, |
|
"logits/rejected": -0.6755858063697815, |
|
"logps/chosen": -480.7030944824219, |
|
"logps/rejected": -526.9130249023438, |
|
"loss": 0.5949, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.1652467250823975, |
|
"rewards/margins": 0.42291441559791565, |
|
"rewards/rejected": -2.588160991668701, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.9469605878423514, |
|
"grad_norm": 11.296144808610602, |
|
"learning_rate": 8.035205700685165e-09, |
|
"logits/chosen": -0.5620754361152649, |
|
"logits/rejected": -0.5832556486129761, |
|
"logps/chosen": -406.50115966796875, |
|
"logps/rejected": -483.1033935546875, |
|
"loss": 0.5998, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.7801560163497925, |
|
"rewards/margins": 0.6258376240730286, |
|
"rewards/rejected": -2.4059934616088867, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.9490981963927856, |
|
"grad_norm": 14.459436916194253, |
|
"learning_rate": 7.381173387970397e-09, |
|
"logits/chosen": -0.6875967979431152, |
|
"logits/rejected": -0.7037211060523987, |
|
"logps/chosen": -387.79193115234375, |
|
"logps/rejected": -406.17730712890625, |
|
"loss": 0.625, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.775752067565918, |
|
"rewards/margins": 0.16077642142772675, |
|
"rewards/rejected": -1.9365284442901611, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.9512358049432198, |
|
"grad_norm": 12.667943463380384, |
|
"learning_rate": 6.754703038239329e-09, |
|
"logits/chosen": -0.6868771314620972, |
|
"logits/rejected": -0.6806486248970032, |
|
"logps/chosen": -391.65704345703125, |
|
"logps/rejected": -439.91534423828125, |
|
"loss": 0.616, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.5933175086975098, |
|
"rewards/margins": 0.549602746963501, |
|
"rewards/rejected": -2.14292049407959, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.953373413493654, |
|
"grad_norm": 13.999625605626278, |
|
"learning_rate": 6.15582970243117e-09, |
|
"logits/chosen": -0.679996132850647, |
|
"logits/rejected": -0.6954419612884521, |
|
"logps/chosen": -411.924072265625, |
|
"logps/rejected": -465.689453125, |
|
"loss": 0.5689, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.5961847305297852, |
|
"rewards/margins": 0.44558507204055786, |
|
"rewards/rejected": -2.0417697429656982, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.9555110220440882, |
|
"grad_norm": 10.363695148382087, |
|
"learning_rate": 5.5845868874357385e-09, |
|
"logits/chosen": -0.6567386388778687, |
|
"logits/rejected": -0.6833846569061279, |
|
"logps/chosen": -491.9205627441406, |
|
"logps/rejected": -569.992431640625, |
|
"loss": 0.5532, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.7513771057128906, |
|
"rewards/margins": 0.6100505590438843, |
|
"rewards/rejected": -2.3614273071289062, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.9576486305945224, |
|
"grad_norm": 13.618449735155838, |
|
"learning_rate": 5.0410065542185184e-09, |
|
"logits/chosen": -0.5561550855636597, |
|
"logits/rejected": -0.5477365851402283, |
|
"logps/chosen": -404.7331848144531, |
|
"logps/rejected": -456.8463134765625, |
|
"loss": 0.5897, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.8092013597488403, |
|
"rewards/margins": 0.39521071314811707, |
|
"rewards/rejected": -2.2044119834899902, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.9597862391449565, |
|
"grad_norm": 11.691098922849898, |
|
"learning_rate": 4.5251191160326495e-09, |
|
"logits/chosen": -0.7571395039558411, |
|
"logits/rejected": -0.6862713098526001, |
|
"logps/chosen": -404.239501953125, |
|
"logps/rejected": -430.745849609375, |
|
"loss": 0.6223, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.5349277257919312, |
|
"rewards/margins": 0.27732717990875244, |
|
"rewards/rejected": -1.8122549057006836, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.9619238476953907, |
|
"grad_norm": 11.212537524360101, |
|
"learning_rate": 4.036953436716895e-09, |
|
"logits/chosen": -0.6666488647460938, |
|
"logits/rejected": -0.6362468600273132, |
|
"logps/chosen": -390.62713623046875, |
|
"logps/rejected": -432.0528564453125, |
|
"loss": 0.5807, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.528037428855896, |
|
"rewards/margins": 0.3787933588027954, |
|
"rewards/rejected": -1.9068307876586914, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.964061456245825, |
|
"grad_norm": 11.304696270661244, |
|
"learning_rate": 3.5765368290813223e-09, |
|
"logits/chosen": -0.6946466565132141, |
|
"logits/rejected": -0.7254693508148193, |
|
"logps/chosen": -417.11187744140625, |
|
"logps/rejected": -472.14288330078125, |
|
"loss": 0.6012, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.704296588897705, |
|
"rewards/margins": 0.47184205055236816, |
|
"rewards/rejected": -2.1761388778686523, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.9661990647962592, |
|
"grad_norm": 12.23326718620082, |
|
"learning_rate": 3.1438950533786977e-09, |
|
"logits/chosen": -0.727628767490387, |
|
"logits/rejected": -0.7244228720664978, |
|
"logps/chosen": -368.25653076171875, |
|
"logps/rejected": -406.3876647949219, |
|
"loss": 0.6045, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.741332769393921, |
|
"rewards/margins": 0.22878167033195496, |
|
"rewards/rejected": -1.9701144695281982, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.9683366733466934, |
|
"grad_norm": 12.889743033859292, |
|
"learning_rate": 2.739052315863355e-09, |
|
"logits/chosen": -0.7463970184326172, |
|
"logits/rejected": -0.7229277491569519, |
|
"logps/chosen": -395.8512878417969, |
|
"logps/rejected": -452.2123718261719, |
|
"loss": 0.5944, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.4541352987289429, |
|
"rewards/margins": 0.4854976534843445, |
|
"rewards/rejected": -1.9396328926086426, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.9704742818971276, |
|
"grad_norm": 11.397457544749724, |
|
"learning_rate": 2.3620312674367816e-09, |
|
"logits/chosen": -0.7733277678489685, |
|
"logits/rejected": -0.761780858039856, |
|
"logps/chosen": -469.01544189453125, |
|
"logps/rejected": -496.7162780761719, |
|
"loss": 0.6331, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.8822633028030396, |
|
"rewards/margins": 0.15627171099185944, |
|
"rewards/rejected": -2.0385348796844482, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.9726118904475618, |
|
"grad_norm": 12.018897554978574, |
|
"learning_rate": 2.0128530023804656e-09, |
|
"logits/chosen": -0.6989326477050781, |
|
"logits/rejected": -0.7310012578964233, |
|
"logps/chosen": -407.86639404296875, |
|
"logps/rejected": -467.86798095703125, |
|
"loss": 0.5709, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.7583638429641724, |
|
"rewards/margins": 0.5012065172195435, |
|
"rewards/rejected": -2.2595701217651367, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.974749498997996, |
|
"grad_norm": 12.451774147613516, |
|
"learning_rate": 1.6915370571756181e-09, |
|
"logits/chosen": -0.7267682552337646, |
|
"logits/rejected": -0.7152563333511353, |
|
"logps/chosen": -450.92230224609375, |
|
"logps/rejected": -483.6854248046875, |
|
"loss": 0.6282, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -1.7722840309143066, |
|
"rewards/margins": 0.15732887387275696, |
|
"rewards/rejected": -1.9296131134033203, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.9768871075484302, |
|
"grad_norm": 12.126324151502713, |
|
"learning_rate": 1.3981014094099353e-09, |
|
"logits/chosen": -0.7544288635253906, |
|
"logits/rejected": -0.7525961995124817, |
|
"logps/chosen": -397.43109130859375, |
|
"logps/rejected": -431.8571472167969, |
|
"loss": 0.5846, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.4742791652679443, |
|
"rewards/margins": 0.3458634316921234, |
|
"rewards/rejected": -1.8201426267623901, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.9790247160988644, |
|
"grad_norm": 9.907495266444734, |
|
"learning_rate": 1.1325624767719588e-09, |
|
"logits/chosen": -0.6586907505989075, |
|
"logits/rejected": -0.6237790584564209, |
|
"logps/chosen": -395.1099853515625, |
|
"logps/rejected": -438.49237060546875, |
|
"loss": 0.5992, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.5770848989486694, |
|
"rewards/margins": 0.40755948424339294, |
|
"rewards/rejected": -1.9846441745758057, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.9811623246492986, |
|
"grad_norm": 13.123065251970033, |
|
"learning_rate": 8.949351161324225e-10, |
|
"logits/chosen": -0.6515368223190308, |
|
"logits/rejected": -0.6513477563858032, |
|
"logps/chosen": -411.0286560058594, |
|
"logps/rejected": -474.112548828125, |
|
"loss": 0.621, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.7529451847076416, |
|
"rewards/margins": 0.5362708568572998, |
|
"rewards/rejected": -2.2892158031463623, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.9832999331997327, |
|
"grad_norm": 12.071426108415315, |
|
"learning_rate": 6.852326227130833e-10, |
|
"logits/chosen": -0.7456957697868347, |
|
"logits/rejected": -0.6752879023551941, |
|
"logps/chosen": -450.5098876953125, |
|
"logps/rejected": -455.85577392578125, |
|
"loss": 0.626, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.8361914157867432, |
|
"rewards/margins": 0.20122388005256653, |
|
"rewards/rejected": -2.037415027618408, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.985437541750167, |
|
"grad_norm": 11.748757454977515, |
|
"learning_rate": 5.034667293427053e-10, |
|
"logits/chosen": -0.7174670696258545, |
|
"logits/rejected": -0.6987491250038147, |
|
"logps/chosen": -434.594482421875, |
|
"logps/rejected": -480.0499267578125, |
|
"loss": 0.6146, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.7917670011520386, |
|
"rewards/margins": 0.3851429224014282, |
|
"rewards/rejected": -2.176909923553467, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.9875751503006012, |
|
"grad_norm": 20.824968125195323, |
|
"learning_rate": 3.4964760580069585e-10, |
|
"logits/chosen": -0.555869460105896, |
|
"logits/rejected": -0.5152798891067505, |
|
"logps/chosen": -407.359375, |
|
"logps/rejected": -415.696533203125, |
|
"loss": 0.6489, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.7928351163864136, |
|
"rewards/margins": 0.13394100964069366, |
|
"rewards/rejected": -1.9267761707305908, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.9897127588510354, |
|
"grad_norm": 11.441495646450653, |
|
"learning_rate": 2.2378385824833866e-10, |
|
"logits/chosen": -0.7355363965034485, |
|
"logits/rejected": -0.727141261100769, |
|
"logps/chosen": -411.1090087890625, |
|
"logps/rejected": -475.9149475097656, |
|
"loss": 0.6229, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.861409068107605, |
|
"rewards/margins": 0.488926500082016, |
|
"rewards/rejected": -2.3503353595733643, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.9918503674014696, |
|
"grad_norm": 11.714079308527252, |
|
"learning_rate": 1.2588252874673466e-10, |
|
"logits/chosen": -0.8587902784347534, |
|
"logits/rejected": -0.8111391663551331, |
|
"logps/chosen": -470.9283447265625, |
|
"logps/rejected": -455.00372314453125, |
|
"loss": 0.6381, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -1.885197639465332, |
|
"rewards/margins": 0.07718580961227417, |
|
"rewards/rejected": -1.9623833894729614, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.9939879759519038, |
|
"grad_norm": 16.820068325011835, |
|
"learning_rate": 5.594909486328348e-11, |
|
"logits/chosen": -0.5276237726211548, |
|
"logits/rejected": -0.5462942123413086, |
|
"logps/chosen": -459.17626953125, |
|
"logps/rejected": -478.39056396484375, |
|
"loss": 0.6987, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.8299469947814941, |
|
"rewards/margins": 0.1971490979194641, |
|
"rewards/rejected": -2.0270960330963135, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.996125584502338, |
|
"grad_norm": 10.981775368002003, |
|
"learning_rate": 1.3987469365095429e-11, |
|
"logits/chosen": -0.787868082523346, |
|
"logits/rejected": -0.8146266937255859, |
|
"logps/chosen": -463.4134826660156, |
|
"logps/rejected": -492.61859130859375, |
|
"loss": 0.5814, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.6699291467666626, |
|
"rewards/margins": 0.16003668308258057, |
|
"rewards/rejected": -1.8299658298492432, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.9982631930527722, |
|
"grad_norm": 10.869184778690476, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -0.7284511923789978, |
|
"logits/rejected": -0.7266198992729187, |
|
"logps/chosen": -403.50836181640625, |
|
"logps/rejected": -409.22369384765625, |
|
"loss": 0.6449, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.5633559226989746, |
|
"rewards/margins": 0.2905767261981964, |
|
"rewards/rejected": -1.8539327383041382, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.9982631930527722, |
|
"step": 467, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6321173631915189, |
|
"train_runtime": 21471.9268, |
|
"train_samples_per_second": 2.789, |
|
"train_steps_per_second": 0.022 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 467, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|