gitpullpull's picture
Upload folder using huggingface_hub
2e13d50 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.3783224024853298,
"eval_steps": 500,
"global_step": 500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.002761477390403866,
"grad_norm": 0.5198726654052734,
"learning_rate": 0.0,
"log_odds_chosen": 0.4317269027233124,
"log_odds_ratio": -0.5042418837547302,
"logits/chosen": -0.5456271767616272,
"logits/rejected": -0.10779725015163422,
"logps/chosen": -1.998489260673523,
"logps/rejected": -2.3799774646759033,
"loss": 2.1834,
"nll_loss": 2.1329703330993652,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.199848935008049,
"rewards/margins": 0.03814878687262535,
"rewards/rejected": -0.23799772560596466,
"step": 1
},
{
"epoch": 0.005522954780807732,
"grad_norm": 0.7381364107131958,
"learning_rate": 4.587155963302753e-08,
"log_odds_chosen": 0.4411306381225586,
"log_odds_ratio": -0.4988963305950165,
"logits/chosen": -0.6594648361206055,
"logits/rejected": -0.07251911610364914,
"logps/chosen": -2.0794589519500732,
"logps/rejected": -2.473759889602661,
"loss": 2.2463,
"nll_loss": 2.196385383605957,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.20794589817523956,
"rewards/margins": 0.039430104196071625,
"rewards/rejected": -0.24737600982189178,
"step": 2
},
{
"epoch": 0.008284432171211598,
"grad_norm": 0.6377604603767395,
"learning_rate": 9.174311926605506e-08,
"log_odds_chosen": 0.5233978033065796,
"log_odds_ratio": -0.4696895480155945,
"logits/chosen": -0.7436237931251526,
"logits/rejected": -0.04083387181162834,
"logps/chosen": -2.0129284858703613,
"logps/rejected": -2.4808387756347656,
"loss": 2.1678,
"nll_loss": 2.120811700820923,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.20129284262657166,
"rewards/margins": 0.04679100960493088,
"rewards/rejected": -0.24808385968208313,
"step": 3
},
{
"epoch": 0.011045909561615464,
"grad_norm": 0.5183601379394531,
"learning_rate": 1.376146788990826e-07,
"log_odds_chosen": 0.4738181531429291,
"log_odds_ratio": -0.4919201731681824,
"logits/chosen": -0.5707842111587524,
"logits/rejected": -0.06899966299533844,
"logps/chosen": -2.006629467010498,
"logps/rejected": -2.429466724395752,
"loss": 2.1541,
"nll_loss": 2.104935646057129,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.20066295564174652,
"rewards/margins": 0.04228372871875763,
"rewards/rejected": -0.24294668436050415,
"step": 4
},
{
"epoch": 0.013807386952019331,
"grad_norm": 0.5362528562545776,
"learning_rate": 1.8348623853211012e-07,
"log_odds_chosen": 0.47211968898773193,
"log_odds_ratio": -0.49660325050354004,
"logits/chosen": -0.5831207633018494,
"logits/rejected": -0.13220791518688202,
"logps/chosen": -1.9650582075119019,
"logps/rejected": -2.3817272186279297,
"loss": 2.1187,
"nll_loss": 2.0690417289733887,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1965058296918869,
"rewards/margins": 0.04166688770055771,
"rewards/rejected": -0.23817269504070282,
"step": 5
},
{
"epoch": 0.016568864342423197,
"grad_norm": 0.5392005443572998,
"learning_rate": 2.2935779816513764e-07,
"log_odds_chosen": 0.3942825496196747,
"log_odds_ratio": -0.518440306186676,
"logits/chosen": -0.5348646640777588,
"logits/rejected": -0.23154743015766144,
"logps/chosen": -2.012246608734131,
"logps/rejected": -2.361220359802246,
"loss": 2.178,
"nll_loss": 2.126155376434326,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.2012246698141098,
"rewards/margins": 0.03489736467599869,
"rewards/rejected": -0.2361220419406891,
"step": 6
},
{
"epoch": 0.019330341732827064,
"grad_norm": 0.5474534630775452,
"learning_rate": 2.752293577981652e-07,
"log_odds_chosen": 0.2622841000556946,
"log_odds_ratio": -0.576733410358429,
"logits/chosen": -0.5426779985427856,
"logits/rejected": -0.2341059297323227,
"logps/chosen": -2.074805736541748,
"logps/rejected": -2.310166120529175,
"loss": 2.2374,
"nll_loss": 2.179717779159546,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.2074805647134781,
"rewards/margins": 0.023536043241620064,
"rewards/rejected": -0.231016606092453,
"step": 7
},
{
"epoch": 0.022091819123230928,
"grad_norm": 0.5940824747085571,
"learning_rate": 3.211009174311927e-07,
"log_odds_chosen": 0.41454389691352844,
"log_odds_ratio": -0.5088675618171692,
"logits/chosen": -0.45266416668891907,
"logits/rejected": -0.03741999715566635,
"logps/chosen": -2.081907033920288,
"logps/rejected": -2.452606439590454,
"loss": 2.2474,
"nll_loss": 2.19650936126709,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.2081906944513321,
"rewards/margins": 0.037069931626319885,
"rewards/rejected": -0.24526062607765198,
"step": 8
},
{
"epoch": 0.024853296513634795,
"grad_norm": 0.5987945795059204,
"learning_rate": 3.6697247706422023e-07,
"log_odds_chosen": 0.666429340839386,
"log_odds_ratio": -0.42349112033843994,
"logits/chosen": -0.7504861950874329,
"logits/rejected": -0.16368302702903748,
"logps/chosen": -1.9702867269515991,
"logps/rejected": -2.566361427307129,
"loss": 2.1456,
"nll_loss": 2.10321307182312,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.19702866673469543,
"rewards/margins": 0.05960746854543686,
"rewards/rejected": -0.2566361427307129,
"step": 9
},
{
"epoch": 0.027614773904038662,
"grad_norm": 0.7403894662857056,
"learning_rate": 4.128440366972478e-07,
"log_odds_chosen": 0.44253280758857727,
"log_odds_ratio": -0.502151370048523,
"logits/chosen": -0.7380926012992859,
"logits/rejected": -0.23479697108268738,
"logps/chosen": -2.0925681591033936,
"logps/rejected": -2.4903366565704346,
"loss": 2.2572,
"nll_loss": 2.206953287124634,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.20925681293010712,
"rewards/margins": 0.03977686166763306,
"rewards/rejected": -0.24903367459774017,
"step": 10
},
{
"epoch": 0.030376251294442526,
"grad_norm": 0.6613215208053589,
"learning_rate": 4.587155963302753e-07,
"log_odds_chosen": 0.5335031151771545,
"log_odds_ratio": -0.4685918688774109,
"logits/chosen": -0.6707795262336731,
"logits/rejected": -0.13829460740089417,
"logps/chosen": -2.0549967288970947,
"logps/rejected": -2.5354185104370117,
"loss": 2.2247,
"nll_loss": 2.1778128147125244,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.20549967885017395,
"rewards/margins": 0.04804220795631409,
"rewards/rejected": -0.25354188680648804,
"step": 11
},
{
"epoch": 0.03313772868484639,
"grad_norm": 0.5580596327781677,
"learning_rate": 5.045871559633028e-07,
"log_odds_chosen": 0.4023962616920471,
"log_odds_ratio": -0.5220973491668701,
"logits/chosen": -0.5904384851455688,
"logits/rejected": 0.0007063774392008781,
"logps/chosen": -1.958735704421997,
"logps/rejected": -2.3157620429992676,
"loss": 2.1455,
"nll_loss": 2.09328293800354,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.19587357342243195,
"rewards/margins": 0.03570263832807541,
"rewards/rejected": -0.23157618939876556,
"step": 12
},
{
"epoch": 0.03589920607525026,
"grad_norm": 0.4629175662994385,
"learning_rate": 5.504587155963304e-07,
"log_odds_chosen": 0.4922281503677368,
"log_odds_ratio": -0.4793952405452728,
"logits/chosen": -0.489984929561615,
"logits/rejected": -0.08446945250034332,
"logps/chosen": -1.8884625434875488,
"logps/rejected": -2.318817377090454,
"loss": 2.0661,
"nll_loss": 2.0181963443756104,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.18884626030921936,
"rewards/margins": 0.043035492300987244,
"rewards/rejected": -0.2318817526102066,
"step": 13
},
{
"epoch": 0.03866068346565413,
"grad_norm": 0.6970926523208618,
"learning_rate": 5.963302752293579e-07,
"log_odds_chosen": 0.46570533514022827,
"log_odds_ratio": -0.49952778220176697,
"logits/chosen": -0.6894131898880005,
"logits/rejected": -0.010050175711512566,
"logps/chosen": -2.0864779949188232,
"logps/rejected": -2.503685235977173,
"loss": 2.2329,
"nll_loss": 2.182929277420044,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.20864778757095337,
"rewards/margins": 0.041720740497112274,
"rewards/rejected": -0.25036853551864624,
"step": 14
},
{
"epoch": 0.04142216085605799,
"grad_norm": 0.7035249471664429,
"learning_rate": 6.422018348623854e-07,
"log_odds_chosen": 0.5717816948890686,
"log_odds_ratio": -0.44831815361976624,
"logits/chosen": -0.7732019424438477,
"logits/rejected": 0.13750173151493073,
"logps/chosen": -2.0055058002471924,
"logps/rejected": -2.513883590698242,
"loss": 2.1667,
"nll_loss": 2.1218552589416504,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.20055057108402252,
"rewards/margins": 0.05083777755498886,
"rewards/rejected": -0.2513883411884308,
"step": 15
},
{
"epoch": 0.044183638246461855,
"grad_norm": 0.489059180021286,
"learning_rate": 6.880733944954129e-07,
"log_odds_chosen": 0.45178472995758057,
"log_odds_ratio": -0.501604437828064,
"logits/chosen": -0.6046928763389587,
"logits/rejected": 0.05104057490825653,
"logps/chosen": -1.8943061828613281,
"logps/rejected": -2.2864151000976562,
"loss": 2.0731,
"nll_loss": 2.022980213165283,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.18943063914775848,
"rewards/margins": 0.03921087831258774,
"rewards/rejected": -0.22864152491092682,
"step": 16
},
{
"epoch": 0.04694511563686572,
"grad_norm": 0.5798972249031067,
"learning_rate": 7.339449541284405e-07,
"log_odds_chosen": 0.521186113357544,
"log_odds_ratio": -0.4737667441368103,
"logits/chosen": -0.5957604646682739,
"logits/rejected": 0.05874314904212952,
"logps/chosen": -1.9864083528518677,
"logps/rejected": -2.4512085914611816,
"loss": 2.1331,
"nll_loss": 2.085712194442749,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1986408233642578,
"rewards/margins": 0.04648003727197647,
"rewards/rejected": -0.24512089788913727,
"step": 17
},
{
"epoch": 0.04970659302726959,
"grad_norm": 0.602294921875,
"learning_rate": 7.79816513761468e-07,
"log_odds_chosen": 0.6343832612037659,
"log_odds_ratio": -0.42983177304267883,
"logits/chosen": -0.6467751264572144,
"logits/rejected": -0.0914345234632492,
"logps/chosen": -2.020120143890381,
"logps/rejected": -2.5913381576538086,
"loss": 2.1654,
"nll_loss": 2.1224629878997803,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.2020120471715927,
"rewards/margins": 0.05712177976965904,
"rewards/rejected": -0.25913381576538086,
"step": 18
},
{
"epoch": 0.05246807041767346,
"grad_norm": 0.6167095303535461,
"learning_rate": 8.256880733944956e-07,
"log_odds_chosen": 0.23837895691394806,
"log_odds_ratio": -0.5851647853851318,
"logits/chosen": -0.6367707848548889,
"logits/rejected": -0.06072646751999855,
"logps/chosen": -2.055112838745117,
"logps/rejected": -2.2656140327453613,
"loss": 2.2217,
"nll_loss": 2.163205146789551,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.20551128685474396,
"rewards/margins": 0.021050114184617996,
"rewards/rejected": -0.22656141221523285,
"step": 19
},
{
"epoch": 0.055229547808077324,
"grad_norm": 0.5270997285842896,
"learning_rate": 8.71559633027523e-07,
"log_odds_chosen": 0.4399060010910034,
"log_odds_ratio": -0.5041620135307312,
"logits/chosen": -0.5500829815864563,
"logits/rejected": 0.014720816165208817,
"logps/chosen": -1.9720858335494995,
"logps/rejected": -2.3600573539733887,
"loss": 2.1571,
"nll_loss": 2.1066458225250244,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.19720861315727234,
"rewards/margins": 0.038797151297330856,
"rewards/rejected": -0.2360057532787323,
"step": 20
},
{
"epoch": 0.057991025198481184,
"grad_norm": 0.639258086681366,
"learning_rate": 9.174311926605506e-07,
"log_odds_chosen": 0.35250768065452576,
"log_odds_ratio": -0.5405789017677307,
"logits/chosen": -0.45625555515289307,
"logits/rejected": -0.03890611231327057,
"logps/chosen": -2.1359739303588867,
"logps/rejected": -2.454291820526123,
"loss": 2.2807,
"nll_loss": 2.2266433238983154,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.21359741687774658,
"rewards/margins": 0.0318317785859108,
"rewards/rejected": -0.2454291731119156,
"step": 21
},
{
"epoch": 0.06075250258888505,
"grad_norm": 0.5805932283401489,
"learning_rate": 9.633027522935782e-07,
"log_odds_chosen": 0.5011261105537415,
"log_odds_ratio": -0.4887810945510864,
"logits/chosen": -0.6073933839797974,
"logits/rejected": -0.18206848204135895,
"logps/chosen": -1.9807653427124023,
"logps/rejected": -2.4255237579345703,
"loss": 2.1351,
"nll_loss": 2.0862059593200684,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.1980765163898468,
"rewards/margins": 0.044475845992565155,
"rewards/rejected": -0.24255238473415375,
"step": 22
},
{
"epoch": 0.06351397997928893,
"grad_norm": 0.5500138998031616,
"learning_rate": 1.0091743119266057e-06,
"log_odds_chosen": 0.3974594175815582,
"log_odds_ratio": -0.5229506492614746,
"logits/chosen": -0.5682663321495056,
"logits/rejected": -0.3609941005706787,
"logps/chosen": -1.963416576385498,
"logps/rejected": -2.318310499191284,
"loss": 2.1453,
"nll_loss": 2.0929956436157227,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.19634166359901428,
"rewards/margins": 0.03548937663435936,
"rewards/rejected": -0.23183104395866394,
"step": 23
},
{
"epoch": 0.06627545736969279,
"grad_norm": 0.5702619552612305,
"learning_rate": 1.055045871559633e-06,
"log_odds_chosen": 0.522091805934906,
"log_odds_ratio": -0.4669547379016876,
"logits/chosen": -0.5776705741882324,
"logits/rejected": 0.03815801814198494,
"logps/chosen": -1.9660505056381226,
"logps/rejected": -2.429518461227417,
"loss": 2.1353,
"nll_loss": 2.0886194705963135,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.19660505652427673,
"rewards/margins": 0.04634679853916168,
"rewards/rejected": -0.24295185506343842,
"step": 24
},
{
"epoch": 0.06903693476009665,
"grad_norm": 0.6908774375915527,
"learning_rate": 1.1009174311926608e-06,
"log_odds_chosen": 0.6555817723274231,
"log_odds_ratio": -0.42638978362083435,
"logits/chosen": -0.6369448304176331,
"logits/rejected": -0.0680353045463562,
"logps/chosen": -1.996623158454895,
"logps/rejected": -2.585740327835083,
"loss": 2.157,
"nll_loss": 2.1143739223480225,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.19966234266757965,
"rewards/margins": 0.058911725878715515,
"rewards/rejected": -0.25857406854629517,
"step": 25
},
{
"epoch": 0.07179841215050052,
"grad_norm": 0.6247107982635498,
"learning_rate": 1.1467889908256882e-06,
"log_odds_chosen": 0.37967437505722046,
"log_odds_ratio": -0.528645396232605,
"logits/chosen": -0.5880488157272339,
"logits/rejected": -0.11361770331859589,
"logps/chosen": -2.0194289684295654,
"logps/rejected": -2.3577256202697754,
"loss": 2.1956,
"nll_loss": 2.142758369445801,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.20194292068481445,
"rewards/margins": 0.03382965177297592,
"rewards/rejected": -0.23577255010604858,
"step": 26
},
{
"epoch": 0.07455988954090438,
"grad_norm": 0.44772788882255554,
"learning_rate": 1.1926605504587159e-06,
"log_odds_chosen": 0.4675738513469696,
"log_odds_ratio": -0.4964170753955841,
"logits/chosen": -0.434501588344574,
"logits/rejected": -0.17547550797462463,
"logps/chosen": -1.865759253501892,
"logps/rejected": -2.272789239883423,
"loss": 2.0462,
"nll_loss": 1.996600866317749,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.18657593429088593,
"rewards/margins": 0.04070296883583069,
"rewards/rejected": -0.2272789031267166,
"step": 27
},
{
"epoch": 0.07732136693130826,
"grad_norm": 0.5303645730018616,
"learning_rate": 1.2385321100917433e-06,
"log_odds_chosen": 0.5267918109893799,
"log_odds_ratio": -0.46872708201408386,
"logits/chosen": -0.5293477773666382,
"logits/rejected": -0.20712637901306152,
"logps/chosen": -1.9557033777236938,
"logps/rejected": -2.422588586807251,
"loss": 2.123,
"nll_loss": 2.0761377811431885,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.19557033479213715,
"rewards/margins": 0.04668852686882019,
"rewards/rejected": -0.24225887656211853,
"step": 28
},
{
"epoch": 0.08008284432171212,
"grad_norm": 0.558870792388916,
"learning_rate": 1.2844036697247707e-06,
"log_odds_chosen": 0.4761933982372284,
"log_odds_ratio": -0.49172335863113403,
"logits/chosen": -0.5614610314369202,
"logits/rejected": -0.03958575427532196,
"logps/chosen": -2.044667959213257,
"logps/rejected": -2.4699392318725586,
"loss": 2.2024,
"nll_loss": 2.153231143951416,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.2044668048620224,
"rewards/margins": 0.042527101933956146,
"rewards/rejected": -0.24699391424655914,
"step": 29
},
{
"epoch": 0.08284432171211598,
"grad_norm": 0.6042240262031555,
"learning_rate": 1.3302752293577984e-06,
"log_odds_chosen": 0.6456740498542786,
"log_odds_ratio": -0.4289007782936096,
"logits/chosen": -0.5478062629699707,
"logits/rejected": -0.0002663079649209976,
"logps/chosen": -2.02502703666687,
"logps/rejected": -2.60587477684021,
"loss": 2.1737,
"nll_loss": 2.1308515071868896,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.20250271260738373,
"rewards/margins": 0.058084748685359955,
"rewards/rejected": -0.2605874538421631,
"step": 30
},
{
"epoch": 0.08560579910251985,
"grad_norm": 0.6082971692085266,
"learning_rate": 1.3761467889908258e-06,
"log_odds_chosen": 0.5572243928909302,
"log_odds_ratio": -0.45846420526504517,
"logits/chosen": -0.5786283016204834,
"logits/rejected": -0.06976839900016785,
"logps/chosen": -2.0147552490234375,
"logps/rejected": -2.5146052837371826,
"loss": 2.1834,
"nll_loss": 2.1375184059143066,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.20147554576396942,
"rewards/margins": 0.049984999001026154,
"rewards/rejected": -0.2514605224132538,
"step": 31
},
{
"epoch": 0.08836727649292371,
"grad_norm": 0.4792933762073517,
"learning_rate": 1.4220183486238535e-06,
"log_odds_chosen": 0.43500036001205444,
"log_odds_ratio": -0.5046517252922058,
"logits/chosen": -0.4258999824523926,
"logits/rejected": 0.003258749842643738,
"logps/chosen": -2.011561870574951,
"logps/rejected": -2.398580312728882,
"loss": 2.1831,
"nll_loss": 2.132627010345459,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.20115619897842407,
"rewards/margins": 0.03870181739330292,
"rewards/rejected": -0.239858016371727,
"step": 32
},
{
"epoch": 0.09112875388332758,
"grad_norm": 0.5385621786117554,
"learning_rate": 1.467889908256881e-06,
"log_odds_chosen": 0.573083758354187,
"log_odds_ratio": -0.4524250328540802,
"logits/chosen": -0.4552776515483856,
"logits/rejected": -0.06403280049562454,
"logps/chosen": -1.9241302013397217,
"logps/rejected": -2.4318630695343018,
"loss": 2.0919,
"nll_loss": 2.046658754348755,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.19241301715373993,
"rewards/margins": 0.05077328532934189,
"rewards/rejected": -0.24318630993366241,
"step": 33
},
{
"epoch": 0.09389023127373144,
"grad_norm": 0.4870143234729767,
"learning_rate": 1.5137614678899084e-06,
"log_odds_chosen": 0.6046110391616821,
"log_odds_ratio": -0.4510130286216736,
"logits/chosen": -0.40763065218925476,
"logits/rejected": -0.08770006895065308,
"logps/chosen": -1.922775387763977,
"logps/rejected": -2.462465524673462,
"loss": 2.0741,
"nll_loss": 2.028975009918213,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.19227753579616547,
"rewards/margins": 0.053969040513038635,
"rewards/rejected": -0.2462465912103653,
"step": 34
},
{
"epoch": 0.09665170866413532,
"grad_norm": 0.6609561443328857,
"learning_rate": 1.559633027522936e-06,
"log_odds_chosen": 0.40986764430999756,
"log_odds_ratio": -0.5126968026161194,
"logits/chosen": -0.8081121444702148,
"logits/rejected": -0.20409950613975525,
"logps/chosen": -1.9795390367507935,
"logps/rejected": -2.3391332626342773,
"loss": 2.152,
"nll_loss": 2.1007797718048096,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.19795390963554382,
"rewards/margins": 0.03595941513776779,
"rewards/rejected": -0.23391331732273102,
"step": 35
},
{
"epoch": 0.09941318605453918,
"grad_norm": 0.6118403673171997,
"learning_rate": 1.6055045871559635e-06,
"log_odds_chosen": 0.603810727596283,
"log_odds_ratio": -0.4452260434627533,
"logits/chosen": -0.6869708299636841,
"logits/rejected": 0.01393081247806549,
"logps/chosen": -1.9751290082931519,
"logps/rejected": -2.515267848968506,
"loss": 2.1348,
"nll_loss": 2.0902533531188965,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1975128948688507,
"rewards/margins": 0.05401391163468361,
"rewards/rejected": -0.251526802778244,
"step": 36
},
{
"epoch": 0.10217466344494304,
"grad_norm": 0.6636885404586792,
"learning_rate": 1.6513761467889911e-06,
"log_odds_chosen": 0.5960386991500854,
"log_odds_ratio": -0.44556429982185364,
"logits/chosen": -0.5324239134788513,
"logits/rejected": -0.1962709128856659,
"logps/chosen": -2.1164088249206543,
"logps/rejected": -2.6588618755340576,
"loss": 2.2904,
"nll_loss": 2.2458741664886475,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.2116408795118332,
"rewards/margins": 0.054245319217443466,
"rewards/rejected": -0.26588618755340576,
"step": 37
},
{
"epoch": 0.10493614083534691,
"grad_norm": 0.5414693355560303,
"learning_rate": 1.6972477064220186e-06,
"log_odds_chosen": 0.4305243492126465,
"log_odds_ratio": -0.5040389895439148,
"logits/chosen": -0.5475431680679321,
"logits/rejected": -0.15733303129673004,
"logps/chosen": -1.9466073513031006,
"logps/rejected": -2.3275275230407715,
"loss": 2.1303,
"nll_loss": 2.079880475997925,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1946607381105423,
"rewards/margins": 0.03809202462434769,
"rewards/rejected": -0.2327527403831482,
"step": 38
},
{
"epoch": 0.10769761822575077,
"grad_norm": 0.7092136740684509,
"learning_rate": 1.743119266055046e-06,
"log_odds_chosen": 0.49458950757980347,
"log_odds_ratio": -0.499477356672287,
"logits/chosen": -0.47480347752571106,
"logits/rejected": 0.009076721966266632,
"logps/chosen": -2.077540159225464,
"logps/rejected": -2.527543067932129,
"loss": 2.2296,
"nll_loss": 2.1796536445617676,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.20775403082370758,
"rewards/margins": 0.04500027373433113,
"rewards/rejected": -0.2527543008327484,
"step": 39
},
{
"epoch": 0.11045909561615465,
"grad_norm": 0.7840990424156189,
"learning_rate": 1.7889908256880737e-06,
"log_odds_chosen": 0.27720558643341064,
"log_odds_ratio": -0.5738192796707153,
"logits/chosen": -0.6754797101020813,
"logits/rejected": -0.1666824221611023,
"logps/chosen": -2.097449779510498,
"logps/rejected": -2.346257448196411,
"loss": 2.2706,
"nll_loss": 2.2131741046905518,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.20974498987197876,
"rewards/margins": 0.02488076500594616,
"rewards/rejected": -0.23462577164173126,
"step": 40
},
{
"epoch": 0.11322057300655851,
"grad_norm": 0.49594300985336304,
"learning_rate": 1.8348623853211011e-06,
"log_odds_chosen": 0.4892931878566742,
"log_odds_ratio": -0.4875307083129883,
"logits/chosen": -0.5313754081726074,
"logits/rejected": -0.11813461780548096,
"logps/chosen": -1.9190640449523926,
"logps/rejected": -2.3516781330108643,
"loss": 2.0847,
"nll_loss": 2.035940647125244,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1919064074754715,
"rewards/margins": 0.043261416256427765,
"rewards/rejected": -0.23516784608364105,
"step": 41
},
{
"epoch": 0.11598205039696237,
"grad_norm": 0.6706519722938538,
"learning_rate": 1.8807339449541288e-06,
"log_odds_chosen": 0.3553638756275177,
"log_odds_ratio": -0.5406981706619263,
"logits/chosen": -0.6044270992279053,
"logits/rejected": -0.1877760887145996,
"logps/chosen": -2.110844850540161,
"logps/rejected": -2.4292314052581787,
"loss": 2.2811,
"nll_loss": 2.2269935607910156,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.2110844999551773,
"rewards/margins": 0.031838610768318176,
"rewards/rejected": -0.24292311072349548,
"step": 42
},
{
"epoch": 0.11874352778736624,
"grad_norm": 0.569320023059845,
"learning_rate": 1.9266055045871564e-06,
"log_odds_chosen": 0.4219287633895874,
"log_odds_ratio": -0.5136047005653381,
"logits/chosen": -0.564371645450592,
"logits/rejected": -0.1906774342060089,
"logps/chosen": -1.9711859226226807,
"logps/rejected": -2.341031312942505,
"loss": 2.1533,
"nll_loss": 2.101966381072998,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.1971185803413391,
"rewards/margins": 0.036984533071517944,
"rewards/rejected": -0.23410312831401825,
"step": 43
},
{
"epoch": 0.1215050051777701,
"grad_norm": 0.6019275188446045,
"learning_rate": 1.9724770642201837e-06,
"log_odds_chosen": 0.3504191040992737,
"log_odds_ratio": -0.5448323488235474,
"logits/chosen": -0.5221514105796814,
"logits/rejected": -0.2111213058233261,
"logps/chosen": -2.0630786418914795,
"logps/rejected": -2.3758115768432617,
"loss": 2.226,
"nll_loss": 2.1714861392974854,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.20630787312984467,
"rewards/margins": 0.03127329424023628,
"rewards/rejected": -0.23758116364479065,
"step": 44
},
{
"epoch": 0.12426648256817398,
"grad_norm": 0.5997087955474854,
"learning_rate": 2.0183486238532113e-06,
"log_odds_chosen": 0.6440756916999817,
"log_odds_ratio": -0.42918679118156433,
"logits/chosen": -0.6537622213363647,
"logits/rejected": -0.3867274224758148,
"logps/chosen": -1.964981198310852,
"logps/rejected": -2.541144371032715,
"loss": 2.1164,
"nll_loss": 2.073434352874756,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1964981108903885,
"rewards/margins": 0.057616353034973145,
"rewards/rejected": -0.25411444902420044,
"step": 45
},
{
"epoch": 0.12702795995857785,
"grad_norm": 0.5181168913841248,
"learning_rate": 2.064220183486239e-06,
"log_odds_chosen": 0.564142107963562,
"log_odds_ratio": -0.4582359790802002,
"logits/chosen": -0.5351958870887756,
"logits/rejected": -0.1501241773366928,
"logps/chosen": -1.84321928024292,
"logps/rejected": -2.331892967224121,
"loss": 2.0016,
"nll_loss": 1.9557558298110962,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.18432192504405975,
"rewards/margins": 0.04886738583445549,
"rewards/rejected": -0.23318932950496674,
"step": 46
},
{
"epoch": 0.1297894373489817,
"grad_norm": 0.5089595913887024,
"learning_rate": 2.110091743119266e-06,
"log_odds_chosen": 0.4004448652267456,
"log_odds_ratio": -0.5262866616249084,
"logits/chosen": -0.4425760507583618,
"logits/rejected": -0.17353124916553497,
"logps/chosen": -2.045093059539795,
"logps/rejected": -2.4056942462921143,
"loss": 2.2153,
"nll_loss": 2.162646770477295,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.20450931787490845,
"rewards/margins": 0.03606009483337402,
"rewards/rejected": -0.24056941270828247,
"step": 47
},
{
"epoch": 0.13255091473938557,
"grad_norm": 0.6208946108818054,
"learning_rate": 2.155963302752294e-06,
"log_odds_chosen": 0.18348652124404907,
"log_odds_ratio": -0.6307563781738281,
"logits/chosen": -0.45777827501296997,
"logits/rejected": -0.11396709084510803,
"logps/chosen": -2.1183199882507324,
"logps/rejected": -2.2930657863616943,
"loss": 2.2819,
"nll_loss": 2.21886944770813,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.21183200180530548,
"rewards/margins": 0.01747458055615425,
"rewards/rejected": -0.22930656373500824,
"step": 48
},
{
"epoch": 0.13531239212978943,
"grad_norm": 0.6294072866439819,
"learning_rate": 2.2018348623853215e-06,
"log_odds_chosen": 0.6346040964126587,
"log_odds_ratio": -0.43739554286003113,
"logits/chosen": -0.542172908782959,
"logits/rejected": -0.039900042116642,
"logps/chosen": -2.052166223526001,
"logps/rejected": -2.628195285797119,
"loss": 2.2026,
"nll_loss": 2.1588120460510254,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.205216646194458,
"rewards/margins": 0.05760287865996361,
"rewards/rejected": -0.2628195285797119,
"step": 49
},
{
"epoch": 0.1380738695201933,
"grad_norm": 0.6679416298866272,
"learning_rate": 2.2477064220183487e-06,
"log_odds_chosen": 0.5639493465423584,
"log_odds_ratio": -0.47183454036712646,
"logits/chosen": -0.6221768856048584,
"logits/rejected": -0.0222525242716074,
"logps/chosen": -2.010540008544922,
"logps/rejected": -2.51875901222229,
"loss": 2.1569,
"nll_loss": 2.1097309589385986,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.20105400681495667,
"rewards/margins": 0.05082192271947861,
"rewards/rejected": -0.25187593698501587,
"step": 50
},
{
"epoch": 0.14083534691059718,
"grad_norm": 0.6739494800567627,
"learning_rate": 2.2935779816513764e-06,
"log_odds_chosen": 0.4806385040283203,
"log_odds_ratio": -0.486289381980896,
"logits/chosen": -0.5340446829795837,
"logits/rejected": -0.3175644278526306,
"logps/chosen": -2.032139301300049,
"logps/rejected": -2.4611167907714844,
"loss": 2.1976,
"nll_loss": 2.1489548683166504,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.2032139152288437,
"rewards/margins": 0.04289776086807251,
"rewards/rejected": -0.2461116760969162,
"step": 51
},
{
"epoch": 0.14359682430100104,
"grad_norm": 0.5641347765922546,
"learning_rate": 2.339449541284404e-06,
"log_odds_chosen": 0.4302893579006195,
"log_odds_ratio": -0.5059407353401184,
"logits/chosen": -0.4652354121208191,
"logits/rejected": -0.14794279634952545,
"logps/chosen": -1.98146390914917,
"logps/rejected": -2.362124443054199,
"loss": 2.1395,
"nll_loss": 2.0888633728027344,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.19814638793468475,
"rewards/margins": 0.0380660817027092,
"rewards/rejected": -0.23621246218681335,
"step": 52
},
{
"epoch": 0.1463583016914049,
"grad_norm": 0.6226781606674194,
"learning_rate": 2.3853211009174317e-06,
"log_odds_chosen": 0.38262999057769775,
"log_odds_ratio": -0.5264566540718079,
"logits/chosen": -0.601564884185791,
"logits/rejected": -0.23214676976203918,
"logps/chosen": -1.9446452856063843,
"logps/rejected": -2.2811543941497803,
"loss": 2.1236,
"nll_loss": 2.070988178253174,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.1944645196199417,
"rewards/margins": 0.03365091234445572,
"rewards/rejected": -0.22811545431613922,
"step": 53
},
{
"epoch": 0.14911977908180876,
"grad_norm": 0.576812207698822,
"learning_rate": 2.431192660550459e-06,
"log_odds_chosen": 0.3935718238353729,
"log_odds_ratio": -0.5239484310150146,
"logits/chosen": -0.5574474930763245,
"logits/rejected": -0.1828337013721466,
"logps/chosen": -1.9322319030761719,
"logps/rejected": -2.280305862426758,
"loss": 2.1135,
"nll_loss": 2.061114549636841,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.19322317838668823,
"rewards/margins": 0.03480737283825874,
"rewards/rejected": -0.22803056240081787,
"step": 54
},
{
"epoch": 0.15188125647221262,
"grad_norm": 0.5661758184432983,
"learning_rate": 2.4770642201834866e-06,
"log_odds_chosen": 0.39002981781959534,
"log_odds_ratio": -0.5207180380821228,
"logits/chosen": -0.6281000971794128,
"logits/rejected": -0.12741385400295258,
"logps/chosen": -1.9887436628341675,
"logps/rejected": -2.3331456184387207,
"loss": 2.1681,
"nll_loss": 2.115997314453125,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.198874369263649,
"rewards/margins": 0.03444019705057144,
"rewards/rejected": -0.23331457376480103,
"step": 55
},
{
"epoch": 0.1546427338626165,
"grad_norm": 0.6655378937721252,
"learning_rate": 2.522935779816514e-06,
"log_odds_chosen": 0.6584038138389587,
"log_odds_ratio": -0.427642822265625,
"logits/chosen": -0.6279682517051697,
"logits/rejected": -0.12986338138580322,
"logps/chosen": -1.9852056503295898,
"logps/rejected": -2.5761494636535645,
"loss": 2.1584,
"nll_loss": 2.115626335144043,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.19852055609226227,
"rewards/margins": 0.059094399213790894,
"rewards/rejected": -0.25761497020721436,
"step": 56
},
{
"epoch": 0.15740421125302037,
"grad_norm": 0.48872971534729004,
"learning_rate": 2.5688073394495415e-06,
"log_odds_chosen": 0.42161181569099426,
"log_odds_ratio": -0.514995276927948,
"logits/chosen": -0.49338769912719727,
"logits/rejected": -0.07346588373184204,
"logps/chosen": -1.8429884910583496,
"logps/rejected": -2.2116591930389404,
"loss": 2.014,
"nll_loss": 1.9625122547149658,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.18429884314537048,
"rewards/margins": 0.036867089569568634,
"rewards/rejected": -0.22116592526435852,
"step": 57
},
{
"epoch": 0.16016568864342423,
"grad_norm": 0.6582737565040588,
"learning_rate": 2.6146788990825687e-06,
"log_odds_chosen": 0.5138990879058838,
"log_odds_ratio": -0.4847075939178467,
"logits/chosen": -0.5234618186950684,
"logits/rejected": -0.12736433744430542,
"logps/chosen": -1.9835774898529053,
"logps/rejected": -2.4381330013275146,
"loss": 2.1515,
"nll_loss": 2.1030287742614746,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.19835779070854187,
"rewards/margins": 0.045455530285835266,
"rewards/rejected": -0.24381330609321594,
"step": 58
},
{
"epoch": 0.1629271660338281,
"grad_norm": 0.5795738697052002,
"learning_rate": 2.6605504587155968e-06,
"log_odds_chosen": 0.2905232906341553,
"log_odds_ratio": -0.5645147562026978,
"logits/chosen": -0.47340482473373413,
"logits/rejected": -0.21826684474945068,
"logps/chosen": -1.9716663360595703,
"logps/rejected": -2.227067470550537,
"loss": 2.1514,
"nll_loss": 2.0949792861938477,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.19716663658618927,
"rewards/margins": 0.025540094822645187,
"rewards/rejected": -0.22270673513412476,
"step": 59
},
{
"epoch": 0.16568864342423195,
"grad_norm": 0.5678736567497253,
"learning_rate": 2.706422018348624e-06,
"log_odds_chosen": 0.3714035749435425,
"log_odds_ratio": -0.5299574136734009,
"logits/chosen": -0.45862269401550293,
"logits/rejected": 0.08986371755599976,
"logps/chosen": -2.004216432571411,
"logps/rejected": -2.333292245864868,
"loss": 2.1781,
"nll_loss": 2.125133514404297,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.20042164623737335,
"rewards/margins": 0.032907579094171524,
"rewards/rejected": -0.23332923650741577,
"step": 60
},
{
"epoch": 0.16845012081463584,
"grad_norm": 0.5007438063621521,
"learning_rate": 2.7522935779816517e-06,
"log_odds_chosen": 0.667113721370697,
"log_odds_ratio": -0.4193596839904785,
"logits/chosen": -0.5191382765769958,
"logits/rejected": -0.19058826565742493,
"logps/chosen": -1.917590856552124,
"logps/rejected": -2.5098061561584473,
"loss": 2.0632,
"nll_loss": 2.021275281906128,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1917591094970703,
"rewards/margins": 0.0592215433716774,
"rewards/rejected": -0.2509806454181671,
"step": 61
},
{
"epoch": 0.1712115982050397,
"grad_norm": 0.6429389119148254,
"learning_rate": 2.798165137614679e-06,
"log_odds_chosen": 0.41257914900779724,
"log_odds_ratio": -0.5154306888580322,
"logits/chosen": -0.45402857661247253,
"logits/rejected": 0.11148527264595032,
"logps/chosen": -2.081867218017578,
"logps/rejected": -2.45068359375,
"loss": 2.2481,
"nll_loss": 2.1965107917785645,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.20818671584129333,
"rewards/margins": 0.03688164800405502,
"rewards/rejected": -0.24506837129592896,
"step": 62
},
{
"epoch": 0.17397307559544356,
"grad_norm": 0.5388195514678955,
"learning_rate": 2.844036697247707e-06,
"log_odds_chosen": 0.5105969905853271,
"log_odds_ratio": -0.4755265414714813,
"logits/chosen": -0.46547558903694153,
"logits/rejected": -0.1431473195552826,
"logps/chosen": -1.9948811531066895,
"logps/rejected": -2.448573589324951,
"loss": 2.1454,
"nll_loss": 2.0978972911834717,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.19948811829090118,
"rewards/margins": 0.0453692302107811,
"rewards/rejected": -0.24485734105110168,
"step": 63
},
{
"epoch": 0.17673455298584742,
"grad_norm": 0.4911787211894989,
"learning_rate": 2.8899082568807342e-06,
"log_odds_chosen": 0.5747222304344177,
"log_odds_ratio": -0.4552631676197052,
"logits/chosen": -0.5521761178970337,
"logits/rejected": -0.294252872467041,
"logps/chosen": -1.9650574922561646,
"logps/rejected": -2.4793307781219482,
"loss": 2.1135,
"nll_loss": 2.067988634109497,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.19650575518608093,
"rewards/margins": 0.051427341997623444,
"rewards/rejected": -0.24793308973312378,
"step": 64
},
{
"epoch": 0.17949603037625128,
"grad_norm": 0.533399760723114,
"learning_rate": 2.935779816513762e-06,
"log_odds_chosen": 0.3648831844329834,
"log_odds_ratio": -0.5328130722045898,
"logits/chosen": -0.4136759638786316,
"logits/rejected": -0.046070147305727005,
"logps/chosen": -1.8867706060409546,
"logps/rejected": -2.202749729156494,
"loss": 2.0708,
"nll_loss": 2.0175259113311768,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.18867707252502441,
"rewards/margins": 0.031597916036844254,
"rewards/rejected": -0.22027498483657837,
"step": 65
},
{
"epoch": 0.18225750776665517,
"grad_norm": 0.5182326436042786,
"learning_rate": 2.981651376146789e-06,
"log_odds_chosen": 0.5400457382202148,
"log_odds_ratio": -0.48501351475715637,
"logits/chosen": -0.6035399436950684,
"logits/rejected": -0.1548141986131668,
"logps/chosen": -1.8529176712036133,
"logps/rejected": -2.325399398803711,
"loss": 2.022,
"nll_loss": 1.973459243774414,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.18529179692268372,
"rewards/margins": 0.04724816977977753,
"rewards/rejected": -0.23253995180130005,
"step": 66
},
{
"epoch": 0.18501898515705903,
"grad_norm": 0.5337976813316345,
"learning_rate": 3.0275229357798168e-06,
"log_odds_chosen": 0.33741873502731323,
"log_odds_ratio": -0.5426305532455444,
"logits/chosen": -0.39523714780807495,
"logits/rejected": 0.0008284337818622589,
"logps/chosen": -1.997262716293335,
"logps/rejected": -2.2943520545959473,
"loss": 2.1433,
"nll_loss": 2.0890326499938965,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.19972628355026245,
"rewards/margins": 0.029708925634622574,
"rewards/rejected": -0.22943520545959473,
"step": 67
},
{
"epoch": 0.1877804625474629,
"grad_norm": 0.5520625114440918,
"learning_rate": 3.073394495412844e-06,
"log_odds_chosen": 0.4403454065322876,
"log_odds_ratio": -0.5091642141342163,
"logits/chosen": -0.24377387762069702,
"logits/rejected": 0.08138424903154373,
"logps/chosen": -2.0106263160705566,
"logps/rejected": -2.402209758758545,
"loss": 2.2053,
"nll_loss": 2.1543631553649902,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.2010626494884491,
"rewards/margins": 0.039158351719379425,
"rewards/rejected": -0.24022099375724792,
"step": 68
},
{
"epoch": 0.19054193993786675,
"grad_norm": 0.650439441204071,
"learning_rate": 3.119266055045872e-06,
"log_odds_chosen": 0.5253904461860657,
"log_odds_ratio": -0.4741411507129669,
"logits/chosen": -0.4677438735961914,
"logits/rejected": -0.013416798785328865,
"logps/chosen": -2.077601909637451,
"logps/rejected": -2.5524415969848633,
"loss": 2.2423,
"nll_loss": 2.194887161254883,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.20776019990444183,
"rewards/margins": 0.04748394340276718,
"rewards/rejected": -0.2552441358566284,
"step": 69
},
{
"epoch": 0.19330341732827064,
"grad_norm": 0.48958319425582886,
"learning_rate": 3.1651376146788993e-06,
"log_odds_chosen": 0.5917057991027832,
"log_odds_ratio": -0.45470088720321655,
"logits/chosen": -0.36797553300857544,
"logits/rejected": -0.3181126117706299,
"logps/chosen": -2.022463321685791,
"logps/rejected": -2.5544497966766357,
"loss": 2.1697,
"nll_loss": 2.124249219894409,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.20224635303020477,
"rewards/margins": 0.05319864675402641,
"rewards/rejected": -0.2554450035095215,
"step": 70
},
{
"epoch": 0.1960648947186745,
"grad_norm": 0.6293954253196716,
"learning_rate": 3.211009174311927e-06,
"log_odds_chosen": 0.44158488512039185,
"log_odds_ratio": -0.5029429793357849,
"logits/chosen": -0.4253910481929779,
"logits/rejected": -0.14389574527740479,
"logps/chosen": -1.9857234954833984,
"logps/rejected": -2.379268169403076,
"loss": 2.17,
"nll_loss": 2.119678497314453,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.19857235252857208,
"rewards/margins": 0.03935447335243225,
"rewards/rejected": -0.23792681097984314,
"step": 71
},
{
"epoch": 0.19882637210907836,
"grad_norm": 0.6024268269538879,
"learning_rate": 3.256880733944954e-06,
"log_odds_chosen": 0.5992636680603027,
"log_odds_ratio": -0.4583205580711365,
"logits/chosen": -0.3624473810195923,
"logits/rejected": -0.16807770729064941,
"logps/chosen": -1.8936195373535156,
"logps/rejected": -2.403137683868408,
"loss": 2.0566,
"nll_loss": 2.0107438564300537,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.18936194479465485,
"rewards/margins": 0.05095181241631508,
"rewards/rejected": -0.24031376838684082,
"step": 72
},
{
"epoch": 0.20158784949948222,
"grad_norm": 0.537467360496521,
"learning_rate": 3.3027522935779823e-06,
"log_odds_chosen": 0.38986673951148987,
"log_odds_ratio": -0.518619179725647,
"logits/chosen": -0.28752538561820984,
"logits/rejected": -0.08893117308616638,
"logps/chosen": -2.056885242462158,
"logps/rejected": -2.4047675132751465,
"loss": 2.2167,
"nll_loss": 2.1648688316345215,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.2056885063648224,
"rewards/margins": 0.03478822112083435,
"rewards/rejected": -0.24047674238681793,
"step": 73
},
{
"epoch": 0.20434932688988608,
"grad_norm": 0.48359447717666626,
"learning_rate": 3.3486238532110095e-06,
"log_odds_chosen": 0.39413708448410034,
"log_odds_ratio": -0.5191097259521484,
"logits/chosen": -0.28818562626838684,
"logits/rejected": -0.17117023468017578,
"logps/chosen": -1.9585490226745605,
"logps/rejected": -2.3046271800994873,
"loss": 2.1453,
"nll_loss": 2.093356132507324,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.19585487246513367,
"rewards/margins": 0.03460782766342163,
"rewards/rejected": -0.2304627150297165,
"step": 74
},
{
"epoch": 0.20711080428028997,
"grad_norm": 0.6197869777679443,
"learning_rate": 3.394495412844037e-06,
"log_odds_chosen": 0.4853059649467468,
"log_odds_ratio": -0.487282931804657,
"logits/chosen": -0.4553055763244629,
"logits/rejected": -0.07368504256010056,
"logps/chosen": -1.9564940929412842,
"logps/rejected": -2.38771653175354,
"loss": 2.1426,
"nll_loss": 2.093902826309204,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1956494152545929,
"rewards/margins": 0.04312223941087723,
"rewards/rejected": -0.23877164721488953,
"step": 75
},
{
"epoch": 0.20987228167069383,
"grad_norm": 0.5478652119636536,
"learning_rate": 3.4403669724770644e-06,
"log_odds_chosen": 0.4639507532119751,
"log_odds_ratio": -0.4932023882865906,
"logits/chosen": -0.30065810680389404,
"logits/rejected": -0.25181248784065247,
"logps/chosen": -2.044691562652588,
"logps/rejected": -2.460970401763916,
"loss": 2.1933,
"nll_loss": 2.1440277099609375,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.20446917414665222,
"rewards/margins": 0.04162788391113281,
"rewards/rejected": -0.24609704315662384,
"step": 76
},
{
"epoch": 0.2126337590610977,
"grad_norm": 0.5865225195884705,
"learning_rate": 3.486238532110092e-06,
"log_odds_chosen": 0.40864938497543335,
"log_odds_ratio": -0.5256964564323425,
"logits/chosen": -0.414636492729187,
"logits/rejected": -0.1841173619031906,
"logps/chosen": -2.0234270095825195,
"logps/rejected": -2.3900880813598633,
"loss": 2.2079,
"nll_loss": 2.155355215072632,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.2023427039384842,
"rewards/margins": 0.03666612505912781,
"rewards/rejected": -0.239008828997612,
"step": 77
},
{
"epoch": 0.21539523645150155,
"grad_norm": 0.5245475769042969,
"learning_rate": 3.5321100917431193e-06,
"log_odds_chosen": 0.3867124617099762,
"log_odds_ratio": -0.5356163382530212,
"logits/chosen": -0.22849449515342712,
"logits/rejected": 0.04786547273397446,
"logps/chosen": -2.047503709793091,
"logps/rejected": -2.391859531402588,
"loss": 2.2124,
"nll_loss": 2.1587889194488525,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.20475035905838013,
"rewards/margins": 0.03443560749292374,
"rewards/rejected": -0.23918597400188446,
"step": 78
},
{
"epoch": 0.2181567138419054,
"grad_norm": 0.4416508972644806,
"learning_rate": 3.5779816513761473e-06,
"log_odds_chosen": 0.457964152097702,
"log_odds_ratio": -0.49229007959365845,
"logits/chosen": -0.24249830842018127,
"logits/rejected": -0.1156221255660057,
"logps/chosen": -1.9371097087860107,
"logps/rejected": -2.3405327796936035,
"loss": 2.1264,
"nll_loss": 2.077136516571045,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.19371098279953003,
"rewards/margins": 0.0403422936797142,
"rewards/rejected": -0.23405326902866364,
"step": 79
},
{
"epoch": 0.2209181912323093,
"grad_norm": 0.511163055896759,
"learning_rate": 3.6238532110091746e-06,
"log_odds_chosen": 0.3918084502220154,
"log_odds_ratio": -0.527028501033783,
"logits/chosen": -0.3217644989490509,
"logits/rejected": -0.12379680573940277,
"logps/chosen": -1.9086521863937378,
"logps/rejected": -2.250434160232544,
"loss": 2.0875,
"nll_loss": 2.034804105758667,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.19086521863937378,
"rewards/margins": 0.03417817875742912,
"rewards/rejected": -0.2250434011220932,
"step": 80
},
{
"epoch": 0.22367966862271316,
"grad_norm": 0.48532167077064514,
"learning_rate": 3.6697247706422022e-06,
"log_odds_chosen": 0.38070449233055115,
"log_odds_ratio": -0.5242205858230591,
"logits/chosen": -0.24639271199703217,
"logits/rejected": -0.02295789122581482,
"logps/chosen": -1.9235130548477173,
"logps/rejected": -2.256152868270874,
"loss": 2.0966,
"nll_loss": 2.044215440750122,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.192351296544075,
"rewards/margins": 0.03326397389173508,
"rewards/rejected": -0.22561527788639069,
"step": 81
},
{
"epoch": 0.22644114601311702,
"grad_norm": 0.4871878921985626,
"learning_rate": 3.7155963302752295e-06,
"log_odds_chosen": 0.3706399202346802,
"log_odds_ratio": -0.5302938222885132,
"logits/chosen": -0.32461822032928467,
"logits/rejected": -0.07178197801113129,
"logps/chosen": -2.002980947494507,
"logps/rejected": -2.3308701515197754,
"loss": 2.1902,
"nll_loss": 2.1371781826019287,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.20029808580875397,
"rewards/margins": 0.03278890624642372,
"rewards/rejected": -0.2330869883298874,
"step": 82
},
{
"epoch": 0.22920262340352088,
"grad_norm": 0.5049198865890503,
"learning_rate": 3.7614678899082575e-06,
"log_odds_chosen": 0.6436396241188049,
"log_odds_ratio": -0.4284180700778961,
"logits/chosen": -0.3081129789352417,
"logits/rejected": -0.1997162252664566,
"logps/chosen": -1.9448068141937256,
"logps/rejected": -2.5192015171051025,
"loss": 2.1169,
"nll_loss": 2.0740115642547607,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.19448068737983704,
"rewards/margins": 0.057439468801021576,
"rewards/rejected": -0.2519201636314392,
"step": 83
},
{
"epoch": 0.23196410079392474,
"grad_norm": 0.5246726870536804,
"learning_rate": 3.8073394495412848e-06,
"log_odds_chosen": 0.43793147802352905,
"log_odds_ratio": -0.5015705823898315,
"logits/chosen": -0.29296931624412537,
"logits/rejected": -0.13174469769001007,
"logps/chosen": -1.941349744796753,
"logps/rejected": -2.3253884315490723,
"loss": 2.1333,
"nll_loss": 2.08317232131958,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.19413498044013977,
"rewards/margins": 0.03840385749936104,
"rewards/rejected": -0.2325388491153717,
"step": 84
},
{
"epoch": 0.23472557818432863,
"grad_norm": 0.41432732343673706,
"learning_rate": 3.853211009174313e-06,
"log_odds_chosen": 0.5745496153831482,
"log_odds_ratio": -0.4579155147075653,
"logits/chosen": -0.3449418544769287,
"logits/rejected": -0.19380588829517365,
"logps/chosen": -1.8625164031982422,
"logps/rejected": -2.3706510066986084,
"loss": 2.0375,
"nll_loss": 1.9917008876800537,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1862516552209854,
"rewards/margins": 0.050813477486371994,
"rewards/rejected": -0.2370651215314865,
"step": 85
},
{
"epoch": 0.2374870555747325,
"grad_norm": 0.438418984413147,
"learning_rate": 3.89908256880734e-06,
"log_odds_chosen": 0.580481231212616,
"log_odds_ratio": -0.4620014429092407,
"logits/chosen": -0.14796528220176697,
"logits/rejected": -0.32091909646987915,
"logps/chosen": -1.9756391048431396,
"logps/rejected": -2.4986729621887207,
"loss": 2.1484,
"nll_loss": 2.102167844772339,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.19756391644477844,
"rewards/margins": 0.052303388714790344,
"rewards/rejected": -0.2498673051595688,
"step": 86
},
{
"epoch": 0.24024853296513635,
"grad_norm": 0.4355674684047699,
"learning_rate": 3.944954128440367e-06,
"log_odds_chosen": 0.5157158970832825,
"log_odds_ratio": -0.4764195382595062,
"logits/chosen": -0.16625456511974335,
"logits/rejected": -0.22959333658218384,
"logps/chosen": -1.8700374364852905,
"logps/rejected": -2.3208043575286865,
"loss": 2.0401,
"nll_loss": 1.9924601316452026,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.18700376152992249,
"rewards/margins": 0.04507668316364288,
"rewards/rejected": -0.23208042979240417,
"step": 87
},
{
"epoch": 0.2430100103555402,
"grad_norm": 0.4138505756855011,
"learning_rate": 3.9908256880733945e-06,
"log_odds_chosen": 0.5811448693275452,
"log_odds_ratio": -0.48126012086868286,
"logits/chosen": -0.3125801980495453,
"logits/rejected": -0.16787581145763397,
"logps/chosen": -1.79574453830719,
"logps/rejected": -2.3057985305786133,
"loss": 1.9834,
"nll_loss": 1.9353020191192627,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.17957444489002228,
"rewards/margins": 0.051005423069000244,
"rewards/rejected": -0.23057986795902252,
"step": 88
},
{
"epoch": 0.24577148774594407,
"grad_norm": 0.3883982002735138,
"learning_rate": 4.036697247706423e-06,
"log_odds_chosen": 0.6218655705451965,
"log_odds_ratio": -0.43868428468704224,
"logits/chosen": -0.16164137423038483,
"logits/rejected": -0.2850674092769623,
"logps/chosen": -1.8971762657165527,
"logps/rejected": -2.4469919204711914,
"loss": 2.0667,
"nll_loss": 2.0228123664855957,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.189717635512352,
"rewards/margins": 0.05498155951499939,
"rewards/rejected": -0.24469918012619019,
"step": 89
},
{
"epoch": 0.24853296513634796,
"grad_norm": 0.441839337348938,
"learning_rate": 4.08256880733945e-06,
"log_odds_chosen": 0.5872446894645691,
"log_odds_ratio": -0.44401073455810547,
"logits/chosen": -0.2670595347881317,
"logits/rejected": -0.07647659629583359,
"logps/chosen": -1.8952282667160034,
"logps/rejected": -2.412400245666504,
"loss": 2.066,
"nll_loss": 2.021595001220703,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.18952281773090363,
"rewards/margins": 0.051717206835746765,
"rewards/rejected": -0.24124003946781158,
"step": 90
},
{
"epoch": 0.2512944425267518,
"grad_norm": 0.4077044427394867,
"learning_rate": 4.128440366972478e-06,
"log_odds_chosen": 0.4384881556034088,
"log_odds_ratio": -0.5076763033866882,
"logits/chosen": -0.15786704421043396,
"logits/rejected": -0.090408094227314,
"logps/chosen": -1.878911018371582,
"logps/rejected": -2.262026786804199,
"loss": 2.047,
"nll_loss": 1.9962477684020996,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.18789111077785492,
"rewards/margins": 0.03831159323453903,
"rewards/rejected": -0.22620268166065216,
"step": 91
},
{
"epoch": 0.2540559199171557,
"grad_norm": 0.3839576244354248,
"learning_rate": 4.174311926605505e-06,
"log_odds_chosen": 0.42869681119918823,
"log_odds_ratio": -0.5070521235466003,
"logits/chosen": -0.08416657149791718,
"logits/rejected": -0.4555465579032898,
"logps/chosen": -1.9147799015045166,
"logps/rejected": -2.292423725128174,
"loss": 2.0814,
"nll_loss": 2.0306570529937744,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.19147798418998718,
"rewards/margins": 0.03776439279317856,
"rewards/rejected": -0.22924238443374634,
"step": 92
},
{
"epoch": 0.25681739730755954,
"grad_norm": 0.42368394136428833,
"learning_rate": 4.220183486238532e-06,
"log_odds_chosen": 0.5149534344673157,
"log_odds_ratio": -0.47629106044769287,
"logits/chosen": -0.13718552887439728,
"logits/rejected": -0.20033138990402222,
"logps/chosen": -1.9013878107070923,
"logps/rejected": -2.3537254333496094,
"loss": 2.0789,
"nll_loss": 2.0312626361846924,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1901387870311737,
"rewards/margins": 0.04523376002907753,
"rewards/rejected": -0.23537255823612213,
"step": 93
},
{
"epoch": 0.2595788746979634,
"grad_norm": 0.38014668226242065,
"learning_rate": 4.26605504587156e-06,
"log_odds_chosen": 0.4552799165248871,
"log_odds_ratio": -0.49941790103912354,
"logits/chosen": -0.2649000883102417,
"logits/rejected": -0.20182102918624878,
"logps/chosen": -1.8007447719573975,
"logps/rejected": -2.1948485374450684,
"loss": 1.9637,
"nll_loss": 1.9138000011444092,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.18007448315620422,
"rewards/margins": 0.03941037505865097,
"rewards/rejected": -0.2194848358631134,
"step": 94
},
{
"epoch": 0.26234035208836726,
"grad_norm": 0.3550948202610016,
"learning_rate": 4.311926605504588e-06,
"log_odds_chosen": 0.36030256748199463,
"log_odds_ratio": -0.533312976360321,
"logits/chosen": -0.0772688165307045,
"logits/rejected": -0.16052654385566711,
"logps/chosen": -1.8254213333129883,
"logps/rejected": -2.136554002761841,
"loss": 2.0005,
"nll_loss": 1.9472124576568604,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1825421154499054,
"rewards/margins": 0.03111325576901436,
"rewards/rejected": -0.21365538239479065,
"step": 95
},
{
"epoch": 0.26510182947877114,
"grad_norm": 0.40300413966178894,
"learning_rate": 4.357798165137615e-06,
"log_odds_chosen": 0.2818768620491028,
"log_odds_ratio": -0.5676061511039734,
"logits/chosen": -0.10504347085952759,
"logits/rejected": -0.05843639373779297,
"logps/chosen": -1.9888579845428467,
"logps/rejected": -2.2374351024627686,
"loss": 2.1718,
"nll_loss": 2.11505126953125,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.19888579845428467,
"rewards/margins": 0.02485768496990204,
"rewards/rejected": -0.2237434834241867,
"step": 96
},
{
"epoch": 0.26786330686917503,
"grad_norm": 0.398783802986145,
"learning_rate": 4.403669724770643e-06,
"log_odds_chosen": 0.5546217560768127,
"log_odds_ratio": -0.46683651208877563,
"logits/chosen": -0.008073419332504272,
"logits/rejected": -0.08634719252586365,
"logps/chosen": -1.8501688241958618,
"logps/rejected": -2.342288017272949,
"loss": 2.0318,
"nll_loss": 1.9851207733154297,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.18501687049865723,
"rewards/margins": 0.04921191930770874,
"rewards/rejected": -0.23422878980636597,
"step": 97
},
{
"epoch": 0.27062478425957887,
"grad_norm": 0.39449602365493774,
"learning_rate": 4.44954128440367e-06,
"log_odds_chosen": 0.4133303165435791,
"log_odds_ratio": -0.5122652649879456,
"logits/chosen": -0.05717798322439194,
"logits/rejected": -0.1295921504497528,
"logps/chosen": -1.9054274559020996,
"logps/rejected": -2.266583204269409,
"loss": 2.0813,
"nll_loss": 2.030045509338379,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.19054275751113892,
"rewards/margins": 0.03611557558178902,
"rewards/rejected": -0.22665831446647644,
"step": 98
},
{
"epoch": 0.27338626164998275,
"grad_norm": 0.39879634976387024,
"learning_rate": 4.4954128440366975e-06,
"log_odds_chosen": 0.547784149646759,
"log_odds_ratio": -0.46036428213119507,
"logits/chosen": -0.09109216928482056,
"logits/rejected": -0.3699495494365692,
"logps/chosen": -1.9557167291641235,
"logps/rejected": -2.4426751136779785,
"loss": 2.1161,
"nll_loss": 2.070013999938965,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1955716609954834,
"rewards/margins": 0.04869585484266281,
"rewards/rejected": -0.2442675083875656,
"step": 99
},
{
"epoch": 0.2761477390403866,
"grad_norm": 0.3873852491378784,
"learning_rate": 4.541284403669725e-06,
"log_odds_chosen": 0.3448026478290558,
"log_odds_ratio": -0.5393580198287964,
"logits/chosen": -0.1656271517276764,
"logits/rejected": -0.07856383174657822,
"logps/chosen": -1.8394955396652222,
"logps/rejected": -2.1358819007873535,
"loss": 2.0365,
"nll_loss": 1.9825859069824219,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1839495450258255,
"rewards/margins": 0.029638633131980896,
"rewards/rejected": -0.2135881781578064,
"step": 100
},
{
"epoch": 0.2789092164307905,
"grad_norm": 0.3506630063056946,
"learning_rate": 4.587155963302753e-06,
"log_odds_chosen": 0.33486518263816833,
"log_odds_ratio": -0.5475890636444092,
"logits/chosen": 0.009370687417685986,
"logits/rejected": -0.18966984748840332,
"logps/chosen": -1.9532427787780762,
"logps/rejected": -2.2507667541503906,
"loss": 2.132,
"nll_loss": 2.077195405960083,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.19532427191734314,
"rewards/margins": 0.02975238859653473,
"rewards/rejected": -0.22507666051387787,
"step": 101
},
{
"epoch": 0.28167069382119436,
"grad_norm": 0.33624136447906494,
"learning_rate": 4.63302752293578e-06,
"log_odds_chosen": 0.5504237413406372,
"log_odds_ratio": -0.4613422155380249,
"logits/chosen": -0.06238182261586189,
"logits/rejected": -0.22859227657318115,
"logps/chosen": -1.791447401046753,
"logps/rejected": -2.2658450603485107,
"loss": 1.9877,
"nll_loss": 1.9415662288665771,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1791447550058365,
"rewards/margins": 0.04743976891040802,
"rewards/rejected": -0.2265845388174057,
"step": 102
},
{
"epoch": 0.2844321712115982,
"grad_norm": 0.3664388656616211,
"learning_rate": 4.678899082568808e-06,
"log_odds_chosen": 0.3437744081020355,
"log_odds_ratio": -0.5439410209655762,
"logits/chosen": -0.1386101245880127,
"logits/rejected": -0.17609171569347382,
"logps/chosen": -1.8379578590393066,
"logps/rejected": -2.1358203887939453,
"loss": 2.022,
"nll_loss": 1.9676285982131958,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.1837957799434662,
"rewards/margins": 0.0297862458974123,
"rewards/rejected": -0.21358203887939453,
"step": 103
},
{
"epoch": 0.2871936486020021,
"grad_norm": 0.3293968439102173,
"learning_rate": 4.724770642201835e-06,
"log_odds_chosen": 0.3870212137699127,
"log_odds_ratio": -0.5231497883796692,
"logits/chosen": -0.07520203292369843,
"logits/rejected": -0.15402644872665405,
"logps/chosen": -1.756553053855896,
"logps/rejected": -2.086921453475952,
"loss": 1.9488,
"nll_loss": 1.8965202569961548,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1756553202867508,
"rewards/margins": 0.033036813139915466,
"rewards/rejected": -0.20869213342666626,
"step": 104
},
{
"epoch": 0.2899551259924059,
"grad_norm": 0.33918339014053345,
"learning_rate": 4.770642201834863e-06,
"log_odds_chosen": 0.4852727949619293,
"log_odds_ratio": -0.4833363890647888,
"logits/chosen": -0.004435461014509201,
"logits/rejected": -0.1534399837255478,
"logps/chosen": -1.7944328784942627,
"logps/rejected": -2.2146499156951904,
"loss": 1.9627,
"nll_loss": 1.9144006967544556,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.17944329977035522,
"rewards/margins": 0.042021699249744415,
"rewards/rejected": -0.22146499156951904,
"step": 105
},
{
"epoch": 0.2927166033828098,
"grad_norm": 0.3346327841281891,
"learning_rate": 4.816513761467891e-06,
"log_odds_chosen": 0.5177164673805237,
"log_odds_ratio": -0.47983041405677795,
"logits/chosen": -0.00399226788431406,
"logits/rejected": -0.10182341933250427,
"logps/chosen": -1.7794350385665894,
"logps/rejected": -2.226297616958618,
"loss": 1.9511,
"nll_loss": 1.9030946493148804,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.17794349789619446,
"rewards/margins": 0.044686250388622284,
"rewards/rejected": -0.22262977063655853,
"step": 106
},
{
"epoch": 0.2954780807732137,
"grad_norm": 0.3454085886478424,
"learning_rate": 4.862385321100918e-06,
"log_odds_chosen": 0.3980882465839386,
"log_odds_ratio": -0.5184234380722046,
"logits/chosen": 0.01898978091776371,
"logits/rejected": -0.21249954402446747,
"logps/chosen": -1.8531229496002197,
"logps/rejected": -2.197577953338623,
"loss": 2.0242,
"nll_loss": 1.9724009037017822,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.18531231582164764,
"rewards/margins": 0.03444547951221466,
"rewards/rejected": -0.2197577804327011,
"step": 107
},
{
"epoch": 0.2982395581636175,
"grad_norm": 0.3301767408847809,
"learning_rate": 4.908256880733945e-06,
"log_odds_chosen": 0.44768860936164856,
"log_odds_ratio": -0.5000944137573242,
"logits/chosen": 0.06930336356163025,
"logits/rejected": -0.24058011174201965,
"logps/chosen": -1.8849480152130127,
"logps/rejected": -2.278297185897827,
"loss": 2.0446,
"nll_loss": 1.9945893287658691,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.18849481642246246,
"rewards/margins": 0.039334915578365326,
"rewards/rejected": -0.2278297394514084,
"step": 108
},
{
"epoch": 0.3010010355540214,
"grad_norm": 0.3244481086730957,
"learning_rate": 4.954128440366973e-06,
"log_odds_chosen": 0.6234428286552429,
"log_odds_ratio": -0.44249945878982544,
"logits/chosen": 0.045590970665216446,
"logits/rejected": -0.3307313919067383,
"logps/chosen": -1.8896514177322388,
"logps/rejected": -2.4449257850646973,
"loss": 2.0431,
"nll_loss": 1.9988850355148315,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.18896515667438507,
"rewards/margins": 0.0555274523794651,
"rewards/rejected": -0.24449260532855988,
"step": 109
},
{
"epoch": 0.30376251294442524,
"grad_norm": 0.31317761540412903,
"learning_rate": 5e-06,
"log_odds_chosen": 0.2916356325149536,
"log_odds_ratio": -0.5743635892868042,
"logits/chosen": 0.144297257065773,
"logits/rejected": -0.26908552646636963,
"logps/chosen": -1.8418867588043213,
"logps/rejected": -2.0925679206848145,
"loss": 2.0441,
"nll_loss": 1.9866299629211426,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.18418867886066437,
"rewards/margins": 0.02506813406944275,
"rewards/rejected": -0.20925679802894592,
"step": 110
},
{
"epoch": 0.30652399033482913,
"grad_norm": 0.29466620087623596,
"learning_rate": 4.999987154315977e-06,
"log_odds_chosen": 0.42353084683418274,
"log_odds_ratio": -0.5111951231956482,
"logits/chosen": 0.05306434631347656,
"logits/rejected": -0.15503177046775818,
"logps/chosen": -1.7407968044281006,
"logps/rejected": -2.0989816188812256,
"loss": 1.924,
"nll_loss": 1.87285315990448,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.17407968640327454,
"rewards/margins": 0.03581848740577698,
"rewards/rejected": -0.2098981738090515,
"step": 111
},
{
"epoch": 0.309285467725233,
"grad_norm": 0.3188883364200592,
"learning_rate": 4.999948617395916e-06,
"log_odds_chosen": 0.4043401777744293,
"log_odds_ratio": -0.5218645930290222,
"logits/chosen": 0.16884373128414154,
"logits/rejected": -0.07719510793685913,
"logps/chosen": -1.8007270097732544,
"logps/rejected": -2.151782274246216,
"loss": 1.9777,
"nll_loss": 1.9254791736602783,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.18007270991802216,
"rewards/margins": 0.035105518996715546,
"rewards/rejected": -0.2151782363653183,
"step": 112
},
{
"epoch": 0.31204694511563685,
"grad_norm": 0.3535216450691223,
"learning_rate": 4.999884389635843e-06,
"log_odds_chosen": 0.41415858268737793,
"log_odds_ratio": -0.5124688744544983,
"logits/chosen": 0.0558352991938591,
"logits/rejected": -0.09803693741559982,
"logps/chosen": -1.8450539112091064,
"logps/rejected": -2.2039334774017334,
"loss": 2.0336,
"nll_loss": 1.9823946952819824,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1845053881406784,
"rewards/margins": 0.035887960344552994,
"rewards/rejected": -0.2203933596611023,
"step": 113
},
{
"epoch": 0.31480842250604074,
"grad_norm": 0.2991940379142761,
"learning_rate": 4.9997944716957985e-06,
"log_odds_chosen": 0.514029324054718,
"log_odds_ratio": -0.47918182611465454,
"logits/chosen": 0.16882193088531494,
"logits/rejected": -0.24593651294708252,
"logps/chosen": -1.8493305444717407,
"logps/rejected": -2.298590898513794,
"loss": 2.0272,
"nll_loss": 1.9792673587799072,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.18493306636810303,
"rewards/margins": 0.044926032423973083,
"rewards/rejected": -0.22985908389091492,
"step": 114
},
{
"epoch": 0.3175698998964446,
"grad_norm": 0.28921976685523987,
"learning_rate": 4.999678864499828e-06,
"log_odds_chosen": 0.38468483090400696,
"log_odds_ratio": -0.5222321152687073,
"logits/chosen": 0.2794942557811737,
"logits/rejected": -0.21004724502563477,
"logps/chosen": -1.8734655380249023,
"logps/rejected": -2.2063117027282715,
"loss": 2.0344,
"nll_loss": 1.9822089672088623,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.18734657764434814,
"rewards/margins": 0.03328459709882736,
"rewards/rejected": -0.2206311821937561,
"step": 115
},
{
"epoch": 0.32033137728684846,
"grad_norm": 0.30199047923088074,
"learning_rate": 4.999537569235975e-06,
"log_odds_chosen": 0.35607457160949707,
"log_odds_ratio": -0.5390717387199402,
"logits/chosen": 0.17685924470424652,
"logits/rejected": -0.16482022404670715,
"logps/chosen": -1.8530224561691284,
"logps/rejected": -2.1583948135375977,
"loss": 2.0285,
"nll_loss": 1.9746413230895996,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.1853022426366806,
"rewards/margins": 0.030537229031324387,
"rewards/rejected": -0.21583949029445648,
"step": 116
},
{
"epoch": 0.32309285467725235,
"grad_norm": 0.305818647146225,
"learning_rate": 4.999370587356267e-06,
"log_odds_chosen": 0.3153924345970154,
"log_odds_ratio": -0.5565903782844543,
"logits/chosen": 0.17289672791957855,
"logits/rejected": -0.3006405234336853,
"logps/chosen": -1.9371519088745117,
"logps/rejected": -2.214411497116089,
"loss": 2.1096,
"nll_loss": 2.0539159774780273,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.1937151849269867,
"rewards/margins": 0.027725949883461,
"rewards/rejected": -0.2214411348104477,
"step": 117
},
{
"epoch": 0.3258543320676562,
"grad_norm": 0.324216365814209,
"learning_rate": 4.9991779205767e-06,
"log_odds_chosen": 0.29256871342658997,
"log_odds_ratio": -0.5621715784072876,
"logits/chosen": 0.055469777435064316,
"logits/rejected": -0.5593094229698181,
"logps/chosen": -1.808518886566162,
"logps/rejected": -2.060049295425415,
"loss": 1.9916,
"nll_loss": 1.9353588819503784,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.18085187673568726,
"rewards/margins": 0.02515305206179619,
"rewards/rejected": -0.20600494742393494,
"step": 118
},
{
"epoch": 0.32861580945806007,
"grad_norm": 0.316292941570282,
"learning_rate": 4.998959570877224e-06,
"log_odds_chosen": 0.391570121049881,
"log_odds_ratio": -0.519210159778595,
"logits/chosen": 0.29701903462409973,
"logits/rejected": -0.248467817902565,
"logps/chosen": -1.950477123260498,
"logps/rejected": -2.2915000915527344,
"loss": 2.1349,
"nll_loss": 2.0829989910125732,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.19504770636558533,
"rewards/margins": 0.034102290868759155,
"rewards/rejected": -0.22914999723434448,
"step": 119
},
{
"epoch": 0.3313772868484639,
"grad_norm": 0.29707539081573486,
"learning_rate": 4.99871554050172e-06,
"log_odds_chosen": 0.49879857897758484,
"log_odds_ratio": -0.4770265221595764,
"logits/chosen": 0.1394173949956894,
"logits/rejected": -0.11445163935422897,
"logps/chosen": -1.7406611442565918,
"logps/rejected": -2.167613983154297,
"loss": 1.9065,
"nll_loss": 1.8588043451309204,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.17406611144542694,
"rewards/margins": 0.042695268988609314,
"rewards/rejected": -0.21676141023635864,
"step": 120
},
{
"epoch": 0.3341387642388678,
"grad_norm": 0.3011818528175354,
"learning_rate": 4.9984458319579775e-06,
"log_odds_chosen": 0.4734145998954773,
"log_odds_ratio": -0.48969942331314087,
"logits/chosen": 0.10353265702724457,
"logits/rejected": -0.22807854413986206,
"logps/chosen": -1.7799421548843384,
"logps/rejected": -2.1871492862701416,
"loss": 1.9363,
"nll_loss": 1.8872833251953125,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.17799422144889832,
"rewards/margins": 0.04072071984410286,
"rewards/rejected": -0.21871493756771088,
"step": 121
},
{
"epoch": 0.3369002416292717,
"grad_norm": 0.28915688395500183,
"learning_rate": 4.99815044801767e-06,
"log_odds_chosen": 0.3440546989440918,
"log_odds_ratio": -0.5443906784057617,
"logits/chosen": 0.22890803217887878,
"logits/rejected": -0.20038765668869019,
"logps/chosen": -1.8269233703613281,
"logps/rejected": -2.1230876445770264,
"loss": 2.0144,
"nll_loss": 1.959984540939331,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.18269234895706177,
"rewards/margins": 0.02961641550064087,
"rewards/rejected": -0.21230876445770264,
"step": 122
},
{
"epoch": 0.3396617190196755,
"grad_norm": 0.31248003244400024,
"learning_rate": 4.9978293917163225e-06,
"log_odds_chosen": 0.21560871601104736,
"log_odds_ratio": -0.5958309769630432,
"logits/chosen": 0.2199111431837082,
"logits/rejected": -0.15359394252300262,
"logps/chosen": -1.9724359512329102,
"logps/rejected": -2.162384033203125,
"loss": 2.1306,
"nll_loss": 2.0710067749023438,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.1972435861825943,
"rewards/margins": 0.01899481564760208,
"rewards/rejected": -0.21623840928077698,
"step": 123
},
{
"epoch": 0.3424231964100794,
"grad_norm": 0.3086913824081421,
"learning_rate": 4.997482666353287e-06,
"log_odds_chosen": 0.36193954944610596,
"log_odds_ratio": -0.5371627807617188,
"logits/chosen": 0.07905742526054382,
"logits/rejected": -0.047993652522563934,
"logps/chosen": -1.860518217086792,
"logps/rejected": -2.175654649734497,
"loss": 2.0362,
"nll_loss": 1.982520341873169,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.18605183064937592,
"rewards/margins": 0.03151364251971245,
"rewards/rejected": -0.21756546199321747,
"step": 124
},
{
"epoch": 0.34518467380048323,
"grad_norm": 0.2956501543521881,
"learning_rate": 4.997110275491702e-06,
"log_odds_chosen": 0.5367758274078369,
"log_odds_ratio": -0.4794245958328247,
"logits/chosen": 0.17313790321350098,
"logits/rejected": -0.5377508401870728,
"logps/chosen": -1.7679404020309448,
"logps/rejected": -2.2323243618011475,
"loss": 1.9515,
"nll_loss": 1.9035319089889526,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.17679405212402344,
"rewards/margins": 0.04643838852643967,
"rewards/rejected": -0.2232324331998825,
"step": 125
},
{
"epoch": 0.3479461511908871,
"grad_norm": 0.2930620014667511,
"learning_rate": 4.9967122229584614e-06,
"log_odds_chosen": 0.5086734890937805,
"log_odds_ratio": -0.47367364168167114,
"logits/chosen": 0.20121866464614868,
"logits/rejected": -0.3580709397792816,
"logps/chosen": -1.772174596786499,
"logps/rejected": -2.211575746536255,
"loss": 1.9502,
"nll_loss": 1.9028148651123047,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.17721746861934662,
"rewards/margins": 0.04394011199474335,
"rewards/rejected": -0.22115758061408997,
"step": 126
},
{
"epoch": 0.350707628581291,
"grad_norm": 0.313006728887558,
"learning_rate": 4.996288512844169e-06,
"log_odds_chosen": 0.26984095573425293,
"log_odds_ratio": -0.5770066380500793,
"logits/chosen": 0.30613642930984497,
"logits/rejected": -0.0801864042878151,
"logps/chosen": -1.9204916954040527,
"logps/rejected": -2.153423547744751,
"loss": 2.1089,
"nll_loss": 2.051222324371338,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.19204919040203094,
"rewards/margins": 0.023293154314160347,
"rewards/rejected": -0.21534234285354614,
"step": 127
},
{
"epoch": 0.35346910597169484,
"grad_norm": 0.26883664727211,
"learning_rate": 4.995839149503103e-06,
"log_odds_chosen": 0.4242061972618103,
"log_odds_ratio": -0.5166581273078918,
"logits/chosen": 0.287309467792511,
"logits/rejected": -0.28880101442337036,
"logps/chosen": -1.7962003946304321,
"logps/rejected": -2.1639606952667236,
"loss": 1.9745,
"nll_loss": 1.9228155612945557,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.17962004244327545,
"rewards/margins": 0.036776017397642136,
"rewards/rejected": -0.2163960486650467,
"step": 128
},
{
"epoch": 0.35623058336209873,
"grad_norm": 0.3000660240650177,
"learning_rate": 4.995364137553166e-06,
"log_odds_chosen": 0.2731889486312866,
"log_odds_ratio": -0.5769234299659729,
"logits/chosen": 0.17702779173851013,
"logits/rejected": -0.16427305340766907,
"logps/chosen": -1.8508391380310059,
"logps/rejected": -2.0855119228363037,
"loss": 2.0272,
"nll_loss": 1.9694753885269165,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.18508392572402954,
"rewards/margins": 0.023467278108000755,
"rewards/rejected": -0.20855121314525604,
"step": 129
},
{
"epoch": 0.35899206075250256,
"grad_norm": 0.27979782223701477,
"learning_rate": 4.994863481875842e-06,
"log_odds_chosen": 0.539387047290802,
"log_odds_ratio": -0.4670449495315552,
"logits/chosen": 0.19368596374988556,
"logits/rejected": -0.4078698754310608,
"logps/chosen": -1.7874817848205566,
"logps/rejected": -2.2576088905334473,
"loss": 1.9686,
"nll_loss": 1.9218828678131104,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.17874819040298462,
"rewards/margins": 0.04701270908117294,
"rewards/rejected": -0.22576089203357697,
"step": 130
},
{
"epoch": 0.36175353814290645,
"grad_norm": 0.2961781322956085,
"learning_rate": 4.99433718761614e-06,
"log_odds_chosen": 0.482613205909729,
"log_odds_ratio": -0.48607659339904785,
"logits/chosen": 0.3117329776287079,
"logits/rejected": -0.19324824213981628,
"logps/chosen": -1.8348233699798584,
"logps/rejected": -2.2560348510742188,
"loss": 1.9992,
"nll_loss": 1.9505534172058105,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1834823340177536,
"rewards/margins": 0.042121127247810364,
"rewards/rejected": -0.22560347616672516,
"step": 131
},
{
"epoch": 0.36451501553331034,
"grad_norm": 0.27247706055641174,
"learning_rate": 4.993785260182552e-06,
"log_odds_chosen": 0.5040154457092285,
"log_odds_ratio": -0.47964078187942505,
"logits/chosen": 0.26086652278900146,
"logits/rejected": -0.39971601963043213,
"logps/chosen": -1.7693819999694824,
"logps/rejected": -2.206263780593872,
"loss": 1.9461,
"nll_loss": 1.898113489151001,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.17693820595741272,
"rewards/margins": 0.043688178062438965,
"rewards/rejected": -0.2206263691186905,
"step": 132
},
{
"epoch": 0.36727649292371417,
"grad_norm": 0.2559458613395691,
"learning_rate": 4.993207705246983e-06,
"log_odds_chosen": 0.37582507729530334,
"log_odds_ratio": -0.529410719871521,
"logits/chosen": 0.24356283247470856,
"logits/rejected": -0.23510105907917023,
"logps/chosen": -1.8250073194503784,
"logps/rejected": -2.1527535915374756,
"loss": 1.9998,
"nll_loss": 1.9468413591384888,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.18250073492527008,
"rewards/margins": 0.03277461230754852,
"rewards/rejected": -0.2152753621339798,
"step": 133
},
{
"epoch": 0.37003797031411806,
"grad_norm": 0.2661672532558441,
"learning_rate": 4.992604528744705e-06,
"log_odds_chosen": 0.4147174656391144,
"log_odds_ratio": -0.5111517906188965,
"logits/chosen": 0.23473379015922546,
"logits/rejected": -0.5313011407852173,
"logps/chosen": -1.8088786602020264,
"logps/rejected": -2.1657345294952393,
"loss": 1.985,
"nll_loss": 1.9338966608047485,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1808878630399704,
"rewards/margins": 0.035685598850250244,
"rewards/rejected": -0.21657346189022064,
"step": 134
},
{
"epoch": 0.37279944770452195,
"grad_norm": 0.26244473457336426,
"learning_rate": 4.9919757368742895e-06,
"log_odds_chosen": 0.4870528280735016,
"log_odds_ratio": -0.4892481565475464,
"logits/chosen": 0.2686365842819214,
"logits/rejected": -0.3376864194869995,
"logps/chosen": -1.7298060655593872,
"logps/rejected": -2.145728349685669,
"loss": 1.9046,
"nll_loss": 1.8556550741195679,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.17298059165477753,
"rewards/margins": 0.04159224033355713,
"rewards/rejected": -0.21457283198833466,
"step": 135
},
{
"epoch": 0.3755609250949258,
"grad_norm": 0.26679527759552,
"learning_rate": 4.991321336097546e-06,
"log_odds_chosen": 0.4111718535423279,
"log_odds_ratio": -0.512808084487915,
"logits/chosen": 0.22426243126392365,
"logits/rejected": -0.2763628363609314,
"logps/chosen": -1.8473613262176514,
"logps/rejected": -2.2056474685668945,
"loss": 1.9977,
"nll_loss": 1.9464530944824219,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.18473613262176514,
"rewards/margins": 0.03582862764596939,
"rewards/rejected": -0.22056476771831512,
"step": 136
},
{
"epoch": 0.37832240248532967,
"grad_norm": 0.25576797127723694,
"learning_rate": 4.990641333139455e-06,
"log_odds_chosen": 0.435663640499115,
"log_odds_ratio": -0.5039057731628418,
"logits/chosen": 0.29907894134521484,
"logits/rejected": -0.34149447083473206,
"logps/chosen": -1.7228469848632812,
"logps/rejected": -2.093015670776367,
"loss": 1.8956,
"nll_loss": 1.8452366590499878,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.17228470742702484,
"rewards/margins": 0.037016861140728,
"rewards/rejected": -0.20930157601833344,
"step": 137
},
{
"epoch": 0.3810838798757335,
"grad_norm": 0.2576519250869751,
"learning_rate": 4.989935734988098e-06,
"log_odds_chosen": 0.32478412985801697,
"log_odds_ratio": -0.5477995872497559,
"logits/chosen": 0.3516974151134491,
"logits/rejected": -0.4254434406757355,
"logps/chosen": -1.8142131567001343,
"logps/rejected": -2.0922889709472656,
"loss": 1.9776,
"nll_loss": 1.922855257987976,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.18142130970954895,
"rewards/margins": 0.02780758962035179,
"rewards/rejected": -0.20922890305519104,
"step": 138
},
{
"epoch": 0.3838453572661374,
"grad_norm": 0.2815987765789032,
"learning_rate": 4.989204548894589e-06,
"log_odds_chosen": 0.32539641857147217,
"log_odds_ratio": -0.5531569719314575,
"logits/chosen": 0.4096101224422455,
"logits/rejected": -0.19687263667583466,
"logps/chosen": -1.8913159370422363,
"logps/rejected": -2.1725118160247803,
"loss": 2.0515,
"nll_loss": 1.99615478515625,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.18913161754608154,
"rewards/margins": 0.028119584545493126,
"rewards/rejected": -0.21725118160247803,
"step": 139
},
{
"epoch": 0.3866068346565413,
"grad_norm": 0.25471100211143494,
"learning_rate": 4.988447782372996e-06,
"log_odds_chosen": 0.3162972331047058,
"log_odds_ratio": -0.553735077381134,
"logits/chosen": 0.2287934273481369,
"logits/rejected": -0.36916857957839966,
"logps/chosen": -1.717573642730713,
"logps/rejected": -1.986279010772705,
"loss": 1.8883,
"nll_loss": 1.8329135179519653,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.17175735533237457,
"rewards/margins": 0.026870528236031532,
"rewards/rejected": -0.19862788915634155,
"step": 140
},
{
"epoch": 0.3893683120469451,
"grad_norm": 0.23676469922065735,
"learning_rate": 4.9876654432002655e-06,
"log_odds_chosen": 0.41759181022644043,
"log_odds_ratio": -0.5150130391120911,
"logits/chosen": 0.3617481589317322,
"logits/rejected": -0.5005238056182861,
"logps/chosen": -1.7590631246566772,
"logps/rejected": -2.120802640914917,
"loss": 1.9118,
"nll_loss": 1.8603452444076538,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.17590634524822235,
"rewards/margins": 0.03617396205663681,
"rewards/rejected": -0.21208029985427856,
"step": 141
},
{
"epoch": 0.392129789437349,
"grad_norm": 0.23819445073604584,
"learning_rate": 4.986857539416144e-06,
"log_odds_chosen": 0.514154851436615,
"log_odds_ratio": -0.47449907660484314,
"logits/chosen": 0.2892986834049225,
"logits/rejected": -0.5338828563690186,
"logps/chosen": -1.732762336730957,
"logps/rejected": -2.1715476512908936,
"loss": 1.8923,
"nll_loss": 1.8448883295059204,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.17327623069286346,
"rewards/margins": 0.04387851804494858,
"rewards/rejected": -0.21715475618839264,
"step": 142
},
{
"epoch": 0.39489126682775283,
"grad_norm": 0.2597619891166687,
"learning_rate": 4.986024079323092e-06,
"log_odds_chosen": 0.21202370524406433,
"log_odds_ratio": -0.5989823341369629,
"logits/chosen": 0.31219571828842163,
"logits/rejected": -0.2251828908920288,
"logps/chosen": -1.7001993656158447,
"logps/rejected": -1.8785269260406494,
"loss": 1.9001,
"nll_loss": 1.840247392654419,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.17001992464065552,
"rewards/margins": 0.017832759767770767,
"rewards/rejected": -0.18785269558429718,
"step": 143
},
{
"epoch": 0.3976527442181567,
"grad_norm": 0.24414244294166565,
"learning_rate": 4.985165071486201e-06,
"log_odds_chosen": 0.42103180289268494,
"log_odds_ratio": -0.506747305393219,
"logits/chosen": 0.24320174753665924,
"logits/rejected": -0.47630739212036133,
"logps/chosen": -1.813495397567749,
"logps/rejected": -2.177159309387207,
"loss": 1.9628,
"nll_loss": 1.9121257066726685,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.18134953081607819,
"rewards/margins": 0.03636639565229416,
"rewards/rejected": -0.21771591901779175,
"step": 144
},
{
"epoch": 0.4004142216085606,
"grad_norm": 0.25120964646339417,
"learning_rate": 4.984280524733107e-06,
"log_odds_chosen": 0.4766540825366974,
"log_odds_ratio": -0.4867539405822754,
"logits/chosen": 0.23771557211875916,
"logits/rejected": -0.3588354289531708,
"logps/chosen": -1.7543095350265503,
"logps/rejected": -2.1605966091156006,
"loss": 1.92,
"nll_loss": 1.8713701963424683,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.17543095350265503,
"rewards/margins": 0.040628716349601746,
"rewards/rejected": -0.21605966985225677,
"step": 145
},
{
"epoch": 0.40317569899896444,
"grad_norm": 0.23762984573841095,
"learning_rate": 4.983370448153896e-06,
"log_odds_chosen": 0.3954075574874878,
"log_odds_ratio": -0.5178768038749695,
"logits/chosen": 0.2783900201320648,
"logits/rejected": -0.3363473117351532,
"logps/chosen": -1.7586162090301514,
"logps/rejected": -2.097499370574951,
"loss": 1.9169,
"nll_loss": 1.8651440143585205,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1758616417646408,
"rewards/margins": 0.033888280391693115,
"rewards/rejected": -0.20974992215633392,
"step": 146
},
{
"epoch": 0.4059371763893683,
"grad_norm": 0.250926673412323,
"learning_rate": 4.9824348511010115e-06,
"log_odds_chosen": 0.37487757205963135,
"log_odds_ratio": -0.5328046083450317,
"logits/chosen": 0.3899734616279602,
"logits/rejected": -0.3820725679397583,
"logps/chosen": -1.8538525104522705,
"logps/rejected": -2.1806483268737793,
"loss": 2.0058,
"nll_loss": 1.9525387287139893,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.18538527190685272,
"rewards/margins": 0.03267957270145416,
"rewards/rejected": -0.2180648297071457,
"step": 147
},
{
"epoch": 0.40869865377977216,
"grad_norm": 0.24456721544265747,
"learning_rate": 4.981473743189163e-06,
"log_odds_chosen": 0.6152381300926208,
"log_odds_ratio": -0.4415377974510193,
"logits/chosen": 0.15124794840812683,
"logits/rejected": -0.6728506684303284,
"logps/chosen": -1.630881905555725,
"logps/rejected": -2.1548542976379395,
"loss": 1.8038,
"nll_loss": 1.7596325874328613,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.16308818757534027,
"rewards/margins": 0.05239725112915039,
"rewards/rejected": -0.21548543870449066,
"step": 148
},
{
"epoch": 0.41146013117017605,
"grad_norm": 0.2503897547721863,
"learning_rate": 4.98048713429522e-06,
"log_odds_chosen": 0.4051974415779114,
"log_odds_ratio": -0.5133127570152283,
"logits/chosen": 0.3494684100151062,
"logits/rejected": -0.11723777651786804,
"logps/chosen": -1.7437934875488281,
"logps/rejected": -2.086435079574585,
"loss": 1.9057,
"nll_loss": 1.8543728590011597,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1743793487548828,
"rewards/margins": 0.034264158457517624,
"rewards/rejected": -0.20864351093769073,
"step": 149
},
{
"epoch": 0.41422160856057993,
"grad_norm": 0.26401370763778687,
"learning_rate": 4.979475034558115e-06,
"log_odds_chosen": 0.35506629943847656,
"log_odds_ratio": -0.534768283367157,
"logits/chosen": 0.4498825669288635,
"logits/rejected": -0.32746273279190063,
"logps/chosen": -1.8957396745681763,
"logps/rejected": -2.205925464630127,
"loss": 2.042,
"nll_loss": 1.988513469696045,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1895739734172821,
"rewards/margins": 0.0310185756534338,
"rewards/rejected": -0.22059254348278046,
"step": 150
},
{
"epoch": 0.41698308595098377,
"grad_norm": 0.23593436181545258,
"learning_rate": 4.978437454378741e-06,
"log_odds_chosen": 0.36985495686531067,
"log_odds_ratio": -0.5328302979469299,
"logits/chosen": 0.4250433146953583,
"logits/rejected": -0.38277071714401245,
"logps/chosen": -1.7761732339859009,
"logps/rejected": -2.0909667015075684,
"loss": 1.942,
"nll_loss": 1.8887526988983154,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.17761734127998352,
"rewards/margins": 0.03147934749722481,
"rewards/rejected": -0.20909668505191803,
"step": 151
},
{
"epoch": 0.41974456334138766,
"grad_norm": 0.26158878207206726,
"learning_rate": 4.977374404419838e-06,
"log_odds_chosen": 0.39601171016693115,
"log_odds_ratio": -0.5228374004364014,
"logits/chosen": 0.42059236764907837,
"logits/rejected": -0.3891626000404358,
"logps/chosen": -1.7959572076797485,
"logps/rejected": -2.140069007873535,
"loss": 1.9499,
"nll_loss": 1.8975740671157837,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1795957237482071,
"rewards/margins": 0.03441117703914642,
"rewards/rejected": -0.21400688588619232,
"step": 152
},
{
"epoch": 0.4225060407317915,
"grad_norm": 0.2561565339565277,
"learning_rate": 4.976285895605888e-06,
"log_odds_chosen": 0.4899553656578064,
"log_odds_ratio": -0.4833296239376068,
"logits/chosen": 0.37404921650886536,
"logits/rejected": -0.5243082046508789,
"logps/chosen": -1.7399574518203735,
"logps/rejected": -2.1531152725219727,
"loss": 1.8971,
"nll_loss": 1.8487193584442139,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1739957481622696,
"rewards/margins": 0.04131579399108887,
"rewards/rejected": -0.21531155705451965,
"step": 153
},
{
"epoch": 0.4252675181221954,
"grad_norm": 0.2550884485244751,
"learning_rate": 4.9751719391230055e-06,
"log_odds_chosen": 0.25457680225372314,
"log_odds_ratio": -0.5827968120574951,
"logits/chosen": 0.426510751247406,
"logits/rejected": -0.38418132066726685,
"logps/chosen": -1.710974931716919,
"logps/rejected": -1.9265732765197754,
"loss": 1.8985,
"nll_loss": 1.840250015258789,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.17109748721122742,
"rewards/margins": 0.02155984938144684,
"rewards/rejected": -0.19265732169151306,
"step": 154
},
{
"epoch": 0.42802899551259926,
"grad_norm": 0.25417274236679077,
"learning_rate": 4.974032546418816e-06,
"log_odds_chosen": 0.47524771094322205,
"log_odds_ratio": -0.49181026220321655,
"logits/chosen": 0.43296438455581665,
"logits/rejected": -0.41207337379455566,
"logps/chosen": -1.756546139717102,
"logps/rejected": -2.1634457111358643,
"loss": 1.9365,
"nll_loss": 1.8873004913330078,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.17565463483333588,
"rewards/margins": 0.04068994149565697,
"rewards/rejected": -0.21634456515312195,
"step": 155
},
{
"epoch": 0.4307904729030031,
"grad_norm": 0.24467967450618744,
"learning_rate": 4.9728677292023405e-06,
"log_odds_chosen": 0.19028525054454803,
"log_odds_ratio": -0.6092196702957153,
"logits/chosen": 0.5236613750457764,
"logits/rejected": -0.3672065734863281,
"logps/chosen": -1.851803183555603,
"logps/rejected": -2.0145559310913086,
"loss": 2.0125,
"nll_loss": 1.9516232013702393,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.18518033623695374,
"rewards/margins": 0.016275260597467422,
"rewards/rejected": -0.20145559310913086,
"step": 156
},
{
"epoch": 0.433551950293407,
"grad_norm": 0.24561214447021484,
"learning_rate": 4.971677499443882e-06,
"log_odds_chosen": 0.34714600443840027,
"log_odds_ratio": -0.538092315196991,
"logits/chosen": 0.42429813742637634,
"logits/rejected": -0.4320365786552429,
"logps/chosen": -1.7550561428070068,
"logps/rejected": -2.0503358840942383,
"loss": 1.9273,
"nll_loss": 1.873533010482788,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1755056083202362,
"rewards/margins": 0.029527965933084488,
"rewards/rejected": -0.2050335705280304,
"step": 157
},
{
"epoch": 0.4363134276838108,
"grad_norm": 0.23480936884880066,
"learning_rate": 4.97046186937489e-06,
"log_odds_chosen": 0.3259715735912323,
"log_odds_ratio": -0.5574356913566589,
"logits/chosen": 0.33684611320495605,
"logits/rejected": -0.48997414112091064,
"logps/chosen": -1.733799934387207,
"logps/rejected": -2.011017084121704,
"loss": 1.8941,
"nll_loss": 1.8383519649505615,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.17338000237941742,
"rewards/margins": 0.027721701189875603,
"rewards/rejected": -0.20110172033309937,
"step": 158
},
{
"epoch": 0.4390749050742147,
"grad_norm": 0.23975060880184174,
"learning_rate": 4.9692208514878445e-06,
"log_odds_chosen": 0.2344578355550766,
"log_odds_ratio": -0.5858049392700195,
"logits/chosen": 0.4837522804737091,
"logits/rejected": -0.2647075951099396,
"logps/chosen": -1.8640694618225098,
"logps/rejected": -2.064138412475586,
"loss": 2.0361,
"nll_loss": 1.9775654077529907,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.1864069551229477,
"rewards/margins": 0.02000689134001732,
"rewards/rejected": -0.2064138650894165,
"step": 159
},
{
"epoch": 0.4418363824646186,
"grad_norm": 0.25472894310951233,
"learning_rate": 4.967954458536126e-06,
"log_odds_chosen": 0.34545353055000305,
"log_odds_ratio": -0.5416699051856995,
"logits/chosen": 0.4849855303764343,
"logits/rejected": -0.14890551567077637,
"logps/chosen": -1.6954622268676758,
"logps/rejected": -1.9883880615234375,
"loss": 1.8756,
"nll_loss": 1.821388602256775,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1695462316274643,
"rewards/margins": 0.029292574152350426,
"rewards/rejected": -0.19883880019187927,
"step": 160
},
{
"epoch": 0.4445978598550224,
"grad_norm": 0.2551233172416687,
"learning_rate": 4.96666270353388e-06,
"log_odds_chosen": 0.34612518548965454,
"log_odds_ratio": -0.5391160845756531,
"logits/chosen": 0.4446476995944977,
"logits/rejected": -0.4060593545436859,
"logps/chosen": -1.7577486038208008,
"logps/rejected": -2.0534539222717285,
"loss": 1.9068,
"nll_loss": 1.8529114723205566,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.17577485740184784,
"rewards/margins": 0.029570531100034714,
"rewards/rejected": -0.20534539222717285,
"step": 161
},
{
"epoch": 0.4473593372454263,
"grad_norm": 0.2522425651550293,
"learning_rate": 4.965345599755888e-06,
"log_odds_chosen": 0.3961338400840759,
"log_odds_ratio": -0.520170271396637,
"logits/chosen": 0.38032418489456177,
"logits/rejected": -0.48995065689086914,
"logps/chosen": -1.815320372581482,
"logps/rejected": -2.156792640686035,
"loss": 1.9657,
"nll_loss": 1.9137252569198608,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.18153204023838043,
"rewards/margins": 0.0341472253203392,
"rewards/rejected": -0.21567925810813904,
"step": 162
},
{
"epoch": 0.45012081463583015,
"grad_norm": 0.23575294017791748,
"learning_rate": 4.964003160737429e-06,
"log_odds_chosen": 0.42496663331985474,
"log_odds_ratio": -0.5144410133361816,
"logits/chosen": 0.4265897274017334,
"logits/rejected": -0.6324371695518494,
"logps/chosen": -1.7239199876785278,
"logps/rejected": -2.0822784900665283,
"loss": 1.8878,
"nll_loss": 1.8363168239593506,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.17239199578762054,
"rewards/margins": 0.03583585098385811,
"rewards/rejected": -0.20822784304618835,
"step": 163
},
{
"epoch": 0.45288229202623403,
"grad_norm": 0.22830626368522644,
"learning_rate": 4.9626354002741424e-06,
"log_odds_chosen": 0.39489883184432983,
"log_odds_ratio": -0.519772469997406,
"logits/chosen": 0.43731188774108887,
"logits/rejected": -0.5182772874832153,
"logps/chosen": -1.6721280813217163,
"logps/rejected": -2.0018677711486816,
"loss": 1.8343,
"nll_loss": 1.782306432723999,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1672128140926361,
"rewards/margins": 0.032973967492580414,
"rewards/rejected": -0.20018678903579712,
"step": 164
},
{
"epoch": 0.4556437694166379,
"grad_norm": 0.25069254636764526,
"learning_rate": 4.9612423324218816e-06,
"log_odds_chosen": 0.4574624300003052,
"log_odds_ratio": -0.4939710795879364,
"logits/chosen": 0.4768182933330536,
"logits/rejected": -0.6430546045303345,
"logps/chosen": -1.8401563167572021,
"logps/rejected": -2.23877215385437,
"loss": 1.9978,
"nll_loss": 1.9484000205993652,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.18401561677455902,
"rewards/margins": 0.03986157849431038,
"rewards/rejected": -0.2238771915435791,
"step": 165
},
{
"epoch": 0.45840524680704176,
"grad_norm": 0.25569233298301697,
"learning_rate": 4.959823971496575e-06,
"log_odds_chosen": 0.31097978353500366,
"log_odds_ratio": -0.5631955862045288,
"logits/chosen": 0.39253953099250793,
"logits/rejected": -0.3464045226573944,
"logps/chosen": -1.7822060585021973,
"logps/rejected": -2.0535507202148438,
"loss": 1.9515,
"nll_loss": 1.8951623439788818,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.17822058498859406,
"rewards/margins": 0.027134478092193604,
"rewards/rejected": -0.20535509288311005,
"step": 166
},
{
"epoch": 0.46116672419744564,
"grad_norm": 0.2754010260105133,
"learning_rate": 4.958380332074074e-06,
"log_odds_chosen": 0.38330915570259094,
"log_odds_ratio": -0.5226565003395081,
"logits/chosen": 0.5406702756881714,
"logits/rejected": -0.2918284833431244,
"logps/chosen": -1.744588851928711,
"logps/rejected": -2.0728600025177,
"loss": 1.9256,
"nll_loss": 1.873305320739746,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.17445889115333557,
"rewards/margins": 0.03282713145017624,
"rewards/rejected": -0.2072860151529312,
"step": 167
},
{
"epoch": 0.4639282015878495,
"grad_norm": 0.24440248310565948,
"learning_rate": 4.95691142899001e-06,
"log_odds_chosen": 0.4813528060913086,
"log_odds_ratio": -0.49559223651885986,
"logits/chosen": 0.42939212918281555,
"logits/rejected": -0.5100030303001404,
"logps/chosen": -1.7333720922470093,
"logps/rejected": -2.148804187774658,
"loss": 1.8988,
"nll_loss": 1.8492889404296875,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.17333722114562988,
"rewards/margins": 0.04154320806264877,
"rewards/rejected": -0.21488040685653687,
"step": 168
},
{
"epoch": 0.46668967897825336,
"grad_norm": 0.24897295236587524,
"learning_rate": 4.955417277339633e-06,
"log_odds_chosen": 0.3721994459629059,
"log_odds_ratio": -0.5288943648338318,
"logits/chosen": 0.4496070444583893,
"logits/rejected": -0.5069053173065186,
"logps/chosen": -1.7618987560272217,
"logps/rejected": -2.0793538093566895,
"loss": 1.9112,
"nll_loss": 1.858320713043213,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.17618988454341888,
"rewards/margins": 0.03174550086259842,
"rewards/rejected": -0.2079353779554367,
"step": 169
},
{
"epoch": 0.46945115636865725,
"grad_norm": 0.24506047368049622,
"learning_rate": 4.953897892477664e-06,
"log_odds_chosen": 0.4158302843570709,
"log_odds_ratio": -0.524972677230835,
"logits/chosen": 0.39215707778930664,
"logits/rejected": -0.6964855790138245,
"logps/chosen": -1.6938952207565308,
"logps/rejected": -2.0564043521881104,
"loss": 1.8573,
"nll_loss": 1.8047559261322021,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.1693895161151886,
"rewards/margins": 0.03625092655420303,
"rewards/rejected": -0.20564045011997223,
"step": 170
},
{
"epoch": 0.4722126337590611,
"grad_norm": 0.25454598665237427,
"learning_rate": 4.952353290018132e-06,
"log_odds_chosen": 0.39419156312942505,
"log_odds_ratio": -0.5228413939476013,
"logits/chosen": 0.4652084410190582,
"logits/rejected": -0.09862995892763138,
"logps/chosen": -1.7650055885314941,
"logps/rejected": -2.102964162826538,
"loss": 1.9365,
"nll_loss": 1.8842390775680542,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.17650054395198822,
"rewards/margins": 0.033795878291130066,
"rewards/rejected": -0.2102964073419571,
"step": 171
},
{
"epoch": 0.474974111149465,
"grad_norm": 0.23166699707508087,
"learning_rate": 4.950783485834218e-06,
"log_odds_chosen": 0.45087021589279175,
"log_odds_ratio": -0.4937146306037903,
"logits/chosen": 0.43644362688064575,
"logits/rejected": -0.45175108313560486,
"logps/chosen": -1.6984238624572754,
"logps/rejected": -2.0791146755218506,
"loss": 1.8668,
"nll_loss": 1.817420482635498,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.16984236240386963,
"rewards/margins": 0.03806909918785095,
"rewards/rejected": -0.20791146159172058,
"step": 172
},
{
"epoch": 0.4777355885398688,
"grad_norm": 0.22430026531219482,
"learning_rate": 4.949188496058089e-06,
"log_odds_chosen": 0.3452419340610504,
"log_odds_ratio": -0.5469062328338623,
"logits/chosen": 0.42078280448913574,
"logits/rejected": -0.4557611346244812,
"logps/chosen": -1.6979789733886719,
"logps/rejected": -1.9911997318267822,
"loss": 1.8648,
"nll_loss": 1.8101435899734497,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.1697978973388672,
"rewards/margins": 0.029322080314159393,
"rewards/rejected": -0.19911997020244598,
"step": 173
},
{
"epoch": 0.4804970659302727,
"grad_norm": 0.25257107615470886,
"learning_rate": 4.947568337080733e-06,
"log_odds_chosen": 0.1399567574262619,
"log_odds_ratio": -0.6351712942123413,
"logits/chosen": 0.3786250352859497,
"logits/rejected": -0.433152973651886,
"logps/chosen": -1.8261394500732422,
"logps/rejected": -1.9481170177459717,
"loss": 1.9906,
"nll_loss": 1.9270497560501099,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.18261395394802094,
"rewards/margins": 0.012197760865092278,
"rewards/rejected": -0.19481170177459717,
"step": 174
},
{
"epoch": 0.4832585433206766,
"grad_norm": 0.25380203127861023,
"learning_rate": 4.945923025551789e-06,
"log_odds_chosen": 0.37491294741630554,
"log_odds_ratio": -0.5253455638885498,
"logits/chosen": 0.44007402658462524,
"logits/rejected": -0.5158473253250122,
"logps/chosen": -1.7649321556091309,
"logps/rejected": -2.0859415531158447,
"loss": 1.9061,
"nll_loss": 1.8535852432250977,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.17649321258068085,
"rewards/margins": 0.032100923359394073,
"rewards/rejected": -0.20859414339065552,
"step": 175
},
{
"epoch": 0.4860200207110804,
"grad_norm": 0.23105277121067047,
"learning_rate": 4.944252578379379e-06,
"log_odds_chosen": 0.2923380434513092,
"log_odds_ratio": -0.5638449788093567,
"logits/chosen": 0.3620964288711548,
"logits/rejected": -0.7222499847412109,
"logps/chosen": -1.6879165172576904,
"logps/rejected": -1.9339208602905273,
"loss": 1.8481,
"nll_loss": 1.7917577028274536,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.16879163682460785,
"rewards/margins": 0.024600449949502945,
"rewards/rejected": -0.1933920979499817,
"step": 176
},
{
"epoch": 0.4887814981014843,
"grad_norm": 0.2548852264881134,
"learning_rate": 4.942557012729933e-06,
"log_odds_chosen": 0.37703031301498413,
"log_odds_ratio": -0.5374451875686646,
"logits/chosen": 0.451770544052124,
"logits/rejected": -0.6359795331954956,
"logps/chosen": -1.7573479413986206,
"logps/rejected": -2.0781285762786865,
"loss": 1.9276,
"nll_loss": 1.873888373374939,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.17573478817939758,
"rewards/margins": 0.03207805007696152,
"rewards/rejected": -0.2078128606081009,
"step": 177
},
{
"epoch": 0.49154297549188813,
"grad_norm": 0.24361710250377655,
"learning_rate": 4.940836346028011e-06,
"log_odds_chosen": 0.564251184463501,
"log_odds_ratio": -0.4567793309688568,
"logits/chosen": 0.36331912875175476,
"logits/rejected": -0.6997017860412598,
"logps/chosen": -1.7021052837371826,
"logps/rejected": -2.1804330348968506,
"loss": 1.8632,
"nll_loss": 1.8174842596054077,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.17021054029464722,
"rewards/margins": 0.047832753509283066,
"rewards/rejected": -0.2180432826280594,
"step": 178
},
{
"epoch": 0.494304452882292,
"grad_norm": 0.25140658020973206,
"learning_rate": 4.9390905959561254e-06,
"log_odds_chosen": 0.42371851205825806,
"log_odds_ratio": -0.5053101778030396,
"logits/chosen": 0.4960322678089142,
"logits/rejected": -0.7143914699554443,
"logps/chosen": -1.8060599565505981,
"logps/rejected": -2.168437957763672,
"loss": 1.9361,
"nll_loss": 1.8855350017547607,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.18060600757598877,
"rewards/margins": 0.03623779118061066,
"rewards/rejected": -0.21684379875659943,
"step": 179
},
{
"epoch": 0.4970659302726959,
"grad_norm": 0.25267475843429565,
"learning_rate": 4.937319780454559e-06,
"log_odds_chosen": 0.24488888680934906,
"log_odds_ratio": -0.5824177861213684,
"logits/chosen": 0.43725982308387756,
"logits/rejected": -0.5809499621391296,
"logps/chosen": -1.7686806917190552,
"logps/rejected": -1.9778132438659668,
"loss": 1.9319,
"nll_loss": 1.873632550239563,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.17686808109283447,
"rewards/margins": 0.020913248881697655,
"rewards/rejected": -0.19778132438659668,
"step": 180
},
{
"epoch": 0.49982740766309974,
"grad_norm": 0.2517687976360321,
"learning_rate": 4.935523917721182e-06,
"log_odds_chosen": 0.3661497235298157,
"log_odds_ratio": -0.5325534343719482,
"logits/chosen": 0.4761643409729004,
"logits/rejected": -0.4684900641441345,
"logps/chosen": -1.7682512998580933,
"logps/rejected": -2.079028844833374,
"loss": 1.9513,
"nll_loss": 1.8980205059051514,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.1768251359462738,
"rewards/margins": 0.031077751889824867,
"rewards/rejected": -0.20790287852287292,
"step": 181
},
{
"epoch": 0.5025888850535036,
"grad_norm": 0.23882247507572174,
"learning_rate": 4.933703026211262e-06,
"log_odds_chosen": 0.40052229166030884,
"log_odds_ratio": -0.5199273824691772,
"logits/chosen": 0.5116904377937317,
"logits/rejected": -0.8737332820892334,
"logps/chosen": -1.7753740549087524,
"logps/rejected": -2.1211981773376465,
"loss": 1.9275,
"nll_loss": 1.8754714727401733,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.17753739655017853,
"rewards/margins": 0.03458239883184433,
"rewards/rejected": -0.21211980283260345,
"step": 182
},
{
"epoch": 0.5053503624439075,
"grad_norm": 0.23901337385177612,
"learning_rate": 4.931857124637276e-06,
"log_odds_chosen": 0.3831828832626343,
"log_odds_ratio": -0.5294877886772156,
"logits/chosen": 0.44559329748153687,
"logits/rejected": -0.512153685092926,
"logps/chosen": -1.660269021987915,
"logps/rejected": -1.9844969511032104,
"loss": 1.8228,
"nll_loss": 1.7698148488998413,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.16602689027786255,
"rewards/margins": 0.032422810792922974,
"rewards/rejected": -0.19844970107078552,
"step": 183
},
{
"epoch": 0.5081118398343114,
"grad_norm": 0.26273974776268005,
"learning_rate": 4.92998623196872e-06,
"log_odds_chosen": 0.40838003158569336,
"log_odds_ratio": -0.5120916366577148,
"logits/chosen": 0.36793801188468933,
"logits/rejected": -0.883270263671875,
"logps/chosen": -1.7183860540390015,
"logps/rejected": -2.0646042823791504,
"loss": 1.8679,
"nll_loss": 1.8166720867156982,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.17183861136436462,
"rewards/margins": 0.034621842205524445,
"rewards/rejected": -0.20646044611930847,
"step": 184
},
{
"epoch": 0.5108733172247152,
"grad_norm": 0.24954968690872192,
"learning_rate": 4.92809036743191e-06,
"log_odds_chosen": 0.44134509563446045,
"log_odds_ratio": -0.5009733438491821,
"logits/chosen": 0.38025662302970886,
"logits/rejected": -0.6959909200668335,
"logps/chosen": -1.6237648725509644,
"logps/rejected": -1.991347312927246,
"loss": 1.7726,
"nll_loss": 1.7225314378738403,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1623764932155609,
"rewards/margins": 0.03675824776291847,
"rewards/rejected": -0.19913475215435028,
"step": 185
},
{
"epoch": 0.5136347946151191,
"grad_norm": 0.24785040318965912,
"learning_rate": 4.926169550509787e-06,
"log_odds_chosen": 0.304913192987442,
"log_odds_ratio": -0.5551115274429321,
"logits/chosen": 0.49904897809028625,
"logits/rejected": -0.5060792565345764,
"logps/chosen": -1.752536416053772,
"logps/rejected": -2.009927749633789,
"loss": 1.8926,
"nll_loss": 1.837074875831604,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.17525365948677063,
"rewards/margins": 0.0257391519844532,
"rewards/rejected": -0.20099279284477234,
"step": 186
},
{
"epoch": 0.516396272005523,
"grad_norm": 0.25260496139526367,
"learning_rate": 4.924223800941718e-06,
"log_odds_chosen": 0.18274395167827606,
"log_odds_ratio": -0.6112073659896851,
"logits/chosen": 0.4272249937057495,
"logits/rejected": -0.5964298248291016,
"logps/chosen": -1.7245608568191528,
"logps/rejected": -1.87729012966156,
"loss": 1.8858,
"nll_loss": 1.824722170829773,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.17245608568191528,
"rewards/margins": 0.015272947028279305,
"rewards/rejected": -0.18772903084754944,
"step": 187
},
{
"epoch": 0.5191577493959268,
"grad_norm": 0.25974321365356445,
"learning_rate": 4.9222531387232885e-06,
"log_odds_chosen": 0.2772579491138458,
"log_odds_ratio": -0.5700020790100098,
"logits/chosen": 0.5035889148712158,
"logits/rejected": -0.6021788120269775,
"logps/chosen": -1.810874581336975,
"logps/rejected": -2.048560380935669,
"loss": 1.9601,
"nll_loss": 1.903147578239441,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.181087464094162,
"rewards/margins": 0.023768583312630653,
"rewards/rejected": -0.2048560529947281,
"step": 188
},
{
"epoch": 0.5219192267863307,
"grad_norm": 0.2509472668170929,
"learning_rate": 4.920257584106104e-06,
"log_odds_chosen": 0.2922664284706116,
"log_odds_ratio": -0.5610611438751221,
"logits/chosen": 0.4223700761795044,
"logits/rejected": -0.8161606192588806,
"logps/chosen": -1.7685585021972656,
"logps/rejected": -2.017383575439453,
"loss": 1.9162,
"nll_loss": 1.8600515127182007,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.17685584723949432,
"rewards/margins": 0.02488252893090248,
"rewards/rejected": -0.2017383873462677,
"step": 189
},
{
"epoch": 0.5246807041767345,
"grad_norm": 0.26333701610565186,
"learning_rate": 4.918237157597574e-06,
"log_odds_chosen": 0.367781400680542,
"log_odds_ratio": -0.5288804769515991,
"logits/chosen": 0.34821969270706177,
"logits/rejected": -0.5903292894363403,
"logps/chosen": -1.7087079286575317,
"logps/rejected": -2.0187838077545166,
"loss": 1.8792,
"nll_loss": 1.8263163566589355,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1708707958459854,
"rewards/margins": 0.03100760281085968,
"rewards/rejected": -0.2018783837556839,
"step": 190
},
{
"epoch": 0.5274421815671384,
"grad_norm": 0.2659012973308563,
"learning_rate": 4.916191879960708e-06,
"log_odds_chosen": 0.41825154423713684,
"log_odds_ratio": -0.5096725225448608,
"logits/chosen": 0.3918278217315674,
"logits/rejected": -0.7869745492935181,
"logps/chosen": -1.819606900215149,
"logps/rejected": -2.178121328353882,
"loss": 1.9644,
"nll_loss": 1.9134495258331299,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.18196068704128265,
"rewards/margins": 0.03585144132375717,
"rewards/rejected": -0.21781213581562042,
"step": 191
},
{
"epoch": 0.5302036589575423,
"grad_norm": 0.2279636263847351,
"learning_rate": 4.914121772213898e-06,
"log_odds_chosen": 0.3906615972518921,
"log_odds_ratio": -0.5185546875,
"logits/chosen": 0.4513569176197052,
"logits/rejected": -0.5819852948188782,
"logps/chosen": -1.6937000751495361,
"logps/rejected": -2.0224616527557373,
"loss": 1.8302,
"nll_loss": 1.778322458267212,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.16937001049518585,
"rewards/margins": 0.03287617862224579,
"rewards/rejected": -0.20224617421627045,
"step": 192
},
{
"epoch": 0.5329651363479462,
"grad_norm": 0.25850534439086914,
"learning_rate": 4.912026855630703e-06,
"log_odds_chosen": 0.4198724031448364,
"log_odds_ratio": -0.5083851218223572,
"logits/chosen": 0.44823789596557617,
"logits/rejected": -0.45316439867019653,
"logps/chosen": -1.7062734365463257,
"logps/rejected": -2.061528205871582,
"loss": 1.8534,
"nll_loss": 1.8025155067443848,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.17062735557556152,
"rewards/margins": 0.035525478422641754,
"rewards/rejected": -0.20615282654762268,
"step": 193
},
{
"epoch": 0.5357266137383501,
"grad_norm": 0.2361551970243454,
"learning_rate": 4.909907151739634e-06,
"log_odds_chosen": 0.3865026831626892,
"log_odds_ratio": -0.527399480342865,
"logits/chosen": 0.3759016692638397,
"logits/rejected": -0.6874773502349854,
"logps/chosen": -1.6837282180786133,
"logps/rejected": -2.00811505317688,
"loss": 1.8406,
"nll_loss": 1.7878108024597168,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.16837282478809357,
"rewards/margins": 0.03243869170546532,
"rewards/rejected": -0.200811505317688,
"step": 194
},
{
"epoch": 0.5384880911287538,
"grad_norm": 0.25059905648231506,
"learning_rate": 4.907762682323926e-06,
"log_odds_chosen": 0.5617655515670776,
"log_odds_ratio": -0.4947778284549713,
"logits/chosen": 0.4162059426307678,
"logits/rejected": -0.6656520366668701,
"logps/chosen": -1.6942963600158691,
"logps/rejected": -2.1570441722869873,
"loss": 1.8694,
"nll_loss": 1.8199554681777954,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.16942965984344482,
"rewards/margins": 0.046274758875370026,
"rewards/rejected": -0.21570439636707306,
"step": 195
},
{
"epoch": 0.5412495685191577,
"grad_norm": 0.24844138324260712,
"learning_rate": 4.905593469421323e-06,
"log_odds_chosen": 0.2864172160625458,
"log_odds_ratio": -0.5658568143844604,
"logits/chosen": 0.40823209285736084,
"logits/rejected": -1.0584533214569092,
"logps/chosen": -1.6995760202407837,
"logps/rejected": -1.9419208765029907,
"loss": 1.8573,
"nll_loss": 1.8007633686065674,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.16995760798454285,
"rewards/margins": 0.024234486743807793,
"rewards/rejected": -0.1941920816898346,
"step": 196
},
{
"epoch": 0.5440110459095616,
"grad_norm": 0.25183677673339844,
"learning_rate": 4.90339953532384e-06,
"log_odds_chosen": 0.32565394043922424,
"log_odds_ratio": -0.5478559732437134,
"logits/chosen": 0.4504649341106415,
"logits/rejected": -0.6915105581283569,
"logps/chosen": -1.723501443862915,
"logps/rejected": -1.9977482557296753,
"loss": 1.8793,
"nll_loss": 1.8244800567626953,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.17235015332698822,
"rewards/margins": 0.027424685657024384,
"rewards/rejected": -0.199774831533432,
"step": 197
},
{
"epoch": 0.5467725232999655,
"grad_norm": 0.24120980501174927,
"learning_rate": 4.901180902577549e-06,
"log_odds_chosen": 0.36884185671806335,
"log_odds_ratio": -0.5313685536384583,
"logits/chosen": 0.47393080592155457,
"logits/rejected": -0.8454681038856506,
"logps/chosen": -1.7091484069824219,
"logps/rejected": -2.021761655807495,
"loss": 1.8562,
"nll_loss": 1.80307936668396,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.17091482877731323,
"rewards/margins": 0.03126133233308792,
"rewards/rejected": -0.20217616856098175,
"step": 198
},
{
"epoch": 0.5495340006903694,
"grad_norm": 0.26695704460144043,
"learning_rate": 4.8989375939823305e-06,
"log_odds_chosen": 0.2716186046600342,
"log_odds_ratio": -0.5816208124160767,
"logits/chosen": 0.43514788150787354,
"logits/rejected": -0.9399389028549194,
"logps/chosen": -1.6752939224243164,
"logps/rejected": -1.9062312841415405,
"loss": 1.829,
"nll_loss": 1.770880937576294,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.1675294041633606,
"rewards/margins": 0.023093728348612785,
"rewards/rejected": -0.19062313437461853,
"step": 199
},
{
"epoch": 0.5522954780807732,
"grad_norm": 0.248373344540596,
"learning_rate": 4.896669632591652e-06,
"log_odds_chosen": 0.43290525674819946,
"log_odds_ratio": -0.5045643448829651,
"logits/chosen": 0.41075634956359863,
"logits/rejected": -0.6819908618927002,
"logps/chosen": -1.6926066875457764,
"logps/rejected": -2.0592057704925537,
"loss": 1.8492,
"nll_loss": 1.7987439632415771,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1692606657743454,
"rewards/margins": 0.03665991127490997,
"rewards/rejected": -0.20592057704925537,
"step": 200
},
{
"epoch": 0.5550569554711771,
"grad_norm": 0.2426389902830124,
"learning_rate": 4.894377041712327e-06,
"log_odds_chosen": 0.3046472668647766,
"log_odds_ratio": -0.5560302138328552,
"logits/chosen": 0.4586237370967865,
"logits/rejected": -0.889277458190918,
"logps/chosen": -1.7483210563659668,
"logps/rejected": -2.0069689750671387,
"loss": 1.8995,
"nll_loss": 1.8438801765441895,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.17483210563659668,
"rewards/margins": 0.025864800438284874,
"rewards/rejected": -0.2006969153881073,
"step": 201
},
{
"epoch": 0.557818432861581,
"grad_norm": 0.2384442389011383,
"learning_rate": 4.892059844904273e-06,
"log_odds_chosen": 0.35407793521881104,
"log_odds_ratio": -0.5373241901397705,
"logits/chosen": 0.3391227722167969,
"logits/rejected": -0.8238649964332581,
"logps/chosen": -1.6727503538131714,
"logps/rejected": -1.971817135810852,
"loss": 1.8314,
"nll_loss": 1.7776691913604736,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.16727504134178162,
"rewards/margins": 0.02990666963160038,
"rewards/rejected": -0.19718170166015625,
"step": 202
},
{
"epoch": 0.5605799102519848,
"grad_norm": 0.2237497717142105,
"learning_rate": 4.889718065980272e-06,
"log_odds_chosen": 0.42560863494873047,
"log_odds_ratio": -0.5102251768112183,
"logits/chosen": 0.38399800658226013,
"logits/rejected": -0.940986156463623,
"logps/chosen": -1.7369155883789062,
"logps/rejected": -2.0991053581237793,
"loss": 1.8724,
"nll_loss": 1.821379542350769,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1736915558576584,
"rewards/margins": 0.03621895611286163,
"rewards/rejected": -0.2099105268716812,
"step": 203
},
{
"epoch": 0.5633413876423887,
"grad_norm": 0.23220422863960266,
"learning_rate": 4.8873517290057265e-06,
"log_odds_chosen": 0.5465662479400635,
"log_odds_ratio": -0.45910075306892395,
"logits/chosen": 0.33986854553222656,
"logits/rejected": -0.9626595973968506,
"logps/chosen": -1.7113467454910278,
"logps/rejected": -2.1808066368103027,
"loss": 1.8594,
"nll_loss": 1.8135318756103516,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.17113468050956726,
"rewards/margins": 0.046945974230766296,
"rewards/rejected": -0.21808065474033356,
"step": 204
},
{
"epoch": 0.5661028650327925,
"grad_norm": 0.22727084159851074,
"learning_rate": 4.88496085829841e-06,
"log_odds_chosen": 0.3183567523956299,
"log_odds_ratio": -0.5485174059867859,
"logits/chosen": 0.38077837228775024,
"logits/rejected": -0.3718608617782593,
"logps/chosen": -1.682438611984253,
"logps/rejected": -1.9494574069976807,
"loss": 1.8312,
"nll_loss": 1.7763221263885498,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.168243870139122,
"rewards/margins": 0.026701876893639565,
"rewards/rejected": -0.19494575262069702,
"step": 205
},
{
"epoch": 0.5688643424231964,
"grad_norm": 0.2435847967863083,
"learning_rate": 4.882545478428219e-06,
"log_odds_chosen": 0.4757111072540283,
"log_odds_ratio": -0.5007758140563965,
"logits/chosen": 0.4316224753856659,
"logits/rejected": -0.9769002199172974,
"logps/chosen": -1.6722346544265747,
"logps/rejected": -2.073765277862549,
"loss": 1.8362,
"nll_loss": 1.7861530780792236,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.1672234833240509,
"rewards/margins": 0.04015304520726204,
"rewards/rejected": -0.20737652480602264,
"step": 206
},
{
"epoch": 0.5716258198136003,
"grad_norm": 0.28022557497024536,
"learning_rate": 4.880105614216917e-06,
"log_odds_chosen": 0.42549797892570496,
"log_odds_ratio": -0.507975161075592,
"logits/chosen": 0.42804068326950073,
"logits/rejected": -0.8541020750999451,
"logps/chosen": -1.8415427207946777,
"logps/rejected": -2.209498405456543,
"loss": 1.9733,
"nll_loss": 1.9224536418914795,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.18415425717830658,
"rewards/margins": 0.03679555654525757,
"rewards/rejected": -0.22094982862472534,
"step": 207
},
{
"epoch": 0.5743872972040042,
"grad_norm": 0.26431816816329956,
"learning_rate": 4.8776412907378845e-06,
"log_odds_chosen": 0.3722533583641052,
"log_odds_ratio": -0.5265185832977295,
"logits/chosen": 0.46087294816970825,
"logits/rejected": -0.8973901271820068,
"logps/chosen": -1.7529377937316895,
"logps/rejected": -2.0698721408843994,
"loss": 1.8939,
"nll_loss": 1.8412883281707764,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.17529378831386566,
"rewards/margins": 0.03169342502951622,
"rewards/rejected": -0.206987202167511,
"step": 208
},
{
"epoch": 0.577148774594408,
"grad_norm": 0.2286507785320282,
"learning_rate": 4.875152533315859e-06,
"log_odds_chosen": 0.42754417657852173,
"log_odds_ratio": -0.5049228668212891,
"logits/chosen": 0.3295610547065735,
"logits/rejected": -0.821418285369873,
"logps/chosen": -1.6234990358352661,
"logps/rejected": -1.9790537357330322,
"loss": 1.7729,
"nll_loss": 1.7223597764968872,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1623499095439911,
"rewards/margins": 0.035555459558963776,
"rewards/rejected": -0.19790537655353546,
"step": 209
},
{
"epoch": 0.5799102519848118,
"grad_norm": 0.23942524194717407,
"learning_rate": 4.872639367526672e-06,
"log_odds_chosen": 0.4114229083061218,
"log_odds_ratio": -0.5152523517608643,
"logits/chosen": 0.3920520842075348,
"logits/rejected": -0.7173675298690796,
"logps/chosen": -1.6646122932434082,
"logps/rejected": -2.0126848220825195,
"loss": 1.8132,
"nll_loss": 1.7616626024246216,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.16646124422550201,
"rewards/margins": 0.0348072350025177,
"rewards/rejected": -0.20126846432685852,
"step": 210
},
{
"epoch": 0.5826717293752157,
"grad_norm": 0.259086012840271,
"learning_rate": 4.870101819196992e-06,
"log_odds_chosen": 0.37038204073905945,
"log_odds_ratio": -0.5485732555389404,
"logits/chosen": 0.33591514825820923,
"logits/rejected": -0.7169915437698364,
"logps/chosen": -1.7096450328826904,
"logps/rejected": -2.028204917907715,
"loss": 1.863,
"nll_loss": 1.8081302642822266,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.17096450924873352,
"rewards/margins": 0.031855978071689606,
"rewards/rejected": -0.20282049477100372,
"step": 211
},
{
"epoch": 0.5854332067656196,
"grad_norm": 0.23236605525016785,
"learning_rate": 4.8675399144040535e-06,
"log_odds_chosen": 0.48778051137924194,
"log_odds_ratio": -0.4863608777523041,
"logits/chosen": 0.3406648337841034,
"logits/rejected": -1.1020050048828125,
"logps/chosen": -1.6164040565490723,
"logps/rejected": -2.0246694087982178,
"loss": 1.7551,
"nll_loss": 1.7064510583877563,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.16164040565490723,
"rewards/margins": 0.04082653671503067,
"rewards/rejected": -0.2024669200181961,
"step": 212
},
{
"epoch": 0.5881946841560235,
"grad_norm": 0.2472049593925476,
"learning_rate": 4.864953679475392e-06,
"log_odds_chosen": 0.4090927541255951,
"log_odds_ratio": -0.5185278654098511,
"logits/chosen": 0.3806914687156677,
"logits/rejected": -1.1645220518112183,
"logps/chosen": -1.6705214977264404,
"logps/rejected": -2.010647773742676,
"loss": 1.7979,
"nll_loss": 1.7460622787475586,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.16705216467380524,
"rewards/margins": 0.03401262313127518,
"rewards/rejected": -0.201064795255661,
"step": 213
},
{
"epoch": 0.5909561615464274,
"grad_norm": 0.26932069659233093,
"learning_rate": 4.862343140988573e-06,
"log_odds_chosen": 0.38164323568344116,
"log_odds_ratio": -0.5293680429458618,
"logits/chosen": 0.35298576951026917,
"logits/rejected": -0.9924853444099426,
"logps/chosen": -1.7351062297821045,
"logps/rejected": -2.062347173690796,
"loss": 1.8652,
"nll_loss": 1.8122597932815552,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.1735106110572815,
"rewards/margins": 0.03272408992052078,
"rewards/rejected": -0.20623470842838287,
"step": 214
},
{
"epoch": 0.5937176389368312,
"grad_norm": 0.26750069856643677,
"learning_rate": 4.859708325770919e-06,
"log_odds_chosen": 0.3453901410102844,
"log_odds_ratio": -0.5410705804824829,
"logits/chosen": 0.402724027633667,
"logits/rejected": -0.7651013135910034,
"logps/chosen": -1.736122727394104,
"logps/rejected": -2.027735948562622,
"loss": 1.901,
"nll_loss": 1.8469077348709106,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.17361226677894592,
"rewards/margins": 0.029161330312490463,
"rewards/rejected": -0.20277361571788788,
"step": 215
},
{
"epoch": 0.596479116327235,
"grad_norm": 0.25545212626457214,
"learning_rate": 4.857049260899233e-06,
"log_odds_chosen": 0.42634040117263794,
"log_odds_ratio": -0.5089380145072937,
"logits/chosen": 0.2999016046524048,
"logits/rejected": -0.9428575038909912,
"logps/chosen": -1.6035995483398438,
"logps/rejected": -1.9544857740402222,
"loss": 1.7566,
"nll_loss": 1.7056996822357178,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1603599637746811,
"rewards/margins": 0.03508862853050232,
"rewards/rejected": -0.19544857740402222,
"step": 216
},
{
"epoch": 0.5992405937176389,
"grad_norm": 0.2341543734073639,
"learning_rate": 4.854365973699519e-06,
"log_odds_chosen": 0.29267483949661255,
"log_odds_ratio": -0.5718191862106323,
"logits/chosen": 0.32583779096603394,
"logits/rejected": -0.7815302610397339,
"logps/chosen": -1.6951828002929688,
"logps/rejected": -1.9353394508361816,
"loss": 1.8596,
"nll_loss": 1.8024464845657349,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.16951829195022583,
"rewards/margins": 0.024015672504901886,
"rewards/rejected": -0.19353395700454712,
"step": 217
},
{
"epoch": 0.6020020711080428,
"grad_norm": 0.24366186559200287,
"learning_rate": 4.851658491746707e-06,
"log_odds_chosen": 0.3909580707550049,
"log_odds_ratio": -0.5210633277893066,
"logits/chosen": 0.25465232133865356,
"logits/rejected": -0.9531072378158569,
"logps/chosen": -1.6791120767593384,
"logps/rejected": -2.0105059146881104,
"loss": 1.8187,
"nll_loss": 1.7665891647338867,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.16791123151779175,
"rewards/margins": 0.03313935548067093,
"rewards/rejected": -0.20105057954788208,
"step": 218
},
{
"epoch": 0.6047635484984467,
"grad_norm": 0.24001233279705048,
"learning_rate": 4.848926842864361e-06,
"log_odds_chosen": 0.4899923503398895,
"log_odds_ratio": -0.48127448558807373,
"logits/chosen": 0.25574028491973877,
"logits/rejected": -1.256288766860962,
"logps/chosen": -1.6502352952957153,
"logps/rejected": -2.064323902130127,
"loss": 1.7891,
"nll_loss": 1.740965723991394,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.165023535490036,
"rewards/margins": 0.04140886664390564,
"rewards/rejected": -0.20643240213394165,
"step": 219
},
{
"epoch": 0.6075250258888505,
"grad_norm": 0.23009978234767914,
"learning_rate": 4.846171055124401e-06,
"log_odds_chosen": 0.3280143141746521,
"log_odds_ratio": -0.55921870470047,
"logits/chosen": 0.35827893018722534,
"logits/rejected": -0.7289958000183105,
"logps/chosen": -1.7372446060180664,
"logps/rejected": -2.02004075050354,
"loss": 1.8677,
"nll_loss": 1.811750888824463,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1737244725227356,
"rewards/margins": 0.028279609978199005,
"rewards/rejected": -0.2020040899515152,
"step": 220
},
{
"epoch": 0.6102865032792544,
"grad_norm": 0.24643385410308838,
"learning_rate": 4.843391156846811e-06,
"log_odds_chosen": 0.49847206473350525,
"log_odds_ratio": -0.4771438539028168,
"logits/chosen": 0.38028058409690857,
"logits/rejected": -0.8463162779808044,
"logps/chosen": -1.6676466464996338,
"logps/rejected": -2.0871989727020264,
"loss": 1.8045,
"nll_loss": 1.756779432296753,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.16676466166973114,
"rewards/margins": 0.04195523262023926,
"rewards/rejected": -0.2087198942899704,
"step": 221
},
{
"epoch": 0.6130479806696583,
"grad_norm": 0.23624517023563385,
"learning_rate": 4.8405871765993435e-06,
"log_odds_chosen": 0.5461086630821228,
"log_odds_ratio": -0.4661559462547302,
"logits/chosen": 0.2758100926876068,
"logits/rejected": -0.9698406457901001,
"logps/chosen": -1.716646671295166,
"logps/rejected": -2.178037643432617,
"loss": 1.8539,
"nll_loss": 1.807306170463562,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.17166468501091003,
"rewards/margins": 0.04613909870386124,
"rewards/rejected": -0.21780376136302948,
"step": 222
},
{
"epoch": 0.6158094580600622,
"grad_norm": 0.24216988682746887,
"learning_rate": 4.837759143197237e-06,
"log_odds_chosen": 0.33277636766433716,
"log_odds_ratio": -0.543420672416687,
"logits/chosen": 0.25846540927886963,
"logits/rejected": -1.1584213972091675,
"logps/chosen": -1.6766406297683716,
"logps/rejected": -1.959399700164795,
"loss": 1.818,
"nll_loss": 1.7636725902557373,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1676640659570694,
"rewards/margins": 0.028275907039642334,
"rewards/rejected": -0.19593995809555054,
"step": 223
},
{
"epoch": 0.618570935450466,
"grad_norm": 0.22247816622257233,
"learning_rate": 4.834907085702909e-06,
"log_odds_chosen": 0.42663368582725525,
"log_odds_ratio": -0.5054609775543213,
"logits/chosen": 0.2946387827396393,
"logits/rejected": -0.8483214378356934,
"logps/chosen": -1.6203222274780273,
"logps/rejected": -1.9752922058105469,
"loss": 1.7654,
"nll_loss": 1.7148088216781616,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.16203221678733826,
"rewards/margins": 0.03549701347947121,
"rewards/rejected": -0.19752921164035797,
"step": 224
},
{
"epoch": 0.6213324128408698,
"grad_norm": 0.23438099026679993,
"learning_rate": 4.832031033425663e-06,
"log_odds_chosen": 0.4974308907985687,
"log_odds_ratio": -0.4770738184452057,
"logits/chosen": 0.22860127687454224,
"logits/rejected": -1.1651134490966797,
"logps/chosen": -1.5770584344863892,
"logps/rejected": -1.98891282081604,
"loss": 1.7128,
"nll_loss": 1.6650840044021606,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.15770584344863892,
"rewards/margins": 0.04118544980883598,
"rewards/rejected": -0.198891282081604,
"step": 225
},
{
"epoch": 0.6240938902312737,
"grad_norm": 0.2453928291797638,
"learning_rate": 4.829131015921386e-06,
"log_odds_chosen": 0.336994469165802,
"log_odds_ratio": -0.5416135787963867,
"logits/chosen": 0.40966925024986267,
"logits/rejected": -0.7484245300292969,
"logps/chosen": -1.7475402355194092,
"logps/rejected": -2.035691499710083,
"loss": 1.8881,
"nll_loss": 1.833910346031189,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.17475402355194092,
"rewards/margins": 0.0288151316344738,
"rewards/rejected": -0.2035691738128662,
"step": 226
},
{
"epoch": 0.6268553676216776,
"grad_norm": 0.22821033000946045,
"learning_rate": 4.826207062992245e-06,
"log_odds_chosen": 0.47645366191864014,
"log_odds_ratio": -0.4914059638977051,
"logits/chosen": 0.2782054543495178,
"logits/rejected": -0.9900674223899841,
"logps/chosen": -1.8174843788146973,
"logps/rejected": -2.2320914268493652,
"loss": 1.95,
"nll_loss": 1.9008376598358154,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1817484349012375,
"rewards/margins": 0.041460707783699036,
"rewards/rejected": -0.22320915758609772,
"step": 227
},
{
"epoch": 0.6296168450120815,
"grad_norm": 0.2569068968296051,
"learning_rate": 4.82325920468638e-06,
"log_odds_chosen": 0.2413053661584854,
"log_odds_ratio": -0.5858334302902222,
"logits/chosen": 0.32741737365722656,
"logits/rejected": -0.9221272468566895,
"logps/chosen": -1.7791626453399658,
"logps/rejected": -1.9837150573730469,
"loss": 1.9203,
"nll_loss": 1.8617504835128784,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.1779162585735321,
"rewards/margins": 0.020455272868275642,
"rewards/rejected": -0.1983715444803238,
"step": 228
},
{
"epoch": 0.6323783224024854,
"grad_norm": 0.2548074424266815,
"learning_rate": 4.820287471297598e-06,
"log_odds_chosen": 0.4002269208431244,
"log_odds_ratio": -0.5239609479904175,
"logits/chosen": 0.21428313851356506,
"logits/rejected": -0.9460724592208862,
"logps/chosen": -1.6473997831344604,
"logps/rejected": -1.9846879243850708,
"loss": 1.7941,
"nll_loss": 1.7416696548461914,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.1647399663925171,
"rewards/margins": 0.03372883051633835,
"rewards/rejected": -0.19846880435943604,
"step": 229
},
{
"epoch": 0.6351397997928891,
"grad_norm": 0.22254504263401031,
"learning_rate": 4.817291893365055e-06,
"log_odds_chosen": 0.46622079610824585,
"log_odds_ratio": -0.4967763423919678,
"logits/chosen": 0.07556484639644623,
"logits/rejected": -1.116629719734192,
"logps/chosen": -1.5138487815856934,
"logps/rejected": -1.8917471170425415,
"loss": 1.675,
"nll_loss": 1.625287413597107,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.15138490498065948,
"rewards/margins": 0.03778982535004616,
"rewards/rejected": -0.18917471170425415,
"step": 230
},
{
"epoch": 0.637901277183293,
"grad_norm": 0.28197240829467773,
"learning_rate": 4.81427250167295e-06,
"log_odds_chosen": 0.24183571338653564,
"log_odds_ratio": -0.583247721195221,
"logits/chosen": 0.15436850488185883,
"logits/rejected": -1.1790400743484497,
"logps/chosen": -1.6747009754180908,
"logps/rejected": -1.8760582208633423,
"loss": 1.8184,
"nll_loss": 1.7600996494293213,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.16747009754180908,
"rewards/margins": 0.020135723054409027,
"rewards/rejected": -0.1876058131456375,
"step": 231
},
{
"epoch": 0.6406627545736969,
"grad_norm": 0.24836094677448273,
"learning_rate": 4.811229327250204e-06,
"log_odds_chosen": 0.45771628618240356,
"log_odds_ratio": -0.5010443329811096,
"logits/chosen": 0.25003117322921753,
"logits/rejected": -1.1424946784973145,
"logps/chosen": -1.714363694190979,
"logps/rejected": -2.1092028617858887,
"loss": 1.8544,
"nll_loss": 1.8042795658111572,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.1714363694190979,
"rewards/margins": 0.03948391228914261,
"rewards/rejected": -0.2109202891588211,
"step": 232
},
{
"epoch": 0.6434242319641008,
"grad_norm": 0.23518826067447662,
"learning_rate": 4.8081624013701435e-06,
"log_odds_chosen": 0.4311872124671936,
"log_odds_ratio": -0.5104647278785706,
"logits/chosen": 0.1991138905286789,
"logits/rejected": -1.3669785261154175,
"logps/chosen": -1.6888374090194702,
"logps/rejected": -2.053189992904663,
"loss": 1.8231,
"nll_loss": 1.7720434665679932,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.16888374090194702,
"rewards/margins": 0.03643525391817093,
"rewards/rejected": -0.20531900227069855,
"step": 233
},
{
"epoch": 0.6461857093545047,
"grad_norm": 0.23462118208408356,
"learning_rate": 4.805071755550177e-06,
"log_odds_chosen": 0.394045889377594,
"log_odds_ratio": -0.5184462070465088,
"logits/chosen": 0.2896556854248047,
"logits/rejected": -1.3101093769073486,
"logps/chosen": -1.713099479675293,
"logps/rejected": -2.048048496246338,
"loss": 1.8398,
"nll_loss": 1.7879210710525513,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1713099479675293,
"rewards/margins": 0.03349488973617554,
"rewards/rejected": -0.20480485260486603,
"step": 234
},
{
"epoch": 0.6489471867449085,
"grad_norm": 0.23507662117481232,
"learning_rate": 4.8019574215514705e-06,
"log_odds_chosen": 0.3388752043247223,
"log_odds_ratio": -0.5402787327766418,
"logits/chosen": 0.20577961206436157,
"logits/rejected": -0.8569263219833374,
"logps/chosen": -1.6788839101791382,
"logps/rejected": -1.9616367816925049,
"loss": 1.8351,
"nll_loss": 1.7810907363891602,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.16788838803768158,
"rewards/margins": 0.028275297954678535,
"rewards/rejected": -0.19616368412971497,
"step": 235
},
{
"epoch": 0.6517086641353124,
"grad_norm": 0.2070939689874649,
"learning_rate": 4.7988194313786275e-06,
"log_odds_chosen": 0.37065887451171875,
"log_odds_ratio": -0.5329977869987488,
"logits/chosen": 0.22679734230041504,
"logits/rejected": -0.9689663052558899,
"logps/chosen": -1.629687786102295,
"logps/rejected": -1.9382522106170654,
"loss": 1.7528,
"nll_loss": 1.6994796991348267,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.16296879947185516,
"rewards/margins": 0.03085644729435444,
"rewards/rejected": -0.19382524490356445,
"step": 236
},
{
"epoch": 0.6544701415257163,
"grad_norm": 0.2251017838716507,
"learning_rate": 4.795657817279349e-06,
"log_odds_chosen": 0.3826752305030823,
"log_odds_ratio": -0.5331037044525146,
"logits/chosen": 0.143580362200737,
"logits/rejected": -1.0959559679031372,
"logps/chosen": -1.568124532699585,
"logps/rejected": -1.8851323127746582,
"loss": 1.7164,
"nll_loss": 1.663122296333313,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.15681245923042297,
"rewards/margins": 0.03170077130198479,
"rewards/rejected": -0.18851323425769806,
"step": 237
},
{
"epoch": 0.6572316189161201,
"grad_norm": 0.24112077057361603,
"learning_rate": 4.7924726117441135e-06,
"log_odds_chosen": 0.45494502782821655,
"log_odds_ratio": -0.5097072124481201,
"logits/chosen": 0.12531203031539917,
"logits/rejected": -1.1530197858810425,
"logps/chosen": -1.6881966590881348,
"logps/rejected": -2.074317216873169,
"loss": 1.8279,
"nll_loss": 1.7769427299499512,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.16881968080997467,
"rewards/margins": 0.03861205279827118,
"rewards/rejected": -0.20743173360824585,
"step": 238
},
{
"epoch": 0.659993096306524,
"grad_norm": 0.23394882678985596,
"learning_rate": 4.789263847505835e-06,
"log_odds_chosen": 0.48239994049072266,
"log_odds_ratio": -0.484576016664505,
"logits/chosen": 0.19327585399150848,
"logits/rejected": -1.0120890140533447,
"logps/chosen": -1.673211693763733,
"logps/rejected": -2.078700542449951,
"loss": 1.8042,
"nll_loss": 1.7557491064071655,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.16732117533683777,
"rewards/margins": 0.04054888337850571,
"rewards/rejected": -0.20787005126476288,
"step": 239
},
{
"epoch": 0.6627545736969278,
"grad_norm": 0.24529801309108734,
"learning_rate": 4.786031557539532e-06,
"log_odds_chosen": 0.6541503667831421,
"log_odds_ratio": -0.42781609296798706,
"logits/chosen": 0.1258109211921692,
"logits/rejected": -1.3018333911895752,
"logps/chosen": -1.6192426681518555,
"logps/rejected": -2.17771577835083,
"loss": 1.7489,
"nll_loss": 1.7061513662338257,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.16192427277565002,
"rewards/margins": 0.055847302079200745,
"rewards/rejected": -0.21777155995368958,
"step": 240
},
{
"epoch": 0.6655160510873317,
"grad_norm": 0.21988217532634735,
"learning_rate": 4.782775775061983e-06,
"log_odds_chosen": 0.4216863214969635,
"log_odds_ratio": -0.5078084468841553,
"logits/chosen": 0.22119548916816711,
"logits/rejected": -1.0060391426086426,
"logps/chosen": -1.6245155334472656,
"logps/rejected": -1.9757689237594604,
"loss": 1.7524,
"nll_loss": 1.7016619443893433,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.16245155036449432,
"rewards/margins": 0.03512535244226456,
"rewards/rejected": -0.19757691025733948,
"step": 241
},
{
"epoch": 0.6682775284777356,
"grad_norm": 0.20372723042964935,
"learning_rate": 4.779496533531393e-06,
"log_odds_chosen": 0.5198055505752563,
"log_odds_ratio": -0.4691554009914398,
"logits/chosen": 0.20225608348846436,
"logits/rejected": -1.1654834747314453,
"logps/chosen": -1.5811524391174316,
"logps/rejected": -2.01274037361145,
"loss": 1.7073,
"nll_loss": 1.6603847742080688,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.15811526775360107,
"rewards/margins": 0.043158773332834244,
"rewards/rejected": -0.20127403736114502,
"step": 242
},
{
"epoch": 0.6710390058681395,
"grad_norm": 0.22578665614128113,
"learning_rate": 4.7761938666470405e-06,
"log_odds_chosen": 0.4481002688407898,
"log_odds_ratio": -0.5030589699745178,
"logits/chosen": 0.21000558137893677,
"logits/rejected": -1.1474783420562744,
"logps/chosen": -1.6968859434127808,
"logps/rejected": -2.079129695892334,
"loss": 1.8331,
"nll_loss": 1.7828097343444824,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.16968859732151031,
"rewards/margins": 0.03822438418865204,
"rewards/rejected": -0.20791295170783997,
"step": 243
},
{
"epoch": 0.6738004832585434,
"grad_norm": 0.21789832413196564,
"learning_rate": 4.7728678083489375e-06,
"log_odds_chosen": 0.3310457170009613,
"log_odds_ratio": -0.5467555522918701,
"logits/chosen": 0.21008087694644928,
"logits/rejected": -1.071950912475586,
"logps/chosen": -1.6163996458053589,
"logps/rejected": -1.8934293985366821,
"loss": 1.7531,
"nll_loss": 1.6983906030654907,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.1616399735212326,
"rewards/margins": 0.027702966704964638,
"rewards/rejected": -0.1893429309129715,
"step": 244
},
{
"epoch": 0.6765619606489471,
"grad_norm": 0.21386297047138214,
"learning_rate": 4.7695183928174804e-06,
"log_odds_chosen": 0.4576282799243927,
"log_odds_ratio": -0.4972766637802124,
"logits/chosen": 0.11432070285081863,
"logits/rejected": -1.215945839881897,
"logps/chosen": -1.6433749198913574,
"logps/rejected": -2.0280091762542725,
"loss": 1.7829,
"nll_loss": 1.7331418991088867,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.16433750092983246,
"rewards/margins": 0.03846340626478195,
"rewards/rejected": -0.2028008997440338,
"step": 245
},
{
"epoch": 0.679323438039351,
"grad_norm": 0.23491686582565308,
"learning_rate": 4.766145654473096e-06,
"log_odds_chosen": 0.37732306122779846,
"log_odds_ratio": -0.5303936004638672,
"logits/chosen": 0.18962648510932922,
"logits/rejected": -1.3029686212539673,
"logps/chosen": -1.716407299041748,
"logps/rejected": -2.0386903285980225,
"loss": 1.8439,
"nll_loss": 1.790850043296814,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.17164072394371033,
"rewards/margins": 0.03222829848527908,
"rewards/rejected": -0.2038690447807312,
"step": 246
},
{
"epoch": 0.6820849154297549,
"grad_norm": 0.2103213518857956,
"learning_rate": 4.762749627975888e-06,
"log_odds_chosen": 0.40977245569229126,
"log_odds_ratio": -0.5140390396118164,
"logits/chosen": 0.0982648953795433,
"logits/rejected": -1.1915839910507202,
"logps/chosen": -1.557995319366455,
"logps/rejected": -1.8929803371429443,
"loss": 1.6972,
"nll_loss": 1.6458226442337036,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1557995229959488,
"rewards/margins": 0.033498503267765045,
"rewards/rejected": -0.18929803371429443,
"step": 247
},
{
"epoch": 0.6848463928201588,
"grad_norm": 0.2225971668958664,
"learning_rate": 4.7593303482252835e-06,
"log_odds_chosen": 0.36240053176879883,
"log_odds_ratio": -0.5350769758224487,
"logits/chosen": 0.16154634952545166,
"logits/rejected": -0.829785943031311,
"logps/chosen": -1.6611778736114502,
"logps/rejected": -1.9659464359283447,
"loss": 1.7928,
"nll_loss": 1.7392576932907104,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.16611778736114502,
"rewards/margins": 0.03047686442732811,
"rewards/rejected": -0.19659464061260223,
"step": 248
},
{
"epoch": 0.6876078702105627,
"grad_norm": 0.19730301201343536,
"learning_rate": 4.755887850359673e-06,
"log_odds_chosen": 0.598614513874054,
"log_odds_ratio": -0.4566551446914673,
"logits/chosen": 0.10718496143817902,
"logits/rejected": -1.343011498451233,
"logps/chosen": -1.5110077857971191,
"logps/rejected": -2.002279043197632,
"loss": 1.6562,
"nll_loss": 1.6105355024337769,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1511007696390152,
"rewards/margins": 0.04912712052464485,
"rewards/rejected": -0.20022790133953094,
"step": 249
},
{
"epoch": 0.6903693476009665,
"grad_norm": 0.2222234308719635,
"learning_rate": 4.752422169756048e-06,
"log_odds_chosen": 0.4699355959892273,
"log_odds_ratio": -0.4920656085014343,
"logits/chosen": 0.1497102826833725,
"logits/rejected": -1.4104342460632324,
"logps/chosen": -1.6844984292984009,
"logps/rejected": -2.0840110778808594,
"loss": 1.8276,
"nll_loss": 1.7784277200698853,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.16844984889030457,
"rewards/margins": 0.03995127975940704,
"rewards/rejected": -0.20840111374855042,
"step": 250
},
{
"epoch": 0.6931308249913704,
"grad_norm": 0.2100534588098526,
"learning_rate": 4.748933342029639e-06,
"log_odds_chosen": 0.5825514793395996,
"log_odds_ratio": -0.4510309398174286,
"logits/chosen": 0.1121891662478447,
"logits/rejected": -1.3764441013336182,
"logps/chosen": -1.560391902923584,
"logps/rejected": -2.048152446746826,
"loss": 1.7094,
"nll_loss": 1.6642597913742065,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.15603917837142944,
"rewards/margins": 0.0487760454416275,
"rewards/rejected": -0.20481522381305695,
"step": 251
},
{
"epoch": 0.6958923023817742,
"grad_norm": 0.20206160843372345,
"learning_rate": 4.745421403033548e-06,
"log_odds_chosen": 0.4050910174846649,
"log_odds_ratio": -0.5125004649162292,
"logits/chosen": 0.10816405713558197,
"logits/rejected": -1.1761468648910522,
"logps/chosen": -1.6354482173919678,
"logps/rejected": -1.9715569019317627,
"loss": 1.7615,
"nll_loss": 1.7102546691894531,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.16354484856128693,
"rewards/margins": 0.033610858023166656,
"rewards/rejected": -0.1971556842327118,
"step": 252
},
{
"epoch": 0.6986537797721781,
"grad_norm": 0.2380354106426239,
"learning_rate": 4.741886388858384e-06,
"log_odds_chosen": 0.3426484167575836,
"log_odds_ratio": -0.543499231338501,
"logits/chosen": 0.17392773926258087,
"logits/rejected": -1.2038366794586182,
"logps/chosen": -1.6468617916107178,
"logps/rejected": -1.928429365158081,
"loss": 1.79,
"nll_loss": 1.7356586456298828,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.1646861881017685,
"rewards/margins": 0.02815674990415573,
"rewards/rejected": -0.19284293055534363,
"step": 253
},
{
"epoch": 0.701415257162582,
"grad_norm": 0.2207070291042328,
"learning_rate": 4.738328335831883e-06,
"log_odds_chosen": 0.3862311542034149,
"log_odds_ratio": -0.5232746005058289,
"logits/chosen": 0.13721241056919098,
"logits/rejected": -1.4115301370620728,
"logps/chosen": -1.612336277961731,
"logps/rejected": -1.9344983100891113,
"loss": 1.7501,
"nll_loss": 1.6977391242980957,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.16123361885547638,
"rewards/margins": 0.03221620246767998,
"rewards/rejected": -0.19344982504844666,
"step": 254
},
{
"epoch": 0.7041767345529858,
"grad_norm": 0.2061997801065445,
"learning_rate": 4.734747280518549e-06,
"log_odds_chosen": 0.47361427545547485,
"log_odds_ratio": -0.48697221279144287,
"logits/chosen": 0.03231241926550865,
"logits/rejected": -1.5338258743286133,
"logps/chosen": -1.6500543355941772,
"logps/rejected": -2.0473241806030273,
"loss": 1.7861,
"nll_loss": 1.737368106842041,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.16500544548034668,
"rewards/margins": 0.039726972579956055,
"rewards/rejected": -0.20473241806030273,
"step": 255
},
{
"epoch": 0.7069382119433897,
"grad_norm": 0.20688936114311218,
"learning_rate": 4.7311432597192655e-06,
"log_odds_chosen": 0.39435428380966187,
"log_odds_ratio": -0.5248243808746338,
"logits/chosen": 0.03083261288702488,
"logits/rejected": -1.4570684432983398,
"logps/chosen": -1.648856520652771,
"logps/rejected": -1.9787046909332275,
"loss": 1.7765,
"nll_loss": 1.7240355014801025,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.16488566994667053,
"rewards/margins": 0.03298482671380043,
"rewards/rejected": -0.19787049293518066,
"step": 256
},
{
"epoch": 0.7096996893337936,
"grad_norm": 0.22945424914360046,
"learning_rate": 4.72751631047092e-06,
"log_odds_chosen": 0.5417366027832031,
"log_odds_ratio": -0.46491706371307373,
"logits/chosen": -0.006479084491729736,
"logits/rejected": -1.1154435873031616,
"logps/chosen": -1.6812236309051514,
"logps/rejected": -2.1395444869995117,
"loss": 1.8262,
"nll_loss": 1.7797247171401978,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.16812236607074738,
"rewards/margins": 0.0458320677280426,
"rewards/rejected": -0.21395443379878998,
"step": 257
},
{
"epoch": 0.7124611667241975,
"grad_norm": 0.22095176577568054,
"learning_rate": 4.72386647004603e-06,
"log_odds_chosen": 0.4106917977333069,
"log_odds_ratio": -0.5133163332939148,
"logits/chosen": 0.1232781782746315,
"logits/rejected": -1.1046854257583618,
"logps/chosen": -1.6649987697601318,
"logps/rejected": -2.0084848403930664,
"loss": 1.811,
"nll_loss": 1.7596914768218994,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.16649989783763885,
"rewards/margins": 0.03434859216213226,
"rewards/rejected": -0.20084848999977112,
"step": 258
},
{
"epoch": 0.7152226441146013,
"grad_norm": 0.22131314873695374,
"learning_rate": 4.720193775952352e-06,
"log_odds_chosen": 0.2770199477672577,
"log_odds_ratio": -0.5675607919692993,
"logits/chosen": 0.09717811644077301,
"logits/rejected": -1.1877737045288086,
"logps/chosen": -1.6597505807876587,
"logps/rejected": -1.889330267906189,
"loss": 1.7861,
"nll_loss": 1.729378581047058,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.16597506403923035,
"rewards/margins": 0.022957956418395042,
"rewards/rejected": -0.18893301486968994,
"step": 259
},
{
"epoch": 0.7179841215050051,
"grad_norm": 0.2108003944158554,
"learning_rate": 4.716498265932501e-06,
"log_odds_chosen": 0.5200653076171875,
"log_odds_ratio": -0.47012218832969666,
"logits/chosen": -0.01136242039501667,
"logits/rejected": -1.1188238859176636,
"logps/chosen": -1.4902641773223877,
"logps/rejected": -1.915147066116333,
"loss": 1.6221,
"nll_loss": 1.575091004371643,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.14902642369270325,
"rewards/margins": 0.04248826950788498,
"rewards/rejected": -0.19151470065116882,
"step": 260
},
{
"epoch": 0.720745598895409,
"grad_norm": 0.22381585836410522,
"learning_rate": 4.712779977963559e-06,
"log_odds_chosen": 0.39622795581817627,
"log_odds_ratio": -0.5201148390769958,
"logits/chosen": 0.10159610211849213,
"logits/rejected": -0.859241247177124,
"logps/chosen": -1.5900012254714966,
"logps/rejected": -1.917615294456482,
"loss": 1.7383,
"nll_loss": 1.6862510442733765,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.15900012850761414,
"rewards/margins": 0.03276140242815018,
"rewards/rejected": -0.1917615383863449,
"step": 261
},
{
"epoch": 0.7235070762858129,
"grad_norm": 0.2046324461698532,
"learning_rate": 4.7090389502566884e-06,
"log_odds_chosen": 0.5120800733566284,
"log_odds_ratio": -0.47404342889785767,
"logits/chosen": 0.017112823203206062,
"logits/rejected": -1.1166574954986572,
"logps/chosen": -1.6049623489379883,
"logps/rejected": -2.0321714878082275,
"loss": 1.7168,
"nll_loss": 1.6693758964538574,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.16049623489379883,
"rewards/margins": 0.042720913887023926,
"rewards/rejected": -0.20321716368198395,
"step": 262
},
{
"epoch": 0.7262685536762168,
"grad_norm": 0.20920297503471375,
"learning_rate": 4.705275221256738e-06,
"log_odds_chosen": 0.42567548155784607,
"log_odds_ratio": -0.5072777271270752,
"logits/chosen": 0.11223579943180084,
"logits/rejected": -1.1794720888137817,
"logps/chosen": -1.6436134576797485,
"logps/rejected": -2.0019237995147705,
"loss": 1.7837,
"nll_loss": 1.7329978942871094,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.16436134278774261,
"rewards/margins": 0.035831037908792496,
"rewards/rejected": -0.2001923769712448,
"step": 263
},
{
"epoch": 0.7290300310666207,
"grad_norm": 0.215216726064682,
"learning_rate": 4.701488829641845e-06,
"log_odds_chosen": 0.3972318172454834,
"log_odds_ratio": -0.5163053870201111,
"logits/chosen": 0.07385722547769547,
"logits/rejected": -1.1585350036621094,
"logps/chosen": -1.5981651544570923,
"logps/rejected": -1.9258975982666016,
"loss": 1.7361,
"nll_loss": 1.6844836473464966,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.15981650352478027,
"rewards/margins": 0.032773248851299286,
"rewards/rejected": -0.19258975982666016,
"step": 264
},
{
"epoch": 0.7317915084570245,
"grad_norm": 0.20215153694152832,
"learning_rate": 4.697679814323044e-06,
"log_odds_chosen": 0.373761922121048,
"log_odds_ratio": -0.5314816832542419,
"logits/chosen": 0.0431194081902504,
"logits/rejected": -1.3062759637832642,
"logps/chosen": -1.6237438917160034,
"logps/rejected": -1.9311178922653198,
"loss": 1.746,
"nll_loss": 1.6928824186325073,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1623743772506714,
"rewards/margins": 0.030737407505512238,
"rewards/rejected": -0.19311177730560303,
"step": 265
},
{
"epoch": 0.7345529858474283,
"grad_norm": 0.2043098360300064,
"learning_rate": 4.693848214443858e-06,
"log_odds_chosen": 0.3958456516265869,
"log_odds_ratio": -0.5203114151954651,
"logits/chosen": 0.016560683026909828,
"logits/rejected": -1.4892038106918335,
"logps/chosen": -1.690280795097351,
"logps/rejected": -2.024770736694336,
"loss": 1.8103,
"nll_loss": 1.7582213878631592,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.16902808845043182,
"rewards/margins": 0.03344898298382759,
"rewards/rejected": -0.2024770826101303,
"step": 266
},
{
"epoch": 0.7373144632378322,
"grad_norm": 0.2200057953596115,
"learning_rate": 4.689994069379905e-06,
"log_odds_chosen": 0.6603919863700867,
"log_odds_ratio": -0.4194304049015045,
"logits/chosen": 0.0796060711145401,
"logits/rejected": -1.6784858703613281,
"logps/chosen": -1.5854812860488892,
"logps/rejected": -2.141174554824829,
"loss": 1.7061,
"nll_loss": 1.6641736030578613,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.15854813158512115,
"rewards/margins": 0.05556933209300041,
"rewards/rejected": -0.21411746740341187,
"step": 267
},
{
"epoch": 0.7400759406282361,
"grad_norm": 0.207722008228302,
"learning_rate": 4.686117418738489e-06,
"log_odds_chosen": 0.4980109930038452,
"log_odds_ratio": -0.477446049451828,
"logits/chosen": 0.0639527440071106,
"logits/rejected": -1.2904075384140015,
"logps/chosen": -1.632141351699829,
"logps/rejected": -2.049818515777588,
"loss": 1.7618,
"nll_loss": 1.7140535116195679,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.16321413218975067,
"rewards/margins": 0.041767701506614685,
"rewards/rejected": -0.20498183369636536,
"step": 268
},
{
"epoch": 0.74283741801864,
"grad_norm": 0.20242716372013092,
"learning_rate": 4.6822183023581945e-06,
"log_odds_chosen": 0.42001479864120483,
"log_odds_ratio": -0.517326831817627,
"logits/chosen": 0.07094614952802658,
"logits/rejected": -1.5565531253814697,
"logps/chosen": -1.6155306100845337,
"logps/rejected": -1.9663753509521484,
"loss": 1.7551,
"nll_loss": 1.7033692598342896,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.1615530550479889,
"rewards/margins": 0.035084471106529236,
"rewards/rejected": -0.19663754105567932,
"step": 269
},
{
"epoch": 0.7455988954090439,
"grad_norm": 0.19576282799243927,
"learning_rate": 4.678296760308474e-06,
"log_odds_chosen": 0.34514501690864563,
"log_odds_ratio": -0.5403321385383606,
"logits/chosen": -0.05599237233400345,
"logits/rejected": -1.4408526420593262,
"logps/chosen": -1.5795042514801025,
"logps/rejected": -1.8630210161209106,
"loss": 1.7087,
"nll_loss": 1.6546752452850342,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.15795043110847473,
"rewards/margins": 0.028351658955216408,
"rewards/rejected": -0.1863020956516266,
"step": 270
},
{
"epoch": 0.7483603727994477,
"grad_norm": 0.22211046516895294,
"learning_rate": 4.674352832889239e-06,
"log_odds_chosen": 0.5620113611221313,
"log_odds_ratio": -0.4555080533027649,
"logits/chosen": -0.005663935095071793,
"logits/rejected": -1.2683230638504028,
"logps/chosen": -1.6520458459854126,
"logps/rejected": -2.1268177032470703,
"loss": 1.7996,
"nll_loss": 1.7540650367736816,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.16520458459854126,
"rewards/margins": 0.04747716709971428,
"rewards/rejected": -0.21268175542354584,
"step": 271
},
{
"epoch": 0.7511218501898516,
"grad_norm": 0.1922963708639145,
"learning_rate": 4.670386560630446e-06,
"log_odds_chosen": 0.444943368434906,
"log_odds_ratio": -0.5015792846679688,
"logits/chosen": -0.06714704632759094,
"logits/rejected": -1.379449725151062,
"logps/chosen": -1.5659842491149902,
"logps/rejected": -1.9318914413452148,
"loss": 1.6989,
"nll_loss": 1.6487019062042236,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.15659843385219574,
"rewards/margins": 0.036590706557035446,
"rewards/rejected": -0.19318914413452148,
"step": 272
},
{
"epoch": 0.7538833275802554,
"grad_norm": 0.19772395491600037,
"learning_rate": 4.66639798429168e-06,
"log_odds_chosen": 0.5909014940261841,
"log_odds_ratio": -0.4440915584564209,
"logits/chosen": -0.010252359323203564,
"logits/rejected": -1.6117687225341797,
"logps/chosen": -1.6227033138275146,
"logps/rejected": -2.1234261989593506,
"loss": 1.7368,
"nll_loss": 1.69236421585083,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.16227032244205475,
"rewards/margins": 0.05007229745388031,
"rewards/rejected": -0.21234261989593506,
"step": 273
},
{
"epoch": 0.7566448049706593,
"grad_norm": 0.21296410262584686,
"learning_rate": 4.6623871448617345e-06,
"log_odds_chosen": 0.37625253200531006,
"log_odds_ratio": -0.5279226303100586,
"logits/chosen": -0.11291900277137756,
"logits/rejected": -1.474963665008545,
"logps/chosen": -1.5688221454620361,
"logps/rejected": -1.8770910501480103,
"loss": 1.6973,
"nll_loss": 1.6444581747055054,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.15688221156597137,
"rewards/margins": 0.03082689456641674,
"rewards/rejected": -0.18770912289619446,
"step": 274
},
{
"epoch": 0.7594062823610632,
"grad_norm": 0.21504846215248108,
"learning_rate": 4.6583540835581885e-06,
"log_odds_chosen": 0.446832537651062,
"log_odds_ratio": -0.5026010870933533,
"logits/chosen": -0.0652085542678833,
"logits/rejected": -1.2765593528747559,
"logps/chosen": -1.5856618881225586,
"logps/rejected": -1.9579052925109863,
"loss": 1.7239,
"nll_loss": 1.6736685037612915,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1585661768913269,
"rewards/margins": 0.037224359810352325,
"rewards/rejected": -0.19579055905342102,
"step": 275
},
{
"epoch": 0.762167759751467,
"grad_norm": 0.1951994001865387,
"learning_rate": 4.654298841826988e-06,
"log_odds_chosen": 0.38752269744873047,
"log_odds_ratio": -0.5222585797309875,
"logits/chosen": -0.09070023894309998,
"logits/rejected": -1.2031896114349365,
"logps/chosen": -1.5128428936004639,
"logps/rejected": -1.8278968334197998,
"loss": 1.6382,
"nll_loss": 1.586016058921814,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.15128430724143982,
"rewards/margins": 0.031505391001701355,
"rewards/rejected": -0.18278968334197998,
"step": 276
},
{
"epoch": 0.7649292371418709,
"grad_norm": 0.20356132090091705,
"learning_rate": 4.6502214613420164e-06,
"log_odds_chosen": 0.661637008190155,
"log_odds_ratio": -0.4188510775566101,
"logits/chosen": -0.07356397807598114,
"logits/rejected": -1.270960807800293,
"logps/chosen": -1.4853070974349976,
"logps/rejected": -2.030834674835205,
"loss": 1.6179,
"nll_loss": 1.5760544538497925,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.14853070676326752,
"rewards/margins": 0.054552774876356125,
"rewards/rejected": -0.20308347046375275,
"step": 277
},
{
"epoch": 0.7676907145322748,
"grad_norm": 0.21808800101280212,
"learning_rate": 4.646121984004666e-06,
"log_odds_chosen": 0.535017192363739,
"log_odds_ratio": -0.4694536030292511,
"logits/chosen": -0.058985427021980286,
"logits/rejected": -1.204667329788208,
"logps/chosen": -1.6129412651062012,
"logps/rejected": -2.0625391006469727,
"loss": 1.7558,
"nll_loss": 1.7088611125946045,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1612941473722458,
"rewards/margins": 0.04495978727936745,
"rewards/rejected": -0.20625391602516174,
"step": 278
},
{
"epoch": 0.7704521919226787,
"grad_norm": 0.21462294459342957,
"learning_rate": 4.642000451943409e-06,
"log_odds_chosen": 0.4302963316440582,
"log_odds_ratio": -0.5060604214668274,
"logits/chosen": 0.040374599397182465,
"logits/rejected": -1.187546968460083,
"logps/chosen": -1.6709057092666626,
"logps/rejected": -2.035818576812744,
"loss": 1.7989,
"nll_loss": 1.7482545375823975,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.16709057986736298,
"rewards/margins": 0.03649128973484039,
"rewards/rejected": -0.20358186960220337,
"step": 279
},
{
"epoch": 0.7732136693130826,
"grad_norm": 0.19471004605293274,
"learning_rate": 4.637856907513366e-06,
"log_odds_chosen": 0.5729100704193115,
"log_odds_ratio": -0.45996299386024475,
"logits/chosen": -0.0875316932797432,
"logits/rejected": -1.2949634790420532,
"logps/chosen": -1.6255837678909302,
"logps/rejected": -2.107527256011963,
"loss": 1.7409,
"nll_loss": 1.6949416399002075,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1625583916902542,
"rewards/margins": 0.04819435626268387,
"rewards/rejected": -0.21075274050235748,
"step": 280
},
{
"epoch": 0.7759751467034863,
"grad_norm": 0.20401322841644287,
"learning_rate": 4.633691393295865e-06,
"log_odds_chosen": 0.3522525429725647,
"log_odds_ratio": -0.5362752676010132,
"logits/chosen": -0.07532086223363876,
"logits/rejected": -1.3928956985473633,
"logps/chosen": -1.6393187046051025,
"logps/rejected": -1.9348175525665283,
"loss": 1.7628,
"nll_loss": 1.7092012166976929,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.16393187642097473,
"rewards/margins": 0.02954990416765213,
"rewards/rejected": -0.19348177313804626,
"step": 281
},
{
"epoch": 0.7787366240938902,
"grad_norm": 0.19418881833553314,
"learning_rate": 4.629503952098011e-06,
"log_odds_chosen": 0.6238572597503662,
"log_odds_ratio": -0.43670332431793213,
"logits/chosen": -0.08066678047180176,
"logits/rejected": -1.6776320934295654,
"logps/chosen": -1.5727202892303467,
"logps/rejected": -2.097163200378418,
"loss": 1.6952,
"nll_loss": 1.6514896154403687,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.15727202594280243,
"rewards/margins": 0.05244428664445877,
"rewards/rejected": -0.2097163200378418,
"step": 282
},
{
"epoch": 0.7814981014842941,
"grad_norm": 0.19461016356945038,
"learning_rate": 4.6252946269522406e-06,
"log_odds_chosen": 0.41456982493400574,
"log_odds_ratio": -0.5249388813972473,
"logits/chosen": -0.09783484041690826,
"logits/rejected": -1.5897575616836548,
"logps/chosen": -1.5939539670944214,
"logps/rejected": -1.9444385766983032,
"loss": 1.7211,
"nll_loss": 1.6686402559280396,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.15939539670944214,
"rewards/margins": 0.0350484773516655,
"rewards/rejected": -0.19444386661052704,
"step": 283
},
{
"epoch": 0.784259578874698,
"grad_norm": 0.2026386708021164,
"learning_rate": 4.621063461115882e-06,
"log_odds_chosen": 0.42722252011299133,
"log_odds_ratio": -0.5157784223556519,
"logits/chosen": -0.04370000213384628,
"logits/rejected": -1.5634028911590576,
"logps/chosen": -1.6642380952835083,
"logps/rejected": -2.0263044834136963,
"loss": 1.7849,
"nll_loss": 1.733304738998413,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.16642381250858307,
"rewards/margins": 0.03620663285255432,
"rewards/rejected": -0.20263046026229858,
"step": 284
},
{
"epoch": 0.7870210562651019,
"grad_norm": 0.20439012348651886,
"learning_rate": 4.6168104980707105e-06,
"log_odds_chosen": 0.4680078625679016,
"log_odds_ratio": -0.4906473755836487,
"logits/chosen": -0.09853056073188782,
"logits/rejected": -1.5130950212478638,
"logps/chosen": -1.5937169790267944,
"logps/rejected": -1.9836535453796387,
"loss": 1.7178,
"nll_loss": 1.668696641921997,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.15937167406082153,
"rewards/margins": 0.038993678987026215,
"rewards/rejected": -0.19836536049842834,
"step": 285
},
{
"epoch": 0.7897825336555057,
"grad_norm": 0.1877906173467636,
"learning_rate": 4.612535781522504e-06,
"log_odds_chosen": 0.37254124879837036,
"log_odds_ratio": -0.525785505771637,
"logits/chosen": -0.08526084572076797,
"logits/rejected": -1.4526267051696777,
"logps/chosen": -1.5574032068252563,
"logps/rejected": -1.863482117652893,
"loss": 1.6777,
"nll_loss": 1.6251548528671265,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.15574032068252563,
"rewards/margins": 0.03060789778828621,
"rewards/rejected": -0.18634822964668274,
"step": 286
},
{
"epoch": 0.7925440110459095,
"grad_norm": 0.19580195844173431,
"learning_rate": 4.6082393554005855e-06,
"log_odds_chosen": 0.5562997460365295,
"log_odds_ratio": -0.4619132876396179,
"logits/chosen": 0.01294594258069992,
"logits/rejected": -1.4304238557815552,
"logps/chosen": -1.5266389846801758,
"logps/rejected": -1.9919108152389526,
"loss": 1.6616,
"nll_loss": 1.6153795719146729,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.152663916349411,
"rewards/margins": 0.046527184545993805,
"rewards/rejected": -0.19919107854366302,
"step": 287
},
{
"epoch": 0.7953054884363134,
"grad_norm": 0.1980113834142685,
"learning_rate": 4.6039212638573835e-06,
"log_odds_chosen": 0.3346819281578064,
"log_odds_ratio": -0.5443198680877686,
"logits/chosen": -0.15275517106056213,
"logits/rejected": -1.6041910648345947,
"logps/chosen": -1.6962547302246094,
"logps/rejected": -1.9770543575286865,
"loss": 1.8067,
"nll_loss": 1.752286672592163,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.16962547600269318,
"rewards/margins": 0.028079960495233536,
"rewards/rejected": -0.1977054327726364,
"step": 288
},
{
"epoch": 0.7980669658267173,
"grad_norm": 0.2273865044116974,
"learning_rate": 4.599581551267969e-06,
"log_odds_chosen": 0.5466289520263672,
"log_odds_ratio": -0.46321243047714233,
"logits/chosen": 0.03993244469165802,
"logits/rejected": -1.286005973815918,
"logps/chosen": -1.6738882064819336,
"logps/rejected": -2.1377532482147217,
"loss": 1.8,
"nll_loss": 1.7537211179733276,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.16738884150981903,
"rewards/margins": 0.046386465430259705,
"rewards/rejected": -0.21377530694007874,
"step": 289
},
{
"epoch": 0.8008284432171212,
"grad_norm": 0.20244112610816956,
"learning_rate": 4.5952202622296015e-06,
"log_odds_chosen": 0.3539222180843353,
"log_odds_ratio": -0.53514164686203,
"logits/chosen": -0.11812002211809158,
"logits/rejected": -1.4588350057601929,
"logps/chosen": -1.5985196828842163,
"logps/rejected": -1.8924399614334106,
"loss": 1.7271,
"nll_loss": 1.6736091375350952,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.15985198318958282,
"rewards/margins": 0.029392031952738762,
"rewards/rejected": -0.18924400210380554,
"step": 290
},
{
"epoch": 0.803589920607525,
"grad_norm": 0.20306488871574402,
"learning_rate": 4.590837441561277e-06,
"log_odds_chosen": 0.4768354296684265,
"log_odds_ratio": -0.49164149165153503,
"logits/chosen": -0.09056994318962097,
"logits/rejected": -1.4742063283920288,
"logps/chosen": -1.6228840351104736,
"logps/rejected": -2.022940158843994,
"loss": 1.7341,
"nll_loss": 1.6849125623703003,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1622883826494217,
"rewards/margins": 0.040005628019571304,
"rewards/rejected": -0.2022940218448639,
"step": 291
},
{
"epoch": 0.8063513979979289,
"grad_norm": 0.19205592572689056,
"learning_rate": 4.586433134303257e-06,
"log_odds_chosen": 0.5734292268753052,
"log_odds_ratio": -0.44972163438796997,
"logits/chosen": -0.07964983582496643,
"logits/rejected": -1.3166429996490479,
"logps/chosen": -1.4941394329071045,
"logps/rejected": -1.9636625051498413,
"loss": 1.6364,
"nll_loss": 1.5913902521133423,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.14941394329071045,
"rewards/margins": 0.04695230349898338,
"rewards/rejected": -0.19636625051498413,
"step": 292
},
{
"epoch": 0.8091128753883328,
"grad_norm": 0.20325274765491486,
"learning_rate": 4.582007385716614e-06,
"log_odds_chosen": 0.4024369418621063,
"log_odds_ratio": -0.5175961852073669,
"logits/chosen": -0.07684268802404404,
"logits/rejected": -1.502617597579956,
"logps/chosen": -1.6142741441726685,
"logps/rejected": -1.9492229223251343,
"loss": 1.7574,
"nll_loss": 1.7056207656860352,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.16142742335796356,
"rewards/margins": 0.033494893461465836,
"rewards/rejected": -0.1949223130941391,
"step": 293
},
{
"epoch": 0.8118743527787367,
"grad_norm": 0.20083336532115936,
"learning_rate": 4.57756024128276e-06,
"log_odds_chosen": 0.5593162178993225,
"log_odds_ratio": -0.45524081587791443,
"logits/chosen": -0.17213015258312225,
"logits/rejected": -1.3977055549621582,
"logps/chosen": -1.5258231163024902,
"logps/rejected": -1.9875348806381226,
"loss": 1.6559,
"nll_loss": 1.610384225845337,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1525823026895523,
"rewards/margins": 0.04617120325565338,
"rewards/rejected": -0.1987534910440445,
"step": 294
},
{
"epoch": 0.8146358301691405,
"grad_norm": 0.19001390039920807,
"learning_rate": 4.573091746702988e-06,
"log_odds_chosen": 0.5878125429153442,
"log_odds_ratio": -0.44817692041397095,
"logits/chosen": -0.16976626217365265,
"logits/rejected": -1.3345102071762085,
"logps/chosen": -1.5175349712371826,
"logps/rejected": -2.003750801086426,
"loss": 1.6573,
"nll_loss": 1.612461805343628,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1517535001039505,
"rewards/margins": 0.048621561378240585,
"rewards/rejected": -0.20037508010864258,
"step": 295
},
{
"epoch": 0.8173973075595443,
"grad_norm": 0.18019473552703857,
"learning_rate": 4.5686019478979915e-06,
"log_odds_chosen": 0.6261818408966064,
"log_odds_ratio": -0.43093276023864746,
"logits/chosen": -0.045413050800561905,
"logits/rejected": -1.8382006883621216,
"logps/chosen": -1.5509614944458008,
"logps/rejected": -2.0732412338256836,
"loss": 1.6609,
"nll_loss": 1.617802619934082,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1550961434841156,
"rewards/margins": 0.05222797393798828,
"rewards/rejected": -0.20732411742210388,
"step": 296
},
{
"epoch": 0.8201587849499482,
"grad_norm": 0.20908498764038086,
"learning_rate": 4.564090891007401e-06,
"log_odds_chosen": 0.5261347889900208,
"log_odds_ratio": -0.4701959192752838,
"logits/chosen": -0.1871597170829773,
"logits/rejected": -1.5989326238632202,
"logps/chosen": -1.648465633392334,
"logps/rejected": -2.0937981605529785,
"loss": 1.7682,
"nll_loss": 1.7211995124816895,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.16484656929969788,
"rewards/margins": 0.04453325271606445,
"rewards/rejected": -0.20937982201576233,
"step": 297
},
{
"epoch": 0.8229202623403521,
"grad_norm": 0.188289076089859,
"learning_rate": 4.559558622389304e-06,
"log_odds_chosen": 0.5777133703231812,
"log_odds_ratio": -0.4563387632369995,
"logits/chosen": -0.13260145485401154,
"logits/rejected": -1.7002828121185303,
"logps/chosen": -1.5394691228866577,
"logps/rejected": -2.0138742923736572,
"loss": 1.6561,
"nll_loss": 1.6105040311813354,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1539469212293625,
"rewards/margins": 0.047440510243177414,
"rewards/rejected": -0.201387420296669,
"step": 298
},
{
"epoch": 0.825681739730756,
"grad_norm": 0.19627498090267181,
"learning_rate": 4.555005188619776e-06,
"log_odds_chosen": 0.5525295734405518,
"log_odds_ratio": -0.4643310308456421,
"logits/chosen": -0.20222769677639008,
"logits/rejected": -1.6397647857666016,
"logps/chosen": -1.5733509063720703,
"logps/rejected": -2.0376038551330566,
"loss": 1.6892,
"nll_loss": 1.642791748046875,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1573350876569748,
"rewards/margins": 0.04642530530691147,
"rewards/rejected": -0.20376040041446686,
"step": 299
},
{
"epoch": 0.8284432171211599,
"grad_norm": 0.2084610015153885,
"learning_rate": 4.55043063649239e-06,
"log_odds_chosen": 0.6468226313591003,
"log_odds_ratio": -0.4257048964500427,
"logits/chosen": -0.23111680150032043,
"logits/rejected": -1.959672212600708,
"logps/chosen": -1.5862557888031006,
"logps/rejected": -2.1299490928649902,
"loss": 1.7007,
"nll_loss": 1.6581330299377441,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.15862558782100677,
"rewards/margins": 0.054369326680898666,
"rewards/rejected": -0.21299490332603455,
"step": 300
},
{
"epoch": 0.8312046945115636,
"grad_norm": 0.18870840966701508,
"learning_rate": 4.54583501301775e-06,
"log_odds_chosen": 0.5869650840759277,
"log_odds_ratio": -0.45076417922973633,
"logits/chosen": -0.1821564882993698,
"logits/rejected": -1.9625955820083618,
"logps/chosen": -1.6092771291732788,
"logps/rejected": -2.104759693145752,
"loss": 1.7134,
"nll_loss": 1.668313980102539,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.16092771291732788,
"rewards/margins": 0.04954826086759567,
"rewards/rejected": -0.21047596633434296,
"step": 301
},
{
"epoch": 0.8339661719019675,
"grad_norm": 0.1871059238910675,
"learning_rate": 4.541218365422997e-06,
"log_odds_chosen": 0.6819782257080078,
"log_odds_ratio": -0.4187195301055908,
"logits/chosen": -0.2113940417766571,
"logits/rejected": -1.5733145475387573,
"logps/chosen": -1.4704827070236206,
"logps/rejected": -2.02266001701355,
"loss": 1.5975,
"nll_loss": 1.5556209087371826,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.14704826474189758,
"rewards/margins": 0.05521773174405098,
"rewards/rejected": -0.20226599276065826,
"step": 302
},
{
"epoch": 0.8367276492923714,
"grad_norm": 0.1986740678548813,
"learning_rate": 4.536580741151328e-06,
"log_odds_chosen": 0.427354633808136,
"log_odds_ratio": -0.5044746398925781,
"logits/chosen": -0.17407816648483276,
"logits/rejected": -1.2958568334579468,
"logps/chosen": -1.6126006841659546,
"logps/rejected": -1.9662362337112427,
"loss": 1.7285,
"nll_loss": 1.678093433380127,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.16126006841659546,
"rewards/margins": 0.03536355867981911,
"rewards/rejected": -0.19662362337112427,
"step": 303
},
{
"epoch": 0.8394891266827753,
"grad_norm": 0.20910044014453888,
"learning_rate": 4.531922187861507e-06,
"log_odds_chosen": 0.6855639219284058,
"log_odds_ratio": -0.41133368015289307,
"logits/chosen": -0.17175668478012085,
"logits/rejected": -1.4931915998458862,
"logps/chosen": -1.491321086883545,
"logps/rejected": -2.059537649154663,
"loss": 1.631,
"nll_loss": 1.589834451675415,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1491321176290512,
"rewards/margins": 0.056821659207344055,
"rewards/rejected": -0.20595377683639526,
"step": 304
},
{
"epoch": 0.8422506040731792,
"grad_norm": 0.2135782539844513,
"learning_rate": 4.527242753427378e-06,
"log_odds_chosen": 0.4499048590660095,
"log_odds_ratio": -0.4998108744621277,
"logits/chosen": -0.23703832924365997,
"logits/rejected": -1.2717434167861938,
"logps/chosen": -1.6932170391082764,
"logps/rejected": -2.074625015258789,
"loss": 1.8238,
"nll_loss": 1.7738076448440552,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1693217009305954,
"rewards/margins": 0.038140811026096344,
"rewards/rejected": -0.20746250450611115,
"step": 305
},
{
"epoch": 0.845012081463583,
"grad_norm": 0.1993100643157959,
"learning_rate": 4.522542485937369e-06,
"log_odds_chosen": 0.6878387331962585,
"log_odds_ratio": -0.4140303134918213,
"logits/chosen": -0.17333604395389557,
"logits/rejected": -1.6610162258148193,
"logps/chosen": -1.5485466718673706,
"logps/rejected": -2.122166395187378,
"loss": 1.6836,
"nll_loss": 1.6422399282455444,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1548546552658081,
"rewards/margins": 0.057361967861652374,
"rewards/rejected": -0.21221664547920227,
"step": 306
},
{
"epoch": 0.8477735588539869,
"grad_norm": 0.20089736580848694,
"learning_rate": 4.5178214336940015e-06,
"log_odds_chosen": 0.5427862405776978,
"log_odds_ratio": -0.4643491506576538,
"logits/chosen": -0.1863488256931305,
"logits/rejected": -1.4939963817596436,
"logps/chosen": -1.556074857711792,
"logps/rejected": -2.007810592651367,
"loss": 1.6867,
"nll_loss": 1.640239953994751,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.15560749173164368,
"rewards/margins": 0.04517355561256409,
"rewards/rejected": -0.20078104734420776,
"step": 307
},
{
"epoch": 0.8505350362443908,
"grad_norm": 0.19465354084968567,
"learning_rate": 4.513079645213391e-06,
"log_odds_chosen": 0.6006003618240356,
"log_odds_ratio": -0.4448007047176361,
"logits/chosen": -0.21229855716228485,
"logits/rejected": -1.460700273513794,
"logps/chosen": -1.4619970321655273,
"logps/rejected": -1.951730728149414,
"loss": 1.5808,
"nll_loss": 1.5363428592681885,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.14619971811771393,
"rewards/margins": 0.04897337406873703,
"rewards/rejected": -0.19517306983470917,
"step": 308
},
{
"epoch": 0.8532965136347946,
"grad_norm": 0.22921526432037354,
"learning_rate": 4.508317169224752e-06,
"log_odds_chosen": 0.28590530157089233,
"log_odds_ratio": -0.5672011375427246,
"logits/chosen": -0.3028235137462616,
"logits/rejected": -1.3352372646331787,
"logps/chosen": -1.5896085500717163,
"logps/rejected": -1.8217551708221436,
"loss": 1.7142,
"nll_loss": 1.6574809551239014,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.15896086394786835,
"rewards/margins": 0.0232146717607975,
"rewards/rejected": -0.18217553198337555,
"step": 309
},
{
"epoch": 0.8560579910251985,
"grad_norm": 0.20834492146968842,
"learning_rate": 4.5035340546698915e-06,
"log_odds_chosen": 0.5302640795707703,
"log_odds_ratio": -0.47237080335617065,
"logits/chosen": -0.13226553797721863,
"logits/rejected": -1.6689988374710083,
"logps/chosen": -1.5815542936325073,
"logps/rejected": -2.017597198486328,
"loss": 1.7019,
"nll_loss": 1.6546366214752197,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.1581554412841797,
"rewards/margins": 0.0436042957007885,
"rewards/rejected": -0.2017597258090973,
"step": 310
},
{
"epoch": 0.8588194684156023,
"grad_norm": 0.2186000496149063,
"learning_rate": 4.4987303507027155e-06,
"log_odds_chosen": 0.3287478983402252,
"log_odds_ratio": -0.549705982208252,
"logits/chosen": -0.11287423223257065,
"logits/rejected": -1.349088430404663,
"logps/chosen": -1.6460450887680054,
"logps/rejected": -1.9183804988861084,
"loss": 1.7738,
"nll_loss": 1.718807339668274,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.16460449993610382,
"rewards/margins": 0.027233552187681198,
"rewards/rejected": -0.19183805584907532,
"step": 311
},
{
"epoch": 0.8615809458060062,
"grad_norm": 0.19687776267528534,
"learning_rate": 4.493906106688712e-06,
"log_odds_chosen": 0.6679433584213257,
"log_odds_ratio": -0.41612738370895386,
"logits/chosen": -0.1544291228055954,
"logits/rejected": -1.6344720125198364,
"logps/chosen": -1.5363831520080566,
"logps/rejected": -2.0934810638427734,
"loss": 1.6699,
"nll_loss": 1.6282765865325928,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1536383181810379,
"rewards/margins": 0.05570977181196213,
"rewards/rejected": -0.20934809744358063,
"step": 312
},
{
"epoch": 0.8643424231964101,
"grad_norm": 0.18424390256404877,
"learning_rate": 4.4890613722044526e-06,
"log_odds_chosen": 0.5501468181610107,
"log_odds_ratio": -0.4591679573059082,
"logits/chosen": -0.21150004863739014,
"logits/rejected": -1.5727708339691162,
"logps/chosen": -1.529168963432312,
"logps/rejected": -1.9859933853149414,
"loss": 1.6331,
"nll_loss": 1.587223768234253,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.15291690826416016,
"rewards/margins": 0.04568243771791458,
"rewards/rejected": -0.19859933853149414,
"step": 313
},
{
"epoch": 0.867103900586814,
"grad_norm": 0.20299668610095978,
"learning_rate": 4.484196197037082e-06,
"log_odds_chosen": 0.5865733027458191,
"log_odds_ratio": -0.4465975761413574,
"logits/chosen": -0.13622766733169556,
"logits/rejected": -1.5913082361221313,
"logps/chosen": -1.5737249851226807,
"logps/rejected": -2.0672624111175537,
"loss": 1.691,
"nll_loss": 1.6463013887405396,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.15737250447273254,
"rewards/margins": 0.04935373738408089,
"rewards/rejected": -0.20672622323036194,
"step": 314
},
{
"epoch": 0.8698653779772179,
"grad_norm": 0.19080907106399536,
"learning_rate": 4.4793106311838e-06,
"log_odds_chosen": 0.556647002696991,
"log_odds_ratio": -0.46323782205581665,
"logits/chosen": -0.2553403377532959,
"logits/rejected": -1.4489879608154297,
"logps/chosen": -1.562751054763794,
"logps/rejected": -2.0281355381011963,
"loss": 1.6685,
"nll_loss": 1.6221669912338257,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.15627512335777283,
"rewards/margins": 0.04653845354914665,
"rewards/rejected": -0.20281356573104858,
"step": 315
},
{
"epoch": 0.8726268553676216,
"grad_norm": 0.21013295650482178,
"learning_rate": 4.474404724851356e-06,
"log_odds_chosen": 0.5066735148429871,
"log_odds_ratio": -0.48284393548965454,
"logits/chosen": -0.18822398781776428,
"logits/rejected": -1.6378931999206543,
"logps/chosen": -1.6176685094833374,
"logps/rejected": -2.041299819946289,
"loss": 1.7356,
"nll_loss": 1.687273621559143,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.16176684200763702,
"rewards/margins": 0.04236314073204994,
"rewards/rejected": -0.20412999391555786,
"step": 316
},
{
"epoch": 0.8753883327580255,
"grad_norm": 0.2111610472202301,
"learning_rate": 4.469478528455529e-06,
"log_odds_chosen": 0.4905741810798645,
"log_odds_ratio": -0.4830451011657715,
"logits/chosen": -0.3353807330131531,
"logits/rejected": -1.3652547597885132,
"logps/chosen": -1.6042860746383667,
"logps/rejected": -2.011214017868042,
"loss": 1.7255,
"nll_loss": 1.677234411239624,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.16042861342430115,
"rewards/margins": 0.04069279134273529,
"rewards/rejected": -0.20112140476703644,
"step": 317
},
{
"epoch": 0.8781498101484294,
"grad_norm": 0.20293498039245605,
"learning_rate": 4.464532092620607e-06,
"log_odds_chosen": 0.523048996925354,
"log_odds_ratio": -0.47193020582199097,
"logits/chosen": -0.18486103415489197,
"logits/rejected": -1.4264463186264038,
"logps/chosen": -1.638154149055481,
"logps/rejected": -2.0816562175750732,
"loss": 1.7478,
"nll_loss": 1.7006094455718994,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1638154238462448,
"rewards/margins": 0.04435021057724953,
"rewards/rejected": -0.20816563069820404,
"step": 318
},
{
"epoch": 0.8809112875388333,
"grad_norm": 0.17995508015155792,
"learning_rate": 4.4595654681788715e-06,
"log_odds_chosen": 0.647110641002655,
"log_odds_ratio": -0.428227037191391,
"logits/chosen": -0.23671314120292664,
"logits/rejected": -1.8144798278808594,
"logps/chosen": -1.5134212970733643,
"logps/rejected": -2.050638437271118,
"loss": 1.6212,
"nll_loss": 1.5783425569534302,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.15134215354919434,
"rewards/margins": 0.053721703588962555,
"rewards/rejected": -0.2050638496875763,
"step": 319
},
{
"epoch": 0.8836727649292372,
"grad_norm": 0.20032061636447906,
"learning_rate": 4.454578706170075e-06,
"log_odds_chosen": 0.35773491859436035,
"log_odds_ratio": -0.5383328795433044,
"logits/chosen": -0.1582796573638916,
"logits/rejected": -1.6286273002624512,
"logps/chosen": -1.6196863651275635,
"logps/rejected": -1.9159085750579834,
"loss": 1.7371,
"nll_loss": 1.6832914352416992,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1619686335325241,
"rewards/margins": 0.02962222881615162,
"rewards/rejected": -0.19159086048603058,
"step": 320
},
{
"epoch": 0.886434242319641,
"grad_norm": 0.2055967003107071,
"learning_rate": 4.449571857840911e-06,
"log_odds_chosen": 0.586254358291626,
"log_odds_ratio": -0.45622390508651733,
"logits/chosen": -0.09812657535076141,
"logits/rejected": -1.682092547416687,
"logps/chosen": -1.5706590414047241,
"logps/rejected": -2.0691263675689697,
"loss": 1.6908,
"nll_loss": 1.6451488733291626,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.15706591308116913,
"rewards/margins": 0.04984673112630844,
"rewards/rejected": -0.20691262185573578,
"step": 321
},
{
"epoch": 0.8891957197100449,
"grad_norm": 0.2170560508966446,
"learning_rate": 4.444544974644493e-06,
"log_odds_chosen": 0.42108941078186035,
"log_odds_ratio": -0.5122984647750854,
"logits/chosen": -0.20832902193069458,
"logits/rejected": -1.468292236328125,
"logps/chosen": -1.5347344875335693,
"logps/rejected": -1.8808050155639648,
"loss": 1.6619,
"nll_loss": 1.61066472530365,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.15347345173358917,
"rewards/margins": 0.03460706025362015,
"rewards/rejected": -0.18808050453662872,
"step": 322
},
{
"epoch": 0.8919571971004487,
"grad_norm": 0.20523445308208466,
"learning_rate": 4.4394981082398254e-06,
"log_odds_chosen": 0.4536086320877075,
"log_odds_ratio": -0.5041500926017761,
"logits/chosen": -0.26630857586860657,
"logits/rejected": -1.5968291759490967,
"logps/chosen": -1.5988963842391968,
"logps/rejected": -1.9788540601730347,
"loss": 1.7272,
"nll_loss": 1.676782250404358,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.15988965332508087,
"rewards/margins": 0.037995755672454834,
"rewards/rejected": -0.19788537919521332,
"step": 323
},
{
"epoch": 0.8947186744908526,
"grad_norm": 0.21546316146850586,
"learning_rate": 4.434431310491267e-06,
"log_odds_chosen": 0.6247555017471313,
"log_odds_ratio": -0.43717044591903687,
"logits/chosen": -0.27398669719696045,
"logits/rejected": -1.5846881866455078,
"logps/chosen": -1.660336971282959,
"logps/rejected": -2.190629243850708,
"loss": 1.7647,
"nll_loss": 1.7209898233413696,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.16603372991085052,
"rewards/margins": 0.05302921682596207,
"rewards/rejected": -0.2190629243850708,
"step": 324
},
{
"epoch": 0.8974801518812565,
"grad_norm": 0.21981805562973022,
"learning_rate": 4.429344633468005e-06,
"log_odds_chosen": 0.5096076130867004,
"log_odds_ratio": -0.4749549627304077,
"logits/chosen": -0.22183284163475037,
"logits/rejected": -1.6317569017410278,
"logps/chosen": -1.528499722480774,
"logps/rejected": -1.9499012231826782,
"loss": 1.6449,
"nll_loss": 1.597440242767334,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1528499722480774,
"rewards/margins": 0.0421401672065258,
"rewards/rejected": -0.1949901133775711,
"step": 325
},
{
"epoch": 0.9002416292716603,
"grad_norm": 0.23145584762096405,
"learning_rate": 4.424238129443515e-06,
"log_odds_chosen": 0.4823288321495056,
"log_odds_ratio": -0.48343366384506226,
"logits/chosen": -0.1781751811504364,
"logits/rejected": -1.2557498216629028,
"logps/chosen": -1.6358767747879028,
"logps/rejected": -2.0398380756378174,
"loss": 1.7597,
"nll_loss": 1.7113655805587769,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.16358768939971924,
"rewards/margins": 0.04039612039923668,
"rewards/rejected": -0.20398379862308502,
"step": 326
},
{
"epoch": 0.9030031066620642,
"grad_norm": 0.21986520290374756,
"learning_rate": 4.4191118508950286e-06,
"log_odds_chosen": 0.5763283371925354,
"log_odds_ratio": -0.4508843421936035,
"logits/chosen": -0.2733724117279053,
"logits/rejected": -1.4346749782562256,
"logps/chosen": -1.5722405910491943,
"logps/rejected": -2.0537078380584717,
"loss": 1.7093,
"nll_loss": 1.6641618013381958,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.15722407400608063,
"rewards/margins": 0.048146720975637436,
"rewards/rejected": -0.20537079870700836,
"step": 327
},
{
"epoch": 0.9057645840524681,
"grad_norm": 0.19916215538978577,
"learning_rate": 4.413965850502987e-06,
"log_odds_chosen": 0.6543524265289307,
"log_odds_ratio": -0.42543232440948486,
"logits/chosen": -0.28132179379463196,
"logits/rejected": -1.6163063049316406,
"logps/chosen": -1.4477849006652832,
"logps/rejected": -1.9837119579315186,
"loss": 1.5427,
"nll_loss": 1.5002020597457886,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.14477849006652832,
"rewards/margins": 0.05359271913766861,
"rewards/rejected": -0.19837118685245514,
"step": 328
},
{
"epoch": 0.908526061442872,
"grad_norm": 0.20786328613758087,
"learning_rate": 4.408800181150509e-06,
"log_odds_chosen": 0.7103330492973328,
"log_odds_ratio": -0.405758798122406,
"logits/chosen": -0.27468955516815186,
"logits/rejected": -1.7239093780517578,
"logps/chosen": -1.5232479572296143,
"logps/rejected": -2.113443374633789,
"loss": 1.6308,
"nll_loss": 1.5901868343353271,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.15232481062412262,
"rewards/margins": 0.0590195432305336,
"rewards/rejected": -0.21134433150291443,
"step": 329
},
{
"epoch": 0.9112875388332758,
"grad_norm": 0.21173468232154846,
"learning_rate": 4.4036148959228365e-06,
"log_odds_chosen": 0.5421361327171326,
"log_odds_ratio": -0.45979243516921997,
"logits/chosen": -0.19346265494823456,
"logits/rejected": -1.3866770267486572,
"logps/chosen": -1.5939050912857056,
"logps/rejected": -2.0476269721984863,
"loss": 1.7068,
"nll_loss": 1.660808801651001,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.15939049422740936,
"rewards/margins": 0.045372217893600464,
"rewards/rejected": -0.20476271212100983,
"step": 330
},
{
"epoch": 0.9140490162236796,
"grad_norm": 0.20240680873394012,
"learning_rate": 4.3984100481068e-06,
"log_odds_chosen": 0.5311475992202759,
"log_odds_ratio": -0.47231799364089966,
"logits/chosen": -0.10619790852069855,
"logits/rejected": -1.8089945316314697,
"logps/chosen": -1.528999924659729,
"logps/rejected": -1.9681410789489746,
"loss": 1.6383,
"nll_loss": 1.5910669565200806,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.15289999544620514,
"rewards/margins": 0.04391412436962128,
"rewards/rejected": -0.19681411981582642,
"step": 331
},
{
"epoch": 0.9168104936140835,
"grad_norm": 0.18672843277454376,
"learning_rate": 4.3931856911902635e-06,
"log_odds_chosen": 0.6374708414077759,
"log_odds_ratio": -0.433654248714447,
"logits/chosen": -0.31257641315460205,
"logits/rejected": -1.766423225402832,
"logps/chosen": -1.5535167455673218,
"logps/rejected": -2.087543487548828,
"loss": 1.6746,
"nll_loss": 1.631203293800354,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1553516834974289,
"rewards/margins": 0.05340268462896347,
"rewards/rejected": -0.20875434577465057,
"step": 332
},
{
"epoch": 0.9195719710044874,
"grad_norm": 0.20278650522232056,
"learning_rate": 4.387941878861578e-06,
"log_odds_chosen": 0.442794531583786,
"log_odds_ratio": -0.5059284567832947,
"logits/chosen": -0.2818780243396759,
"logits/rejected": -1.41943359375,
"logps/chosen": -1.475003957748413,
"logps/rejected": -1.838566541671753,
"loss": 1.5936,
"nll_loss": 1.5429768562316895,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.1475003957748413,
"rewards/margins": 0.03635626286268234,
"rewards/rejected": -0.18385665118694305,
"step": 333
},
{
"epoch": 0.9223334483948913,
"grad_norm": 0.21072961390018463,
"learning_rate": 4.382678665009028e-06,
"log_odds_chosen": 0.5503413677215576,
"log_odds_ratio": -0.4611153304576874,
"logits/chosen": -0.24195455014705658,
"logits/rejected": -1.77567720413208,
"logps/chosen": -1.6017379760742188,
"logps/rejected": -2.0620176792144775,
"loss": 1.7086,
"nll_loss": 1.6624857187271118,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.16017380356788635,
"rewards/margins": 0.04602799564599991,
"rewards/rejected": -0.20620179176330566,
"step": 334
},
{
"epoch": 0.9250949257852952,
"grad_norm": 0.21240665018558502,
"learning_rate": 4.3773961037202784e-06,
"log_odds_chosen": 0.5787122249603271,
"log_odds_ratio": -0.4496590793132782,
"logits/chosen": -0.24867500364780426,
"logits/rejected": -1.7745471000671387,
"logps/chosen": -1.6565507650375366,
"logps/rejected": -2.1457858085632324,
"loss": 1.7518,
"nll_loss": 1.706835150718689,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.16565507650375366,
"rewards/margins": 0.04892349988222122,
"rewards/rejected": -0.21457859873771667,
"step": 335
},
{
"epoch": 0.927856403175699,
"grad_norm": 0.20056602358818054,
"learning_rate": 4.37209424928182e-06,
"log_odds_chosen": 0.5505763292312622,
"log_odds_ratio": -0.4596712589263916,
"logits/chosen": -0.18031375110149384,
"logits/rejected": -1.4376002550125122,
"logps/chosen": -1.557979941368103,
"logps/rejected": -2.017378091812134,
"loss": 1.6716,
"nll_loss": 1.6256715059280396,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1557980179786682,
"rewards/margins": 0.04593981057405472,
"rewards/rejected": -0.20173780620098114,
"step": 336
},
{
"epoch": 0.9306178805661028,
"grad_norm": 0.18808571994304657,
"learning_rate": 4.366773156178413e-06,
"log_odds_chosen": 0.42912667989730835,
"log_odds_ratio": -0.5055819153785706,
"logits/chosen": -0.3135528564453125,
"logits/rejected": -1.5518022775650024,
"logps/chosen": -1.481281042098999,
"logps/rejected": -1.826270341873169,
"loss": 1.5991,
"nll_loss": 1.5485769510269165,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.14812810719013214,
"rewards/margins": 0.0344989076256752,
"rewards/rejected": -0.18262703716754913,
"step": 337
},
{
"epoch": 0.9333793579565067,
"grad_norm": 0.21728971600532532,
"learning_rate": 4.361432879092518e-06,
"log_odds_chosen": 0.5635970234870911,
"log_odds_ratio": -0.4642696976661682,
"logits/chosen": -0.32608091831207275,
"logits/rejected": -1.478355050086975,
"logps/chosen": -1.5051325559616089,
"logps/rejected": -1.974266529083252,
"loss": 1.6273,
"nll_loss": 1.5808420181274414,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.15051327645778656,
"rewards/margins": 0.04691339656710625,
"rewards/rejected": -0.1974266618490219,
"step": 338
},
{
"epoch": 0.9361408353469106,
"grad_norm": 0.20923133194446564,
"learning_rate": 4.356073472903747e-06,
"log_odds_chosen": 0.5899335145950317,
"log_odds_ratio": -0.44590240716934204,
"logits/chosen": -0.17045272886753082,
"logits/rejected": -1.5179771184921265,
"logps/chosen": -1.4507163763046265,
"logps/rejected": -1.9340299367904663,
"loss": 1.5672,
"nll_loss": 1.5226441621780396,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.14507164061069489,
"rewards/margins": 0.048331368714571,
"rewards/rejected": -0.1934029906988144,
"step": 339
},
{
"epoch": 0.9389023127373145,
"grad_norm": 0.20438268780708313,
"learning_rate": 4.350694992688289e-06,
"log_odds_chosen": 0.6362269520759583,
"log_odds_ratio": -0.43733319640159607,
"logits/chosen": -0.14821594953536987,
"logits/rejected": -1.5649621486663818,
"logps/chosen": -1.4848688840866089,
"logps/rejected": -2.009694814682007,
"loss": 1.6031,
"nll_loss": 1.5594022274017334,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1484868973493576,
"rewards/margins": 0.05248260498046875,
"rewards/rejected": -0.20096951723098755,
"step": 340
},
{
"epoch": 0.9416637901277183,
"grad_norm": 0.20621763169765472,
"learning_rate": 4.345297493718352e-06,
"log_odds_chosen": 0.547203540802002,
"log_odds_ratio": -0.462637722492218,
"logits/chosen": -0.31976550817489624,
"logits/rejected": -1.318708062171936,
"logps/chosen": -1.4836325645446777,
"logps/rejected": -1.930004358291626,
"loss": 1.6093,
"nll_loss": 1.563034176826477,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.14836326241493225,
"rewards/margins": 0.044637180864810944,
"rewards/rejected": -0.1930004358291626,
"step": 341
},
{
"epoch": 0.9444252675181222,
"grad_norm": 0.18609599769115448,
"learning_rate": 4.339881031461588e-06,
"log_odds_chosen": 0.47142109274864197,
"log_odds_ratio": -0.48913687467575073,
"logits/chosen": -0.26566094160079956,
"logits/rejected": -1.550631046295166,
"logps/chosen": -1.4443541765213013,
"logps/rejected": -1.8148910999298096,
"loss": 1.578,
"nll_loss": 1.5290637016296387,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.14443542063236237,
"rewards/margins": 0.03705369308590889,
"rewards/rejected": -0.18148910999298096,
"step": 342
},
{
"epoch": 0.9471867449085261,
"grad_norm": 0.1996508240699768,
"learning_rate": 4.334445661580527e-06,
"log_odds_chosen": 0.5693928599357605,
"log_odds_ratio": -0.4520787000656128,
"logits/chosen": -0.33670923113822937,
"logits/rejected": -1.7492713928222656,
"logps/chosen": -1.5564992427825928,
"logps/rejected": -2.030904531478882,
"loss": 1.6725,
"nll_loss": 1.6272705793380737,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.15564994513988495,
"rewards/margins": 0.04744052141904831,
"rewards/rejected": -0.20309044420719147,
"step": 343
},
{
"epoch": 0.94994822229893,
"grad_norm": 0.19215835630893707,
"learning_rate": 4.328991439932003e-06,
"log_odds_chosen": 0.6314505934715271,
"log_odds_ratio": -0.4288046658039093,
"logits/chosen": -0.29215654730796814,
"logits/rejected": -1.5209310054779053,
"logps/chosen": -1.486976981163025,
"logps/rejected": -2.0075106620788574,
"loss": 1.6116,
"nll_loss": 1.568747878074646,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.14869770407676697,
"rewards/margins": 0.05205334722995758,
"rewards/rejected": -0.20075105130672455,
"step": 344
},
{
"epoch": 0.9527096996893338,
"grad_norm": 0.20573773980140686,
"learning_rate": 4.323518422566586e-06,
"log_odds_chosen": 0.7072029113769531,
"log_odds_ratio": -0.40426695346832275,
"logits/chosen": -0.23872965574264526,
"logits/rejected": -1.5439509153366089,
"logps/chosen": -1.5577975511550903,
"logps/rejected": -2.149085760116577,
"loss": 1.6903,
"nll_loss": 1.649876594543457,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.15577976405620575,
"rewards/margins": 0.059128809720277786,
"rewards/rejected": -0.21490855515003204,
"step": 345
},
{
"epoch": 0.9554711770797376,
"grad_norm": 0.1889713704586029,
"learning_rate": 4.318026665727993e-06,
"log_odds_chosen": 0.6957321166992188,
"log_odds_ratio": -0.411748468875885,
"logits/chosen": -0.3132311701774597,
"logits/rejected": -1.7383476495742798,
"logps/chosen": -1.4288955926895142,
"logps/rejected": -1.9945143461227417,
"loss": 1.5422,
"nll_loss": 1.501028299331665,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.14288955926895142,
"rewards/margins": 0.056561872363090515,
"rewards/rejected": -0.19945143163204193,
"step": 346
},
{
"epoch": 0.9582326544701415,
"grad_norm": 0.21370814740657806,
"learning_rate": 4.3125162258525265e-06,
"log_odds_chosen": 0.37931889295578003,
"log_odds_ratio": -0.5239338874816895,
"logits/chosen": -0.34714582562446594,
"logits/rejected": -1.3666698932647705,
"logps/chosen": -1.6313103437423706,
"logps/rejected": -1.9475239515304565,
"loss": 1.7528,
"nll_loss": 1.7004497051239014,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.16313102841377258,
"rewards/margins": 0.031621355563402176,
"rewards/rejected": -0.19475241005420685,
"step": 347
},
{
"epoch": 0.9609941318605454,
"grad_norm": 0.20202378928661346,
"learning_rate": 4.3069871595684795e-06,
"log_odds_chosen": 0.5231636762619019,
"log_odds_ratio": -0.4718437194824219,
"logits/chosen": -0.3397434949874878,
"logits/rejected": -1.5151662826538086,
"logps/chosen": -1.4959080219268799,
"logps/rejected": -1.9243054389953613,
"loss": 1.6233,
"nll_loss": 1.5760971307754517,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.14959080517292023,
"rewards/margins": 0.04283975064754486,
"rewards/rejected": -0.1924305558204651,
"step": 348
},
{
"epoch": 0.9637556092509493,
"grad_norm": 0.1996474266052246,
"learning_rate": 4.3014395236955635e-06,
"log_odds_chosen": 0.6398702263832092,
"log_odds_ratio": -0.4294753670692444,
"logits/chosen": -0.2723667621612549,
"logits/rejected": -1.6896021366119385,
"logps/chosen": -1.4915456771850586,
"logps/rejected": -2.0213863849639893,
"loss": 1.6075,
"nll_loss": 1.5645424127578735,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.14915457367897034,
"rewards/margins": 0.0529840886592865,
"rewards/rejected": -0.20213866233825684,
"step": 349
},
{
"epoch": 0.9665170866413532,
"grad_norm": 0.18923406302928925,
"learning_rate": 4.295873375244319e-06,
"log_odds_chosen": 0.5657058358192444,
"log_odds_ratio": -0.4569021463394165,
"logits/chosen": -0.2960559129714966,
"logits/rejected": -1.5297155380249023,
"logps/chosen": -1.446962833404541,
"logps/rejected": -1.9055999517440796,
"loss": 1.5727,
"nll_loss": 1.527005672454834,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.14469628036022186,
"rewards/margins": 0.045863717794418335,
"rewards/rejected": -0.1905599981546402,
"step": 350
},
{
"epoch": 0.9692785640317569,
"grad_norm": 0.2020733803510666,
"learning_rate": 4.290288771415536e-06,
"log_odds_chosen": 0.47521698474884033,
"log_odds_ratio": -0.48997536301612854,
"logits/chosen": -0.2961908280849457,
"logits/rejected": -1.738245964050293,
"logps/chosen": -1.6070820093154907,
"logps/rejected": -2.003466844558716,
"loss": 1.7209,
"nll_loss": 1.6719499826431274,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.16070818901062012,
"rewards/margins": 0.039638496935367584,
"rewards/rejected": -0.2003466784954071,
"step": 351
},
{
"epoch": 0.9720400414221608,
"grad_norm": 0.19740743935108185,
"learning_rate": 4.284685769599658e-06,
"log_odds_chosen": 0.5527662038803101,
"log_odds_ratio": -0.4617147445678711,
"logits/chosen": -0.3190363645553589,
"logits/rejected": -1.6859633922576904,
"logps/chosen": -1.5147120952606201,
"logps/rejected": -1.9768071174621582,
"loss": 1.6243,
"nll_loss": 1.5781550407409668,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.15147122740745544,
"rewards/margins": 0.04620949178934097,
"rewards/rejected": -0.19768072664737701,
"step": 352
},
{
"epoch": 0.9748015188125647,
"grad_norm": 0.19644393026828766,
"learning_rate": 4.279064427376199e-06,
"log_odds_chosen": 0.5512232780456543,
"log_odds_ratio": -0.45938748121261597,
"logits/chosen": -0.2991534173488617,
"logits/rejected": -1.5584678649902344,
"logps/chosen": -1.5441019535064697,
"logps/rejected": -2.002403736114502,
"loss": 1.6453,
"nll_loss": 1.5993587970733643,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1544102132320404,
"rewards/margins": 0.045830175280570984,
"rewards/rejected": -0.2002403736114502,
"step": 353
},
{
"epoch": 0.9775629962029686,
"grad_norm": 0.19680863618850708,
"learning_rate": 4.273424802513145e-06,
"log_odds_chosen": 0.5857518911361694,
"log_odds_ratio": -0.4452309012413025,
"logits/chosen": -0.2941315174102783,
"logits/rejected": -1.6820390224456787,
"logps/chosen": -1.5470666885375977,
"logps/rejected": -2.031536102294922,
"loss": 1.6621,
"nll_loss": 1.6175867319107056,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1547066569328308,
"rewards/margins": 0.04844695329666138,
"rewards/rejected": -0.2031536102294922,
"step": 354
},
{
"epoch": 0.9803244735933725,
"grad_norm": 0.18272706866264343,
"learning_rate": 4.267766952966369e-06,
"log_odds_chosen": 0.7239515781402588,
"log_odds_ratio": -0.4105169177055359,
"logits/chosen": -0.23595450818538666,
"logits/rejected": -1.5807809829711914,
"logps/chosen": -1.3758444786071777,
"logps/rejected": -1.9615696668624878,
"loss": 1.4949,
"nll_loss": 1.4538719654083252,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.13758444786071777,
"rewards/margins": 0.058572523295879364,
"rewards/rejected": -0.19615697860717773,
"step": 355
},
{
"epoch": 0.9830859509837763,
"grad_norm": 0.19880905747413635,
"learning_rate": 4.26209093687903e-06,
"log_odds_chosen": 0.4604690670967102,
"log_odds_ratio": -0.4963679313659668,
"logits/chosen": -0.30030739307403564,
"logits/rejected": -1.8233280181884766,
"logps/chosen": -1.5396702289581299,
"logps/rejected": -1.922170639038086,
"loss": 1.6588,
"nll_loss": 1.6091995239257812,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.153967022895813,
"rewards/margins": 0.038250040262937546,
"rewards/rejected": -0.19221706688404083,
"step": 356
},
{
"epoch": 0.9858474283741802,
"grad_norm": 0.19747234880924225,
"learning_rate": 4.2563968125809734e-06,
"log_odds_chosen": 0.5938950777053833,
"log_odds_ratio": -0.44322288036346436,
"logits/chosen": -0.17701445519924164,
"logits/rejected": -1.6618235111236572,
"logps/chosen": -1.60313880443573,
"logps/rejected": -2.101062774658203,
"loss": 1.7054,
"nll_loss": 1.6611095666885376,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.16031388938426971,
"rewards/margins": 0.04979238659143448,
"rewards/rejected": -0.2101062536239624,
"step": 357
},
{
"epoch": 0.988608905764584,
"grad_norm": 0.19918540120124817,
"learning_rate": 4.2506846385881375e-06,
"log_odds_chosen": 0.6773942708969116,
"log_odds_ratio": -0.4168775975704193,
"logits/chosen": -0.4877479672431946,
"logits/rejected": -1.6255730390548706,
"logps/chosen": -1.409053087234497,
"logps/rejected": -1.9528459310531616,
"loss": 1.5337,
"nll_loss": 1.491982340812683,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.14090532064437866,
"rewards/margins": 0.05437929555773735,
"rewards/rejected": -0.19528460502624512,
"step": 358
},
{
"epoch": 0.9913703831549879,
"grad_norm": 0.1901620328426361,
"learning_rate": 4.2449544736019486e-06,
"log_odds_chosen": 0.5646210312843323,
"log_odds_ratio": -0.45523136854171753,
"logits/chosen": -0.22285765409469604,
"logits/rejected": -1.4501441717147827,
"logps/chosen": -1.479009985923767,
"logps/rejected": -1.9385974407196045,
"loss": 1.5958,
"nll_loss": 1.5503125190734863,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1479010134935379,
"rewards/margins": 0.045958735048770905,
"rewards/rejected": -0.1938597410917282,
"step": 359
},
{
"epoch": 0.9941318605453918,
"grad_norm": 0.18198014795780182,
"learning_rate": 4.239206376508716e-06,
"log_odds_chosen": 0.7485941648483276,
"log_odds_ratio": -0.39402005076408386,
"logits/chosen": -0.25095412135124207,
"logits/rejected": -1.9625945091247559,
"logps/chosen": -1.4998773336410522,
"logps/rejected": -2.127000570297241,
"loss": 1.6257,
"nll_loss": 1.5863466262817383,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.14998774230480194,
"rewards/margins": 0.06271231919527054,
"rewards/rejected": -0.21270006895065308,
"step": 360
},
{
"epoch": 0.9968933379357956,
"grad_norm": 0.18113547563552856,
"learning_rate": 4.233440406379032e-06,
"log_odds_chosen": 0.560468852519989,
"log_odds_ratio": -0.45691755414009094,
"logits/chosen": -0.35995978116989136,
"logits/rejected": -1.630347490310669,
"logps/chosen": -1.4223926067352295,
"logps/rejected": -1.871058702468872,
"loss": 1.5472,
"nll_loss": 1.501501202583313,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1422392576932907,
"rewards/margins": 0.04486660659313202,
"rewards/rejected": -0.18710586428642273,
"step": 361
},
{
"epoch": 0.9996548153261995,
"grad_norm": 0.1977638155221939,
"learning_rate": 4.227656622467162e-06,
"log_odds_chosen": 0.6151151657104492,
"log_odds_ratio": -0.4389611482620239,
"logits/chosen": -0.34485432505607605,
"logits/rejected": -1.5607213973999023,
"logps/chosen": -1.528998851776123,
"logps/rejected": -2.0421009063720703,
"loss": 1.6442,
"nll_loss": 1.6003334522247314,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.15289989113807678,
"rewards/margins": 0.05131019651889801,
"rewards/rejected": -0.2042100876569748,
"step": 362
},
{
"epoch": 1.0,
"grad_norm": 0.4796704351902008,
"learning_rate": 4.221855084210433e-06,
"log_odds_chosen": 0.42105579376220703,
"log_odds_ratio": -0.5046184659004211,
"logits/chosen": -0.522539496421814,
"logits/rejected": -2.0535831451416016,
"logps/chosen": -1.5670783519744873,
"logps/rejected": -1.9137303829193115,
"loss": 1.6859,
"nll_loss": 1.635398030281067,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.15670783817768097,
"rewards/margins": 0.034665197134017944,
"rewards/rejected": -0.1913730353116989,
"step": 363
},
{
"epoch": 1.0027614773904039,
"grad_norm": 0.2030143141746521,
"learning_rate": 4.2160358512286266e-06,
"log_odds_chosen": 0.5737169981002808,
"log_odds_ratio": -0.45416390895843506,
"logits/chosen": -0.2930692732334137,
"logits/rejected": -1.6667262315750122,
"logps/chosen": -1.5078061819076538,
"logps/rejected": -1.9778721332550049,
"loss": 1.6304,
"nll_loss": 1.5849525928497314,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1507806032896042,
"rewards/margins": 0.04700660705566406,
"rewards/rejected": -0.19778722524642944,
"step": 364
},
{
"epoch": 1.0055229547808078,
"grad_norm": 0.18416902422904968,
"learning_rate": 4.210198983323366e-06,
"log_odds_chosen": 0.6264990568161011,
"log_odds_ratio": -0.4338659346103668,
"logits/chosen": -0.2520604133605957,
"logits/rejected": -1.8486303091049194,
"logps/chosen": -1.5509365797042847,
"logps/rejected": -2.071831703186035,
"loss": 1.6494,
"nll_loss": 1.606053113937378,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.15509365499019623,
"rewards/margins": 0.052089497447013855,
"rewards/rejected": -0.20718316733837128,
"step": 365
},
{
"epoch": 1.0082844321712117,
"grad_norm": 0.19729849696159363,
"learning_rate": 4.204344540477499e-06,
"log_odds_chosen": 0.6402697563171387,
"log_odds_ratio": -0.429515540599823,
"logits/chosen": -0.288376122713089,
"logits/rejected": -1.6332948207855225,
"logps/chosen": -1.5338780879974365,
"logps/rejected": -2.068819046020508,
"loss": 1.6531,
"nll_loss": 1.6101782321929932,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.15338779985904694,
"rewards/margins": 0.053494103252887726,
"rewards/rejected": -0.20688191056251526,
"step": 366
},
{
"epoch": 1.0110459095616156,
"grad_norm": 0.18270985782146454,
"learning_rate": 4.1984725828544855e-06,
"log_odds_chosen": 0.6716368198394775,
"log_odds_ratio": -0.41607195138931274,
"logits/chosen": -0.27237242460250854,
"logits/rejected": -1.8720418214797974,
"logps/chosen": -1.4672292470932007,
"logps/rejected": -2.0190887451171875,
"loss": 1.5786,
"nll_loss": 1.5369707345962524,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1467229127883911,
"rewards/margins": 0.0551859587430954,
"rewards/rejected": -0.2019088715314865,
"step": 367
},
{
"epoch": 1.0138073869520194,
"grad_norm": 0.20039811730384827,
"learning_rate": 4.192583170797775e-06,
"log_odds_chosen": 0.5032901167869568,
"log_odds_ratio": -0.4751865863800049,
"logits/chosen": -0.3330274224281311,
"logits/rejected": -1.4479092359542847,
"logps/chosen": -1.5037407875061035,
"logps/rejected": -1.9160493612289429,
"loss": 1.6307,
"nll_loss": 1.5831364393234253,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.15037408471107483,
"rewards/margins": 0.041230857372283936,
"rewards/rejected": -0.19160494208335876,
"step": 368
},
{
"epoch": 1.016568864342423,
"grad_norm": 0.1855594515800476,
"learning_rate": 4.186676364830187e-06,
"log_odds_chosen": 0.5586062669754028,
"log_odds_ratio": -0.45506125688552856,
"logits/chosen": -0.23738795518875122,
"logits/rejected": -1.6253269910812378,
"logps/chosen": -1.566367745399475,
"logps/rejected": -2.030472755432129,
"loss": 1.6675,
"nll_loss": 1.6219737529754639,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1566367745399475,
"rewards/margins": 0.04641049727797508,
"rewards/rejected": -0.20304730534553528,
"step": 369
},
{
"epoch": 1.019330341732827,
"grad_norm": 0.2150687575340271,
"learning_rate": 4.1807522256532925e-06,
"log_odds_chosen": 0.6354942321777344,
"log_odds_ratio": -0.43560659885406494,
"logits/chosen": -0.29759910702705383,
"logits/rejected": -1.7936424016952515,
"logps/chosen": -1.621231198310852,
"logps/rejected": -2.1618242263793945,
"loss": 1.7339,
"nll_loss": 1.6903626918792725,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.16212311387062073,
"rewards/margins": 0.054059334099292755,
"rewards/rejected": -0.21618244051933289,
"step": 370
},
{
"epoch": 1.0220918191232309,
"grad_norm": 0.18705667555332184,
"learning_rate": 4.174810814146789e-06,
"log_odds_chosen": 0.6377235651016235,
"log_odds_ratio": -0.4334976375102997,
"logits/chosen": -0.17882226407527924,
"logits/rejected": -1.4699535369873047,
"logps/chosen": -1.5574746131896973,
"logps/rejected": -2.092029094696045,
"loss": 1.6697,
"nll_loss": 1.626399040222168,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1557474583387375,
"rewards/margins": 0.05345546454191208,
"rewards/rejected": -0.20920291543006897,
"step": 371
},
{
"epoch": 1.0248532965136348,
"grad_norm": 0.18511676788330078,
"learning_rate": 4.1688521913678706e-06,
"log_odds_chosen": 0.7460950613021851,
"log_odds_ratio": -0.39828044176101685,
"logits/chosen": -0.24553045630455017,
"logits/rejected": -1.5528843402862549,
"logps/chosen": -1.506624460220337,
"logps/rejected": -2.1312265396118164,
"loss": 1.6125,
"nll_loss": 1.5727205276489258,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.15066243708133698,
"rewards/margins": 0.06246021389961243,
"rewards/rejected": -0.2131226509809494,
"step": 372
},
{
"epoch": 1.0276147739040387,
"grad_norm": 0.18531759083271027,
"learning_rate": 4.162876418550606e-06,
"log_odds_chosen": 0.5151762962341309,
"log_odds_ratio": -0.47239360213279724,
"logits/chosen": -0.3635602593421936,
"logits/rejected": -1.655716061592102,
"logps/chosen": -1.509564995765686,
"logps/rejected": -1.9321736097335815,
"loss": 1.6228,
"nll_loss": 1.5756025314331055,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.15095649659633636,
"rewards/margins": 0.042260847985744476,
"rewards/rejected": -0.19321735203266144,
"step": 373
},
{
"epoch": 1.0303762512944425,
"grad_norm": 0.19160981476306915,
"learning_rate": 4.156883557105308e-06,
"log_odds_chosen": 0.537889301776886,
"log_odds_ratio": -0.46930158138275146,
"logits/chosen": -0.3415728509426117,
"logits/rejected": -1.7609002590179443,
"logps/chosen": -1.4833260774612427,
"logps/rejected": -1.925763726234436,
"loss": 1.5902,
"nll_loss": 1.5432225465774536,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1483326107263565,
"rewards/margins": 0.044243764132261276,
"rewards/rejected": -0.1925763636827469,
"step": 374
},
{
"epoch": 1.0331377286848464,
"grad_norm": 0.2014252096414566,
"learning_rate": 4.150873668617899e-06,
"log_odds_chosen": 0.5346254110336304,
"log_odds_ratio": -0.4631119966506958,
"logits/chosen": -0.4020942449569702,
"logits/rejected": -1.6286017894744873,
"logps/chosen": -1.554813265800476,
"logps/rejected": -1.9967365264892578,
"loss": 1.6587,
"nll_loss": 1.6124264001846313,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.15548132359981537,
"rewards/margins": 0.0441923588514328,
"rewards/rejected": -0.19967366755008698,
"step": 375
},
{
"epoch": 1.0358992060752503,
"grad_norm": 0.1909962296485901,
"learning_rate": 4.144846814849282e-06,
"log_odds_chosen": 0.4685186743736267,
"log_odds_ratio": -0.4881165623664856,
"logits/chosen": -0.19894683361053467,
"logits/rejected": -1.4885890483856201,
"logps/chosen": -1.5590081214904785,
"logps/rejected": -1.9445595741271973,
"loss": 1.6834,
"nll_loss": 1.634606957435608,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.15590080618858337,
"rewards/margins": 0.03855516016483307,
"rewards/rejected": -0.19445598125457764,
"step": 376
},
{
"epoch": 1.0386606834656542,
"grad_norm": 0.1877821981906891,
"learning_rate": 4.138803057734705e-06,
"log_odds_chosen": 0.7200895547866821,
"log_odds_ratio": -0.4008093476295471,
"logits/chosen": -0.25993677973747253,
"logits/rejected": -1.574484944343567,
"logps/chosen": -1.456033706665039,
"logps/rejected": -2.0489182472229004,
"loss": 1.5752,
"nll_loss": 1.5350782871246338,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.14560337364673615,
"rewards/margins": 0.05928843468427658,
"rewards/rejected": -0.20489181578159332,
"step": 377
},
{
"epoch": 1.041422160856058,
"grad_norm": 0.17346148192882538,
"learning_rate": 4.132742459383122e-06,
"log_odds_chosen": 0.7830832004547119,
"log_odds_ratio": -0.3833864629268646,
"logits/chosen": -0.27740198373794556,
"logits/rejected": -1.7669141292572021,
"logps/chosen": -1.380853533744812,
"logps/rejected": -2.0105137825012207,
"loss": 1.4985,
"nll_loss": 1.4602102041244507,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.13808535039424896,
"rewards/margins": 0.06296603381633759,
"rewards/rejected": -0.20105136930942535,
"step": 378
},
{
"epoch": 1.0441836382464618,
"grad_norm": 0.183350071310997,
"learning_rate": 4.126665082076559e-06,
"log_odds_chosen": 0.5126218795776367,
"log_odds_ratio": -0.47373971343040466,
"logits/chosen": -0.3141680955886841,
"logits/rejected": -1.3418911695480347,
"logps/chosen": -1.516774296760559,
"logps/rejected": -1.933377742767334,
"loss": 1.6442,
"nll_loss": 1.5968701839447021,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1516774296760559,
"rewards/margins": 0.04166034609079361,
"rewards/rejected": -0.19333778321743011,
"step": 379
},
{
"epoch": 1.0469451156368657,
"grad_norm": 0.18558627367019653,
"learning_rate": 4.120570988269472e-06,
"log_odds_chosen": 0.651531994342804,
"log_odds_ratio": -0.423582524061203,
"logits/chosen": -0.38820552825927734,
"logits/rejected": -1.826667070388794,
"logps/chosen": -1.5054341554641724,
"logps/rejected": -2.0457534790039062,
"loss": 1.5926,
"nll_loss": 1.5502351522445679,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1505434215068817,
"rewards/margins": 0.05403192341327667,
"rewards/rejected": -0.20457535982131958,
"step": 380
},
{
"epoch": 1.0497065930272695,
"grad_norm": 0.19876988232135773,
"learning_rate": 4.114460240588101e-06,
"log_odds_chosen": 0.7609922885894775,
"log_odds_ratio": -0.3903619050979614,
"logits/chosen": -0.28415167331695557,
"logits/rejected": -1.509800910949707,
"logps/chosen": -1.5427088737487793,
"logps/rejected": -2.184783458709717,
"loss": 1.6545,
"nll_loss": 1.6154515743255615,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.15427090227603912,
"rewards/margins": 0.06420743465423584,
"rewards/rejected": -0.21847833693027496,
"step": 381
},
{
"epoch": 1.0524680704176734,
"grad_norm": 0.17985652387142181,
"learning_rate": 4.1083329018298356e-06,
"log_odds_chosen": 0.7214679718017578,
"log_odds_ratio": -0.401907354593277,
"logits/chosen": -0.2644188404083252,
"logits/rejected": -1.569467544555664,
"logps/chosen": -1.4487037658691406,
"logps/rejected": -2.043267250061035,
"loss": 1.5525,
"nll_loss": 1.5123515129089355,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.14487037062644958,
"rewards/margins": 0.05945635586977005,
"rewards/rejected": -0.20432673394680023,
"step": 382
},
{
"epoch": 1.0552295478080773,
"grad_norm": 0.19042105972766876,
"learning_rate": 4.102189034962561e-06,
"log_odds_chosen": 0.6154542565345764,
"log_odds_ratio": -0.4352980852127075,
"logits/chosen": -0.21143808960914612,
"logits/rejected": -1.4588818550109863,
"logps/chosen": -1.583728551864624,
"logps/rejected": -2.1008310317993164,
"loss": 1.6812,
"nll_loss": 1.6376571655273438,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.15837284922599792,
"rewards/margins": 0.051710255444049835,
"rewards/rejected": -0.21008309721946716,
"step": 383
},
{
"epoch": 1.0579910251984812,
"grad_norm": 0.202115997672081,
"learning_rate": 4.096028703124014e-06,
"log_odds_chosen": 0.6282423138618469,
"log_odds_ratio": -0.43798086047172546,
"logits/chosen": -0.3399620056152344,
"logits/rejected": -1.302710771560669,
"logps/chosen": -1.5835597515106201,
"logps/rejected": -2.1131136417388916,
"loss": 1.67,
"nll_loss": 1.6261749267578125,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1583559811115265,
"rewards/margins": 0.05295538902282715,
"rewards/rejected": -0.21131137013435364,
"step": 384
},
{
"epoch": 1.060752502588885,
"grad_norm": 0.18484216928482056,
"learning_rate": 4.089851969621138e-06,
"log_odds_chosen": 0.7135899662971497,
"log_odds_ratio": -0.41620948910713196,
"logits/chosen": -0.28658849000930786,
"logits/rejected": -1.5913586616516113,
"logps/chosen": -1.425489902496338,
"logps/rejected": -2.015927791595459,
"loss": 1.5551,
"nll_loss": 1.5135202407836914,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.14254900813102722,
"rewards/margins": 0.059043798595666885,
"rewards/rejected": -0.20159278810024261,
"step": 385
},
{
"epoch": 1.063513979979289,
"grad_norm": 0.20954498648643494,
"learning_rate": 4.083658897929425e-06,
"log_odds_chosen": 0.6113450527191162,
"log_odds_ratio": -0.4348878562450409,
"logits/chosen": -0.3510580360889435,
"logits/rejected": -1.5097743272781372,
"logps/chosen": -1.5490520000457764,
"logps/rejected": -2.056283950805664,
"loss": 1.6599,
"nll_loss": 1.616385817527771,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.15490520000457764,
"rewards/margins": 0.050723206251859665,
"rewards/rejected": -0.2056283950805664,
"step": 386
},
{
"epoch": 1.0662754573696929,
"grad_norm": 0.19627171754837036,
"learning_rate": 4.077449551692268e-06,
"log_odds_chosen": 0.5390780568122864,
"log_odds_ratio": -0.46470367908477783,
"logits/chosen": -0.23739197850227356,
"logits/rejected": -1.621540904045105,
"logps/chosen": -1.555280089378357,
"logps/rejected": -2.001728057861328,
"loss": 1.6682,
"nll_loss": 1.6217542886734009,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1555280089378357,
"rewards/margins": 0.044644795358181,
"rewards/rejected": -0.20017282664775848,
"step": 387
},
{
"epoch": 1.0690369347600965,
"grad_norm": 0.1885172724723816,
"learning_rate": 4.071223994720309e-06,
"log_odds_chosen": 0.7198303937911987,
"log_odds_ratio": -0.40047964453697205,
"logits/chosen": -0.38902541995048523,
"logits/rejected": -1.71836519241333,
"logps/chosen": -1.4929300546646118,
"logps/rejected": -2.0929007530212402,
"loss": 1.6048,
"nll_loss": 1.5647118091583252,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.14929300546646118,
"rewards/margins": 0.0599970780313015,
"rewards/rejected": -0.20929010212421417,
"step": 388
},
{
"epoch": 1.0717984121505004,
"grad_norm": 0.20288851857185364,
"learning_rate": 4.064982290990777e-06,
"log_odds_chosen": 0.5762845277786255,
"log_odds_ratio": -0.45192721486091614,
"logits/chosen": -0.3292975425720215,
"logits/rejected": -1.4449775218963623,
"logps/chosen": -1.4430099725723267,
"logps/rejected": -1.9106184244155884,
"loss": 1.5566,
"nll_loss": 1.511430263519287,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.14430099725723267,
"rewards/margins": 0.04676084965467453,
"rewards/rejected": -0.1910618394613266,
"step": 389
},
{
"epoch": 1.0745598895409043,
"grad_norm": 0.18857373297214508,
"learning_rate": 4.058724504646834e-06,
"log_odds_chosen": 0.698142945766449,
"log_odds_ratio": -0.41126978397369385,
"logits/chosen": -0.3490840494632721,
"logits/rejected": -1.5612983703613281,
"logps/chosen": -1.4951398372650146,
"logps/rejected": -2.07562255859375,
"loss": 1.6046,
"nll_loss": 1.563432216644287,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.14951398968696594,
"rewards/margins": 0.058048274368047714,
"rewards/rejected": -0.20756226778030396,
"step": 390
},
{
"epoch": 1.0773213669313082,
"grad_norm": 0.18817338347434998,
"learning_rate": 4.0524506999969185e-06,
"log_odds_chosen": 0.5495921969413757,
"log_odds_ratio": -0.46190890669822693,
"logits/chosen": -0.32165098190307617,
"logits/rejected": -1.7347975969314575,
"logps/chosen": -1.5509448051452637,
"logps/rejected": -2.0019354820251465,
"loss": 1.6579,
"nll_loss": 1.6116652488708496,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.15509448945522308,
"rewards/margins": 0.045099057257175446,
"rewards/rejected": -0.20019352436065674,
"step": 391
},
{
"epoch": 1.080082844321712,
"grad_norm": 0.18304969370365143,
"learning_rate": 4.046160941514079e-06,
"log_odds_chosen": 0.7243356704711914,
"log_odds_ratio": -0.4019787311553955,
"logits/chosen": -0.20833127200603485,
"logits/rejected": -1.3766155242919922,
"logps/chosen": -1.4449529647827148,
"logps/rejected": -2.0372161865234375,
"loss": 1.5421,
"nll_loss": 1.5019174814224243,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.14449530839920044,
"rewards/margins": 0.05922630429267883,
"rewards/rejected": -0.20372159779071808,
"step": 392
},
{
"epoch": 1.082844321712116,
"grad_norm": 0.23034730553627014,
"learning_rate": 4.039855293835316e-06,
"log_odds_chosen": 0.6121989488601685,
"log_odds_ratio": -0.4352457523345947,
"logits/chosen": -0.37496262788772583,
"logits/rejected": -1.6959614753723145,
"logps/chosen": -1.4758626222610474,
"logps/rejected": -1.9751558303833008,
"loss": 1.5737,
"nll_loss": 1.530150294303894,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.14758625626564026,
"rewards/margins": 0.04992932081222534,
"rewards/rejected": -0.1975155770778656,
"step": 393
},
{
"epoch": 1.0856057991025199,
"grad_norm": 0.17257992923259735,
"learning_rate": 4.033533821760917e-06,
"log_odds_chosen": 0.7732821702957153,
"log_odds_ratio": -0.3887802064418793,
"logits/chosen": -0.3277055025100708,
"logits/rejected": -1.868857741355896,
"logps/chosen": -1.4420204162597656,
"logps/rejected": -2.0812883377075195,
"loss": 1.5466,
"nll_loss": 1.5076910257339478,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.14420203864574432,
"rewards/margins": 0.06392678618431091,
"rewards/rejected": -0.20812883973121643,
"step": 394
},
{
"epoch": 1.0883672764929238,
"grad_norm": 0.1858333796262741,
"learning_rate": 4.027196590253786e-06,
"log_odds_chosen": 0.4944222569465637,
"log_odds_ratio": -0.4831688404083252,
"logits/chosen": -0.28738462924957275,
"logits/rejected": -1.3927885293960571,
"logps/chosen": -1.4958670139312744,
"logps/rejected": -1.8963196277618408,
"loss": 1.6172,
"nll_loss": 1.5688387155532837,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.14958669245243073,
"rewards/margins": 0.04004526883363724,
"rewards/rejected": -0.18963195383548737,
"step": 395
},
{
"epoch": 1.0911287538833276,
"grad_norm": 0.1886051446199417,
"learning_rate": 4.020843664438783e-06,
"log_odds_chosen": 0.6189723610877991,
"log_odds_ratio": -0.4430191218852997,
"logits/chosen": -0.30028706789016724,
"logits/rejected": -1.7108986377716064,
"logps/chosen": -1.446290135383606,
"logps/rejected": -1.9520014524459839,
"loss": 1.5704,
"nll_loss": 1.5261311531066895,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.14462901651859283,
"rewards/margins": 0.05057113245129585,
"rewards/rejected": -0.1952001452445984,
"step": 396
},
{
"epoch": 1.0938902312737315,
"grad_norm": 0.189390167593956,
"learning_rate": 4.01447510960205e-06,
"log_odds_chosen": 0.7106426358222961,
"log_odds_ratio": -0.40562015771865845,
"logits/chosen": -0.4158485531806946,
"logits/rejected": -1.7321726083755493,
"logps/chosen": -1.5334806442260742,
"logps/rejected": -2.1250576972961426,
"loss": 1.6332,
"nll_loss": 1.592668890953064,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.15334807336330414,
"rewards/margins": 0.059157684445381165,
"rewards/rejected": -0.2125057429075241,
"step": 397
},
{
"epoch": 1.0966517086641354,
"grad_norm": 0.19324593245983124,
"learning_rate": 4.008090991190341e-06,
"log_odds_chosen": 0.49737095832824707,
"log_odds_ratio": -0.47871023416519165,
"logits/chosen": -0.20496344566345215,
"logits/rejected": -1.5982781648635864,
"logps/chosen": -1.570819616317749,
"logps/rejected": -1.9830747842788696,
"loss": 1.6702,
"nll_loss": 1.622377634048462,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1570819765329361,
"rewards/margins": 0.04122550040483475,
"rewards/rejected": -0.19830746948719025,
"step": 398
},
{
"epoch": 1.099413186054539,
"grad_norm": 0.17873111367225647,
"learning_rate": 4.001691374810352e-06,
"log_odds_chosen": 0.7614402770996094,
"log_odds_ratio": -0.39445751905441284,
"logits/chosen": -0.27544835209846497,
"logits/rejected": -1.992354154586792,
"logps/chosen": -1.5202884674072266,
"logps/rejected": -2.1587905883789062,
"loss": 1.6232,
"nll_loss": 1.5837651491165161,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.15202882885932922,
"rewards/margins": 0.06385020911693573,
"rewards/rejected": -0.21587903797626495,
"step": 399
},
{
"epoch": 1.102174663444943,
"grad_norm": 0.17042513191699982,
"learning_rate": 3.99527632622804e-06,
"log_odds_chosen": 0.6560631990432739,
"log_odds_ratio": -0.4252350628376007,
"logits/chosen": -0.3314496576786041,
"logits/rejected": -1.559665322303772,
"logps/chosen": -1.4409160614013672,
"logps/rejected": -1.9774514436721802,
"loss": 1.5645,
"nll_loss": 1.5220184326171875,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.14409160614013672,
"rewards/margins": 0.05365355312824249,
"rewards/rejected": -0.19774514436721802,
"step": 400
},
{
"epoch": 1.1049361408353469,
"grad_norm": 0.1929718255996704,
"learning_rate": 3.988845911367957e-06,
"log_odds_chosen": 0.6236564517021179,
"log_odds_ratio": -0.4364378750324249,
"logits/chosen": -0.3177638649940491,
"logits/rejected": -1.5883958339691162,
"logps/chosen": -1.598113775253296,
"logps/rejected": -2.1222128868103027,
"loss": 1.6877,
"nll_loss": 1.644063949584961,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1598113626241684,
"rewards/margins": 0.052409932017326355,
"rewards/rejected": -0.21222129464149475,
"step": 401
},
{
"epoch": 1.1076976182257507,
"grad_norm": 0.17265060544013977,
"learning_rate": 3.982400196312565e-06,
"log_odds_chosen": 0.6908576488494873,
"log_odds_ratio": -0.4076445698738098,
"logits/chosen": -0.28831660747528076,
"logits/rejected": -1.4703201055526733,
"logps/chosen": -1.4102579355239868,
"logps/rejected": -1.9662861824035645,
"loss": 1.5263,
"nll_loss": 1.4855272769927979,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.14102579653263092,
"rewards/margins": 0.05560281127691269,
"rewards/rejected": -0.1966286301612854,
"step": 402
},
{
"epoch": 1.1104590956161546,
"grad_norm": 0.18258056044578552,
"learning_rate": 3.975939247301558e-06,
"log_odds_chosen": 0.6171894669532776,
"log_odds_ratio": -0.43477940559387207,
"logits/chosen": -0.34336021542549133,
"logits/rejected": -1.718552589416504,
"logps/chosen": -1.5480486154556274,
"logps/rejected": -2.061230421066284,
"loss": 1.6639,
"nll_loss": 1.620387077331543,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.15480485558509827,
"rewards/margins": 0.051318198442459106,
"rewards/rejected": -0.20612303912639618,
"step": 403
},
{
"epoch": 1.1132205730065585,
"grad_norm": 0.17894716560840607,
"learning_rate": 3.969463130731183e-06,
"log_odds_chosen": 0.601898729801178,
"log_odds_ratio": -0.44408684968948364,
"logits/chosen": -0.2595762610435486,
"logits/rejected": -1.671970248222351,
"logps/chosen": -1.480761170387268,
"logps/rejected": -1.973817229270935,
"loss": 1.5901,
"nll_loss": 1.5457121133804321,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.148076131939888,
"rewards/margins": 0.049305595457553864,
"rewards/rejected": -0.19738171994686127,
"step": 404
},
{
"epoch": 1.1159820503969624,
"grad_norm": 0.182702898979187,
"learning_rate": 3.9629719131535595e-06,
"log_odds_chosen": 0.5538443922996521,
"log_odds_ratio": -0.463712602853775,
"logits/chosen": -0.3739258646965027,
"logits/rejected": -1.5419493913650513,
"logps/chosen": -1.4951484203338623,
"logps/rejected": -1.9511198997497559,
"loss": 1.6103,
"nll_loss": 1.563896656036377,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.14951485395431519,
"rewards/margins": 0.04559716209769249,
"rewards/rejected": -0.19511200487613678,
"step": 405
},
{
"epoch": 1.1187435277873663,
"grad_norm": 0.1713375300168991,
"learning_rate": 3.9564656612759904e-06,
"log_odds_chosen": 0.6431328058242798,
"log_odds_ratio": -0.42764732241630554,
"logits/chosen": -0.29896292090415955,
"logits/rejected": -1.8772965669631958,
"logps/chosen": -1.4999200105667114,
"logps/rejected": -2.031973361968994,
"loss": 1.6098,
"nll_loss": 1.5670844316482544,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.14999200403690338,
"rewards/margins": 0.05320533737540245,
"rewards/rejected": -0.20319733023643494,
"step": 406
},
{
"epoch": 1.1215050051777702,
"grad_norm": 0.1880570352077484,
"learning_rate": 3.94994444196028e-06,
"log_odds_chosen": 0.6853294968605042,
"log_odds_ratio": -0.41755619645118713,
"logits/chosen": -0.2672858238220215,
"logits/rejected": -1.5635608434677124,
"logps/chosen": -1.5350513458251953,
"logps/rejected": -2.1063671112060547,
"loss": 1.6214,
"nll_loss": 1.579624056816101,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1535051316022873,
"rewards/margins": 0.0571315735578537,
"rewards/rejected": -0.21063672006130219,
"step": 407
},
{
"epoch": 1.124266482568174,
"grad_norm": 0.17640192806720734,
"learning_rate": 3.943408322222049e-06,
"log_odds_chosen": 0.6473301649093628,
"log_odds_ratio": -0.430767297744751,
"logits/chosen": -0.25322699546813965,
"logits/rejected": -1.3362030982971191,
"logps/chosen": -1.4611663818359375,
"logps/rejected": -1.9962221384048462,
"loss": 1.5796,
"nll_loss": 1.5365506410598755,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.14611662924289703,
"rewards/margins": 0.053505584597587585,
"rewards/rejected": -0.19962221384048462,
"step": 408
},
{
"epoch": 1.127027959958578,
"grad_norm": 0.1975967288017273,
"learning_rate": 3.936857369230037e-06,
"log_odds_chosen": 0.5013847351074219,
"log_odds_ratio": -0.48179811239242554,
"logits/chosen": -0.24628515541553497,
"logits/rejected": -1.5175528526306152,
"logps/chosen": -1.5307085514068604,
"logps/rejected": -1.9439282417297363,
"loss": 1.6331,
"nll_loss": 1.584876537322998,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.153070867061615,
"rewards/margins": 0.04132195562124252,
"rewards/rejected": -0.1943928301334381,
"step": 409
},
{
"epoch": 1.1297894373489816,
"grad_norm": 0.19114039838314056,
"learning_rate": 3.930291650305424e-06,
"log_odds_chosen": 0.6988283395767212,
"log_odds_ratio": -0.40597397089004517,
"logits/chosen": -0.25875231623649597,
"logits/rejected": -1.3210163116455078,
"logps/chosen": -1.4922490119934082,
"logps/rejected": -2.0712697505950928,
"loss": 1.5831,
"nll_loss": 1.5425041913986206,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1492249071598053,
"rewards/margins": 0.05790204927325249,
"rewards/rejected": -0.20712696015834808,
"step": 410
},
{
"epoch": 1.1325509147393855,
"grad_norm": 0.1878093183040619,
"learning_rate": 3.92371123292113e-06,
"log_odds_chosen": 0.6247770190238953,
"log_odds_ratio": -0.43160274624824524,
"logits/chosen": -0.2506449520587921,
"logits/rejected": -1.691251516342163,
"logps/chosen": -1.5277273654937744,
"logps/rejected": -2.0446317195892334,
"loss": 1.6273,
"nll_loss": 1.5841726064682007,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.15277275443077087,
"rewards/margins": 0.051690444350242615,
"rewards/rejected": -0.2044631838798523,
"step": 411
},
{
"epoch": 1.1353123921297894,
"grad_norm": 0.1842024326324463,
"learning_rate": 3.917116184701125e-06,
"log_odds_chosen": 0.7279251217842102,
"log_odds_ratio": -0.4019698202610016,
"logits/chosen": -0.2969970107078552,
"logits/rejected": -1.9143730401992798,
"logps/chosen": -1.4923046827316284,
"logps/rejected": -2.099824905395508,
"loss": 1.5904,
"nll_loss": 1.5501734018325806,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1492304652929306,
"rewards/margins": 0.0607520155608654,
"rewards/rejected": -0.2099824696779251,
"step": 412
},
{
"epoch": 1.1380738695201933,
"grad_norm": 0.2017175853252411,
"learning_rate": 3.910506573419734e-06,
"log_odds_chosen": 0.8388389348983765,
"log_odds_ratio": -0.3656638562679291,
"logits/chosen": -0.27687397599220276,
"logits/rejected": -1.5076192617416382,
"logps/chosen": -1.4898481369018555,
"logps/rejected": -2.190901279449463,
"loss": 1.5889,
"nll_loss": 1.5523051023483276,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.14898481965065002,
"rewards/margins": 0.07010531425476074,
"rewards/rejected": -0.21909013390541077,
"step": 413
},
{
"epoch": 1.1408353469105972,
"grad_norm": 0.18677456676959991,
"learning_rate": 3.903882467000938e-06,
"log_odds_chosen": 0.789838433265686,
"log_odds_ratio": -0.37885501980781555,
"logits/chosen": -0.2613790035247803,
"logits/rejected": -1.6607636213302612,
"logps/chosen": -1.5320932865142822,
"logps/rejected": -2.194924831390381,
"loss": 1.6278,
"nll_loss": 1.5898661613464355,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.15320934355258942,
"rewards/margins": 0.06628316640853882,
"rewards/rejected": -0.21949250996112823,
"step": 414
},
{
"epoch": 1.143596824301001,
"grad_norm": 0.18295292556285858,
"learning_rate": 3.897243933517679e-06,
"log_odds_chosen": 0.697784423828125,
"log_odds_ratio": -0.40981873869895935,
"logits/chosen": -0.36293089389801025,
"logits/rejected": -1.7186784744262695,
"logps/chosen": -1.4446918964385986,
"logps/rejected": -2.0167624950408936,
"loss": 1.5485,
"nll_loss": 1.50748872756958,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.14446918666362762,
"rewards/margins": 0.05720707029104233,
"rewards/rejected": -0.20167624950408936,
"step": 415
},
{
"epoch": 1.146358301691405,
"grad_norm": 0.18878361582756042,
"learning_rate": 3.890591041191162e-06,
"log_odds_chosen": 0.7591959834098816,
"log_odds_ratio": -0.39290934801101685,
"logits/chosen": -0.2790437340736389,
"logits/rejected": -1.4805063009262085,
"logps/chosen": -1.4139214754104614,
"logps/rejected": -2.03341007232666,
"loss": 1.5176,
"nll_loss": 1.478288173675537,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.14139214158058167,
"rewards/margins": 0.06194887310266495,
"rewards/rejected": -0.20334100723266602,
"step": 416
},
{
"epoch": 1.1491197790818088,
"grad_norm": 0.2020062804222107,
"learning_rate": 3.883923858390149e-06,
"log_odds_chosen": 0.75853431224823,
"log_odds_ratio": -0.38675639033317566,
"logits/chosen": -0.3220999538898468,
"logits/rejected": -1.7704015970230103,
"logps/chosen": -1.5146489143371582,
"logps/rejected": -2.149966239929199,
"loss": 1.6306,
"nll_loss": 1.5919427871704102,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.15146489441394806,
"rewards/margins": 0.06353174149990082,
"rewards/rejected": -0.21499663591384888,
"step": 417
},
{
"epoch": 1.1518812564722127,
"grad_norm": 0.17892806231975555,
"learning_rate": 3.8772424536302565e-06,
"log_odds_chosen": 0.5580451488494873,
"log_odds_ratio": -0.4568861126899719,
"logits/chosen": -0.3027711510658264,
"logits/rejected": -1.707384467124939,
"logps/chosen": -1.5017491579055786,
"logps/rejected": -1.9595609903335571,
"loss": 1.5943,
"nll_loss": 1.5486559867858887,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.15017491579055786,
"rewards/margins": 0.04578119143843651,
"rewards/rejected": -0.19595609605312347,
"step": 418
},
{
"epoch": 1.1546427338626164,
"grad_norm": 0.19901612401008606,
"learning_rate": 3.870546895573258e-06,
"log_odds_chosen": 0.6981452703475952,
"log_odds_ratio": -0.4115220010280609,
"logits/chosen": -0.3455246686935425,
"logits/rejected": -1.3123235702514648,
"logps/chosen": -1.479371428489685,
"logps/rejected": -2.0550196170806885,
"loss": 1.5787,
"nll_loss": 1.5375878810882568,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.14793714880943298,
"rewards/margins": 0.05756482481956482,
"rewards/rejected": -0.2055019587278366,
"step": 419
},
{
"epoch": 1.1574042112530203,
"grad_norm": 0.1906924694776535,
"learning_rate": 3.863837253026372e-06,
"log_odds_chosen": 0.7406031489372253,
"log_odds_ratio": -0.40153640508651733,
"logits/chosen": -0.37284234166145325,
"logits/rejected": -1.6314030885696411,
"logps/chosen": -1.4350590705871582,
"logps/rejected": -2.0481374263763428,
"loss": 1.5474,
"nll_loss": 1.507211446762085,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.14350590109825134,
"rewards/margins": 0.061307840049266815,
"rewards/rejected": -0.20481374859809875,
"step": 420
},
{
"epoch": 1.1601656886434242,
"grad_norm": 0.19268250465393066,
"learning_rate": 3.857113594941556e-06,
"log_odds_chosen": 0.5946828126907349,
"log_odds_ratio": -0.44611191749572754,
"logits/chosen": -0.3333417475223541,
"logits/rejected": -1.820539116859436,
"logps/chosen": -1.5357310771942139,
"logps/rejected": -2.030311346054077,
"loss": 1.6374,
"nll_loss": 1.5927873849868774,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.15357311069965363,
"rewards/margins": 0.04945802688598633,
"rewards/rejected": -0.20303113758563995,
"step": 421
},
{
"epoch": 1.162927166033828,
"grad_norm": 0.1852002888917923,
"learning_rate": 3.8503759904148005e-06,
"log_odds_chosen": 0.753965437412262,
"log_odds_ratio": -0.39210668206214905,
"logits/chosen": -0.2807456851005554,
"logits/rejected": -1.7219178676605225,
"logps/chosen": -1.4659725427627563,
"logps/rejected": -2.0921542644500732,
"loss": 1.5699,
"nll_loss": 1.5306528806686401,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1465972512960434,
"rewards/margins": 0.0626181811094284,
"rewards/rejected": -0.209215447306633,
"step": 422
},
{
"epoch": 1.165688643424232,
"grad_norm": 0.18899178504943848,
"learning_rate": 3.843624508685416e-06,
"log_odds_chosen": 0.6049898266792297,
"log_odds_ratio": -0.43925538659095764,
"logits/chosen": -0.3452371060848236,
"logits/rejected": -1.7807867527008057,
"logps/chosen": -1.5395519733428955,
"logps/rejected": -2.040437698364258,
"loss": 1.6476,
"nll_loss": 1.603638768196106,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.15395519137382507,
"rewards/margins": 0.05008859187364578,
"rewards/rejected": -0.20404377579689026,
"step": 423
},
{
"epoch": 1.1684501208146358,
"grad_norm": 0.1812516748905182,
"learning_rate": 3.8368592191353246e-06,
"log_odds_chosen": 0.6994820833206177,
"log_odds_ratio": -0.412604957818985,
"logits/chosen": -0.32768282294273376,
"logits/rejected": -1.7750557661056519,
"logps/chosen": -1.4714299440383911,
"logps/rejected": -2.0467658042907715,
"loss": 1.5632,
"nll_loss": 1.5219401121139526,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.14714302122592926,
"rewards/margins": 0.05753358453512192,
"rewards/rejected": -0.20467659831047058,
"step": 424
},
{
"epoch": 1.1712115982050397,
"grad_norm": 0.18136382102966309,
"learning_rate": 3.830080191288342e-06,
"log_odds_chosen": 0.6253668665885925,
"log_odds_ratio": -0.42983123660087585,
"logits/chosen": -0.2867172360420227,
"logits/rejected": -1.5802618265151978,
"logps/chosen": -1.5029292106628418,
"logps/rejected": -2.018980026245117,
"loss": 1.6134,
"nll_loss": 1.5704439878463745,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.15029294788837433,
"rewards/margins": 0.05160506069660187,
"rewards/rejected": -0.2018979787826538,
"step": 425
},
{
"epoch": 1.1739730755954436,
"grad_norm": 0.19747108221054077,
"learning_rate": 3.823287494809469e-06,
"log_odds_chosen": 0.7677461504936218,
"log_odds_ratio": -0.38692528009414673,
"logits/chosen": -0.38328537344932556,
"logits/rejected": -1.6817901134490967,
"logps/chosen": -1.4731775522232056,
"logps/rejected": -2.1067628860473633,
"loss": 1.5885,
"nll_loss": 1.5498508214950562,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.14731775224208832,
"rewards/margins": 0.06335853040218353,
"rewards/rejected": -0.21067628264427185,
"step": 426
},
{
"epoch": 1.1767345529858475,
"grad_norm": 0.185506671667099,
"learning_rate": 3.816481199504171e-06,
"log_odds_chosen": 0.6367388963699341,
"log_odds_ratio": -0.4264675974845886,
"logits/chosen": -0.3647797703742981,
"logits/rejected": -1.5863122940063477,
"logps/chosen": -1.5056264400482178,
"logps/rejected": -2.031801462173462,
"loss": 1.6257,
"nll_loss": 1.5830763578414917,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.15056264400482178,
"rewards/margins": 0.052617497742176056,
"rewards/rejected": -0.20318013429641724,
"step": 427
},
{
"epoch": 1.1794960303762512,
"grad_norm": 0.1755794882774353,
"learning_rate": 3.8096613753176635e-06,
"log_odds_chosen": 0.584825873374939,
"log_odds_ratio": -0.4493084251880646,
"logits/chosen": -0.27612411975860596,
"logits/rejected": -1.4378138780593872,
"logps/chosen": -1.3924418687820435,
"logps/rejected": -1.8575388193130493,
"loss": 1.5104,
"nll_loss": 1.4654783010482788,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1392441838979721,
"rewards/margins": 0.046509698033332825,
"rewards/rejected": -0.18575388193130493,
"step": 428
},
{
"epoch": 1.1822575077666553,
"grad_norm": 0.19249552488327026,
"learning_rate": 3.8028280923341927e-06,
"log_odds_chosen": 0.6473675966262817,
"log_odds_ratio": -0.4286215901374817,
"logits/chosen": -0.384939968585968,
"logits/rejected": -1.5598773956298828,
"logps/chosen": -1.4334121942520142,
"logps/rejected": -1.9639126062393188,
"loss": 1.5432,
"nll_loss": 1.500383973121643,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.14334121346473694,
"rewards/margins": 0.05305003747344017,
"rewards/rejected": -0.1963912546634674,
"step": 429
},
{
"epoch": 1.185018985157059,
"grad_norm": 0.19077420234680176,
"learning_rate": 3.7959814207763134e-06,
"log_odds_chosen": 0.7534008026123047,
"log_odds_ratio": -0.3935543894767761,
"logits/chosen": -0.33575281500816345,
"logits/rejected": -1.7560871839523315,
"logps/chosen": -1.5460617542266846,
"logps/rejected": -2.17948579788208,
"loss": 1.6361,
"nll_loss": 1.5967589616775513,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1546061784029007,
"rewards/margins": 0.0633423924446106,
"rewards/rejected": -0.2179485559463501,
"step": 430
},
{
"epoch": 1.1877804625474628,
"grad_norm": 0.18546007573604584,
"learning_rate": 3.789121431004168e-06,
"log_odds_chosen": 0.6419503092765808,
"log_odds_ratio": -0.42676353454589844,
"logits/chosen": -0.36785295605659485,
"logits/rejected": -1.6887683868408203,
"logps/chosen": -1.5084519386291504,
"logps/rejected": -2.036573886871338,
"loss": 1.5997,
"nll_loss": 1.5570096969604492,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.15084518492221832,
"rewards/margins": 0.05281219631433487,
"rewards/rejected": -0.2036573737859726,
"step": 431
},
{
"epoch": 1.1905419399378667,
"grad_norm": 0.194391131401062,
"learning_rate": 3.782248193514766e-06,
"log_odds_chosen": 0.6094563007354736,
"log_odds_ratio": -0.43929269909858704,
"logits/chosen": -0.29947635531425476,
"logits/rejected": -1.4955629110336304,
"logps/chosen": -1.4138904809951782,
"logps/rejected": -1.906821846961975,
"loss": 1.5403,
"nll_loss": 1.4963343143463135,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.14138904213905334,
"rewards/margins": 0.049293152987957,
"rewards/rejected": -0.19068220257759094,
"step": 432
},
{
"epoch": 1.1933034173282706,
"grad_norm": 0.19166447222232819,
"learning_rate": 3.775361778941257e-06,
"log_odds_chosen": 0.7113713026046753,
"log_odds_ratio": -0.403501957654953,
"logits/chosen": -0.308038592338562,
"logits/rejected": -1.6283587217330933,
"logps/chosen": -1.501065731048584,
"logps/rejected": -2.0941624641418457,
"loss": 1.6079,
"nll_loss": 1.567560076713562,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.15010657906532288,
"rewards/margins": 0.05930966138839722,
"rewards/rejected": -0.2094162404537201,
"step": 433
},
{
"epoch": 1.1960648947186745,
"grad_norm": 0.18622690439224243,
"learning_rate": 3.7684622580522057e-06,
"log_odds_chosen": 0.42840951681137085,
"log_odds_ratio": -0.5051460862159729,
"logits/chosen": -0.2970636785030365,
"logits/rejected": -1.6902334690093994,
"logps/chosen": -1.4522030353546143,
"logps/rejected": -1.7967474460601807,
"loss": 1.5557,
"nll_loss": 1.5051684379577637,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1452203243970871,
"rewards/margins": 0.03445442020893097,
"rewards/rejected": -0.17967472970485687,
"step": 434
},
{
"epoch": 1.1988263721090784,
"grad_norm": 0.17388908565044403,
"learning_rate": 3.761549701750865e-06,
"log_odds_chosen": 0.7077789902687073,
"log_odds_ratio": -0.41333314776420593,
"logits/chosen": -0.3090924024581909,
"logits/rejected": -1.843423843383789,
"logps/chosen": -1.4652737379074097,
"logps/rejected": -2.052661418914795,
"loss": 1.5629,
"nll_loss": 1.5216063261032104,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.14652739465236664,
"rewards/margins": 0.058738768100738525,
"rewards/rejected": -0.20526614785194397,
"step": 435
},
{
"epoch": 1.2015878494994823,
"grad_norm": 0.17293590307235718,
"learning_rate": 3.7546241810744444e-06,
"log_odds_chosen": 0.5752092003822327,
"log_odds_ratio": -0.4525831639766693,
"logits/chosen": -0.32182741165161133,
"logits/rejected": -1.3567001819610596,
"logps/chosen": -1.4578838348388672,
"logps/rejected": -1.9286342859268188,
"loss": 1.5493,
"nll_loss": 1.50407075881958,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.14578840136528015,
"rewards/margins": 0.047075025737285614,
"rewards/rejected": -0.19286341965198517,
"step": 436
},
{
"epoch": 1.2043493268898862,
"grad_norm": 0.19718696177005768,
"learning_rate": 3.747685767193385e-06,
"log_odds_chosen": 0.5642775297164917,
"log_odds_ratio": -0.4569299817085266,
"logits/chosen": -0.3862457871437073,
"logits/rejected": -1.4825284481048584,
"logps/chosen": -1.5323418378829956,
"logps/rejected": -1.9992895126342773,
"loss": 1.6529,
"nll_loss": 1.6072373390197754,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.15323419868946075,
"rewards/margins": 0.046694785356521606,
"rewards/rejected": -0.19992896914482117,
"step": 437
},
{
"epoch": 1.20711080428029,
"grad_norm": 0.17915678024291992,
"learning_rate": 3.740734531410626e-06,
"log_odds_chosen": 0.7052585482597351,
"log_odds_ratio": -0.4100540578365326,
"logits/chosen": -0.3775138258934021,
"logits/rejected": -1.859032154083252,
"logps/chosen": -1.448523998260498,
"logps/rejected": -2.021770477294922,
"loss": 1.5652,
"nll_loss": 1.5241587162017822,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1448523849248886,
"rewards/margins": 0.057324644178152084,
"rewards/rejected": -0.202177032828331,
"step": 438
},
{
"epoch": 1.2098722816706937,
"grad_norm": 0.19827017188072205,
"learning_rate": 3.7337705451608676e-06,
"log_odds_chosen": 0.859776496887207,
"log_odds_ratio": -0.3666639029979706,
"logits/chosen": -0.4492262601852417,
"logits/rejected": -1.819311261177063,
"logps/chosen": -1.3853495121002197,
"logps/rejected": -2.082892894744873,
"loss": 1.4954,
"nll_loss": 1.4586937427520752,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.13853494822978973,
"rewards/margins": 0.06975432485342026,
"rewards/rejected": -0.2082892656326294,
"step": 439
},
{
"epoch": 1.2126337590610976,
"grad_norm": 0.19302192330360413,
"learning_rate": 3.7267938800098454e-06,
"log_odds_chosen": 0.8500153422355652,
"log_odds_ratio": -0.36537328362464905,
"logits/chosen": -0.5428920984268188,
"logits/rejected": -1.821568489074707,
"logps/chosen": -1.3568768501281738,
"logps/rejected": -2.052211046218872,
"loss": 1.4693,
"nll_loss": 1.4327150583267212,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1356876939535141,
"rewards/margins": 0.06953340768814087,
"rewards/rejected": -0.20522108674049377,
"step": 440
},
{
"epoch": 1.2153952364515015,
"grad_norm": 0.19974324107170105,
"learning_rate": 3.7198046076535865e-06,
"log_odds_chosen": 0.7102779746055603,
"log_odds_ratio": -0.40437954664230347,
"logits/chosen": -0.3492871820926666,
"logits/rejected": -1.5553520917892456,
"logps/chosen": -1.4951152801513672,
"logps/rejected": -2.0831298828125,
"loss": 1.6058,
"nll_loss": 1.565347671508789,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.14951153099536896,
"rewards/margins": 0.05880144238471985,
"rewards/rejected": -0.20831298828125,
"step": 441
},
{
"epoch": 1.2181567138419054,
"grad_norm": 0.18230760097503662,
"learning_rate": 3.71280279991768e-06,
"log_odds_chosen": 0.7506513595581055,
"log_odds_ratio": -0.3923317492008209,
"logits/chosen": -0.4152137339115143,
"logits/rejected": -1.5510238409042358,
"logps/chosen": -1.4440295696258545,
"logps/rejected": -2.0645270347595215,
"loss": 1.5654,
"nll_loss": 1.5261952877044678,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.14440295100212097,
"rewards/margins": 0.06204976886510849,
"rewards/rejected": -0.20645272731781006,
"step": 442
},
{
"epoch": 1.2209181912323093,
"grad_norm": 0.17220567166805267,
"learning_rate": 3.705788528756533e-06,
"log_odds_chosen": 0.7942907810211182,
"log_odds_ratio": -0.37784573435783386,
"logits/chosen": -0.43315792083740234,
"logits/rejected": -1.8588192462921143,
"logps/chosen": -1.4370851516723633,
"logps/rejected": -2.0935044288635254,
"loss": 1.5386,
"nll_loss": 1.500786304473877,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.14370852708816528,
"rewards/margins": 0.06564193964004517,
"rewards/rejected": -0.20935045182704926,
"step": 443
},
{
"epoch": 1.2236796686227132,
"grad_norm": 0.34615159034729004,
"learning_rate": 3.698761866252635e-06,
"log_odds_chosen": 0.6159027218818665,
"log_odds_ratio": -0.44577932357788086,
"logits/chosen": -0.4024880826473236,
"logits/rejected": -1.7161859273910522,
"logps/chosen": -1.533597707748413,
"logps/rejected": -2.050471067428589,
"loss": 1.6393,
"nll_loss": 1.5947318077087402,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1533597856760025,
"rewards/margins": 0.0516873374581337,
"rewards/rejected": -0.2050471305847168,
"step": 444
},
{
"epoch": 1.226441146013117,
"grad_norm": 0.19309046864509583,
"learning_rate": 3.691722884615814e-06,
"log_odds_chosen": 0.6141983270645142,
"log_odds_ratio": -0.43802952766418457,
"logits/chosen": -0.2951904237270355,
"logits/rejected": -1.1809048652648926,
"logps/chosen": -1.4468181133270264,
"logps/rejected": -1.9419705867767334,
"loss": 1.5744,
"nll_loss": 1.5305941104888916,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.14468181133270264,
"rewards/margins": 0.04951523244380951,
"rewards/rejected": -0.19419705867767334,
"step": 445
},
{
"epoch": 1.229202623403521,
"grad_norm": 0.19239898025989532,
"learning_rate": 3.684671656182497e-06,
"log_odds_chosen": 0.7434544563293457,
"log_odds_ratio": -0.39474281668663025,
"logits/chosen": -0.4174625277519226,
"logits/rejected": -1.7456690073013306,
"logps/chosen": -1.4840799570083618,
"logps/rejected": -2.0977742671966553,
"loss": 1.5985,
"nll_loss": 1.5590325593948364,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.14840799570083618,
"rewards/margins": 0.06136942654848099,
"rewards/rejected": -0.20977741479873657,
"step": 446
},
{
"epoch": 1.2319641007939248,
"grad_norm": 0.19208678603172302,
"learning_rate": 3.6776082534149664e-06,
"log_odds_chosen": 0.7092225551605225,
"log_odds_ratio": -0.40430110692977905,
"logits/chosen": -0.4059605598449707,
"logits/rejected": -1.8414146900177002,
"logps/chosen": -1.5050946474075317,
"logps/rejected": -2.0934860706329346,
"loss": 1.5881,
"nll_loss": 1.5477027893066406,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.15050947666168213,
"rewards/margins": 0.05883914604783058,
"rewards/rejected": -0.2093486189842224,
"step": 447
},
{
"epoch": 1.2347255781843287,
"grad_norm": 0.18131853640079498,
"learning_rate": 3.670532748900615e-06,
"log_odds_chosen": 0.7535479664802551,
"log_odds_ratio": -0.3983326554298401,
"logits/chosen": -0.3855303227901459,
"logits/rejected": -1.6423949003219604,
"logps/chosen": -1.4195005893707275,
"logps/rejected": -2.0328564643859863,
"loss": 1.5252,
"nll_loss": 1.4853616952896118,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.14195004105567932,
"rewards/margins": 0.06133558601140976,
"rewards/rejected": -0.20328564941883087,
"step": 448
},
{
"epoch": 1.2374870555747326,
"grad_norm": 0.1773749440908432,
"learning_rate": 3.663445215351198e-06,
"log_odds_chosen": 0.794275164604187,
"log_odds_ratio": -0.38321515917778015,
"logits/chosen": -0.4386705160140991,
"logits/rejected": -1.5580754280090332,
"logps/chosen": -1.4094889163970947,
"logps/rejected": -2.067706823348999,
"loss": 1.5152,
"nll_loss": 1.476863145828247,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.14094889163970947,
"rewards/margins": 0.06582178175449371,
"rewards/rejected": -0.20677067339420319,
"step": 449
},
{
"epoch": 1.2402485329651363,
"grad_norm": 0.18508228659629822,
"learning_rate": 3.656345725602089e-06,
"log_odds_chosen": 0.8697155714035034,
"log_odds_ratio": -0.35649657249450684,
"logits/chosen": -0.36322081089019775,
"logits/rejected": -1.8790079355239868,
"logps/chosen": -1.5029995441436768,
"logps/rejected": -2.235288143157959,
"loss": 1.5982,
"nll_loss": 1.5625407695770264,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.15029993653297424,
"rewards/margins": 0.0732288807630539,
"rewards/rejected": -0.22352883219718933,
"step": 450
},
{
"epoch": 1.2430100103555402,
"grad_norm": 0.19414876401424408,
"learning_rate": 3.6492343526115292e-06,
"log_odds_chosen": 0.6894505023956299,
"log_odds_ratio": -0.4081804156303406,
"logits/chosen": -0.26924291253089905,
"logits/rejected": -1.8292944431304932,
"logps/chosen": -1.5266273021697998,
"logps/rejected": -2.100613594055176,
"loss": 1.612,
"nll_loss": 1.5711458921432495,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1526627391576767,
"rewards/margins": 0.05739862471818924,
"rewards/rejected": -0.21006137132644653,
"step": 451
},
{
"epoch": 1.245771487745944,
"grad_norm": 0.18495413661003113,
"learning_rate": 3.642111169459879e-06,
"log_odds_chosen": 0.5452620983123779,
"log_odds_ratio": -0.4608571529388428,
"logits/chosen": -0.3355942666530609,
"logits/rejected": -1.535355806350708,
"logps/chosen": -1.489123821258545,
"logps/rejected": -1.9351115226745605,
"loss": 1.5796,
"nll_loss": 1.533546805381775,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.14891238510608673,
"rewards/margins": 0.044598765671253204,
"rewards/rejected": -0.19351115822792053,
"step": 452
},
{
"epoch": 1.248532965136348,
"grad_norm": 0.18209105730056763,
"learning_rate": 3.634976249348867e-06,
"log_odds_chosen": 0.6647239923477173,
"log_odds_ratio": -0.42399975657463074,
"logits/chosen": -0.3849475681781769,
"logits/rejected": -1.8075506687164307,
"logps/chosen": -1.4811009168624878,
"logps/rejected": -2.028481960296631,
"loss": 1.5873,
"nll_loss": 1.5449340343475342,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.14811009168624878,
"rewards/margins": 0.05473810061812401,
"rewards/rejected": -0.2028481811285019,
"step": 453
},
{
"epoch": 1.2512944425267518,
"grad_norm": 0.20314469933509827,
"learning_rate": 3.6278296656008366e-06,
"log_odds_chosen": 0.8694459199905396,
"log_odds_ratio": -0.371795654296875,
"logits/chosen": -0.4006415903568268,
"logits/rejected": -1.7445772886276245,
"logps/chosen": -1.3579434156417847,
"logps/rejected": -2.065506935119629,
"loss": 1.4782,
"nll_loss": 1.4409888982772827,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.13579432666301727,
"rewards/margins": 0.07075636833906174,
"rewards/rejected": -0.2065506875514984,
"step": 454
},
{
"epoch": 1.2540559199171557,
"grad_norm": 0.1716334968805313,
"learning_rate": 3.6206714916579925e-06,
"log_odds_chosen": 0.7703570127487183,
"log_odds_ratio": -0.3888989984989166,
"logits/chosen": -0.38477808237075806,
"logits/rejected": -1.7767083644866943,
"logps/chosen": -1.398410439491272,
"logps/rejected": -2.027221441268921,
"loss": 1.4987,
"nll_loss": 1.4598515033721924,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.13984103500843048,
"rewards/margins": 0.06288108229637146,
"rewards/rejected": -0.20272211730480194,
"step": 455
},
{
"epoch": 1.2568173973075596,
"grad_norm": 0.18591812252998352,
"learning_rate": 3.613501801081648e-06,
"log_odds_chosen": 0.617323637008667,
"log_odds_ratio": -0.4372296929359436,
"logits/chosen": -0.36923906207084656,
"logits/rejected": -1.4284781217575073,
"logps/chosen": -1.4494431018829346,
"logps/rejected": -1.9530479907989502,
"loss": 1.5618,
"nll_loss": 1.5181188583374023,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.14494431018829346,
"rewards/margins": 0.05036048963665962,
"rewards/rejected": -0.19530481100082397,
"step": 456
},
{
"epoch": 1.2595788746979635,
"grad_norm": 0.1984872668981552,
"learning_rate": 3.606320667551466e-06,
"log_odds_chosen": 0.7281745672225952,
"log_odds_ratio": -0.4030551612377167,
"logits/chosen": -0.36235833168029785,
"logits/rejected": -1.7550745010375977,
"logps/chosen": -1.49772310256958,
"logps/rejected": -2.1043448448181152,
"loss": 1.6083,
"nll_loss": 1.5679997205734253,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1497723013162613,
"rewards/margins": 0.06066218018531799,
"rewards/rejected": -0.21043448150157928,
"step": 457
},
{
"epoch": 1.2623403520883674,
"grad_norm": 0.1827681064605713,
"learning_rate": 3.599128164864706e-06,
"log_odds_chosen": 0.8096928596496582,
"log_odds_ratio": -0.37837105989456177,
"logits/chosen": -0.45445504784584045,
"logits/rejected": -1.770817756652832,
"logps/chosen": -1.4516648054122925,
"logps/rejected": -2.1250061988830566,
"loss": 1.5451,
"nll_loss": 1.5072699785232544,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.14516648650169373,
"rewards/margins": 0.0673341453075409,
"rewards/rejected": -0.21250061690807343,
"step": 458
},
{
"epoch": 1.265101829478771,
"grad_norm": 0.19308075308799744,
"learning_rate": 3.5919243669354585e-06,
"log_odds_chosen": 0.6114473342895508,
"log_odds_ratio": -0.4423988461494446,
"logits/chosen": -0.405925452709198,
"logits/rejected": -1.2670363187789917,
"logps/chosen": -1.5176535844802856,
"logps/rejected": -2.0253612995147705,
"loss": 1.623,
"nll_loss": 1.5787138938903809,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1517653614282608,
"rewards/margins": 0.05077076703310013,
"rewards/rejected": -0.20253612101078033,
"step": 459
},
{
"epoch": 1.2678633068691751,
"grad_norm": 0.2073579728603363,
"learning_rate": 3.5847093477938955e-06,
"log_odds_chosen": 0.6397342681884766,
"log_odds_ratio": -0.4318113625049591,
"logits/chosen": -0.4677776098251343,
"logits/rejected": -1.5898178815841675,
"logps/chosen": -1.5188252925872803,
"logps/rejected": -2.048469066619873,
"loss": 1.6166,
"nll_loss": 1.5734236240386963,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.15188252925872803,
"rewards/margins": 0.05296438932418823,
"rewards/rejected": -0.20484691858291626,
"step": 460
},
{
"epoch": 1.2706247842595788,
"grad_norm": 0.18931885063648224,
"learning_rate": 3.5774831815855017e-06,
"log_odds_chosen": 0.8001710772514343,
"log_odds_ratio": -0.3749019205570221,
"logits/chosen": -0.4591723084449768,
"logits/rejected": -1.6894080638885498,
"logps/chosen": -1.4038267135620117,
"logps/rejected": -2.0569498538970947,
"loss": 1.5196,
"nll_loss": 1.4821062088012695,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.14038267731666565,
"rewards/margins": 0.06531231105327606,
"rewards/rejected": -0.2056949883699417,
"step": 461
},
{
"epoch": 1.2733862616499827,
"grad_norm": 0.17583203315734863,
"learning_rate": 3.5702459425703146e-06,
"log_odds_chosen": 0.8868230581283569,
"log_odds_ratio": -0.3547811210155487,
"logits/chosen": -0.43302029371261597,
"logits/rejected": -1.9412630796432495,
"logps/chosen": -1.508098840713501,
"logps/rejected": -2.258622884750366,
"loss": 1.6074,
"nll_loss": 1.5718796253204346,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1508098840713501,
"rewards/margins": 0.0750524029135704,
"rewards/rejected": -0.2258622944355011,
"step": 462
},
{
"epoch": 1.2761477390403866,
"grad_norm": 0.18731163442134857,
"learning_rate": 3.562997705122162e-06,
"log_odds_chosen": 0.7847945094108582,
"log_odds_ratio": -0.37928110361099243,
"logits/chosen": -0.38437145948410034,
"logits/rejected": -1.6156420707702637,
"logps/chosen": -1.4260213375091553,
"logps/rejected": -2.070751428604126,
"loss": 1.5279,
"nll_loss": 1.489925503730774,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.14260214567184448,
"rewards/margins": 0.0644729733467102,
"rewards/rejected": -0.20707513391971588,
"step": 463
},
{
"epoch": 1.2789092164307905,
"grad_norm": 0.17475299537181854,
"learning_rate": 3.5557385437279e-06,
"log_odds_chosen": 0.838699221611023,
"log_odds_ratio": -0.36731863021850586,
"logits/chosen": -0.3592332601547241,
"logits/rejected": -1.4209747314453125,
"logps/chosen": -1.4949580430984497,
"logps/rejected": -2.196079730987549,
"loss": 1.5793,
"nll_loss": 1.5425434112548828,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.14949579536914825,
"rewards/margins": 0.07011217623949051,
"rewards/rejected": -0.21960797905921936,
"step": 464
},
{
"epoch": 1.2816706938211944,
"grad_norm": 0.19598983228206635,
"learning_rate": 3.5484685329866424e-06,
"log_odds_chosen": 0.5413783192634583,
"log_odds_ratio": -0.4634351134300232,
"logits/chosen": -0.3708513081073761,
"logits/rejected": -1.4379222393035889,
"logps/chosen": -1.5153756141662598,
"logps/rejected": -1.9605016708374023,
"loss": 1.6065,
"nll_loss": 1.5601321458816528,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.15153755247592926,
"rewards/margins": 0.044512614607810974,
"rewards/rejected": -0.19605018198490143,
"step": 465
},
{
"epoch": 1.2844321712115983,
"grad_norm": 0.18149885535240173,
"learning_rate": 3.541187747608998e-06,
"log_odds_chosen": 0.9340339303016663,
"log_odds_ratio": -0.34465107321739197,
"logits/chosen": -0.48461854457855225,
"logits/rejected": -1.7797619104385376,
"logps/chosen": -1.5739936828613281,
"logps/rejected": -2.3727049827575684,
"loss": 1.6472,
"nll_loss": 1.6127065420150757,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.15739935636520386,
"rewards/margins": 0.07987111061811447,
"rewards/rejected": -0.23727048933506012,
"step": 466
},
{
"epoch": 1.2871936486020021,
"grad_norm": 0.19978304207324982,
"learning_rate": 3.533896262416302e-06,
"log_odds_chosen": 0.7202765941619873,
"log_odds_ratio": -0.4014014005661011,
"logits/chosen": -0.4147559404373169,
"logits/rejected": -1.5541059970855713,
"logps/chosen": -1.4875589609146118,
"logps/rejected": -2.0849876403808594,
"loss": 1.5897,
"nll_loss": 1.5495574474334717,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.14875589311122894,
"rewards/margins": 0.05974285677075386,
"rewards/rejected": -0.2084987461566925,
"step": 467
},
{
"epoch": 1.2899551259924058,
"grad_norm": 0.21298463642597198,
"learning_rate": 3.5265941523398455e-06,
"log_odds_chosen": 0.8375830054283142,
"log_odds_ratio": -0.36393651366233826,
"logits/chosen": -0.42123696208000183,
"logits/rejected": -1.5165983438491821,
"logps/chosen": -1.5110690593719482,
"logps/rejected": -2.2138826847076416,
"loss": 1.6014,
"nll_loss": 1.5650498867034912,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.15110690891742706,
"rewards/margins": 0.07028134167194366,
"rewards/rejected": -0.22138825058937073,
"step": 468
},
{
"epoch": 1.29271660338281,
"grad_norm": 0.1818659007549286,
"learning_rate": 3.519281492420108e-06,
"log_odds_chosen": 0.5199939012527466,
"log_odds_ratio": -0.47489961981773376,
"logits/chosen": -0.4219110906124115,
"logits/rejected": -1.4790453910827637,
"logps/chosen": -1.5349076986312866,
"logps/rejected": -1.9625968933105469,
"loss": 1.6236,
"nll_loss": 1.5760908126831055,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.15349076688289642,
"rewards/margins": 0.042768917977809906,
"rewards/rejected": -0.19625969231128693,
"step": 469
},
{
"epoch": 1.2954780807732136,
"grad_norm": 0.19590230286121368,
"learning_rate": 3.5119583578059845e-06,
"log_odds_chosen": 0.6841270327568054,
"log_odds_ratio": -0.43147027492523193,
"logits/chosen": -0.4008702337741852,
"logits/rejected": -1.6819429397583008,
"logps/chosen": -1.6086608171463013,
"logps/rejected": -2.1926984786987305,
"loss": 1.6814,
"nll_loss": 1.6382827758789062,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.16086608171463013,
"rewards/margins": 0.05840376764535904,
"rewards/rejected": -0.21926987171173096,
"step": 470
},
{
"epoch": 1.2982395581636175,
"grad_norm": 0.17711324989795685,
"learning_rate": 3.504624823754014e-06,
"log_odds_chosen": 0.8761448860168457,
"log_odds_ratio": -0.3517749607563019,
"logits/chosen": -0.41212159395217896,
"logits/rejected": -1.957320213317871,
"logps/chosen": -1.4138109683990479,
"logps/rejected": -2.131588935852051,
"loss": 1.508,
"nll_loss": 1.4727998971939087,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.14138111472129822,
"rewards/margins": 0.07177779823541641,
"rewards/rejected": -0.21315890550613403,
"step": 471
},
{
"epoch": 1.3010010355540214,
"grad_norm": 0.19494958221912384,
"learning_rate": 3.4972809656276047e-06,
"log_odds_chosen": 0.8046627640724182,
"log_odds_ratio": -0.3767206370830536,
"logits/chosen": -0.36885958909988403,
"logits/rejected": -1.8168095350265503,
"logps/chosen": -1.5026159286499023,
"logps/rejected": -2.176860809326172,
"loss": 1.6015,
"nll_loss": 1.5638599395751953,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.15026158094406128,
"rewards/margins": 0.06742450594902039,
"rewards/rejected": -0.21768608689308167,
"step": 472
},
{
"epoch": 1.3037625129444252,
"grad_norm": 0.1766563355922699,
"learning_rate": 3.4899268588962613e-06,
"log_odds_chosen": 0.8576854467391968,
"log_odds_ratio": -0.3584885597229004,
"logits/chosen": -0.436603844165802,
"logits/rejected": -1.6729352474212646,
"logps/chosen": -1.3645590543746948,
"logps/rejected": -2.0580813884735107,
"loss": 1.4773,
"nll_loss": 1.4414961338043213,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.13645590841770172,
"rewards/margins": 0.06935223937034607,
"rewards/rejected": -0.20580816268920898,
"step": 473
},
{
"epoch": 1.3065239903348291,
"grad_norm": 0.18817074596881866,
"learning_rate": 3.4825625791348093e-06,
"log_odds_chosen": 0.8350617289543152,
"log_odds_ratio": -0.3708168864250183,
"logits/chosen": -0.3747791051864624,
"logits/rejected": -1.553459882736206,
"logps/chosen": -1.4124037027359009,
"logps/rejected": -2.1011507511138916,
"loss": 1.5145,
"nll_loss": 1.477430820465088,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.14124037325382233,
"rewards/margins": 0.06887470930814743,
"rewards/rejected": -0.21011507511138916,
"step": 474
},
{
"epoch": 1.309285467725233,
"grad_norm": 0.1850864142179489,
"learning_rate": 3.4751882020226174e-06,
"log_odds_chosen": 0.7951568365097046,
"log_odds_ratio": -0.38151904940605164,
"logits/chosen": -0.41285592317581177,
"logits/rejected": -1.676164984703064,
"logps/chosen": -1.519218921661377,
"logps/rejected": -2.190464973449707,
"loss": 1.6106,
"nll_loss": 1.5724141597747803,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.15192189812660217,
"rewards/margins": 0.06712460517883301,
"rewards/rejected": -0.21904650330543518,
"step": 475
},
{
"epoch": 1.312046945115637,
"grad_norm": 0.19581526517868042,
"learning_rate": 3.467803803342821e-06,
"log_odds_chosen": 0.7770808339118958,
"log_odds_ratio": -0.38217130303382874,
"logits/chosen": -0.3372090756893158,
"logits/rejected": -1.6060659885406494,
"logps/chosen": -1.4900453090667725,
"logps/rejected": -2.1375834941864014,
"loss": 1.5763,
"nll_loss": 1.5380480289459229,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.14900454878807068,
"rewards/margins": 0.06475377827882767,
"rewards/rejected": -0.21375833451747894,
"step": 476
},
{
"epoch": 1.3148084225060408,
"grad_norm": 0.19793154299259186,
"learning_rate": 3.4604094589815402e-06,
"log_odds_chosen": 0.6940678358078003,
"log_odds_ratio": -0.40892940759658813,
"logits/chosen": -0.42878496646881104,
"logits/rejected": -1.5839377641677856,
"logps/chosen": -1.4118523597717285,
"logps/rejected": -1.9752473831176758,
"loss": 1.5164,
"nll_loss": 1.4755195379257202,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1411852389574051,
"rewards/margins": 0.056339483708143234,
"rewards/rejected": -0.19752474129199982,
"step": 477
},
{
"epoch": 1.3175698998964447,
"grad_norm": 0.18727529048919678,
"learning_rate": 3.4530052449271044e-06,
"log_odds_chosen": 0.8616414666175842,
"log_odds_ratio": -0.36362677812576294,
"logits/chosen": -0.4634791910648346,
"logits/rejected": -1.5600810050964355,
"logps/chosen": -1.4240953922271729,
"logps/rejected": -2.1343352794647217,
"loss": 1.521,
"nll_loss": 1.4846872091293335,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.142409548163414,
"rewards/margins": 0.07102398574352264,
"rewards/rejected": -0.21343351900577545,
"step": 478
},
{
"epoch": 1.3203313772868484,
"grad_norm": 0.19879932701587677,
"learning_rate": 3.4455912372692696e-06,
"log_odds_chosen": 0.5470461845397949,
"log_odds_ratio": -0.46493563055992126,
"logits/chosen": -0.3689558207988739,
"logits/rejected": -1.5988726615905762,
"logps/chosen": -1.4872140884399414,
"logps/rejected": -1.9357192516326904,
"loss": 1.5782,
"nll_loss": 1.5316959619522095,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.14872139692306519,
"rewards/margins": 0.04485052451491356,
"rewards/rejected": -0.19357194006443024,
"step": 479
},
{
"epoch": 1.3230928546772525,
"grad_norm": 0.19459912180900574,
"learning_rate": 3.438167512198436e-06,
"log_odds_chosen": 0.5693470239639282,
"log_odds_ratio": -0.4518805146217346,
"logits/chosen": -0.41612759232521057,
"logits/rejected": -1.5004498958587646,
"logps/chosen": -1.5251268148422241,
"logps/rejected": -1.994539499282837,
"loss": 1.6246,
"nll_loss": 1.5794237852096558,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.15251268446445465,
"rewards/margins": 0.046941258013248444,
"rewards/rejected": -0.1994539499282837,
"step": 480
},
{
"epoch": 1.3258543320676561,
"grad_norm": 0.1880672574043274,
"learning_rate": 3.4307341460048633e-06,
"log_odds_chosen": 0.7143114805221558,
"log_odds_ratio": -0.40364253520965576,
"logits/chosen": -0.4024220108985901,
"logits/rejected": -1.5502820014953613,
"logps/chosen": -1.5507985353469849,
"logps/rejected": -2.1468398571014404,
"loss": 1.6399,
"nll_loss": 1.5995802879333496,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.15507985651493073,
"rewards/margins": 0.059604134410619736,
"rewards/rejected": -0.21468399465084076,
"step": 481
},
{
"epoch": 1.32861580945806,
"grad_norm": 0.19784514605998993,
"learning_rate": 3.4232912150778914e-06,
"log_odds_chosen": 0.7461116909980774,
"log_odds_ratio": -0.3941587507724762,
"logits/chosen": -0.43998983502388,
"logits/rejected": -1.7154864072799683,
"logps/chosen": -1.4538161754608154,
"logps/rejected": -2.0666849613189697,
"loss": 1.5731,
"nll_loss": 1.5336874723434448,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1453816145658493,
"rewards/margins": 0.06128688156604767,
"rewards/rejected": -0.20666849613189697,
"step": 482
},
{
"epoch": 1.331377286848464,
"grad_norm": 0.18405503034591675,
"learning_rate": 3.415838795905151e-06,
"log_odds_chosen": 0.9300730228424072,
"log_odds_ratio": -0.34047600626945496,
"logits/chosen": -0.5350120067596436,
"logits/rejected": -1.9728281497955322,
"logps/chosen": -1.4228546619415283,
"logps/rejected": -2.1867175102233887,
"loss": 1.5168,
"nll_loss": 1.4828009605407715,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.14228546619415283,
"rewards/margins": 0.07638627290725708,
"rewards/rejected": -0.2186717540025711,
"step": 483
},
{
"epoch": 1.3341387642388678,
"grad_norm": 0.19047723710536957,
"learning_rate": 3.408376965071779e-06,
"log_odds_chosen": 0.6742444634437561,
"log_odds_ratio": -0.41640961170196533,
"logits/chosen": -0.37510067224502563,
"logits/rejected": -1.5923813581466675,
"logps/chosen": -1.5178049802780151,
"logps/rejected": -2.0749661922454834,
"loss": 1.6055,
"nll_loss": 1.5638338327407837,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.15178050100803375,
"rewards/margins": 0.05571611970663071,
"rewards/rejected": -0.20749662816524506,
"step": 484
},
{
"epoch": 1.3369002416292717,
"grad_norm": 0.1780618578195572,
"learning_rate": 3.400905799259634e-06,
"log_odds_chosen": 0.653221607208252,
"log_odds_ratio": -0.4260196387767792,
"logits/chosen": -0.42139092087745667,
"logits/rejected": -1.648254632949829,
"logps/chosen": -1.4374747276306152,
"logps/rejected": -1.9686298370361328,
"loss": 1.5329,
"nll_loss": 1.490256667137146,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.143747478723526,
"rewards/margins": 0.05311552435159683,
"rewards/rejected": -0.19686299562454224,
"step": 485
},
{
"epoch": 1.3396617190196756,
"grad_norm": 0.1804792881011963,
"learning_rate": 3.393425375246503e-06,
"log_odds_chosen": 0.6369755268096924,
"log_odds_ratio": -0.4262813925743103,
"logits/chosen": -0.4290909469127655,
"logits/rejected": -1.7290550470352173,
"logps/chosen": -1.4642930030822754,
"logps/rejected": -1.98381507396698,
"loss": 1.5519,
"nll_loss": 1.509236454963684,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.14642930030822754,
"rewards/margins": 0.05195220559835434,
"rewards/rejected": -0.1983814835548401,
"step": 486
},
{
"epoch": 1.3424231964100795,
"grad_norm": 0.19493341445922852,
"learning_rate": 3.3859357699053165e-06,
"log_odds_chosen": 0.7284951210021973,
"log_odds_ratio": -0.40332838892936707,
"logits/chosen": -0.40109413862228394,
"logits/rejected": -1.4352595806121826,
"logps/chosen": -1.402358055114746,
"logps/rejected": -1.9987090826034546,
"loss": 1.4967,
"nll_loss": 1.456404447555542,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1402358114719391,
"rewards/margins": 0.059635113924741745,
"rewards/rejected": -0.19987091422080994,
"step": 487
},
{
"epoch": 1.3451846738004831,
"grad_norm": 0.19050319492816925,
"learning_rate": 3.3784370602033572e-06,
"log_odds_chosen": 0.8729457259178162,
"log_odds_ratio": -0.35851162672042847,
"logits/chosen": -0.4902225732803345,
"logits/rejected": -1.7542881965637207,
"logps/chosen": -1.4271174669265747,
"logps/rejected": -2.151813507080078,
"loss": 1.5277,
"nll_loss": 1.4917997121810913,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.14271175861358643,
"rewards/margins": 0.07246959954500198,
"rewards/rejected": -0.2151813805103302,
"step": 488
},
{
"epoch": 1.3479461511908872,
"grad_norm": 0.18820421397686005,
"learning_rate": 3.3709293232014705e-06,
"log_odds_chosen": 0.73007732629776,
"log_odds_ratio": -0.40136751532554626,
"logits/chosen": -0.44850000739097595,
"logits/rejected": -2.0052490234375,
"logps/chosen": -1.4974483251571655,
"logps/rejected": -2.1092348098754883,
"loss": 1.5884,
"nll_loss": 1.5482368469238281,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.14974482357501984,
"rewards/margins": 0.06117865815758705,
"rewards/rejected": -0.21092349290847778,
"step": 489
},
{
"epoch": 1.350707628581291,
"grad_norm": 0.18799753487110138,
"learning_rate": 3.3634126360532694e-06,
"log_odds_chosen": 0.8482156991958618,
"log_odds_ratio": -0.36712414026260376,
"logits/chosen": -0.32226884365081787,
"logits/rejected": -1.778417944908142,
"logps/chosen": -1.577460765838623,
"logps/rejected": -2.302645206451416,
"loss": 1.6644,
"nll_loss": 1.6277295351028442,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1577460616827011,
"rewards/margins": 0.07251846790313721,
"rewards/rejected": -0.23026452958583832,
"step": 490
},
{
"epoch": 1.3534691059716948,
"grad_norm": 0.20365531742572784,
"learning_rate": 3.355887076004345e-06,
"log_odds_chosen": 0.7709956765174866,
"log_odds_ratio": -0.383152574300766,
"logits/chosen": -0.5297287702560425,
"logits/rejected": -1.6290040016174316,
"logps/chosen": -1.4873484373092651,
"logps/rejected": -2.12845778465271,
"loss": 1.5899,
"nll_loss": 1.551561713218689,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.14873485267162323,
"rewards/margins": 0.06411094218492508,
"rewards/rejected": -0.21284577250480652,
"step": 491
},
{
"epoch": 1.3562305833620987,
"grad_norm": 0.18781210482120514,
"learning_rate": 3.3483527203914694e-06,
"log_odds_chosen": 0.8746954798698425,
"log_odds_ratio": -0.3559949994087219,
"logits/chosen": -0.3871724307537079,
"logits/rejected": -1.6837961673736572,
"logps/chosen": -1.5181317329406738,
"logps/rejected": -2.254382610321045,
"loss": 1.613,
"nll_loss": 1.5773670673370361,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.15181316435337067,
"rewards/margins": 0.0736251100897789,
"rewards/rejected": -0.22543828189373016,
"step": 492
},
{
"epoch": 1.3589920607525026,
"grad_norm": 0.18413974344730377,
"learning_rate": 3.340809646641805e-06,
"log_odds_chosen": 0.6966791749000549,
"log_odds_ratio": -0.4073706269264221,
"logits/chosen": -0.32745176553726196,
"logits/rejected": -1.734323501586914,
"logps/chosen": -1.5864430665969849,
"logps/rejected": -2.1757755279541016,
"loss": 1.6634,
"nll_loss": 1.6226191520690918,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.15864431858062744,
"rewards/margins": 0.05893322825431824,
"rewards/rejected": -0.21757756173610687,
"step": 493
},
{
"epoch": 1.3617535381429065,
"grad_norm": 0.1795225441455841,
"learning_rate": 3.333257932272105e-06,
"log_odds_chosen": 0.7916382551193237,
"log_odds_ratio": -0.3854186236858368,
"logits/chosen": -0.3654117286205292,
"logits/rejected": -1.731317400932312,
"logps/chosen": -1.4387598037719727,
"logps/rejected": -2.0977869033813477,
"loss": 1.5401,
"nll_loss": 1.5015188455581665,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.14387598633766174,
"rewards/margins": 0.0659027174115181,
"rewards/rejected": -0.20977871119976044,
"step": 494
},
{
"epoch": 1.3645150155333103,
"grad_norm": 0.19810736179351807,
"learning_rate": 3.3256976548879183e-06,
"log_odds_chosen": 0.7499178647994995,
"log_odds_ratio": -0.391695499420166,
"logits/chosen": -0.4586635231971741,
"logits/rejected": -1.5185637474060059,
"logps/chosen": -1.494145393371582,
"logps/rejected": -2.1175270080566406,
"loss": 1.591,
"nll_loss": 1.551873803138733,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.1494145393371582,
"rewards/margins": 0.06233816593885422,
"rewards/rejected": -0.21175269782543182,
"step": 495
},
{
"epoch": 1.3672764929237142,
"grad_norm": 0.17777174711227417,
"learning_rate": 3.3181288921827925e-06,
"log_odds_chosen": 0.8323229551315308,
"log_odds_ratio": -0.37165793776512146,
"logits/chosen": -0.3486481308937073,
"logits/rejected": -1.4729896783828735,
"logps/chosen": -1.3969337940216064,
"logps/rejected": -2.0784289836883545,
"loss": 1.5068,
"nll_loss": 1.4696803092956543,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.13969337940216064,
"rewards/margins": 0.06814949959516525,
"rewards/rejected": -0.2078428864479065,
"step": 496
},
{
"epoch": 1.3700379703141181,
"grad_norm": 0.18033576011657715,
"learning_rate": 3.310551721937475e-06,
"log_odds_chosen": 0.7983404994010925,
"log_odds_ratio": -0.3745984435081482,
"logits/chosen": -0.3823201656341553,
"logits/rejected": -1.5644809007644653,
"logps/chosen": -1.4277198314666748,
"logps/rejected": -2.0840868949890137,
"loss": 1.5007,
"nll_loss": 1.4631915092468262,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.14277197420597076,
"rewards/margins": 0.06563669443130493,
"rewards/rejected": -0.2084086835384369,
"step": 497
},
{
"epoch": 1.372799447704522,
"grad_norm": 0.19242247939109802,
"learning_rate": 3.3029662220191146e-06,
"log_odds_chosen": 0.8101353645324707,
"log_odds_ratio": -0.37228965759277344,
"logits/chosen": -0.4509270191192627,
"logits/rejected": -1.8056501150131226,
"logps/chosen": -1.3381811380386353,
"logps/rejected": -1.9927046298980713,
"loss": 1.4395,
"nll_loss": 1.4023054838180542,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.133818119764328,
"rewards/margins": 0.06545236706733704,
"rewards/rejected": -0.19927047193050385,
"step": 498
},
{
"epoch": 1.3755609250949257,
"grad_norm": 0.1753472536802292,
"learning_rate": 3.2953724703804572e-06,
"log_odds_chosen": 0.892001211643219,
"log_odds_ratio": -0.35052934288978577,
"logits/chosen": -0.4044141173362732,
"logits/rejected": -1.8937467336654663,
"logps/chosen": -1.407699704170227,
"logps/rejected": -2.1476714611053467,
"loss": 1.4972,
"nll_loss": 1.4621860980987549,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.14076997339725494,
"rewards/margins": 0.07399718463420868,
"rewards/rejected": -0.21476714313030243,
"step": 499
},
{
"epoch": 1.3783224024853298,
"grad_norm": 0.1958237588405609,
"learning_rate": 3.2877705450590525e-06,
"log_odds_chosen": 0.7319251298904419,
"log_odds_ratio": -0.396656334400177,
"logits/chosen": -0.3846544325351715,
"logits/rejected": -1.6542454957962036,
"logps/chosen": -1.4686310291290283,
"logps/rejected": -2.0715951919555664,
"loss": 1.5948,
"nll_loss": 1.5551836490631104,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.14686310291290283,
"rewards/margins": 0.06029640883207321,
"rewards/rejected": -0.20715951919555664,
"step": 500
}
],
"logging_steps": 1,
"max_steps": 1089,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}