silviasapora's picture
Model save
67102e2 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.9765925925925925,
"eval_steps": 500,
"global_step": 315,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.047407407407407405,
"grad_norm": 979.1563110351562,
"learning_rate": 7.8125e-06,
"log_odds_chosen": -1.0658155679702759,
"log_odds_ratio": -11.016766548156738,
"logps/chosen": -22.7292423248291,
"logps/rejected": -21.66365623474121,
"loss": 332.1951,
"nll_loss": 9.848306655883789,
"rewards/accuracies": 0.4781250059604645,
"rewards/chosen": -11.36462116241455,
"rewards/margins": -0.5327932238578796,
"rewards/rejected": -10.831828117370605,
"step": 5
},
{
"epoch": 0.09481481481481481,
"grad_norm": 610.80419921875,
"learning_rate": 1.5625e-05,
"log_odds_chosen": -3.1405487060546875,
"log_odds_ratio": -11.108110427856445,
"logps/chosen": -22.11328887939453,
"logps/rejected": -18.973073959350586,
"loss": 331.6769,
"nll_loss": 8.794797897338867,
"rewards/accuracies": 0.44062501192092896,
"rewards/chosen": -11.056644439697266,
"rewards/margins": -1.5701072216033936,
"rewards/rejected": -9.486536979675293,
"step": 10
},
{
"epoch": 0.14222222222222222,
"grad_norm": 1824.323974609375,
"learning_rate": 2.34375e-05,
"log_odds_chosen": -1.2696795463562012,
"log_odds_ratio": -11.54572868347168,
"logps/chosen": -22.386913299560547,
"logps/rejected": -21.115848541259766,
"loss": 280.4082,
"nll_loss": 8.127224922180176,
"rewards/accuracies": 0.515625,
"rewards/chosen": -11.193456649780273,
"rewards/margins": -0.6355326771736145,
"rewards/rejected": -10.557924270629883,
"step": 15
},
{
"epoch": 0.18962962962962962,
"grad_norm": 7787.6650390625,
"learning_rate": 3.125e-05,
"log_odds_chosen": -5.64935302734375,
"log_odds_ratio": -11.974640846252441,
"logps/chosen": -21.131267547607422,
"logps/rejected": -15.482928276062012,
"loss": 324.5447,
"nll_loss": 7.3178510665893555,
"rewards/accuracies": 0.4437499940395355,
"rewards/chosen": -10.565633773803711,
"rewards/margins": -2.824169635772705,
"rewards/rejected": -7.741464138031006,
"step": 20
},
{
"epoch": 0.23703703703703705,
"grad_norm": 423.42431640625,
"learning_rate": 3.90625e-05,
"log_odds_chosen": 0.17381250858306885,
"log_odds_ratio": -3.9062447547912598,
"logps/chosen": -8.586548805236816,
"logps/rejected": -8.75158405303955,
"loss": 118.2598,
"nll_loss": 3.77813720703125,
"rewards/accuracies": 0.515625,
"rewards/chosen": -4.293274402618408,
"rewards/margins": 0.0825173407793045,
"rewards/rejected": -4.375792026519775,
"step": 25
},
{
"epoch": 0.28444444444444444,
"grad_norm": 249.5974578857422,
"learning_rate": 4.6875e-05,
"log_odds_chosen": 0.07060544192790985,
"log_odds_ratio": -0.9323924779891968,
"logps/chosen": -1.9480135440826416,
"logps/rejected": -2.0069220066070557,
"loss": 64.7047,
"nll_loss": 2.0514745712280273,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.9740067720413208,
"rewards/margins": 0.029454167932271957,
"rewards/rejected": -1.0034610033035278,
"step": 30
},
{
"epoch": 0.33185185185185184,
"grad_norm": 263.46826171875,
"learning_rate": 4.998613757348784e-05,
"log_odds_chosen": 0.32897791266441345,
"log_odds_ratio": -0.8598791360855103,
"logps/chosen": -1.866418480873108,
"logps/rejected": -2.1647419929504395,
"loss": 57.6721,
"nll_loss": 1.9514150619506836,
"rewards/accuracies": 0.534375011920929,
"rewards/chosen": -0.933209240436554,
"rewards/margins": 0.14916184544563293,
"rewards/rejected": -1.0823709964752197,
"step": 35
},
{
"epoch": 0.37925925925925924,
"grad_norm": 865.616455078125,
"learning_rate": 4.990147841143462e-05,
"log_odds_chosen": 0.44308799505233765,
"log_odds_ratio": -0.7074758410453796,
"logps/chosen": -1.699676513671875,
"logps/rejected": -2.0793070793151855,
"loss": 55.384,
"nll_loss": 1.9205652475357056,
"rewards/accuracies": 0.6468750238418579,
"rewards/chosen": -0.8498382568359375,
"rewards/margins": 0.18981528282165527,
"rewards/rejected": -1.0396535396575928,
"step": 40
},
{
"epoch": 0.4266666666666667,
"grad_norm": 1629.7843017578125,
"learning_rate": 4.97401218720448e-05,
"log_odds_chosen": 1.2202234268188477,
"log_odds_ratio": -0.6341068148612976,
"logps/chosen": -1.685363531112671,
"logps/rejected": -2.8407630920410156,
"loss": 41.6093,
"nll_loss": 1.8779910802841187,
"rewards/accuracies": 0.6656249761581421,
"rewards/chosen": -0.8426817655563354,
"rewards/margins": 0.5776998400688171,
"rewards/rejected": -1.4203815460205078,
"step": 45
},
{
"epoch": 0.4740740740740741,
"grad_norm": 15368.978515625,
"learning_rate": 4.9502564938797946e-05,
"log_odds_chosen": 17.89919662475586,
"log_odds_ratio": -4.792720794677734,
"logps/chosen": -9.761907577514648,
"logps/rejected": -27.604618072509766,
"loss": -36.8673,
"nll_loss": 7.769252777099609,
"rewards/accuracies": 0.703125,
"rewards/chosen": -4.880953788757324,
"rewards/margins": 8.921355247497559,
"rewards/rejected": -13.802309036254883,
"step": 50
},
{
"epoch": 0.5214814814814814,
"grad_norm": 45768.3046875,
"learning_rate": 4.918953929490768e-05,
"log_odds_chosen": 64.66633605957031,
"log_odds_ratio": -11.930875778198242,
"logps/chosen": -29.84969711303711,
"logps/rejected": -94.47431945800781,
"loss": -415.8344,
"nll_loss": 19.317481994628906,
"rewards/accuracies": 0.668749988079071,
"rewards/chosen": -14.924848556518555,
"rewards/margins": 32.31230926513672,
"rewards/rejected": -47.237159729003906,
"step": 55
},
{
"epoch": 0.5688888888888889,
"grad_norm": 19440.806640625,
"learning_rate": 4.88020090697132e-05,
"log_odds_chosen": 115.04557037353516,
"log_odds_ratio": -21.812177658081055,
"logps/chosen": -38.03327560424805,
"logps/rejected": -153.05471801757812,
"loss": -922.3747,
"nll_loss": 28.686513900756836,
"rewards/accuracies": 0.6468750238418579,
"rewards/chosen": -19.016637802124023,
"rewards/margins": 57.51072311401367,
"rewards/rejected": -76.52735900878906,
"step": 60
},
{
"epoch": 0.6162962962962963,
"grad_norm": 451016.8125,
"learning_rate": 4.834116786912897e-05,
"log_odds_chosen": 97.50765228271484,
"log_odds_ratio": -31.05093002319336,
"logps/chosen": -40.515743255615234,
"logps/rejected": -137.99923706054688,
"loss": -469.6748,
"nll_loss": 34.06440353393555,
"rewards/accuracies": 0.640625,
"rewards/chosen": -20.257871627807617,
"rewards/margins": 48.74174880981445,
"rewards/rejected": -68.99961853027344,
"step": 65
},
{
"epoch": 0.6637037037037037,
"grad_norm": 94608.5,
"learning_rate": 4.7808435099299045e-05,
"log_odds_chosen": 338.44207763671875,
"log_odds_ratio": -85.28483581542969,
"logps/chosen": -131.35760498046875,
"logps/rejected": -469.7542419433594,
"loss": -2686.7586,
"nll_loss": 85.23709869384766,
"rewards/accuracies": 0.71875,
"rewards/chosen": -65.67880249023438,
"rewards/margins": 169.1982879638672,
"rewards/rejected": -234.8771209716797,
"step": 70
},
{
"epoch": 0.7111111111111111,
"grad_norm": 33316.55078125,
"learning_rate": 4.720545159477922e-05,
"log_odds_chosen": 127.3674545288086,
"log_odds_ratio": -31.760257720947266,
"logps/chosen": -35.016143798828125,
"logps/rejected": -162.33889770507812,
"loss": -1404.02,
"nll_loss": 19.785737991333008,
"rewards/accuracies": 0.6656249761581421,
"rewards/chosen": -17.508071899414062,
"rewards/margins": 63.66136932373047,
"rewards/rejected": -81.16944885253906,
"step": 75
},
{
"epoch": 0.7585185185185185,
"grad_norm": 181842.0625,
"learning_rate": 4.653407456471222e-05,
"log_odds_chosen": 319.1683654785156,
"log_odds_ratio": -147.62722778320312,
"logps/chosen": -289.9014892578125,
"logps/rejected": -609.013916015625,
"loss": 384.325,
"nll_loss": 171.5663604736328,
"rewards/accuracies": 0.65625,
"rewards/chosen": -144.95074462890625,
"rewards/margins": 159.55621337890625,
"rewards/rejected": -304.5069580078125,
"step": 80
},
{
"epoch": 0.8059259259259259,
"grad_norm": 52143.23828125,
"learning_rate": 4.579637187256222e-05,
"log_odds_chosen": 345.2151794433594,
"log_odds_ratio": -104.02201843261719,
"logps/chosen": -158.8235626220703,
"logps/rejected": -504.00628662109375,
"loss": -2944.3695,
"nll_loss": 80.5798110961914,
"rewards/accuracies": 0.653124988079071,
"rewards/chosen": -79.41178131103516,
"rewards/margins": 172.5913543701172,
"rewards/rejected": -252.00314331054688,
"step": 85
},
{
"epoch": 0.8533333333333334,
"grad_norm": 36943.46875,
"learning_rate": 4.499461566702685e-05,
"log_odds_chosen": 404.80950927734375,
"log_odds_ratio": -105.06642150878906,
"logps/chosen": -144.46873474121094,
"logps/rejected": -549.2257690429688,
"loss": -3396.4172,
"nll_loss": 96.24049377441406,
"rewards/accuracies": 0.690625011920929,
"rewards/chosen": -72.23436737060547,
"rewards/margins": 202.37850952148438,
"rewards/rejected": -274.6128845214844,
"step": 90
},
{
"epoch": 0.9007407407407407,
"grad_norm": 23182.97265625,
"learning_rate": 4.413127538374411e-05,
"log_odds_chosen": 131.83743286132812,
"log_odds_ratio": -14.629719734191895,
"logps/chosen": -16.93549919128418,
"logps/rejected": -148.75064086914062,
"loss": -1814.8482,
"nll_loss": 9.193554878234863,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -8.46774959564209,
"rewards/margins": 65.90757751464844,
"rewards/rejected": -74.37532043457031,
"step": 95
},
{
"epoch": 0.9481481481481482,
"grad_norm": 126997.953125,
"learning_rate": 4.320901013934887e-05,
"log_odds_chosen": 326.8649597167969,
"log_odds_ratio": -81.38832092285156,
"logps/chosen": -165.1029510498047,
"logps/rejected": -491.9105529785156,
"loss": -1725.1,
"nll_loss": 109.4944076538086,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -82.55147552490234,
"rewards/margins": 163.40377807617188,
"rewards/rejected": -245.9552764892578,
"step": 100
},
{
"epoch": 0.9955555555555555,
"grad_norm": 48039.47265625,
"learning_rate": 4.223066054130568e-05,
"log_odds_chosen": 290.2133483886719,
"log_odds_ratio": -32.880943298339844,
"logps/chosen": -55.933021545410156,
"logps/rejected": -346.0943908691406,
"loss": -3505.9434,
"nll_loss": 35.519954681396484,
"rewards/accuracies": 0.6875,
"rewards/chosen": -27.966510772705078,
"rewards/margins": 145.0806884765625,
"rewards/rejected": -173.0471954345703,
"step": 105
},
{
"epoch": 1.037925925925926,
"grad_norm": 62192.86328125,
"learning_rate": 4.1199239938743797e-05,
"log_odds_chosen": 424.4546203613281,
"log_odds_ratio": -188.85874938964844,
"logps/chosen": -245.33665466308594,
"logps/rejected": -669.7190551757812,
"loss": -1119.4087,
"nll_loss": 173.051025390625,
"rewards/accuracies": 0.6993007063865662,
"rewards/chosen": -122.66832733154297,
"rewards/margins": 212.19119262695312,
"rewards/rejected": -334.8595275878906,
"step": 110
},
{
"epoch": 1.0853333333333333,
"grad_norm": 63345.234375,
"learning_rate": 4.0117925141242174e-05,
"log_odds_chosen": 455.9835510253906,
"log_odds_ratio": -80.30255889892578,
"logps/chosen": -111.84346771240234,
"logps/rejected": -567.7550048828125,
"loss": -4980.8164,
"nll_loss": 72.3052749633789,
"rewards/accuracies": 0.7093750238418579,
"rewards/chosen": -55.92173385620117,
"rewards/margins": 227.9557647705078,
"rewards/rejected": -283.87750244140625,
"step": 115
},
{
"epoch": 1.1327407407407408,
"grad_norm": 270150.0625,
"learning_rate": 3.899004663415084e-05,
"log_odds_chosen": 432.2300720214844,
"log_odds_ratio": -57.856689453125,
"logps/chosen": -103.71064758300781,
"logps/rejected": -535.87255859375,
"loss": -4734.0852,
"nll_loss": 68.14077758789062,
"rewards/accuracies": 0.6812499761581421,
"rewards/chosen": -51.855323791503906,
"rewards/margins": 216.0809326171875,
"rewards/rejected": -267.936279296875,
"step": 120
},
{
"epoch": 1.1801481481481482,
"grad_norm": 84333.71875,
"learning_rate": 3.781907832058587e-05,
"log_odds_chosen": 382.97308349609375,
"log_odds_ratio": -112.0127944946289,
"logps/chosen": -215.1722412109375,
"logps/rejected": -598.0943603515625,
"loss": -1093.5601,
"nll_loss": 157.28729248046875,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -107.58612060546875,
"rewards/margins": 191.46104431152344,
"rewards/rejected": -299.04718017578125,
"step": 125
},
{
"epoch": 1.2275555555555555,
"grad_norm": 86323.7890625,
"learning_rate": 3.660862682169282e-05,
"log_odds_chosen": 347.2674865722656,
"log_odds_ratio": -29.318981170654297,
"logps/chosen": -52.60059356689453,
"logps/rejected": -399.80438232421875,
"loss": -4269.5184,
"nll_loss": 40.1794548034668,
"rewards/accuracies": 0.684374988079071,
"rewards/chosen": -26.300296783447266,
"rewards/margins": 173.60189819335938,
"rewards/rejected": -199.90219116210938,
"step": 130
},
{
"epoch": 1.274962962962963,
"grad_norm": 76375.984375,
"learning_rate": 3.5362420368134356e-05,
"log_odds_chosen": 334.5088806152344,
"log_odds_ratio": -122.74652099609375,
"logps/chosen": -183.52720642089844,
"logps/rejected": -517.9742431640625,
"loss": -1044.6864,
"nll_loss": 134.5770721435547,
"rewards/accuracies": 0.65625,
"rewards/chosen": -91.76360321044922,
"rewards/margins": 167.22354125976562,
"rewards/rejected": -258.98712158203125,
"step": 135
},
{
"epoch": 1.3223703703703704,
"grad_norm": 112008.921875,
"learning_rate": 3.408429731701635e-05,
"log_odds_chosen": 301.0145568847656,
"log_odds_ratio": -91.36270904541016,
"logps/chosen": -115.39459228515625,
"logps/rejected": -416.3567810058594,
"loss": -2338.4008,
"nll_loss": 77.40607452392578,
"rewards/accuracies": 0.65625,
"rewards/chosen": -57.697296142578125,
"rewards/margins": 150.48110961914062,
"rewards/rejected": -208.1783905029297,
"step": 140
},
{
"epoch": 1.3697777777777778,
"grad_norm": 194621.921875,
"learning_rate": 3.2778194329621104e-05,
"log_odds_chosen": 564.0087280273438,
"log_odds_ratio": -94.19833374023438,
"logps/chosen": -199.311767578125,
"logps/rejected": -763.2530517578125,
"loss": -4740.4227,
"nll_loss": 133.83242797851562,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -99.6558837890625,
"rewards/margins": 281.97064208984375,
"rewards/rejected": -381.62652587890625,
"step": 145
},
{
"epoch": 1.417185185185185,
"grad_norm": 85731.828125,
"learning_rate": 3.144813424636031e-05,
"log_odds_chosen": 521.4227294921875,
"log_odds_ratio": -95.4295425415039,
"logps/chosen": -129.7139129638672,
"logps/rejected": -651.0548706054688,
"loss": -5899.5816,
"nll_loss": 76.30853271484375,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -64.8569564819336,
"rewards/margins": 260.6704406738281,
"rewards/rejected": -325.5274353027344,
"step": 150
},
{
"epoch": 1.4645925925925927,
"grad_norm": 45407.44140625,
"learning_rate": 3.0098213696293542e-05,
"log_odds_chosen": 289.8687438964844,
"log_odds_ratio": -128.76556396484375,
"logps/chosen": -152.67922973632812,
"logps/rejected": -442.49920654296875,
"loss": -1464.2162,
"nll_loss": 99.15326690673828,
"rewards/accuracies": 0.6312500238418579,
"rewards/chosen": -76.33961486816406,
"rewards/margins": 144.91000366210938,
"rewards/rejected": -221.24960327148438,
"step": 155
},
{
"epoch": 1.512,
"grad_norm": 53448.38671875,
"learning_rate": 2.8732590479375165e-05,
"log_odds_chosen": 352.0550231933594,
"log_odds_ratio": -66.38088989257812,
"logps/chosen": -75.05381774902344,
"logps/rejected": -427.0362854003906,
"loss": -4057.2652,
"nll_loss": 49.201690673828125,
"rewards/accuracies": 0.6875,
"rewards/chosen": -37.52690887451172,
"rewards/margins": 175.99124145507812,
"rewards/rejected": -213.5181427001953,
"step": 160
},
{
"epoch": 1.5594074074074074,
"grad_norm": 157314.0,
"learning_rate": 2.7355470760292956e-05,
"log_odds_chosen": 414.6659240722656,
"log_odds_ratio": -107.09364318847656,
"logps/chosen": -161.87742614746094,
"logps/rejected": -576.4956665039062,
"loss": -2792.6059,
"nll_loss": 120.0401840209961,
"rewards/accuracies": 0.671875,
"rewards/chosen": -80.93871307373047,
"rewards/margins": 207.30908203125,
"rewards/rejected": -288.2478332519531,
"step": 165
},
{
"epoch": 1.6068148148148147,
"grad_norm": 136321.578125,
"learning_rate": 2.597109611334169e-05,
"log_odds_chosen": 488.1697692871094,
"log_odds_ratio": -115.3565444946289,
"logps/chosen": -189.2110595703125,
"logps/rejected": -677.3059692382812,
"loss": -3486.9227,
"nll_loss": 135.0811309814453,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -94.60552978515625,
"rewards/margins": 244.04745483398438,
"rewards/rejected": -338.6529846191406,
"step": 170
},
{
"epoch": 1.6542222222222223,
"grad_norm": 81299.7578125,
"learning_rate": 2.458373045823404e-05,
"log_odds_chosen": 418.0018005371094,
"log_odds_ratio": -95.27189636230469,
"logps/chosen": -160.82423400878906,
"logps/rejected": -578.742919921875,
"loss": -3317.5164,
"nll_loss": 105.28697204589844,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -80.41211700439453,
"rewards/margins": 208.9593505859375,
"rewards/rejected": -289.3714599609375,
"step": 175
},
{
"epoch": 1.7016296296296296,
"grad_norm": 231117.21875,
"learning_rate": 2.3197646927086697e-05,
"log_odds_chosen": 477.7691345214844,
"log_odds_ratio": -80.58141326904297,
"logps/chosen": -139.1304931640625,
"logps/rejected": -616.8330688476562,
"loss": -5099.5727,
"nll_loss": 79.48963165283203,
"rewards/accuracies": 0.703125,
"rewards/chosen": -69.56524658203125,
"rewards/margins": 238.85128784179688,
"rewards/rejected": -308.4165344238281,
"step": 180
},
{
"epoch": 1.749037037037037,
"grad_norm": 43973.0546875,
"learning_rate": 2.1817114703032176e-05,
"log_odds_chosen": 493.30084228515625,
"log_odds_ratio": -88.9622802734375,
"logps/chosen": -138.52340698242188,
"logps/rejected": -631.7484741210938,
"loss": -5567.6691,
"nll_loss": 72.62284851074219,
"rewards/accuracies": 0.734375,
"rewards/chosen": -69.26170349121094,
"rewards/margins": 246.612548828125,
"rewards/rejected": -315.8742370605469,
"step": 185
},
{
"epoch": 1.7964444444444445,
"grad_norm": 168100.390625,
"learning_rate": 2.0446385870993467e-05,
"log_odds_chosen": 627.9781494140625,
"log_odds_ratio": -72.4460220336914,
"logps/chosen": -129.3475341796875,
"logps/rejected": -757.2415771484375,
"loss": -7597.6812,
"nll_loss": 76.51949310302734,
"rewards/accuracies": 0.778124988079071,
"rewards/chosen": -64.67376708984375,
"rewards/margins": 313.94708251953125,
"rewards/rejected": -378.62078857421875,
"step": 190
},
{
"epoch": 1.8438518518518519,
"grad_norm": 391024.46875,
"learning_rate": 1.9089682321121834e-05,
"log_odds_chosen": 661.0269775390625,
"log_odds_ratio": -149.3189239501953,
"logps/chosen": -243.3693084716797,
"logps/rejected": -904.3167114257812,
"loss": -5584.2465,
"nll_loss": 155.9659881591797,
"rewards/accuracies": 0.7406250238418579,
"rewards/chosen": -121.68465423583984,
"rewards/margins": 330.47369384765625,
"rewards/rejected": -452.1583557128906,
"step": 195
},
{
"epoch": 1.8912592592592592,
"grad_norm": 125059.8046875,
"learning_rate": 1.775118274523545e-05,
"log_odds_chosen": 587.4786376953125,
"log_odds_ratio": -195.35035705566406,
"logps/chosen": -272.13336181640625,
"logps/rejected": -859.5462036132812,
"loss": -3844.7461,
"nll_loss": 173.55812072753906,
"rewards/accuracies": 0.6812499761581421,
"rewards/chosen": -136.06668090820312,
"rewards/margins": 293.70648193359375,
"rewards/rejected": -429.7731018066406,
"step": 200
},
{
"epoch": 1.9386666666666668,
"grad_norm": 130243.34375,
"learning_rate": 1.643500976631037e-05,
"log_odds_chosen": 501.4483337402344,
"log_odds_ratio": -159.20835876464844,
"logps/chosen": -232.3970489501953,
"logps/rejected": -733.7698974609375,
"loss": -3511.2203,
"nll_loss": 140.96083068847656,
"rewards/accuracies": 0.7093750238418579,
"rewards/chosen": -116.19852447509766,
"rewards/margins": 250.6864776611328,
"rewards/rejected": -366.88494873046875,
"step": 205
},
{
"epoch": 1.986074074074074,
"grad_norm": 173549.265625,
"learning_rate": 1.514521724066537e-05,
"log_odds_chosen": 486.5333557128906,
"log_odds_ratio": -179.15432739257812,
"logps/chosen": -247.67678833007812,
"logps/rejected": -734.17041015625,
"loss": -3348.5012,
"nll_loss": 138.6061553955078,
"rewards/accuracies": 0.640625,
"rewards/chosen": -123.83839416503906,
"rewards/margins": 243.2467803955078,
"rewards/rejected": -367.085205078125,
"step": 210
},
{
"epoch": 2.0284444444444443,
"grad_norm": 68642.6796875,
"learning_rate": 1.3885777771950348e-05,
"log_odds_chosen": 377.7022705078125,
"log_odds_ratio": -98.49359130859375,
"logps/chosen": -101.93464660644531,
"logps/rejected": -479.5637512207031,
"loss": -3602.625,
"nll_loss": 62.84866714477539,
"rewards/accuracies": 0.6643356680870056,
"rewards/chosen": -50.967323303222656,
"rewards/margins": 188.81454467773438,
"rewards/rejected": -239.78187561035156,
"step": 215
},
{
"epoch": 2.075851851851852,
"grad_norm": 55023.109375,
"learning_rate": 1.2660570475395683e-05,
"log_odds_chosen": 211.233154296875,
"log_odds_ratio": -52.553009033203125,
"logps/chosen": -59.05438995361328,
"logps/rejected": -270.23480224609375,
"loss": -2151.8414,
"nll_loss": 38.34517288208008,
"rewards/accuracies": 0.65625,
"rewards/chosen": -29.52719497680664,
"rewards/margins": 105.5902099609375,
"rewards/rejected": -135.11740112304688,
"step": 220
},
{
"epoch": 2.1232592592592594,
"grad_norm": 82995.109375,
"learning_rate": 1.1473369030008974e-05,
"log_odds_chosen": 543.0647583007812,
"log_odds_ratio": -64.39569091796875,
"logps/chosen": -87.19830322265625,
"logps/rejected": -630.1611328125,
"loss": -6956.893,
"nll_loss": 54.07854080200195,
"rewards/accuracies": 0.753125011920929,
"rewards/chosen": -43.599151611328125,
"rewards/margins": 271.4814147949219,
"rewards/rejected": -315.08056640625,
"step": 225
},
{
"epoch": 2.1706666666666665,
"grad_norm": 512102.65625,
"learning_rate": 1.0327830055518842e-05,
"log_odds_chosen": 691.300537109375,
"log_odds_ratio": -71.8380355834961,
"logps/chosen": -161.6082763671875,
"logps/rejected": -852.82275390625,
"loss": -8369.6391,
"nll_loss": 84.05608367919922,
"rewards/accuracies": 0.721875011920929,
"rewards/chosen": -80.80413818359375,
"rewards/margins": 345.6072692871094,
"rewards/rejected": -426.411376953125,
"step": 230
},
{
"epoch": 2.218074074074074,
"grad_norm": 81392.453125,
"learning_rate": 9.227481849865235e-06,
"log_odds_chosen": 758.3954467773438,
"log_odds_ratio": -129.21971130371094,
"logps/chosen": -269.18768310546875,
"logps/rejected": -1027.5037841796875,
"loss": -7679.0133,
"nll_loss": 139.18893432617188,
"rewards/accuracies": 0.715624988079071,
"rewards/chosen": -134.59384155273438,
"rewards/margins": 379.1580810546875,
"rewards/rejected": -513.7518920898438,
"step": 235
},
{
"epoch": 2.2654814814814817,
"grad_norm": 156503.890625,
"learning_rate": 8.175713521924978e-06,
"log_odds_chosen": 703.6713256835938,
"log_odds_ratio": -142.05674743652344,
"logps/chosen": -237.63320922851562,
"logps/rejected": -941.2130126953125,
"loss": -6359.4469,
"nll_loss": 153.05723571777344,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -118.81660461425781,
"rewards/margins": 351.7899475097656,
"rewards/rejected": -470.60650634765625,
"step": 240
},
{
"epoch": 2.3128888888888888,
"grad_norm": 110527.40625,
"learning_rate": 7.1757645529443665e-06,
"log_odds_chosen": 720.1389770507812,
"log_odds_ratio": -68.03697967529297,
"logps/chosen": -130.52520751953125,
"logps/rejected": -850.5823974609375,
"loss": -9273.2656,
"nll_loss": 70.23912811279297,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -65.26260375976562,
"rewards/margins": 360.0286560058594,
"rewards/rejected": -425.29119873046875,
"step": 245
},
{
"epoch": 2.3602962962962963,
"grad_norm": 128753.984375,
"learning_rate": 6.230714818829733e-06,
"log_odds_chosen": 590.6346435546875,
"log_odds_ratio": -37.9924201965332,
"logps/chosen": -56.76947021484375,
"logps/rejected": -647.3065795898438,
"loss": -8556.7602,
"nll_loss": 27.869800567626953,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": -28.384735107421875,
"rewards/margins": 295.2685852050781,
"rewards/rejected": -323.6532897949219,
"step": 250
},
{
"epoch": 2.407703703703704,
"grad_norm": 164814.296875,
"learning_rate": 5.343475104027743e-06,
"log_odds_chosen": 621.853515625,
"log_odds_ratio": -83.08097076416016,
"logps/chosen": -144.10885620117188,
"logps/rejected": -765.87646484375,
"loss": -7296.7188,
"nll_loss": 82.86141204833984,
"rewards/accuracies": 0.715624988079071,
"rewards/chosen": -72.05442810058594,
"rewards/margins": 310.88385009765625,
"rewards/rejected": -382.938232421875,
"step": 255
},
{
"epoch": 2.455111111111111,
"grad_norm": 141456.359375,
"learning_rate": 4.516778136213037e-06,
"log_odds_chosen": 636.0737915039062,
"log_odds_ratio": -85.68701171875,
"logps/chosen": -142.60003662109375,
"logps/rejected": -778.5829467773438,
"loss": -7366.8719,
"nll_loss": 87.77666473388672,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -71.30001831054688,
"rewards/margins": 317.991455078125,
"rewards/rejected": -389.2914733886719,
"step": 260
},
{
"epoch": 2.5025185185185186,
"grad_norm": 186711.8125,
"learning_rate": 3.7531701693965554e-06,
"log_odds_chosen": 725.6994018554688,
"log_odds_ratio": -115.44535827636719,
"logps/chosen": -196.95616149902344,
"logps/rejected": -922.5770263671875,
"loss": -8682.6156,
"nll_loss": 91.47877502441406,
"rewards/accuracies": 0.684374988079071,
"rewards/chosen": -98.47808074951172,
"rewards/margins": 362.8104553222656,
"rewards/rejected": -461.28851318359375,
"step": 265
},
{
"epoch": 2.549925925925926,
"grad_norm": 283810.84375,
"learning_rate": 3.055003141378948e-06,
"log_odds_chosen": 735.640869140625,
"log_odds_ratio": -116.50650787353516,
"logps/chosen": -200.65255737304688,
"logps/rejected": -936.19873046875,
"loss": -8755.6516,
"nll_loss": 94.15899658203125,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -100.32627868652344,
"rewards/margins": 367.77301025390625,
"rewards/rejected": -468.099365234375,
"step": 270
},
{
"epoch": 2.5973333333333333,
"grad_norm": 134505.046875,
"learning_rate": 2.424427429704365e-06,
"log_odds_chosen": 606.5227661132812,
"log_odds_ratio": -115.72232818603516,
"logps/chosen": -185.85130310058594,
"logps/rejected": -792.2674560546875,
"loss": -6780.2891,
"nll_loss": 91.32408905029297,
"rewards/accuracies": 0.75,
"rewards/chosen": -92.92565155029297,
"rewards/margins": 303.2080993652344,
"rewards/rejected": -396.13372802734375,
"step": 275
},
{
"epoch": 2.644740740740741,
"grad_norm": 236743.3125,
"learning_rate": 1.8633852284264508e-06,
"log_odds_chosen": 721.7117309570312,
"log_odds_ratio": -87.48831176757812,
"logps/chosen": -122.89962005615234,
"logps/rejected": -844.5099487304688,
"loss": -9344.2578,
"nll_loss": 68.79702758789062,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": -61.44981002807617,
"rewards/margins": 360.80511474609375,
"rewards/rejected": -422.2549743652344,
"step": 280
},
{
"epoch": 2.6921481481481484,
"grad_norm": 172984.421875,
"learning_rate": 1.3736045660864034e-06,
"log_odds_chosen": 592.5506591796875,
"log_odds_ratio": -69.64677429199219,
"logps/chosen": -111.2774429321289,
"logps/rejected": -703.73583984375,
"loss": -7530.5437,
"nll_loss": 60.8997688293457,
"rewards/accuracies": 0.7406250238418579,
"rewards/chosen": -55.63872146606445,
"rewards/margins": 296.22918701171875,
"rewards/rejected": -351.867919921875,
"step": 285
},
{
"epoch": 2.7395555555555555,
"grad_norm": 116706.3125,
"learning_rate": 9.565939833279192e-07,
"log_odds_chosen": 557.0691528320312,
"log_odds_ratio": -115.3429183959961,
"logps/chosen": -170.6497344970703,
"logps/rejected": -727.6143798828125,
"loss": -5561.0809,
"nll_loss": 104.6985092163086,
"rewards/accuracies": 0.7281249761581421,
"rewards/chosen": -85.32486724853516,
"rewards/margins": 278.4822998046875,
"rewards/rejected": -363.80718994140625,
"step": 290
},
{
"epoch": 2.786962962962963,
"grad_norm": 98315.484375,
"learning_rate": 6.136378865420872e-07,
"log_odds_chosen": 574.1820678710938,
"log_odds_ratio": -110.40797424316406,
"logps/chosen": -160.60757446289062,
"logps/rejected": -734.6983642578125,
"loss": -6636.9953,
"nll_loss": 79.63924407958984,
"rewards/accuracies": 0.6937500238418579,
"rewards/chosen": -80.30378723144531,
"rewards/margins": 287.04534912109375,
"rewards/rejected": -367.34918212890625,
"step": 295
},
{
"epoch": 2.83437037037037,
"grad_norm": 91240.3984375,
"learning_rate": 3.45792591853214e-07,
"log_odds_chosen": 629.7505493164062,
"log_odds_ratio": -83.85370635986328,
"logps/chosen": -143.35501098632812,
"logps/rejected": -773.0218505859375,
"loss": -7824.6305,
"nll_loss": 70.31370544433594,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": -71.67750549316406,
"rewards/margins": 314.83343505859375,
"rewards/rejected": -386.51092529296875,
"step": 300
},
{
"epoch": 2.8817777777777778,
"grad_norm": 97470.9609375,
"learning_rate": 1.538830716302092e-07,
"log_odds_chosen": 651.5947265625,
"log_odds_ratio": -122.08070373535156,
"logps/chosen": -166.5752716064453,
"logps/rejected": -818.0435791015625,
"loss": -7674.3211,
"nll_loss": 85.91165924072266,
"rewards/accuracies": 0.721875011920929,
"rewards/chosen": -83.28763580322266,
"rewards/margins": 325.734130859375,
"rewards/rejected": -409.02178955078125,
"step": 305
},
{
"epoch": 2.9291851851851853,
"grad_norm": 98747.625,
"learning_rate": 3.8500413544415025e-08,
"log_odds_chosen": 705.1484985351562,
"log_odds_ratio": -87.88983154296875,
"logps/chosen": -134.3265838623047,
"logps/rejected": -839.3775634765625,
"loss": -8964.2422,
"nll_loss": 72.39295959472656,
"rewards/accuracies": 0.721875011920929,
"rewards/chosen": -67.16329193115234,
"rewards/margins": 352.5255126953125,
"rewards/rejected": -419.68878173828125,
"step": 310
},
{
"epoch": 2.9765925925925925,
"grad_norm": 118650.2578125,
"learning_rate": 0.0,
"log_odds_chosen": 748.1420288085938,
"log_odds_ratio": -78.33130645751953,
"logps/chosen": -122.74269104003906,
"logps/rejected": -870.7769775390625,
"loss": -9606.4719,
"nll_loss": 73.81488037109375,
"rewards/accuracies": 0.721875011920929,
"rewards/chosen": -61.37134552001953,
"rewards/margins": 374.01715087890625,
"rewards/rejected": -435.38848876953125,
"step": 315
},
{
"epoch": 2.9765925925925925,
"step": 315,
"total_flos": 0.0,
"train_loss": -4003.1467706589474,
"train_runtime": 9706.329,
"train_samples_per_second": 2.086,
"train_steps_per_second": 0.032
}
],
"logging_steps": 5,
"max_steps": 315,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 100000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}