|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.9765925925925925, |
|
"eval_steps": 500, |
|
"global_step": 315, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.047407407407407405, |
|
"grad_norm": 979.1563110351562, |
|
"learning_rate": 7.8125e-06, |
|
"log_odds_chosen": -1.0658155679702759, |
|
"log_odds_ratio": -11.016766548156738, |
|
"logps/chosen": -22.7292423248291, |
|
"logps/rejected": -21.66365623474121, |
|
"loss": 332.1951, |
|
"nll_loss": 9.848306655883789, |
|
"rewards/accuracies": 0.4781250059604645, |
|
"rewards/chosen": -11.36462116241455, |
|
"rewards/margins": -0.5327932238578796, |
|
"rewards/rejected": -10.831828117370605, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.09481481481481481, |
|
"grad_norm": 610.80419921875, |
|
"learning_rate": 1.5625e-05, |
|
"log_odds_chosen": -3.1405487060546875, |
|
"log_odds_ratio": -11.108110427856445, |
|
"logps/chosen": -22.11328887939453, |
|
"logps/rejected": -18.973073959350586, |
|
"loss": 331.6769, |
|
"nll_loss": 8.794797897338867, |
|
"rewards/accuracies": 0.44062501192092896, |
|
"rewards/chosen": -11.056644439697266, |
|
"rewards/margins": -1.5701072216033936, |
|
"rewards/rejected": -9.486536979675293, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.14222222222222222, |
|
"grad_norm": 1824.323974609375, |
|
"learning_rate": 2.34375e-05, |
|
"log_odds_chosen": -1.2696795463562012, |
|
"log_odds_ratio": -11.54572868347168, |
|
"logps/chosen": -22.386913299560547, |
|
"logps/rejected": -21.115848541259766, |
|
"loss": 280.4082, |
|
"nll_loss": 8.127224922180176, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": -11.193456649780273, |
|
"rewards/margins": -0.6355326771736145, |
|
"rewards/rejected": -10.557924270629883, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.18962962962962962, |
|
"grad_norm": 7787.6650390625, |
|
"learning_rate": 3.125e-05, |
|
"log_odds_chosen": -5.64935302734375, |
|
"log_odds_ratio": -11.974640846252441, |
|
"logps/chosen": -21.131267547607422, |
|
"logps/rejected": -15.482928276062012, |
|
"loss": 324.5447, |
|
"nll_loss": 7.3178510665893555, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": -10.565633773803711, |
|
"rewards/margins": -2.824169635772705, |
|
"rewards/rejected": -7.741464138031006, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.23703703703703705, |
|
"grad_norm": 423.42431640625, |
|
"learning_rate": 3.90625e-05, |
|
"log_odds_chosen": 0.17381250858306885, |
|
"log_odds_ratio": -3.9062447547912598, |
|
"logps/chosen": -8.586548805236816, |
|
"logps/rejected": -8.75158405303955, |
|
"loss": 118.2598, |
|
"nll_loss": 3.77813720703125, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": -4.293274402618408, |
|
"rewards/margins": 0.0825173407793045, |
|
"rewards/rejected": -4.375792026519775, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.28444444444444444, |
|
"grad_norm": 249.5974578857422, |
|
"learning_rate": 4.6875e-05, |
|
"log_odds_chosen": 0.07060544192790985, |
|
"log_odds_ratio": -0.9323924779891968, |
|
"logps/chosen": -1.9480135440826416, |
|
"logps/rejected": -2.0069220066070557, |
|
"loss": 64.7047, |
|
"nll_loss": 2.0514745712280273, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.9740067720413208, |
|
"rewards/margins": 0.029454167932271957, |
|
"rewards/rejected": -1.0034610033035278, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.33185185185185184, |
|
"grad_norm": 263.46826171875, |
|
"learning_rate": 4.998613757348784e-05, |
|
"log_odds_chosen": 0.32897791266441345, |
|
"log_odds_ratio": -0.8598791360855103, |
|
"logps/chosen": -1.866418480873108, |
|
"logps/rejected": -2.1647419929504395, |
|
"loss": 57.6721, |
|
"nll_loss": 1.9514150619506836, |
|
"rewards/accuracies": 0.534375011920929, |
|
"rewards/chosen": -0.933209240436554, |
|
"rewards/margins": 0.14916184544563293, |
|
"rewards/rejected": -1.0823709964752197, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.37925925925925924, |
|
"grad_norm": 865.616455078125, |
|
"learning_rate": 4.990147841143462e-05, |
|
"log_odds_chosen": 0.44308799505233765, |
|
"log_odds_ratio": -0.7074758410453796, |
|
"logps/chosen": -1.699676513671875, |
|
"logps/rejected": -2.0793070793151855, |
|
"loss": 55.384, |
|
"nll_loss": 1.9205652475357056, |
|
"rewards/accuracies": 0.6468750238418579, |
|
"rewards/chosen": -0.8498382568359375, |
|
"rewards/margins": 0.18981528282165527, |
|
"rewards/rejected": -1.0396535396575928, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.4266666666666667, |
|
"grad_norm": 1629.7843017578125, |
|
"learning_rate": 4.97401218720448e-05, |
|
"log_odds_chosen": 1.2202234268188477, |
|
"log_odds_ratio": -0.6341068148612976, |
|
"logps/chosen": -1.685363531112671, |
|
"logps/rejected": -2.8407630920410156, |
|
"loss": 41.6093, |
|
"nll_loss": 1.8779910802841187, |
|
"rewards/accuracies": 0.6656249761581421, |
|
"rewards/chosen": -0.8426817655563354, |
|
"rewards/margins": 0.5776998400688171, |
|
"rewards/rejected": -1.4203815460205078, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.4740740740740741, |
|
"grad_norm": 15368.978515625, |
|
"learning_rate": 4.9502564938797946e-05, |
|
"log_odds_chosen": 17.89919662475586, |
|
"log_odds_ratio": -4.792720794677734, |
|
"logps/chosen": -9.761907577514648, |
|
"logps/rejected": -27.604618072509766, |
|
"loss": -36.8673, |
|
"nll_loss": 7.769252777099609, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -4.880953788757324, |
|
"rewards/margins": 8.921355247497559, |
|
"rewards/rejected": -13.802309036254883, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.5214814814814814, |
|
"grad_norm": 45768.3046875, |
|
"learning_rate": 4.918953929490768e-05, |
|
"log_odds_chosen": 64.66633605957031, |
|
"log_odds_ratio": -11.930875778198242, |
|
"logps/chosen": -29.84969711303711, |
|
"logps/rejected": -94.47431945800781, |
|
"loss": -415.8344, |
|
"nll_loss": 19.317481994628906, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -14.924848556518555, |
|
"rewards/margins": 32.31230926513672, |
|
"rewards/rejected": -47.237159729003906, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.5688888888888889, |
|
"grad_norm": 19440.806640625, |
|
"learning_rate": 4.88020090697132e-05, |
|
"log_odds_chosen": 115.04557037353516, |
|
"log_odds_ratio": -21.812177658081055, |
|
"logps/chosen": -38.03327560424805, |
|
"logps/rejected": -153.05471801757812, |
|
"loss": -922.3747, |
|
"nll_loss": 28.686513900756836, |
|
"rewards/accuracies": 0.6468750238418579, |
|
"rewards/chosen": -19.016637802124023, |
|
"rewards/margins": 57.51072311401367, |
|
"rewards/rejected": -76.52735900878906, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.6162962962962963, |
|
"grad_norm": 451016.8125, |
|
"learning_rate": 4.834116786912897e-05, |
|
"log_odds_chosen": 97.50765228271484, |
|
"log_odds_ratio": -31.05093002319336, |
|
"logps/chosen": -40.515743255615234, |
|
"logps/rejected": -137.99923706054688, |
|
"loss": -469.6748, |
|
"nll_loss": 34.06440353393555, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -20.257871627807617, |
|
"rewards/margins": 48.74174880981445, |
|
"rewards/rejected": -68.99961853027344, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.6637037037037037, |
|
"grad_norm": 94608.5, |
|
"learning_rate": 4.7808435099299045e-05, |
|
"log_odds_chosen": 338.44207763671875, |
|
"log_odds_ratio": -85.28483581542969, |
|
"logps/chosen": -131.35760498046875, |
|
"logps/rejected": -469.7542419433594, |
|
"loss": -2686.7586, |
|
"nll_loss": 85.23709869384766, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -65.67880249023438, |
|
"rewards/margins": 169.1982879638672, |
|
"rewards/rejected": -234.8771209716797, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.7111111111111111, |
|
"grad_norm": 33316.55078125, |
|
"learning_rate": 4.720545159477922e-05, |
|
"log_odds_chosen": 127.3674545288086, |
|
"log_odds_ratio": -31.760257720947266, |
|
"logps/chosen": -35.016143798828125, |
|
"logps/rejected": -162.33889770507812, |
|
"loss": -1404.02, |
|
"nll_loss": 19.785737991333008, |
|
"rewards/accuracies": 0.6656249761581421, |
|
"rewards/chosen": -17.508071899414062, |
|
"rewards/margins": 63.66136932373047, |
|
"rewards/rejected": -81.16944885253906, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.7585185185185185, |
|
"grad_norm": 181842.0625, |
|
"learning_rate": 4.653407456471222e-05, |
|
"log_odds_chosen": 319.1683654785156, |
|
"log_odds_ratio": -147.62722778320312, |
|
"logps/chosen": -289.9014892578125, |
|
"logps/rejected": -609.013916015625, |
|
"loss": 384.325, |
|
"nll_loss": 171.5663604736328, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -144.95074462890625, |
|
"rewards/margins": 159.55621337890625, |
|
"rewards/rejected": -304.5069580078125, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.8059259259259259, |
|
"grad_norm": 52143.23828125, |
|
"learning_rate": 4.579637187256222e-05, |
|
"log_odds_chosen": 345.2151794433594, |
|
"log_odds_ratio": -104.02201843261719, |
|
"logps/chosen": -158.8235626220703, |
|
"logps/rejected": -504.00628662109375, |
|
"loss": -2944.3695, |
|
"nll_loss": 80.5798110961914, |
|
"rewards/accuracies": 0.653124988079071, |
|
"rewards/chosen": -79.41178131103516, |
|
"rewards/margins": 172.5913543701172, |
|
"rewards/rejected": -252.00314331054688, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.8533333333333334, |
|
"grad_norm": 36943.46875, |
|
"learning_rate": 4.499461566702685e-05, |
|
"log_odds_chosen": 404.80950927734375, |
|
"log_odds_ratio": -105.06642150878906, |
|
"logps/chosen": -144.46873474121094, |
|
"logps/rejected": -549.2257690429688, |
|
"loss": -3396.4172, |
|
"nll_loss": 96.24049377441406, |
|
"rewards/accuracies": 0.690625011920929, |
|
"rewards/chosen": -72.23436737060547, |
|
"rewards/margins": 202.37850952148438, |
|
"rewards/rejected": -274.6128845214844, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.9007407407407407, |
|
"grad_norm": 23182.97265625, |
|
"learning_rate": 4.413127538374411e-05, |
|
"log_odds_chosen": 131.83743286132812, |
|
"log_odds_ratio": -14.629719734191895, |
|
"logps/chosen": -16.93549919128418, |
|
"logps/rejected": -148.75064086914062, |
|
"loss": -1814.8482, |
|
"nll_loss": 9.193554878234863, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -8.46774959564209, |
|
"rewards/margins": 65.90757751464844, |
|
"rewards/rejected": -74.37532043457031, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.9481481481481482, |
|
"grad_norm": 126997.953125, |
|
"learning_rate": 4.320901013934887e-05, |
|
"log_odds_chosen": 326.8649597167969, |
|
"log_odds_ratio": -81.38832092285156, |
|
"logps/chosen": -165.1029510498047, |
|
"logps/rejected": -491.9105529785156, |
|
"loss": -1725.1, |
|
"nll_loss": 109.4944076538086, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -82.55147552490234, |
|
"rewards/margins": 163.40377807617188, |
|
"rewards/rejected": -245.9552764892578, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.9955555555555555, |
|
"grad_norm": 48039.47265625, |
|
"learning_rate": 4.223066054130568e-05, |
|
"log_odds_chosen": 290.2133483886719, |
|
"log_odds_ratio": -32.880943298339844, |
|
"logps/chosen": -55.933021545410156, |
|
"logps/rejected": -346.0943908691406, |
|
"loss": -3505.9434, |
|
"nll_loss": 35.519954681396484, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -27.966510772705078, |
|
"rewards/margins": 145.0806884765625, |
|
"rewards/rejected": -173.0471954345703, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 1.037925925925926, |
|
"grad_norm": 62192.86328125, |
|
"learning_rate": 4.1199239938743797e-05, |
|
"log_odds_chosen": 424.4546203613281, |
|
"log_odds_ratio": -188.85874938964844, |
|
"logps/chosen": -245.33665466308594, |
|
"logps/rejected": -669.7190551757812, |
|
"loss": -1119.4087, |
|
"nll_loss": 173.051025390625, |
|
"rewards/accuracies": 0.6993007063865662, |
|
"rewards/chosen": -122.66832733154297, |
|
"rewards/margins": 212.19119262695312, |
|
"rewards/rejected": -334.8595275878906, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.0853333333333333, |
|
"grad_norm": 63345.234375, |
|
"learning_rate": 4.0117925141242174e-05, |
|
"log_odds_chosen": 455.9835510253906, |
|
"log_odds_ratio": -80.30255889892578, |
|
"logps/chosen": -111.84346771240234, |
|
"logps/rejected": -567.7550048828125, |
|
"loss": -4980.8164, |
|
"nll_loss": 72.3052749633789, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": -55.92173385620117, |
|
"rewards/margins": 227.9557647705078, |
|
"rewards/rejected": -283.87750244140625, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 1.1327407407407408, |
|
"grad_norm": 270150.0625, |
|
"learning_rate": 3.899004663415084e-05, |
|
"log_odds_chosen": 432.2300720214844, |
|
"log_odds_ratio": -57.856689453125, |
|
"logps/chosen": -103.71064758300781, |
|
"logps/rejected": -535.87255859375, |
|
"loss": -4734.0852, |
|
"nll_loss": 68.14077758789062, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -51.855323791503906, |
|
"rewards/margins": 216.0809326171875, |
|
"rewards/rejected": -267.936279296875, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.1801481481481482, |
|
"grad_norm": 84333.71875, |
|
"learning_rate": 3.781907832058587e-05, |
|
"log_odds_chosen": 382.97308349609375, |
|
"log_odds_ratio": -112.0127944946289, |
|
"logps/chosen": -215.1722412109375, |
|
"logps/rejected": -598.0943603515625, |
|
"loss": -1093.5601, |
|
"nll_loss": 157.28729248046875, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -107.58612060546875, |
|
"rewards/margins": 191.46104431152344, |
|
"rewards/rejected": -299.04718017578125, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 1.2275555555555555, |
|
"grad_norm": 86323.7890625, |
|
"learning_rate": 3.660862682169282e-05, |
|
"log_odds_chosen": 347.2674865722656, |
|
"log_odds_ratio": -29.318981170654297, |
|
"logps/chosen": -52.60059356689453, |
|
"logps/rejected": -399.80438232421875, |
|
"loss": -4269.5184, |
|
"nll_loss": 40.1794548034668, |
|
"rewards/accuracies": 0.684374988079071, |
|
"rewards/chosen": -26.300296783447266, |
|
"rewards/margins": 173.60189819335938, |
|
"rewards/rejected": -199.90219116210938, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.274962962962963, |
|
"grad_norm": 76375.984375, |
|
"learning_rate": 3.5362420368134356e-05, |
|
"log_odds_chosen": 334.5088806152344, |
|
"log_odds_ratio": -122.74652099609375, |
|
"logps/chosen": -183.52720642089844, |
|
"logps/rejected": -517.9742431640625, |
|
"loss": -1044.6864, |
|
"nll_loss": 134.5770721435547, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -91.76360321044922, |
|
"rewards/margins": 167.22354125976562, |
|
"rewards/rejected": -258.98712158203125, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 1.3223703703703704, |
|
"grad_norm": 112008.921875, |
|
"learning_rate": 3.408429731701635e-05, |
|
"log_odds_chosen": 301.0145568847656, |
|
"log_odds_ratio": -91.36270904541016, |
|
"logps/chosen": -115.39459228515625, |
|
"logps/rejected": -416.3567810058594, |
|
"loss": -2338.4008, |
|
"nll_loss": 77.40607452392578, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -57.697296142578125, |
|
"rewards/margins": 150.48110961914062, |
|
"rewards/rejected": -208.1783905029297, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.3697777777777778, |
|
"grad_norm": 194621.921875, |
|
"learning_rate": 3.2778194329621104e-05, |
|
"log_odds_chosen": 564.0087280273438, |
|
"log_odds_ratio": -94.19833374023438, |
|
"logps/chosen": -199.311767578125, |
|
"logps/rejected": -763.2530517578125, |
|
"loss": -4740.4227, |
|
"nll_loss": 133.83242797851562, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -99.6558837890625, |
|
"rewards/margins": 281.97064208984375, |
|
"rewards/rejected": -381.62652587890625, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 1.417185185185185, |
|
"grad_norm": 85731.828125, |
|
"learning_rate": 3.144813424636031e-05, |
|
"log_odds_chosen": 521.4227294921875, |
|
"log_odds_ratio": -95.4295425415039, |
|
"logps/chosen": -129.7139129638672, |
|
"logps/rejected": -651.0548706054688, |
|
"loss": -5899.5816, |
|
"nll_loss": 76.30853271484375, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -64.8569564819336, |
|
"rewards/margins": 260.6704406738281, |
|
"rewards/rejected": -325.5274353027344, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.4645925925925927, |
|
"grad_norm": 45407.44140625, |
|
"learning_rate": 3.0098213696293542e-05, |
|
"log_odds_chosen": 289.8687438964844, |
|
"log_odds_ratio": -128.76556396484375, |
|
"logps/chosen": -152.67922973632812, |
|
"logps/rejected": -442.49920654296875, |
|
"loss": -1464.2162, |
|
"nll_loss": 99.15326690673828, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -76.33961486816406, |
|
"rewards/margins": 144.91000366210938, |
|
"rewards/rejected": -221.24960327148438, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 1.512, |
|
"grad_norm": 53448.38671875, |
|
"learning_rate": 2.8732590479375165e-05, |
|
"log_odds_chosen": 352.0550231933594, |
|
"log_odds_ratio": -66.38088989257812, |
|
"logps/chosen": -75.05381774902344, |
|
"logps/rejected": -427.0362854003906, |
|
"loss": -4057.2652, |
|
"nll_loss": 49.201690673828125, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -37.52690887451172, |
|
"rewards/margins": 175.99124145507812, |
|
"rewards/rejected": -213.5181427001953, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.5594074074074074, |
|
"grad_norm": 157314.0, |
|
"learning_rate": 2.7355470760292956e-05, |
|
"log_odds_chosen": 414.6659240722656, |
|
"log_odds_ratio": -107.09364318847656, |
|
"logps/chosen": -161.87742614746094, |
|
"logps/rejected": -576.4956665039062, |
|
"loss": -2792.6059, |
|
"nll_loss": 120.0401840209961, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -80.93871307373047, |
|
"rewards/margins": 207.30908203125, |
|
"rewards/rejected": -288.2478332519531, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 1.6068148148148147, |
|
"grad_norm": 136321.578125, |
|
"learning_rate": 2.597109611334169e-05, |
|
"log_odds_chosen": 488.1697692871094, |
|
"log_odds_ratio": -115.3565444946289, |
|
"logps/chosen": -189.2110595703125, |
|
"logps/rejected": -677.3059692382812, |
|
"loss": -3486.9227, |
|
"nll_loss": 135.0811309814453, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -94.60552978515625, |
|
"rewards/margins": 244.04745483398438, |
|
"rewards/rejected": -338.6529846191406, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.6542222222222223, |
|
"grad_norm": 81299.7578125, |
|
"learning_rate": 2.458373045823404e-05, |
|
"log_odds_chosen": 418.0018005371094, |
|
"log_odds_ratio": -95.27189636230469, |
|
"logps/chosen": -160.82423400878906, |
|
"logps/rejected": -578.742919921875, |
|
"loss": -3317.5164, |
|
"nll_loss": 105.28697204589844, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -80.41211700439453, |
|
"rewards/margins": 208.9593505859375, |
|
"rewards/rejected": -289.3714599609375, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 1.7016296296296296, |
|
"grad_norm": 231117.21875, |
|
"learning_rate": 2.3197646927086697e-05, |
|
"log_odds_chosen": 477.7691345214844, |
|
"log_odds_ratio": -80.58141326904297, |
|
"logps/chosen": -139.1304931640625, |
|
"logps/rejected": -616.8330688476562, |
|
"loss": -5099.5727, |
|
"nll_loss": 79.48963165283203, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -69.56524658203125, |
|
"rewards/margins": 238.85128784179688, |
|
"rewards/rejected": -308.4165344238281, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.749037037037037, |
|
"grad_norm": 43973.0546875, |
|
"learning_rate": 2.1817114703032176e-05, |
|
"log_odds_chosen": 493.30084228515625, |
|
"log_odds_ratio": -88.9622802734375, |
|
"logps/chosen": -138.52340698242188, |
|
"logps/rejected": -631.7484741210938, |
|
"loss": -5567.6691, |
|
"nll_loss": 72.62284851074219, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -69.26170349121094, |
|
"rewards/margins": 246.612548828125, |
|
"rewards/rejected": -315.8742370605469, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 1.7964444444444445, |
|
"grad_norm": 168100.390625, |
|
"learning_rate": 2.0446385870993467e-05, |
|
"log_odds_chosen": 627.9781494140625, |
|
"log_odds_ratio": -72.4460220336914, |
|
"logps/chosen": -129.3475341796875, |
|
"logps/rejected": -757.2415771484375, |
|
"loss": -7597.6812, |
|
"nll_loss": 76.51949310302734, |
|
"rewards/accuracies": 0.778124988079071, |
|
"rewards/chosen": -64.67376708984375, |
|
"rewards/margins": 313.94708251953125, |
|
"rewards/rejected": -378.62078857421875, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.8438518518518519, |
|
"grad_norm": 391024.46875, |
|
"learning_rate": 1.9089682321121834e-05, |
|
"log_odds_chosen": 661.0269775390625, |
|
"log_odds_ratio": -149.3189239501953, |
|
"logps/chosen": -243.3693084716797, |
|
"logps/rejected": -904.3167114257812, |
|
"loss": -5584.2465, |
|
"nll_loss": 155.9659881591797, |
|
"rewards/accuracies": 0.7406250238418579, |
|
"rewards/chosen": -121.68465423583984, |
|
"rewards/margins": 330.47369384765625, |
|
"rewards/rejected": -452.1583557128906, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 1.8912592592592592, |
|
"grad_norm": 125059.8046875, |
|
"learning_rate": 1.775118274523545e-05, |
|
"log_odds_chosen": 587.4786376953125, |
|
"log_odds_ratio": -195.35035705566406, |
|
"logps/chosen": -272.13336181640625, |
|
"logps/rejected": -859.5462036132812, |
|
"loss": -3844.7461, |
|
"nll_loss": 173.55812072753906, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -136.06668090820312, |
|
"rewards/margins": 293.70648193359375, |
|
"rewards/rejected": -429.7731018066406, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.9386666666666668, |
|
"grad_norm": 130243.34375, |
|
"learning_rate": 1.643500976631037e-05, |
|
"log_odds_chosen": 501.4483337402344, |
|
"log_odds_ratio": -159.20835876464844, |
|
"logps/chosen": -232.3970489501953, |
|
"logps/rejected": -733.7698974609375, |
|
"loss": -3511.2203, |
|
"nll_loss": 140.96083068847656, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": -116.19852447509766, |
|
"rewards/margins": 250.6864776611328, |
|
"rewards/rejected": -366.88494873046875, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 1.986074074074074, |
|
"grad_norm": 173549.265625, |
|
"learning_rate": 1.514521724066537e-05, |
|
"log_odds_chosen": 486.5333557128906, |
|
"log_odds_ratio": -179.15432739257812, |
|
"logps/chosen": -247.67678833007812, |
|
"logps/rejected": -734.17041015625, |
|
"loss": -3348.5012, |
|
"nll_loss": 138.6061553955078, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -123.83839416503906, |
|
"rewards/margins": 243.2467803955078, |
|
"rewards/rejected": -367.085205078125, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 2.0284444444444443, |
|
"grad_norm": 68642.6796875, |
|
"learning_rate": 1.3885777771950348e-05, |
|
"log_odds_chosen": 377.7022705078125, |
|
"log_odds_ratio": -98.49359130859375, |
|
"logps/chosen": -101.93464660644531, |
|
"logps/rejected": -479.5637512207031, |
|
"loss": -3602.625, |
|
"nll_loss": 62.84866714477539, |
|
"rewards/accuracies": 0.6643356680870056, |
|
"rewards/chosen": -50.967323303222656, |
|
"rewards/margins": 188.81454467773438, |
|
"rewards/rejected": -239.78187561035156, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 2.075851851851852, |
|
"grad_norm": 55023.109375, |
|
"learning_rate": 1.2660570475395683e-05, |
|
"log_odds_chosen": 211.233154296875, |
|
"log_odds_ratio": -52.553009033203125, |
|
"logps/chosen": -59.05438995361328, |
|
"logps/rejected": -270.23480224609375, |
|
"loss": -2151.8414, |
|
"nll_loss": 38.34517288208008, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -29.52719497680664, |
|
"rewards/margins": 105.5902099609375, |
|
"rewards/rejected": -135.11740112304688, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 2.1232592592592594, |
|
"grad_norm": 82995.109375, |
|
"learning_rate": 1.1473369030008974e-05, |
|
"log_odds_chosen": 543.0647583007812, |
|
"log_odds_ratio": -64.39569091796875, |
|
"logps/chosen": -87.19830322265625, |
|
"logps/rejected": -630.1611328125, |
|
"loss": -6956.893, |
|
"nll_loss": 54.07854080200195, |
|
"rewards/accuracies": 0.753125011920929, |
|
"rewards/chosen": -43.599151611328125, |
|
"rewards/margins": 271.4814147949219, |
|
"rewards/rejected": -315.08056640625, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 2.1706666666666665, |
|
"grad_norm": 512102.65625, |
|
"learning_rate": 1.0327830055518842e-05, |
|
"log_odds_chosen": 691.300537109375, |
|
"log_odds_ratio": -71.8380355834961, |
|
"logps/chosen": -161.6082763671875, |
|
"logps/rejected": -852.82275390625, |
|
"loss": -8369.6391, |
|
"nll_loss": 84.05608367919922, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": -80.80413818359375, |
|
"rewards/margins": 345.6072692871094, |
|
"rewards/rejected": -426.411376953125, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 2.218074074074074, |
|
"grad_norm": 81392.453125, |
|
"learning_rate": 9.227481849865235e-06, |
|
"log_odds_chosen": 758.3954467773438, |
|
"log_odds_ratio": -129.21971130371094, |
|
"logps/chosen": -269.18768310546875, |
|
"logps/rejected": -1027.5037841796875, |
|
"loss": -7679.0133, |
|
"nll_loss": 139.18893432617188, |
|
"rewards/accuracies": 0.715624988079071, |
|
"rewards/chosen": -134.59384155273438, |
|
"rewards/margins": 379.1580810546875, |
|
"rewards/rejected": -513.7518920898438, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 2.2654814814814817, |
|
"grad_norm": 156503.890625, |
|
"learning_rate": 8.175713521924978e-06, |
|
"log_odds_chosen": 703.6713256835938, |
|
"log_odds_ratio": -142.05674743652344, |
|
"logps/chosen": -237.63320922851562, |
|
"logps/rejected": -941.2130126953125, |
|
"loss": -6359.4469, |
|
"nll_loss": 153.05723571777344, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -118.81660461425781, |
|
"rewards/margins": 351.7899475097656, |
|
"rewards/rejected": -470.60650634765625, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 2.3128888888888888, |
|
"grad_norm": 110527.40625, |
|
"learning_rate": 7.1757645529443665e-06, |
|
"log_odds_chosen": 720.1389770507812, |
|
"log_odds_ratio": -68.03697967529297, |
|
"logps/chosen": -130.52520751953125, |
|
"logps/rejected": -850.5823974609375, |
|
"loss": -9273.2656, |
|
"nll_loss": 70.23912811279297, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -65.26260375976562, |
|
"rewards/margins": 360.0286560058594, |
|
"rewards/rejected": -425.29119873046875, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 2.3602962962962963, |
|
"grad_norm": 128753.984375, |
|
"learning_rate": 6.230714818829733e-06, |
|
"log_odds_chosen": 590.6346435546875, |
|
"log_odds_ratio": -37.9924201965332, |
|
"logps/chosen": -56.76947021484375, |
|
"logps/rejected": -647.3065795898438, |
|
"loss": -8556.7602, |
|
"nll_loss": 27.869800567626953, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -28.384735107421875, |
|
"rewards/margins": 295.2685852050781, |
|
"rewards/rejected": -323.6532897949219, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.407703703703704, |
|
"grad_norm": 164814.296875, |
|
"learning_rate": 5.343475104027743e-06, |
|
"log_odds_chosen": 621.853515625, |
|
"log_odds_ratio": -83.08097076416016, |
|
"logps/chosen": -144.10885620117188, |
|
"logps/rejected": -765.87646484375, |
|
"loss": -7296.7188, |
|
"nll_loss": 82.86141204833984, |
|
"rewards/accuracies": 0.715624988079071, |
|
"rewards/chosen": -72.05442810058594, |
|
"rewards/margins": 310.88385009765625, |
|
"rewards/rejected": -382.938232421875, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 2.455111111111111, |
|
"grad_norm": 141456.359375, |
|
"learning_rate": 4.516778136213037e-06, |
|
"log_odds_chosen": 636.0737915039062, |
|
"log_odds_ratio": -85.68701171875, |
|
"logps/chosen": -142.60003662109375, |
|
"logps/rejected": -778.5829467773438, |
|
"loss": -7366.8719, |
|
"nll_loss": 87.77666473388672, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -71.30001831054688, |
|
"rewards/margins": 317.991455078125, |
|
"rewards/rejected": -389.2914733886719, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 2.5025185185185186, |
|
"grad_norm": 186711.8125, |
|
"learning_rate": 3.7531701693965554e-06, |
|
"log_odds_chosen": 725.6994018554688, |
|
"log_odds_ratio": -115.44535827636719, |
|
"logps/chosen": -196.95616149902344, |
|
"logps/rejected": -922.5770263671875, |
|
"loss": -8682.6156, |
|
"nll_loss": 91.47877502441406, |
|
"rewards/accuracies": 0.684374988079071, |
|
"rewards/chosen": -98.47808074951172, |
|
"rewards/margins": 362.8104553222656, |
|
"rewards/rejected": -461.28851318359375, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 2.549925925925926, |
|
"grad_norm": 283810.84375, |
|
"learning_rate": 3.055003141378948e-06, |
|
"log_odds_chosen": 735.640869140625, |
|
"log_odds_ratio": -116.50650787353516, |
|
"logps/chosen": -200.65255737304688, |
|
"logps/rejected": -936.19873046875, |
|
"loss": -8755.6516, |
|
"nll_loss": 94.15899658203125, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -100.32627868652344, |
|
"rewards/margins": 367.77301025390625, |
|
"rewards/rejected": -468.099365234375, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 2.5973333333333333, |
|
"grad_norm": 134505.046875, |
|
"learning_rate": 2.424427429704365e-06, |
|
"log_odds_chosen": 606.5227661132812, |
|
"log_odds_ratio": -115.72232818603516, |
|
"logps/chosen": -185.85130310058594, |
|
"logps/rejected": -792.2674560546875, |
|
"loss": -6780.2891, |
|
"nll_loss": 91.32408905029297, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -92.92565155029297, |
|
"rewards/margins": 303.2080993652344, |
|
"rewards/rejected": -396.13372802734375, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 2.644740740740741, |
|
"grad_norm": 236743.3125, |
|
"learning_rate": 1.8633852284264508e-06, |
|
"log_odds_chosen": 721.7117309570312, |
|
"log_odds_ratio": -87.48831176757812, |
|
"logps/chosen": -122.89962005615234, |
|
"logps/rejected": -844.5099487304688, |
|
"loss": -9344.2578, |
|
"nll_loss": 68.79702758789062, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -61.44981002807617, |
|
"rewards/margins": 360.80511474609375, |
|
"rewards/rejected": -422.2549743652344, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 2.6921481481481484, |
|
"grad_norm": 172984.421875, |
|
"learning_rate": 1.3736045660864034e-06, |
|
"log_odds_chosen": 592.5506591796875, |
|
"log_odds_ratio": -69.64677429199219, |
|
"logps/chosen": -111.2774429321289, |
|
"logps/rejected": -703.73583984375, |
|
"loss": -7530.5437, |
|
"nll_loss": 60.8997688293457, |
|
"rewards/accuracies": 0.7406250238418579, |
|
"rewards/chosen": -55.63872146606445, |
|
"rewards/margins": 296.22918701171875, |
|
"rewards/rejected": -351.867919921875, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 2.7395555555555555, |
|
"grad_norm": 116706.3125, |
|
"learning_rate": 9.565939833279192e-07, |
|
"log_odds_chosen": 557.0691528320312, |
|
"log_odds_ratio": -115.3429183959961, |
|
"logps/chosen": -170.6497344970703, |
|
"logps/rejected": -727.6143798828125, |
|
"loss": -5561.0809, |
|
"nll_loss": 104.6985092163086, |
|
"rewards/accuracies": 0.7281249761581421, |
|
"rewards/chosen": -85.32486724853516, |
|
"rewards/margins": 278.4822998046875, |
|
"rewards/rejected": -363.80718994140625, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 2.786962962962963, |
|
"grad_norm": 98315.484375, |
|
"learning_rate": 6.136378865420872e-07, |
|
"log_odds_chosen": 574.1820678710938, |
|
"log_odds_ratio": -110.40797424316406, |
|
"logps/chosen": -160.60757446289062, |
|
"logps/rejected": -734.6983642578125, |
|
"loss": -6636.9953, |
|
"nll_loss": 79.63924407958984, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -80.30378723144531, |
|
"rewards/margins": 287.04534912109375, |
|
"rewards/rejected": -367.34918212890625, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 2.83437037037037, |
|
"grad_norm": 91240.3984375, |
|
"learning_rate": 3.45792591853214e-07, |
|
"log_odds_chosen": 629.7505493164062, |
|
"log_odds_ratio": -83.85370635986328, |
|
"logps/chosen": -143.35501098632812, |
|
"logps/rejected": -773.0218505859375, |
|
"loss": -7824.6305, |
|
"nll_loss": 70.31370544433594, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -71.67750549316406, |
|
"rewards/margins": 314.83343505859375, |
|
"rewards/rejected": -386.51092529296875, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.8817777777777778, |
|
"grad_norm": 97470.9609375, |
|
"learning_rate": 1.538830716302092e-07, |
|
"log_odds_chosen": 651.5947265625, |
|
"log_odds_ratio": -122.08070373535156, |
|
"logps/chosen": -166.5752716064453, |
|
"logps/rejected": -818.0435791015625, |
|
"loss": -7674.3211, |
|
"nll_loss": 85.91165924072266, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": -83.28763580322266, |
|
"rewards/margins": 325.734130859375, |
|
"rewards/rejected": -409.02178955078125, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 2.9291851851851853, |
|
"grad_norm": 98747.625, |
|
"learning_rate": 3.8500413544415025e-08, |
|
"log_odds_chosen": 705.1484985351562, |
|
"log_odds_ratio": -87.88983154296875, |
|
"logps/chosen": -134.3265838623047, |
|
"logps/rejected": -839.3775634765625, |
|
"loss": -8964.2422, |
|
"nll_loss": 72.39295959472656, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": -67.16329193115234, |
|
"rewards/margins": 352.5255126953125, |
|
"rewards/rejected": -419.68878173828125, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 2.9765925925925925, |
|
"grad_norm": 118650.2578125, |
|
"learning_rate": 0.0, |
|
"log_odds_chosen": 748.1420288085938, |
|
"log_odds_ratio": -78.33130645751953, |
|
"logps/chosen": -122.74269104003906, |
|
"logps/rejected": -870.7769775390625, |
|
"loss": -9606.4719, |
|
"nll_loss": 73.81488037109375, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": -61.37134552001953, |
|
"rewards/margins": 374.01715087890625, |
|
"rewards/rejected": -435.38848876953125, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 2.9765925925925925, |
|
"step": 315, |
|
"total_flos": 0.0, |
|
"train_loss": -4003.1467706589474, |
|
"train_runtime": 9706.329, |
|
"train_samples_per_second": 2.086, |
|
"train_steps_per_second": 0.032 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 315, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|