{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.9916550764951322,
  "eval_steps": 500,
  "global_step": 358,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "grad_norm": 664.5507726524561,
      "learning_rate": 2.7777777777777777e-10,
      "logits/chosen": -1.145136833190918,
      "logits/rejected": -0.7638764977455139,
      "logps/chosen": -369.05181884765625,
      "logps/rejected": -372.01507568359375,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.06,
      "grad_norm": 784.2820602567579,
      "learning_rate": 2.777777777777778e-09,
      "logits/chosen": -1.0929811000823975,
      "logits/rejected": -0.9427244663238525,
      "logps/chosen": -372.3557434082031,
      "logps/rejected": -339.4920959472656,
      "loss": 0.7141,
      "rewards/accuracies": 0.4375,
      "rewards/chosen": -0.013798276893794537,
      "rewards/margins": -0.012861602939665318,
      "rewards/rejected": -0.0009366737212985754,
      "step": 10
    },
    {
      "epoch": 0.11,
      "grad_norm": 708.7597502584047,
      "learning_rate": 5.555555555555556e-09,
      "logits/chosen": -0.8585885167121887,
      "logits/rejected": -0.7338771820068359,
      "logps/chosen": -365.14849853515625,
      "logps/rejected": -340.0997619628906,
      "loss": 0.7105,
      "rewards/accuracies": 0.4937500059604645,
      "rewards/chosen": -0.00040760039701126516,
      "rewards/margins": 0.008629530668258667,
      "rewards/rejected": -0.009037131443619728,
      "step": 20
    },
    {
      "epoch": 0.17,
      "grad_norm": 671.6766126687812,
      "learning_rate": 8.333333333333334e-09,
      "logits/chosen": -1.1642935276031494,
      "logits/rejected": -0.8622403144836426,
      "logps/chosen": -386.5867614746094,
      "logps/rejected": -348.39959716796875,
      "loss": 0.7069,
      "rewards/accuracies": 0.543749988079071,
      "rewards/chosen": 0.01048402488231659,
      "rewards/margins": 0.048695411533117294,
      "rewards/rejected": -0.038211386650800705,
      "step": 30
    },
    {
      "epoch": 0.22,
      "grad_norm": 642.0282612566415,
      "learning_rate": 9.99619291237835e-09,
      "logits/chosen": -1.0643326044082642,
      "logits/rejected": -0.8505460619926453,
      "logps/chosen": -362.8887023925781,
      "logps/rejected": -332.2599792480469,
      "loss": 0.7045,
      "rewards/accuracies": 0.503125011920929,
      "rewards/chosen": -0.04036983847618103,
      "rewards/margins": 0.002776807639747858,
      "rewards/rejected": -0.04314664751291275,
      "step": 40
    },
    {
      "epoch": 0.28,
      "grad_norm": 708.8205943616936,
      "learning_rate": 9.953429730181653e-09,
      "logits/chosen": -1.069666862487793,
      "logits/rejected": -0.8791624903678894,
      "logps/chosen": -377.6866760253906,
      "logps/rejected": -340.76434326171875,
      "loss": 0.7045,
      "rewards/accuracies": 0.5062500238418579,
      "rewards/chosen": -0.0037647963035851717,
      "rewards/margins": 0.01622203178703785,
      "rewards/rejected": -0.019986826926469803,
      "step": 50
    },
    {
      "epoch": 0.33,
      "grad_norm": 615.8031557981225,
      "learning_rate": 9.863552602006434e-09,
      "logits/chosen": -1.0530660152435303,
      "logits/rejected": -0.8627260327339172,
      "logps/chosen": -368.82427978515625,
      "logps/rejected": -334.1483154296875,
      "loss": 0.6907,
      "rewards/accuracies": 0.559374988079071,
      "rewards/chosen": 0.020714253187179565,
      "rewards/margins": 0.061485253274440765,
      "rewards/rejected": -0.0407710075378418,
      "step": 60
    },
    {
      "epoch": 0.39,
      "grad_norm": 603.388531475607,
      "learning_rate": 9.72741638325434e-09,
      "logits/chosen": -1.0651648044586182,
      "logits/rejected": -0.9337506294250488,
      "logps/chosen": -361.4559631347656,
      "logps/rejected": -338.84661865234375,
      "loss": 0.6905,
      "rewards/accuracies": 0.4906249940395355,
      "rewards/chosen": -0.02459469996392727,
      "rewards/margins": 0.010977035388350487,
      "rewards/rejected": -0.035571735352277756,
      "step": 70
    },
    {
      "epoch": 0.45,
      "grad_norm": 596.781435384469,
      "learning_rate": 9.546315917055362e-09,
      "logits/chosen": -0.9770561456680298,
      "logits/rejected": -0.7514477372169495,
      "logps/chosen": -369.4873046875,
      "logps/rejected": -332.56134033203125,
      "loss": 0.6689,
      "rewards/accuracies": 0.609375,
      "rewards/chosen": 0.04002801328897476,
      "rewards/margins": 0.09637657552957535,
      "rewards/rejected": -0.056348562240600586,
      "step": 80
    },
    {
      "epoch": 0.5,
      "grad_norm": 587.0211902729591,
      "learning_rate": 9.321973718524472e-09,
      "logits/chosen": -0.9680253863334656,
      "logits/rejected": -0.7858943343162537,
      "logps/chosen": -360.33001708984375,
      "logps/rejected": -336.4738464355469,
      "loss": 0.6653,
      "rewards/accuracies": 0.574999988079071,
      "rewards/chosen": -0.001699769520200789,
      "rewards/margins": 0.08374340832233429,
      "rewards/rejected": -0.08544318377971649,
      "step": 90
    },
    {
      "epoch": 0.56,
      "grad_norm": 589.9118067767693,
      "learning_rate": 9.056523591268063e-09,
      "logits/chosen": -1.0888255834579468,
      "logits/rejected": -0.9123393893241882,
      "logps/chosen": -381.3514099121094,
      "logps/rejected": -355.5039978027344,
      "loss": 0.6515,
      "rewards/accuracies": 0.609375,
      "rewards/chosen": 0.051872074604034424,
      "rewards/margins": 0.133183091878891,
      "rewards/rejected": -0.08131101727485657,
      "step": 100
    },
    {
      "epoch": 0.61,
      "grad_norm": 589.5691633116944,
      "learning_rate": 8.752490331969806e-09,
      "logits/chosen": -1.048461675643921,
      "logits/rejected": -0.8349950909614563,
      "logps/chosen": -379.940185546875,
      "logps/rejected": -349.4921569824219,
      "loss": 0.6436,
      "rewards/accuracies": 0.643750011920929,
      "rewards/chosen": 0.09967100620269775,
      "rewards/margins": 0.1681671440601349,
      "rewards/rejected": -0.06849612295627594,
      "step": 110
    },
    {
      "epoch": 0.67,
      "grad_norm": 556.6131343001705,
      "learning_rate": 8.41276571609327e-09,
      "logits/chosen": -1.1043516397476196,
      "logits/rejected": -0.8103276491165161,
      "logps/chosen": -378.1798095703125,
      "logps/rejected": -348.0905456542969,
      "loss": 0.6435,
      "rewards/accuracies": 0.640625,
      "rewards/chosen": 0.10052381455898285,
      "rewards/margins": 0.18633946776390076,
      "rewards/rejected": -0.08581562340259552,
      "step": 120
    },
    {
      "epoch": 0.72,
      "grad_norm": 585.5616649485556,
      "learning_rate": 8.040580993110404e-09,
      "logits/chosen": -1.119443416595459,
      "logits/rejected": -0.9243392944335938,
      "logps/chosen": -374.62371826171875,
      "logps/rejected": -351.63323974609375,
      "loss": 0.6349,
      "rewards/accuracies": 0.643750011920929,
      "rewards/chosen": 0.19561262428760529,
      "rewards/margins": 0.23775608837604523,
      "rewards/rejected": -0.04214347153902054,
      "step": 130
    },
    {
      "epoch": 0.78,
      "grad_norm": 568.4650526780266,
      "learning_rate": 7.639476152864163e-09,
      "logits/chosen": -1.0983831882476807,
      "logits/rejected": -0.987181544303894,
      "logps/chosen": -356.64117431640625,
      "logps/rejected": -332.7178039550781,
      "loss": 0.6295,
      "rewards/accuracies": 0.643750011920929,
      "rewards/chosen": 0.1680435836315155,
      "rewards/margins": 0.22499410808086395,
      "rewards/rejected": -0.05695053189992905,
      "step": 140
    },
    {
      "epoch": 0.83,
      "grad_norm": 565.1483875486134,
      "learning_rate": 7.21326625538456e-09,
      "logits/chosen": -1.0306283235549927,
      "logits/rejected": -0.7497280836105347,
      "logps/chosen": -373.5386047363281,
      "logps/rejected": -344.0720520019531,
      "loss": 0.6098,
      "rewards/accuracies": 0.690625011920929,
      "rewards/chosen": 0.2403784692287445,
      "rewards/margins": 0.28130704164505005,
      "rewards/rejected": -0.04092860221862793,
      "step": 150
    },
    {
      "epoch": 0.89,
      "grad_norm": 532.1675415448219,
      "learning_rate": 6.766005144407989e-09,
      "logits/chosen": -1.1308726072311401,
      "logits/rejected": -0.8597946166992188,
      "logps/chosen": -368.006591796875,
      "logps/rejected": -335.4107360839844,
      "loss": 0.6206,
      "rewards/accuracies": 0.6656249761581421,
      "rewards/chosen": 0.24870140850543976,
      "rewards/margins": 0.258108526468277,
      "rewards/rejected": -0.00940710585564375,
      "step": 160
    },
    {
      "epoch": 0.95,
      "grad_norm": 740.3690117109163,
      "learning_rate": 6.301946889734301e-09,
      "logits/chosen": -0.9349819421768188,
      "logits/rejected": -0.8212488293647766,
      "logps/chosen": -363.19781494140625,
      "logps/rejected": -337.5945739746094,
      "loss": 0.6152,
      "rewards/accuracies": 0.6781250238418579,
      "rewards/chosen": 0.2966246008872986,
      "rewards/margins": 0.3543737530708313,
      "rewards/rejected": -0.05774913355708122,
      "step": 170
    },
    {
      "epoch": 1.0,
      "grad_norm": 511.7643196913959,
      "learning_rate": 5.825505325157961e-09,
      "logits/chosen": -1.017749547958374,
      "logits/rejected": -0.809060275554657,
      "logps/chosen": -368.81463623046875,
      "logps/rejected": -349.66790771484375,
      "loss": 0.6065,
      "rewards/accuracies": 0.690625011920929,
      "rewards/chosen": 0.3305915594100952,
      "rewards/margins": 0.28414201736450195,
      "rewards/rejected": 0.04644955322146416,
      "step": 180
    },
    {
      "epoch": 1.06,
      "grad_norm": 540.5118356148005,
      "learning_rate": 5.341212066823356e-09,
      "logits/chosen": -1.1874797344207764,
      "logits/rejected": -0.9048231244087219,
      "logps/chosen": -382.1863708496094,
      "logps/rejected": -338.0008239746094,
      "loss": 0.5876,
      "rewards/accuracies": 0.715624988079071,
      "rewards/chosen": 0.40876778960227966,
      "rewards/margins": 0.41164499521255493,
      "rewards/rejected": -0.0028771907091140747,
      "step": 190
    },
    {
      "epoch": 1.11,
      "grad_norm": 574.532818545015,
      "learning_rate": 4.853673411307564e-09,
      "logits/chosen": -1.0419094562530518,
      "logits/rejected": -0.8011609315872192,
      "logps/chosen": -383.00933837890625,
      "logps/rejected": -344.86761474609375,
      "loss": 0.5989,
      "rewards/accuracies": 0.6968749761581421,
      "rewards/chosen": 0.4167671799659729,
      "rewards/margins": 0.3280712068080902,
      "rewards/rejected": 0.08869597315788269,
      "step": 200
    },
    {
      "epoch": 1.17,
      "grad_norm": 567.697203170915,
      "learning_rate": 4.367526523389253e-09,
      "logits/chosen": -1.1011512279510498,
      "logits/rejected": -0.8666337132453918,
      "logps/chosen": -370.8396911621094,
      "logps/rejected": -342.78558349609375,
      "loss": 0.5881,
      "rewards/accuracies": 0.6781250238418579,
      "rewards/chosen": 0.4040229916572571,
      "rewards/margins": 0.35025185346603394,
      "rewards/rejected": 0.05377109721302986,
      "step": 210
    },
    {
      "epoch": 1.22,
      "grad_norm": 541.6922856310176,
      "learning_rate": 3.887395330218429e-09,
      "logits/chosen": -1.077133297920227,
      "logits/rejected": -0.8909082412719727,
      "logps/chosen": -376.4487609863281,
      "logps/rejected": -351.6236877441406,
      "loss": 0.5866,
      "rewards/accuracies": 0.659375011920929,
      "rewards/chosen": 0.4883548319339752,
      "rewards/margins": 0.3798820674419403,
      "rewards/rejected": 0.10847274214029312,
      "step": 220
    },
    {
      "epoch": 1.28,
      "grad_norm": 527.0093020370833,
      "learning_rate": 3.4178465413942626e-09,
      "logits/chosen": -1.108720302581787,
      "logits/rejected": -0.8026013374328613,
      "logps/chosen": -378.4242248535156,
      "logps/rejected": -342.5303649902344,
      "loss": 0.5738,
      "rewards/accuracies": 0.721875011920929,
      "rewards/chosen": 0.5319411158561707,
      "rewards/margins": 0.44675689935684204,
      "rewards/rejected": 0.08518422394990921,
      "step": 230
    },
    {
      "epoch": 1.34,
      "grad_norm": 486.03500376882494,
      "learning_rate": 2.963346213260737e-09,
      "logits/chosen": -0.9698505401611328,
      "logits/rejected": -0.8225222826004028,
      "logps/chosen": -367.175048828125,
      "logps/rejected": -342.53631591796875,
      "loss": 0.5707,
      "rewards/accuracies": 0.703125,
      "rewards/chosen": 0.5555700063705444,
      "rewards/margins": 0.44443902373313904,
      "rewards/rejected": 0.11113103479146957,
      "step": 240
    },
    {
      "epoch": 1.39,
      "grad_norm": 534.3215854444298,
      "learning_rate": 2.528217270553501e-09,
      "logits/chosen": -0.9723398089408875,
      "logits/rejected": -0.8604954481124878,
      "logps/chosen": -359.05828857421875,
      "logps/rejected": -328.1695556640625,
      "loss": 0.5785,
      "rewards/accuracies": 0.690625011920929,
      "rewards/chosen": 0.47152179479599,
      "rewards/margins": 0.378196656703949,
      "rewards/rejected": 0.09332513064146042,
      "step": 250
    },
    {
      "epoch": 1.45,
      "grad_norm": 511.2341258250477,
      "learning_rate": 2.1165983894256645e-09,
      "logits/chosen": -0.9806705713272095,
      "logits/rejected": -0.8469393849372864,
      "logps/chosen": -367.417724609375,
      "logps/rejected": -341.4565124511719,
      "loss": 0.5765,
      "rewards/accuracies": 0.6812499761581421,
      "rewards/chosen": 0.540740430355072,
      "rewards/margins": 0.34819895029067993,
      "rewards/rejected": 0.19254149496555328,
      "step": 260
    },
    {
      "epoch": 1.5,
      "grad_norm": 481.01667031921556,
      "learning_rate": 1.7324046329316252e-09,
      "logits/chosen": -1.0429353713989258,
      "logits/rejected": -0.745051920413971,
      "logps/chosen": -371.4905090332031,
      "logps/rejected": -336.188720703125,
      "loss": 0.5641,
      "rewards/accuracies": 0.7093750238418579,
      "rewards/chosen": 0.5642050504684448,
      "rewards/margins": 0.4432891011238098,
      "rewards/rejected": 0.12091599404811859,
      "step": 270
    },
    {
      "epoch": 1.56,
      "grad_norm": 476.885773993008,
      "learning_rate": 1.3792902133797692e-09,
      "logits/chosen": -1.0373092889785767,
      "logits/rejected": -0.8002932667732239,
      "logps/chosen": -375.8576354980469,
      "logps/rejected": -339.48870849609375,
      "loss": 0.5606,
      "rewards/accuracies": 0.7406250238418579,
      "rewards/chosen": 0.6401156187057495,
      "rewards/margins": 0.4620915353298187,
      "rewards/rejected": 0.178024023771286,
      "step": 280
    },
    {
      "epoch": 1.61,
      "grad_norm": 571.353263468623,
      "learning_rate": 1.060613735735384e-09,
      "logits/chosen": -0.9055244326591492,
      "logits/rejected": -0.6923736333847046,
      "logps/chosen": -358.05694580078125,
      "logps/rejected": -331.3091125488281,
      "loss": 0.5778,
      "rewards/accuracies": 0.7093750238418579,
      "rewards/chosen": 0.5616304278373718,
      "rewards/margins": 0.4886326193809509,
      "rewards/rejected": 0.0729978084564209,
      "step": 290
    },
    {
      "epoch": 1.67,
      "grad_norm": 521.7732616506195,
      "learning_rate": 7.794062526569734e-10,
      "logits/chosen": -1.1269298791885376,
      "logits/rejected": -0.7582497596740723,
      "logps/chosen": -392.01593017578125,
      "logps/rejected": -350.402587890625,
      "loss": 0.5693,
      "rewards/accuracies": 0.71875,
      "rewards/chosen": 0.6667734980583191,
      "rewards/margins": 0.451296329498291,
      "rewards/rejected": 0.2154771387577057,
      "step": 300
    },
    {
      "epoch": 1.72,
      "grad_norm": 590.1584228378923,
      "learning_rate": 5.383424350065824e-10,
      "logits/chosen": -1.1271040439605713,
      "logits/rejected": -0.9299942255020142,
      "logps/chosen": -392.9803161621094,
      "logps/rejected": -365.2398376464844,
      "loss": 0.5713,
      "rewards/accuracies": 0.721875011920929,
      "rewards/chosen": 0.710626482963562,
      "rewards/margins": 0.4400349259376526,
      "rewards/rejected": 0.27059149742126465,
      "step": 310
    },
    {
      "epoch": 1.78,
      "grad_norm": 547.631467225418,
      "learning_rate": 3.397151320423647e-10,
      "logits/chosen": -1.1201112270355225,
      "logits/rejected": -0.8372467756271362,
      "logps/chosen": -377.406982421875,
      "logps/rejected": -341.136474609375,
      "loss": 0.5715,
      "rewards/accuracies": 0.7093750238418579,
      "rewards/chosen": 0.6100808382034302,
      "rewards/margins": 0.44269323348999023,
      "rewards/rejected": 0.16738756000995636,
      "step": 320
    },
    {
      "epoch": 1.84,
      "grad_norm": 532.2336174316262,
      "learning_rate": 1.8541356326100434e-10,
      "logits/chosen": -1.0350492000579834,
      "logits/rejected": -0.8257778882980347,
      "logps/chosen": -369.24896240234375,
      "logps/rejected": -344.9882507324219,
      "loss": 0.5662,
      "rewards/accuracies": 0.7406250238418579,
      "rewards/chosen": 0.6600748896598816,
      "rewards/margins": 0.4713415205478668,
      "rewards/rejected": 0.18873335421085358,
      "step": 330
    },
    {
      "epoch": 1.89,
      "grad_norm": 513.0198840165194,
      "learning_rate": 7.690534931565518e-11,
      "logits/chosen": -1.0103225708007812,
      "logits/rejected": -0.7807376384735107,
      "logps/chosen": -372.0942687988281,
      "logps/rejected": -339.8793029785156,
      "loss": 0.5724,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": 0.6672757863998413,
      "rewards/margins": 0.49458056688308716,
      "rewards/rejected": 0.1726953089237213,
      "step": 340
    },
    {
      "epoch": 1.95,
      "grad_norm": 524.4542591483576,
      "learning_rate": 1.5222552920138853e-11,
      "logits/chosen": -1.0104167461395264,
      "logits/rejected": -0.8294402360916138,
      "logps/chosen": -391.20452880859375,
      "logps/rejected": -356.1845703125,
      "loss": 0.5659,
      "rewards/accuracies": 0.706250011920929,
      "rewards/chosen": 0.7216753959655762,
      "rewards/margins": 0.5309287309646606,
      "rewards/rejected": 0.19074663519859314,
      "step": 350
    },
    {
      "epoch": 1.99,
      "step": 358,
      "total_flos": 0.0,
      "train_loss": 0.6178853964672408,
      "train_runtime": 10771.3374,
      "train_samples_per_second": 8.543,
      "train_steps_per_second": 0.033
    }
  ],
  "logging_steps": 10,
  "max_steps": 358,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}