{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 100,
  "global_step": 478,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "learning_rate": 1.0416666666666666e-08,
      "logits/chosen": -2.8099329471588135,
      "logits/rejected": -2.7572641372680664,
      "logps/chosen": -241.48843383789062,
      "logps/rejected": -197.4517822265625,
      "loss": 271.7943,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.0416666666666667e-07,
      "logits/chosen": -2.83237361907959,
      "logits/rejected": -2.808957815170288,
      "logps/chosen": -292.6072692871094,
      "logps/rejected": -278.4604797363281,
      "loss": 286.0386,
      "rewards/accuracies": 0.4166666567325592,
      "rewards/chosen": 0.0008353570010513067,
      "rewards/margins": -0.0004216647648718208,
      "rewards/rejected": 0.0012570219114422798,
      "step": 10
    },
    {
      "epoch": 0.04,
      "learning_rate": 2.0833333333333333e-07,
      "logits/chosen": -2.81878662109375,
      "logits/rejected": -2.7905821800231934,
      "logps/chosen": -286.19378662109375,
      "logps/rejected": -286.7618103027344,
      "loss": 264.9378,
      "rewards/accuracies": 0.543749988079071,
      "rewards/chosen": 0.04205816239118576,
      "rewards/margins": 0.0025776384864002466,
      "rewards/rejected": 0.03948052600026131,
      "step": 20
    },
    {
      "epoch": 0.06,
      "learning_rate": 3.1249999999999997e-07,
      "logits/chosen": -2.7921807765960693,
      "logits/rejected": -2.7616498470306396,
      "logps/chosen": -232.4526824951172,
      "logps/rejected": -212.8272705078125,
      "loss": 266.1199,
      "rewards/accuracies": 0.518750011920929,
      "rewards/chosen": 0.13909421861171722,
      "rewards/margins": 0.004568194039165974,
      "rewards/rejected": 0.13452602922916412,
      "step": 30
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.1666666666666667e-07,
      "logits/chosen": -2.8503639698028564,
      "logits/rejected": -2.819370985031128,
      "logps/chosen": -280.4808654785156,
      "logps/rejected": -243.86935424804688,
      "loss": 258.2878,
      "rewards/accuracies": 0.5687500238418579,
      "rewards/chosen": 0.19040736556053162,
      "rewards/margins": 0.01646682806313038,
      "rewards/rejected": 0.1739405393600464,
      "step": 40
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.999733114418725e-07,
      "logits/chosen": -2.764444589614868,
      "logits/rejected": -2.7427210807800293,
      "logps/chosen": -254.5093231201172,
      "logps/rejected": -240.6798858642578,
      "loss": 244.2991,
      "rewards/accuracies": 0.518750011920929,
      "rewards/chosen": 0.22921113669872284,
      "rewards/margins": -0.0009602505015209317,
      "rewards/rejected": 0.23017136752605438,
      "step": 50
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.990398100856366e-07,
      "logits/chosen": -2.7702980041503906,
      "logits/rejected": -2.742863893508911,
      "logps/chosen": -235.5380401611328,
      "logps/rejected": -211.11770629882812,
      "loss": 255.6714,
      "rewards/accuracies": 0.5562499761581421,
      "rewards/chosen": 0.23538246750831604,
      "rewards/margins": 0.020409177988767624,
      "rewards/rejected": 0.2149733006954193,
      "step": 60
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.967775735898179e-07,
      "logits/chosen": -2.7509877681732178,
      "logits/rejected": -2.7182183265686035,
      "logps/chosen": -239.36514282226562,
      "logps/rejected": -210.6224822998047,
      "loss": 253.836,
      "rewards/accuracies": 0.606249988079071,
      "rewards/chosen": 0.29057446122169495,
      "rewards/margins": 0.04796246066689491,
      "rewards/rejected": 0.24261197447776794,
      "step": 70
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.931986719649298e-07,
      "logits/chosen": -2.699618339538574,
      "logits/rejected": -2.697359800338745,
      "logps/chosen": -241.5849609375,
      "logps/rejected": -241.43533325195312,
      "loss": 261.2479,
      "rewards/accuracies": 0.518750011920929,
      "rewards/chosen": 0.29146844148635864,
      "rewards/margins": 0.030574629083275795,
      "rewards/rejected": 0.2608937919139862,
      "step": 80
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.883222001996351e-07,
      "logits/chosen": -2.7679967880249023,
      "logits/rejected": -2.7326865196228027,
      "logps/chosen": -225.57156372070312,
      "logps/rejected": -208.76693725585938,
      "loss": 252.1535,
      "rewards/accuracies": 0.6187499761581421,
      "rewards/chosen": 0.29392915964126587,
      "rewards/margins": 0.03465462103486061,
      "rewards/rejected": 0.25927454233169556,
      "step": 90
    },
    {
      "epoch": 0.21,
      "learning_rate": 4.821741763807186e-07,
      "logits/chosen": -2.819113254547119,
      "logits/rejected": -2.783386707305908,
      "logps/chosen": -260.7179260253906,
      "logps/rejected": -238.54434204101562,
      "loss": 256.7688,
      "rewards/accuracies": 0.550000011920929,
      "rewards/chosen": 0.28721123933792114,
      "rewards/margins": -0.005946027580648661,
      "rewards/rejected": 0.29315727949142456,
      "step": 100
    },
    {
      "epoch": 0.21,
      "eval_logits/chosen": -2.7863247394561768,
      "eval_logits/rejected": -2.767011880874634,
      "eval_logps/chosen": -227.56491088867188,
      "eval_logps/rejected": -230.47303771972656,
      "eval_loss": 245.72764587402344,
      "eval_rewards/accuracies": 0.57421875,
      "eval_rewards/chosen": 0.29474756121635437,
      "eval_rewards/margins": 0.025945277884602547,
      "eval_rewards/rejected": 0.268802285194397,
      "eval_runtime": 53.5253,
      "eval_samples_per_second": 37.365,
      "eval_steps_per_second": 0.598,
      "step": 100
    },
    {
      "epoch": 0.23,
      "learning_rate": 4.747874028753375e-07,
      "logits/chosen": -2.6783273220062256,
      "logits/rejected": -2.644078016281128,
      "logps/chosen": -237.9336395263672,
      "logps/rejected": -188.83895874023438,
      "loss": 240.9402,
      "rewards/accuracies": 0.550000011920929,
      "rewards/chosen": 0.32163843512535095,
      "rewards/margins": -0.008066670037806034,
      "rewards/rejected": 0.3297051787376404,
      "step": 110
    },
    {
      "epoch": 0.25,
      "learning_rate": 4.662012913161997e-07,
      "logits/chosen": -2.746994733810425,
      "logits/rejected": -2.724551200866699,
      "logps/chosen": -240.14242553710938,
      "logps/rejected": -237.6074981689453,
      "loss": 245.7042,
      "rewards/accuracies": 0.53125,
      "rewards/chosen": 0.2870863080024719,
      "rewards/margins": 0.03465163707733154,
      "rewards/rejected": 0.25243470072746277,
      "step": 120
    },
    {
      "epoch": 0.27,
      "learning_rate": 4.5646165232345103e-07,
      "logits/chosen": -2.7405877113342285,
      "logits/rejected": -2.7226414680480957,
      "logps/chosen": -242.31460571289062,
      "logps/rejected": -222.0249786376953,
      "loss": 239.1022,
      "rewards/accuracies": 0.574999988079071,
      "rewards/chosen": 0.3218019902706146,
      "rewards/margins": 0.03552493453025818,
      "rewards/rejected": 0.28627708554267883,
      "step": 130
    },
    {
      "epoch": 0.29,
      "learning_rate": 4.456204510851956e-07,
      "logits/chosen": -2.7363457679748535,
      "logits/rejected": -2.710092067718506,
      "logps/chosen": -248.31982421875,
      "logps/rejected": -242.44064331054688,
      "loss": 240.1779,
      "rewards/accuracies": 0.550000011920929,
      "rewards/chosen": 0.32314616441726685,
      "rewards/margins": 0.03991778939962387,
      "rewards/rejected": 0.2832283675670624,
      "step": 140
    },
    {
      "epoch": 0.31,
      "learning_rate": 4.337355301007335e-07,
      "logits/chosen": -2.6783087253570557,
      "logits/rejected": -2.707118511199951,
      "logps/chosen": -196.87429809570312,
      "logps/rejected": -205.2587890625,
      "loss": 236.8216,
      "rewards/accuracies": 0.46875,
      "rewards/chosen": 0.2980460226535797,
      "rewards/margins": -0.012227327562868595,
      "rewards/rejected": 0.31027334928512573,
      "step": 150
    },
    {
      "epoch": 0.33,
      "learning_rate": 4.2087030056579986e-07,
      "logits/chosen": -2.7062594890594482,
      "logits/rejected": -2.6792685985565186,
      "logps/chosen": -224.0284881591797,
      "logps/rejected": -215.8988494873047,
      "loss": 233.851,
      "rewards/accuracies": 0.518750011920929,
      "rewards/chosen": 0.31943365931510925,
      "rewards/margins": 0.01964866928756237,
      "rewards/rejected": 0.29978498816490173,
      "step": 160
    },
    {
      "epoch": 0.36,
      "learning_rate": 4.070934040463998e-07,
      "logits/chosen": -2.6613574028015137,
      "logits/rejected": -2.6377127170562744,
      "logps/chosen": -249.140869140625,
      "logps/rejected": -229.2135009765625,
      "loss": 238.7709,
      "rewards/accuracies": 0.574999988079071,
      "rewards/chosen": 0.3302595019340515,
      "rewards/margins": 0.02071164920926094,
      "rewards/rejected": 0.30954790115356445,
      "step": 170
    },
    {
      "epoch": 0.38,
      "learning_rate": 3.9247834624635404e-07,
      "logits/chosen": -2.697937488555908,
      "logits/rejected": -2.6982693672180176,
      "logps/chosen": -237.61752319335938,
      "logps/rejected": -222.94534301757812,
      "loss": 240.8094,
      "rewards/accuracies": 0.5375000238418579,
      "rewards/chosen": 0.3156043589115143,
      "rewards/margins": -0.00877897534519434,
      "rewards/rejected": 0.32438334822654724,
      "step": 180
    },
    {
      "epoch": 0.4,
      "learning_rate": 3.7710310482256523e-07,
      "logits/chosen": -2.716660499572754,
      "logits/rejected": -2.6782338619232178,
      "logps/chosen": -256.08953857421875,
      "logps/rejected": -233.2887725830078,
      "loss": 241.1514,
      "rewards/accuracies": 0.612500011920929,
      "rewards/chosen": 0.35306206345558167,
      "rewards/margins": 0.0451970100402832,
      "rewards/rejected": 0.3078650236129761,
      "step": 190
    },
    {
      "epoch": 0.42,
      "learning_rate": 3.610497133404795e-07,
      "logits/chosen": -2.6975326538085938,
      "logits/rejected": -2.715744733810425,
      "logps/chosen": -260.04095458984375,
      "logps/rejected": -232.90744018554688,
      "loss": 239.4804,
      "rewards/accuracies": 0.5562499761581421,
      "rewards/chosen": 0.3612653315067291,
      "rewards/margins": 0.047182150185108185,
      "rewards/rejected": 0.31408315896987915,
      "step": 200
    },
    {
      "epoch": 0.42,
      "eval_logits/chosen": -2.7289865016937256,
      "eval_logits/rejected": -2.7082109451293945,
      "eval_logps/chosen": -223.84622192382812,
      "eval_logps/rejected": -226.99331665039062,
      "eval_loss": 241.5078125,
      "eval_rewards/accuracies": 0.57421875,
      "eval_rewards/chosen": 0.33193421363830566,
      "eval_rewards/margins": 0.028334595263004303,
      "eval_rewards/rejected": 0.30359959602355957,
      "eval_runtime": 53.4363,
      "eval_samples_per_second": 37.428,
      "eval_steps_per_second": 0.599,
      "step": 200
    },
    {
      "epoch": 0.44,
      "learning_rate": 3.4440382358952115e-07,
      "logits/chosen": -2.653046131134033,
      "logits/rejected": -2.639833927154541,
      "logps/chosen": -236.7248992919922,
      "logps/rejected": -215.8306427001953,
      "loss": 232.2403,
      "rewards/accuracies": 0.518750011920929,
      "rewards/chosen": 0.3630830645561218,
      "rewards/margins": 0.002032138407230377,
      "rewards/rejected": 0.36105093359947205,
      "step": 210
    },
    {
      "epoch": 0.46,
      "learning_rate": 3.272542485937368e-07,
      "logits/chosen": -2.6577677726745605,
      "logits/rejected": -2.610652446746826,
      "logps/chosen": -234.62515258789062,
      "logps/rejected": -224.5327911376953,
      "loss": 236.0736,
      "rewards/accuracies": 0.5625,
      "rewards/chosen": 0.403405100107193,
      "rewards/margins": 0.025665929540991783,
      "rewards/rejected": 0.3777391314506531,
      "step": 220
    },
    {
      "epoch": 0.48,
      "learning_rate": 3.096924887558854e-07,
      "logits/chosen": -2.7138988971710205,
      "logits/rejected": -2.6695990562438965,
      "logps/chosen": -238.20166015625,
      "logps/rejected": -238.32510375976562,
      "loss": 243.5431,
      "rewards/accuracies": 0.637499988079071,
      "rewards/chosen": 0.36346226930618286,
      "rewards/margins": 0.04655776172876358,
      "rewards/rejected": 0.31690454483032227,
      "step": 230
    },
    {
      "epoch": 0.5,
      "learning_rate": 2.9181224366319943e-07,
      "logits/chosen": -2.6672616004943848,
      "logits/rejected": -2.6338298320770264,
      "logps/chosen": -245.7289581298828,
      "logps/rejected": -222.80068969726562,
      "loss": 241.8247,
      "rewards/accuracies": 0.612500011920929,
      "rewards/chosen": 0.4044111371040344,
      "rewards/margins": 0.09945273399353027,
      "rewards/rejected": 0.30495840311050415,
      "step": 240
    },
    {
      "epoch": 0.52,
      "learning_rate": 2.7370891215954565e-07,
      "logits/chosen": -2.711887836456299,
      "logits/rejected": -2.660736083984375,
      "logps/chosen": -236.48410034179688,
      "logps/rejected": -220.5592803955078,
      "loss": 232.2589,
      "rewards/accuracies": 0.550000011920929,
      "rewards/chosen": 0.3508817255496979,
      "rewards/margins": 0.012243595905601978,
      "rewards/rejected": 0.3386381268501282,
      "step": 250
    },
    {
      "epoch": 0.54,
      "learning_rate": 2.55479083351317e-07,
      "logits/chosen": -2.6873819828033447,
      "logits/rejected": -2.665743827819824,
      "logps/chosen": -226.0983428955078,
      "logps/rejected": -222.646484375,
      "loss": 249.1208,
      "rewards/accuracies": 0.48124998807907104,
      "rewards/chosen": 0.33049115538597107,
      "rewards/margins": -0.025500113144516945,
      "rewards/rejected": 0.35599130392074585,
      "step": 260
    },
    {
      "epoch": 0.56,
      "learning_rate": 2.3722002126275822e-07,
      "logits/chosen": -2.684255599975586,
      "logits/rejected": -2.6710708141326904,
      "logps/chosen": -250.91659545898438,
      "logps/rejected": -232.743896484375,
      "loss": 232.0498,
      "rewards/accuracies": 0.5625,
      "rewards/chosen": 0.3753630220890045,
      "rewards/margins": 0.05215846374630928,
      "rewards/rejected": 0.32320457696914673,
      "step": 270
    },
    {
      "epoch": 0.59,
      "learning_rate": 2.19029145890313e-07,
      "logits/chosen": -2.642585515975952,
      "logits/rejected": -2.6435678005218506,
      "logps/chosen": -271.9496154785156,
      "logps/rejected": -232.7229461669922,
      "loss": 242.3832,
      "rewards/accuracies": 0.5625,
      "rewards/chosen": 0.3579340875148773,
      "rewards/margins": 0.029489517211914062,
      "rewards/rejected": 0.32844457030296326,
      "step": 280
    },
    {
      "epoch": 0.61,
      "learning_rate": 2.0100351342479216e-07,
      "logits/chosen": -2.6854400634765625,
      "logits/rejected": -2.6508984565734863,
      "logps/chosen": -238.96533203125,
      "logps/rejected": -214.7186737060547,
      "loss": 237.8058,
      "rewards/accuracies": 0.518750011920929,
      "rewards/chosen": 0.38798388838768005,
      "rewards/margins": 0.002142349723726511,
      "rewards/rejected": 0.3858415484428406,
      "step": 290
    },
    {
      "epoch": 0.63,
      "learning_rate": 1.8323929841460178e-07,
      "logits/chosen": -2.6979777812957764,
      "logits/rejected": -2.6885297298431396,
      "logps/chosen": -218.0729522705078,
      "logps/rejected": -206.6052703857422,
      "loss": 240.5041,
      "rewards/accuracies": 0.4749999940395355,
      "rewards/chosen": 0.3353363871574402,
      "rewards/margins": -0.021952930837869644,
      "rewards/rejected": 0.35728925466537476,
      "step": 300
    },
    {
      "epoch": 0.63,
      "eval_logits/chosen": -2.7041783332824707,
      "eval_logits/rejected": -2.680725574493408,
      "eval_logps/chosen": -222.30435180664062,
      "eval_logps/rejected": -225.71856689453125,
      "eval_loss": 239.8052978515625,
      "eval_rewards/accuracies": 0.5625,
      "eval_rewards/chosen": 0.3473527431488037,
      "eval_rewards/margins": 0.03100587986409664,
      "eval_rewards/rejected": 0.3163468837738037,
      "eval_runtime": 53.5009,
      "eval_samples_per_second": 37.383,
      "eval_steps_per_second": 0.598,
      "step": 300
    },
    {
      "epoch": 0.65,
      "learning_rate": 1.6583128063291573e-07,
      "logits/chosen": -2.681898832321167,
      "logits/rejected": -2.647827625274658,
      "logps/chosen": -249.84933471679688,
      "logps/rejected": -221.06466674804688,
      "loss": 244.5816,
      "rewards/accuracies": 0.581250011920929,
      "rewards/chosen": 0.37856870889663696,
      "rewards/margins": 0.031483568251132965,
      "rewards/rejected": 0.347085177898407,
      "step": 310
    },
    {
      "epoch": 0.67,
      "learning_rate": 1.488723393865766e-07,
      "logits/chosen": -2.6419777870178223,
      "logits/rejected": -2.6089978218078613,
      "logps/chosen": -247.5414581298828,
      "logps/rejected": -228.00442504882812,
      "loss": 227.1146,
      "rewards/accuracies": 0.5625,
      "rewards/chosen": 0.38319897651672363,
      "rewards/margins": 0.010034086182713509,
      "rewards/rejected": 0.3731648921966553,
      "step": 320
    },
    {
      "epoch": 0.69,
      "learning_rate": 1.3245295796480788e-07,
      "logits/chosen": -2.6546921730041504,
      "logits/rejected": -2.619576930999756,
      "logps/chosen": -235.1280059814453,
      "logps/rejected": -228.36550903320312,
      "loss": 235.3102,
      "rewards/accuracies": 0.59375,
      "rewards/chosen": 0.3890138268470764,
      "rewards/margins": 0.030328262597322464,
      "rewards/rejected": 0.35868555307388306,
      "step": 330
    },
    {
      "epoch": 0.71,
      "learning_rate": 1.1666074087171627e-07,
      "logits/chosen": -2.6908440589904785,
      "logits/rejected": -2.6842880249023438,
      "logps/chosen": -221.61752319335938,
      "logps/rejected": -221.1806182861328,
      "loss": 226.7886,
      "rewards/accuracies": 0.5062500238418579,
      "rewards/chosen": 0.3297863006591797,
      "rewards/margins": 0.012335492298007011,
      "rewards/rejected": 0.3174508213996887,
      "step": 340
    },
    {
      "epoch": 0.73,
      "learning_rate": 1.0157994641835734e-07,
      "logits/chosen": -2.6660337448120117,
      "logits/rejected": -2.6440846920013428,
      "logps/chosen": -235.5915069580078,
      "logps/rejected": -227.4123077392578,
      "loss": 235.1443,
      "rewards/accuracies": 0.5249999761581421,
      "rewards/chosen": 0.4086208939552307,
      "rewards/margins": 0.052063293755054474,
      "rewards/rejected": 0.35655760765075684,
      "step": 350
    },
    {
      "epoch": 0.75,
      "learning_rate": 8.729103716819111e-08,
      "logits/chosen": -2.631333112716675,
      "logits/rejected": -2.605966806411743,
      "logps/chosen": -223.34115600585938,
      "logps/rejected": -207.91500854492188,
      "loss": 246.5647,
      "rewards/accuracies": 0.5249999761581421,
      "rewards/chosen": 0.35043418407440186,
      "rewards/margins": -0.0040539586916565895,
      "rewards/rejected": 0.35448816418647766,
      "step": 360
    },
    {
      "epoch": 0.77,
      "learning_rate": 7.387025063449081e-08,
      "logits/chosen": -2.667217254638672,
      "logits/rejected": -2.6636977195739746,
      "logps/chosen": -240.44247436523438,
      "logps/rejected": -231.893798828125,
      "loss": 236.9863,
      "rewards/accuracies": 0.5874999761581421,
      "rewards/chosen": 0.37617605924606323,
      "rewards/margins": 0.05858853459358215,
      "rewards/rejected": 0.3175875246524811,
      "step": 370
    },
    {
      "epoch": 0.79,
      "learning_rate": 6.138919252022435e-08,
      "logits/chosen": -2.6146702766418457,
      "logits/rejected": -2.611351728439331,
      "logps/chosen": -189.4939422607422,
      "logps/rejected": -206.30081176757812,
      "loss": 233.4487,
      "rewards/accuracies": 0.518750011920929,
      "rewards/chosen": 0.3334718644618988,
      "rewards/margins": -0.03314907103776932,
      "rewards/rejected": 0.3666209578514099,
      "step": 380
    },
    {
      "epoch": 0.82,
      "learning_rate": 4.991445467064689e-08,
      "logits/chosen": -2.663173198699951,
      "logits/rejected": -2.634059429168701,
      "logps/chosen": -221.12240600585938,
      "logps/rejected": -204.8622589111328,
      "loss": 241.2531,
      "rewards/accuracies": 0.53125,
      "rewards/chosen": 0.30437472462654114,
      "rewards/margins": -0.002047918038442731,
      "rewards/rejected": 0.3064226508140564,
      "step": 390
    },
    {
      "epoch": 0.84,
      "learning_rate": 3.9507259776993954e-08,
      "logits/chosen": -2.646238088607788,
      "logits/rejected": -2.6048245429992676,
      "logps/chosen": -250.01693725585938,
      "logps/rejected": -215.4422607421875,
      "loss": 236.8453,
      "rewards/accuracies": 0.581250011920929,
      "rewards/chosen": 0.35686779022216797,
      "rewards/margins": 0.03598792105913162,
      "rewards/rejected": 0.3208799362182617,
      "step": 400
    },
    {
      "epoch": 0.84,
      "eval_logits/chosen": -2.697472333908081,
      "eval_logits/rejected": -2.673011541366577,
      "eval_logps/chosen": -221.73487854003906,
      "eval_logps/rejected": -225.18995666503906,
      "eval_loss": 239.1992950439453,
      "eval_rewards/accuracies": 0.5625,
      "eval_rewards/chosen": 0.3530477285385132,
      "eval_rewards/margins": 0.031414665281772614,
      "eval_rewards/rejected": 0.3216330409049988,
      "eval_runtime": 53.4218,
      "eval_samples_per_second": 37.438,
      "eval_steps_per_second": 0.599,
      "step": 400
    },
    {
      "epoch": 0.86,
      "learning_rate": 3.022313472693447e-08,
      "logits/chosen": -2.684819459915161,
      "logits/rejected": -2.622917652130127,
      "logps/chosen": -226.3255157470703,
      "logps/rejected": -246.3280487060547,
      "loss": 234.8678,
      "rewards/accuracies": 0.5562499761581421,
      "rewards/chosen": 0.3726418912410736,
      "rewards/margins": 0.02128712832927704,
      "rewards/rejected": 0.3513546884059906,
      "step": 410
    },
    {
      "epoch": 0.88,
      "learning_rate": 2.2111614344599684e-08,
      "logits/chosen": -2.6737873554229736,
      "logits/rejected": -2.638714551925659,
      "logps/chosen": -250.0021514892578,
      "logps/rejected": -255.8992156982422,
      "loss": 246.8406,
      "rewards/accuracies": 0.581250011920929,
      "rewards/chosen": 0.3681090474128723,
      "rewards/margins": 0.0030112355016171932,
      "rewards/rejected": 0.3650978207588196,
      "step": 420
    },
    {
      "epoch": 0.9,
      "learning_rate": 1.521597710086439e-08,
      "logits/chosen": -2.6430375576019287,
      "logits/rejected": -2.6089277267456055,
      "logps/chosen": -255.19985961914062,
      "logps/rejected": -234.4241485595703,
      "loss": 234.6739,
      "rewards/accuracies": 0.5062500238418579,
      "rewards/chosen": 0.3505329489707947,
      "rewards/margins": -0.049122948199510574,
      "rewards/rejected": 0.39965590834617615,
      "step": 430
    },
    {
      "epoch": 0.92,
      "learning_rate": 9.57301420397924e-09,
      "logits/chosen": -2.665576934814453,
      "logits/rejected": -2.637112617492676,
      "logps/chosen": -278.3708190917969,
      "logps/rejected": -230.59423828125,
      "loss": 242.5247,
      "rewards/accuracies": 0.5562499761581421,
      "rewards/chosen": 0.36680158972740173,
      "rewards/margins": 0.01681143045425415,
      "rewards/rejected": 0.34999018907546997,
      "step": 440
    },
    {
      "epoch": 0.94,
      "learning_rate": 5.212833302556258e-09,
      "logits/chosen": -2.6472713947296143,
      "logits/rejected": -2.6246514320373535,
      "logps/chosen": -242.03775024414062,
      "logps/rejected": -210.9549560546875,
      "loss": 239.1569,
      "rewards/accuracies": 0.5625,
      "rewards/chosen": 0.3881983160972595,
      "rewards/margins": -0.0041311681270599365,
      "rewards/rejected": 0.39232948422431946,
      "step": 450
    },
    {
      "epoch": 0.96,
      "learning_rate": 2.158697848236607e-09,
      "logits/chosen": -2.6596500873565674,
      "logits/rejected": -2.6326732635498047,
      "logps/chosen": -259.3081359863281,
      "logps/rejected": -237.2527618408203,
      "loss": 246.6495,
      "rewards/accuracies": 0.5874999761581421,
      "rewards/chosen": 0.4107862412929535,
      "rewards/margins": 0.07695204019546509,
      "rewards/rejected": 0.3338342308998108,
      "step": 460
    },
    {
      "epoch": 0.98,
      "learning_rate": 4.269029751107489e-10,
      "logits/chosen": -2.6772663593292236,
      "logits/rejected": -2.6103363037109375,
      "logps/chosen": -256.0906677246094,
      "logps/rejected": -202.92539978027344,
      "loss": 248.9801,
      "rewards/accuracies": 0.668749988079071,
      "rewards/chosen": 0.42115721106529236,
      "rewards/margins": 0.12000129371881485,
      "rewards/rejected": 0.3011559247970581,
      "step": 470
    },
    {
      "epoch": 1.0,
      "step": 478,
      "total_flos": 0.0,
      "train_loss": 243.0746703846185,
      "train_runtime": 4321.456,
      "train_samples_per_second": 14.147,
      "train_steps_per_second": 0.111
    }
  ],
  "logging_steps": 10,
  "max_steps": 478,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 0.0,
  "trial_name": null,
  "trial_params": null
}