|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 478, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 27.37984871419997, |
|
"learning_rate": 1.0416666666666666e-08, |
|
"logits/chosen": -1.8783892393112183, |
|
"logits/rejected": -1.8756425380706787, |
|
"logps/chosen": -298.4870300292969, |
|
"logps/rejected": -398.0157165527344, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 25.334426597070937, |
|
"learning_rate": 1.0416666666666667e-07, |
|
"logits/chosen": -1.750243067741394, |
|
"logits/rejected": -1.7067593336105347, |
|
"logps/chosen": -280.5216369628906, |
|
"logps/rejected": -271.8791809082031, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.4236111044883728, |
|
"rewards/chosen": -0.00042370916344225407, |
|
"rewards/margins": -0.0002716032031457871, |
|
"rewards/rejected": -0.00015210600395221263, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 23.205563002993117, |
|
"learning_rate": 2.0833333333333333e-07, |
|
"logits/chosen": -1.8309519290924072, |
|
"logits/rejected": -1.7239341735839844, |
|
"logps/chosen": -298.9266662597656, |
|
"logps/rejected": -320.81036376953125, |
|
"loss": 0.6919, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.00880073755979538, |
|
"rewards/margins": 0.0003546981024555862, |
|
"rewards/rejected": 0.008446039631962776, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 22.833130746886702, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": -1.8621642589569092, |
|
"logits/rejected": -1.811255693435669, |
|
"logps/chosen": -315.0081481933594, |
|
"logps/rejected": -281.7824401855469, |
|
"loss": 0.6846, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.06436704099178314, |
|
"rewards/margins": 0.02108323760330677, |
|
"rewards/rejected": 0.04328380152583122, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 20.296209907433, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"logits/chosen": -1.7256094217300415, |
|
"logits/rejected": -1.6898906230926514, |
|
"logps/chosen": -269.07220458984375, |
|
"logps/rejected": -258.07366943359375, |
|
"loss": 0.6708, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": 0.15135471522808075, |
|
"rewards/margins": 0.05834723263978958, |
|
"rewards/rejected": 0.09300748258829117, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 18.992519669533575, |
|
"learning_rate": 4.999733114418725e-07, |
|
"logits/chosen": -1.7586348056793213, |
|
"logits/rejected": -1.7471107244491577, |
|
"logps/chosen": -274.77728271484375, |
|
"logps/rejected": -298.24298095703125, |
|
"loss": 0.6568, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.2238006889820099, |
|
"rewards/margins": 0.05361497402191162, |
|
"rewards/rejected": 0.17018567025661469, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 22.488749510223712, |
|
"learning_rate": 4.990398100856366e-07, |
|
"logits/chosen": -1.8446115255355835, |
|
"logits/rejected": -1.8052647113800049, |
|
"logps/chosen": -268.59100341796875, |
|
"logps/rejected": -318.24041748046875, |
|
"loss": 0.642, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.22674357891082764, |
|
"rewards/margins": 0.11847379058599472, |
|
"rewards/rejected": 0.10826978832483292, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 24.241452630651324, |
|
"learning_rate": 4.967775735898179e-07, |
|
"logits/chosen": -1.6720319986343384, |
|
"logits/rejected": -1.6877762079238892, |
|
"logps/chosen": -274.5986022949219, |
|
"logps/rejected": -289.9263610839844, |
|
"loss": 0.6123, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": 0.11401952803134918, |
|
"rewards/margins": 0.22531266510486603, |
|
"rewards/rejected": -0.11129315197467804, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 32.48718302712838, |
|
"learning_rate": 4.931986719649298e-07, |
|
"logits/chosen": -1.910599946975708, |
|
"logits/rejected": -1.7989906072616577, |
|
"logps/chosen": -356.32135009765625, |
|
"logps/rejected": -325.3817443847656, |
|
"loss": 0.5878, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.07416001707315445, |
|
"rewards/margins": 0.30830469727516174, |
|
"rewards/rejected": -0.3824646770954132, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 32.46521048247274, |
|
"learning_rate": 4.883222001996351e-07, |
|
"logits/chosen": -1.781141996383667, |
|
"logits/rejected": -1.773406982421875, |
|
"logps/chosen": -326.0487365722656, |
|
"logps/rejected": -370.7205505371094, |
|
"loss": 0.5637, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.45380252599716187, |
|
"rewards/margins": 0.5182568430900574, |
|
"rewards/rejected": -0.9720592498779297, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 33.51530497027872, |
|
"learning_rate": 4.821741763807186e-07, |
|
"logits/chosen": -1.795566201210022, |
|
"logits/rejected": -1.7746385335922241, |
|
"logps/chosen": -341.0810241699219, |
|
"logps/rejected": -391.9131774902344, |
|
"loss": 0.5671, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.6200565695762634, |
|
"rewards/margins": 0.5509090423583984, |
|
"rewards/rejected": -1.1709656715393066, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_logits/chosen": -1.8679490089416504, |
|
"eval_logits/rejected": -1.8570616245269775, |
|
"eval_logps/chosen": -316.96636962890625, |
|
"eval_logps/rejected": -376.7557373046875, |
|
"eval_loss": 0.5698967576026917, |
|
"eval_rewards/accuracies": 0.73046875, |
|
"eval_rewards/chosen": -0.3533283472061157, |
|
"eval_rewards/margins": 0.5366135239601135, |
|
"eval_rewards/rejected": -0.8899418115615845, |
|
"eval_runtime": 97.6563, |
|
"eval_samples_per_second": 20.48, |
|
"eval_steps_per_second": 0.328, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 34.820943984944364, |
|
"learning_rate": 4.747874028753375e-07, |
|
"logits/chosen": -1.9302442073822021, |
|
"logits/rejected": -1.8041632175445557, |
|
"logps/chosen": -364.3658142089844, |
|
"logps/rejected": -368.28619384765625, |
|
"loss": 0.5779, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.3326733410358429, |
|
"rewards/margins": 0.5019634962081909, |
|
"rewards/rejected": -0.8346366882324219, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 35.52031238722188, |
|
"learning_rate": 4.662012913161997e-07, |
|
"logits/chosen": -1.8828121423721313, |
|
"logits/rejected": -1.8731359243392944, |
|
"logps/chosen": -346.777099609375, |
|
"logps/rejected": -378.0817565917969, |
|
"loss": 0.544, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.42460617423057556, |
|
"rewards/margins": 0.5200009942054749, |
|
"rewards/rejected": -0.9446069598197937, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 40.83171596073763, |
|
"learning_rate": 4.5646165232345103e-07, |
|
"logits/chosen": -1.9067420959472656, |
|
"logits/rejected": -1.848259687423706, |
|
"logps/chosen": -353.1668395996094, |
|
"logps/rejected": -412.601806640625, |
|
"loss": 0.5319, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.5689653158187866, |
|
"rewards/margins": 0.6179059147834778, |
|
"rewards/rejected": -1.1868712902069092, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 39.57816446283388, |
|
"learning_rate": 4.456204510851956e-07, |
|
"logits/chosen": -1.79110848903656, |
|
"logits/rejected": -1.710828423500061, |
|
"logps/chosen": -390.3045959472656, |
|
"logps/rejected": -453.116943359375, |
|
"loss": 0.537, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.5443016290664673, |
|
"rewards/margins": 0.7209955453872681, |
|
"rewards/rejected": -1.2652971744537354, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 45.241736858623206, |
|
"learning_rate": 4.337355301007335e-07, |
|
"logits/chosen": -1.8114426136016846, |
|
"logits/rejected": -1.7426559925079346, |
|
"logps/chosen": -352.48992919921875, |
|
"logps/rejected": -402.91943359375, |
|
"loss": 0.5462, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.5064585208892822, |
|
"rewards/margins": 0.5219663381576538, |
|
"rewards/rejected": -1.028424859046936, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 73.25214998863763, |
|
"learning_rate": 4.2087030056579986e-07, |
|
"logits/chosen": -1.8390640020370483, |
|
"logits/rejected": -1.7504537105560303, |
|
"logps/chosen": -339.1869812011719, |
|
"logps/rejected": -387.9916076660156, |
|
"loss": 0.5442, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.4455109238624573, |
|
"rewards/margins": 0.7391675710678101, |
|
"rewards/rejected": -1.1846784353256226, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 48.08778532882697, |
|
"learning_rate": 4.070934040463998e-07, |
|
"logits/chosen": -1.7452236413955688, |
|
"logits/rejected": -1.6487846374511719, |
|
"logps/chosen": -335.72528076171875, |
|
"logps/rejected": -377.5245361328125, |
|
"loss": 0.5304, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.6902536749839783, |
|
"rewards/margins": 0.5506319999694824, |
|
"rewards/rejected": -1.2408854961395264, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 45.566526901622865, |
|
"learning_rate": 3.9247834624635404e-07, |
|
"logits/chosen": -1.5920779705047607, |
|
"logits/rejected": -1.5328117609024048, |
|
"logps/chosen": -352.29937744140625, |
|
"logps/rejected": -390.72100830078125, |
|
"loss": 0.5011, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.8087765574455261, |
|
"rewards/margins": 0.6529080867767334, |
|
"rewards/rejected": -1.4616845846176147, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 48.30624199959232, |
|
"learning_rate": 3.7710310482256523e-07, |
|
"logits/chosen": -1.7276074886322021, |
|
"logits/rejected": -1.6613149642944336, |
|
"logps/chosen": -347.87579345703125, |
|
"logps/rejected": -405.24237060546875, |
|
"loss": 0.5308, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.5396801829338074, |
|
"rewards/margins": 0.6805658936500549, |
|
"rewards/rejected": -1.2202460765838623, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 99.6040419345467, |
|
"learning_rate": 3.610497133404795e-07, |
|
"logits/chosen": -1.7740137577056885, |
|
"logits/rejected": -1.7177015542984009, |
|
"logps/chosen": -344.6033020019531, |
|
"logps/rejected": -404.29229736328125, |
|
"loss": 0.5413, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.6725525856018066, |
|
"rewards/margins": 0.6624492406845093, |
|
"rewards/rejected": -1.3350017070770264, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_logits/chosen": -1.901658296585083, |
|
"eval_logits/rejected": -1.8679291009902954, |
|
"eval_logps/chosen": -349.037841796875, |
|
"eval_logps/rejected": -432.6194152832031, |
|
"eval_loss": 0.5253521800041199, |
|
"eval_rewards/accuracies": 0.7265625, |
|
"eval_rewards/chosen": -0.6740425825119019, |
|
"eval_rewards/margins": 0.7745361328125, |
|
"eval_rewards/rejected": -1.4485788345336914, |
|
"eval_runtime": 97.5006, |
|
"eval_samples_per_second": 20.513, |
|
"eval_steps_per_second": 0.328, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 46.68866608504909, |
|
"learning_rate": 3.4440382358952115e-07, |
|
"logits/chosen": -1.734480619430542, |
|
"logits/rejected": -1.6646308898925781, |
|
"logps/chosen": -384.4491882324219, |
|
"logps/rejected": -421.3724670410156, |
|
"loss": 0.5373, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.7106617093086243, |
|
"rewards/margins": 0.7391539812088013, |
|
"rewards/rejected": -1.4498156309127808, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 44.67370083595421, |
|
"learning_rate": 3.272542485937368e-07, |
|
"logits/chosen": -1.7060235738754272, |
|
"logits/rejected": -1.621319055557251, |
|
"logps/chosen": -333.583740234375, |
|
"logps/rejected": -387.3582458496094, |
|
"loss": 0.5233, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.5107825994491577, |
|
"rewards/margins": 0.7757614850997925, |
|
"rewards/rejected": -1.2865440845489502, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 44.602377758622936, |
|
"learning_rate": 3.096924887558854e-07, |
|
"logits/chosen": -1.672357201576233, |
|
"logits/rejected": -1.677425742149353, |
|
"logps/chosen": -334.2008361816406, |
|
"logps/rejected": -428.0926208496094, |
|
"loss": 0.5239, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.6795379519462585, |
|
"rewards/margins": 0.8884698152542114, |
|
"rewards/rejected": -1.5680078268051147, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 43.82303533573589, |
|
"learning_rate": 2.9181224366319943e-07, |
|
"logits/chosen": -1.6859185695648193, |
|
"logits/rejected": -1.6255781650543213, |
|
"logps/chosen": -357.11773681640625, |
|
"logps/rejected": -421.4244079589844, |
|
"loss": 0.4902, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.6822614073753357, |
|
"rewards/margins": 0.8219982385635376, |
|
"rewards/rejected": -1.5042595863342285, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 46.68066851465082, |
|
"learning_rate": 2.7370891215954565e-07, |
|
"logits/chosen": -1.5676295757293701, |
|
"logits/rejected": -1.4538037776947021, |
|
"logps/chosen": -401.59979248046875, |
|
"logps/rejected": -471.2294006347656, |
|
"loss": 0.5154, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.7957779765129089, |
|
"rewards/margins": 1.0585238933563232, |
|
"rewards/rejected": -1.8543018102645874, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 45.74080164598797, |
|
"learning_rate": 2.55479083351317e-07, |
|
"logits/chosen": -1.5453943014144897, |
|
"logits/rejected": -1.3946092128753662, |
|
"logps/chosen": -411.67681884765625, |
|
"logps/rejected": -464.185791015625, |
|
"loss": 0.5124, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.9510448575019836, |
|
"rewards/margins": 0.9077135324478149, |
|
"rewards/rejected": -1.8587583303451538, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 35.67215071482242, |
|
"learning_rate": 2.3722002126275822e-07, |
|
"logits/chosen": -1.4410674571990967, |
|
"logits/rejected": -1.4173917770385742, |
|
"logps/chosen": -389.84442138671875, |
|
"logps/rejected": -446.63946533203125, |
|
"loss": 0.5176, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.882165253162384, |
|
"rewards/margins": 0.7578494548797607, |
|
"rewards/rejected": -1.6400146484375, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 45.13283149696396, |
|
"learning_rate": 2.19029145890313e-07, |
|
"logits/chosen": -1.3330192565917969, |
|
"logits/rejected": -1.2097164392471313, |
|
"logps/chosen": -361.392578125, |
|
"logps/rejected": -428.5855407714844, |
|
"loss": 0.5182, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.7854728102684021, |
|
"rewards/margins": 0.8376423716545105, |
|
"rewards/rejected": -1.6231151819229126, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 42.58221661061121, |
|
"learning_rate": 2.0100351342479216e-07, |
|
"logits/chosen": -1.3783751726150513, |
|
"logits/rejected": -1.3098156452178955, |
|
"logps/chosen": -341.5384521484375, |
|
"logps/rejected": -416.83050537109375, |
|
"loss": 0.5035, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.7412964105606079, |
|
"rewards/margins": 0.8173438906669617, |
|
"rewards/rejected": -1.5586402416229248, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 44.44135138330837, |
|
"learning_rate": 1.8323929841460178e-07, |
|
"logits/chosen": -1.4361331462860107, |
|
"logits/rejected": -1.2948487997055054, |
|
"logps/chosen": -415.39324951171875, |
|
"logps/rejected": -463.1932067871094, |
|
"loss": 0.4829, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.9276901483535767, |
|
"rewards/margins": 0.8659119606018066, |
|
"rewards/rejected": -1.7936019897460938, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_logits/chosen": -1.256626844406128, |
|
"eval_logits/rejected": -1.199381709098816, |
|
"eval_logps/chosen": -368.2399597167969, |
|
"eval_logps/rejected": -477.2876892089844, |
|
"eval_loss": 0.49556368589401245, |
|
"eval_rewards/accuracies": 0.78125, |
|
"eval_rewards/chosen": -0.8660640716552734, |
|
"eval_rewards/margins": 1.0291972160339355, |
|
"eval_rewards/rejected": -1.895261287689209, |
|
"eval_runtime": 97.5907, |
|
"eval_samples_per_second": 20.494, |
|
"eval_steps_per_second": 0.328, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 49.20598478293576, |
|
"learning_rate": 1.6583128063291573e-07, |
|
"logits/chosen": -1.070894479751587, |
|
"logits/rejected": -0.999220073223114, |
|
"logps/chosen": -422.22509765625, |
|
"logps/rejected": -478.1600646972656, |
|
"loss": 0.4882, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.0501785278320312, |
|
"rewards/margins": 0.9182316660881042, |
|
"rewards/rejected": -1.9684101343154907, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 49.52786644109137, |
|
"learning_rate": 1.488723393865766e-07, |
|
"logits/chosen": -0.9732829332351685, |
|
"logits/rejected": -0.8598931431770325, |
|
"logps/chosen": -423.58465576171875, |
|
"logps/rejected": -463.65087890625, |
|
"loss": 0.482, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.0610870122909546, |
|
"rewards/margins": 0.8992059826850891, |
|
"rewards/rejected": -1.960293173789978, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 44.16474950280745, |
|
"learning_rate": 1.3245295796480788e-07, |
|
"logits/chosen": -1.1072012186050415, |
|
"logits/rejected": -0.9854669570922852, |
|
"logps/chosen": -383.697509765625, |
|
"logps/rejected": -467.64630126953125, |
|
"loss": 0.4809, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.015866756439209, |
|
"rewards/margins": 0.7990261316299438, |
|
"rewards/rejected": -1.8148927688598633, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 49.790170416193874, |
|
"learning_rate": 1.1666074087171627e-07, |
|
"logits/chosen": -1.0710186958312988, |
|
"logits/rejected": -0.9443724751472473, |
|
"logps/chosen": -406.4459228515625, |
|
"logps/rejected": -490.1005859375, |
|
"loss": 0.4931, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.9062315821647644, |
|
"rewards/margins": 0.9880655407905579, |
|
"rewards/rejected": -1.8942972421646118, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 45.78788884909769, |
|
"learning_rate": 1.0157994641835734e-07, |
|
"logits/chosen": -0.964527428150177, |
|
"logits/rejected": -0.8877021670341492, |
|
"logps/chosen": -366.8417053222656, |
|
"logps/rejected": -448.39239501953125, |
|
"loss": 0.4708, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.8918215036392212, |
|
"rewards/margins": 0.9880329966545105, |
|
"rewards/rejected": -1.879854440689087, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 47.523486254775236, |
|
"learning_rate": 8.729103716819111e-08, |
|
"logits/chosen": -1.099103331565857, |
|
"logits/rejected": -0.9152529835700989, |
|
"logps/chosen": -418.78790283203125, |
|
"logps/rejected": -472.5894470214844, |
|
"loss": 0.5139, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.9209517240524292, |
|
"rewards/margins": 0.9369718432426453, |
|
"rewards/rejected": -1.8579237461090088, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 40.46200259764798, |
|
"learning_rate": 7.387025063449081e-08, |
|
"logits/chosen": -1.0258140563964844, |
|
"logits/rejected": -0.9037224054336548, |
|
"logps/chosen": -389.7789611816406, |
|
"logps/rejected": -427.80902099609375, |
|
"loss": 0.501, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.9432722926139832, |
|
"rewards/margins": 0.7514128684997559, |
|
"rewards/rejected": -1.6946852207183838, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 44.99044596346264, |
|
"learning_rate": 6.138919252022435e-08, |
|
"logits/chosen": -0.9202815294265747, |
|
"logits/rejected": -0.9092128872871399, |
|
"logps/chosen": -369.4691467285156, |
|
"logps/rejected": -499.80047607421875, |
|
"loss": 0.4856, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.1355737447738647, |
|
"rewards/margins": 1.0553382635116577, |
|
"rewards/rejected": -2.1909122467041016, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 46.73184407203235, |
|
"learning_rate": 4.991445467064689e-08, |
|
"logits/chosen": -1.017165184020996, |
|
"logits/rejected": -0.9522297978401184, |
|
"logps/chosen": -429.59124755859375, |
|
"logps/rejected": -499.8984375, |
|
"loss": 0.4844, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.0497267246246338, |
|
"rewards/margins": 0.8575556874275208, |
|
"rewards/rejected": -1.9072824716567993, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 45.88759783660656, |
|
"learning_rate": 3.9507259776993954e-08, |
|
"logits/chosen": -0.8822342753410339, |
|
"logits/rejected": -0.7616764307022095, |
|
"logps/chosen": -393.79986572265625, |
|
"logps/rejected": -488.7137145996094, |
|
"loss": 0.4981, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.0166757106781006, |
|
"rewards/margins": 0.9999138116836548, |
|
"rewards/rejected": -2.016589403152466, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_logits/chosen": -1.0730373859405518, |
|
"eval_logits/rejected": -0.9850106239318848, |
|
"eval_logps/chosen": -365.3529357910156, |
|
"eval_logps/rejected": -476.14508056640625, |
|
"eval_loss": 0.49130114912986755, |
|
"eval_rewards/accuracies": 0.78515625, |
|
"eval_rewards/chosen": -0.8371938467025757, |
|
"eval_rewards/margins": 1.0466417074203491, |
|
"eval_rewards/rejected": -1.8838355541229248, |
|
"eval_runtime": 97.6225, |
|
"eval_samples_per_second": 20.487, |
|
"eval_steps_per_second": 0.328, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 44.331882429947925, |
|
"learning_rate": 3.022313472693447e-08, |
|
"logits/chosen": -1.0954724550247192, |
|
"logits/rejected": -0.854290783405304, |
|
"logps/chosen": -407.5718078613281, |
|
"logps/rejected": -482.7383728027344, |
|
"loss": 0.4921, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.8705843687057495, |
|
"rewards/margins": 1.0231659412384033, |
|
"rewards/rejected": -1.8937501907348633, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 48.31749590006741, |
|
"learning_rate": 2.2111614344599684e-08, |
|
"logits/chosen": -1.0192204713821411, |
|
"logits/rejected": -0.973158061504364, |
|
"logps/chosen": -416.341552734375, |
|
"logps/rejected": -486.69232177734375, |
|
"loss": 0.4856, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.9502483606338501, |
|
"rewards/margins": 0.9265721440315247, |
|
"rewards/rejected": -1.8768205642700195, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 40.281913550333705, |
|
"learning_rate": 1.521597710086439e-08, |
|
"logits/chosen": -0.9044865369796753, |
|
"logits/rejected": -0.8032494783401489, |
|
"logps/chosen": -405.5864562988281, |
|
"logps/rejected": -480.13201904296875, |
|
"loss": 0.4776, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -1.037095546722412, |
|
"rewards/margins": 0.9771502614021301, |
|
"rewards/rejected": -2.0142457485198975, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 43.058313272164526, |
|
"learning_rate": 9.57301420397924e-09, |
|
"logits/chosen": -0.9727311134338379, |
|
"logits/rejected": -0.8283950090408325, |
|
"logps/chosen": -393.12823486328125, |
|
"logps/rejected": -472.7400817871094, |
|
"loss": 0.4944, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.8835296630859375, |
|
"rewards/margins": 0.8925860524177551, |
|
"rewards/rejected": -1.7761156558990479, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 45.17569103872668, |
|
"learning_rate": 5.212833302556258e-09, |
|
"logits/chosen": -0.8834640383720398, |
|
"logits/rejected": -0.8035561442375183, |
|
"logps/chosen": -416.1705627441406, |
|
"logps/rejected": -517.479248046875, |
|
"loss": 0.4973, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.0344994068145752, |
|
"rewards/margins": 0.8305438756942749, |
|
"rewards/rejected": -1.8650434017181396, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 57.18112420515564, |
|
"learning_rate": 2.158697848236607e-09, |
|
"logits/chosen": -0.9279729723930359, |
|
"logits/rejected": -0.8204873204231262, |
|
"logps/chosen": -390.6974182128906, |
|
"logps/rejected": -443.84051513671875, |
|
"loss": 0.4944, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.9189409017562866, |
|
"rewards/margins": 0.8423686027526855, |
|
"rewards/rejected": -1.7613098621368408, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 40.005457130126345, |
|
"learning_rate": 4.269029751107489e-10, |
|
"logits/chosen": -0.9693315625190735, |
|
"logits/rejected": -0.8152003288269043, |
|
"logps/chosen": -384.0590515136719, |
|
"logps/rejected": -482.89630126953125, |
|
"loss": 0.4792, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.8925178647041321, |
|
"rewards/margins": 0.9355740547180176, |
|
"rewards/rejected": -1.8280918598175049, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 478, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5347933170685708, |
|
"train_runtime": 7634.2165, |
|
"train_samples_per_second": 8.008, |
|
"train_steps_per_second": 0.063 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 478, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|