|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 309, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 6.266341746346053, |
|
"learning_rate": 1.6129032258064518e-07, |
|
"logits/chosen": -0.5416143536567688, |
|
"logits/rejected": -0.9699263572692871, |
|
"logps/chosen": -998.3239135742188, |
|
"logps/rejected": -1286.9267578125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 6.79176132535156, |
|
"learning_rate": 1.6129032258064516e-06, |
|
"logits/chosen": -0.501338541507721, |
|
"logits/rejected": -0.6205970048904419, |
|
"logps/chosen": -965.1869506835938, |
|
"logps/rejected": -1388.869140625, |
|
"loss": 0.6908, |
|
"rewards/accuracies": 0.4791666567325592, |
|
"rewards/chosen": -0.005239578895270824, |
|
"rewards/margins": 0.0034307329915463924, |
|
"rewards/rejected": -0.00867031142115593, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 5.46084581758702, |
|
"learning_rate": 3.225806451612903e-06, |
|
"logits/chosen": -0.4920225143432617, |
|
"logits/rejected": -0.5186491012573242, |
|
"logps/chosen": -1001.7984619140625, |
|
"logps/rejected": -1416.242431640625, |
|
"loss": 0.6367, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.13937075436115265, |
|
"rewards/margins": 0.18681207299232483, |
|
"rewards/rejected": -0.32618284225463867, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 5.423152706060893, |
|
"learning_rate": 4.838709677419355e-06, |
|
"logits/chosen": -0.3083574175834656, |
|
"logits/rejected": -0.28873246908187866, |
|
"logps/chosen": -936.2586669921875, |
|
"logps/rejected": -1426.4195556640625, |
|
"loss": 0.4832, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -0.22132034599781036, |
|
"rewards/margins": 0.7907422780990601, |
|
"rewards/rejected": -1.012062668800354, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 4.497512811390226, |
|
"learning_rate": 4.987080943856887e-06, |
|
"logits/chosen": -0.39709392189979553, |
|
"logits/rejected": -0.3410794734954834, |
|
"logps/chosen": -864.56787109375, |
|
"logps/rejected": -1415.4027099609375, |
|
"loss": 0.431, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.10076986253261566, |
|
"rewards/margins": 1.5246957540512085, |
|
"rewards/rejected": -1.625465750694275, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 5.38009380317694, |
|
"learning_rate": 4.942593872763566e-06, |
|
"logits/chosen": -0.2893625795841217, |
|
"logits/rejected": -0.3507555425167084, |
|
"logps/chosen": -904.3942260742188, |
|
"logps/rejected": -1616.10986328125, |
|
"loss": 0.3741, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -0.2879861891269684, |
|
"rewards/margins": 2.9569807052612305, |
|
"rewards/rejected": -3.244966983795166, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 4.021367545314158, |
|
"learning_rate": 4.866946677079314e-06, |
|
"logits/chosen": -0.2734339237213135, |
|
"logits/rejected": -0.2639048397541046, |
|
"logps/chosen": -1028.2137451171875, |
|
"logps/rejected": -1476.271728515625, |
|
"loss": 0.349, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -0.4066247045993805, |
|
"rewards/margins": 1.8377554416656494, |
|
"rewards/rejected": -2.244380235671997, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 5.598262404371225, |
|
"learning_rate": 4.761104386672074e-06, |
|
"logits/chosen": -0.1697017401456833, |
|
"logits/rejected": -0.25459176301956177, |
|
"logps/chosen": -949.3585205078125, |
|
"logps/rejected": -1626.849365234375, |
|
"loss": 0.3077, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -0.5060699582099915, |
|
"rewards/margins": 2.161994457244873, |
|
"rewards/rejected": -2.6680643558502197, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 4.260536872127374, |
|
"learning_rate": 4.626417229671401e-06, |
|
"logits/chosen": -0.22611574828624725, |
|
"logits/rejected": -0.2613917291164398, |
|
"logps/chosen": -920.0872192382812, |
|
"logps/rejected": -1731.564453125, |
|
"loss": 0.2782, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -0.45848578214645386, |
|
"rewards/margins": 3.2202796936035156, |
|
"rewards/rejected": -3.6787655353546143, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 5.248398944622536, |
|
"learning_rate": 4.464603407633326e-06, |
|
"logits/chosen": -0.20690715312957764, |
|
"logits/rejected": -0.3079308271408081, |
|
"logps/chosen": -1012.1229248046875, |
|
"logps/rejected": -1682.8330078125, |
|
"loss": 0.2599, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -0.40270423889160156, |
|
"rewards/margins": 3.2318668365478516, |
|
"rewards/rejected": -3.634571075439453, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 4.915340212981017, |
|
"learning_rate": 4.2777271764750805e-06, |
|
"logits/chosen": -0.27068111300468445, |
|
"logits/rejected": -0.22187161445617676, |
|
"logps/chosen": -952.927734375, |
|
"logps/rejected": -1708.844482421875, |
|
"loss": 0.2249, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -0.5603285431861877, |
|
"rewards/margins": 3.670332670211792, |
|
"rewards/rejected": -4.230660915374756, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_logits/chosen": -0.1106632873415947, |
|
"eval_logits/rejected": -0.028987836092710495, |
|
"eval_logps/chosen": -650.2339477539062, |
|
"eval_logps/rejected": -1190.2701416015625, |
|
"eval_loss": 0.3820802569389343, |
|
"eval_rewards/accuracies": 0.8551136255264282, |
|
"eval_rewards/chosen": -0.9338886141777039, |
|
"eval_rewards/margins": 3.643230438232422, |
|
"eval_rewards/rejected": -4.577118873596191, |
|
"eval_runtime": 178.7591, |
|
"eval_samples_per_second": 7.787, |
|
"eval_steps_per_second": 0.246, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 3.910365611216077, |
|
"learning_rate": 4.06817251280076e-06, |
|
"logits/chosen": -0.20240063965320587, |
|
"logits/rejected": -0.265985906124115, |
|
"logps/chosen": -1035.5126953125, |
|
"logps/rejected": -1685.556640625, |
|
"loss": 0.2552, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -0.44126248359680176, |
|
"rewards/margins": 3.790078639984131, |
|
"rewards/rejected": -4.231341361999512, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 9.625288099258173, |
|
"learning_rate": 3.838612701556138e-06, |
|
"logits/chosen": -0.243825763463974, |
|
"logits/rejected": -0.3290537893772125, |
|
"logps/chosen": -967.8455810546875, |
|
"logps/rejected": -1898.212646484375, |
|
"loss": 0.2415, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.19587793946266174, |
|
"rewards/margins": 3.995615005493164, |
|
"rewards/rejected": -4.191493034362793, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 3.4423605201536476, |
|
"learning_rate": 3.5919762329823556e-06, |
|
"logits/chosen": -0.22354824841022491, |
|
"logits/rejected": -0.2977936565876007, |
|
"logps/chosen": -962.7554931640625, |
|
"logps/rejected": -1781.5015869140625, |
|
"loss": 0.2134, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -0.3282354772090912, |
|
"rewards/margins": 4.096698760986328, |
|
"rewards/rejected": -4.424933910369873, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 4.355526701031263, |
|
"learning_rate": 3.3314094439203903e-06, |
|
"logits/chosen": -0.21654577553272247, |
|
"logits/rejected": -0.3782255947589874, |
|
"logps/chosen": -956.0657348632812, |
|
"logps/rejected": -1783.807861328125, |
|
"loss": 0.2054, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -0.5269081592559814, |
|
"rewards/margins": 4.415614128112793, |
|
"rewards/rejected": -4.9425225257873535, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 4.577252564770749, |
|
"learning_rate": 3.0602363800505198e-06, |
|
"logits/chosen": -0.21595144271850586, |
|
"logits/rejected": -0.3744010925292969, |
|
"logps/chosen": -1055.1593017578125, |
|
"logps/rejected": -1893.0072021484375, |
|
"loss": 0.1989, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.3978404402732849, |
|
"rewards/margins": 5.1758904457092285, |
|
"rewards/rejected": -5.573730945587158, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 4.231494324857874, |
|
"learning_rate": 2.7819163911034175e-06, |
|
"logits/chosen": -0.22502343356609344, |
|
"logits/rejected": -0.37728679180145264, |
|
"logps/chosen": -989.0603637695312, |
|
"logps/rejected": -1922.759521484375, |
|
"loss": 0.1431, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.17479470372200012, |
|
"rewards/margins": 5.446383476257324, |
|
"rewards/rejected": -5.62117862701416, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 6.2160398163872665, |
|
"learning_rate": 2.5e-06, |
|
"logits/chosen": -0.2915242314338684, |
|
"logits/rejected": -0.45713406801223755, |
|
"logps/chosen": -1042.622314453125, |
|
"logps/rejected": -2141.868896484375, |
|
"loss": 0.1686, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -0.609991192817688, |
|
"rewards/margins": 7.452821254730225, |
|
"rewards/rejected": -8.062813758850098, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 2.781036420408215, |
|
"learning_rate": 2.2180836088965833e-06, |
|
"logits/chosen": -0.3469601273536682, |
|
"logits/rejected": -0.5014016032218933, |
|
"logps/chosen": -1045.5155029296875, |
|
"logps/rejected": -1821.9827880859375, |
|
"loss": 0.1551, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.07482553273439407, |
|
"rewards/margins": 5.159177303314209, |
|
"rewards/rejected": -5.234003067016602, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 3.3125651058366268, |
|
"learning_rate": 1.939763619949481e-06, |
|
"logits/chosen": -0.35754817724227905, |
|
"logits/rejected": -0.5549635887145996, |
|
"logps/chosen": -864.7000732421875, |
|
"logps/rejected": -1795.1302490234375, |
|
"loss": 0.1376, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.1424350142478943, |
|
"rewards/margins": 4.835862636566162, |
|
"rewards/rejected": -4.978297710418701, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 4.010712916190072, |
|
"learning_rate": 1.6685905560796101e-06, |
|
"logits/chosen": -0.46689772605895996, |
|
"logits/rejected": -0.6180375814437866, |
|
"logps/chosen": -1065.89599609375, |
|
"logps/rejected": -2047.24609375, |
|
"loss": 0.1549, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.5212541222572327, |
|
"rewards/margins": 6.1373467445373535, |
|
"rewards/rejected": -6.658600807189941, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_logits/chosen": -0.3179807960987091, |
|
"eval_logits/rejected": -0.32223665714263916, |
|
"eval_logps/chosen": -652.9113159179688, |
|
"eval_logps/rejected": -1308.40478515625, |
|
"eval_loss": 0.27093741297721863, |
|
"eval_rewards/accuracies": 0.8977272510528564, |
|
"eval_rewards/chosen": -0.960662305355072, |
|
"eval_rewards/margins": 4.797802925109863, |
|
"eval_rewards/rejected": -5.758464813232422, |
|
"eval_runtime": 173.283, |
|
"eval_samples_per_second": 8.033, |
|
"eval_steps_per_second": 0.254, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 3.8323612831804175, |
|
"learning_rate": 1.4080237670176456e-06, |
|
"logits/chosen": -0.4167974889278412, |
|
"logits/rejected": -0.5508753061294556, |
|
"logps/chosen": -1013.05419921875, |
|
"logps/rejected": -1894.520263671875, |
|
"loss": 0.1443, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.12083841860294342, |
|
"rewards/margins": 5.549345970153809, |
|
"rewards/rejected": -5.67018461227417, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 4.939784171225143, |
|
"learning_rate": 1.161387298443863e-06, |
|
"logits/chosen": -0.3491138815879822, |
|
"logits/rejected": -0.5552398562431335, |
|
"logps/chosen": -902.9434814453125, |
|
"logps/rejected": -1838.8209228515625, |
|
"loss": 0.148, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -0.21528509259223938, |
|
"rewards/margins": 5.609208583831787, |
|
"rewards/rejected": -5.824493408203125, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 5.989265642846245, |
|
"learning_rate": 9.318274871992408e-07, |
|
"logits/chosen": -0.44859474897384644, |
|
"logits/rejected": -0.6015830636024475, |
|
"logps/chosen": -1067.4637451171875, |
|
"logps/rejected": -2109.045654296875, |
|
"loss": 0.157, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -0.305181622505188, |
|
"rewards/margins": 7.274392604827881, |
|
"rewards/rejected": -7.579575538635254, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 4.078696847368483, |
|
"learning_rate": 7.222728235249196e-07, |
|
"logits/chosen": -0.4398832321166992, |
|
"logits/rejected": -0.5384049415588379, |
|
"logps/chosen": -971.4193115234375, |
|
"logps/rejected": -1864.9664306640625, |
|
"loss": 0.1321, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -0.1642368584871292, |
|
"rewards/margins": 5.839243412017822, |
|
"rewards/rejected": -6.003479957580566, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 2.772016531721353, |
|
"learning_rate": 5.353965923666743e-07, |
|
"logits/chosen": -0.4434467852115631, |
|
"logits/rejected": -0.5806728601455688, |
|
"logps/chosen": -985.8177490234375, |
|
"logps/rejected": -1902.5126953125, |
|
"loss": 0.125, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.21859340369701385, |
|
"rewards/margins": 5.029125213623047, |
|
"rewards/rejected": -5.247718334197998, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 3.5375826568350557, |
|
"learning_rate": 3.7358277032860016e-07, |
|
"logits/chosen": -0.47191086411476135, |
|
"logits/rejected": -0.5328727960586548, |
|
"logps/chosen": -913.7701416015625, |
|
"logps/rejected": -2043.599365234375, |
|
"loss": 0.1244, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.4412022531032562, |
|
"rewards/margins": 6.3683366775512695, |
|
"rewards/rejected": -6.809537410736084, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 3.1823409653630907, |
|
"learning_rate": 2.388956133279266e-07, |
|
"logits/chosen": -0.3901548683643341, |
|
"logits/rejected": -0.4868335723876953, |
|
"logps/chosen": -891.0558471679688, |
|
"logps/rejected": -1852.1605224609375, |
|
"loss": 0.1274, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.5433205366134644, |
|
"rewards/margins": 5.383849620819092, |
|
"rewards/rejected": -5.927170753479004, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 2.5807533458880316, |
|
"learning_rate": 1.3305332292068706e-07, |
|
"logits/chosen": -0.3871026039123535, |
|
"logits/rejected": -0.5441304445266724, |
|
"logps/chosen": -887.1787109375, |
|
"logps/rejected": -1961.8681640625, |
|
"loss": 0.0871, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -0.31611892580986023, |
|
"rewards/margins": 5.871817588806152, |
|
"rewards/rejected": -6.187936782836914, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 2.1667077758960303, |
|
"learning_rate": 5.7406127236434016e-08, |
|
"logits/chosen": -0.45710650086402893, |
|
"logits/rejected": -0.6058652400970459, |
|
"logps/chosen": -984.42578125, |
|
"logps/rejected": -2196.794189453125, |
|
"loss": 0.0998, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.6274508237838745, |
|
"rewards/margins": 7.608497619628906, |
|
"rewards/rejected": -8.23594856262207, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 2.549184554041034, |
|
"learning_rate": 1.2919056143113062e-08, |
|
"logits/chosen": -0.4359508156776428, |
|
"logits/rejected": -0.6320601105690002, |
|
"logps/chosen": -949.6388549804688, |
|
"logps/rejected": -2248.93798828125, |
|
"loss": 0.0946, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.5125720500946045, |
|
"rewards/margins": 7.058285713195801, |
|
"rewards/rejected": -7.570857048034668, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_logits/chosen": -0.3523492217063904, |
|
"eval_logits/rejected": -0.3282558023929596, |
|
"eval_logps/chosen": -679.615478515625, |
|
"eval_logps/rejected": -1442.4718017578125, |
|
"eval_loss": 0.27558115124702454, |
|
"eval_rewards/accuracies": 0.8920454382896423, |
|
"eval_rewards/chosen": -1.2277040481567383, |
|
"eval_rewards/margins": 5.87143087387085, |
|
"eval_rewards/rejected": -7.099134922027588, |
|
"eval_runtime": 171.6402, |
|
"eval_samples_per_second": 8.11, |
|
"eval_steps_per_second": 0.256, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 309, |
|
"total_flos": 0.0, |
|
"train_loss": 0.0030385508506429234, |
|
"train_runtime": 144.6456, |
|
"train_samples_per_second": 136.617, |
|
"train_steps_per_second": 2.136 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 309, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|