|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9984591679506933, |
|
"eval_steps": 100, |
|
"global_step": 324, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 39.87631410320537, |
|
"learning_rate": 1.5151515151515152e-08, |
|
"logits/chosen": -3.1684141159057617, |
|
"logits/rejected": -3.1765036582946777, |
|
"logps/chosen": -1262.7908935546875, |
|
"logps/rejected": -1304.270263671875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 44.090928533988176, |
|
"learning_rate": 1.5151515151515152e-07, |
|
"logits/chosen": -3.145017385482788, |
|
"logits/rejected": -3.17344069480896, |
|
"logps/chosen": -1035.2520751953125, |
|
"logps/rejected": -1331.3636474609375, |
|
"loss": 0.6917, |
|
"rewards/accuracies": 0.5277777910232544, |
|
"rewards/chosen": 0.003002108307555318, |
|
"rewards/margins": 0.0036764023825526237, |
|
"rewards/rejected": -0.0006742942496202886, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 32.37744800941447, |
|
"learning_rate": 3.0303030303030305e-07, |
|
"logits/chosen": -3.1358423233032227, |
|
"logits/rejected": -3.18705415725708, |
|
"logps/chosen": -968.2097778320312, |
|
"logps/rejected": -1354.069580078125, |
|
"loss": 0.6617, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": 0.03913033753633499, |
|
"rewards/margins": 0.06312780827283859, |
|
"rewards/rejected": -0.023997480049729347, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 30.93962012271263, |
|
"learning_rate": 4.545454545454545e-07, |
|
"logits/chosen": -3.2511069774627686, |
|
"logits/rejected": -3.244719982147217, |
|
"logps/chosen": -1036.7672119140625, |
|
"logps/rejected": -1373.1820068359375, |
|
"loss": 0.5896, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": 0.06491168588399887, |
|
"rewards/margins": 0.31840670108795166, |
|
"rewards/rejected": -0.2534949779510498, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 31.51946350483435, |
|
"learning_rate": 4.992864684782648e-07, |
|
"logits/chosen": -3.3422675132751465, |
|
"logits/rejected": -3.370623826980591, |
|
"logps/chosen": -1083.17431640625, |
|
"logps/rejected": -1492.5845947265625, |
|
"loss": 0.5125, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.07627250999212265, |
|
"rewards/margins": 0.8225336074829102, |
|
"rewards/rejected": -0.898806095123291, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 29.782679812110892, |
|
"learning_rate": 4.958014217656854e-07, |
|
"logits/chosen": -3.3696506023406982, |
|
"logits/rejected": -3.4038467407226562, |
|
"logps/chosen": -1081.7869873046875, |
|
"logps/rejected": -1461.259033203125, |
|
"loss": 0.4171, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.1066322773694992, |
|
"rewards/margins": 1.0666204690933228, |
|
"rewards/rejected": -1.173252820968628, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 30.594547647279217, |
|
"learning_rate": 4.894543310469967e-07, |
|
"logits/chosen": -3.352465867996216, |
|
"logits/rejected": -3.3652706146240234, |
|
"logps/chosen": -1111.260009765625, |
|
"logps/rejected": -1504.715087890625, |
|
"loss": 0.3969, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -0.2318076640367508, |
|
"rewards/margins": 1.2468664646148682, |
|
"rewards/rejected": -1.4786741733551025, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 31.824817232007625, |
|
"learning_rate": 4.803191000971128e-07, |
|
"logits/chosen": -3.3312506675720215, |
|
"logits/rejected": -3.355130434036255, |
|
"logps/chosen": -968.1290893554688, |
|
"logps/rejected": -1600.333251953125, |
|
"loss": 0.3874, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.22658078372478485, |
|
"rewards/margins": 2.3517754077911377, |
|
"rewards/rejected": -2.5783562660217285, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 25.823223858100576, |
|
"learning_rate": 4.685020970273189e-07, |
|
"logits/chosen": -3.2700467109680176, |
|
"logits/rejected": -3.3080413341522217, |
|
"logps/chosen": -1002.4366455078125, |
|
"logps/rejected": -1593.41796875, |
|
"loss": 0.3546, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -0.07068847864866257, |
|
"rewards/margins": 2.2586522102355957, |
|
"rewards/rejected": -2.329341173171997, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 43.928369861559965, |
|
"learning_rate": 4.541409157643027e-07, |
|
"logits/chosen": -3.235419511795044, |
|
"logits/rejected": -3.2496044635772705, |
|
"logps/chosen": -956.4049072265625, |
|
"logps/rejected": -1599.0389404296875, |
|
"loss": 0.3143, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": 0.038097791373729706, |
|
"rewards/margins": 2.2678751945495605, |
|
"rewards/rejected": -2.2297775745391846, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 30.820532733997354, |
|
"learning_rate": 4.374027739443952e-07, |
|
"logits/chosen": -3.204524517059326, |
|
"logits/rejected": -3.163343906402588, |
|
"logps/chosen": -1068.4237060546875, |
|
"logps/rejected": -1704.1986083984375, |
|
"loss": 0.2799, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.3904297947883606, |
|
"rewards/margins": 3.4663283824920654, |
|
"rewards/rejected": -3.8567581176757812, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_logits/chosen": -3.0348000526428223, |
|
"eval_logits/rejected": -3.0867843627929688, |
|
"eval_logps/chosen": -584.1478881835938, |
|
"eval_logps/rejected": -794.01025390625, |
|
"eval_loss": 0.5261008143424988, |
|
"eval_rewards/accuracies": 0.75, |
|
"eval_rewards/chosen": -1.5201810598373413, |
|
"eval_rewards/margins": 0.9906590580940247, |
|
"eval_rewards/rejected": -2.5108399391174316, |
|
"eval_runtime": 34.7053, |
|
"eval_samples_per_second": 7.261, |
|
"eval_steps_per_second": 0.231, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 47.845448282397456, |
|
"learning_rate": 4.184825658775027e-07, |
|
"logits/chosen": -3.128324031829834, |
|
"logits/rejected": -3.134152889251709, |
|
"logps/chosen": -1042.473388671875, |
|
"logps/rejected": -1787.997802734375, |
|
"loss": 0.2816, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -0.6833322644233704, |
|
"rewards/margins": 3.4197134971618652, |
|
"rewards/rejected": -4.10304594039917, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 24.2664669682948, |
|
"learning_rate": 3.9760059325148063e-07, |
|
"logits/chosen": -3.1436760425567627, |
|
"logits/rejected": -3.091614246368408, |
|
"logps/chosen": -1067.834716796875, |
|
"logps/rejected": -1788.0120849609375, |
|
"loss": 0.2536, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -0.610935389995575, |
|
"rewards/margins": 4.445748329162598, |
|
"rewards/rejected": -5.056683540344238, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 22.23462347593175, |
|
"learning_rate": 3.75e-07, |
|
"logits/chosen": -3.1414103507995605, |
|
"logits/rejected": -3.0941264629364014, |
|
"logps/chosen": -1100.4937744140625, |
|
"logps/rejected": -1801.8560791015625, |
|
"loss": 0.2298, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -0.540351152420044, |
|
"rewards/margins": 3.7757785320281982, |
|
"rewards/rejected": -4.3161301612854, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 28.613362043744857, |
|
"learning_rate": 3.509439412016004e-07, |
|
"logits/chosen": -3.0641441345214844, |
|
"logits/rejected": -3.0451717376708984, |
|
"logps/chosen": -1098.5340576171875, |
|
"logps/rejected": -1918.6890869140625, |
|
"loss": 0.2135, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -0.7187612056732178, |
|
"rewards/margins": 5.323573112487793, |
|
"rewards/rejected": -6.04233455657959, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 30.54616548038225, |
|
"learning_rate": 3.2571251897448763e-07, |
|
"logits/chosen": -2.992375135421753, |
|
"logits/rejected": -2.95180606842041, |
|
"logps/chosen": -1197.9376220703125, |
|
"logps/rejected": -2077.058349609375, |
|
"loss": 0.1801, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.5255634784698486, |
|
"rewards/margins": 5.869881629943848, |
|
"rewards/rejected": -7.395445346832275, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 36.74589910484145, |
|
"learning_rate": 2.9959952104467243e-07, |
|
"logits/chosen": -2.9339356422424316, |
|
"logits/rejected": -2.85386323928833, |
|
"logps/chosen": -1247.737060546875, |
|
"logps/rejected": -2310.10205078125, |
|
"loss": 0.1778, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -2.0261478424072266, |
|
"rewards/margins": 7.8707451820373535, |
|
"rewards/rejected": -9.896891593933105, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 32.132075393104884, |
|
"learning_rate": 2.729089999626637e-07, |
|
"logits/chosen": -2.980856418609619, |
|
"logits/rejected": -2.856822967529297, |
|
"logps/chosen": -1185.372802734375, |
|
"logps/rejected": -2347.78076171875, |
|
"loss": 0.1698, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -1.51887047290802, |
|
"rewards/margins": 8.371113777160645, |
|
"rewards/rejected": -9.889985084533691, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 26.839790210789428, |
|
"learning_rate": 2.459517327993746e-07, |
|
"logits/chosen": -2.962564468383789, |
|
"logits/rejected": -2.8451316356658936, |
|
"logps/chosen": -1266.3397216796875, |
|
"logps/rejected": -2263.588623046875, |
|
"loss": 0.141, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -1.7179988622665405, |
|
"rewards/margins": 7.524572849273682, |
|
"rewards/rejected": -9.242570877075195, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 33.374896332465084, |
|
"learning_rate": 2.1904160254356748e-07, |
|
"logits/chosen": -2.881953477859497, |
|
"logits/rejected": -2.7538435459136963, |
|
"logps/chosen": -1169.200927734375, |
|
"logps/rejected": -2476.24072265625, |
|
"loss": 0.1207, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -1.6462358236312866, |
|
"rewards/margins": 9.677408218383789, |
|
"rewards/rejected": -11.323644638061523, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 44.90567050679125, |
|
"learning_rate": 1.9249194333484563e-07, |
|
"logits/chosen": -2.8342463970184326, |
|
"logits/rejected": -2.730264902114868, |
|
"logps/chosen": -1229.9298095703125, |
|
"logps/rejected": -2291.61181640625, |
|
"loss": 0.154, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.0603644847869873, |
|
"rewards/margins": 7.22509765625, |
|
"rewards/rejected": -9.285462379455566, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_logits/chosen": -2.694772720336914, |
|
"eval_logits/rejected": -2.554710626602173, |
|
"eval_logps/chosen": -742.1358642578125, |
|
"eval_logps/rejected": -1446.8753662109375, |
|
"eval_loss": 0.09226308017969131, |
|
"eval_rewards/accuracies": 0.9375, |
|
"eval_rewards/chosen": -3.100059986114502, |
|
"eval_rewards/margins": 5.939432144165039, |
|
"eval_rewards/rejected": -9.0394926071167, |
|
"eval_runtime": 34.5072, |
|
"eval_samples_per_second": 7.303, |
|
"eval_steps_per_second": 0.232, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 29.94298857126716, |
|
"learning_rate": 1.6661189208729489e-07, |
|
"logits/chosen": -2.786771059036255, |
|
"logits/rejected": -2.6327857971191406, |
|
"logps/chosen": -1283.205810546875, |
|
"logps/rejected": -2401.399169921875, |
|
"loss": 0.1526, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.189497709274292, |
|
"rewards/margins": 8.632684707641602, |
|
"rewards/rejected": -10.822182655334473, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 33.13137270748439, |
|
"learning_rate": 1.4170278898446175e-07, |
|
"logits/chosen": -2.828369379043579, |
|
"logits/rejected": -2.650123119354248, |
|
"logps/chosen": -1175.46826171875, |
|
"logps/rejected": -2410.84326171875, |
|
"loss": 0.1252, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -1.8023840188980103, |
|
"rewards/margins": 8.576199531555176, |
|
"rewards/rejected": -10.378583908081055, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 30.216897410019698, |
|
"learning_rate": 1.1805466875731276e-07, |
|
"logits/chosen": -2.820298671722412, |
|
"logits/rejected": -2.622697591781616, |
|
"logps/chosen": -1135.1295166015625, |
|
"logps/rejected": -2719.731201171875, |
|
"loss": 0.1353, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -1.7641971111297607, |
|
"rewards/margins": 11.304890632629395, |
|
"rewards/rejected": -13.06908893585205, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 28.800219923929006, |
|
"learning_rate": 9.594288359976815e-08, |
|
"logits/chosen": -2.815680742263794, |
|
"logits/rejected": -2.6530845165252686, |
|
"logps/chosen": -1304.4205322265625, |
|
"logps/rejected": -2253.841064453125, |
|
"loss": 0.1093, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -1.9788525104522705, |
|
"rewards/margins": 7.066276550292969, |
|
"rewards/rejected": -9.045129776000977, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 28.53520064286844, |
|
"learning_rate": 7.56248970436493e-08, |
|
"logits/chosen": -2.7308030128479004, |
|
"logits/rejected": -2.56375789642334, |
|
"logps/chosen": -1186.9593505859375, |
|
"logps/rejected": -2636.3701171875, |
|
"loss": 0.0913, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -2.045689105987549, |
|
"rewards/margins": 10.186556816101074, |
|
"rewards/rejected": -12.232245445251465, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 21.7882636792373, |
|
"learning_rate": 5.733728612427771e-08, |
|
"logits/chosen": -2.7645225524902344, |
|
"logits/rejected": -2.5233638286590576, |
|
"logps/chosen": -1321.7745361328125, |
|
"logps/rejected": -2658.189453125, |
|
"loss": 0.1155, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -2.5851030349731445, |
|
"rewards/margins": 10.245210647583008, |
|
"rewards/rejected": -12.830312728881836, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 34.32906854711248, |
|
"learning_rate": 4.1292986742682254e-08, |
|
"logits/chosen": -2.668457508087158, |
|
"logits/rejected": -2.500288963317871, |
|
"logps/chosen": -1262.6650390625, |
|
"logps/rejected": -2782.10009765625, |
|
"loss": 0.0989, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -2.7102155685424805, |
|
"rewards/margins": 11.686820983886719, |
|
"rewards/rejected": -14.3970365524292, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 42.763438453906815, |
|
"learning_rate": 2.7678814298657732e-08, |
|
"logits/chosen": -2.6972427368164062, |
|
"logits/rejected": -2.4791617393493652, |
|
"logps/chosen": -1353.071533203125, |
|
"logps/rejected": -2790.54052734375, |
|
"loss": 0.1022, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -2.827423095703125, |
|
"rewards/margins": 11.204734802246094, |
|
"rewards/rejected": -14.032157897949219, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 26.418126829556318, |
|
"learning_rate": 1.6653288463741062e-08, |
|
"logits/chosen": -2.689786672592163, |
|
"logits/rejected": -2.518730401992798, |
|
"logps/chosen": -1242.576416015625, |
|
"logps/rejected": -2554.541748046875, |
|
"loss": 0.1067, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -2.544224262237549, |
|
"rewards/margins": 9.17101001739502, |
|
"rewards/rejected": -11.715234756469727, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 40.4422866987403, |
|
"learning_rate": 8.344787421847216e-09, |
|
"logits/chosen": -2.65974497795105, |
|
"logits/rejected": -2.4722535610198975, |
|
"logps/chosen": -1281.4610595703125, |
|
"logps/rejected": -2672.197265625, |
|
"loss": 0.0948, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -2.5506279468536377, |
|
"rewards/margins": 10.449880599975586, |
|
"rewards/rejected": -13.000508308410645, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_logits/chosen": -2.5877139568328857, |
|
"eval_logits/rejected": -2.3930397033691406, |
|
"eval_logps/chosen": -803.1033325195312, |
|
"eval_logps/rejected": -1661.4266357421875, |
|
"eval_loss": 0.07533077150583267, |
|
"eval_rewards/accuracies": 0.953125, |
|
"eval_rewards/chosen": -3.70973539352417, |
|
"eval_rewards/margins": 7.4752678871154785, |
|
"eval_rewards/rejected": -11.185002326965332, |
|
"eval_runtime": 34.126, |
|
"eval_samples_per_second": 7.384, |
|
"eval_steps_per_second": 0.234, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 38.70439473653127, |
|
"learning_rate": 2.850053069080344e-09, |
|
"logits/chosen": -2.730034351348877, |
|
"logits/rejected": -2.4978787899017334, |
|
"logps/chosen": -1227.954345703125, |
|
"logps/rejected": -2714.13623046875, |
|
"loss": 0.1114, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -2.477766990661621, |
|
"rewards/margins": 11.021059036254883, |
|
"rewards/rejected": -13.498825073242188, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 25.381541507877866, |
|
"learning_rate": 2.3306457775981727e-10, |
|
"logits/chosen": -2.6724932193756104, |
|
"logits/rejected": -2.4461209774017334, |
|
"logps/chosen": -1288.38623046875, |
|
"logps/rejected": -2817.382568359375, |
|
"loss": 0.0963, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -2.566399097442627, |
|
"rewards/margins": 11.983835220336914, |
|
"rewards/rejected": -14.550233840942383, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 324, |
|
"total_flos": 0.0, |
|
"train_loss": 0.007740737387427577, |
|
"train_runtime": 396.513, |
|
"train_samples_per_second": 52.306, |
|
"train_steps_per_second": 0.817 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 324, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|