Minbyul's picture
Model save
94bb268 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9984591679506933,
"eval_steps": 100,
"global_step": 324,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"grad_norm": 39.87631410320537,
"learning_rate": 1.5151515151515152e-08,
"logits/chosen": -3.1684141159057617,
"logits/rejected": -3.1765036582946777,
"logps/chosen": -1262.7908935546875,
"logps/rejected": -1304.270263671875,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.03,
"grad_norm": 44.090928533988176,
"learning_rate": 1.5151515151515152e-07,
"logits/chosen": -3.145017385482788,
"logits/rejected": -3.17344069480896,
"logps/chosen": -1035.2520751953125,
"logps/rejected": -1331.3636474609375,
"loss": 0.6917,
"rewards/accuracies": 0.5277777910232544,
"rewards/chosen": 0.003002108307555318,
"rewards/margins": 0.0036764023825526237,
"rewards/rejected": -0.0006742942496202886,
"step": 10
},
{
"epoch": 0.06,
"grad_norm": 32.37744800941447,
"learning_rate": 3.0303030303030305e-07,
"logits/chosen": -3.1358423233032227,
"logits/rejected": -3.18705415725708,
"logps/chosen": -968.2097778320312,
"logps/rejected": -1354.069580078125,
"loss": 0.6617,
"rewards/accuracies": 0.768750011920929,
"rewards/chosen": 0.03913033753633499,
"rewards/margins": 0.06312780827283859,
"rewards/rejected": -0.023997480049729347,
"step": 20
},
{
"epoch": 0.09,
"grad_norm": 30.93962012271263,
"learning_rate": 4.545454545454545e-07,
"logits/chosen": -3.2511069774627686,
"logits/rejected": -3.244719982147217,
"logps/chosen": -1036.7672119140625,
"logps/rejected": -1373.1820068359375,
"loss": 0.5896,
"rewards/accuracies": 0.7562500238418579,
"rewards/chosen": 0.06491168588399887,
"rewards/margins": 0.31840670108795166,
"rewards/rejected": -0.2534949779510498,
"step": 30
},
{
"epoch": 0.12,
"grad_norm": 31.51946350483435,
"learning_rate": 4.992864684782648e-07,
"logits/chosen": -3.3422675132751465,
"logits/rejected": -3.370623826980591,
"logps/chosen": -1083.17431640625,
"logps/rejected": -1492.5845947265625,
"loss": 0.5125,
"rewards/accuracies": 0.8125,
"rewards/chosen": -0.07627250999212265,
"rewards/margins": 0.8225336074829102,
"rewards/rejected": -0.898806095123291,
"step": 40
},
{
"epoch": 0.15,
"grad_norm": 29.782679812110892,
"learning_rate": 4.958014217656854e-07,
"logits/chosen": -3.3696506023406982,
"logits/rejected": -3.4038467407226562,
"logps/chosen": -1081.7869873046875,
"logps/rejected": -1461.259033203125,
"loss": 0.4171,
"rewards/accuracies": 0.768750011920929,
"rewards/chosen": -0.1066322773694992,
"rewards/margins": 1.0666204690933228,
"rewards/rejected": -1.173252820968628,
"step": 50
},
{
"epoch": 0.18,
"grad_norm": 30.594547647279217,
"learning_rate": 4.894543310469967e-07,
"logits/chosen": -3.352465867996216,
"logits/rejected": -3.3652706146240234,
"logps/chosen": -1111.260009765625,
"logps/rejected": -1504.715087890625,
"loss": 0.3969,
"rewards/accuracies": 0.8062499761581421,
"rewards/chosen": -0.2318076640367508,
"rewards/margins": 1.2468664646148682,
"rewards/rejected": -1.4786741733551025,
"step": 60
},
{
"epoch": 0.22,
"grad_norm": 31.824817232007625,
"learning_rate": 4.803191000971128e-07,
"logits/chosen": -3.3312506675720215,
"logits/rejected": -3.355130434036255,
"logps/chosen": -968.1290893554688,
"logps/rejected": -1600.333251953125,
"loss": 0.3874,
"rewards/accuracies": 0.7875000238418579,
"rewards/chosen": -0.22658078372478485,
"rewards/margins": 2.3517754077911377,
"rewards/rejected": -2.5783562660217285,
"step": 70
},
{
"epoch": 0.25,
"grad_norm": 25.823223858100576,
"learning_rate": 4.685020970273189e-07,
"logits/chosen": -3.2700467109680176,
"logits/rejected": -3.3080413341522217,
"logps/chosen": -1002.4366455078125,
"logps/rejected": -1593.41796875,
"loss": 0.3546,
"rewards/accuracies": 0.8687499761581421,
"rewards/chosen": -0.07068847864866257,
"rewards/margins": 2.2586522102355957,
"rewards/rejected": -2.329341173171997,
"step": 80
},
{
"epoch": 0.28,
"grad_norm": 43.928369861559965,
"learning_rate": 4.541409157643027e-07,
"logits/chosen": -3.235419511795044,
"logits/rejected": -3.2496044635772705,
"logps/chosen": -956.4049072265625,
"logps/rejected": -1599.0389404296875,
"loss": 0.3143,
"rewards/accuracies": 0.918749988079071,
"rewards/chosen": 0.038097791373729706,
"rewards/margins": 2.2678751945495605,
"rewards/rejected": -2.2297775745391846,
"step": 90
},
{
"epoch": 0.31,
"grad_norm": 30.820532733997354,
"learning_rate": 4.374027739443952e-07,
"logits/chosen": -3.204524517059326,
"logits/rejected": -3.163343906402588,
"logps/chosen": -1068.4237060546875,
"logps/rejected": -1704.1986083984375,
"loss": 0.2799,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.3904297947883606,
"rewards/margins": 3.4663283824920654,
"rewards/rejected": -3.8567581176757812,
"step": 100
},
{
"epoch": 0.31,
"eval_logits/chosen": -3.0348000526428223,
"eval_logits/rejected": -3.0867843627929688,
"eval_logps/chosen": -584.1478881835938,
"eval_logps/rejected": -794.01025390625,
"eval_loss": 0.5261008143424988,
"eval_rewards/accuracies": 0.75,
"eval_rewards/chosen": -1.5201810598373413,
"eval_rewards/margins": 0.9906590580940247,
"eval_rewards/rejected": -2.5108399391174316,
"eval_runtime": 34.7053,
"eval_samples_per_second": 7.261,
"eval_steps_per_second": 0.231,
"step": 100
},
{
"epoch": 0.34,
"grad_norm": 47.845448282397456,
"learning_rate": 4.184825658775027e-07,
"logits/chosen": -3.128324031829834,
"logits/rejected": -3.134152889251709,
"logps/chosen": -1042.473388671875,
"logps/rejected": -1787.997802734375,
"loss": 0.2816,
"rewards/accuracies": 0.893750011920929,
"rewards/chosen": -0.6833322644233704,
"rewards/margins": 3.4197134971618652,
"rewards/rejected": -4.10304594039917,
"step": 110
},
{
"epoch": 0.37,
"grad_norm": 24.2664669682948,
"learning_rate": 3.9760059325148063e-07,
"logits/chosen": -3.1436760425567627,
"logits/rejected": -3.091614246368408,
"logps/chosen": -1067.834716796875,
"logps/rejected": -1788.0120849609375,
"loss": 0.2536,
"rewards/accuracies": 0.887499988079071,
"rewards/chosen": -0.610935389995575,
"rewards/margins": 4.445748329162598,
"rewards/rejected": -5.056683540344238,
"step": 120
},
{
"epoch": 0.4,
"grad_norm": 22.23462347593175,
"learning_rate": 3.75e-07,
"logits/chosen": -3.1414103507995605,
"logits/rejected": -3.0941264629364014,
"logps/chosen": -1100.4937744140625,
"logps/rejected": -1801.8560791015625,
"loss": 0.2298,
"rewards/accuracies": 0.9312499761581421,
"rewards/chosen": -0.540351152420044,
"rewards/margins": 3.7757785320281982,
"rewards/rejected": -4.3161301612854,
"step": 130
},
{
"epoch": 0.43,
"grad_norm": 28.613362043744857,
"learning_rate": 3.509439412016004e-07,
"logits/chosen": -3.0641441345214844,
"logits/rejected": -3.0451717376708984,
"logps/chosen": -1098.5340576171875,
"logps/rejected": -1918.6890869140625,
"loss": 0.2135,
"rewards/accuracies": 0.893750011920929,
"rewards/chosen": -0.7187612056732178,
"rewards/margins": 5.323573112487793,
"rewards/rejected": -6.04233455657959,
"step": 140
},
{
"epoch": 0.46,
"grad_norm": 30.54616548038225,
"learning_rate": 3.2571251897448763e-07,
"logits/chosen": -2.992375135421753,
"logits/rejected": -2.95180606842041,
"logps/chosen": -1197.9376220703125,
"logps/rejected": -2077.058349609375,
"loss": 0.1801,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -1.5255634784698486,
"rewards/margins": 5.869881629943848,
"rewards/rejected": -7.395445346832275,
"step": 150
},
{
"epoch": 0.49,
"grad_norm": 36.74589910484145,
"learning_rate": 2.9959952104467243e-07,
"logits/chosen": -2.9339356422424316,
"logits/rejected": -2.85386323928833,
"logps/chosen": -1247.737060546875,
"logps/rejected": -2310.10205078125,
"loss": 0.1778,
"rewards/accuracies": 0.9375,
"rewards/chosen": -2.0261478424072266,
"rewards/margins": 7.8707451820373535,
"rewards/rejected": -9.896891593933105,
"step": 160
},
{
"epoch": 0.52,
"grad_norm": 32.132075393104884,
"learning_rate": 2.729089999626637e-07,
"logits/chosen": -2.980856418609619,
"logits/rejected": -2.856822967529297,
"logps/chosen": -1185.372802734375,
"logps/rejected": -2347.78076171875,
"loss": 0.1698,
"rewards/accuracies": 0.918749988079071,
"rewards/chosen": -1.51887047290802,
"rewards/margins": 8.371113777160645,
"rewards/rejected": -9.889985084533691,
"step": 170
},
{
"epoch": 0.55,
"grad_norm": 26.839790210789428,
"learning_rate": 2.459517327993746e-07,
"logits/chosen": -2.962564468383789,
"logits/rejected": -2.8451316356658936,
"logps/chosen": -1266.3397216796875,
"logps/rejected": -2263.588623046875,
"loss": 0.141,
"rewards/accuracies": 0.9437500238418579,
"rewards/chosen": -1.7179988622665405,
"rewards/margins": 7.524572849273682,
"rewards/rejected": -9.242570877075195,
"step": 180
},
{
"epoch": 0.59,
"grad_norm": 33.374896332465084,
"learning_rate": 2.1904160254356748e-07,
"logits/chosen": -2.881953477859497,
"logits/rejected": -2.7538435459136963,
"logps/chosen": -1169.200927734375,
"logps/rejected": -2476.24072265625,
"loss": 0.1207,
"rewards/accuracies": 0.956250011920929,
"rewards/chosen": -1.6462358236312866,
"rewards/margins": 9.677408218383789,
"rewards/rejected": -11.323644638061523,
"step": 190
},
{
"epoch": 0.62,
"grad_norm": 44.90567050679125,
"learning_rate": 1.9249194333484563e-07,
"logits/chosen": -2.8342463970184326,
"logits/rejected": -2.730264902114868,
"logps/chosen": -1229.9298095703125,
"logps/rejected": -2291.61181640625,
"loss": 0.154,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -2.0603644847869873,
"rewards/margins": 7.22509765625,
"rewards/rejected": -9.285462379455566,
"step": 200
},
{
"epoch": 0.62,
"eval_logits/chosen": -2.694772720336914,
"eval_logits/rejected": -2.554710626602173,
"eval_logps/chosen": -742.1358642578125,
"eval_logps/rejected": -1446.8753662109375,
"eval_loss": 0.09226308017969131,
"eval_rewards/accuracies": 0.9375,
"eval_rewards/chosen": -3.100059986114502,
"eval_rewards/margins": 5.939432144165039,
"eval_rewards/rejected": -9.0394926071167,
"eval_runtime": 34.5072,
"eval_samples_per_second": 7.303,
"eval_steps_per_second": 0.232,
"step": 200
},
{
"epoch": 0.65,
"grad_norm": 29.94298857126716,
"learning_rate": 1.6661189208729489e-07,
"logits/chosen": -2.786771059036255,
"logits/rejected": -2.6327857971191406,
"logps/chosen": -1283.205810546875,
"logps/rejected": -2401.399169921875,
"loss": 0.1526,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -2.189497709274292,
"rewards/margins": 8.632684707641602,
"rewards/rejected": -10.822182655334473,
"step": 210
},
{
"epoch": 0.68,
"grad_norm": 33.13137270748439,
"learning_rate": 1.4170278898446175e-07,
"logits/chosen": -2.828369379043579,
"logits/rejected": -2.650123119354248,
"logps/chosen": -1175.46826171875,
"logps/rejected": -2410.84326171875,
"loss": 0.1252,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -1.8023840188980103,
"rewards/margins": 8.576199531555176,
"rewards/rejected": -10.378583908081055,
"step": 220
},
{
"epoch": 0.71,
"grad_norm": 30.216897410019698,
"learning_rate": 1.1805466875731276e-07,
"logits/chosen": -2.820298671722412,
"logits/rejected": -2.622697591781616,
"logps/chosen": -1135.1295166015625,
"logps/rejected": -2719.731201171875,
"loss": 0.1353,
"rewards/accuracies": 0.9624999761581421,
"rewards/chosen": -1.7641971111297607,
"rewards/margins": 11.304890632629395,
"rewards/rejected": -13.06908893585205,
"step": 230
},
{
"epoch": 0.74,
"grad_norm": 28.800219923929006,
"learning_rate": 9.594288359976815e-08,
"logits/chosen": -2.815680742263794,
"logits/rejected": -2.6530845165252686,
"logps/chosen": -1304.4205322265625,
"logps/rejected": -2253.841064453125,
"loss": 0.1093,
"rewards/accuracies": 0.9624999761581421,
"rewards/chosen": -1.9788525104522705,
"rewards/margins": 7.066276550292969,
"rewards/rejected": -9.045129776000977,
"step": 240
},
{
"epoch": 0.77,
"grad_norm": 28.53520064286844,
"learning_rate": 7.56248970436493e-08,
"logits/chosen": -2.7308030128479004,
"logits/rejected": -2.56375789642334,
"logps/chosen": -1186.9593505859375,
"logps/rejected": -2636.3701171875,
"loss": 0.0913,
"rewards/accuracies": 0.981249988079071,
"rewards/chosen": -2.045689105987549,
"rewards/margins": 10.186556816101074,
"rewards/rejected": -12.232245445251465,
"step": 250
},
{
"epoch": 0.8,
"grad_norm": 21.7882636792373,
"learning_rate": 5.733728612427771e-08,
"logits/chosen": -2.7645225524902344,
"logits/rejected": -2.5233638286590576,
"logps/chosen": -1321.7745361328125,
"logps/rejected": -2658.189453125,
"loss": 0.1155,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -2.5851030349731445,
"rewards/margins": 10.245210647583008,
"rewards/rejected": -12.830312728881836,
"step": 260
},
{
"epoch": 0.83,
"grad_norm": 34.32906854711248,
"learning_rate": 4.1292986742682254e-08,
"logits/chosen": -2.668457508087158,
"logits/rejected": -2.500288963317871,
"logps/chosen": -1262.6650390625,
"logps/rejected": -2782.10009765625,
"loss": 0.0989,
"rewards/accuracies": 0.96875,
"rewards/chosen": -2.7102155685424805,
"rewards/margins": 11.686820983886719,
"rewards/rejected": -14.3970365524292,
"step": 270
},
{
"epoch": 0.86,
"grad_norm": 42.763438453906815,
"learning_rate": 2.7678814298657732e-08,
"logits/chosen": -2.6972427368164062,
"logits/rejected": -2.4791617393493652,
"logps/chosen": -1353.071533203125,
"logps/rejected": -2790.54052734375,
"loss": 0.1022,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -2.827423095703125,
"rewards/margins": 11.204734802246094,
"rewards/rejected": -14.032157897949219,
"step": 280
},
{
"epoch": 0.89,
"grad_norm": 26.418126829556318,
"learning_rate": 1.6653288463741062e-08,
"logits/chosen": -2.689786672592163,
"logits/rejected": -2.518730401992798,
"logps/chosen": -1242.576416015625,
"logps/rejected": -2554.541748046875,
"loss": 0.1067,
"rewards/accuracies": 0.981249988079071,
"rewards/chosen": -2.544224262237549,
"rewards/margins": 9.17101001739502,
"rewards/rejected": -11.715234756469727,
"step": 290
},
{
"epoch": 0.92,
"grad_norm": 40.4422866987403,
"learning_rate": 8.344787421847216e-09,
"logits/chosen": -2.65974497795105,
"logits/rejected": -2.4722535610198975,
"logps/chosen": -1281.4610595703125,
"logps/rejected": -2672.197265625,
"loss": 0.0948,
"rewards/accuracies": 0.981249988079071,
"rewards/chosen": -2.5506279468536377,
"rewards/margins": 10.449880599975586,
"rewards/rejected": -13.000508308410645,
"step": 300
},
{
"epoch": 0.92,
"eval_logits/chosen": -2.5877139568328857,
"eval_logits/rejected": -2.3930397033691406,
"eval_logps/chosen": -803.1033325195312,
"eval_logps/rejected": -1661.4266357421875,
"eval_loss": 0.07533077150583267,
"eval_rewards/accuracies": 0.953125,
"eval_rewards/chosen": -3.70973539352417,
"eval_rewards/margins": 7.4752678871154785,
"eval_rewards/rejected": -11.185002326965332,
"eval_runtime": 34.126,
"eval_samples_per_second": 7.384,
"eval_steps_per_second": 0.234,
"step": 300
},
{
"epoch": 0.96,
"grad_norm": 38.70439473653127,
"learning_rate": 2.850053069080344e-09,
"logits/chosen": -2.730034351348877,
"logits/rejected": -2.4978787899017334,
"logps/chosen": -1227.954345703125,
"logps/rejected": -2714.13623046875,
"loss": 0.1114,
"rewards/accuracies": 0.981249988079071,
"rewards/chosen": -2.477766990661621,
"rewards/margins": 11.021059036254883,
"rewards/rejected": -13.498825073242188,
"step": 310
},
{
"epoch": 0.99,
"grad_norm": 25.381541507877866,
"learning_rate": 2.3306457775981727e-10,
"logits/chosen": -2.6724932193756104,
"logits/rejected": -2.4461209774017334,
"logps/chosen": -1288.38623046875,
"logps/rejected": -2817.382568359375,
"loss": 0.0963,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -2.566399097442627,
"rewards/margins": 11.983835220336914,
"rewards/rejected": -14.550233840942383,
"step": 320
},
{
"epoch": 1.0,
"step": 324,
"total_flos": 0.0,
"train_loss": 0.007740737387427577,
"train_runtime": 396.513,
"train_samples_per_second": 52.306,
"train_steps_per_second": 0.817
}
],
"logging_steps": 10,
"max_steps": 324,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}