Minbyul's picture
Model save
1646b68 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9984591679506933,
"eval_steps": 100,
"global_step": 324,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"grad_norm": 6.101982836222645,
"learning_rate": 1.5151515151515152e-07,
"logits/chosen": -0.362821102142334,
"logits/rejected": -0.6466645002365112,
"logps/chosen": -1025.3448486328125,
"logps/rejected": -1304.718017578125,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.03,
"grad_norm": 6.0068256381560765,
"learning_rate": 1.5151515151515152e-06,
"logits/chosen": -0.6083016991615295,
"logits/rejected": -0.6111394166946411,
"logps/chosen": -990.301025390625,
"logps/rejected": -1385.5863037109375,
"loss": 0.6912,
"rewards/accuracies": 0.4583333432674408,
"rewards/chosen": -0.00793336983770132,
"rewards/margins": 0.0015673839952796698,
"rewards/rejected": -0.009500754997134209,
"step": 10
},
{
"epoch": 0.06,
"grad_norm": 6.64385894959999,
"learning_rate": 3.0303030303030305e-06,
"logits/chosen": -0.39747971296310425,
"logits/rejected": -0.5266290903091431,
"logps/chosen": -1019.9202270507812,
"logps/rejected": -1275.5029296875,
"loss": 0.6306,
"rewards/accuracies": 0.71875,
"rewards/chosen": -0.17681096494197845,
"rewards/margins": 0.19123901426792145,
"rewards/rejected": -0.3680500090122223,
"step": 20
},
{
"epoch": 0.09,
"grad_norm": 5.146938095650329,
"learning_rate": 4.5454545454545455e-06,
"logits/chosen": -0.3289431631565094,
"logits/rejected": -0.3537369966506958,
"logps/chosen": -914.8097534179688,
"logps/rejected": -1425.679443359375,
"loss": 0.5081,
"rewards/accuracies": 0.793749988079071,
"rewards/chosen": -0.23936215043067932,
"rewards/margins": 0.8084670305252075,
"rewards/rejected": -1.0478291511535645,
"step": 30
},
{
"epoch": 0.12,
"grad_norm": 11.382612181193535,
"learning_rate": 4.9928646847826494e-06,
"logits/chosen": -0.27268069982528687,
"logits/rejected": -0.3392156958580017,
"logps/chosen": -1024.892578125,
"logps/rejected": -1513.9617919921875,
"loss": 0.4356,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": -0.20300360023975372,
"rewards/margins": 1.8095756769180298,
"rewards/rejected": -2.0125787258148193,
"step": 40
},
{
"epoch": 0.15,
"grad_norm": 4.485452143975147,
"learning_rate": 4.958014217656855e-06,
"logits/chosen": -0.23044054210186005,
"logits/rejected": -0.25221356749534607,
"logps/chosen": -967.2037353515625,
"logps/rejected": -1537.017333984375,
"loss": 0.3801,
"rewards/accuracies": 0.856249988079071,
"rewards/chosen": -0.12485456466674805,
"rewards/margins": 2.005478620529175,
"rewards/rejected": -2.130333185195923,
"step": 50
},
{
"epoch": 0.18,
"grad_norm": 3.8070457431642892,
"learning_rate": 4.894543310469968e-06,
"logits/chosen": -0.19517004489898682,
"logits/rejected": -0.22597365081310272,
"logps/chosen": -916.4852294921875,
"logps/rejected": -1595.1839599609375,
"loss": 0.3655,
"rewards/accuracies": 0.8374999761581421,
"rewards/chosen": -0.2139274626970291,
"rewards/margins": 2.360320568084717,
"rewards/rejected": -2.5742483139038086,
"step": 60
},
{
"epoch": 0.22,
"grad_norm": 3.6141979805383726,
"learning_rate": 4.803191000971128e-06,
"logits/chosen": -0.17929306626319885,
"logits/rejected": -0.18331752717494965,
"logps/chosen": -965.8648681640625,
"logps/rejected": -1572.9818115234375,
"loss": 0.3243,
"rewards/accuracies": 0.893750011920929,
"rewards/chosen": -0.4429554343223572,
"rewards/margins": 2.501615047454834,
"rewards/rejected": -2.944570541381836,
"step": 70
},
{
"epoch": 0.25,
"grad_norm": 3.9698745789179712,
"learning_rate": 4.68502097027319e-06,
"logits/chosen": -0.18549516797065735,
"logits/rejected": -0.30454546213150024,
"logps/chosen": -881.955078125,
"logps/rejected": -1555.6883544921875,
"loss": 0.284,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.10575978457927704,
"rewards/margins": 2.606116771697998,
"rewards/rejected": -2.711876392364502,
"step": 80
},
{
"epoch": 0.28,
"grad_norm": 4.915459926093003,
"learning_rate": 4.541409157643027e-06,
"logits/chosen": -0.2555353045463562,
"logits/rejected": -0.3517759442329407,
"logps/chosen": -1006.6803588867188,
"logps/rejected": -1699.8876953125,
"loss": 0.2626,
"rewards/accuracies": 0.9125000238418579,
"rewards/chosen": -0.24506595730781555,
"rewards/margins": 3.7898712158203125,
"rewards/rejected": -4.034937381744385,
"step": 90
},
{
"epoch": 0.31,
"grad_norm": 5.579746591418778,
"learning_rate": 4.374027739443953e-06,
"logits/chosen": -0.2530584931373596,
"logits/rejected": -0.39778950810432434,
"logps/chosen": -1006.4603271484375,
"logps/rejected": -1830.1383056640625,
"loss": 0.249,
"rewards/accuracies": 0.9312499761581421,
"rewards/chosen": -0.23174142837524414,
"rewards/margins": 4.588972091674805,
"rewards/rejected": -4.820713996887207,
"step": 100
},
{
"epoch": 0.31,
"eval_logits/chosen": -0.2358812391757965,
"eval_logits/rejected": -0.266615092754364,
"eval_logps/chosen": -535.3107299804688,
"eval_logps/rejected": -1504.041259765625,
"eval_loss": 0.3604305684566498,
"eval_rewards/accuracies": 0.8942307829856873,
"eval_rewards/chosen": -0.7724042534828186,
"eval_rewards/margins": 7.1227898597717285,
"eval_rewards/rejected": -7.8951945304870605,
"eval_runtime": 41.33,
"eval_samples_per_second": 9.581,
"eval_steps_per_second": 0.315,
"step": 100
},
{
"epoch": 0.34,
"grad_norm": 13.962571315802977,
"learning_rate": 4.184825658775027e-06,
"logits/chosen": -0.35490721464157104,
"logits/rejected": -0.3757438659667969,
"logps/chosen": -973.4483642578125,
"logps/rejected": -1818.024658203125,
"loss": 0.2291,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.5153377056121826,
"rewards/margins": 3.8287353515625,
"rewards/rejected": -4.344073295593262,
"step": 110
},
{
"epoch": 0.37,
"grad_norm": 3.793575374162859,
"learning_rate": 3.976005932514807e-06,
"logits/chosen": -0.3033773601055145,
"logits/rejected": -0.33670344948768616,
"logps/chosen": -1026.07373046875,
"logps/rejected": -1623.307861328125,
"loss": 0.1906,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.40073472261428833,
"rewards/margins": 3.711804151535034,
"rewards/rejected": -4.112539768218994,
"step": 120
},
{
"epoch": 0.4,
"grad_norm": 3.3455045057377073,
"learning_rate": 3.7500000000000005e-06,
"logits/chosen": -0.22906668484210968,
"logits/rejected": -0.30051860213279724,
"logps/chosen": -947.1951904296875,
"logps/rejected": -1786.0419921875,
"loss": 0.1972,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -0.5512313842773438,
"rewards/margins": 3.6224727630615234,
"rewards/rejected": -4.173704147338867,
"step": 130
},
{
"epoch": 0.43,
"grad_norm": 5.442047168951181,
"learning_rate": 3.5094394120160047e-06,
"logits/chosen": -0.2941485047340393,
"logits/rejected": -0.324366956949234,
"logps/chosen": -1009.9542236328125,
"logps/rejected": -1759.7135009765625,
"loss": 0.2106,
"rewards/accuracies": 0.9125000238418579,
"rewards/chosen": -0.6786917448043823,
"rewards/margins": 3.91167950630188,
"rewards/rejected": -4.590371608734131,
"step": 140
},
{
"epoch": 0.46,
"grad_norm": 4.215335437797857,
"learning_rate": 3.257125189744877e-06,
"logits/chosen": -0.32115620374679565,
"logits/rejected": -0.36864355206489563,
"logps/chosen": -954.0335693359375,
"logps/rejected": -1671.050537109375,
"loss": 0.1917,
"rewards/accuracies": 0.9125000238418579,
"rewards/chosen": -0.24637527763843536,
"rewards/margins": 3.504626750946045,
"rewards/rejected": -3.751002073287964,
"step": 150
},
{
"epoch": 0.49,
"grad_norm": 2.836600464080731,
"learning_rate": 2.9959952104467247e-06,
"logits/chosen": -0.3462420105934143,
"logits/rejected": -0.37651991844177246,
"logps/chosen": -1160.772216796875,
"logps/rejected": -1859.069091796875,
"loss": 0.1688,
"rewards/accuracies": 0.9437500238418579,
"rewards/chosen": -0.4739566743373871,
"rewards/margins": 4.305468559265137,
"rewards/rejected": -4.779424667358398,
"step": 160
},
{
"epoch": 0.52,
"grad_norm": 7.01626911931463,
"learning_rate": 2.729089999626637e-06,
"logits/chosen": -0.323803573846817,
"logits/rejected": -0.38482701778411865,
"logps/chosen": -950.5234375,
"logps/rejected": -1763.620849609375,
"loss": 0.1733,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -0.5791029930114746,
"rewards/margins": 4.474118709564209,
"rewards/rejected": -5.053222179412842,
"step": 170
},
{
"epoch": 0.55,
"grad_norm": 2.8279150301129055,
"learning_rate": 2.4595173279937464e-06,
"logits/chosen": -0.373486191034317,
"logits/rejected": -0.42519837617874146,
"logps/chosen": -935.4544677734375,
"logps/rejected": -1869.3843994140625,
"loss": 0.126,
"rewards/accuracies": 0.9624999761581421,
"rewards/chosen": -0.6261727809906006,
"rewards/margins": 5.2783613204956055,
"rewards/rejected": -5.904534339904785,
"step": 180
},
{
"epoch": 0.59,
"grad_norm": 5.49355443422865,
"learning_rate": 2.190416025435675e-06,
"logits/chosen": -0.40133827924728394,
"logits/rejected": -0.4126282334327698,
"logps/chosen": -1012.4228515625,
"logps/rejected": -1692.1015625,
"loss": 0.1903,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.6535183191299438,
"rewards/margins": 4.786801815032959,
"rewards/rejected": -5.440320014953613,
"step": 190
},
{
"epoch": 0.62,
"grad_norm": 3.3031942791417244,
"learning_rate": 1.9249194333484567e-06,
"logits/chosen": -0.32231295108795166,
"logits/rejected": -0.42156219482421875,
"logps/chosen": -821.6759643554688,
"logps/rejected": -1748.581787109375,
"loss": 0.1374,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -0.3118464946746826,
"rewards/margins": 4.042534828186035,
"rewards/rejected": -4.354381561279297,
"step": 200
},
{
"epoch": 0.62,
"eval_logits/chosen": -0.2821931540966034,
"eval_logits/rejected": -0.1753174513578415,
"eval_logps/chosen": -550.3823852539062,
"eval_logps/rejected": -1521.086181640625,
"eval_loss": 0.23887068033218384,
"eval_rewards/accuracies": 0.9038461446762085,
"eval_rewards/chosen": -0.9231204390525818,
"eval_rewards/margins": 7.142522811889648,
"eval_rewards/rejected": -8.065644264221191,
"eval_runtime": 41.3932,
"eval_samples_per_second": 9.567,
"eval_steps_per_second": 0.314,
"step": 200
},
{
"epoch": 0.65,
"grad_norm": 5.527965146477785,
"learning_rate": 1.6661189208729492e-06,
"logits/chosen": -0.38927820324897766,
"logits/rejected": -0.5214006304740906,
"logps/chosen": -1015.1365356445312,
"logps/rejected": -1966.6363525390625,
"loss": 0.1308,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -0.6492040157318115,
"rewards/margins": 5.716192722320557,
"rewards/rejected": -6.365396022796631,
"step": 210
},
{
"epoch": 0.68,
"grad_norm": 3.602893800956427,
"learning_rate": 1.4170278898446176e-06,
"logits/chosen": -0.4857853055000305,
"logits/rejected": -0.529462993144989,
"logps/chosen": -1030.688232421875,
"logps/rejected": -1932.1002197265625,
"loss": 0.1249,
"rewards/accuracies": 0.9624999761581421,
"rewards/chosen": -0.7277423143386841,
"rewards/margins": 5.351422309875488,
"rewards/rejected": -6.079164028167725,
"step": 220
},
{
"epoch": 0.71,
"grad_norm": 4.7188705667925674,
"learning_rate": 1.1805466875731277e-06,
"logits/chosen": -0.49866923689842224,
"logits/rejected": -0.6342719793319702,
"logps/chosen": -1055.165771484375,
"logps/rejected": -1956.5159912109375,
"loss": 0.1241,
"rewards/accuracies": 0.9624999761581421,
"rewards/chosen": -0.6889677047729492,
"rewards/margins": 5.961886405944824,
"rewards/rejected": -6.650854587554932,
"step": 230
},
{
"epoch": 0.74,
"grad_norm": 3.655545564406198,
"learning_rate": 9.594288359976817e-07,
"logits/chosen": -0.48687905073165894,
"logits/rejected": -0.5934125185012817,
"logps/chosen": -927.3426513671875,
"logps/rejected": -1977.37890625,
"loss": 0.1424,
"rewards/accuracies": 0.9624999761581421,
"rewards/chosen": -0.49007558822631836,
"rewards/margins": 5.872694969177246,
"rewards/rejected": -6.362771034240723,
"step": 240
},
{
"epoch": 0.77,
"grad_norm": 3.6147182029800553,
"learning_rate": 7.56248970436493e-07,
"logits/chosen": -0.4635826647281647,
"logits/rejected": -0.5487635135650635,
"logps/chosen": -992.0515747070312,
"logps/rejected": -1830.902587890625,
"loss": 0.133,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -0.31789669394493103,
"rewards/margins": 5.5414719581604,
"rewards/rejected": -5.859368801116943,
"step": 250
},
{
"epoch": 0.8,
"grad_norm": 7.0194030888446655,
"learning_rate": 5.733728612427772e-07,
"logits/chosen": -0.48970723152160645,
"logits/rejected": -0.5219728350639343,
"logps/chosen": -932.720703125,
"logps/rejected": -1843.7044677734375,
"loss": 0.1156,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": -0.4680628776550293,
"rewards/margins": 5.688261032104492,
"rewards/rejected": -6.1563239097595215,
"step": 260
},
{
"epoch": 0.83,
"grad_norm": 3.4254663130723073,
"learning_rate": 4.129298674268226e-07,
"logits/chosen": -0.47387346625328064,
"logits/rejected": -0.5744868516921997,
"logps/chosen": -919.7806396484375,
"logps/rejected": -2094.39404296875,
"loss": 0.1271,
"rewards/accuracies": 0.956250011920929,
"rewards/chosen": -0.5868527293205261,
"rewards/margins": 6.565216064453125,
"rewards/rejected": -7.152068138122559,
"step": 270
},
{
"epoch": 0.86,
"grad_norm": 2.915035180237974,
"learning_rate": 2.7678814298657735e-07,
"logits/chosen": -0.48424941301345825,
"logits/rejected": -0.5429133176803589,
"logps/chosen": -989.7317504882812,
"logps/rejected": -2131.172607421875,
"loss": 0.112,
"rewards/accuracies": 0.981249988079071,
"rewards/chosen": -0.640870213508606,
"rewards/margins": 6.7747802734375,
"rewards/rejected": -7.415650844573975,
"step": 280
},
{
"epoch": 0.89,
"grad_norm": 4.035709496896224,
"learning_rate": 1.6653288463741064e-07,
"logits/chosen": -0.5001234412193298,
"logits/rejected": -0.5271893739700317,
"logps/chosen": -983.5861206054688,
"logps/rejected": -2070.129150390625,
"loss": 0.119,
"rewards/accuracies": 0.96875,
"rewards/chosen": -0.6213029623031616,
"rewards/margins": 7.315102577209473,
"rewards/rejected": -7.936405181884766,
"step": 290
},
{
"epoch": 0.92,
"grad_norm": 3.63054147536607,
"learning_rate": 8.344787421847216e-08,
"logits/chosen": -0.4631820619106293,
"logits/rejected": -0.5495749711990356,
"logps/chosen": -926.2537231445312,
"logps/rejected": -1872.173095703125,
"loss": 0.0982,
"rewards/accuracies": 0.9437500238418579,
"rewards/chosen": -0.8281421661376953,
"rewards/margins": 5.6122145652771,
"rewards/rejected": -6.440356254577637,
"step": 300
},
{
"epoch": 0.92,
"eval_logits/chosen": -0.3568806052207947,
"eval_logits/rejected": -0.21113936603069305,
"eval_logps/chosen": -567.682861328125,
"eval_logps/rejected": -2033.01416015625,
"eval_loss": 0.24133986234664917,
"eval_rewards/accuracies": 0.8942307829856873,
"eval_rewards/chosen": -1.0961254835128784,
"eval_rewards/margins": 12.088796615600586,
"eval_rewards/rejected": -13.184922218322754,
"eval_runtime": 41.398,
"eval_samples_per_second": 9.566,
"eval_steps_per_second": 0.314,
"step": 300
},
{
"epoch": 0.96,
"grad_norm": 7.390559743925079,
"learning_rate": 2.850053069080344e-08,
"logits/chosen": -0.4385458827018738,
"logits/rejected": -0.5586596131324768,
"logps/chosen": -981.732421875,
"logps/rejected": -2004.222900390625,
"loss": 0.1192,
"rewards/accuracies": 0.96875,
"rewards/chosen": -0.7575939893722534,
"rewards/margins": 6.195023536682129,
"rewards/rejected": -6.952617645263672,
"step": 310
},
{
"epoch": 0.99,
"grad_norm": 7.4456834695369585,
"learning_rate": 2.330645777598173e-09,
"logits/chosen": -0.5341562032699585,
"logits/rejected": -0.5692285299301147,
"logps/chosen": -944.4517822265625,
"logps/rejected": -1914.8863525390625,
"loss": 0.1173,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -0.5554883480072021,
"rewards/margins": 6.5892229080200195,
"rewards/rejected": -7.144711494445801,
"step": 320
},
{
"epoch": 1.0,
"step": 324,
"total_flos": 0.0,
"train_loss": 0.23018195102980107,
"train_runtime": 4792.5755,
"train_samples_per_second": 4.331,
"train_steps_per_second": 0.068
}
],
"logging_steps": 10,
"max_steps": 324,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}