phi-2-gpo-20k-40k-60k-lora512-i2 / trainer_state.json
BraylonDash's picture
Model save
f61ddb0 verified
raw
history blame contribute delete
No virus
16 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9984,
"eval_steps": 500,
"global_step": 312,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 1.5625e-07,
"logits/chosen": -0.03749094158411026,
"logits/rejected": 0.03204140067100525,
"logps/chosen": -559.2154541015625,
"logps/rejected": -890.8544921875,
"loss": 0.1791,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.03,
"learning_rate": 1.5625e-06,
"logits/chosen": -0.15300115942955017,
"logits/rejected": -0.12813442945480347,
"logps/chosen": -481.31634521484375,
"logps/rejected": -804.3386840820312,
"loss": 0.2179,
"rewards/accuracies": 0.5347222089767456,
"rewards/chosen": -0.0013270878698676825,
"rewards/margins": 0.0023776644375175238,
"rewards/rejected": -0.0037047527730464935,
"step": 10
},
{
"epoch": 0.06,
"learning_rate": 3.125e-06,
"logits/chosen": -0.17372946441173553,
"logits/rejected": -0.10258068144321442,
"logps/chosen": -623.6970825195312,
"logps/rejected": -831.9564208984375,
"loss": 0.1794,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -0.0440058633685112,
"rewards/margins": 0.03512907028198242,
"rewards/rejected": -0.07913494110107422,
"step": 20
},
{
"epoch": 0.1,
"learning_rate": 4.6875000000000004e-06,
"logits/chosen": -0.19729416072368622,
"logits/rejected": -0.2202361822128296,
"logps/chosen": -677.2532958984375,
"logps/rejected": -1107.8955078125,
"loss": 0.1602,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -0.1397601068019867,
"rewards/margins": 0.1527976095676422,
"rewards/rejected": -0.2925576865673065,
"step": 30
},
{
"epoch": 0.13,
"learning_rate": 4.989935734988098e-06,
"logits/chosen": -0.19801445305347443,
"logits/rejected": -0.20075838267803192,
"logps/chosen": -686.9973754882812,
"logps/rejected": -964.6900634765625,
"loss": 0.1518,
"rewards/accuracies": 0.7437499761581421,
"rewards/chosen": -0.13784293830394745,
"rewards/margins": 0.11145637929439545,
"rewards/rejected": -0.2492993324995041,
"step": 40
},
{
"epoch": 0.16,
"learning_rate": 4.949188496058089e-06,
"logits/chosen": -0.19628724455833435,
"logits/rejected": -0.19985933601856232,
"logps/chosen": -606.3834228515625,
"logps/rejected": -982.189453125,
"loss": 0.143,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -0.13489742577075958,
"rewards/margins": 0.14312751591205597,
"rewards/rejected": -0.27802491188049316,
"step": 50
},
{
"epoch": 0.19,
"learning_rate": 4.8776412907378845e-06,
"logits/chosen": -0.18777325749397278,
"logits/rejected": -0.2514544427394867,
"logps/chosen": -679.7066650390625,
"logps/rejected": -1182.11083984375,
"loss": 0.1376,
"rewards/accuracies": 0.7437499761581421,
"rewards/chosen": -0.14588578045368195,
"rewards/margins": 0.16957412660121918,
"rewards/rejected": -0.3154599070549011,
"step": 60
},
{
"epoch": 0.22,
"learning_rate": 4.7761938666470405e-06,
"logits/chosen": -0.20718152821063995,
"logits/rejected": -0.18668043613433838,
"logps/chosen": -692.5406494140625,
"logps/rejected": -1079.9908447265625,
"loss": 0.1417,
"rewards/accuracies": 0.7562500238418579,
"rewards/chosen": -0.1537138670682907,
"rewards/margins": 0.16138955950737,
"rewards/rejected": -0.3151034116744995,
"step": 70
},
{
"epoch": 0.26,
"learning_rate": 4.646121984004666e-06,
"logits/chosen": -0.21971774101257324,
"logits/rejected": -0.24234215915203094,
"logps/chosen": -710.6842041015625,
"logps/rejected": -1136.7525634765625,
"loss": 0.1352,
"rewards/accuracies": 0.7562500238418579,
"rewards/chosen": -0.19244925677776337,
"rewards/margins": 0.1675754338502884,
"rewards/rejected": -0.36002466082572937,
"step": 80
},
{
"epoch": 0.29,
"learning_rate": 4.4890613722044526e-06,
"logits/chosen": -0.23076686263084412,
"logits/rejected": -0.21980659663677216,
"logps/chosen": -706.5121459960938,
"logps/rejected": -1076.0596923828125,
"loss": 0.1278,
"rewards/accuracies": 0.768750011920929,
"rewards/chosen": -0.18025998771190643,
"rewards/margins": 0.15869399905204773,
"rewards/rejected": -0.33895397186279297,
"step": 90
},
{
"epoch": 0.32,
"learning_rate": 4.3069871595684795e-06,
"logits/chosen": -0.2438955307006836,
"logits/rejected": -0.19910338521003723,
"logps/chosen": -733.8079833984375,
"logps/rejected": -1110.71923828125,
"loss": 0.1256,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.20329563319683075,
"rewards/margins": 0.16255612671375275,
"rewards/rejected": -0.3658517897129059,
"step": 100
},
{
"epoch": 0.35,
"learning_rate": 4.102189034962561e-06,
"logits/chosen": -0.19663023948669434,
"logits/rejected": -0.21354413032531738,
"logps/chosen": -751.34765625,
"logps/rejected": -1080.0113525390625,
"loss": 0.1301,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.1990022361278534,
"rewards/margins": 0.15701577067375183,
"rewards/rejected": -0.3560180068016052,
"step": 110
},
{
"epoch": 0.38,
"learning_rate": 3.8772424536302565e-06,
"logits/chosen": -0.1580425500869751,
"logits/rejected": -0.20701774954795837,
"logps/chosen": -649.1971435546875,
"logps/rejected": -1110.748779296875,
"loss": 0.1265,
"rewards/accuracies": 0.7562500238418579,
"rewards/chosen": -0.17047809064388275,
"rewards/margins": 0.17829902470111847,
"rewards/rejected": -0.34877708554267883,
"step": 120
},
{
"epoch": 0.42,
"learning_rate": 3.634976249348867e-06,
"logits/chosen": -0.1981876790523529,
"logits/rejected": -0.22599034011363983,
"logps/chosen": -736.1990966796875,
"logps/rejected": -1140.4241943359375,
"loss": 0.119,
"rewards/accuracies": 0.8062499761581421,
"rewards/chosen": -0.1852777749300003,
"rewards/margins": 0.19615033268928528,
"rewards/rejected": -0.3814280927181244,
"step": 130
},
{
"epoch": 0.45,
"learning_rate": 3.3784370602033572e-06,
"logits/chosen": -0.22472605109214783,
"logits/rejected": -0.20980677008628845,
"logps/chosen": -727.8230590820312,
"logps/rejected": -1080.5826416015625,
"loss": 0.1191,
"rewards/accuracies": 0.793749988079071,
"rewards/chosen": -0.19285574555397034,
"rewards/margins": 0.16788846254348755,
"rewards/rejected": -0.3607442080974579,
"step": 140
},
{
"epoch": 0.48,
"learning_rate": 3.1108510153447352e-06,
"logits/chosen": -0.18862155079841614,
"logits/rejected": -0.18806660175323486,
"logps/chosen": -744.17919921875,
"logps/rejected": -1178.953369140625,
"loss": 0.1196,
"rewards/accuracies": 0.8125,
"rewards/chosen": -0.196334570646286,
"rewards/margins": 0.20167379081249237,
"rewards/rejected": -0.39800840616226196,
"step": 150
},
{
"epoch": 0.51,
"learning_rate": 2.835583164544139e-06,
"logits/chosen": -0.20716705918312073,
"logits/rejected": -0.23998220264911652,
"logps/chosen": -716.6298217773438,
"logps/rejected": -1128.6080322265625,
"loss": 0.1196,
"rewards/accuracies": 0.7562500238418579,
"rewards/chosen": -0.20008280873298645,
"rewards/margins": 0.19826427102088928,
"rewards/rejected": -0.39834707975387573,
"step": 160
},
{
"epoch": 0.54,
"learning_rate": 2.556095160739513e-06,
"logits/chosen": -0.18807800114154816,
"logits/rejected": -0.24145498871803284,
"logps/chosen": -771.5186767578125,
"logps/rejected": -1165.31787109375,
"loss": 0.1095,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.24131879210472107,
"rewards/margins": 0.18717139959335327,
"rewards/rejected": -0.42849016189575195,
"step": 170
},
{
"epoch": 0.58,
"learning_rate": 2.2759017277414165e-06,
"logits/chosen": -0.22785386443138123,
"logits/rejected": -0.24600060284137726,
"logps/chosen": -828.021484375,
"logps/rejected": -1258.8414306640625,
"loss": 0.1148,
"rewards/accuracies": 0.831250011920929,
"rewards/chosen": -0.26432016491889954,
"rewards/margins": 0.2154586762189865,
"rewards/rejected": -0.47977885603904724,
"step": 180
},
{
"epoch": 0.61,
"learning_rate": 1.9985264605418185e-06,
"logits/chosen": -0.19840948283672333,
"logits/rejected": -0.22779376804828644,
"logps/chosen": -748.4619140625,
"logps/rejected": -1191.1099853515625,
"loss": 0.1083,
"rewards/accuracies": 0.78125,
"rewards/chosen": -0.22014987468719482,
"rewards/margins": 0.20830897986888885,
"rewards/rejected": -0.4284588396549225,
"step": 190
},
{
"epoch": 0.64,
"learning_rate": 1.7274575140626318e-06,
"logits/chosen": -0.15184776484966278,
"logits/rejected": -0.2133471965789795,
"logps/chosen": -777.3941650390625,
"logps/rejected": -1259.5482177734375,
"loss": 0.1097,
"rewards/accuracies": 0.831250011920929,
"rewards/chosen": -0.21874341368675232,
"rewards/margins": 0.20576436817646027,
"rewards/rejected": -0.4245077669620514,
"step": 200
},
{
"epoch": 0.67,
"learning_rate": 1.466103737583699e-06,
"logits/chosen": -0.21760430932044983,
"logits/rejected": -0.21733775734901428,
"logps/chosen": -786.8435668945312,
"logps/rejected": -1184.2471923828125,
"loss": 0.1045,
"rewards/accuracies": 0.831250011920929,
"rewards/chosen": -0.22768548130989075,
"rewards/margins": 0.21592466533184052,
"rewards/rejected": -0.44361013174057007,
"step": 210
},
{
"epoch": 0.7,
"learning_rate": 1.217751806485235e-06,
"logits/chosen": -0.15853038430213928,
"logits/rejected": -0.2094181478023529,
"logps/chosen": -753.2785034179688,
"logps/rejected": -1242.8369140625,
"loss": 0.1112,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.2066824734210968,
"rewards/margins": 0.21286919713020325,
"rewards/rejected": -0.41955167055130005,
"step": 220
},
{
"epoch": 0.74,
"learning_rate": 9.855248903979505e-07,
"logits/chosen": -0.1381937712430954,
"logits/rejected": -0.23759886622428894,
"logps/chosen": -732.6824951171875,
"logps/rejected": -1177.4642333984375,
"loss": 0.1109,
"rewards/accuracies": 0.84375,
"rewards/chosen": -0.19210098683834076,
"rewards/margins": 0.2129323035478592,
"rewards/rejected": -0.40503329038619995,
"step": 230
},
{
"epoch": 0.77,
"learning_rate": 7.723433775328385e-07,
"logits/chosen": -0.18693767488002777,
"logits/rejected": -0.19473309814929962,
"logps/chosen": -755.0777587890625,
"logps/rejected": -1212.91064453125,
"loss": 0.112,
"rewards/accuracies": 0.831250011920929,
"rewards/chosen": -0.21374483406543732,
"rewards/margins": 0.2292633354663849,
"rewards/rejected": -0.4430081248283386,
"step": 240
},
{
"epoch": 0.8,
"learning_rate": 5.808881491049723e-07,
"logits/chosen": -0.24827036261558533,
"logits/rejected": -0.20749957859516144,
"logps/chosen": -715.92333984375,
"logps/rejected": -1144.6124267578125,
"loss": 0.1215,
"rewards/accuracies": 0.768750011920929,
"rewards/chosen": -0.20713326334953308,
"rewards/margins": 0.18099360167980194,
"rewards/rejected": -0.38812685012817383,
"step": 250
},
{
"epoch": 0.83,
"learning_rate": 4.1356686569674344e-07,
"logits/chosen": -0.19055083394050598,
"logits/rejected": -0.2412928342819214,
"logps/chosen": -693.1292724609375,
"logps/rejected": -1136.007080078125,
"loss": 0.1117,
"rewards/accuracies": 0.8062499761581421,
"rewards/chosen": -0.1922534853219986,
"rewards/margins": 0.17840158939361572,
"rewards/rejected": -0.3706550598144531,
"step": 260
},
{
"epoch": 0.86,
"learning_rate": 2.7248368952908055e-07,
"logits/chosen": -0.15525199472904205,
"logits/rejected": -0.16159489750862122,
"logps/chosen": -790.9383544921875,
"logps/rejected": -1267.6949462890625,
"loss": 0.109,
"rewards/accuracies": 0.768750011920929,
"rewards/chosen": -0.21919909119606018,
"rewards/margins": 0.19449128210544586,
"rewards/rejected": -0.41369038820266724,
"step": 270
},
{
"epoch": 0.9,
"learning_rate": 1.59412823400657e-07,
"logits/chosen": -0.19185583293437958,
"logits/rejected": -0.1721155345439911,
"logps/chosen": -745.1690673828125,
"logps/rejected": -1151.0634765625,
"loss": 0.1118,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": -0.20249255001544952,
"rewards/margins": 0.19131307303905487,
"rewards/rejected": -0.3938056528568268,
"step": 280
},
{
"epoch": 0.93,
"learning_rate": 7.577619905828281e-08,
"logits/chosen": -0.18523597717285156,
"logits/rejected": -0.20255737006664276,
"logps/chosen": -761.7174072265625,
"logps/rejected": -1124.2659912109375,
"loss": 0.1165,
"rewards/accuracies": 0.831250011920929,
"rewards/chosen": -0.20564845204353333,
"rewards/margins": 0.20229394733905792,
"rewards/rejected": -0.40794238448143005,
"step": 290
},
{
"epoch": 0.96,
"learning_rate": 2.262559558016325e-08,
"logits/chosen": -0.24068090319633484,
"logits/rejected": -0.1731335073709488,
"logps/chosen": -718.3553466796875,
"logps/rejected": -1177.1654052734375,
"loss": 0.1115,
"rewards/accuracies": 0.78125,
"rewards/chosen": -0.21485304832458496,
"rewards/margins": 0.19611014425754547,
"rewards/rejected": -0.41096314787864685,
"step": 300
},
{
"epoch": 0.99,
"learning_rate": 6.294126437336734e-10,
"logits/chosen": -0.1374652087688446,
"logits/rejected": -0.15347729623317719,
"logps/chosen": -747.0050048828125,
"logps/rejected": -1133.5335693359375,
"loss": 0.1124,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.199618861079216,
"rewards/margins": 0.20002660155296326,
"rewards/rejected": -0.39964547753334045,
"step": 310
},
{
"epoch": 1.0,
"step": 312,
"total_flos": 0.0,
"train_loss": 0.004328498234733557,
"train_runtime": 430.9957,
"train_samples_per_second": 46.404,
"train_steps_per_second": 0.724
}
],
"logging_steps": 10,
"max_steps": 312,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}