phi-2-gpo-v27-i1 / trainer_state.json
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9976019184652278,
"eval_steps": 500,
"global_step": 208,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 2.3809523809523811e-07,
"logits/chosen": 0.15803536772727966,
"logits/rejected": 0.08697354793548584,
"logps/chosen": -431.6365661621094,
"logps/rejected": -312.2266845703125,
"loss": 0.3497,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.05,
"learning_rate": 2.380952380952381e-06,
"logits/chosen": 0.08879230171442032,
"logits/rejected": 0.23703241348266602,
"logps/chosen": -334.3096008300781,
"logps/rejected": -325.03387451171875,
"loss": 0.3916,
"rewards/accuracies": 0.4513888955116272,
"rewards/chosen": 0.011717023327946663,
"rewards/margins": 0.0023966077715158463,
"rewards/rejected": 0.009320415556430817,
"step": 10
},
{
"epoch": 0.1,
"learning_rate": 4.761904761904762e-06,
"logits/chosen": 0.10182257741689682,
"logits/rejected": 0.21816711127758026,
"logps/chosen": -337.0960388183594,
"logps/rejected": -311.6546936035156,
"loss": 0.3814,
"rewards/accuracies": 0.48750001192092896,
"rewards/chosen": -0.020739946514368057,
"rewards/margins": 0.02319016307592392,
"rewards/rejected": -0.04393010586500168,
"step": 20
},
{
"epoch": 0.14,
"learning_rate": 4.97147773390341e-06,
"logits/chosen": 0.10252387821674347,
"logits/rejected": 0.20911017060279846,
"logps/chosen": -333.58074951171875,
"logps/rejected": -314.22686767578125,
"loss": 0.3406,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": 0.03605792671442032,
"rewards/margins": 0.06152229756116867,
"rewards/rejected": -0.0254643764346838,
"step": 30
},
{
"epoch": 0.19,
"learning_rate": 4.873717504456219e-06,
"logits/chosen": 0.1256047487258911,
"logits/rejected": 0.18428723514080048,
"logps/chosen": -361.2447509765625,
"logps/rejected": -337.5652770996094,
"loss": 0.3075,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": 0.1919463872909546,
"rewards/margins": 0.11055928468704224,
"rewards/rejected": 0.08138711750507355,
"step": 40
},
{
"epoch": 0.24,
"learning_rate": 4.709119209978242e-06,
"logits/chosen": 0.13542751967906952,
"logits/rejected": 0.17227646708488464,
"logps/chosen": -339.5179138183594,
"logps/rejected": -339.02984619140625,
"loss": 0.3253,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": 0.29387253522872925,
"rewards/margins": 0.18539837002754211,
"rewards/rejected": 0.10847418010234833,
"step": 50
},
{
"epoch": 0.29,
"learning_rate": 4.482317534878901e-06,
"logits/chosen": 0.14710070192813873,
"logits/rejected": 0.1668437272310257,
"logps/chosen": -338.6156311035156,
"logps/rejected": -317.2750549316406,
"loss": 0.3199,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": 0.2612247169017792,
"rewards/margins": 0.14398948848247528,
"rewards/rejected": 0.1172352284193039,
"step": 60
},
{
"epoch": 0.34,
"learning_rate": 4.199698658255298e-06,
"logits/chosen": 0.10598815977573395,
"logits/rejected": 0.15820710361003876,
"logps/chosen": -339.61456298828125,
"logps/rejected": -329.31536865234375,
"loss": 0.2793,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": 0.3697130084037781,
"rewards/margins": 0.20087119936943054,
"rewards/rejected": 0.16884183883666992,
"step": 70
},
{
"epoch": 0.38,
"learning_rate": 3.869220434746509e-06,
"logits/chosen": 0.07073510438203812,
"logits/rejected": 0.15016858279705048,
"logps/chosen": -314.06280517578125,
"logps/rejected": -323.94268798828125,
"loss": 0.2928,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": 0.3701319396495819,
"rewards/margins": 0.16388371586799622,
"rewards/rejected": 0.2062481939792633,
"step": 80
},
{
"epoch": 0.43,
"learning_rate": 3.5001883208580668e-06,
"logits/chosen": 0.12372653186321259,
"logits/rejected": 0.20820951461791992,
"logps/chosen": -373.20831298828125,
"logps/rejected": -339.1376037597656,
"loss": 0.2895,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": 0.33848610520362854,
"rewards/margins": 0.1839863359928131,
"rewards/rejected": 0.15449976921081543,
"step": 90
},
{
"epoch": 0.48,
"learning_rate": 3.102993356121938e-06,
"logits/chosen": 0.11043532192707062,
"logits/rejected": 0.21166983246803284,
"logps/chosen": -332.3284606933594,
"logps/rejected": -328.7100524902344,
"loss": 0.3023,
"rewards/accuracies": 0.643750011920929,
"rewards/chosen": 0.28233280777931213,
"rewards/margins": 0.1811174899339676,
"rewards/rejected": 0.10121532529592514,
"step": 100
},
{
"epoch": 0.53,
"learning_rate": 2.6888195769001147e-06,
"logits/chosen": 0.12863442301750183,
"logits/rejected": 0.2186942994594574,
"logps/chosen": -322.6536560058594,
"logps/rejected": -326.0906066894531,
"loss": 0.2747,
"rewards/accuracies": 0.75,
"rewards/chosen": 0.3540286421775818,
"rewards/margins": 0.2065799981355667,
"rewards/rejected": 0.14744864404201508,
"step": 110
},
{
"epoch": 0.58,
"learning_rate": 2.269329101341745e-06,
"logits/chosen": 0.11101800203323364,
"logits/rejected": 0.21909146010875702,
"logps/chosen": -386.37506103515625,
"logps/rejected": -338.628662109375,
"loss": 0.2594,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": 0.32755765318870544,
"rewards/margins": 0.19837772846221924,
"rewards/rejected": 0.1291799247264862,
"step": 120
},
{
"epoch": 0.62,
"learning_rate": 1.856333752729311e-06,
"logits/chosen": 0.108365498483181,
"logits/rejected": 0.21159549057483673,
"logps/chosen": -354.3942565917969,
"logps/rejected": -350.58026123046875,
"loss": 0.2845,
"rewards/accuracies": 0.6875,
"rewards/chosen": 0.32795828580856323,
"rewards/margins": 0.22649607062339783,
"rewards/rejected": 0.10146218538284302,
"step": 130
},
{
"epoch": 0.67,
"learning_rate": 1.4614624674952843e-06,
"logits/chosen": 0.13152627646923065,
"logits/rejected": 0.1252177655696869,
"logps/chosen": -310.82440185546875,
"logps/rejected": -306.65057373046875,
"loss": 0.3034,
"rewards/accuracies": 0.625,
"rewards/chosen": 0.28935328125953674,
"rewards/margins": 0.17421108484268188,
"rewards/rejected": 0.11514218151569366,
"step": 140
},
{
"epoch": 0.72,
"learning_rate": 1.0958338528840893e-06,
"logits/chosen": 0.14218227565288544,
"logits/rejected": 0.17058388888835907,
"logps/chosen": -365.53497314453125,
"logps/rejected": -336.30670166015625,
"loss": 0.279,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": 0.34200993180274963,
"rewards/margins": 0.18471720814704895,
"rewards/rejected": 0.1572926938533783,
"step": 150
},
{
"epoch": 0.77,
"learning_rate": 7.697431142327633e-07,
"logits/chosen": 0.1493266373872757,
"logits/rejected": 0.1491091400384903,
"logps/chosen": -363.55841064453125,
"logps/rejected": -332.8086853027344,
"loss": 0.2657,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": 0.3300887942314148,
"rewards/margins": 0.20138521492481232,
"rewards/rejected": 0.12870360910892487,
"step": 160
},
{
"epoch": 0.82,
"learning_rate": 4.923721672305148e-07,
"logits/chosen": 0.09402619302272797,
"logits/rejected": 0.1920977234840393,
"logps/chosen": -334.378662109375,
"logps/rejected": -331.62322998046875,
"loss": 0.2685,
"rewards/accuracies": 0.65625,
"rewards/chosen": 0.3077259361743927,
"rewards/margins": 0.17248141765594482,
"rewards/rejected": 0.13524451851844788,
"step": 170
},
{
"epoch": 0.86,
"learning_rate": 2.7153109768518926e-07,
"logits/chosen": 0.11278879642486572,
"logits/rejected": 0.18854503333568573,
"logps/chosen": -388.7989501953125,
"logps/rejected": -336.02105712890625,
"loss": 0.2612,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": 0.34268996119499207,
"rewards/margins": 0.21422357857227325,
"rewards/rejected": 0.12846639752388,
"step": 180
},
{
"epoch": 0.91,
"learning_rate": 1.1343824865573422e-07,
"logits/chosen": 0.09929057955741882,
"logits/rejected": 0.15045389533042908,
"logps/chosen": -343.9807434082031,
"logps/rejected": -318.48028564453125,
"loss": 0.2662,
"rewards/accuracies": 0.6937500238418579,
"rewards/chosen": 0.29964178800582886,
"rewards/margins": 0.17701460421085358,
"rewards/rejected": 0.1226271539926529,
"step": 190
},
{
"epoch": 0.96,
"learning_rate": 2.2545127157831416e-08,
"logits/chosen": 0.07950419932603836,
"logits/rejected": 0.178897887468338,
"logps/chosen": -297.21661376953125,
"logps/rejected": -294.276611328125,
"loss": 0.2553,
"rewards/accuracies": 0.6812499761581421,
"rewards/chosen": 0.3100071847438812,
"rewards/margins": 0.16973480582237244,
"rewards/rejected": 0.14027239382266998,
"step": 200
},
{
"epoch": 1.0,
"step": 208,
"total_flos": 0.0,
"train_loss": 0.29713730256144816,
"train_runtime": 2891.8659,
"train_samples_per_second": 3.458,
"train_steps_per_second": 0.072
}
],
"logging_steps": 10,
"max_steps": 208,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}