0.0_dataup_4iters_iter_2 / trainer_state.json
ShenaoZ's picture
Model save
341b17d verified
raw
history blame
6.38 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.99581589958159,
"eval_steps": 500,
"global_step": 119,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 4.166666666666666e-08,
"logits/chosen": -2.7608747482299805,
"logits/rejected": -2.7489399909973145,
"logps/chosen": -156.13702392578125,
"logps/rejected": -214.59707641601562,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.08,
"learning_rate": 4.1666666666666667e-07,
"logits/chosen": -2.7645390033721924,
"logits/rejected": -2.704571008682251,
"logps/chosen": -192.93963623046875,
"logps/rejected": -218.26573181152344,
"loss": 0.6928,
"rewards/accuracies": 0.4444444477558136,
"rewards/chosen": 0.0014097224920988083,
"rewards/margins": 0.0015009460039436817,
"rewards/rejected": -9.122348274104297e-05,
"step": 10
},
{
"epoch": 0.17,
"learning_rate": 4.931352528237397e-07,
"logits/chosen": -2.6521761417388916,
"logits/rejected": -2.614973545074463,
"logps/chosen": -236.9346466064453,
"logps/rejected": -204.9691162109375,
"loss": 0.686,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.07020659744739532,
"rewards/margins": 0.029431456699967384,
"rewards/rejected": -0.09963803738355637,
"step": 20
},
{
"epoch": 0.25,
"learning_rate": 4.658920803689553e-07,
"logits/chosen": -2.56916880607605,
"logits/rejected": -2.5743496417999268,
"logps/chosen": -238.0299072265625,
"logps/rejected": -208.71658325195312,
"loss": 0.685,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -0.2192213088274002,
"rewards/margins": 0.04515828937292099,
"rewards/rejected": -0.2643795907497406,
"step": 30
},
{
"epoch": 0.33,
"learning_rate": 4.201712553872657e-07,
"logits/chosen": -2.51991605758667,
"logits/rejected": -2.5045738220214844,
"logps/chosen": -258.259033203125,
"logps/rejected": -245.266845703125,
"loss": 0.6645,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.2803400158882141,
"rewards/margins": 0.07451216131448746,
"rewards/rejected": -0.35485216975212097,
"step": 40
},
{
"epoch": 0.42,
"learning_rate": 3.598859066780754e-07,
"logits/chosen": -2.519888401031494,
"logits/rejected": -2.50127911567688,
"logps/chosen": -257.92901611328125,
"logps/rejected": -247.06884765625,
"loss": 0.6622,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.3478809893131256,
"rewards/margins": 0.08011214435100555,
"rewards/rejected": -0.42799311876296997,
"step": 50
},
{
"epoch": 0.5,
"learning_rate": 2.9019570347986706e-07,
"logits/chosen": -2.436131000518799,
"logits/rejected": -2.4517111778259277,
"logps/chosen": -284.99066162109375,
"logps/rejected": -233.84280395507812,
"loss": 0.66,
"rewards/accuracies": 0.668749988079071,
"rewards/chosen": -0.3800959289073944,
"rewards/margins": 0.2344251424074173,
"rewards/rejected": -0.6145211458206177,
"step": 60
},
{
"epoch": 0.59,
"learning_rate": 2.1706525253979534e-07,
"logits/chosen": -2.4360086917877197,
"logits/rejected": -2.440263032913208,
"logps/chosen": -264.9581604003906,
"logps/rejected": -267.7023010253906,
"loss": 0.6551,
"rewards/accuracies": 0.6812499761581421,
"rewards/chosen": -0.4374156892299652,
"rewards/margins": 0.17235831916332245,
"rewards/rejected": -0.6097739338874817,
"step": 70
},
{
"epoch": 0.67,
"learning_rate": 1.4675360263490295e-07,
"logits/chosen": -2.4380290508270264,
"logits/rejected": -2.4010090827941895,
"logps/chosen": -236.95388793945312,
"logps/rejected": -257.6996154785156,
"loss": 0.6616,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.4010644853115082,
"rewards/margins": 0.13567090034484863,
"rewards/rejected": -0.536735475063324,
"step": 80
},
{
"epoch": 0.75,
"learning_rate": 8.527854855097224e-08,
"logits/chosen": -2.469369888305664,
"logits/rejected": -2.4206488132476807,
"logps/chosen": -275.33831787109375,
"logps/rejected": -252.06930541992188,
"loss": 0.6579,
"rewards/accuracies": 0.606249988079071,
"rewards/chosen": -0.4325632154941559,
"rewards/margins": 0.17887099087238312,
"rewards/rejected": -0.611434280872345,
"step": 90
},
{
"epoch": 0.84,
"learning_rate": 3.790158337517127e-08,
"logits/chosen": -2.4000065326690674,
"logits/rejected": -2.405733585357666,
"logps/chosen": -256.33343505859375,
"logps/rejected": -261.50762939453125,
"loss": 0.6521,
"rewards/accuracies": 0.606249988079071,
"rewards/chosen": -0.44618505239486694,
"rewards/margins": 0.14844560623168945,
"rewards/rejected": -0.5946307182312012,
"step": 100
},
{
"epoch": 0.92,
"learning_rate": 8.677580722139671e-09,
"logits/chosen": -2.432537078857422,
"logits/rejected": -2.425053358078003,
"logps/chosen": -262.3310546875,
"logps/rejected": -261.58697509765625,
"loss": 0.6741,
"rewards/accuracies": 0.53125,
"rewards/chosen": -0.6105460524559021,
"rewards/margins": -0.03789714723825455,
"rewards/rejected": -0.5726489424705505,
"step": 110
},
{
"epoch": 1.0,
"step": 119,
"total_flos": 0.0,
"train_loss": 0.66752010233262,
"train_runtime": 1991.7897,
"train_samples_per_second": 7.673,
"train_steps_per_second": 0.06
}
],
"logging_steps": 10,
"max_steps": 119,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}