stablelm-2-1_6b-orpo-full-v1 / trainer_state.json
vain05's picture
Model save
4ae3500 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.0,
"eval_steps": 100,
"global_step": 282,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.07,
"grad_norm": 8.17277637691037,
"learning_rate": 1.7241379310344828e-07,
"log_odds_chosen": -0.08250121772289276,
"log_odds_ratio": -0.908496081829071,
"logits/chosen": -2.125,
"logits/rejected": -2.0625,
"logps/chosen": -2.078125,
"logps/rejected": -2.0,
"loss": 1.7207,
"nll_loss": 1.6328125,
"rewards/accuracies": 0.53125,
"rewards/chosen": -0.20703125,
"rewards/margins": -0.0074462890625,
"rewards/rejected": -0.2001953125,
"step": 10
},
{
"epoch": 0.14,
"grad_norm": 5.501828832938749,
"learning_rate": 3.4482758620689656e-07,
"log_odds_chosen": -0.01568603515625,
"log_odds_ratio": -0.8124023675918579,
"logits/chosen": -2.109375,
"logits/rejected": -2.03125,
"logps/chosen": -1.8984375,
"logps/rejected": -1.875,
"loss": 1.662,
"nll_loss": 1.59375,
"rewards/accuracies": 0.53125,
"rewards/chosen": -0.189453125,
"rewards/margins": -0.00177764892578125,
"rewards/rejected": -0.1875,
"step": 20
},
{
"epoch": 0.21,
"grad_norm": 6.819250485621814,
"learning_rate": 4.99980726386944e-07,
"log_odds_chosen": 0.08204345405101776,
"log_odds_ratio": -0.8072265386581421,
"logits/chosen": -2.078125,
"logits/rejected": -2.046875,
"logps/chosen": -1.9140625,
"logps/rejected": -1.984375,
"loss": 1.6703,
"nll_loss": 1.6171875,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": -0.19140625,
"rewards/margins": 0.006591796875,
"rewards/rejected": -0.1982421875,
"step": 30
},
{
"epoch": 0.28,
"grad_norm": 4.727303404359562,
"learning_rate": 4.976714865090826e-07,
"log_odds_chosen": 0.21958008408546448,
"log_odds_ratio": -0.795214831829071,
"logits/chosen": -2.203125,
"logits/rejected": -2.125,
"logps/chosen": -1.84375,
"logps/rejected": -2.046875,
"loss": 1.6432,
"nll_loss": 1.5625,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": -0.18359375,
"rewards/margins": 0.021240234375,
"rewards/rejected": -0.205078125,
"step": 40
},
{
"epoch": 0.35,
"grad_norm": 5.560160837530604,
"learning_rate": 4.915482824798726e-07,
"log_odds_chosen": 0.09776916354894638,
"log_odds_ratio": -0.7564452886581421,
"logits/chosen": -2.171875,
"logits/rejected": -2.078125,
"logps/chosen": -1.75,
"logps/rejected": -1.8359375,
"loss": 1.6065,
"nll_loss": 1.53125,
"rewards/accuracies": 0.53125,
"rewards/chosen": -0.1748046875,
"rewards/margins": 0.0087890625,
"rewards/rejected": -0.1845703125,
"step": 50
},
{
"epoch": 0.43,
"grad_norm": 4.645157759584829,
"learning_rate": 4.817054072717832e-07,
"log_odds_chosen": 0.10500488430261612,
"log_odds_ratio": -0.715039074420929,
"logits/chosen": -2.125,
"logits/rejected": -2.140625,
"logps/chosen": -1.5859375,
"logps/rejected": -1.6875,
"loss": 1.5635,
"nll_loss": 1.4921875,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.1591796875,
"rewards/margins": 0.00970458984375,
"rewards/rejected": -0.1689453125,
"step": 60
},
{
"epoch": 0.5,
"grad_norm": 5.0025739143196315,
"learning_rate": 4.68294434139043e-07,
"log_odds_chosen": 0.07453002780675888,
"log_odds_ratio": -0.7457031011581421,
"logits/chosen": -2.25,
"logits/rejected": -2.203125,
"logps/chosen": -1.6875,
"logps/rejected": -1.7421875,
"loss": 1.5798,
"nll_loss": 1.53125,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": -0.16796875,
"rewards/margins": 0.006072998046875,
"rewards/rejected": -0.1748046875,
"step": 70
},
{
"epoch": 0.57,
"grad_norm": 4.63466624849484,
"learning_rate": 4.515218824976894e-07,
"log_odds_chosen": 0.102294921875,
"log_odds_ratio": -0.71875,
"logits/chosen": -2.265625,
"logits/rejected": -2.203125,
"logps/chosen": -1.625,
"logps/rejected": -1.7109375,
"loss": 1.5814,
"nll_loss": 1.546875,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.1630859375,
"rewards/margins": 0.0078125,
"rewards/rejected": -0.1708984375,
"step": 80
},
{
"epoch": 0.64,
"grad_norm": 3.5161909712120885,
"learning_rate": 4.3164603767393594e-07,
"log_odds_chosen": 0.08747558295726776,
"log_odds_ratio": -0.7129882574081421,
"logits/chosen": -2.34375,
"logits/rejected": -2.3125,
"logps/chosen": -1.5,
"logps/rejected": -1.578125,
"loss": 1.5456,
"nll_loss": 1.4609375,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.150390625,
"rewards/margins": 0.0079345703125,
"rewards/rejected": -0.158203125,
"step": 90
},
{
"epoch": 0.71,
"grad_norm": 3.876797623785475,
"learning_rate": 4.0897297349446334e-07,
"log_odds_chosen": 0.2746826112270355,
"log_odds_ratio": -0.6348632574081421,
"logits/chosen": -2.359375,
"logits/rejected": -2.390625,
"logps/chosen": -1.4140625,
"logps/rejected": -1.6328125,
"loss": 1.4572,
"nll_loss": 1.375,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": -0.1416015625,
"rewards/margins": 0.021728515625,
"rewards/rejected": -0.1630859375,
"step": 100
},
{
"epoch": 0.71,
"eval_log_odds_chosen": 0.15282440185546875,
"eval_log_odds_ratio": -0.6873779296875,
"eval_logits/chosen": -2.375,
"eval_logits/rejected": -2.359375,
"eval_logps/chosen": -1.40625,
"eval_logps/rejected": -1.5078125,
"eval_loss": 1.437416672706604,
"eval_nll_loss": 1.390625,
"eval_rewards/accuracies": 0.546875,
"eval_rewards/chosen": -0.140625,
"eval_rewards/margins": 0.01092529296875,
"eval_rewards/rejected": -0.1513671875,
"eval_runtime": 28.8431,
"eval_samples_per_second": 26.003,
"eval_steps_per_second": 1.109,
"step": 100
},
{
"epoch": 0.78,
"grad_norm": 3.104351936223712,
"learning_rate": 3.8385183896790644e-07,
"log_odds_chosen": 0.14180298149585724,
"log_odds_ratio": -0.6834961175918579,
"logits/chosen": -2.4375,
"logits/rejected": -2.46875,
"logps/chosen": -1.40625,
"logps/rejected": -1.5,
"loss": 1.3979,
"nll_loss": 1.3515625,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.140625,
"rewards/margins": 0.00982666015625,
"rewards/rejected": -0.150390625,
"step": 110
},
{
"epoch": 0.85,
"grad_norm": 4.824975774552339,
"learning_rate": 3.566694816392771e-07,
"log_odds_chosen": 0.07709960639476776,
"log_odds_ratio": -0.727343738079071,
"logits/chosen": -2.34375,
"logits/rejected": -2.328125,
"logps/chosen": -1.3515625,
"logps/rejected": -1.40625,
"loss": 1.4478,
"nll_loss": 1.3125,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.1357421875,
"rewards/margins": 0.005584716796875,
"rewards/rejected": -0.140625,
"step": 120
},
{
"epoch": 0.92,
"grad_norm": 3.1045900908725934,
"learning_rate": 3.278444904138297e-07,
"log_odds_chosen": 0.14324340224266052,
"log_odds_ratio": -0.7000976800918579,
"logits/chosen": -2.4375,
"logits/rejected": -2.515625,
"logps/chosen": -1.3359375,
"logps/rejected": -1.4609375,
"loss": 1.4356,
"nll_loss": 1.375,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": -0.1337890625,
"rewards/margins": 0.0123291015625,
"rewards/rejected": -0.146484375,
"step": 130
},
{
"epoch": 0.99,
"grad_norm": 3.0100692405685825,
"learning_rate": 2.9782074958662915e-07,
"log_odds_chosen": 0.08026123046875,
"log_odds_ratio": -0.715527355670929,
"logits/chosen": -2.4375,
"logits/rejected": -2.390625,
"logps/chosen": -1.40625,
"logps/rejected": -1.46875,
"loss": 1.4861,
"nll_loss": 1.4140625,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.140625,
"rewards/margins": 0.0059814453125,
"rewards/rejected": -0.146484375,
"step": 140
},
{
"epoch": 1.06,
"grad_norm": 3.20502629473615,
"learning_rate": 2.6706060334116775e-07,
"log_odds_chosen": 0.22966308891773224,
"log_odds_ratio": -0.6705077886581421,
"logits/chosen": -2.46875,
"logits/rejected": -2.46875,
"logps/chosen": -1.2890625,
"logps/rejected": -1.453125,
"loss": 1.437,
"nll_loss": 1.3359375,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.12890625,
"rewards/margins": 0.016357421875,
"rewards/rejected": -0.1455078125,
"step": 150
},
{
"epoch": 1.13,
"grad_norm": 2.9568341009806005,
"learning_rate": 2.3603773597887236e-07,
"log_odds_chosen": 0.05845336988568306,
"log_odds_ratio": -0.7265625,
"logits/chosen": -2.515625,
"logits/rejected": -2.5,
"logps/chosen": -1.3515625,
"logps/rejected": -1.3984375,
"loss": 1.4049,
"nll_loss": 1.3515625,
"rewards/accuracies": 0.4937500059604645,
"rewards/chosen": -0.1357421875,
"rewards/margins": 0.004425048828125,
"rewards/rejected": -0.1396484375,
"step": 160
},
{
"epoch": 1.21,
"grad_norm": 2.6182523616400624,
"learning_rate": 2.0522987751888875e-07,
"log_odds_chosen": 0.0771484375,
"log_odds_ratio": -0.726757824420929,
"logits/chosen": -2.421875,
"logits/rejected": -2.453125,
"logps/chosen": -1.359375,
"logps/rejected": -1.4296875,
"loss": 1.4526,
"nll_loss": 1.390625,
"rewards/accuracies": 0.53125,
"rewards/chosen": -0.1357421875,
"rewards/margins": 0.006805419921875,
"rewards/rejected": -0.142578125,
"step": 170
},
{
"epoch": 1.28,
"grad_norm": 3.6328150676008355,
"learning_rate": 1.7511144699669963e-07,
"log_odds_chosen": 0.10901489108800888,
"log_odds_ratio": -0.703417956829071,
"logits/chosen": -2.484375,
"logits/rejected": -2.515625,
"logps/chosen": -1.3046875,
"logps/rejected": -1.390625,
"loss": 1.4426,
"nll_loss": 1.3671875,
"rewards/accuracies": 0.48124998807907104,
"rewards/chosen": -0.130859375,
"rewards/margins": 0.0087890625,
"rewards/rejected": -0.1396484375,
"step": 180
},
{
"epoch": 1.35,
"grad_norm": 3.052674455238246,
"learning_rate": 1.461462467495284e-07,
"log_odds_chosen": 0.23468628525733948,
"log_odds_ratio": -0.652539074420929,
"logits/chosen": -2.46875,
"logits/rejected": -2.484375,
"logps/chosen": -1.2421875,
"logps/rejected": -1.421875,
"loss": 1.4109,
"nll_loss": 1.3125,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": -0.12451171875,
"rewards/margins": 0.0179443359375,
"rewards/rejected": -0.142578125,
"step": 190
},
{
"epoch": 1.42,
"grad_norm": 3.212384657142265,
"learning_rate": 1.1878032019132014e-07,
"log_odds_chosen": 0.23659667372703552,
"log_odds_ratio": -0.665332019329071,
"logits/chosen": -2.40625,
"logits/rejected": -2.453125,
"logps/chosen": -1.234375,
"logps/rejected": -1.40625,
"loss": 1.3846,
"nll_loss": 1.2890625,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.12353515625,
"rewards/margins": 0.01708984375,
"rewards/rejected": -0.140625,
"step": 200
},
{
"epoch": 1.42,
"eval_log_odds_chosen": 0.1745758056640625,
"eval_log_odds_ratio": -0.6793212890625,
"eval_logits/chosen": -2.484375,
"eval_logits/rejected": -2.46875,
"eval_logps/chosen": -1.28125,
"eval_logps/rejected": -1.3984375,
"eval_loss": 1.3828542232513428,
"eval_nll_loss": 1.3359375,
"eval_rewards/accuracies": 0.5625,
"eval_rewards/chosen": -0.1279296875,
"eval_rewards/margins": 0.011474609375,
"eval_rewards/rejected": -0.1396484375,
"eval_runtime": 28.5986,
"eval_samples_per_second": 26.225,
"eval_steps_per_second": 1.119,
"step": 200
},
{
"epoch": 1.49,
"grad_norm": 2.9802043706553674,
"learning_rate": 9.34350830624677e-08,
"log_odds_chosen": 0.18032225966453552,
"log_odds_ratio": -0.6673828363418579,
"logits/chosen": -2.46875,
"logits/rejected": -2.53125,
"logps/chosen": -1.3203125,
"logps/rejected": -1.453125,
"loss": 1.3959,
"nll_loss": 1.328125,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.1318359375,
"rewards/margins": 0.01361083984375,
"rewards/rejected": -0.1455078125,
"step": 210
},
{
"epoch": 1.56,
"grad_norm": 2.71778589765558,
"learning_rate": 7.050083392813649e-08,
"log_odds_chosen": 0.16171875596046448,
"log_odds_ratio": -0.6756836175918579,
"logits/chosen": -2.484375,
"logits/rejected": -2.46875,
"logps/chosen": -1.28125,
"logps/rejected": -1.3984375,
"loss": 1.3794,
"nll_loss": 1.3203125,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -0.1279296875,
"rewards/margins": 0.01153564453125,
"rewards/rejected": -0.1396484375,
"step": 220
},
{
"epoch": 1.63,
"grad_norm": 2.662070784648976,
"learning_rate": 5.033074385888189e-08,
"log_odds_chosen": 0.08112792670726776,
"log_odds_ratio": -0.7230468988418579,
"logits/chosen": -2.515625,
"logits/rejected": -2.453125,
"logps/chosen": -1.3046875,
"logps/rejected": -1.375,
"loss": 1.393,
"nll_loss": 1.328125,
"rewards/accuracies": 0.4937500059604645,
"rewards/chosen": -0.130859375,
"rewards/margins": 0.00653076171875,
"rewards/rejected": -0.13671875,
"step": 230
},
{
"epoch": 1.7,
"grad_norm": 2.5341509494230845,
"learning_rate": 3.323541784818898e-08,
"log_odds_chosen": 0.10616455227136612,
"log_odds_ratio": -0.710742175579071,
"logits/chosen": -2.53125,
"logits/rejected": -2.515625,
"logps/chosen": -1.3046875,
"logps/rejected": -1.390625,
"loss": 1.434,
"nll_loss": 1.3359375,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -0.130859375,
"rewards/margins": 0.0086669921875,
"rewards/rejected": -0.1396484375,
"step": 240
},
{
"epoch": 1.77,
"grad_norm": 2.4611715244981904,
"learning_rate": 1.9478111717223967e-08,
"log_odds_chosen": 0.18498535454273224,
"log_odds_ratio": -0.676074206829071,
"logits/chosen": -2.59375,
"logits/rejected": -2.609375,
"logps/chosen": -1.2890625,
"logps/rejected": -1.40625,
"loss": 1.3887,
"nll_loss": 1.3828125,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -0.12890625,
"rewards/margins": 0.01171875,
"rewards/rejected": -0.140625,
"step": 250
},
{
"epoch": 1.84,
"grad_norm": 2.5379707430688163,
"learning_rate": 9.270678163050217e-09,
"log_odds_chosen": 0.14968261122703552,
"log_odds_ratio": -0.6859375238418579,
"logits/chosen": -2.546875,
"logits/rejected": -2.5625,
"logps/chosen": -1.3203125,
"logps/rejected": -1.4296875,
"loss": 1.4595,
"nll_loss": 1.3671875,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.1318359375,
"rewards/margins": 0.01068115234375,
"rewards/rejected": -0.142578125,
"step": 260
},
{
"epoch": 1.91,
"grad_norm": 2.818634582820743,
"learning_rate": 2.7703043782735524e-09,
"log_odds_chosen": 0.12449340522289276,
"log_odds_ratio": -0.705078125,
"logits/chosen": -2.5,
"logits/rejected": -2.515625,
"logps/chosen": -1.2734375,
"logps/rejected": -1.359375,
"loss": 1.3615,
"nll_loss": 1.328125,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.126953125,
"rewards/margins": 0.00897216796875,
"rewards/rejected": -0.1357421875,
"step": 270
},
{
"epoch": 1.99,
"grad_norm": 3.087114524720046,
"learning_rate": 7.709148044679481e-11,
"log_odds_chosen": 0.244537353515625,
"log_odds_ratio": -0.6534179449081421,
"logits/chosen": -2.5,
"logits/rejected": -2.5,
"logps/chosen": -1.28125,
"logps/rejected": -1.484375,
"loss": 1.4121,
"nll_loss": 1.359375,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.12890625,
"rewards/margins": 0.01953125,
"rewards/rejected": -0.1484375,
"step": 280
},
{
"epoch": 2.0,
"step": 282,
"total_flos": 0.0,
"train_loss": 1.48347466719066,
"train_runtime": 2049.7189,
"train_samples_per_second": 6.586,
"train_steps_per_second": 0.138
}
],
"logging_steps": 10,
"max_steps": 282,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}