Minbyul's picture
Model save
91fa69c verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9973333333333333,
"eval_steps": 100,
"global_step": 187,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"grad_norm": 19.109572167610484,
"learning_rate": 2.6315789473684208e-08,
"logits/chosen": -2.964515209197998,
"logits/rejected": -2.865140914916992,
"logps/chosen": -485.6763916015625,
"logps/rejected": -1249.7501220703125,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.05,
"grad_norm": 18.895223645335697,
"learning_rate": 2.631578947368421e-07,
"logits/chosen": -2.7736825942993164,
"logits/rejected": -2.7408108711242676,
"logps/chosen": -604.7006225585938,
"logps/rejected": -1056.1942138671875,
"loss": 0.6926,
"rewards/accuracies": 0.5416666865348816,
"rewards/chosen": 0.0012125401990488172,
"rewards/margins": 0.001352548599243164,
"rewards/rejected": -0.00014000837109051645,
"step": 10
},
{
"epoch": 0.11,
"grad_norm": 19.562748691217283,
"learning_rate": 4.999562902281866e-07,
"logits/chosen": -2.7962822914123535,
"logits/rejected": -2.8271851539611816,
"logps/chosen": -571.3375854492188,
"logps/rejected": -971.5126953125,
"loss": 0.6749,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": 0.03103743866086006,
"rewards/margins": 0.03241748735308647,
"rewards/rejected": -0.0013800484593957663,
"step": 20
},
{
"epoch": 0.16,
"grad_norm": 23.57935669375875,
"learning_rate": 4.947295864744121e-07,
"logits/chosen": -2.859532117843628,
"logits/rejected": -2.8859381675720215,
"logps/chosen": -529.7252197265625,
"logps/rejected": -1093.7412109375,
"loss": 0.6296,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": 0.047512348741292953,
"rewards/margins": 0.15334269404411316,
"rewards/rejected": -0.10583032667636871,
"step": 30
},
{
"epoch": 0.21,
"grad_norm": 74.03794269111636,
"learning_rate": 4.809698831278217e-07,
"logits/chosen": -3.1058590412139893,
"logits/rejected": -3.105548143386841,
"logps/chosen": -631.2692260742188,
"logps/rejected": -1100.1131591796875,
"loss": 0.5067,
"rewards/accuracies": 0.7437499761581421,
"rewards/chosen": -0.4612053334712982,
"rewards/margins": 0.6213432550430298,
"rewards/rejected": -1.0825484991073608,
"step": 40
},
{
"epoch": 0.27,
"grad_norm": 80.08928437177174,
"learning_rate": 4.591569405016049e-07,
"logits/chosen": -3.1383297443389893,
"logits/rejected": -3.338413953781128,
"logps/chosen": -614.7294921875,
"logps/rejected": -1324.274658203125,
"loss": 0.3007,
"rewards/accuracies": 0.856249988079071,
"rewards/chosen": -0.7203965187072754,
"rewards/margins": 2.4570107460021973,
"rewards/rejected": -3.1774070262908936,
"step": 50
},
{
"epoch": 0.32,
"grad_norm": 52.8412534701194,
"learning_rate": 4.3005131163403164e-07,
"logits/chosen": -3.232844829559326,
"logits/rejected": -3.4020397663116455,
"logps/chosen": -607.4974365234375,
"logps/rejected": -1571.42578125,
"loss": 0.2467,
"rewards/accuracies": 0.918749988079071,
"rewards/chosen": -0.6835159063339233,
"rewards/margins": 4.190090656280518,
"rewards/rejected": -4.8736066818237305,
"step": 60
},
{
"epoch": 0.37,
"grad_norm": 45.803944170508274,
"learning_rate": 3.946678240449515e-07,
"logits/chosen": -3.016165256500244,
"logits/rejected": -3.2087910175323486,
"logps/chosen": -602.6742553710938,
"logps/rejected": -1499.858154296875,
"loss": 0.2227,
"rewards/accuracies": 0.893750011920929,
"rewards/chosen": -0.6613572239875793,
"rewards/margins": 4.315842628479004,
"rewards/rejected": -4.977200031280518,
"step": 70
},
{
"epoch": 0.43,
"grad_norm": 33.74568647416123,
"learning_rate": 3.5424019569033206e-07,
"logits/chosen": -2.980517864227295,
"logits/rejected": -2.997511863708496,
"logps/chosen": -698.8486328125,
"logps/rejected": -1709.7763671875,
"loss": 0.2216,
"rewards/accuracies": 0.9312499761581421,
"rewards/chosen": -1.0458548069000244,
"rewards/margins": 5.626683712005615,
"rewards/rejected": -6.672537803649902,
"step": 80
},
{
"epoch": 0.48,
"grad_norm": 32.76518067019826,
"learning_rate": 3.1017801885224326e-07,
"logits/chosen": -3.0111451148986816,
"logits/rejected": -3.0090878009796143,
"logps/chosen": -650.3148193359375,
"logps/rejected": -1498.55419921875,
"loss": 0.2021,
"rewards/accuracies": 0.90625,
"rewards/chosen": -0.8722761869430542,
"rewards/margins": 4.187361717224121,
"rewards/rejected": -5.059638023376465,
"step": 90
},
{
"epoch": 0.53,
"grad_norm": 64.24324243411806,
"learning_rate": 2.640176118092979e-07,
"logits/chosen": -2.9020493030548096,
"logits/rejected": -2.935757875442505,
"logps/chosen": -751.5125732421875,
"logps/rejected": -1689.5228271484375,
"loss": 0.1645,
"rewards/accuracies": 0.893750011920929,
"rewards/chosen": -1.2032721042633057,
"rewards/margins": 4.921408653259277,
"rewards/rejected": -6.124680995941162,
"step": 100
},
{
"epoch": 0.53,
"eval_logits/chosen": -3.0544369220733643,
"eval_logits/rejected": -2.793405294418335,
"eval_logps/chosen": -725.9426879882812,
"eval_logps/rejected": -1452.9771728515625,
"eval_loss": 0.25031739473342896,
"eval_rewards/accuracies": 0.831250011920929,
"eval_rewards/chosen": -1.6025804281234741,
"eval_rewards/margins": 3.9000518321990967,
"eval_rewards/rejected": -5.502632141113281,
"eval_runtime": 65.7537,
"eval_samples_per_second": 9.368,
"eval_steps_per_second": 0.304,
"step": 100
},
{
"epoch": 0.59,
"grad_norm": 41.59873680369454,
"learning_rate": 2.1736845194498716e-07,
"logits/chosen": -2.9784274101257324,
"logits/rejected": -2.980086088180542,
"logps/chosen": -600.6064453125,
"logps/rejected": -1670.901611328125,
"loss": 0.1595,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.918400764465332,
"rewards/margins": 6.283780574798584,
"rewards/rejected": -7.202181339263916,
"step": 110
},
{
"epoch": 0.64,
"grad_norm": 28.23680644032835,
"learning_rate": 1.718570580135889e-07,
"logits/chosen": -3.0252156257629395,
"logits/rejected": -3.080897569656372,
"logps/chosen": -611.710693359375,
"logps/rejected": -1694.8226318359375,
"loss": 0.1391,
"rewards/accuracies": 0.9375,
"rewards/chosen": -0.8532626032829285,
"rewards/margins": 5.446272850036621,
"rewards/rejected": -6.299535751342773,
"step": 120
},
{
"epoch": 0.69,
"grad_norm": 40.906944468121836,
"learning_rate": 1.2907027822369005e-07,
"logits/chosen": -2.9933369159698486,
"logits/rejected": -3.124406576156616,
"logps/chosen": -700.328125,
"logps/rejected": -1804.997802734375,
"loss": 0.1477,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -1.007743000984192,
"rewards/margins": 6.478204250335693,
"rewards/rejected": -7.485948085784912,
"step": 130
},
{
"epoch": 0.75,
"grad_norm": 22.754078194499957,
"learning_rate": 9.049996151674788e-08,
"logits/chosen": -3.086073875427246,
"logits/rejected": -3.1164612770080566,
"logps/chosen": -631.7467651367188,
"logps/rejected": -1740.2171630859375,
"loss": 0.1821,
"rewards/accuracies": 0.9125000238418579,
"rewards/chosen": -1.0232716798782349,
"rewards/margins": 5.889337539672852,
"rewards/rejected": -6.912609100341797,
"step": 140
},
{
"epoch": 0.8,
"grad_norm": 20.144359719952234,
"learning_rate": 5.74909411901843e-08,
"logits/chosen": -2.9675424098968506,
"logits/rejected": -2.990185499191284,
"logps/chosen": -617.1038818359375,
"logps/rejected": -1656.051513671875,
"loss": 0.1413,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -0.9472934603691101,
"rewards/margins": 5.291378974914551,
"rewards/rejected": -6.238672733306885,
"step": 150
},
{
"epoch": 0.85,
"grad_norm": 26.642508471840806,
"learning_rate": 3.119414452281158e-08,
"logits/chosen": -2.9869649410247803,
"logits/rejected": -3.0431644916534424,
"logps/chosen": -662.4171142578125,
"logps/rejected": -1831.9390869140625,
"loss": 0.1189,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -0.8813556432723999,
"rewards/margins": 6.860285758972168,
"rewards/rejected": -7.741641044616699,
"step": 160
},
{
"epoch": 0.91,
"grad_norm": 18.842250875900756,
"learning_rate": 1.2526463331788501e-08,
"logits/chosen": -3.083080291748047,
"logits/rejected": -2.9783942699432373,
"logps/chosen": -638.3408203125,
"logps/rejected": -1725.673583984375,
"loss": 0.1265,
"rewards/accuracies": 0.9624999761581421,
"rewards/chosen": -0.9777008891105652,
"rewards/margins": 6.12181282043457,
"rewards/rejected": -7.099513053894043,
"step": 170
},
{
"epoch": 0.96,
"grad_norm": 34.250119439829845,
"learning_rate": 2.1387846565474044e-09,
"logits/chosen": -3.0460267066955566,
"logits/rejected": -2.9695019721984863,
"logps/chosen": -608.745849609375,
"logps/rejected": -1744.884521484375,
"loss": 0.1257,
"rewards/accuracies": 0.9437500238418579,
"rewards/chosen": -1.041512131690979,
"rewards/margins": 6.2788825035095215,
"rewards/rejected": -7.320394992828369,
"step": 180
},
{
"epoch": 1.0,
"step": 187,
"total_flos": 0.0,
"train_loss": 0.2699868052719749,
"train_runtime": 2833.2764,
"train_samples_per_second": 4.234,
"train_steps_per_second": 0.066
}
],
"logging_steps": 10,
"max_steps": 187,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}