test-2 / checkpoint-177 /trainer_state.json
hibana2077's picture
Upload folder using huggingface_hub
c5db3b6
raw
history blame contribute delete
No virus
7.91 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 177,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.19,
"learning_rate": 1.3333333333333333e-05,
"logits/chosen": -54.362911224365234,
"logits/rejected": -50.76668930053711,
"logps/chosen": -1319.19677734375,
"logps/rejected": -33.71879577636719,
"loss": 0.6892,
"rewards/accuracies": 0.40909090638160706,
"rewards/chosen": 0.008085771463811398,
"rewards/margins": 0.007936443202197552,
"rewards/rejected": 0.0001493280433351174,
"step": 11
},
{
"epoch": 0.37,
"learning_rate": 2.9811320754716983e-05,
"logits/chosen": -54.190120697021484,
"logits/rejected": -52.07868957519531,
"logps/chosen": -1108.5838623046875,
"logps/rejected": -34.123477935791016,
"loss": 0.6329,
"rewards/accuracies": 0.8636363744735718,
"rewards/chosen": 0.12481586635112762,
"rewards/margins": 0.1290697455406189,
"rewards/rejected": -0.004253899212926626,
"step": 22
},
{
"epoch": 0.56,
"learning_rate": 2.7924528301886794e-05,
"logits/chosen": -55.231224060058594,
"logits/rejected": -52.04852294921875,
"logps/chosen": -879.3148803710938,
"logps/rejected": -34.018951416015625,
"loss": 0.5471,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.3259323835372925,
"rewards/margins": 0.3459864854812622,
"rewards/rejected": -0.020054107531905174,
"step": 33
},
{
"epoch": 0.75,
"learning_rate": 2.5849056603773585e-05,
"logits/chosen": -54.642879486083984,
"logits/rejected": -50.72136306762695,
"logps/chosen": -1260.130615234375,
"logps/rejected": -32.81765365600586,
"loss": 0.3731,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.9432396292686462,
"rewards/margins": 0.9746879935264587,
"rewards/rejected": -0.03144851326942444,
"step": 44
},
{
"epoch": 0.93,
"learning_rate": 2.377358490566038e-05,
"logits/chosen": -54.97840881347656,
"logits/rejected": -52.43299865722656,
"logps/chosen": -1015.5433959960938,
"logps/rejected": -34.17496109008789,
"loss": 0.3539,
"rewards/accuracies": 1.0,
"rewards/chosen": 1.0532668828964233,
"rewards/margins": 1.1037672758102417,
"rewards/rejected": -0.0505005344748497,
"step": 55
},
{
"epoch": 1.12,
"learning_rate": 2.169811320754717e-05,
"logits/chosen": -55.37051773071289,
"logits/rejected": -52.23636245727539,
"logps/chosen": -1029.4456787109375,
"logps/rejected": -34.25499725341797,
"loss": 0.2602,
"rewards/accuracies": 1.0,
"rewards/chosen": 1.5273464918136597,
"rewards/margins": 1.6002427339553833,
"rewards/rejected": -0.07289613038301468,
"step": 66
},
{
"epoch": 1.31,
"learning_rate": 1.9622641509433963e-05,
"logits/chosen": -55.24615478515625,
"logits/rejected": -51.99899673461914,
"logps/chosen": -1143.59716796875,
"logps/rejected": -35.575958251953125,
"loss": 0.2115,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.0695841312408447,
"rewards/margins": 2.1615474224090576,
"rewards/rejected": -0.09196347743272781,
"step": 77
},
{
"epoch": 1.49,
"learning_rate": 1.7547169811320753e-05,
"logits/chosen": -55.43460464477539,
"logits/rejected": -50.45402908325195,
"logps/chosen": -1283.4525146484375,
"logps/rejected": -34.708351135253906,
"loss": 0.1256,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.9994964599609375,
"rewards/margins": 3.1131112575531006,
"rewards/rejected": -0.11361455917358398,
"step": 88
},
{
"epoch": 1.68,
"learning_rate": 1.5471698113207547e-05,
"logits/chosen": -55.35545349121094,
"logits/rejected": -52.1571044921875,
"logps/chosen": -875.93359375,
"logps/rejected": -33.44974136352539,
"loss": 0.1917,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.1976325511932373,
"rewards/margins": 2.317676544189453,
"rewards/rejected": -0.12004398554563522,
"step": 99
},
{
"epoch": 1.86,
"learning_rate": 1.339622641509434e-05,
"logits/chosen": -55.0767936706543,
"logits/rejected": -51.604347229003906,
"logps/chosen": -1365.019775390625,
"logps/rejected": -35.252044677734375,
"loss": 0.1243,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.4024481773376465,
"rewards/margins": 3.554830312728882,
"rewards/rejected": -0.15238191187381744,
"step": 110
},
{
"epoch": 2.05,
"learning_rate": 1.1320754716981132e-05,
"logits/chosen": -56.40107727050781,
"logits/rejected": -54.048580169677734,
"logps/chosen": -858.4847412109375,
"logps/rejected": -34.9173698425293,
"loss": 0.1969,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.170722484588623,
"rewards/margins": 2.324540138244629,
"rewards/rejected": -0.15381723642349243,
"step": 121
},
{
"epoch": 2.24,
"learning_rate": 9.245283018867924e-06,
"logits/chosen": -55.104923248291016,
"logits/rejected": -52.74159240722656,
"logps/chosen": -809.7835693359375,
"logps/rejected": -35.847755432128906,
"loss": 0.1303,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.388211727142334,
"rewards/margins": 2.551792860031128,
"rewards/rejected": -0.16358119249343872,
"step": 132
},
{
"epoch": 2.42,
"learning_rate": 7.169811320754717e-06,
"logits/chosen": -55.185218811035156,
"logits/rejected": -52.399497985839844,
"logps/chosen": -959.4953002929688,
"logps/rejected": -34.56352615356445,
"loss": 0.1291,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.7163705825805664,
"rewards/margins": 2.8906729221343994,
"rewards/rejected": -0.1743021458387375,
"step": 143
},
{
"epoch": 2.61,
"learning_rate": 5.094339622641509e-06,
"logits/chosen": -53.87266540527344,
"logits/rejected": -51.906978607177734,
"logps/chosen": -1213.17529296875,
"logps/rejected": -34.25074768066406,
"loss": 0.069,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.6375014781951904,
"rewards/margins": 3.8169896602630615,
"rewards/rejected": -0.17948788404464722,
"step": 154
},
{
"epoch": 2.8,
"learning_rate": 3.018867924528302e-06,
"logits/chosen": -55.073856353759766,
"logits/rejected": -50.98554611206055,
"logps/chosen": -1360.9793701171875,
"logps/rejected": -36.670528411865234,
"loss": 0.1152,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.9220426082611084,
"rewards/margins": 4.106159687042236,
"rewards/rejected": -0.18411725759506226,
"step": 165
},
{
"epoch": 2.98,
"learning_rate": 9.433962264150943e-07,
"logits/chosen": -55.68719482421875,
"logits/rejected": -54.916015625,
"logps/chosen": -787.702880859375,
"logps/rejected": -35.69668960571289,
"loss": 0.159,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.3595094680786133,
"rewards/margins": 2.553260564804077,
"rewards/rejected": -0.19375087320804596,
"step": 176
}
],
"logging_steps": 11,
"max_steps": 177,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}