zephyr-7b-dpo-full / trainer_state.json
RikkiXu's picture
Model save
c769f9f verified
raw
history blame
No virus
9.15 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9984,
"eval_steps": 500,
"global_step": 156,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0064,
"grad_norm": 1341.8496030875679,
"learning_rate": 6.25e-10,
"logits/chosen": -3.9499800205230713,
"logits/rejected": -4.237819194793701,
"logps/chosen": -300.693115234375,
"logps/rejected": -249.96307373046875,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.064,
"grad_norm": 1351.1067467304115,
"learning_rate": 6.25e-09,
"logits/chosen": -4.128900527954102,
"logits/rejected": -4.351526260375977,
"logps/chosen": -351.4300537109375,
"logps/rejected": -308.8679504394531,
"loss": 0.7229,
"rewards/accuracies": 0.4340277910232544,
"rewards/chosen": -0.0018261770019307733,
"rewards/margins": -0.04775632172822952,
"rewards/rejected": 0.04593014344573021,
"step": 10
},
{
"epoch": 0.128,
"grad_norm": 1408.8095936894558,
"learning_rate": 9.979871469976195e-09,
"logits/chosen": -4.194854736328125,
"logits/rejected": -4.3817548751831055,
"logps/chosen": -335.3293762207031,
"logps/rejected": -294.04248046875,
"loss": 0.7269,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": 0.01136251911520958,
"rewards/margins": 0.004810346756130457,
"rewards/rejected": 0.0065521723590791225,
"step": 20
},
{
"epoch": 0.192,
"grad_norm": 1432.0458755805519,
"learning_rate": 9.755282581475768e-09,
"logits/chosen": -4.23565149307251,
"logits/rejected": -4.369490623474121,
"logps/chosen": -329.5267028808594,
"logps/rejected": -296.1650390625,
"loss": 0.7136,
"rewards/accuracies": 0.5093749761581421,
"rewards/chosen": 0.0647984966635704,
"rewards/margins": 0.020466070622205734,
"rewards/rejected": 0.04433242976665497,
"step": 30
},
{
"epoch": 0.256,
"grad_norm": 1362.637677953038,
"learning_rate": 9.29224396800933e-09,
"logits/chosen": -4.142593860626221,
"logits/rejected": -4.344474792480469,
"logps/chosen": -333.652587890625,
"logps/rejected": -289.78851318359375,
"loss": 0.691,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": 0.16199079155921936,
"rewards/margins": 0.10949220508337021,
"rewards/rejected": 0.052498579025268555,
"step": 40
},
{
"epoch": 0.32,
"grad_norm": 1293.8956896680802,
"learning_rate": 8.613974319136958e-09,
"logits/chosen": -4.226416110992432,
"logits/rejected": -4.406065940856934,
"logps/chosen": -334.3558044433594,
"logps/rejected": -293.1966552734375,
"loss": 0.6734,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": 0.24848651885986328,
"rewards/margins": 0.16572698950767517,
"rewards/rejected": 0.08275953680276871,
"step": 50
},
{
"epoch": 0.384,
"grad_norm": 1213.937252280571,
"learning_rate": 7.754484907260514e-09,
"logits/chosen": -4.241747856140137,
"logits/rejected": -4.412692546844482,
"logps/chosen": -326.20147705078125,
"logps/rejected": -293.2193908691406,
"loss": 0.6501,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": 0.28125494718551636,
"rewards/margins": 0.12699946761131287,
"rewards/rejected": 0.1542554497718811,
"step": 60
},
{
"epoch": 0.448,
"grad_norm": 1168.8702151248158,
"learning_rate": 6.756874120406714e-09,
"logits/chosen": -4.1678466796875,
"logits/rejected": -4.357397556304932,
"logps/chosen": -326.0350036621094,
"logps/rejected": -290.5421447753906,
"loss": 0.6267,
"rewards/accuracies": 0.668749988079071,
"rewards/chosen": 0.4029604494571686,
"rewards/margins": 0.24430949985980988,
"rewards/rejected": 0.1586509495973587,
"step": 70
},
{
"epoch": 0.512,
"grad_norm": 1195.264190588224,
"learning_rate": 5.671166329088278e-09,
"logits/chosen": -4.038235187530518,
"logits/rejected": -4.326010227203369,
"logps/chosen": -352.18646240234375,
"logps/rejected": -309.32562255859375,
"loss": 0.6092,
"rewards/accuracies": 0.690625011920929,
"rewards/chosen": 0.5486255288124084,
"rewards/margins": 0.3041314482688904,
"rewards/rejected": 0.24449411034584045,
"step": 80
},
{
"epoch": 0.576,
"grad_norm": 1097.5673117468077,
"learning_rate": 4.551803455482833e-09,
"logits/chosen": -4.168010711669922,
"logits/rejected": -4.375750541687012,
"logps/chosen": -338.2205505371094,
"logps/rejected": -296.5308532714844,
"loss": 0.59,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": 0.5563652515411377,
"rewards/margins": 0.29324790835380554,
"rewards/rejected": 0.263117253780365,
"step": 90
},
{
"epoch": 0.64,
"grad_norm": 1066.1810496477938,
"learning_rate": 3.4549150281252633e-09,
"logits/chosen": -4.156978130340576,
"logits/rejected": -4.374584197998047,
"logps/chosen": -335.9981384277344,
"logps/rejected": -287.0412902832031,
"loss": 0.5812,
"rewards/accuracies": 0.7406250238418579,
"rewards/chosen": 0.6475387811660767,
"rewards/margins": 0.36960989236831665,
"rewards/rejected": 0.2779288589954376,
"step": 100
},
{
"epoch": 0.704,
"grad_norm": 1155.1395500395697,
"learning_rate": 2.43550361297047e-09,
"logits/chosen": -4.1374359130859375,
"logits/rejected": -4.378481864929199,
"logps/chosen": -317.46600341796875,
"logps/rejected": -277.5682067871094,
"loss": 0.5759,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": 0.7310987710952759,
"rewards/margins": 0.3804031014442444,
"rewards/rejected": 0.3506956100463867,
"step": 110
},
{
"epoch": 0.768,
"grad_norm": 1066.5080189058133,
"learning_rate": 1.5446867550656768e-09,
"logits/chosen": -4.136859893798828,
"logits/rejected": -4.3448615074157715,
"logps/chosen": -331.464111328125,
"logps/rejected": -281.9703674316406,
"loss": 0.5683,
"rewards/accuracies": 0.715624988079071,
"rewards/chosen": 0.7297540903091431,
"rewards/margins": 0.37383073568344116,
"rewards/rejected": 0.35592326521873474,
"step": 120
},
{
"epoch": 0.832,
"grad_norm": 1131.6322549220279,
"learning_rate": 8.271337313934869e-10,
"logits/chosen": -4.222386360168457,
"logits/rejected": -4.382724761962891,
"logps/chosen": -336.8995666503906,
"logps/rejected": -288.167236328125,
"loss": 0.5682,
"rewards/accuracies": 0.7406250238418579,
"rewards/chosen": 0.7898508310317993,
"rewards/margins": 0.4281063973903656,
"rewards/rejected": 0.3617444634437561,
"step": 130
},
{
"epoch": 0.896,
"grad_norm": 1132.1867619059146,
"learning_rate": 3.18825646801314e-10,
"logits/chosen": -4.176682472229004,
"logits/rejected": -4.3904242515563965,
"logps/chosen": -338.28924560546875,
"logps/rejected": -304.8387451171875,
"loss": 0.5706,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": 0.6995974183082581,
"rewards/margins": 0.34457093477249146,
"rewards/rejected": 0.3550264835357666,
"step": 140
},
{
"epoch": 0.96,
"grad_norm": 1203.6386117758473,
"learning_rate": 4.52511911603265e-11,
"logits/chosen": -4.113102912902832,
"logits/rejected": -4.341179370880127,
"logps/chosen": -344.94573974609375,
"logps/rejected": -296.61328125,
"loss": 0.5703,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": 0.7801700830459595,
"rewards/margins": 0.40149813890457153,
"rewards/rejected": 0.37867194414138794,
"step": 150
},
{
"epoch": 0.9984,
"step": 156,
"total_flos": 0.0,
"train_loss": 0.6263951460520426,
"train_runtime": 5142.9133,
"train_samples_per_second": 7.766,
"train_steps_per_second": 0.03
}
],
"logging_steps": 10,
"max_steps": 156,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}