zephyr-7b-gemma-dpo / trainer_state.json
ale-bay's picture
Model save
cd9aa6a verified
raw
history blame
No virus
7.36 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.971563981042654,
"eval_steps": 100,
"global_step": 104,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.018957345971563982,
"grad_norm": 384.97491646147324,
"learning_rate": 4.545454545454545e-08,
"logits/chosen": -11.400373458862305,
"logits/rejected": -11.167098045349121,
"logps/chosen": -1579.2471923828125,
"logps/rejected": -1833.805419921875,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.1895734597156398,
"grad_norm": 285.06639638114245,
"learning_rate": 4.545454545454545e-07,
"logits/chosen": -14.52730941772461,
"logits/rejected": -14.906502723693848,
"logps/chosen": -1777.32421875,
"logps/rejected": -1881.382568359375,
"loss": 0.6985,
"rewards/accuracies": 0.4236111044883728,
"rewards/chosen": -0.15817444026470184,
"rewards/margins": 0.02842862159013748,
"rewards/rejected": -0.18660305440425873,
"step": 10
},
{
"epoch": 0.3791469194312796,
"grad_norm": 534.5020309330747,
"learning_rate": 4.885348141000122e-07,
"logits/chosen": -21.795948028564453,
"logits/rejected": -23.514450073242188,
"logps/chosen": -1682.7659912109375,
"logps/rejected": -1746.1217041015625,
"loss": 0.7332,
"rewards/accuracies": 0.6312500238418579,
"rewards/chosen": -0.4811238646507263,
"rewards/margins": 0.043054938316345215,
"rewards/rejected": -0.5241788625717163,
"step": 20
},
{
"epoch": 0.5687203791469194,
"grad_norm": 425.21287499734404,
"learning_rate": 4.5025027361734613e-07,
"logits/chosen": -7.340817451477051,
"logits/rejected": -15.593961715698242,
"logps/chosen": -1770.065673828125,
"logps/rejected": -1880.151611328125,
"loss": 0.8071,
"rewards/accuracies": 0.6812499761581421,
"rewards/chosen": -0.468301385641098,
"rewards/margins": 0.7533053159713745,
"rewards/rejected": -1.2216066122055054,
"step": 30
},
{
"epoch": 0.7582938388625592,
"grad_norm": 712.3590680432743,
"learning_rate": 3.893311157806091e-07,
"logits/chosen": -14.5736665725708,
"logits/rejected": -23.62198829650879,
"logps/chosen": -1539.815185546875,
"logps/rejected": -1462.7039794921875,
"loss": 0.9612,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -4.467960357666016,
"rewards/margins": -0.5169845819473267,
"rewards/rejected": -3.950974941253662,
"step": 40
},
{
"epoch": 0.9478672985781991,
"grad_norm": 881.241885579057,
"learning_rate": 3.126631330646801e-07,
"logits/chosen": -9.282499313354492,
"logits/rejected": -10.507891654968262,
"logps/chosen": -2434.360595703125,
"logps/rejected": -2703.219970703125,
"loss": 0.7979,
"rewards/accuracies": 0.65625,
"rewards/chosen": 0.584517240524292,
"rewards/margins": 0.6922141313552856,
"rewards/rejected": -0.1076967716217041,
"step": 50
},
{
"epoch": 1.1374407582938388,
"grad_norm": 429.8468773274208,
"learning_rate": 2.2891223348923882e-07,
"logits/chosen": -6.557864189147949,
"logits/rejected": -9.822066307067871,
"logps/chosen": -1984.517578125,
"logps/rejected": -1996.927490234375,
"loss": 0.5975,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": 1.916027307510376,
"rewards/margins": 1.0445150136947632,
"rewards/rejected": 0.871512234210968,
"step": 60
},
{
"epoch": 1.3270142180094786,
"grad_norm": 175.85699006716078,
"learning_rate": 1.4754491880085317e-07,
"logits/chosen": -7.904175758361816,
"logits/rejected": -13.800127983093262,
"logps/chosen": -1871.2291259765625,
"logps/rejected": -1999.8929443359375,
"loss": 0.4226,
"rewards/accuracies": 0.8187500238418579,
"rewards/chosen": -0.032440781593322754,
"rewards/margins": 1.4170843362808228,
"rewards/rejected": -1.4495251178741455,
"step": 70
},
{
"epoch": 1.5165876777251186,
"grad_norm": 231.31575551946057,
"learning_rate": 7.775827023107834e-08,
"logits/chosen": -12.700660705566406,
"logits/rejected": -10.377889633178711,
"logps/chosen": -1366.731689453125,
"logps/rejected": -1466.466552734375,
"loss": 0.3894,
"rewards/accuracies": 0.856249988079071,
"rewards/chosen": 0.11536725610494614,
"rewards/margins": 1.3466222286224365,
"rewards/rejected": -1.2312551736831665,
"step": 80
},
{
"epoch": 1.7061611374407581,
"grad_norm": 230.19784681381057,
"learning_rate": 2.7440387297912122e-08,
"logits/chosen": -10.58217716217041,
"logits/rejected": -11.754631996154785,
"logps/chosen": -1986.6451416015625,
"logps/rejected": -2165.16162109375,
"loss": 0.3852,
"rewards/accuracies": 0.90625,
"rewards/chosen": -0.339664489030838,
"rewards/margins": 2.0723910331726074,
"rewards/rejected": -2.412055492401123,
"step": 90
},
{
"epoch": 1.8957345971563981,
"grad_norm": 302.98036373926305,
"learning_rate": 2.27878296044029e-09,
"logits/chosen": -8.607019424438477,
"logits/rejected": -15.033491134643555,
"logps/chosen": -2083.50048828125,
"logps/rejected": -2193.92431640625,
"loss": 0.4114,
"rewards/accuracies": 0.8374999761581421,
"rewards/chosen": -0.0890653133392334,
"rewards/margins": 1.4980413913726807,
"rewards/rejected": -1.5871065855026245,
"step": 100
},
{
"epoch": 1.8957345971563981,
"eval_logits/chosen": -18.000696182250977,
"eval_logits/rejected": -25.16254234313965,
"eval_logps/chosen": -1530.4515380859375,
"eval_logps/rejected": -1648.5675048828125,
"eval_loss": 0.8002049326896667,
"eval_rewards/accuracies": 0.7604166865348816,
"eval_rewards/chosen": -0.46603381633758545,
"eval_rewards/margins": 0.8467853665351868,
"eval_rewards/rejected": -1.3128191232681274,
"eval_runtime": 36.1276,
"eval_samples_per_second": 20.76,
"eval_steps_per_second": 0.664,
"step": 100
},
{
"epoch": 1.971563981042654,
"step": 104,
"total_flos": 0.0,
"train_loss": 0.6178501087885636,
"train_runtime": 1142.0913,
"train_samples_per_second": 11.82,
"train_steps_per_second": 0.091
}
],
"logging_steps": 10,
"max_steps": 104,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}