zephyr-7b-dpo-full / trainer_state.json
RikkiXu's picture
Model save
e3f2a18 verified
raw
history blame
No virus
10.4 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 185,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"grad_norm": 171.98892218238854,
"learning_rate": 2.6315789473684208e-08,
"logits/chosen": -0.1266070306301117,
"logits/rejected": 0.7204304933547974,
"logps/chosen": -319.01666259765625,
"logps/rejected": -252.47039794921875,
"loss": 0.6916,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.05,
"grad_norm": 158.2614639136714,
"learning_rate": 2.631578947368421e-07,
"logits/chosen": -0.3861861824989319,
"logits/rejected": 0.33749374747276306,
"logps/chosen": -266.4891052246094,
"logps/rejected": -224.11000061035156,
"loss": 0.6758,
"rewards/accuracies": 0.5520833134651184,
"rewards/chosen": -0.03102089650928974,
"rewards/margins": 0.034922875463962555,
"rewards/rejected": -0.06594377011060715,
"step": 10
},
{
"epoch": 0.11,
"grad_norm": 104.41587535161224,
"learning_rate": 4.999552306674344e-07,
"logits/chosen": -0.24374540150165558,
"logits/rejected": 0.8117060661315918,
"logps/chosen": -289.02911376953125,
"logps/rejected": -250.653564453125,
"loss": 0.478,
"rewards/accuracies": 0.793749988079071,
"rewards/chosen": -0.4349571764469147,
"rewards/margins": 1.263426661491394,
"rewards/rejected": -1.6983836889266968,
"step": 20
},
{
"epoch": 0.16,
"grad_norm": 98.86884631406178,
"learning_rate": 4.946022852363932e-07,
"logits/chosen": -0.2871348261833191,
"logits/rejected": 0.6740838289260864,
"logps/chosen": -281.1429748535156,
"logps/rejected": -271.7496032714844,
"loss": 0.4067,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -1.6509501934051514,
"rewards/margins": 2.906687021255493,
"rewards/rejected": -4.5576372146606445,
"step": 30
},
{
"epoch": 0.22,
"grad_norm": 95.68866413164287,
"learning_rate": 4.805146507594034e-07,
"logits/chosen": -0.5090769529342651,
"logits/rejected": 0.5341213345527649,
"logps/chosen": -283.4405517578125,
"logps/rejected": -268.97686767578125,
"loss": 0.371,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -2.4618725776672363,
"rewards/margins": 3.340365171432495,
"rewards/rejected": -5.802238464355469,
"step": 40
},
{
"epoch": 0.27,
"grad_norm": 103.23233983894589,
"learning_rate": 4.581953932909403e-07,
"logits/chosen": -0.4626421332359314,
"logits/rejected": 0.5320831537246704,
"logps/chosen": -313.1284484863281,
"logps/rejected": -299.7115173339844,
"loss": 0.335,
"rewards/accuracies": 0.862500011920929,
"rewards/chosen": -2.947516441345215,
"rewards/margins": 3.4062907695770264,
"rewards/rejected": -6.353806495666504,
"step": 50
},
{
"epoch": 0.32,
"grad_norm": 91.31338474042249,
"learning_rate": 4.284415281717847e-07,
"logits/chosen": -0.3830726444721222,
"logits/rejected": 0.7034914493560791,
"logps/chosen": -302.44549560546875,
"logps/rejected": -295.2908020019531,
"loss": 0.2941,
"rewards/accuracies": 0.890625,
"rewards/chosen": -2.427272081375122,
"rewards/margins": 3.3501389026641846,
"rewards/rejected": -5.777410507202148,
"step": 60
},
{
"epoch": 0.38,
"grad_norm": 108.21771645007362,
"learning_rate": 3.923155588020165e-07,
"logits/chosen": -0.050761766731739044,
"logits/rejected": 1.1738256216049194,
"logps/chosen": -279.0822448730469,
"logps/rejected": -271.3674011230469,
"loss": 0.3118,
"rewards/accuracies": 0.859375,
"rewards/chosen": -2.9542946815490723,
"rewards/margins": 3.31215238571167,
"rewards/rejected": -6.266446590423584,
"step": 70
},
{
"epoch": 0.43,
"grad_norm": 110.48344039822193,
"learning_rate": 3.511075348989692e-07,
"logits/chosen": -0.02379416488111019,
"logits/rejected": 0.9985305666923523,
"logps/chosen": -291.3994140625,
"logps/rejected": -278.81207275390625,
"loss": 0.3145,
"rewards/accuracies": 0.862500011920929,
"rewards/chosen": -1.9188703298568726,
"rewards/margins": 3.239673614501953,
"rewards/rejected": -5.158544063568115,
"step": 80
},
{
"epoch": 0.49,
"grad_norm": 94.44668679211257,
"learning_rate": 3.062889851306735e-07,
"logits/chosen": 0.15241345763206482,
"logits/rejected": 1.204730749130249,
"logps/chosen": -285.8970947265625,
"logps/rejected": -274.2763671875,
"loss": 0.3256,
"rewards/accuracies": 0.859375,
"rewards/chosen": -2.3829667568206787,
"rewards/margins": 3.3176727294921875,
"rewards/rejected": -5.700639724731445,
"step": 90
},
{
"epoch": 0.54,
"grad_norm": 72.25131318661623,
"learning_rate": 2.594603691794176e-07,
"logits/chosen": 0.017316246405243874,
"logits/rejected": 1.112657070159912,
"logps/chosen": -291.90631103515625,
"logps/rejected": -277.5140686035156,
"loss": 0.2907,
"rewards/accuracies": 0.846875011920929,
"rewards/chosen": -2.7051641941070557,
"rewards/margins": 3.0991756916046143,
"rewards/rejected": -5.804339408874512,
"step": 100
},
{
"epoch": 0.59,
"grad_norm": 106.17479973453032,
"learning_rate": 2.1229392570965654e-07,
"logits/chosen": 0.5481065511703491,
"logits/rejected": 1.4057379961013794,
"logps/chosen": -290.0019226074219,
"logps/rejected": -288.4178161621094,
"loss": 0.2795,
"rewards/accuracies": 0.859375,
"rewards/chosen": -3.1080322265625,
"rewards/margins": 3.114968776702881,
"rewards/rejected": -6.223001003265381,
"step": 110
},
{
"epoch": 0.65,
"grad_norm": 88.25415485320248,
"learning_rate": 1.6647395712565254e-07,
"logits/chosen": 0.10530638694763184,
"logits/rejected": 1.3136330842971802,
"logps/chosen": -303.7025451660156,
"logps/rejected": -291.4312438964844,
"loss": 0.3024,
"rewards/accuracies": 0.8812500238418579,
"rewards/chosen": -2.87852144241333,
"rewards/margins": 3.439791440963745,
"rewards/rejected": -6.318312644958496,
"step": 120
},
{
"epoch": 0.7,
"grad_norm": 81.41509800140894,
"learning_rate": 1.2363668353585485e-07,
"logits/chosen": 0.025721266865730286,
"logits/rejected": 1.1706856489181519,
"logps/chosen": -291.2774963378906,
"logps/rejected": -280.7757873535156,
"loss": 0.2712,
"rewards/accuracies": 0.903124988079071,
"rewards/chosen": -2.8241302967071533,
"rewards/margins": 3.6137948036193848,
"rewards/rejected": -6.437924385070801,
"step": 130
},
{
"epoch": 0.76,
"grad_norm": 76.56961564493653,
"learning_rate": 8.53118137245516e-08,
"logits/chosen": 0.24798288941383362,
"logits/rejected": 1.3128881454467773,
"logps/chosen": -298.71783447265625,
"logps/rejected": -297.16790771484375,
"loss": 0.2607,
"rewards/accuracies": 0.8656250238418579,
"rewards/chosen": -2.95615553855896,
"rewards/margins": 3.7294158935546875,
"rewards/rejected": -6.685571193695068,
"step": 140
},
{
"epoch": 0.81,
"grad_norm": 101.60579173655283,
"learning_rate": 5.2867919617408553e-08,
"logits/chosen": 0.16610342264175415,
"logits/rejected": 1.297738790512085,
"logps/chosen": -296.17230224609375,
"logps/rejected": -285.56707763671875,
"loss": 0.2777,
"rewards/accuracies": 0.903124988079071,
"rewards/chosen": -2.7571194171905518,
"rewards/margins": 3.536668062210083,
"rewards/rejected": -6.293786525726318,
"step": 150
},
{
"epoch": 0.86,
"grad_norm": 92.310593955402,
"learning_rate": 2.7463564905650853e-08,
"logits/chosen": 0.06046704202890396,
"logits/rejected": 1.0854153633117676,
"logps/chosen": -297.1445007324219,
"logps/rejected": -291.33868408203125,
"loss": 0.2684,
"rewards/accuracies": 0.893750011920929,
"rewards/chosen": -2.6816515922546387,
"rewards/margins": 3.552661418914795,
"rewards/rejected": -6.234313011169434,
"step": 160
},
{
"epoch": 0.92,
"grad_norm": 83.71834684366553,
"learning_rate": 1.0005933014019307e-08,
"logits/chosen": 0.15604642033576965,
"logits/rejected": 1.338841199874878,
"logps/chosen": -298.0588684082031,
"logps/rejected": -293.54638671875,
"loss": 0.2745,
"rewards/accuracies": 0.903124988079071,
"rewards/chosen": -3.0692405700683594,
"rewards/margins": 3.527927875518799,
"rewards/rejected": -6.59716796875,
"step": 170
},
{
"epoch": 0.97,
"grad_norm": 78.86616344216218,
"learning_rate": 1.1184317978602808e-09,
"logits/chosen": -0.07575028389692307,
"logits/rejected": 1.0216057300567627,
"logps/chosen": -288.5888366699219,
"logps/rejected": -287.2474670410156,
"loss": 0.3031,
"rewards/accuracies": 0.90625,
"rewards/chosen": -2.831172466278076,
"rewards/margins": 3.852785587310791,
"rewards/rejected": -6.683958530426025,
"step": 180
},
{
"epoch": 1.0,
"step": 185,
"total_flos": 0.0,
"train_loss": 0.33391942269093283,
"train_runtime": 5319.9853,
"train_samples_per_second": 8.891,
"train_steps_per_second": 0.035
}
],
"logging_steps": 10,
"max_steps": 185,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}