zephyr-7b-dpo-full / trainer_state.json
RikkiXu's picture
Model save
1c9e834 verified
raw
history blame
No virus
8.83 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9984,
"eval_steps": 500,
"global_step": 156,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0064,
"grad_norm": 1341.8773394764246,
"learning_rate": 3.125e-09,
"logits/chosen": -3.9499800205230713,
"logits/rejected": -4.237819194793701,
"logps/chosen": -300.693115234375,
"logps/rejected": -249.96307373046875,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.064,
"grad_norm": 1342.2810836893796,
"learning_rate": 3.125e-08,
"logits/chosen": -4.129705905914307,
"logits/rejected": -4.352028846740723,
"logps/chosen": -351.5079650878906,
"logps/rejected": -308.8138427734375,
"loss": 0.7326,
"rewards/accuracies": 0.3680555522441864,
"rewards/chosen": -0.04078766331076622,
"rewards/margins": -0.11378024518489838,
"rewards/rejected": 0.07299260050058365,
"step": 10
},
{
"epoch": 0.128,
"grad_norm": 1252.3965895279962,
"learning_rate": 4.9899357349880975e-08,
"logits/chosen": -4.194980144500732,
"logits/rejected": -4.382790565490723,
"logps/chosen": -334.9039001464844,
"logps/rejected": -293.8416748046875,
"loss": 0.683,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": 0.22410114109516144,
"rewards/margins": 0.11712154000997543,
"rewards/rejected": 0.10697959363460541,
"step": 20
},
{
"epoch": 0.192,
"grad_norm": 904.3776918610464,
"learning_rate": 4.877641290737884e-08,
"logits/chosen": -4.230466365814209,
"logits/rejected": -4.363996505737305,
"logps/chosen": -327.71453857421875,
"logps/rejected": -295.3287658691406,
"loss": 0.5498,
"rewards/accuracies": 0.7437499761581421,
"rewards/chosen": 0.9708820581436157,
"rewards/margins": 0.5084127187728882,
"rewards/rejected": 0.46246927976608276,
"step": 30
},
{
"epoch": 0.256,
"grad_norm": 894.6327423356746,
"learning_rate": 4.646121984004665e-08,
"logits/chosen": -4.1493096351623535,
"logits/rejected": -4.351648807525635,
"logps/chosen": -330.09368896484375,
"logps/rejected": -288.2974853515625,
"loss": 0.4125,
"rewards/accuracies": 0.8218749761581421,
"rewards/chosen": 1.9414455890655518,
"rewards/margins": 1.1434320211410522,
"rewards/rejected": 0.7980135083198547,
"step": 40
},
{
"epoch": 0.32,
"grad_norm": 706.4309708182283,
"learning_rate": 4.3069871595684784e-08,
"logits/chosen": -4.244365215301514,
"logits/rejected": -4.423664093017578,
"logps/chosen": -329.6412353515625,
"logps/rejected": -291.22528076171875,
"loss": 0.3694,
"rewards/accuracies": 0.840624988079071,
"rewards/chosen": 2.6057987213134766,
"rewards/margins": 1.537340521812439,
"rewards/rejected": 1.068458080291748,
"step": 50
},
{
"epoch": 0.384,
"grad_norm": 679.6447682422123,
"learning_rate": 3.8772424536302564e-08,
"logits/chosen": -4.262530326843262,
"logits/rejected": -4.4340620040893555,
"logps/chosen": -320.7197570800781,
"logps/rejected": -291.15264892578125,
"loss": 0.3459,
"rewards/accuracies": 0.8343750238418579,
"rewards/chosen": 3.022132158279419,
"rewards/margins": 1.8344866037368774,
"rewards/rejected": 1.187645673751831,
"step": 60
},
{
"epoch": 0.448,
"grad_norm": 600.9568341116722,
"learning_rate": 3.378437060203357e-08,
"logits/chosen": -4.188047885894775,
"logits/rejected": -4.377224445343018,
"logps/chosen": -320.23345947265625,
"logps/rejected": -288.5027770996094,
"loss": 0.3189,
"rewards/accuracies": 0.8187500238418579,
"rewards/chosen": 3.3037331104278564,
"rewards/margins": 2.1254096031188965,
"rewards/rejected": 1.1783230304718018,
"step": 70
},
{
"epoch": 0.512,
"grad_norm": 654.7049863576665,
"learning_rate": 2.8355831645441387e-08,
"logits/chosen": -4.0522565841674805,
"logits/rejected": -4.341280937194824,
"logps/chosen": -345.8344421386719,
"logps/rejected": -307.4328918457031,
"loss": 0.3105,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": 3.7246456146240234,
"rewards/margins": 2.5337729454040527,
"rewards/rejected": 1.1908724308013916,
"step": 80
},
{
"epoch": 0.576,
"grad_norm": 638.1282144295093,
"learning_rate": 2.2759017277414164e-08,
"logits/chosen": -4.180428504943848,
"logits/rejected": -4.390549659729004,
"logps/chosen": -332.82275390625,
"logps/rejected": -295.1810607910156,
"loss": 0.3099,
"rewards/accuracies": 0.875,
"rewards/chosen": 3.2552542686462402,
"rewards/margins": 2.3172354698181152,
"rewards/rejected": 0.9380186796188354,
"step": 90
},
{
"epoch": 0.64,
"grad_norm": 680.3285346474286,
"learning_rate": 1.7274575140626317e-08,
"logits/chosen": -4.167009353637695,
"logits/rejected": -4.386021614074707,
"logps/chosen": -330.049560546875,
"logps/rejected": -285.8011169433594,
"loss": 0.3123,
"rewards/accuracies": 0.8843749761581421,
"rewards/chosen": 3.6218514442443848,
"rewards/margins": 2.723836898803711,
"rewards/rejected": 0.8980148434638977,
"step": 100
},
{
"epoch": 0.704,
"grad_norm": 616.2712616857408,
"learning_rate": 1.217751806485235e-08,
"logits/chosen": -4.145500183105469,
"logits/rejected": -4.386542320251465,
"logps/chosen": -311.7583923339844,
"logps/rejected": -276.3233947753906,
"loss": 0.3022,
"rewards/accuracies": 0.8843749761581421,
"rewards/chosen": 3.584909439086914,
"rewards/margins": 2.6118006706237793,
"rewards/rejected": 0.9731090664863586,
"step": 110
},
{
"epoch": 0.768,
"grad_norm": 649.1888991009114,
"learning_rate": 7.723433775328384e-09,
"logits/chosen": -4.141805171966553,
"logits/rejected": -4.35054874420166,
"logps/chosen": -325.5559997558594,
"logps/rejected": -280.5980529785156,
"loss": 0.3033,
"rewards/accuracies": 0.8656250238418579,
"rewards/chosen": 3.6838138103485107,
"rewards/margins": 2.6417319774627686,
"rewards/rejected": 1.0420820713043213,
"step": 120
},
{
"epoch": 0.832,
"grad_norm": 747.4298760038148,
"learning_rate": 4.135668656967433e-09,
"logits/chosen": -4.228358268737793,
"logits/rejected": -4.38976526260376,
"logps/chosen": -331.02642822265625,
"logps/rejected": -286.7439880371094,
"loss": 0.3064,
"rewards/accuracies": 0.875,
"rewards/chosen": 3.7264277935028076,
"rewards/margins": 2.6530587673187256,
"rewards/rejected": 1.073369026184082,
"step": 130
},
{
"epoch": 0.896,
"grad_norm": 697.5841535989922,
"learning_rate": 1.5941282340065698e-09,
"logits/chosen": -4.18213415145874,
"logits/rejected": -4.3970947265625,
"logps/chosen": -332.56500244140625,
"logps/rejected": -303.63543701171875,
"loss": 0.3069,
"rewards/accuracies": 0.856249988079071,
"rewards/chosen": 3.5617058277130127,
"rewards/margins": 2.6050186157226562,
"rewards/rejected": 0.9566874504089355,
"step": 140
},
{
"epoch": 0.96,
"grad_norm": 567.1610784183449,
"learning_rate": 2.262559558016325e-10,
"logits/chosen": -4.118973731994629,
"logits/rejected": -4.348026752471924,
"logps/chosen": -339.0107116699219,
"logps/rejected": -295.09564208984375,
"loss": 0.3078,
"rewards/accuracies": 0.8656250238418579,
"rewards/chosen": 3.7477049827575684,
"rewards/margins": 2.61022686958313,
"rewards/rejected": 1.1374781131744385,
"step": 150
},
{
"epoch": 0.9984,
"step": 156,
"total_flos": 0.0,
"train_loss": 0.3884877807054764,
"train_runtime": 4677.6403,
"train_samples_per_second": 8.539,
"train_steps_per_second": 0.033
}
],
"logging_steps": 10,
"max_steps": 156,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}