zephyr-7b-dpo-full / trainer_state.json
wzhouad's picture
Model save
c97fab2 verified
raw
history blame
No virus
4.92 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 100,
"global_step": 53,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"debug/losses": 0.34206920862197876,
"debug/policy_weights": 0.49350154399871826,
"debug/raw_losses": 0.6931471824645996,
"epoch": 0.018867924528301886,
"grad_norm": 5.360033875918955,
"learning_rate": 8.333333333333333e-08,
"logits/chosen": -2.855412006378174,
"logits/rejected": -2.8797199726104736,
"logps/chosen": -320.43853759765625,
"logps/rejected": -340.07073974609375,
"loss": 0.378,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"debug/losses": 0.374397873878479,
"debug/policy_weights": 0.5419037342071533,
"debug/raw_losses": 0.6909083127975464,
"epoch": 0.18867924528301888,
"grad_norm": 5.481970932548877,
"learning_rate": 4.911172937635942e-07,
"logits/chosen": -2.8661186695098877,
"logits/rejected": -2.892002820968628,
"logps/chosen": -305.7351379394531,
"logps/rejected": -332.1855773925781,
"loss": 0.3734,
"rewards/accuracies": 0.4861111044883728,
"rewards/chosen": 0.000888873531948775,
"rewards/margins": 0.004573077894747257,
"rewards/rejected": -0.0036842040717601776,
"step": 10
},
{
"debug/losses": 0.36864763498306274,
"debug/policy_weights": 0.5463515520095825,
"debug/raw_losses": 0.6742688417434692,
"epoch": 0.37735849056603776,
"grad_norm": 5.223305949320831,
"learning_rate": 3.982949361823388e-07,
"logits/chosen": -2.8624260425567627,
"logits/rejected": -2.864138126373291,
"logps/chosen": -323.93145751953125,
"logps/rejected": -330.8647155761719,
"loss": 0.3687,
"rewards/accuracies": 0.6187499761581421,
"rewards/chosen": 0.006857290863990784,
"rewards/margins": 0.042396366596221924,
"rewards/rejected": -0.03553907200694084,
"step": 20
},
{
"debug/losses": 0.36431893706321716,
"debug/policy_weights": 0.5633269548416138,
"debug/raw_losses": 0.6449006199836731,
"epoch": 0.5660377358490566,
"grad_norm": 5.588271480922223,
"learning_rate": 2.416462557480814e-07,
"logits/chosen": -2.85429048538208,
"logits/rejected": -2.857250452041626,
"logps/chosen": -296.940673828125,
"logps/rejected": -313.1925354003906,
"loss": 0.3529,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -0.0012655016034841537,
"rewards/margins": 0.12636741995811462,
"rewards/rejected": -0.12763293087482452,
"step": 30
},
{
"debug/losses": 0.3164761960506439,
"debug/policy_weights": 0.5361936688423157,
"debug/raw_losses": 0.5776438117027283,
"epoch": 0.7547169811320755,
"grad_norm": 4.853338929616513,
"learning_rate": 8.859303711029939e-08,
"logits/chosen": -2.862122058868408,
"logits/rejected": -2.85917329788208,
"logps/chosen": -290.1681823730469,
"logps/rejected": -323.2647705078125,
"loss": 0.3411,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -0.006347997579723597,
"rewards/margins": 0.3348899781703949,
"rewards/rejected": -0.34123796224594116,
"step": 40
},
{
"debug/losses": 0.3770141899585724,
"debug/policy_weights": 0.5816048979759216,
"debug/raw_losses": 0.6441487073898315,
"epoch": 0.9433962264150944,
"grad_norm": 5.24061929616419,
"learning_rate": 5.009573740853313e-09,
"logits/chosen": -2.899784564971924,
"logits/rejected": -2.886505603790283,
"logps/chosen": -282.5003662109375,
"logps/rejected": -317.9324645996094,
"loss": 0.329,
"rewards/accuracies": 0.65625,
"rewards/chosen": -0.03169974684715271,
"rewards/margins": 0.16929562389850616,
"rewards/rejected": -0.20099535584449768,
"step": 50
},
{
"epoch": 1.0,
"step": 53,
"total_flos": 0.0,
"train_loss": 0.35314782722940985,
"train_runtime": 383.8735,
"train_samples_per_second": 17.584,
"train_steps_per_second": 0.138
}
],
"logging_steps": 10,
"max_steps": 53,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}