zephyr-7b-dpo-lora / trainer_state.json
Jerry46's picture
Training in progress, epoch 1
2a34ae7
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.998451213216314,
"eval_steps": 100,
"global_step": 2904,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 1.7182130584192438e-09,
"logits/chosen": -2.9648265838623047,
"logits/rejected": -2.9711227416992188,
"logps/chosen": -256.0919494628906,
"logps/rejected": -234.60708618164062,
"loss": 1.0,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.01,
"learning_rate": 1.718213058419244e-08,
"logits/chosen": -3.049875497817993,
"logits/rejected": -3.0188238620758057,
"logps/chosen": -276.6912536621094,
"logps/rejected": -202.39605712890625,
"loss": 1.0001,
"rewards/accuracies": 0.4375,
"rewards/chosen": 0.0005764114903286099,
"rewards/margins": -0.006484686397016048,
"rewards/rejected": 0.007061097305268049,
"step": 10
},
{
"epoch": 0.02,
"learning_rate": 3.436426116838488e-08,
"logits/chosen": -2.988577127456665,
"logits/rejected": -2.9995627403259277,
"logps/chosen": -312.2018127441406,
"logps/rejected": -246.76266479492188,
"loss": 1.0026,
"rewards/accuracies": 0.48124998807907104,
"rewards/chosen": 0.003506724489852786,
"rewards/margins": -0.0012849611230194569,
"rewards/rejected": 0.004791685380041599,
"step": 20
},
{
"epoch": 0.03,
"learning_rate": 5.154639175257731e-08,
"logits/chosen": -3.063732624053955,
"logits/rejected": -3.0357906818389893,
"logps/chosen": -260.15679931640625,
"logps/rejected": -224.3686065673828,
"loss": 0.9974,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": 0.0021156296133995056,
"rewards/margins": 0.0043937130831182,
"rewards/rejected": -0.0022780844010412693,