llama-3.2-3b-dpo-2 / eval_results.json
tanliboy's picture
End of training
26bbc31 verified
raw
history blame contribute delete
579 Bytes
{
"epoch": 2.998693948628646,
"eval_logits/chosen": 0.182608962059021,
"eval_logits/rejected": 0.23953425884246826,
"eval_logps/chosen": -338.5595703125,
"eval_logps/rejected": -388.22418212890625,
"eval_loss": 0.5813568234443665,
"eval_rewards/accuracies": 0.7848101258277893,
"eval_rewards/chosen": 1.7432384490966797,
"eval_rewards/margins": 5.916741847991943,
"eval_rewards/rejected": -4.173503398895264,
"eval_runtime": 69.9912,
"eval_samples": 2500,
"eval_samples_per_second": 35.719,
"eval_steps_per_second": 1.129
}