zephyr-7b-dpo-full / eval_results.json
RikkiXu's picture
End of training
eeab4a3 verified
raw
history blame
562 Bytes
{
"epoch": 1.0,
"eval_logits/chosen": -2.5045135021209717,
"eval_logits/rejected": -2.4882149696350098,
"eval_logps/chosen": -248.94781494140625,
"eval_logps/rejected": -191.02655029296875,
"eval_loss": 0.21413667500019073,
"eval_rewards/accuracies": 0.9140625,
"eval_rewards/chosen": 4.242725372314453,
"eval_rewards/margins": 9.778908729553223,
"eval_rewards/rejected": -5.536184310913086,
"eval_runtime": 97.1606,
"eval_samples": 2000,
"eval_samples_per_second": 20.584,
"eval_steps_per_second": 0.329
}