mistral-sft4epoch-dpo-v / eval_results.json
AmberYifan's picture
End of training
5600425 verified
raw
history blame contribute delete
570 Bytes
{
"epoch": 1.0,
"eval_logits/chosen": -2.7477951049804688,
"eval_logits/rejected": -2.7179834842681885,
"eval_logps/chosen": -160.58680725097656,
"eval_logps/rejected": -136.7208709716797,
"eval_loss": 0.8708102703094482,
"eval_rewards/accuracies": 0.6257961988449097,
"eval_rewards/chosen": 2.9987568855285645,
"eval_rewards/margins": 0.8227174878120422,
"eval_rewards/rejected": 2.176039695739746,
"eval_runtime": 293.552,
"eval_samples": 5000,
"eval_samples_per_second": 17.033,
"eval_steps_per_second": 0.535
}