eurus-dpo-qlora-uf-ours-5e-7 / eval_results.json
just1nseo's picture
End of training
fd86cd9 verified
raw
history blame contribute delete
723 Bytes
{
"epoch": 3.0,
"eval_logits/chosen": -1.915927767753601,
"eval_logits/rejected": -1.8290798664093018,
"eval_logps/chosen": -540.2177734375,
"eval_logps/rejected": -569.8001098632812,
"eval_loss": 0.825461208820343,
"eval_rewards/accuracies": 0.5920000076293945,
"eval_rewards/chosen": -2.6533703804016113,
"eval_rewards/margins": 0.46942609548568726,
"eval_rewards/margins_max": 3.5073935985565186,
"eval_rewards/margins_min": -2.2740330696105957,
"eval_rewards/margins_std": 1.913187861442566,
"eval_rewards/rejected": -3.122796058654785,
"eval_runtime": 739.3637,
"eval_samples": 2000,
"eval_samples_per_second": 2.705,
"eval_steps_per_second": 0.169
}