gemma-7b-dpo-full-orca-v0 / eval_results.json
lewtun's picture
lewtun HF staff
End of training
0063657 verified
{
"epoch": 0.99,
"eval_logits/chosen": 94.10542297363281,
"eval_logits/rejected": 141.18112182617188,
"eval_logps/chosen": -725.1587524414062,
"eval_logps/rejected": -860.7156982421875,
"eval_loss": 0.013092889450490475,
"eval_rewards/accuracies": 0.9921875,
"eval_rewards/chosen": 4.552516937255859,
"eval_rewards/margins": 12.267425537109375,
"eval_rewards/rejected": -7.714908599853516,
"eval_runtime": 44.617,
"eval_samples": 1000,
"eval_samples_per_second": 22.413,
"eval_steps_per_second": 0.717
}