gemma-7b-dpo-full-mix1-beta-0.1 / eval_results.json
lewtun's picture
lewtun HF staff
End of training
f9e1bdc verified
{
"epoch": 0.99,
"eval_logits/chosen": 105.48260498046875,
"eval_logits/rejected": 99.1939697265625,
"eval_logps/chosen": -473.2679748535156,
"eval_logps/rejected": -486.23388671875,
"eval_loss": 0.6159864068031311,
"eval_rewards/accuracies": 0.7083333134651184,
"eval_rewards/chosen": -1.986350178718567,
"eval_rewards/margins": 1.4820828437805176,
"eval_rewards/rejected": -3.468433141708374,
"eval_runtime": 50.9713,
"eval_samples": 750,
"eval_samples_per_second": 14.714,
"eval_steps_per_second": 0.471
}