phi-2-gpo-renew2-b0.01-log-i0 / eval_results.json
BraylonDash's picture
Model save
dca0b81 verified
{
"epoch": 1.0,
"eval_logits/chosen": 0.9228530526161194,
"eval_logits/rejected": 1.0251185894012451,
"eval_logps/chosen": -280.4223937988281,
"eval_logps/rejected": -252.46142578125,
"eval_loss": 0.6908722519874573,
"eval_rewards/accuracies": 0.6269999742507935,
"eval_rewards/chosen": -0.028755955398082733,
"eval_rewards/margins": 0.057699065655469894,
"eval_rewards/rejected": -0.08645503222942352,
"eval_runtime": 539.9766,
"eval_samples": 2000,
"eval_samples_per_second": 3.704,
"eval_steps_per_second": 0.926
}