phi-2-gpo-ultrachat-lora-2 / eval_results.json
lole25's picture
Model save
ec33c3e verified
{
"epoch": 2.0,
"eval_logits/chosen": 0.7331738471984863,
"eval_logits/rejected": 0.7520135045051575,
"eval_logps/chosen": -93.26776885986328,
"eval_logps/rejected": -96.37944030761719,
"eval_loss": 0.00928194634616375,
"eval_rewards/accuracies": 0.3499999940395355,
"eval_rewards/chosen": -0.015403981320559978,
"eval_rewards/margins": 0.006420300807803869,
"eval_rewards/rejected": -0.02182428352534771,
"eval_runtime": 272.4398,
"eval_samples": 2000,
"eval_samples_per_second": 7.341,
"eval_steps_per_second": 0.459
}