phi-2-gpo-test-longest-iter-2 / eval_results.json
BraylonDash's picture
Model save
266f7bc verified
{
"epoch": 1.98,
"eval_logits/chosen": -0.009708462283015251,
"eval_logits/rejected": 0.08755116909742355,
"eval_logps/chosen": -306.4169006347656,
"eval_logps/rejected": -278.6773681640625,
"eval_loss": 0.010828138329088688,
"eval_rewards/accuracies": 0.49149999022483826,
"eval_rewards/chosen": -0.00047626858577132225,
"eval_rewards/margins": -0.00013606167340185493,
"eval_rewards/rejected": -0.00034020686871372163,
"eval_runtime": 840.6964,
"eval_samples": 2000,
"eval_samples_per_second": 2.379,
"eval_steps_per_second": 0.595
}