phi-2-dpo-ultrachat-lora / all_results.json
lole25's picture
Model save
c5be93f verified
{
"epoch": 2.0,
"eval_logits/chosen": 0.7325530052185059,
"eval_logits/rejected": 0.7531598806381226,
"eval_logps/chosen": -94.8434829711914,
"eval_logps/rejected": -98.55415344238281,
"eval_loss": 0.687246561050415,
"eval_rewards/accuracies": 0.33399999141693115,
"eval_rewards/chosen": -0.03116113506257534,
"eval_rewards/margins": 0.012410260736942291,
"eval_rewards/rejected": -0.04357139766216278,
"eval_runtime": 272.5724,
"eval_samples": 2000,
"eval_samples_per_second": 7.337,
"eval_steps_per_second": 0.459,
"train_loss": 0.3994182998029441,
"train_runtime": 8026.3391,
"train_samples": 30567,
"train_samples_per_second": 7.617,
"train_steps_per_second": 0.119
}