phi-2-ipo-ultrafeedback-lora / all_results.json
lole25's picture
Model save
4bd8d55 verified
{
"epoch": 2.0,
"eval_logits/chosen": 0.6623885631561279,
"eval_logits/rejected": 0.7668179869651794,
"eval_logps/chosen": -271.2955017089844,
"eval_logps/rejected": -249.1475830078125,
"eval_loss": 2156.2255859375,
"eval_rewards/accuracies": 0.6940000057220459,
"eval_rewards/chosen": -0.11051338165998459,
"eval_rewards/margins": 0.06660113483667374,
"eval_rewards/rejected": -0.17711451649665833,
"eval_runtime": 325.4702,
"eval_samples": 2000,
"eval_samples_per_second": 6.145,
"eval_steps_per_second": 0.384,
"train_loss": 2246.599348344143,
"train_runtime": 18130.6033,
"train_samples": 30567,
"train_samples_per_second": 3.372,
"train_steps_per_second": 0.053
}