dpo-selective-buffer-safeipo / all_results.json
wxzhang's picture
Model save
8ffa4d8 verified
raw
history blame
No virus
860 Bytes
{
"epoch": 1.0,
"eval_logits/chosen": -1.099204421043396,
"eval_logits/rejected": -0.6287172436714172,
"eval_logps/chosen": -231.0450897216797,
"eval_logps/rejected": -222.87437438964844,
"eval_loss": 0.34230488538742065,
"eval_rewards/accuracies": 0.7313649654388428,
"eval_rewards/chosen": -1.0060617923736572,
"eval_rewards/margins": 0.29796916246414185,
"eval_rewards/rejected": -1.3040310144424438,
"eval_rewards/safe_rewards": -0.9952626824378967,
"eval_rewards/unsafe_rewards": -1.0112754106521606,
"eval_runtime": 1122.5675,
"eval_samples": 33044,
"eval_samples_per_second": 29.436,
"eval_steps_per_second": 0.92,
"train_loss": 1.018996798697021,
"train_runtime": 22449.6551,
"train_samples": 59478,
"train_samples_per_second": 2.649,
"train_steps_per_second": 0.083
}