dpo-selective-buffer-safeipo / all_results.json
wxzhang's picture
Model save
a2514bf verified
{
"epoch": 1.0,
"eval_logits/chosen": 0.9053679704666138,
"eval_logits/rejected": 1.7481720447540283,
"eval_logps/chosen": -228.00465393066406,
"eval_logps/rejected": -198.00367736816406,
"eval_loss": 4449.90234375,
"eval_rewards/accuracies": 0.616104006767273,
"eval_rewards/chosen": -0.8765509724617004,
"eval_rewards/margins": 0.08215557038784027,
"eval_rewards/rejected": -0.9587064981460571,
"eval_rewards/safe_rewards": -0.865267813205719,
"eval_rewards/unsafe_rewards": -0.860846996307373,
"eval_runtime": 2354.4025,
"eval_samples": 35044,
"eval_samples_per_second": 14.884,
"eval_steps_per_second": 0.466,
"train_loss": 5859.617769083399,
"train_runtime": 32772.3871,
"train_samples": 120613,
"train_samples_per_second": 3.68,
"train_steps_per_second": 0.057
}