eurus-dpop-qlora-uf-5e-7-real / all_results.json
just1nseo's picture
End of training
75f002d verified
raw
history blame contribute delete
No virus
999 Bytes
{
"epoch": 1.0,
"eval_dpo_losses": 0.6691617965698242,
"eval_logits/chosen": -2.305570602416992,
"eval_logits/rejected": -2.1975905895233154,
"eval_logps/chosen": -262.83721923828125,
"eval_logps/rejected": -254.97503662109375,
"eval_loss": 0.6798878312110901,
"eval_positive_losses": 0.08152038604021072,
"eval_rewards/accuracies": 0.7123016119003296,
"eval_rewards/chosen": 0.12936948239803314,
"eval_rewards/margins": 0.05091732367873192,
"eval_rewards/margins_max": 0.19700397551059723,
"eval_rewards/margins_min": -0.07996664941310883,
"eval_rewards/margins_std": 0.09219963103532791,
"eval_rewards/rejected": 0.07845214754343033,
"eval_runtime": 387.9178,
"eval_samples": 2000,
"eval_samples_per_second": 5.156,
"eval_steps_per_second": 0.162,
"train_loss": 0.68390608707419,
"train_runtime": 41762.7734,
"train_samples": 61134,
"train_samples_per_second": 1.464,
"train_steps_per_second": 0.091
}