eurus-dpo-qlora-uf-ours-5e-6 / all_results.json
just1nseo's picture
End of training
03a7f90 verified
raw
history blame contribute delete
No virus
897 Bytes
{
"epoch": 3.0,
"eval_logits/chosen": -1.0759214162826538,
"eval_logits/rejected": -0.9100367426872253,
"eval_logps/chosen": -2645.154052734375,
"eval_logps/rejected": -3544.43115234375,
"eval_loss": 6.142495155334473,
"eval_rewards/accuracies": 0.6259999871253967,
"eval_rewards/chosen": -23.702728271484375,
"eval_rewards/margins": 9.166375160217285,
"eval_rewards/margins_max": 58.90415954589844,
"eval_rewards/margins_min": -33.25897216796875,
"eval_rewards/margins_std": 29.858272552490234,
"eval_rewards/rejected": -32.86910629272461,
"eval_runtime": 737.8634,
"eval_samples": 2000,
"eval_samples_per_second": 2.711,
"eval_steps_per_second": 0.169,
"train_loss": 0.1413031851636692,
"train_runtime": 20921.2576,
"train_samples": 5678,
"train_samples_per_second": 0.814,
"train_steps_per_second": 0.051
}