PEFT
Safetensors
qwen2
alignment-handbook
trl
dpo
Generated from Trainer
khongtrunght's picture
End of training
cfca1d8 verified
raw
history blame contribute delete
785 Bytes
{
"epoch": 0.9995419147961521,
"eval_logits/chosen": -0.6407761573791504,
"eval_logits/rejected": -0.6567662358283997,
"eval_logps/chosen": -200.93621826171875,
"eval_logps/rejected": -276.8379821777344,
"eval_loss": 0.30047258734703064,
"eval_rewards/accuracies": 0.8699421882629395,
"eval_rewards/chosen": 1.6737332344055176,
"eval_rewards/margins": 2.166951894760132,
"eval_rewards/rejected": -0.4932188391685486,
"eval_runtime": 252.7931,
"eval_samples": 2763,
"eval_samples_per_second": 10.93,
"eval_steps_per_second": 1.369,
"total_flos": 0.0,
"train_loss": 0.37405444834616947,
"train_runtime": 8738.9435,
"train_samples": 34924,
"train_samples_per_second": 3.996,
"train_steps_per_second": 0.125
}