{ "epoch": 1.971563981042654, "eval_logits/chosen": 98.15704345703125, "eval_logits/rejected": 92.42511749267578, "eval_logps/chosen": -438.6038818359375, "eval_logps/rejected": -467.9371643066406, "eval_loss": 0.4697837233543396, "eval_rewards/accuracies": 0.7395833134651184, "eval_rewards/chosen": -3.746781587600708, "eval_rewards/margins": 1.5465270280838013, "eval_rewards/rejected": -5.293309211730957, "eval_runtime": 119.3692, "eval_samples": 750, "eval_samples_per_second": 6.283, "eval_steps_per_second": 0.201 }