{ "epoch": 5.0, "eval_logits/chosen": -2.9824023246765137, "eval_logits/rejected": -3.0452840328216553, "eval_logps/chosen": -86.44639587402344, "eval_logps/rejected": -122.97762298583984, "eval_loss": 0.6935876607894897, "eval_rewards/accuracies": 0.6875, "eval_rewards/chosen": 0.000459142291219905, "eval_rewards/margins": 0.00031183898681774735, "eval_rewards/rejected": 0.00014730336260981858, "eval_runtime": 11.9423, "eval_samples": 20, "eval_samples_per_second": 1.675, "eval_steps_per_second": 0.167, "train_loss": 0.17302747964859008, "train_runtime": 39.8084, "train_samples": 5, "train_samples_per_second": 0.628, "train_steps_per_second": 0.126 }