{ "epoch": 0.99, "eval_logits/chosen": 0.8022828698158264, "eval_logits/rejected": 0.8200842142105103, "eval_logps/chosen": -91.75594329833984, "eval_logps/rejected": -94.22090911865234, "eval_loss": 0.6932105422019958, "eval_rewards/accuracies": 0.24799999594688416, "eval_rewards/chosen": -0.0002857264771591872, "eval_rewards/margins": -4.674374213209376e-05, "eval_rewards/rejected": -0.00023898274230305105, "eval_runtime": 273.9391, "eval_samples": 2000, "eval_samples_per_second": 7.301, "eval_steps_per_second": 0.456 }