{ "epoch": 0.9982631930527722, "eval_logits/chosen": -0.6438767313957214, "eval_logits/rejected": -0.6447061896324158, "eval_logps/chosen": -418.2021179199219, "eval_logps/rejected": -458.6069641113281, "eval_loss": 0.6083793044090271, "eval_rewards/accuracies": 0.6808943152427673, "eval_rewards/chosen": -1.6265408992767334, "eval_rewards/margins": 0.346989244222641, "eval_rewards/rejected": -1.9735301733016968, "eval_runtime": 375.4309, "eval_samples": 1961, "eval_samples_per_second": 5.223, "eval_steps_per_second": 0.328, "total_flos": 0.0, "train_loss": 0.6321173631915189, "train_runtime": 21471.9268, "train_samples": 59875, "train_samples_per_second": 2.789, "train_steps_per_second": 0.022 }