statking's picture
End of training
b08cda7 verified
{
"epoch": 0.9997382884061764,
"eval_logits/chosen": -0.8381435871124268,
"eval_logits/rejected": -0.8568249344825745,
"eval_logps/chosen": -507.26611328125,
"eval_logps/rejected": -604.2300415039062,
"eval_loss": 0.4785204529762268,
"eval_rewards/accuracies": 0.7760000228881836,
"eval_rewards/chosen": -2.3087284564971924,
"eval_rewards/margins": 1.200958251953125,
"eval_rewards/rejected": -3.5096867084503174,
"eval_runtime": 490.7419,
"eval_samples": 2000,
"eval_samples_per_second": 4.075,
"eval_steps_per_second": 0.255,
"total_flos": 0.0,
"train_loss": 0.504705511212973,
"train_runtime": 53071.3067,
"train_samples": 61134,
"train_samples_per_second": 1.152,
"train_steps_per_second": 0.036
}