phi-2-gpo-test-longest-iter-4 / all_results.json
BraylonDash's picture
Model save
2974093 verified
{
"epoch": 1.98,
"eval_logits/chosen": -0.0055040339939296246,
"eval_logits/rejected": 0.09165824949741364,
"eval_logps/chosen": -306.3620910644531,
"eval_logps/rejected": -278.66876220703125,
"eval_loss": 0.010676206089556217,
"eval_rewards/accuracies": 0.5084999799728394,
"eval_rewards/chosen": -2.4409100660705008e-05,
"eval_rewards/margins": 0.00048807679559104145,
"eval_rewards/rejected": -0.0005124859162606299,
"eval_runtime": 433.0135,
"eval_samples": 2000,
"eval_samples_per_second": 4.619,
"eval_steps_per_second": 1.155,
"train_loss": 0.010321829197627882,
"train_runtime": 1406.1718,
"train_samples": 61135,
"train_samples_per_second": 1.422,
"train_steps_per_second": 0.088
}