zephyr-7b-gpo-update3-i0 / all_results.json
lole25's picture
Model save
0174c0e verified
{
"epoch": 1.0,
"eval_logits/chosen": -1.0169163942337036,
"eval_logits/rejected": -0.8867645263671875,
"eval_logps/chosen": -412.1428527832031,
"eval_logps/rejected": -479.4647521972656,
"eval_loss": 0.02242046222090721,
"eval_rewards/accuracies": 0.6754999756813049,
"eval_rewards/chosen": -0.18013788759708405,
"eval_rewards/margins": 0.08771497756242752,
"eval_rewards/rejected": -0.26785287261009216,
"eval_runtime": 713.9076,
"eval_samples": 2000,
"eval_samples_per_second": 2.801,
"eval_steps_per_second": 1.401,
"train_loss": 0.028396672180453324,
"train_runtime": 172620.3313,
"train_samples": 61135,
"train_samples_per_second": 0.354,
"train_steps_per_second": 0.089
}