mamba_0_875_sft / all_results.json
Junxiong Wang
add models
cea4024
{
"epoch": 1.0,
"eval_loss": 0.5037005543708801,
"eval_runtime": 18.1339,
"eval_samples": 4096,
"eval_samples_per_second": 47.204,
"eval_steps_per_second": 1.489,
"total_flos": 1.2575785360765747e+17,
"train_loss": 0.6373805527283185,
"train_runtime": 394515.55,
"train_samples": 15487525,
"train_samples_per_second": 8.933,
"train_steps_per_second": 0.14
}