{
  "base_model_name": "Meta-Llama-3-8b",
  "base_model_class": "LlamaForCausalLM",
  "base_loaded_in_4bit": true,
  "base_loaded_in_8bit": false,
  "projections": "q, v",
  "loss": 3.1269,
  "grad_norm": 2.9809577465057373,
  "learning_rate": 1.5906680805938495e-09,
  "epoch": 3.0,
  "current_steps": 1985,
  "current_steps_adjusted": 1985,
  "epoch_adjusted": 3.0,
  "train_runtime": 3501.3176,
  "train_samples_per_second": 2.268,
  "train_steps_per_second": 0.567,
  "total_flos": 9.170665646063616e+16,
  "train_loss": 2.9956131780375648
}