Mistral-Adastra-IA3 / training_log.json
Astris's picture
Upload Model
dcd4153
raw
history blame
465 Bytes
{
"base_model_name": "mistralai_Mistral-7B-v0.1",
"base_model_class": "MistralForCausalLM",
"base_loaded_in_4bit": false,
"base_loaded_in_8bit": false,
"projections": "gate, down, up, q, k, v, o",
"loss": 1.5442,
"learning_rate": 0.0,
"epoch": 1.0,
"current_steps": 1247,
"train_runtime": 537.1855,
"train_samples_per_second": 2.323,
"train_steps_per_second": 0.29,
"total_flos": 2.726526696239923e+16,
"train_loss": 1.7653875748316448
}