tim-lawson's picture
Push model using huggingface_hub.
e953564 verified
raw
history blame contribute delete
396 Bytes
{
"accumulate_grad_batches": 64,
"auxk": 256,
"auxk_coef": 0.03125,
"batch_size": 1,
"dead_steps_threshold": null,
"dead_threshold": 0.001,
"dead_tokens_threshold": 10000000,
"expansion_factor": 64,
"k": 32,
"layers": null,
"lr": 0.0001,
"max_length": 2048,
"model_name": "google/gemma-2-2b",
"skip_special_tokens": true,
"standardize": true,
"tuned_lens": false
}