{
  "accumulate_grad_batches": 64,
  "auxk": 256,
  "auxk_coef": 0.03125,
  "batch_size": 1,
  "dead_steps_threshold": null,
  "dead_threshold": 0.001,
  "dead_tokens_threshold": 10000000,
  "expansion_factor": 64,
  "k": 32,
  "layers": null,
  "lr": 0.0001,
  "max_length": 2048,
  "model_name": "google/gemma-2-2b",
  "skip_special_tokens": true,
  "standardize": true,
  "tuned_lens": false
}