{ "accumulate_grad_batches": 64, "auxk": 256, "auxk_coef": 0.03125, "batch_size": 1, "dead_steps_threshold": null, "dead_threshold": 0.001, "dead_tokens_threshold": 10000000, "expansion_factor": 64, "k": 32, "layers": [ 18 ], "lr": 0.0001, "max_length": 2048, "model_name": "EleutherAI/pythia-410m-deduped", "skip_special_tokens": true, "standardize": true, "tuned_lens": false }