{ "seed": 42, "model_name_or_path": "EleutherAI/pythia-70m-deduped", "hook_point": "gpt_neox.layers.3", "dataset_name_or_path": "jbrinkma/pile-300k", "activation_size": -1, "add_bos_token": false, "expansion_factor": 4, "b_dec_init_method": "", "n_steps": -1, "device": "cuda", "batch_size": 32, "ctx_length": 256, "lr": 0.001, "min_lr": 0.0, "lr_warmup_steps": 5000, "sparsity_coefficient": 0.008, "evaluation_interval": 200, "beta1": 0.9, "beta2": 0.999, "l1_sqrt": false, "cos_sim_reg": false, "cos_sim_alpha": 0.0, "n_tokens_in_feature_cache": 500000, "use_ghost_grads": false, "output_dir": "outputs", "cache_dir": "cache", "checkpoint_interval": 200, "use_wandb": true, "wandb_entity": "best_sae", "wandb_project": "best_sae", "wandb_name": "sparsity_coefficient_0.008_20240311154120354226", "wandb_group": "L1_v2" }