{
  "seed": 49,
  "epochs": 5000,
  "batch_size": 16384,
  "lr": 0.001,
  "l1_coeff": 0,
  "beta1": 0.9,
  "beta2": 0.99,
  "num_tokens": 1000000000,
  "max_grad_norm": 100000,
  "seq_len": 128,
  "dtype": "torch.float32",
  "model_name": "gpt2-small",
  "site": "resid_pre",
  "layer": 8,
  "act_size": 768,
  "dict_size": 2304,
  "device": "cuda:0",
  "model_batch_size": 512,
  "num_batches_in_buffer": 10,
  "dataset_path": "Skylion007/openwebtext",
  "wandb_project": "gpt2-feature-splitting-saes",
  "input_unit_norm": false,
  "perf_log_freq": 1000,
  "sae_type": "topk",
  "architecture": "standard",
  "checkpoint_freq": 1000,
  "n_batches_to_dead": 100,
  "top_k": 4,
  "top_k_aux": 512,
  "aux_penalty": 0.0625,
  "hook_point": "blocks.8.hook_resid_pre",
  "threshold": null,
  "name": "gpt2-small_blocks.8.hook_resid_pre_2304_topk_4_0.001",
  "cosine_penalty": 0.0
}