{ "seed": 42, "model_name_or_path": "gpt2", "revision": "", "hook_point": "transformer.h.0.mlp", "dataset_name_or_path": "jbrinkma/pile-100k", "activation_size": -1, "add_bos_token": false, "evaluation_batches": 10, "expansion_factor": 4, "b_dec_init_method": "", "use_pre_encoder_bias": true, "tied": false, "n_steps": -1, "device": "cuda", "batch_size": 32, "ctx_length": 256, "lr": 0.0001, "min_lr": 0.0, "lr_warmup_steps": 5000, "sparsity_coefficient": 0.003, "evaluation_interval": 200, "beta1": 0.9, "beta2": 0.999, "l1_sqrt": false, "cos_sim_reg": false, "cos_sim_alpha": 0.0, "decoder_normalization": true, "decoder_norm_smaller_than_one": false, "l1_with_norm": false, "sqrt_mse": false, "dynamic_weighting": false, "l1_warmup_steps": 2000, "target_l0": 40, "n_tokens_in_feature_cache": 500000.0, "use_ghost_grads": false, "use_neuron_resampling": false, "resampling_steps": -1, "output_dir": "outputs", "cache_dir": "cache", "checkpoint_interval": 200, "use_wandb": true, "wandb_entity": "jannikbrinkmann", "wandb_project": "best-sae", "wandb_name": "GPT2_L0_lr_0.0001_20240323115650955023", "wandb_group": "L0_L1_Sweeps" }