chanind's picture
Upload SAE matching-pursuit/l0-85
91b7043 verified
{
"sae": {
"d_in": 2304,
"d_sae": 32768,
"apply_b_dec_to_input": true,
"normalize_activations": "none",
"reshape_activations": "none",
"decoder_init_norm": 1.0,
"residual_threshold": 50,
"max_iterations": 300,
"stop_on_duplicate_support": true,
"architecture": "matching_pursuit"
},
"model_name": "google/gemma-2-2b",
"model_class_name": "AutoModelForCausalLM",
"hook_name": "model.layers.12",
"dataset_path": "chanind/pile-uncopyrighted-gemma-1024-abbrv-2B",
"streaming": false,
"is_dataset_tokenized": true,
"context_size": 1024,
"use_cached_activations": false,
"cached_activations_path": null,
"from_pretrained_path": null,
"n_batches_in_buffer": 64,
"training_tokens": 300000000,
"store_batch_size_prompts": 12,
"disable_concat_sequences": false,
"sequence_separator_token": "bos",
"device": "cuda",
"act_store_device": "cuda",
"seed": 0,
"dtype": "float32",
"prepend_bos": true,
"autocast": true,
"autocast_lm": true,
"compile_llm": false,
"llm_compilation_mode": null,
"compile_sae": false,
"sae_compilation_mode": null,
"train_batch_size_tokens": 3072,
"adam_beta1": 0.9,
"adam_beta2": 0.999,
"lr": 0.0003,
"lr_scheduler_name": "constant",
"lr_warm_up_steps": 0,
"lr_end": 2.9999999999999997e-05,
"lr_decay_steps": 0,
"n_restart_cycles": 1,
"dead_feature_window": 1000,
"feature_sampling_window": 2000,
"dead_feature_threshold": 1e-08,
"n_eval_batches": 10,
"eval_batch_size_prompts": 2,
"n_checkpoints": 10,
"save_final_checkpoint": false,
"resume_from_checkpoint": null,
"verbose": true,
"model_kwargs": {},
"model_from_pretrained_kwargs": {},
"sae_lens_version": "6.26.0",
"sae_lens_training_version": "6.26.0",
"exclude_special_tokens": true
}