{
  "sae": {
    "d_in": 2304,
    "d_sae": 32768,
    "apply_b_dec_to_input": true,
    "normalize_activations": "none",
    "reshape_activations": "none",
    "decoder_init_norm": 1.0,
    "residual_threshold": 50,
    "max_iterations": 300,
    "stop_on_duplicate_support": true,
    "architecture": "matching_pursuit"
  },
  "model_name": "google/gemma-2-2b",
  "model_class_name": "AutoModelForCausalLM",
  "hook_name": "model.layers.12",
  "dataset_path": "chanind/pile-uncopyrighted-gemma-1024-abbrv-2B",
  "streaming": false,
  "is_dataset_tokenized": true,
  "context_size": 1024,
  "use_cached_activations": false,
  "cached_activations_path": null,
  "from_pretrained_path": null,
  "n_batches_in_buffer": 64,
  "training_tokens": 300000000,
  "store_batch_size_prompts": 12,
  "disable_concat_sequences": false,
  "sequence_separator_token": "bos",
  "device": "cuda",
  "act_store_device": "cuda",
  "seed": 0,
  "dtype": "float32",
  "prepend_bos": true,
  "autocast": true,
  "autocast_lm": true,
  "compile_llm": false,
  "llm_compilation_mode": null,
  "compile_sae": false,
  "sae_compilation_mode": null,
  "train_batch_size_tokens": 3072,
  "adam_beta1": 0.9,
  "adam_beta2": 0.999,
  "lr": 0.0003,
  "lr_scheduler_name": "constant",
  "lr_warm_up_steps": 0,
  "lr_end": 2.9999999999999997e-05,
  "lr_decay_steps": 0,
  "n_restart_cycles": 1,
  "dead_feature_window": 1000,
  "feature_sampling_window": 2000,
  "dead_feature_threshold": 1e-08,
  "n_eval_batches": 10,
  "eval_batch_size_prompts": 2,
  "n_checkpoints": 10,
  "save_final_checkpoint": false,
  "resume_from_checkpoint": null,
  "verbose": true,
  "model_kwargs": {},
  "model_from_pretrained_kwargs": {},
  "sae_lens_version": "6.26.0",
  "sae_lens_training_version": "6.26.0",
  "exclude_special_tokens": true
}