neuronpedia
committed on
Upload folder using huggingface_hub
- .DS_Store +0 -0
- 10-res_scefr-ajt/cfg.json +1 -0
- 10-res_scefr-ajt/sae_weights.safetensors +3 -0
- 10-res_scefr-ajt/sparsity.safetensors +3 -0
- 2-res_scefr-ajt/cfg.json +1 -0
- 2-res_scefr-ajt/sae_weights.safetensors +3 -0
- 2-res_scefr-ajt/sparsity.safetensors +3 -0
- 6-res_scefr-ajt/cfg.json +1 -0
- 6-res_scefr-ajt/sae_weights.safetensors +3 -0
- 6-res_scefr-ajt/sparsity.safetensors +3 -0
.DS_Store
ADDED
Binary file (6.15 kB)
10-res_scefr-ajt/cfg.json
ADDED
@@ -0,0 +1 @@
+{"model_name": "gpt2-small", "model_class_name": "HookedTransformer", "hook_point": "blocks.10.hook_resid_pre", "hook_point_eval": "blocks.{layer}.attn.pattern", "hook_point_layer": 10, "hook_point_head_index": null, "dataset_path": "apollo-research/Skylion007-openwebtext-tokenizer-gpt2", "is_dataset_tokenized": true, "context_size": 128, "use_cached_activations": false, "cached_activations_path": null, "d_in": 768, "d_sae": 46080, "b_dec_init_method": "geometric_median", "expansion_factor": 60, "activation_fn": "relu", "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": null, "apply_b_dec_to_input": false, "decoder_orthogonal_init": true, "n_batches_in_buffer": 8, "training_tokens": 2000000, "finetuning_tokens": 0, "store_batch_size": 16, "train_batch_size": 4096, "device": "mps", "seed": 42, "dtype": "torch.float32", "prepend_bos": false, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 0.001, "lp_norm": 1, "lr": 0.0003, "lr_scheduler_name": "constant", "lr_warm_up_steps": 500, "lr_end": 2.9999999999999997e-05, "lr_decay_steps": 0, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-08, "log_to_wandb": true, "wandb_project": "mats_sae_training_language_model", "run_name": "46080-L1-0.001-LR-0.0003-Tokens-2.000e+06", "wandb_entity": null, "wandb_log_frequency": 10, "n_checkpoints": 0, "checkpoint_path": "cvj5um2h/rop2d6mb", "verbose": false, "model_kwargs": {}, "tokens_per_buffer": 4194304}
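The config is plain JSON and can be sanity-checked directly. A minimal sketch in Python (the local path assumes the repo has been cloned; adjust as needed):

import json

# Load the training config for the layer-10 residual-stream SAE.
with open("10-res_scefr-ajt/cfg.json") as f:
    cfg = json.load(f)

# The SAE reads 768-dim activations from GPT-2 small at
# blocks.10.hook_resid_pre and expands them 60x into 46080 features.
assert cfg["d_sae"] == cfg["d_in"] * cfg["expansion_factor"]  # 46080 == 768 * 60
print(cfg["model_name"], cfg["hook_point"], cfg["d_sae"])

The hook_point field names the TransformerLens activation the SAE was trained on; the 2- and 6- directories below differ only in that field, the layer number, and the checkpoint path.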
10-res_scefr-ajt/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1e6ec2e7c1b256c788e2e05653af13600d1fe311f0c82e854f504a44f950b11b
+size 283487640
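The .safetensors entries above are git-LFS pointer files, not tensor data: oid is the SHA-256 of the real file and size its byte count (here ~283 MB of weights). After fetching the actual payload (e.g. git lfs pull, or a huggingface_hub download), the tensors load with the safetensors library. A sketch, assuming the usual SAELens parameter names, which the pointer file itself does not confirm:

from safetensors.torch import load_file

# Requires the real weights, not the LFS pointer.
weights = load_file("10-res_scefr-ajt/sae_weights.safetensors")

# Expected (assumed) entries: W_enc [768, 46080], b_enc [46080],
# W_dec [46080, 768], b_dec [768].
for name, tensor in weights.items():
    print(name, tuple(tensor.shape))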
10-res_scefr-ajt/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6dcced92d84fbce7e12c9e2f7232ec5651cf9c4b6316310729328b087c2d04d6
+size 184400
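sparsity.safetensors is small by comparison: one float32 statistic per feature (46080 x 4 = 184,320 bytes, plus the safetensors header, gives the 184,400 bytes above). A sketch for inspecting it; the tensor layout and whether values are raw or log10 firing frequencies are assumptions to verify:

from safetensors.torch import load_file

tensors = load_file("10-res_scefr-ajt/sparsity.safetensors")
sparsity = next(iter(tensors.values()))  # assume a single tensor

# Check the range first: some trainer versions store log10 frequencies.
print(sparsity.min().item(), sparsity.max().item())

# With raw frequencies, cfg.json's dead_feature_threshold (1e-08)
# flags features that essentially never fire.
dead = (sparsity < 1e-08).sum().item()
print(f"{dead} / {sparsity.numel()} features below the dead-feature threshold")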
2-res_scefr-ajt/cfg.json
ADDED
@@ -0,0 +1 @@
+{"model_name": "gpt2-small", "model_class_name": "HookedTransformer", "hook_point": "blocks.2.hook_resid_pre", "hook_point_eval": "blocks.{layer}.attn.pattern", "hook_point_layer": 2, "hook_point_head_index": null, "dataset_path": "apollo-research/Skylion007-openwebtext-tokenizer-gpt2", "is_dataset_tokenized": true, "context_size": 128, "use_cached_activations": false, "cached_activations_path": null, "d_in": 768, "d_sae": 46080, "b_dec_init_method": "geometric_median", "expansion_factor": 60, "activation_fn": "relu", "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": null, "apply_b_dec_to_input": false, "decoder_orthogonal_init": true, "n_batches_in_buffer": 8, "training_tokens": 2000000, "finetuning_tokens": 0, "store_batch_size": 16, "train_batch_size": 4096, "device": "mps", "seed": 42, "dtype": "torch.float32", "prepend_bos": false, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 0.001, "lp_norm": 1, "lr": 0.0003, "lr_scheduler_name": "constant", "lr_warm_up_steps": 500, "lr_end": 2.9999999999999997e-05, "lr_decay_steps": 0, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-08, "log_to_wandb": true, "wandb_project": "mats_sae_training_language_model", "run_name": "46080-L1-0.001-LR-0.0003-Tokens-2.000e+06", "wandb_entity": null, "wandb_log_frequency": 10, "n_checkpoints": 0, "checkpoint_path": "visi12en/kmk2b4re", "verbose": false, "model_kwargs": {}, "tokens_per_buffer": 4194304}
2-res_scefr-ajt/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dc62ac98c10bc513fef99087a6412f3aa2b0b74b9c54406b5f9b2a0aa2afb296
+size 283487640
2-res_scefr-ajt/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b01a91187f18e97698d505ebb8ca6268efb6a4348e6d121085869d4985d0b58f
+size 184400
6-res_scefr-ajt/cfg.json
ADDED
@@ -0,0 +1 @@
+{"model_name": "gpt2-small", "model_class_name": "HookedTransformer", "hook_point": "blocks.6.hook_resid_pre", "hook_point_eval": "blocks.{layer}.attn.pattern", "hook_point_layer": 6, "hook_point_head_index": null, "dataset_path": "apollo-research/Skylion007-openwebtext-tokenizer-gpt2", "is_dataset_tokenized": true, "context_size": 128, "use_cached_activations": false, "cached_activations_path": null, "d_in": 768, "d_sae": 46080, "b_dec_init_method": "geometric_median", "expansion_factor": 60, "activation_fn": "relu", "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": null, "apply_b_dec_to_input": false, "decoder_orthogonal_init": true, "n_batches_in_buffer": 8, "training_tokens": 2000000, "finetuning_tokens": 0, "store_batch_size": 16, "train_batch_size": 4096, "device": "mps", "seed": 42, "dtype": "torch.float32", "prepend_bos": false, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 0.001, "lp_norm": 1, "lr": 0.0003, "lr_scheduler_name": "constant", "lr_warm_up_steps": 500, "lr_end": 2.9999999999999997e-05, "lr_decay_steps": 0, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-08, "log_to_wandb": true, "wandb_project": "mats_sae_training_language_model", "run_name": "46080-L1-0.001-LR-0.0003-Tokens-2.000e+06", "wandb_entity": null, "wandb_log_frequency": 10, "n_checkpoints": 0, "checkpoint_path": "2lzle2f0/mlznmn9a", "verbose": false, "model_kwargs": {}, "tokens_per_buffer": 4194304}
6-res_scefr-ajt/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a4a74c2c86899a9c2d64692de9526892991bfd1bce777992dbb2822d6e91b919
+size 283487640
6-res_scefr-ajt/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:56a99516877c88826ff6da945d53e7ee45c3d531b63c686a421200ab6dccbfe1
+size 184400
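Taken together, each <layer>-res_scefr-ajt directory is enough to run its SAE on live activations: cfg.json names the hook point and sae_weights.safetensors holds the parameters. A sketch for the layer-10 SAE, again assuming the SAELens W_enc/b_enc naming and LFS-fetched weights:

import torch
from transformer_lens import HookedTransformer
from safetensors.torch import load_file

# cfg.json's "gpt2-small" is the model TransformerLens calls "gpt2".
model = HookedTransformer.from_pretrained("gpt2")
w = load_file("10-res_scefr-ajt/sae_weights.safetensors")

# cfg.json sets prepend_bos: false, so mirror that at inference time.
_, cache = model.run_with_cache("The quick brown fox", prepend_bos=False)
acts = cache["blocks.10.hook_resid_pre"]  # [batch, pos, 768]

# apply_b_dec_to_input is false in the config, so encode without
# subtracting b_dec; activation_fn is relu per the config.
feats = torch.relu(acts @ w["W_enc"] + w["b_enc"])  # [batch, pos, 46080]
print("fraction of features active:", (feats > 0).float().mean().item())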