jbloom committed on
Commit
5bd69d8
1 Parent(s): e12443f

Uploaded SAE weights such that loading won't require old code.

Files changed (39)
  1. blocks.0.hook_resid_pre/cfg.json +1 -0
  2. blocks.0.hook_resid_pre/sae_weights.safetensors +3 -0
  3. blocks.0.hook_resid_pre/sparsity.safetensors +3 -0
  4. blocks.1.hook_resid_pre/cfg.json +1 -0
  5. blocks.1.hook_resid_pre/sae_weights.safetensors +3 -0
  6. blocks.1.hook_resid_pre/sparsity.safetensors +3 -0
  7. blocks.10.hook_resid_pre/cfg.json +1 -0
  8. blocks.10.hook_resid_pre/sae_weights.safetensors +3 -0
  9. blocks.10.hook_resid_pre/sparsity.safetensors +3 -0
  10. blocks.11.hook_resid_post/cfg.json +1 -0
  11. blocks.11.hook_resid_post/sae_weights.safetensors +3 -0
  12. blocks.11.hook_resid_post/sparsity.safetensors +3 -0
  13. blocks.11.hook_resid_pre/cfg.json +1 -0
  14. blocks.11.hook_resid_pre/sae_weights.safetensors +3 -0
  15. blocks.11.hook_resid_pre/sparsity.safetensors +3 -0
  16. blocks.2.hook_resid_pre/cfg.json +1 -0
  17. blocks.2.hook_resid_pre/sae_weights.safetensors +3 -0
  18. blocks.2.hook_resid_pre/sparsity.safetensors +3 -0
  19. blocks.3.hook_resid_pre/cfg.json +1 -0
  20. blocks.3.hook_resid_pre/sae_weights.safetensors +3 -0
  21. blocks.3.hook_resid_pre/sparsity.safetensors +3 -0
  22. blocks.4.hook_resid_pre/cfg.json +1 -0
  23. blocks.4.hook_resid_pre/sae_weights.safetensors +3 -0
  24. blocks.4.hook_resid_pre/sparsity.safetensors +3 -0
  25. blocks.5.hook_resid_pre/cfg.json +1 -0
  26. blocks.5.hook_resid_pre/sae_weights.safetensors +3 -0
  27. blocks.5.hook_resid_pre/sparsity.safetensors +3 -0
  28. blocks.6.hook_resid_pre/cfg.json +1 -0
  29. blocks.6.hook_resid_pre/sae_weights.safetensors +3 -0
  30. blocks.6.hook_resid_pre/sparsity.safetensors +3 -0
  31. blocks.7.hook_resid_pre/cfg.json +1 -0
  32. blocks.7.hook_resid_pre/sae_weights.safetensors +3 -0
  33. blocks.7.hook_resid_pre/sparsity.safetensors +3 -0
  34. blocks.8.hook_resid_pre/cfg.json +1 -0
  35. blocks.8.hook_resid_pre/sae_weights.safetensors +3 -0
  36. blocks.8.hook_resid_pre/sparsity.safetensors +3 -0
  37. blocks.9.hook_resid_pre/cfg.json +1 -0
  38. blocks.9.hook_resid_pre/sae_weights.safetensors +3 -0
  39. blocks.9.hook_resid_pre/sparsity.safetensors +3 -0
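Each hook point directory now contains three files: cfg.json (the training configuration), sae_weights.safetensors (the SAE parameters, stored via Git LFS), and sparsity.safetensors (per-feature firing statistics). A minimal loading sketch follows, assuming the standard huggingface_hub and safetensors APIs; the repository id is a placeholder and the tensor names inside the weight file are not specified by this commit.

```python
# Sketch: load one of the uploaded SAEs straight from the Hub, no old training
# code required. REPO_ID is a placeholder; hook point paths match the file
# listing above.
import json

from huggingface_hub import hf_hub_download
from safetensors.torch import load_file

REPO_ID = "<repo-id>"  # placeholder: the id of this repository on the Hub
HOOK_POINT = "blocks.0.hook_resid_pre"

cfg_path = hf_hub_download(repo_id=REPO_ID, filename=f"{HOOK_POINT}/cfg.json")
weights_path = hf_hub_download(
    repo_id=REPO_ID, filename=f"{HOOK_POINT}/sae_weights.safetensors"
)

with open(cfg_path) as f:
    cfg = json.load(f)
state_dict = load_file(weights_path)  # dict mapping tensor names to torch tensors

print(cfg["d_in"], cfg["d_sae"])  # 768, 24576 for these SAEs
print({name: tuple(t.shape) for name, t in state_dict.items()})
```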
blocks.0.hook_resid_pre/cfg.json ADDED
@@ -0,0 +1 @@
+ {"model_name": "gpt2-small", "hook_point": "blocks.0.hook_resid_pre", "hook_point_layer": 0, "hook_point_head_index": null, "dataset_path": "Skylion007/openwebtext", "is_dataset_tokenized": false, "context_size": 128, "use_cached_activations": false, "cached_activations_path": "activations/Skylion007_openwebtext/gpt2-small/blocks.0.hook_resid_pre", "d_in": 768, "n_batches_in_buffer": 128, "total_training_tokens": 300000000, "store_batch_size": 32, "device": "mps", "seed": 42, "dtype": "torch.float32", "b_dec_init_method": "geometric_median", "expansion_factor": 32, "from_pretrained_path": null, "l1_coefficient": 8e-05, "lr": 0.0004, "lr_scheduler_name": null, "lr_warm_up_steps": 5000, "train_batch_size": 4096, "use_ghost_grads": false, "feature_sampling_window": 1000, "feature_sampling_method": null, "resample_batches": 1028, "feature_reinit_scale": 0.2, "dead_feature_window": 5000, "dead_feature_estimation_method": "no_fire", "dead_feature_threshold": 1e-08, "log_to_wandb": true, "wandb_project": "mats_sae_training_gpt2_small_resid_pre_5", "wandb_entity": null, "wandb_log_frequency": 100, "n_checkpoints": 10, "checkpoint_path": "checkpoints/y1t51byy", "d_sae": 24576, "tokens_per_buffer": 67108864, "run_name": "24576-L1-8e-05-LR-0.0004-Tokens-3.000e+08"}
blocks.0.hook_resid_pre/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0f59f9b8baa40b8ae042b731f801b41115e3aea38bb12d9ec304fd323e32c0a5
+ size 151096640
blocks.0.hook_resid_pre/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:34e4ea473e3ff542132815cf326ce284c6ae2a3304eaf692f2226c3414d9dda8
+ size 98384
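The sae_weights.safetensors and sparsity.safetensors entries are Git LFS pointers, so the diff records only the spec version, the sha256 object id, and the byte size; the tensors themselves are fetched when the files are downloaded. The byte sizes are consistent with a standard SAE parameterization in float32, though the exact tensor names and layout are an assumption, not something this commit states:

```python
# Back-of-the-envelope check that the LFS byte sizes fit a standard SAE layout
# (encoder d_in x d_sae, decoder d_sae x d_in, plus encoder/decoder biases),
# all float32. Only the byte sizes themselves come from the commit.
d_in, d_sae = 768, 24576
n_params = 2 * d_in * d_sae + d_sae + d_in  # 37,774,080 parameters
print(n_params * 4)  # 151,096,320 bytes of tensor data
# sae_weights.safetensors is 151,096,640 bytes: the tensors plus a small header.
print(d_sae * 4)  # 98,304 bytes for one float32 value per feature
# sparsity.safetensors is 98,384 bytes: one sparsity value per feature plus a header.
```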
blocks.1.hook_resid_pre/cfg.json ADDED
@@ -0,0 +1 @@
+ {"model_name": "gpt2-small", "hook_point": "blocks.1.hook_resid_pre", "hook_point_layer": 1, "hook_point_head_index": null, "dataset_path": "Skylion007/openwebtext", "is_dataset_tokenized": false, "context_size": 128, "use_cached_activations": false, "cached_activations_path": "activations/Skylion007_openwebtext/gpt2-small/blocks.1.hook_resid_pre", "d_in": 768, "n_batches_in_buffer": 128, "total_training_tokens": 300000000, "store_batch_size": 32, "device": "mps", "seed": 42, "dtype": "torch.float32", "b_dec_init_method": "geometric_median", "expansion_factor": 32, "from_pretrained_path": null, "l1_coefficient": 8e-05, "lr": 0.0004, "lr_scheduler_name": null, "lr_warm_up_steps": 5000, "train_batch_size": 4096, "use_ghost_grads": false, "feature_sampling_window": 1000, "feature_sampling_method": null, "resample_batches": 1028, "feature_reinit_scale": 0.2, "dead_feature_window": 5000, "dead_feature_estimation_method": "no_fire", "dead_feature_threshold": 1e-08, "log_to_wandb": true, "wandb_project": "mats_sae_training_gpt2_small_resid_pre_5", "wandb_entity": null, "wandb_log_frequency": 100, "n_checkpoints": 10, "checkpoint_path": "checkpoints/mm179kd2", "d_sae": 24576, "tokens_per_buffer": 67108864, "run_name": "24576-L1-8e-05-LR-0.0004-Tokens-3.000e+08"}
blocks.1.hook_resid_pre/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f28edd189f39a566af489bf86b340e799d8297adb57bb78220ca664b60aa1830
+ size 151096640
blocks.1.hook_resid_pre/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a1232ac12530accb6cfb0c6cdb9d047c5c4d2e6366554141c91096b73d4c2e73
+ size 98384
blocks.10.hook_resid_pre/cfg.json ADDED
@@ -0,0 +1 @@
+ {"model_name": "gpt2-small", "hook_point": "blocks.10.hook_resid_pre", "hook_point_layer": 10, "hook_point_head_index": null, "dataset_path": "Skylion007/openwebtext", "is_dataset_tokenized": false, "context_size": 128, "use_cached_activations": false, "cached_activations_path": "activations/Skylion007_openwebtext/gpt2-small/blocks.10.hook_resid_pre", "d_in": 768, "n_batches_in_buffer": 128, "total_training_tokens": 300000000, "store_batch_size": 32, "device": "mps", "seed": 42, "dtype": "torch.float32", "b_dec_init_method": "geometric_median", "expansion_factor": 32, "from_pretrained_path": null, "l1_coefficient": 8e-05, "lr": 0.0004, "lr_scheduler_name": null, "lr_warm_up_steps": 5000, "train_batch_size": 4096, "use_ghost_grads": false, "feature_sampling_window": 1000, "feature_sampling_method": null, "resample_batches": 1028, "feature_reinit_scale": 0.2, "dead_feature_window": 5000, "dead_feature_estimation_method": "no_fire", "dead_feature_threshold": 1e-08, "log_to_wandb": true, "wandb_project": "mats_sae_training_gpt2_small_resid_pre_5", "wandb_entity": null, "wandb_log_frequency": 100, "n_checkpoints": 10, "checkpoint_path": "checkpoints/9vu4ulem", "d_sae": 24576, "tokens_per_buffer": 67108864, "run_name": "24576-L1-8e-05-LR-0.0004-Tokens-3.000e+08"}
blocks.10.hook_resid_pre/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8039df054735de8291683f77557913ad67cae327ea7a0f0e6a10e9bbe3ec1d0f
+ size 151096640
blocks.10.hook_resid_pre/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c773c2d858e8bad554ebe5d4e0a8b5439310e9952180f125938429cb0eb376d1
+ size 98384
blocks.11.hook_resid_post/cfg.json ADDED
@@ -0,0 +1 @@
+ {"model_name": "gpt2-small", "hook_point": "blocks.11.hook_resid_post", "hook_point_layer": 11, "hook_point_head_index": null, "dataset_path": "Skylion007/openwebtext", "is_dataset_tokenized": false, "context_size": 128, "use_cached_activations": false, "cached_activations_path": "activations/Skylion007_openwebtext/gpt2-small/blocks.11.hook_resid_post", "d_in": 768, "n_batches_in_buffer": 128, "total_training_tokens": 300000000, "store_batch_size": 32, "device": "mps", "seed": 42, "dtype": "torch.float32", "b_dec_init_method": "geometric_median", "expansion_factor": 32, "from_pretrained_path": null, "l1_coefficient": 4e-05, "lr": 0.0004, "lr_scheduler_name": null, "lr_warm_up_steps": 5000, "train_batch_size": 4096, "use_ghost_grads": false, "feature_sampling_window": 1000, "feature_sampling_method": null, "resample_batches": 1028, "feature_reinit_scale": 0.2, "dead_feature_window": 5000, "dead_feature_estimation_method": "no_fire", "dead_feature_threshold": 1e-08, "log_to_wandb": true, "wandb_project": "mats_sae_training_gpt2_small_resid_pre_5", "wandb_entity": null, "wandb_log_frequency": 100, "n_checkpoints": 10, "checkpoint_path": "checkpoints/ycfyp18l", "d_sae": 24576, "tokens_per_buffer": 67108864, "run_name": "24576-L1-4e-05-LR-0.0004-Tokens-3.000e+08"}
blocks.11.hook_resid_post/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1bcbeb3b2fae281978ed85003a6aec32e702489e1f957470338ee0aa460e528c
+ size 151096640
blocks.11.hook_resid_post/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:16efe3d3d06441ea127b466cd0d30211426e9f0659d1d530207268e261fa7a44
+ size 98384
blocks.11.hook_resid_pre/cfg.json ADDED
@@ -0,0 +1 @@
+ {"model_name": "gpt2-small", "hook_point": "blocks.11.hook_resid_pre", "hook_point_layer": 11, "hook_point_head_index": null, "dataset_path": "Skylion007/openwebtext", "is_dataset_tokenized": false, "context_size": 128, "use_cached_activations": false, "cached_activations_path": "activations/Skylion007_openwebtext/gpt2-small/blocks.11.hook_resid_pre", "d_in": 768, "n_batches_in_buffer": 128, "total_training_tokens": 300000000, "store_batch_size": 32, "device": "mps", "seed": 42, "dtype": "torch.float32", "b_dec_init_method": "geometric_median", "expansion_factor": 32, "from_pretrained_path": null, "l1_coefficient": 8e-05, "lr": 0.0004, "lr_scheduler_name": null, "lr_warm_up_steps": 5000, "train_batch_size": 4096, "use_ghost_grads": false, "feature_sampling_window": 1000, "feature_sampling_method": null, "resample_batches": 1028, "feature_reinit_scale": 0.2, "dead_feature_window": 5000, "dead_feature_estimation_method": "no_fire", "dead_feature_threshold": 1e-08, "log_to_wandb": true, "wandb_project": "mats_sae_training_gpt2_small_resid_pre_5", "wandb_entity": null, "wandb_log_frequency": 100, "n_checkpoints": 10, "checkpoint_path": "checkpoints/gf296egd", "d_sae": 24576, "tokens_per_buffer": 67108864, "run_name": "24576-L1-8e-05-LR-0.0004-Tokens-3.000e+08"}
blocks.11.hook_resid_pre/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1cb4a926a342336734e12452c9eba94663b9b4855494a9ebebaba7ea104f89e6
+ size 151096640
blocks.11.hook_resid_pre/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e19aef51cbbf066abe54f4310b19a203a30994b8972083686097d5d9ea8cbe69
+ size 98384
blocks.2.hook_resid_pre/cfg.json ADDED
@@ -0,0 +1 @@
+ {"model_name": "gpt2-small", "hook_point": "blocks.2.hook_resid_pre", "hook_point_layer": 2, "hook_point_head_index": null, "dataset_path": "Skylion007/openwebtext", "is_dataset_tokenized": false, "context_size": 128, "use_cached_activations": false, "cached_activations_path": "activations/Skylion007_openwebtext/gpt2-small/blocks.2.hook_resid_pre", "d_in": 768, "n_batches_in_buffer": 128, "total_training_tokens": 300000000, "store_batch_size": 32, "device": "mps", "seed": 42, "dtype": "torch.float32", "b_dec_init_method": "geometric_median", "expansion_factor": 32, "from_pretrained_path": null, "l1_coefficient": 8e-05, "lr": 0.0004, "lr_scheduler_name": null, "lr_warm_up_steps": 5000, "train_batch_size": 4096, "use_ghost_grads": false, "feature_sampling_window": 1000, "feature_sampling_method": null, "resample_batches": 1028, "feature_reinit_scale": 0.2, "dead_feature_window": 5000, "dead_feature_estimation_method": "no_fire", "dead_feature_threshold": 1e-08, "log_to_wandb": true, "wandb_project": "mats_sae_training_gpt2_small_resid_pre_5", "wandb_entity": null, "wandb_log_frequency": 100, "n_checkpoints": 10, "checkpoint_path": "checkpoints/59dzvtdt", "d_sae": 24576, "tokens_per_buffer": 67108864, "run_name": "24576-L1-8e-05-LR-0.0004-Tokens-3.000e+08"}
blocks.2.hook_resid_pre/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:57aae3d13394d8ecfc6950fb4d40e45cabf6ebc871f3cc52845742e5d6ffb979
+ size 151096640
blocks.2.hook_resid_pre/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0b82da0b220150cf07740b38ca83496fb6e823575a107c8ceb1728da732be79b
+ size 98384
blocks.3.hook_resid_pre/cfg.json ADDED
@@ -0,0 +1 @@
+ {"model_name": "gpt2-small", "hook_point": "blocks.3.hook_resid_pre", "hook_point_layer": 3, "hook_point_head_index": null, "dataset_path": "Skylion007/openwebtext", "is_dataset_tokenized": false, "context_size": 128, "use_cached_activations": false, "cached_activations_path": "activations/Skylion007_openwebtext/gpt2-small/blocks.3.hook_resid_pre", "d_in": 768, "n_batches_in_buffer": 128, "total_training_tokens": 300000000, "store_batch_size": 32, "device": "mps", "seed": 42, "dtype": "torch.float32", "b_dec_init_method": "geometric_median", "expansion_factor": 32, "from_pretrained_path": null, "l1_coefficient": 8e-05, "lr": 0.0004, "lr_scheduler_name": null, "lr_warm_up_steps": 5000, "train_batch_size": 4096, "use_ghost_grads": false, "feature_sampling_window": 1000, "feature_sampling_method": null, "resample_batches": 1028, "feature_reinit_scale": 0.2, "dead_feature_window": 5000, "dead_feature_estimation_method": "no_fire", "dead_feature_threshold": 1e-08, "log_to_wandb": true, "wandb_project": "mats_sae_training_gpt2_small_resid_pre_5", "wandb_entity": null, "wandb_log_frequency": 100, "n_checkpoints": 10, "checkpoint_path": "checkpoints/0sgl1gqz", "d_sae": 24576, "tokens_per_buffer": 67108864, "run_name": "24576-L1-8e-05-LR-0.0004-Tokens-3.000e+08"}
blocks.3.hook_resid_pre/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dd8148397ada4fd6fd189668a83c3719edfc869fc6064196c8fa85c6bafd6b65
+ size 151096640
blocks.3.hook_resid_pre/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f069061ea6cb708e75601a3be6c6782b74765635290c645927b8de6218306200
+ size 98384
blocks.4.hook_resid_pre/cfg.json ADDED
@@ -0,0 +1 @@
+ {"model_name": "gpt2-small", "hook_point": "blocks.4.hook_resid_pre", "hook_point_layer": 4, "hook_point_head_index": null, "dataset_path": "Skylion007/openwebtext", "is_dataset_tokenized": false, "context_size": 128, "use_cached_activations": false, "cached_activations_path": "activations/Skylion007_openwebtext/gpt2-small/blocks.4.hook_resid_pre", "d_in": 768, "n_batches_in_buffer": 128, "total_training_tokens": 300000000, "store_batch_size": 32, "device": "mps", "seed": 42, "dtype": "torch.float32", "b_dec_init_method": "geometric_median", "expansion_factor": 32, "from_pretrained_path": null, "l1_coefficient": 8e-05, "lr": 0.0004, "lr_scheduler_name": null, "lr_warm_up_steps": 5000, "train_batch_size": 4096, "use_ghost_grads": false, "feature_sampling_window": 1000, "feature_sampling_method": null, "resample_batches": 1028, "feature_reinit_scale": 0.2, "dead_feature_window": 5000, "dead_feature_estimation_method": "no_fire", "dead_feature_threshold": 1e-08, "log_to_wandb": true, "wandb_project": "mats_sae_training_gpt2_small_resid_pre_5", "wandb_entity": null, "wandb_log_frequency": 100, "n_checkpoints": 10, "checkpoint_path": "checkpoints/7rzeo1iv", "d_sae": 24576, "tokens_per_buffer": 67108864, "run_name": "24576-L1-8e-05-LR-0.0004-Tokens-3.000e+08"}
blocks.4.hook_resid_pre/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7f27352d2d3b4d7a619c33a02fab455101623d1a6b4925e642dfbe32e47f1e2c
+ size 151096640
blocks.4.hook_resid_pre/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a25ab21e1909f8bbd374032801e14267f88a39a2748ffb23286de3f68695141a
+ size 98384
blocks.5.hook_resid_pre/cfg.json ADDED
@@ -0,0 +1 @@
+ {"model_name": "gpt2-small", "hook_point": "blocks.5.hook_resid_pre", "hook_point_layer": 5, "hook_point_head_index": null, "dataset_path": "Skylion007/openwebtext", "is_dataset_tokenized": false, "context_size": 128, "use_cached_activations": false, "cached_activations_path": "activations/Skylion007_openwebtext/gpt2-small/blocks.5.hook_resid_pre", "d_in": 768, "n_batches_in_buffer": 128, "total_training_tokens": 300000000, "store_batch_size": 32, "device": "mps", "seed": 42, "dtype": "torch.float32", "b_dec_init_method": "geometric_median", "expansion_factor": 32, "from_pretrained_path": null, "l1_coefficient": 8e-05, "lr": 0.0004, "lr_scheduler_name": null, "lr_warm_up_steps": 5000, "train_batch_size": 4096, "use_ghost_grads": false, "feature_sampling_window": 1000, "feature_sampling_method": null, "resample_batches": 1028, "feature_reinit_scale": 0.2, "dead_feature_window": 5000, "dead_feature_estimation_method": "no_fire", "dead_feature_threshold": 1e-08, "log_to_wandb": true, "wandb_project": "mats_sae_training_gpt2_small_resid_pre_5", "wandb_entity": null, "wandb_log_frequency": 100, "n_checkpoints": 10, "checkpoint_path": "checkpoints/65ufbyeo", "d_sae": 24576, "tokens_per_buffer": 67108864, "run_name": "24576-L1-8e-05-LR-0.0004-Tokens-3.000e+08"}
blocks.5.hook_resid_pre/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e884d76a089658fbd5ce2cd25f5579c4fa0b09a361fb3ee9b2641e54aed8d126
+ size 151096640
blocks.5.hook_resid_pre/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:322fce74d3bff82978d800ba20a5ad1ace7b482d7c8114e76c94ba7bf24bad26
+ size 98384
blocks.6.hook_resid_pre/cfg.json ADDED
@@ -0,0 +1 @@
+ {"model_name": "gpt2-small", "hook_point": "blocks.6.hook_resid_pre", "hook_point_layer": 6, "hook_point_head_index": null, "dataset_path": "Skylion007/openwebtext", "is_dataset_tokenized": false, "context_size": 128, "use_cached_activations": false, "cached_activations_path": "activations/Skylion007_openwebtext/gpt2-small/blocks.6.hook_resid_pre", "d_in": 768, "n_batches_in_buffer": 128, "total_training_tokens": 300000000, "store_batch_size": 32, "device": "mps", "seed": 42, "dtype": "torch.float32", "b_dec_init_method": "geometric_median", "expansion_factor": 32, "from_pretrained_path": null, "l1_coefficient": 8e-05, "lr": 0.0004, "lr_scheduler_name": null, "lr_warm_up_steps": 5000, "train_batch_size": 4096, "use_ghost_grads": false, "feature_sampling_window": 1000, "feature_sampling_method": null, "resample_batches": 1028, "feature_reinit_scale": 0.2, "dead_feature_window": 5000, "dead_feature_estimation_method": "no_fire", "dead_feature_threshold": 1e-08, "log_to_wandb": true, "wandb_project": "mats_sae_training_gpt2_small_resid_pre_5", "wandb_entity": null, "wandb_log_frequency": 100, "n_checkpoints": 10, "checkpoint_path": "checkpoints/vjvlfpxa", "d_sae": 24576, "tokens_per_buffer": 67108864, "run_name": "24576-L1-8e-05-LR-0.0004-Tokens-3.000e+08"}
blocks.6.hook_resid_pre/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3ff899406a9fb3c33aebca365d6a8065a2f48eba9cef699543fb8602abe207be
+ size 151096640
blocks.6.hook_resid_pre/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f6e4a1071b1d5a0370d05abd844bc0388fb973962d0f409b74e114cf93ebeb00
+ size 98384
blocks.7.hook_resid_pre/cfg.json ADDED
@@ -0,0 +1 @@
+ {"model_name": "gpt2-small", "hook_point": "blocks.7.hook_resid_pre", "hook_point_layer": 7, "hook_point_head_index": null, "dataset_path": "Skylion007/openwebtext", "is_dataset_tokenized": false, "context_size": 128, "use_cached_activations": false, "cached_activations_path": "activations/Skylion007_openwebtext/gpt2-small/blocks.7.hook_resid_pre", "d_in": 768, "n_batches_in_buffer": 128, "total_training_tokens": 300000000, "store_batch_size": 32, "device": "mps", "seed": 42, "dtype": "torch.float32", "b_dec_init_method": "geometric_median", "expansion_factor": 32, "from_pretrained_path": null, "l1_coefficient": 8e-05, "lr": 0.0004, "lr_scheduler_name": null, "lr_warm_up_steps": 5000, "train_batch_size": 4096, "use_ghost_grads": false, "feature_sampling_window": 1000, "feature_sampling_method": null, "resample_batches": 1028, "feature_reinit_scale": 0.2, "dead_feature_window": 5000, "dead_feature_estimation_method": "no_fire", "dead_feature_threshold": 1e-08, "log_to_wandb": true, "wandb_project": "mats_sae_training_gpt2_small_resid_pre_5", "wandb_entity": null, "wandb_log_frequency": 100, "n_checkpoints": 10, "checkpoint_path": "checkpoints/n6gbpj7s", "d_sae": 24576, "tokens_per_buffer": 67108864, "run_name": "24576-L1-8e-05-LR-0.0004-Tokens-3.000e+08"}
blocks.7.hook_resid_pre/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:47bfb75008fdd7ebf068044c0c3a212606aaa3f5dc05f1d1a7cffe502002c0b6
+ size 151096640
blocks.7.hook_resid_pre/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:85a39daf13987de6a2234afa4814282a1f18626fc87c9a1a161ecc4e1e50992b
+ size 98384
blocks.8.hook_resid_pre/cfg.json ADDED
@@ -0,0 +1 @@
+ {"model_name": "gpt2-small", "hook_point": "blocks.8.hook_resid_pre", "hook_point_layer": 8, "hook_point_head_index": null, "dataset_path": "Skylion007/openwebtext", "is_dataset_tokenized": false, "context_size": 128, "use_cached_activations": false, "cached_activations_path": "activations/Skylion007_openwebtext/gpt2-small/blocks.8.hook_resid_pre", "d_in": 768, "n_batches_in_buffer": 128, "total_training_tokens": 300000000, "store_batch_size": 32, "device": "mps", "seed": 42, "dtype": "torch.float32", "b_dec_init_method": "geometric_median", "expansion_factor": 32, "from_pretrained_path": null, "l1_coefficient": 8e-05, "lr": 0.0004, "lr_scheduler_name": null, "lr_warm_up_steps": 5000, "train_batch_size": 4096, "use_ghost_grads": false, "feature_sampling_window": 1000, "feature_sampling_method": null, "resample_batches": 1028, "feature_reinit_scale": 0.2, "dead_feature_window": 5000, "dead_feature_estimation_method": "no_fire", "dead_feature_threshold": 1e-08, "log_to_wandb": true, "wandb_project": "mats_sae_training_gpt2_small_resid_pre_5", "wandb_entity": null, "wandb_log_frequency": 100, "n_checkpoints": 10, "checkpoint_path": "checkpoints/ut7lhl4q", "d_sae": 24576, "tokens_per_buffer": 67108864, "run_name": "24576-L1-8e-05-LR-0.0004-Tokens-3.000e+08"}
blocks.8.hook_resid_pre/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d46a41aa7a9e0475c135e41dde0cb3d8510528ce3140568a18a0e29d3a624d8b
+ size 151096640
blocks.8.hook_resid_pre/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e3fdd622817b0695ffc68d3965eb26beea6da21dcf8a02c02d4ac6d99e872458
+ size 98384
blocks.9.hook_resid_pre/cfg.json ADDED
@@ -0,0 +1 @@
+ {"model_name": "gpt2-small", "hook_point": "blocks.9.hook_resid_pre", "hook_point_layer": 9, "hook_point_head_index": null, "dataset_path": "Skylion007/openwebtext", "is_dataset_tokenized": false, "context_size": 128, "use_cached_activations": false, "cached_activations_path": "activations/Skylion007_openwebtext/gpt2-small/blocks.9.hook_resid_pre", "d_in": 768, "n_batches_in_buffer": 128, "total_training_tokens": 300000000, "store_batch_size": 32, "device": "mps", "seed": 42, "dtype": "torch.float32", "b_dec_init_method": "geometric_median", "expansion_factor": 32, "from_pretrained_path": null, "l1_coefficient": 8e-05, "lr": 0.0004, "lr_scheduler_name": null, "lr_warm_up_steps": 5000, "train_batch_size": 4096, "use_ghost_grads": false, "feature_sampling_window": 1000, "feature_sampling_method": null, "resample_batches": 1028, "feature_reinit_scale": 0.2, "dead_feature_window": 5000, "dead_feature_estimation_method": "no_fire", "dead_feature_threshold": 1e-08, "log_to_wandb": true, "wandb_project": "mats_sae_training_gpt2_small_resid_pre_5", "wandb_entity": null, "wandb_log_frequency": 100, "n_checkpoints": 10, "checkpoint_path": "checkpoints/wg1xo7vo", "d_sae": 24576, "tokens_per_buffer": 67108864, "run_name": "24576-L1-8e-05-LR-0.0004-Tokens-3.000e+08"}
blocks.9.hook_resid_pre/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:beee562c3fee612b76c0e79994d45b82780c2055cfa1305d2644b598d49efec7
+ size 151096640
blocks.9.hook_resid_pre/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cda7bc856dd536b0a33cd1e4d2ee4bb442a2b74950985b41de9c4811710876e7
+ size 98384