Upload 24 files
- blocks.8.hook_resid_pre_12288/cfg.json +1 -0
- blocks.8.hook_resid_pre_12288/sae_weights.safetensors +3 -0
- blocks.8.hook_resid_pre_12288/sparsity.safetensors +3 -0
- blocks.8.hook_resid_pre_1536/cfg.json +1 -0
- blocks.8.hook_resid_pre_1536/sae_weights.safetensors +3 -0
- blocks.8.hook_resid_pre_1536/sparsity.safetensors +3 -0
- blocks.8.hook_resid_pre_24576/cfg.json +1 -0
- blocks.8.hook_resid_pre_24576/sae_weights.safetensors +3 -0
- blocks.8.hook_resid_pre_24576/sparsity.safetensors +3 -0
- blocks.8.hook_resid_pre_3072/cfg.json +1 -0
- blocks.8.hook_resid_pre_3072/sae_weights.safetensors +3 -0
- blocks.8.hook_resid_pre_3072/sparsity.safetensors +3 -0
- blocks.8.hook_resid_pre_49152/cfg.json +1 -0
- blocks.8.hook_resid_pre_49152/sae_weights.safetensors +3 -0
- blocks.8.hook_resid_pre_49152/sparsity.safetensors +3 -0
- blocks.8.hook_resid_pre_6144/cfg.json +1 -0
- blocks.8.hook_resid_pre_6144/sae_weights.safetensors +3 -0
- blocks.8.hook_resid_pre_6144/sparsity.safetensors +3 -0
- blocks.8.hook_resid_pre_768/cfg.json +1 -0
- blocks.8.hook_resid_pre_768/sae_weights.safetensors +3 -0
- blocks.8.hook_resid_pre_768/sparsity.safetensors +3 -0
- blocks.8.hook_resid_pre_98304/cfg.json +1 -0
- blocks.8.hook_resid_pre_98304/sae_weights.safetensors +3 -0
- blocks.8.hook_resid_pre_98304/sparsity.safetensors +3 -0
blocks.8.hook_resid_pre_12288/cfg.json
ADDED
@@ -0,0 +1 @@
+{"model_name": "gpt2-small", "hook_point": "blocks.8.hook_resid_pre", "hook_point_layer": 8, "hook_point_head_index": null, "dataset_path": "Skylion007/openwebtext", "is_dataset_tokenized": false, "context_size": 128, "use_cached_activations": false, "cached_activations_path": "activations/Skylion007_openwebtext/gpt2-small/blocks.8.hook_resid_pre", "d_in": 768, "n_batches_in_buffer": 128, "total_training_tokens": 300000000, "store_batch_size": 32, "device": "mps", "seed": 42, "dtype": "torch.float32", "b_dec_init_method": "geometric_median", "expansion_factor": 16, "from_pretrained_path": null, "l1_coefficient": 8e-05, "lr": 0.0004, "lr_scheduler_name": null, "lr_warm_up_steps": 5000, "train_batch_size": 4096, "use_ghost_grads": true, "feature_sampling_window": 1000, "feature_sampling_method": null, "resample_batches": 1028, "feature_reinit_scale": 0.2, "dead_feature_window": 5000, "dead_feature_estimation_method": "no_fire", "dead_feature_threshold": 1e-08, "log_to_wandb": true, "wandb_project": "mats_sae_training_gpt2_feature_splitting_experiment", "wandb_entity": null, "wandb_log_frequency": 100, "n_checkpoints": 10, "checkpoint_path": "checkpoints/83dxxo6a", "d_sae": 12288, "tokens_per_buffer": 67108864, "run_name": "12288-L1-8e-05-LR-0.0004-Tokens-3.000e+08"}
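Note: the eight cfg.json files in this upload differ only in expansion_factor (1 through 128), the checkpoint_path, and the resulting d_sae = d_in * expansion_factor, so the dictionary widths run from 768 to 98304. A minimal sketch of reading one of these configs and sanity-checking that relationship (the local path is a placeholder for wherever the repo is downloaded):

```python
import json

# Hypothetical local path; adjust to wherever this repo has been cloned/downloaded.
with open("blocks.8.hook_resid_pre_12288/cfg.json") as f:
    cfg = json.load(f)

# d_sae should equal d_in * expansion_factor (768 * 16 = 12288 for this directory).
assert cfg["d_sae"] == cfg["d_in"] * cfg["expansion_factor"]
print(cfg["run_name"], cfg["hook_point"], cfg["d_sae"])
```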
blocks.8.hook_resid_pre_12288/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1abfaf5ce9004a59cf8902678f64c7252c48fcea73f752325b840d5493544fc0
+size 75599240
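Note: the sae_weights.safetensors entries are committed as Git LFS pointers (version/oid/size), so the tensors themselves live in LFS storage. Once the real file is fetched, a minimal sketch for inspecting it with the safetensors library; the key names W_enc / W_dec / b_enc / b_dec are an assumption about what the training code saved, so the code only lists whatever the file actually contains:

```python
from safetensors.torch import load_file

# Hypothetical local path to the resolved LFS object, not the pointer text above.
weights = load_file("blocks.8.hook_resid_pre_12288/sae_weights.safetensors")

# Print every tensor the file holds; do not rely on assumed names such as
# W_enc / W_dec / b_enc / b_dec without checking the keys first.
for name, tensor in weights.items():
    print(name, tuple(tensor.shape), tensor.dtype)
```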
blocks.8.hook_resid_pre_12288/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:730faacc0d119daf0540e5eb44fd9c7d6c61517efb1099d28a83cbe3de8ba080
+size 49232
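Note: each sparsity.safetensors file is small (49,232 bytes here), consistent with a single float32 vector of length d_sae plus the safetensors header, presumably per-feature firing statistics. Whether the stored values are raw firing rates or log10 rates is an assumption to verify against the training code; the sketch below just loads the tensor and reports its shape and value range:

```python
from safetensors.torch import load_file

# Hypothetical local path to the resolved LFS object.
tensors = load_file("blocks.8.hook_resid_pre_12288/sparsity.safetensors")

# Assumes the file holds a single entry (one vector of length d_sae, 12288 here).
(name, sparsity), = tensors.items()

# Whether these are raw or log10 firing rates is an assumption to check.
print(name, tuple(sparsity.shape), float(sparsity.min()), float(sparsity.max()))
```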
blocks.8.hook_resid_pre_1536/cfg.json
ADDED
@@ -0,0 +1 @@
+{"model_name": "gpt2-small", "hook_point": "blocks.8.hook_resid_pre", "hook_point_layer": 8, "hook_point_head_index": null, "dataset_path": "Skylion007/openwebtext", "is_dataset_tokenized": false, "context_size": 128, "use_cached_activations": false, "cached_activations_path": "activations/Skylion007_openwebtext/gpt2-small/blocks.8.hook_resid_pre", "d_in": 768, "n_batches_in_buffer": 128, "total_training_tokens": 300000000, "store_batch_size": 32, "device": "mps", "seed": 42, "dtype": "torch.float32", "b_dec_init_method": "geometric_median", "expansion_factor": 2, "from_pretrained_path": null, "l1_coefficient": 8e-05, "lr": 0.0004, "lr_scheduler_name": null, "lr_warm_up_steps": 5000, "train_batch_size": 4096, "use_ghost_grads": true, "feature_sampling_window": 1000, "feature_sampling_method": null, "resample_batches": 1028, "feature_reinit_scale": 0.2, "dead_feature_window": 5000, "dead_feature_estimation_method": "no_fire", "dead_feature_threshold": 1e-08, "log_to_wandb": true, "wandb_project": "mats_sae_training_gpt2_feature_splitting_experiment", "wandb_entity": null, "wandb_log_frequency": 100, "n_checkpoints": 10, "checkpoint_path": "checkpoints/wv0n1cz3", "d_sae": 1536, "tokens_per_buffer": 67108864, "run_name": "1536-L1-8e-05-LR-0.0004-Tokens-3.000e+08"}
blocks.8.hook_resid_pre_1536/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3cc879e6fe8b2b3a025619c138ef48319d73770e0dafd976504bb87b8b470089
+size 9452928
blocks.8.hook_resid_pre_1536/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8abdbd5a642cfa1fc557bbf55047ff3a7c0ecc65ec30ae5fead7993f1a3ae7b5
+size 6224
blocks.8.hook_resid_pre_24576/cfg.json
ADDED
@@ -0,0 +1 @@
+{"model_name": "gpt2-small", "hook_point": "blocks.8.hook_resid_pre", "hook_point_layer": 8, "hook_point_head_index": null, "dataset_path": "Skylion007/openwebtext", "is_dataset_tokenized": false, "context_size": 128, "use_cached_activations": false, "cached_activations_path": "activations/Skylion007_openwebtext/gpt2-small/blocks.8.hook_resid_pre", "d_in": 768, "n_batches_in_buffer": 128, "total_training_tokens": 300000000, "store_batch_size": 32, "device": "mps", "seed": 42, "dtype": "torch.float32", "b_dec_init_method": "geometric_median", "expansion_factor": 32, "from_pretrained_path": null, "l1_coefficient": 8e-05, "lr": 0.0004, "lr_scheduler_name": null, "lr_warm_up_steps": 5000, "train_batch_size": 4096, "use_ghost_grads": true, "feature_sampling_window": 1000, "feature_sampling_method": null, "resample_batches": 1028, "feature_reinit_scale": 0.2, "dead_feature_window": 5000, "dead_feature_estimation_method": "no_fire", "dead_feature_threshold": 1e-08, "log_to_wandb": true, "wandb_project": "mats_sae_training_gpt2_feature_splitting_experiment", "wandb_entity": null, "wandb_log_frequency": 100, "n_checkpoints": 10, "checkpoint_path": "checkpoints/cbvk8gtc", "d_sae": 24576, "tokens_per_buffer": 67108864, "run_name": "24576-L1-8e-05-LR-0.0004-Tokens-3.000e+08"}
blocks.8.hook_resid_pre_24576/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d16593d26e48f1009e0bb43cecec66a629dc95693c78c0ec78fa75bad8d26dfc
+size 151195024
blocks.8.hook_resid_pre_24576/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a825695c263ba6f4d3e9292d124551955eb5d7df3adb9b3a5c717a7e2a0a2a6d
+size 98384
blocks.8.hook_resid_pre_3072/cfg.json
ADDED
@@ -0,0 +1 @@
+{"model_name": "gpt2-small", "hook_point": "blocks.8.hook_resid_pre", "hook_point_layer": 8, "hook_point_head_index": null, "dataset_path": "Skylion007/openwebtext", "is_dataset_tokenized": false, "context_size": 128, "use_cached_activations": false, "cached_activations_path": "activations/Skylion007_openwebtext/gpt2-small/blocks.8.hook_resid_pre", "d_in": 768, "n_batches_in_buffer": 128, "total_training_tokens": 300000000, "store_batch_size": 32, "device": "mps", "seed": 42, "dtype": "torch.float32", "b_dec_init_method": "geometric_median", "expansion_factor": 4, "from_pretrained_path": null, "l1_coefficient": 8e-05, "lr": 0.0004, "lr_scheduler_name": null, "lr_warm_up_steps": 5000, "train_batch_size": 4096, "use_ghost_grads": true, "feature_sampling_window": 1000, "feature_sampling_method": null, "resample_batches": 1028, "feature_reinit_scale": 0.2, "dead_feature_window": 5000, "dead_feature_estimation_method": "no_fire", "dead_feature_threshold": 1e-08, "log_to_wandb": true, "wandb_project": "mats_sae_training_gpt2_feature_splitting_experiment", "wandb_entity": null, "wandb_log_frequency": 100, "n_checkpoints": 10, "checkpoint_path": "checkpoints/v5wlsd29", "d_sae": 3072, "tokens_per_buffer": 67108864, "run_name": "3072-L1-8e-05-LR-0.0004-Tokens-3.000e+08"}
blocks.8.hook_resid_pre_3072/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d1ee6da5a164e5e3bab40769161f4ecb6c9da6ff28df15ea223a1ff46e1d0b7c
+size 18902408
blocks.8.hook_resid_pre_3072/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2f001d6284bf938b143e8745a350983e8c4b2f38f0bbabc8be8c5a18d5f08baf
+size 12368
blocks.8.hook_resid_pre_49152/cfg.json
ADDED
@@ -0,0 +1 @@
+{"model_name": "gpt2-small", "hook_point": "blocks.8.hook_resid_pre", "hook_point_layer": 8, "hook_point_head_index": null, "dataset_path": "Skylion007/openwebtext", "is_dataset_tokenized": false, "context_size": 128, "use_cached_activations": false, "cached_activations_path": "activations/Skylion007_openwebtext/gpt2-small/blocks.8.hook_resid_pre", "d_in": 768, "n_batches_in_buffer": 128, "total_training_tokens": 300000000, "store_batch_size": 32, "device": "mps", "seed": 42, "dtype": "torch.float32", "b_dec_init_method": "geometric_median", "expansion_factor": 64, "from_pretrained_path": null, "l1_coefficient": 8e-05, "lr": 0.0004, "lr_scheduler_name": null, "lr_warm_up_steps": 5000, "train_batch_size": 4096, "use_ghost_grads": true, "feature_sampling_window": 1000, "feature_sampling_method": null, "resample_batches": 1028, "feature_reinit_scale": 0.2, "dead_feature_window": 5000, "dead_feature_estimation_method": "no_fire", "dead_feature_threshold": 1e-08, "log_to_wandb": true, "wandb_project": "mats_sae_training_gpt2_feature_splitting_experiment", "wandb_entity": null, "wandb_log_frequency": 100, "n_checkpoints": 10, "checkpoint_path": "checkpoints/u4xlxwrh", "d_sae": 49152, "tokens_per_buffer": 67108864, "run_name": "49152-L1-8e-05-LR-0.0004-Tokens-3.000e+08"}
blocks.8.hook_resid_pre_49152/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1a350ad88c73be15c28ca552d4ef9425bc39dac71b2fe11c9943be4041aea3a1
+size 302386584
blocks.8.hook_resid_pre_49152/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7a57a8b877e10dae8bb43407191e6a531a82942a352bb94d2e3006cd4aff6c35
+size 196688
blocks.8.hook_resid_pre_6144/cfg.json
ADDED
@@ -0,0 +1 @@
+{"model_name": "gpt2-small", "hook_point": "blocks.8.hook_resid_pre", "hook_point_layer": 8, "hook_point_head_index": null, "dataset_path": "Skylion007/openwebtext", "is_dataset_tokenized": false, "context_size": 128, "use_cached_activations": false, "cached_activations_path": "activations/Skylion007_openwebtext/gpt2-small/blocks.8.hook_resid_pre", "d_in": 768, "n_batches_in_buffer": 128, "total_training_tokens": 300000000, "store_batch_size": 32, "device": "mps", "seed": 42, "dtype": "torch.float32", "b_dec_init_method": "geometric_median", "expansion_factor": 8, "from_pretrained_path": null, "l1_coefficient": 8e-05, "lr": 0.0004, "lr_scheduler_name": null, "lr_warm_up_steps": 5000, "train_batch_size": 4096, "use_ghost_grads": true, "feature_sampling_window": 1000, "feature_sampling_method": null, "resample_batches": 1028, "feature_reinit_scale": 0.2, "dead_feature_window": 5000, "dead_feature_estimation_method": "no_fire", "dead_feature_threshold": 1e-08, "log_to_wandb": true, "wandb_project": "mats_sae_training_gpt2_feature_splitting_experiment", "wandb_entity": null, "wandb_log_frequency": 100, "n_checkpoints": 10, "checkpoint_path": "checkpoints/jy7r81cs", "d_sae": 6144, "tokens_per_buffer": 67108864, "run_name": "6144-L1-8e-05-LR-0.0004-Tokens-3.000e+08"}
blocks.8.hook_resid_pre_6144/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d622ffaee848d7d8f52853954d85c92f85f1d1dd5f3d2da1cf945356a5c00a27
+size 37801352
blocks.8.hook_resid_pre_6144/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f665aa1472bf1563dfde20d920810c65b2bbc10e0689708c5f1420f243772e88
+size 24656
blocks.8.hook_resid_pre_768/cfg.json
ADDED
@@ -0,0 +1 @@
+{"model_name": "gpt2-small", "hook_point": "blocks.8.hook_resid_pre", "hook_point_layer": 8, "hook_point_head_index": null, "dataset_path": "Skylion007/openwebtext", "is_dataset_tokenized": false, "context_size": 128, "use_cached_activations": false, "cached_activations_path": "activations/Skylion007_openwebtext/gpt2-small/blocks.8.hook_resid_pre", "d_in": 768, "n_batches_in_buffer": 128, "total_training_tokens": 300000000, "store_batch_size": 32, "device": "mps", "seed": 42, "dtype": "torch.float32", "b_dec_init_method": "geometric_median", "expansion_factor": 1, "from_pretrained_path": null, "l1_coefficient": 8e-05, "lr": 0.0004, "lr_scheduler_name": null, "lr_warm_up_steps": 5000, "train_batch_size": 4096, "use_ghost_grads": true, "feature_sampling_window": 1000, "feature_sampling_method": null, "resample_batches": 1028, "feature_reinit_scale": 0.2, "dead_feature_window": 5000, "dead_feature_estimation_method": "no_fire", "dead_feature_threshold": 1e-08, "log_to_wandb": true, "wandb_project": "mats_sae_training_gpt2_feature_splitting_experiment", "wandb_entity": null, "wandb_log_frequency": 100, "n_checkpoints": 10, "checkpoint_path": "checkpoints/ap977vbz", "d_sae": 768, "tokens_per_buffer": 67108864, "run_name": "768-L1-8e-05-LR-0.0004-Tokens-3.000e+08"}
blocks.8.hook_resid_pre_768/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b34a479ee06302ad09d85a76c610bb045621823c1658b7e839c8ea342c2a3b8e
+size 4728184
blocks.8.hook_resid_pre_768/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a2300bf557b752548fbf581d00e6b72f195a0840130fbad1596b0ae1a9342bb5
+size 3152
blocks.8.hook_resid_pre_98304/cfg.json
ADDED
@@ -0,0 +1 @@
+{"model_name": "gpt2-small", "hook_point": "blocks.8.hook_resid_pre", "hook_point_layer": 8, "hook_point_head_index": null, "dataset_path": "Skylion007/openwebtext", "is_dataset_tokenized": false, "context_size": 128, "use_cached_activations": false, "cached_activations_path": "activations/Skylion007_openwebtext/gpt2-small/blocks.8.hook_resid_pre", "d_in": 768, "n_batches_in_buffer": 128, "total_training_tokens": 300000000, "store_batch_size": 32, "device": "mps", "seed": 42, "dtype": "torch.float32", "b_dec_init_method": "geometric_median", "expansion_factor": 128, "from_pretrained_path": null, "l1_coefficient": 8e-05, "lr": 0.0004, "lr_scheduler_name": null, "lr_warm_up_steps": 5000, "train_batch_size": 4096, "use_ghost_grads": true, "feature_sampling_window": 1000, "feature_sampling_method": null, "resample_batches": 1028, "feature_reinit_scale": 0.2, "dead_feature_window": 5000, "dead_feature_estimation_method": "no_fire", "dead_feature_threshold": 1e-08, "log_to_wandb": true, "wandb_project": "mats_sae_training_gpt2_feature_splitting_experiment", "wandb_entity": null, "wandb_log_frequency": 100, "n_checkpoints": 10, "checkpoint_path": "checkpoints/kknavokh", "d_sae": 98304, "tokens_per_buffer": 67108864, "run_name": "98304-L1-8e-05-LR-0.0004-Tokens-3.000e+08"}
blocks.8.hook_resid_pre_98304/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:88476c994f38950781b73c3500ddc1ab9fa07667351ca33d7baac92bb7cad212
+size 604769688
blocks.8.hook_resid_pre_98304/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6126c566fc8aca04b602801397c86c5fad61421fc7e76911fe4a8f2fac666724
+size 393296