jacobcd52 committed on
Commit 8a6e044
1 Parent(s): 55da197

Upload 42 files

Files changed (42)
  1. gpt2-small_6144_mlp_out_0.json +1 -0
  2. gpt2-small_6144_mlp_out_0.pt +3 -0
  3. gpt2-small_6144_mlp_out_1.json +1 -0
  4. gpt2-small_6144_mlp_out_1.pt +3 -0
  5. gpt2-small_6144_mlp_out_10.pt +3 -0
  6. gpt2-small_6144_mlp_out_10_cfg.json +1 -0
  7. gpt2-small_6144_mlp_out_11.pt +3 -0
  8. gpt2-small_6144_mlp_out_11_cfg.json +1 -0
  9. gpt2-small_6144_mlp_out_2.pt +3 -0
  10. gpt2-small_6144_mlp_out_2_cfg.json +1 -0
  11. gpt2-small_6144_mlp_out_3.pt +3 -0
  12. gpt2-small_6144_mlp_out_3_cfg.json +1 -0
  13. gpt2-small_6144_mlp_out_4.pt +3 -0
  14. gpt2-small_6144_mlp_out_4_cfg.json +1 -0
  15. gpt2-small_6144_mlp_out_5.pt +3 -0
  16. gpt2-small_6144_mlp_out_5_cfg.json +1 -0
  17. gpt2-small_6144_mlp_out_6.pt +3 -0
  18. gpt2-small_6144_mlp_out_6_cfg.json +1 -0
  19. gpt2-small_6144_mlp_out_7.pt +3 -0
  20. gpt2-small_6144_mlp_out_7_cfg.json +1 -0
  21. gpt2-small_6144_mlp_out_8.pt +3 -0
  22. gpt2-small_6144_mlp_out_8_cfg.json +1 -0
  23. gpt2-small_6144_mlp_out_9.pt +3 -0
  24. gpt2-small_6144_mlp_out_9_cfg.json +1 -0
  25. gpt2-small_6144_resid_pre_10.pt +3 -0
  26. gpt2-small_6144_resid_pre_10_cfg.json +1 -0
  27. gpt2-small_6144_resid_pre_11.pt +3 -0
  28. gpt2-small_6144_resid_pre_11_cfg.json +1 -0
  29. gpt2-small_6144_resid_pre_3.pt +3 -0
  30. gpt2-small_6144_resid_pre_3_cfg.json +1 -0
  31. gpt2-small_6144_resid_pre_4.pt +3 -0
  32. gpt2-small_6144_resid_pre_4_cfg.json +1 -0
  33. gpt2-small_6144_resid_pre_5.pt +3 -0
  34. gpt2-small_6144_resid_pre_5_cfg.json +1 -0
  35. gpt2-small_6144_resid_pre_6.pt +3 -0
  36. gpt2-small_6144_resid_pre_6_cfg.json +1 -0
  37. gpt2-small_6144_resid_pre_7.pt +3 -0
  38. gpt2-small_6144_resid_pre_7_cfg.json +1 -0
  39. gpt2-small_6144_resid_pre_8.pt +3 -0
  40. gpt2-small_6144_resid_pre_8_cfg.json +1 -0
  41. gpt2-small_6144_resid_pre_9.pt +3 -0
  42. gpt2-small_6144_resid_pre_9_cfg.json +1 -0
gpt2-small_6144_mlp_out_0.json ADDED
@@ -0,0 +1 @@
+ {"seed": 49, "batch_size": 1024, "buffer_mult": 384, "lr": 5e-05, "num_tokens": 1000000000, "l1_coeff": 6.364386499626561e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 8, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "gpt2-small", "site": "mlp_out", "layer": 0, "device": "cuda", "model_batch_size": 128, "buffer_size": 393216, "buffer_batches": 3072, "act_name": "blocks.0.hook_mlp_out", "act_size": 768, "dict_size": 6144, "name": "gpt2-small_0_6144_mlp_out"}
gpt2-small_6144_mlp_out_0.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1b166bf58231eea27a19fa53022609de73c6db065ede241c6272f3a3839d2eb0
+ size 37778152
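The .pt entries are Git LFS pointer files (version, oid, size); the weight binaries themselves live in LFS storage and are fetched when the repository is downloaded. Here is a hedged sketch of pulling one checkpoint through the Hub and listing its tensors; the repo_id below is a placeholder, and it is an assumption (not stated in this commit) that each .pt holds a torch-loadable state dict.

# Sketch: download a checkpoint via huggingface_hub and list its tensor shapes.
# "user/repo" is a placeholder repo id; the state-dict layout is assumed.
import torch
from huggingface_hub import hf_hub_download

path = hf_hub_download(repo_id="user/repo", filename="gpt2-small_6144_mlp_out_0.pt")
state = torch.load(path, map_location="cpu")
for name, value in state.items():
    if torch.is_tensor(value):
        print(name, tuple(value.shape))  # expect shapes built from 768 and 6144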
gpt2-small_6144_mlp_out_1.json ADDED
@@ -0,0 +1 @@
+ {"seed": 49, "batch_size": 1024, "buffer_mult": 384, "lr": 5e-05, "num_tokens": 1000000000, "l1_coeff": 1.0669395123841242e-06, "beta1": 0.9, "beta2": 0.99, "dict_mult": 8, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "gpt2-small", "site": "mlp_out", "layer": 1, "device": "cuda", "model_batch_size": 128, "buffer_size": 393216, "buffer_batches": 3072, "act_name": "blocks.1.hook_mlp_out", "act_size": 768, "dict_size": 6144, "name": "gpt2-small_1_6144_mlp_out"}
gpt2-small_6144_mlp_out_1.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d6f3fa27fd656967d1270a8bfeab11870fe30243c902a75c2ad31e7f4f9891a9
+ size 37778152
gpt2-small_6144_mlp_out_10.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b125ac92d5c6cb3fcf4e63b7708d8a5a481e179964aa6a88a7c7b6838a65e018
+ size 37778152
gpt2-small_6144_mlp_out_10_cfg.json ADDED
@@ -0,0 +1 @@
+ {"seed": 49, "batch_size": 1024, "buffer_mult": 384, "lr": 5e-05, "num_tokens": 1000000000, "l1_coeff": 0.0005172568489797413, "beta1": 0.9, "beta2": 0.99, "dict_mult": 8, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "gpt2-small", "site": "mlp_out", "layer": 10, "device": "cuda", "model_batch_size": 128, "buffer_size": 393216, "buffer_batches": 3072, "act_name": "blocks.10.hook_mlp_out", "act_size": 768, "dict_size": 6144, "name": "gpt2-small_10_6144_mlp_out"}
gpt2-small_6144_mlp_out_11.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c510dd1e26bc4f6fbbe5a613b707d4a8deba36e8b3242ee4337ac628af78d3c4
+ size 37778152
gpt2-small_6144_mlp_out_11_cfg.json ADDED
@@ -0,0 +1 @@
+ {"seed": 49, "batch_size": 1024, "buffer_mult": 384, "lr": 5e-05, "num_tokens": 1000000000, "l1_coeff": 0.0008060140535235405, "beta1": 0.9, "beta2": 0.99, "dict_mult": 8, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "gpt2-small", "site": "mlp_out", "layer": 11, "device": "cuda", "model_batch_size": 128, "buffer_size": 393216, "buffer_batches": 3072, "act_name": "blocks.11.hook_mlp_out", "act_size": 768, "dict_size": 6144, "name": "gpt2-small_11_6144_mlp_out"}
gpt2-small_6144_mlp_out_2.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8ea8110ff39ef795b955a13c113d8ca33cdb594d6ce373efa24a27ba4c202d9c
+ size 37778152
gpt2-small_6144_mlp_out_2_cfg.json ADDED
@@ -0,0 +1 @@
+ {"seed": 49, "batch_size": 1024, "buffer_mult": 384, "lr": 5e-05, "num_tokens": 1000000000, "l1_coeff": 3.688410215545445e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 8, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "gpt2-small", "site": "mlp_out", "layer": 2, "device": "cuda", "model_batch_size": 128, "buffer_size": 393216, "buffer_batches": 3072, "act_name": "blocks.2.hook_mlp_out", "act_size": 768, "dict_size": 6144, "name": "gpt2-small_2_6144_mlp_out"}
gpt2-small_6144_mlp_out_3.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5409f8d65cd6fed2ac312422c6c67841e0470559f997620781af991f99b1edf8
+ size 37778152
gpt2-small_6144_mlp_out_3_cfg.json ADDED
@@ -0,0 +1 @@
+ {"seed": 49, "batch_size": 1024, "buffer_mult": 384, "lr": 5e-05, "num_tokens": 1000000000, "l1_coeff": 0.00013478870096150786, "beta1": 0.9, "beta2": 0.99, "dict_mult": 8, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "gpt2-small", "site": "mlp_out", "layer": 3, "device": "cuda", "model_batch_size": 128, "buffer_size": 393216, "buffer_batches": 3072, "act_name": "blocks.3.hook_mlp_out", "act_size": 768, "dict_size": 6144, "name": "gpt2-small_3_6144_mlp_out"}
gpt2-small_6144_mlp_out_4.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:401612dac4068db227c8a59bf7d03c4a06fff2c1a1edc64c4c38b5ec521c9b51
+ size 37778152
gpt2-small_6144_mlp_out_4_cfg.json ADDED
@@ -0,0 +1 @@
+ {"seed": 49, "batch_size": 1024, "buffer_mult": 384, "lr": 5e-05, "num_tokens": 1000000000, "l1_coeff": 0.00013783478061668575, "beta1": 0.9, "beta2": 0.99, "dict_mult": 8, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "gpt2-small", "site": "mlp_out", "layer": 4, "device": "cuda", "model_batch_size": 128, "buffer_size": 393216, "buffer_batches": 3072, "act_name": "blocks.4.hook_mlp_out", "act_size": 768, "dict_size": 6144, "name": "gpt2-small_4_6144_mlp_out"}
gpt2-small_6144_mlp_out_5.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:853ba439b3b78d595c1451c898cbdb4aeb6f4a2f7224eb2fb2af4894cdfa4c29
+ size 37778152
gpt2-small_6144_mlp_out_5_cfg.json ADDED
@@ -0,0 +1 @@
+ {"seed": 49, "batch_size": 1024, "buffer_mult": 384, "lr": 5e-05, "num_tokens": 1000000000, "l1_coeff": 0.00016076180327218026, "beta1": 0.9, "beta2": 0.99, "dict_mult": 8, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "gpt2-small", "site": "mlp_out", "layer": 5, "device": "cuda", "model_batch_size": 128, "buffer_size": 393216, "buffer_batches": 3072, "act_name": "blocks.5.hook_mlp_out", "act_size": 768, "dict_size": 6144, "name": "gpt2-small_5_6144_mlp_out"}
gpt2-small_6144_mlp_out_6.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9dbb271a28109fc3a52d691a02c65711693ac572191677fde6e33b228e5d5098
+ size 37778152
gpt2-small_6144_mlp_out_6_cfg.json ADDED
@@ -0,0 +1 @@
+ {"seed": 49, "batch_size": 1024, "buffer_mult": 384, "lr": 5e-05, "num_tokens": 1000000000, "l1_coeff": 0.000189751255675219, "beta1": 0.9, "beta2": 0.99, "dict_mult": 8, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "gpt2-small", "site": "mlp_out", "layer": 6, "device": "cuda", "model_batch_size": 128, "buffer_size": 393216, "buffer_batches": 3072, "act_name": "blocks.6.hook_mlp_out", "act_size": 768, "dict_size": 6144, "name": "gpt2-small_6_6144_mlp_out"}
gpt2-small_6144_mlp_out_7.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f80a5343ea3a75ff1e7feda63ecae4b789ee13738f06eb36a1eb599ac42da936
+ size 37778152
gpt2-small_6144_mlp_out_7_cfg.json ADDED
@@ -0,0 +1 @@
+ {"seed": 49, "batch_size": 1024, "buffer_mult": 384, "lr": 5e-05, "num_tokens": 1000000000, "l1_coeff": 0.00023112074995879084, "beta1": 0.9, "beta2": 0.99, "dict_mult": 8, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "gpt2-small", "site": "mlp_out", "layer": 7, "device": "cuda", "model_batch_size": 128, "buffer_size": 393216, "buffer_batches": 3072, "act_name": "blocks.7.hook_mlp_out", "act_size": 768, "dict_size": 6144, "name": "gpt2-small_7_6144_mlp_out"}
gpt2-small_6144_mlp_out_8.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ac55b664e3975bdc2f4c601145ae52b20908a677d0e7c0c17e938fb3e43880c3
+ size 37778152
gpt2-small_6144_mlp_out_8_cfg.json ADDED
@@ -0,0 +1 @@
+ {"seed": 49, "batch_size": 1024, "buffer_mult": 384, "lr": 5e-05, "num_tokens": 1000000000, "l1_coeff": 0.00027299890643917024, "beta1": 0.9, "beta2": 0.99, "dict_mult": 8, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "gpt2-small", "site": "mlp_out", "layer": 8, "device": "cuda", "model_batch_size": 128, "buffer_size": 393216, "buffer_batches": 3072, "act_name": "blocks.8.hook_mlp_out", "act_size": 768, "dict_size": 6144, "name": "gpt2-small_8_6144_mlp_out"}
gpt2-small_6144_mlp_out_9.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ae88fa99607ed63ae213742a7418642b29add12a78bdeb7d972323919845e0c6
+ size 37778152
gpt2-small_6144_mlp_out_9_cfg.json ADDED
@@ -0,0 +1 @@
+ {"seed": 49, "batch_size": 1024, "buffer_mult": 384, "lr": 5e-05, "num_tokens": 1000000000, "l1_coeff": 0.0003489422088023275, "beta1": 0.9, "beta2": 0.99, "dict_mult": 8, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "gpt2-small", "site": "mlp_out", "layer": 9, "device": "cuda", "model_batch_size": 128, "buffer_size": 393216, "buffer_batches": 3072, "act_name": "blocks.9.hook_mlp_out", "act_size": 768, "dict_size": 6144, "name": "gpt2-small_9_6144_mlp_out"}
gpt2-small_6144_resid_pre_10.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a4e35f4741846abc9c283168c60f28b3db0190434cbed0060dc90f3887c4ce20
+ size 37778672
gpt2-small_6144_resid_pre_10_cfg.json ADDED
@@ -0,0 +1 @@
+ {"seed": 49, "batch_size": 1024, "buffer_mult": 384, "lr": 5e-05, "num_tokens": 1000000000, "l1_coeff": 1e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 8, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "gpt2-small", "site": "resid_pre", "layer": 10, "device": "cuda", "model_batch_size": 128, "buffer_size": 393216, "buffer_batches": 3072, "act_name": "blocks.10.hook_resid_pre", "act_size": 768, "dict_size": 6144, "name": "gpt2-small_10_6144_resid_pre"}
gpt2-small_6144_resid_pre_11.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ccf2f2794659fd0ade0fdfc926dc007de44a2a107c1ed6c2cc34e4f2b1582383
+ size 37778672
gpt2-small_6144_resid_pre_11_cfg.json ADDED
@@ -0,0 +1 @@
+ {"seed": 49, "batch_size": 1024, "buffer_mult": 384, "lr": 3e-05, "num_tokens": 4000000000, "l1_coeff": 1e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 8, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "gpt2-small", "site": "resid_pre", "layer": 11, "device": "cuda", "model_batch_size": 128, "buffer_size": 393216, "buffer_batches": 3072, "act_name": "blocks.11.hook_resid_pre", "act_size": 768, "dict_size": 6144, "name": "gpt2-small_11_6144_resid_pre"}
gpt2-small_6144_resid_pre_3.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d4471f4599c26359778254ba9ed784627de7b23d62b9416909b06f0d2aa5062f
+ size 37778152
gpt2-small_6144_resid_pre_3_cfg.json ADDED
@@ -0,0 +1 @@
+ {"seed": 49, "batch_size": 1024, "buffer_mult": 384, "lr": 5e-05, "num_tokens": 1000000000, "l1_coeff": 0.00018832433852367103, "beta1": 0.9, "beta2": 0.99, "dict_mult": 8, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "gpt2-small", "site": "resid_pre", "layer": 3, "device": "cuda", "model_batch_size": 128, "buffer_size": 393216, "buffer_batches": 3072, "act_name": "blocks.3.hook_resid_pre", "act_size": 768, "dict_size": 6144, "name": "gpt2-small_3_6144_resid_pre"}
gpt2-small_6144_resid_pre_4.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5bfdebe01e2fb626fdd2facffd6ab2dfbe944984499bf9b76f56b5e0ecafa4aa
+ size 37778152
gpt2-small_6144_resid_pre_4_cfg.json ADDED
@@ -0,0 +1 @@
+ {"seed": 49, "batch_size": 1024, "buffer_mult": 384, "lr": 5e-05, "num_tokens": 1000000000, "l1_coeff": 0.00021865325106773525, "beta1": 0.9, "beta2": 0.99, "dict_mult": 8, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "gpt2-small", "site": "resid_pre", "layer": 4, "device": "cuda", "model_batch_size": 128, "buffer_size": 393216, "buffer_batches": 3072, "act_name": "blocks.4.hook_resid_pre", "act_size": 768, "dict_size": 6144, "name": "gpt2-small_4_6144_resid_pre"}
gpt2-small_6144_resid_pre_5.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6e1dab4829b36a5810aa7e8c4377390c2afe9d6bc3873560b661fd3a804ec627
+ size 37778664
gpt2-small_6144_resid_pre_5_cfg.json ADDED
@@ -0,0 +1 @@
+ {"seed": 49, "batch_size": 1024, "buffer_mult": 384, "lr": 5e-05, "num_tokens": 1000000000, "l1_coeff": 1e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 8, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "gpt2-small", "site": "resid_pre", "layer": 5, "device": "cuda", "model_batch_size": 128, "buffer_size": 393216, "buffer_batches": 3072, "act_name": "blocks.5.hook_resid_pre", "act_size": 768, "dict_size": 6144, "name": "gpt2-small_5_6144_resid_pre"}
gpt2-small_6144_resid_pre_6.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:de0c1b7c5de7ad4769e40b26b8fef5ea5fb69190f7daa6ec49962cbebc4ff172
+ size 37778664
gpt2-small_6144_resid_pre_6_cfg.json ADDED
@@ -0,0 +1 @@
+ {"seed": 49, "batch_size": 1024, "buffer_mult": 384, "lr": 5e-05, "num_tokens": 1000000000, "l1_coeff": 1e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 8, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "gpt2-small", "site": "resid_pre", "layer": 6, "device": "cuda", "model_batch_size": 128, "buffer_size": 393216, "buffer_batches": 3072, "act_name": "blocks.6.hook_resid_pre", "act_size": 768, "dict_size": 6144, "name": "gpt2-small_6_6144_resid_pre"}
gpt2-small_6144_resid_pre_7.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c59646dce05f0fcd71d1bbae16be7e3d2566f3981ea0370d57e2da828732d2c6
+ size 37778664
gpt2-small_6144_resid_pre_7_cfg.json ADDED
@@ -0,0 +1 @@
+ {"seed": 49, "batch_size": 1024, "buffer_mult": 384, "lr": 5e-05, "num_tokens": 1000000000, "l1_coeff": 1e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 8, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "gpt2-small", "site": "resid_pre", "layer": 7, "device": "cuda", "model_batch_size": 128, "buffer_size": 393216, "buffer_batches": 3072, "act_name": "blocks.7.hook_resid_pre", "act_size": 768, "dict_size": 6144, "name": "gpt2-small_7_6144_resid_pre"}
gpt2-small_6144_resid_pre_8.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e8db4a9c9be262ecdfba6b07656c1b2d4b4c26ffef7b0da67ea98c8b49aea967
+ size 37778664
gpt2-small_6144_resid_pre_8_cfg.json ADDED
@@ -0,0 +1 @@
+ {"seed": 49, "batch_size": 1024, "buffer_mult": 384, "lr": 5e-05, "num_tokens": 1000000000, "l1_coeff": 1e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 8, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "gpt2-small", "site": "resid_pre", "layer": 8, "device": "cuda", "model_batch_size": 128, "buffer_size": 393216, "buffer_batches": 3072, "act_name": "blocks.8.hook_resid_pre", "act_size": 768, "dict_size": 6144, "name": "gpt2-small_8_6144_resid_pre"}
gpt2-small_6144_resid_pre_9.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:837c593d829db6e60af15692204f713d731f197b57b864c9f7496f417c4a35d3
+ size 37778664
gpt2-small_6144_resid_pre_9_cfg.json ADDED
@@ -0,0 +1 @@
+ {"seed": 49, "batch_size": 1024, "buffer_mult": 384, "lr": 5e-05, "num_tokens": 1000000000, "l1_coeff": 1e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 8, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "gpt2-small", "site": "resid_pre", "layer": 9, "device": "cuda", "model_batch_size": 128, "buffer_size": 393216, "buffer_batches": 3072, "act_name": "blocks.9.hook_resid_pre", "act_size": 768, "dict_size": 6144, "name": "gpt2-small_9_6144_resid_pre"}