canrager committed
Commit 1888f2a · verified · 1 Parent(s): f0301fe

Upload folder using huggingface_hub

This view is limited to 50 files because it contains too many changes. See raw diff.

Files changed (50)
  1. gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12/trainer_0/ae.pt +3 -0
  2. gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12/trainer_0/config.json +26 -0
  3. gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12/trainer_1/ae.pt +3 -0
  4. gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12/trainer_1/config.json +26 -0
  5. gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12/trainer_2/ae.pt +3 -0
  6. gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12/trainer_2/config.json +26 -0
  7. gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12/trainer_3/ae.pt +3 -0
  8. gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12/trainer_3/config.json +26 -0
  9. gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12/trainer_4/ae.pt +3 -0
  10. gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12/trainer_4/config.json +26 -0
  11. gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12/trainer_5/ae.pt +3 -0
  12. gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12/trainer_5/config.json +26 -0
  13. gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_0/ae.pt +3 -0
  14. gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_0/config.json +26 -0
  15. gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_308/ae.pt +3 -0
  16. gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_308/config.json +26 -0
  17. gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_3088/ae.pt +3 -0
  18. gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_3088/config.json +26 -0
  19. gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_30881/ae.pt +3 -0
  20. gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_30881/config.json +26 -0
  21. gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_97/ae.pt +3 -0
  22. gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_97/config.json +26 -0
  23. gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_976/ae.pt +3 -0
  24. gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_976/config.json +26 -0
  25. gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_9765/ae.pt +3 -0
  26. gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_9765/config.json +26 -0
  27. gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_0/ae.pt +3 -0
  28. gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_0/config.json +26 -0
  29. gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_308/ae.pt +3 -0
  30. gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_308/config.json +26 -0
  31. gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_3088/ae.pt +3 -0
  32. gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_3088/config.json +26 -0
  33. gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_30881/ae.pt +3 -0
  34. gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_30881/config.json +26 -0
  35. gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_97/ae.pt +3 -0
  36. gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_97/config.json +26 -0
  37. gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_976/ae.pt +3 -0
  38. gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_976/config.json +26 -0
  39. gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_9765/ae.pt +3 -0
  40. gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_9765/config.json +26 -0
  41. gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_0/ae.pt +3 -0
  42. gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_0/config.json +26 -0
  43. gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_308/ae.pt +3 -0
  44. gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_308/config.json +26 -0
  45. gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_3088/ae.pt +3 -0
  46. gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_3088/config.json +26 -0
  47. gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_30881/ae.pt +3 -0
  48. gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_30881/config.json +26 -0
  49. gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_97/ae.pt +3 -0
  50. gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_97/config.json +26 -0
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12/trainer_0/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:660283962396ea26438c26427150e8e6221d61f0b4ad236876a9affd154d2a4d
+ size 75524904
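
Each ae.pt entry above is a Git LFS pointer, not the weights themselves: a three-line text stub giving the pointer spec version, the SHA-256 object ID, and the payload size in bytes (about 75.5 MB per autoencoder). As a minimal sketch of how such a pointer can be inspected locally (the path is the one committed here; `parse_lfs_pointer` is an illustrative helper, not part of any library):

```python
# Split each "key value" line of a Git LFS pointer file into a dict.
def parse_lfs_pointer(path: str) -> dict:
    fields = {}
    with open(path) as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields

pointer = parse_lfs_pointer(
    "gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12/trainer_0/ae.pt"
)
print(pointer["oid"], int(pointer["size"]))  # sha256:6602... 75524904
```

Downloading through `huggingface_hub` (e.g. `hf_hub_download`) resolves these pointers to the real tensors automatically.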
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12/trainer_0/config.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "trainer": {
+     "trainer_class": "TrainerTopK",
+     "dict_class": "AutoEncoderTopK",
+     "lr": 0.0004,
+     "steps": 97656,
+     "seed": 0,
+     "activation_dim": 2304,
+     "dict_size": 4096,
+     "k": 20,
+     "device": "cuda:4",
+     "layer": 12,
+     "lm_name": "google/gemma-2-2b",
+     "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12",
+     "submodule_name": "resid_post_layer_12"
+   },
+   "buffer": {
+     "d_submodule": 2304,
+     "io": "out",
+     "n_ctxs": 2048,
+     "ctx_len": 128,
+     "refresh_batch_size": 24,
+     "out_batch_size": 2048,
+     "device": "cuda:4"
+   }
+ }
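
The config captures everything needed to rebuild the dictionary: a TopK sparse autoencoder (`AutoEncoderTopK`) over the 2304-dimensional layer-12 residual stream of google/gemma-2-2b, with a 4096-latent dictionary (the "2pow12" in the folder name) and k = 20 active latents per token; the six trainer_* variants differ only in k, doubling from 20 up to 640. In practice ae.pt should be loaded through the TopK trainer code that produced it; purely as an architectural sketch (parameter names and state-dict layout here are assumptions, not the checkpoint's actual keys):

```python
import json
import torch
import torch.nn as nn

class TopKAutoEncoder(nn.Module):
    """Minimal TopK SAE: encode, keep the k largest latents per token,
    zero the rest, decode. Illustrative only; not the trained module."""
    def __init__(self, activation_dim: int, dict_size: int, k: int):
        super().__init__()
        self.k = k
        self.encoder = nn.Linear(activation_dim, dict_size)
        self.decoder = nn.Linear(dict_size, activation_dim)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        latents = torch.relu(self.encoder(x))
        top = torch.topk(latents, self.k, dim=-1)
        sparse = torch.zeros_like(latents).scatter_(-1, top.indices, top.values)
        return self.decoder(sparse)

# Build a model with the shapes recorded in the committed config.
cfg = json.load(open(
    "gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12/trainer_0/config.json"
))["trainer"]
sae = TopKAutoEncoder(cfg["activation_dim"], cfg["dict_size"], cfg["k"])
recon = sae(torch.randn(8, cfg["activation_dim"]))  # (8, 2304) reconstruction
```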
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12/trainer_1/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:53d6d53d470c9ba0ddda24fea0484c2639b69194df6766d7381bfe76ac7f5bed
+ size 75524904
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12/trainer_1/config.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "trainer": {
+     "trainer_class": "TrainerTopK",
+     "dict_class": "AutoEncoderTopK",
+     "lr": 0.0004,
+     "steps": 97656,
+     "seed": 0,
+     "activation_dim": 2304,
+     "dict_size": 4096,
+     "k": 40,
+     "device": "cuda:4",
+     "layer": 12,
+     "lm_name": "google/gemma-2-2b",
+     "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12",
+     "submodule_name": "resid_post_layer_12"
+   },
+   "buffer": {
+     "d_submodule": 2304,
+     "io": "out",
+     "n_ctxs": 2048,
+     "ctx_len": 128,
+     "refresh_batch_size": 24,
+     "out_batch_size": 2048,
+     "device": "cuda:4"
+   }
+ }
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12/trainer_2/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a5c26438a0279e1974412f9e1bb1283f65fd3034511e5249960cf35c30f58acd
+ size 75524904
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12/trainer_2/config.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "trainer": {
+     "trainer_class": "TrainerTopK",
+     "dict_class": "AutoEncoderTopK",
+     "lr": 0.0004,
+     "steps": 97656,
+     "seed": 0,
+     "activation_dim": 2304,
+     "dict_size": 4096,
+     "k": 80,
+     "device": "cuda:4",
+     "layer": 12,
+     "lm_name": "google/gemma-2-2b",
+     "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12",
+     "submodule_name": "resid_post_layer_12"
+   },
+   "buffer": {
+     "d_submodule": 2304,
+     "io": "out",
+     "n_ctxs": 2048,
+     "ctx_len": 128,
+     "refresh_batch_size": 24,
+     "out_batch_size": 2048,
+     "device": "cuda:4"
+   }
+ }
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12/trainer_3/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:caf7fa2056e78044a7f75cdd22f0fe6542cab9af8e80e73d0d594ba0acb5b0a5
+ size 75524904
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12/trainer_3/config.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "trainer": {
+     "trainer_class": "TrainerTopK",
+     "dict_class": "AutoEncoderTopK",
+     "lr": 0.0004,
+     "steps": 97656,
+     "seed": 0,
+     "activation_dim": 2304,
+     "dict_size": 4096,
+     "k": 160,
+     "device": "cuda:4",
+     "layer": 12,
+     "lm_name": "google/gemma-2-2b",
+     "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12",
+     "submodule_name": "resid_post_layer_12"
+   },
+   "buffer": {
+     "d_submodule": 2304,
+     "io": "out",
+     "n_ctxs": 2048,
+     "ctx_len": 128,
+     "refresh_batch_size": 24,
+     "out_batch_size": 2048,
+     "device": "cuda:4"
+   }
+ }
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12/trainer_4/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:441beebc04cc47e0ca3774242c3f916b2732e2d4e3460bf81c193e91cd2f9333
+ size 75524904
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12/trainer_4/config.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "trainer": {
+     "trainer_class": "TrainerTopK",
+     "dict_class": "AutoEncoderTopK",
+     "lr": 0.0004,
+     "steps": 97656,
+     "seed": 0,
+     "activation_dim": 2304,
+     "dict_size": 4096,
+     "k": 320,
+     "device": "cuda:4",
+     "layer": 12,
+     "lm_name": "google/gemma-2-2b",
+     "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12",
+     "submodule_name": "resid_post_layer_12"
+   },
+   "buffer": {
+     "d_submodule": 2304,
+     "io": "out",
+     "n_ctxs": 2048,
+     "ctx_len": 128,
+     "refresh_batch_size": 24,
+     "out_batch_size": 2048,
+     "device": "cuda:4"
+   }
+ }
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12/trainer_5/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:af97e8b509a198c2f74c189e799b06469dc944106619e3abb7db01277c19312a
+ size 75524904
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12/trainer_5/config.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "trainer": {
+     "trainer_class": "TrainerTopK",
+     "dict_class": "AutoEncoderTopK",
+     "lr": 0.0004,
+     "steps": 97656,
+     "seed": 0,
+     "activation_dim": 2304,
+     "dict_size": 4096,
+     "k": 640,
+     "device": "cuda:4",
+     "layer": 12,
+     "lm_name": "google/gemma-2-2b",
+     "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12",
+     "submodule_name": "resid_post_layer_12"
+   },
+   "buffer": {
+     "d_submodule": 2304,
+     "io": "out",
+     "n_ctxs": 2048,
+     "ctx_len": 128,
+     "refresh_batch_size": 24,
+     "out_batch_size": 2048,
+     "device": "cuda:4"
+   }
+ }
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_0/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e0e8687672893fed801b4f8625d58d76fa716488dbbf97d8889ca4c6dd6134b6
+ size 75524920
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_0/config.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "trainer": {
+     "trainer_class": "TrainerTopK",
+     "dict_class": "AutoEncoderTopK",
+     "lr": 0.0004,
+     "steps": "0",
+     "seed": 0,
+     "activation_dim": 2304,
+     "dict_size": 4096,
+     "k": 20,
+     "device": "cuda:4",
+     "layer": 12,
+     "lm_name": "google/gemma-2-2b",
+     "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12",
+     "submodule_name": "resid_post_layer_12"
+   },
+   "buffer": {
+     "d_submodule": 2304,
+     "io": "out",
+     "n_ctxs": 2048,
+     "ctx_len": 128,
+     "refresh_batch_size": 24,
+     "out_batch_size": 2048,
+     "device": "cuda:4"
+   }
+ }
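
From here on, the _checkpoints tree snapshots each trainer at steps 0, 97, 308, 976, 3088, 9765, and 30881 of the 97656 total; each snapshot's config records its step as a string in "steps", whereas the final configs above record the integer total. The step values are consistent with half-decade log spacing (10^-3 through 10^-0.5 of the total step count); a sketch that reproduces them under that assumption, not the uploader's confirmed generating code:

```python
# Checkpoint steps as log-spaced fractions of training:
# 10**-3.0, 10**-2.5, ..., 10**-0.5 of 97656 total steps.
total_steps = 97656
steps = [0] + [int(total_steps * 10 ** (-exp / 2)) for exp in range(6, 0, -1)]
print(steps)  # [0, 97, 308, 976, 3088, 9765, 30881]
```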
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_308/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2401dfae15537d6df33149dc8f9ddcc09e1f086aab1626c03fc2ddf0e48361ea
+ size 75524936
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_308/config.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "trainer": {
+     "trainer_class": "TrainerTopK",
+     "dict_class": "AutoEncoderTopK",
+     "lr": 0.0004,
+     "steps": "308",
+     "seed": 0,
+     "activation_dim": 2304,
+     "dict_size": 4096,
+     "k": 20,
+     "device": "cuda:4",
+     "layer": 12,
+     "lm_name": "google/gemma-2-2b",
+     "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12",
+     "submodule_name": "resid_post_layer_12"
+   },
+   "buffer": {
+     "d_submodule": 2304,
+     "io": "out",
+     "n_ctxs": 2048,
+     "ctx_len": 128,
+     "refresh_batch_size": 24,
+     "out_batch_size": 2048,
+     "device": "cuda:4"
+   }
+ }
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_3088/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ca2fe58130b7b99e4089290a434879c6b7952ad6f56d0b180e1fd922001f668a
+ size 75525008
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_3088/config.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "trainer": {
+     "trainer_class": "TrainerTopK",
+     "dict_class": "AutoEncoderTopK",
+     "lr": 0.0004,
+     "steps": "3088",
+     "seed": 0,
+     "activation_dim": 2304,
+     "dict_size": 4096,
+     "k": 20,
+     "device": "cuda:4",
+     "layer": 12,
+     "lm_name": "google/gemma-2-2b",
+     "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12",
+     "submodule_name": "resid_post_layer_12"
+   },
+   "buffer": {
+     "d_submodule": 2304,
+     "io": "out",
+     "n_ctxs": 2048,
+     "ctx_len": 128,
+     "refresh_batch_size": 24,
+     "out_batch_size": 2048,
+     "device": "cuda:4"
+   }
+ }
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_30881/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:42afa736cbb64667873202c7f0f1dfbac567ede7e1516a9495e419f45f044eec
+ size 75525208
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_30881/config.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "trainer": {
+     "trainer_class": "TrainerTopK",
+     "dict_class": "AutoEncoderTopK",
+     "lr": 0.0004,
+     "steps": "30881",
+     "seed": 0,
+     "activation_dim": 2304,
+     "dict_size": 4096,
+     "k": 20,
+     "device": "cuda:4",
+     "layer": 12,
+     "lm_name": "google/gemma-2-2b",
+     "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12",
+     "submodule_name": "resid_post_layer_12"
+   },
+   "buffer": {
+     "d_submodule": 2304,
+     "io": "out",
+     "n_ctxs": 2048,
+     "ctx_len": 128,
+     "refresh_batch_size": 24,
+     "out_batch_size": 2048,
+     "device": "cuda:4"
+   }
+ }
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_97/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5c365fb15c786ff9b707851c87b6fc1d49e84f3b7e9fda4bf8d1b82c35c1d7b4
+ size 75524928
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_97/config.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "trainer": {
+     "trainer_class": "TrainerTopK",
+     "dict_class": "AutoEncoderTopK",
+     "lr": 0.0004,
+     "steps": "97",
+     "seed": 0,
+     "activation_dim": 2304,
+     "dict_size": 4096,
+     "k": 20,
+     "device": "cuda:4",
+     "layer": 12,
+     "lm_name": "google/gemma-2-2b",
+     "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12",
+     "submodule_name": "resid_post_layer_12"
+   },
+   "buffer": {
+     "d_submodule": 2304,
+     "io": "out",
+     "n_ctxs": 2048,
+     "ctx_len": 128,
+     "refresh_batch_size": 24,
+     "out_batch_size": 2048,
+     "device": "cuda:4"
+   }
+ }
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_976/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:57f171bbf71d65ee4475ce4c14ea93ac3efeec3298b0dceb49d564acb018750d
+ size 75524936
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_976/config.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "trainer": {
+     "trainer_class": "TrainerTopK",
+     "dict_class": "AutoEncoderTopK",
+     "lr": 0.0004,
+     "steps": "976",
+     "seed": 0,
+     "activation_dim": 2304,
+     "dict_size": 4096,
+     "k": 20,
+     "device": "cuda:4",
+     "layer": 12,
+     "lm_name": "google/gemma-2-2b",
+     "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12",
+     "submodule_name": "resid_post_layer_12"
+   },
+   "buffer": {
+     "d_submodule": 2304,
+     "io": "out",
+     "n_ctxs": 2048,
+     "ctx_len": 128,
+     "refresh_batch_size": 24,
+     "out_batch_size": 2048,
+     "device": "cuda:4"
+   }
+ }
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_9765/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:63bd37b92dd3b252f4061796f1c7ea153db3656acc1bfeef5ac8763d04903de7
+ size 75525008
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_9765/config.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "trainer": {
+     "trainer_class": "TrainerTopK",
+     "dict_class": "AutoEncoderTopK",
+     "lr": 0.0004,
+     "steps": "9765",
+     "seed": 0,
+     "activation_dim": 2304,
+     "dict_size": 4096,
+     "k": 20,
+     "device": "cuda:4",
+     "layer": 12,
+     "lm_name": "google/gemma-2-2b",
+     "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12",
+     "submodule_name": "resid_post_layer_12"
+   },
+   "buffer": {
+     "d_submodule": 2304,
+     "io": "out",
+     "n_ctxs": 2048,
+     "ctx_len": 128,
+     "refresh_batch_size": 24,
+     "out_batch_size": 2048,
+     "device": "cuda:4"
+   }
+ }
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_0/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e0e8687672893fed801b4f8625d58d76fa716488dbbf97d8889ca4c6dd6134b6
+ size 75524920
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_0/config.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "trainer": {
+     "trainer_class": "TrainerTopK",
+     "dict_class": "AutoEncoderTopK",
+     "lr": 0.0004,
+     "steps": "0",
+     "seed": 0,
+     "activation_dim": 2304,
+     "dict_size": 4096,
+     "k": 40,
+     "device": "cuda:4",
+     "layer": 12,
+     "lm_name": "google/gemma-2-2b",
+     "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12",
+     "submodule_name": "resid_post_layer_12"
+   },
+   "buffer": {
+     "d_submodule": 2304,
+     "io": "out",
+     "n_ctxs": 2048,
+     "ctx_len": 128,
+     "refresh_batch_size": 24,
+     "out_batch_size": 2048,
+     "device": "cuda:4"
+   }
+ }
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_308/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c3642ef8e0ed2fc41fc33cf76c5b3700e4409d721ce6f19b05512ac5af4e3442
+ size 75524936
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_308/config.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "trainer": {
+     "trainer_class": "TrainerTopK",
+     "dict_class": "AutoEncoderTopK",
+     "lr": 0.0004,
+     "steps": "308",
+     "seed": 0,
+     "activation_dim": 2304,
+     "dict_size": 4096,
+     "k": 40,
+     "device": "cuda:4",
+     "layer": 12,
+     "lm_name": "google/gemma-2-2b",
+     "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12",
+     "submodule_name": "resid_post_layer_12"
+   },
+   "buffer": {
+     "d_submodule": 2304,
+     "io": "out",
+     "n_ctxs": 2048,
+     "ctx_len": 128,
+     "refresh_batch_size": 24,
+     "out_batch_size": 2048,
+     "device": "cuda:4"
+   }
+ }
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_3088/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b3986db197c5da7b73f968165e2f12ef6ccbe8cd6206851820df21aca0c5d236
+ size 75525008
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_3088/config.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "trainer": {
+     "trainer_class": "TrainerTopK",
+     "dict_class": "AutoEncoderTopK",
+     "lr": 0.0004,
+     "steps": "3088",
+     "seed": 0,
+     "activation_dim": 2304,
+     "dict_size": 4096,
+     "k": 40,
+     "device": "cuda:4",
+     "layer": 12,
+     "lm_name": "google/gemma-2-2b",
+     "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12",
+     "submodule_name": "resid_post_layer_12"
+   },
+   "buffer": {
+     "d_submodule": 2304,
+     "io": "out",
+     "n_ctxs": 2048,
+     "ctx_len": 128,
+     "refresh_batch_size": 24,
+     "out_batch_size": 2048,
+     "device": "cuda:4"
+   }
+ }
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_30881/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e33877d043abf17d68ecdddd82bda885ac3357ae81eb013441af3627d6e91ed8
+ size 75525208
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_30881/config.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "trainer": {
+     "trainer_class": "TrainerTopK",
+     "dict_class": "AutoEncoderTopK",
+     "lr": 0.0004,
+     "steps": "30881",
+     "seed": 0,
+     "activation_dim": 2304,
+     "dict_size": 4096,
+     "k": 40,
+     "device": "cuda:4",
+     "layer": 12,
+     "lm_name": "google/gemma-2-2b",
+     "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12",
+     "submodule_name": "resid_post_layer_12"
+   },
+   "buffer": {
+     "d_submodule": 2304,
+     "io": "out",
+     "n_ctxs": 2048,
+     "ctx_len": 128,
+     "refresh_batch_size": 24,
+     "out_batch_size": 2048,
+     "device": "cuda:4"
+   }
+ }
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_97/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:84d600afe62d8462f9f9b0d4bc054077211466352e4469c110d1166142ed6662
+ size 75524928
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_97/config.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "trainer": {
+     "trainer_class": "TrainerTopK",
+     "dict_class": "AutoEncoderTopK",
+     "lr": 0.0004,
+     "steps": "97",
+     "seed": 0,
+     "activation_dim": 2304,
+     "dict_size": 4096,
+     "k": 40,
+     "device": "cuda:4",
+     "layer": 12,
+     "lm_name": "google/gemma-2-2b",
+     "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12",
+     "submodule_name": "resid_post_layer_12"
+   },
+   "buffer": {
+     "d_submodule": 2304,
+     "io": "out",
+     "n_ctxs": 2048,
+     "ctx_len": 128,
+     "refresh_batch_size": 24,
+     "out_batch_size": 2048,
+     "device": "cuda:4"
+   }
+ }
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_976/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a430be8e00518de00bd99b17aa75f8589bec2652c611d36ab90bd164373a3665
+ size 75524936
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_976/config.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "trainer": {
+     "trainer_class": "TrainerTopK",
+     "dict_class": "AutoEncoderTopK",
+     "lr": 0.0004,
+     "steps": "976",
+     "seed": 0,
+     "activation_dim": 2304,
+     "dict_size": 4096,
+     "k": 40,
+     "device": "cuda:4",
+     "layer": 12,
+     "lm_name": "google/gemma-2-2b",
+     "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12",
+     "submodule_name": "resid_post_layer_12"
+   },
+   "buffer": {
+     "d_submodule": 2304,
+     "io": "out",
+     "n_ctxs": 2048,
+     "ctx_len": 128,
+     "refresh_batch_size": 24,
+     "out_batch_size": 2048,
+     "device": "cuda:4"
+   }
+ }
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_9765/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:43d7c639c6470e5d9384546b6ab6eaefd1f4f857c2a1ef422cae4a1f3603b452
+ size 75525008
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_9765/config.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "trainer": {
+     "trainer_class": "TrainerTopK",
+     "dict_class": "AutoEncoderTopK",
+     "lr": 0.0004,
+     "steps": "9765",
+     "seed": 0,
+     "activation_dim": 2304,
+     "dict_size": 4096,
+     "k": 40,
+     "device": "cuda:4",
+     "layer": 12,
+     "lm_name": "google/gemma-2-2b",
+     "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12",
+     "submodule_name": "resid_post_layer_12"
+   },
+   "buffer": {
+     "d_submodule": 2304,
+     "io": "out",
+     "n_ctxs": 2048,
+     "ctx_len": 128,
+     "refresh_batch_size": 24,
+     "out_batch_size": 2048,
+     "device": "cuda:4"
+   }
+ }
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_0/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e0e8687672893fed801b4f8625d58d76fa716488dbbf97d8889ca4c6dd6134b6
+ size 75524920
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_0/config.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "trainer": {
+     "trainer_class": "TrainerTopK",
+     "dict_class": "AutoEncoderTopK",
+     "lr": 0.0004,
+     "steps": "0",
+     "seed": 0,
+     "activation_dim": 2304,
+     "dict_size": 4096,
+     "k": 80,
+     "device": "cuda:4",
+     "layer": 12,
+     "lm_name": "google/gemma-2-2b",
+     "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12",
+     "submodule_name": "resid_post_layer_12"
+   },
+   "buffer": {
+     "d_submodule": 2304,
+     "io": "out",
+     "n_ctxs": 2048,
+     "ctx_len": 128,
+     "refresh_batch_size": 24,
+     "out_batch_size": 2048,
+     "device": "cuda:4"
+   }
+ }
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_308/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:34c478184d2fd776ad3c798b53f537111ca3ef3420047e7e7c89becbef45fa1f
+ size 75524936
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_308/config.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "trainer": {
+     "trainer_class": "TrainerTopK",
+     "dict_class": "AutoEncoderTopK",
+     "lr": 0.0004,
+     "steps": "308",
+     "seed": 0,
+     "activation_dim": 2304,
+     "dict_size": 4096,
+     "k": 80,
+     "device": "cuda:4",
+     "layer": 12,
+     "lm_name": "google/gemma-2-2b",
+     "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12",
+     "submodule_name": "resid_post_layer_12"
+   },
+   "buffer": {
+     "d_submodule": 2304,
+     "io": "out",
+     "n_ctxs": 2048,
+     "ctx_len": 128,
+     "refresh_batch_size": 24,
+     "out_batch_size": 2048,
+     "device": "cuda:4"
+   }
+ }
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_3088/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:83c8e69bb710c78fbc06bdd0d1404ab644dd071018c474a58b7b2c6ff1d149a9
+ size 75525008
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_3088/config.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "trainer": {
+     "trainer_class": "TrainerTopK",
+     "dict_class": "AutoEncoderTopK",
+     "lr": 0.0004,
+     "steps": "3088",
+     "seed": 0,
+     "activation_dim": 2304,
+     "dict_size": 4096,
+     "k": 80,
+     "device": "cuda:4",
+     "layer": 12,
+     "lm_name": "google/gemma-2-2b",
+     "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12",
+     "submodule_name": "resid_post_layer_12"
+   },
+   "buffer": {
+     "d_submodule": 2304,
+     "io": "out",
+     "n_ctxs": 2048,
+     "ctx_len": 128,
+     "refresh_batch_size": 24,
+     "out_batch_size": 2048,
+     "device": "cuda:4"
+   }
+ }
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_30881/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3d6e499c5a02eba2aee4a6d8e96f3712ee43ce413846ce01de6b99f594be1ec5
+ size 75525208
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_30881/config.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "trainer": {
+     "trainer_class": "TrainerTopK",
+     "dict_class": "AutoEncoderTopK",
+     "lr": 0.0004,
+     "steps": "30881",
+     "seed": 0,
+     "activation_dim": 2304,
+     "dict_size": 4096,
+     "k": 80,
+     "device": "cuda:4",
+     "layer": 12,
+     "lm_name": "google/gemma-2-2b",
+     "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12",
+     "submodule_name": "resid_post_layer_12"
+   },
+   "buffer": {
+     "d_submodule": 2304,
+     "io": "out",
+     "n_ctxs": 2048,
+     "ctx_len": 128,
+     "refresh_batch_size": 24,
+     "out_batch_size": 2048,
+     "device": "cuda:4"
+   }
+ }
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_97/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3fedabc0da17b7bb0c2b8c3d2f072021f3bcce44ae9f4e20f7d93fe7fe2a1930
+ size 75524928
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_97/config.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "trainer": {
+     "trainer_class": "TrainerTopK",
+     "dict_class": "AutoEncoderTopK",
+     "lr": 0.0004,
+     "steps": "97",
+     "seed": 0,
+     "activation_dim": 2304,
+     "dict_size": 4096,
+     "k": 80,
+     "device": "cuda:4",
+     "layer": 12,
+     "lm_name": "google/gemma-2-2b",
+     "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12",
+     "submodule_name": "resid_post_layer_12"
+   },
+   "buffer": {
+     "d_submodule": 2304,
+     "io": "out",
+     "n_ctxs": 2048,
+     "ctx_len": 128,
+     "refresh_batch_size": 24,
+     "out_batch_size": 2048,
+     "device": "cuda:4"
+   }
+ }