canrager committed on
Commit
993682a
1 Parent(s): caac31f

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_0/ae.pt +3 -0
  2. pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_0/config.json +25 -0
  3. pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_0/eval_results.json +1 -0
  4. pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_1/ae.pt +3 -0
  5. pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_1/config.json +25 -0
  6. pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_1/eval_results.json +1 -0
  7. pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_10/ae.pt +3 -0
  8. pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_10/config.json +25 -0
  9. pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_10/eval_results.json +1 -0
  10. pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_11/ae.pt +3 -0
  11. pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_11/config.json +25 -0
  12. pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_11/eval_results.json +1 -0
  13. pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_12/ae.pt +3 -0
  14. pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_12/config.json +25 -0
  15. pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_12/eval_results.json +1 -0
  16. pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_13/ae.pt +3 -0
  17. pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_13/config.json +25 -0
  18. pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_13/eval_results.json +1 -0
  19. pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_14/ae.pt +3 -0
  20. pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_14/config.json +25 -0
  21. pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_14/eval_results.json +1 -0
  22. pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_15/ae.pt +3 -0
  23. pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_15/config.json +25 -0
  24. pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_15/eval_results.json +1 -0
  25. pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_16/ae.pt +3 -0
  26. pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_16/config.json +25 -0
  27. pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_16/eval_results.json +1 -0
  28. pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_17/ae.pt +3 -0
  29. pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_17/config.json +25 -0
  30. pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_17/eval_results.json +1 -0
  31. pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_18/ae.pt +3 -0
  32. pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_18/config.json +25 -0
  33. pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_18/eval_results.json +1 -0
  34. pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_19/ae.pt +3 -0
  35. pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_19/config.json +25 -0
  36. pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_19/eval_results.json +1 -0
  37. pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_2/ae.pt +3 -0
  38. pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_2/config.json +25 -0
  39. pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_2/eval_results.json +1 -0
  40. pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_3/ae.pt +3 -0
  41. pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_3/config.json +25 -0
  42. pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_3/eval_results.json +1 -0
  43. pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_4/ae.pt +3 -0
  44. pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_4/config.json +25 -0
  45. pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_4/eval_results.json +1 -0
  46. pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_5/ae.pt +3 -0
  47. pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_5/config.json +25 -0
  48. pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_5/eval_results.json +1 -0
  49. pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_6/ae.pt +3 -0
  50. pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_6/config.json +25 -0
pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_0/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3dcfa434a82a9bbcc9863757599777159bb9db31af0e68383e9dba21883bbafb
3
+ size 16830614
pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_0/config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "GatedAutoEncoder",
4
+ "trainer_class": "GatedSAETrainer",
5
+ "activation_dim": 512,
6
+ "dict_size": 4096,
7
+ "lr": 0.001,
8
+ "l1_penalty": 0.1,
9
+ "warmup_steps": 1000,
10
+ "device": "cuda:0",
11
+ "layer": 3,
12
+ "lm_name": "EleutherAI/pythia-70m-deduped",
13
+ "wandb_name": "GatedSAETrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3",
14
+ "submodule_name": "resid_post_layer_3"
15
+ },
16
+ "buffer": {
17
+ "d_submodule": 512,
18
+ "io": "out",
19
+ "n_ctxs": 10000,
20
+ "ctx_len": 128,
21
+ "refresh_batch_size": 32,
22
+ "out_batch_size": 4096,
23
+ "device": "cuda:0"
24
+ }
25
+ }
pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_0/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 1.7389051914215088, "l1_loss": 131.95904541015625, "l0": 407.3849792480469, "frac_variance_explained": 0.9963776767253876, "cossim": 0.9922364354133606, "l2_ratio": 0.9922811985015869, "relative_reconstruction_bias": 1.0007711052894592, "loss_original": 5.109375, "loss_reconstructed": 5.203125, "loss_zero": 13.5625, "frac_recovered": 0.98828125, "frac_alive": 0.45361328125, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_1/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32354d48b22953b9d7e305ace8ccb667512ab1a385fdafe6cb7d1c62f0a232f6
3
+ size 16830614
pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_1/config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "GatedAutoEncoder",
4
+ "trainer_class": "GatedSAETrainer",
5
+ "activation_dim": 512,
6
+ "dict_size": 4096,
7
+ "lr": 0.0001,
8
+ "l1_penalty": 0.1,
9
+ "warmup_steps": 1000,
10
+ "device": "cuda:0",
11
+ "layer": 3,
12
+ "lm_name": "EleutherAI/pythia-70m-deduped",
13
+ "wandb_name": "GatedSAETrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3",
14
+ "submodule_name": "resid_post_layer_3"
15
+ },
16
+ "buffer": {
17
+ "d_submodule": 512,
18
+ "io": "out",
19
+ "n_ctxs": 10000,
20
+ "ctx_len": 128,
21
+ "refresh_batch_size": 32,
22
+ "out_batch_size": 4096,
23
+ "device": "cuda:0"
24
+ }
25
+ }
pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_1/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 0.42098043859004974, "l1_loss": 221.11109161376953, "l0": 606.5899658203125, "frac_variance_explained": 0.9997320473194122, "cossim": 0.9995822012424469, "l2_ratio": 1.0001044273376465, "relative_reconstruction_bias": 1.0014447569847107, "loss_original": 5.109375, "loss_reconstructed": 5.125, "loss_zero": 13.5625, "frac_recovered": 1.0, "frac_alive": 0.48046875, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_10/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69a08bdd1d11bca24e9dc22bbb206ed65d68bc33bb227bc94fea19a342440309
3
+ size 67309718
pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_10/config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "GatedAutoEncoder",
4
+ "trainer_class": "GatedSAETrainer",
5
+ "activation_dim": 512,
6
+ "dict_size": 16384,
7
+ "lr": 0.001,
8
+ "l1_penalty": 0.5,
9
+ "warmup_steps": 1000,
10
+ "device": "cuda:0",
11
+ "layer": 3,
12
+ "lm_name": "EleutherAI/pythia-70m-deduped",
13
+ "wandb_name": "GatedSAETrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3",
14
+ "submodule_name": "resid_post_layer_3"
15
+ },
16
+ "buffer": {
17
+ "d_submodule": 512,
18
+ "io": "out",
19
+ "n_ctxs": 10000,
20
+ "ctx_len": 128,
21
+ "refresh_batch_size": 32,
22
+ "out_batch_size": 4096,
23
+ "device": "cuda:0"
24
+ }
25
+ }
pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_10/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 3.303561568260193, "l1_loss": 71.1842155456543, "l0": 130.38500213623047, "frac_variance_explained": 0.953961968421936, "cossim": 0.9726590514183044, "l2_ratio": 0.9666237831115723, "relative_reconstruction_bias": 0.9969744980335236, "loss_original": 5.109375, "loss_reconstructed": 5.34375, "loss_zero": 13.5625, "frac_recovered": 0.96875, "frac_alive": 0.29925537109375, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_11/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6fea5a4cbcffc318ae74d8f96ee6a5e7cf9844a7f9ba8897d06ff2dcecf2504f
3
+ size 67309718
pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_11/config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "GatedAutoEncoder",
4
+ "trainer_class": "GatedSAETrainer",
5
+ "activation_dim": 512,
6
+ "dict_size": 16384,
7
+ "lr": 0.0001,
8
+ "l1_penalty": 0.5,
9
+ "warmup_steps": 1000,
10
+ "device": "cuda:0",
11
+ "layer": 3,
12
+ "lm_name": "EleutherAI/pythia-70m-deduped",
13
+ "wandb_name": "GatedSAETrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3",
14
+ "submodule_name": "resid_post_layer_3"
15
+ },
16
+ "buffer": {
17
+ "d_submodule": 512,
18
+ "io": "out",
19
+ "n_ctxs": 10000,
20
+ "ctx_len": 128,
21
+ "refresh_batch_size": 32,
22
+ "out_batch_size": 4096,
23
+ "device": "cuda:0"
24
+ }
25
+ }
pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_11/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 2.5663819313049316, "l1_loss": 122.03194046020508, "l0": 241.2249984741211, "frac_variance_explained": 0.9915377795696259, "cossim": 0.9824479222297668, "l2_ratio": 0.9817214906215668, "relative_reconstruction_bias": 0.9983848631381989, "loss_original": 5.109375, "loss_reconstructed": 5.234375, "loss_zero": 13.5625, "frac_recovered": 0.984375, "frac_alive": 0.363037109375, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_12/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3885681809f2fddbf063b10077898ed4f516532fd29577202f34a0222d339bef
3
+ size 16830614
pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_12/config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "GatedAutoEncoder",
4
+ "trainer_class": "GatedSAETrainer",
5
+ "activation_dim": 512,
6
+ "dict_size": 4096,
7
+ "lr": 0.001,
8
+ "l1_penalty": 0.7,
9
+ "warmup_steps": 1000,
10
+ "device": "cuda:0",
11
+ "layer": 3,
12
+ "lm_name": "EleutherAI/pythia-70m-deduped",
13
+ "wandb_name": "GatedSAETrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3",
14
+ "submodule_name": "resid_post_layer_3"
15
+ },
16
+ "buffer": {
17
+ "d_submodule": 512,
18
+ "io": "out",
19
+ "n_ctxs": 10000,
20
+ "ctx_len": 128,
21
+ "refresh_batch_size": 32,
22
+ "out_batch_size": 4096,
23
+ "device": "cuda:0"
24
+ }
25
+ }
pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_12/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 3.748379111289978, "l1_loss": 60.0474910736084, "l0": 96.12499618530273, "frac_variance_explained": 0.9261168241500854, "cossim": 0.9605966508388519, "l2_ratio": 0.9601201415061951, "relative_reconstruction_bias": 1.0017150044441223, "loss_original": 5.109375, "loss_reconstructed": 5.453125, "loss_zero": 13.5625, "frac_recovered": 0.9609375, "frac_alive": 0.797119140625, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_13/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79ec504fa45bfb3d62d2d94b88fccad6d89ec472399fc161535aa113b1db683e
3
+ size 16830614
pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_13/config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "GatedAutoEncoder",
4
+ "trainer_class": "GatedSAETrainer",
5
+ "activation_dim": 512,
6
+ "dict_size": 4096,
7
+ "lr": 0.0001,
8
+ "l1_penalty": 0.7,
9
+ "warmup_steps": 1000,
10
+ "device": "cuda:0",
11
+ "layer": 3,
12
+ "lm_name": "EleutherAI/pythia-70m-deduped",
13
+ "wandb_name": "GatedSAETrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3",
14
+ "submodule_name": "resid_post_layer_3"
15
+ },
16
+ "buffer": {
17
+ "d_submodule": 512,
18
+ "io": "out",
19
+ "n_ctxs": 10000,
20
+ "ctx_len": 128,
21
+ "refresh_batch_size": 32,
22
+ "out_batch_size": 4096,
23
+ "device": "cuda:0"
24
+ }
25
+ }
pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_13/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 3.400195837020874, "l1_loss": 103.83464813232422, "l0": 125.91999816894531, "frac_variance_explained": 0.9450815618038177, "cossim": 0.9684658944606781, "l2_ratio": 0.9677082598209381, "relative_reconstruction_bias": 1.0002705752849579, "loss_original": 5.109375, "loss_reconstructed": 5.375, "loss_zero": 13.5625, "frac_recovered": 0.96875, "frac_alive": 0.603515625, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_14/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:055b53bbbde076f75d8811d691b8dd5a31779e183f50b504cc7aafd44887cd2c
3
+ size 67309718
pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_14/config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "GatedAutoEncoder",
4
+ "trainer_class": "GatedSAETrainer",
5
+ "activation_dim": 512,
6
+ "dict_size": 16384,
7
+ "lr": 0.001,
8
+ "l1_penalty": 0.7,
9
+ "warmup_steps": 1000,
10
+ "device": "cuda:0",
11
+ "layer": 3,
12
+ "lm_name": "EleutherAI/pythia-70m-deduped",
13
+ "wandb_name": "GatedSAETrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3",
14
+ "submodule_name": "resid_post_layer_3"
15
+ },
16
+ "buffer": {
17
+ "d_submodule": 512,
18
+ "io": "out",
19
+ "n_ctxs": 10000,
20
+ "ctx_len": 128,
21
+ "refresh_batch_size": 32,
22
+ "out_batch_size": 4096,
23
+ "device": "cuda:0"
24
+ }
25
+ }
pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_14/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 3.701678156852722, "l1_loss": 59.957834243774414, "l0": 87.77499771118164, "frac_variance_explained": 0.9801019430160522, "cossim": 0.964617520570755, "l2_ratio": 0.9765737950801849, "relative_reconstruction_bias": 1.0015305280685425, "loss_original": 5.109375, "loss_reconstructed": 5.40625, "loss_zero": 13.5625, "frac_recovered": 0.96875, "frac_alive": 0.25762939453125, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_15/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9019bb938a02b4dadebcef1b98fd6800876b6630d62c7058d1ea506f9670ee00
3
+ size 67309718
pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_15/config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "GatedAutoEncoder",
4
+ "trainer_class": "GatedSAETrainer",
5
+ "activation_dim": 512,
6
+ "dict_size": 16384,
7
+ "lr": 0.0001,
8
+ "l1_penalty": 0.7,
9
+ "warmup_steps": 1000,
10
+ "device": "cuda:0",
11
+ "layer": 3,
12
+ "lm_name": "EleutherAI/pythia-70m-deduped",
13
+ "wandb_name": "GatedSAETrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3",
14
+ "submodule_name": "resid_post_layer_3"
15
+ },
16
+ "buffer": {
17
+ "d_submodule": 512,
18
+ "io": "out",
19
+ "n_ctxs": 10000,
20
+ "ctx_len": 128,
21
+ "refresh_batch_size": 32,
22
+ "out_batch_size": 4096,
23
+ "device": "cuda:0"
24
+ }
25
+ }
pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_15/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 3.244529366493225, "l1_loss": 120.72209167480469, "l0": 134.03500366210938, "frac_variance_explained": 0.9876659214496613, "cossim": 0.9710160791873932, "l2_ratio": 0.9653101563453674, "relative_reconstruction_bias": 0.9983204305171967, "loss_original": 5.109375, "loss_reconstructed": 5.328125, "loss_zero": 13.5625, "frac_recovered": 0.97265625, "frac_alive": 0.2999267578125, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_16/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a48ac1307ad50ac232ab00d4f4637d4d6bb41ae53baf63d9278f73e94cda1e8c
3
+ size 16830614
pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_16/config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "GatedAutoEncoder",
4
+ "trainer_class": "GatedSAETrainer",
5
+ "activation_dim": 512,
6
+ "dict_size": 4096,
7
+ "lr": 0.001,
8
+ "l1_penalty": 0.9,
9
+ "warmup_steps": 1000,
10
+ "device": "cuda:0",
11
+ "layer": 3,
12
+ "lm_name": "EleutherAI/pythia-70m-deduped",
13
+ "wandb_name": "GatedSAETrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3",
14
+ "submodule_name": "resid_post_layer_3"
15
+ },
16
+ "buffer": {
17
+ "d_submodule": 512,
18
+ "io": "out",
19
+ "n_ctxs": 10000,
20
+ "ctx_len": 128,
21
+ "refresh_batch_size": 32,
22
+ "out_batch_size": 4096,
23
+ "device": "cuda:0"
24
+ }
25
+ }
pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_16/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 4.192893028259277, "l1_loss": 51.17982292175293, "l0": 68.81499862670898, "frac_variance_explained": 0.8996331989765167, "cossim": 0.9526893496513367, "l2_ratio": 0.957265168428421, "relative_reconstruction_bias": 1.0063791275024414, "loss_original": 5.109375, "loss_reconstructed": 5.5, "loss_zero": 13.5625, "frac_recovered": 0.95703125, "frac_alive": 0.694091796875, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_17/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a4a5cb499690d90aa667bf599608380f4c2c09248a1adb47959266238071574
3
+ size 16830614
pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_17/config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "GatedAutoEncoder",
4
+ "trainer_class": "GatedSAETrainer",
5
+ "activation_dim": 512,
6
+ "dict_size": 4096,
7
+ "lr": 0.0001,
8
+ "l1_penalty": 0.9,
9
+ "warmup_steps": 1000,
10
+ "device": "cuda:0",
11
+ "layer": 3,
12
+ "lm_name": "EleutherAI/pythia-70m-deduped",
13
+ "wandb_name": "GatedSAETrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3",
14
+ "submodule_name": "resid_post_layer_3"
15
+ },
16
+ "buffer": {
17
+ "d_submodule": 512,
18
+ "io": "out",
19
+ "n_ctxs": 10000,
20
+ "ctx_len": 128,
21
+ "refresh_batch_size": 32,
22
+ "out_batch_size": 4096,
23
+ "device": "cuda:0"
24
+ }
25
+ }
pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_17/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 4.077711820602417, "l1_loss": 99.79027938842773, "l0": 79.9749984741211, "frac_variance_explained": 0.9596592485904694, "cossim": 0.9543067216873169, "l2_ratio": 0.9473299384117126, "relative_reconstruction_bias": 0.9983561635017395, "loss_original": 5.109375, "loss_reconstructed": 5.546875, "loss_zero": 13.5625, "frac_recovered": 0.951171875, "frac_alive": 0.476318359375, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_18/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e34d8d1ae12ae97e6bda7ff5c02fdf6e88ae88689fe1bddae57d001aec7fa6a
3
+ size 67309718
pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_18/config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "GatedAutoEncoder",
4
+ "trainer_class": "GatedSAETrainer",
5
+ "activation_dim": 512,
6
+ "dict_size": 16384,
7
+ "lr": 0.001,
8
+ "l1_penalty": 0.9,
9
+ "warmup_steps": 1000,
10
+ "device": "cuda:0",
11
+ "layer": 3,
12
+ "lm_name": "EleutherAI/pythia-70m-deduped",
13
+ "wandb_name": "GatedSAETrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3",
14
+ "submodule_name": "resid_post_layer_3"
15
+ },
16
+ "buffer": {
17
+ "d_submodule": 512,
18
+ "io": "out",
19
+ "n_ctxs": 10000,
20
+ "ctx_len": 128,
21
+ "refresh_batch_size": 32,
22
+ "out_batch_size": 4096,
23
+ "device": "cuda:0"
24
+ }
25
+ }
pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_18/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 4.091695547103882, "l1_loss": 48.56572914123535, "l0": 60.5049991607666, "frac_variance_explained": 0.956131637096405, "cossim": 0.9561952650547028, "l2_ratio": 0.9431371688842773, "relative_reconstruction_bias": 0.9953996241092682, "loss_original": 5.109375, "loss_reconstructed": 5.46875, "loss_zero": 13.5625, "frac_recovered": 0.95703125, "frac_alive": 0.2049560546875, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_19/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d0c40f77fa82b3ee99367aa1c9c6fddfa4568a522b2950f72000e5e92086124
3
+ size 67309718
pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_19/config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "GatedAutoEncoder",
4
+ "trainer_class": "GatedSAETrainer",
5
+ "activation_dim": 512,
6
+ "dict_size": 16384,
7
+ "lr": 0.0001,
8
+ "l1_penalty": 0.9,
9
+ "warmup_steps": 1000,
10
+ "device": "cuda:0",
11
+ "layer": 3,
12
+ "lm_name": "EleutherAI/pythia-70m-deduped",
13
+ "wandb_name": "GatedSAETrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3",
14
+ "submodule_name": "resid_post_layer_3"
15
+ },
16
+ "buffer": {
17
+ "d_submodule": 512,
18
+ "io": "out",
19
+ "n_ctxs": 10000,
20
+ "ctx_len": 128,
21
+ "refresh_batch_size": 32,
22
+ "out_batch_size": 4096,
23
+ "device": "cuda:0"
24
+ }
25
+ }
pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_19/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 3.802935004234314, "l1_loss": 91.14981079101562, "l0": 90.1349983215332, "frac_variance_explained": 0.9635432958602905, "cossim": 0.9614428579807281, "l2_ratio": 0.9609856605529785, "relative_reconstruction_bias": 0.9999611675739288, "loss_original": 5.109375, "loss_reconstructed": 5.390625, "loss_zero": 13.5625, "frac_recovered": 0.96875, "frac_alive": 0.2374267578125, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_2/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bbda330eb07bb354e45b709ed66b72e46e00ff36053039ab13a79c508a30aafc
3
+ size 67309718
pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_2/config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "GatedAutoEncoder",
4
+ "trainer_class": "GatedSAETrainer",
5
+ "activation_dim": 512,
6
+ "dict_size": 16384,
7
+ "lr": 0.001,
8
+ "l1_penalty": 0.1,
9
+ "warmup_steps": 1000,
10
+ "device": "cuda:0",
11
+ "layer": 3,
12
+ "lm_name": "EleutherAI/pythia-70m-deduped",
13
+ "wandb_name": "GatedSAETrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3",
14
+ "submodule_name": "resid_post_layer_3"
15
+ },
16
+ "buffer": {
17
+ "d_submodule": 512,
18
+ "io": "out",
19
+ "n_ctxs": 10000,
20
+ "ctx_len": 128,
21
+ "refresh_batch_size": 32,
22
+ "out_batch_size": 4096,
23
+ "device": "cuda:0"
24
+ }
25
+ }
pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_2/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 1.8509351015090942, "l1_loss": 131.13249969482422, "l0": 396.88499450683594, "frac_variance_explained": 0.9975889325141907, "cossim": 0.9916780889034271, "l2_ratio": 0.9986166059970856, "relative_reconstruction_bias": 1.0027380585670471, "loss_original": 5.109375, "loss_reconstructed": 5.234375, "loss_zero": 13.5625, "frac_recovered": 0.984375, "frac_alive": 0.1112060546875, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_3/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ee0bbb022a986edae00574b8fbbfa1fca21624fe23929d848901c0aad59c508
3
+ size 67309718
pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_3/config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "GatedAutoEncoder",
4
+ "trainer_class": "GatedSAETrainer",
5
+ "activation_dim": 512,
6
+ "dict_size": 16384,
7
+ "lr": 0.0001,
8
+ "l1_penalty": 0.1,
9
+ "warmup_steps": 1000,
10
+ "device": "cuda:0",
11
+ "layer": 3,
12
+ "lm_name": "EleutherAI/pythia-70m-deduped",
13
+ "wandb_name": "GatedSAETrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3",
14
+ "submodule_name": "resid_post_layer_3"
15
+ },
16
+ "buffer": {
17
+ "d_submodule": 512,
18
+ "io": "out",
19
+ "n_ctxs": 10000,
20
+ "ctx_len": 128,
21
+ "refresh_batch_size": 32,
22
+ "out_batch_size": 4096,
23
+ "device": "cuda:0"
24
+ }
25
+ }
pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_3/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 0.43181225657463074, "l1_loss": 216.08472442626953, "l0": 590.9849853515625, "frac_variance_explained": 0.9997637271881104, "cossim": 0.9995618760585785, "l2_ratio": 1.0001021027565002, "relative_reconstruction_bias": 1.0001211762428284, "loss_original": 5.109375, "loss_reconstructed": 5.125, "loss_zero": 13.5625, "frac_recovered": 1.0, "frac_alive": 0.15557861328125, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_4/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc4d313ec47bc3cd65ef3b01a0f2eeb3c11cfc32cb71d09162755f2e7283e7da
3
+ size 16830614
pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_4/config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "GatedAutoEncoder",
4
+ "trainer_class": "GatedSAETrainer",
5
+ "activation_dim": 512,
6
+ "dict_size": 4096,
7
+ "lr": 0.001,
8
+ "l1_penalty": 0.3,
9
+ "warmup_steps": 1000,
10
+ "device": "cuda:0",
11
+ "layer": 3,
12
+ "lm_name": "EleutherAI/pythia-70m-deduped",
13
+ "wandb_name": "GatedSAETrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3",
14
+ "submodule_name": "resid_post_layer_3"
15
+ },
16
+ "buffer": {
17
+ "d_submodule": 512,
18
+ "io": "out",
19
+ "n_ctxs": 10000,
20
+ "ctx_len": 128,
21
+ "refresh_batch_size": 32,
22
+ "out_batch_size": 4096,
23
+ "device": "cuda:0"
24
+ }
25
+ }
pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_4/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 2.3242627382278442, "l1_loss": 103.32945251464844, "l0": 252.79999542236328, "frac_variance_explained": 0.9789170026779175, "cossim": 0.9860871434211731, "l2_ratio": 0.9804092943668365, "relative_reconstruction_bias": 0.9959313869476318, "loss_original": 5.109375, "loss_reconstructed": 5.234375, "loss_zero": 13.5625, "frac_recovered": 0.984375, "frac_alive": 0.826416015625, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_5/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23f9e5f6e411e78c58f132b1da3a717b5ce8175edca4916da2a0c98fa18ddc07
3
+ size 16830614
pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_5/config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "GatedAutoEncoder",
4
+ "trainer_class": "GatedSAETrainer",
5
+ "activation_dim": 512,
6
+ "dict_size": 4096,
7
+ "lr": 0.0001,
8
+ "l1_penalty": 0.3,
9
+ "warmup_steps": 1000,
10
+ "device": "cuda:0",
11
+ "layer": 3,
12
+ "lm_name": "EleutherAI/pythia-70m-deduped",
13
+ "wandb_name": "GatedSAETrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3",
14
+ "submodule_name": "resid_post_layer_3"
15
+ },
16
+ "buffer": {
17
+ "d_submodule": 512,
18
+ "io": "out",
19
+ "n_ctxs": 10000,
20
+ "ctx_len": 128,
21
+ "refresh_batch_size": 32,
22
+ "out_batch_size": 4096,
23
+ "device": "cuda:0"
24
+ }
25
+ }
pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_5/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 1.2129348516464233, "l1_loss": 197.0015640258789, "l0": 379.8599853515625, "frac_variance_explained": 0.9937676191329956, "cossim": 0.9962087273597717, "l2_ratio": 0.9963479340076447, "relative_reconstruction_bias": 1.0004808902740479, "loss_original": 5.109375, "loss_reconstructed": 5.140625, "loss_zero": 13.5625, "frac_recovered": 1.0, "frac_alive": 0.5693359375, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_6/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08572b71d671661971159a29db5a44e18796435245ba85ea709d45e041a82edc
3
+ size 67309718
pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_6/config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "GatedAutoEncoder",
4
+ "trainer_class": "GatedSAETrainer",
5
+ "activation_dim": 512,
6
+ "dict_size": 16384,
7
+ "lr": 0.001,
8
+ "l1_penalty": 0.3,
9
+ "warmup_steps": 1000,
10
+ "device": "cuda:0",
11
+ "layer": 3,
12
+ "lm_name": "EleutherAI/pythia-70m-deduped",
13
+ "wandb_name": "GatedSAETrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3",
14
+ "submodule_name": "resid_post_layer_3"
15
+ },
16
+ "buffer": {
17
+ "d_submodule": 512,
18
+ "io": "out",
19
+ "n_ctxs": 10000,
20
+ "ctx_len": 128,
21
+ "refresh_batch_size": 32,
22
+ "out_batch_size": 4096,
23
+ "device": "cuda:0"
24
+ }
25
+ }