nshan144 commited on
Commit
322ae7a
·
verified ·
1 Parent(s): 882c2c8

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_0/ae.pt +3 -0
  2. ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_0/config.json +30 -0
  3. ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_0/eval_results.json +1 -0
  4. ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_1/ae.pt +3 -0
  5. ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_1/config.json +30 -0
  6. ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_1/eval_results.json +1 -0
  7. ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_2/ae.pt +3 -0
  8. ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_2/config.json +30 -0
  9. ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_2/eval_results.json +1 -0
  10. ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_3/ae.pt +3 -0
  11. ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_3/config.json +30 -0
  12. ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_3/eval_results.json +1 -0
  13. ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_4/ae.pt +3 -0
  14. ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_4/config.json +30 -0
  15. ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_4/eval_results.json +1 -0
  16. ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_5/ae.pt +3 -0
  17. ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_5/config.json +30 -0
  18. ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_5/eval_results.json +1 -0
  19. ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_0/ae.pt +3 -0
  20. ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_0/config.json +30 -0
  21. ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_0/eval_results.json +1 -0
  22. ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_1/ae.pt +3 -0
  23. ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_1/config.json +30 -0
  24. ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_1/eval_results.json +1 -0
  25. ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_2/ae.pt +3 -0
  26. ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_2/config.json +30 -0
  27. ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_2/eval_results.json +1 -0
  28. ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_3/ae.pt +3 -0
  29. ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_3/config.json +30 -0
  30. ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_3/eval_results.json +1 -0
  31. ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_4/ae.pt +3 -0
  32. ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_4/config.json +30 -0
  33. ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_4/eval_results.json +1 -0
  34. ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_5/ae.pt +3 -0
  35. ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_5/config.json +30 -0
  36. ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_5/eval_results.json +1 -0
  37. ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_0/ae.pt +3 -0
  38. ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_0/config.json +30 -0
  39. ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_0/eval_results.json +1 -0
  40. ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_1/ae.pt +3 -0
  41. ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_1/config.json +30 -0
  42. ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_1/eval_results.json +1 -0
  43. ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_2/ae.pt +3 -0
  44. ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_2/config.json +30 -0
  45. ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_2/eval_results.json +1 -0
  46. ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_3/ae.pt +3 -0
  47. ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_3/config.json +30 -0
  48. ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_3/eval_results.json +1 -0
  49. ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_4/ae.pt +3 -0
  50. ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_4/config.json +30 -0
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_0/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08041a12047d806f49b1ca3c1febc9aca731beb68569a444d23fc2daaddf1fc4
3
+ size 67178280
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_0/config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainer",
5
+ "activation_dim": 512,
6
+ "dict_size": 16384,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.012,
9
+ "warmup_steps": 10,
10
+ "resample_steps": null,
11
+ "sparsity_warmup_steps": 10,
12
+ "steps": 488,
13
+ "decay_start": 390,
14
+ "seed": 0,
15
+ "device": "cuda:5",
16
+ "layer": 0,
17
+ "lm_name": "EleutherAI/pythia-70m-deduped",
18
+ "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_0",
19
+ "submodule_name": "resid_post_layer_0"
20
+ },
21
+ "buffer": {
22
+ "d_submodule": 512,
23
+ "io": "out",
24
+ "n_ctxs": 244,
25
+ "ctx_len": 1024,
26
+ "refresh_batch_size": 64,
27
+ "out_batch_size": 2048,
28
+ "device": "cuda:5"
29
+ }
30
+ }
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_0/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 1.4167590290307999, "l1_loss": 62.0523624420166, "l0": 1568.3194427490234, "frac_variance_explained": 0.943539559841156, "cossim": 0.9821989126503468, "l2_ratio": 0.9195215627551079, "relative_reconstruction_bias": 0.9397303834557533, "loss_original": 2.988885059952736, "loss_reconstructed": 3.106488525867462, "loss_zero": 8.828418850898743, "frac_recovered": 0.9798414222896099, "frac_alive": 0.48284912109375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_1/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f68a2c94c70e951bbc10069ccbc6c9ec24e20cb02f19c2d15ee3a673adf1a300
3
+ size 67178280
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_1/config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainer",
5
+ "activation_dim": 512,
6
+ "dict_size": 16384,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.015,
9
+ "warmup_steps": 10,
10
+ "resample_steps": null,
11
+ "sparsity_warmup_steps": 10,
12
+ "steps": 488,
13
+ "decay_start": 390,
14
+ "seed": 0,
15
+ "device": "cuda:5",
16
+ "layer": 0,
17
+ "lm_name": "EleutherAI/pythia-70m-deduped",
18
+ "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_0",
19
+ "submodule_name": "resid_post_layer_0"
20
+ },
21
+ "buffer": {
22
+ "d_submodule": 512,
23
+ "io": "out",
24
+ "n_ctxs": 244,
25
+ "ctx_len": 1024,
26
+ "refresh_batch_size": 64,
27
+ "out_batch_size": 2048,
28
+ "device": "cuda:5"
29
+ }
30
+ }
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_1/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 1.5789328217506409, "l1_loss": 55.72900319099426, "l0": 1330.4508972167969, "frac_variance_explained": 0.9300252571702003, "cossim": 0.9778566658496857, "l2_ratio": 0.9065023027360439, "relative_reconstruction_bias": 0.9307215549051762, "loss_original": 2.988885059952736, "loss_reconstructed": 3.1464501470327377, "loss_zero": 8.828418850898743, "frac_recovered": 0.9729870557785034, "frac_alive": 0.47406005859375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_2/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5bed5c2fd61816e5fc0719d9adca7311a85129b8d78272cbf46cb62748ab5548
3
+ size 67178280
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_2/config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainer",
5
+ "activation_dim": 512,
6
+ "dict_size": 16384,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.02,
9
+ "warmup_steps": 10,
10
+ "resample_steps": null,
11
+ "sparsity_warmup_steps": 10,
12
+ "steps": 488,
13
+ "decay_start": 390,
14
+ "seed": 0,
15
+ "device": "cuda:5",
16
+ "layer": 0,
17
+ "lm_name": "EleutherAI/pythia-70m-deduped",
18
+ "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_0",
19
+ "submodule_name": "resid_post_layer_0"
20
+ },
21
+ "buffer": {
22
+ "d_submodule": 512,
23
+ "io": "out",
24
+ "n_ctxs": 244,
25
+ "ctx_len": 1024,
26
+ "refresh_batch_size": 64,
27
+ "out_batch_size": 2048,
28
+ "device": "cuda:5"
29
+ }
30
+ }
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_2/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 1.8266066312789917, "l1_loss": 47.685190200805664, "l0": 1031.1428604125977, "frac_variance_explained": 0.9062778800725937, "cossim": 0.9702106080949306, "l2_ratio": 0.8861087150871754, "relative_reconstruction_bias": 0.9171949252486229, "loss_original": 2.988885059952736, "loss_reconstructed": 3.251991391181946, "loss_zero": 8.828418850898743, "frac_recovered": 0.9548681862652302, "frac_alive": 0.4541015625, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_3/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72edbdb237b1133d7d63248e9dbdc9dd0826fb9fa0fcb64603ee890a4f3e6661
3
+ size 67178280
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_3/config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainer",
5
+ "activation_dim": 512,
6
+ "dict_size": 16384,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.03,
9
+ "warmup_steps": 10,
10
+ "resample_steps": null,
11
+ "sparsity_warmup_steps": 10,
12
+ "steps": 488,
13
+ "decay_start": 390,
14
+ "seed": 0,
15
+ "device": "cuda:5",
16
+ "layer": 0,
17
+ "lm_name": "EleutherAI/pythia-70m-deduped",
18
+ "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_0",
19
+ "submodule_name": "resid_post_layer_0"
20
+ },
21
+ "buffer": {
22
+ "d_submodule": 512,
23
+ "io": "out",
24
+ "n_ctxs": 244,
25
+ "ctx_len": 1024,
26
+ "refresh_batch_size": 64,
27
+ "out_batch_size": 2048,
28
+ "device": "cuda:5"
29
+ }
30
+ }
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_3/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 2.280204176902771, "l1_loss": 36.662381649017334, "l0": 639.2051010131836, "frac_variance_explained": 0.8530002571642399, "cossim": 0.952362023293972, "l2_ratio": 0.8507192321121693, "relative_reconstruction_bias": 0.8974457383155823, "loss_original": 2.988885059952736, "loss_reconstructed": 3.4817975610494614, "loss_zero": 8.828418850898743, "frac_recovered": 0.9160263948142529, "frac_alive": 0.43353271484375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_4/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c51a70b6f60418f10aa224b5abc555b4be0f46b8862c98962ca0741e4677d7e5
3
+ size 67178280
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_4/config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainer",
5
+ "activation_dim": 512,
6
+ "dict_size": 16384,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.04,
9
+ "warmup_steps": 10,
10
+ "resample_steps": null,
11
+ "sparsity_warmup_steps": 10,
12
+ "steps": 488,
13
+ "decay_start": 390,
14
+ "seed": 0,
15
+ "device": "cuda:5",
16
+ "layer": 0,
17
+ "lm_name": "EleutherAI/pythia-70m-deduped",
18
+ "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_0",
19
+ "submodule_name": "resid_post_layer_0"
20
+ },
21
+ "buffer": {
22
+ "d_submodule": 512,
23
+ "io": "out",
24
+ "n_ctxs": 244,
25
+ "ctx_len": 1024,
26
+ "refresh_batch_size": 64,
27
+ "out_batch_size": 2048,
28
+ "device": "cuda:5"
29
+ }
30
+ }
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_4/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 2.630507007241249, "l1_loss": 28.577664136886597, "l0": 394.4646053314209, "frac_variance_explained": 0.8021240644156933, "cossim": 0.9342639222741127, "l2_ratio": 0.8253069184720516, "relative_reconstruction_bias": 0.8880686983466148, "loss_original": 2.988885059952736, "loss_reconstructed": 3.6719041615724564, "loss_zero": 8.828418850898743, "frac_recovered": 0.883676066994667, "frac_alive": 0.41949462890625, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_5/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ed64d5e83254a4a1680542ca54f585d6989792806da6f1d1d6e084493d1b67f
3
+ size 67178280
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_5/config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainer",
5
+ "activation_dim": 512,
6
+ "dict_size": 16384,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.06,
9
+ "warmup_steps": 10,
10
+ "resample_steps": null,
11
+ "sparsity_warmup_steps": 10,
12
+ "steps": 488,
13
+ "decay_start": 390,
14
+ "seed": 0,
15
+ "device": "cuda:5",
16
+ "layer": 0,
17
+ "lm_name": "EleutherAI/pythia-70m-deduped",
18
+ "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_0",
19
+ "submodule_name": "resid_post_layer_0"
20
+ },
21
+ "buffer": {
22
+ "d_submodule": 512,
23
+ "io": "out",
24
+ "n_ctxs": 244,
25
+ "ctx_len": 1024,
26
+ "refresh_batch_size": 64,
27
+ "out_batch_size": 2048,
28
+ "device": "cuda:5"
29
+ }
30
+ }
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_5/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 3.089244395494461, "l1_loss": 18.967594146728516, "l0": 164.81410884857178, "frac_variance_explained": 0.7212224006652832, "cossim": 0.9034432731568813, "l2_ratio": 0.7909861765801907, "relative_reconstruction_bias": 0.8814779929816723, "loss_original": 2.988885059952736, "loss_reconstructed": 4.005634561181068, "loss_zero": 8.828418850898743, "frac_recovered": 0.8266438916325569, "frac_alive": 0.39056396484375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_0/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:820854280e0d77fddf8f509bb79ad6e9a428b4b75d9060bc1ddbcb19b7ab8d89
3
+ size 67178280
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_0/config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainer",
5
+ "activation_dim": 512,
6
+ "dict_size": 16384,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.012,
9
+ "warmup_steps": 10,
10
+ "resample_steps": null,
11
+ "sparsity_warmup_steps": 10,
12
+ "steps": 488,
13
+ "decay_start": 390,
14
+ "seed": 0,
15
+ "device": "cuda:5",
16
+ "layer": 1,
17
+ "lm_name": "EleutherAI/pythia-70m-deduped",
18
+ "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_1",
19
+ "submodule_name": "resid_post_layer_1"
20
+ },
21
+ "buffer": {
22
+ "d_submodule": 512,
23
+ "io": "out",
24
+ "n_ctxs": 244,
25
+ "ctx_len": 1024,
26
+ "refresh_batch_size": 64,
27
+ "out_batch_size": 2048,
28
+ "device": "cuda:5"
29
+ }
30
+ }
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_0/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 1.921069249510765, "l1_loss": 86.7099380493164, "l0": 1889.5925827026367, "frac_variance_explained": 0.9374731332063675, "cossim": 0.9791730977594852, "l2_ratio": 0.9044194445014, "relative_reconstruction_bias": 0.9294404052197933, "loss_original": 2.988885059952736, "loss_reconstructed": 3.0913854241371155, "loss_zero": 8.97075641155243, "frac_recovered": 0.9828116111457348, "frac_alive": 0.47479248046875, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_1/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5b98cc0e9bcfc76d8507051e5bd1366145f34774c39ab5c0d83fdef56df14af
3
+ size 67178280
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_1/config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainer",
5
+ "activation_dim": 512,
6
+ "dict_size": 16384,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.015,
9
+ "warmup_steps": 10,
10
+ "resample_steps": null,
11
+ "sparsity_warmup_steps": 10,
12
+ "steps": 488,
13
+ "decay_start": 390,
14
+ "seed": 0,
15
+ "device": "cuda:5",
16
+ "layer": 1,
17
+ "lm_name": "EleutherAI/pythia-70m-deduped",
18
+ "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_1",
19
+ "submodule_name": "resid_post_layer_1"
20
+ },
21
+ "buffer": {
22
+ "d_submodule": 512,
23
+ "io": "out",
24
+ "n_ctxs": 244,
25
+ "ctx_len": 1024,
26
+ "refresh_batch_size": 64,
27
+ "out_batch_size": 2048,
28
+ "device": "cuda:5"
29
+ }
30
+ }
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_1/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 2.1518948525190353, "l1_loss": 78.03551197052002, "l0": 1625.4286727905273, "frac_variance_explained": 0.9218312874436378, "cossim": 0.9738432057201862, "l2_ratio": 0.8885384723544121, "relative_reconstruction_bias": 0.9186785258352757, "loss_original": 2.988885059952736, "loss_reconstructed": 3.1272957623004913, "loss_zero": 8.97075641155243, "frac_recovered": 0.9767856597900391, "frac_alive": 0.47882080078125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_2/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7bdbc4935c9d5298096ee6ea78bfe243612fc4731ea0caf2a1d368f266b17448
3
+ size 67178280
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_2/config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainer",
5
+ "activation_dim": 512,
6
+ "dict_size": 16384,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.02,
9
+ "warmup_steps": 10,
10
+ "resample_steps": null,
11
+ "sparsity_warmup_steps": 10,
12
+ "steps": 488,
13
+ "decay_start": 390,
14
+ "seed": 0,
15
+ "device": "cuda:5",
16
+ "layer": 1,
17
+ "lm_name": "EleutherAI/pythia-70m-deduped",
18
+ "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_1",
19
+ "submodule_name": "resid_post_layer_1"
20
+ },
21
+ "buffer": {
22
+ "d_submodule": 512,
23
+ "io": "out",
24
+ "n_ctxs": 244,
25
+ "ctx_len": 1024,
26
+ "refresh_batch_size": 64,
27
+ "out_batch_size": 2048,
28
+ "device": "cuda:5"
29
+ }
30
+ }
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_2/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 2.5168990939855576, "l1_loss": 66.47064352035522, "l0": 1277.5725173950195, "frac_variance_explained": 0.8933972716331482, "cossim": 0.9639947153627872, "l2_ratio": 0.8639187514781952, "relative_reconstruction_bias": 0.9028670713305473, "loss_original": 2.988885059952736, "loss_reconstructed": 3.2126090973615646, "loss_zero": 8.97075641155243, "frac_recovered": 0.9625394903123379, "frac_alive": 0.47686767578125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_3/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10b606532e1f82f19fdd1e40b0c23c64f9ebe39ae79d7d587a61e65325d2ab30
3
+ size 67178280
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_3/config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainer",
5
+ "activation_dim": 512,
6
+ "dict_size": 16384,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.03,
9
+ "warmup_steps": 10,
10
+ "resample_steps": null,
11
+ "sparsity_warmup_steps": 10,
12
+ "steps": 488,
13
+ "decay_start": 390,
14
+ "seed": 0,
15
+ "device": "cuda:5",
16
+ "layer": 1,
17
+ "lm_name": "EleutherAI/pythia-70m-deduped",
18
+ "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_1",
19
+ "submodule_name": "resid_post_layer_1"
20
+ },
21
+ "buffer": {
22
+ "d_submodule": 512,
23
+ "io": "out",
24
+ "n_ctxs": 244,
25
+ "ctx_len": 1024,
26
+ "refresh_batch_size": 64,
27
+ "out_batch_size": 2048,
28
+ "device": "cuda:5"
29
+ }
30
+ }
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_3/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 3.169589176774025, "l1_loss": 49.724985122680664, "l0": 787.4504432678223, "frac_variance_explained": 0.8310047425329685, "cossim": 0.9411170482635498, "l2_ratio": 0.8247413076460361, "relative_reconstruction_bias": 0.8834847621619701, "loss_original": 2.988885059952736, "loss_reconstructed": 3.4419959783554077, "loss_zero": 8.97075641155243, "frac_recovered": 0.9246886782348156, "frac_alive": 0.478271484375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_4/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66a161fb8dfc91099dfd8e610fb705a9dab9c3bd9356622160f322cdaaf6bac4
3
+ size 67178280
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_4/config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainer",
5
+ "activation_dim": 512,
6
+ "dict_size": 16384,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.04,
9
+ "warmup_steps": 10,
10
+ "resample_steps": null,
11
+ "sparsity_warmup_steps": 10,
12
+ "steps": 488,
13
+ "decay_start": 390,
14
+ "seed": 0,
15
+ "device": "cuda:5",
16
+ "layer": 1,
17
+ "lm_name": "EleutherAI/pythia-70m-deduped",
18
+ "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_1",
19
+ "submodule_name": "resid_post_layer_1"
20
+ },
21
+ "buffer": {
22
+ "d_submodule": 512,
23
+ "io": "out",
24
+ "n_ctxs": 244,
25
+ "ctx_len": 1024,
26
+ "refresh_batch_size": 64,
27
+ "out_batch_size": 2048,
28
+ "device": "cuda:5"
29
+ }
30
+ }
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_4/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 3.70350344479084, "l1_loss": 37.7823760509491, "l0": 474.227840423584, "frac_variance_explained": 0.7687290497124195, "cossim": 0.9165537543594837, "l2_ratio": 0.7957354746758938, "relative_reconstruction_bias": 0.876005794852972, "loss_original": 2.988885059952736, "loss_reconstructed": 3.693199545145035, "loss_zero": 8.97075641155243, "frac_recovered": 0.882872398942709, "frac_alive": 0.48974609375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_5/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69b8b04d6c4829e44878a7cfc9bb99f3fa16deff3e2edcb5e19d4e4720640ced
3
+ size 67178280
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_5/config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainer",
5
+ "activation_dim": 512,
6
+ "dict_size": 16384,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.06,
9
+ "warmup_steps": 10,
10
+ "resample_steps": null,
11
+ "sparsity_warmup_steps": 10,
12
+ "steps": 488,
13
+ "decay_start": 390,
14
+ "seed": 0,
15
+ "device": "cuda:5",
16
+ "layer": 1,
17
+ "lm_name": "EleutherAI/pythia-70m-deduped",
18
+ "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_1",
19
+ "submodule_name": "resid_post_layer_1"
20
+ },
21
+ "buffer": {
22
+ "d_submodule": 512,
23
+ "io": "out",
24
+ "n_ctxs": 244,
25
+ "ctx_len": 1024,
26
+ "refresh_batch_size": 64,
27
+ "out_batch_size": 2048,
28
+ "device": "cuda:5"
29
+ }
30
+ }
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_5/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 4.367744535207748, "l1_loss": 24.294288277626038, "l0": 183.46984004974365, "frac_variance_explained": 0.6762260124087334, "cossim": 0.8779518343508244, "l2_ratio": 0.7569525502622128, "relative_reconstruction_bias": 0.871852483600378, "loss_original": 2.988885059952736, "loss_reconstructed": 4.113821625709534, "loss_zero": 8.97075641155243, "frac_recovered": 0.8127008564770222, "frac_alive": 0.48626708984375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_0/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0cedc3d1f87faf8d918f58ab0874219bf8b4cb884f1745c03bc57247330e6d45
3
+ size 67178280
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_0/config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainer",
5
+ "activation_dim": 512,
6
+ "dict_size": 16384,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.012,
9
+ "warmup_steps": 10,
10
+ "resample_steps": null,
11
+ "sparsity_warmup_steps": 10,
12
+ "steps": 488,
13
+ "decay_start": 390,
14
+ "seed": 0,
15
+ "device": "cuda:5",
16
+ "layer": 2,
17
+ "lm_name": "EleutherAI/pythia-70m-deduped",
18
+ "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_2",
19
+ "submodule_name": "resid_post_layer_2"
20
+ },
21
+ "buffer": {
22
+ "d_submodule": 512,
23
+ "io": "out",
24
+ "n_ctxs": 244,
25
+ "ctx_len": 1024,
26
+ "refresh_batch_size": 64,
27
+ "out_batch_size": 2048,
28
+ "device": "cuda:5"
29
+ }
30
+ }
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_0/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 3.1716324537992477, "l1_loss": 109.77040338516235, "l0": 1611.395637512207, "frac_variance_explained": 0.9486353807151318, "cossim": 0.9696135818958282, "l2_ratio": 0.8780255950987339, "relative_reconstruction_bias": 0.9444989748299122, "loss_original": 2.988885059952736, "loss_reconstructed": 3.313967987895012, "loss_zero": 9.41981041431427, "frac_recovered": 0.9495267793536186, "frac_alive": 0.532470703125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_1/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:309d11c65c3e16ffda7685ea51f4b04ddd935ed7c6a49823a9dea9e8b41f420d
3
+ size 67178280
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_1/config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainer",
5
+ "activation_dim": 512,
6
+ "dict_size": 16384,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.015,
9
+ "warmup_steps": 10,
10
+ "resample_steps": null,
11
+ "sparsity_warmup_steps": 10,
12
+ "steps": 488,
13
+ "decay_start": 390,
14
+ "seed": 0,
15
+ "device": "cuda:5",
16
+ "layer": 2,
17
+ "lm_name": "EleutherAI/pythia-70m-deduped",
18
+ "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_2",
19
+ "submodule_name": "resid_post_layer_2"
20
+ },
21
+ "buffer": {
22
+ "d_submodule": 512,
23
+ "io": "out",
24
+ "n_ctxs": 244,
25
+ "ctx_len": 1024,
26
+ "refresh_batch_size": 64,
27
+ "out_batch_size": 2048,
28
+ "device": "cuda:5"
29
+ }
30
+ }
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_1/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 3.5997580736875534, "l1_loss": 96.4208664894104, "l0": 1306.8117294311523, "frac_variance_explained": 0.9347885400056839, "cossim": 0.9604156315326691, "l2_ratio": 0.8589782565832138, "relative_reconstruction_bias": 0.9393252618610859, "loss_original": 2.988885059952736, "loss_reconstructed": 3.4662882536649704, "loss_zero": 9.41981041431427, "frac_recovered": 0.9260412231087685, "frac_alive": 0.5325927734375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_2/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c85ab3b7913d8c2116da2c18df2340f628b33b7ad444fe6fac5ed284aa26d12d
3
+ size 67178280
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_2/config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainer",
5
+ "activation_dim": 512,
6
+ "dict_size": 16384,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.02,
9
+ "warmup_steps": 10,
10
+ "resample_steps": null,
11
+ "sparsity_warmup_steps": 10,
12
+ "steps": 488,
13
+ "decay_start": 390,
14
+ "seed": 0,
15
+ "device": "cuda:5",
16
+ "layer": 2,
17
+ "lm_name": "EleutherAI/pythia-70m-deduped",
18
+ "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_2",
19
+ "submodule_name": "resid_post_layer_2"
20
+ },
21
+ "buffer": {
22
+ "d_submodule": 512,
23
+ "io": "out",
24
+ "n_ctxs": 244,
25
+ "ctx_len": 1024,
26
+ "refresh_batch_size": 64,
27
+ "out_batch_size": 2048,
28
+ "device": "cuda:5"
29
+ }
30
+ }
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_2/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 4.234767764806747, "l1_loss": 79.00359535217285, "l0": 927.1257019042969, "frac_variance_explained": 0.9083689413964748, "cossim": 0.9438304528594017, "l2_ratio": 0.8312639258801937, "relative_reconstruction_bias": 0.931897010654211, "loss_original": 2.988885059952736, "loss_reconstructed": 3.781919851899147, "loss_zero": 9.41981041431427, "frac_recovered": 0.8772863261401653, "frac_alive": 0.54339599609375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_3/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f4f1b6ae72ea4efc055b7106d07c4edb55f74f99ebb2f9f4b2553a958d9d4ea
3
+ size 67178280
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_3/config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainer",
5
+ "activation_dim": 512,
6
+ "dict_size": 16384,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.03,
9
+ "warmup_steps": 10,
10
+ "resample_steps": null,
11
+ "sparsity_warmup_steps": 10,
12
+ "steps": 488,
13
+ "decay_start": 390,
14
+ "seed": 0,
15
+ "device": "cuda:5",
16
+ "layer": 2,
17
+ "lm_name": "EleutherAI/pythia-70m-deduped",
18
+ "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_2",
19
+ "submodule_name": "resid_post_layer_2"
20
+ },
21
+ "buffer": {
22
+ "d_submodule": 512,
23
+ "io": "out",
24
+ "n_ctxs": 244,
25
+ "ctx_len": 1024,
26
+ "refresh_batch_size": 64,
27
+ "out_batch_size": 2048,
28
+ "device": "cuda:5"
29
+ }
30
+ }
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_3/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 5.191680729389191, "l1_loss": 56.57015347480774, "l0": 475.47170066833496, "frac_variance_explained": 0.8640795983374119, "cossim": 0.9118851572275162, "l2_ratio": 0.7894385904073715, "relative_reconstruction_bias": 0.9269732721149921, "loss_original": 2.988885059952736, "loss_reconstructed": 4.469057783484459, "loss_zero": 9.41981041431427, "frac_recovered": 0.7708443030714989, "frac_alive": 0.5369873046875, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_4/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:520643a4d5d7e9668d1db789c4e292fa8cbf0a008d9343fc10af3597b518f147
3
+ size 67178280
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_4/config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainer",
5
+ "activation_dim": 512,
6
+ "dict_size": 16384,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.04,
9
+ "warmup_steps": 10,
10
+ "resample_steps": null,
11
+ "sparsity_warmup_steps": 10,
12
+ "steps": 488,
13
+ "decay_start": 390,
14
+ "seed": 0,
15
+ "device": "cuda:5",
16
+ "layer": 2,
17
+ "lm_name": "EleutherAI/pythia-70m-deduped",
18
+ "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_2",
19
+ "submodule_name": "resid_post_layer_2"
20
+ },
21
+ "buffer": {
22
+ "d_submodule": 512,
23
+ "io": "out",
24
+ "n_ctxs": 244,
25
+ "ctx_len": 1024,
26
+ "refresh_batch_size": 64,
27
+ "out_batch_size": 2048,
28
+ "device": "cuda:5"
29
+ }
30
+ }