canrager commited on
Commit
caac31f
1 Parent(s): b5f16a7

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_0/ae.pt +3 -0
  2. gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_0/config.json +26 -0
  3. gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_0/eval_results.json +1 -0
  4. gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_1/ae.pt +3 -0
  5. gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_1/config.json +26 -0
  6. gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_1/eval_results.json +1 -0
  7. gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_2/ae.pt +3 -0
  8. gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_2/config.json +26 -0
  9. gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_2/eval_results.json +1 -0
  10. gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_3/ae.pt +3 -0
  11. gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_3/config.json +26 -0
  12. gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_3/eval_results.json +1 -0
  13. gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_4/ae.pt +3 -0
  14. gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_4/config.json +26 -0
  15. gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_4/eval_results.json +1 -0
  16. gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_5/ae.pt +3 -0
  17. gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_5/config.json +26 -0
  18. gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_5/eval_results.json +1 -0
  19. gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_0/ae.pt +3 -0
  20. gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_0/config.json +27 -0
  21. gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_0/eval_results.json +1 -0
  22. gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_19528/ae.pt +3 -0
  23. gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_19528/config.json +27 -0
  24. gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_19528/eval_results.json +1 -0
  25. gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_29292/ae.pt +3 -0
  26. gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_29292/config.json +27 -0
  27. gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_29292/eval_results.json +1 -0
  28. gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_4882/ae.pt +3 -0
  29. gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_4882/config.json +27 -0
  30. gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_4882/eval_results.json +1 -0
  31. gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_9764/ae.pt +3 -0
  32. gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_9764/config.json +27 -0
  33. gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_9764/eval_results.json +1 -0
  34. gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_0/ae.pt +3 -0
  35. gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_0/config.json +27 -0
  36. gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_0/eval_results.json +1 -0
  37. gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_19528/ae.pt +3 -0
  38. gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_19528/config.json +27 -0
  39. gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_19528/eval_results.json +1 -0
  40. gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_29292/ae.pt +3 -0
  41. gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_29292/config.json +27 -0
  42. gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_29292/eval_results.json +1 -0
  43. gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_4882/ae.pt +3 -0
  44. gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_4882/config.json +27 -0
  45. gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_4882/eval_results.json +1 -0
  46. gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_9764/ae.pt +3 -0
  47. gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_9764/config.json +27 -0
  48. gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_9764/eval_results.json +1 -0
  49. gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_0/ae.pt +3 -0
  50. gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_0/config.json +27 -0
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_0/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec94483dfdaf6baef38641e79ae4390fd1c4be0db552e4313408c7adfc9a25e6
3
+ size 339823336
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_0/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainer",
5
+ "activation_dim": 2304,
6
+ "dict_size": 18432,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.025,
9
+ "warmup_steps": 1000,
10
+ "resample_steps": null,
11
+ "device": "cuda:0",
12
+ "layer": 11,
13
+ "lm_name": "google/gemma-2-2b",
14
+ "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11",
15
+ "submodule_name": "resid_post_layer_11"
16
+ },
17
+ "buffer": {
18
+ "d_submodule": 2304,
19
+ "io": "out",
20
+ "n_ctxs": 2000,
21
+ "ctx_len": 128,
22
+ "refresh_batch_size": 24,
23
+ "out_batch_size": 4096,
24
+ "device": "cuda:0"
25
+ }
26
+ }
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_0/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 44.21599082946777, "l1_loss": 689.0535034179687, "l0": 653.0125183105469, "frac_variance_explained": 0.876203840970993, "cossim": 0.9503044188022614, "l2_ratio": 0.9132377088069916, "relative_reconstruction_bias": 0.9689015865325927, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.516097593307495, "loss_zero": 12.452933025360107, "frac_recovered": 0.9933268785476684, "frac_alive": 0.722276508808136, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_1/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c35b088143b227affa0d50ac160e0d4d4b288be1b3e7a9d8a2eec7262290aa10
3
+ size 339823336
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_1/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainer",
5
+ "activation_dim": 2304,
6
+ "dict_size": 18432,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.035,
9
+ "warmup_steps": 1000,
10
+ "resample_steps": null,
11
+ "device": "cuda:0",
12
+ "layer": 11,
13
+ "lm_name": "google/gemma-2-2b",
14
+ "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11",
15
+ "submodule_name": "resid_post_layer_11"
16
+ },
17
+ "buffer": {
18
+ "d_submodule": 2304,
19
+ "io": "out",
20
+ "n_ctxs": 2000,
21
+ "ctx_len": 128,
22
+ "refresh_batch_size": 24,
23
+ "out_batch_size": 4096,
24
+ "device": "cuda:0"
25
+ }
26
+ }
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_1/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 54.783888244628905, "l1_loss": 430.45958251953124, "l0": 216.9250061035156, "frac_variance_explained": 0.821322637796402, "cossim": 0.9228447735309601, "l2_ratio": 0.8802441656589508, "relative_reconstruction_bias": 0.9668310403823852, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.602967619895935, "loss_zero": 12.452933025360107, "frac_recovered": 0.9846588850021363, "frac_alive": 0.4129774272441864, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_2/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86956cdaef37056627cc68976f5eee8268b1f88273c33b2aaa183dd8606e002a
3
+ size 339823336
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_2/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainer",
5
+ "activation_dim": 2304,
6
+ "dict_size": 18432,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.04,
9
+ "warmup_steps": 1000,
10
+ "resample_steps": null,
11
+ "device": "cuda:0",
12
+ "layer": 11,
13
+ "lm_name": "google/gemma-2-2b",
14
+ "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11",
15
+ "submodule_name": "resid_post_layer_11"
16
+ },
17
+ "buffer": {
18
+ "d_submodule": 2304,
19
+ "io": "out",
20
+ "n_ctxs": 2000,
21
+ "ctx_len": 128,
22
+ "refresh_batch_size": 24,
23
+ "out_batch_size": 4096,
24
+ "device": "cuda:0"
25
+ }
26
+ }
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_2/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 60.528709411621094, "l1_loss": 360.99082946777344, "l0": 142.25417404174806, "frac_variance_explained": 0.7947052717208862, "cossim": 0.9053027153015136, "l2_ratio": 0.855720329284668, "relative_reconstruction_bias": 0.9635509788990021, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.675975728034973, "loss_zero": 12.452933025360107, "frac_recovered": 0.9773781895637512, "frac_alive": 0.28173828125, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_3/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93578a2028d6821229712eff158bc843ed03693b1af5c760ef67ec9baf415170
3
+ size 339823336
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_3/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainer",
5
+ "activation_dim": 2304,
6
+ "dict_size": 18432,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.05,
9
+ "warmup_steps": 1000,
10
+ "resample_steps": null,
11
+ "device": "cuda:0",
12
+ "layer": 11,
13
+ "lm_name": "google/gemma-2-2b",
14
+ "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11",
15
+ "submodule_name": "resid_post_layer_11"
16
+ },
17
+ "buffer": {
18
+ "d_submodule": 2304,
19
+ "io": "out",
20
+ "n_ctxs": 2000,
21
+ "ctx_len": 128,
22
+ "refresh_batch_size": 24,
23
+ "out_batch_size": 4096,
24
+ "device": "cuda:0"
25
+ }
26
+ }
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_3/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 69.19085235595703, "l1_loss": 260.5812042236328, "l0": 68.32500228881835, "frac_variance_explained": 0.6767194271087646, "cossim": 0.8739384233951568, "l2_ratio": 0.8242608070373535, "relative_reconstruction_bias": 0.9541641473770142, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.999904203414917, "loss_zero": 12.452933025360107, "frac_recovered": 0.9450006365776062, "frac_alive": 0.1184895858168602, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_4/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f34e7a335652c290cd383cbf538b93ac8ff531ecf08c4006f9d6f1274c529fd
3
+ size 339823336
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_4/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainer",
5
+ "activation_dim": 2304,
6
+ "dict_size": 18432,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.06,
9
+ "warmup_steps": 1000,
10
+ "resample_steps": null,
11
+ "device": "cuda:0",
12
+ "layer": 11,
13
+ "lm_name": "google/gemma-2-2b",
14
+ "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11",
15
+ "submodule_name": "resid_post_layer_11"
16
+ },
17
+ "buffer": {
18
+ "d_submodule": 2304,
19
+ "io": "out",
20
+ "n_ctxs": 2000,
21
+ "ctx_len": 128,
22
+ "refresh_batch_size": 24,
23
+ "out_batch_size": 4096,
24
+ "device": "cuda:0"
25
+ }
26
+ }
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_4/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 77.07271423339844, "l1_loss": 209.26783905029296, "l0": 35.49583435058594, "frac_variance_explained": 0.673879736661911, "cossim": 0.8379469156265259, "l2_ratio": 0.7764925301074982, "relative_reconstruction_bias": 0.9512474119663239, "loss_original": 2.4489264488220215, "loss_reconstructed": 3.923832106590271, "loss_zero": 12.452933025360107, "frac_recovered": 0.8525889277458191, "frac_alive": 0.0455729179084301, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_5/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db4d7607331b3bdeac0bfe2cd3a0a36835669ba6970db38b0867617476e00d7b
3
+ size 339823336
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_5/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainer",
5
+ "activation_dim": 2304,
6
+ "dict_size": 18432,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.07,
9
+ "warmup_steps": 1000,
10
+ "resample_steps": null,
11
+ "device": "cuda:0",
12
+ "layer": 11,
13
+ "lm_name": "google/gemma-2-2b",
14
+ "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11",
15
+ "submodule_name": "resid_post_layer_11"
16
+ },
17
+ "buffer": {
18
+ "d_submodule": 2304,
19
+ "io": "out",
20
+ "n_ctxs": 2000,
21
+ "ctx_len": 128,
22
+ "refresh_batch_size": 24,
23
+ "out_batch_size": 4096,
24
+ "device": "cuda:0"
25
+ }
26
+ }
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_5/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 85.48247909545898, "l1_loss": 188.83019409179687, "l0": 20.800000953674317, "frac_variance_explained": 0.6401739776134491, "cossim": 0.7948622822761535, "l2_ratio": 0.7354530394077301, "relative_reconstruction_bias": 0.9544944584369659, "loss_original": 2.4489264488220215, "loss_reconstructed": 4.871973085403442, "loss_zero": 12.452933025360107, "frac_recovered": 0.757849270105362, "frac_alive": 0.0176866315305233, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_0/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8ea3e0cae878ffa8c34a4c1ebd21f82cc85af01eed1e09ba7ce855ae6dfe27d
3
+ size 339823336
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_0/config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainer",
5
+ "activation_dim": 2304,
6
+ "dict_size": 18432,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.025,
9
+ "warmup_steps": 1000,
10
+ "resample_steps": null,
11
+ "device": "cuda:0",
12
+ "layer": 11,
13
+ "lm_name": "google/gemma-2-2b",
14
+ "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11",
15
+ "submodule_name": "resid_post_layer_11",
16
+ "steps": "0"
17
+ },
18
+ "buffer": {
19
+ "d_submodule": 2304,
20
+ "io": "out",
21
+ "n_ctxs": 2000,
22
+ "ctx_len": 128,
23
+ "refresh_batch_size": 24,
24
+ "out_batch_size": 4096,
25
+ "device": "cuda:0"
26
+ }
27
+ }
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_0/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 262.5618362426758, "l1_loss": 15419.97216796875, "l0": 9220.8796875, "frac_variance_explained": -1.1251216292381288, "cossim": 0.0072056266712024804, "l2_ratio": 1.1523211359977723, "relative_reconstruction_bias": -3121.6480613708495, "loss_original": 2.4489264488220215, "loss_reconstructed": 20.26519641876221, "loss_zero": 12.452933025360107, "frac_recovered": -0.7811744093894959, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_19528/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31d05f8c2d8a1c6f452b8f2693b7741e4c32908630d8f62d27d9d917d7721e22
3
+ size 339823336
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_19528/config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainer",
5
+ "activation_dim": 2304,
6
+ "dict_size": 18432,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.025,
9
+ "warmup_steps": 1000,
10
+ "resample_steps": null,
11
+ "device": "cuda:0",
12
+ "layer": 11,
13
+ "lm_name": "google/gemma-2-2b",
14
+ "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11",
15
+ "submodule_name": "resid_post_layer_11",
16
+ "steps": "19528"
17
+ },
18
+ "buffer": {
19
+ "d_submodule": 2304,
20
+ "io": "out",
21
+ "n_ctxs": 2000,
22
+ "ctx_len": 128,
23
+ "refresh_batch_size": 24,
24
+ "out_batch_size": 4096,
25
+ "device": "cuda:0"
26
+ }
27
+ }
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_19528/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 44.36017074584961, "l1_loss": 678.7422302246093, "l0": 664.40419921875, "frac_variance_explained": 0.8578554630279541, "cossim": 0.9489532053470612, "l2_ratio": 0.9064467251300812, "relative_reconstruction_bias": 0.962804764509201, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5181885957717896, "loss_zero": 12.452933025360107, "frac_recovered": 0.9931147575378418, "frac_alive": 0.7009006142616272, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_29292/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8459951ead9228cb6bdc403d6492a22030927249f67f15d6eedf8d679405a47
3
+ size 339823336
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_29292/config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainer",
5
+ "activation_dim": 2304,
6
+ "dict_size": 18432,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.025,
9
+ "warmup_steps": 1000,
10
+ "resample_steps": null,
11
+ "device": "cuda:0",
12
+ "layer": 11,
13
+ "lm_name": "google/gemma-2-2b",
14
+ "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11",
15
+ "submodule_name": "resid_post_layer_11",
16
+ "steps": "29292"
17
+ },
18
+ "buffer": {
19
+ "d_submodule": 2304,
20
+ "io": "out",
21
+ "n_ctxs": 2000,
22
+ "ctx_len": 128,
23
+ "refresh_batch_size": 24,
24
+ "out_batch_size": 4096,
25
+ "device": "cuda:0"
26
+ }
27
+ }
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_29292/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 45.5604850769043, "l1_loss": 697.0533264160156, "l0": 648.0208557128906, "frac_variance_explained": 0.8660320043563843, "cossim": 0.9488271653652192, "l2_ratio": 0.9062425673007966, "relative_reconstruction_bias": 0.9621650040149688, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5167696475982666, "loss_zero": 12.452933025360107, "frac_recovered": 0.9932600975036621, "frac_alive": 0.7202690839767456, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_4882/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:166ad074d24b1dc38c4856fb9a9dd17479a591fc052877141aa4c9e674cb40a0
3
+ size 339823336
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_4882/config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainer",
5
+ "activation_dim": 2304,
6
+ "dict_size": 18432,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.025,
9
+ "warmup_steps": 1000,
10
+ "resample_steps": null,
11
+ "device": "cuda:0",
12
+ "layer": 11,
13
+ "lm_name": "google/gemma-2-2b",
14
+ "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11",
15
+ "submodule_name": "resid_post_layer_11",
16
+ "steps": "4882"
17
+ },
18
+ "buffer": {
19
+ "d_submodule": 2304,
20
+ "io": "out",
21
+ "n_ctxs": 2000,
22
+ "ctx_len": 128,
23
+ "refresh_batch_size": 24,
24
+ "out_batch_size": 4096,
25
+ "device": "cuda:0"
26
+ }
27
+ }
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_4882/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 55.02547416687012, "l1_loss": 694.5177673339844, "l0": 500.9125122070312, "frac_variance_explained": 0.8116631209850311, "cossim": 0.9266311347484588, "l2_ratio": 0.8777791380882263, "relative_reconstruction_bias": 0.9579416394233704, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.589069104194641, "loss_zero": 12.452933025360107, "frac_recovered": 0.9860494375228882, "frac_alive": 0.2698567807674408, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_9764/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f23c778c5115ad5249b24bb4c98958cedf405069dc1f0b538dfcdd601bf582a
3
+ size 339823336
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_9764/config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainer",
5
+ "activation_dim": 2304,
6
+ "dict_size": 18432,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.025,
9
+ "warmup_steps": 1000,
10
+ "resample_steps": null,
11
+ "device": "cuda:0",
12
+ "layer": 11,
13
+ "lm_name": "google/gemma-2-2b",
14
+ "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11",
15
+ "submodule_name": "resid_post_layer_11",
16
+ "steps": "9764"
17
+ },
18
+ "buffer": {
19
+ "d_submodule": 2304,
20
+ "io": "out",
21
+ "n_ctxs": 2000,
22
+ "ctx_len": 128,
23
+ "refresh_batch_size": 24,
24
+ "out_batch_size": 4096,
25
+ "device": "cuda:0"
26
+ }
27
+ }
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_9764/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 46.25635299682617, "l1_loss": 680.0863464355468, "l0": 628.8416809082031, "frac_variance_explained": 0.857069319486618, "cossim": 0.9428604900836944, "l2_ratio": 0.8974497258663178, "relative_reconstruction_bias": 0.9643844664096832, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.531099796295166, "loss_zero": 12.452933025360107, "frac_recovered": 0.9918265163898468, "frac_alive": 0.529405415058136, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_0/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8ea3e0cae878ffa8c34a4c1ebd21f82cc85af01eed1e09ba7ce855ae6dfe27d
3
+ size 339823336
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_0/config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainer",
5
+ "activation_dim": 2304,
6
+ "dict_size": 18432,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.035,
9
+ "warmup_steps": 1000,
10
+ "resample_steps": null,
11
+ "device": "cuda:0",
12
+ "layer": 11,
13
+ "lm_name": "google/gemma-2-2b",
14
+ "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11",
15
+ "submodule_name": "resid_post_layer_11",
16
+ "steps": "0"
17
+ },
18
+ "buffer": {
19
+ "d_submodule": 2304,
20
+ "io": "out",
21
+ "n_ctxs": 2000,
22
+ "ctx_len": 128,
23
+ "refresh_batch_size": 24,
24
+ "out_batch_size": 4096,
25
+ "device": "cuda:0"
26
+ }
27
+ }
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_0/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 231.1881851196289, "l1_loss": 13521.2337890625, "l0": 9215.55029296875, "frac_variance_explained": -1.0510494828224182, "cossim": 0.006548775953706354, "l2_ratio": 1.1542882323265076, "relative_reconstruction_bias": -167.9729995727539, "loss_original": 2.4489264488220215, "loss_reconstructed": 20.26519641876221, "loss_zero": 12.452933025360107, "frac_recovered": -0.7811744093894959, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_19528/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f13631c3eb49456356ffd1f81d364c8eb7a4fd6bfbce4eb486cc0aba7c317caf
3
+ size 339823336
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_19528/config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainer",
5
+ "activation_dim": 2304,
6
+ "dict_size": 18432,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.035,
9
+ "warmup_steps": 1000,
10
+ "resample_steps": null,
11
+ "device": "cuda:0",
12
+ "layer": 11,
13
+ "lm_name": "google/gemma-2-2b",
14
+ "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11",
15
+ "submodule_name": "resid_post_layer_11",
16
+ "steps": "19528"
17
+ },
18
+ "buffer": {
19
+ "d_submodule": 2304,
20
+ "io": "out",
21
+ "n_ctxs": 2000,
22
+ "ctx_len": 128,
23
+ "refresh_batch_size": 24,
24
+ "out_batch_size": 4096,
25
+ "device": "cuda:0"
26
+ }
27
+ }
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_19528/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 54.52329711914062, "l1_loss": 434.2824768066406, "l0": 215.03750610351562, "frac_variance_explained": 0.8672008395195008, "cossim": 0.9186275959014892, "l2_ratio": 0.8766422688961029, "relative_reconstruction_bias": 0.9781448543071747, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.614857029914856, "loss_zero": 12.452933025360107, "frac_recovered": 0.9834800899028778, "frac_alive": 0.3849283754825592, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_29292/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad859e564cc052ffedbf6c98a7a93979224f7ab6b738a5512393d8690c564a1a
3
+ size 339823336
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_29292/config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainer",
5
+ "activation_dim": 2304,
6
+ "dict_size": 18432,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.035,
9
+ "warmup_steps": 1000,
10
+ "resample_steps": null,
11
+ "device": "cuda:0",
12
+ "layer": 11,
13
+ "lm_name": "google/gemma-2-2b",
14
+ "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11",
15
+ "submodule_name": "resid_post_layer_11",
16
+ "steps": "29292"
17
+ },
18
+ "buffer": {
19
+ "d_submodule": 2304,
20
+ "io": "out",
21
+ "n_ctxs": 2000,
22
+ "ctx_len": 128,
23
+ "refresh_batch_size": 24,
24
+ "out_batch_size": 4096,
25
+ "device": "cuda:0"
26
+ }
27
+ }
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_29292/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 55.41632499694824, "l1_loss": 412.8124267578125, "l0": 213.2916702270508, "frac_variance_explained": 0.7951547861099243, "cossim": 0.9193026781082153, "l2_ratio": 0.8765688300132751, "relative_reconstruction_bias": 0.963158255815506, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.6072664499282836, "loss_zero": 12.452933025360107, "frac_recovered": 0.9842307686805725, "frac_alive": 0.4042426347732544, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_4882/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ec54a4ea78ec5eee0dc141018e261b00f34ef64e54d5766536618f48cc767c4
3
+ size 339823336
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_4882/config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainer",
5
+ "activation_dim": 2304,
6
+ "dict_size": 18432,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.035,
9
+ "warmup_steps": 1000,
10
+ "resample_steps": null,
11
+ "device": "cuda:0",
12
+ "layer": 11,
13
+ "lm_name": "google/gemma-2-2b",
14
+ "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11",
15
+ "submodule_name": "resid_post_layer_11",
16
+ "steps": "4882"
17
+ },
18
+ "buffer": {
19
+ "d_submodule": 2304,
20
+ "io": "out",
21
+ "n_ctxs": 2000,
22
+ "ctx_len": 128,
23
+ "refresh_batch_size": 24,
24
+ "out_batch_size": 4096,
25
+ "device": "cuda:0"
26
+ }
27
+ }
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_4882/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 65.93699836730957, "l1_loss": 390.71771545410155, "l0": 140.8791732788086, "frac_variance_explained": 0.7948267936706543, "cossim": 0.8820845127105713, "l2_ratio": 0.833953058719635, "relative_reconstruction_bias": 0.9701622486114502, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.8883158922195435, "loss_zero": 12.452933025360107, "frac_recovered": 0.9561446309089661, "frac_alive": 0.149685338139534, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_9764/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d5f4dc455408710d281b07d9cfa3c041e4796169c85bac5d612a4df911cff2e
3
+ size 339823336
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_9764/config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainer",
5
+ "activation_dim": 2304,
6
+ "dict_size": 18432,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.035,
9
+ "warmup_steps": 1000,
10
+ "resample_steps": null,
11
+ "device": "cuda:0",
12
+ "layer": 11,
13
+ "lm_name": "google/gemma-2-2b",
14
+ "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11",
15
+ "submodule_name": "resid_post_layer_11",
16
+ "steps": "9764"
17
+ },
18
+ "buffer": {
19
+ "d_submodule": 2304,
20
+ "io": "out",
21
+ "n_ctxs": 2000,
22
+ "ctx_len": 128,
23
+ "refresh_batch_size": 24,
24
+ "out_batch_size": 4096,
25
+ "device": "cuda:0"
26
+ }
27
+ }
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_9764/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 59.162353134155275, "l1_loss": 431.5362915039062, "l0": 187.0416732788086, "frac_variance_explained": 0.7984853267669678, "cossim": 0.9067982614040375, "l2_ratio": 0.8616488099098205, "relative_reconstruction_bias": 0.9632077217102051, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.6684057474136353, "loss_zero": 12.452933025360107, "frac_recovered": 0.9781309604644776, "frac_alive": 0.2636176347732544, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_0/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8ea3e0cae878ffa8c34a4c1ebd21f82cc85af01eed1e09ba7ce855ae6dfe27d
3
+ size 339823336
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_0/config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainer",
5
+ "activation_dim": 2304,
6
+ "dict_size": 18432,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.04,
9
+ "warmup_steps": 1000,
10
+ "resample_steps": null,
11
+ "device": "cuda:0",
12
+ "layer": 11,
13
+ "lm_name": "google/gemma-2-2b",
14
+ "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11",
15
+ "submodule_name": "resid_post_layer_11",
16
+ "steps": "0"
17
+ },
18
+ "buffer": {
19
+ "d_submodule": 2304,
20
+ "io": "out",
21
+ "n_ctxs": 2000,
22
+ "ctx_len": 128,
23
+ "refresh_batch_size": 24,
24
+ "out_batch_size": 4096,
25
+ "device": "cuda:0"
26
+ }
27
+ }