noanabeshima commited on
Commit
9052bef
1 Parent(s): d41c1ab

Upload folder using huggingface_hub

Browse files
Files changed (48) hide show
  1. res_pre_mlp/Rm0_S-1_R1_P0.pt +3 -0
  2. res_pre_mlp/Rm0_S-1_R1_P0_config.json +38 -0
  3. res_pre_mlp/Rm0_S-2_R1_P0.pt +3 -0
  4. res_pre_mlp/Rm0_S-2_R1_P0_config.json +38 -0
  5. res_pre_mlp/Rm0_S-3_R1_P0.pt +3 -0
  6. res_pre_mlp/Rm0_S-3_R1_P0_config.json +38 -0
  7. res_pre_mlp/Rm0_S-4_R1_P0.pt +3 -0
  8. res_pre_mlp/Rm0_S-4_R1_P0_config.json +38 -0
  9. res_pre_mlp/Rm0_S-5_R1_P0.pt +3 -0
  10. res_pre_mlp/Rm0_S-5_R1_P0_config.json +38 -0
  11. res_pre_mlp/Rm0_S-6_R1_P0.pt +3 -0
  12. res_pre_mlp/Rm0_S-6_R1_P0_config.json +38 -0
  13. res_pre_mlp/Rm1_S-1_R1_P0.pt +3 -0
  14. res_pre_mlp/Rm1_S-1_R1_P0_config.json +38 -0
  15. res_pre_mlp/Rm1_S-2_R1_P0.pt +3 -0
  16. res_pre_mlp/Rm1_S-2_R1_P0_config.json +38 -0
  17. res_pre_mlp/Rm1_S-3_R1_P0.pt +3 -0
  18. res_pre_mlp/Rm1_S-3_R1_P0_config.json +38 -0
  19. res_pre_mlp/Rm1_S-4_R1_P0.pt +3 -0
  20. res_pre_mlp/Rm1_S-4_R1_P0_config.json +38 -0
  21. res_pre_mlp/Rm1_S-5_R1_P0.pt +3 -0
  22. res_pre_mlp/Rm1_S-5_R1_P0_config.json +38 -0
  23. res_pre_mlp/Rm1_S-6_R1_P0.pt +3 -0
  24. res_pre_mlp/Rm1_S-6_R1_P0_config.json +38 -0
  25. res_pre_mlp/Rm2_S-1_R1_P0.pt +3 -0
  26. res_pre_mlp/Rm2_S-1_R1_P0_config.json +38 -0
  27. res_pre_mlp/Rm2_S-2_R1_P0.pt +3 -0
  28. res_pre_mlp/Rm2_S-2_R1_P0_config.json +38 -0
  29. res_pre_mlp/Rm2_S-3_R1_P0.pt +3 -0
  30. res_pre_mlp/Rm2_S-3_R1_P0_config.json +38 -0
  31. res_pre_mlp/Rm2_S-4_R1_P0.pt +3 -0
  32. res_pre_mlp/Rm2_S-4_R1_P0_config.json +38 -0
  33. res_pre_mlp/Rm2_S-5_R1_P0.pt +3 -0
  34. res_pre_mlp/Rm2_S-5_R1_P0_config.json +38 -0
  35. res_pre_mlp/Rm2_S-6_R1_P0.pt +3 -0
  36. res_pre_mlp/Rm2_S-6_R1_P0_config.json +38 -0
  37. res_pre_mlp/Rm3_S-1_R1_P0.pt +3 -0
  38. res_pre_mlp/Rm3_S-1_R1_P0_config.json +38 -0
  39. res_pre_mlp/Rm3_S-2_R1_P0.pt +3 -0
  40. res_pre_mlp/Rm3_S-2_R1_P0_config.json +38 -0
  41. res_pre_mlp/Rm3_S-3_R1_P0.pt +3 -0
  42. res_pre_mlp/Rm3_S-3_R1_P0_config.json +38 -0
  43. res_pre_mlp/Rm3_S-4_R1_P0.pt +3 -0
  44. res_pre_mlp/Rm3_S-4_R1_P0_config.json +38 -0
  45. res_pre_mlp/Rm3_S-5_R1_P0.pt +3 -0
  46. res_pre_mlp/Rm3_S-5_R1_P0_config.json +38 -0
  47. res_pre_mlp/Rm3_S-6_R1_P0.pt +3 -0
  48. res_pre_mlp/Rm3_S-6_R1_P0_config.json +38 -0
res_pre_mlp/Rm0_S-1_R1_P0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdd45d769aa0c28ad4ba961df9b38b89b6bd60655245948e4058d94d682feb44
3
+ size 153705088
res_pre_mlp/Rm0_S-1_R1_P0_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -1,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Rm{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "res_pre_mlp",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "res_B",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Rm0_S-1_R1_P0"
38
+ }
res_pre_mlp/Rm0_S-2_R1_P0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a069663395f9b3ffce93f725a10c04f668ee30d6245d93deec31b9df2cd6103
3
+ size 153705088
res_pre_mlp/Rm0_S-2_R1_P0_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -2,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Rm{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "res_pre_mlp",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "res_B",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Rm0_S-2_R1_P0"
38
+ }
res_pre_mlp/Rm0_S-3_R1_P0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a691258a2de93c8679f3c25c17fc0a7640aecc70744146a5cfa57621df0152d9
3
+ size 153705088
res_pre_mlp/Rm0_S-3_R1_P0_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -3,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Rm{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "res_pre_mlp",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "res_B",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Rm0_S-3_R1_P0"
38
+ }
res_pre_mlp/Rm0_S-4_R1_P0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9747dc098cb524d612e058f20fb5d1c53bb4f008c6df1bc3aab0c215e56849b0
3
+ size 153705088
res_pre_mlp/Rm0_S-4_R1_P0_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -4,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Rm{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "res_pre_mlp",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "res_B",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Rm0_S-4_R1_P0"
38
+ }
res_pre_mlp/Rm0_S-5_R1_P0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93d8bfe8fcfd33e2fe501bb3f67f38f3e383da1492e25ae3426cb81b5a8d9406
3
+ size 153705088
res_pre_mlp/Rm0_S-5_R1_P0_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -5,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Rm{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "res_pre_mlp",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "res_B",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Rm0_S-5_R1_P0"
38
+ }
res_pre_mlp/Rm0_S-6_R1_P0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:812e917f8ff0d1fa0a3581bb9525bcd6159ea168862024cab9896028c946695a
3
+ size 153705088
res_pre_mlp/Rm0_S-6_R1_P0_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -6,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Rm{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "res_pre_mlp",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "res_B",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Rm0_S-6_R1_P0"
38
+ }
res_pre_mlp/Rm1_S-1_R1_P0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c59c6799c971a4b6d34820795525b5ba0f138dfc74cab8fe963d5ce7aec6c943
3
+ size 153705088
res_pre_mlp/Rm1_S-1_R1_P0_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 1,
20
+ "l1_exp": -1,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Rm{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "res_pre_mlp",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "res_B",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Rm1_S-1_R1_P0"
38
+ }
res_pre_mlp/Rm1_S-2_R1_P0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20c0090240406213acbecae5f70669dec85605b4a8a54fc5fa125cbd62038941
3
+ size 153705088
res_pre_mlp/Rm1_S-2_R1_P0_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 1,
20
+ "l1_exp": -2,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Rm{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "res_pre_mlp",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "res_B",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Rm1_S-2_R1_P0"
38
+ }
res_pre_mlp/Rm1_S-3_R1_P0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5ee6f7dda5c12262e0891b076be0633e5eecaa8d4774d556bffb72b87e8b99f
3
+ size 153705088
res_pre_mlp/Rm1_S-3_R1_P0_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 1,
20
+ "l1_exp": -3,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Rm{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "res_pre_mlp",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "res_B",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Rm1_S-3_R1_P0"
38
+ }
res_pre_mlp/Rm1_S-4_R1_P0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25a5aa5180058b83b73ec3b2b34b61314e402c859efd37e754ae61cec150e19a
3
+ size 153705088
res_pre_mlp/Rm1_S-4_R1_P0_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 1,
20
+ "l1_exp": -4,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Rm{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "res_pre_mlp",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "res_B",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Rm1_S-4_R1_P0"
38
+ }
res_pre_mlp/Rm1_S-5_R1_P0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:094466fd13b69cbab01efc9af80cdd584946d932c35354f5f5b5caee03849baa
3
+ size 153705088
res_pre_mlp/Rm1_S-5_R1_P0_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 1,
20
+ "l1_exp": -5,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Rm{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "res_pre_mlp",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "res_B",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Rm1_S-5_R1_P0"
38
+ }
res_pre_mlp/Rm1_S-6_R1_P0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:323856004211c2892f90bb6646092c9f978dfe5412a41c0d7f0203d9e1c62f9a
3
+ size 153705088
res_pre_mlp/Rm1_S-6_R1_P0_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 1,
20
+ "l1_exp": -6,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Rm{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "res_pre_mlp",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "res_B",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Rm1_S-6_R1_P0"
38
+ }
res_pre_mlp/Rm2_S-1_R1_P0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3498111206f5af5177a62916216ee8b991b4790063a3285616a653dcfc161a1
3
+ size 153705088
res_pre_mlp/Rm2_S-1_R1_P0_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 2,
20
+ "l1_exp": -1,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Rm{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "res_pre_mlp",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "res_B",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Rm2_S-1_R1_P0"
38
+ }
res_pre_mlp/Rm2_S-2_R1_P0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54a939b27aaae14db6d153a16eaf229653f1aaaa6fe31b5090710681fbf47757
3
+ size 153705088
res_pre_mlp/Rm2_S-2_R1_P0_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 2,
20
+ "l1_exp": -2,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Rm{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "res_pre_mlp",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "res_B",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Rm2_S-2_R1_P0"
38
+ }
res_pre_mlp/Rm2_S-3_R1_P0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4aa5171c0db2fceedb86fee267890d25ccdee2e79d7347417402c6f07aea132
3
+ size 153705088
res_pre_mlp/Rm2_S-3_R1_P0_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 2,
20
+ "l1_exp": -3,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Rm{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "res_pre_mlp",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "res_B",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Rm2_S-3_R1_P0"
38
+ }
res_pre_mlp/Rm2_S-4_R1_P0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b36c464096fa9d04d6f57d5091ec217764b8d7d933e07de04bb03ba66653adf
3
+ size 153705088
res_pre_mlp/Rm2_S-4_R1_P0_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 2,
20
+ "l1_exp": -4,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Rm{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "res_pre_mlp",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "res_B",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Rm2_S-4_R1_P0"
38
+ }
res_pre_mlp/Rm2_S-5_R1_P0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:014bc4090b47fa09fcacdfd663c15d8ebd13e44cdd83d5b572e93a67f54e98bc
3
+ size 153705088
res_pre_mlp/Rm2_S-5_R1_P0_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 2,
20
+ "l1_exp": -5,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Rm{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "res_pre_mlp",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "res_B",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Rm2_S-5_R1_P0"
38
+ }
res_pre_mlp/Rm2_S-6_R1_P0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b78c8a2598fef1dbcec12e8f7e2f786d3da7ef799018d2e74b89fe85186552ae
3
+ size 153705088
res_pre_mlp/Rm2_S-6_R1_P0_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 2,
20
+ "l1_exp": -6,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Rm{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "res_pre_mlp",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "res_B",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Rm2_S-6_R1_P0"
38
+ }
res_pre_mlp/Rm3_S-1_R1_P0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9508a79866c6484117f48fe8f6c771efe489ed3a08afff00f1200de70ebefee3
3
+ size 153705088
res_pre_mlp/Rm3_S-1_R1_P0_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 3,
20
+ "l1_exp": -1,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Rm{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "res_pre_mlp",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "res_B",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Rm3_S-1_R1_P0"
38
+ }
res_pre_mlp/Rm3_S-2_R1_P0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a499d162b1d688b92d82d751ee44aca3ebf4af75f1add808ea34947440a1a801
3
+ size 153705088
res_pre_mlp/Rm3_S-2_R1_P0_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 3,
20
+ "l1_exp": -2,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Rm{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "res_pre_mlp",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "res_B",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Rm3_S-2_R1_P0"
38
+ }
res_pre_mlp/Rm3_S-3_R1_P0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd58590a88859a26222fadf422f55ff0045676939e19ee79823de99339aa4819
3
+ size 153705088
res_pre_mlp/Rm3_S-3_R1_P0_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 3,
20
+ "l1_exp": -3,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Rm{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "res_pre_mlp",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "res_B",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Rm3_S-3_R1_P0"
38
+ }
res_pre_mlp/Rm3_S-4_R1_P0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4fc2fc2aa7b626858e2b7756f705bc168b7871635fe837acfc025d3dbb6fab9a
3
+ size 153705088
res_pre_mlp/Rm3_S-4_R1_P0_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 3,
20
+ "l1_exp": -4,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Rm{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "res_pre_mlp",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "res_B",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Rm3_S-4_R1_P0"
38
+ }
res_pre_mlp/Rm3_S-5_R1_P0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b60aef2c23d744928280f1ffbeee956d5defe6f9f2b8ae0511e38da90161740a
3
+ size 153705088
res_pre_mlp/Rm3_S-5_R1_P0_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 3,
20
+ "l1_exp": -5,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Rm{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "res_pre_mlp",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "res_B",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Rm3_S-5_R1_P0"
38
+ }
res_pre_mlp/Rm3_S-6_R1_P0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:288034a630d32d3dc93541215ce00e9beb793aec5f6926afc03849ae21e36eff
3
+ size 153705088
res_pre_mlp/Rm3_S-6_R1_P0_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 3,
20
+ "l1_exp": -6,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Rm{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "res_pre_mlp",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "res_B",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Rm3_S-6_R1_P0"
38
+ }