noanabeshima commited on
Commit
84e485d
1 Parent(s): e95dd23

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. mlp_map_test/M0_S-10_R16_P2.pt +3 -0
  2. mlp_map_test/M0_S-10_R16_P2_config.json +38 -0
  3. mlp_map_test/M0_S-10_R16_P3.pt +3 -0
  4. mlp_map_test/M0_S-10_R16_P3_config.json +38 -0
  5. mlp_map_test/M0_S-10_R16_P4.pt +3 -0
  6. mlp_map_test/M0_S-10_R16_P4_config.json +38 -0
  7. mlp_map_test/M0_S-10_R16_P5.pt +3 -0
  8. mlp_map_test/M0_S-10_R16_P5_config.json +38 -0
  9. mlp_map_test/M0_S-10_R4_P1.pt +3 -0
  10. mlp_map_test/M0_S-10_R4_P1_config.json +38 -0
  11. mlp_map_test/M0_S-10_R4_P5.pt +3 -0
  12. mlp_map_test/M0_S-10_R4_P5_config.json +38 -0
  13. mlp_map_test/M0_S-10_R8_P1.pt +3 -0
  14. mlp_map_test/M0_S-10_R8_P1_config.json +38 -0
  15. mlp_map_test/M0_S-10_R8_P2.pt +3 -0
  16. mlp_map_test/M0_S-10_R8_P2_config.json +38 -0
  17. mlp_map_test/M0_S-10_R8_P3.pt +3 -0
  18. mlp_map_test/M0_S-10_R8_P3_config.json +38 -0
  19. mlp_map_test/M0_S-2_R16_P1.pt +3 -0
  20. mlp_map_test/M0_S-2_R16_P1_config.json +38 -0
  21. mlp_map_test/M0_S-2_R16_P4.pt +3 -0
  22. mlp_map_test/M0_S-2_R16_P4_config.json +38 -0
  23. mlp_map_test/M0_S-2_R16_P6.pt +3 -0
  24. mlp_map_test/M0_S-2_R16_P6_config.json +38 -0
  25. mlp_map_test/M0_S-2_R1_P0.pt +3 -0
  26. mlp_map_test/M0_S-2_R1_P0_config.json +38 -0
  27. mlp_map_test/M0_S-2_R2_P2.pt +3 -0
  28. mlp_map_test/M0_S-2_R2_P2_config.json +38 -0
  29. mlp_map_test/M0_S-2_R2_P3.pt +3 -0
  30. mlp_map_test/M0_S-2_R2_P3_config.json +38 -0
  31. mlp_map_test/M0_S-2_R2_P6.pt +3 -0
  32. mlp_map_test/M0_S-2_R2_P6_config.json +38 -0
  33. mlp_map_test/M0_S-2_R4_P2.pt +3 -0
  34. mlp_map_test/M0_S-2_R4_P2_config.json +38 -0
  35. mlp_map_test/M0_S-2_R8_P1.pt +3 -0
  36. mlp_map_test/M0_S-2_R8_P1_config.json +38 -0
  37. mlp_map_test/M0_S-2_R8_P6.pt +3 -0
  38. mlp_map_test/M0_S-2_R8_P6_config.json +38 -0
  39. mlp_map_test/M0_S-4_R1_P0.pt +3 -0
  40. mlp_map_test/M0_S-4_R1_P0_config.json +38 -0
  41. mlp_map_test/M0_S-4_R2_P2.pt +3 -0
  42. mlp_map_test/M0_S-4_R2_P2_config.json +38 -0
  43. mlp_map_test/M0_S-4_R2_P5.pt +3 -0
  44. mlp_map_test/M0_S-4_R2_P5_config.json +38 -0
  45. mlp_map_test/M0_S-4_R4_P3.pt +3 -0
  46. mlp_map_test/M0_S-4_R4_P3_config.json +38 -0
  47. mlp_map_test/M0_S-4_R8_P4.pt +3 -0
  48. mlp_map_test/M0_S-4_R8_P4_config.json +38 -0
  49. mlp_map_test/M0_S-6_R16_P3.pt +3 -0
  50. mlp_map_test/M0_S-6_R16_P3_config.json +38 -0
mlp_map_test/M0_S-10_R16_P2.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a476d3aef4c5a1dcc75418e38609399e84fb2cb55b7dd460d0a07449aef9a18
3
+ size 153705096
mlp_map_test/M0_S-10_R16_P2_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -10,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "y",
25
+ "sparse_adam": false,
26
+ "run_template": "M{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "mlp_map_test",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 16,
32
+ "l1_p": 2,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_map",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "M0_S-10_R16_P2"
38
+ }
mlp_map_test/M0_S-10_R16_P3.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f8e0615563aef8d1a9540b22e7223e6e80c3813d825b3aa49cd0355db998292
3
+ size 153705096
mlp_map_test/M0_S-10_R16_P3_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -10,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "y",
25
+ "sparse_adam": false,
26
+ "run_template": "M{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "mlp_map_test",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 16,
32
+ "l1_p": 3,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_map",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "M0_S-10_R16_P3"
38
+ }
mlp_map_test/M0_S-10_R16_P4.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25a46db602dbd14919d1c35b25e05cd34ff2e46e7b3b2449c6b16a267b1e184f
3
+ size 153705096
mlp_map_test/M0_S-10_R16_P4_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -10,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "y",
25
+ "sparse_adam": false,
26
+ "run_template": "M{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "mlp_map_test",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 16,
32
+ "l1_p": 4,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_map",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "M0_S-10_R16_P4"
38
+ }
mlp_map_test/M0_S-10_R16_P5.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a85488e911ab5a73fc9d82a330e78c4184eed5007a6b0e1cabf54da3baec1f7
3
+ size 153705096
mlp_map_test/M0_S-10_R16_P5_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -10,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "y",
25
+ "sparse_adam": false,
26
+ "run_template": "M{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "mlp_map_test",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 16,
32
+ "l1_p": 5,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_map",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "M0_S-10_R16_P5"
38
+ }
mlp_map_test/M0_S-10_R4_P1.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05e2b03ae00efdc04924bc67fd57561adc0ef02bcd70ec500e3612930c78d304
3
+ size 153705088
mlp_map_test/M0_S-10_R4_P1_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -10,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "y",
25
+ "sparse_adam": false,
26
+ "run_template": "M{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "mlp_map_test",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 4,
32
+ "l1_p": 1,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_map",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "M0_S-10_R4_P1"
38
+ }
mlp_map_test/M0_S-10_R4_P5.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d71afabdb9f20aa17f3e6a1576b42c6ab73c271c14da4109fb5028c37cbc36d
3
+ size 153705088
mlp_map_test/M0_S-10_R4_P5_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -10,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "y",
25
+ "sparse_adam": false,
26
+ "run_template": "M{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "mlp_map_test",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 4,
32
+ "l1_p": 5,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_map",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "M0_S-10_R4_P5"
38
+ }
mlp_map_test/M0_S-10_R8_P1.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc902d6b03e5f89da1308cd533dd49a3fbc503f117691e677522a46bcaa9a5ff
3
+ size 153705088
mlp_map_test/M0_S-10_R8_P1_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -10,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "y",
25
+ "sparse_adam": false,
26
+ "run_template": "M{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "mlp_map_test",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 8,
32
+ "l1_p": 1,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_map",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "M0_S-10_R8_P1"
38
+ }
mlp_map_test/M0_S-10_R8_P2.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c72c5cb9c4d6b17693c77ca0a05a7c812e2d19d6e83ec0eeb2853d1f156b12d
3
+ size 153705088
mlp_map_test/M0_S-10_R8_P2_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -10,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "y",
25
+ "sparse_adam": false,
26
+ "run_template": "M{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "mlp_map_test",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 8,
32
+ "l1_p": 2,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_map",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "M0_S-10_R8_P2"
38
+ }
mlp_map_test/M0_S-10_R8_P3.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39899c5a0eb2a0de2615591467f7133389e29c7cf6743642e743a0f8586b3c23
3
+ size 153705088
mlp_map_test/M0_S-10_R8_P3_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -10,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "y",
25
+ "sparse_adam": false,
26
+ "run_template": "M{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "mlp_map_test",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 8,
32
+ "l1_p": 3,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_map",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "M0_S-10_R8_P3"
38
+ }
mlp_map_test/M0_S-2_R16_P1.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3fcddc6c89f38a708069bae5fb748625dbb057c8214bec471f353d98838a844d
3
+ size 153705088
mlp_map_test/M0_S-2_R16_P1_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -2,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "y",
25
+ "sparse_adam": false,
26
+ "run_template": "M{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "mlp_map_test",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 16,
32
+ "l1_p": 1,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_map",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "M0_S-2_R16_P1"
38
+ }
mlp_map_test/M0_S-2_R16_P4.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0221c4acac9ff33d429738173547f422a82dadfbf4f5313e17a18a0b6fc2b4e0
3
+ size 153705088
mlp_map_test/M0_S-2_R16_P4_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -2,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "y",
25
+ "sparse_adam": false,
26
+ "run_template": "M{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "mlp_map_test",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 16,
32
+ "l1_p": 4,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_map",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "M0_S-2_R16_P4"
38
+ }
mlp_map_test/M0_S-2_R16_P6.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d54835151ee0f914c46031271ff86e17264ebd39be55897c2ab3364c65ef54cd
3
+ size 153705088
mlp_map_test/M0_S-2_R16_P6_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -2,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "y",
25
+ "sparse_adam": false,
26
+ "run_template": "M{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "mlp_map_test",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 16,
32
+ "l1_p": 6,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_map",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "M0_S-2_R16_P6"
38
+ }
mlp_map_test/M0_S-2_R1_P0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35a794ba2abdc801c955216c172bd99f67f0b8af8847c5bb25276bf55765bfba
3
+ size 153705080
mlp_map_test/M0_S-2_R1_P0_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -2,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "y",
25
+ "sparse_adam": false,
26
+ "run_template": "M{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "mlp_map_test",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_map",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "M0_S-2_R1_P0"
38
+ }
mlp_map_test/M0_S-2_R2_P2.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e824360311d126899f3afa8aaa8c9694bc76c9d6824685f11158bad9f347e95b
3
+ size 153705080
mlp_map_test/M0_S-2_R2_P2_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -2,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "y",
25
+ "sparse_adam": false,
26
+ "run_template": "M{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "mlp_map_test",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 2,
32
+ "l1_p": 2,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_map",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "M0_S-2_R2_P2"
38
+ }
mlp_map_test/M0_S-2_R2_P3.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22e0adffbde68d106fb7352f1bda49a9e2bbd7a29204e453444ab9e41b26180f
3
+ size 153705080
mlp_map_test/M0_S-2_R2_P3_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -2,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "y",
25
+ "sparse_adam": false,
26
+ "run_template": "M{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "mlp_map_test",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 2,
32
+ "l1_p": 3,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_map",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "M0_S-2_R2_P3"
38
+ }
mlp_map_test/M0_S-2_R2_P6.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9c0096062a00d780ba2059a0428c9edf6a7dcfa11a0a8a755c60e2872b8b83e
3
+ size 153705080
mlp_map_test/M0_S-2_R2_P6_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -2,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "y",
25
+ "sparse_adam": false,
26
+ "run_template": "M{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "mlp_map_test",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 2,
32
+ "l1_p": 6,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_map",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "M0_S-2_R2_P6"
38
+ }
mlp_map_test/M0_S-2_R4_P2.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0dc239455dd8d2730ec3d6281a8a6970c9533fe57c8fbfd87d854cd02f1a056
3
+ size 153705080
mlp_map_test/M0_S-2_R4_P2_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -2,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "y",
25
+ "sparse_adam": false,
26
+ "run_template": "M{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "mlp_map_test",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 4,
32
+ "l1_p": 2,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_map",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "M0_S-2_R4_P2"
38
+ }
mlp_map_test/M0_S-2_R8_P1.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd089c61fe3b8f6377e1e637136c8fe5936e49136e68fabe3b67639beb77e69f
3
+ size 153705080
mlp_map_test/M0_S-2_R8_P1_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -2,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "y",
25
+ "sparse_adam": false,
26
+ "run_template": "M{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "mlp_map_test",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 8,
32
+ "l1_p": 1,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_map",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "M0_S-2_R8_P1"
38
+ }
mlp_map_test/M0_S-2_R8_P6.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:444bea44001a54b7c3b3236ce2ee724ec413d73eac1e176e86c3978732454bc6
3
+ size 153705080
mlp_map_test/M0_S-2_R8_P6_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -2,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "y",
25
+ "sparse_adam": false,
26
+ "run_template": "M{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "mlp_map_test",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 8,
32
+ "l1_p": 6,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_map",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "M0_S-2_R8_P6"
38
+ }
mlp_map_test/M0_S-4_R1_P0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9343d0e4a26550fb8bc89c5b28e44dce35b5b15de7e597823f38808dc58eb497
3
+ size 153705080
mlp_map_test/M0_S-4_R1_P0_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -4,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "y",
25
+ "sparse_adam": false,
26
+ "run_template": "M{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "mlp_map_test",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_map",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "M0_S-4_R1_P0"
38
+ }
mlp_map_test/M0_S-4_R2_P2.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e98075cc14067e15ea786d491a3bd58133b766b412e26f9fe6773d2d68a16b50
3
+ size 153705080
mlp_map_test/M0_S-4_R2_P2_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -4,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "y",
25
+ "sparse_adam": false,
26
+ "run_template": "M{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "mlp_map_test",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 2,
32
+ "l1_p": 2,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_map",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "M0_S-4_R2_P2"
38
+ }
mlp_map_test/M0_S-4_R2_P5.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4e85a33973a1a7e5a2417de023819a307b4268232f8a6245fd9b33031b14aa9
3
+ size 153705080
mlp_map_test/M0_S-4_R2_P5_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -4,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "y",
25
+ "sparse_adam": false,
26
+ "run_template": "M{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "mlp_map_test",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 2,
32
+ "l1_p": 5,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_map",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "M0_S-4_R2_P5"
38
+ }
mlp_map_test/M0_S-4_R4_P3.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a74f2eada1afa47e29f97115efd783050cd0202de87e8be1a81268c24109f09
3
+ size 153705080
mlp_map_test/M0_S-4_R4_P3_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -4,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "y",
25
+ "sparse_adam": false,
26
+ "run_template": "M{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "mlp_map_test",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 4,
32
+ "l1_p": 3,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_map",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "M0_S-4_R4_P3"
38
+ }
mlp_map_test/M0_S-4_R8_P4.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1cb3af53c98345aec597d8431a88eea554620b047c5e1e31b329da4d24b1e639
3
+ size 153705080
mlp_map_test/M0_S-4_R8_P4_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -4,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "y",
25
+ "sparse_adam": false,
26
+ "run_template": "M{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "mlp_map_test",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 8,
32
+ "l1_p": 4,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_map",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "M0_S-4_R8_P4"
38
+ }
mlp_map_test/M0_S-6_R16_P3.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f410085c13cb1ac593dcdde8365300d35d3c5287974a60a84feee4b76f116c74
3
+ size 153705088
mlp_map_test/M0_S-6_R16_P3_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -6,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "y",
25
+ "sparse_adam": false,
26
+ "run_template": "M{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "mlp_map_test",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 16,
32
+ "l1_p": 3,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_map",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "M0_S-6_R16_P3"
38
+ }