noanabeshima
commited on
Commit
•
78c7da6
1
Parent(s):
22b3030
Upload folder using huggingface_hub
Browse files- mlp/M0_S-1_R1_P0.pt +3 -0
- mlp/M0_S-1_R1_P0_config.json +38 -0
- mlp/M0_S-2_R1_P0.pt +3 -0
- mlp/M0_S-2_R1_P0_config.json +38 -0
- mlp/M0_S-3_R1_P0.pt +3 -0
- mlp/M0_S-3_R1_P0_config.json +38 -0
- mlp/M0_S-4_R1_P0.pt +3 -0
- mlp/M0_S-4_R1_P0_config.json +38 -0
- mlp/M0_S-5_R1_P0.pt +3 -0
- mlp/M0_S-5_R1_P0_config.json +38 -0
- mlp/M0_S-6_R1_P0.pt +3 -0
- mlp/M0_S-6_R1_P0_config.json +38 -0
- mlp/M1_S-1_R1_P0.pt +3 -0
- mlp/M1_S-1_R1_P0_config.json +38 -0
- mlp/M1_S-2_R1_P0.pt +3 -0
- mlp/M1_S-2_R1_P0_config.json +38 -0
- mlp/M1_S-3_R1_P0.pt +3 -0
- mlp/M1_S-3_R1_P0_config.json +38 -0
- mlp/M1_S-4_R1_P0.pt +3 -0
- mlp/M1_S-4_R1_P0_config.json +38 -0
- mlp/M1_S-5_R1_P0.pt +3 -0
- mlp/M1_S-5_R1_P0_config.json +38 -0
- mlp/M1_S-6_R1_P0.pt +3 -0
- mlp/M1_S-6_R1_P0_config.json +38 -0
- mlp/M2_S-1_R1_P0.pt +3 -0
- mlp/M2_S-1_R1_P0_config.json +38 -0
- mlp/M2_S-2_R1_P0.pt +3 -0
- mlp/M2_S-2_R1_P0_config.json +38 -0
- mlp/M2_S-3_R1_P0.pt +3 -0
- mlp/M2_S-3_R1_P0_config.json +38 -0
- mlp/M2_S-4_R1_P0.pt +3 -0
- mlp/M2_S-4_R1_P0_config.json +38 -0
- mlp/M2_S-5_R1_P0.pt +3 -0
- mlp/M2_S-5_R1_P0_config.json +38 -0
- mlp/M2_S-6_R1_P0.pt +3 -0
- mlp/M2_S-6_R1_P0_config.json +38 -0
- mlp/M3_S-1_R1_P0.pt +3 -0
- mlp/M3_S-1_R1_P0_config.json +38 -0
- mlp/M3_S-2_R1_P0.pt +3 -0
- mlp/M3_S-2_R1_P0_config.json +38 -0
- mlp/M3_S-3_R1_P0.pt +3 -0
- mlp/M3_S-3_R1_P0_config.json +38 -0
- mlp/M3_S-4_R1_P0.pt +3 -0
- mlp/M3_S-4_R1_P0_config.json +38 -0
- mlp/M3_S-5_R1_P0.pt +3 -0
- mlp/M3_S-5_R1_P0_config.json +38 -0
- mlp/M3_S-6_R1_P0.pt +3 -0
- mlp/M3_S-6_R1_P0_config.json +38 -0
mlp/M0_S-1_R1_P0.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bcda59b9785874b2b9eeae92c397e7425610983d1f016ecb9e3a44fb758804f0
|
3 |
+
size 153705080
|
mlp/M0_S-1_R1_P0_config.json
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"n_features": 25000,
|
3 |
+
"d_model": 768,
|
4 |
+
"lr_exp": -10,
|
5 |
+
"disable_comet": false,
|
6 |
+
"per_neuron_reinit_interval": 0,
|
7 |
+
"reservoir_time_discount": 0.995,
|
8 |
+
"reinit_interval": 800,
|
9 |
+
"max_reinit_neurons": 5000,
|
10 |
+
"reservoir_size": 5000,
|
11 |
+
"n_piles": 292,
|
12 |
+
"log_interval": 200,
|
13 |
+
"reinit_input_norm": "target_scaled",
|
14 |
+
"reinit_input": "x",
|
15 |
+
"reinit_norm_alpha": 0.3,
|
16 |
+
"data_loc": "mlp_data",
|
17 |
+
"reinit_threshold": -6,
|
18 |
+
"scheduler": "wsd",
|
19 |
+
"layer_idx": 0,
|
20 |
+
"l1_exp": -1,
|
21 |
+
"neuron_reinit_percent": 0.85,
|
22 |
+
"beta1": 1,
|
23 |
+
"beta2": 4,
|
24 |
+
"reinit_target": "error",
|
25 |
+
"sparse_adam": false,
|
26 |
+
"run_template": "Mo{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
|
27 |
+
"project_name": "mlp_out_test",
|
28 |
+
"decoder_bias": true,
|
29 |
+
"l1_beta": 0.99,
|
30 |
+
"alt_sparsity_loss": "log",
|
31 |
+
"l1_ratio": 1,
|
32 |
+
"l1_p": 0,
|
33 |
+
"optimizer": "sparse_adam",
|
34 |
+
"model_type": "mlp_out",
|
35 |
+
"adam_beta1": 0.5,
|
36 |
+
"adam_beta2": 0.9375,
|
37 |
+
"run_name": "Mo0_S-1_R1_P0"
|
38 |
+
}
|
mlp/M0_S-2_R1_P0.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aebed9ea9ffe0ecbfeeffdffe5c1be42542af6d034ddf64505a9609533cea8b9
|
3 |
+
size 153705080
|
mlp/M0_S-2_R1_P0_config.json
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"n_features": 25000,
|
3 |
+
"d_model": 768,
|
4 |
+
"lr_exp": -10,
|
5 |
+
"disable_comet": false,
|
6 |
+
"per_neuron_reinit_interval": 0,
|
7 |
+
"reservoir_time_discount": 0.995,
|
8 |
+
"reinit_interval": 800,
|
9 |
+
"max_reinit_neurons": 5000,
|
10 |
+
"reservoir_size": 5000,
|
11 |
+
"n_piles": 292,
|
12 |
+
"log_interval": 200,
|
13 |
+
"reinit_input_norm": "target_scaled",
|
14 |
+
"reinit_input": "x",
|
15 |
+
"reinit_norm_alpha": 0.3,
|
16 |
+
"data_loc": "mlp_data",
|
17 |
+
"reinit_threshold": -6,
|
18 |
+
"scheduler": "wsd",
|
19 |
+
"layer_idx": 0,
|
20 |
+
"l1_exp": -2,
|
21 |
+
"neuron_reinit_percent": 0.85,
|
22 |
+
"beta1": 1,
|
23 |
+
"beta2": 4,
|
24 |
+
"reinit_target": "error",
|
25 |
+
"sparse_adam": false,
|
26 |
+
"run_template": "Mo{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
|
27 |
+
"project_name": "mlp_out_test",
|
28 |
+
"decoder_bias": true,
|
29 |
+
"l1_beta": 0.99,
|
30 |
+
"alt_sparsity_loss": "log",
|
31 |
+
"l1_ratio": 1,
|
32 |
+
"l1_p": 0,
|
33 |
+
"optimizer": "sparse_adam",
|
34 |
+
"model_type": "mlp_out",
|
35 |
+
"adam_beta1": 0.5,
|
36 |
+
"adam_beta2": 0.9375,
|
37 |
+
"run_name": "Mo0_S-2_R1_P0"
|
38 |
+
}
|
mlp/M0_S-3_R1_P0.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2106f5872e56bf8ed8038860c28e5edb3fb8769b19459b287f537be85d1504ee
|
3 |
+
size 153705080
|
mlp/M0_S-3_R1_P0_config.json
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"n_features": 25000,
|
3 |
+
"d_model": 768,
|
4 |
+
"lr_exp": -10,
|
5 |
+
"disable_comet": false,
|
6 |
+
"per_neuron_reinit_interval": 0,
|
7 |
+
"reservoir_time_discount": 0.995,
|
8 |
+
"reinit_interval": 800,
|
9 |
+
"max_reinit_neurons": 5000,
|
10 |
+
"reservoir_size": 5000,
|
11 |
+
"n_piles": 292,
|
12 |
+
"log_interval": 200,
|
13 |
+
"reinit_input_norm": "target_scaled",
|
14 |
+
"reinit_input": "x",
|
15 |
+
"reinit_norm_alpha": 0.3,
|
16 |
+
"data_loc": "mlp_data",
|
17 |
+
"reinit_threshold": -6,
|
18 |
+
"scheduler": "wsd",
|
19 |
+
"layer_idx": 0,
|
20 |
+
"l1_exp": -3,
|
21 |
+
"neuron_reinit_percent": 0.85,
|
22 |
+
"beta1": 1,
|
23 |
+
"beta2": 4,
|
24 |
+
"reinit_target": "error",
|
25 |
+
"sparse_adam": false,
|
26 |
+
"run_template": "Mo{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
|
27 |
+
"project_name": "mlp_out_test",
|
28 |
+
"decoder_bias": true,
|
29 |
+
"l1_beta": 0.99,
|
30 |
+
"alt_sparsity_loss": "log",
|
31 |
+
"l1_ratio": 1,
|
32 |
+
"l1_p": 0,
|
33 |
+
"optimizer": "sparse_adam",
|
34 |
+
"model_type": "mlp_out",
|
35 |
+
"adam_beta1": 0.5,
|
36 |
+
"adam_beta2": 0.9375,
|
37 |
+
"run_name": "Mo0_S-3_R1_P0"
|
38 |
+
}
|
mlp/M0_S-4_R1_P0.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c3be61e61581bc5b130727e693446be29bf02677eb1f03f04a8612121df2af6e
|
3 |
+
size 153705080
|
mlp/M0_S-4_R1_P0_config.json
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"n_features": 25000,
|
3 |
+
"d_model": 768,
|
4 |
+
"lr_exp": -10,
|
5 |
+
"disable_comet": false,
|
6 |
+
"per_neuron_reinit_interval": 0,
|
7 |
+
"reservoir_time_discount": 0.995,
|
8 |
+
"reinit_interval": 800,
|
9 |
+
"max_reinit_neurons": 5000,
|
10 |
+
"reservoir_size": 5000,
|
11 |
+
"n_piles": 292,
|
12 |
+
"log_interval": 200,
|
13 |
+
"reinit_input_norm": "target_scaled",
|
14 |
+
"reinit_input": "x",
|
15 |
+
"reinit_norm_alpha": 0.3,
|
16 |
+
"data_loc": "mlp_data",
|
17 |
+
"reinit_threshold": -6,
|
18 |
+
"scheduler": "wsd",
|
19 |
+
"layer_idx": 0,
|
20 |
+
"l1_exp": -4,
|
21 |
+
"neuron_reinit_percent": 0.85,
|
22 |
+
"beta1": 1,
|
23 |
+
"beta2": 4,
|
24 |
+
"reinit_target": "error",
|
25 |
+
"sparse_adam": false,
|
26 |
+
"run_template": "Mo{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
|
27 |
+
"project_name": "mlp_out_test",
|
28 |
+
"decoder_bias": true,
|
29 |
+
"l1_beta": 0.99,
|
30 |
+
"alt_sparsity_loss": "log",
|
31 |
+
"l1_ratio": 1,
|
32 |
+
"l1_p": 0,
|
33 |
+
"optimizer": "sparse_adam",
|
34 |
+
"model_type": "mlp_out",
|
35 |
+
"adam_beta1": 0.5,
|
36 |
+
"adam_beta2": 0.9375,
|
37 |
+
"run_name": "Mo0_S-4_R1_P0"
|
38 |
+
}
|
mlp/M0_S-5_R1_P0.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e982b4d7aceacbfbc992b601c6717e9b0968c50125b44ae27d1e09069ed3fa13
|
3 |
+
size 153705080
|
mlp/M0_S-5_R1_P0_config.json
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"n_features": 25000,
|
3 |
+
"d_model": 768,
|
4 |
+
"lr_exp": -10,
|
5 |
+
"disable_comet": false,
|
6 |
+
"per_neuron_reinit_interval": 0,
|
7 |
+
"reservoir_time_discount": 0.995,
|
8 |
+
"reinit_interval": 800,
|
9 |
+
"max_reinit_neurons": 5000,
|
10 |
+
"reservoir_size": 5000,
|
11 |
+
"n_piles": 292,
|
12 |
+
"log_interval": 200,
|
13 |
+
"reinit_input_norm": "target_scaled",
|
14 |
+
"reinit_input": "x",
|
15 |
+
"reinit_norm_alpha": 0.3,
|
16 |
+
"data_loc": "mlp_data",
|
17 |
+
"reinit_threshold": -6,
|
18 |
+
"scheduler": "wsd",
|
19 |
+
"layer_idx": 0,
|
20 |
+
"l1_exp": -5,
|
21 |
+
"neuron_reinit_percent": 0.85,
|
22 |
+
"beta1": 1,
|
23 |
+
"beta2": 4,
|
24 |
+
"reinit_target": "error",
|
25 |
+
"sparse_adam": false,
|
26 |
+
"run_template": "Mo{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
|
27 |
+
"project_name": "mlp_out_test",
|
28 |
+
"decoder_bias": true,
|
29 |
+
"l1_beta": 0.99,
|
30 |
+
"alt_sparsity_loss": "log",
|
31 |
+
"l1_ratio": 1,
|
32 |
+
"l1_p": 0,
|
33 |
+
"optimizer": "sparse_adam",
|
34 |
+
"model_type": "mlp_out",
|
35 |
+
"adam_beta1": 0.5,
|
36 |
+
"adam_beta2": 0.9375,
|
37 |
+
"run_name": "Mo0_S-5_R1_P0"
|
38 |
+
}
|
mlp/M0_S-6_R1_P0.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:695badca456d4db5a317ce8470e33f7bab393cf7ca5d1207ffcab5b4bcd259db
|
3 |
+
size 153705080
|
mlp/M0_S-6_R1_P0_config.json
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"n_features": 25000,
|
3 |
+
"d_model": 768,
|
4 |
+
"lr_exp": -10,
|
5 |
+
"disable_comet": false,
|
6 |
+
"per_neuron_reinit_interval": 0,
|
7 |
+
"reservoir_time_discount": 0.995,
|
8 |
+
"reinit_interval": 800,
|
9 |
+
"max_reinit_neurons": 5000,
|
10 |
+
"reservoir_size": 5000,
|
11 |
+
"n_piles": 292,
|
12 |
+
"log_interval": 200,
|
13 |
+
"reinit_input_norm": "target_scaled",
|
14 |
+
"reinit_input": "x",
|
15 |
+
"reinit_norm_alpha": 0.3,
|
16 |
+
"data_loc": "mlp_data",
|
17 |
+
"reinit_threshold": -6,
|
18 |
+
"scheduler": "wsd",
|
19 |
+
"layer_idx": 0,
|
20 |
+
"l1_exp": -6,
|
21 |
+
"neuron_reinit_percent": 0.85,
|
22 |
+
"beta1": 1,
|
23 |
+
"beta2": 4,
|
24 |
+
"reinit_target": "error",
|
25 |
+
"sparse_adam": false,
|
26 |
+
"run_template": "Mo{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
|
27 |
+
"project_name": "mlp_out_test",
|
28 |
+
"decoder_bias": true,
|
29 |
+
"l1_beta": 0.99,
|
30 |
+
"alt_sparsity_loss": "log",
|
31 |
+
"l1_ratio": 1,
|
32 |
+
"l1_p": 0,
|
33 |
+
"optimizer": "sparse_adam",
|
34 |
+
"model_type": "mlp_out",
|
35 |
+
"adam_beta1": 0.5,
|
36 |
+
"adam_beta2": 0.9375,
|
37 |
+
"run_name": "Mo0_S-6_R1_P0"
|
38 |
+
}
|
mlp/M1_S-1_R1_P0.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b3a48f741f8318af05e9d4141c5bae377b1af8a4fe79e2d459bc692dbf4b8700
|
3 |
+
size 153705080
|
mlp/M1_S-1_R1_P0_config.json
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"n_features": 25000,
|
3 |
+
"d_model": 768,
|
4 |
+
"lr_exp": -10,
|
5 |
+
"disable_comet": false,
|
6 |
+
"per_neuron_reinit_interval": 0,
|
7 |
+
"reservoir_time_discount": 0.995,
|
8 |
+
"reinit_interval": 800,
|
9 |
+
"max_reinit_neurons": 5000,
|
10 |
+
"reservoir_size": 5000,
|
11 |
+
"n_piles": 292,
|
12 |
+
"log_interval": 200,
|
13 |
+
"reinit_input_norm": "target_scaled",
|
14 |
+
"reinit_input": "x",
|
15 |
+
"reinit_norm_alpha": 0.3,
|
16 |
+
"data_loc": "mlp_data",
|
17 |
+
"reinit_threshold": -6,
|
18 |
+
"scheduler": "wsd",
|
19 |
+
"layer_idx": 1,
|
20 |
+
"l1_exp": -1,
|
21 |
+
"neuron_reinit_percent": 0.85,
|
22 |
+
"beta1": 1,
|
23 |
+
"beta2": 4,
|
24 |
+
"reinit_target": "error",
|
25 |
+
"sparse_adam": false,
|
26 |
+
"run_template": "Mo{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
|
27 |
+
"project_name": "mlp_out_test",
|
28 |
+
"decoder_bias": true,
|
29 |
+
"l1_beta": 0.99,
|
30 |
+
"alt_sparsity_loss": "log",
|
31 |
+
"l1_ratio": 1,
|
32 |
+
"l1_p": 0,
|
33 |
+
"optimizer": "sparse_adam",
|
34 |
+
"model_type": "mlp_out",
|
35 |
+
"adam_beta1": 0.5,
|
36 |
+
"adam_beta2": 0.9375,
|
37 |
+
"run_name": "Mo1_S-1_R1_P0"
|
38 |
+
}
|
mlp/M1_S-2_R1_P0.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e4630e009c4d2c3cc9ccb563744c679e6726138f27beda74ba1fac2a47768991
|
3 |
+
size 153705080
|
mlp/M1_S-2_R1_P0_config.json
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"n_features": 25000,
|
3 |
+
"d_model": 768,
|
4 |
+
"lr_exp": -10,
|
5 |
+
"disable_comet": false,
|
6 |
+
"per_neuron_reinit_interval": 0,
|
7 |
+
"reservoir_time_discount": 0.995,
|
8 |
+
"reinit_interval": 800,
|
9 |
+
"max_reinit_neurons": 5000,
|
10 |
+
"reservoir_size": 5000,
|
11 |
+
"n_piles": 292,
|
12 |
+
"log_interval": 200,
|
13 |
+
"reinit_input_norm": "target_scaled",
|
14 |
+
"reinit_input": "x",
|
15 |
+
"reinit_norm_alpha": 0.3,
|
16 |
+
"data_loc": "mlp_data",
|
17 |
+
"reinit_threshold": -6,
|
18 |
+
"scheduler": "wsd",
|
19 |
+
"layer_idx": 1,
|
20 |
+
"l1_exp": -2,
|
21 |
+
"neuron_reinit_percent": 0.85,
|
22 |
+
"beta1": 1,
|
23 |
+
"beta2": 4,
|
24 |
+
"reinit_target": "error",
|
25 |
+
"sparse_adam": false,
|
26 |
+
"run_template": "Mo{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
|
27 |
+
"project_name": "mlp_out_test",
|
28 |
+
"decoder_bias": true,
|
29 |
+
"l1_beta": 0.99,
|
30 |
+
"alt_sparsity_loss": "log",
|
31 |
+
"l1_ratio": 1,
|
32 |
+
"l1_p": 0,
|
33 |
+
"optimizer": "sparse_adam",
|
34 |
+
"model_type": "mlp_out",
|
35 |
+
"adam_beta1": 0.5,
|
36 |
+
"adam_beta2": 0.9375,
|
37 |
+
"run_name": "Mo1_S-2_R1_P0"
|
38 |
+
}
|
mlp/M1_S-3_R1_P0.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8c430af5b000c58973a893ad8ec61c5bbdc3d6dd9cc1d1b01a949403b29cefcc
|
3 |
+
size 153705080
|
mlp/M1_S-3_R1_P0_config.json
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"n_features": 25000,
|
3 |
+
"d_model": 768,
|
4 |
+
"lr_exp": -10,
|
5 |
+
"disable_comet": false,
|
6 |
+
"per_neuron_reinit_interval": 0,
|
7 |
+
"reservoir_time_discount": 0.995,
|
8 |
+
"reinit_interval": 800,
|
9 |
+
"max_reinit_neurons": 5000,
|
10 |
+
"reservoir_size": 5000,
|
11 |
+
"n_piles": 292,
|
12 |
+
"log_interval": 200,
|
13 |
+
"reinit_input_norm": "target_scaled",
|
14 |
+
"reinit_input": "x",
|
15 |
+
"reinit_norm_alpha": 0.3,
|
16 |
+
"data_loc": "mlp_data",
|
17 |
+
"reinit_threshold": -6,
|
18 |
+
"scheduler": "wsd",
|
19 |
+
"layer_idx": 1,
|
20 |
+
"l1_exp": -3,
|
21 |
+
"neuron_reinit_percent": 0.85,
|
22 |
+
"beta1": 1,
|
23 |
+
"beta2": 4,
|
24 |
+
"reinit_target": "error",
|
25 |
+
"sparse_adam": false,
|
26 |
+
"run_template": "Mo{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
|
27 |
+
"project_name": "mlp_out_test",
|
28 |
+
"decoder_bias": true,
|
29 |
+
"l1_beta": 0.99,
|
30 |
+
"alt_sparsity_loss": "log",
|
31 |
+
"l1_ratio": 1,
|
32 |
+
"l1_p": 0,
|
33 |
+
"optimizer": "sparse_adam",
|
34 |
+
"model_type": "mlp_out",
|
35 |
+
"adam_beta1": 0.5,
|
36 |
+
"adam_beta2": 0.9375,
|
37 |
+
"run_name": "Mo1_S-3_R1_P0"
|
38 |
+
}
|
mlp/M1_S-4_R1_P0.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4e8787e12c7fe5edf2a56a87b1c3df9acf4495df59a78c95cceebb86dc7ccf87
|
3 |
+
size 153705080
|
mlp/M1_S-4_R1_P0_config.json
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"n_features": 25000,
|
3 |
+
"d_model": 768,
|
4 |
+
"lr_exp": -10,
|
5 |
+
"disable_comet": false,
|
6 |
+
"per_neuron_reinit_interval": 0,
|
7 |
+
"reservoir_time_discount": 0.995,
|
8 |
+
"reinit_interval": 800,
|
9 |
+
"max_reinit_neurons": 5000,
|
10 |
+
"reservoir_size": 5000,
|
11 |
+
"n_piles": 292,
|
12 |
+
"log_interval": 200,
|
13 |
+
"reinit_input_norm": "target_scaled",
|
14 |
+
"reinit_input": "x",
|
15 |
+
"reinit_norm_alpha": 0.3,
|
16 |
+
"data_loc": "mlp_data",
|
17 |
+
"reinit_threshold": -6,
|
18 |
+
"scheduler": "wsd",
|
19 |
+
"layer_idx": 1,
|
20 |
+
"l1_exp": -4,
|
21 |
+
"neuron_reinit_percent": 0.85,
|
22 |
+
"beta1": 1,
|
23 |
+
"beta2": 4,
|
24 |
+
"reinit_target": "error",
|
25 |
+
"sparse_adam": false,
|
26 |
+
"run_template": "Mo{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
|
27 |
+
"project_name": "mlp_out_test",
|
28 |
+
"decoder_bias": true,
|
29 |
+
"l1_beta": 0.99,
|
30 |
+
"alt_sparsity_loss": "log",
|
31 |
+
"l1_ratio": 1,
|
32 |
+
"l1_p": 0,
|
33 |
+
"optimizer": "sparse_adam",
|
34 |
+
"model_type": "mlp_out",
|
35 |
+
"adam_beta1": 0.5,
|
36 |
+
"adam_beta2": 0.9375,
|
37 |
+
"run_name": "Mo1_S-4_R1_P0"
|
38 |
+
}
|
mlp/M1_S-5_R1_P0.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1f0f5e9f230869bbaf44eac1550a099ea58c1d86ea8df73c3cd9f316dd5f1650
|
3 |
+
size 153705080
|
mlp/M1_S-5_R1_P0_config.json
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"n_features": 25000,
|
3 |
+
"d_model": 768,
|
4 |
+
"lr_exp": -10,
|
5 |
+
"disable_comet": false,
|
6 |
+
"per_neuron_reinit_interval": 0,
|
7 |
+
"reservoir_time_discount": 0.995,
|
8 |
+
"reinit_interval": 800,
|
9 |
+
"max_reinit_neurons": 5000,
|
10 |
+
"reservoir_size": 5000,
|
11 |
+
"n_piles": 292,
|
12 |
+
"log_interval": 200,
|
13 |
+
"reinit_input_norm": "target_scaled",
|
14 |
+
"reinit_input": "x",
|
15 |
+
"reinit_norm_alpha": 0.3,
|
16 |
+
"data_loc": "mlp_data",
|
17 |
+
"reinit_threshold": -6,
|
18 |
+
"scheduler": "wsd",
|
19 |
+
"layer_idx": 1,
|
20 |
+
"l1_exp": -5,
|
21 |
+
"neuron_reinit_percent": 0.85,
|
22 |
+
"beta1": 1,
|
23 |
+
"beta2": 4,
|
24 |
+
"reinit_target": "error",
|
25 |
+
"sparse_adam": false,
|
26 |
+
"run_template": "Mo{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
|
27 |
+
"project_name": "mlp_out_test",
|
28 |
+
"decoder_bias": true,
|
29 |
+
"l1_beta": 0.99,
|
30 |
+
"alt_sparsity_loss": "log",
|
31 |
+
"l1_ratio": 1,
|
32 |
+
"l1_p": 0,
|
33 |
+
"optimizer": "sparse_adam",
|
34 |
+
"model_type": "mlp_out",
|
35 |
+
"adam_beta1": 0.5,
|
36 |
+
"adam_beta2": 0.9375,
|
37 |
+
"run_name": "Mo1_S-5_R1_P0"
|
38 |
+
}
|
mlp/M1_S-6_R1_P0.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:61d6441e8dbc5b092dc2e11bd525a13eee4d72111e0b0173deb3cb89b6e18075
|
3 |
+
size 153705080
|
mlp/M1_S-6_R1_P0_config.json
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"n_features": 25000,
|
3 |
+
"d_model": 768,
|
4 |
+
"lr_exp": -10,
|
5 |
+
"disable_comet": false,
|
6 |
+
"per_neuron_reinit_interval": 0,
|
7 |
+
"reservoir_time_discount": 0.995,
|
8 |
+
"reinit_interval": 800,
|
9 |
+
"max_reinit_neurons": 5000,
|
10 |
+
"reservoir_size": 5000,
|
11 |
+
"n_piles": 292,
|
12 |
+
"log_interval": 200,
|
13 |
+
"reinit_input_norm": "target_scaled",
|
14 |
+
"reinit_input": "x",
|
15 |
+
"reinit_norm_alpha": 0.3,
|
16 |
+
"data_loc": "mlp_data",
|
17 |
+
"reinit_threshold": -6,
|
18 |
+
"scheduler": "wsd",
|
19 |
+
"layer_idx": 1,
|
20 |
+
"l1_exp": -6,
|
21 |
+
"neuron_reinit_percent": 0.85,
|
22 |
+
"beta1": 1,
|
23 |
+
"beta2": 4,
|
24 |
+
"reinit_target": "error",
|
25 |
+
"sparse_adam": false,
|
26 |
+
"run_template": "Mo{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
|
27 |
+
"project_name": "mlp_out_test",
|
28 |
+
"decoder_bias": true,
|
29 |
+
"l1_beta": 0.99,
|
30 |
+
"alt_sparsity_loss": "log",
|
31 |
+
"l1_ratio": 1,
|
32 |
+
"l1_p": 0,
|
33 |
+
"optimizer": "sparse_adam",
|
34 |
+
"model_type": "mlp_out",
|
35 |
+
"adam_beta1": 0.5,
|
36 |
+
"adam_beta2": 0.9375,
|
37 |
+
"run_name": "Mo1_S-6_R1_P0"
|
38 |
+
}
|
mlp/M2_S-1_R1_P0.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0b242e709f9c64bbe0aa82a8cc904b25cb1aeb7388ea89529b2685f8a528bf37
|
3 |
+
size 153705080
|
mlp/M2_S-1_R1_P0_config.json
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"n_features": 25000,
|
3 |
+
"d_model": 768,
|
4 |
+
"lr_exp": -10,
|
5 |
+
"disable_comet": false,
|
6 |
+
"per_neuron_reinit_interval": 0,
|
7 |
+
"reservoir_time_discount": 0.995,
|
8 |
+
"reinit_interval": 800,
|
9 |
+
"max_reinit_neurons": 5000,
|
10 |
+
"reservoir_size": 5000,
|
11 |
+
"n_piles": 292,
|
12 |
+
"log_interval": 200,
|
13 |
+
"reinit_input_norm": "target_scaled",
|
14 |
+
"reinit_input": "x",
|
15 |
+
"reinit_norm_alpha": 0.3,
|
16 |
+
"data_loc": "mlp_data",
|
17 |
+
"reinit_threshold": -6,
|
18 |
+
"scheduler": "wsd",
|
19 |
+
"layer_idx": 2,
|
20 |
+
"l1_exp": -1,
|
21 |
+
"neuron_reinit_percent": 0.85,
|
22 |
+
"beta1": 1,
|
23 |
+
"beta2": 4,
|
24 |
+
"reinit_target": "error",
|
25 |
+
"sparse_adam": false,
|
26 |
+
"run_template": "Mo{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
|
27 |
+
"project_name": "mlp_out_test",
|
28 |
+
"decoder_bias": true,
|
29 |
+
"l1_beta": 0.99,
|
30 |
+
"alt_sparsity_loss": "log",
|
31 |
+
"l1_ratio": 1,
|
32 |
+
"l1_p": 0,
|
33 |
+
"optimizer": "sparse_adam",
|
34 |
+
"model_type": "mlp_out",
|
35 |
+
"adam_beta1": 0.5,
|
36 |
+
"adam_beta2": 0.9375,
|
37 |
+
"run_name": "Mo2_S-1_R1_P0"
|
38 |
+
}
|
mlp/M2_S-2_R1_P0.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:25eec217d1c72e4d83180698a252235692a536bbe598c68e97a0fa5140a0ebdd
|
3 |
+
size 153705080
|
mlp/M2_S-2_R1_P0_config.json
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"n_features": 25000,
|
3 |
+
"d_model": 768,
|
4 |
+
"lr_exp": -10,
|
5 |
+
"disable_comet": false,
|
6 |
+
"per_neuron_reinit_interval": 0,
|
7 |
+
"reservoir_time_discount": 0.995,
|
8 |
+
"reinit_interval": 800,
|
9 |
+
"max_reinit_neurons": 5000,
|
10 |
+
"reservoir_size": 5000,
|
11 |
+
"n_piles": 292,
|
12 |
+
"log_interval": 200,
|
13 |
+
"reinit_input_norm": "target_scaled",
|
14 |
+
"reinit_input": "x",
|
15 |
+
"reinit_norm_alpha": 0.3,
|
16 |
+
"data_loc": "mlp_data",
|
17 |
+
"reinit_threshold": -6,
|
18 |
+
"scheduler": "wsd",
|
19 |
+
"layer_idx": 2,
|
20 |
+
"l1_exp": -2,
|
21 |
+
"neuron_reinit_percent": 0.85,
|
22 |
+
"beta1": 1,
|
23 |
+
"beta2": 4,
|
24 |
+
"reinit_target": "error",
|
25 |
+
"sparse_adam": false,
|
26 |
+
"run_template": "Mo{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
|
27 |
+
"project_name": "mlp_out_test",
|
28 |
+
"decoder_bias": true,
|
29 |
+
"l1_beta": 0.99,
|
30 |
+
"alt_sparsity_loss": "log",
|
31 |
+
"l1_ratio": 1,
|
32 |
+
"l1_p": 0,
|
33 |
+
"optimizer": "sparse_adam",
|
34 |
+
"model_type": "mlp_out",
|
35 |
+
"adam_beta1": 0.5,
|
36 |
+
"adam_beta2": 0.9375,
|
37 |
+
"run_name": "Mo2_S-2_R1_P0"
|
38 |
+
}
|
mlp/M2_S-3_R1_P0.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:71a43b0400729a9bcde458b0629c32feacb53eaa9622d476334d65842a6b9833
|
3 |
+
size 153705080
|
mlp/M2_S-3_R1_P0_config.json
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"n_features": 25000,
|
3 |
+
"d_model": 768,
|
4 |
+
"lr_exp": -10,
|
5 |
+
"disable_comet": false,
|
6 |
+
"per_neuron_reinit_interval": 0,
|
7 |
+
"reservoir_time_discount": 0.995,
|
8 |
+
"reinit_interval": 800,
|
9 |
+
"max_reinit_neurons": 5000,
|
10 |
+
"reservoir_size": 5000,
|
11 |
+
"n_piles": 292,
|
12 |
+
"log_interval": 200,
|
13 |
+
"reinit_input_norm": "target_scaled",
|
14 |
+
"reinit_input": "x",
|
15 |
+
"reinit_norm_alpha": 0.3,
|
16 |
+
"data_loc": "mlp_data",
|
17 |
+
"reinit_threshold": -6,
|
18 |
+
"scheduler": "wsd",
|
19 |
+
"layer_idx": 2,
|
20 |
+
"l1_exp": -3,
|
21 |
+
"neuron_reinit_percent": 0.85,
|
22 |
+
"beta1": 1,
|
23 |
+
"beta2": 4,
|
24 |
+
"reinit_target": "error",
|
25 |
+
"sparse_adam": false,
|
26 |
+
"run_template": "Mo{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
|
27 |
+
"project_name": "mlp_out_test",
|
28 |
+
"decoder_bias": true,
|
29 |
+
"l1_beta": 0.99,
|
30 |
+
"alt_sparsity_loss": "log",
|
31 |
+
"l1_ratio": 1,
|
32 |
+
"l1_p": 0,
|
33 |
+
"optimizer": "sparse_adam",
|
34 |
+
"model_type": "mlp_out",
|
35 |
+
"adam_beta1": 0.5,
|
36 |
+
"adam_beta2": 0.9375,
|
37 |
+
"run_name": "Mo2_S-3_R1_P0"
|
38 |
+
}
|
mlp/M2_S-4_R1_P0.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d98cefdab793343dd1a8be86c606414af42262f651d63fcb13096efff490356e
|
3 |
+
size 153705080
|
mlp/M2_S-4_R1_P0_config.json
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"n_features": 25000,
|
3 |
+
"d_model": 768,
|
4 |
+
"lr_exp": -10,
|
5 |
+
"disable_comet": false,
|
6 |
+
"per_neuron_reinit_interval": 0,
|
7 |
+
"reservoir_time_discount": 0.995,
|
8 |
+
"reinit_interval": 800,
|
9 |
+
"max_reinit_neurons": 5000,
|
10 |
+
"reservoir_size": 5000,
|
11 |
+
"n_piles": 292,
|
12 |
+
"log_interval": 200,
|
13 |
+
"reinit_input_norm": "target_scaled",
|
14 |
+
"reinit_input": "x",
|
15 |
+
"reinit_norm_alpha": 0.3,
|
16 |
+
"data_loc": "mlp_data",
|
17 |
+
"reinit_threshold": -6,
|
18 |
+
"scheduler": "wsd",
|
19 |
+
"layer_idx": 2,
|
20 |
+
"l1_exp": -4,
|
21 |
+
"neuron_reinit_percent": 0.85,
|
22 |
+
"beta1": 1,
|
23 |
+
"beta2": 4,
|
24 |
+
"reinit_target": "error",
|
25 |
+
"sparse_adam": false,
|
26 |
+
"run_template": "Mo{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
|
27 |
+
"project_name": "mlp_out_test",
|
28 |
+
"decoder_bias": true,
|
29 |
+
"l1_beta": 0.99,
|
30 |
+
"alt_sparsity_loss": "log",
|
31 |
+
"l1_ratio": 1,
|
32 |
+
"l1_p": 0,
|
33 |
+
"optimizer": "sparse_adam",
|
34 |
+
"model_type": "mlp_out",
|
35 |
+
"adam_beta1": 0.5,
|
36 |
+
"adam_beta2": 0.9375,
|
37 |
+
"run_name": "Mo2_S-4_R1_P0"
|
38 |
+
}
|
mlp/M2_S-5_R1_P0.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:06caf3e157a9254766c09eeaf0dab9c3c69d7947be202b62a30db79dd7a74d7b
|
3 |
+
size 153705080
|
mlp/M2_S-5_R1_P0_config.json
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"n_features": 25000,
|
3 |
+
"d_model": 768,
|
4 |
+
"lr_exp": -10,
|
5 |
+
"disable_comet": false,
|
6 |
+
"per_neuron_reinit_interval": 0,
|
7 |
+
"reservoir_time_discount": 0.995,
|
8 |
+
"reinit_interval": 800,
|
9 |
+
"max_reinit_neurons": 5000,
|
10 |
+
"reservoir_size": 5000,
|
11 |
+
"n_piles": 292,
|
12 |
+
"log_interval": 200,
|
13 |
+
"reinit_input_norm": "target_scaled",
|
14 |
+
"reinit_input": "x",
|
15 |
+
"reinit_norm_alpha": 0.3,
|
16 |
+
"data_loc": "mlp_data",
|
17 |
+
"reinit_threshold": -6,
|
18 |
+
"scheduler": "wsd",
|
19 |
+
"layer_idx": 2,
|
20 |
+
"l1_exp": -5,
|
21 |
+
"neuron_reinit_percent": 0.85,
|
22 |
+
"beta1": 1,
|
23 |
+
"beta2": 4,
|
24 |
+
"reinit_target": "error",
|
25 |
+
"sparse_adam": false,
|
26 |
+
"run_template": "Mo{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
|
27 |
+
"project_name": "mlp_out_test",
|
28 |
+
"decoder_bias": true,
|
29 |
+
"l1_beta": 0.99,
|
30 |
+
"alt_sparsity_loss": "log",
|
31 |
+
"l1_ratio": 1,
|
32 |
+
"l1_p": 0,
|
33 |
+
"optimizer": "sparse_adam",
|
34 |
+
"model_type": "mlp_out",
|
35 |
+
"adam_beta1": 0.5,
|
36 |
+
"adam_beta2": 0.9375,
|
37 |
+
"run_name": "Mo2_S-5_R1_P0"
|
38 |
+
}
|
mlp/M2_S-6_R1_P0.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e9a47f28e4720773cf93b81565d38fc85c22e4574e3dd1f44095b1d55648ae16
|
3 |
+
size 153705080
|
mlp/M2_S-6_R1_P0_config.json
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"n_features": 25000,
|
3 |
+
"d_model": 768,
|
4 |
+
"lr_exp": -10,
|
5 |
+
"disable_comet": false,
|
6 |
+
"per_neuron_reinit_interval": 0,
|
7 |
+
"reservoir_time_discount": 0.995,
|
8 |
+
"reinit_interval": 800,
|
9 |
+
"max_reinit_neurons": 5000,
|
10 |
+
"reservoir_size": 5000,
|
11 |
+
"n_piles": 292,
|
12 |
+
"log_interval": 200,
|
13 |
+
"reinit_input_norm": "target_scaled",
|
14 |
+
"reinit_input": "x",
|
15 |
+
"reinit_norm_alpha": 0.3,
|
16 |
+
"data_loc": "mlp_data",
|
17 |
+
"reinit_threshold": -6,
|
18 |
+
"scheduler": "wsd",
|
19 |
+
"layer_idx": 2,
|
20 |
+
"l1_exp": -6,
|
21 |
+
"neuron_reinit_percent": 0.85,
|
22 |
+
"beta1": 1,
|
23 |
+
"beta2": 4,
|
24 |
+
"reinit_target": "error",
|
25 |
+
"sparse_adam": false,
|
26 |
+
"run_template": "Mo{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
|
27 |
+
"project_name": "mlp_out_test",
|
28 |
+
"decoder_bias": true,
|
29 |
+
"l1_beta": 0.99,
|
30 |
+
"alt_sparsity_loss": "log",
|
31 |
+
"l1_ratio": 1,
|
32 |
+
"l1_p": 0,
|
33 |
+
"optimizer": "sparse_adam",
|
34 |
+
"model_type": "mlp_out",
|
35 |
+
"adam_beta1": 0.5,
|
36 |
+
"adam_beta2": 0.9375,
|
37 |
+
"run_name": "Mo2_S-6_R1_P0"
|
38 |
+
}
|
mlp/M3_S-1_R1_P0.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b90f2559cfeb894ec51fa727a7c87fabf0d1262f2b3e3814cc4acbe09a947bba
|
3 |
+
size 153705080
|
mlp/M3_S-1_R1_P0_config.json
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"n_features": 25000,
|
3 |
+
"d_model": 768,
|
4 |
+
"lr_exp": -10,
|
5 |
+
"disable_comet": false,
|
6 |
+
"per_neuron_reinit_interval": 0,
|
7 |
+
"reservoir_time_discount": 0.995,
|
8 |
+
"reinit_interval": 800,
|
9 |
+
"max_reinit_neurons": 5000,
|
10 |
+
"reservoir_size": 5000,
|
11 |
+
"n_piles": 292,
|
12 |
+
"log_interval": 200,
|
13 |
+
"reinit_input_norm": "target_scaled",
|
14 |
+
"reinit_input": "x",
|
15 |
+
"reinit_norm_alpha": 0.3,
|
16 |
+
"data_loc": "mlp_data",
|
17 |
+
"reinit_threshold": -6,
|
18 |
+
"scheduler": "wsd",
|
19 |
+
"layer_idx": 3,
|
20 |
+
"l1_exp": -1,
|
21 |
+
"neuron_reinit_percent": 0.85,
|
22 |
+
"beta1": 1,
|
23 |
+
"beta2": 4,
|
24 |
+
"reinit_target": "error",
|
25 |
+
"sparse_adam": false,
|
26 |
+
"run_template": "Mo{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
|
27 |
+
"project_name": "mlp_out_test",
|
28 |
+
"decoder_bias": true,
|
29 |
+
"l1_beta": 0.99,
|
30 |
+
"alt_sparsity_loss": "log",
|
31 |
+
"l1_ratio": 1,
|
32 |
+
"l1_p": 0,
|
33 |
+
"optimizer": "sparse_adam",
|
34 |
+
"model_type": "mlp_out",
|
35 |
+
"adam_beta1": 0.5,
|
36 |
+
"adam_beta2": 0.9375,
|
37 |
+
"run_name": "Mo3_S-1_R1_P0"
|
38 |
+
}
|
mlp/M3_S-2_R1_P0.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fa88e4135dea096926082aab18328e5ec40330c45c1b12c59cc365a28a473000
|
3 |
+
size 153705080
|
mlp/M3_S-2_R1_P0_config.json
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"n_features": 25000,
|
3 |
+
"d_model": 768,
|
4 |
+
"lr_exp": -10,
|
5 |
+
"disable_comet": false,
|
6 |
+
"per_neuron_reinit_interval": 0,
|
7 |
+
"reservoir_time_discount": 0.995,
|
8 |
+
"reinit_interval": 800,
|
9 |
+
"max_reinit_neurons": 5000,
|
10 |
+
"reservoir_size": 5000,
|
11 |
+
"n_piles": 292,
|
12 |
+
"log_interval": 200,
|
13 |
+
"reinit_input_norm": "target_scaled",
|
14 |
+
"reinit_input": "x",
|
15 |
+
"reinit_norm_alpha": 0.3,
|
16 |
+
"data_loc": "mlp_data",
|
17 |
+
"reinit_threshold": -6,
|
18 |
+
"scheduler": "wsd",
|
19 |
+
"layer_idx": 3,
|
20 |
+
"l1_exp": -2,
|
21 |
+
"neuron_reinit_percent": 0.85,
|
22 |
+
"beta1": 1,
|
23 |
+
"beta2": 4,
|
24 |
+
"reinit_target": "error",
|
25 |
+
"sparse_adam": false,
|
26 |
+
"run_template": "Mo{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
|
27 |
+
"project_name": "mlp_out_test",
|
28 |
+
"decoder_bias": true,
|
29 |
+
"l1_beta": 0.99,
|
30 |
+
"alt_sparsity_loss": "log",
|
31 |
+
"l1_ratio": 1,
|
32 |
+
"l1_p": 0,
|
33 |
+
"optimizer": "sparse_adam",
|
34 |
+
"model_type": "mlp_out",
|
35 |
+
"adam_beta1": 0.5,
|
36 |
+
"adam_beta2": 0.9375,
|
37 |
+
"run_name": "Mo3_S-2_R1_P0"
|
38 |
+
}
|
mlp/M3_S-3_R1_P0.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7b8f9ce9ac97dfc06c90b27082e7756766234835cabcc79cf7330714fa3972db
|
3 |
+
size 153705080
|
mlp/M3_S-3_R1_P0_config.json
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"n_features": 25000,
|
3 |
+
"d_model": 768,
|
4 |
+
"lr_exp": -10,
|
5 |
+
"disable_comet": false,
|
6 |
+
"per_neuron_reinit_interval": 0,
|
7 |
+
"reservoir_time_discount": 0.995,
|
8 |
+
"reinit_interval": 800,
|
9 |
+
"max_reinit_neurons": 5000,
|
10 |
+
"reservoir_size": 5000,
|
11 |
+
"n_piles": 292,
|
12 |
+
"log_interval": 200,
|
13 |
+
"reinit_input_norm": "target_scaled",
|
14 |
+
"reinit_input": "x",
|
15 |
+
"reinit_norm_alpha": 0.3,
|
16 |
+
"data_loc": "mlp_data",
|
17 |
+
"reinit_threshold": -6,
|
18 |
+
"scheduler": "wsd",
|
19 |
+
"layer_idx": 3,
|
20 |
+
"l1_exp": -3,
|
21 |
+
"neuron_reinit_percent": 0.85,
|
22 |
+
"beta1": 1,
|
23 |
+
"beta2": 4,
|
24 |
+
"reinit_target": "error",
|
25 |
+
"sparse_adam": false,
|
26 |
+
"run_template": "Mo{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
|
27 |
+
"project_name": "mlp_out_test",
|
28 |
+
"decoder_bias": true,
|
29 |
+
"l1_beta": 0.99,
|
30 |
+
"alt_sparsity_loss": "log",
|
31 |
+
"l1_ratio": 1,
|
32 |
+
"l1_p": 0,
|
33 |
+
"optimizer": "sparse_adam",
|
34 |
+
"model_type": "mlp_out",
|
35 |
+
"adam_beta1": 0.5,
|
36 |
+
"adam_beta2": 0.9375,
|
37 |
+
"run_name": "Mo3_S-3_R1_P0"
|
38 |
+
}
|
mlp/M3_S-4_R1_P0.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6ddc41001f648e468699a0010a9647f9eaece946e1ae265ed1c8af1772249732
|
3 |
+
size 153705080
|
mlp/M3_S-4_R1_P0_config.json
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"n_features": 25000,
|
3 |
+
"d_model": 768,
|
4 |
+
"lr_exp": -10,
|
5 |
+
"disable_comet": false,
|
6 |
+
"per_neuron_reinit_interval": 0,
|
7 |
+
"reservoir_time_discount": 0.995,
|
8 |
+
"reinit_interval": 800,
|
9 |
+
"max_reinit_neurons": 5000,
|
10 |
+
"reservoir_size": 5000,
|
11 |
+
"n_piles": 292,
|
12 |
+
"log_interval": 200,
|
13 |
+
"reinit_input_norm": "target_scaled",
|
14 |
+
"reinit_input": "x",
|
15 |
+
"reinit_norm_alpha": 0.3,
|
16 |
+
"data_loc": "mlp_data",
|
17 |
+
"reinit_threshold": -6,
|
18 |
+
"scheduler": "wsd",
|
19 |
+
"layer_idx": 3,
|
20 |
+
"l1_exp": -4,
|
21 |
+
"neuron_reinit_percent": 0.85,
|
22 |
+
"beta1": 1,
|
23 |
+
"beta2": 4,
|
24 |
+
"reinit_target": "error",
|
25 |
+
"sparse_adam": false,
|
26 |
+
"run_template": "Mo{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
|
27 |
+
"project_name": "mlp_out_test",
|
28 |
+
"decoder_bias": true,
|
29 |
+
"l1_beta": 0.99,
|
30 |
+
"alt_sparsity_loss": "log",
|
31 |
+
"l1_ratio": 1,
|
32 |
+
"l1_p": 0,
|
33 |
+
"optimizer": "sparse_adam",
|
34 |
+
"model_type": "mlp_out",
|
35 |
+
"adam_beta1": 0.5,
|
36 |
+
"adam_beta2": 0.9375,
|
37 |
+
"run_name": "Mo3_S-4_R1_P0"
|
38 |
+
}
|
mlp/M3_S-5_R1_P0.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:83cb301d90dac4a9d06d5c99ca5b62fe40b7492d8744ce5f49caadd316551956
|
3 |
+
size 153705080
|
mlp/M3_S-5_R1_P0_config.json
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"n_features": 25000,
|
3 |
+
"d_model": 768,
|
4 |
+
"lr_exp": -10,
|
5 |
+
"disable_comet": false,
|
6 |
+
"per_neuron_reinit_interval": 0,
|
7 |
+
"reservoir_time_discount": 0.995,
|
8 |
+
"reinit_interval": 800,
|
9 |
+
"max_reinit_neurons": 5000,
|
10 |
+
"reservoir_size": 5000,
|
11 |
+
"n_piles": 292,
|
12 |
+
"log_interval": 200,
|
13 |
+
"reinit_input_norm": "target_scaled",
|
14 |
+
"reinit_input": "x",
|
15 |
+
"reinit_norm_alpha": 0.3,
|
16 |
+
"data_loc": "mlp_data",
|
17 |
+
"reinit_threshold": -6,
|
18 |
+
"scheduler": "wsd",
|
19 |
+
"layer_idx": 3,
|
20 |
+
"l1_exp": -5,
|
21 |
+
"neuron_reinit_percent": 0.85,
|
22 |
+
"beta1": 1,
|
23 |
+
"beta2": 4,
|
24 |
+
"reinit_target": "error",
|
25 |
+
"sparse_adam": false,
|
26 |
+
"run_template": "Mo{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
|
27 |
+
"project_name": "mlp_out_test",
|
28 |
+
"decoder_bias": true,
|
29 |
+
"l1_beta": 0.99,
|
30 |
+
"alt_sparsity_loss": "log",
|
31 |
+
"l1_ratio": 1,
|
32 |
+
"l1_p": 0,
|
33 |
+
"optimizer": "sparse_adam",
|
34 |
+
"model_type": "mlp_out",
|
35 |
+
"adam_beta1": 0.5,
|
36 |
+
"adam_beta2": 0.9375,
|
37 |
+
"run_name": "Mo3_S-5_R1_P0"
|
38 |
+
}
|
mlp/M3_S-6_R1_P0.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c7fb31406999c1274c69e37022648b502b704c7fcb9d6fd805c84f4ce779ca71
|
3 |
+
size 153705080
|
mlp/M3_S-6_R1_P0_config.json
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"n_features": 25000,
|
3 |
+
"d_model": 768,
|
4 |
+
"lr_exp": -10,
|
5 |
+
"disable_comet": false,
|
6 |
+
"per_neuron_reinit_interval": 0,
|
7 |
+
"reservoir_time_discount": 0.995,
|
8 |
+
"reinit_interval": 800,
|
9 |
+
"max_reinit_neurons": 5000,
|
10 |
+
"reservoir_size": 5000,
|
11 |
+
"n_piles": 292,
|
12 |
+
"log_interval": 200,
|
13 |
+
"reinit_input_norm": "target_scaled",
|
14 |
+
"reinit_input": "x",
|
15 |
+
"reinit_norm_alpha": 0.3,
|
16 |
+
"data_loc": "mlp_data",
|
17 |
+
"reinit_threshold": -6,
|
18 |
+
"scheduler": "wsd",
|
19 |
+
"layer_idx": 3,
|
20 |
+
"l1_exp": -6,
|
21 |
+
"neuron_reinit_percent": 0.85,
|
22 |
+
"beta1": 1,
|
23 |
+
"beta2": 4,
|
24 |
+
"reinit_target": "error",
|
25 |
+
"sparse_adam": false,
|
26 |
+
"run_template": "Mo{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
|
27 |
+
"project_name": "mlp_out_test",
|
28 |
+
"decoder_bias": true,
|
29 |
+
"l1_beta": 0.99,
|
30 |
+
"alt_sparsity_loss": "log",
|
31 |
+
"l1_ratio": 1,
|
32 |
+
"l1_p": 0,
|
33 |
+
"optimizer": "sparse_adam",
|
34 |
+
"model_type": "mlp_out",
|
35 |
+
"adam_beta1": 0.5,
|
36 |
+
"adam_beta2": 0.9375,
|
37 |
+
"run_name": "Mo3_S-6_R1_P0"
|
38 |
+
}
|