Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_0/ae.pt +3 -0
- ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_0/config.json +30 -0
- ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_0/eval_results.json +1 -0
- ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_1/ae.pt +3 -0
- ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_1/config.json +30 -0
- ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_1/eval_results.json +1 -0
- ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_2/ae.pt +3 -0
- ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_2/config.json +30 -0
- ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_2/eval_results.json +1 -0
- ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_3/ae.pt +3 -0
- ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_3/config.json +30 -0
- ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_3/eval_results.json +1 -0
- ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_4/ae.pt +3 -0
- ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_4/config.json +30 -0
- ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_4/eval_results.json +1 -0
- ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_5/ae.pt +3 -0
- ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_5/config.json +30 -0
- ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_5/eval_results.json +1 -0
- ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_0/ae.pt +3 -0
- ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_0/config.json +30 -0
- ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_0/eval_results.json +1 -0
- ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_1/ae.pt +3 -0
- ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_1/config.json +30 -0
- ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_1/eval_results.json +1 -0
- ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_2/ae.pt +3 -0
- ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_2/config.json +30 -0
- ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_2/eval_results.json +1 -0
- ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_3/ae.pt +3 -0
- ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_3/config.json +30 -0
- ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_3/eval_results.json +1 -0
- ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_4/ae.pt +3 -0
- ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_4/config.json +30 -0
- ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_4/eval_results.json +1 -0
- ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_5/ae.pt +3 -0
- ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_5/config.json +30 -0
- ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_5/eval_results.json +1 -0
- ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_0/ae.pt +3 -0
- ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_0/config.json +30 -0
- ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_0/eval_results.json +1 -0
- ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_1/ae.pt +3 -0
- ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_1/config.json +30 -0
- ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_1/eval_results.json +1 -0
- ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_2/ae.pt +3 -0
- ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_2/config.json +30 -0
- ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_2/eval_results.json +1 -0
- ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_3/ae.pt +3 -0
- ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_3/config.json +30 -0
- ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_3/eval_results.json +1 -0
- ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_4/ae.pt +3 -0
- ._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_4/config.json +30 -0
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_0/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:08041a12047d806f49b1ca3c1febc9aca731beb68569a444d23fc2daaddf1fc4
|
3 |
+
size 67178280
|
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_0/config.json
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"dict_class": "AutoEncoder",
|
4 |
+
"trainer_class": "StandardTrainer",
|
5 |
+
"activation_dim": 512,
|
6 |
+
"dict_size": 16384,
|
7 |
+
"lr": 0.0003,
|
8 |
+
"l1_penalty": 0.012,
|
9 |
+
"warmup_steps": 10,
|
10 |
+
"resample_steps": null,
|
11 |
+
"sparsity_warmup_steps": 10,
|
12 |
+
"steps": 488,
|
13 |
+
"decay_start": 390,
|
14 |
+
"seed": 0,
|
15 |
+
"device": "cuda:5",
|
16 |
+
"layer": 0,
|
17 |
+
"lm_name": "EleutherAI/pythia-70m-deduped",
|
18 |
+
"wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_0",
|
19 |
+
"submodule_name": "resid_post_layer_0"
|
20 |
+
},
|
21 |
+
"buffer": {
|
22 |
+
"d_submodule": 512,
|
23 |
+
"io": "out",
|
24 |
+
"n_ctxs": 244,
|
25 |
+
"ctx_len": 1024,
|
26 |
+
"refresh_batch_size": 64,
|
27 |
+
"out_batch_size": 2048,
|
28 |
+
"device": "cuda:5"
|
29 |
+
}
|
30 |
+
}
|
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_0/eval_results.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"l2_loss": 1.4167590290307999, "l1_loss": 62.0523624420166, "l0": 1568.3194427490234, "frac_variance_explained": 0.943539559841156, "cossim": 0.9821989126503468, "l2_ratio": 0.9195215627551079, "relative_reconstruction_bias": 0.9397303834557533, "loss_original": 2.988885059952736, "loss_reconstructed": 3.106488525867462, "loss_zero": 8.828418850898743, "frac_recovered": 0.9798414222896099, "frac_alive": 0.48284912109375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
|
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_1/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f68a2c94c70e951bbc10069ccbc6c9ec24e20cb02f19c2d15ee3a673adf1a300
|
3 |
+
size 67178280
|
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_1/config.json
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"dict_class": "AutoEncoder",
|
4 |
+
"trainer_class": "StandardTrainer",
|
5 |
+
"activation_dim": 512,
|
6 |
+
"dict_size": 16384,
|
7 |
+
"lr": 0.0003,
|
8 |
+
"l1_penalty": 0.015,
|
9 |
+
"warmup_steps": 10,
|
10 |
+
"resample_steps": null,
|
11 |
+
"sparsity_warmup_steps": 10,
|
12 |
+
"steps": 488,
|
13 |
+
"decay_start": 390,
|
14 |
+
"seed": 0,
|
15 |
+
"device": "cuda:5",
|
16 |
+
"layer": 0,
|
17 |
+
"lm_name": "EleutherAI/pythia-70m-deduped",
|
18 |
+
"wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_0",
|
19 |
+
"submodule_name": "resid_post_layer_0"
|
20 |
+
},
|
21 |
+
"buffer": {
|
22 |
+
"d_submodule": 512,
|
23 |
+
"io": "out",
|
24 |
+
"n_ctxs": 244,
|
25 |
+
"ctx_len": 1024,
|
26 |
+
"refresh_batch_size": 64,
|
27 |
+
"out_batch_size": 2048,
|
28 |
+
"device": "cuda:5"
|
29 |
+
}
|
30 |
+
}
|
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_1/eval_results.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"l2_loss": 1.5789328217506409, "l1_loss": 55.72900319099426, "l0": 1330.4508972167969, "frac_variance_explained": 0.9300252571702003, "cossim": 0.9778566658496857, "l2_ratio": 0.9065023027360439, "relative_reconstruction_bias": 0.9307215549051762, "loss_original": 2.988885059952736, "loss_reconstructed": 3.1464501470327377, "loss_zero": 8.828418850898743, "frac_recovered": 0.9729870557785034, "frac_alive": 0.47406005859375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
|
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_2/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5bed5c2fd61816e5fc0719d9adca7311a85129b8d78272cbf46cb62748ab5548
|
3 |
+
size 67178280
|
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_2/config.json
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"dict_class": "AutoEncoder",
|
4 |
+
"trainer_class": "StandardTrainer",
|
5 |
+
"activation_dim": 512,
|
6 |
+
"dict_size": 16384,
|
7 |
+
"lr": 0.0003,
|
8 |
+
"l1_penalty": 0.02,
|
9 |
+
"warmup_steps": 10,
|
10 |
+
"resample_steps": null,
|
11 |
+
"sparsity_warmup_steps": 10,
|
12 |
+
"steps": 488,
|
13 |
+
"decay_start": 390,
|
14 |
+
"seed": 0,
|
15 |
+
"device": "cuda:5",
|
16 |
+
"layer": 0,
|
17 |
+
"lm_name": "EleutherAI/pythia-70m-deduped",
|
18 |
+
"wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_0",
|
19 |
+
"submodule_name": "resid_post_layer_0"
|
20 |
+
},
|
21 |
+
"buffer": {
|
22 |
+
"d_submodule": 512,
|
23 |
+
"io": "out",
|
24 |
+
"n_ctxs": 244,
|
25 |
+
"ctx_len": 1024,
|
26 |
+
"refresh_batch_size": 64,
|
27 |
+
"out_batch_size": 2048,
|
28 |
+
"device": "cuda:5"
|
29 |
+
}
|
30 |
+
}
|
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_2/eval_results.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"l2_loss": 1.8266066312789917, "l1_loss": 47.685190200805664, "l0": 1031.1428604125977, "frac_variance_explained": 0.9062778800725937, "cossim": 0.9702106080949306, "l2_ratio": 0.8861087150871754, "relative_reconstruction_bias": 0.9171949252486229, "loss_original": 2.988885059952736, "loss_reconstructed": 3.251991391181946, "loss_zero": 8.828418850898743, "frac_recovered": 0.9548681862652302, "frac_alive": 0.4541015625, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
|
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_3/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:72edbdb237b1133d7d63248e9dbdc9dd0826fb9fa0fcb64603ee890a4f3e6661
|
3 |
+
size 67178280
|
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_3/config.json
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"dict_class": "AutoEncoder",
|
4 |
+
"trainer_class": "StandardTrainer",
|
5 |
+
"activation_dim": 512,
|
6 |
+
"dict_size": 16384,
|
7 |
+
"lr": 0.0003,
|
8 |
+
"l1_penalty": 0.03,
|
9 |
+
"warmup_steps": 10,
|
10 |
+
"resample_steps": null,
|
11 |
+
"sparsity_warmup_steps": 10,
|
12 |
+
"steps": 488,
|
13 |
+
"decay_start": 390,
|
14 |
+
"seed": 0,
|
15 |
+
"device": "cuda:5",
|
16 |
+
"layer": 0,
|
17 |
+
"lm_name": "EleutherAI/pythia-70m-deduped",
|
18 |
+
"wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_0",
|
19 |
+
"submodule_name": "resid_post_layer_0"
|
20 |
+
},
|
21 |
+
"buffer": {
|
22 |
+
"d_submodule": 512,
|
23 |
+
"io": "out",
|
24 |
+
"n_ctxs": 244,
|
25 |
+
"ctx_len": 1024,
|
26 |
+
"refresh_batch_size": 64,
|
27 |
+
"out_batch_size": 2048,
|
28 |
+
"device": "cuda:5"
|
29 |
+
}
|
30 |
+
}
|
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_3/eval_results.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"l2_loss": 2.280204176902771, "l1_loss": 36.662381649017334, "l0": 639.2051010131836, "frac_variance_explained": 0.8530002571642399, "cossim": 0.952362023293972, "l2_ratio": 0.8507192321121693, "relative_reconstruction_bias": 0.8974457383155823, "loss_original": 2.988885059952736, "loss_reconstructed": 3.4817975610494614, "loss_zero": 8.828418850898743, "frac_recovered": 0.9160263948142529, "frac_alive": 0.43353271484375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
|
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_4/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c51a70b6f60418f10aa224b5abc555b4be0f46b8862c98962ca0741e4677d7e5
|
3 |
+
size 67178280
|
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_4/config.json
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"dict_class": "AutoEncoder",
|
4 |
+
"trainer_class": "StandardTrainer",
|
5 |
+
"activation_dim": 512,
|
6 |
+
"dict_size": 16384,
|
7 |
+
"lr": 0.0003,
|
8 |
+
"l1_penalty": 0.04,
|
9 |
+
"warmup_steps": 10,
|
10 |
+
"resample_steps": null,
|
11 |
+
"sparsity_warmup_steps": 10,
|
12 |
+
"steps": 488,
|
13 |
+
"decay_start": 390,
|
14 |
+
"seed": 0,
|
15 |
+
"device": "cuda:5",
|
16 |
+
"layer": 0,
|
17 |
+
"lm_name": "EleutherAI/pythia-70m-deduped",
|
18 |
+
"wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_0",
|
19 |
+
"submodule_name": "resid_post_layer_0"
|
20 |
+
},
|
21 |
+
"buffer": {
|
22 |
+
"d_submodule": 512,
|
23 |
+
"io": "out",
|
24 |
+
"n_ctxs": 244,
|
25 |
+
"ctx_len": 1024,
|
26 |
+
"refresh_batch_size": 64,
|
27 |
+
"out_batch_size": 2048,
|
28 |
+
"device": "cuda:5"
|
29 |
+
}
|
30 |
+
}
|
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_4/eval_results.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"l2_loss": 2.630507007241249, "l1_loss": 28.577664136886597, "l0": 394.4646053314209, "frac_variance_explained": 0.8021240644156933, "cossim": 0.9342639222741127, "l2_ratio": 0.8253069184720516, "relative_reconstruction_bias": 0.8880686983466148, "loss_original": 2.988885059952736, "loss_reconstructed": 3.6719041615724564, "loss_zero": 8.828418850898743, "frac_recovered": 0.883676066994667, "frac_alive": 0.41949462890625, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
|
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_5/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2ed64d5e83254a4a1680542ca54f585d6989792806da6f1d1d6e084493d1b67f
|
3 |
+
size 67178280
|
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_5/config.json
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"dict_class": "AutoEncoder",
|
4 |
+
"trainer_class": "StandardTrainer",
|
5 |
+
"activation_dim": 512,
|
6 |
+
"dict_size": 16384,
|
7 |
+
"lr": 0.0003,
|
8 |
+
"l1_penalty": 0.06,
|
9 |
+
"warmup_steps": 10,
|
10 |
+
"resample_steps": null,
|
11 |
+
"sparsity_warmup_steps": 10,
|
12 |
+
"steps": 488,
|
13 |
+
"decay_start": 390,
|
14 |
+
"seed": 0,
|
15 |
+
"device": "cuda:5",
|
16 |
+
"layer": 0,
|
17 |
+
"lm_name": "EleutherAI/pythia-70m-deduped",
|
18 |
+
"wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_0",
|
19 |
+
"submodule_name": "resid_post_layer_0"
|
20 |
+
},
|
21 |
+
"buffer": {
|
22 |
+
"d_submodule": 512,
|
23 |
+
"io": "out",
|
24 |
+
"n_ctxs": 244,
|
25 |
+
"ctx_len": 1024,
|
26 |
+
"refresh_batch_size": 64,
|
27 |
+
"out_batch_size": 2048,
|
28 |
+
"device": "cuda:5"
|
29 |
+
}
|
30 |
+
}
|
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_5/eval_results.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"l2_loss": 3.089244395494461, "l1_loss": 18.967594146728516, "l0": 164.81410884857178, "frac_variance_explained": 0.7212224006652832, "cossim": 0.9034432731568813, "l2_ratio": 0.7909861765801907, "relative_reconstruction_bias": 0.8814779929816723, "loss_original": 2.988885059952736, "loss_reconstructed": 4.005634561181068, "loss_zero": 8.828418850898743, "frac_recovered": 0.8266438916325569, "frac_alive": 0.39056396484375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
|
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_0/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:820854280e0d77fddf8f509bb79ad6e9a428b4b75d9060bc1ddbcb19b7ab8d89
|
3 |
+
size 67178280
|
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_0/config.json
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"dict_class": "AutoEncoder",
|
4 |
+
"trainer_class": "StandardTrainer",
|
5 |
+
"activation_dim": 512,
|
6 |
+
"dict_size": 16384,
|
7 |
+
"lr": 0.0003,
|
8 |
+
"l1_penalty": 0.012,
|
9 |
+
"warmup_steps": 10,
|
10 |
+
"resample_steps": null,
|
11 |
+
"sparsity_warmup_steps": 10,
|
12 |
+
"steps": 488,
|
13 |
+
"decay_start": 390,
|
14 |
+
"seed": 0,
|
15 |
+
"device": "cuda:5",
|
16 |
+
"layer": 1,
|
17 |
+
"lm_name": "EleutherAI/pythia-70m-deduped",
|
18 |
+
"wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_1",
|
19 |
+
"submodule_name": "resid_post_layer_1"
|
20 |
+
},
|
21 |
+
"buffer": {
|
22 |
+
"d_submodule": 512,
|
23 |
+
"io": "out",
|
24 |
+
"n_ctxs": 244,
|
25 |
+
"ctx_len": 1024,
|
26 |
+
"refresh_batch_size": 64,
|
27 |
+
"out_batch_size": 2048,
|
28 |
+
"device": "cuda:5"
|
29 |
+
}
|
30 |
+
}
|
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_0/eval_results.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"l2_loss": 1.921069249510765, "l1_loss": 86.7099380493164, "l0": 1889.5925827026367, "frac_variance_explained": 0.9374731332063675, "cossim": 0.9791730977594852, "l2_ratio": 0.9044194445014, "relative_reconstruction_bias": 0.9294404052197933, "loss_original": 2.988885059952736, "loss_reconstructed": 3.0913854241371155, "loss_zero": 8.97075641155243, "frac_recovered": 0.9828116111457348, "frac_alive": 0.47479248046875, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
|
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_1/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b5b98cc0e9bcfc76d8507051e5bd1366145f34774c39ab5c0d83fdef56df14af
|
3 |
+
size 67178280
|
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_1/config.json
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"dict_class": "AutoEncoder",
|
4 |
+
"trainer_class": "StandardTrainer",
|
5 |
+
"activation_dim": 512,
|
6 |
+
"dict_size": 16384,
|
7 |
+
"lr": 0.0003,
|
8 |
+
"l1_penalty": 0.015,
|
9 |
+
"warmup_steps": 10,
|
10 |
+
"resample_steps": null,
|
11 |
+
"sparsity_warmup_steps": 10,
|
12 |
+
"steps": 488,
|
13 |
+
"decay_start": 390,
|
14 |
+
"seed": 0,
|
15 |
+
"device": "cuda:5",
|
16 |
+
"layer": 1,
|
17 |
+
"lm_name": "EleutherAI/pythia-70m-deduped",
|
18 |
+
"wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_1",
|
19 |
+
"submodule_name": "resid_post_layer_1"
|
20 |
+
},
|
21 |
+
"buffer": {
|
22 |
+
"d_submodule": 512,
|
23 |
+
"io": "out",
|
24 |
+
"n_ctxs": 244,
|
25 |
+
"ctx_len": 1024,
|
26 |
+
"refresh_batch_size": 64,
|
27 |
+
"out_batch_size": 2048,
|
28 |
+
"device": "cuda:5"
|
29 |
+
}
|
30 |
+
}
|
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_1/eval_results.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"l2_loss": 2.1518948525190353, "l1_loss": 78.03551197052002, "l0": 1625.4286727905273, "frac_variance_explained": 0.9218312874436378, "cossim": 0.9738432057201862, "l2_ratio": 0.8885384723544121, "relative_reconstruction_bias": 0.9186785258352757, "loss_original": 2.988885059952736, "loss_reconstructed": 3.1272957623004913, "loss_zero": 8.97075641155243, "frac_recovered": 0.9767856597900391, "frac_alive": 0.47882080078125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
|
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_2/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7bdbc4935c9d5298096ee6ea78bfe243612fc4731ea0caf2a1d368f266b17448
|
3 |
+
size 67178280
|
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_2/config.json
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"dict_class": "AutoEncoder",
|
4 |
+
"trainer_class": "StandardTrainer",
|
5 |
+
"activation_dim": 512,
|
6 |
+
"dict_size": 16384,
|
7 |
+
"lr": 0.0003,
|
8 |
+
"l1_penalty": 0.02,
|
9 |
+
"warmup_steps": 10,
|
10 |
+
"resample_steps": null,
|
11 |
+
"sparsity_warmup_steps": 10,
|
12 |
+
"steps": 488,
|
13 |
+
"decay_start": 390,
|
14 |
+
"seed": 0,
|
15 |
+
"device": "cuda:5",
|
16 |
+
"layer": 1,
|
17 |
+
"lm_name": "EleutherAI/pythia-70m-deduped",
|
18 |
+
"wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_1",
|
19 |
+
"submodule_name": "resid_post_layer_1"
|
20 |
+
},
|
21 |
+
"buffer": {
|
22 |
+
"d_submodule": 512,
|
23 |
+
"io": "out",
|
24 |
+
"n_ctxs": 244,
|
25 |
+
"ctx_len": 1024,
|
26 |
+
"refresh_batch_size": 64,
|
27 |
+
"out_batch_size": 2048,
|
28 |
+
"device": "cuda:5"
|
29 |
+
}
|
30 |
+
}
|
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_2/eval_results.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"l2_loss": 2.5168990939855576, "l1_loss": 66.47064352035522, "l0": 1277.5725173950195, "frac_variance_explained": 0.8933972716331482, "cossim": 0.9639947153627872, "l2_ratio": 0.8639187514781952, "relative_reconstruction_bias": 0.9028670713305473, "loss_original": 2.988885059952736, "loss_reconstructed": 3.2126090973615646, "loss_zero": 8.97075641155243, "frac_recovered": 0.9625394903123379, "frac_alive": 0.47686767578125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
|
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_3/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:10b606532e1f82f19fdd1e40b0c23c64f9ebe39ae79d7d587a61e65325d2ab30
|
3 |
+
size 67178280
|
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_3/config.json
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"dict_class": "AutoEncoder",
|
4 |
+
"trainer_class": "StandardTrainer",
|
5 |
+
"activation_dim": 512,
|
6 |
+
"dict_size": 16384,
|
7 |
+
"lr": 0.0003,
|
8 |
+
"l1_penalty": 0.03,
|
9 |
+
"warmup_steps": 10,
|
10 |
+
"resample_steps": null,
|
11 |
+
"sparsity_warmup_steps": 10,
|
12 |
+
"steps": 488,
|
13 |
+
"decay_start": 390,
|
14 |
+
"seed": 0,
|
15 |
+
"device": "cuda:5",
|
16 |
+
"layer": 1,
|
17 |
+
"lm_name": "EleutherAI/pythia-70m-deduped",
|
18 |
+
"wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_1",
|
19 |
+
"submodule_name": "resid_post_layer_1"
|
20 |
+
},
|
21 |
+
"buffer": {
|
22 |
+
"d_submodule": 512,
|
23 |
+
"io": "out",
|
24 |
+
"n_ctxs": 244,
|
25 |
+
"ctx_len": 1024,
|
26 |
+
"refresh_batch_size": 64,
|
27 |
+
"out_batch_size": 2048,
|
28 |
+
"device": "cuda:5"
|
29 |
+
}
|
30 |
+
}
|
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_3/eval_results.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"l2_loss": 3.169589176774025, "l1_loss": 49.724985122680664, "l0": 787.4504432678223, "frac_variance_explained": 0.8310047425329685, "cossim": 0.9411170482635498, "l2_ratio": 0.8247413076460361, "relative_reconstruction_bias": 0.8834847621619701, "loss_original": 2.988885059952736, "loss_reconstructed": 3.4419959783554077, "loss_zero": 8.97075641155243, "frac_recovered": 0.9246886782348156, "frac_alive": 0.478271484375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
|
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_4/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:66a161fb8dfc91099dfd8e610fb705a9dab9c3bd9356622160f322cdaaf6bac4
|
3 |
+
size 67178280
|
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_4/config.json
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"dict_class": "AutoEncoder",
|
4 |
+
"trainer_class": "StandardTrainer",
|
5 |
+
"activation_dim": 512,
|
6 |
+
"dict_size": 16384,
|
7 |
+
"lr": 0.0003,
|
8 |
+
"l1_penalty": 0.04,
|
9 |
+
"warmup_steps": 10,
|
10 |
+
"resample_steps": null,
|
11 |
+
"sparsity_warmup_steps": 10,
|
12 |
+
"steps": 488,
|
13 |
+
"decay_start": 390,
|
14 |
+
"seed": 0,
|
15 |
+
"device": "cuda:5",
|
16 |
+
"layer": 1,
|
17 |
+
"lm_name": "EleutherAI/pythia-70m-deduped",
|
18 |
+
"wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_1",
|
19 |
+
"submodule_name": "resid_post_layer_1"
|
20 |
+
},
|
21 |
+
"buffer": {
|
22 |
+
"d_submodule": 512,
|
23 |
+
"io": "out",
|
24 |
+
"n_ctxs": 244,
|
25 |
+
"ctx_len": 1024,
|
26 |
+
"refresh_batch_size": 64,
|
27 |
+
"out_batch_size": 2048,
|
28 |
+
"device": "cuda:5"
|
29 |
+
}
|
30 |
+
}
|
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_4/eval_results.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"l2_loss": 3.70350344479084, "l1_loss": 37.7823760509491, "l0": 474.227840423584, "frac_variance_explained": 0.7687290497124195, "cossim": 0.9165537543594837, "l2_ratio": 0.7957354746758938, "relative_reconstruction_bias": 0.876005794852972, "loss_original": 2.988885059952736, "loss_reconstructed": 3.693199545145035, "loss_zero": 8.97075641155243, "frac_recovered": 0.882872398942709, "frac_alive": 0.48974609375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
|
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_5/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:69b8b04d6c4829e44878a7cfc9bb99f3fa16deff3e2edcb5e19d4e4720640ced
|
3 |
+
size 67178280
|
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_5/config.json
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"dict_class": "AutoEncoder",
|
4 |
+
"trainer_class": "StandardTrainer",
|
5 |
+
"activation_dim": 512,
|
6 |
+
"dict_size": 16384,
|
7 |
+
"lr": 0.0003,
|
8 |
+
"l1_penalty": 0.06,
|
9 |
+
"warmup_steps": 10,
|
10 |
+
"resample_steps": null,
|
11 |
+
"sparsity_warmup_steps": 10,
|
12 |
+
"steps": 488,
|
13 |
+
"decay_start": 390,
|
14 |
+
"seed": 0,
|
15 |
+
"device": "cuda:5",
|
16 |
+
"layer": 1,
|
17 |
+
"lm_name": "EleutherAI/pythia-70m-deduped",
|
18 |
+
"wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_1",
|
19 |
+
"submodule_name": "resid_post_layer_1"
|
20 |
+
},
|
21 |
+
"buffer": {
|
22 |
+
"d_submodule": 512,
|
23 |
+
"io": "out",
|
24 |
+
"n_ctxs": 244,
|
25 |
+
"ctx_len": 1024,
|
26 |
+
"refresh_batch_size": 64,
|
27 |
+
"out_batch_size": 2048,
|
28 |
+
"device": "cuda:5"
|
29 |
+
}
|
30 |
+
}
|
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_5/eval_results.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"l2_loss": 4.367744535207748, "l1_loss": 24.294288277626038, "l0": 183.46984004974365, "frac_variance_explained": 0.6762260124087334, "cossim": 0.8779518343508244, "l2_ratio": 0.7569525502622128, "relative_reconstruction_bias": 0.871852483600378, "loss_original": 2.988885059952736, "loss_reconstructed": 4.113821625709534, "loss_zero": 8.97075641155243, "frac_recovered": 0.8127008564770222, "frac_alive": 0.48626708984375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
|
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_0/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0cedc3d1f87faf8d918f58ab0874219bf8b4cb884f1745c03bc57247330e6d45
|
3 |
+
size 67178280
|
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_0/config.json
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"dict_class": "AutoEncoder",
|
4 |
+
"trainer_class": "StandardTrainer",
|
5 |
+
"activation_dim": 512,
|
6 |
+
"dict_size": 16384,
|
7 |
+
"lr": 0.0003,
|
8 |
+
"l1_penalty": 0.012,
|
9 |
+
"warmup_steps": 10,
|
10 |
+
"resample_steps": null,
|
11 |
+
"sparsity_warmup_steps": 10,
|
12 |
+
"steps": 488,
|
13 |
+
"decay_start": 390,
|
14 |
+
"seed": 0,
|
15 |
+
"device": "cuda:5",
|
16 |
+
"layer": 2,
|
17 |
+
"lm_name": "EleutherAI/pythia-70m-deduped",
|
18 |
+
"wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_2",
|
19 |
+
"submodule_name": "resid_post_layer_2"
|
20 |
+
},
|
21 |
+
"buffer": {
|
22 |
+
"d_submodule": 512,
|
23 |
+
"io": "out",
|
24 |
+
"n_ctxs": 244,
|
25 |
+
"ctx_len": 1024,
|
26 |
+
"refresh_batch_size": 64,
|
27 |
+
"out_batch_size": 2048,
|
28 |
+
"device": "cuda:5"
|
29 |
+
}
|
30 |
+
}
|
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_0/eval_results.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"l2_loss": 3.1716324537992477, "l1_loss": 109.77040338516235, "l0": 1611.395637512207, "frac_variance_explained": 0.9486353807151318, "cossim": 0.9696135818958282, "l2_ratio": 0.8780255950987339, "relative_reconstruction_bias": 0.9444989748299122, "loss_original": 2.988885059952736, "loss_reconstructed": 3.313967987895012, "loss_zero": 9.41981041431427, "frac_recovered": 0.9495267793536186, "frac_alive": 0.532470703125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
|
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_1/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:309d11c65c3e16ffda7685ea51f4b04ddd935ed7c6a49823a9dea9e8b41f420d
|
3 |
+
size 67178280
|
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_1/config.json
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"dict_class": "AutoEncoder",
|
4 |
+
"trainer_class": "StandardTrainer",
|
5 |
+
"activation_dim": 512,
|
6 |
+
"dict_size": 16384,
|
7 |
+
"lr": 0.0003,
|
8 |
+
"l1_penalty": 0.015,
|
9 |
+
"warmup_steps": 10,
|
10 |
+
"resample_steps": null,
|
11 |
+
"sparsity_warmup_steps": 10,
|
12 |
+
"steps": 488,
|
13 |
+
"decay_start": 390,
|
14 |
+
"seed": 0,
|
15 |
+
"device": "cuda:5",
|
16 |
+
"layer": 2,
|
17 |
+
"lm_name": "EleutherAI/pythia-70m-deduped",
|
18 |
+
"wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_2",
|
19 |
+
"submodule_name": "resid_post_layer_2"
|
20 |
+
},
|
21 |
+
"buffer": {
|
22 |
+
"d_submodule": 512,
|
23 |
+
"io": "out",
|
24 |
+
"n_ctxs": 244,
|
25 |
+
"ctx_len": 1024,
|
26 |
+
"refresh_batch_size": 64,
|
27 |
+
"out_batch_size": 2048,
|
28 |
+
"device": "cuda:5"
|
29 |
+
}
|
30 |
+
}
|
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_1/eval_results.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"l2_loss": 3.5997580736875534, "l1_loss": 96.4208664894104, "l0": 1306.8117294311523, "frac_variance_explained": 0.9347885400056839, "cossim": 0.9604156315326691, "l2_ratio": 0.8589782565832138, "relative_reconstruction_bias": 0.9393252618610859, "loss_original": 2.988885059952736, "loss_reconstructed": 3.4662882536649704, "loss_zero": 9.41981041431427, "frac_recovered": 0.9260412231087685, "frac_alive": 0.5325927734375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
|
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_2/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c85ab3b7913d8c2116da2c18df2340f628b33b7ad444fe6fac5ed284aa26d12d
|
3 |
+
size 67178280
|
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_2/config.json
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"dict_class": "AutoEncoder",
|
4 |
+
"trainer_class": "StandardTrainer",
|
5 |
+
"activation_dim": 512,
|
6 |
+
"dict_size": 16384,
|
7 |
+
"lr": 0.0003,
|
8 |
+
"l1_penalty": 0.02,
|
9 |
+
"warmup_steps": 10,
|
10 |
+
"resample_steps": null,
|
11 |
+
"sparsity_warmup_steps": 10,
|
12 |
+
"steps": 488,
|
13 |
+
"decay_start": 390,
|
14 |
+
"seed": 0,
|
15 |
+
"device": "cuda:5",
|
16 |
+
"layer": 2,
|
17 |
+
"lm_name": "EleutherAI/pythia-70m-deduped",
|
18 |
+
"wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_2",
|
19 |
+
"submodule_name": "resid_post_layer_2"
|
20 |
+
},
|
21 |
+
"buffer": {
|
22 |
+
"d_submodule": 512,
|
23 |
+
"io": "out",
|
24 |
+
"n_ctxs": 244,
|
25 |
+
"ctx_len": 1024,
|
26 |
+
"refresh_batch_size": 64,
|
27 |
+
"out_batch_size": 2048,
|
28 |
+
"device": "cuda:5"
|
29 |
+
}
|
30 |
+
}
|
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_2/eval_results.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"l2_loss": 4.234767764806747, "l1_loss": 79.00359535217285, "l0": 927.1257019042969, "frac_variance_explained": 0.9083689413964748, "cossim": 0.9438304528594017, "l2_ratio": 0.8312639258801937, "relative_reconstruction_bias": 0.931897010654211, "loss_original": 2.988885059952736, "loss_reconstructed": 3.781919851899147, "loss_zero": 9.41981041431427, "frac_recovered": 0.8772863261401653, "frac_alive": 0.54339599609375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
|
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_3/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1f4f1b6ae72ea4efc055b7106d07c4edb55f74f99ebb2f9f4b2553a958d9d4ea
|
3 |
+
size 67178280
|
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_3/config.json
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"dict_class": "AutoEncoder",
|
4 |
+
"trainer_class": "StandardTrainer",
|
5 |
+
"activation_dim": 512,
|
6 |
+
"dict_size": 16384,
|
7 |
+
"lr": 0.0003,
|
8 |
+
"l1_penalty": 0.03,
|
9 |
+
"warmup_steps": 10,
|
10 |
+
"resample_steps": null,
|
11 |
+
"sparsity_warmup_steps": 10,
|
12 |
+
"steps": 488,
|
13 |
+
"decay_start": 390,
|
14 |
+
"seed": 0,
|
15 |
+
"device": "cuda:5",
|
16 |
+
"layer": 2,
|
17 |
+
"lm_name": "EleutherAI/pythia-70m-deduped",
|
18 |
+
"wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_2",
|
19 |
+
"submodule_name": "resid_post_layer_2"
|
20 |
+
},
|
21 |
+
"buffer": {
|
22 |
+
"d_submodule": 512,
|
23 |
+
"io": "out",
|
24 |
+
"n_ctxs": 244,
|
25 |
+
"ctx_len": 1024,
|
26 |
+
"refresh_batch_size": 64,
|
27 |
+
"out_batch_size": 2048,
|
28 |
+
"device": "cuda:5"
|
29 |
+
}
|
30 |
+
}
|
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_3/eval_results.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"l2_loss": 5.191680729389191, "l1_loss": 56.57015347480774, "l0": 475.47170066833496, "frac_variance_explained": 0.8640795983374119, "cossim": 0.9118851572275162, "l2_ratio": 0.7894385904073715, "relative_reconstruction_bias": 0.9269732721149921, "loss_original": 2.988885059952736, "loss_reconstructed": 4.469057783484459, "loss_zero": 9.41981041431427, "frac_recovered": 0.7708443030714989, "frac_alive": 0.5369873046875, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
|
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_4/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:520643a4d5d7e9668d1db789c4e292fa8cbf0a008d9343fc10af3597b518f147
|
3 |
+
size 67178280
|
._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_4/config.json
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"dict_class": "AutoEncoder",
|
4 |
+
"trainer_class": "StandardTrainer",
|
5 |
+
"activation_dim": 512,
|
6 |
+
"dict_size": 16384,
|
7 |
+
"lr": 0.0003,
|
8 |
+
"l1_penalty": 0.04,
|
9 |
+
"warmup_steps": 10,
|
10 |
+
"resample_steps": null,
|
11 |
+
"sparsity_warmup_steps": 10,
|
12 |
+
"steps": 488,
|
13 |
+
"decay_start": 390,
|
14 |
+
"seed": 0,
|
15 |
+
"device": "cuda:5",
|
16 |
+
"layer": 2,
|
17 |
+
"lm_name": "EleutherAI/pythia-70m-deduped",
|
18 |
+
"wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_2",
|
19 |
+
"submodule_name": "resid_post_layer_2"
|
20 |
+
},
|
21 |
+
"buffer": {
|
22 |
+
"d_submodule": 512,
|
23 |
+
"io": "out",
|
24 |
+
"n_ctxs": 244,
|
25 |
+
"ctx_len": 1024,
|
26 |
+
"refresh_batch_size": 64,
|
27 |
+
"out_batch_size": 2048,
|
28 |
+
"device": "cuda:5"
|
29 |
+
}
|
30 |
+
}
|