Delete files resid_post_layer_11_checkpoints/ resid_post_layer_15/ resid_post_layer_15_checkpoints/ resid_post_layer_19/ resid_post_layer_19_checkpoints/ resid_post_layer_3/ resid_post_layer_3_checkpoints/ resid_post_layer_7/ resid_post_layer_7_checkpoints/ with huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- resid_post_layer_11_checkpoints/trainer_0_step_0/ae.pt +0 -3
- resid_post_layer_11_checkpoints/trainer_0_step_0/config.json +0 -26
- resid_post_layer_11_checkpoints/trainer_0_step_0/eval_results.json +0 -1
- resid_post_layer_11_checkpoints/trainer_0_step_19528/ae.pt +0 -3
- resid_post_layer_11_checkpoints/trainer_0_step_19528/config.json +0 -26
- resid_post_layer_11_checkpoints/trainer_0_step_19528/eval_results.json +0 -1
- resid_post_layer_11_checkpoints/trainer_0_step_29292/ae.pt +0 -3
- resid_post_layer_11_checkpoints/trainer_0_step_29292/config.json +0 -26
- resid_post_layer_11_checkpoints/trainer_0_step_29292/eval_results.json +0 -1
- resid_post_layer_11_checkpoints/trainer_0_step_4882/ae.pt +0 -3
- resid_post_layer_11_checkpoints/trainer_0_step_4882/config.json +0 -26
- resid_post_layer_11_checkpoints/trainer_0_step_4882/eval_results.json +0 -1
- resid_post_layer_11_checkpoints/trainer_0_step_9764/ae.pt +0 -3
- resid_post_layer_11_checkpoints/trainer_0_step_9764/config.json +0 -26
- resid_post_layer_11_checkpoints/trainer_0_step_9764/eval_results.json +0 -1
- resid_post_layer_11_checkpoints/trainer_1_step_0/ae.pt +0 -3
- resid_post_layer_11_checkpoints/trainer_1_step_0/config.json +0 -26
- resid_post_layer_11_checkpoints/trainer_1_step_0/eval_results.json +0 -1
- resid_post_layer_11_checkpoints/trainer_1_step_19528/ae.pt +0 -3
- resid_post_layer_11_checkpoints/trainer_1_step_19528/config.json +0 -26
- resid_post_layer_11_checkpoints/trainer_1_step_19528/eval_results.json +0 -1
- resid_post_layer_11_checkpoints/trainer_1_step_29292/ae.pt +0 -3
- resid_post_layer_11_checkpoints/trainer_1_step_29292/config.json +0 -26
- resid_post_layer_11_checkpoints/trainer_1_step_29292/eval_results.json +0 -1
- resid_post_layer_11_checkpoints/trainer_1_step_4882/ae.pt +0 -3
- resid_post_layer_11_checkpoints/trainer_1_step_4882/config.json +0 -26
- resid_post_layer_11_checkpoints/trainer_1_step_4882/eval_results.json +0 -1
- resid_post_layer_11_checkpoints/trainer_1_step_9764/ae.pt +0 -3
- resid_post_layer_11_checkpoints/trainer_1_step_9764/config.json +0 -26
- resid_post_layer_11_checkpoints/trainer_1_step_9764/eval_results.json +0 -1
- resid_post_layer_11_checkpoints/trainer_2_step_0/ae.pt +0 -3
- resid_post_layer_11_checkpoints/trainer_2_step_0/config.json +0 -26
- resid_post_layer_11_checkpoints/trainer_2_step_0/eval_results.json +0 -1
- resid_post_layer_11_checkpoints/trainer_2_step_19528/ae.pt +0 -3
- resid_post_layer_11_checkpoints/trainer_2_step_19528/config.json +0 -26
- resid_post_layer_11_checkpoints/trainer_2_step_19528/eval_results.json +0 -1
- resid_post_layer_11_checkpoints/trainer_2_step_29292/ae.pt +0 -3
- resid_post_layer_11_checkpoints/trainer_2_step_29292/config.json +0 -26
- resid_post_layer_11_checkpoints/trainer_2_step_29292/eval_results.json +0 -1
- resid_post_layer_11_checkpoints/trainer_2_step_4882/ae.pt +0 -3
- resid_post_layer_11_checkpoints/trainer_2_step_4882/config.json +0 -26
- resid_post_layer_11_checkpoints/trainer_2_step_4882/eval_results.json +0 -1
- resid_post_layer_11_checkpoints/trainer_2_step_9764/ae.pt +0 -3
- resid_post_layer_11_checkpoints/trainer_2_step_9764/config.json +0 -26
- resid_post_layer_11_checkpoints/trainer_2_step_9764/eval_results.json +0 -1
- resid_post_layer_11_checkpoints/trainer_3_step_0/ae.pt +0 -3
- resid_post_layer_11_checkpoints/trainer_3_step_0/config.json +0 -26
- resid_post_layer_11_checkpoints/trainer_3_step_0/eval_results.json +0 -1
- resid_post_layer_11_checkpoints/trainer_3_step_19528/ae.pt +0 -3
- resid_post_layer_11_checkpoints/trainer_3_step_19528/config.json +0 -26
resid_post_layer_11_checkpoints/trainer_0_step_0/ae.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:53aaf9c27601b0a7b69997278058508353e77952278e2d9382de60d03f6a8863
|
3 |
-
size 339823416
|
|
|
|
|
|
|
|
resid_post_layer_11_checkpoints/trainer_0_step_0/config.json
DELETED
@@ -1,26 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"trainer": {
|
3 |
-
"trainer_class": "TrainerTopK",
|
4 |
-
"dict_class": "AutoEncoderTopK",
|
5 |
-
"lr": 0.0001885618083164127,
|
6 |
-
"steps": "0",
|
7 |
-
"seed": 0,
|
8 |
-
"activation_dim": 2304,
|
9 |
-
"dict_size": 18432,
|
10 |
-
"k": 20,
|
11 |
-
"device": "cuda:0",
|
12 |
-
"layer": 11,
|
13 |
-
"lm_name": "google/gemma-2-2b",
|
14 |
-
"wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11",
|
15 |
-
"submodule_name": "resid_post_layer_11"
|
16 |
-
},
|
17 |
-
"buffer": {
|
18 |
-
"d_submodule": 2304,
|
19 |
-
"io": "out",
|
20 |
-
"n_ctxs": 2000,
|
21 |
-
"ctx_len": 128,
|
22 |
-
"refresh_batch_size": 32,
|
23 |
-
"out_batch_size": 4096,
|
24 |
-
"device": "cuda:0"
|
25 |
-
}
|
26 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
resid_post_layer_11_checkpoints/trainer_0_step_0/eval_results.json
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
{"l2_loss": 144.1739944458008, "l1_loss": 113.89709167480468, "l0": 20.0, "frac_variance_explained": 0.06153666377067566, "cossim": 0.285680028796196, "l2_ratio": 0.1798807665705681, "relative_reconstruction_bias": 0.6311139702796936, "loss_original": 2.4489264488220215, "loss_reconstructed": 15.055834197998047, "loss_zero": 12.452933025360107, "frac_recovered": -0.26015533953905107, "frac_alive": 0.1353081613779068, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
|
|
|
|
resid_post_layer_11_checkpoints/trainer_0_step_19528/ae.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:9f56e92af04268566c7ff342c4e7210babc785a03db17bd8eaa38e3442012ce9
|
3 |
-
size 339823704
|
|
|
|
|
|
|
|
resid_post_layer_11_checkpoints/trainer_0_step_19528/config.json
DELETED
@@ -1,26 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"trainer": {
|
3 |
-
"trainer_class": "TrainerTopK",
|
4 |
-
"dict_class": "AutoEncoderTopK",
|
5 |
-
"lr": 0.0001885618083164127,
|
6 |
-
"steps": "19528",
|
7 |
-
"seed": 0,
|
8 |
-
"activation_dim": 2304,
|
9 |
-
"dict_size": 18432,
|
10 |
-
"k": 20,
|
11 |
-
"device": "cuda:0",
|
12 |
-
"layer": 11,
|
13 |
-
"lm_name": "google/gemma-2-2b",
|
14 |
-
"wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11",
|
15 |
-
"submodule_name": "resid_post_layer_11"
|
16 |
-
},
|
17 |
-
"buffer": {
|
18 |
-
"d_submodule": 2304,
|
19 |
-
"io": "out",
|
20 |
-
"n_ctxs": 2000,
|
21 |
-
"ctx_len": 128,
|
22 |
-
"refresh_batch_size": 32,
|
23 |
-
"out_batch_size": 4096,
|
24 |
-
"device": "cuda:0"
|
25 |
-
}
|
26 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
resid_post_layer_11_checkpoints/trainer_0_step_19528/eval_results.json
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
{"l2_loss": 63.222343826293944, "l1_loss": 284.57915954589845, "l0": 19.99583339691162, "frac_variance_explained": 0.7308643460273743, "cossim": 0.8872040271759033, "l2_ratio": 0.8889125108718872, "relative_reconstruction_bias": 1.0020551800727844, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.7907193899154663, "loss_zero": 12.452933025360107, "frac_recovered": 0.9659082174301148, "frac_alive": 0.1557074636220932, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
|
|
|
|
resid_post_layer_11_checkpoints/trainer_0_step_29292/ae.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:a5a1d888a0537e9383cc3a9d7da73404627105eedb05a2901ea9b5d8c79679bf
|
3 |
-
size 339823704
|
|
|
|
|
|
|
|
resid_post_layer_11_checkpoints/trainer_0_step_29292/config.json
DELETED
@@ -1,26 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"trainer": {
|
3 |
-
"trainer_class": "TrainerTopK",
|
4 |
-
"dict_class": "AutoEncoderTopK",
|
5 |
-
"lr": 0.0001885618083164127,
|
6 |
-
"steps": "29292",
|
7 |
-
"seed": 0,
|
8 |
-
"activation_dim": 2304,
|
9 |
-
"dict_size": 18432,
|
10 |
-
"k": 20,
|
11 |
-
"device": "cuda:0",
|
12 |
-
"layer": 11,
|
13 |
-
"lm_name": "google/gemma-2-2b",
|
14 |
-
"wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11",
|
15 |
-
"submodule_name": "resid_post_layer_11"
|
16 |
-
},
|
17 |
-
"buffer": {
|
18 |
-
"d_submodule": 2304,
|
19 |
-
"io": "out",
|
20 |
-
"n_ctxs": 2000,
|
21 |
-
"ctx_len": 128,
|
22 |
-
"refresh_batch_size": 32,
|
23 |
-
"out_batch_size": 4096,
|
24 |
-
"device": "cuda:0"
|
25 |
-
}
|
26 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
resid_post_layer_11_checkpoints/trainer_0_step_29292/eval_results.json
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
{"l2_loss": 63.71059875488281, "l1_loss": 272.7416564941406, "l0": 20.0, "frac_variance_explained": 0.7049950003623963, "cossim": 0.8891237080097198, "l2_ratio": 0.8906138241291046, "relative_reconstruction_bias": 1.0008544504642487, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.785463571548462, "loss_zero": 12.452933025360107, "frac_recovered": 0.9664257526397705, "frac_alive": 0.1563585102558136, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
|
|
|
|
resid_post_layer_11_checkpoints/trainer_0_step_4882/ae.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:bb73459d9ba27070497a75047dc056f86559bd5d41a888abb6cbebb52718ffed
|
3 |
-
size 339823504
|
|
|
|
|
|
|
|
resid_post_layer_11_checkpoints/trainer_0_step_4882/config.json
DELETED
@@ -1,26 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"trainer": {
|
3 |
-
"trainer_class": "TrainerTopK",
|
4 |
-
"dict_class": "AutoEncoderTopK",
|
5 |
-
"lr": 0.0001885618083164127,
|
6 |
-
"steps": "4882",
|
7 |
-
"seed": 0,
|
8 |
-
"activation_dim": 2304,
|
9 |
-
"dict_size": 18432,
|
10 |
-
"k": 20,
|
11 |
-
"device": "cuda:0",
|
12 |
-
"layer": 11,
|
13 |
-
"lm_name": "google/gemma-2-2b",
|
14 |
-
"wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11",
|
15 |
-
"submodule_name": "resid_post_layer_11"
|
16 |
-
},
|
17 |
-
"buffer": {
|
18 |
-
"d_submodule": 2304,
|
19 |
-
"io": "out",
|
20 |
-
"n_ctxs": 2000,
|
21 |
-
"ctx_len": 128,
|
22 |
-
"refresh_batch_size": 32,
|
23 |
-
"out_batch_size": 4096,
|
24 |
-
"device": "cuda:0"
|
25 |
-
}
|
26 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
resid_post_layer_11_checkpoints/trainer_0_step_4882/eval_results.json
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
{"l2_loss": 67.39788055419922, "l1_loss": 348.3453887939453, "l0": 20.0, "frac_variance_explained": 0.7809036612510681, "cossim": 0.8779726147651672, "l2_ratio": 0.8785419166088104, "relative_reconstruction_bias": 1.000831699371338, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.875354194641113, "loss_zero": 12.452933025360107, "frac_recovered": 0.9574480593204499, "frac_alive": 0.1525065153837204, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
|
|
|
|
resid_post_layer_11_checkpoints/trainer_0_step_9764/ae.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:3ce91e1320f81e78c9f7f2fd837f151c9fd488709b4f9aada2c4c18e38e7d585
|
3 |
-
size 339823504
|
|
|
|
|
|
|
|
resid_post_layer_11_checkpoints/trainer_0_step_9764/config.json
DELETED
@@ -1,26 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"trainer": {
|
3 |
-
"trainer_class": "TrainerTopK",
|
4 |
-
"dict_class": "AutoEncoderTopK",
|
5 |
-
"lr": 0.0001885618083164127,
|
6 |
-
"steps": "9764",
|
7 |
-
"seed": 0,
|
8 |
-
"activation_dim": 2304,
|
9 |
-
"dict_size": 18432,
|
10 |
-
"k": 20,
|
11 |
-
"device": "cuda:0",
|
12 |
-
"layer": 11,
|
13 |
-
"lm_name": "google/gemma-2-2b",
|
14 |
-
"wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11",
|
15 |
-
"submodule_name": "resid_post_layer_11"
|
16 |
-
},
|
17 |
-
"buffer": {
|
18 |
-
"d_submodule": 2304,
|
19 |
-
"io": "out",
|
20 |
-
"n_ctxs": 2000,
|
21 |
-
"ctx_len": 128,
|
22 |
-
"refresh_batch_size": 32,
|
23 |
-
"out_batch_size": 4096,
|
24 |
-
"device": "cuda:0"
|
25 |
-
}
|
26 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
resid_post_layer_11_checkpoints/trainer_0_step_9764/eval_results.json
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
{"l2_loss": 66.6925880432129, "l1_loss": 304.12717895507814, "l0": 20.0, "frac_variance_explained": 0.7223313331604004, "cossim": 0.8877674520015717, "l2_ratio": 0.8910203695297241, "relative_reconstruction_bias": 1.0043164610862731, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.8143189668655397, "loss_zero": 12.452933025360107, "frac_recovered": 0.9635527789592743, "frac_alive": 0.1507703959941864, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
|
|
|
|
resid_post_layer_11_checkpoints/trainer_1_step_0/ae.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:53aaf9c27601b0a7b69997278058508353e77952278e2d9382de60d03f6a8863
|
3 |
-
size 339823416
|
|
|
|
|
|
|
|
resid_post_layer_11_checkpoints/trainer_1_step_0/config.json
DELETED
@@ -1,26 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"trainer": {
|
3 |
-
"trainer_class": "TrainerTopK",
|
4 |
-
"dict_class": "AutoEncoderTopK",
|
5 |
-
"lr": 0.0001885618083164127,
|
6 |
-
"steps": "0",
|
7 |
-
"seed": 0,
|
8 |
-
"activation_dim": 2304,
|
9 |
-
"dict_size": 18432,
|
10 |
-
"k": 40,
|
11 |
-
"device": "cuda:0",
|
12 |
-
"layer": 11,
|
13 |
-
"lm_name": "google/gemma-2-2b",
|
14 |
-
"wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11",
|
15 |
-
"submodule_name": "resid_post_layer_11"
|
16 |
-
},
|
17 |
-
"buffer": {
|
18 |
-
"d_submodule": 2304,
|
19 |
-
"io": "out",
|
20 |
-
"n_ctxs": 2000,
|
21 |
-
"ctx_len": 128,
|
22 |
-
"refresh_batch_size": 32,
|
23 |
-
"out_batch_size": 4096,
|
24 |
-
"device": "cuda:0"
|
25 |
-
}
|
26 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
resid_post_layer_11_checkpoints/trainer_1_step_0/eval_results.json
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
{"l2_loss": 146.67851715087892, "l1_loss": 225.0533248901367, "l0": 40.0, "frac_variance_explained": 0.10613001585006714, "cossim": 0.37147045135498047, "l2_ratio": 0.2499557614326477, "relative_reconstruction_bias": 0.670610225200653, "loss_original": 2.4489264488220215, "loss_reconstructed": 13.592330741882325, "loss_zero": 12.452933025360107, "frac_recovered": -0.1137192726135254, "frac_alive": 0.2194553017616272, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
|
|
|
|
resid_post_layer_11_checkpoints/trainer_1_step_19528/ae.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:9d443898512cabf167f64076cb6ce357da7b374f711e79c43b4c6c5834406617
|
3 |
-
size 339823704
|
|
|
|
|
|
|
|
resid_post_layer_11_checkpoints/trainer_1_step_19528/config.json
DELETED
@@ -1,26 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"trainer": {
|
3 |
-
"trainer_class": "TrainerTopK",
|
4 |
-
"dict_class": "AutoEncoderTopK",
|
5 |
-
"lr": 0.0001885618083164127,
|
6 |
-
"steps": "19528",
|
7 |
-
"seed": 0,
|
8 |
-
"activation_dim": 2304,
|
9 |
-
"dict_size": 18432,
|
10 |
-
"k": 40,
|
11 |
-
"device": "cuda:0",
|
12 |
-
"layer": 11,
|
13 |
-
"lm_name": "google/gemma-2-2b",
|
14 |
-
"wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11",
|
15 |
-
"submodule_name": "resid_post_layer_11"
|
16 |
-
},
|
17 |
-
"buffer": {
|
18 |
-
"d_submodule": 2304,
|
19 |
-
"io": "out",
|
20 |
-
"n_ctxs": 2000,
|
21 |
-
"ctx_len": 128,
|
22 |
-
"refresh_batch_size": 32,
|
23 |
-
"out_batch_size": 4096,
|
24 |
-
"device": "cuda:0"
|
25 |
-
}
|
26 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
resid_post_layer_11_checkpoints/trainer_1_step_19528/eval_results.json
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
{"l2_loss": 55.811133575439456, "l1_loss": 451.9648834228516, "l0": 40.0, "frac_variance_explained": 0.820844042301178, "cossim": 0.9156029880046844, "l2_ratio": 0.9176408350467682, "relative_reconstruction_bias": 1.0015530705451965, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.618153429031372, "loss_zero": 12.452933025360107, "frac_recovered": 0.9831358790397644, "frac_alive": 0.2994249165058136, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
|
|
|
|
resid_post_layer_11_checkpoints/trainer_1_step_29292/ae.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:5eed5e6b2f32121b1976aaa8492cce7320da5abca20185e3d43c17daea9a225b
|
3 |
-
size 339823704
|
|
|
|
|
|
|
|
resid_post_layer_11_checkpoints/trainer_1_step_29292/config.json
DELETED
@@ -1,26 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"trainer": {
|
3 |
-
"trainer_class": "TrainerTopK",
|
4 |
-
"dict_class": "AutoEncoderTopK",
|
5 |
-
"lr": 0.0001885618083164127,
|
6 |
-
"steps": "29292",
|
7 |
-
"seed": 0,
|
8 |
-
"activation_dim": 2304,
|
9 |
-
"dict_size": 18432,
|
10 |
-
"k": 40,
|
11 |
-
"device": "cuda:0",
|
12 |
-
"layer": 11,
|
13 |
-
"lm_name": "google/gemma-2-2b",
|
14 |
-
"wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11",
|
15 |
-
"submodule_name": "resid_post_layer_11"
|
16 |
-
},
|
17 |
-
"buffer": {
|
18 |
-
"d_submodule": 2304,
|
19 |
-
"io": "out",
|
20 |
-
"n_ctxs": 2000,
|
21 |
-
"ctx_len": 128,
|
22 |
-
"refresh_batch_size": 32,
|
23 |
-
"out_batch_size": 4096,
|
24 |
-
"device": "cuda:0"
|
25 |
-
}
|
26 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
resid_post_layer_11_checkpoints/trainer_1_step_29292/eval_results.json
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
{"l2_loss": 55.91380500793457, "l1_loss": 427.1486541748047, "l0": 39.983334350585935, "frac_variance_explained": 0.8604467451572418, "cossim": 0.9129667043685913, "l2_ratio": 0.9151833534240723, "relative_reconstruction_bias": 1.0012677431106567, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.6114421129226684, "loss_zero": 12.452933025360107, "frac_recovered": 0.9838047802448273, "frac_alive": 0.29443359375, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
|
|
|
|
resid_post_layer_11_checkpoints/trainer_1_step_4882/ae.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:499307aaa2265a6de8ec06ed3ce6364ca443c6ec7597187a8d0f154e86a1ed0f
|
3 |
-
size 339823504
|
|
|
|
|
|
|
|
resid_post_layer_11_checkpoints/trainer_1_step_4882/config.json
DELETED
@@ -1,26 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"trainer": {
|
3 |
-
"trainer_class": "TrainerTopK",
|
4 |
-
"dict_class": "AutoEncoderTopK",
|
5 |
-
"lr": 0.0001885618083164127,
|
6 |
-
"steps": "4882",
|
7 |
-
"seed": 0,
|
8 |
-
"activation_dim": 2304,
|
9 |
-
"dict_size": 18432,
|
10 |
-
"k": 40,
|
11 |
-
"device": "cuda:0",
|
12 |
-
"layer": 11,
|
13 |
-
"lm_name": "google/gemma-2-2b",
|
14 |
-
"wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11",
|
15 |
-
"submodule_name": "resid_post_layer_11"
|
16 |
-
},
|
17 |
-
"buffer": {
|
18 |
-
"d_submodule": 2304,
|
19 |
-
"io": "out",
|
20 |
-
"n_ctxs": 2000,
|
21 |
-
"ctx_len": 128,
|
22 |
-
"refresh_batch_size": 32,
|
23 |
-
"out_batch_size": 4096,
|
24 |
-
"device": "cuda:0"
|
25 |
-
}
|
26 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
resid_post_layer_11_checkpoints/trainer_1_step_4882/eval_results.json
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
{"l2_loss": 59.28143844604492, "l1_loss": 393.00905456542966, "l0": 40.0, "frac_variance_explained": 0.7567337930202485, "cossim": 0.9030908882617951, "l2_ratio": 0.9034847617149353, "relative_reconstruction_bias": 0.9998072028160095, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.678276252746582, "loss_zero": 12.452933025360107, "frac_recovered": 0.9771327614784241, "frac_alive": 0.2982313334941864, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
|
|
|
|
resid_post_layer_11_checkpoints/trainer_1_step_9764/ae.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:a2c7b989a53a1687048ca1175380bb26a0a17e1a51bb994937c36929faa3aaf7
|
3 |
-
size 339823504
|
|
|
|
|
|
|
|
resid_post_layer_11_checkpoints/trainer_1_step_9764/config.json
DELETED
@@ -1,26 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"trainer": {
|
3 |
-
"trainer_class": "TrainerTopK",
|
4 |
-
"dict_class": "AutoEncoderTopK",
|
5 |
-
"lr": 0.0001885618083164127,
|
6 |
-
"steps": "9764",
|
7 |
-
"seed": 0,
|
8 |
-
"activation_dim": 2304,
|
9 |
-
"dict_size": 18432,
|
10 |
-
"k": 40,
|
11 |
-
"device": "cuda:0",
|
12 |
-
"layer": 11,
|
13 |
-
"lm_name": "google/gemma-2-2b",
|
14 |
-
"wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11",
|
15 |
-
"submodule_name": "resid_post_layer_11"
|
16 |
-
},
|
17 |
-
"buffer": {
|
18 |
-
"d_submodule": 2304,
|
19 |
-
"io": "out",
|
20 |
-
"n_ctxs": 2000,
|
21 |
-
"ctx_len": 128,
|
22 |
-
"refresh_batch_size": 32,
|
23 |
-
"out_batch_size": 4096,
|
24 |
-
"device": "cuda:0"
|
25 |
-
}
|
26 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
resid_post_layer_11_checkpoints/trainer_1_step_9764/eval_results.json
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
{"l2_loss": 57.661796188354494, "l1_loss": 467.5274963378906, "l0": 40.0, "frac_variance_explained": 0.8447021842002869, "cossim": 0.9108584702014924, "l2_ratio": 0.9144052922725677, "relative_reconstruction_bias": 1.003432297706604, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.624210572242737, "loss_zero": 12.452933025360107, "frac_recovered": 0.9825274705886841, "frac_alive": 0.2957899272441864, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
|
|
|
|
resid_post_layer_11_checkpoints/trainer_2_step_0/ae.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:53aaf9c27601b0a7b69997278058508353e77952278e2d9382de60d03f6a8863
|
3 |
-
size 339823416
|
|
|
|
|
|
|
|
resid_post_layer_11_checkpoints/trainer_2_step_0/config.json
DELETED
@@ -1,26 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"trainer": {
|
3 |
-
"trainer_class": "TrainerTopK",
|
4 |
-
"dict_class": "AutoEncoderTopK",
|
5 |
-
"lr": 0.0001885618083164127,
|
6 |
-
"steps": "0",
|
7 |
-
"seed": 0,
|
8 |
-
"activation_dim": 2304,
|
9 |
-
"dict_size": 18432,
|
10 |
-
"k": 80,
|
11 |
-
"device": "cuda:0",
|
12 |
-
"layer": 11,
|
13 |
-
"lm_name": "google/gemma-2-2b",
|
14 |
-
"wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11",
|
15 |
-
"submodule_name": "resid_post_layer_11"
|
16 |
-
},
|
17 |
-
"buffer": {
|
18 |
-
"d_submodule": 2304,
|
19 |
-
"io": "out",
|
20 |
-
"n_ctxs": 2000,
|
21 |
-
"ctx_len": 128,
|
22 |
-
"refresh_batch_size": 32,
|
23 |
-
"out_batch_size": 4096,
|
24 |
-
"device": "cuda:0"
|
25 |
-
}
|
26 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
resid_post_layer_11_checkpoints/trainer_2_step_0/eval_results.json
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
{"l2_loss": 137.5836051940918, "l1_loss": 414.7282440185547, "l0": 80.0, "frac_variance_explained": 0.17110393047332764, "cossim": 0.4680758684873581, "l2_ratio": 0.34816921055316924, "relative_reconstruction_bias": 0.7378275513648986, "loss_original": 2.4489264488220215, "loss_reconstructed": 14.558675193786621, "loss_zero": 12.452933025360107, "frac_recovered": -0.2103082224726677, "frac_alive": 0.3181966245174408, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
|
|
|
|
resid_post_layer_11_checkpoints/trainer_2_step_19528/ae.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:e25c4eab862d23187a5b98b7ed83d77d3e2b3188b59d300d535208c5d34aedf7
|
3 |
-
size 339823704
|
|
|
|
|
|
|
|
resid_post_layer_11_checkpoints/trainer_2_step_19528/config.json
DELETED
@@ -1,26 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"trainer": {
|
3 |
-
"trainer_class": "TrainerTopK",
|
4 |
-
"dict_class": "AutoEncoderTopK",
|
5 |
-
"lr": 0.0001885618083164127,
|
6 |
-
"steps": "19528",
|
7 |
-
"seed": 0,
|
8 |
-
"activation_dim": 2304,
|
9 |
-
"dict_size": 18432,
|
10 |
-
"k": 80,
|
11 |
-
"device": "cuda:0",
|
12 |
-
"layer": 11,
|
13 |
-
"lm_name": "google/gemma-2-2b",
|
14 |
-
"wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11",
|
15 |
-
"submodule_name": "resid_post_layer_11"
|
16 |
-
},
|
17 |
-
"buffer": {
|
18 |
-
"d_submodule": 2304,
|
19 |
-
"io": "out",
|
20 |
-
"n_ctxs": 2000,
|
21 |
-
"ctx_len": 128,
|
22 |
-
"refresh_batch_size": 32,
|
23 |
-
"out_batch_size": 4096,
|
24 |
-
"device": "cuda:0"
|
25 |
-
}
|
26 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
resid_post_layer_11_checkpoints/trainer_2_step_19528/eval_results.json
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
{"l2_loss": 52.232007598876955, "l1_loss": 543.6768280029297, "l0": 79.9, "frac_variance_explained": 0.8336090505123138, "cossim": 0.9266528010368347, "l2_ratio": 0.9269256889820099, "relative_reconstruction_bias": 1.0003283321857452, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5528225183486937, "loss_zero": 12.452933025360107, "frac_recovered": 0.9896542429924011, "frac_alive": 0.46240234375, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
|
|
|
|
resid_post_layer_11_checkpoints/trainer_2_step_29292/ae.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:aae8444f307b4f0275f1e88a9ee35b83003813811a3d8ba90ad8eb0fb861b798
|
3 |
-
size 339823704
|
|
|
|
|
|
|
|
resid_post_layer_11_checkpoints/trainer_2_step_29292/config.json
DELETED
@@ -1,26 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"trainer": {
|
3 |
-
"trainer_class": "TrainerTopK",
|
4 |
-
"dict_class": "AutoEncoderTopK",
|
5 |
-
"lr": 0.0001885618083164127,
|
6 |
-
"steps": "29292",
|
7 |
-
"seed": 0,
|
8 |
-
"activation_dim": 2304,
|
9 |
-
"dict_size": 18432,
|
10 |
-
"k": 80,
|
11 |
-
"device": "cuda:0",
|
12 |
-
"layer": 11,
|
13 |
-
"lm_name": "google/gemma-2-2b",
|
14 |
-
"wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11",
|
15 |
-
"submodule_name": "resid_post_layer_11"
|
16 |
-
},
|
17 |
-
"buffer": {
|
18 |
-
"d_submodule": 2304,
|
19 |
-
"io": "out",
|
20 |
-
"n_ctxs": 2000,
|
21 |
-
"ctx_len": 128,
|
22 |
-
"refresh_batch_size": 32,
|
23 |
-
"out_batch_size": 4096,
|
24 |
-
"device": "cuda:0"
|
25 |
-
}
|
26 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
resid_post_layer_11_checkpoints/trainer_2_step_29292/eval_results.json
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
{"l2_loss": 51.525339126586914, "l1_loss": 510.0916351318359, "l0": 80.0, "frac_variance_explained": 0.8004042208194733, "cossim": 0.928622841835022, "l2_ratio": 0.9312313497066498, "relative_reconstruction_bias": 1.0022627532482147, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5504817724227906, "loss_zero": 12.452933025360107, "frac_recovered": 0.9898871839046478, "frac_alive": 0.4539930522441864, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
|
|
|
|
resid_post_layer_11_checkpoints/trainer_2_step_4882/ae.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:278bc3f69f3e86c9998fd15f4c04da6c2d50f1ce93c1a3ddf7c401570e19915c
|
3 |
-
size 339823504
|
|
|
|
|
|
|
|
resid_post_layer_11_checkpoints/trainer_2_step_4882/config.json
DELETED
@@ -1,26 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"trainer": {
|
3 |
-
"trainer_class": "TrainerTopK",
|
4 |
-
"dict_class": "AutoEncoderTopK",
|
5 |
-
"lr": 0.0001885618083164127,
|
6 |
-
"steps": "4882",
|
7 |
-
"seed": 0,
|
8 |
-
"activation_dim": 2304,
|
9 |
-
"dict_size": 18432,
|
10 |
-
"k": 80,
|
11 |
-
"device": "cuda:0",
|
12 |
-
"layer": 11,
|
13 |
-
"lm_name": "google/gemma-2-2b",
|
14 |
-
"wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11",
|
15 |
-
"submodule_name": "resid_post_layer_11"
|
16 |
-
},
|
17 |
-
"buffer": {
|
18 |
-
"d_submodule": 2304,
|
19 |
-
"io": "out",
|
20 |
-
"n_ctxs": 2000,
|
21 |
-
"ctx_len": 128,
|
22 |
-
"refresh_batch_size": 32,
|
23 |
-
"out_batch_size": 4096,
|
24 |
-
"device": "cuda:0"
|
25 |
-
}
|
26 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
resid_post_layer_11_checkpoints/trainer_2_step_4882/eval_results.json
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
{"l2_loss": 56.42006950378418, "l1_loss": 607.1793884277344, "l0": 80.0, "frac_variance_explained": 0.8453537464141846, "cossim": 0.9180462002754212, "l2_ratio": 0.9192540943622589, "relative_reconstruction_bias": 1.0001774728298187, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5884765863418577, "loss_zero": 12.452933025360107, "frac_recovered": 0.9860971808433533, "frac_alive": 0.4753689169883728, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
|
|
|
|
resid_post_layer_11_checkpoints/trainer_2_step_9764/ae.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:09277bb07f753cc705b46903843df2ccff8845b6887d0ed9a3bf12dab4305d09
|
3 |
-
size 339823504
|
|
|
|
|
|
|
|
resid_post_layer_11_checkpoints/trainer_2_step_9764/config.json
DELETED
@@ -1,26 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"trainer": {
|
3 |
-
"trainer_class": "TrainerTopK",
|
4 |
-
"dict_class": "AutoEncoderTopK",
|
5 |
-
"lr": 0.0001885618083164127,
|
6 |
-
"steps": "9764",
|
7 |
-
"seed": 0,
|
8 |
-
"activation_dim": 2304,
|
9 |
-
"dict_size": 18432,
|
10 |
-
"k": 80,
|
11 |
-
"device": "cuda:0",
|
12 |
-
"layer": 11,
|
13 |
-
"lm_name": "google/gemma-2-2b",
|
14 |
-
"wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11",
|
15 |
-
"submodule_name": "resid_post_layer_11"
|
16 |
-
},
|
17 |
-
"buffer": {
|
18 |
-
"d_submodule": 2304,
|
19 |
-
"io": "out",
|
20 |
-
"n_ctxs": 2000,
|
21 |
-
"ctx_len": 128,
|
22 |
-
"refresh_batch_size": 32,
|
23 |
-
"out_batch_size": 4096,
|
24 |
-
"device": "cuda:0"
|
25 |
-
}
|
26 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
resid_post_layer_11_checkpoints/trainer_2_step_9764/eval_results.json
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
{"l2_loss": 52.48485107421875, "l1_loss": 528.1922485351563, "l0": 80.0, "frac_variance_explained": 0.808493971824646, "cossim": 0.9282886624336243, "l2_ratio": 0.9306568443775177, "relative_reconstruction_bias": 1.0026726007461548, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.56360719203949, "loss_zero": 12.452933025360107, "frac_recovered": 0.988573682308197, "frac_alive": 0.4756944477558136, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
|
|
|
|
resid_post_layer_11_checkpoints/trainer_3_step_0/ae.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:53aaf9c27601b0a7b69997278058508353e77952278e2d9382de60d03f6a8863
|
3 |
-
size 339823416
|
|
|
|
|
|
|
|
resid_post_layer_11_checkpoints/trainer_3_step_0/config.json
DELETED
@@ -1,26 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"trainer": {
|
3 |
-
"trainer_class": "TrainerTopK",
|
4 |
-
"dict_class": "AutoEncoderTopK",
|
5 |
-
"lr": 0.0001885618083164127,
|
6 |
-
"steps": "0",
|
7 |
-
"seed": 0,
|
8 |
-
"activation_dim": 2304,
|
9 |
-
"dict_size": 18432,
|
10 |
-
"k": 160,
|
11 |
-
"device": "cuda:0",
|
12 |
-
"layer": 11,
|
13 |
-
"lm_name": "google/gemma-2-2b",
|
14 |
-
"wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11",
|
15 |
-
"submodule_name": "resid_post_layer_11"
|
16 |
-
},
|
17 |
-
"buffer": {
|
18 |
-
"d_submodule": 2304,
|
19 |
-
"io": "out",
|
20 |
-
"n_ctxs": 2000,
|
21 |
-
"ctx_len": 128,
|
22 |
-
"refresh_batch_size": 32,
|
23 |
-
"out_batch_size": 4096,
|
24 |
-
"device": "cuda:0"
|
25 |
-
}
|
26 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
resid_post_layer_11_checkpoints/trainer_3_step_0/eval_results.json
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
{"l2_loss": 131.11762619018555, "l1_loss": 788.7969665527344, "l0": 160.0, "frac_variance_explained": 0.25940428376197816, "cossim": 0.5706947863101959, "l2_ratio": 0.49342564642429354, "relative_reconstruction_bias": 0.849498838186264, "loss_original": 2.4489264488220215, "loss_reconstructed": 10.614991569519043, "loss_zero": 12.452933025360107, "frac_recovered": 0.18391464054584503, "frac_alive": 0.4411349892616272, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
|
|
|
|
resid_post_layer_11_checkpoints/trainer_3_step_19528/ae.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:01f748938c4f7810260a5082db07606c1dc8e6e8e10deb0d989f8e118c5773ab
|
3 |
-
size 339823704
|
|
|
|
|
|
|
|
resid_post_layer_11_checkpoints/trainer_3_step_19528/config.json
DELETED
@@ -1,26 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"trainer": {
|
3 |
-
"trainer_class": "TrainerTopK",
|
4 |
-
"dict_class": "AutoEncoderTopK",
|
5 |
-
"lr": 0.0001885618083164127,
|
6 |
-
"steps": "19528",
|
7 |
-
"seed": 0,
|
8 |
-
"activation_dim": 2304,
|
9 |
-
"dict_size": 18432,
|
10 |
-
"k": 160,
|
11 |
-
"device": "cuda:0",
|
12 |
-
"layer": 11,
|
13 |
-
"lm_name": "google/gemma-2-2b",
|
14 |
-
"wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11",
|
15 |
-
"submodule_name": "resid_post_layer_11"
|
16 |
-
},
|
17 |
-
"buffer": {
|
18 |
-
"d_submodule": 2304,
|
19 |
-
"io": "out",
|
20 |
-
"n_ctxs": 2000,
|
21 |
-
"ctx_len": 128,
|
22 |
-
"refresh_batch_size": 32,
|
23 |
-
"out_batch_size": 4096,
|
24 |
-
"device": "cuda:0"
|
25 |
-
}
|
26 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|