Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12/trainer_0/ae.pt +3 -0
- gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12/trainer_0/config.json +26 -0
- gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12/trainer_1/ae.pt +3 -0
- gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12/trainer_1/config.json +26 -0
- gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12/trainer_2/ae.pt +3 -0
- gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12/trainer_2/config.json +26 -0
- gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12/trainer_3/ae.pt +3 -0
- gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12/trainer_3/config.json +26 -0
- gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12/trainer_4/ae.pt +3 -0
- gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12/trainer_4/config.json +26 -0
- gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12/trainer_5/ae.pt +3 -0
- gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12/trainer_5/config.json +26 -0
- gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_0/ae.pt +3 -0
- gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_0/config.json +26 -0
- gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_308/ae.pt +3 -0
- gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_308/config.json +26 -0
- gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_3088/ae.pt +3 -0
- gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_3088/config.json +26 -0
- gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_30881/ae.pt +3 -0
- gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_30881/config.json +26 -0
- gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_97/ae.pt +3 -0
- gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_97/config.json +26 -0
- gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_976/ae.pt +3 -0
- gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_976/config.json +26 -0
- gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_9765/ae.pt +3 -0
- gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_9765/config.json +26 -0
- gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_0/ae.pt +3 -0
- gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_0/config.json +26 -0
- gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_308/ae.pt +3 -0
- gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_308/config.json +26 -0
- gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_3088/ae.pt +3 -0
- gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_3088/config.json +26 -0
- gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_30881/ae.pt +3 -0
- gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_30881/config.json +26 -0
- gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_97/ae.pt +3 -0
- gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_97/config.json +26 -0
- gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_976/ae.pt +3 -0
- gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_976/config.json +26 -0
- gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_9765/ae.pt +3 -0
- gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_9765/config.json +26 -0
- gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_0/ae.pt +3 -0
- gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_0/config.json +26 -0
- gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_308/ae.pt +3 -0
- gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_308/config.json +26 -0
- gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_3088/ae.pt +3 -0
- gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_3088/config.json +26 -0
- gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_30881/ae.pt +3 -0
- gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_30881/config.json +26 -0
- gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_97/ae.pt +3 -0
- gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_97/config.json +26 -0
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12/trainer_0/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:660283962396ea26438c26427150e8e6221d61f0b4ad236876a9affd154d2a4d
|
3 |
+
size 75524904
|
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12/trainer_0/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "TrainerTopK",
|
4 |
+
"dict_class": "AutoEncoderTopK",
|
5 |
+
"lr": 0.0004,
|
6 |
+
"steps": 97656,
|
7 |
+
"seed": 0,
|
8 |
+
"activation_dim": 2304,
|
9 |
+
"dict_size": 4096,
|
10 |
+
"k": 20,
|
11 |
+
"device": "cuda:4",
|
12 |
+
"layer": 12,
|
13 |
+
"lm_name": "google/gemma-2-2b",
|
14 |
+
"wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12",
|
15 |
+
"submodule_name": "resid_post_layer_12"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 2304,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 2048,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 24,
|
23 |
+
"out_batch_size": 2048,
|
24 |
+
"device": "cuda:4"
|
25 |
+
}
|
26 |
+
}
|
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12/trainer_1/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:53d6d53d470c9ba0ddda24fea0484c2639b69194df6766d7381bfe76ac7f5bed
|
3 |
+
size 75524904
|
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12/trainer_1/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "TrainerTopK",
|
4 |
+
"dict_class": "AutoEncoderTopK",
|
5 |
+
"lr": 0.0004,
|
6 |
+
"steps": 97656,
|
7 |
+
"seed": 0,
|
8 |
+
"activation_dim": 2304,
|
9 |
+
"dict_size": 4096,
|
10 |
+
"k": 40,
|
11 |
+
"device": "cuda:4",
|
12 |
+
"layer": 12,
|
13 |
+
"lm_name": "google/gemma-2-2b",
|
14 |
+
"wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12",
|
15 |
+
"submodule_name": "resid_post_layer_12"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 2304,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 2048,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 24,
|
23 |
+
"out_batch_size": 2048,
|
24 |
+
"device": "cuda:4"
|
25 |
+
}
|
26 |
+
}
|
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12/trainer_2/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a5c26438a0279e1974412f9e1bb1283f65fd3034511e5249960cf35c30f58acd
|
3 |
+
size 75524904
|
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12/trainer_2/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "TrainerTopK",
|
4 |
+
"dict_class": "AutoEncoderTopK",
|
5 |
+
"lr": 0.0004,
|
6 |
+
"steps": 97656,
|
7 |
+
"seed": 0,
|
8 |
+
"activation_dim": 2304,
|
9 |
+
"dict_size": 4096,
|
10 |
+
"k": 80,
|
11 |
+
"device": "cuda:4",
|
12 |
+
"layer": 12,
|
13 |
+
"lm_name": "google/gemma-2-2b",
|
14 |
+
"wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12",
|
15 |
+
"submodule_name": "resid_post_layer_12"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 2304,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 2048,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 24,
|
23 |
+
"out_batch_size": 2048,
|
24 |
+
"device": "cuda:4"
|
25 |
+
}
|
26 |
+
}
|
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12/trainer_3/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:caf7fa2056e78044a7f75cdd22f0fe6542cab9af8e80e73d0d594ba0acb5b0a5
|
3 |
+
size 75524904
|
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12/trainer_3/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "TrainerTopK",
|
4 |
+
"dict_class": "AutoEncoderTopK",
|
5 |
+
"lr": 0.0004,
|
6 |
+
"steps": 97656,
|
7 |
+
"seed": 0,
|
8 |
+
"activation_dim": 2304,
|
9 |
+
"dict_size": 4096,
|
10 |
+
"k": 160,
|
11 |
+
"device": "cuda:4",
|
12 |
+
"layer": 12,
|
13 |
+
"lm_name": "google/gemma-2-2b",
|
14 |
+
"wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12",
|
15 |
+
"submodule_name": "resid_post_layer_12"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 2304,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 2048,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 24,
|
23 |
+
"out_batch_size": 2048,
|
24 |
+
"device": "cuda:4"
|
25 |
+
}
|
26 |
+
}
|
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12/trainer_4/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:441beebc04cc47e0ca3774242c3f916b2732e2d4e3460bf81c193e91cd2f9333
|
3 |
+
size 75524904
|
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12/trainer_4/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "TrainerTopK",
|
4 |
+
"dict_class": "AutoEncoderTopK",
|
5 |
+
"lr": 0.0004,
|
6 |
+
"steps": 97656,
|
7 |
+
"seed": 0,
|
8 |
+
"activation_dim": 2304,
|
9 |
+
"dict_size": 4096,
|
10 |
+
"k": 320,
|
11 |
+
"device": "cuda:4",
|
12 |
+
"layer": 12,
|
13 |
+
"lm_name": "google/gemma-2-2b",
|
14 |
+
"wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12",
|
15 |
+
"submodule_name": "resid_post_layer_12"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 2304,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 2048,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 24,
|
23 |
+
"out_batch_size": 2048,
|
24 |
+
"device": "cuda:4"
|
25 |
+
}
|
26 |
+
}
|
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12/trainer_5/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:af97e8b509a198c2f74c189e799b06469dc944106619e3abb7db01277c19312a
|
3 |
+
size 75524904
|
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12/trainer_5/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "TrainerTopK",
|
4 |
+
"dict_class": "AutoEncoderTopK",
|
5 |
+
"lr": 0.0004,
|
6 |
+
"steps": 97656,
|
7 |
+
"seed": 0,
|
8 |
+
"activation_dim": 2304,
|
9 |
+
"dict_size": 4096,
|
10 |
+
"k": 640,
|
11 |
+
"device": "cuda:4",
|
12 |
+
"layer": 12,
|
13 |
+
"lm_name": "google/gemma-2-2b",
|
14 |
+
"wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12",
|
15 |
+
"submodule_name": "resid_post_layer_12"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 2304,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 2048,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 24,
|
23 |
+
"out_batch_size": 2048,
|
24 |
+
"device": "cuda:4"
|
25 |
+
}
|
26 |
+
}
|
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_0/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e0e8687672893fed801b4f8625d58d76fa716488dbbf97d8889ca4c6dd6134b6
|
3 |
+
size 75524920
|
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_0/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "TrainerTopK",
|
4 |
+
"dict_class": "AutoEncoderTopK",
|
5 |
+
"lr": 0.0004,
|
6 |
+
"steps": "0",
|
7 |
+
"seed": 0,
|
8 |
+
"activation_dim": 2304,
|
9 |
+
"dict_size": 4096,
|
10 |
+
"k": 20,
|
11 |
+
"device": "cuda:4",
|
12 |
+
"layer": 12,
|
13 |
+
"lm_name": "google/gemma-2-2b",
|
14 |
+
"wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12",
|
15 |
+
"submodule_name": "resid_post_layer_12"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 2304,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 2048,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 24,
|
23 |
+
"out_batch_size": 2048,
|
24 |
+
"device": "cuda:4"
|
25 |
+
}
|
26 |
+
}
|
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_308/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2401dfae15537d6df33149dc8f9ddcc09e1f086aab1626c03fc2ddf0e48361ea
|
3 |
+
size 75524936
|
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_308/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "TrainerTopK",
|
4 |
+
"dict_class": "AutoEncoderTopK",
|
5 |
+
"lr": 0.0004,
|
6 |
+
"steps": "308",
|
7 |
+
"seed": 0,
|
8 |
+
"activation_dim": 2304,
|
9 |
+
"dict_size": 4096,
|
10 |
+
"k": 20,
|
11 |
+
"device": "cuda:4",
|
12 |
+
"layer": 12,
|
13 |
+
"lm_name": "google/gemma-2-2b",
|
14 |
+
"wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12",
|
15 |
+
"submodule_name": "resid_post_layer_12"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 2304,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 2048,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 24,
|
23 |
+
"out_batch_size": 2048,
|
24 |
+
"device": "cuda:4"
|
25 |
+
}
|
26 |
+
}
|
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_3088/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ca2fe58130b7b99e4089290a434879c6b7952ad6f56d0b180e1fd922001f668a
|
3 |
+
size 75525008
|
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_3088/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "TrainerTopK",
|
4 |
+
"dict_class": "AutoEncoderTopK",
|
5 |
+
"lr": 0.0004,
|
6 |
+
"steps": "3088",
|
7 |
+
"seed": 0,
|
8 |
+
"activation_dim": 2304,
|
9 |
+
"dict_size": 4096,
|
10 |
+
"k": 20,
|
11 |
+
"device": "cuda:4",
|
12 |
+
"layer": 12,
|
13 |
+
"lm_name": "google/gemma-2-2b",
|
14 |
+
"wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12",
|
15 |
+
"submodule_name": "resid_post_layer_12"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 2304,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 2048,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 24,
|
23 |
+
"out_batch_size": 2048,
|
24 |
+
"device": "cuda:4"
|
25 |
+
}
|
26 |
+
}
|
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_30881/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:42afa736cbb64667873202c7f0f1dfbac567ede7e1516a9495e419f45f044eec
|
3 |
+
size 75525208
|
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_30881/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "TrainerTopK",
|
4 |
+
"dict_class": "AutoEncoderTopK",
|
5 |
+
"lr": 0.0004,
|
6 |
+
"steps": "30881",
|
7 |
+
"seed": 0,
|
8 |
+
"activation_dim": 2304,
|
9 |
+
"dict_size": 4096,
|
10 |
+
"k": 20,
|
11 |
+
"device": "cuda:4",
|
12 |
+
"layer": 12,
|
13 |
+
"lm_name": "google/gemma-2-2b",
|
14 |
+
"wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12",
|
15 |
+
"submodule_name": "resid_post_layer_12"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 2304,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 2048,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 24,
|
23 |
+
"out_batch_size": 2048,
|
24 |
+
"device": "cuda:4"
|
25 |
+
}
|
26 |
+
}
|
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_97/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5c365fb15c786ff9b707851c87b6fc1d49e84f3b7e9fda4bf8d1b82c35c1d7b4
|
3 |
+
size 75524928
|
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_97/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "TrainerTopK",
|
4 |
+
"dict_class": "AutoEncoderTopK",
|
5 |
+
"lr": 0.0004,
|
6 |
+
"steps": "97",
|
7 |
+
"seed": 0,
|
8 |
+
"activation_dim": 2304,
|
9 |
+
"dict_size": 4096,
|
10 |
+
"k": 20,
|
11 |
+
"device": "cuda:4",
|
12 |
+
"layer": 12,
|
13 |
+
"lm_name": "google/gemma-2-2b",
|
14 |
+
"wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12",
|
15 |
+
"submodule_name": "resid_post_layer_12"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 2304,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 2048,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 24,
|
23 |
+
"out_batch_size": 2048,
|
24 |
+
"device": "cuda:4"
|
25 |
+
}
|
26 |
+
}
|
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_976/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:57f171bbf71d65ee4475ce4c14ea93ac3efeec3298b0dceb49d564acb018750d
|
3 |
+
size 75524936
|
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_976/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "TrainerTopK",
|
4 |
+
"dict_class": "AutoEncoderTopK",
|
5 |
+
"lr": 0.0004,
|
6 |
+
"steps": "976",
|
7 |
+
"seed": 0,
|
8 |
+
"activation_dim": 2304,
|
9 |
+
"dict_size": 4096,
|
10 |
+
"k": 20,
|
11 |
+
"device": "cuda:4",
|
12 |
+
"layer": 12,
|
13 |
+
"lm_name": "google/gemma-2-2b",
|
14 |
+
"wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12",
|
15 |
+
"submodule_name": "resid_post_layer_12"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 2304,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 2048,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 24,
|
23 |
+
"out_batch_size": 2048,
|
24 |
+
"device": "cuda:4"
|
25 |
+
}
|
26 |
+
}
|
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_9765/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:63bd37b92dd3b252f4061796f1c7ea153db3656acc1bfeef5ac8763d04903de7
|
3 |
+
size 75525008
|
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_9765/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "TrainerTopK",
|
4 |
+
"dict_class": "AutoEncoderTopK",
|
5 |
+
"lr": 0.0004,
|
6 |
+
"steps": "9765",
|
7 |
+
"seed": 0,
|
8 |
+
"activation_dim": 2304,
|
9 |
+
"dict_size": 4096,
|
10 |
+
"k": 20,
|
11 |
+
"device": "cuda:4",
|
12 |
+
"layer": 12,
|
13 |
+
"lm_name": "google/gemma-2-2b",
|
14 |
+
"wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12",
|
15 |
+
"submodule_name": "resid_post_layer_12"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 2304,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 2048,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 24,
|
23 |
+
"out_batch_size": 2048,
|
24 |
+
"device": "cuda:4"
|
25 |
+
}
|
26 |
+
}
|
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_0/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e0e8687672893fed801b4f8625d58d76fa716488dbbf97d8889ca4c6dd6134b6
|
3 |
+
size 75524920
|
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_0/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "TrainerTopK",
|
4 |
+
"dict_class": "AutoEncoderTopK",
|
5 |
+
"lr": 0.0004,
|
6 |
+
"steps": "0",
|
7 |
+
"seed": 0,
|
8 |
+
"activation_dim": 2304,
|
9 |
+
"dict_size": 4096,
|
10 |
+
"k": 40,
|
11 |
+
"device": "cuda:4",
|
12 |
+
"layer": 12,
|
13 |
+
"lm_name": "google/gemma-2-2b",
|
14 |
+
"wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12",
|
15 |
+
"submodule_name": "resid_post_layer_12"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 2304,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 2048,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 24,
|
23 |
+
"out_batch_size": 2048,
|
24 |
+
"device": "cuda:4"
|
25 |
+
}
|
26 |
+
}
|
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_308/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c3642ef8e0ed2fc41fc33cf76c5b3700e4409d721ce6f19b05512ac5af4e3442
|
3 |
+
size 75524936
|
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_308/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "TrainerTopK",
|
4 |
+
"dict_class": "AutoEncoderTopK",
|
5 |
+
"lr": 0.0004,
|
6 |
+
"steps": "308",
|
7 |
+
"seed": 0,
|
8 |
+
"activation_dim": 2304,
|
9 |
+
"dict_size": 4096,
|
10 |
+
"k": 40,
|
11 |
+
"device": "cuda:4",
|
12 |
+
"layer": 12,
|
13 |
+
"lm_name": "google/gemma-2-2b",
|
14 |
+
"wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12",
|
15 |
+
"submodule_name": "resid_post_layer_12"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 2304,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 2048,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 24,
|
23 |
+
"out_batch_size": 2048,
|
24 |
+
"device": "cuda:4"
|
25 |
+
}
|
26 |
+
}
|
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_3088/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b3986db197c5da7b73f968165e2f12ef6ccbe8cd6206851820df21aca0c5d236
|
3 |
+
size 75525008
|
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_3088/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "TrainerTopK",
|
4 |
+
"dict_class": "AutoEncoderTopK",
|
5 |
+
"lr": 0.0004,
|
6 |
+
"steps": "3088",
|
7 |
+
"seed": 0,
|
8 |
+
"activation_dim": 2304,
|
9 |
+
"dict_size": 4096,
|
10 |
+
"k": 40,
|
11 |
+
"device": "cuda:4",
|
12 |
+
"layer": 12,
|
13 |
+
"lm_name": "google/gemma-2-2b",
|
14 |
+
"wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12",
|
15 |
+
"submodule_name": "resid_post_layer_12"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 2304,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 2048,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 24,
|
23 |
+
"out_batch_size": 2048,
|
24 |
+
"device": "cuda:4"
|
25 |
+
}
|
26 |
+
}
|
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_30881/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e33877d043abf17d68ecdddd82bda885ac3357ae81eb013441af3627d6e91ed8
|
3 |
+
size 75525208
|
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_30881/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "TrainerTopK",
|
4 |
+
"dict_class": "AutoEncoderTopK",
|
5 |
+
"lr": 0.0004,
|
6 |
+
"steps": "30881",
|
7 |
+
"seed": 0,
|
8 |
+
"activation_dim": 2304,
|
9 |
+
"dict_size": 4096,
|
10 |
+
"k": 40,
|
11 |
+
"device": "cuda:4",
|
12 |
+
"layer": 12,
|
13 |
+
"lm_name": "google/gemma-2-2b",
|
14 |
+
"wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12",
|
15 |
+
"submodule_name": "resid_post_layer_12"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 2304,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 2048,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 24,
|
23 |
+
"out_batch_size": 2048,
|
24 |
+
"device": "cuda:4"
|
25 |
+
}
|
26 |
+
}
|
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_97/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:84d600afe62d8462f9f9b0d4bc054077211466352e4469c110d1166142ed6662
|
3 |
+
size 75524928
|
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_97/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "TrainerTopK",
|
4 |
+
"dict_class": "AutoEncoderTopK",
|
5 |
+
"lr": 0.0004,
|
6 |
+
"steps": "97",
|
7 |
+
"seed": 0,
|
8 |
+
"activation_dim": 2304,
|
9 |
+
"dict_size": 4096,
|
10 |
+
"k": 40,
|
11 |
+
"device": "cuda:4",
|
12 |
+
"layer": 12,
|
13 |
+
"lm_name": "google/gemma-2-2b",
|
14 |
+
"wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12",
|
15 |
+
"submodule_name": "resid_post_layer_12"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 2304,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 2048,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 24,
|
23 |
+
"out_batch_size": 2048,
|
24 |
+
"device": "cuda:4"
|
25 |
+
}
|
26 |
+
}
|
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_976/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a430be8e00518de00bd99b17aa75f8589bec2652c611d36ab90bd164373a3665
|
3 |
+
size 75524936
|
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_976/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "TrainerTopK",
|
4 |
+
"dict_class": "AutoEncoderTopK",
|
5 |
+
"lr": 0.0004,
|
6 |
+
"steps": "976",
|
7 |
+
"seed": 0,
|
8 |
+
"activation_dim": 2304,
|
9 |
+
"dict_size": 4096,
|
10 |
+
"k": 40,
|
11 |
+
"device": "cuda:4",
|
12 |
+
"layer": 12,
|
13 |
+
"lm_name": "google/gemma-2-2b",
|
14 |
+
"wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12",
|
15 |
+
"submodule_name": "resid_post_layer_12"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 2304,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 2048,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 24,
|
23 |
+
"out_batch_size": 2048,
|
24 |
+
"device": "cuda:4"
|
25 |
+
}
|
26 |
+
}
|
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_9765/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:43d7c639c6470e5d9384546b6ab6eaefd1f4f857c2a1ef422cae4a1f3603b452
|
3 |
+
size 75525008
|
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_9765/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "TrainerTopK",
|
4 |
+
"dict_class": "AutoEncoderTopK",
|
5 |
+
"lr": 0.0004,
|
6 |
+
"steps": "9765",
|
7 |
+
"seed": 0,
|
8 |
+
"activation_dim": 2304,
|
9 |
+
"dict_size": 4096,
|
10 |
+
"k": 40,
|
11 |
+
"device": "cuda:4",
|
12 |
+
"layer": 12,
|
13 |
+
"lm_name": "google/gemma-2-2b",
|
14 |
+
"wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12",
|
15 |
+
"submodule_name": "resid_post_layer_12"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 2304,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 2048,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 24,
|
23 |
+
"out_batch_size": 2048,
|
24 |
+
"device": "cuda:4"
|
25 |
+
}
|
26 |
+
}
|
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_0/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e0e8687672893fed801b4f8625d58d76fa716488dbbf97d8889ca4c6dd6134b6
|
3 |
+
size 75524920
|
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_0/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "TrainerTopK",
|
4 |
+
"dict_class": "AutoEncoderTopK",
|
5 |
+
"lr": 0.0004,
|
6 |
+
"steps": "0",
|
7 |
+
"seed": 0,
|
8 |
+
"activation_dim": 2304,
|
9 |
+
"dict_size": 4096,
|
10 |
+
"k": 80,
|
11 |
+
"device": "cuda:4",
|
12 |
+
"layer": 12,
|
13 |
+
"lm_name": "google/gemma-2-2b",
|
14 |
+
"wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12",
|
15 |
+
"submodule_name": "resid_post_layer_12"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 2304,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 2048,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 24,
|
23 |
+
"out_batch_size": 2048,
|
24 |
+
"device": "cuda:4"
|
25 |
+
}
|
26 |
+
}
|
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_308/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:34c478184d2fd776ad3c798b53f537111ca3ef3420047e7e7c89becbef45fa1f
|
3 |
+
size 75524936
|
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_308/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "TrainerTopK",
|
4 |
+
"dict_class": "AutoEncoderTopK",
|
5 |
+
"lr": 0.0004,
|
6 |
+
"steps": "308",
|
7 |
+
"seed": 0,
|
8 |
+
"activation_dim": 2304,
|
9 |
+
"dict_size": 4096,
|
10 |
+
"k": 80,
|
11 |
+
"device": "cuda:4",
|
12 |
+
"layer": 12,
|
13 |
+
"lm_name": "google/gemma-2-2b",
|
14 |
+
"wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12",
|
15 |
+
"submodule_name": "resid_post_layer_12"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 2304,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 2048,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 24,
|
23 |
+
"out_batch_size": 2048,
|
24 |
+
"device": "cuda:4"
|
25 |
+
}
|
26 |
+
}
|
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_3088/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:83c8e69bb710c78fbc06bdd0d1404ab644dd071018c474a58b7b2c6ff1d149a9
|
3 |
+
size 75525008
|
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_3088/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "TrainerTopK",
|
4 |
+
"dict_class": "AutoEncoderTopK",
|
5 |
+
"lr": 0.0004,
|
6 |
+
"steps": "3088",
|
7 |
+
"seed": 0,
|
8 |
+
"activation_dim": 2304,
|
9 |
+
"dict_size": 4096,
|
10 |
+
"k": 80,
|
11 |
+
"device": "cuda:4",
|
12 |
+
"layer": 12,
|
13 |
+
"lm_name": "google/gemma-2-2b",
|
14 |
+
"wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12",
|
15 |
+
"submodule_name": "resid_post_layer_12"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 2304,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 2048,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 24,
|
23 |
+
"out_batch_size": 2048,
|
24 |
+
"device": "cuda:4"
|
25 |
+
}
|
26 |
+
}
|
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_30881/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3d6e499c5a02eba2aee4a6d8e96f3712ee43ce413846ce01de6b99f594be1ec5
|
3 |
+
size 75525208
|
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_30881/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "TrainerTopK",
|
4 |
+
"dict_class": "AutoEncoderTopK",
|
5 |
+
"lr": 0.0004,
|
6 |
+
"steps": "30881",
|
7 |
+
"seed": 0,
|
8 |
+
"activation_dim": 2304,
|
9 |
+
"dict_size": 4096,
|
10 |
+
"k": 80,
|
11 |
+
"device": "cuda:4",
|
12 |
+
"layer": 12,
|
13 |
+
"lm_name": "google/gemma-2-2b",
|
14 |
+
"wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12",
|
15 |
+
"submodule_name": "resid_post_layer_12"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 2304,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 2048,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 24,
|
23 |
+
"out_batch_size": 2048,
|
24 |
+
"device": "cuda:4"
|
25 |
+
}
|
26 |
+
}
|
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_97/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3fedabc0da17b7bb0c2b8c3d2f072021f3bcce44ae9f4e20f7d93fe7fe2a1930
|
3 |
+
size 75524928
|
gemma-2-2b_topk_width-2pow12_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_97/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "TrainerTopK",
|
4 |
+
"dict_class": "AutoEncoderTopK",
|
5 |
+
"lr": 0.0004,
|
6 |
+
"steps": "97",
|
7 |
+
"seed": 0,
|
8 |
+
"activation_dim": 2304,
|
9 |
+
"dict_size": 4096,
|
10 |
+
"k": 80,
|
11 |
+
"device": "cuda:4",
|
12 |
+
"layer": 12,
|
13 |
+
"lm_name": "google/gemma-2-2b",
|
14 |
+
"wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12",
|
15 |
+
"submodule_name": "resid_post_layer_12"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 2304,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 2048,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 24,
|
23 |
+
"out_batch_size": 2048,
|
24 |
+
"device": "cuda:4"
|
25 |
+
}
|
26 |
+
}
|