Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_0/ae.pt +3 -0
- gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_0/config.json +26 -0
- gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_0/eval_results.json +1 -0
- gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_1/ae.pt +3 -0
- gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_1/config.json +26 -0
- gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_1/eval_results.json +1 -0
- gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_2/ae.pt +3 -0
- gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_2/config.json +26 -0
- gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_2/eval_results.json +1 -0
- gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_3/ae.pt +3 -0
- gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_3/config.json +26 -0
- gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_3/eval_results.json +1 -0
- gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_4/ae.pt +3 -0
- gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_4/config.json +26 -0
- gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_4/eval_results.json +1 -0
- gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_5/ae.pt +3 -0
- gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_5/config.json +26 -0
- gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_5/eval_results.json +1 -0
- gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_0/ae.pt +3 -0
- gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_0/config.json +27 -0
- gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_0/eval_results.json +1 -0
- gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_19528/ae.pt +3 -0
- gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_19528/config.json +27 -0
- gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_19528/eval_results.json +1 -0
- gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_29292/ae.pt +3 -0
- gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_29292/config.json +27 -0
- gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_29292/eval_results.json +1 -0
- gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_4882/ae.pt +3 -0
- gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_4882/config.json +27 -0
- gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_4882/eval_results.json +1 -0
- gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_9764/ae.pt +3 -0
- gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_9764/config.json +27 -0
- gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_9764/eval_results.json +1 -0
- gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_0/ae.pt +3 -0
- gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_0/config.json +27 -0
- gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_0/eval_results.json +1 -0
- gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_19528/ae.pt +3 -0
- gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_19528/config.json +27 -0
- gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_19528/eval_results.json +1 -0
- gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_29292/ae.pt +3 -0
- gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_29292/config.json +27 -0
- gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_29292/eval_results.json +1 -0
- gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_4882/ae.pt +3 -0
- gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_4882/config.json +27 -0
- gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_4882/eval_results.json +1 -0
- gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_9764/ae.pt +3 -0
- gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_9764/config.json +27 -0
- gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_9764/eval_results.json +1 -0
- gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_0/ae.pt +3 -0
- gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_0/config.json +27 -0
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_0/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ec94483dfdaf6baef38641e79ae4390fd1c4be0db552e4313408c7adfc9a25e6
|
3 |
+
size 339823336
|
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_0/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"dict_class": "AutoEncoder",
|
4 |
+
"trainer_class": "StandardTrainer",
|
5 |
+
"activation_dim": 2304,
|
6 |
+
"dict_size": 18432,
|
7 |
+
"lr": 0.0003,
|
8 |
+
"l1_penalty": 0.025,
|
9 |
+
"warmup_steps": 1000,
|
10 |
+
"resample_steps": null,
|
11 |
+
"device": "cuda:0",
|
12 |
+
"layer": 11,
|
13 |
+
"lm_name": "google/gemma-2-2b",
|
14 |
+
"wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11",
|
15 |
+
"submodule_name": "resid_post_layer_11"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 2304,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 2000,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 24,
|
23 |
+
"out_batch_size": 4096,
|
24 |
+
"device": "cuda:0"
|
25 |
+
}
|
26 |
+
}
|
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_0/eval_results.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"l2_loss": 44.21599082946777, "l1_loss": 689.0535034179687, "l0": 653.0125183105469, "frac_variance_explained": 0.876203840970993, "cossim": 0.9503044188022614, "l2_ratio": 0.9132377088069916, "relative_reconstruction_bias": 0.9689015865325927, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.516097593307495, "loss_zero": 12.452933025360107, "frac_recovered": 0.9933268785476684, "frac_alive": 0.722276508808136, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
|
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_1/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c35b088143b227affa0d50ac160e0d4d4b288be1b3e7a9d8a2eec7262290aa10
|
3 |
+
size 339823336
|
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_1/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"dict_class": "AutoEncoder",
|
4 |
+
"trainer_class": "StandardTrainer",
|
5 |
+
"activation_dim": 2304,
|
6 |
+
"dict_size": 18432,
|
7 |
+
"lr": 0.0003,
|
8 |
+
"l1_penalty": 0.035,
|
9 |
+
"warmup_steps": 1000,
|
10 |
+
"resample_steps": null,
|
11 |
+
"device": "cuda:0",
|
12 |
+
"layer": 11,
|
13 |
+
"lm_name": "google/gemma-2-2b",
|
14 |
+
"wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11",
|
15 |
+
"submodule_name": "resid_post_layer_11"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 2304,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 2000,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 24,
|
23 |
+
"out_batch_size": 4096,
|
24 |
+
"device": "cuda:0"
|
25 |
+
}
|
26 |
+
}
|
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_1/eval_results.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"l2_loss": 54.783888244628905, "l1_loss": 430.45958251953124, "l0": 216.9250061035156, "frac_variance_explained": 0.821322637796402, "cossim": 0.9228447735309601, "l2_ratio": 0.8802441656589508, "relative_reconstruction_bias": 0.9668310403823852, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.602967619895935, "loss_zero": 12.452933025360107, "frac_recovered": 0.9846588850021363, "frac_alive": 0.4129774272441864, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
|
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_2/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:86956cdaef37056627cc68976f5eee8268b1f88273c33b2aaa183dd8606e002a
|
3 |
+
size 339823336
|
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_2/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"dict_class": "AutoEncoder",
|
4 |
+
"trainer_class": "StandardTrainer",
|
5 |
+
"activation_dim": 2304,
|
6 |
+
"dict_size": 18432,
|
7 |
+
"lr": 0.0003,
|
8 |
+
"l1_penalty": 0.04,
|
9 |
+
"warmup_steps": 1000,
|
10 |
+
"resample_steps": null,
|
11 |
+
"device": "cuda:0",
|
12 |
+
"layer": 11,
|
13 |
+
"lm_name": "google/gemma-2-2b",
|
14 |
+
"wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11",
|
15 |
+
"submodule_name": "resid_post_layer_11"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 2304,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 2000,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 24,
|
23 |
+
"out_batch_size": 4096,
|
24 |
+
"device": "cuda:0"
|
25 |
+
}
|
26 |
+
}
|
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_2/eval_results.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"l2_loss": 60.528709411621094, "l1_loss": 360.99082946777344, "l0": 142.25417404174806, "frac_variance_explained": 0.7947052717208862, "cossim": 0.9053027153015136, "l2_ratio": 0.855720329284668, "relative_reconstruction_bias": 0.9635509788990021, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.675975728034973, "loss_zero": 12.452933025360107, "frac_recovered": 0.9773781895637512, "frac_alive": 0.28173828125, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
|
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_3/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:93578a2028d6821229712eff158bc843ed03693b1af5c760ef67ec9baf415170
|
3 |
+
size 339823336
|
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_3/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"dict_class": "AutoEncoder",
|
4 |
+
"trainer_class": "StandardTrainer",
|
5 |
+
"activation_dim": 2304,
|
6 |
+
"dict_size": 18432,
|
7 |
+
"lr": 0.0003,
|
8 |
+
"l1_penalty": 0.05,
|
9 |
+
"warmup_steps": 1000,
|
10 |
+
"resample_steps": null,
|
11 |
+
"device": "cuda:0",
|
12 |
+
"layer": 11,
|
13 |
+
"lm_name": "google/gemma-2-2b",
|
14 |
+
"wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11",
|
15 |
+
"submodule_name": "resid_post_layer_11"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 2304,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 2000,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 24,
|
23 |
+
"out_batch_size": 4096,
|
24 |
+
"device": "cuda:0"
|
25 |
+
}
|
26 |
+
}
|
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_3/eval_results.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"l2_loss": 69.19085235595703, "l1_loss": 260.5812042236328, "l0": 68.32500228881835, "frac_variance_explained": 0.6767194271087646, "cossim": 0.8739384233951568, "l2_ratio": 0.8242608070373535, "relative_reconstruction_bias": 0.9541641473770142, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.999904203414917, "loss_zero": 12.452933025360107, "frac_recovered": 0.9450006365776062, "frac_alive": 0.1184895858168602, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
|
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_4/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7f34e7a335652c290cd383cbf538b93ac8ff531ecf08c4006f9d6f1274c529fd
|
3 |
+
size 339823336
|
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_4/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"dict_class": "AutoEncoder",
|
4 |
+
"trainer_class": "StandardTrainer",
|
5 |
+
"activation_dim": 2304,
|
6 |
+
"dict_size": 18432,
|
7 |
+
"lr": 0.0003,
|
8 |
+
"l1_penalty": 0.06,
|
9 |
+
"warmup_steps": 1000,
|
10 |
+
"resample_steps": null,
|
11 |
+
"device": "cuda:0",
|
12 |
+
"layer": 11,
|
13 |
+
"lm_name": "google/gemma-2-2b",
|
14 |
+
"wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11",
|
15 |
+
"submodule_name": "resid_post_layer_11"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 2304,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 2000,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 24,
|
23 |
+
"out_batch_size": 4096,
|
24 |
+
"device": "cuda:0"
|
25 |
+
}
|
26 |
+
}
|
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_4/eval_results.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"l2_loss": 77.07271423339844, "l1_loss": 209.26783905029296, "l0": 35.49583435058594, "frac_variance_explained": 0.673879736661911, "cossim": 0.8379469156265259, "l2_ratio": 0.7764925301074982, "relative_reconstruction_bias": 0.9512474119663239, "loss_original": 2.4489264488220215, "loss_reconstructed": 3.923832106590271, "loss_zero": 12.452933025360107, "frac_recovered": 0.8525889277458191, "frac_alive": 0.0455729179084301, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
|
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_5/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:db4d7607331b3bdeac0bfe2cd3a0a36835669ba6970db38b0867617476e00d7b
|
3 |
+
size 339823336
|
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_5/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"dict_class": "AutoEncoder",
|
4 |
+
"trainer_class": "StandardTrainer",
|
5 |
+
"activation_dim": 2304,
|
6 |
+
"dict_size": 18432,
|
7 |
+
"lr": 0.0003,
|
8 |
+
"l1_penalty": 0.07,
|
9 |
+
"warmup_steps": 1000,
|
10 |
+
"resample_steps": null,
|
11 |
+
"device": "cuda:0",
|
12 |
+
"layer": 11,
|
13 |
+
"lm_name": "google/gemma-2-2b",
|
14 |
+
"wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11",
|
15 |
+
"submodule_name": "resid_post_layer_11"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 2304,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 2000,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 24,
|
23 |
+
"out_batch_size": 4096,
|
24 |
+
"device": "cuda:0"
|
25 |
+
}
|
26 |
+
}
|
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_5/eval_results.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"l2_loss": 85.48247909545898, "l1_loss": 188.83019409179687, "l0": 20.800000953674317, "frac_variance_explained": 0.6401739776134491, "cossim": 0.7948622822761535, "l2_ratio": 0.7354530394077301, "relative_reconstruction_bias": 0.9544944584369659, "loss_original": 2.4489264488220215, "loss_reconstructed": 4.871973085403442, "loss_zero": 12.452933025360107, "frac_recovered": 0.757849270105362, "frac_alive": 0.0176866315305233, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
|
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_0/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a8ea3e0cae878ffa8c34a4c1ebd21f82cc85af01eed1e09ba7ce855ae6dfe27d
|
3 |
+
size 339823336
|
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_0/config.json
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"dict_class": "AutoEncoder",
|
4 |
+
"trainer_class": "StandardTrainer",
|
5 |
+
"activation_dim": 2304,
|
6 |
+
"dict_size": 18432,
|
7 |
+
"lr": 0.0003,
|
8 |
+
"l1_penalty": 0.025,
|
9 |
+
"warmup_steps": 1000,
|
10 |
+
"resample_steps": null,
|
11 |
+
"device": "cuda:0",
|
12 |
+
"layer": 11,
|
13 |
+
"lm_name": "google/gemma-2-2b",
|
14 |
+
"wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11",
|
15 |
+
"submodule_name": "resid_post_layer_11",
|
16 |
+
"steps": "0"
|
17 |
+
},
|
18 |
+
"buffer": {
|
19 |
+
"d_submodule": 2304,
|
20 |
+
"io": "out",
|
21 |
+
"n_ctxs": 2000,
|
22 |
+
"ctx_len": 128,
|
23 |
+
"refresh_batch_size": 24,
|
24 |
+
"out_batch_size": 4096,
|
25 |
+
"device": "cuda:0"
|
26 |
+
}
|
27 |
+
}
|
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_0/eval_results.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"l2_loss": 262.5618362426758, "l1_loss": 15419.97216796875, "l0": 9220.8796875, "frac_variance_explained": -1.1251216292381288, "cossim": 0.0072056266712024804, "l2_ratio": 1.1523211359977723, "relative_reconstruction_bias": -3121.6480613708495, "loss_original": 2.4489264488220215, "loss_reconstructed": 20.26519641876221, "loss_zero": 12.452933025360107, "frac_recovered": -0.7811744093894959, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
|
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_19528/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:31d05f8c2d8a1c6f452b8f2693b7741e4c32908630d8f62d27d9d917d7721e22
|
3 |
+
size 339823336
|
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_19528/config.json
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"dict_class": "AutoEncoder",
|
4 |
+
"trainer_class": "StandardTrainer",
|
5 |
+
"activation_dim": 2304,
|
6 |
+
"dict_size": 18432,
|
7 |
+
"lr": 0.0003,
|
8 |
+
"l1_penalty": 0.025,
|
9 |
+
"warmup_steps": 1000,
|
10 |
+
"resample_steps": null,
|
11 |
+
"device": "cuda:0",
|
12 |
+
"layer": 11,
|
13 |
+
"lm_name": "google/gemma-2-2b",
|
14 |
+
"wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11",
|
15 |
+
"submodule_name": "resid_post_layer_11",
|
16 |
+
"steps": "19528"
|
17 |
+
},
|
18 |
+
"buffer": {
|
19 |
+
"d_submodule": 2304,
|
20 |
+
"io": "out",
|
21 |
+
"n_ctxs": 2000,
|
22 |
+
"ctx_len": 128,
|
23 |
+
"refresh_batch_size": 24,
|
24 |
+
"out_batch_size": 4096,
|
25 |
+
"device": "cuda:0"
|
26 |
+
}
|
27 |
+
}
|
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_19528/eval_results.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"l2_loss": 44.36017074584961, "l1_loss": 678.7422302246093, "l0": 664.40419921875, "frac_variance_explained": 0.8578554630279541, "cossim": 0.9489532053470612, "l2_ratio": 0.9064467251300812, "relative_reconstruction_bias": 0.962804764509201, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5181885957717896, "loss_zero": 12.452933025360107, "frac_recovered": 0.9931147575378418, "frac_alive": 0.7009006142616272, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
|
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_29292/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a8459951ead9228cb6bdc403d6492a22030927249f67f15d6eedf8d679405a47
|
3 |
+
size 339823336
|
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_29292/config.json
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"dict_class": "AutoEncoder",
|
4 |
+
"trainer_class": "StandardTrainer",
|
5 |
+
"activation_dim": 2304,
|
6 |
+
"dict_size": 18432,
|
7 |
+
"lr": 0.0003,
|
8 |
+
"l1_penalty": 0.025,
|
9 |
+
"warmup_steps": 1000,
|
10 |
+
"resample_steps": null,
|
11 |
+
"device": "cuda:0",
|
12 |
+
"layer": 11,
|
13 |
+
"lm_name": "google/gemma-2-2b",
|
14 |
+
"wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11",
|
15 |
+
"submodule_name": "resid_post_layer_11",
|
16 |
+
"steps": "29292"
|
17 |
+
},
|
18 |
+
"buffer": {
|
19 |
+
"d_submodule": 2304,
|
20 |
+
"io": "out",
|
21 |
+
"n_ctxs": 2000,
|
22 |
+
"ctx_len": 128,
|
23 |
+
"refresh_batch_size": 24,
|
24 |
+
"out_batch_size": 4096,
|
25 |
+
"device": "cuda:0"
|
26 |
+
}
|
27 |
+
}
|
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_29292/eval_results.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"l2_loss": 45.5604850769043, "l1_loss": 697.0533264160156, "l0": 648.0208557128906, "frac_variance_explained": 0.8660320043563843, "cossim": 0.9488271653652192, "l2_ratio": 0.9062425673007966, "relative_reconstruction_bias": 0.9621650040149688, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5167696475982666, "loss_zero": 12.452933025360107, "frac_recovered": 0.9932600975036621, "frac_alive": 0.7202690839767456, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
|
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_4882/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:166ad074d24b1dc38c4856fb9a9dd17479a591fc052877141aa4c9e674cb40a0
|
3 |
+
size 339823336
|
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_4882/config.json
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"dict_class": "AutoEncoder",
|
4 |
+
"trainer_class": "StandardTrainer",
|
5 |
+
"activation_dim": 2304,
|
6 |
+
"dict_size": 18432,
|
7 |
+
"lr": 0.0003,
|
8 |
+
"l1_penalty": 0.025,
|
9 |
+
"warmup_steps": 1000,
|
10 |
+
"resample_steps": null,
|
11 |
+
"device": "cuda:0",
|
12 |
+
"layer": 11,
|
13 |
+
"lm_name": "google/gemma-2-2b",
|
14 |
+
"wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11",
|
15 |
+
"submodule_name": "resid_post_layer_11",
|
16 |
+
"steps": "4882"
|
17 |
+
},
|
18 |
+
"buffer": {
|
19 |
+
"d_submodule": 2304,
|
20 |
+
"io": "out",
|
21 |
+
"n_ctxs": 2000,
|
22 |
+
"ctx_len": 128,
|
23 |
+
"refresh_batch_size": 24,
|
24 |
+
"out_batch_size": 4096,
|
25 |
+
"device": "cuda:0"
|
26 |
+
}
|
27 |
+
}
|
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_4882/eval_results.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"l2_loss": 55.02547416687012, "l1_loss": 694.5177673339844, "l0": 500.9125122070312, "frac_variance_explained": 0.8116631209850311, "cossim": 0.9266311347484588, "l2_ratio": 0.8777791380882263, "relative_reconstruction_bias": 0.9579416394233704, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.589069104194641, "loss_zero": 12.452933025360107, "frac_recovered": 0.9860494375228882, "frac_alive": 0.2698567807674408, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
|
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_9764/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0f23c778c5115ad5249b24bb4c98958cedf405069dc1f0b538dfcdd601bf582a
|
3 |
+
size 339823336
|
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_9764/config.json
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"dict_class": "AutoEncoder",
|
4 |
+
"trainer_class": "StandardTrainer",
|
5 |
+
"activation_dim": 2304,
|
6 |
+
"dict_size": 18432,
|
7 |
+
"lr": 0.0003,
|
8 |
+
"l1_penalty": 0.025,
|
9 |
+
"warmup_steps": 1000,
|
10 |
+
"resample_steps": null,
|
11 |
+
"device": "cuda:0",
|
12 |
+
"layer": 11,
|
13 |
+
"lm_name": "google/gemma-2-2b",
|
14 |
+
"wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11",
|
15 |
+
"submodule_name": "resid_post_layer_11",
|
16 |
+
"steps": "9764"
|
17 |
+
},
|
18 |
+
"buffer": {
|
19 |
+
"d_submodule": 2304,
|
20 |
+
"io": "out",
|
21 |
+
"n_ctxs": 2000,
|
22 |
+
"ctx_len": 128,
|
23 |
+
"refresh_batch_size": 24,
|
24 |
+
"out_batch_size": 4096,
|
25 |
+
"device": "cuda:0"
|
26 |
+
}
|
27 |
+
}
|
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_9764/eval_results.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"l2_loss": 46.25635299682617, "l1_loss": 680.0863464355468, "l0": 628.8416809082031, "frac_variance_explained": 0.857069319486618, "cossim": 0.9428604900836944, "l2_ratio": 0.8974497258663178, "relative_reconstruction_bias": 0.9643844664096832, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.531099796295166, "loss_zero": 12.452933025360107, "frac_recovered": 0.9918265163898468, "frac_alive": 0.529405415058136, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
|
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_0/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a8ea3e0cae878ffa8c34a4c1ebd21f82cc85af01eed1e09ba7ce855ae6dfe27d
|
3 |
+
size 339823336
|
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_0/config.json
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"dict_class": "AutoEncoder",
|
4 |
+
"trainer_class": "StandardTrainer",
|
5 |
+
"activation_dim": 2304,
|
6 |
+
"dict_size": 18432,
|
7 |
+
"lr": 0.0003,
|
8 |
+
"l1_penalty": 0.035,
|
9 |
+
"warmup_steps": 1000,
|
10 |
+
"resample_steps": null,
|
11 |
+
"device": "cuda:0",
|
12 |
+
"layer": 11,
|
13 |
+
"lm_name": "google/gemma-2-2b",
|
14 |
+
"wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11",
|
15 |
+
"submodule_name": "resid_post_layer_11",
|
16 |
+
"steps": "0"
|
17 |
+
},
|
18 |
+
"buffer": {
|
19 |
+
"d_submodule": 2304,
|
20 |
+
"io": "out",
|
21 |
+
"n_ctxs": 2000,
|
22 |
+
"ctx_len": 128,
|
23 |
+
"refresh_batch_size": 24,
|
24 |
+
"out_batch_size": 4096,
|
25 |
+
"device": "cuda:0"
|
26 |
+
}
|
27 |
+
}
|
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_0/eval_results.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"l2_loss": 231.1881851196289, "l1_loss": 13521.2337890625, "l0": 9215.55029296875, "frac_variance_explained": -1.0510494828224182, "cossim": 0.006548775953706354, "l2_ratio": 1.1542882323265076, "relative_reconstruction_bias": -167.9729995727539, "loss_original": 2.4489264488220215, "loss_reconstructed": 20.26519641876221, "loss_zero": 12.452933025360107, "frac_recovered": -0.7811744093894959, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
|
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_19528/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f13631c3eb49456356ffd1f81d364c8eb7a4fd6bfbce4eb486cc0aba7c317caf
|
3 |
+
size 339823336
|
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_19528/config.json
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"dict_class": "AutoEncoder",
|
4 |
+
"trainer_class": "StandardTrainer",
|
5 |
+
"activation_dim": 2304,
|
6 |
+
"dict_size": 18432,
|
7 |
+
"lr": 0.0003,
|
8 |
+
"l1_penalty": 0.035,
|
9 |
+
"warmup_steps": 1000,
|
10 |
+
"resample_steps": null,
|
11 |
+
"device": "cuda:0",
|
12 |
+
"layer": 11,
|
13 |
+
"lm_name": "google/gemma-2-2b",
|
14 |
+
"wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11",
|
15 |
+
"submodule_name": "resid_post_layer_11",
|
16 |
+
"steps": "19528"
|
17 |
+
},
|
18 |
+
"buffer": {
|
19 |
+
"d_submodule": 2304,
|
20 |
+
"io": "out",
|
21 |
+
"n_ctxs": 2000,
|
22 |
+
"ctx_len": 128,
|
23 |
+
"refresh_batch_size": 24,
|
24 |
+
"out_batch_size": 4096,
|
25 |
+
"device": "cuda:0"
|
26 |
+
}
|
27 |
+
}
|
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_19528/eval_results.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"l2_loss": 54.52329711914062, "l1_loss": 434.2824768066406, "l0": 215.03750610351562, "frac_variance_explained": 0.8672008395195008, "cossim": 0.9186275959014892, "l2_ratio": 0.8766422688961029, "relative_reconstruction_bias": 0.9781448543071747, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.614857029914856, "loss_zero": 12.452933025360107, "frac_recovered": 0.9834800899028778, "frac_alive": 0.3849283754825592, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
|
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_29292/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ad859e564cc052ffedbf6c98a7a93979224f7ab6b738a5512393d8690c564a1a
|
3 |
+
size 339823336
|
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_29292/config.json
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"dict_class": "AutoEncoder",
|
4 |
+
"trainer_class": "StandardTrainer",
|
5 |
+
"activation_dim": 2304,
|
6 |
+
"dict_size": 18432,
|
7 |
+
"lr": 0.0003,
|
8 |
+
"l1_penalty": 0.035,
|
9 |
+
"warmup_steps": 1000,
|
10 |
+
"resample_steps": null,
|
11 |
+
"device": "cuda:0",
|
12 |
+
"layer": 11,
|
13 |
+
"lm_name": "google/gemma-2-2b",
|
14 |
+
"wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11",
|
15 |
+
"submodule_name": "resid_post_layer_11",
|
16 |
+
"steps": "29292"
|
17 |
+
},
|
18 |
+
"buffer": {
|
19 |
+
"d_submodule": 2304,
|
20 |
+
"io": "out",
|
21 |
+
"n_ctxs": 2000,
|
22 |
+
"ctx_len": 128,
|
23 |
+
"refresh_batch_size": 24,
|
24 |
+
"out_batch_size": 4096,
|
25 |
+
"device": "cuda:0"
|
26 |
+
}
|
27 |
+
}
|
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_29292/eval_results.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"l2_loss": 55.41632499694824, "l1_loss": 412.8124267578125, "l0": 213.2916702270508, "frac_variance_explained": 0.7951547861099243, "cossim": 0.9193026781082153, "l2_ratio": 0.8765688300132751, "relative_reconstruction_bias": 0.963158255815506, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.6072664499282836, "loss_zero": 12.452933025360107, "frac_recovered": 0.9842307686805725, "frac_alive": 0.4042426347732544, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
|
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_4882/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5ec54a4ea78ec5eee0dc141018e261b00f34ef64e54d5766536618f48cc767c4
|
3 |
+
size 339823336
|
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_4882/config.json
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"dict_class": "AutoEncoder",
|
4 |
+
"trainer_class": "StandardTrainer",
|
5 |
+
"activation_dim": 2304,
|
6 |
+
"dict_size": 18432,
|
7 |
+
"lr": 0.0003,
|
8 |
+
"l1_penalty": 0.035,
|
9 |
+
"warmup_steps": 1000,
|
10 |
+
"resample_steps": null,
|
11 |
+
"device": "cuda:0",
|
12 |
+
"layer": 11,
|
13 |
+
"lm_name": "google/gemma-2-2b",
|
14 |
+
"wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11",
|
15 |
+
"submodule_name": "resid_post_layer_11",
|
16 |
+
"steps": "4882"
|
17 |
+
},
|
18 |
+
"buffer": {
|
19 |
+
"d_submodule": 2304,
|
20 |
+
"io": "out",
|
21 |
+
"n_ctxs": 2000,
|
22 |
+
"ctx_len": 128,
|
23 |
+
"refresh_batch_size": 24,
|
24 |
+
"out_batch_size": 4096,
|
25 |
+
"device": "cuda:0"
|
26 |
+
}
|
27 |
+
}
|
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_4882/eval_results.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"l2_loss": 65.93699836730957, "l1_loss": 390.71771545410155, "l0": 140.8791732788086, "frac_variance_explained": 0.7948267936706543, "cossim": 0.8820845127105713, "l2_ratio": 0.833953058719635, "relative_reconstruction_bias": 0.9701622486114502, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.8883158922195435, "loss_zero": 12.452933025360107, "frac_recovered": 0.9561446309089661, "frac_alive": 0.149685338139534, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
|
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_9764/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6d5f4dc455408710d281b07d9cfa3c041e4796169c85bac5d612a4df911cff2e
|
3 |
+
size 339823336
|
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_9764/config.json
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"dict_class": "AutoEncoder",
|
4 |
+
"trainer_class": "StandardTrainer",
|
5 |
+
"activation_dim": 2304,
|
6 |
+
"dict_size": 18432,
|
7 |
+
"lr": 0.0003,
|
8 |
+
"l1_penalty": 0.035,
|
9 |
+
"warmup_steps": 1000,
|
10 |
+
"resample_steps": null,
|
11 |
+
"device": "cuda:0",
|
12 |
+
"layer": 11,
|
13 |
+
"lm_name": "google/gemma-2-2b",
|
14 |
+
"wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11",
|
15 |
+
"submodule_name": "resid_post_layer_11",
|
16 |
+
"steps": "9764"
|
17 |
+
},
|
18 |
+
"buffer": {
|
19 |
+
"d_submodule": 2304,
|
20 |
+
"io": "out",
|
21 |
+
"n_ctxs": 2000,
|
22 |
+
"ctx_len": 128,
|
23 |
+
"refresh_batch_size": 24,
|
24 |
+
"out_batch_size": 4096,
|
25 |
+
"device": "cuda:0"
|
26 |
+
}
|
27 |
+
}
|
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_9764/eval_results.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"l2_loss": 59.162353134155275, "l1_loss": 431.5362915039062, "l0": 187.0416732788086, "frac_variance_explained": 0.7984853267669678, "cossim": 0.9067982614040375, "l2_ratio": 0.8616488099098205, "relative_reconstruction_bias": 0.9632077217102051, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.6684057474136353, "loss_zero": 12.452933025360107, "frac_recovered": 0.9781309604644776, "frac_alive": 0.2636176347732544, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
|
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_0/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a8ea3e0cae878ffa8c34a4c1ebd21f82cc85af01eed1e09ba7ce855ae6dfe27d
|
3 |
+
size 339823336
|
gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_0/config.json
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"dict_class": "AutoEncoder",
|
4 |
+
"trainer_class": "StandardTrainer",
|
5 |
+
"activation_dim": 2304,
|
6 |
+
"dict_size": 18432,
|
7 |
+
"lr": 0.0003,
|
8 |
+
"l1_penalty": 0.04,
|
9 |
+
"warmup_steps": 1000,
|
10 |
+
"resample_steps": null,
|
11 |
+
"device": "cuda:0",
|
12 |
+
"layer": 11,
|
13 |
+
"lm_name": "google/gemma-2-2b",
|
14 |
+
"wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11",
|
15 |
+
"submodule_name": "resid_post_layer_11",
|
16 |
+
"steps": "0"
|
17 |
+
},
|
18 |
+
"buffer": {
|
19 |
+
"d_submodule": 2304,
|
20 |
+
"io": "out",
|
21 |
+
"n_ctxs": 2000,
|
22 |
+
"ctx_len": 128,
|
23 |
+
"refresh_batch_size": 24,
|
24 |
+
"out_batch_size": 4096,
|
25 |
+
"device": "cuda:0"
|
26 |
+
}
|
27 |
+
}
|