Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11/trainer_0/ae.pt +3 -0
- gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11/trainer_0/config.json +26 -0
- gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11/trainer_0/eval_results.json +1 -0
- gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11/trainer_1/ae.pt +3 -0
- gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11/trainer_1/config.json +26 -0
- gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11/trainer_1/eval_results.json +1 -0
- gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11/trainer_2/ae.pt +3 -0
- gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11/trainer_2/config.json +26 -0
- gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11/trainer_2/eval_results.json +1 -0
- gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11/trainer_3/ae.pt +3 -0
- gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11/trainer_3/config.json +26 -0
- gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11/trainer_3/eval_results.json +1 -0
- gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11/trainer_4/ae.pt +3 -0
- gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11/trainer_4/config.json +26 -0
- gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11/trainer_4/eval_results.json +1 -0
- gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11/trainer_5/ae.pt +3 -0
- gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11/trainer_5/config.json +26 -0
- gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11/trainer_5/eval_results.json +1 -0
- gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_0_step_0/ae.pt +3 -0
- gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_0_step_0/config.json +27 -0
- gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_0_step_0/eval_results.json +1 -0
- gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_0_step_19528/ae.pt +3 -0
- gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_0_step_19528/config.json +27 -0
- gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_0_step_19528/eval_results.json +1 -0
- gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_0_step_29292/ae.pt +3 -0
- gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_0_step_29292/config.json +27 -0
- gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_0_step_29292/eval_results.json +1 -0
- gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_0_step_4882/ae.pt +3 -0
- gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_0_step_4882/config.json +27 -0
- gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_0_step_4882/eval_results.json +1 -0
- gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_0_step_9764/ae.pt +3 -0
- gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_0_step_9764/config.json +27 -0
- gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_0_step_9764/eval_results.json +1 -0
- gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_1_step_0/ae.pt +3 -0
- gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_1_step_0/config.json +27 -0
- gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_1_step_0/eval_results.json +1 -0
- gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_1_step_19528/ae.pt +3 -0
- gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_1_step_19528/config.json +27 -0
- gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_1_step_19528/eval_results.json +1 -0
- gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_1_step_29292/ae.pt +3 -0
- gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_1_step_29292/config.json +27 -0
- gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_1_step_29292/eval_results.json +1 -0
- gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_1_step_4882/ae.pt +3 -0
- gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_1_step_4882/config.json +27 -0
- gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_1_step_4882/eval_results.json +1 -0
- gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_1_step_9764/ae.pt +3 -0
- gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_1_step_9764/config.json +27 -0
- gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_1_step_9764/eval_results.json +1 -0
- gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_2_step_0/ae.pt +3 -0
- gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_2_step_0/config.json +27 -0
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11/trainer_0/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3371aefeca2f4995d599fa6a14600eb0abe9cfb19d9d0f3893587ebceec3dd16
|
3 |
+
size 84964136
|
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11/trainer_0/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"dict_class": "AutoEncoder",
|
4 |
+
"trainer_class": "StandardTrainer",
|
5 |
+
"activation_dim": 2304,
|
6 |
+
"dict_size": 4608,
|
7 |
+
"lr": 0.0003,
|
8 |
+
"l1_penalty": 0.025,
|
9 |
+
"warmup_steps": 1000,
|
10 |
+
"resample_steps": null,
|
11 |
+
"device": "cuda:0",
|
12 |
+
"layer": 11,
|
13 |
+
"lm_name": "google/gemma-2-2b",
|
14 |
+
"wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11",
|
15 |
+
"submodule_name": "resid_post_layer_11"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 2304,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 2000,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 24,
|
23 |
+
"out_batch_size": 4096,
|
24 |
+
"device": "cuda:0"
|
25 |
+
}
|
26 |
+
}
|
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11/trainer_0/eval_results.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"l2_loss": 51.09783172607422, "l1_loss": 696.4310302734375, "l0": 427.0833435058594, "frac_alive": 0.00021701389050576836, "frac_variance_explained": 0.9793744087219238, "cossim": 0.9331231117248535, "l2_ratio": 0.8912414312362671, "relative_reconstruction_bias": 0.9950931072235107, "loss_original": 2.432832717895508, "loss_reconstructed": 2.5339653491973877, "loss_zero": 12.452934265136719, "frac_recovered": 0.9899070262908936, "hyperparameters": {"n_inputs": 24, "context_length": 128}}
|
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11/trainer_1/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:da19506f642d4877189236d721933dbb1fdb6930aaf6fd38349e25f0008e170d
|
3 |
+
size 84964136
|
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11/trainer_1/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"dict_class": "AutoEncoder",
|
4 |
+
"trainer_class": "StandardTrainer",
|
5 |
+
"activation_dim": 2304,
|
6 |
+
"dict_size": 4608,
|
7 |
+
"lr": 0.0003,
|
8 |
+
"l1_penalty": 0.035,
|
9 |
+
"warmup_steps": 1000,
|
10 |
+
"resample_steps": null,
|
11 |
+
"device": "cuda:0",
|
12 |
+
"layer": 11,
|
13 |
+
"lm_name": "google/gemma-2-2b",
|
14 |
+
"wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11",
|
15 |
+
"submodule_name": "resid_post_layer_11"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 2304,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 2000,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 24,
|
23 |
+
"out_batch_size": 4096,
|
24 |
+
"device": "cuda:0"
|
25 |
+
}
|
26 |
+
}
|
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11/trainer_1/eval_results.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"l2_loss": 61.166587829589844, "l1_loss": 347.56976318359375, "l0": 153.0, "frac_alive": 0.00021701389050576836, "frac_variance_explained": 0.6767814755439758, "cossim": 0.8976640105247498, "l2_ratio": 0.8425207138061523, "relative_reconstruction_bias": 0.9373458623886108, "loss_original": 2.432832717895508, "loss_reconstructed": 2.690847396850586, "loss_zero": 12.452934265136719, "frac_recovered": 0.974250316619873, "hyperparameters": {"n_inputs": 24, "context_length": 128}}
|
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11/trainer_2/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fb1dc41c217bf34e752839990fbafe63d55d3aef64bfee0da2ca71acc194d8a7
|
3 |
+
size 84964136
|
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11/trainer_2/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"dict_class": "AutoEncoder",
|
4 |
+
"trainer_class": "StandardTrainer",
|
5 |
+
"activation_dim": 2304,
|
6 |
+
"dict_size": 4608,
|
7 |
+
"lr": 0.0003,
|
8 |
+
"l1_penalty": 0.04,
|
9 |
+
"warmup_steps": 1000,
|
10 |
+
"resample_steps": null,
|
11 |
+
"device": "cuda:0",
|
12 |
+
"layer": 11,
|
13 |
+
"lm_name": "google/gemma-2-2b",
|
14 |
+
"wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11",
|
15 |
+
"submodule_name": "resid_post_layer_11"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 2304,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 2000,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 24,
|
23 |
+
"out_batch_size": 4096,
|
24 |
+
"device": "cuda:0"
|
25 |
+
}
|
26 |
+
}
|
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11/trainer_2/eval_results.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"l2_loss": 61.01639938354492, "l1_loss": 368.2674560546875, "l0": 97.91667175292969, "frac_alive": 0.00021701389050576836, "frac_variance_explained": 0.9684154987335205, "cossim": 0.9024285078048706, "l2_ratio": 0.8613905310630798, "relative_reconstruction_bias": 0.9948885440826416, "loss_original": 2.432832717895508, "loss_reconstructed": 2.804324150085449, "loss_zero": 12.452934265136719, "frac_recovered": 0.962925374507904, "hyperparameters": {"n_inputs": 24, "context_length": 128}}
|
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11/trainer_3/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:79626d09db121cfdd709d316f0435a2bcd0f7b7d1dc86ca711e7212f43ffa713
|
3 |
+
size 84964136
|
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11/trainer_3/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"dict_class": "AutoEncoder",
|
4 |
+
"trainer_class": "StandardTrainer",
|
5 |
+
"activation_dim": 2304,
|
6 |
+
"dict_size": 4608,
|
7 |
+
"lr": 0.0003,
|
8 |
+
"l1_penalty": 0.05,
|
9 |
+
"warmup_steps": 1000,
|
10 |
+
"resample_steps": null,
|
11 |
+
"device": "cuda:0",
|
12 |
+
"layer": 11,
|
13 |
+
"lm_name": "google/gemma-2-2b",
|
14 |
+
"wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11",
|
15 |
+
"submodule_name": "resid_post_layer_11"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 2304,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 2000,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 24,
|
23 |
+
"out_batch_size": 4096,
|
24 |
+
"device": "cuda:0"
|
25 |
+
}
|
26 |
+
}
|
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11/trainer_3/eval_results.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"l2_loss": 78.22975158691406, "l1_loss": 218.60513305664062, "l0": 50.833335876464844, "frac_alive": 0.00021701389050576836, "frac_variance_explained": 0.5495132207870483, "cossim": 0.8355358839035034, "l2_ratio": 0.7730690836906433, "relative_reconstruction_bias": 0.9281772375106812, "loss_original": 2.432832717895508, "loss_reconstructed": 3.467956066131592, "loss_zero": 12.452934265136719, "frac_recovered": 0.8966952562332153, "hyperparameters": {"n_inputs": 24, "context_length": 128}}
|
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11/trainer_4/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a7231f173716248635598e5075872d81b5acdf54ccc41075ebc05229e88d54dd
|
3 |
+
size 84964136
|
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11/trainer_4/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"dict_class": "AutoEncoder",
|
4 |
+
"trainer_class": "StandardTrainer",
|
5 |
+
"activation_dim": 2304,
|
6 |
+
"dict_size": 4608,
|
7 |
+
"lr": 0.0003,
|
8 |
+
"l1_penalty": 0.06,
|
9 |
+
"warmup_steps": 1000,
|
10 |
+
"resample_steps": null,
|
11 |
+
"device": "cuda:0",
|
12 |
+
"layer": 11,
|
13 |
+
"lm_name": "google/gemma-2-2b",
|
14 |
+
"wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11",
|
15 |
+
"submodule_name": "resid_post_layer_11"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 2304,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 2000,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 24,
|
23 |
+
"out_batch_size": 4096,
|
24 |
+
"device": "cuda:0"
|
25 |
+
}
|
26 |
+
}
|
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11/trainer_4/eval_results.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"l2_loss": 74.44535827636719, "l1_loss": 236.12059020996094, "l0": 24.83333396911621, "frac_alive": 0.00021701389050576836, "frac_variance_explained": 0.9570982456207275, "cossim": 0.8187623023986816, "l2_ratio": 0.7615946531295776, "relative_reconstruction_bias": 0.9943456649780273, "loss_original": 2.432832717895508, "loss_reconstructed": 4.803515434265137, "loss_zero": 12.452934265136719, "frac_recovered": 0.763407289981842, "hyperparameters": {"n_inputs": 24, "context_length": 128}}
|
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11/trainer_5/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:95757030f87c5e68f02d9f900611d0908f13566c58bfd78ec15c30126b8ac221
|
3 |
+
size 84964136
|
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11/trainer_5/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"dict_class": "AutoEncoder",
|
4 |
+
"trainer_class": "StandardTrainer",
|
5 |
+
"activation_dim": 2304,
|
6 |
+
"dict_size": 4608,
|
7 |
+
"lr": 0.0003,
|
8 |
+
"l1_penalty": 0.07,
|
9 |
+
"warmup_steps": 1000,
|
10 |
+
"resample_steps": null,
|
11 |
+
"device": "cuda:0",
|
12 |
+
"layer": 11,
|
13 |
+
"lm_name": "google/gemma-2-2b",
|
14 |
+
"wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11",
|
15 |
+
"submodule_name": "resid_post_layer_11"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 2304,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 2000,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 24,
|
23 |
+
"out_batch_size": 4096,
|
24 |
+
"device": "cuda:0"
|
25 |
+
}
|
26 |
+
}
|
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11/trainer_5/eval_results.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"l2_loss": 91.40856170654297, "l1_loss": 147.08984375, "l0": 17.45833396911621, "frac_alive": 0.00021701389050576836, "frac_variance_explained": 0.39455533027648926, "cossim": 0.783570408821106, "l2_ratio": 0.7340455055236816, "relative_reconstruction_bias": 0.9417886734008789, "loss_original": 2.432832717895508, "loss_reconstructed": 5.698957920074463, "loss_zero": 12.452934265136719, "frac_recovered": 0.6740427017211914, "hyperparameters": {"n_inputs": 24, "context_length": 128}}
|
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_0_step_0/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8c4174f2b10df5afac120f58a012b3e15fcc2a9afa2d9937340977b82e7dfd84
|
3 |
+
size 84964152
|
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_0_step_0/config.json
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"dict_class": "AutoEncoder",
|
4 |
+
"trainer_class": "StandardTrainer",
|
5 |
+
"activation_dim": 2304,
|
6 |
+
"dict_size": 4608,
|
7 |
+
"lr": 0.0003,
|
8 |
+
"l1_penalty": 0.025,
|
9 |
+
"warmup_steps": 1000,
|
10 |
+
"resample_steps": null,
|
11 |
+
"device": "cuda:0",
|
12 |
+
"layer": 11,
|
13 |
+
"lm_name": "google/gemma-2-2b",
|
14 |
+
"wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11",
|
15 |
+
"submodule_name": "resid_post_layer_11",
|
16 |
+
"steps": "0"
|
17 |
+
},
|
18 |
+
"buffer": {
|
19 |
+
"d_submodule": 2304,
|
20 |
+
"io": "out",
|
21 |
+
"n_ctxs": 2000,
|
22 |
+
"ctx_len": 128,
|
23 |
+
"refresh_batch_size": 24,
|
24 |
+
"out_batch_size": 4096,
|
25 |
+
"device": "cuda:0"
|
26 |
+
}
|
27 |
+
}
|
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_0_step_0/eval_results.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"l2_loss": 167.1028854370117, "l1_loss": 3259.6132568359376, "l0": 2315.0375732421876, "frac_variance_explained": -0.26188645362854, "cossim": 0.004149633987981361, "l2_ratio": 0.5866222858428956, "relative_reconstruction_bias": -41.212248992919925, "loss_original": 2.4483999013900757, "loss_reconstructed": 17.234307861328126, "loss_zero": 12.452933025360107, "frac_recovered": -0.47787620425224303, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
|
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_0_step_19528/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c8d84e90cb2965413f89ce0cdbb00676fdde5cd7ad47f69b6debc967559ca6e2
|
3 |
+
size 84964440
|
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_0_step_19528/config.json
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"dict_class": "AutoEncoder",
|
4 |
+
"trainer_class": "StandardTrainer",
|
5 |
+
"activation_dim": 2304,
|
6 |
+
"dict_size": 4608,
|
7 |
+
"lr": 0.0003,
|
8 |
+
"l1_penalty": 0.025,
|
9 |
+
"warmup_steps": 1000,
|
10 |
+
"resample_steps": null,
|
11 |
+
"device": "cuda:0",
|
12 |
+
"layer": 11,
|
13 |
+
"lm_name": "google/gemma-2-2b",
|
14 |
+
"wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11",
|
15 |
+
"submodule_name": "resid_post_layer_11",
|
16 |
+
"steps": "19528"
|
17 |
+
},
|
18 |
+
"buffer": {
|
19 |
+
"d_submodule": 2304,
|
20 |
+
"io": "out",
|
21 |
+
"n_ctxs": 2000,
|
22 |
+
"ctx_len": 128,
|
23 |
+
"refresh_batch_size": 24,
|
24 |
+
"out_batch_size": 4096,
|
25 |
+
"device": "cuda:0"
|
26 |
+
}
|
27 |
+
}
|
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_0_step_19528/eval_results.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"l2_loss": 52.390929794311525, "l1_loss": 664.6068298339844, "l0": 430.4250122070313, "frac_variance_explained": 0.8581684529781342, "cossim": 0.929267168045044, "l2_ratio": 0.8806805431842804, "relative_reconstruction_bias": 0.9670958697795868, "loss_original": 2.4483999013900757, "loss_reconstructed": 2.5743808269500734, "loss_zero": 12.452933025360107, "frac_recovered": 0.9874637722969055, "frac_alive": 0.9769965410232544, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
|
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_0_step_29292/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f7862f1f0c27d1a4a51759b17e418d17d81db073068eeee02d079ec3e837b76b
|
3 |
+
size 84964440
|
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_0_step_29292/config.json
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"dict_class": "AutoEncoder",
|
4 |
+
"trainer_class": "StandardTrainer",
|
5 |
+
"activation_dim": 2304,
|
6 |
+
"dict_size": 4608,
|
7 |
+
"lr": 0.0003,
|
8 |
+
"l1_penalty": 0.025,
|
9 |
+
"warmup_steps": 1000,
|
10 |
+
"resample_steps": null,
|
11 |
+
"device": "cuda:0",
|
12 |
+
"layer": 11,
|
13 |
+
"lm_name": "google/gemma-2-2b",
|
14 |
+
"wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11",
|
15 |
+
"submodule_name": "resid_post_layer_11",
|
16 |
+
"steps": "29292"
|
17 |
+
},
|
18 |
+
"buffer": {
|
19 |
+
"d_submodule": 2304,
|
20 |
+
"io": "out",
|
21 |
+
"n_ctxs": 2000,
|
22 |
+
"ctx_len": 128,
|
23 |
+
"refresh_batch_size": 24,
|
24 |
+
"out_batch_size": 4096,
|
25 |
+
"device": "cuda:0"
|
26 |
+
}
|
27 |
+
}
|
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_0_step_29292/eval_results.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"l2_loss": 52.17484703063965, "l1_loss": 653.6545166015625, "l0": 428.90000915527344, "frac_variance_explained": 0.842798912525177, "cossim": 0.9301473379135132, "l2_ratio": 0.8885334372520447, "relative_reconstruction_bias": 0.9680565476417542, "loss_original": 2.4483999013900757, "loss_reconstructed": 2.5747411012649537, "loss_zero": 12.452933025360107, "frac_recovered": 0.9874280393123627, "frac_alive": 0.9793837070465088, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
|
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_0_step_4882/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:09eaff8c4400b3d580bd949c5de608d2d10cccebef131e1267edd05c07cd7f25
|
3 |
+
size 84964240
|
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_0_step_4882/config.json
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"dict_class": "AutoEncoder",
|
4 |
+
"trainer_class": "StandardTrainer",
|
5 |
+
"activation_dim": 2304,
|
6 |
+
"dict_size": 4608,
|
7 |
+
"lr": 0.0003,
|
8 |
+
"l1_penalty": 0.025,
|
9 |
+
"warmup_steps": 1000,
|
10 |
+
"resample_steps": null,
|
11 |
+
"device": "cuda:0",
|
12 |
+
"layer": 11,
|
13 |
+
"lm_name": "google/gemma-2-2b",
|
14 |
+
"wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11",
|
15 |
+
"submodule_name": "resid_post_layer_11",
|
16 |
+
"steps": "4882"
|
17 |
+
},
|
18 |
+
"buffer": {
|
19 |
+
"d_submodule": 2304,
|
20 |
+
"io": "out",
|
21 |
+
"n_ctxs": 2000,
|
22 |
+
"ctx_len": 128,
|
23 |
+
"refresh_batch_size": 24,
|
24 |
+
"out_batch_size": 4096,
|
25 |
+
"device": "cuda:0"
|
26 |
+
}
|
27 |
+
}
|
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_0_step_4882/eval_results.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"l2_loss": 60.687442016601565, "l1_loss": 625.6324584960937, "l0": 339.5833465576172, "frac_variance_explained": 0.8289074659347534, "cossim": 0.9064209163188934, "l2_ratio": 0.8561354637145996, "relative_reconstruction_bias": 0.9688756346702576, "loss_original": 2.4483999013900757, "loss_reconstructed": 2.676463174819946, "loss_zero": 12.452933025360107, "frac_recovered": 0.9772705137729645, "frac_alive": 0.5711805820465088, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
|
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_0_step_9764/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7b7b3002992ad2719bdca27b3c3ac7d5e88a20f923f149339a945b8960830d13
|
3 |
+
size 84964240
|
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_0_step_9764/config.json
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"dict_class": "AutoEncoder",
|
4 |
+
"trainer_class": "StandardTrainer",
|
5 |
+
"activation_dim": 2304,
|
6 |
+
"dict_size": 4608,
|
7 |
+
"lr": 0.0003,
|
8 |
+
"l1_penalty": 0.025,
|
9 |
+
"warmup_steps": 1000,
|
10 |
+
"resample_steps": null,
|
11 |
+
"device": "cuda:0",
|
12 |
+
"layer": 11,
|
13 |
+
"lm_name": "google/gemma-2-2b",
|
14 |
+
"wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11",
|
15 |
+
"submodule_name": "resid_post_layer_11",
|
16 |
+
"steps": "9764"
|
17 |
+
},
|
18 |
+
"buffer": {
|
19 |
+
"d_submodule": 2304,
|
20 |
+
"io": "out",
|
21 |
+
"n_ctxs": 2000,
|
22 |
+
"ctx_len": 128,
|
23 |
+
"refresh_batch_size": 24,
|
24 |
+
"out_batch_size": 4096,
|
25 |
+
"device": "cuda:0"
|
26 |
+
}
|
27 |
+
}
|
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_0_step_9764/eval_results.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"l2_loss": 53.42113838195801, "l1_loss": 660.417529296875, "l0": 428.55418090820314, "frac_variance_explained": 0.8469472169876099, "cossim": 0.9262736797332763, "l2_ratio": 0.8801985681056976, "relative_reconstruction_bias": 0.966783630847931, "loss_original": 2.4483999013900757, "loss_reconstructed": 2.58748140335083, "loss_zero": 12.452933025360107, "frac_recovered": 0.9861565232276917, "frac_alive": 0.9040798544883728, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
|
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_1_step_0/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8c4174f2b10df5afac120f58a012b3e15fcc2a9afa2d9937340977b82e7dfd84
|
3 |
+
size 84964152
|
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_1_step_0/config.json
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"dict_class": "AutoEncoder",
|
4 |
+
"trainer_class": "StandardTrainer",
|
5 |
+
"activation_dim": 2304,
|
6 |
+
"dict_size": 4608,
|
7 |
+
"lr": 0.0003,
|
8 |
+
"l1_penalty": 0.035,
|
9 |
+
"warmup_steps": 1000,
|
10 |
+
"resample_steps": null,
|
11 |
+
"device": "cuda:0",
|
12 |
+
"layer": 11,
|
13 |
+
"lm_name": "google/gemma-2-2b",
|
14 |
+
"wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11",
|
15 |
+
"submodule_name": "resid_post_layer_11",
|
16 |
+
"steps": "0"
|
17 |
+
},
|
18 |
+
"buffer": {
|
19 |
+
"d_submodule": 2304,
|
20 |
+
"io": "out",
|
21 |
+
"n_ctxs": 2000,
|
22 |
+
"ctx_len": 128,
|
23 |
+
"refresh_batch_size": 24,
|
24 |
+
"out_batch_size": 4096,
|
25 |
+
"device": "cuda:0"
|
26 |
+
}
|
27 |
+
}
|
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_1_step_0/eval_results.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"l2_loss": 175.92864990234375, "l1_loss": 3441.5954345703126, "l0": 2317.941748046875, "frac_variance_explained": -0.26664564609527586, "cossim": 0.003556889179162681, "l2_ratio": 0.5888823807239533, "relative_reconstruction_bias": -136.97222518920898, "loss_original": 2.4483999013900757, "loss_reconstructed": 17.234307861328126, "loss_zero": 12.452933025360107, "frac_recovered": -0.47787620425224303, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
|
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_1_step_19528/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bd031da2442ba9006771f4bf9e4d06c17748877213c6da0d774f3f4a3eb4375f
|
3 |
+
size 84964440
|
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_1_step_19528/config.json
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"dict_class": "AutoEncoder",
|
4 |
+
"trainer_class": "StandardTrainer",
|
5 |
+
"activation_dim": 2304,
|
6 |
+
"dict_size": 4608,
|
7 |
+
"lr": 0.0003,
|
8 |
+
"l1_penalty": 0.035,
|
9 |
+
"warmup_steps": 1000,
|
10 |
+
"resample_steps": null,
|
11 |
+
"device": "cuda:0",
|
12 |
+
"layer": 11,
|
13 |
+
"lm_name": "google/gemma-2-2b",
|
14 |
+
"wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11",
|
15 |
+
"submodule_name": "resid_post_layer_11",
|
16 |
+
"steps": "19528"
|
17 |
+
},
|
18 |
+
"buffer": {
|
19 |
+
"d_submodule": 2304,
|
20 |
+
"io": "out",
|
21 |
+
"n_ctxs": 2000,
|
22 |
+
"ctx_len": 128,
|
23 |
+
"refresh_batch_size": 24,
|
24 |
+
"out_batch_size": 4096,
|
25 |
+
"device": "cuda:0"
|
26 |
+
}
|
27 |
+
}
|
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_1_step_19528/eval_results.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"l2_loss": 62.69771194458008, "l1_loss": 424.6473724365234, "l0": 157.85000457763672, "frac_variance_explained": 0.8613891124725341, "cossim": 0.9007849872112275, "l2_ratio": 0.8550859570503235, "relative_reconstruction_bias": 0.9797493934631347, "loss_original": 2.4483999013900757, "loss_reconstructed": 2.720248818397522, "loss_zero": 12.452933025360107, "frac_recovered": 0.9728983402252197, "frac_alive": 0.7018229365348816, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
|
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_1_step_29292/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:28cf681f5de9c3d12a06baf7ee1a17e356c61e2c9f0cb3153ecd89220e41054f
|
3 |
+
size 84964440
|
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_1_step_29292/config.json
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"dict_class": "AutoEncoder",
|
4 |
+
"trainer_class": "StandardTrainer",
|
5 |
+
"activation_dim": 2304,
|
6 |
+
"dict_size": 4608,
|
7 |
+
"lr": 0.0003,
|
8 |
+
"l1_penalty": 0.035,
|
9 |
+
"warmup_steps": 1000,
|
10 |
+
"resample_steps": null,
|
11 |
+
"device": "cuda:0",
|
12 |
+
"layer": 11,
|
13 |
+
"lm_name": "google/gemma-2-2b",
|
14 |
+
"wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11",
|
15 |
+
"submodule_name": "resid_post_layer_11",
|
16 |
+
"steps": "29292"
|
17 |
+
},
|
18 |
+
"buffer": {
|
19 |
+
"d_submodule": 2304,
|
20 |
+
"io": "out",
|
21 |
+
"n_ctxs": 2000,
|
22 |
+
"ctx_len": 128,
|
23 |
+
"refresh_batch_size": 24,
|
24 |
+
"out_batch_size": 4096,
|
25 |
+
"device": "cuda:0"
|
26 |
+
}
|
27 |
+
}
|
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_1_step_29292/eval_results.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"l2_loss": 62.6892448425293, "l1_loss": 391.03612365722654, "l0": 158.2791717529297, "frac_variance_explained": 0.7394937157630921, "cossim": 0.901382839679718, "l2_ratio": 0.8549536645412446, "relative_reconstruction_bias": 0.9570579826831818, "loss_original": 2.4483999013900757, "loss_reconstructed": 2.71214439868927, "loss_zero": 12.452933025360107, "frac_recovered": 0.9737131774425507, "frac_alive": 0.7230902910232544, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
|
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_1_step_4882/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cae7229a90784a1f2b8cd1d1f7939f633997952c77b272675e2815a2bae58c59
|
3 |
+
size 84964240
|
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_1_step_4882/config.json
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"dict_class": "AutoEncoder",
|
4 |
+
"trainer_class": "StandardTrainer",
|
5 |
+
"activation_dim": 2304,
|
6 |
+
"dict_size": 4608,
|
7 |
+
"lr": 0.0003,
|
8 |
+
"l1_penalty": 0.035,
|
9 |
+
"warmup_steps": 1000,
|
10 |
+
"resample_steps": null,
|
11 |
+
"device": "cuda:0",
|
12 |
+
"layer": 11,
|
13 |
+
"lm_name": "google/gemma-2-2b",
|
14 |
+
"wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11",
|
15 |
+
"submodule_name": "resid_post_layer_11",
|
16 |
+
"steps": "4882"
|
17 |
+
},
|
18 |
+
"buffer": {
|
19 |
+
"d_submodule": 2304,
|
20 |
+
"io": "out",
|
21 |
+
"n_ctxs": 2000,
|
22 |
+
"ctx_len": 128,
|
23 |
+
"refresh_batch_size": 24,
|
24 |
+
"out_batch_size": 4096,
|
25 |
+
"device": "cuda:0"
|
26 |
+
}
|
27 |
+
}
|
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_1_step_4882/eval_results.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"l2_loss": 73.07725524902344, "l1_loss": 347.9136077880859, "l0": 103.04583511352538, "frac_variance_explained": 0.7512376010417938, "cossim": 0.8572899758815765, "l2_ratio": 0.8055591583251953, "relative_reconstruction_bias": 0.966018682718277, "loss_original": 2.4483999013900757, "loss_reconstructed": 3.2463842153549196, "loss_zero": 12.452933025360107, "frac_recovered": 0.9202560782432556, "frac_alive": 0.2736545205116272, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
|
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_1_step_9764/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:57c6496b429688a9b48745b0e45c555118bde43217a8f06c9d76ccb37762b0ac
|
3 |
+
size 84964240
|
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_1_step_9764/config.json
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"dict_class": "AutoEncoder",
|
4 |
+
"trainer_class": "StandardTrainer",
|
5 |
+
"activation_dim": 2304,
|
6 |
+
"dict_size": 4608,
|
7 |
+
"lr": 0.0003,
|
8 |
+
"l1_penalty": 0.035,
|
9 |
+
"warmup_steps": 1000,
|
10 |
+
"resample_steps": null,
|
11 |
+
"device": "cuda:0",
|
12 |
+
"layer": 11,
|
13 |
+
"lm_name": "google/gemma-2-2b",
|
14 |
+
"wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11",
|
15 |
+
"submodule_name": "resid_post_layer_11",
|
16 |
+
"steps": "9764"
|
17 |
+
},
|
18 |
+
"buffer": {
|
19 |
+
"d_submodule": 2304,
|
20 |
+
"io": "out",
|
21 |
+
"n_ctxs": 2000,
|
22 |
+
"ctx_len": 128,
|
23 |
+
"refresh_batch_size": 24,
|
24 |
+
"out_batch_size": 4096,
|
25 |
+
"device": "cuda:0"
|
26 |
+
}
|
27 |
+
}
|
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_1_step_9764/eval_results.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"l2_loss": 66.39950981140137, "l1_loss": 364.6046905517578, "l0": 141.72083892822266, "frac_variance_explained": 0.6755688190460205, "cossim": 0.8846192836761475, "l2_ratio": 0.8318478167057037, "relative_reconstruction_bias": 0.9467559456825256, "loss_original": 2.4483999013900757, "loss_reconstructed": 2.855544424057007, "loss_zero": 12.452933025360107, "frac_recovered": 0.9593764483928681, "frac_alive": 0.5112847089767456, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
|
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_2_step_0/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8c4174f2b10df5afac120f58a012b3e15fcc2a9afa2d9937340977b82e7dfd84
|
3 |
+
size 84964152
|
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_2_step_0/config.json
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"dict_class": "AutoEncoder",
|
4 |
+
"trainer_class": "StandardTrainer",
|
5 |
+
"activation_dim": 2304,
|
6 |
+
"dict_size": 4608,
|
7 |
+
"lr": 0.0003,
|
8 |
+
"l1_penalty": 0.04,
|
9 |
+
"warmup_steps": 1000,
|
10 |
+
"resample_steps": null,
|
11 |
+
"device": "cuda:0",
|
12 |
+
"layer": 11,
|
13 |
+
"lm_name": "google/gemma-2-2b",
|
14 |
+
"wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11",
|
15 |
+
"submodule_name": "resid_post_layer_11",
|
16 |
+
"steps": "0"
|
17 |
+
},
|
18 |
+
"buffer": {
|
19 |
+
"d_submodule": 2304,
|
20 |
+
"io": "out",
|
21 |
+
"n_ctxs": 2000,
|
22 |
+
"ctx_len": 128,
|
23 |
+
"refresh_batch_size": 24,
|
24 |
+
"out_batch_size": 4096,
|
25 |
+
"device": "cuda:0"
|
26 |
+
}
|
27 |
+
}
|