diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_0/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..7c9f4e7f06214facb1dbbea11f414efc35f4ca7a --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec94483dfdaf6baef38641e79ae4390fd1c4be0db552e4313408c7adfc9a25e6 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_0/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..153cfd8f0df8b58857dfcbf226f57b02c94ea5ec --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_0/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..c6f24658908376f2295f244f0a9771e71bd60230 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 44.21599082946777, "l1_loss": 689.0535034179687, "l0": 653.0125183105469, "frac_variance_explained": 0.876203840970993, "cossim": 0.9503044188022614, "l2_ratio": 0.9132377088069916, "relative_reconstruction_bias": 0.9689015865325927, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.516097593307495, "loss_zero": 12.452933025360107, "frac_recovered": 0.9933268785476684, "frac_alive": 0.722276508808136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_1/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_1/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..1e5ac998a40eed05c2c1aa02693081afaa9947e4 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_1/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c35b088143b227affa0d50ac160e0d4d4b288be1b3e7a9d8a2eec7262290aa10 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_1/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_1/config.json new file mode 100644 index 0000000000000000000000000000000000000000..299389a7ab8de44f0ec27f6e5f9a7decaed62bba --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_1/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_1/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_1/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..ee5d1435153d906c1a2fd1b600ad575d878995d8 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_1/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 54.783888244628905, "l1_loss": 430.45958251953124, "l0": 216.9250061035156, "frac_variance_explained": 0.821322637796402, "cossim": 0.9228447735309601, "l2_ratio": 0.8802441656589508, "relative_reconstruction_bias": 0.9668310403823852, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.602967619895935, "loss_zero": 12.452933025360107, "frac_recovered": 0.9846588850021363, "frac_alive": 0.4129774272441864, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_2/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_2/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..9fcfdba2b762c3161be178689ebd2f56ff67fd74 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_2/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86956cdaef37056627cc68976f5eee8268b1f88273c33b2aaa183dd8606e002a +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_2/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_2/config.json new file mode 100644 index 0000000000000000000000000000000000000000..bbbbbc5dc6a794602b9dcf734d688b1a28bcd460 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_2/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_2/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_2/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..527d7f6736a2b7407232af14700ab6f73caa9d35 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_2/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 60.528709411621094, "l1_loss": 360.99082946777344, "l0": 142.25417404174806, "frac_variance_explained": 0.7947052717208862, "cossim": 0.9053027153015136, "l2_ratio": 0.855720329284668, "relative_reconstruction_bias": 0.9635509788990021, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.675975728034973, "loss_zero": 12.452933025360107, "frac_recovered": 0.9773781895637512, "frac_alive": 0.28173828125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_3/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_3/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..51b6a209273e270ac71cc6e913051846d7f7d2b2 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_3/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93578a2028d6821229712eff158bc843ed03693b1af5c760ef67ec9baf415170 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_3/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_3/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d0bd2493386f64ba3986b4cd8d96b24bd440f329 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_3/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_3/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_3/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..2c65d4fee8669e50979ec61868084b364d40f197 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_3/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 69.19085235595703, "l1_loss": 260.5812042236328, "l0": 68.32500228881835, "frac_variance_explained": 0.6767194271087646, "cossim": 0.8739384233951568, "l2_ratio": 0.8242608070373535, "relative_reconstruction_bias": 0.9541641473770142, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.999904203414917, "loss_zero": 12.452933025360107, "frac_recovered": 0.9450006365776062, "frac_alive": 0.1184895858168602, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_4/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_4/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..6721a8a34b56e4ed66c33b41741e7ff30f789239 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_4/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f34e7a335652c290cd383cbf538b93ac8ff531ecf08c4006f9d6f1274c529fd +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_4/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_4/config.json new file mode 100644 index 0000000000000000000000000000000000000000..629a7122b8271de6d99097f96bdc9f2393a6594c --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_4/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_4/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_4/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..f159dae4be5e87201c043db00f317c041c2aa3bf --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_4/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 77.07271423339844, "l1_loss": 209.26783905029296, "l0": 35.49583435058594, "frac_variance_explained": 0.673879736661911, "cossim": 0.8379469156265259, "l2_ratio": 0.7764925301074982, "relative_reconstruction_bias": 0.9512474119663239, "loss_original": 2.4489264488220215, "loss_reconstructed": 3.923832106590271, "loss_zero": 12.452933025360107, "frac_recovered": 0.8525889277458191, "frac_alive": 0.0455729179084301, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_5/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_5/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..39e892013cbefe4507f213598814a848b63b8187 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_5/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db4d7607331b3bdeac0bfe2cd3a0a36835669ba6970db38b0867617476e00d7b +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_5/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..9e6fdd80ffb60c3fe18e5d6da960329c5f9fdfb0 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_5/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_5/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_5/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..8729fb48d526e8874ccbbce31dab88aa6fe01871 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11/trainer_5/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 85.48247909545898, "l1_loss": 188.83019409179687, "l0": 20.800000953674317, "frac_variance_explained": 0.6401739776134491, "cossim": 0.7948622822761535, "l2_ratio": 0.7354530394077301, "relative_reconstruction_bias": 0.9544944584369659, "loss_original": 2.4489264488220215, "loss_reconstructed": 4.871973085403442, "loss_zero": 12.452933025360107, "frac_recovered": 0.757849270105362, "frac_alive": 0.0176866315305233, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_0/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..cbec54d9bfa8329ce2e3f964d5e6421e27851059 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8ea3e0cae878ffa8c34a4c1ebd21f82cc85af01eed1e09ba7ce855ae6dfe27d +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_0/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f92795b1db26c0c61cbc35ada8dac9382ab53c4d --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_0/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11", + "steps": "0" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_0/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..063c5fc77767c97892d020227fce282b94ce9fb7 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 262.5618362426758, "l1_loss": 15419.97216796875, "l0": 9220.8796875, "frac_variance_explained": -1.1251216292381288, "cossim": 0.0072056266712024804, "l2_ratio": 1.1523211359977723, "relative_reconstruction_bias": -3121.6480613708495, "loss_original": 2.4489264488220215, "loss_reconstructed": 20.26519641876221, "loss_zero": 12.452933025360107, "frac_recovered": -0.7811744093894959, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_19528/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_19528/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..5b783900204f1ba4bfcd20f3033ff5a459fd9941 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_19528/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31d05f8c2d8a1c6f452b8f2693b7741e4c32908630d8f62d27d9d917d7721e22 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_19528/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_19528/config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ffa8bb4d1fa2df10d0223ab18544078ce5c4431 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_19528/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11", + "steps": "19528" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_19528/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_19528/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..12c7fa9558721694a914e0a1adf392e1a7167b32 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_19528/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 44.36017074584961, "l1_loss": 678.7422302246093, "l0": 664.40419921875, "frac_variance_explained": 0.8578554630279541, "cossim": 0.9489532053470612, "l2_ratio": 0.9064467251300812, "relative_reconstruction_bias": 0.962804764509201, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5181885957717896, "loss_zero": 12.452933025360107, "frac_recovered": 0.9931147575378418, "frac_alive": 0.7009006142616272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_29292/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_29292/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..d3167f1408535153d0e7dfb2e814b4cfb9f5f33b --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_29292/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8459951ead9228cb6bdc403d6492a22030927249f67f15d6eedf8d679405a47 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_29292/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_29292/config.json new file mode 100644 index 0000000000000000000000000000000000000000..2ecdb1eeda76b33f4f7db907d2595f50b10ef4a1 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_29292/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11", + "steps": "29292" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_29292/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_29292/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..ab3c7f72a8c157ea2daba79a3afbb57084396ea1 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_29292/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 45.5604850769043, "l1_loss": 697.0533264160156, "l0": 648.0208557128906, "frac_variance_explained": 0.8660320043563843, "cossim": 0.9488271653652192, "l2_ratio": 0.9062425673007966, "relative_reconstruction_bias": 0.9621650040149688, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5167696475982666, "loss_zero": 12.452933025360107, "frac_recovered": 0.9932600975036621, "frac_alive": 0.7202690839767456, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_4882/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_4882/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..7aa37518901daa97269e613f62ba78b7596cc966 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_4882/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:166ad074d24b1dc38c4856fb9a9dd17479a591fc052877141aa4c9e674cb40a0 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_4882/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..b98f9f2b0ec259b242b4c4e18c7bbae5b89b5a8c --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_4882/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11", + "steps": "4882" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_4882/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..f851d5b555b2f7cde3dd7bb6da195df1a42c399d --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 55.02547416687012, "l1_loss": 694.5177673339844, "l0": 500.9125122070312, "frac_variance_explained": 0.8116631209850311, "cossim": 0.9266311347484588, "l2_ratio": 0.8777791380882263, "relative_reconstruction_bias": 0.9579416394233704, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.589069104194641, "loss_zero": 12.452933025360107, "frac_recovered": 0.9860494375228882, "frac_alive": 0.2698567807674408, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_9764/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_9764/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..3182d37bc65b62a5616887ad057bfa0e610b4719 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_9764/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f23c778c5115ad5249b24bb4c98958cedf405069dc1f0b538dfcdd601bf582a +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_9764/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_9764/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c69dd17247a6dbe0ac23f57b5e37d2a7be0bf9b8 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_9764/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11", + "steps": "9764" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_9764/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_9764/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..115761d5a56670bfc25103a73c3d12a1588e5c7a --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_9764/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 46.25635299682617, "l1_loss": 680.0863464355468, "l0": 628.8416809082031, "frac_variance_explained": 0.857069319486618, "cossim": 0.9428604900836944, "l2_ratio": 0.8974497258663178, "relative_reconstruction_bias": 0.9643844664096832, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.531099796295166, "loss_zero": 12.452933025360107, "frac_recovered": 0.9918265163898468, "frac_alive": 0.529405415058136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_0/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..cbec54d9bfa8329ce2e3f964d5e6421e27851059 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8ea3e0cae878ffa8c34a4c1ebd21f82cc85af01eed1e09ba7ce855ae6dfe27d +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_0/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3017b0e1ec2fa0a7935ea5740383ce3280346a02 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_0/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11", + "steps": "0" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_0/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..764c483c1df47d4a8b2dfe53476c61d6c6971f0b --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 231.1881851196289, "l1_loss": 13521.2337890625, "l0": 9215.55029296875, "frac_variance_explained": -1.0510494828224182, "cossim": 0.006548775953706354, "l2_ratio": 1.1542882323265076, "relative_reconstruction_bias": -167.9729995727539, "loss_original": 2.4489264488220215, "loss_reconstructed": 20.26519641876221, "loss_zero": 12.452933025360107, "frac_recovered": -0.7811744093894959, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_19528/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_19528/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..f46f05cdd9460741c4842215c80bccc3d79e4da2 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_19528/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f13631c3eb49456356ffd1f81d364c8eb7a4fd6bfbce4eb486cc0aba7c317caf +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_19528/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_19528/config.json new file mode 100644 index 0000000000000000000000000000000000000000..17745b4fac7301880dc2444a59e35c5b42225b29 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_19528/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11", + "steps": "19528" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_19528/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_19528/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..273e14c904fe590710bb5703a4856d0ca55b654c --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_19528/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 54.52329711914062, "l1_loss": 434.2824768066406, "l0": 215.03750610351562, "frac_variance_explained": 0.8672008395195008, "cossim": 0.9186275959014892, "l2_ratio": 0.8766422688961029, "relative_reconstruction_bias": 0.9781448543071747, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.614857029914856, "loss_zero": 12.452933025360107, "frac_recovered": 0.9834800899028778, "frac_alive": 0.3849283754825592, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_29292/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_29292/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..f59f7cda0e04b30716ff47a3ef37b58aa5d774a1 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_29292/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad859e564cc052ffedbf6c98a7a93979224f7ab6b738a5512393d8690c564a1a +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_29292/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_29292/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c0e8fff167e29dfab19ba9187c12b5fd743fe721 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_29292/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11", + "steps": "29292" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_29292/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_29292/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..bf26fd6a41309f91303ee2d351286b6ba192aa63 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_29292/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 55.41632499694824, "l1_loss": 412.8124267578125, "l0": 213.2916702270508, "frac_variance_explained": 0.7951547861099243, "cossim": 0.9193026781082153, "l2_ratio": 0.8765688300132751, "relative_reconstruction_bias": 0.963158255815506, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.6072664499282836, "loss_zero": 12.452933025360107, "frac_recovered": 0.9842307686805725, "frac_alive": 0.4042426347732544, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_4882/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_4882/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..06a3548a8d9e0c5c16653fef9a6b5bf46b65c3fc --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_4882/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ec54a4ea78ec5eee0dc141018e261b00f34ef64e54d5766536618f48cc767c4 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_4882/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..2a2997fb4faa5fa3ff468c24b769376cfde01036 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_4882/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11", + "steps": "4882" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_4882/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..a681d52a394165d3568ec7f02f31fe86d84ec613 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 65.93699836730957, "l1_loss": 390.71771545410155, "l0": 140.8791732788086, "frac_variance_explained": 0.7948267936706543, "cossim": 0.8820845127105713, "l2_ratio": 0.833953058719635, "relative_reconstruction_bias": 0.9701622486114502, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.8883158922195435, "loss_zero": 12.452933025360107, "frac_recovered": 0.9561446309089661, "frac_alive": 0.149685338139534, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_9764/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_9764/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..6e1968185576e9075de5f87885f354580da44265 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_9764/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d5f4dc455408710d281b07d9cfa3c041e4796169c85bac5d612a4df911cff2e +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_9764/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_9764/config.json new file mode 100644 index 0000000000000000000000000000000000000000..043e115416d55a1b5d81041f5769dfe2de0d0392 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_9764/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11", + "steps": "9764" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_9764/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_9764/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..6d7da686035576d73f162207e39af092a24320ad --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_9764/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 59.162353134155275, "l1_loss": 431.5362915039062, "l0": 187.0416732788086, "frac_variance_explained": 0.7984853267669678, "cossim": 0.9067982614040375, "l2_ratio": 0.8616488099098205, "relative_reconstruction_bias": 0.9632077217102051, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.6684057474136353, "loss_zero": 12.452933025360107, "frac_recovered": 0.9781309604644776, "frac_alive": 0.2636176347732544, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_0/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..cbec54d9bfa8329ce2e3f964d5e6421e27851059 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8ea3e0cae878ffa8c34a4c1ebd21f82cc85af01eed1e09ba7ce855ae6dfe27d +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_0/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..fbf44b89ede4e821fcae7bfd45764c473cec8848 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_0/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11", + "steps": "0" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_0/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..4eea117bb1c583c9a7be246f7f9ccf590fcc3821 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 258.50673828125, "l1_loss": 15125.352734375, "l0": 9211.10859375, "frac_variance_explained": -1.1248699188232423, "cossim": 0.0037612241867464036, "l2_ratio": 1.1529219269752502, "relative_reconstruction_bias": 508.3486152648926, "loss_original": 2.4489264488220215, "loss_reconstructed": 20.26519641876221, "loss_zero": 12.452933025360107, "frac_recovered": -0.7811744093894959, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_19528/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_19528/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..a44ac9ea50fe89ee20be112162fd2e9cba46238b --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_19528/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3af33a1f98fcdc5c5a7d7e95bdd3a37723a18b279c869fe9d43b96db7f971052 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_19528/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_19528/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f748f49e974a0258c3e6a7829c26d228ee473267 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_19528/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11", + "steps": "19528" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_19528/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_19528/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..00191856060a8533cb7f51ca86749fe1e902ec76 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_19528/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 59.95433006286621, "l1_loss": 344.7991943359375, "l0": 140.75417098999023, "frac_variance_explained": 0.7319669544696807, "cossim": 0.9040613055229187, "l2_ratio": 0.8558434069156646, "relative_reconstruction_bias": 0.9546543955802917, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.6972389936447145, "loss_zero": 12.452933025360107, "frac_recovered": 0.975246912240982, "frac_alive": 0.2594943642616272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_29292/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_29292/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..5b6b1db85cdde012b942bf9a5753be6276359ac4 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_29292/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6df8cbe6a37fdef15635f769b3b4c6f532101ce13c065723282e15f8a784927 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_29292/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_29292/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a68ab7382d97dc3251026842d09bf788475bfb2f --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_29292/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11", + "steps": "29292" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_29292/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_29292/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..f4cbbffb392f562f930495f859a5ff1157f2b3d2 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_29292/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 59.4619327545166, "l1_loss": 348.9124359130859, "l0": 139.27083740234374, "frac_variance_explained": 0.7726053297519684, "cossim": 0.903406971693039, "l2_ratio": 0.8542409300804138, "relative_reconstruction_bias": 0.961150300502777, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.680323767662048, "loss_zero": 12.452933025360107, "frac_recovered": 0.9769425928592682, "frac_alive": 0.2785373330116272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_4882/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_4882/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..4b8caacd36477b5c426d47a05c7988c2730b7814 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_4882/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75ada284b32497d5e8c0320a00f0aaea6074b0783ca80fe66002e3df29f0f0d0 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_4882/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e78a1b618be9800ef1fe15b6b67f2de26b2e900e --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_4882/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11", + "steps": "4882" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_4882/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..0586da3b7dbafbdbd5f86aaee80e1cd0c25078b6 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 71.56183013916015, "l1_loss": 298.6728546142578, "l0": 87.73333587646485, "frac_variance_explained": 0.6486897647380829, "cossim": 0.8650417923927307, "l2_ratio": 0.8146487534046173, "relative_reconstruction_bias": 0.9494877696037293, "loss_original": 2.4489264488220215, "loss_reconstructed": 3.289353442192078, "loss_zero": 12.452933025360107, "frac_recovered": 0.9159802734851837, "frac_alive": 0.0842556431889534, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_9764/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_9764/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..f70147ae9bf9460df12d02faee465cfacfcb6cb3 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_9764/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d047486cb89ef4df5c613a45ce951e291c03ac41aedcf47b7ef3eba4c23c7aa5 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_9764/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_9764/config.json new file mode 100644 index 0000000000000000000000000000000000000000..62504203d288fc5e3160844a876331575a13e111 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_9764/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11", + "steps": "9764" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_9764/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_9764/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..9c44cd7cfbd0c153b893bf29728f46825aca3f2c --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_9764/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 64.95185241699218, "l1_loss": 337.8774475097656, "l0": 119.8708366394043, "frac_variance_explained": 0.7428133428096771, "cossim": 0.8860678613185883, "l2_ratio": 0.8326881527900696, "relative_reconstruction_bias": 0.9573476433753967, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.812422513961792, "loss_zero": 12.452933025360107, "frac_recovered": 0.963723224401474, "frac_alive": 0.1712782084941864, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_0/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..cbec54d9bfa8329ce2e3f964d5e6421e27851059 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8ea3e0cae878ffa8c34a4c1ebd21f82cc85af01eed1e09ba7ce855ae6dfe27d +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_0/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..de2d18f22bbbb2807cc95abe8ee0af4e247e4a9f --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_0/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11", + "steps": "0" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_0/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..6e1bf4b79b530f5a0ad7b80e499d5da5bfac2ee4 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 274.2146881103516, "l1_loss": 16089.59677734375, "l0": 9220.0044921875, "frac_variance_explained": -1.1421878576278686, "cossim": 0.0065057534608058635, "l2_ratio": 1.154287075996399, "relative_reconstruction_bias": 18769.018077087403, "loss_original": 2.4489264488220215, "loss_reconstructed": 20.26519641876221, "loss_zero": 12.452933025360107, "frac_recovered": -0.7811744093894959, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_19528/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_19528/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..5dee18955e8e129b074f713de4062c796b82a4c8 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_19528/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4dbbec1fb6581af50808770671dcc47b6ef7f268926c90c418a8cb098bbaf88 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_19528/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_19528/config.json new file mode 100644 index 0000000000000000000000000000000000000000..44a67b23967e407fe83010f71f5be1489fe91639 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_19528/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11", + "steps": "19528" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_19528/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_19528/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..0861d7413fd049ec22aecdad49bab46a3542695d --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_19528/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 70.6308380126953, "l1_loss": 267.8436737060547, "l0": 66.60000228881836, "frac_variance_explained": 0.7148071527481079, "cossim": 0.865442568063736, "l2_ratio": 0.8128104329109191, "relative_reconstruction_bias": 0.958704662322998, "loss_original": 2.4489264488220215, "loss_reconstructed": 3.0564446449279785, "loss_zero": 12.452933025360107, "frac_recovered": 0.9393680632114411, "frac_alive": 0.1125217005610466, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_29292/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_29292/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..6beecc76ec2705ab345c787c63ef1c7dfa79a2e9 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_29292/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d38127a28a1598d8b1dcb9afcf228feac7beb3c6e4c547c89fe84f4912970280 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_29292/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_29292/config.json new file mode 100644 index 0000000000000000000000000000000000000000..89a5b9935bf11fc69358f83a204ab21c6cf4a39c --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_29292/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11", + "steps": "29292" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_29292/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_29292/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..1c7e4495018c482adb8e805ce123a6f55a94382f --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_29292/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 69.23471069335938, "l1_loss": 280.60032806396487, "l0": 68.15000228881836, "frac_variance_explained": 0.75370854139328, "cossim": 0.8718753635883332, "l2_ratio": 0.8194641590118408, "relative_reconstruction_bias": 0.9629104971885681, "loss_original": 2.4489264488220215, "loss_reconstructed": 3.0053123950958254, "loss_zero": 12.452933025360107, "frac_recovered": 0.9444657742977143, "frac_alive": 0.115614153444767, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_4882/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_4882/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..209714376bd05c93945108b0fe0c4068e32035c5 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_4882/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96dd83ca6a089264d69cc26002486e8afbb0be4ec9fa47e87392636293f0f3fd +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_4882/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e209dc7629222f6d88651553b56c17cb881612a9 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_4882/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11", + "steps": "4882" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_4882/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..3b16b6013abde43f55655b59041e6d8726732d1d --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 80.68263626098633, "l1_loss": 225.83195953369142, "l0": 41.77916793823242, "frac_variance_explained": 0.6305564761161804, "cossim": 0.8209482312202454, "l2_ratio": 0.767198258638382, "relative_reconstruction_bias": 0.959108853340149, "loss_original": 2.4489264488220215, "loss_reconstructed": 4.263485717773437, "loss_zero": 12.452933025360107, "frac_recovered": 0.8185814321041107, "frac_alive": 0.0421006940305233, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_9764/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_9764/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..e380c02f01909c65a7a9fb880b1172d02dd8d72d --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_9764/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f4555a5dbba4b49747af708b75f3f2fe35543a32749df80f96906593b2614ca +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_9764/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_9764/config.json new file mode 100644 index 0000000000000000000000000000000000000000..fb21fb59a3cbeebabb1a0a4a25cb3eb2df7a6152 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_9764/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11", + "steps": "9764" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_9764/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_9764/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..3ceb67fc0889e5f934ea963813f5e2216a13cfe4 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_9764/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 75.94515762329101, "l1_loss": 248.12893371582032, "l0": 55.70833473205566, "frac_variance_explained": 0.628894853591919, "cossim": 0.8447924613952636, "l2_ratio": 0.7909220278263092, "relative_reconstruction_bias": 0.9520269095897674, "loss_original": 2.4489264488220215, "loss_reconstructed": 3.493554949760437, "loss_zero": 12.452933025360107, "frac_recovered": 0.8955767214298248, "frac_alive": 0.0681966170668602, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_0/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..cbec54d9bfa8329ce2e3f964d5e6421e27851059 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8ea3e0cae878ffa8c34a4c1ebd21f82cc85af01eed1e09ba7ce855ae6dfe27d +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_0/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d8d1c02de442457025fcded30b3cc68355597a74 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_0/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11", + "steps": "0" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_0/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..a10eda04f0e4d4bf1c1254a939682989d68c6c8a --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 224.75511474609374, "l1_loss": 13160.74765625, "l0": 9226.1044921875, "frac_variance_explained": -1.0322468400001525, "cossim": 0.00744592803530395, "l2_ratio": 1.153313195705414, "relative_reconstruction_bias": 166.77607421875, "loss_original": 2.4489264488220215, "loss_reconstructed": 20.26519641876221, "loss_zero": 12.452933025360107, "frac_recovered": -0.7811744093894959, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_19528/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_19528/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..e3a34ab3acfca9a02c27af352edfc8256f4e6d8e --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_19528/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d8ff705594a3d3ad413ba6e7555306387bb5c8e16e9d40d940a198ccb1fe23a +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_19528/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_19528/config.json new file mode 100644 index 0000000000000000000000000000000000000000..698426a9593768331d11d00b3a6b711f4a3ced9d --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_19528/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11", + "steps": "19528" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_19528/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_19528/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..4729796e3552ea431b8a4ee2aa349c69a6ff338d --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_19528/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 77.85688781738281, "l1_loss": 218.3060272216797, "l0": 35.80833435058594, "frac_variance_explained": 0.6659238994121551, "cossim": 0.8269996345043182, "l2_ratio": 0.7675947904586792, "relative_reconstruction_bias": 0.9576776623725891, "loss_original": 2.4489264488220215, "loss_reconstructed": 3.992558979988098, "loss_zero": 12.452933025360107, "frac_recovered": 0.8457254767417908, "frac_alive": 0.0439453125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_29292/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_29292/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..1008de382dfeb2d61d5e2479ac33e67609f1e0b5 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_29292/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ceed377b9a9165a29049dad2cce2e1ec40b63744f8fb3ee8b6b33e6d639c38b0 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_29292/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_29292/config.json new file mode 100644 index 0000000000000000000000000000000000000000..948ddab91966580bd4fd6696f00d556a5dd7c2fc --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_29292/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11", + "steps": "29292" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_29292/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_29292/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..9f6bc7873d350b4ed95afe10d63ed49734b2de4c --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_29292/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 78.90703659057617, "l1_loss": 204.0153045654297, "l0": 36.31250114440918, "frac_variance_explained": 0.594800877571106, "cossim": 0.8339714348316193, "l2_ratio": 0.774432796239853, "relative_reconstruction_bias": 0.9429995357990265, "loss_original": 2.4489264488220215, "loss_reconstructed": 3.9304433822631837, "loss_zero": 12.452933025360107, "frac_recovered": 0.8519228398799896, "frac_alive": 0.04541015625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_4882/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_4882/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..611f76f3b4b7ee2fd8d1572ce734e1b3812240f5 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_4882/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab2afc9973e91b9acf533f914595c3edaafd44574e1c4d14a95752928a3ea956 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_4882/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..cadccca4f95503d59212d42bf9ce4c6a3beff406 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_4882/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11", + "steps": "4882" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_4882/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..5ecb59bd5887affb0ffac57b77efb8e2d782fd9c --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 88.91005172729493, "l1_loss": 174.78749084472656, "l0": 23.68750057220459, "frac_variance_explained": 0.4893746078014374, "cossim": 0.7889236032962799, "l2_ratio": 0.7290889263153076, "relative_reconstruction_bias": 0.9413408517837525, "loss_original": 2.4489264488220215, "loss_reconstructed": 5.214775848388672, "loss_zero": 12.452933025360107, "frac_recovered": 0.7235329747200012, "frac_alive": 0.01752387173473835, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_9764/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_9764/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..9ed375123c9369cf451633c204957f7cfde10d7b --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_9764/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43b544edbf53ca4123e0f8a6837b2c6c272e31864b58a8e22caf392e5ce77ee1 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_9764/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_9764/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a16a5a0721ad5d70482b8fbe481c8c1490463dd5 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_9764/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11", + "steps": "9764" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_9764/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_9764/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..985382fde3c034d9aafcc962d86c8429877aa916 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_9764/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 82.64426651000977, "l1_loss": 185.1240982055664, "l0": 29.10416736602783, "frac_variance_explained": 0.5237946152687073, "cossim": 0.8047848761081695, "l2_ratio": 0.7462130308151245, "relative_reconstruction_bias": 0.9380469918251038, "loss_original": 2.4489264488220215, "loss_reconstructed": 4.567673206329346, "loss_zero": 12.452933025360107, "frac_recovered": 0.7882242977619172, "frac_alive": 0.02701822854578495, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_0/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..cbec54d9bfa8329ce2e3f964d5e6421e27851059 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8ea3e0cae878ffa8c34a4c1ebd21f82cc85af01eed1e09ba7ce855ae6dfe27d +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_0/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e1c70993871aebe2111581d5d5ac85bccd0c2ec1 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_0/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11", + "steps": "0" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_0/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..f00ab4bed34eacd88b8471bcbbf67516cbb367a7 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 264.0029922485352, "l1_loss": 15478.43876953125, "l0": 9216.20859375, "frac_variance_explained": -1.1124252080917358, "cossim": 0.004309083218686283, "l2_ratio": 1.1540743947029113, "relative_reconstruction_bias": -808.8423194885254, "loss_original": 2.4489264488220215, "loss_reconstructed": 20.26519641876221, "loss_zero": 12.452933025360107, "frac_recovered": -0.7811744093894959, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_19528/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_19528/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..abac746770a770fa8f8b92b751f28991d6f1f802 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_19528/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73234d949cb066805ecf7a080b7e7d411f1569ecbb131e8111d231b7f9f2ac1f +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_19528/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_19528/config.json new file mode 100644 index 0000000000000000000000000000000000000000..912c7a9c6ed0c33166da2e18159300475f868bc2 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_19528/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11", + "steps": "19528" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_19528/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_19528/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..db8398c919ce97a44f7b862a529189efb88a9c9a --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_19528/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 87.86799011230468, "l1_loss": 185.46568298339844, "l0": 21.84166717529297, "frac_variance_explained": 0.5730636775493622, "cossim": 0.7987316846847534, "l2_ratio": 0.7340438425540924, "relative_reconstruction_bias": 0.9430574297904968, "loss_original": 2.4489264488220215, "loss_reconstructed": 4.911713790893555, "loss_zero": 12.452933025360107, "frac_recovered": 0.7538710176944733, "frac_alive": 0.0167100690305233, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_29292/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_29292/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..a0939cff6643ebb8ea00ab6c24ac3725d369fad6 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_29292/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62c90ec1c65332ee8b7bb1193538c14accde413314a003dd0f276ce014a19a55 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_29292/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_29292/config.json new file mode 100644 index 0000000000000000000000000000000000000000..407c5240ed0a115555a411aa76ce6a87d89dfb60 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_29292/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11", + "steps": "29292" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_29292/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_29292/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..a604d0a01fd886378a849ecf0876e91313c15873 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_29292/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 81.0098274230957, "l1_loss": 172.93630676269532, "l0": 21.220833778381348, "frac_variance_explained": 0.577200037240982, "cossim": 0.8052893579006195, "l2_ratio": 0.7448508679866791, "relative_reconstruction_bias": 0.9451340019702912, "loss_original": 2.4489264488220215, "loss_reconstructed": 4.850847578048706, "loss_zero": 12.452933025360107, "frac_recovered": 0.7599575161933899, "frac_alive": 0.01801215298473835, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_4882/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_4882/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..c7c4dcbf3c06257eae6c295ad96e5cf52428def4 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_4882/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93fb2e5164c787597f0844950622e3f682759c3a7ceaf1941d729e34beda419e +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_4882/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e0f46ccaaa502941ed03153e855ff2ab57eda947 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_4882/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11", + "steps": "4882" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_4882/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..48b82027be3b874384f33af08980d8344665c4fe --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 92.6896354675293, "l1_loss": 165.45609283447266, "l0": 15.129166984558106, "frac_variance_explained": 0.5438343524932862, "cossim": 0.7621895015239716, "l2_ratio": 0.6955153405666351, "relative_reconstruction_bias": 0.9493862390518188, "loss_original": 2.4489264488220215, "loss_reconstructed": 6.1313272476196286, "loss_zero": 12.452933025360107, "frac_recovered": 0.6319315254688262, "frac_alive": 0.00873480923473835, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_9764/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_9764/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..b5efc19aa6de0a7e3cc75979c3b2f66ab8c01e82 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_9764/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e3c52effbfd1682807f67bf29a392a4a1eda5205e9f224c39ea3fc9153d53b5 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_9764/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_9764/config.json new file mode 100644 index 0000000000000000000000000000000000000000..87faede6a8d2f0f1fb3d6e14e29e4d33a988af84 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_9764/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11", + "steps": "9764" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_9764/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_9764/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..b0d3a3de196283d8f60801167ba76592ab9c8889 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_9764/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 89.99450988769532, "l1_loss": 174.3315689086914, "l0": 17.187500953674316, "frac_variance_explained": 0.5984835743904113, "cossim": 0.7765738308429718, "l2_ratio": 0.710818636417389, "relative_reconstruction_bias": 0.9526469767093658, "loss_original": 2.4489264488220215, "loss_reconstructed": 5.6064231395721436, "loss_zero": 12.452933025360107, "frac_recovered": 0.6844373285770416, "frac_alive": 0.0091688372194767, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15/trainer_0/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15/trainer_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..2c51de1feaedcfc635b9e08fb874c2dcd5b22050 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15/trainer_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40f5a804d79a6334400eb7ffe98a0d45c7372bd61c338178705a58bb914f94ac +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15/trainer_0/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15/trainer_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d857b116f33e36f58c10c1de8c4b3f84beb42b54 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15/trainer_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15/trainer_0/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15/trainer_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..81b0f9056870d0ab5b890e11f4eecc3dbcee6e1c --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15/trainer_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 55.63053817749024, "l1_loss": 868.72734375, "l0": 563.5500183105469, "frac_variance_explained": 0.8843899667263031, "cossim": 0.9587938010692596, "l2_ratio": 0.9266170263290405, "relative_reconstruction_bias": 0.9712898671627045, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5277384996414183, "loss_zero": 12.452933025360107, "frac_recovered": 0.9921594679355621, "frac_alive": 0.717881977558136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15/trainer_1/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15/trainer_1/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..de9d4fceeaccf4783b4c2d28a5edb369eed8319f --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15/trainer_1/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f63de1498cf966387b69a0dd6b88ce34581db2be527e28ac675a8893c7503ff1 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15/trainer_1/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15/trainer_1/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3b9b527cfaf134dd0479d8eded882c69883efde7 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15/trainer_1/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15/trainer_1/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15/trainer_1/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..9aaaf3aa96997268f0e191805490d0564d74c273 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15/trainer_1/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 71.06571426391602, "l1_loss": 582.3175720214844, "l0": 211.7916717529297, "frac_variance_explained": 0.8105550169944763, "cossim": 0.9292273700237275, "l2_ratio": 0.893325787782669, "relative_reconstruction_bias": 0.9668030560016632, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.6204474687576296, "loss_zero": 12.452933025360107, "frac_recovered": 0.9829143047332763, "frac_alive": 0.4157443642616272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15/trainer_2/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15/trainer_2/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..62f3e752162fad5d0562cb93509987096366689e --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15/trainer_2/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca20a92fc1918fc4ee531c6010af4d59438666ed75fa1d35c74b0c8e11f910e6 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15/trainer_2/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15/trainer_2/config.json new file mode 100644 index 0000000000000000000000000000000000000000..163276a2db7a1f47e8b79d5f6e787c568be0350f --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15/trainer_2/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15/trainer_2/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15/trainer_2/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..7bfe905fb7218e0bcaee563a7948bc53994c5307 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15/trainer_2/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 74.8343132019043, "l1_loss": 518.3208557128906, "l0": 142.83750610351564, "frac_variance_explained": 0.8427973449230194, "cossim": 0.9188935577869415, "l2_ratio": 0.8754505813121796, "relative_reconstruction_bias": 0.9714335024356842, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.7009095668792726, "loss_zero": 12.452933025360107, "frac_recovered": 0.9748838484287262, "frac_alive": 0.29736328125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15/trainer_3/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15/trainer_3/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..9f22f9f2cc494f5bf605882bebfda4929bf2644b --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15/trainer_3/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cb076abc70028b91bbd487da4db9fd0b19bda3d621cf5d53d567d8fb7dd7b3b +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15/trainer_3/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15/trainer_3/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8b4edc05e1a6dec41e45bbe8222c9c4ff6b3a486 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15/trainer_3/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15/trainer_3/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15/trainer_3/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..bbf20e7708bb5e982b0082d6dc55332afda14bc6 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15/trainer_3/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 86.71902770996094, "l1_loss": 364.9580352783203, "l0": 71.50417022705078, "frac_variance_explained": 0.6865922451019287, "cossim": 0.8930394947528839, "l2_ratio": 0.8478472054004669, "relative_reconstruction_bias": 0.9547323644161224, "loss_original": 2.4489264488220215, "loss_reconstructed": 3.048255777359009, "loss_zero": 12.452933025360107, "frac_recovered": 0.9401768267154693, "frac_alive": 0.1317816823720932, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15/trainer_4/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15/trainer_4/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..d2d65909944dc56be1a33e7128b8620238af657d --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15/trainer_4/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5b3684b4423ea65e7b5b588018dac83262e0c7532b9929e5a4676d229903f07 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15/trainer_4/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15/trainer_4/config.json new file mode 100644 index 0000000000000000000000000000000000000000..4043709f020d7aaf5ed5a6b776bf1cd14fb19f15 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15/trainer_4/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15/trainer_4/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15/trainer_4/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..20cc8df9856553188d2040e653b38e674f70db12 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15/trainer_4/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 100.62484130859374, "l1_loss": 293.18482360839846, "l0": 36.97916793823242, "frac_variance_explained": 0.6519956588745117, "cossim": 0.8504042506217957, "l2_ratio": 0.7920243203639984, "relative_reconstruction_bias": 0.9472394466400147, "loss_original": 2.4489264488220215, "loss_reconstructed": 3.856266975402832, "loss_zero": 12.452933025360107, "frac_recovered": 0.8594030737876892, "frac_alive": 0.0527886301279068, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15/trainer_5/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15/trainer_5/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..63f89854ba4c9e071ded094d2d32c09494bb9d79 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15/trainer_5/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8b712c6e96abebbc3529abb325cb5a3cfc2854c0c9b945a2232eef529c493c8 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15/trainer_5/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15/trainer_5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7b498af6bc7d488f8183d0733871b851ab2e44cc --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15/trainer_5/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15/trainer_5/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15/trainer_5/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..83471a75c1ebbde2059a4bf49a13c3a075c9b5af --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15/trainer_5/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 116.31973800659179, "l1_loss": 210.37218780517577, "l0": 21.50833396911621, "frac_variance_explained": 0.46255811452865603, "cossim": 0.8006493985652924, "l2_ratio": 0.7345048308372497, "relative_reconstruction_bias": 0.9271275222301483, "loss_original": 2.4489264488220215, "loss_reconstructed": 5.067263555526734, "loss_zero": 12.452933025360107, "frac_recovered": 0.738304591178894, "frac_alive": 0.0166015625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_0/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..cbec54d9bfa8329ce2e3f964d5e6421e27851059 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8ea3e0cae878ffa8c34a4c1ebd21f82cc85af01eed1e09ba7ce855ae6dfe27d +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_0/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..be34c5953f2fd5798b6b0f1b3e941e9c7f38dc48 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_0/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15", + "steps": "0" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_0/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..bddf26f929307c5f5ee0d3de3dbe3e00cae1c445 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 345.34686584472655, "l1_loss": 20296.1171875, "l0": 9202.53388671875, "frac_variance_explained": -1.0981090784072876, "cossim": 0.01141314918641001, "l2_ratio": 1.1528827905654908, "relative_reconstruction_bias": 377.4131286621094, "loss_original": 2.4489264488220215, "loss_reconstructed": 20.19396209716797, "loss_zero": 12.452933025360107, "frac_recovered": -0.7740277111530304, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_19528/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_19528/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..369da6434a2e873b7099400152130c6396a23565 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_19528/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4119d3ac7d11bfe4574704fe3cd6dd52523857275febd068c0b7953f20cc8e44 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_19528/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_19528/config.json new file mode 100644 index 0000000000000000000000000000000000000000..957b4b3d1b4df915da14402a474f6c5d12caf33b --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_19528/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15", + "steps": "19528" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_19528/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_19528/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..ca0c136033daefec9b76238eeda7702425eed549 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_19528/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 56.267147827148435, "l1_loss": 881.8441711425781, "l0": 572.9500244140625, "frac_variance_explained": 0.8788844347000122, "cossim": 0.957276564836502, "l2_ratio": 0.9238235116004944, "relative_reconstruction_bias": 0.9706537663936615, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5339195251464846, "loss_zero": 12.452933025360107, "frac_recovered": 0.9915426135063171, "frac_alive": 0.7018229365348816, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_29292/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_29292/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..43470e705bc22de82b0245ac572d9b9620a8d48e --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_29292/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06422891cf9aeec5db72cb3d8f34cbfca625c94aedf70dd8ded1e52441d3139e +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_29292/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_29292/config.json new file mode 100644 index 0000000000000000000000000000000000000000..75bf9ee8a60527352a28b39a7fd0f518ea5206d7 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_29292/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15", + "steps": "29292" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_29292/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_29292/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..6e76a11a6f9f0379aa43fe069ffc6f1b366b736b --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_29292/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 56.3968994140625, "l1_loss": 864.1727722167968, "l0": 567.258349609375, "frac_variance_explained": 0.8775751173496247, "cossim": 0.9554377496242523, "l2_ratio": 0.9192665934562683, "relative_reconstruction_bias": 0.9688698828220368, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.532389187812805, "loss_zero": 12.452933025360107, "frac_recovered": 0.9916963338851928, "frac_alive": 0.7164713740348816, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_4882/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_4882/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..2d86ef5532e0bce55688719ba814bd14d1eb7e4e --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_4882/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73828a6b630b891a32d7d7b3ffcabb62477989a93ff9ff054dc41b96054d1350 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_4882/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ae5c2141efdbff2c6c4c43d9d8c58f08bc5cc0b3 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_4882/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15", + "steps": "4882" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_4882/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..930310919bf9760fd55d0dd6401fbc3deadb94b4 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 65.83646240234376, "l1_loss": 860.1312683105468, "l0": 455.60834045410155, "frac_variance_explained": 0.8343700289726257, "cossim": 0.9382294058799744, "l2_ratio": 0.898718786239624, "relative_reconstruction_bias": 0.9650586485862732, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.612096905708313, "loss_zero": 12.452933025360107, "frac_recovered": 0.9837513506412506, "frac_alive": 0.3087022602558136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_9764/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_9764/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..20c2f0c8865314fc837c50cbf27ccf3631ed13ae --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_9764/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e994b55620a88bd17cbdf390621a9a9c62448f1555e8290beee9963117bc110 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_9764/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_9764/config.json new file mode 100644 index 0000000000000000000000000000000000000000..29f26d2009c020e2f5495c65d69ead6919fe51b6 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_9764/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15", + "steps": "9764" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_9764/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_9764/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..d1d30373fa0d66271e1164918925cb1c854e9042 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_9764/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 59.59352188110351, "l1_loss": 894.8587585449219, "l0": 550.5958557128906, "frac_variance_explained": 0.8665464878082275, "cossim": 0.952945327758789, "l2_ratio": 0.9129960715770722, "relative_reconstruction_bias": 0.9654709935188294, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5457217693328857, "loss_zero": 12.452933025360107, "frac_recovered": 0.9903663218021392, "frac_alive": 0.558702290058136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_0/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..cbec54d9bfa8329ce2e3f964d5e6421e27851059 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8ea3e0cae878ffa8c34a4c1ebd21f82cc85af01eed1e09ba7ce855ae6dfe27d +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_0/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..613902cc7278d4ce8b21353e0e5de50d35cb73b8 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_0/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15", + "steps": "0" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_0/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..69074dfbf99eee2df8b4b14fb9f91de7386efbb9 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 309.31931457519534, "l1_loss": 18174.6140625, "l0": 9206.60869140625, "frac_variance_explained": -1.0168615102767944, "cossim": 0.014068684540688992, "l2_ratio": 1.148919689655304, "relative_reconstruction_bias": 85.3345558166504, "loss_original": 2.4489264488220215, "loss_reconstructed": 20.19396209716797, "loss_zero": 12.452933025360107, "frac_recovered": -0.7740277111530304, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_19528/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_19528/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..35f864edef6e6d9bc4d5b4f5d9fa9206cff32bcb --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_19528/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec9dc293011b06e0f3249089e66eacb304ac8399e9800499a90450862b798405 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_19528/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_19528/config.json new file mode 100644 index 0000000000000000000000000000000000000000..b73bd0c4d7374687af0e4ffae9bf1119a6218a86 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_19528/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15", + "steps": "19528" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_19528/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_19528/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..238eb3fd028579692ea02b35531cf821753c4244 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_19528/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 69.64362335205078, "l1_loss": 597.3470397949219, "l0": 210.09584197998046, "frac_variance_explained": 0.8484870254993438, "cossim": 0.9353990972042083, "l2_ratio": 0.9044112861156464, "relative_reconstruction_bias": 0.9764942646026611, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.630460834503174, "loss_zero": 12.452933025360107, "frac_recovered": 0.9819164752960206, "frac_alive": 0.4020182192325592, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_29292/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_29292/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..bb27e32e90c09aa3d5aa0af8c232380aa9d5280b --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_29292/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72d604231cf9917c9ce5594a28a75fa586aeeede056eaba9c839707ab2b0653c +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_29292/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_29292/config.json new file mode 100644 index 0000000000000000000000000000000000000000..25ab8d33ceeb276d0eb40f703e225e663582a478 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_29292/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15", + "steps": "29292" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_29292/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_29292/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..0ab7ad10a6c8eb95fdc46bad24a1d9b02e604f39 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_29292/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 67.32218284606934, "l1_loss": 593.622802734375, "l0": 204.5291717529297, "frac_variance_explained": 0.874887353181839, "cossim": 0.9349782288074493, "l2_ratio": 0.891505342721939, "relative_reconstruction_bias": 0.9683610677719117, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.6259650945663453, "loss_zero": 12.452933025360107, "frac_recovered": 0.9823659896850586, "frac_alive": 0.4109157919883728, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_4882/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_4882/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..44532db88211af5a0070cb6ae4ca34ba0616bc08 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_4882/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:062f12176e3d1cae29c4ebdefa572d2d6aba4fe9a790795f86dfb3686fdb9c51 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_4882/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..2644c9102713f0d7f006307f04816db6d073676f --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_4882/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15", + "steps": "4882" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_4882/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..2f3628f3aa9521dabe58c7569b3c8217ae0e1b35 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 86.48891220092773, "l1_loss": 549.2065277099609, "l0": 140.87083740234374, "frac_variance_explained": 0.7946570932865142, "cossim": 0.895643126964569, "l2_ratio": 0.8471280336380005, "relative_reconstruction_bias": 0.9659393191337585, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.91333270072937, "loss_zero": 12.452933025360107, "frac_recovered": 0.9536597430706024, "frac_alive": 0.1510959267616272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_9764/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_9764/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..bd4fb883466f3c19fdc7fe00dbca1ed33b8a2059 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_9764/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b101465c519ba7f5f5ec01bfd763f4c22ea426155c91ff7add7a93761e82c43c +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_9764/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_9764/config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee1b1e7db496a447cbe58ba2fd7e25aa75ea7bb --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_9764/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15", + "steps": "9764" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_9764/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_9764/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..245f7ba5793e66c7e42873bd5a2735b9a81155cd --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_9764/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 76.43365097045898, "l1_loss": 576.5745178222656, "l0": 193.6166732788086, "frac_variance_explained": 0.7833783626556396, "cossim": 0.921230536699295, "l2_ratio": 0.8782177805900574, "relative_reconstruction_bias": 0.9608027756214141, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.687347412109375, "loss_zero": 12.452933025360107, "frac_recovered": 0.9762332677841187, "frac_alive": 0.3069661557674408, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_0/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..cbec54d9bfa8329ce2e3f964d5e6421e27851059 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8ea3e0cae878ffa8c34a4c1ebd21f82cc85af01eed1e09ba7ce855ae6dfe27d +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_0/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1da0aa00e1b3cd254e173c33b938429065ca4cc2 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_0/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15", + "steps": "0" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_0/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..52954613d6ddedb2c249957875c85878353b0647 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 357.96155700683596, "l1_loss": 21082.8451171875, "l0": 9206.68349609375, "frac_variance_explained": -1.1313880801200866, "cossim": 0.008467582450248302, "l2_ratio": 1.1504011392593383, "relative_reconstruction_bias": 650.3504539489746, "loss_original": 2.4489264488220215, "loss_reconstructed": 20.19396209716797, "loss_zero": 12.452933025360107, "frac_recovered": -0.7740277111530304, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_19528/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_19528/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..2bfca15773c0b4d042de77c1be7823434e2c9c2f --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_19528/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7aee0f046442a0ef23a1830104d2094fda86ff6c7693a27910253d95f3f739f6 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_19528/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_19528/config.json new file mode 100644 index 0000000000000000000000000000000000000000..33e0c82ab012568b81ec17c3ae7d668c402ba566 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_19528/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15", + "steps": "19528" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_19528/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_19528/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..39b04da2ebf6688c742cd7d0103edbc7ef8d3b9a --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_19528/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 76.40582733154297, "l1_loss": 494.91371765136716, "l0": 141.6041717529297, "frac_variance_explained": 0.7803987979888916, "cossim": 0.9178543150424957, "l2_ratio": 0.8756958305835724, "relative_reconstruction_bias": 0.9623727262020111, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.718927788734436, "loss_zero": 12.452933025360107, "frac_recovered": 0.9730895340442658, "frac_alive": 0.2835286557674408, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_29292/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_29292/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..c5a631c604c6771857c358869f3ead511140ff0a --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_29292/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:268e09e30445842cf4b50fbc64f4705bdaddbbb61b2392523d988d2ad8b6a45f +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_29292/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_29292/config.json new file mode 100644 index 0000000000000000000000000000000000000000..2df692513a798a2172857747b1e2da8292122da9 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_29292/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15", + "steps": "29292" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_29292/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_29292/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..2537b3c2304be68903934bf357d91602d15e6a63 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_29292/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 75.05004501342773, "l1_loss": 480.6728576660156, "l0": 139.8375045776367, "frac_variance_explained": 0.7775776326656342, "cossim": 0.9192296743392945, "l2_ratio": 0.8699380576610565, "relative_reconstruction_bias": 0.9531834602355957, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.708043622970581, "loss_zero": 12.452933025360107, "frac_recovered": 0.9741721630096436, "frac_alive": 0.2931857705116272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_4882/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_4882/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..b6121cf927fc23103371cd0676fa48140f3e8e2b --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_4882/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a31c60512404a48549689fa9c40c07934c98524eebe70ead7652f4ef96ec920c +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_4882/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..11b04dd602efb3895246d694157f0eeb0129d808 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_4882/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15", + "steps": "4882" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_4882/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..742c90c6b3ba3833de2a395e76e0c04dfccf3358 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 89.6248908996582, "l1_loss": 487.31353454589845, "l0": 89.47500305175781, "frac_variance_explained": 0.8164544761180877, "cossim": 0.8867722988128662, "l2_ratio": 0.8388439238071441, "relative_reconstruction_bias": 0.9730870962142945, "loss_original": 2.4489264488220215, "loss_reconstructed": 3.213028359413147, "loss_zero": 12.452933025360107, "frac_recovered": 0.9237131655216217, "frac_alive": 0.1077473983168602, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_9764/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_9764/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..91ac75d262280fc3693f9e7db70ce9af01cf5fa5 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_9764/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f22b19ec31c94db1862a8f33319fa75edcec5fa50a0d77a6d8d2aa35f618228b +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_9764/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_9764/config.json new file mode 100644 index 0000000000000000000000000000000000000000..515b985f76ff07071b89d0bc97fcdd56926c697a --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_9764/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15", + "steps": "9764" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_9764/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_9764/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..e3acfb1227322731ec3f698ad01af5796ce1ca94 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_9764/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 82.32062911987305, "l1_loss": 506.02118530273435, "l0": 124.69583663940429, "frac_variance_explained": 0.8028147995471955, "cossim": 0.9091424822807312, "l2_ratio": 0.8598536193370819, "relative_reconstruction_bias": 0.9641382932662964, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.8192770004272463, "loss_zero": 12.452933025360107, "frac_recovered": 0.9630652248859406, "frac_alive": 0.2059461772441864, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_0/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..cbec54d9bfa8329ce2e3f964d5e6421e27851059 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8ea3e0cae878ffa8c34a4c1ebd21f82cc85af01eed1e09ba7ce855ae6dfe27d +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_0/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7e12cdd7dd887bb2873762fb9f077e1297d67038 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_0/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15", + "steps": "0" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_0/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..89a3e3e35bcbe02ae723c0e1ea73755e90a2d1c7 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 329.96795959472655, "l1_loss": 19392.84921875, "l0": 9206.46708984375, "frac_variance_explained": -1.0648847818374634, "cossim": 0.01210685190744698, "l2_ratio": 1.1510992288589477, "relative_reconstruction_bias": 433.75506896972655, "loss_original": 2.4489264488220215, "loss_reconstructed": 20.19396209716797, "loss_zero": 12.452933025360107, "frac_recovered": -0.7740277111530304, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_19528/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_19528/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..48fc74c34327855e8ff25760139223910f6b3ae9 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_19528/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:768586d22fa86867450ca12c2e0035846422b1125f4ed9285442f98e36afbb12 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_19528/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_19528/config.json new file mode 100644 index 0000000000000000000000000000000000000000..6b8caa143619b624eff3b868fa31079bc2434704 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_19528/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15", + "steps": "19528" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_19528/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_19528/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..af71d5354392d6c1c33447e5b6ce2e3767173fe3 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_19528/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 88.54714508056641, "l1_loss": 365.0473266601563, "l0": 71.38333587646484, "frac_variance_explained": 0.6749896943569184, "cossim": 0.8906834661960602, "l2_ratio": 0.8345278620719909, "relative_reconstruction_bias": 0.9417788088321686, "loss_original": 2.4489264488220215, "loss_reconstructed": 3.06937689781189, "loss_zero": 12.452933025360107, "frac_recovered": 0.9380646049976349, "frac_alive": 0.1263563334941864, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_29292/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_29292/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..9c79c791de598edeeb6de2e7fc11ecafd2eda5b6 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_29292/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99fce5a43bda4a62a3c0d598de96a1d6fa40bcfe06c89b9216342006df125d72 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_29292/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_29292/config.json new file mode 100644 index 0000000000000000000000000000000000000000..dbe13632fccdd3b19fce84e3334786a2c604d2e3 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_29292/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15", + "steps": "29292" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_29292/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_29292/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..1f4fbdb7f3b5ed94d5b9154208a890036dc3dbc2 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_29292/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 89.96531372070312, "l1_loss": 384.5672210693359, "l0": 73.38333587646484, "frac_variance_explained": 0.7096147537231445, "cossim": 0.8850144863128662, "l2_ratio": 0.8333487749099732, "relative_reconstruction_bias": 0.9550144493579864, "loss_original": 2.4489264488220215, "loss_reconstructed": 3.054198741912842, "loss_zero": 12.452933025360107, "frac_recovered": 0.939582222700119, "frac_alive": 0.1322699636220932, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_4882/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_4882/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..454ad6f2c1023f220f76b0e4b04874b80b0d9e71 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_4882/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23f7ec2f0e2ccbe5cc10afd6fe4f14e577a81cb2d96bd711c7b61e79a32adcaf +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_4882/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..abe5682b1961a6baed4634eb1f23de5d6be0aef4 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_4882/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15", + "steps": "4882" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_4882/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..e047652540e2d9ae5bf15acb314d3829d9a9fc72 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 105.83427963256835, "l1_loss": 308.1408233642578, "l0": 44.86250114440918, "frac_variance_explained": 0.5613835871219635, "cossim": 0.8373046159744263, "l2_ratio": 0.7850826799869537, "relative_reconstruction_bias": 0.9457923650741578, "loss_original": 2.4489264488220215, "loss_reconstructed": 4.157148933410644, "loss_zero": 12.452933025360107, "frac_recovered": 0.8292386412620545, "frac_alive": 0.0461697056889534, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_9764/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_9764/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..7ff18709a0f0ce4b8faa03256c28bff4d9ec49f2 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_9764/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47c5dac1258d5a2bdfd63a6a60482e08dc83e9a4df28f0410617b793c8b69fe9 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_9764/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_9764/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a3cdc6fe974acd1664347d5e20f1ec04c5faae81 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_9764/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15", + "steps": "9764" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_9764/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_9764/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..ca39d500afd128d308e0a2b4f32c3a7c90cb5936 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_9764/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 95.02903060913086, "l1_loss": 369.2692474365234, "l0": 60.35416831970215, "frac_variance_explained": 0.6689476788043975, "cossim": 0.8694744765758514, "l2_ratio": 0.8168606281280517, "relative_reconstruction_bias": 0.9511541247367858, "loss_original": 2.4489264488220215, "loss_reconstructed": 3.3427521228790282, "loss_zero": 12.452933025360107, "frac_recovered": 0.9107385158538819, "frac_alive": 0.08740234375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_0/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..cbec54d9bfa8329ce2e3f964d5e6421e27851059 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8ea3e0cae878ffa8c34a4c1ebd21f82cc85af01eed1e09ba7ce855ae6dfe27d +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_0/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7206eec54f72d898c94e30e9b590a7c898cd40ef --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_0/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15", + "steps": "0" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_0/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..1e8c77b2a271569db276148bbd860e827477b705 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 300.9563018798828, "l1_loss": 17645.642578125, "l0": 9198.13779296875, "frac_variance_explained": -1.013127624988556, "cossim": 0.013861041935160755, "l2_ratio": 1.1500229835510254, "relative_reconstruction_bias": 88.0575668334961, "loss_original": 2.4489264488220215, "loss_reconstructed": 20.19396209716797, "loss_zero": 12.452933025360107, "frac_recovered": -0.7740277111530304, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_19528/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_19528/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..41a777b0ed40c325cf51dfb2921f9e4b82242662 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_19528/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88e71055dd8e230d5e460f05baae144ff3f088047aac7b0cd86db34da4a99eaa +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_19528/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_19528/config.json new file mode 100644 index 0000000000000000000000000000000000000000..775dd2630330ec6a4c32fde1ab839e50970e8851 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_19528/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15", + "steps": "19528" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_19528/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_19528/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..d1f350d9d835637efea1e4cf77bcb7ae8a171598 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_19528/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 103.40009841918945, "l1_loss": 296.4202911376953, "l0": 38.68750152587891, "frac_variance_explained": 0.6152091562747956, "cossim": 0.848116797208786, "l2_ratio": 0.7903030157089234, "relative_reconstruction_bias": 0.9468436658382415, "loss_original": 2.4489264488220215, "loss_reconstructed": 3.885764646530151, "loss_zero": 12.452933025360107, "frac_recovered": 0.8564519286155701, "frac_alive": 0.0512152798473835, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_29292/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_29292/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..fadf5af4688ab83615391b23a8b094492f98c762 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_29292/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f460386298d8d29271eb0ac6b4f136e7e7ed58261d3ad5edfda7e0f578013cf6 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_29292/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_29292/config.json new file mode 100644 index 0000000000000000000000000000000000000000..62d6ee83f0a049a72ee09cf4b823f8d51dd61b6e --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_29292/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15", + "steps": "29292" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_29292/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_29292/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..4772421176575e93acd272874370a6f05985de06 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_29292/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 98.76273498535156, "l1_loss": 310.260791015625, "l0": 38.066667556762695, "frac_variance_explained": 0.7056249916553498, "cossim": 0.854061508178711, "l2_ratio": 0.796537584066391, "relative_reconstruction_bias": 0.955382913351059, "loss_original": 2.4489264488220215, "loss_reconstructed": 3.8572248458862304, "loss_zero": 12.452933025360107, "frac_recovered": 0.859305465221405, "frac_alive": 0.0527886301279068, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_4882/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_4882/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..dd9505f0789d68cee300a646519bd0254842270f --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_4882/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60653f6d7d3feeb30329cd91db4469587a74107ae5543d7ae666496821373092 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_4882/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7853e916a382ce4419ad87a67de52387e121d198 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_4882/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15", + "steps": "4882" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_4882/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..1757e77655398de3c6284d8de55bd6f8d0f58d19 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 117.62990493774414, "l1_loss": 277.4144821166992, "l0": 26.11250057220459, "frac_variance_explained": 0.5763835966587066, "cossim": 0.7921292901039123, "l2_ratio": 0.7330048859119416, "relative_reconstruction_bias": 0.9526737928390503, "loss_original": 2.4489264488220215, "loss_reconstructed": 5.120844125747681, "loss_zero": 12.452933025360107, "frac_recovered": 0.7329265534877777, "frac_alive": 0.02294921875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_9764/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_9764/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..e6b7cc7ab4cd502356218800e5d55865beb43587 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_9764/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c4ffec8aa328a51f34d3f780f29c074ae762c69971025231b88f3136c351b75 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_9764/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_9764/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d70e38ffd5e7a17d689865bb4c55e88eca7690cb --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_9764/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15", + "steps": "9764" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_9764/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_9764/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..f73312f41de2986360cd7478aae584a448f167c0 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_9764/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 108.08128051757812, "l1_loss": 283.7849319458008, "l0": 32.73750114440918, "frac_variance_explained": 0.609447431564331, "cossim": 0.8258947014808655, "l2_ratio": 0.7666231095790863, "relative_reconstruction_bias": 0.9466954946517945, "loss_original": 2.4489264488220215, "loss_reconstructed": 4.421977472305298, "loss_zero": 12.452933025360107, "frac_recovered": 0.8028035283088684, "frac_alive": 0.0334201380610466, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_0/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..cbec54d9bfa8329ce2e3f964d5e6421e27851059 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8ea3e0cae878ffa8c34a4c1ebd21f82cc85af01eed1e09ba7ce855ae6dfe27d +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_0/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..02b232fc4f828da81651541a6b340362984b8ea4 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_0/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15", + "steps": "0" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_0/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..94bc0c6cfe36819ebe8d641866f95a87c2373be0 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 306.9012969970703, "l1_loss": 18003.7328125, "l0": 9203.4712890625, "frac_variance_explained": -1.019401490688324, "cossim": 0.01279164464212954, "l2_ratio": 1.1494730234146118, "relative_reconstruction_bias": 89.16192741394043, "loss_original": 2.4489264488220215, "loss_reconstructed": 20.19396209716797, "loss_zero": 12.452933025360107, "frac_recovered": -0.7740277111530304, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_19528/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_19528/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..cdcde8901957e4667d062c424dd08d08b6c32303 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_19528/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a31ec9fbfb3d764b86f0062fd139ec235d93a915f677c22e6394b172d07ccbe9 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_19528/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_19528/config.json new file mode 100644 index 0000000000000000000000000000000000000000..b49f3f39a0a5b5763392b8991498017da4efc204 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_19528/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15", + "steps": "19528" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_19528/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_19528/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..85f532051999d5ee124d1e08841edead0f0a6801 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_19528/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 113.40176162719726, "l1_loss": 229.5638916015625, "l0": 21.400000381469727, "frac_variance_explained": 0.5192161083221436, "cossim": 0.811490386724472, "l2_ratio": 0.745463764667511, "relative_reconstruction_bias": 0.932452243566513, "loss_original": 2.4489264488220215, "loss_reconstructed": 5.110394096374511, "loss_zero": 12.452933025360107, "frac_recovered": 0.7339873611927032, "frac_alive": 0.01595052145421505, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_29292/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_29292/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..cbc5f33e26c6db2ebc16d53c0d085e12981ba679 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_29292/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d596b84adac8ff6976e3fd9fe8ec460b6a7ce9932594276a3d0b3b7b02a55b61 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_29292/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_29292/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d8cbbfe31c65f850112a0ecbb6a9539b52a2a441 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_29292/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15", + "steps": "29292" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_29292/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_29292/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..da27fe594e755877b77c08d3a70d2f4ff4d9a7ad --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_29292/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 116.43068313598633, "l1_loss": 254.01712493896486, "l0": 20.925000762939455, "frac_variance_explained": 0.6302931845188141, "cossim": 0.8085563004016876, "l2_ratio": 0.745689457654953, "relative_reconstruction_bias": 0.9521325170993805, "loss_original": 2.4489264488220215, "loss_reconstructed": 5.067006683349609, "loss_zero": 12.452933025360107, "frac_recovered": 0.7383169591426849, "frac_alive": 0.01676432229578495, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_4882/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_4882/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..f2528279a3a618bd0e0c71ab6471ad7d3d79e790 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_4882/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94b4f04024be51c96e8168e099e07fe676fdcb89ed0e57ea2141b35946c7d8f9 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_4882/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..617c732c84d4b5738ace945d58a68f0522192b13 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_4882/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15", + "steps": "4882" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_4882/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..9b9a288ce2a8b521da6c30ea1da3b5dba939c43b --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 127.0767593383789, "l1_loss": 196.8880386352539, "l0": 14.475000381469727, "frac_variance_explained": 0.3702893376350403, "cossim": 0.7571694314479828, "l2_ratio": 0.6895628035068512, "relative_reconstruction_bias": 0.9235762596130371, "loss_original": 2.4489264488220215, "loss_reconstructed": 6.232675361633301, "loss_zero": 12.452933025360107, "frac_recovered": 0.621836656332016, "frac_alive": 0.00775824673473835, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_9764/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_9764/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..581bbc3214858238c66e286dfd95cb3f64fc1f45 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_9764/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5a453c34ec34ac0281a657ec3de82c5ccbd81979ee6b5dce6b0e71765e03014 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_9764/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_9764/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7266aa37d2353b66f04993db730ce041f059548e --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_9764/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15", + "steps": "9764" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_9764/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_9764/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..3e883f8f4d14ddf5139bf9fcdab8874cf67007e4 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_9764/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 118.67613143920899, "l1_loss": 244.48780670166016, "l0": 18.704167747497557, "frac_variance_explained": 0.6204754829406738, "cossim": 0.7890335321426392, "l2_ratio": 0.7282597661018372, "relative_reconstruction_bias": 0.9558484613895416, "loss_original": 2.4489264488220215, "loss_reconstructed": 5.5330281257629395, "loss_zero": 12.452933025360107, "frac_recovered": 0.6917610883712768, "frac_alive": 0.01166449673473835, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19/trainer_0/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19/trainer_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..19b93d3ee53bbe57b8fcb7fd520254c5fab537e9 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19/trainer_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:393b57828419648325d194d55730a9425d055841ab5c707b130a1d40a3a41dd9 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19/trainer_0/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19/trainer_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..5f8d7c705be08b143af8dc24d3c430c6c02db6c2 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19/trainer_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19/trainer_0/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19/trainer_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..13eb3833e494b38aa0f244c92bf93b4f96b051a8 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19/trainer_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 91.28402938842774, "l1_loss": 1256.6742797851562, "l0": 435.30417785644534, "frac_variance_explained": 0.8860715329647064, "cossim": 0.9472658216953278, "l2_ratio": 0.9143549501895905, "relative_reconstruction_bias": 0.9769266545772552, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5746376276016236, "loss_zero": 12.452933025360107, "frac_recovered": 0.9874874651432037, "frac_alive": 0.733018696308136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19/trainer_1/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19/trainer_1/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..c04bd33f6b0455bf85119027ba1c4edf67566565 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19/trainer_1/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0018a5a6fdfb44deb4be3429469449d7d0c93c457d19a3054982592648887bb6 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19/trainer_1/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19/trainer_1/config.json new file mode 100644 index 0000000000000000000000000000000000000000..53af6bb5e907ffb9e7a1991ea880144d31606d90 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19/trainer_1/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19/trainer_1/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19/trainer_1/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..cbce44d11591cdf9c2f5a93e046726caa2789457 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19/trainer_1/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 110.14564514160156, "l1_loss": 852.2773986816406, "l0": 177.4000030517578, "frac_variance_explained": 0.7850531160831451, "cossim": 0.9229176878929138, "l2_ratio": 0.8877681434154511, "relative_reconstruction_bias": 0.965803575515747, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.677743363380432, "loss_zero": 12.452933025360107, "frac_recovered": 0.9771980524063111, "frac_alive": 0.4453125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19/trainer_2/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19/trainer_2/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..290b4c2203f9191659a4b310a0c900a45c59dcfe --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19/trainer_2/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a91c90001a70c60f45e2340610e1724905b95ecd9370dae10fb1be3660138d9 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19/trainer_2/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19/trainer_2/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8d2f91424eba5179b81b236f1a35d2a91aba18a7 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19/trainer_2/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19/trainer_2/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19/trainer_2/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..5b25367b54a863a9a4232a7d005709c662956e86 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19/trainer_2/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 118.12515640258789, "l1_loss": 760.8950073242188, "l0": 124.0583381652832, "frac_variance_explained": 0.7938235461711883, "cossim": 0.91240553855896, "l2_ratio": 0.8692996561527252, "relative_reconstruction_bias": 0.963777494430542, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.759493088722229, "loss_zero": 12.452933025360107, "frac_recovered": 0.9690388798713684, "frac_alive": 0.3219943642616272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19/trainer_3/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19/trainer_3/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..498920c89712cec82fda004d474c59afdff545ca --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19/trainer_3/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6142c0d75448a77d964a5f93cee25531355443f136832a96c55e6a777853e2d2 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19/trainer_3/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19/trainer_3/config.json new file mode 100644 index 0000000000000000000000000000000000000000..841610391713750c29fe5537cc05dbeb051775f4 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19/trainer_3/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19/trainer_3/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19/trainer_3/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..15f02d0c6a9f990c67b0943cd384f701f712a723 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19/trainer_3/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 141.95765686035156, "l1_loss": 574.080859375, "l0": 65.35833473205567, "frac_variance_explained": 0.6718634366989136, "cossim": 0.8699315905570983, "l2_ratio": 0.8178958654403686, "relative_reconstruction_bias": 0.9495936155319213, "loss_original": 2.4489264488220215, "loss_reconstructed": 3.064043641090393, "loss_zero": 12.452933025360107, "frac_recovered": 0.9386282324790954, "frac_alive": 0.155056431889534, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19/trainer_4/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19/trainer_4/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..2c95ba88594751c6dd85b243a7278ec4696d7328 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19/trainer_4/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc43c493408245ef1f62c62353b39e96e6bd202a8ce4a26fdd44df574e0a9f95 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19/trainer_4/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19/trainer_4/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ce6e8b11c856e6ef883d1fdfe2824bb3e594b1b1 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19/trainer_4/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19/trainer_4/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19/trainer_4/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..b064bcbff1744e18d9c5a960db97a4d87f2119bb --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19/trainer_4/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 161.2818618774414, "l1_loss": 432.40709228515624, "l0": 35.02916831970215, "frac_variance_explained": 0.5828464150428772, "cossim": 0.8308584272861481, "l2_ratio": 0.772969126701355, "relative_reconstruction_bias": 0.9414819777011871, "loss_original": 2.4489264488220215, "loss_reconstructed": 3.6968069553375242, "loss_zero": 12.452933025360107, "frac_recovered": 0.8754043757915497, "frac_alive": 0.0658637136220932, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19/trainer_5/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19/trainer_5/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..aab4530406bf9dcac350e1abbe5b1b5f8d561ea2 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19/trainer_5/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:267129d4446a708b4decf11e3aded7cd75883fd853aa2427d7581603d5be6294 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19/trainer_5/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19/trainer_5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..519c1a5dff465e4190b33f753bded7b65fd90064 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19/trainer_5/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19/trainer_5/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19/trainer_5/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..b78b7aaa41fc3dd20f050d11206faed57ee8a271 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19/trainer_5/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 178.40693359375, "l1_loss": 341.96990966796875, "l0": 19.866667175292967, "frac_variance_explained": 0.572351050376892, "cossim": 0.7818877041339874, "l2_ratio": 0.7191039443016052, "relative_reconstruction_bias": 0.9454742312431336, "loss_original": 2.4489264488220215, "loss_reconstructed": 4.59279203414917, "loss_zero": 12.452933025360107, "frac_recovered": 0.7858266532421112, "frac_alive": 0.02707248367369175, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_0/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..cbec54d9bfa8329ce2e3f964d5e6421e27851059 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8ea3e0cae878ffa8c34a4c1ebd21f82cc85af01eed1e09ba7ce855ae6dfe27d +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_0/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1586df0888618284871d0c5b8be71ff0b10ae9df --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_0/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "0" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_0/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..b15a0e4a7fd24c15184fbfee17e4daa152bdac10 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 476.5137939453125, "l1_loss": 27929.941015625, "l0": 9201.62548828125, "frac_variance_explained": -1.0295320749282837, "cossim": 0.012236462812870741, "l2_ratio": 1.1525774598121643, "relative_reconstruction_bias": 113.99189834594726, "loss_original": 2.4489264488220215, "loss_reconstructed": 22.127364730834962, "loss_zero": 12.452933025360107, "frac_recovered": -0.9674367189407349, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_19528/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_19528/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..2142b03fcd62ada3991f54b5a37b5eb9914548f5 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_19528/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:182cfdcb8a551d3ca89a1964b2d880c43b6e0849e9a7eb4a290b56d69472bd80 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_19528/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_19528/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3aa9b9e7ebccdd0a6f5fdcdfa0f80b64a1a79995 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_19528/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "19528" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_19528/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_19528/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..bcb42b409fa6831fb10cf39ae39c72a2723c4229 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_19528/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 93.86397018432618, "l1_loss": 1263.2129150390624, "l0": 440.7541778564453, "frac_variance_explained": 0.8598763763904571, "cossim": 0.9463169157505036, "l2_ratio": 0.906745970249176, "relative_reconstruction_bias": 0.9656191468238831, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5787819623947144, "loss_zero": 12.452933025360107, "frac_recovered": 0.9870810329914093, "frac_alive": 0.7139214277267456, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_29292/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_29292/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..3d2661085c608a592cfdf8e7ac323fdeebdde0d6 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_29292/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:652e5b447d66b2754ffd1a3491a1458f92f162578e9766977967bcfb5ed553e3 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_29292/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_29292/config.json new file mode 100644 index 0000000000000000000000000000000000000000..5fcd503ddff8911a2fab836cd60d1b23e9a78f02 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_29292/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "29292" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_29292/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_29292/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..b461a0d7a9b638720dccd59924a00ef850362b0b --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_29292/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 92.67095947265625, "l1_loss": 1256.2594482421875, "l0": 437.90001220703124, "frac_variance_explained": 0.8567610919475556, "cossim": 0.9466577529907226, "l2_ratio": 0.914499944448471, "relative_reconstruction_bias": 0.9709823846817016, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.577598452568054, "loss_zero": 12.452933025360107, "frac_recovered": 0.9871953308582306, "frac_alive": 0.7294921875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_4882/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_4882/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..875baafe4fe36dc7efb6b98c52ac238a51d202e5 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_4882/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee533a9c23dde23404879c2f94dc9363b0bdbbff7ccb65f537ac8d0abad4415d +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_4882/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..fcc5a2220edd92a3f8d089023a6c906957f64002 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_4882/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "4882" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_4882/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..27076342b7466d26a5a498d5a55da3c08b3a08d8 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 108.87662658691406, "l1_loss": 1333.6712890625, "l0": 387.06250915527346, "frac_variance_explained": 0.8603636085987091, "cossim": 0.9263006448745728, "l2_ratio": 0.8838371932506561, "relative_reconstruction_bias": 0.9714521706104279, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.6761178255081175, "loss_zero": 12.452933025360107, "frac_recovered": 0.9773569166660309, "frac_alive": 0.2958441972732544, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_9764/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_9764/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..d3a83afa67980e340a7bdde5dcdb05687c12180b --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_9764/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9a2f99f66f993533069a237482e7fd142c2110e564edda705ec5ca956fd3fac +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_9764/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_9764/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8b19092767a83f3197946597cb1b777e1b15a205 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_9764/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "9764" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_9764/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_9764/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..16a774245ba0d1ca46774117cd53bb396c5581d2 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_9764/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 97.31823806762695, "l1_loss": 1314.0627319335938, "l0": 438.8916748046875, "frac_variance_explained": 0.8712191760540009, "cossim": 0.9404881417751312, "l2_ratio": 0.899276328086853, "relative_reconstruction_bias": 0.9690877258777618, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5978754281997682, "loss_zero": 12.452933025360107, "frac_recovered": 0.985168582201004, "frac_alive": 0.5619574785232544, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_0/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..cbec54d9bfa8329ce2e3f964d5e6421e27851059 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8ea3e0cae878ffa8c34a4c1ebd21f82cc85af01eed1e09ba7ce855ae6dfe27d +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_0/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..71893d4f1c953776bcc0092523ffe9842a3d9c7b --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_0/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "0" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_0/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..78f5afa5fc73b11389ce21c8bd521d23023e7691 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 469.7210327148438, "l1_loss": 27510.2091796875, "l0": 9203.58369140625, "frac_variance_explained": -1.0270875334739684, "cossim": 0.011769048497080803, "l2_ratio": 1.1531781554222107, "relative_reconstruction_bias": 115.07438316345215, "loss_original": 2.4489264488220215, "loss_reconstructed": 22.127364730834962, "loss_zero": 12.452933025360107, "frac_recovered": -0.9674367189407349, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_19528/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_19528/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..884007a3489183cce4813f8f1cdf08a07a54c8e9 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_19528/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:289cf0ed06408bc09e8d3cac49219262b9b4e744e5cfafcc0a56954208764771 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_19528/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_19528/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e0c8c79d7d69693951b9c4f692c95462220c8cac --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_19528/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "19528" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_19528/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_19528/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..88170cdbc9c440bf11e269911c5a969689075027 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_19528/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 110.03227386474609, "l1_loss": 873.5697265625, "l0": 168.37083740234374, "frac_variance_explained": 0.8519340872764587, "cossim": 0.9215126216411591, "l2_ratio": 0.8836429595947266, "relative_reconstruction_bias": 0.9735068917274475, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.684986972808838, "loss_zero": 12.452933025360107, "frac_recovered": 0.9764781534671784, "frac_alive": 0.4166666567325592, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_29292/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_29292/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..5aad7925eb9e8f24f98714187da2bd98447677f5 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_29292/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c62b690c54087d234a939ab084ed6f0198861116e413b3dfe2580b759ed83b9 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_29292/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_29292/config.json new file mode 100644 index 0000000000000000000000000000000000000000..55d60c0df1b91a5651589d77616a964114e71de7 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_29292/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "29292" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_29292/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_29292/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..0925dde614132d11aba2f9b729efa0c1f8236106 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_29292/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 108.9969581604004, "l1_loss": 854.9797668457031, "l0": 167.38750610351562, "frac_variance_explained": 0.8387749314308166, "cossim": 0.923299503326416, "l2_ratio": 0.8797519683837891, "relative_reconstruction_bias": 0.9683898210525512, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.68062047958374, "loss_zero": 12.452933025360107, "frac_recovered": 0.9769135475158691, "frac_alive": 0.4310438334941864, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_4882/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_4882/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..def16bd06e01f747fa2dfafaf848f59f60c3d4c2 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_4882/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66b31ab212e1e7a15468bab03762948e9d9bc5f5da5d08b6a25e68e77b290266 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_4882/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a62381c1ea1a62cae751d9b0278ba9de95c95c9 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_4882/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "4882" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_4882/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..c6dd47c95eee3b6fc2c58d0b9bb000a9d9416f98 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 141.41783447265624, "l1_loss": 745.2005798339844, "l0": 115.66666870117187, "frac_variance_explained": 0.7110332548618317, "cossim": 0.8681071102619171, "l2_ratio": 0.8197387874126434, "relative_reconstruction_bias": 0.9568406403064728, "loss_original": 2.4489264488220215, "loss_reconstructed": 3.047093152999878, "loss_zero": 12.452933025360107, "frac_recovered": 0.9403061449527741, "frac_alive": 0.1588541716337204, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_9764/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_9764/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..82961e4b5ddd609719dd7b2df8d00eaea5a45c4b --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_9764/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fd4c4e75be0c9e6d4e2ca3b748f5f1cfd27aaceae84b1ae036b182965ed4533 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_9764/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_9764/config.json new file mode 100644 index 0000000000000000000000000000000000000000..65f86b464aaaa6299b8fdd29789fce8e3805be61 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_9764/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "9764" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_9764/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_9764/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..5c85b14283f30e1c7b9b97e7d4a3fe90c9894000 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_9764/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 120.82672958374023, "l1_loss": 828.5349914550782, "l0": 153.17917022705078, "frac_variance_explained": 0.7635988712310791, "cossim": 0.9056476771831512, "l2_ratio": 0.8602029979228973, "relative_reconstruction_bias": 0.9580097973346711, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.7556072473526, "loss_zero": 12.452933025360107, "frac_recovered": 0.9694255471229554, "frac_alive": 0.3186849057674408, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_0/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..cbec54d9bfa8329ce2e3f964d5e6421e27851059 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8ea3e0cae878ffa8c34a4c1ebd21f82cc85af01eed1e09ba7ce855ae6dfe27d +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_0/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..2a09ba467e251ec5a0df94b5828b89ac288a3c1b --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_0/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "0" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_0/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..43c4a99e27259c8db89047f4e7693dac8fc962ee --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 543.9961242675781, "l1_loss": 32119.88125, "l0": 9198.7751953125, "frac_variance_explained": -1.130611777305603, "cossim": 0.015365668013691903, "l2_ratio": 1.152680516242981, "relative_reconstruction_bias": 170.06737594604493, "loss_original": 2.4489264488220215, "loss_reconstructed": 22.127364730834962, "loss_zero": 12.452933025360107, "frac_recovered": -0.9674367189407349, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_19528/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_19528/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..bfec54b30e9260e72924ae33ee53684289bb38e1 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_19528/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:085dba3bda0105c97b6140f033c712967dd270fa64ea9657c20034c5d5cf0449 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_19528/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_19528/config.json new file mode 100644 index 0000000000000000000000000000000000000000..9e2a7c259e791620a82e1e0da9a10eddc6862183 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_19528/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "19528" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_19528/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_19528/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..616c0f882cdfba2ea7a9ba7883cf19065eff148f --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_19528/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 122.56941299438476, "l1_loss": 754.7602233886719, "l0": 117.95000228881835, "frac_variance_explained": 0.79875568151474, "cossim": 0.9004019260406494, "l2_ratio": 0.854559189081192, "relative_reconstruction_bias": 0.9649579346179962, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.7700485467910765, "loss_zero": 12.452933025360107, "frac_recovered": 0.9679890871047974, "frac_alive": 0.3129340410232544, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_29292/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_29292/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..8ac8b1c18e5b0cadd2e2387e482dc8206d0c01bc --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_29292/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d6e08ae5813385042d486b722e720175f9e4e7be28d87d1d2503fc249f592bc +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_29292/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_29292/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7273acaa142090cd310725025f7ea6b9474e6d85 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_29292/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "29292" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_29292/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_29292/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..84adfa1dfffa2e9bb9428612007f4577a5b4ac18 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_29292/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 118.51541519165039, "l1_loss": 735.8483154296875, "l0": 122.21250228881836, "frac_variance_explained": 0.7559566974639893, "cossim": 0.9130769848823548, "l2_ratio": 0.8663942098617554, "relative_reconstruction_bias": 0.9520857751369476, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.76176860332489, "loss_zero": 12.452933025360107, "frac_recovered": 0.9688154280185699, "frac_alive": 0.3194444477558136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_4882/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_4882/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..004a4c7f35e92b57dde88eec14a413e503206d90 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_4882/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ace6dfcc8a1a08822f992ae56882f447ef7d5a7de06e9120019144aa86bd4c5 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_4882/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3f9ac82659a1cd525c7f4212e419a59d3242ba4d --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_4882/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "4882" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_4882/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..a4af9a2a154ae3305c9fa26b27c077b054bca854 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 152.86988372802733, "l1_loss": 618.8284118652343, "l0": 77.82916946411133, "frac_variance_explained": 0.5949138164520263, "cossim": 0.855855792760849, "l2_ratio": 0.8027310013771057, "relative_reconstruction_bias": 0.9428261280059814, "loss_original": 2.4489264488220215, "loss_reconstructed": 3.321594715118408, "loss_zero": 12.452933025360107, "frac_recovered": 0.9128723978996277, "frac_alive": 0.1066623255610466, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_9764/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_9764/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..5958c4d1e8edce8676282e2e719a015465d01216 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_9764/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fab6ea0d1834cf06c150e3451ae8a87abbf346de76797fa471c2b035e26ed6e0 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_9764/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_9764/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c00af8a9a6bba79443632a561615bd0ab8b7e481 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_9764/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "9764" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_9764/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_9764/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..371fd1f4ba6d7dd60bd7058ad9c76468826fe09c --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_9764/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 131.56388092041016, "l1_loss": 719.8920776367188, "l0": 107.79166946411132, "frac_variance_explained": 0.7195221364498139, "cossim": 0.8908648729324341, "l2_ratio": 0.8409525513648987, "relative_reconstruction_bias": 0.9525454103946686, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.8993061780929565, "loss_zero": 12.452933025360107, "frac_recovered": 0.9550687432289123, "frac_alive": 0.2173394113779068, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_0/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..cbec54d9bfa8329ce2e3f964d5e6421e27851059 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8ea3e0cae878ffa8c34a4c1ebd21f82cc85af01eed1e09ba7ce855ae6dfe27d +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_0/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c387de73fb2121e0a088f098002561b2a1005af1 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_0/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "0" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_0/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..45f182a9f2fb96505bf3617d6e2d00d23939b7d1 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 469.60608825683596, "l1_loss": 27557.1306640625, "l0": 9206.516796875, "frac_variance_explained": -1.0296395778656007, "cossim": 0.014173783641308547, "l2_ratio": 1.1525371074676514, "relative_reconstruction_bias": 104.63867149353027, "loss_original": 2.4489264488220215, "loss_reconstructed": 22.127364730834962, "loss_zero": 12.452933025360107, "frac_recovered": -0.9674367189407349, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_19528/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_19528/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..757d97ec25cc36117c1293bd8eca49ee7c5ad34d --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_19528/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:857eb4ac41dc99acca02740c96e761d79d272be2d6b6a6c7e60c9323cefe2dd8 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_19528/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_19528/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a105715264338c392e7833c16944ae79221a23ab --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_19528/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "19528" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_19528/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_19528/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..db384fe3c363d865170216ec8f91f6db075171a6 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_19528/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 140.60096740722656, "l1_loss": 592.593115234375, "l0": 62.77083473205566, "frac_variance_explained": 0.7345730066299438, "cossim": 0.8723000228404999, "l2_ratio": 0.8172793984413147, "relative_reconstruction_bias": 0.9543917536735534, "loss_original": 2.4489264488220215, "loss_reconstructed": 3.0884200811386107, "loss_zero": 12.452933025360107, "frac_recovered": 0.9361966669559478, "frac_alive": 0.1486002653837204, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_29292/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_29292/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..9f4c431945b0959aa69ffd20c2fd44cec4e315a0 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_29292/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:141d683d8e8e7a4d6ed12e0d926c040a22ea54cc1c29305984f5021e1ee34a38 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_29292/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_29292/config.json new file mode 100644 index 0000000000000000000000000000000000000000..6dff6d4fa51f3ca104bff0c5ded4e05ca5f810ed --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_29292/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "29292" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_29292/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_29292/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..d8f31a207c7a461d1c15a0928290afa55f8c8a90 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_29292/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 141.89391021728517, "l1_loss": 573.1503356933594, "l0": 66.05000152587891, "frac_variance_explained": 0.6471415638923645, "cossim": 0.8746284604072571, "l2_ratio": 0.82119722366333, "relative_reconstruction_bias": 0.9438917577266693, "loss_original": 2.4489264488220215, "loss_reconstructed": 3.0703896999359133, "loss_zero": 12.452933025360107, "frac_recovered": 0.9379962086677551, "frac_alive": 0.156032994389534, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_4882/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_4882/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..d218725fc930f2f488fb65087043d17fb3f1d05b --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_4882/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28d1660fea94094f4efccf2165e69ea3e8d9f5b3251edaa676a6952bf57d79d6 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_4882/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3983dfcc581ef9359d16f603ea5322e6b20cc0dd --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_4882/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "4882" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_4882/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..c5ef7dcacbe047396c5ec120bce49418b3264081 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 172.27003631591796, "l1_loss": 463.71543273925784, "l0": 36.61250190734863, "frac_variance_explained": 0.5708129644393921, "cossim": 0.8067130506038666, "l2_ratio": 0.7494237124919891, "relative_reconstruction_bias": 0.944784414768219, "loss_original": 2.4489264488220215, "loss_reconstructed": 4.11127507686615, "loss_zero": 12.452933025360107, "frac_recovered": 0.833939003944397, "frac_alive": 0.0470377616584301, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_9764/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_9764/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..9e495ae916f77114a6ea33175d8402752245f4b6 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_9764/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8e58b02a527e757819d144cac1e17fb7846e4e8c3318d7f8af358be18949b72 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_9764/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_9764/config.json new file mode 100644 index 0000000000000000000000000000000000000000..74c377b1245f116ad3717a8a1b18b872e4f6c593 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_9764/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "9764" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_9764/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_9764/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..1c41a0a6eb81ca6f6d14b38b1ea2031c49cb2d05 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_9764/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 152.03309936523436, "l1_loss": 544.0792388916016, "l0": 49.91666870117187, "frac_variance_explained": 0.7213342726230622, "cossim": 0.8455823957920074, "l2_ratio": 0.7891826808452607, "relative_reconstruction_bias": 0.9584948718547821, "loss_original": 2.4489264488220215, "loss_reconstructed": 3.4054911136627197, "loss_zero": 12.452933025360107, "frac_recovered": 0.904506778717041, "frac_alive": 0.0912543386220932, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_0/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..cbec54d9bfa8329ce2e3f964d5e6421e27851059 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8ea3e0cae878ffa8c34a4c1ebd21f82cc85af01eed1e09ba7ce855ae6dfe27d +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_0/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..449523a3faffcc4672087c6471ba332245b86812 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_0/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "0" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_0/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..57b48db187e1ffb3bf92ce0b2dd4c26b4867187a --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 523.540640258789, "l1_loss": 30849.794921875, "l0": 9205.79208984375, "frac_variance_explained": -1.0934860944747924, "cossim": 0.013197313947603106, "l2_ratio": 1.151525616645813, "relative_reconstruction_bias": 199.27118606567382, "loss_original": 2.4489264488220215, "loss_reconstructed": 22.127364730834962, "loss_zero": 12.452933025360107, "frac_recovered": -0.9674367189407349, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_19528/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_19528/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..1b4db16c7ad8418430e1424acbf26e687c64d05c --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_19528/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c5cf7c53e7c70056a40bc547163c24268b843da0be7babcc94ee3f41d85088a +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_19528/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_19528/config.json new file mode 100644 index 0000000000000000000000000000000000000000..50c89183c3f23be5c4c5024c2e0f10a499378698 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_19528/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "19528" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_19528/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_19528/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..53936c1332e2968f981f94fae07d5ece8048498f --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_19528/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 158.19207763671875, "l1_loss": 460.4234375, "l0": 33.40000114440918, "frac_variance_explained": 0.7011104464530945, "cossim": 0.8312147915363312, "l2_ratio": 0.7733805060386658, "relative_reconstruction_bias": 0.9571642696857452, "loss_original": 2.4489264488220215, "loss_reconstructed": 3.747956371307373, "loss_zero": 12.452933025360107, "frac_recovered": 0.8702886164188385, "frac_alive": 0.0635850727558136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_29292/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_29292/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..6bc0ed7088e51b81af5eb8f96f26e9747ff08823 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_29292/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5afb09d20f57c52412b518ad7a65588551f3171ed38e7abdc55bf77ab61b2136 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_29292/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_29292/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d5537aaa176480c371581e1ee5fb48568b2d5698 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_29292/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "29292" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_29292/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_29292/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..3f736804b3f5e7bcf4b7e9c27507a352298fc51b --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_29292/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 160.17070617675782, "l1_loss": 431.9152435302734, "l0": 34.45416812896728, "frac_variance_explained": 0.5782649576663971, "cossim": 0.8358017086982727, "l2_ratio": 0.7761706471443176, "relative_reconstruction_bias": 0.9387483537197113, "loss_original": 2.4489264488220215, "loss_reconstructed": 3.707629418373108, "loss_zero": 12.452933025360107, "frac_recovered": 0.8743325412273407, "frac_alive": 0.0665147602558136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_4882/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_4882/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..75f86ff4cb3a7f94e56a62ba1f41e437571d8ff5 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_4882/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2be29b90d437eb9959ff369d04c45031f1c4fecb45b55299101e0fcfeb63d7a4 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_4882/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..9a4a26a4f0947f56d9c11f5838bd93d7ba79ec32 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_4882/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "4882" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_4882/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..5d10b48030e978061ef7a3fb5b39212500e206b8 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 187.81900634765626, "l1_loss": 357.656689453125, "l0": 20.36250057220459, "frac_variance_explained": 0.47903150916099546, "cossim": 0.7516765773296357, "l2_ratio": 0.692807924747467, "relative_reconstruction_bias": 0.9434179425239563, "loss_original": 2.4489264488220215, "loss_reconstructed": 5.202389144897461, "loss_zero": 12.452933025360107, "frac_recovered": 0.7247971415519714, "frac_alive": 0.02349175326526165, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_9764/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_9764/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..551daa7ef65ad194376278c530f069251d54c19d --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_9764/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcb33f1ce1dac7d13f9306171af073cf4ab8db9dff80230112a85505924a3858 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_9764/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_9764/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c7a5772c813d662d515d8315007e388787475dc4 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_9764/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "9764" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_9764/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_9764/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..b53dc4502cc15b5c6f69a594e2263bcb1d65ec17 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_9764/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 175.21607360839843, "l1_loss": 437.87403564453126, "l0": 27.829167366027832, "frac_variance_explained": 0.634596335887909, "cossim": 0.7985280632972718, "l2_ratio": 0.7378181278705597, "relative_reconstruction_bias": 0.9533836245536804, "loss_original": 2.4489264488220215, "loss_reconstructed": 4.295857715606689, "loss_zero": 12.452933025360107, "frac_recovered": 0.81548712849617, "frac_alive": 0.0353732630610466, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_0/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..cbec54d9bfa8329ce2e3f964d5e6421e27851059 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8ea3e0cae878ffa8c34a4c1ebd21f82cc85af01eed1e09ba7ce855ae6dfe27d +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_0/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..284a769e4ee5f56bc60e1780588cff965695879c --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_0/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "0" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_0/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..409118774ba60cd251c6d16668c0bbbdfe344bd8 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 453.9008422851563, "l1_loss": 26550.1759765625, "l0": 9205.71708984375, "frac_variance_explained": -1.009181010723114, "cossim": 0.012192491674795747, "l2_ratio": 1.1533514976501464, "relative_reconstruction_bias": 107.34699058532715, "loss_original": 2.4489264488220215, "loss_reconstructed": 22.127364730834962, "loss_zero": 12.452933025360107, "frac_recovered": -0.9674367189407349, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_19528/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_19528/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..d868733eff0594e145dd0c4adf6a7897631c2686 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_19528/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b90600d2f2c0676f9741d27207e659bc9a2d1c5aab16f3f60fba11a6d2f5caae +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_19528/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_19528/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3d82c73ca3416f5a77b9220e03be67e62ae53768 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_19528/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "19528" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_19528/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_19528/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..ddc1ec74fab4cd5ed6f026d9d2f370e8ef31ac7c --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_19528/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 182.0844497680664, "l1_loss": 352.6810729980469, "l0": 19.283334159851073, "frac_variance_explained": 0.569837486743927, "cossim": 0.7766727924346923, "l2_ratio": 0.7147558212280274, "relative_reconstruction_bias": 0.9462656915187836, "loss_original": 2.4489264488220215, "loss_reconstructed": 4.676406669616699, "loss_zero": 12.452933025360107, "frac_recovered": 0.7774727761745452, "frac_alive": 0.02506510354578495, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_29292/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_29292/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..49c01c945ed3afe72afe910e9b5d7d6bb1520085 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_29292/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8549e44914215988300e89410fbc9145d06710e914258b3ee99401170a90701b +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_29292/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_29292/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e97c1ad257b3887fac8b9e8530f7392621464d91 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_29292/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "29292" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_29292/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_29292/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..cadd11d607e730120d4fb5f9d6925e2888a6c183 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_29292/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 178.9276870727539, "l1_loss": 337.96388549804686, "l0": 19.908334159851073, "frac_variance_explained": 0.49259382486343384, "cossim": 0.7897185206413269, "l2_ratio": 0.7270867347717285, "relative_reconstruction_bias": 0.931782352924347, "loss_original": 2.4489264488220215, "loss_reconstructed": 4.58164610862732, "loss_zero": 12.452933025360107, "frac_recovered": 0.7869459748268127, "frac_alive": 0.02690972201526165, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_4882/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_4882/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..64e6eace03a42fa6f2eff17a268971748abfa37d --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_4882/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2280b568a288556cc9e227f07599539c48540419a559bfe2ca734931c40a8cab +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_4882/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..aca3bf18d4c71a7bb987de5c2b606ac46b324e54 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_4882/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "4882" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_4882/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..3a66b74f74b6e2b6397c0128bd0e9b45259e154a --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 200.49181060791017, "l1_loss": 289.4018981933594, "l0": 13.220833587646485, "frac_variance_explained": 0.41973212361335754, "cossim": 0.7268201112747192, "l2_ratio": 0.6677527308464051, "relative_reconstruction_bias": 0.9409182786941528, "loss_original": 2.4489264488220215, "loss_reconstructed": 6.150494718551636, "loss_zero": 12.452933025360107, "frac_recovered": 0.630077064037323, "frac_alive": 0.0227322056889534, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_9764/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_9764/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..b4e083203d87f9936de12e6c4dc16e9060f0a373 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_9764/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b45ded633435af96fd604fad0a640e220c6cebe09d2afa718f8c2e6cf5c7d902 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_9764/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_9764/config.json new file mode 100644 index 0000000000000000000000000000000000000000..b0e419b22910791af96f27de2364628af815e967 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_9764/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "9764" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_9764/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_9764/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..65d9e7893799c16dbd305f75195ba2707d679990 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_9764/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 195.21346740722657, "l1_loss": 281.17481994628906, "l0": 15.216666984558106, "frac_variance_explained": 0.3317651033401489, "cossim": 0.7389422357082367, "l2_ratio": 0.6757572948932647, "relative_reconstruction_bias": 0.9222545623779297, "loss_original": 2.4489264488220215, "loss_reconstructed": 5.470939493179321, "loss_zero": 12.452933025360107, "frac_recovered": 0.6980372726917267, "frac_alive": 0.01274956576526165, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3/trainer_0/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3/trainer_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..891519c437dabad734d1986294d1c85062e4ab1c --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3/trainer_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e9f08d99dd2c71a890be672d4a1ff65c0954cab6913b3a63d4fb02efcb6195b +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3/trainer_0/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3/trainer_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..b28c7bad529badc19086c1a502b83ca2abc99a6b --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3/trainer_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3/trainer_0/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3/trainer_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..3426044a9806d0e27dd1436033039f8555511ad7 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3/trainer_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 23.335896301269532, "l1_loss": 270.1961273193359, "l0": 320.6666748046875, "frac_variance_explained": 0.8943248569965363, "cossim": 0.9556451797485351, "l2_ratio": 0.9313349306583405, "relative_reconstruction_bias": 0.9799374282360077, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5290406227111815, "loss_zero": 12.452933025360107, "frac_recovered": 0.9920366525650024, "frac_alive": 0.7665473222732544, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3/trainer_1/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3/trainer_1/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..f30cf06cd6a5284014a01ca439fe9b7c64358208 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3/trainer_1/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e14c6a2841ac7abfe974cd42cbc4771c13d2cd32efd15013b044438c6a1b34eb +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3/trainer_1/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3/trainer_1/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a23f671c477c89ecede95af17c3a3a62b6069a3b --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3/trainer_1/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3/trainer_1/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3/trainer_1/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..321fa4c338bc88bc882f9911386c033c028c5edf --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3/trainer_1/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 27.491335487365724, "l1_loss": 188.43201599121093, "l0": 129.9791717529297, "frac_variance_explained": 0.8262831807136536, "cossim": 0.93378546833992, "l2_ratio": 0.906765204668045, "relative_reconstruction_bias": 0.9746900081634522, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5945015430450438, "loss_zero": 12.452933025360107, "frac_recovered": 0.9855089008808136, "frac_alive": 0.4690755307674408, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3/trainer_2/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3/trainer_2/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..7424d99a54fdca5a35f56b05a9dbb21d00550583 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3/trainer_2/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45dbe78eeaa9289e0fa9bdc7b7c0f253ae116c9a48722043c6b47ce40fe8894f +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3/trainer_2/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3/trainer_2/config.json new file mode 100644 index 0000000000000000000000000000000000000000..623bfe4e4a20b8f67109214ce44807dcaa1994a4 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3/trainer_2/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3/trainer_2/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3/trainer_2/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..d27b3c2eb8aa742bf584dae79a6b6adfc1d70be3 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3/trainer_2/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 30.10821990966797, "l1_loss": 162.0966369628906, "l0": 92.37917022705078, "frac_variance_explained": 0.7919197320938111, "cossim": 0.9215337336063385, "l2_ratio": 0.8894334852695465, "relative_reconstruction_bias": 0.967528659105301, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.6364114046096803, "loss_zero": 12.452933025360107, "frac_recovered": 0.981322419643402, "frac_alive": 0.3464626669883728, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3/trainer_3/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3/trainer_3/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..be9d639a41ce7cabb0980d1013bad99001caf651 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3/trainer_3/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f22170c6b1f8b17c26a500d279eae81e9384b45c8c98087d5cb0c2ec528d68f6 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3/trainer_3/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3/trainer_3/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e8820b9d2faf9023174fcdc0582d25b380a87a4f --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3/trainer_3/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3/trainer_3/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3/trainer_3/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..a2b24e70f05976faf45c24c5d65edb23acb2df98 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3/trainer_3/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 34.156801795959474, "l1_loss": 130.57741775512696, "l0": 53.18750190734863, "frac_variance_explained": 0.7166793942451477, "cossim": 0.8976885914802551, "l2_ratio": 0.857718962430954, "relative_reconstruction_bias": 0.9572011172771454, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.7937287092208862, "loss_zero": 12.452933025360107, "frac_recovered": 0.9656153500080109, "frac_alive": 0.189724400639534, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3/trainer_4/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3/trainer_4/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..0439fc712079c822fa47301ea67b9758ca576e18 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3/trainer_4/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea3aa5917d50e84540b22ac330512ee65debe910a939ac6595f34dd535360b53 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3/trainer_4/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3/trainer_4/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7764fec0326c4a213e76f02e5ccbe02e80b2a923 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3/trainer_4/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3/trainer_4/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3/trainer_4/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..ae562c8dc50b84f9bacaeeb16e156f18a7af1c7e --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3/trainer_4/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 42.25794639587402, "l1_loss": 115.70429000854492, "l0": 32.9708345413208, "frac_variance_explained": 0.6629013299942017, "cossim": 0.8515677750110626, "l2_ratio": 0.8004781782627106, "relative_reconstruction_bias": 0.9513620138168335, "loss_original": 2.4489264488220215, "loss_reconstructed": 3.185274624824524, "loss_zero": 12.452933025360107, "frac_recovered": 0.9264948964118958, "frac_alive": 0.093153215944767, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3/trainer_5/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3/trainer_5/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..e3eb6752beb80eeeb0634348a97d15ec51dc4083 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3/trainer_5/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4b07539facba7e55d9a6778b50848188783b4367bc46b78c7cc38125f601940 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3/trainer_5/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3/trainer_5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..73d43881328e1b5a6a1485099f65395dbb923323 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3/trainer_5/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3/trainer_5/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3/trainer_5/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..559e628afae121dfb07d65d49b906bef41a67d06 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3/trainer_5/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 47.68552360534668, "l1_loss": 81.06160049438476, "l0": 18.433334159851075, "frac_variance_explained": 0.4669169783592224, "cossim": 0.8034469485282898, "l2_ratio": 0.7498004794120788, "relative_reconstruction_bias": 0.9302173733711243, "loss_original": 2.4489264488220215, "loss_reconstructed": 4.220582270622254, "loss_zero": 12.452933025360107, "frac_recovered": 0.8229339301586152, "frac_alive": 0.0354275181889534, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_0/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..cbec54d9bfa8329ce2e3f964d5e6421e27851059 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8ea3e0cae878ffa8c34a4c1ebd21f82cc85af01eed1e09ba7ce855ae6dfe27d +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_0/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1fda1c95e9fa5a16d2ee8cb4a561b73a77e99f48 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_0/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3", + "steps": "0" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_0/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..daeb24d89633b9af151535c4d5392deae519302d --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 132.67230148315429, "l1_loss": 7896.978857421875, "l0": 9321.47119140625, "frac_variance_explained": -1.0492967128753663, "cossim": 0.014029730018228293, "l2_ratio": 1.1619217038154601, "relative_reconstruction_bias": 92.79410705566406, "loss_original": 2.4489264488220215, "loss_reconstructed": 13.947317600250244, "loss_zero": 12.452933025360107, "frac_recovered": -0.1494984433054924, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_19528/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_19528/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..537082dec26b476a99a2b65048b2b1c91425b743 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_19528/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e94d367e84bae89e1a01322c6312155a6397f397d45cd751cef2e232fe0a9be8 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_19528/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_19528/config.json new file mode 100644 index 0000000000000000000000000000000000000000..20604853584b7cbb75e23951307f65c3eaad8dc4 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_19528/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3", + "steps": "19528" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_19528/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_19528/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..a0d15f3e36c803fa3be49cd15d334d2c0dd89ca6 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_19528/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 23.174807929992674, "l1_loss": 267.56239166259763, "l0": 326.3125061035156, "frac_variance_explained": 0.8694223523139953, "cossim": 0.954661077260971, "l2_ratio": 0.9257999181747436, "relative_reconstruction_bias": 0.9702165067195893, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5284759283065794, "loss_zero": 12.452933025360107, "frac_recovered": 0.9920903384685517, "frac_alive": 0.7787001132965088, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_29292/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_29292/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..06aba7432d17211a438ef24702405a018f76e4b4 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_29292/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0c1264ce89aeefc9d2015ba97073627062ccfbe82e2ec7fa9b5bbe775f74db7 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_29292/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_29292/config.json new file mode 100644 index 0000000000000000000000000000000000000000..861a77c440ea38351f267152ca2b9a47eb0822fc --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_29292/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3", + "steps": "29292" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_29292/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_29292/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..673096d10495e75f4377c55cd6d394b61fc07c76 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_29292/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 24.448015022277833, "l1_loss": 275.5758575439453, "l0": 341.5541717529297, "frac_variance_explained": 0.8425181627273559, "cossim": 0.9503746449947357, "l2_ratio": 0.9214557409286499, "relative_reconstruction_bias": 0.9667651295661926, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5311416149139405, "loss_zero": 12.452933025360107, "frac_recovered": 0.9918260753154755, "frac_alive": 0.7785915732383728, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_4882/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_4882/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..3bfd99f9a93c5cfc40b62a02a74f7e06c9deccd8 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_4882/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4254e3b58c88aa9bd481ea8504f47395d27d1752d5fbfe6758bf1e1464ee5de3 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_4882/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a691abd64a438e410a5abaeced51a405b9723f6c --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_4882/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3", + "steps": "4882" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_4882/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..011b04e0582ff852187d6441abebb2118257270a --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 28.143254852294923, "l1_loss": 309.8203094482422, "l0": 346.2291778564453, "frac_variance_explained": 0.8080565750598907, "cossim": 0.9353252410888672, "l2_ratio": 0.8947455406188964, "relative_reconstruction_bias": 0.9575241327285766, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5765707731246947, "loss_zero": 12.452933025360107, "frac_recovered": 0.9872860491275788, "frac_alive": 0.434624582529068, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_9764/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_9764/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..9f43207e8cdd1e22c08a03830a531902f78d6fd2 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_9764/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a5c64401af158da8e6f6a31009722393f2cf7515ca8ebdbbb3498bda2e19cee +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_9764/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_9764/config.json new file mode 100644 index 0000000000000000000000000000000000000000..12ddabe1c1f04176b83e3ac293e7c0dacb8fac57 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_9764/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3", + "steps": "9764" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_9764/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_9764/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..02d5000041f204e5672f57879f30d14c37c4c9ab --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_9764/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 22.138610649108887, "l1_loss": 276.55173645019534, "l0": 336.70001220703125, "frac_variance_explained": 0.880555385351181, "cossim": 0.9577016115188599, "l2_ratio": 0.9328980505466461, "relative_reconstruction_bias": 0.9758973777294159, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.534040379524231, "loss_zero": 12.452933025360107, "frac_recovered": 0.9915366172790527, "frac_alive": 0.7199978232383728, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_19528/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_19528/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..0c02ad9ad22f3172a57d24d8ceceffa96975029b --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_19528/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ee682a505b50a16bfe7064cdfc54ac4fddc8843811b66154d0c44e44fbee834 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_19528/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_19528/config.json new file mode 100644 index 0000000000000000000000000000000000000000..33f25839267b165c9c3375b8fbf523c26b75cf77 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_19528/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3", + "steps": "19528" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_19528/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_19528/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..44a64f51bd17aac4956590470a3a21404ac3885a --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_19528/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 28.986676025390626, "l1_loss": 195.54286193847656, "l0": 130.85833816528321, "frac_variance_explained": 0.8168558537960052, "cossim": 0.9306705057621002, "l2_ratio": 0.8992097437381744, "relative_reconstruction_bias": 0.9705277383327484, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5927810430526734, "loss_zero": 12.452933025360107, "frac_recovered": 0.9856770396232605, "frac_alive": 0.4718967080116272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_29292/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_29292/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..eae624ebadc1239b2c9b0e2a60fe0643e620f445 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_29292/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:730f94ed1cdeaaf026d2602d443edb5969008aba55b17007940f1d373750e500 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_29292/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_29292/config.json new file mode 100644 index 0000000000000000000000000000000000000000..eb74636ed3f1a191eedfea03be34496302098be9 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_29292/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3", + "steps": "29292" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_29292/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_29292/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..76670428e0de99d198aa7735b098db80cf07ca96 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_29292/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 29.50926818847656, "l1_loss": 194.8966506958008, "l0": 134.10833969116212, "frac_variance_explained": 0.8170803487300873, "cossim": 0.9282366037368774, "l2_ratio": 0.8934679508209229, "relative_reconstruction_bias": 0.968377274274826, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5922626495361327, "loss_zero": 12.452933025360107, "frac_recovered": 0.985729593038559, "frac_alive": 0.4752604067325592, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_4882/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_4882/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..57d2f4aad353469e292c6ca5d64610c23b0e3076 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_4882/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6180a83db8cb0b568e41da930496315abaf3e6cdb5a6d3d2439c14855aba525a +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_4882/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1b5982c3a75b4fed106cd63e2f7eef025ed0adf4 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_4882/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3", + "steps": "4882" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_4882/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..23e4592be5048c7f0afe193ec4af2af93b803f72 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 35.04509086608887, "l1_loss": 200.5556167602539, "l0": 121.5833351135254, "frac_variance_explained": 0.7443788290023804, "cossim": 0.8983681380748749, "l2_ratio": 0.8559872567653656, "relative_reconstruction_bias": 0.9589756071567536, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.738658046722412, "loss_zero": 12.452933025360107, "frac_recovered": 0.9711036086082458, "frac_alive": 0.2491862028837204, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_9764/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_9764/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..3e50b744b748e998babddb0fd88a3353b280a6ef --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_9764/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:008cb798cb9855e65daf5e1d930eb968e123328971d2c643c352612899c0a2f5 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_9764/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_9764/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1021146ca01c4b5e9d85eabd1acc7c497935cac3 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_9764/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3", + "steps": "9764" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_9764/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_9764/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..241dc815915bf1107f7833911234696faa7ed8ed --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_9764/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 28.24656524658203, "l1_loss": 196.7987091064453, "l0": 130.19167098999023, "frac_variance_explained": 0.8241580545902252, "cossim": 0.9326215803623199, "l2_ratio": 0.8987816631793976, "relative_reconstruction_bias": 0.966037118434906, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.6186776638031004, "loss_zero": 12.452933025360107, "frac_recovered": 0.9830881774425506, "frac_alive": 0.419976145029068, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_19528/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_19528/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..288fbc19f04e6e390e8eb3b70147846eb41ba713 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_19528/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dec9d3bb0354e9e08ee861ed33309c3cf17d2a04f46a67e6c3d11c754a9a821f +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_19528/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_19528/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c5e731c1207cb284977daf86600c939f16ed846b --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_19528/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3", + "steps": "19528" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_19528/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_19528/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..df0a36d29417b9a4c1d1e88de4ffcdc364a3cbe5 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_19528/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 30.061773109436036, "l1_loss": 167.34809112548828, "l0": 93.00000228881837, "frac_variance_explained": 0.7952910900115967, "cossim": 0.9204221606254578, "l2_ratio": 0.8858135342597961, "relative_reconstruction_bias": 0.9675338208675385, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.638899230957031, "loss_zero": 12.452933025360107, "frac_recovered": 0.9810698688030243, "frac_alive": 0.3466254472732544, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_29292/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_29292/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..326671eaf4f523bee5b0cb69412e80dda3bde95b --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_29292/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95cbabf32985b518ebee31b35e19c7e0ccedb2a666bea5eb1112b120eb145be9 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_29292/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_29292/config.json new file mode 100644 index 0000000000000000000000000000000000000000..64ba575686430dff5dc4c4dc91dc5af543aa1913 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_29292/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3", + "steps": "29292" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_29292/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_29292/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..8e96a512f023d32b4a1c788b2f0a16a8f113536e --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_29292/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 29.112895202636718, "l1_loss": 160.3605697631836, "l0": 91.43750228881837, "frac_variance_explained": 0.7986908614635467, "cossim": 0.9263977110385895, "l2_ratio": 0.894413274526596, "relative_reconstruction_bias": 0.9673278868198395, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.63939950466156, "loss_zero": 12.452933025360107, "frac_recovered": 0.9810234129428863, "frac_alive": 0.3358289897441864, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_4882/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_4882/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..d4d826fb760b6c5d5378698e667dacb7f68086f8 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_4882/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34f0ff2026f8ece5473238956e68e096cee56d06419dadb0aa2650f18ad8755e +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_4882/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..58c4c4e1ef15f7d61bf4666cd5dd535a0190198f --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_4882/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3", + "steps": "4882" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_4882/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..f2aa7a66e6345f6fd263217028b595a939f1e4af --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 38.26353435516357, "l1_loss": 178.24551696777343, "l0": 85.30000228881836, "frac_variance_explained": 0.7033773958683014, "cossim": 0.8797521650791168, "l2_ratio": 0.8335693180561066, "relative_reconstruction_bias": 0.9551890611648559, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.852721858024597, "loss_zero": 12.452933025360107, "frac_recovered": 0.9597122371196747, "frac_alive": 0.1773546040058136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_9764/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_9764/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..42ecc02798b77ba87c9edbc17d0eeb87da3c735a --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_9764/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2cb04508bfa99e37d173b209b5c4e1c0dc7a33b3240edf596bbecfab0a46d27 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_9764/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_9764/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8764b9e07c650a0657b8cc766e2e4f621afd6e24 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_9764/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3", + "steps": "9764" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_9764/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_9764/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..198f40f2dba1baf8144a55f2ec0564d77dae80f3 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_9764/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 31.972361755371093, "l1_loss": 166.81767578125, "l0": 92.15833587646485, "frac_variance_explained": 0.7366561174392701, "cossim": 0.9105015695095062, "l2_ratio": 0.8736934006214142, "relative_reconstruction_bias": 0.9608269810676575, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.6745104074478148, "loss_zero": 12.452933025360107, "frac_recovered": 0.977510267496109, "frac_alive": 0.3017578125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_19528/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_19528/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..e6f67e2449babf19147cf0830ff412af0be735dd --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_19528/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d1a86317fcd53630a009b3e3ea355ab75933e256f135c2e139b20485424b9e9 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_19528/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_19528/config.json new file mode 100644 index 0000000000000000000000000000000000000000..48c0638fa0b2ad65ae0db09907dc9e5272d67dfa --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_19528/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3", + "steps": "19528" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_19528/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_19528/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..6f4717737a72d3d548e3fdab6520a7f4ca905826 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_19528/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 35.72249355316162, "l1_loss": 143.64809875488282, "l0": 54.37083473205566, "frac_variance_explained": 0.7413396954536438, "cossim": 0.8959884762763977, "l2_ratio": 0.8528176546096802, "relative_reconstruction_bias": 0.9592033505439759, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.789938282966614, "loss_zero": 12.452933025360107, "frac_recovered": 0.9659878551959992, "frac_alive": 0.1890733540058136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_29292/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_29292/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..70a76e6f1455acaa263ae4b133599d16f5f14faf --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_29292/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32b768d0e5a61b1ee660a15f6d86fa5610c7b6eaf8f8eb15be41898917008114 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_29292/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_29292/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f3d6f40eef78f8e31d9d707147052413a9ed7aa0 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_29292/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3", + "steps": "29292" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_29292/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_29292/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..d71822c25c920e65953d9fa730a58a6fd1b1015b --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_29292/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 35.69659004211426, "l1_loss": 136.8420051574707, "l0": 53.03750152587891, "frac_variance_explained": 0.7243206083774567, "cossim": 0.8898881435394287, "l2_ratio": 0.8455968856811523, "relative_reconstruction_bias": 0.9565320551395416, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.7930211782455445, "loss_zero": 12.452933025360107, "frac_recovered": 0.9656856715679168, "frac_alive": 0.1864149272441864, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_4882/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_4882/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..3d6d862a6216be81295199537d2ee20fa7eefd1b --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_4882/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:826544bb5c453a5d024ca7ed51813e300f58745bdae45ca8e8b03df035f7a337 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_4882/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..105852e64392c6d8c8186dce81bb2796fb1b3836 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_4882/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3", + "steps": "4882" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_4882/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..ae22470967eb121b5d0880f39e7e88b7729bf2d3 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 41.428307723999026, "l1_loss": 133.05520553588866, "l0": 43.9541675567627, "frac_variance_explained": 0.6137557804584504, "cossim": 0.8548845827579499, "l2_ratio": 0.8067225992679596, "relative_reconstruction_bias": 0.9476282954216003, "loss_original": 2.4489264488220215, "loss_reconstructed": 3.26673367023468, "loss_zero": 12.452933025360107, "frac_recovered": 0.918316251039505, "frac_alive": 0.0846896693110466, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_9764/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_9764/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..6841acdd98fa418ded9641db849cc11dd956cd76 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_9764/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f76a3e5a4864d1a8bd40fa971b25c454024cd3de5e0a6d255011a643b482a931 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_9764/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_9764/config.json new file mode 100644 index 0000000000000000000000000000000000000000..77bfd67d2afbc929e2fd5ac762f9b1dde0b887e8 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_9764/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3", + "steps": "9764" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_9764/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_9764/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..fe015a3faf7b1825031dcfda13bfc635ac094978 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_9764/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 36.489636993408205, "l1_loss": 139.07800750732423, "l0": 51.070834732055665, "frac_variance_explained": 0.7031453490257263, "cossim": 0.8837112009525299, "l2_ratio": 0.837453293800354, "relative_reconstruction_bias": 0.954179972410202, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.8732847452163695, "loss_zero": 12.452933025360107, "frac_recovered": 0.9576579630374908, "frac_alive": 0.1483289897441864, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_19528/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_19528/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..b4df16f1f9fb54649967fee9e284e9e08f1cda32 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_19528/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94f4db0f92258e04308fae8ee956156ad724cf23b8c0201258ba7b8d8957f961 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_19528/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_19528/config.json new file mode 100644 index 0000000000000000000000000000000000000000..89fee32c8c53d76d917512b7c59838bffbff4be3 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_19528/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3", + "steps": "19528" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_19528/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_19528/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..f98f6faa1a294db32cdd814cb6db5ac8ab177f68 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_19528/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 43.63106117248535, "l1_loss": 114.25520782470703, "l0": 32.91250152587891, "frac_variance_explained": 0.5969315648078919, "cossim": 0.8403679311275483, "l2_ratio": 0.7858543932437897, "relative_reconstruction_bias": 0.9410768210887909, "loss_original": 2.4489264488220215, "loss_reconstructed": 3.178047776222229, "loss_zero": 12.452933025360107, "frac_recovered": 0.9272184848785401, "frac_alive": 0.09033203125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_29292/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_29292/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..040383a07ee84da15b331e56f790fb354c8e7bad --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_29292/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8184f3d7bd79f83016d84853571e9a8cda1b8e557f6e88d78dff68fbdc9090d +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_29292/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_29292/config.json new file mode 100644 index 0000000000000000000000000000000000000000..34546ae43bc69cef2bca4aa919a122b3b0fbdef0 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_29292/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3", + "steps": "29292" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_29292/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_29292/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..8fd69097273209de9791a61153a7e3ddce73d90b --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_29292/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 39.658189392089845, "l1_loss": 110.4730354309082, "l0": 31.33750057220459, "frac_variance_explained": 0.6587839841842651, "cossim": 0.858588021993637, "l2_ratio": 0.808927583694458, "relative_reconstruction_bias": 0.9502517282962799, "loss_original": 2.4489264488220215, "loss_reconstructed": 3.172284412384033, "loss_zero": 12.452933025360107, "frac_recovered": 0.9277953445911408, "frac_alive": 0.0890842005610466, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_4882/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_4882/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..276b2c239cc1a70aec024260e2c91f97c01696a8 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_4882/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:704f4e108f09465c5fd955d049dc19eb6cc6537b2aeb300893100eb6878a4965 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_4882/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8e6939a3fd3a2cde20ab98b3aebb41f39671fceb --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_4882/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3", + "steps": "4882" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_4882/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..629eca2795012f474793291ac5396eced1390b8a --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 47.8109188079834, "l1_loss": 111.0880615234375, "l0": 26.220833778381348, "frac_variance_explained": 0.6009923398494721, "cossim": 0.8106343030929566, "l2_ratio": 0.7519308686256408, "relative_reconstruction_bias": 0.9449797689914703, "loss_original": 2.4489264488220215, "loss_reconstructed": 4.222089123725891, "loss_zero": 12.452933025360107, "frac_recovered": 0.8227586388587952, "frac_alive": 0.037109375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_9764/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_9764/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..412f931e8f08e12cd5a4b12d6f22d2b7d5540edb --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_9764/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c7076172f1881472408decf682549fcb05c779f34a2d7b5289ba72a59217d82 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_9764/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_9764/config.json new file mode 100644 index 0000000000000000000000000000000000000000..31496d6697a1584236e6b164fbc6d5ad2f9f4cfa --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_9764/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3", + "steps": "9764" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_9764/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_9764/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..f456184680605fa655064e5fa4af7a8c0a537bfc --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_9764/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 42.78617362976074, "l1_loss": 112.46215209960937, "l0": 29.391667366027832, "frac_variance_explained": 0.6220115184783935, "cossim": 0.8414349615573883, "l2_ratio": 0.7866164088249207, "relative_reconstruction_bias": 0.9447615027427674, "loss_original": 2.4489264488220215, "loss_reconstructed": 3.4521493434906008, "loss_zero": 12.452933025360107, "frac_recovered": 0.899797773361206, "frac_alive": 0.0643988698720932, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_19528/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_19528/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..d6418189548edb9645c5af966a0a2050c2510533 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_19528/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75b558e4fcf59509afaa8d4ff671c7d4cef88e5bcc115bce89d664f04f328e2e +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_19528/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_19528/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3c8905d2d990310ba5f42cc73023c91c9bbc2487 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_19528/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3", + "steps": "19528" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_19528/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_19528/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..e8589080708e3dc02a24525a78c135f902ccdcf5 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_19528/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 48.95832061767578, "l1_loss": 88.95420455932617, "l0": 19.48333396911621, "frac_variance_explained": 0.49233954548835757, "cossim": 0.7979032516479492, "l2_ratio": 0.73763507604599, "relative_reconstruction_bias": 0.9263910591602326, "loss_original": 2.4489264488220215, "loss_reconstructed": 4.135448241233826, "loss_zero": 12.452933025360107, "frac_recovered": 0.8314984440803528, "frac_alive": 0.0346137173473835, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_29292/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_29292/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..298fe34da39baed657a0cb06f08090a9aaca3cdd --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_29292/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58c8ca1d0e459f70cc3f0f3a9ef7059a42569144ef24ac8df54d2adf0d04dc1e +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_29292/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_29292/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7683ddc2efa575486de5207e4b253c41380cce15 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_29292/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3", + "steps": "29292" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_29292/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_29292/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..130d58708729372dcf6e03e7b9236fb33392270d --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_29292/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 47.18345756530762, "l1_loss": 88.04943771362305, "l0": 18.220833778381348, "frac_variance_explained": 0.5548478960990906, "cossim": 0.807228010892868, "l2_ratio": 0.7497315287590027, "relative_reconstruction_bias": 0.9400316476821899, "loss_original": 2.4489264488220215, "loss_reconstructed": 4.191076922416687, "loss_zero": 12.452933025360107, "frac_recovered": 0.8258992850780487, "frac_alive": 0.0344509556889534, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_4882/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_4882/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..63fbc398528d01e320c4d4a6e8a0b91e8dfa0be6 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_4882/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:663d5905f6d03243d0d2be07547c6736e5140ab7177f756ecb2b7a9c7b830e66 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_4882/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..793051521668ed8a33b74a5bad9714993f595664 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_4882/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3", + "steps": "4882" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_4882/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..264b88c3fca43d8fce8be915c1ffca44322d13c3 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 55.77299880981445, "l1_loss": 74.3282455444336, "l0": 13.679166984558105, "frac_variance_explained": 0.3262187957763672, "cossim": 0.7297035753726959, "l2_ratio": 0.6640038251876831, "relative_reconstruction_bias": 0.914098572731018, "loss_original": 2.4489264488220215, "loss_reconstructed": 5.701044416427612, "loss_zero": 12.452933025360107, "frac_recovered": 0.6749620139598846, "frac_alive": 0.01470269076526165, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_9764/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_9764/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..471556d5b4d046a3df88ad52f13ca3f223f90a1c --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_9764/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0157019d8d3249d6fc5130529be58e568a07768109d59c7c823d0c2686d0caa +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_9764/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_9764/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8ece93aa6f93fa0a4825eff57cc12c0f86eb6d53 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_9764/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3", + "steps": "9764" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_9764/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_9764/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..d013d1c4b2df01aa88cb0870652578ac20830aa4 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_9764/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 50.874039840698245, "l1_loss": 95.45915374755859, "l0": 17.69583406448364, "frac_variance_explained": 0.5547674536705017, "cossim": 0.7814474105834961, "l2_ratio": 0.7156918764114379, "relative_reconstruction_bias": 0.9380373418331146, "loss_original": 2.4489264488220215, "loss_reconstructed": 4.681138324737549, "loss_zero": 12.452933025360107, "frac_recovered": 0.776915842294693, "frac_alive": 0.0243055559694767, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7/trainer_0/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7/trainer_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..545d54965cb3c64cf895c599170e8b223a566f44 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7/trainer_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:192aca68704252ebaf1c0bcd81e52ab8b204c6aaa966f1d8df90db9c434084f4 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7/trainer_0/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7/trainer_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..9a52c740294e6707b05acf9a1653bead94297a65 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7/trainer_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7/trainer_0/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7/trainer_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..8693f1bc98a547e909bd5eeba82b22d62c5c6436 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7/trainer_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 29.080005836486816, "l1_loss": 426.9056671142578, "l0": 422.0083404541016, "frac_variance_explained": 0.923843252658844, "cossim": 0.9572624862194061, "l2_ratio": 0.9247695922851562, "relative_reconstruction_bias": 0.9795760452747345, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.501347064971924, "loss_zero": 12.452933025360107, "frac_recovered": 0.9947918713092804, "frac_alive": 0.6989474892616272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7/trainer_1/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7/trainer_1/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..a5dffe26c7699d3c7405714137cc5356670208e2 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7/trainer_1/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdc296c096728feeb6a2c4f72e8b620718209ce6d113cd500e4545931893bcff +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7/trainer_1/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7/trainer_1/config.json new file mode 100644 index 0000000000000000000000000000000000000000..dd9f377ac917a0101a90de32b1e423b1bc076f3d --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7/trainer_1/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7/trainer_1/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7/trainer_1/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..5954904374ef29014177925cdf8d794589fb994c --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7/trainer_1/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 36.03860740661621, "l1_loss": 283.4460815429687, "l0": 180.9875045776367, "frac_variance_explained": 0.8018329918384552, "cossim": 0.9354411423206329, "l2_ratio": 0.8980011582374573, "relative_reconstruction_bias": 0.9603017032146454, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5653805255889894, "loss_zero": 12.452933025360107, "frac_recovered": 0.9884158194065094, "frac_alive": 0.4109157919883728, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7/trainer_2/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7/trainer_2/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..e8d31335f52ce39d2849da64ce41fcd1f89fcd50 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7/trainer_2/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ac956388272c3df3253172a405d2307a535ace337da208aa1ec142c2a9f5ab9 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7/trainer_2/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7/trainer_2/config.json new file mode 100644 index 0000000000000000000000000000000000000000..82844f70b43df8d59f11089c15770831d83dd38a --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7/trainer_2/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7/trainer_2/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7/trainer_2/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..84dab079022acc053caa46c913f2f18b2a3e6c6a --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7/trainer_2/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 40.072514343261716, "l1_loss": 247.1628173828125, "l0": 123.87500381469727, "frac_variance_explained": 0.7669894337654114, "cossim": 0.9212132513523101, "l2_ratio": 0.8806450128555298, "relative_reconstruction_bias": 0.9582830250263215, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.6193800449371336, "loss_zero": 12.452933025360107, "frac_recovered": 0.9830265641212463, "frac_alive": 0.2927517294883728, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7/trainer_3/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7/trainer_3/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..6fb05a3a60813642e6bbaf661c82306db58db944 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7/trainer_3/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:283b55c65f05c218064666de92365ba9d4ead66ccc310f469271494045879fac +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7/trainer_3/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7/trainer_3/config.json new file mode 100644 index 0000000000000000000000000000000000000000..b4e6c3a0b8f1eb9486dd92f69e82208a5fbc1856 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7/trainer_3/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7/trainer_3/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7/trainer_3/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..d047e554bdc052eeff0c306994fc553ef6dbafa0 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7/trainer_3/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 48.9877426147461, "l1_loss": 204.8866943359375, "l0": 65.37083511352539, "frac_variance_explained": 0.7179788053035736, "cossim": 0.8782737255096436, "l2_ratio": 0.8260804891586304, "relative_reconstruction_bias": 0.951469624042511, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.8807432413101197, "loss_zero": 12.452933025360107, "frac_recovered": 0.956915944814682, "frac_alive": 0.1409505158662796, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7/trainer_4/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7/trainer_4/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..8a58d300b4b839f977003decbf4c742bb88e05c9 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7/trainer_4/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee4ed05e024ef01a11dc3db09029a87494d2fa0a37a793025b0b90c651d66dbb +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7/trainer_4/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7/trainer_4/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7398587fc4eac2bfc820deb8ab15c786a64dffd4 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7/trainer_4/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7/trainer_4/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7/trainer_4/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..b9455ee659fd42f5998b8b59b141b2f5eb2096d2 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7/trainer_4/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 53.2313648223877, "l1_loss": 147.2660385131836, "l0": 37.40833435058594, "frac_variance_explained": 0.5986019253730774, "cossim": 0.8492132067680359, "l2_ratio": 0.7880723178386688, "relative_reconstruction_bias": 0.933878880739212, "loss_original": 2.4489264488220215, "loss_reconstructed": 3.481455445289612, "loss_zero": 12.452933025360107, "frac_recovered": 0.8968801438808441, "frac_alive": 0.0559353306889534, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7/trainer_5/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7/trainer_5/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..4db7cc080226138bcf6575680bdfe4436e4898d1 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7/trainer_5/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b019a2923760ac7bd846e518f8aa99523c3242a2ef670352692d39fbeceadf70 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7/trainer_5/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7/trainer_5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..42164096559301aa6a00d41c852e11914da7ce49 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7/trainer_5/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7/trainer_5/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7/trainer_5/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..f773d97813c2deb9dfc19b71c38b653e627acc97 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7/trainer_5/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 62.87034683227539, "l1_loss": 116.25719909667968, "l0": 21.183333969116212, "frac_variance_explained": 0.4952318072319031, "cossim": 0.7969144999980926, "l2_ratio": 0.740835702419281, "relative_reconstruction_bias": 0.9371668994426727, "loss_original": 2.4489264488220215, "loss_reconstructed": 4.547642850875855, "loss_zero": 12.452933025360107, "frac_recovered": 0.7902627408504486, "frac_alive": 0.02191840298473835, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_0/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..cbec54d9bfa8329ce2e3f964d5e6421e27851059 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8ea3e0cae878ffa8c34a4c1ebd21f82cc85af01eed1e09ba7ce855ae6dfe27d +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_0/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3e5e28413a12761c13c147440ea9e35f68e68b56 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_0/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7", + "steps": "0" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_0/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..82ecbb420231cb715ac1862f4ac28f79067099f7 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 159.67227630615236, "l1_loss": 9287.28115234375, "l0": 9207.362890625, "frac_variance_explained": -1.0092440962791442, "cossim": 0.009908614633604883, "l2_ratio": 1.156403946876526, "relative_reconstruction_bias": 142.80149154663087, "loss_original": 2.4489264488220215, "loss_reconstructed": 17.8469446182251, "loss_zero": 12.452933025360107, "frac_recovered": -0.5394477754831314, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_19528/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_19528/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..7fe84ef0f79f1eb1dae9b7e2214de9aedabe49a5 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_19528/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ed5ecd73b39983db669524a65cac89276214643bc8f110d7d7c0b1fa494d4e8 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_19528/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_19528/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ec8bb62fc65e4d26b0a2e551809dc504fd05a830 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_19528/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7", + "steps": "19528" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_19528/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_19528/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..cd1cdd64572d4c0b49daf6e87ffc8a39d3bb8569 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_19528/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 30.957966232299803, "l1_loss": 430.53013610839844, "l0": 466.31250915527346, "frac_variance_explained": 0.8763152778148651, "cossim": 0.9540156483650207, "l2_ratio": 0.9190684974193573, "relative_reconstruction_bias": 0.9677236139774322, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5051952362060548, "loss_zero": 12.452933025360107, "frac_recovered": 0.9944131016731262, "frac_alive": 0.6903212070465088, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_29292/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_29292/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..f094008547f08fb4db5bbc6811d611e9efad82a4 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_29292/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fb25d40dbd03ba1762b5ef3e9b366ebfb2bf98a712fb1972ad6bdb87cd05eff +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_29292/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_29292/config.json new file mode 100644 index 0000000000000000000000000000000000000000..5c6bb0a15da456ebd2bcf217b9df67fbafcbce0c --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_29292/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7", + "steps": "29292" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_29292/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_29292/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..5249b727c8c2f744758365ef88002d774024a944 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_29292/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 31.44601936340332, "l1_loss": 447.42922058105466, "l0": 468.52918395996096, "frac_variance_explained": 0.8851749539375305, "cossim": 0.9533333539962768, "l2_ratio": 0.924415135383606, "relative_reconstruction_bias": 0.9773047566413879, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5042022228240968, "loss_zero": 12.452933025360107, "frac_recovered": 0.9945091128349304, "frac_alive": 0.700303852558136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_4882/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_4882/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..e95418e61c2d03d4ae1094606c62ff6c3502cde6 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_4882/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ad1c91528dc6d6ff29ba7f3170b30db39a4ae0a8f6e9c1e1b120401bbffc88b +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_4882/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..057536e32aa93dd5df6f09ea86876a5b97f74456 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_4882/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7", + "steps": "4882" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_4882/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..5957e942be043bf8404a47ef7655096b65775d1b --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 36.44141807556152, "l1_loss": 455.6988098144531, "l0": 411.45001220703125, "frac_variance_explained": 0.8395122468471528, "cossim": 0.9357959568500519, "l2_ratio": 0.8992688834667206, "relative_reconstruction_bias": 0.9681542575359344, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5559090852737425, "loss_zero": 12.452933025360107, "frac_recovered": 0.9893602132797241, "frac_alive": 0.2976888120174408, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_9764/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_9764/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..1e996f1cb6e4927db3e0e1d157fcdc5f4fb9241e --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_9764/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:037b8e1ba2dc8e4791c14e4ad71b63d4177316c95dd9a26f0cde46f7a9ce03cf +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_9764/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_9764/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d3bd5adf753ac6951c2ffc1aeb5c5c8f753eed0e --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_9764/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7", + "steps": "9764" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_9764/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_9764/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..ba7da1ff8cc0e006befcc282cb1b76e11b187094 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_9764/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 33.05034618377685, "l1_loss": 442.59373168945314, "l0": 468.0375061035156, "frac_variance_explained": 0.8539032399654388, "cossim": 0.9466826617717743, "l2_ratio": 0.9128996968269348, "relative_reconstruction_bias": 0.9683820605278015, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.516424059867859, "loss_zero": 12.452933025360107, "frac_recovered": 0.9932908892631531, "frac_alive": 0.5691189169883728, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_19528/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_19528/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..3603c7a88f9652286898fa2c7998dc15395c508f --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_19528/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0447d1a483b64c7240b430143a1f947371f94b510d92510f0f2bc67a34d3e4b9 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_19528/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_19528/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c14c62afd7bcc0095f551cd0d4be2f85be21e9f4 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_19528/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7", + "steps": "19528" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_19528/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_19528/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..e8c78f900fb3c3db95c3d5d018d793c0d294d0eb --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_19528/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 37.29960174560547, "l1_loss": 298.68935241699216, "l0": 177.89583740234374, "frac_variance_explained": 0.8346909761428833, "cossim": 0.9307305991649628, "l2_ratio": 0.8916606843471527, "relative_reconstruction_bias": 0.965721744298935, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.570395755767822, "loss_zero": 12.452933025360107, "frac_recovered": 0.9879159748554229, "frac_alive": 0.4040256142616272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_29292/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_29292/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..a802479c36cb7344a8d8c21f9832bbc8d3884c58 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_29292/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0aec09ddce1e01df8cc5fdc3b65fb353cf943b25ae7785c0748ec1042a279c39 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_29292/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_29292/config.json new file mode 100644 index 0000000000000000000000000000000000000000..361015b57a242ea8071b06fe4b0781660c4ac267 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_29292/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7", + "steps": "29292" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_29292/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_29292/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..2ab73664688472d624a776846588903c72d537a5 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_29292/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 36.99704513549805, "l1_loss": 293.3637756347656, "l0": 179.5500030517578, "frac_variance_explained": 0.8173972725868225, "cossim": 0.9323728322982788, "l2_ratio": 0.8951810896396637, "relative_reconstruction_bias": 0.9639217793941498, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5657249689102173, "loss_zero": 12.452933025360107, "frac_recovered": 0.9883790731430053, "frac_alive": 0.4105902910232544, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_4882/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_4882/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..35cd60906a8a63eecdff2003b6734accdbdd8286 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_4882/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ceff12e7e3214a29041c4928027749f6e3a24848c9576a9a361d01c159e34c40 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_4882/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ffcbb8fdd2e95a5238b024f9166916b4473d113e --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_4882/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7", + "steps": "4882" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_4882/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..071e05531717fc97cab39045e4fbd4eea8134ca5 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 45.110770797729494, "l1_loss": 291.7300109863281, "l0": 140.6666717529297, "frac_variance_explained": 0.7882416784763336, "cossim": 0.8972113788127899, "l2_ratio": 0.8491501867771148, "relative_reconstruction_bias": 0.9632423162460327, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.789890003204346, "loss_zero": 12.452933025360107, "frac_recovered": 0.9659987926483155, "frac_alive": 0.1514756977558136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_9764/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_9764/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..7888e34529a8feb20a04f993e4d6cf827d2c0190 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_9764/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce13bc28631676a131ed2f3d70dc395aa56aa16d075d86df4063c599f948ed07 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_9764/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_9764/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c390ef463ca751ad05a84d6c994b69d683b1490e --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_9764/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7", + "steps": "9764" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_9764/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_9764/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..22901779d6b0a72f3d1062fa376be2a520592c3f --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_9764/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 39.323495101928714, "l1_loss": 293.8645812988281, "l0": 163.75833892822266, "frac_variance_explained": 0.8282193839550018, "cossim": 0.9221918046474457, "l2_ratio": 0.8749778389930725, "relative_reconstruction_bias": 0.9642571628093719, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.6060997247695923, "loss_zero": 12.452933025360107, "frac_recovered": 0.9843538224697113, "frac_alive": 0.3030056357383728, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_19528/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_19528/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..9097cfc34448d1f7d0dbf7b68a672ca9116efc8f --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_19528/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3dbeeb48f492b53d6e444a692d6fb43848174398203a904c54c67eb883b6b183 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_19528/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_19528/config.json new file mode 100644 index 0000000000000000000000000000000000000000..4d54571fd5f2a0a92e7ae73c81be57809ae9450f --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_19528/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7", + "steps": "19528" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_19528/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_19528/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..6b7255c41f31775b5fa5956811b2500d38a92a67 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_19528/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 40.30303688049317, "l1_loss": 252.92496337890626, "l0": 121.93750305175782, "frac_variance_explained": 0.7834434509277344, "cossim": 0.9174316346645355, "l2_ratio": 0.875529408454895, "relative_reconstruction_bias": 0.9585324406623841, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.625407409667969, "loss_zero": 12.452933025360107, "frac_recovered": 0.982425969839096, "frac_alive": 0.2892252504825592, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_29292/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_29292/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..848d83b4d2648136fb68fe18d5ca4adbe82118fb --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_29292/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a48bc779c6e55a876781a721fc0b09fd7257dbb2014a0b7043788e6aa098f61c +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_29292/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_29292/config.json new file mode 100644 index 0000000000000000000000000000000000000000..17f204721f081fdbe33c25929f248978662261a9 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_29292/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7", + "steps": "29292" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_29292/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_29292/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..3bf67d45ca88a0b44b17ebae3bf6b0b42ae0f683 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_29292/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 40.51176109313965, "l1_loss": 251.97252349853517, "l0": 123.79166946411132, "frac_variance_explained": 0.7788640022277832, "cossim": 0.9185352981090545, "l2_ratio": 0.8722439408302307, "relative_reconstruction_bias": 0.9559475481510162, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.621143174171448, "loss_zero": 12.452933025360107, "frac_recovered": 0.9828494489192963, "frac_alive": 0.2962782084941864, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_4882/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_4882/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..8f032782f8ccea448b676aec6b132ed7f598cc19 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_4882/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5507228fe04beb10974fb8d1e8762a3330b878619809b33265b493f0bcd944bf +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_4882/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f44b15e462c128e33295398d024f7075631a45da --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_4882/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7", + "steps": "4882" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_4882/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..cedeea56e3ed52ac12f846898f09f5be3dfde786 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 49.04096221923828, "l1_loss": 231.90460968017578, "l0": 95.58750228881836, "frac_variance_explained": 0.6702616393566132, "cossim": 0.8765593767166138, "l2_ratio": 0.8224286675453186, "relative_reconstruction_bias": 0.9442986965179443, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.983953928947449, "loss_zero": 12.452933025360107, "frac_recovered": 0.9466049134731293, "frac_alive": 0.1021050363779068, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_9764/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_9764/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..9cd69f480873ea59c9c895b7837cce16dc061d80 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_9764/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46f040e17292488ec0cf05681a023d247f5a23975d95e53902a49b61b9304103 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_9764/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_9764/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a604f4136c5b820bd008406fb28160d761f60774 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_9764/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7", + "steps": "9764" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_9764/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_9764/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..110ba155c9e57334dad1b1ee96f4bba372e0f1fe --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_9764/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 44.1085262298584, "l1_loss": 253.99161224365236, "l0": 115.33333587646484, "frac_variance_explained": 0.7475803554058075, "cossim": 0.9047161996364593, "l2_ratio": 0.8591941118240356, "relative_reconstruction_bias": 0.9544724762439728, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.6995904207229615, "loss_zero": 12.452933025360107, "frac_recovered": 0.9750228881835937, "frac_alive": 0.2065972238779068, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_19528/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_19528/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..1e0477440ef81836282e8db6cef2084b955e7181 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_19528/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c468bf09e7f72133dd2c63dea4404f67983c07b4e7649020ac249501f65ce53 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_19528/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_19528/config.json new file mode 100644 index 0000000000000000000000000000000000000000..bb9eca2b7400191b24647e84fdbefba5ff1fbb24 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_19528/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7", + "steps": "19528" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_19528/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_19528/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..f674292060c6eabd9f47565c53f8f9eb1d90e6fa --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_19528/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 48.63495407104492, "l1_loss": 199.56231079101562, "l0": 66.98750228881836, "frac_variance_explained": 0.6900550603866578, "cossim": 0.8817582309246064, "l2_ratio": 0.8287044763565063, "relative_reconstruction_bias": 0.9469349026679993, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.900096893310547, "loss_zero": 12.452933025360107, "frac_recovered": 0.9549891114234924, "frac_alive": 0.1330295205116272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_29292/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_29292/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..094d8e6f24229a3ecff6e06c82837baed8f0e90a --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_29292/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6bd9782beda560637f0a534db7119e30c7d117cbb610e6315863337b5a28e47d +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_29292/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_29292/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0f50010db141e4c210693543279e31dfacdb5d5d --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_29292/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7", + "steps": "29292" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_29292/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_29292/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..538fe079883f867ec95916330ba7ed16d9424df1 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_29292/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 50.81889114379883, "l1_loss": 207.49078826904298, "l0": 70.18333587646484, "frac_variance_explained": 0.6773387014865875, "cossim": 0.8791859149932861, "l2_ratio": 0.8233663678169251, "relative_reconstruction_bias": 0.9441014409065247, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.8836801052093506, "loss_zero": 12.452933025360107, "frac_recovered": 0.9566263318061828, "frac_alive": 0.138454869389534, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_4882/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_4882/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..ac2bd9e11f14cb50cd4343092c9b328465b12357 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_4882/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae1a8fb85e5dcb2070d9b07e93de542344716d48a317c8a4f0770b40c9d1b6d2 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_4882/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0770bd9c047db3340924b681cd31dcbff1bcb4f7 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_4882/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7", + "steps": "4882" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_4882/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..75768fbf35445e411aa2187160e52f0ed4bde42d --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 56.27819900512695, "l1_loss": 167.54239959716796, "l0": 48.62083473205566, "frac_variance_explained": 0.519008457660675, "cossim": 0.835083794593811, "l2_ratio": 0.7794098854064941, "relative_reconstruction_bias": 0.9354207575321197, "loss_original": 2.4489264488220215, "loss_reconstructed": 3.7809175729751585, "loss_zero": 12.452933025360107, "frac_recovered": 0.8668676912784576, "frac_alive": 0.0465494804084301, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_9764/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_9764/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..13b64d0d729d824c782f09d2b7a0bf009ae691b4 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_9764/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e92179821effdb50591200a6f03f62b042f35262f96df62fb1d36e49e4304d8 +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_9764/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_9764/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f008fab647600083d4bf21297244d7f46c14dbe2 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_9764/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7", + "steps": "9764" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_9764/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_9764/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..9d0c0de42d0f776032204755edff3cfcd2d13873 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_9764/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 51.80388565063477, "l1_loss": 191.16077880859376, "l0": 60.80000190734863, "frac_variance_explained": 0.6333850383758545, "cossim": 0.8619512438774108, "l2_ratio": 0.8076131403446197, "relative_reconstruction_bias": 0.943832129240036, "loss_original": 2.4489264488220215, "loss_reconstructed": 3.1301766872406005, "loss_zero": 12.452933025360107, "frac_recovered": 0.9319755971431732, "frac_alive": 0.0876736119389534, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_19528/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_19528/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..bee6f9802b67e9954e8a5e26e9064fe7a33660a2 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_19528/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59c60dd0225e914694cc9686c3f467cc6a46476092aeaa5770a3002d8428eedc +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_19528/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_19528/config.json new file mode 100644 index 0000000000000000000000000000000000000000..cad7f8a7f34c6a157478402bad46c3fff180587e --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_19528/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7", + "steps": "19528" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_19528/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_19528/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..f814dbd65c7dfc0a53f187e311308b4e9d28d2e2 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_19528/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 54.830928039550784, "l1_loss": 167.94821319580078, "l0": 38.45833511352539, "frac_variance_explained": 0.6782689392566681, "cossim": 0.8508976340293884, "l2_ratio": 0.7949335932731628, "relative_reconstruction_bias": 0.9521509230136871, "loss_original": 2.4489264488220215, "loss_reconstructed": 3.5118600130081177, "loss_zero": 12.452933025360107, "frac_recovered": 0.8938413679599762, "frac_alive": 0.05419921875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_29292/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_29292/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..07927ab9902e38df6946d03330e3422a886649af --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_29292/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9d9753a78adf3933ca1ba398a14d243353416b939cdf656f14f9a150ccb8cce +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_29292/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_29292/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f6ee69cca1228baefa0277b1672037f55464f75e --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_29292/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7", + "steps": "29292" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_29292/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_29292/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..0bf56785cce733b0381f2223b543a9a3cd5d9683 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_29292/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 56.66085700988769, "l1_loss": 164.49048767089843, "l0": 39.31250152587891, "frac_variance_explained": 0.6290608644485474, "cossim": 0.8422188460826874, "l2_ratio": 0.7822328388690949, "relative_reconstruction_bias": 0.9430650889873504, "loss_original": 2.4489264488220215, "loss_reconstructed": 3.481442618370056, "loss_zero": 12.452933025360107, "frac_recovered": 0.8968838036060334, "frac_alive": 0.0553927943110466, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_4882/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_4882/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..4eaf20ffe7b60a6d9148cc44892bfdf3ccbdc37b --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_4882/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea9b0482d1088a8d90c63726102a33c021981bd1013e78832271c81fd5d1bffc +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_4882/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..63319307330ff4de2396d95baa9dd2f5eb59bddf --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_4882/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7", + "steps": "4882" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_4882/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..c2d3863ab1f686f95f965ccf1b7a47dbdb2a20b4 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 62.67911376953125, "l1_loss": 136.42315521240235, "l0": 25.150000762939452, "frac_variance_explained": 0.5286945521831512, "cossim": 0.793884414434433, "l2_ratio": 0.733313363790512, "relative_reconstruction_bias": 0.9398350119590759, "loss_original": 2.4489264488220215, "loss_reconstructed": 4.934321451187134, "loss_zero": 12.452933025360107, "frac_recovered": 0.7516048014163971, "frac_alive": 0.0203993059694767, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_9764/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_9764/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..7298f1925e89144cf19c366d52949263bf900bba --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_9764/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4daaf126d0a65dbe376a72b81b203016a45411845c10f93d0af5b565486f961d +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_9764/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_9764/config.json new file mode 100644 index 0000000000000000000000000000000000000000..6d81454b84d26933f36dd827a899cc3b7b0a045e --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_9764/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7", + "steps": "9764" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_9764/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_9764/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..4a4b3ba59e80df1349d58d6a388aab15a152d22e --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_9764/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 58.42602729797363, "l1_loss": 151.59864044189453, "l0": 32.32916736602783, "frac_variance_explained": 0.5871770262718201, "cossim": 0.8211628794670105, "l2_ratio": 0.7572241306304932, "relative_reconstruction_bias": 0.9389375150203705, "loss_original": 2.4489264488220215, "loss_reconstructed": 4.037073063850403, "loss_zero": 12.452933025360107, "frac_recovered": 0.8413214743137359, "frac_alive": 0.0323893241584301, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_19528/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_19528/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..205d4729a191a23a1ffbc4a86961ede8e6181662 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_19528/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:819b77ffa939d2a7e94172e5b734506c725aad5cf92bb07b8694bf534e98d8ee +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_19528/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_19528/config.json new file mode 100644 index 0000000000000000000000000000000000000000..b6c0e9e6999101f3473918906f6bf98697340078 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_19528/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7", + "steps": "19528" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_19528/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_19528/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..e46ae4a12f34f2c1da63c28b0d6bcc3959d7f4ab --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_19528/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 62.46188621520996, "l1_loss": 129.69930114746094, "l0": 21.558334350585938, "frac_variance_explained": 0.5429405629634857, "cossim": 0.7954939723014831, "l2_ratio": 0.730623996257782, "relative_reconstruction_bias": 0.9338679432868957, "loss_original": 2.4489264488220215, "loss_reconstructed": 4.553743124008179, "loss_zero": 12.452933025360107, "frac_recovered": 0.7897360563278198, "frac_alive": 0.0207790806889534, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_29292/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_29292/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..a9191e2572795c7e9a0dc1f455428bfcb4c26474 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_29292/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad36ddf462d39cf4408a3837ee4a0e5e00aad64cf82c9788b46dcff8202cb3dc +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_29292/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_29292/config.json new file mode 100644 index 0000000000000000000000000000000000000000..60d8cd1ea38aec673ec022d8f1f37a96e41e931a --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_29292/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7", + "steps": "29292" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_29292/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_29292/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..62c05ed5096056e1d0a6125931c620efba436176 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_29292/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 60.625074768066405, "l1_loss": 114.87514801025391, "l0": 22.08333396911621, "frac_variance_explained": 0.46042917370796205, "cossim": 0.8100155889987946, "l2_ratio": 0.7488573372364045, "relative_reconstruction_bias": 0.9266091048717499, "loss_original": 2.4489264488220215, "loss_reconstructed": 4.590645599365234, "loss_zero": 12.452933025360107, "frac_recovered": 0.7859954953193664, "frac_alive": 0.0208875872194767, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_4882/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_4882/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..9821593d1dbac3af748dfda1836529051e97b669 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_4882/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13341cd7f7af4ed538d0441303b5c409fc5c6259d2203077e5db9a250f0323bb +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_4882/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..774760e71ca65a94dcbb0773a1b3b9c0f82c0d9c --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_4882/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7", + "steps": "4882" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_4882/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..464f1e92b3906469f9897128ca9f53c8dd6bc483 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 66.936967086792, "l1_loss": 106.96089401245118, "l0": 13.883333587646485, "frac_variance_explained": 0.44835478663444517, "cossim": 0.7481483101844788, "l2_ratio": 0.6830538272857666, "relative_reconstruction_bias": 0.9316767692565918, "loss_original": 2.4489264488220215, "loss_reconstructed": 6.000245904922485, "loss_zero": 12.452933025360107, "frac_recovered": 0.6450616836547851, "frac_alive": 0.011393229477107525, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_9764/ae.pt b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_9764/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..cfafa14c872a52e81ac3ad816f899808a9995cac --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_9764/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fb321dec791c5af11acfac4e16693a1372deb9bed4e33e2e408450e2aeaceac +size 339823336 diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_9764/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_9764/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ab5efc4183a31347a9ddd05975bbdb139186203c --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_9764/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7", + "steps": "9764" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_9764/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_9764/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..a5850aacb5ed3a34d11c44e1132ac67b08f657cd --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_9764/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 63.68552169799805, "l1_loss": 106.80061340332031, "l0": 18.02500057220459, "frac_variance_explained": 0.39550771117210387, "cossim": 0.7768594920635223, "l2_ratio": 0.7111204385757446, "relative_reconstruction_bias": 0.9190560281276703, "loss_original": 2.4489264488220215, "loss_reconstructed": 5.283021402359009, "loss_zero": 12.452933025360107, "frac_recovered": 0.7167817533016205, "frac_alive": 0.013780382461845875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11/trainer_0/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11/trainer_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..c0489607e6aa5e24efe09f2a6693f411479ccdfa --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11/trainer_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b126ec8751f5711e48a7018d54d687e98798243f105739e234b18cc8bf2225f +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11/trainer_0/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11/trainer_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..80e3a47917d8a45047ff08654b20cba796859338 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11/trainer_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": 48828, + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 20, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11/trainer_0/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11/trainer_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..3288205a972fce4f99e2f6411840cc740e61ac98 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11/trainer_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 64.61817817687988, "l1_loss": 282.2537475585938, "l0": 20.0, "frac_variance_explained": 0.737516725063324, "cossim": 0.8891037285327912, "l2_ratio": 0.8893811702728271, "relative_reconstruction_bias": 1.000152826309204, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.7688143968582155, "loss_zero": 12.452933025360107, "frac_recovered": 0.9680845320224762, "frac_alive": 0.1599934846162796, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11/trainer_1/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11/trainer_1/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..eda3789b8030f290bdc281fa0d6d41483ec07c3f --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11/trainer_1/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2ffc4cb928a1533b4693c641e53626062f2dd4a20308cb8dc011200142eab6a +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11/trainer_1/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11/trainer_1/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a19262c11f59e161681a58fd7eaf70cb7f8adb54 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11/trainer_1/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": 48828, + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 40, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11/trainer_1/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11/trainer_1/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..97ca1a2b2133eba828f12dbba85ad9fd4ce7ed5d --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11/trainer_1/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 55.950561141967775, "l1_loss": 401.8522033691406, "l0": 40.0, "frac_variance_explained": 0.8127304315567017, "cossim": 0.9176948845386506, "l2_ratio": 0.9174629509449005, "relative_reconstruction_bias": 0.999979829788208, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.6059114217758177, "loss_zero": 12.452933025360107, "frac_recovered": 0.9843562066555023, "frac_alive": 0.3069118857383728, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11/trainer_2/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11/trainer_2/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..7dfd5799b43d3a75724c2128650bcb5b0a5fa921 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11/trainer_2/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5acddaebdd83dd47a94cb8ae405ac2887631695d5fcda43598879a95d921310 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11/trainer_2/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11/trainer_2/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8b5dd04f55b02310d76c0081671f1735adc6a7f1 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11/trainer_2/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": 48828, + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 80, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11/trainer_2/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11/trainer_2/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..ee69c94b7efe93b882fb4e57f59132195695cdb4 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11/trainer_2/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 51.766046142578126, "l1_loss": 551.8687042236328, "l0": 79.95416717529297, "frac_variance_explained": 0.8506697595119477, "cossim": 0.9300514996051789, "l2_ratio": 0.9307749271392822, "relative_reconstruction_bias": 1.0002500534057617, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.546255683898926, "loss_zero": 12.452933025360107, "frac_recovered": 0.9903075456619262, "frac_alive": 0.4618598222732544, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11/trainer_3/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11/trainer_3/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..d1fb1175ffc5767152f98f8cc74f1d096c6f9d10 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11/trainer_3/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8fc73ab6e050096a438c0f76e5e4be60822483d182f2ac2034ead0162ae4b92 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11/trainer_3/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11/trainer_3/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c20deed411c129e91f65fa8ecd98b4d826a4e0e0 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11/trainer_3/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": 48828, + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 160, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11/trainer_3/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11/trainer_3/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..3627182670d2a23e16774f905d8b56762c32167f --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11/trainer_3/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 45.96233024597168, "l1_loss": 756.8620727539062, "l0": 160.0, "frac_variance_explained": 0.8569788098335266, "cossim": 0.9457451581954956, "l2_ratio": 0.9459684491157532, "relative_reconstruction_bias": 1.0003461122512818, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.510911059379578, "loss_zero": 12.452933025360107, "frac_recovered": 0.9938320398330689, "frac_alive": 0.6036784052848816, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11/trainer_4/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11/trainer_4/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..f3f09506c0c6b68c0431f8a6a3cfa39250c2ee9b --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11/trainer_4/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b50dc8f2eb4a148028b99c2326cf4e880d5659569332c6f591fe86d8ca7937f0 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11/trainer_4/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11/trainer_4/config.json new file mode 100644 index 0000000000000000000000000000000000000000..9bb03ad0277599c2f02239167696b7dcc6aab283 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11/trainer_4/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": 48828, + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 320, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11/trainer_4/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11/trainer_4/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..99463002091f3d473acff9603358452c0eda5592 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11/trainer_4/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 40.02006721496582, "l1_loss": 1198.095361328125, "l0": 319.93333435058594, "frac_variance_explained": 0.9105471670627594, "cossim": 0.9585575938224793, "l2_ratio": 0.9586278080940247, "relative_reconstruction_bias": 0.9996210753917694, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.486758494377136, "loss_zero": 12.452933025360107, "frac_recovered": 0.9962394773960114, "frac_alive": 0.6825087070465088, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11/trainer_5/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11/trainer_5/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..deec16025c2aad5ac7a2fd44d15b444865b00dc0 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11/trainer_5/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2572ddd4dd479049e6cf9a78ffa47b0238d8280e93eb7e894faf12d13d758fd +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11/trainer_5/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11/trainer_5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..9d0892c99e4f0afb37f52997e1d84ee2c724c002 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11/trainer_5/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": 48828, + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 640, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11/trainer_5/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11/trainer_5/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..3693577b56c26b7959356a15c283e3b5b08dd2c4 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11/trainer_5/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 32.957839012145996, "l1_loss": 2571.5229736328124, "l0": 640.0, "frac_variance_explained": 0.9456270158290863, "cossim": 0.9722206771373749, "l2_ratio": 0.9717293500900268, "relative_reconstruction_bias": 0.9989768981933593, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.4674513578414916, "loss_zero": 12.452933025360107, "frac_recovered": 0.998158472776413, "frac_alive": 0.6299370527267456, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_0/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..6c19c066a8abee0cfcae61867e589a3ab88ce8c9 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24a8064f065b903f611ef28b027bf4c72c16cf1f2a136b3a0889ee684def01e7 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_0/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..b8ef35097e2904943d56130b6e84abfa1336d5ce --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "0", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 20, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_0/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..a55c3aa7c1e16047462c033381afa9c936e7c40a --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 144.1739944458008, "l1_loss": 113.89709167480468, "l0": 20.0, "frac_variance_explained": 0.06153666377067566, "cossim": 0.285680028796196, "l2_ratio": 0.1798807665705681, "relative_reconstruction_bias": 0.6311139702796936, "loss_original": 2.4489264488220215, "loss_reconstructed": 15.055834197998047, "loss_zero": 12.452933025360107, "frac_recovered": -0.26015533953905107, "frac_alive": 0.1353081613779068, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_19528/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_19528/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..07a3c3ab0e3bfd49540c60bf258e4aa1cb7aae92 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_19528/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88f851325ee32bac7a43d8c363fe214cdcbefe8dc05dcf9ab7c6a30f06ee69d1 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_19528/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_19528/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ea28cd4c1a2c13a61ca56fd3ed1b196138f24422 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_19528/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "19528", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 20, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_19528/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_19528/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..2b1920c5edba20d00ae73be9686438b59aadfbc6 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_19528/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 63.222343826293944, "l1_loss": 284.57915954589845, "l0": 19.99583339691162, "frac_variance_explained": 0.7308643460273743, "cossim": 0.8872040271759033, "l2_ratio": 0.8889125108718872, "relative_reconstruction_bias": 1.0020551800727844, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.7907193899154663, "loss_zero": 12.452933025360107, "frac_recovered": 0.9659082174301148, "frac_alive": 0.1557074636220932, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_29292/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_29292/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..38b47517063ac32d0d9a20e600616deecb592a48 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_29292/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9811fede91c85701acacf3523dae0478331e0a1cdd83ccd1c2b1737a7ef03cdb +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_29292/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_29292/config.json new file mode 100644 index 0000000000000000000000000000000000000000..26bf7267158e6a40088c5805ef4749142ca90bf8 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_29292/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "29292", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 20, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_29292/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_29292/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..4a489d1b2612f3f23946bbff250124826b659da0 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_29292/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 63.71059875488281, "l1_loss": 272.7416564941406, "l0": 20.0, "frac_variance_explained": 0.7049950003623963, "cossim": 0.8891237080097198, "l2_ratio": 0.8906138241291046, "relative_reconstruction_bias": 1.0008544504642487, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.785463571548462, "loss_zero": 12.452933025360107, "frac_recovered": 0.9664257526397705, "frac_alive": 0.1563585102558136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_4882/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_4882/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..29624e0fc0998f344dea52fe20688b4e14f7ce76 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_4882/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb43e8a50386ed23cfda211bc56534a9009691a92330143fe76a9cd8826c8a34 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_4882/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f30ebe0eb2b3ae9fe54a41b65af55cc82dddbac0 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_4882/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "4882", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 20, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_4882/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..02f3af74fc0abaddf04f0f4aeb0380b49971e0ff --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 67.39788055419922, "l1_loss": 348.3453887939453, "l0": 20.0, "frac_variance_explained": 0.7809036612510681, "cossim": 0.8779726147651672, "l2_ratio": 0.8785419166088104, "relative_reconstruction_bias": 1.000831699371338, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.875354194641113, "loss_zero": 12.452933025360107, "frac_recovered": 0.9574480593204499, "frac_alive": 0.1525065153837204, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_9764/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_9764/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..fe7eb99d42353c7fac5ed913ee2bb5a111691e6b --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_9764/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c524a4bb579343a522d613b82d00a1802b4dbdc0dab52e4ec405628e4a80135 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_9764/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_9764/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ff243bd8a979ba3684100625fafb897f5743386c --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_9764/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "9764", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 20, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_9764/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_9764/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..a6a0f5137ca1400cfbcde546a430bb62b52ce725 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_9764/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 66.6925880432129, "l1_loss": 304.12717895507814, "l0": 20.0, "frac_variance_explained": 0.7223313331604004, "cossim": 0.8877674520015717, "l2_ratio": 0.8910203695297241, "relative_reconstruction_bias": 1.0043164610862731, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.8143189668655397, "loss_zero": 12.452933025360107, "frac_recovered": 0.9635527789592743, "frac_alive": 0.1507703959941864, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_0/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..6c19c066a8abee0cfcae61867e589a3ab88ce8c9 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24a8064f065b903f611ef28b027bf4c72c16cf1f2a136b3a0889ee684def01e7 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_0/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ed1a043b76a442a087c5b6dddb0d55d13f21a943 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "0", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 40, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_0/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..845a28fd23b778e770b6946269baf468cee5d600 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 146.67851715087892, "l1_loss": 225.0533248901367, "l0": 40.0, "frac_variance_explained": 0.10613001585006714, "cossim": 0.37147045135498047, "l2_ratio": 0.2499557614326477, "relative_reconstruction_bias": 0.670610225200653, "loss_original": 2.4489264488220215, "loss_reconstructed": 13.592330741882325, "loss_zero": 12.452933025360107, "frac_recovered": -0.1137192726135254, "frac_alive": 0.2194553017616272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_19528/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_19528/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..43e42e02fcffe2f837d5f881c2adf26a426e475c --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_19528/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c902498349b2605f72ed73072d76ab7939d134806d92235300b0673f0d8f68c +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_19528/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_19528/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8642cf3d69189561e812446c1759d1c0cbad1e9e --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_19528/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "19528", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 40, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_19528/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_19528/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..2111c937725c76aca0166ff2e4d575c8f0841e00 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_19528/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 55.811133575439456, "l1_loss": 451.9648834228516, "l0": 40.0, "frac_variance_explained": 0.820844042301178, "cossim": 0.9156029880046844, "l2_ratio": 0.9176408350467682, "relative_reconstruction_bias": 1.0015530705451965, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.618153429031372, "loss_zero": 12.452933025360107, "frac_recovered": 0.9831358790397644, "frac_alive": 0.2994249165058136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_29292/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_29292/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..8027c7ff5f85e987ccdfdbf9dd8f1510cd2d4e66 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_29292/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:100a319cf766da323fdbe084b7dfcac838eb2927909ced729aa6de8740d6e946 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_29292/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_29292/config.json new file mode 100644 index 0000000000000000000000000000000000000000..6280cfa6e6e9442c986bf8b7e645c640572c428f --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_29292/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "29292", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 40, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_29292/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_29292/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..593b6118ddd611291d6f875db6365615c8d6415f --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_29292/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 55.91380500793457, "l1_loss": 427.1486541748047, "l0": 39.983334350585935, "frac_variance_explained": 0.8604467451572418, "cossim": 0.9129667043685913, "l2_ratio": 0.9151833534240723, "relative_reconstruction_bias": 1.0012677431106567, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.6114421129226684, "loss_zero": 12.452933025360107, "frac_recovered": 0.9838047802448273, "frac_alive": 0.29443359375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_4882/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_4882/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..f987b91ff8b93b6fcd4dc81c5f4d6f9db090e6ff --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_4882/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba5bbb4abf866d0f6c302b24c79cb0e777cbc1b89acc555d8c20f68555ea0df5 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_4882/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ada689038d856b84b170c83257ea0c7eded38d7f --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_4882/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "4882", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 40, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_4882/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..58b6ec10c1450df894fbcfe30c7fe12d781a59e5 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 59.28143844604492, "l1_loss": 393.00905456542966, "l0": 40.0, "frac_variance_explained": 0.7567337930202485, "cossim": 0.9030908882617951, "l2_ratio": 0.9034847617149353, "relative_reconstruction_bias": 0.9998072028160095, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.678276252746582, "loss_zero": 12.452933025360107, "frac_recovered": 0.9771327614784241, "frac_alive": 0.2982313334941864, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_9764/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_9764/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..0e135c53fbace727dcdca6d41b5f0e1ff55d630c --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_9764/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e16bdbb94026de04cbec2c85cf712072bb9c7b006b3666d3d2c9768e421bfd8 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_9764/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_9764/config.json new file mode 100644 index 0000000000000000000000000000000000000000..4122b2accfe338bb060e82db4c0bc476068789cd --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_9764/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "9764", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 40, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_9764/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_9764/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..137e1224ede6081a735d87a8893542898f504198 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_9764/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 57.661796188354494, "l1_loss": 467.5274963378906, "l0": 40.0, "frac_variance_explained": 0.8447021842002869, "cossim": 0.9108584702014924, "l2_ratio": 0.9144052922725677, "relative_reconstruction_bias": 1.003432297706604, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.624210572242737, "loss_zero": 12.452933025360107, "frac_recovered": 0.9825274705886841, "frac_alive": 0.2957899272441864, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_0/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..6c19c066a8abee0cfcae61867e589a3ab88ce8c9 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24a8064f065b903f611ef28b027bf4c72c16cf1f2a136b3a0889ee684def01e7 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_0/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..97f0c98b5fa3d457b648a69048a8f4285367a7d8 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "0", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 80, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_0/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..6044ba15b06ebd0a70428b0df7ab7ec3c6a66fd0 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 137.5836051940918, "l1_loss": 414.7282440185547, "l0": 80.0, "frac_variance_explained": 0.17110393047332764, "cossim": 0.4680758684873581, "l2_ratio": 0.34816921055316924, "relative_reconstruction_bias": 0.7378275513648986, "loss_original": 2.4489264488220215, "loss_reconstructed": 14.558675193786621, "loss_zero": 12.452933025360107, "frac_recovered": -0.2103082224726677, "frac_alive": 0.3181966245174408, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_19528/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_19528/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..804a6d6e1da612482694df2572a21c4ecc6f40a9 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_19528/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ce8858dbfabf0683ed3cab3ce7c1a13a3ece68d052d3260cdae301c4e28ed38 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_19528/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_19528/config.json new file mode 100644 index 0000000000000000000000000000000000000000..03e9c4db094e330f009b4d9b7c90292786b1558c --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_19528/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "19528", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 80, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_19528/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_19528/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..340cdc6fa7e9c8c7bc4981991938944a95096959 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_19528/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 52.232007598876955, "l1_loss": 543.6768280029297, "l0": 79.9, "frac_variance_explained": 0.8336090505123138, "cossim": 0.9266528010368347, "l2_ratio": 0.9269256889820099, "relative_reconstruction_bias": 1.0003283321857452, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5528225183486937, "loss_zero": 12.452933025360107, "frac_recovered": 0.9896542429924011, "frac_alive": 0.46240234375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_29292/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_29292/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..c98ed2c7690c72b971d9dc75695ce4263cbf9dbc --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_29292/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2aa61ce876f41c30b7aa963ff627db946c10a672cb8281c38e8f5fc861fc79c6 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_29292/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_29292/config.json new file mode 100644 index 0000000000000000000000000000000000000000..727ef9a180a142db778fa0df8a048dc208603739 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_29292/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "29292", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 80, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_29292/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_29292/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..8997f42f7d279e1851ad8e31e5e20bb91d0ffd18 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_29292/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 51.525339126586914, "l1_loss": 510.0916351318359, "l0": 80.0, "frac_variance_explained": 0.8004042208194733, "cossim": 0.928622841835022, "l2_ratio": 0.9312313497066498, "relative_reconstruction_bias": 1.0022627532482147, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5504817724227906, "loss_zero": 12.452933025360107, "frac_recovered": 0.9898871839046478, "frac_alive": 0.4539930522441864, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_4882/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_4882/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..efdf66a7c71ff016eea2974766fbdeca93a417da --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_4882/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e59e94c0caaa998ae6c1e7f0ee21627f4f02179db1d20ceed37242059f89940a +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_4882/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f1d636a04a744e7967f70d7e43b15dab8fc6b2c3 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_4882/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "4882", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 80, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_4882/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..b5e8097bb67f163666b8d3356e4eff7c040019cb --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 56.42006950378418, "l1_loss": 607.1793884277344, "l0": 80.0, "frac_variance_explained": 0.8453537464141846, "cossim": 0.9180462002754212, "l2_ratio": 0.9192540943622589, "relative_reconstruction_bias": 1.0001774728298187, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5884765863418577, "loss_zero": 12.452933025360107, "frac_recovered": 0.9860971808433533, "frac_alive": 0.4753689169883728, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_9764/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_9764/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..d970f86770e0d5fed868b12b768de46e2bd707b1 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_9764/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f166634610040882be7a7dd9fef18a3b4b56bdea138eb98ff0d716feffa766b4 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_9764/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_9764/config.json new file mode 100644 index 0000000000000000000000000000000000000000..93d2c5c00cda17d38613ebdd9fe7b031b787b1f7 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_9764/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "9764", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 80, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_9764/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_9764/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..a21b936880af76966c24219a49fbb13ec3c4a299 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_9764/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 52.48485107421875, "l1_loss": 528.1922485351563, "l0": 80.0, "frac_variance_explained": 0.808493971824646, "cossim": 0.9282886624336243, "l2_ratio": 0.9306568443775177, "relative_reconstruction_bias": 1.0026726007461548, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.56360719203949, "loss_zero": 12.452933025360107, "frac_recovered": 0.988573682308197, "frac_alive": 0.4756944477558136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_0/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..6c19c066a8abee0cfcae61867e589a3ab88ce8c9 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24a8064f065b903f611ef28b027bf4c72c16cf1f2a136b3a0889ee684def01e7 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_0/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7d497b813772b73bbb6f799b08670bdbba4da520 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "0", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 160, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_0/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..c42aafaf8b8f38c1d3f55bba570870cd4c4bf6b6 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 131.11762619018555, "l1_loss": 788.7969665527344, "l0": 160.0, "frac_variance_explained": 0.25940428376197816, "cossim": 0.5706947863101959, "l2_ratio": 0.49342564642429354, "relative_reconstruction_bias": 0.849498838186264, "loss_original": 2.4489264488220215, "loss_reconstructed": 10.614991569519043, "loss_zero": 12.452933025360107, "frac_recovered": 0.18391464054584503, "frac_alive": 0.4411349892616272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_19528/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_19528/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..8dd5fbea8026480409abf466bc5e460182df84da --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_19528/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c9b68ce30895d8d78c60c967440c0da845ec9b9c28264bce68175af3927a8f1 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_19528/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_19528/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f37c3c35c36f347a15d2315489fbacccb764430d --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_19528/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "19528", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 160, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_19528/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_19528/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..74eef0058803f27c4fdd688d2406a5b05be7b06f --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_19528/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 48.276147079467776, "l1_loss": 764.6186645507812, "l0": 160.0, "frac_variance_explained": 0.8476401448249817, "cossim": 0.9392698645591736, "l2_ratio": 0.9418351173400878, "relative_reconstruction_bias": 1.0018192648887634, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.516468143463135, "loss_zero": 12.452933025360107, "frac_recovered": 0.9932795226573944, "frac_alive": 0.6029188632965088, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_29292/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_29292/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..39dccaa51d178a664ca497e1e9aad228b63cfcdb --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_29292/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6c91d2526f6ac4b9f7c9fd9d62dfe0d0ca199c6f22082ff577ce20d851290ea +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_29292/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_29292/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7e2cbcfcae3119b699e8ed2d6337d89b24fb7c24 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_29292/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "29292", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 160, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_29292/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_29292/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..25e4b989bc3a47bea182b50a6786c5ca70030f87 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_29292/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 47.30290908813477, "l1_loss": 785.4107971191406, "l0": 160.0, "frac_variance_explained": 0.8710289716720581, "cossim": 0.9411798179149627, "l2_ratio": 0.9417654693126678, "relative_reconstruction_bias": 1.0005487978458405, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.512762355804443, "loss_zero": 12.452933025360107, "frac_recovered": 0.9936471462249756, "frac_alive": 0.6195204257965088, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_4882/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_4882/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..ed58f63ba83f32c8ab12b6ff141a524a2084207e --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_4882/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15bdd1c581f3389b0389e2a4c1237d6f056ad15763bb0aacb37b7597218afcad +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_4882/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..4eaadecf9e60347014cfaa6e72aec5651fb0b141 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_4882/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "4882", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 160, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_4882/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..92f3a3abaf6152f9e94f223a2fae2f798167d333 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 49.686083221435545, "l1_loss": 879.2001892089844, "l0": 160.0, "frac_variance_explained": 0.8819718539714814, "cossim": 0.9358995020389557, "l2_ratio": 0.9381371676921845, "relative_reconstruction_bias": 1.0012032091617584, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5353710651397705, "loss_zero": 12.452933025360107, "frac_recovered": 0.9913994312286377, "frac_alive": 0.6832682490348816, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_9764/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_9764/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..c4fca80960f971b3c0bb5a83a8f6a5279f9594ab --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_9764/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:670071bb9976b560568d2354d22149720851a4110632a7890fa1a61073fac926 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_9764/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_9764/config.json new file mode 100644 index 0000000000000000000000000000000000000000..6fb4c55872ddcb14fe37513ab527bf0eb9e38ca7 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_9764/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "9764", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 160, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_9764/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_9764/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..605c371162d6b6d122444bed2caa62756c1c2825 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_9764/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 48.30603218078613, "l1_loss": 777.0953186035156, "l0": 159.9625, "frac_variance_explained": 0.8656655848026276, "cossim": 0.9392187178134919, "l2_ratio": 0.9420062899589539, "relative_reconstruction_bias": 1.002146726846695, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.519579267501831, "loss_zero": 12.452933025360107, "frac_recovered": 0.9929662108421325, "frac_alive": 0.6369357705116272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_0/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..6c19c066a8abee0cfcae61867e589a3ab88ce8c9 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24a8064f065b903f611ef28b027bf4c72c16cf1f2a136b3a0889ee684def01e7 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_0/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..06c4936b9ad190edb4fd7b080175b954279bf994 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "0", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 320, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_0/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..2e096f5d2e103c73bcd179dbc9e6f8f92d95fff5 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 131.5750930786133, "l1_loss": 1586.4803344726563, "l0": 320.0, "frac_variance_explained": 0.35132617354393003, "cossim": 0.6678441643714905, "l2_ratio": 0.7069510042667388, "relative_reconstruction_bias": 1.0263331294059754, "loss_original": 2.4489264488220215, "loss_reconstructed": 6.194353532791138, "loss_zero": 12.452933025360107, "frac_recovered": 0.6258051276206971, "frac_alive": 0.5712890625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_19528/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_19528/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..a820a07f4aed9365f84ea0fced5ea7f7aa376100 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_19528/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:247b1797fefeff4a40b7c133d2c1559cdb64dce20173f993467c215fc1e83a8e +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_19528/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_19528/config.json new file mode 100644 index 0000000000000000000000000000000000000000..299ba797af00fd375ed487dc65ac176200429c35 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_19528/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "19528", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 320, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_19528/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_19528/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..db9ec2d8b9b8b1ee4225832db2bf3387fa042205 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_19528/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 41.05187835693359, "l1_loss": 1132.3312622070312, "l0": 320.0, "frac_variance_explained": 0.8801694273948669, "cossim": 0.9543100357055664, "l2_ratio": 0.9556897640228271, "relative_reconstruction_bias": 1.00105140209198, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.491763734817505, "loss_zero": 12.452933025360107, "frac_recovered": 0.9957401871681213, "frac_alive": 0.7314995527267456, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_29292/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_29292/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..1c3e93aa6a5796576cb76de78a8e53db3f449fbf --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_29292/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1862e37ce71b3f18f381f1d2a2112fc7318135fef224a86e9cfc9ddc2ae611a +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_29292/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_29292/config.json new file mode 100644 index 0000000000000000000000000000000000000000..68c22a392783439db9250c3939b443410355e6b2 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_29292/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "29292", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 320, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_29292/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_29292/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..5d3839200cb4565ff118af29b1af0a275879db88 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_29292/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 40.974717712402345, "l1_loss": 1218.4020202636718, "l0": 320.0, "frac_variance_explained": 0.9071006178855896, "cossim": 0.9544732809066773, "l2_ratio": 0.955464094877243, "relative_reconstruction_bias": 1.0005109429359436, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.4892306327819824, "loss_zero": 12.452933025360107, "frac_recovered": 0.995994257926941, "frac_alive": 0.6979166865348816, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_4882/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_4882/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..27bb11cc35b9a991e6ecc1f25a5d178a82e5d767 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_4882/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f673f046398777ee2ad97ac9b3ad04835a3205c6865ddde2ade1a9145833558 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_4882/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..eff325a20fd7c50fa0ecc5a215fc6c9eda320a0e --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_4882/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "4882", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 320, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_4882/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..f0be5efdfbebdd37674a36c3506a008a8b70c185 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 46.41660919189453, "l1_loss": 982.6541625976563, "l0": 320.0, "frac_variance_explained": 0.844070029258728, "cossim": 0.9450825750827789, "l2_ratio": 0.9474761605262756, "relative_reconstruction_bias": 1.0017620146274566, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5012677669525147, "loss_zero": 12.452933025360107, "frac_recovered": 0.9947916507720947, "frac_alive": 0.86083984375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_9764/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_9764/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..02d7cfdb540ab92ae01e533b0e7e2b35d96e6cc6 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_9764/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2c05e9c551aaf644a5d5f858b4714f90407ab2059b89453da54c019159a3501 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_9764/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_9764/config.json new file mode 100644 index 0000000000000000000000000000000000000000..9ad4150060d5633f2bf26cffab03edeaf306bd7e --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_9764/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "9764", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 320, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_9764/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_9764/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..24090c7e1416f6ffdafe2766b7ff2f13ac10eebd --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_9764/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 44.553976440429686, "l1_loss": 1079.3361083984375, "l0": 319.9458343505859, "frac_variance_explained": 0.8690729081630707, "cossim": 0.9506974220275879, "l2_ratio": 0.950929456949234, "relative_reconstruction_bias": 0.9997771561145783, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.494476556777954, "loss_zero": 12.452933025360107, "frac_recovered": 0.9954676687717438, "frac_alive": 0.7751736044883728, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_0/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..6c19c066a8abee0cfcae61867e589a3ab88ce8c9 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24a8064f065b903f611ef28b027bf4c72c16cf1f2a136b3a0889ee684def01e7 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_0/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..312779ffae9874b112068781310c54791ea19c19 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "0", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 640, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_0/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..b55973063dc53a17224137ada4d4710cd5bbc421 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 111.73392791748047, "l1_loss": 2608.7587646484376, "l0": 640.0, "frac_variance_explained": 0.35072124004364014, "cossim": 0.7543251395225525, "l2_ratio": 1.026421320438385, "relative_reconstruction_bias": 1.3424260139465332, "loss_original": 2.4489264488220215, "loss_reconstructed": 3.9057929515838623, "loss_zero": 12.452933025360107, "frac_recovered": 0.8545086026191712, "frac_alive": 0.7488606572151184, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_19528/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_19528/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..6805387bd04116b25c2a9164dddb65bac478abb6 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_19528/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a56079af3ec0c3a231284c28318492537f589bc26057f926c57653764c8515d +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_19528/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_19528/config.json new file mode 100644 index 0000000000000000000000000000000000000000..aba8119567e6983262708a33cf4e735dd69784c2 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_19528/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "19528", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 640, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_19528/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_19528/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..40518c07ec7c45470aab0ffec46dbb11c9ae8b02 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_19528/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 34.61446113586426, "l1_loss": 2412.594775390625, "l0": 640.0, "frac_variance_explained": 0.9390624403953552, "cossim": 0.9699587464332581, "l2_ratio": 0.9699285745620727, "relative_reconstruction_bias": 0.9990258693695069, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.470791292190552, "loss_zero": 12.452933025360107, "frac_recovered": 0.9978296816349029, "frac_alive": 0.6563042402267456, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_29292/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_29292/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..03e5141a3240c98490627b01ed86466781d4a133 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_29292/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28252617dfc8b2a424cc0dd06db5a805b46b445be178c5ddb47cbbbed6b44c86 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_29292/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_29292/config.json new file mode 100644 index 0000000000000000000000000000000000000000..5f7ed73e9144d04f10badf528953ffd783872634 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_29292/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "29292", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 640, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_29292/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_29292/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..c0f507836ca493425d8e1d866f71bc05af63fc99 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_29292/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 34.37075271606445, "l1_loss": 2463.444091796875, "l0": 639.3208374023437, "frac_variance_explained": 0.9328433871269226, "cossim": 0.9713874399662018, "l2_ratio": 0.9715998351573945, "relative_reconstruction_bias": 0.9999610126018524, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.4686097383499144, "loss_zero": 12.452933025360107, "frac_recovered": 0.9980440974235535, "frac_alive": 0.630967915058136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_4882/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_4882/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..470f72ce18eb2f5158d432e4738ea209e85eede2 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_4882/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1e983ea79181b6447c2bba34a61ad87e1062edfd5f5fce25be33660a986fb7c +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_4882/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e177e2775341977e290f95a91df37f1b209df09f --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_4882/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "4882", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 640, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_4882/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..ea1423c55399431a6fda5a7e4ed793ae2506842e --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 38.02740745544433, "l1_loss": 2213.3583374023438, "l0": 639.7375, "frac_variance_explained": 0.9239685833454132, "cossim": 0.9633342325687408, "l2_ratio": 0.9649060130119324, "relative_reconstruction_bias": 1.0006114959716796, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.4730945587158204, "loss_zero": 12.452933025360107, "frac_recovered": 0.9975976288318634, "frac_alive": 0.8318684697151184, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_9764/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_9764/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..206bdad340954dbac2105d52bcd01df5b56a109e --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_9764/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16002b9a30c72a8ab7ee84599c5d03e5a55cd67562290e77f03341fa50014308 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_9764/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_9764/config.json new file mode 100644 index 0000000000000000000000000000000000000000..99090ca090b21563ac484c9eb86693a36378ba46 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_9764/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "9764", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 640, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_9764/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_9764/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..df2e6b9092afe24a2b62479083fa5743605d438a --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_9764/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 36.282081604003906, "l1_loss": 2348.363134765625, "l0": 640.0, "frac_variance_explained": 0.928521353006363, "cossim": 0.967055720090866, "l2_ratio": 0.9679634690284729, "relative_reconstruction_bias": 1.001582407951355, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.472600722312927, "loss_zero": 12.452933025360107, "frac_recovered": 0.99765043258667, "frac_alive": 0.7485893964767456, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15/trainer_0/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15/trainer_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..0568e18d7095be0226c63dc4a5b5c915788300b9 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15/trainer_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a2ae4a0186e2fdb45d8720425dab7db79bdd247bf61b90a7e2f5a499113c68c +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15/trainer_0/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15/trainer_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8d40c30e6f32643ed74d4b894e8587795e460e81 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15/trainer_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": 48828, + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 20, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15/trainer_0/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15/trainer_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..432125c8ab74d9e69067ea765f4cc09e4e41096c --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15/trainer_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 82.05952911376953, "l1_loss": 387.8919372558594, "l0": 20.0, "frac_variance_explained": 0.7454514384269715, "cossim": 0.9057712197303772, "l2_ratio": 0.9036650419235229, "relative_reconstruction_bias": 0.9980986058712006, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.8554311752319337, "loss_zero": 12.452933025360107, "frac_recovered": 0.9594433963298797, "frac_alive": 0.1574435830116272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15/trainer_1/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15/trainer_1/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..d1aa5a7792f67bdbcdcbe481db3344db07879b00 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15/trainer_1/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7707316038b3b27015684edbd374dd9ea3cd5b75dc885cf2492ea07ab5a6185 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15/trainer_1/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15/trainer_1/config.json new file mode 100644 index 0000000000000000000000000000000000000000..799390566c2f1ae55f4186ea89cf46b811b540e7 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15/trainer_1/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": 48828, + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 40, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15/trainer_1/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15/trainer_1/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..a00c800a3b1c4c3a8304422b09b9959614354940 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15/trainer_1/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 73.52041473388672, "l1_loss": 528.9073120117188, "l0": 40.0, "frac_variance_explained": 0.7958128571510314, "cossim": 0.922706949710846, "l2_ratio": 0.9236413538455963, "relative_reconstruction_bias": 1.0000072419643402, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.6429500341415406, "loss_zero": 12.452933025360107, "frac_recovered": 0.980656909942627, "frac_alive": 0.293511301279068, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15/trainer_2/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15/trainer_2/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..b76277bc984c449b28dfe40f86ea8f01e2b3e7c1 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15/trainer_2/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e664315c5a2fbf36a6ce89470499048315fdec57489514993683faeb6e3dac36 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15/trainer_2/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15/trainer_2/config.json new file mode 100644 index 0000000000000000000000000000000000000000..343e98ff257e6c669e770a76685cfd8257100a5b --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15/trainer_2/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": 48828, + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 80, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15/trainer_2/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15/trainer_2/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..13a2a04828e8b8ee4c07c65ee7b47947e12711ea --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15/trainer_2/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 63.84604949951172, "l1_loss": 781.8043701171875, "l0": 80.0, "frac_variance_explained": 0.886254632472992, "cossim": 0.9405465126037598, "l2_ratio": 0.9406228601932526, "relative_reconstruction_bias": 0.999057823419571, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5650960922241213, "loss_zero": 12.452933025360107, "frac_recovered": 0.988433837890625, "frac_alive": 0.4686957597732544, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15/trainer_3/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15/trainer_3/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..df9443b41e764419df91623c5010de06298ec298 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15/trainer_3/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16e030282c9152fa853bba4b7b04f6f63753f3d6e0ca62f8256fa6dc8c96994c +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15/trainer_3/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15/trainer_3/config.json new file mode 100644 index 0000000000000000000000000000000000000000..fc4a3f63891f56258b6a2dc8e824ce60be3016e2 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15/trainer_3/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": 48828, + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 160, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15/trainer_3/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15/trainer_3/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..cb8c93f12ffda9052d70fb8ed506987447aaa5b2 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15/trainer_3/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 56.946068572998044, "l1_loss": 1005.3937927246094, "l0": 160.0, "frac_variance_explained": 0.8639537990093231, "cossim": 0.9545037567615509, "l2_ratio": 0.9538737773895264, "relative_reconstruction_bias": 0.9995295643806458, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.524174451828003, "loss_zero": 12.452933025360107, "frac_recovered": 0.9925116181373597, "frac_alive": 0.6183810830116272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15/trainer_4/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15/trainer_4/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..dd68b51e76ef51852122e5deb7460efbe2498b98 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15/trainer_4/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c244eece24c4584e38d83fd89bab7df7fc49790872aac80872f7f07c3a144c7f +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15/trainer_4/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15/trainer_4/config.json new file mode 100644 index 0000000000000000000000000000000000000000..810b6ce14c9c3a781c98938f3f0c02be972126db --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15/trainer_4/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": 48828, + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 320, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15/trainer_4/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15/trainer_4/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..9b0b11927aa84970987fdd54bdfdd57fdc9c2959 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15/trainer_4/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 49.31732063293457, "l1_loss": 1596.6747680664062, "l0": 320.0, "frac_variance_explained": 0.9155429899692535, "cossim": 0.9657738864421844, "l2_ratio": 0.9659751355648041, "relative_reconstruction_bias": 0.9998931109905242, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.494556260108948, "loss_zero": 12.452933025360107, "frac_recovered": 0.9954632163047791, "frac_alive": 0.7400173544883728, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15/trainer_5/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15/trainer_5/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..44d7401871483669aa7193c2856583ba8b190fda --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15/trainer_5/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97ba85e041c4696e294e05574aa3e6d4e2a5ca0afe7870672ded1e305434b39d +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15/trainer_5/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15/trainer_5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..cbb4035903196bf0b07225ca1dd920b855f8c11e --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15/trainer_5/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": 48828, + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 640, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15/trainer_5/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15/trainer_5/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..8bece77dd812d0a7e354529364ef6cb82cf4c932 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15/trainer_5/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 41.20635986328125, "l1_loss": 3198.0086669921875, "l0": 640.0, "frac_variance_explained": 0.9310864806175232, "cossim": 0.9767521739006042, "l2_ratio": 0.9770789384841919, "relative_reconstruction_bias": 1.0002823233604432, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.4734957695007322, "loss_zero": 12.452933025360107, "frac_recovered": 0.997559267282486, "frac_alive": 0.7078993320465088, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_0/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..6c19c066a8abee0cfcae61867e589a3ab88ce8c9 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24a8064f065b903f611ef28b027bf4c72c16cf1f2a136b3a0889ee684def01e7 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_0/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..65c624ea3e7a807dbe026a8ecec3ff1fd990cf0a --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "0", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 20, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_0/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..f8063faa8847316c9f63ce8cfc9bb09467518f83 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 217.9843536376953, "l1_loss": 167.01827087402344, "l0": 20.0, "frac_variance_explained": 0.05807469487190246, "cossim": 0.28542253077030183, "l2_ratio": 0.17943892478942872, "relative_reconstruction_bias": 0.6283090710639954, "loss_original": 2.4489264488220215, "loss_reconstructed": 14.690529346466064, "loss_zero": 12.452933025360107, "frac_recovered": -0.22370559722185135, "frac_alive": 0.1353081613779068, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_19528/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_19528/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..6e813219848a7b2e71b7adf79ce96e035acd6521 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_19528/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bbaadfc63d700ce313abd9f8bffbae3156724cc38047c40c3767930ef26a6c0 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_19528/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_19528/config.json new file mode 100644 index 0000000000000000000000000000000000000000..350d49a9f44daed67a9ec1a99b714d7854376c18 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_19528/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "19528", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 20, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_19528/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_19528/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..156e4cb19f367c77f6f2208654be03f1488d6171 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_19528/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 82.89589004516601, "l1_loss": 387.73180236816404, "l0": 19.99583339691162, "frac_variance_explained": 0.7404085159301758, "cossim": 0.9042332589626312, "l2_ratio": 0.9021734893321991, "relative_reconstruction_bias": 0.9979009211063385, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.874876618385315, "loss_zero": 12.452933025360107, "frac_recovered": 0.9574995279312134, "frac_alive": 0.1548936665058136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_29292/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_29292/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..4d13d25343201c70aedfe4534e949e8de2fb6bc4 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_29292/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a1db3664c7db04e2b60d5216fa669b84cf8483057a7cd7cba8f11d07eb25bf3 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_29292/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_29292/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1b778bcf2a3d591bd39d1768c9f830f642ade32c --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_29292/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "29292", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 20, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_29292/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_29292/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..212279316df942e12947ac2af011a8f97aef8cea --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_29292/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 82.90822525024414, "l1_loss": 392.2499267578125, "l0": 19.99583339691162, "frac_variance_explained": 0.735715925693512, "cossim": 0.903123289346695, "l2_ratio": 0.9033664643764496, "relative_reconstruction_bias": 1.0007203817367554, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.8645946979522705, "loss_zero": 12.452933025360107, "frac_recovered": 0.9585281014442444, "frac_alive": 0.1569010466337204, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_4882/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_4882/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..e675e3dcbf13e2ff2b2b028a43c0c2bbb465a9ac --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_4882/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69b6bbbbd8b39d1f6978220529a187dcbd0fd6f4c1df82d81cb250568dbeae2f +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_4882/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c229983f0fd58ddb65659d723482fc751a5f2c3d --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_4882/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "4882", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 20, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_4882/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..b92ceddf51c0e9bee267d2cde2a12e5dbfa0a3c3 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 87.38898391723633, "l1_loss": 437.2335723876953, "l0": 20.0, "frac_variance_explained": 0.7685397148132325, "cossim": 0.8913267970085144, "l2_ratio": 0.8944155275821686, "relative_reconstruction_bias": 1.001406443119049, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.9732311964035034, "loss_zero": 12.452933025360107, "frac_recovered": 0.9476731896400452, "frac_alive": 0.1499565988779068, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_9764/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_9764/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..1a23cc6c2ad3162c967fb6eae419a19f22727c78 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_9764/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62390554921f6b3eae6d5ce99d0a05d23203255526fe002bba9bd9a334bfb1ba +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_9764/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_9764/config.json new file mode 100644 index 0000000000000000000000000000000000000000..6823a309b6fcfce430953904a3a9bad2e3819d90 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_9764/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "9764", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 20, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_9764/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_9764/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..d5955a10d98ac930840a317e2d3c25529ebdf415 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_9764/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 81.98058166503907, "l1_loss": 392.0604583740234, "l0": 20.0, "frac_variance_explained": 0.741108912229538, "cossim": 0.8997536063194275, "l2_ratio": 0.9000672399997711, "relative_reconstruction_bias": 1.0006576836109162, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.9072537422180176, "loss_zero": 12.452933025360107, "frac_recovered": 0.9542666256427765, "frac_alive": 0.1556532084941864, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_0/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..6c19c066a8abee0cfcae61867e589a3ab88ce8c9 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24a8064f065b903f611ef28b027bf4c72c16cf1f2a136b3a0889ee684def01e7 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_0/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..082c15a181fbf4daa7b8cf7a6d269663ff93486d --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "0", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 40, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_0/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..c5419792413196045a3842cd0a827ab0adb7e250 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 194.64895782470703, "l1_loss": 298.46790466308596, "l0": 40.0, "frac_variance_explained": 0.10361332297325135, "cossim": 0.37167030572891235, "l2_ratio": 0.2493060812354088, "relative_reconstruction_bias": 0.6704857051372528, "loss_original": 2.4489264488220215, "loss_reconstructed": 14.783217716217042, "loss_zero": 12.452933025360107, "frac_recovered": -0.23288672268390656, "frac_alive": 0.2071397602558136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_19528/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_19528/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..88186e941a17cc8120a986a4f6805f35acd06fbb --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_19528/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72028dfba3365538798b9fe63417a47c357b3c253fca267f571d16edff68fd1b +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_19528/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_19528/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8b713eeef258b3e35f0f8b2a60c0963d31211eb6 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_19528/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "19528", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 40, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_19528/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_19528/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..c367dee1541e4c650d12a820694c4529e6fde338 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_19528/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 74.34532241821289, "l1_loss": 536.4803894042968, "l0": 39.99583358764649, "frac_variance_explained": 0.7928689062595368, "cossim": 0.9240649998188019, "l2_ratio": 0.9261127531528472, "relative_reconstruction_bias": 1.0024328827857971, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.6611002922058105, "loss_zero": 12.452933025360107, "frac_recovered": 0.9788522481918335, "frac_alive": 0.289116770029068, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_29292/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_29292/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..e662ac1ee2c2b6971c5ffb48f8288c7d88ef4b69 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_29292/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3487ba14b661cc34cc3ca216c94acfc98b3aadefa0313fae9e8260900d5c8eb +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_29292/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_29292/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ffe5ae0b2f470cba787ab0329afa3187e824060e --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_29292/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "29292", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 40, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_29292/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_29292/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..ca256d9918b9514dc1e45e26cb09b29254031a57 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_29292/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 71.34622116088867, "l1_loss": 549.1552490234375, "l0": 39.99166717529297, "frac_variance_explained": 0.8392110764980316, "cossim": 0.9306858479976654, "l2_ratio": 0.9335777938365937, "relative_reconstruction_bias": 1.0028102040290832, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.652607035636902, "loss_zero": 12.452933025360107, "frac_recovered": 0.979695725440979, "frac_alive": 0.2953559160232544, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_4882/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_4882/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..7ca5e4744107841179aa07f21f9b4782b9099725 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_4882/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbdf6cdbf31f8abd946b9588c6a0178a594f7b834cb6236d5d608f7f51d25a02 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_4882/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..9d8e3f8a73a15341e8f014e126c892a47fd4c394 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_4882/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "4882", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 40, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_4882/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..14e233ce8cf8a926fbb6b7d70799d8c3da66355c --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 77.56395111083984, "l1_loss": 508.2853271484375, "l0": 40.0, "frac_variance_explained": 0.7621201872825623, "cossim": 0.9176748812198638, "l2_ratio": 0.9192310273647308, "relative_reconstruction_bias": 1.0023752927780152, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.713756060600281, "loss_zero": 12.452933025360107, "frac_recovered": 0.9735915839672089, "frac_alive": 0.2897135317325592, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_9764/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_9764/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..602a26cba3ae9e8d79086be956b04636ac6dc6b0 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_9764/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27457fa5545a68b76836aa648d90161d4b3aedb5de4e63cadee768e3231b1237 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_9764/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_9764/config.json new file mode 100644 index 0000000000000000000000000000000000000000..abf76b86473b8114fa13542e00ad539d4848917a --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_9764/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "9764", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 40, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_9764/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_9764/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..73c9ad1bc7bb4d3e9f51d6096309b4a0ce83e060 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_9764/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 75.81767044067382, "l1_loss": 621.8117370605469, "l0": 40.0, "frac_variance_explained": 0.8433825016021729, "cossim": 0.920050710439682, "l2_ratio": 0.9210049569606781, "relative_reconstruction_bias": 1.0010946571826935, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.673509454727173, "loss_zero": 12.452933025360107, "frac_recovered": 0.9776096105575561, "frac_alive": 0.2875434160232544, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_0/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..6c19c066a8abee0cfcae61867e589a3ab88ce8c9 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24a8064f065b903f611ef28b027bf4c72c16cf1f2a136b3a0889ee684def01e7 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_0/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..37ed4b577ad5d22d8db065771a92a879a0fb1c96 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "0", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 80, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_0/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..d5a17408c86b060f0af38c3229425cbcb10aa52b --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 213.44998168945312, "l1_loss": 623.8083557128906, "l0": 80.0, "frac_variance_explained": 0.16039682626724244, "cossim": 0.4678017109632492, "l2_ratio": 0.34769041538238527, "relative_reconstruction_bias": 0.7274929761886597, "loss_original": 2.4489264488220215, "loss_reconstructed": 13.109274959564209, "loss_zero": 12.452933025360107, "frac_recovered": -0.06557218059897423, "frac_alive": 0.3215060830116272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_19528/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_19528/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..12bdb2a7d52bf8bfee48935ee33d0ee9cba80152 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_19528/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c571bc736b83ccebedc5d8eb39125f6e40fd1b95809117861a7dfe2b465a470 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_19528/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_19528/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3299fe870b7cf6653de67ba831040725a7e4c01a --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_19528/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "19528", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 80, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_19528/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_19528/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..568ae3e6a9bae8fb98da23569ab48a8dd83d52f6 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_19528/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 66.70211791992188, "l1_loss": 793.0866271972657, "l0": 80.0, "frac_variance_explained": 0.870151698589325, "cossim": 0.9401491701602935, "l2_ratio": 0.9408006072044373, "relative_reconstruction_bias": 1.000737911462784, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.575274610519409, "loss_zero": 12.452933025360107, "frac_recovered": 0.9874190568923951, "frac_alive": 0.4680989682674408, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_29292/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_29292/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..c452267cc91a7d455171c1f9a359eb6dd2e219e1 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_29292/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c40d38d6cb3442a01e2654064972f5d42d3dd0ab6482a162ea89516655dd284e +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_29292/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_29292/config.json new file mode 100644 index 0000000000000000000000000000000000000000..58d4f933d1f485cb0ce829bd42238acbd37d9471 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_29292/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "29292", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 80, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_29292/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_29292/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..c69b70f8ce48657b095bc3595225fd37ad2ab10f --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_29292/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 64.55712928771973, "l1_loss": 725.3825256347657, "l0": 79.9, "frac_variance_explained": 0.8422860383987427, "cossim": 0.9409180641174316, "l2_ratio": 0.942666745185852, "relative_reconstruction_bias": 1.0019870102405548, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5692342281341554, "loss_zero": 12.452933025360107, "frac_recovered": 0.9880201041698455, "frac_alive": 0.4743381142616272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_4882/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_4882/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..fe02096fa9e6c9b4553ba1ea6b633e8c7e87d035 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_4882/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8e3cd98b4452c0bf3d80b80f4aebbcbd0345efff61faceb63b994439094b8ef +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_4882/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d1ba0f2f61718c71a88bd24d656a93af8a1f3e1c --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_4882/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "4882", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 80, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_4882/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..0c9a2a77aa09f081516657896ef0999be0ea0ebb --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 69.76554336547852, "l1_loss": 857.1462219238281, "l0": 80.0, "frac_variance_explained": 0.8729984104633332, "cossim": 0.9301912546157837, "l2_ratio": 0.931358927488327, "relative_reconstruction_bias": 1.0026703178882599, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.610351014137268, "loss_zero": 12.452933025360107, "frac_recovered": 0.9839148700237275, "frac_alive": 0.4821506142616272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_9764/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_9764/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..0cfd5d28f36e13de75fc2e6aca3275459f88bd94 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_9764/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f93942149977802d9cf9b487dbae2bc92dd26285f7b611ba05ffd5ae0fa38dc +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_9764/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_9764/config.json new file mode 100644 index 0000000000000000000000000000000000000000..fd0c461722740f3b5ae423e9b6cd5bb8ef060542 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_9764/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "9764", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 80, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_9764/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_9764/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..581967c5d7bebf098dcda6ba2a2f22cd0c1d53b5 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_9764/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 66.22293586730957, "l1_loss": 923.0601745605469, "l0": 80.0, "frac_variance_explained": 0.900476622581482, "cossim": 0.9411832451820373, "l2_ratio": 0.9404438555240631, "relative_reconstruction_bias": 1.0011389255523682, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.583176279067993, "loss_zero": 12.452933025360107, "frac_recovered": 0.9866274833679199, "frac_alive": 0.466796875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_0/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..6c19c066a8abee0cfcae61867e589a3ab88ce8c9 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24a8064f065b903f611ef28b027bf4c72c16cf1f2a136b3a0889ee684def01e7 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_0/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8626192384a1ad31e7e14cdd733b0f0693bf3b59 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "0", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 160, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_0/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..23c21df6897d809097b2162e62f0f6f5b148b09a --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 166.00693054199218, "l1_loss": 1015.9643981933593, "l0": 160.0, "frac_variance_explained": 0.2615611255168915, "cossim": 0.5708441972732544, "l2_ratio": 0.494153892993927, "relative_reconstruction_bias": 0.8625474154949189, "loss_original": 2.4489264488220215, "loss_reconstructed": 8.447930812835693, "loss_zero": 12.452933025360107, "frac_recovered": 0.40063078701496124, "frac_alive": 0.455620676279068, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_19528/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_19528/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..f92d41496f4c69ffcacd1f86fd2a841a2d8a35ee --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_19528/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95f2857f087f113af7c921bae70819bdd4b6dbefc969debb2094726073b3f5d2 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_19528/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_19528/config.json new file mode 100644 index 0000000000000000000000000000000000000000..48b29f21df699ec5534c50d7557625b501b981d0 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_19528/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "19528", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 160, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_19528/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_19528/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..40b2bbfef875a366226b11bc0c97db382aa38f02 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_19528/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 58.65425682067871, "l1_loss": 1061.904736328125, "l0": 160.0, "frac_variance_explained": 0.8731440126895904, "cossim": 0.9517025589942932, "l2_ratio": 0.9524573981761932, "relative_reconstruction_bias": 1.000987672805786, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5311554431915284, "loss_zero": 12.452933025360107, "frac_recovered": 0.9918226838111878, "frac_alive": 0.63623046875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_29292/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_29292/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..b6924676eb6abd118439a2ef95af724e95827f12 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_29292/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea9653ec8380d0b6539d582ce9202f13fbce328efe92ba918e4ba8a3b9956d5f +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_29292/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_29292/config.json new file mode 100644 index 0000000000000000000000000000000000000000..270728f33062c51b34181a13603128f2ffca7658 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_29292/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "29292", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 160, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_29292/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_29292/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..6d4e36830803e9c19fd33cfcfe9b76e44ce36d53 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_29292/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 58.884486770629884, "l1_loss": 1005.7183471679688, "l0": 159.75, "frac_variance_explained": 0.8559818863868713, "cossim": 0.9523160696029663, "l2_ratio": 0.9533852934837341, "relative_reconstruction_bias": 1.0005861222743988, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.527032709121704, "loss_zero": 12.452933025360107, "frac_recovered": 0.9922284483909607, "frac_alive": 0.6035698652267456, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_4882/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_4882/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..ef2113519a1387cc3b29a80febb34f47453d57dc --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_4882/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3f73f70ed9c722fc78d282cf4fb64dcb1bbb98943068f2a0bbd4c5cf8feec62 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_4882/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..371879e047083e9de7693ff14081a487c3d32440 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_4882/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "4882", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 160, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_4882/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..b44a42841ed122c169e39c3ffa1683c0fcbd83a6 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 63.86768913269043, "l1_loss": 1022.4768981933594, "l0": 160.0, "frac_variance_explained": 0.8641524732112884, "cossim": 0.9425764679908752, "l2_ratio": 0.9433953881263732, "relative_reconstruction_bias": 1.0005350947380065, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5517275810241697, "loss_zero": 12.452933025360107, "frac_recovered": 0.9897631585597992, "frac_alive": 0.7082248330116272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_9764/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_9764/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..e9003793a32d7a11aad8eeb8f931defa8de6df06 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_9764/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d85c527d7ce68fcb52d1baef45654867f956c0f28bb6ab96991a121ddb20d918 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_9764/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_9764/config.json new file mode 100644 index 0000000000000000000000000000000000000000..75bedc485fddb9f3262863395b6f1c4f700ff1f2 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_9764/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "9764", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 160, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_9764/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_9764/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..9907b1041dfdc76f6060c53051eb8c02a032b2a2 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_9764/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 60.64662590026855, "l1_loss": 980.9309875488282, "l0": 160.0, "frac_variance_explained": 0.8536317765712738, "cossim": 0.9488173604011536, "l2_ratio": 0.9503636538982392, "relative_reconstruction_bias": 1.0021162509918213, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.536962103843689, "loss_zero": 12.452933025360107, "frac_recovered": 0.9912424206733703, "frac_alive": 0.6449110507965088, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_0/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..6c19c066a8abee0cfcae61867e589a3ab88ce8c9 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24a8064f065b903f611ef28b027bf4c72c16cf1f2a136b3a0889ee684def01e7 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_0/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8f836fdf8fbd99beb8f316dffa13bef30d985bcc --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "0", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 320, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_0/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..488dd36a437463c426e0c0170704a7ace97a966f --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 159.18578643798827, "l1_loss": 1954.7937622070312, "l0": 320.0, "frac_variance_explained": 0.3460752248764038, "cossim": 0.6690242469310761, "l2_ratio": 0.7075321555137635, "relative_reconstruction_bias": 1.0470044791698456, "loss_original": 2.4489264488220215, "loss_reconstructed": 5.50926251411438, "loss_zero": 12.452933025360107, "frac_recovered": 0.69437575340271, "frac_alive": 0.6080729365348816, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_19528/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_19528/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..2e410fc70d650c7a90da997b783e0c80df3f4ad4 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_19528/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f66af96888f72fe025b0003cb59b993699dfe79072fb453d7339599addb15dde +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_19528/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_19528/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e6e2a0c694595e7a4f79ccdad608665ead15ec50 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_19528/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "19528", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 320, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_19528/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_19528/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..ad83c4081722766d0b4bdb7e0628d844eae289bb --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_19528/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 51.75811920166016, "l1_loss": 1565.4125244140625, "l0": 319.5833343505859, "frac_variance_explained": 0.9095131993293762, "cossim": 0.9622929751873016, "l2_ratio": 0.9627440869808197, "relative_reconstruction_bias": 0.999713146686554, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.497648334503174, "loss_zero": 12.452933025360107, "frac_recovered": 0.9951550543308259, "frac_alive": 0.7630751132965088, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_29292/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_29292/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..9c019017fbbb04488eb2cda78fe436016ae0b899 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_29292/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:739b4815254d12374f53a90fe337bc09b322aafc969cbbfea3a6ed12affc6b77 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_29292/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_29292/config.json new file mode 100644 index 0000000000000000000000000000000000000000..4a3c663504f9a72c6a7c2689a66a5dd267494669 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_29292/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "29292", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 320, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_29292/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_29292/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..69754577dcdedccf96c37ac95b504a3d3f7dece5 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_29292/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 51.95920066833496, "l1_loss": 1560.7103271484375, "l0": 320.0, "frac_variance_explained": 0.9020806074142456, "cossim": 0.9633390665054321, "l2_ratio": 0.9633306562900543, "relative_reconstruction_bias": 1.0002520322799682, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.495920014381409, "loss_zero": 12.452933025360107, "frac_recovered": 0.9953308701515198, "frac_alive": 0.7234700322151184, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_4882/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_4882/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..56a790e15affb254cf993562d244b31e87a032da --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_4882/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e11c54c7bb4aac7a88e727ef33ccd7ea673ebd462b0c8f0679bb8d9c435aed7 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_4882/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1d7d16966e81bcdabb0eceeccfca148ee3662604 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_4882/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "4882", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 320, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_4882/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..00afbf9f297c5c2faadd33b41d5dfae0dd572459 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 57.49758071899414, "l1_loss": 1233.705810546875, "l0": 320.0, "frac_variance_explained": 0.8589035868644714, "cossim": 0.9540941476821899, "l2_ratio": 0.9550706088542938, "relative_reconstruction_bias": 1.001019501686096, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5227089881896974, "loss_zero": 12.452933025360107, "frac_recovered": 0.9926598310470581, "frac_alive": 0.8812391757965088, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_9764/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_9764/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..b55a6e9528a8348093f8c7ae43db8452e621f964 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_9764/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8d947ffb8ed1719f667a1bba813b77ae34158fb1c343d0c3c90d8d6a8794d1e +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_9764/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_9764/config.json new file mode 100644 index 0000000000000000000000000000000000000000..9043515e154628ab859cd8fd995389ee54d41c80 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_9764/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "9764", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 320, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_9764/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_9764/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..e4e4e62adf8fd0a810064e072b93c4b55f323862 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_9764/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 54.3418586730957, "l1_loss": 1605.89306640625, "l0": 320.0, "frac_variance_explained": 0.9077378273010254, "cossim": 0.959366899728775, "l2_ratio": 0.9596228897571564, "relative_reconstruction_bias": 1.0003112256526947, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.502269673347473, "loss_zero": 12.452933025360107, "frac_recovered": 0.9946961760520935, "frac_alive": 0.8001301884651184, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_0/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..6c19c066a8abee0cfcae61867e589a3ab88ce8c9 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24a8064f065b903f611ef28b027bf4c72c16cf1f2a136b3a0889ee684def01e7 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_0/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0d848c7cd1dffbab7192dc37f74b8dea767af582 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "0", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 640, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_0/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..0ef49c04d3c460fd4a3257cfbe9b0318d128f286 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 163.25163879394532, "l1_loss": 3762.0339111328126, "l0": 640.0, "frac_variance_explained": 0.3636908054351807, "cossim": 0.7548954904079437, "l2_ratio": 1.0260148525238038, "relative_reconstruction_bias": 1.3213130354881286, "loss_original": 2.4489264488220215, "loss_reconstructed": 3.8931538105010985, "loss_zero": 12.452933025360107, "frac_recovered": 0.8558285892009735, "frac_alive": 0.7422417402267456, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_19528/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_19528/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..a54e3e033b76b643074f4a76c501792c08089352 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_19528/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17cf082d6170f18ee54985a3ac4241167eafdfc3cf56e91a6266fb6ab3b4e015 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_19528/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_19528/config.json new file mode 100644 index 0000000000000000000000000000000000000000..2261264e639ff9139c7d68b907529c15f2b26cd6 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_19528/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "19528", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 640, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_19528/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_19528/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..8d53ffca1807d9b2780fde98fa5ec883eaed81a5 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_19528/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 43.27035827636719, "l1_loss": 3213.1107421875, "l0": 639.9125, "frac_variance_explained": 0.9495538651943207, "cossim": 0.9743721067905426, "l2_ratio": 0.9743786692619324, "relative_reconstruction_bias": 0.9983949840068818, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.47742965221405, "loss_zero": 12.452933025360107, "frac_recovered": 0.9971720099449157, "frac_alive": 0.7562934160232544, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_29292/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_29292/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..25ec8820167f7fb69a9a49b5c3bf7084781401bc --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_29292/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e681a293d7e4746baae0cca279eac81151ef221c29aea115cb2f85510b66a8c3 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_29292/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_29292/config.json new file mode 100644 index 0000000000000000000000000000000000000000..70911636e8ea45b22f36cd93d726ad89523d00d1 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_29292/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "29292", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 640, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_29292/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_29292/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..d87adbbf6e2da35ab8bf19c8b8ec61ded65b9f94 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_29292/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 41.98759498596191, "l1_loss": 3107.3690185546875, "l0": 639.3791748046875, "frac_variance_explained": 0.9326578259468079, "cossim": 0.9757527709007263, "l2_ratio": 0.9768386065959931, "relative_reconstruction_bias": 1.0004763901233673, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.47405526638031, "loss_zero": 12.452933025360107, "frac_recovered": 0.9975033581256867, "frac_alive": 0.733018696308136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_4882/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_4882/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..d8fa6a511b62cdae7f2ced4b664a30e7ab685221 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_4882/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:416dad9cccbb172b9579e4ec9e397b55f9ea45218053a6229fb57426e19d62b4 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_4882/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a2672d9a3a9ad4de399da3f3fa5a8ea66761dd1 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_4882/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "4882", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 640, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_4882/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..6fbd031051ac6d286bcf78829d8594b14cdadec3 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 46.9846851348877, "l1_loss": 2576.3406005859374, "l0": 639.8166687011719, "frac_variance_explained": 0.9145628452301026, "cossim": 0.9695311069488526, "l2_ratio": 0.9708778202533722, "relative_reconstruction_bias": 1.0014521181583405, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.4830697774887085, "loss_zero": 12.452933025360107, "frac_recovered": 0.9966086566448211, "frac_alive": 0.8044704794883728, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_9764/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_9764/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..2181d9a78cd6ba801425f14190e2e6d5a6b38e2e --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_9764/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ed8f521587df2f4d9b95d69a5de777ccfa65c1e0bea4224da919c4dc1ead92c +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_9764/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_9764/config.json new file mode 100644 index 0000000000000000000000000000000000000000..6e5971fbd503ab2966b79d0ac6bab76adedc09ec --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_9764/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "9764", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 640, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_9764/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_9764/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..ad19027d2b6869c73b054f845159eb7a94ad0c23 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_9764/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 45.278759765625, "l1_loss": 2868.8764404296876, "l0": 639.9916687011719, "frac_variance_explained": 0.9181702017784119, "cossim": 0.9717496931552887, "l2_ratio": 0.9726868629455566, "relative_reconstruction_bias": 1.0011424362659453, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.482337164878845, "loss_zero": 12.452933025360107, "frac_recovered": 0.9966827273368836, "frac_alive": 0.7840169072151184, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19/trainer_0/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19/trainer_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..38ce1c5798ededd4a682663c81e878d86fdb6091 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19/trainer_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d41b1b16f8c36c919b9d4e13629cb02693fc6c9f5a180c20833f25248e9abc29 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19/trainer_0/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19/trainer_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a7f1a19f8cc3a8806909b838faf11629805a6ca1 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19/trainer_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": 48828, + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 20, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19/trainer_0/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19/trainer_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..2aededac48575199b02f4d4e59853cadd153bcf2 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19/trainer_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 125.56976852416992, "l1_loss": 637.6160888671875, "l0": 20.0, "frac_variance_explained": 0.7849961221218109, "cossim": 0.8981874704360961, "l2_ratio": 0.8985657155513763, "relative_reconstruction_bias": 0.9984611392021179, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.8372591972351073, "loss_zero": 12.452933025360107, "frac_recovered": 0.9612571775913239, "frac_alive": 0.1710612028837204, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19/trainer_1/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19/trainer_1/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..c75f1fb1e96f46627b90c65a9fe5bb301981a671 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19/trainer_1/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fa7c27154298715823d75a573aea55344b7e0eb250cd3ad8b57896bd8631522 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19/trainer_1/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19/trainer_1/config.json new file mode 100644 index 0000000000000000000000000000000000000000..cc704f0f25fc3f265722916ef4d51692f9976fac --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19/trainer_1/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": 48828, + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 40, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19/trainer_1/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19/trainer_1/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..beff2994bd86060c9d03f3295f8049dd1491b9b0 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19/trainer_1/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 110.1037582397461, "l1_loss": 789.6739196777344, "l0": 40.0, "frac_variance_explained": 0.7854969263076782, "cossim": 0.9232691109180451, "l2_ratio": 0.9242902994155884, "relative_reconstruction_bias": 1.0012201130390168, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.6683587312698362, "loss_zero": 12.452933025360107, "frac_recovered": 0.9781295597553253, "frac_alive": 0.3113064169883728, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19/trainer_2/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19/trainer_2/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..283dd535f26da01a858f5d0be0bcf4e0e2f648da --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19/trainer_2/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5cbec9572fb96c5991cb3a18dedcb54dbe3259399f1d93165581856a057541f8 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19/trainer_2/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19/trainer_2/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3f4b007e6b0b8d232fdbdf3b7504ba0020bb8e43 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19/trainer_2/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": 48828, + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 80, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19/trainer_2/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19/trainer_2/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..0f72596903ec89b6ff70b949134484801357743c --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19/trainer_2/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 98.38441390991211, "l1_loss": 1133.5070922851562, "l0": 80.0, "frac_variance_explained": 0.8572348475456237, "cossim": 0.9394023716449738, "l2_ratio": 0.9390686810016632, "relative_reconstruction_bias": 1.0001055598258972, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5952465295791627, "loss_zero": 12.452933025360107, "frac_recovered": 0.9854330003261567, "frac_alive": 0.4769965410232544, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19/trainer_3/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19/trainer_3/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..2ed640a65d7e09b6e59aa294a3c06a077b4c3d19 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19/trainer_3/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e1bf7023a575f8455540fb3bdc1c93ee89ff97a2ce65166aec6cf69a7f53de8 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19/trainer_3/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19/trainer_3/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f30c8f88a09a9fbc1172521b4995c3107ebc97bf --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19/trainer_3/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": 48828, + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 160, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19/trainer_3/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19/trainer_3/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..73f3bd08506f234d21d558d135069d8d4b54a891 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19/trainer_3/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 90.80431671142578, "l1_loss": 1587.93564453125, "l0": 160.0, "frac_variance_explained": 0.8658691287040711, "cossim": 0.9485506474971771, "l2_ratio": 0.9495301187038422, "relative_reconstruction_bias": 1.0008091807365418, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.550182414054871, "loss_zero": 12.452933025360107, "frac_recovered": 0.9899285018444062, "frac_alive": 0.6395399570465088, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19/trainer_4/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19/trainer_4/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..b276d27b68712700a59e25ec95322e9e3a5358e8 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19/trainer_4/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25a1b08224f01a74f4f38e14b731afc221e1cc3601f7da705cd12ed9ac8ee8b1 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19/trainer_4/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19/trainer_4/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ebe940f7a6fe2fa4b23901dd15a8a353d494dc6d --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19/trainer_4/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": 48828, + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 320, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19/trainer_4/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19/trainer_4/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..7c71f008d6cf1cdf3e4a90c0efebf7f01228c0df --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19/trainer_4/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 78.99913330078125, "l1_loss": 2518.846826171875, "l0": 320.0, "frac_variance_explained": 0.8984488129615784, "cossim": 0.9610357165336609, "l2_ratio": 0.96125727891922, "relative_reconstruction_bias": 1.000020968914032, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.51603102684021, "loss_zero": 12.452933025360107, "frac_recovered": 0.9933296144008636, "frac_alive": 0.7503255009651184, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19/trainer_5/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19/trainer_5/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..411b9ce9b5f59878fa31c698a9b3ff479b2d1c18 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19/trainer_5/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5afe9f2c3a25db913c9e93b021f1206142ec2efd5cf11e0bcc3273d8890e5f86 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19/trainer_5/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19/trainer_5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3aa5f96e57ad8bc39d9f73ed122dd8a1d9dbc9d9 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19/trainer_5/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": 48828, + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 640, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19/trainer_5/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19/trainer_5/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..c5ff3493f7d77e7d9345a3c8aa3e29aa02ff5d05 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19/trainer_5/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 65.1626049041748, "l1_loss": 4910.604833984375, "l0": 640.0, "frac_variance_explained": 0.9424506366252899, "cossim": 0.9730470478534698, "l2_ratio": 0.9734025001525879, "relative_reconstruction_bias": 0.9997419714927673, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.4924126625061036, "loss_zero": 12.452933025360107, "frac_recovered": 0.9956821620464325, "frac_alive": 0.7953559160232544, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_0/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..6c19c066a8abee0cfcae61867e589a3ab88ce8c9 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24a8064f065b903f611ef28b027bf4c72c16cf1f2a136b3a0889ee684def01e7 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_0/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..cc189e44344b86b25de2a2ca647fd91bd28be143 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "0", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 20, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_0/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..2eed4f6616c06e52c729ef46e67c186641e32dc2 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 325.32696838378905, "l1_loss": 254.30257873535157, "l0": 20.0, "frac_variance_explained": 0.0569246768951416, "cossim": 0.28904485106468203, "l2_ratio": 0.18153051435947418, "relative_reconstruction_bias": 0.6281856417655944, "loss_original": 2.4489264488220215, "loss_reconstructed": 13.573721599578857, "loss_zero": 12.452933025360107, "frac_recovered": -0.11210165843367577, "frac_alive": 0.1335178017616272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_19528/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_19528/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..db3c07f150b895e321978842675c1dedf1b35798 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_19528/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b39cec6ba46a2e36c4c28e3ae36fcf65612084626fcba30f5dbd3cbb30d6659 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_19528/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_19528/config.json new file mode 100644 index 0000000000000000000000000000000000000000..84ff878880b123f7bca626b7a6df5371e2c82e53 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_19528/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "19528", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 20, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_19528/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_19528/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..11b1472e3c2d56b71de72bf5382246c8526e912d --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_19528/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 125.60377655029296, "l1_loss": 658.4857788085938, "l0": 20.0, "frac_variance_explained": 0.7844563603401185, "cossim": 0.897296804189682, "l2_ratio": 0.9025928020477295, "relative_reconstruction_bias": 1.0041317164897918, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.856287217140198, "loss_zero": 12.452933025360107, "frac_recovered": 0.9593625724315643, "frac_alive": 0.1681315153837204, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_29292/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_29292/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..d64df22735768ccec803031d86e3f6ad82aedf3a --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_29292/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ea22fba9d9510af4fae3fda1a44dc3364c3e93cda1b28b35bf87bae0d52b487 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_29292/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_29292/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3a403712fdcbee9ff1c46025ee905c56231298e7 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_29292/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "29292", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 20, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_29292/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_29292/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..af78c3d3c0fa7f624fc13720343985022ea45320 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_29292/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 127.23475341796875, "l1_loss": 601.5059997558594, "l0": 19.99583339691162, "frac_variance_explained": 0.7421157479286193, "cossim": 0.8986818313598632, "l2_ratio": 0.8988159477710724, "relative_reconstruction_bias": 0.9995011687278748, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.8491142988204956, "loss_zero": 12.452933025360107, "frac_recovered": 0.960075843334198, "frac_alive": 0.173611119389534, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_4882/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_4882/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..04456225bdf0b82fde9b36d7923864b0cb1bc56c --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_4882/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba8eec6bbf9bc74bfce5b3e3bd5f5c04cd64b80da6c393db190572c6d2d54f26 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_4882/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..becc0e12de4c42bad3cefafe5015399395f0c9fe --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_4882/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "4882", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 20, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_4882/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..891e6e4277764f282fb58c0caa6b29c464b77236 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 131.4759094238281, "l1_loss": 612.5915161132813, "l0": 20.0, "frac_variance_explained": 0.7235639810562133, "cossim": 0.8935853183269501, "l2_ratio": 0.8934765100479126, "relative_reconstruction_bias": 1.000877857208252, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.9022894859313966, "loss_zero": 12.452933025360107, "frac_recovered": 0.9547624230384827, "frac_alive": 0.1648762971162796, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_9764/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_9764/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..37c2e29230e7a0f1e46b3afc3227b8675408e954 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_9764/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a90649282f210fe5f4563f7103596d04c3d59e171e144eb1a523b5a4a30d8e2 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_9764/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_9764/config.json new file mode 100644 index 0000000000000000000000000000000000000000..86db6670c31bf2276fe749fc6deff1214c2ea9ae --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_9764/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "9764", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 20, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_9764/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_9764/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..0e99d442614bc47972d9e9cfb34bbd674526992a --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_9764/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 125.47390823364258, "l1_loss": 696.7124084472656, "l0": 20.0, "frac_variance_explained": 0.8148535013198852, "cossim": 0.9014196455478668, "l2_ratio": 0.9026202023029327, "relative_reconstruction_bias": 1.0012827217578888, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.871774458885193, "loss_zero": 12.452933025360107, "frac_recovered": 0.9578073680400848, "frac_alive": 0.1685112863779068, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_0/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..6c19c066a8abee0cfcae61867e589a3ab88ce8c9 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24a8064f065b903f611ef28b027bf4c72c16cf1f2a136b3a0889ee684def01e7 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_0/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..19a6ff7ed8bc077b8948abb0a449eb3e73622f4a --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "0", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 40, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_0/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..623b01e58ef493f2c6783567f1cd742772741a71 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 291.17974548339845, "l1_loss": 452.9191162109375, "l0": 40.0, "frac_variance_explained": 0.10459646582603455, "cossim": 0.37412114143371583, "l2_ratio": 0.2523693323135376, "relative_reconstruction_bias": 0.6730610311031342, "loss_original": 2.4489264488220215, "loss_reconstructed": 12.759316158294677, "loss_zero": 12.452933025360107, "frac_recovered": -0.030534679733682422, "frac_alive": 0.2155490517616272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_19528/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_19528/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..5913d906518ddb236f271c445ac52d3416a1a219 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_19528/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c908e8c863642ab7b33cea61df87569ffde0edd63f46eaeaf0f55d082d7abc8 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_19528/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_19528/config.json new file mode 100644 index 0000000000000000000000000000000000000000..6fb42674f704fa52761049054f91d6f087baae4c --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_19528/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "19528", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 40, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_19528/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_19528/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..2c1fe99300980b161985d89f31855b62681f30ca --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_19528/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 111.32142944335938, "l1_loss": 808.7847717285156, "l0": 40.0, "frac_variance_explained": 0.7986537694931031, "cossim": 0.9199103713035583, "l2_ratio": 0.9218922436237336, "relative_reconstruction_bias": 1.001532244682312, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.6824830293655397, "loss_zero": 12.452933025360107, "frac_recovered": 0.9767161905765533, "frac_alive": 0.3111436665058136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_29292/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_29292/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..f903666b579272a89c4438c0f03270b8f6d5bc21 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_29292/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45694844d8dd55972087d84f51b74789c758fe4ba8943a6cdf38a04fd8445c0a +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_29292/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_29292/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a81ec9ffd0b7e605fafa00e7d2f8228504a85e8c --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_29292/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "29292", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 40, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_29292/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_29292/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..79432dc8c73e54eaec23741f62e8c904b3f7d556 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_29292/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 109.28896865844726, "l1_loss": 861.9073913574218, "l0": 40.0, "frac_variance_explained": 0.8541500627994537, "cossim": 0.9224141240119934, "l2_ratio": 0.9242090940475464, "relative_reconstruction_bias": 1.001121735572815, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.672131633758545, "loss_zero": 12.452933025360107, "frac_recovered": 0.977747130393982, "frac_alive": 0.3065863847732544, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_4882/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_4882/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..d3cd8d6567f7f839bb2d67333acf25c466b145d2 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_4882/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:261be0e0dbd65e6ec5047256cbd8370901f174d654fb369f137bad418d29cb99 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_4882/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0a1e47484b7478447d3ddc2f59c97b972575cf90 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_4882/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "4882", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 40, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_4882/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..640368fd5d43a64262e97b9b801ef79f42dd70c3 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 120.10947036743164, "l1_loss": 823.4704406738281, "l0": 40.0, "frac_variance_explained": 0.7695347607135773, "cossim": 0.9084151089191437, "l2_ratio": 0.9094584107398986, "relative_reconstruction_bias": 1.0002698957920075, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.720379114151001, "loss_zero": 12.452933025360107, "frac_recovered": 0.9729329884052277, "frac_alive": 0.3050672709941864, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_9764/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_9764/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..6a4eadcf578ec48495aee909521b9f501b541574 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_9764/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74c677c7d15a24ae09b47fc8092b633aeda3da4c7d525bf40685c6a1b994b5ab +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_9764/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_9764/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ea29810f7f6e5da6f80d7febe244ac074491d77b --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_9764/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "9764", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 40, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_9764/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_9764/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..c4b23db294b2138926e99a3d908d3c2d627f3a0d --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_9764/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 113.54811477661133, "l1_loss": 853.2123229980468, "l0": 40.0, "frac_variance_explained": 0.8128659963607788, "cossim": 0.9165951013565063, "l2_ratio": 0.9161898851394653, "relative_reconstruction_bias": 0.9994335174560547, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.6936707973480223, "loss_zero": 12.452933025360107, "frac_recovered": 0.9755988657474518, "frac_alive": 0.3001302182674408, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_0/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..6c19c066a8abee0cfcae61867e589a3ab88ce8c9 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24a8064f065b903f611ef28b027bf4c72c16cf1f2a136b3a0889ee684def01e7 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_0/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e47eb7937f1a5b7e3b9b49285bb01feb802e8fe4 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "0", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 80, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_0/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..7adca99baaa4cd50b89f003de2f3dfa4b0814637 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 296.4805191040039, "l1_loss": 888.6040588378906, "l0": 80.0, "frac_variance_explained": 0.16352716088294983, "cossim": 0.47093939781188965, "l2_ratio": 0.3511354446411133, "relative_reconstruction_bias": 0.7378839135169983, "loss_original": 2.4489264488220215, "loss_reconstructed": 11.214914798736572, "loss_zero": 12.452933025360107, "frac_recovered": 0.12416270188987255, "frac_alive": 0.3341471254825592, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_19528/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_19528/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..a1806167816510763002499bad312d17534a4ab1 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_19528/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b37f51725cf22b70ec0cb67e4e029071294f517d4bba1719e2386adc8bd9aedb +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_19528/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_19528/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8b18a1db7d4f19b4946a3bd91097ef467448911c --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_19528/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "19528", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 80, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_19528/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_19528/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..adf0788b7c94c50f52df64416f085412dfb4305c --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_19528/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 103.5383804321289, "l1_loss": 1107.8344848632812, "l0": 80.0, "frac_variance_explained": 0.8265561103820801, "cossim": 0.9326160907745361, "l2_ratio": 0.9331199288368225, "relative_reconstruction_bias": 1.000600242614746, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.602700686454773, "loss_zero": 12.452933025360107, "frac_recovered": 0.9846857130527497, "frac_alive": 0.4645182192325592, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_29292/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_29292/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..11788b20ed110c94457a9c11971c6e94be8c1b05 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_29292/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14915c144073df6347356b6b9a508aaa37db967de492eceea805a144d6e35d91 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_29292/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_29292/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3035a34dcaef49fc324cec42ad646beb5f250d09 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_29292/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "29292", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 80, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_29292/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_29292/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..969c92e6842ba00b720dbbb372895107671e9b53 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_29292/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 101.87953567504883, "l1_loss": 1135.567333984375, "l0": 80.0, "frac_variance_explained": 0.8610418558120727, "cossim": 0.9316335439682006, "l2_ratio": 0.9330399096012115, "relative_reconstruction_bias": 1.0011163651943207, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.6009809970855713, "loss_zero": 12.452933025360107, "frac_recovered": 0.9848611414432525, "frac_alive": 0.4721137285232544, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_4882/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_4882/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..75c63af4a58f54deca0a38c96de207bedbb0688e --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_4882/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1c9fe9a24bdded187bee50c1ebd02e970ce59782c810b77a208de5761c92a7f +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_4882/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..9038130ae8d45ff8ba03ec9f81bd05d33efab4d2 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_4882/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "4882", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 80, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_4882/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..ab7b944f76577f091ef69ab047ca0034c8871414 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 106.97534942626953, "l1_loss": 1321.9621337890626, "l0": 80.0, "frac_variance_explained": 0.8818290293216705, "cossim": 0.9320721805095673, "l2_ratio": 0.9306482017040253, "relative_reconstruction_bias": 1.0000323891639709, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.634953498840332, "loss_zero": 12.452933025360107, "frac_recovered": 0.9814700245857239, "frac_alive": 0.4689127504825592, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_9764/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_9764/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..59ff8fac00abe79c418f951475068d7ca46d8364 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_9764/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0fc1b35e958695e83486a54b257ccb367acdb13b03b403aa3ab73c186e855ec +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_9764/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_9764/config.json new file mode 100644 index 0000000000000000000000000000000000000000..2f64d85f4a4a9eba656e59408f66c463b0fa9346 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_9764/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "9764", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 80, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_9764/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_9764/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..358256dc315c97e70b0ec819be9b184e1201f1bb --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_9764/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 104.74476852416993, "l1_loss": 1089.078857421875, "l0": 80.0, "frac_variance_explained": 0.80969318151474, "cossim": 0.9345323204994201, "l2_ratio": 0.9369295358657836, "relative_reconstruction_bias": 1.0023408353328704, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.6124815225601195, "loss_zero": 12.452933025360107, "frac_recovered": 0.9837063908576965, "frac_alive": 0.4520399272441864, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_0/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..6c19c066a8abee0cfcae61867e589a3ab88ce8c9 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24a8064f065b903f611ef28b027bf4c72c16cf1f2a136b3a0889ee684def01e7 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_0/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..49337ad19cbeecf4e0d60f57609138c7f7d44873 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "0", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 160, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_0/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..4297844cfd01e1e19f1aa3d55a35639c32c2e8ff --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 249.4814208984375, "l1_loss": 1557.7815795898437, "l0": 160.0, "frac_variance_explained": 0.2693453192710876, "cossim": 0.5758010566234588, "l2_ratio": 0.5003698885440826, "relative_reconstruction_bias": 0.8689843118190765, "loss_original": 2.4489264488220215, "loss_reconstructed": 6.904741621017456, "loss_zero": 12.452933025360107, "frac_recovered": 0.5548302710056305, "frac_alive": 0.4581163227558136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_19528/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_19528/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..851f5fc977a4a671634e5c94324d809dd2861b21 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_19528/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ddb09b5fecd1c0ac6566496a884c3cdc21c5406a5e0300af86200ff60aca0677 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_19528/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_19528/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d4b329592d1fce8a94c947d140f5dc6f0db38197 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_19528/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "19528", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 160, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_19528/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_19528/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..36e9f87e5b43611ee6f705238750fe32a14917f3 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_19528/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 91.7035026550293, "l1_loss": 1663.0046997070312, "l0": 160.0, "frac_variance_explained": 0.9000596463680267, "cossim": 0.9463433861732483, "l2_ratio": 0.9473640978336334, "relative_reconstruction_bias": 1.0002830386161805, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.560414028167725, "loss_zero": 12.452933025360107, "frac_recovered": 0.9889058411121369, "frac_alive": 0.6365559697151184, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_29292/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_29292/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..cc98e549b49dbeac4cb6c1a17ddcac5e96960f76 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_29292/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:909a6e308bdc604063b6311f8a0feb20c00c615b3b6c82cfb025cb1cb5ef48db +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_29292/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_29292/config.json new file mode 100644 index 0000000000000000000000000000000000000000..2dd230ed1820af759087a4287a2c9c6d8c3a76cd --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_29292/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "29292", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 160, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_29292/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_29292/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..69c7a5fc93e20d1151073684d642c6f192326451 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_29292/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 90.6346321105957, "l1_loss": 1634.7385131835938, "l0": 160.0, "frac_variance_explained": 0.8902020215988159, "cossim": 0.9480599403381348, "l2_ratio": 0.948556911945343, "relative_reconstruction_bias": 1.0000574469566346, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5537137746810914, "loss_zero": 12.452933025360107, "frac_recovered": 0.9895727157592773, "frac_alive": 0.6273871660232544, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_4882/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_4882/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..f50dba9ee64adc2ba879105afbe7868c7e02d362 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_4882/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9491ca3cea4cab2777c240b0e26abcc7a560e5bdba9c3f18894688e3c3d54e2 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_4882/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..cf314aa77c0fc30f8b86b2543fd2ec989a69bb80 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_4882/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "4882", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 160, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_4882/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..0963d49f93e2b326df1fc7535f55fd65d32c6b9d --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 96.5266212463379, "l1_loss": 1513.4279052734375, "l0": 160.0, "frac_variance_explained": 0.8468407332897187, "cossim": 0.9394637823104859, "l2_ratio": 0.9399228096008301, "relative_reconstruction_bias": 1.0007961332798003, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.583479952812195, "loss_zero": 12.452933025360107, "frac_recovered": 0.9866007685661315, "frac_alive": 0.7013888955116272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_9764/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_9764/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..bb0e94cc9a2a882980f71f80b07ace374fb68f6c --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_9764/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04301578f0cdf21ea8022284fd4c6ec9812c68073104bc0b00655d8cdd70dacf +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_9764/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_9764/config.json new file mode 100644 index 0000000000000000000000000000000000000000..68d5c69e45edfbf11daccf9ca0cacd3497e98ab8 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_9764/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "9764", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 160, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_9764/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_9764/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..3c35b66695d803ab296f3c508154d7b6cd53c422 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_9764/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 93.82733917236328, "l1_loss": 1625.241357421875, "l0": 160.0, "frac_variance_explained": 0.8716640174388885, "cossim": 0.9456298291683197, "l2_ratio": 0.944654256105423, "relative_reconstruction_bias": 0.9988911032676697, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5651297330856324, "loss_zero": 12.452933025360107, "frac_recovered": 0.9884312570095062, "frac_alive": 0.6468641757965088, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_0/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..6c19c066a8abee0cfcae61867e589a3ab88ce8c9 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24a8064f065b903f611ef28b027bf4c72c16cf1f2a136b3a0889ee684def01e7 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_0/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3e45f6a4f4c39a23377131bac277674e4576d2eb --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "0", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 320, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_0/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..349e692a8aeab7f20d5d607dd6ea38bbf1595765 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 224.30277557373046, "l1_loss": 2840.585400390625, "l0": 320.0, "frac_variance_explained": 0.35540305376052855, "cossim": 0.6738833487033844, "l2_ratio": 0.7177993714809418, "relative_reconstruction_bias": 1.0651414752006532, "loss_original": 2.4489264488220215, "loss_reconstructed": 4.629237842559815, "loss_zero": 12.452933025360107, "frac_recovered": 0.782224440574646, "frac_alive": 0.611436665058136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_19528/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_19528/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..24f35496b74d040174c552d136eb2bef1971fcc7 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_19528/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58aea2fc5cf71e4a3aeff6d5edf4f71cf09d779ce7f0cc960ced4b6e2c7c199c +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_19528/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_19528/config.json new file mode 100644 index 0000000000000000000000000000000000000000..238bb82b236b01d84bf1a5b39ce38ca7e7f546ce --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_19528/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "19528", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 320, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_19528/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_19528/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..80b9e786cb26f52c3f9cdf7a95a895b2b6fe0aa5 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_19528/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 80.71283187866212, "l1_loss": 2646.23154296875, "l0": 320.0, "frac_variance_explained": 0.9224247574806214, "cossim": 0.9600512623786926, "l2_ratio": 0.9615144371986389, "relative_reconstruction_bias": 1.0007874190807342, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5228251695632933, "loss_zero": 12.452933025360107, "frac_recovered": 0.9926568508148194, "frac_alive": 0.7572699785232544, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_29292/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_29292/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..d6e7d5dcb46105b20fb8e4aef2323bcee3e6eebc --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_29292/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:579c9bf52b1d67ca5521f7c7c225a2db3802ef18ceb5f0cbc93a75b9fd712245 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_29292/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_29292/config.json new file mode 100644 index 0000000000000000000000000000000000000000..bca7a6516587bb926415ae653764ccdf130feb3b --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_29292/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "29292", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 320, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_29292/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_29292/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..fd76171dfea82ca5728a545d67f57b92f506377a --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_29292/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 78.85371398925781, "l1_loss": 2574.3860595703127, "l0": 320.0, "frac_variance_explained": 0.9240821838378906, "cossim": 0.9596679151058197, "l2_ratio": 0.9616780996322631, "relative_reconstruction_bias": 0.9999951899051667, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.520471453666687, "loss_zero": 12.452933025360107, "frac_recovered": 0.9928891241550446, "frac_alive": 0.770128071308136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_4882/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_4882/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..33f1e24767c02c61599eb4a3ae8c834437c2a6ea --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_4882/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3980a2c1eb1e3bc006f16ab5d8f6b7df7f7562a5051fc97a3a93933864a647af +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_4882/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f54888f75155a944836265b59a5237bcaf9ed543 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_4882/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "4882", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 320, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_4882/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..2b33a4be7e3fbf25167fdc4c63fd53423fa77ba7 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 94.2914825439453, "l1_loss": 2336.6464233398438, "l0": 320.0, "frac_variance_explained": 0.902019590139389, "cossim": 0.9459708452224731, "l2_ratio": 0.9458859682083129, "relative_reconstruction_bias": 0.9999112665653229, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5462760448455812, "loss_zero": 12.452933025360107, "frac_recovered": 0.9903128445148468, "frac_alive": 0.905381977558136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_9764/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_9764/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..9d56a055292b8277ced3d57c24e4d2238974c6e0 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_9764/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bdc73f4e82739e93e144edec0ab37436ff64a14385082fd9e4d21a9838a21fcd +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_9764/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_9764/config.json new file mode 100644 index 0000000000000000000000000000000000000000..fa4cc2696f8b497a43f6531d76238436e4668100 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_9764/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "9764", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 320, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_9764/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_9764/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..73e08657f7285b6790ca072433596b9a5b86df6a --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_9764/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 84.83213348388672, "l1_loss": 2498.9994140625, "l0": 320.0, "frac_variance_explained": 0.8912880837917327, "cossim": 0.9524414777755738, "l2_ratio": 0.9530382812023163, "relative_reconstruction_bias": 1.0004355311393738, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5275760650634767, "loss_zero": 12.452933025360107, "frac_recovered": 0.9921787083148956, "frac_alive": 0.809190571308136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_0/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..6c19c066a8abee0cfcae61867e589a3ab88ce8c9 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24a8064f065b903f611ef28b027bf4c72c16cf1f2a136b3a0889ee684def01e7 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_0/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ab43fa8a3b1dcc8a9302e233d6c95ea41da7b3d7 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "0", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 640, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_0/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..4aa416c1e78f1bf3d5db083c39385e1e16e0eb83 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 221.45138397216797, "l1_loss": 5213.847412109375, "l0": 640.0, "frac_variance_explained": 0.34842745065689085, "cossim": 0.7575249195098877, "l2_ratio": 1.03416006565094, "relative_reconstruction_bias": 1.354014503955841, "loss_original": 2.4489264488220215, "loss_reconstructed": 3.551916003227234, "loss_zero": 12.452933025360107, "frac_recovered": 0.8898488104343414, "frac_alive": 0.7508680820465088, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_19528/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_19528/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..db80503a7ecbf50700d062c50189ce8630cc6f5c --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_19528/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54c76808d4813fe50c4c3757b00f3de6fdf033c91b889f3c7d4c053b7937c8a9 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_19528/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_19528/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c2851528a9c70533ad4fa0649f6b439e227b8fd8 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_19528/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "19528", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 640, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_19528/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_19528/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..b4a51967df6ecd877a21d011f5e628ac8ca10b75 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_19528/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 71.01775970458985, "l1_loss": 4621.40478515625, "l0": 640.0, "frac_variance_explained": 0.9090996205806732, "cossim": 0.9679088115692138, "l2_ratio": 0.9691721737384796, "relative_reconstruction_bias": 1.0014350891113282, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.4963836431503297, "loss_zero": 12.452933025360107, "frac_recovered": 0.9952901721000671, "frac_alive": 0.777452290058136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_29292/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_29292/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..f0973313a6704f6c3ad90ce0b329d14dfb64083e --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_29292/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a618d5f441bc9aa8560a18fa05cf0435b0a0721c3fb91e623f5457fc42d344b +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_29292/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_29292/config.json new file mode 100644 index 0000000000000000000000000000000000000000..72cec99e3e65cb90b7f3daa9aa1601f20e822a8b --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_29292/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "29292", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 640, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_29292/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_29292/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..57a47042d8d9db18a6ea81cb636c167a7f40b0e7 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_29292/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 67.18106384277344, "l1_loss": 4894.515576171875, "l0": 640.0, "frac_variance_explained": 0.9415188729763031, "cossim": 0.9716631412506104, "l2_ratio": 0.9726373791694641, "relative_reconstruction_bias": 1.000336092710495, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.493886113166809, "loss_zero": 12.452933025360107, "frac_recovered": 0.9955352962017059, "frac_alive": 0.7851020097732544, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_4882/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_4882/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..329e66c070eb566fe7a6e2e123c391ad1ccecc6d --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_4882/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16bcb873d4271bcf0068c0f472fe1c97bd2e9c8d15d40e5d3eefdf25561f080a +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_4882/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d6c8b898f40d1b1a696ff15736654e1a9f67948f --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_4882/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "4882", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 640, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_4882/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..2783a263707c4fba65782370af9bff3f99ce68dc --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 75.81502151489258, "l1_loss": 3782.3845703125, "l0": 640.0, "frac_variance_explained": 0.8975415527820587, "cossim": 0.9643395662307739, "l2_ratio": 0.9644364833831787, "relative_reconstruction_bias": 1.0002473175525666, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5044758319854736, "loss_zero": 12.452933025360107, "frac_recovered": 0.9944850146770478, "frac_alive": 0.8695204257965088, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_9764/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_9764/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..90f1280d65d635d83a5ea71d7f44715319be2fdb --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_9764/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e793e130ca4157841379b31ad3d721e25bb4eb3d5b464b8ad4fffbaffa5d7d1b +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_9764/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_9764/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3bc8bcc51f325d78805b6a0b5dadf5795a98f312 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_9764/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "9764", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 640, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_9764/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_9764/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..1bfae202920b7aa8c66910e0104ee967fb71def8 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_9764/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 70.46529960632324, "l1_loss": 4459.463427734375, "l0": 640.0, "frac_variance_explained": 0.9259386122226715, "cossim": 0.9691527128219605, "l2_ratio": 0.9700005769729614, "relative_reconstruction_bias": 1.000119686126709, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.4980183124542235, "loss_zero": 12.452933025360107, "frac_recovered": 0.9951267421245575, "frac_alive": 0.8415256142616272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3/trainer_0/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3/trainer_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..3f661af9acc45954b1f8c7b1eec27a56da4c7424 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3/trainer_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bf4520a14af0ab80a79fce3752a970c99e7568dff1cad3d899ef8a8170b42d0 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3/trainer_0/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3/trainer_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a6bdd17a52442401b071273f5cf859757586d939 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3/trainer_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": 48828, + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 20, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3/trainer_0/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3/trainer_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..7035638f51dc0e2a599bcf16835c62b7ea6a3932 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3/trainer_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 28.266572570800783, "l1_loss": 177.4275115966797, "l0": 20.0, "frac_variance_explained": 0.8498155057430268, "cossim": 0.9323096394538879, "l2_ratio": 0.9319331288337708, "relative_reconstruction_bias": 0.9994702100753784, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.6027263402938843, "loss_zero": 12.452933025360107, "frac_recovered": 0.984678465127945, "frac_alive": 0.1553819477558136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3/trainer_1/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3/trainer_1/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..1d8fb7dfa95f4a7034956de0928298b7a861e46c --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3/trainer_1/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:932a87b8d5eb705748e791b837ca27c02726e7fcb72b1532a1c3870b901cc411 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3/trainer_1/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3/trainer_1/config.json new file mode 100644 index 0000000000000000000000000000000000000000..9d469874e47ba5c351b7d556f3d0437746bd300f --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3/trainer_1/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": 48828, + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 40, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3/trainer_1/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3/trainer_1/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..ce83f5633c8cd58cfee4924089e5592916a421e3 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3/trainer_1/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 27.170896148681642, "l1_loss": 237.9600036621094, "l0": 40.0, "frac_variance_explained": 0.830749922990799, "cossim": 0.9341044247150421, "l2_ratio": 0.9342280626296997, "relative_reconstruction_bias": 0.9999212563037873, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.568475842475891, "loss_zero": 12.452933025360107, "frac_recovered": 0.9880999863147736, "frac_alive": 0.2572157084941864, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3/trainer_2/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3/trainer_2/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..b575e66749376d5d460f670f006224e1a48342c7 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3/trainer_2/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ce8eee946a147735e4fe555a1c3b42e8aa4c2547a831a399333be91be32355e +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3/trainer_2/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3/trainer_2/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c1ca2e0e1ab44d79228daa7260d60839ded5723a --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3/trainer_2/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": 48828, + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 80, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3/trainer_2/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3/trainer_2/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..d0ffecdcd806689e4c107c3bce57d7c80c3d46e2 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3/trainer_2/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 25.94360065460205, "l1_loss": 327.79884033203126, "l0": 80.0, "frac_variance_explained": 0.8436449348926545, "cossim": 0.9419341802597045, "l2_ratio": 0.9421893358230591, "relative_reconstruction_bias": 1.000232994556427, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.544915461540222, "loss_zero": 12.452933025360107, "frac_recovered": 0.9904540121555329, "frac_alive": 0.4140625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3/trainer_3/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3/trainer_3/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..11adda5e8142cf3f8733528334ba1d4bde9dba76 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3/trainer_3/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77ab44fc07e1ae8d2908054084a6ae792e4b9dcf38e4021c222db404bfec4241 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3/trainer_3/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3/trainer_3/config.json new file mode 100644 index 0000000000000000000000000000000000000000..56d0b26ed0f69dd8359724abd8fcec537ec742e8 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3/trainer_3/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": 48828, + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 160, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3/trainer_3/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3/trainer_3/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..1c9d7ee59937187c2bba495eb7b442a6b1f69cb3 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3/trainer_3/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 23.478511238098143, "l1_loss": 456.1298492431641, "l0": 160.0, "frac_variance_explained": 0.8591484308242798, "cossim": 0.9525353133678436, "l2_ratio": 0.9534881770610809, "relative_reconstruction_bias": 1.0014338195323944, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.521990919113159, "loss_zero": 12.452933025360107, "frac_recovered": 0.992733633518219, "frac_alive": 0.609971821308136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3/trainer_4/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3/trainer_4/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..e282192cdc3fc9ecbec129e2b9845631bd7028fa --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3/trainer_4/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f4f1869972b9d2a70943dd187cb0db6732fe404ade789bba5134069fc93b195 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3/trainer_4/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3/trainer_4/config.json new file mode 100644 index 0000000000000000000000000000000000000000..9b0da10c7c18453193b35c1f4d66618a09dc228b --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3/trainer_4/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": 48828, + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 320, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3/trainer_4/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3/trainer_4/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..cd0b65d0938929606930697894ad3ef923f425e0 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3/trainer_4/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 22.321846961975098, "l1_loss": 799.7821960449219, "l0": 320.0, "frac_variance_explained": 0.901763665676117, "cossim": 0.9601259887218475, "l2_ratio": 0.9598486363887787, "relative_reconstruction_bias": 0.9990402400493622, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.496252703666687, "loss_zero": 12.452933025360107, "frac_recovered": 0.9952966928482055, "frac_alive": 0.8234049677848816, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3/trainer_5/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3/trainer_5/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..82e3cf9037055e96c4ecd98452b8e05b67830f34 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3/trainer_5/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5271be90c9f1b278651b6c5c4b55561a5108397351d47653fada93b04d6bb147 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3/trainer_5/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3/trainer_5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3a2f9fb3e5d9e8c2ccf02ebdb1ccede0d7f83d6f --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3/trainer_5/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": 48828, + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 640, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3/trainer_5/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3/trainer_5/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..3b57cf48bd4ccb8983d93f791f214f1ccd35ae70 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3/trainer_5/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 19.113201332092284, "l1_loss": 1533.4474975585938, "l0": 640.0, "frac_variance_explained": 0.9071434438228607, "cossim": 0.9702927589416503, "l2_ratio": 0.9707047760486602, "relative_reconstruction_bias": 1.0007278263568877, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.4749300956726072, "loss_zero": 12.452933025360107, "frac_recovered": 0.9974163830280304, "frac_alive": 0.895616352558136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_0/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..6c19c066a8abee0cfcae61867e589a3ab88ce8c9 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24a8064f065b903f611ef28b027bf4c72c16cf1f2a136b3a0889ee684def01e7 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_0/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e7b4ac626e1ff9f7839c550c0fedf0fd2538c1fa --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "0", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 20, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_0/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..dcd4560f5f84ba432abb75e13bd63bdd32c7a55c --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 81.11176300048828, "l1_loss": 66.31471557617188, "l0": 20.0, "frac_variance_explained": 0.06356927156448364, "cossim": 0.2942200303077698, "l2_ratio": 0.18611131459474564, "relative_reconstruction_bias": 0.632641339302063, "loss_original": 2.4489264488220215, "loss_reconstructed": 12.665856552124023, "loss_zero": 12.452933025360107, "frac_recovered": -0.021439347753766925, "frac_alive": 0.1322699636220932, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_19528/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_19528/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..b857d4f52dbd71dfe8d3e515706e4b2b8c87bfe8 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_19528/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a34ff83fb80136d99837580d3a976ed380ad59ab9d8275c85318d76b92bf7868 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_19528/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_19528/config.json new file mode 100644 index 0000000000000000000000000000000000000000..655d8fd5350c7612fc468dd85ce57119f201ed22 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_19528/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "19528", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 20, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_19528/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_19528/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..35d4021da46f45c95b1655f2f36cb4509f4d16ed --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_19528/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 26.953345108032227, "l1_loss": 171.34053955078124, "l0": 20.0, "frac_variance_explained": 0.8236174821853638, "cossim": 0.934092503786087, "l2_ratio": 0.9346385538578034, "relative_reconstruction_bias": 1.0010874211788177, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.613414168357849, "loss_zero": 12.452933025360107, "frac_recovered": 0.9836098670959472, "frac_alive": 0.1463758647441864, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_29292/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_29292/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..2895410020bb89ea6a847bd063e74b975437dae5 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_29292/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2e157d0842633c04ed7f2e22eed8f2d6ab56cfcab165b7081ab563ff60ba361 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_29292/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_29292/config.json new file mode 100644 index 0000000000000000000000000000000000000000..da666a1c251d4e84ddf083deab18365a55d6afcd --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_29292/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "29292", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 20, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_29292/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_29292/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..f9085b96b35ed74b03be31a16a0f73aaccee4b57 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_29292/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 28.598044776916502, "l1_loss": 169.38932189941406, "l0": 20.0, "frac_variance_explained": 0.8030181944370269, "cossim": 0.9267681956291198, "l2_ratio": 0.9272215604782105, "relative_reconstruction_bias": 1.0012859225273132, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.6079941034317016, "loss_zero": 12.452933025360107, "frac_recovered": 0.984152227640152, "frac_alive": 0.1520182341337204, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_4882/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_4882/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..28f4d086f501fb2c3394d2825f9e63bb0fab5bd8 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_4882/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd5e65bdc679958956e31d17b8d2c8848df3aeef3480920a222151ec66c9796b +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_4882/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e79b30d0815aba8308f9602f98f4d87aae6a1405 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_4882/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "4882", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 20, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_4882/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..ce64741cb8f85c343b733c77f6bed7f337ac71f9 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 31.8795015335083, "l1_loss": 167.21665802001954, "l0": 20.0, "frac_variance_explained": 0.7408388495445252, "cossim": 0.9123399138450623, "l2_ratio": 0.9136622846126556, "relative_reconstruction_bias": 1.001996898651123, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.6352880716323854, "loss_zero": 12.452933025360107, "frac_recovered": 0.9814215242862702, "frac_alive": 0.1571723073720932, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_9764/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_9764/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..3f2fbdf98d8c0d54a38f07bb895b344383edc835 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_9764/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6685d5a384a7047031a9e644c9428cbb5a2b30a09c971dbf221fd2590a54484 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_9764/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_9764/config.json new file mode 100644 index 0000000000000000000000000000000000000000..2eaa3c0d1de4e54a30527af510d7454c49729ed0 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_9764/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "9764", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 20, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_9764/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_9764/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..90f86f3c8dd7f17e3aed93d9428f12e25b4b1b49 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_9764/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 30.28138885498047, "l1_loss": 179.02954864501953, "l0": 20.0, "frac_variance_explained": 0.794158810377121, "cossim": 0.9210076034069061, "l2_ratio": 0.9214390397071839, "relative_reconstruction_bias": 1.0008983969688416, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.6215444803237915, "loss_zero": 12.452933025360107, "frac_recovered": 0.9828002631664277, "frac_alive": 0.1518012136220932, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_19528/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_19528/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..03f0483b05e857526c0999d55e89d93f0910e7f0 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_19528/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a85844e6bcdab2cb4fff3df71b8502687eded89f829a7c38db2d85505dc468ad +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_19528/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_19528/config.json new file mode 100644 index 0000000000000000000000000000000000000000..4fc0e6d6109389ee7879091d1360cab9c3393a87 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_19528/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "19528", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 40, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_19528/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_19528/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..ba096668803c2fa8f5c65e05f77ed3eab6c2865f --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_19528/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 29.35189151763916, "l1_loss": 242.6250015258789, "l0": 40.0, "frac_variance_explained": 0.8169487476348877, "cossim": 0.9274865686893463, "l2_ratio": 0.9275311231613159, "relative_reconstruction_bias": 1.000645935535431, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.575002074241638, "loss_zero": 12.452933025360107, "frac_recovered": 0.9874506533145905, "frac_alive": 0.2630208432674408, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_29292/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_29292/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..2652992c99adb1e3ba454be7c2244ec3fbdf83e8 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_29292/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:291d118332f640176a7ccfecf5cc29fe200460959f20eca8fc4bec240212db7a +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_29292/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_29292/config.json new file mode 100644 index 0000000000000000000000000000000000000000..477bb8c2bf855610aaca42ce777ff639b6b3b05d --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_29292/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "29292", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 40, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_29292/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_29292/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..00e438b029f2a0cd0bfab7f4edb9e42a2e190bc0 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_29292/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 26.325938987731934, "l1_loss": 228.18090057373047, "l0": 40.0, "frac_variance_explained": 0.8482047379016876, "cossim": 0.9400634229183197, "l2_ratio": 0.9401220500469207, "relative_reconstruction_bias": 1.0000902473926545, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.569880652427673, "loss_zero": 12.452933025360107, "frac_recovered": 0.9879599630832672, "frac_alive": 0.25244140625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_4882/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_4882/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..8ac916d0dc54be3b56182a39a9f2ab15ccdc9253 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_4882/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd95fab3eecf83966f23c2f24b7ee3d8b1167d1e27c4512808040e2a82948139 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_4882/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7e35951ec998f7775dc41decdb891333d1d7d5ea --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_4882/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "4882", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 40, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_4882/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..37c19d41d5ede62175b266e0289b89fb597c1ae4 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 30.247645950317384, "l1_loss": 254.92621002197265, "l0": 40.0, "frac_variance_explained": 0.8046451151371002, "cossim": 0.9242092549800873, "l2_ratio": 0.9232668936252594, "relative_reconstruction_bias": 1.0001021921634674, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5943933486938477, "loss_zero": 12.452933025360107, "frac_recovered": 0.9855135440826416, "frac_alive": 0.2854275107383728, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_9764/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_9764/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..fe8c52823ccbd26ffa9b3be8857242817988f27f --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_9764/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83e702ca1681a1565313d109acbf0f80c1830e6fb91648e01fb11cf51f730cf9 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_9764/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_9764/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a6acf3573caf6c0e7def53bc1823f03137bfeb7f --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_9764/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "9764", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 40, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_9764/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_9764/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..626e25e329a2a84aaf8b6bebcfd823e9ce1e8739 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_9764/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 30.165078926086426, "l1_loss": 254.00455780029296, "l0": 40.0, "frac_variance_explained": 0.8110504627227784, "cossim": 0.9237140834331512, "l2_ratio": 0.9251842975616456, "relative_reconstruction_bias": 1.001541006565094, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5785513877868653, "loss_zero": 12.452933025360107, "frac_recovered": 0.9870951175689697, "frac_alive": 0.2688802182674408, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_19528/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_19528/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..28bdab96a2bd5faf114d1e1ed45104a87975f869 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_19528/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26ca9d0f5459a5bdf11fd22889009ce9498583154d1c7ba87a41e47e85770851 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_19528/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_19528/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a24b053c09e4326850807f144eba6635f67d30ac --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_19528/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "19528", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 80, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_19528/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_19528/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..dd94151f66fbeb06d35cfc964f7b7442e4f249e1 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_19528/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 27.827689170837402, "l1_loss": 345.77353820800784, "l0": 80.0, "frac_variance_explained": 0.834139883518219, "cossim": 0.9360387206077576, "l2_ratio": 0.937719202041626, "relative_reconstruction_bias": 1.001767522096634, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5493752717971803, "loss_zero": 12.452933025360107, "frac_recovered": 0.9900059461593628, "frac_alive": 0.4269748330116272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_29292/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_29292/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..b2a24594dcb40eb2439864b5d3a6d3ebb0fcaa5f --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_29292/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9691c701b1dde16e528cd31c5f152da8e5ccb08a252c0c646ecf8e5e534e685 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_29292/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_29292/config.json new file mode 100644 index 0000000000000000000000000000000000000000..163f03d2192d573af2aeb21d0e50a091ad72171c --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_29292/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "29292", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 80, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_29292/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_29292/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..75e7f6ea47a185c35ee91ef3d614c3cab73b560e --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_29292/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 26.360229301452637, "l1_loss": 316.6804168701172, "l0": 80.0, "frac_variance_explained": 0.8179186403751373, "cossim": 0.9384184181690216, "l2_ratio": 0.9385398983955383, "relative_reconstruction_bias": 1.0003150522708892, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.545375347137451, "loss_zero": 12.452933025360107, "frac_recovered": 0.9904079556465148, "frac_alive": 0.408257395029068, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_4882/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_4882/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..93a425572c850f60ad78c763295f91d004b09129 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_4882/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ddade1d6f844451865a697eac750e9611fe088e5947f27a9e3c07f01491bc31e +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_4882/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c80c90c4c73ee69c54d812aa1cb06fbc79f62c81 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_4882/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "4882", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 80, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_4882/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..ce58d3853f8ffceb02189544c505eda239c5d063 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 28.21126365661621, "l1_loss": 341.08843688964845, "l0": 80.0, "frac_variance_explained": 0.8251140594482422, "cossim": 0.9305853009223938, "l2_ratio": 0.9317696034908295, "relative_reconstruction_bias": 1.0008663594722749, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.570930075645447, "loss_zero": 12.452933025360107, "frac_recovered": 0.9878564000129699, "frac_alive": 0.4659830629825592, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_9764/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_9764/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..0ec77296316416649979081c69292fb974e16cf9 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_9764/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4004f9ff6e35424ee13fa76dad113fc4badc891d41ae2a0a517adfbc37ca8278 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_9764/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_9764/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d026fcfc6ef0b5fcc8cd9032c6489ef016fa6c82 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_9764/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "9764", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 80, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_9764/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_9764/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..8bf2a1842d27b559d0dc7628817914ae22b30acc --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_9764/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 26.508971977233887, "l1_loss": 328.47056884765624, "l0": 80.0, "frac_variance_explained": 0.8329806983470917, "cossim": 0.9393463492393493, "l2_ratio": 0.9382982671260833, "relative_reconstruction_bias": 0.9995725989341736, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5586145877838136, "loss_zero": 12.452933025360107, "frac_recovered": 0.9890822887420654, "frac_alive": 0.4383138120174408, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_19528/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_19528/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..bfa76ca46d8f5d54496e3c195644adb7ffa05666 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_19528/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:704c957535aa6adace22f2d9d86fe3e7ed37ddee577b00854372bc684c1febe4 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_19528/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_19528/config.json new file mode 100644 index 0000000000000000000000000000000000000000..049c3bff99549142013ae0dbf0ab017db1fb2e49 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_19528/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "19528", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 160, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_19528/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_19528/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..56bf74947b4ddc11c4ff9fd17e81909aaca61504 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_19528/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 24.973313903808595, "l1_loss": 465.1881103515625, "l0": 160.0, "frac_variance_explained": 0.849942284822464, "cossim": 0.9479575037956238, "l2_ratio": 0.9482971668243408, "relative_reconstruction_bias": 1.0010927855968474, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.525223898887634, "loss_zero": 12.452933025360107, "frac_recovered": 0.9924123585224152, "frac_alive": 0.6433376669883728, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_29292/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_29292/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..2622afd0154e8c5f3185ceaab5b0076e3bc17ab0 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_29292/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a626e091f9441ccc07afb008f152fb2ba3a9bf3e59164424417cec735e0afbe +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_29292/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_29292/config.json new file mode 100644 index 0000000000000000000000000000000000000000..606c340815c66b8622845180b77d76af1b2b0342 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_29292/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "29292", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 160, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_29292/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_29292/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..3cca04a00da9080a44e8870178addde6dd19d001 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_29292/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 23.815764045715333, "l1_loss": 457.1536590576172, "l0": 160.0, "frac_variance_explained": 0.866757619380951, "cossim": 0.950747811794281, "l2_ratio": 0.9520208537578583, "relative_reconstruction_bias": 1.0018658936023712, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.520562839508057, "loss_zero": 12.452933025360107, "frac_recovered": 0.9928742706775665, "frac_alive": 0.5978190302848816, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_4882/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_4882/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..b5c369706e2da6df523baa6a36681869382a1346 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_4882/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b4b8f6e704d5db0d9d152b6f97a10179c83de7b463eb7c36d759a841a908d60 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_4882/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..20287d723cf61918283e886b290136d03cbcc8b3 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_4882/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "4882", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 160, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_4882/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..1c845c2e9d06a6098299ef4e0462eb34b81221e6 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 27.5632266998291, "l1_loss": 470.2460906982422, "l0": 160.0, "frac_variance_explained": 0.8464842736721039, "cossim": 0.9401612520217896, "l2_ratio": 0.940293037891388, "relative_reconstruction_bias": 1.0004801511764527, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5428218841552734, "loss_zero": 12.452933025360107, "frac_recovered": 0.9906556606292725, "frac_alive": 0.7394748330116272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_9764/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_9764/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..6a98e96e798c27e435578a411ecf7b7b0c8e0677 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_9764/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:779ad03c4d85c3deaef37f32965d3cc57929ace9ba9c19b109a28cbb4ca27d84 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_9764/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_9764/config.json new file mode 100644 index 0000000000000000000000000000000000000000..dfbb3eb61e46e1bde614cf580f7cbdbfd5100946 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_9764/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "9764", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 160, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_9764/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_9764/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..485ad51e292191cb65256cf1633d51e5bb03e409 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_9764/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 26.196580696105958, "l1_loss": 475.3064727783203, "l0": 160.0, "frac_variance_explained": 0.8526719510555267, "cossim": 0.9425513565540313, "l2_ratio": 0.9438767254352569, "relative_reconstruction_bias": 1.001193392276764, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5301116943359374, "loss_zero": 12.452933025360107, "frac_recovered": 0.9919238090515137, "frac_alive": 0.683214008808136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_19528/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_19528/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..daa681ef50c7d41bcea27bee30f8c5808e3bfef4 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_19528/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6f5dd440b812d28af43bea1e7ebabc40b2b27b1fa094124937f874173c5e7e8 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_19528/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_19528/config.json new file mode 100644 index 0000000000000000000000000000000000000000..05a489c9c5dcc2dbc2894ee6a139dfae0a43402a --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_19528/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "19528", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 320, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_19528/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_19528/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..70acc9e47df2023fca7bb057d5beb6e94556b495 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_19528/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 22.630137252807618, "l1_loss": 761.5822509765625, "l0": 320.0, "frac_variance_explained": 0.9073990225791931, "cossim": 0.9594317018985749, "l2_ratio": 0.9603235721588135, "relative_reconstruction_bias": 1.001180464029312, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5024558305740356, "loss_zero": 12.452933025360107, "frac_recovered": 0.9946776688098907, "frac_alive": 0.8553059697151184, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_29292/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_29292/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..ce053340ae9a7a7045f1cd55e6183f7ecb3274b0 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_29292/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2eddb5e937d51b6316829a6646290c2c07086d35a50d222ae3ef65b679b6baad +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_29292/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_29292/config.json new file mode 100644 index 0000000000000000000000000000000000000000..9871fb1e086b3f5e6995720d083b1fc576502ebb --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_29292/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "29292", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 320, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_29292/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_29292/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..f98b1f4e86a41019151cfcfb158f7efc081c84cc --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_29292/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 22.81598815917969, "l1_loss": 760.7758239746094, "l0": 320.0, "frac_variance_explained": 0.8861272156238555, "cossim": 0.9564962923526764, "l2_ratio": 0.958636736869812, "relative_reconstruction_bias": 1.0027027010917664, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.499033737182617, "loss_zero": 12.452933025360107, "frac_recovered": 0.9950167834758759, "frac_alive": 0.8181965947151184, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_4882/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_4882/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..58b200ac1cd39db5b1f6104711b4e60d25e0c9c7 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_4882/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94b1753424e223d74149e613a46dc46ff0156d32b70e466950d24e5a7ce9b267 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_4882/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1d0650bd71b2b875ad177e7421aabc85beb8eb95 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_4882/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "4882", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 320, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_4882/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..cedbe8407e99f38da7d001e8c0b041e96190cf99 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 27.296066665649413, "l1_loss": 635.4285339355469, "l0": 320.0, "frac_variance_explained": 0.837058961391449, "cossim": 0.9406275987625122, "l2_ratio": 0.9429730951786042, "relative_reconstruction_bias": 1.0029973804950714, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5147974491119385, "loss_zero": 12.452933025360107, "frac_recovered": 0.9934382557868957, "frac_alive": 0.9421115517616272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_9764/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_9764/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..464dcd6867ac7e3ae3a73bb4c0d8665e66aa7d54 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_9764/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf213a61fc8d82d969d0a2047de5e5f5225c7a831c7794949fdb35ad62c314da +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_9764/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_9764/config.json new file mode 100644 index 0000000000000000000000000000000000000000..5038c3a90e8c3a78bcdecd577b1b82a6fb18228d --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_9764/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "9764", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 320, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_9764/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_9764/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..9b038c8208d6a0eb0201662e2992ca9510dcd1da --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_9764/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 22.620394706726074, "l1_loss": 685.0079895019531, "l0": 320.0, "frac_variance_explained": 0.883182042837143, "cossim": 0.9556304156780243, "l2_ratio": 0.955713278055191, "relative_reconstruction_bias": 1.0008978366851806, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.506931781768799, "loss_zero": 12.452933025360107, "frac_recovered": 0.9942240417003632, "frac_alive": 0.8894856572151184, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_19528/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_19528/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..7051abca186902a952c21739beb89784b05d9eae --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_19528/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa8a8420ce23d07b753e145753ef1f32a79c632ec3c5e4e020faa4c29e9ba3dc +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_19528/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_19528/config.json new file mode 100644 index 0000000000000000000000000000000000000000..413fe479d4379a7143cfa8584c4d83aac92d7c82 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_19528/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "19528", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 640, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_19528/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_19528/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..43f51daf8c2c4eb4b2b45f1e764700b913aff4bf --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_19528/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 20.8123197555542, "l1_loss": 1378.54248046875, "l0": 640.0, "frac_variance_explained": 0.8955779373645782, "cossim": 0.9655032277107238, "l2_ratio": 0.9666714549064637, "relative_reconstruction_bias": 1.0018744826316834, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.4798152446746826, "loss_zero": 12.452933025360107, "frac_recovered": 0.9969263732433319, "frac_alive": 0.9377170205116272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_29292/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_29292/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..dda7000c85c08017ea120451322d9063932c18a3 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_29292/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90b198d9580bb45474ad04cb16fbcea9d1da65c5898acdbd3aceaefb284ace27 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_29292/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_29292/config.json new file mode 100644 index 0000000000000000000000000000000000000000..b84b139677f5cd2d14857f646bf8e9a860dbeee4 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_29292/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "29292", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 640, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_29292/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_29292/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..2c278408ee4d888abeb4614a9cee5457068f80db --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_29292/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 19.91728630065918, "l1_loss": 1506.0241333007812, "l0": 639.4, "frac_variance_explained": 0.924304074048996, "cossim": 0.9687296390533447, "l2_ratio": 0.9696931481361389, "relative_reconstruction_bias": 1.0016383588314057, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.4805987596511843, "loss_zero": 12.452933025360107, "frac_recovered": 0.9968513727188111, "frac_alive": 0.914171040058136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_4882/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_4882/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..5f4637c930a266ff2d870bd7c4be8cd6eaf17e3c --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_4882/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0d88dd49c3649badc7e8e3c31842c8a2f9c8213df6b3251434a5acf2555f0b4 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_4882/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..eb53c6b2b1e3e6acdf647a2f1e395938a1079a59 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_4882/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "4882", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 640, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_4882/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..faf34b41e70c43e757469ba4882d69e6a30fdf48 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 22.1976598739624, "l1_loss": 1065.5535888671875, "l0": 640.0, "frac_variance_explained": 0.8896034240722657, "cossim": 0.960835748910904, "l2_ratio": 0.9629217565059662, "relative_reconstruction_bias": 1.002778035402298, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.483345627784729, "loss_zero": 12.452933025360107, "frac_recovered": 0.9965746462345123, "frac_alive": 0.9876301884651184, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_9764/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_9764/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..14402b6cb64c9648da70f788b1d1f898c86ed4eb --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_9764/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db13008a0bee3cd624712b58d35df653bf5a266b20451bfd353b7814b505c3ca +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_9764/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_9764/config.json new file mode 100644 index 0000000000000000000000000000000000000000..578b29824432a515fe55affc2a9d2083b811d390 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_9764/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "9764", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 640, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_9764/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_9764/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..e44ec2c742a69b3d561a83edd0b238e98400a7fe --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_9764/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 20.505210876464844, "l1_loss": 1237.8057373046875, "l0": 640.0, "frac_variance_explained": 0.9102000713348388, "cossim": 0.9658762633800506, "l2_ratio": 0.9668752193450928, "relative_reconstruction_bias": 1.0017230033874511, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.4834362506866454, "loss_zero": 12.452933025360107, "frac_recovered": 0.9965657532215119, "frac_alive": 0.9698893427848816, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7/trainer_0/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7/trainer_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..ea2e9b7c6e63301c28bb6f8aa40cc08bd8d9a04f --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7/trainer_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1787db73a7fc52f63f2ed70210fe4882fbb774417722f0c8c4b37acf261b6919 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7/trainer_0/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7/trainer_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..581d92370a3215c2ea34640431e1164fb25a34c1 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7/trainer_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": 48828, + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 20, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7/trainer_0/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7/trainer_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..24959af30af13c75cba4c0a0d74fffa6bab1fc6d --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7/trainer_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 39.385193252563475, "l1_loss": 269.69310150146487, "l0": 20.0, "frac_variance_explained": 0.8598198533058167, "cossim": 0.9176318526268006, "l2_ratio": 0.9170765697956085, "relative_reconstruction_bias": 0.999529504776001, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.638276529312134, "loss_zero": 12.452933025360107, "frac_recovered": 0.9811272025108337, "frac_alive": 0.1595594584941864, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7/trainer_1/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7/trainer_1/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..3d5da98a3818783e742563899c9747c143cb7d8d --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7/trainer_1/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1eb1b14ac094c1dcd0a6193f9e4cb23633644486e5fb9f859dfcf49f1954382 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7/trainer_1/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7/trainer_1/config.json new file mode 100644 index 0000000000000000000000000000000000000000..2c26034300e760b863c603375dbd1194edfbf8c6 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7/trainer_1/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": 48828, + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 40, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7/trainer_1/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7/trainer_1/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..6b13bf10028e03d0dab2f8333a3b247db91efbf8 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7/trainer_1/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 35.6403621673584, "l1_loss": 266.82349090576173, "l0": 40.0, "frac_variance_explained": 0.8049931645393371, "cossim": 0.93520388007164, "l2_ratio": 0.9352702856063843, "relative_reconstruction_bias": 1.0001867175102235, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.551988196372986, "loss_zero": 12.452933025360107, "frac_recovered": 0.9897454440593719, "frac_alive": 0.284722238779068, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7/trainer_2/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7/trainer_2/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..0f7cac6e8316201d6d58df21ca0fe001c9fbd18e --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7/trainer_2/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0976c5b6ed4121f599e687f2512034d13479b2322d31ba48064754314d40cb11 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7/trainer_2/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7/trainer_2/config.json new file mode 100644 index 0000000000000000000000000000000000000000..37c4235c7b8741e33bc5f5d977852f99d5d6ff0f --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7/trainer_2/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": 48828, + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 80, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7/trainer_2/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7/trainer_2/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..7506913cd7ea8bd49585ca43cdf4e51bd7b7dbf9 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7/trainer_2/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 32.920125007629395, "l1_loss": 377.2314727783203, "l0": 80.0, "frac_variance_explained": 0.8427301347255707, "cossim": 0.9465734004974365, "l2_ratio": 0.9462980151176452, "relative_reconstruction_bias": 0.999693489074707, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5190592050552367, "loss_zero": 12.452933025360107, "frac_recovered": 0.9930278241634369, "frac_alive": 0.4312608540058136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7/trainer_3/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7/trainer_3/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..944fcea43d0901bd71bf2fc3db5aabbb70c55eb8 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7/trainer_3/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51908dbfa7760fce0eee9c1abd4ca2c668c1edd7e020fbbfc7a79e07816c0b6b +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7/trainer_3/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7/trainer_3/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ecc5290c7e5e9956ee4da47ff05ddf5822f7427f --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7/trainer_3/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": 48828, + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 160, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7/trainer_3/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7/trainer_3/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..bb7088505715be9e5f3839a48f390924ef53c661 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7/trainer_3/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 31.11774444580078, "l1_loss": 587.7058166503906, "l0": 160.0, "frac_variance_explained": 0.8864160597324371, "cossim": 0.9515470147132874, "l2_ratio": 0.9520631015300751, "relative_reconstruction_bias": 1.0004171848297119, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.4948626518249513, "loss_zero": 12.452933025360107, "frac_recovered": 0.9954373061656951, "frac_alive": 0.6174045205116272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7/trainer_4/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7/trainer_4/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..c3dc65a13e6f81b6437bf45b5943071546343173 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7/trainer_4/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d10574cb4e1f4ed7f56d7ed4e8a936159ceca155844d79ebd207f43ee2d26bb +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7/trainer_4/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7/trainer_4/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3737fe0fb2bd8228009b661a742b5a3d96075d73 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7/trainer_4/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": 48828, + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 320, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7/trainer_4/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7/trainer_4/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..355b1bfd1c564a8bd28e411cc000b66e6cfab668 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7/trainer_4/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 25.603483200073242, "l1_loss": 887.8896789550781, "l0": 320.0, "frac_variance_explained": 0.9075254380702973, "cossim": 0.9665767192840576, "l2_ratio": 0.966665506362915, "relative_reconstruction_bias": 1.000228750705719, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.477691173553467, "loss_zero": 12.452933025360107, "frac_recovered": 0.9971450984477996, "frac_alive": 0.6903212070465088, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7/trainer_5/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7/trainer_5/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..faf858d885c4077ed4bd35ef36546507998ebb02 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7/trainer_5/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a5ebef1027b4924dbe331b5ac8c4cecbf4abf1472688a71d805dbfaa7ab86a3 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7/trainer_5/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7/trainer_5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1ab67a8b4fc797e8eda2c1b258c91f387f78db80 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7/trainer_5/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": 48828, + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 640, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7/trainer_5/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7/trainer_5/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..3e98b3017af1fc79e7992218d65611e808bdf73d --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7/trainer_5/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 21.750783348083495, "l1_loss": 2076.0646728515626, "l0": 640.0, "frac_variance_explained": 0.9398650825023651, "cossim": 0.9774176478385925, "l2_ratio": 0.9777889370918273, "relative_reconstruction_bias": 1.0005208492279052, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.461538004875183, "loss_zero": 12.452933025360107, "frac_recovered": 0.9987489283084869, "frac_alive": 0.7127278447151184, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_19528/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_19528/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..28011c3fb7d879b92db701af5cfcc1c8d8205f03 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_19528/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25bb48bcfeb5b066a671d65344582a2bc68f38f7f4a4309fb42dc297fe3e50c3 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_19528/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_19528/config.json new file mode 100644 index 0000000000000000000000000000000000000000..2a46539d75d8fa358b79a532c107b705122d2d27 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_19528/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "19528", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 20, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_19528/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_19528/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..99950f45460b047126a5a6a71ff0cd0b015c244b --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_19528/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 40.97785682678223, "l1_loss": 224.113671875, "l0": 20.0, "frac_variance_explained": 0.796733170747757, "cossim": 0.9149265289306641, "l2_ratio": 0.9156940042972564, "relative_reconstruction_bias": 0.9999639749526977, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.6481229782104494, "loss_zero": 12.452933025360107, "frac_recovered": 0.9801438331604004, "frac_alive": 0.1609700471162796, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_29292/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_29292/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..a741553b297ece1ac5db856fb23b36bc5dd62e43 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_29292/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b1dbf910cef095fd8a1b350939e136663e147bdfa67aef8e9e6e6cc9789f65e +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_29292/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_29292/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c15be8dd8bdd3cb276fb526b5dfae21aa8924669 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_29292/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "29292", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 20, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_29292/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_29292/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..9e8e25f7ae2128bc18ec646b51f2227801f0ddfb --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_29292/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 41.959268951416014, "l1_loss": 214.32296295166014, "l0": 20.0, "frac_variance_explained": 0.7623796999454499, "cossim": 0.908824360370636, "l2_ratio": 0.9102616131305694, "relative_reconstruction_bias": 1.001514995098114, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.6436038970947267, "loss_zero": 12.452933025360107, "frac_recovered": 0.980591356754303, "frac_alive": 0.1611870676279068, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_4882/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_4882/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..ce342e950028a07f94bd1e5679054330c5a9269c --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_4882/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8295093416b9408805aa04b42836ca975874d49a81436e8a9be245f6438e334 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_4882/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..12dfa36d75e25bf017adc94825111a3867b04009 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_4882/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "4882", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 20, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_4882/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..893767de981a4d86d8fd3fd51002dca8437a4c86 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 43.85188674926758, "l1_loss": 220.95366516113282, "l0": 20.0, "frac_variance_explained": 0.7532911002635956, "cossim": 0.9032427906990051, "l2_ratio": 0.9006875157356262, "relative_reconstruction_bias": 0.9967345774173737, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.6938873529434204, "loss_zero": 12.452933025360107, "frac_recovered": 0.9755684912204743, "frac_alive": 0.1538628488779068, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_9764/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_9764/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..7340a7dd784efe6be3c5f0192842c54e3e5c311b --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_9764/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a575e74d5482c5e76103ecefe204bf6bb0af85917cea78792a0c7e9f0514294 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_9764/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_9764/config.json new file mode 100644 index 0000000000000000000000000000000000000000..cf8c36acbf8d2dff1e9acec518fa300dcb78ef6a --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_9764/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "9764", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 20, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_9764/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_9764/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..509229230282626da8dcbaf2a003d06e91c47fb5 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_9764/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 44.27531471252441, "l1_loss": 227.16014251708984, "l0": 20.0, "frac_variance_explained": 0.7727083802223206, "cossim": 0.9027702331542968, "l2_ratio": 0.9043409764766693, "relative_reconstruction_bias": 1.0013017654418945, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.6606419324874877, "loss_zero": 12.452933025360107, "frac_recovered": 0.9788933575153351, "frac_alive": 0.1574435830116272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_19528/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_19528/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..bcc49e9c93efdfcc14c4df1f9d5b96d02c482fb1 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_19528/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60d331c677cfbcaa03c1ecbae8b247db19c00f56cf0923d347e1685239da31d0 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_19528/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_19528/config.json new file mode 100644 index 0000000000000000000000000000000000000000..b74072331f2d9f312d862c90cc5e95a41b0f5c71 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_19528/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "19528", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 40, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_19528/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_19528/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..b22764017c520a38716064263c188e90f535ebac --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_19528/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 36.736992263793944, "l1_loss": 305.2062561035156, "l0": 40.0, "frac_variance_explained": 0.8604458928108215, "cossim": 0.9312281787395478, "l2_ratio": 0.9330651462078094, "relative_reconstruction_bias": 1.001002162694931, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.556801986694336, "loss_zero": 12.452933025360107, "frac_recovered": 0.9892597913742065, "frac_alive": 0.28857421875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_29292/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_29292/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..f72b7e767d66fa3a4cc08b324f40c7b8220c4d28 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_29292/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2c3a0aa67416ca20bda1a534667540d335b8a7ae243e37a7e214e1ca137fd15 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_29292/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_29292/config.json new file mode 100644 index 0000000000000000000000000000000000000000..4d6d3e71f718b028621bc364786e84e1f3f1644e --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_29292/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "29292", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 40, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_29292/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_29292/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..b300a4f82101283d134a294d9fb84a60940f496a --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_29292/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 35.72008857727051, "l1_loss": 292.7100036621094, "l0": 40.0, "frac_variance_explained": 0.8577439606189727, "cossim": 0.9344999670982361, "l2_ratio": 0.9352131724357605, "relative_reconstruction_bias": 1.0005686342716218, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.555193829536438, "loss_zero": 12.452933025360107, "frac_recovered": 0.9894222438335418, "frac_alive": 0.2881944477558136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_4882/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_4882/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..11e689d558edcb258c633235055e8349c0f3e112 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_4882/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13d13d251d438486395fafe23e20da5ab6689c4de326d7c74012b1f5048fe5c5 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_4882/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..6b8252e906c73149d35de504f1dd0915b73f8e1a --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_4882/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "4882", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 40, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_4882/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..79ffaf71187f61feb1df0bf74ae01e8c549b3da2 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 39.11642150878906, "l1_loss": 308.71179046630857, "l0": 40.0, "frac_variance_explained": 0.820146119594574, "cossim": 0.9230989933013916, "l2_ratio": 0.9211092412471771, "relative_reconstruction_bias": 0.9978638172149659, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.58606071472168, "loss_zero": 12.452933025360107, "frac_recovered": 0.9863392353057862, "frac_alive": 0.2887912392616272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_9764/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_9764/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..0da9ea879ac37d86aa3bd42c7a7411d6a2606d1f --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_9764/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff5ed45ee0ccd4838a997cf7b801f001be6b3de068d823309f986da77005c311 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_9764/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_9764/config.json new file mode 100644 index 0000000000000000000000000000000000000000..6167b8b1220acaedbabefc1167c3b2cbe871233c --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_9764/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "9764", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 40, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_9764/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_9764/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..f4664a72c2740431f814020dc401a1db81797f63 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_9764/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 37.83385543823242, "l1_loss": 287.71506958007814, "l0": 40.0, "frac_variance_explained": 0.810006731748581, "cossim": 0.9278073966503143, "l2_ratio": 0.9288983643054962, "relative_reconstruction_bias": 1.0017506897449493, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5620718479156492, "loss_zero": 12.452933025360107, "frac_recovered": 0.9887315511703492, "frac_alive": 0.285210520029068, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_19528/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_19528/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..e83670417394d2f1b893d95353fec859dc4cc0ed --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_19528/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:021eacd7fcfac44b92ff7ff2872662a3146bf1a63cf0bb318f066d1f21ca1c73 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_19528/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_19528/config.json new file mode 100644 index 0000000000000000000000000000000000000000..17dc4ac5cc6132f53ff3c7a3c6b81a8c8067b388 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_19528/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "19528", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 80, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_19528/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_19528/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..6b1d35b94db6ca07b965083653ab8b9fa92d16d1 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_19528/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 34.54673595428467, "l1_loss": 380.01134033203124, "l0": 80.0, "frac_variance_explained": 0.8351876437664032, "cossim": 0.9389269590377808, "l2_ratio": 0.9404280722141266, "relative_reconstruction_bias": 1.0014681100845337, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.526151704788208, "loss_zero": 12.452933025360107, "frac_recovered": 0.9923255324363709, "frac_alive": 0.434136301279068, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_29292/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_29292/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..378a0561dbee6064c6d6610650198b3aae2cc359 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_29292/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87a23b6fa53bb0fb679aa96820397499e7a59b4cc9b6dbf67ee48ba25579b565 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_29292/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_29292/config.json new file mode 100644 index 0000000000000000000000000000000000000000..99aa125ad9e207c99ebabb7970233a236b8d5eb3 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_29292/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "29292", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 80, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_29292/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_29292/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..bdc2bc9956a80212f6521797ef60c4b6b267f3df --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_29292/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 33.95244846343994, "l1_loss": 395.76544494628905, "l0": 80.0, "frac_variance_explained": 0.8481647431850433, "cossim": 0.9428203880786896, "l2_ratio": 0.9439214825630188, "relative_reconstruction_bias": 1.000917023420334, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.522923541069031, "loss_zero": 12.452933025360107, "frac_recovered": 0.9926421821117402, "frac_alive": 0.44482421875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_4882/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_4882/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..be963eb536772f3d184315de2c9220cf10e812eb --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_4882/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92e4d5dd21fc57d7c1b1e305d408a262b80c1c7058e197287f5a3ecd12fc51a8 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_4882/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e5d16fcfa7132f9db658b9f3e858e19fadd8897d --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_4882/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "4882", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 80, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_4882/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..6e26a88730b98c70e9e32163f0d7ef3f055b8985 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 36.37924575805664, "l1_loss": 385.1722045898438, "l0": 80.0, "frac_variance_explained": 0.8252128601074219, "cossim": 0.9325456142425537, "l2_ratio": 0.934538209438324, "relative_reconstruction_bias": 1.001760905981064, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5465892553329468, "loss_zero": 12.452933025360107, "frac_recovered": 0.9902855455875397, "frac_alive": 0.4774305522441864, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_9764/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_9764/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..526fe06cda1d227c7f514ce567dec3259d46a579 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_9764/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75386acd5eaeb9eaa8e365f8172f2a15e90c514ad98900057fe0621c785fed0f +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_9764/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_9764/config.json new file mode 100644 index 0000000000000000000000000000000000000000..9e9afcd584aa060126b44de0b39e9b5b1367e1e1 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_9764/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "9764", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 80, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_9764/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_9764/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..f450c911cd08194fd817fbd7512d1d2d5bf31117 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_9764/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 34.86564693450928, "l1_loss": 385.8026153564453, "l0": 80.0, "frac_variance_explained": 0.836561405658722, "cossim": 0.9394849181175232, "l2_ratio": 0.939175671339035, "relative_reconstruction_bias": 0.9997531950473786, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5293116569519043, "loss_zero": 12.452933025360107, "frac_recovered": 0.9920073211193084, "frac_alive": 0.4495442807674408, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_19528/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_19528/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..04daf72a3902dd7b1b224c66088e72ff3adb3650 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_19528/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b50dfaeb9ce66bd49735ce01f5ce87e471955cd5cfb89a05c2e6013f47daa45b +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_19528/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_19528/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f6bb17e0e46e1d3d6d4d43e4d6adc628f5fc043c --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_19528/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "19528", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 160, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_19528/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_19528/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..33fe552ca93cae294474cfac9fdb36e6e1190cd4 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_19528/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 31.254188537597656, "l1_loss": 522.552392578125, "l0": 160.0, "frac_variance_explained": 0.8500134646892548, "cossim": 0.9505284130573273, "l2_ratio": 0.9510213494300842, "relative_reconstruction_bias": 1.0003615200519562, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.498537039756775, "loss_zero": 12.452933025360107, "frac_recovered": 0.995071941614151, "frac_alive": 0.5815972089767456, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_29292/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_29292/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..db272000c5fd807368bb0fe6b3acb11371779007 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_29292/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ae5e4b62b65098babab72ea2f2ed9091b08e63e69d305bcdd3ee93e8bc5dff8 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_29292/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_29292/config.json new file mode 100644 index 0000000000000000000000000000000000000000..cbd2a0393321b7a4d930e8f65da54f8552873ee5 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_29292/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "29292", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 160, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_29292/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_29292/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..d496252e61b15e8ef49b7def041767f706d8d85e --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_29292/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 31.243211936950683, "l1_loss": 553.372265625, "l0": 160.0, "frac_variance_explained": 0.8649967312812805, "cossim": 0.950848001241684, "l2_ratio": 0.9528935134410859, "relative_reconstruction_bias": 1.0015978693962098, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.497640013694763, "loss_zero": 12.452933025360107, "frac_recovered": 0.9951586544513702, "frac_alive": 0.6100803017616272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_4882/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_4882/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..ad1a9d5da11814682535bf8bcf8c2445ad309d49 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_4882/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4490899685453029db14881560d0670b587772ae234b74bb43142cc36c7aac23 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_4882/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..08c98a5f9541c975fb079ca53d07e5cd5f40b066 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_4882/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "4882", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 160, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_4882/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..41634ff94dceba39a96a313e146f8bb8f6e5d13b --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 35.319954299926756, "l1_loss": 523.6402954101562, "l0": 160.0, "frac_variance_explained": 0.8372977256774903, "cossim": 0.9389589726924896, "l2_ratio": 0.939053213596344, "relative_reconstruction_bias": 0.9996998012065887, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.519640302658081, "loss_zero": 12.452933025360107, "frac_recovered": 0.9929706990718842, "frac_alive": 0.704698383808136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_9764/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_9764/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..9ab5bb95be036dd4662bc18704abf3043a4919a0 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_9764/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f5a1e45da2eba52f0e080d099610cd73cec8d3fdd56a2dde5253c92130b2b32 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_9764/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_9764/config.json new file mode 100644 index 0000000000000000000000000000000000000000..89730e5ac10a496de4840c865f9bff0aa4767514 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_9764/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "9764", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 160, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_9764/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_9764/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..2d6b6128cc9cda0c96d054a71174a5d4de162454 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_9764/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 34.36524696350098, "l1_loss": 555.1797607421875, "l0": 160.0, "frac_variance_explained": 0.8513323128223419, "cossim": 0.945607328414917, "l2_ratio": 0.9461180448532105, "relative_reconstruction_bias": 1.0003768801689148, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5059152603149415, "loss_zero": 12.452933025360107, "frac_recovered": 0.9943358719348907, "frac_alive": 0.6633571982383728, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_19528/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_19528/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..8b5b725f0e65d6c218a556d46617e90b870bcca4 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_19528/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:130e19e1cb77d94269fed1c07f934043b5020e93357a3c47cd53a94b6311eb8e +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_19528/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_19528/config.json new file mode 100644 index 0000000000000000000000000000000000000000..49ee9bcd605ce777e6e423957d98f22b80200d39 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_19528/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "19528", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 320, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_19528/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_19528/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..eb4ed9ff62cd2da0e66bb055e297bb4fe1375a09 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_19528/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 27.15417594909668, "l1_loss": 891.0104736328125, "l0": 320.0, "frac_variance_explained": 0.9106251835823059, "cossim": 0.9636231184005737, "l2_ratio": 0.9649718701839447, "relative_reconstruction_bias": 1.0003974497318269, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.4835067272186278, "loss_zero": 12.452933025360107, "frac_recovered": 0.9965704083442688, "frac_alive": 0.7428928017616272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_29292/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_29292/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..f50bd4587169a073066a23cfa1b55ae61d0b33b0 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_29292/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:297022bacf9f15574bdf51fd0d36cc73ba9a0a101a85c4642a10045ad520f585 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_29292/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_29292/config.json new file mode 100644 index 0000000000000000000000000000000000000000..aae2c845863467858e26f06ad16169c0fa33cbf3 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_29292/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "29292", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 320, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_29292/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_29292/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..be174fc2f739cd7b2fa63b90d980ee21b90aea10 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_29292/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 26.882869529724122, "l1_loss": 908.14228515625, "l0": 320.0, "frac_variance_explained": 0.9116247355937958, "cossim": 0.9638674855232239, "l2_ratio": 0.9651795566082001, "relative_reconstruction_bias": 1.0015326201915742, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.4805299997329713, "loss_zero": 12.452933025360107, "frac_recovered": 0.9968637824058533, "frac_alive": 0.7108832597732544, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_4882/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_4882/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..23050bff0fbe25f6e34f003a898882c9422e54e2 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_4882/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3f3ba8b07a81bdf654498e30232216fce157ca2e87f7e2e015e68746203d21e +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_4882/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..749867021ed493c8597171f69e3253b1930d7797 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_4882/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "4882", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 320, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_4882/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..dc5613685175946837d5b030796d3a1a3dad1165 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 31.69829444885254, "l1_loss": 762.1271850585938, "l0": 320.0, "frac_variance_explained": 0.8956417977809906, "cossim": 0.9532331645488739, "l2_ratio": 0.9554112255573273, "relative_reconstruction_bias": 1.003282082080841, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.494775891304016, "loss_zero": 12.452933025360107, "frac_recovered": 0.9954480111598969, "frac_alive": 0.9065212607383728, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_9764/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_9764/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..0b28cf18f3b70a48341d3f865d39010252fe33af --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_9764/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:961d97a3ae160a2ce0e988d07819487db1abfba2a2a35d554eee228c0570534e +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_9764/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_9764/config.json new file mode 100644 index 0000000000000000000000000000000000000000..258dad0207b66848f6f441de9c92d715fd90d91d --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_9764/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "9764", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 320, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_9764/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_9764/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..cf16565bc8defc41519091f119aa4b72ff602e5c --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_9764/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 29.911967849731447, "l1_loss": 843.8339965820312, "l0": 320.0, "frac_variance_explained": 0.8969981431961059, "cossim": 0.9574037551879883, "l2_ratio": 0.9580581486225128, "relative_reconstruction_bias": 0.9999774336814881, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.4840301752090452, "loss_zero": 12.452933025360107, "frac_recovered": 0.996515566110611, "frac_alive": 0.8080512285232544, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_19528/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_19528/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..6e380b7e9d6d4e5a0d46dfbde5f1029da94f818c --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_19528/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02d77944b88b348b3550edf5e7035549bbb7fcb523deaeeab9b84f0520691aab +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_19528/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_19528/config.json new file mode 100644 index 0000000000000000000000000000000000000000..5b0c9addf97ca39ccb34ccfabd235a1c3a936e9b --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_19528/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "19528", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 640, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_19528/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_19528/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..5f9c786f3bc04e1ae125fdcd5a61e16c098e895a --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_19528/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 22.559234428405762, "l1_loss": 1923.7697143554688, "l0": 640.0, "frac_variance_explained": 0.9371443152427673, "cossim": 0.9742399871349334, "l2_ratio": 0.9747077763080597, "relative_reconstruction_bias": 0.9993009388446807, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.465738582611084, "loss_zero": 12.452933025360107, "frac_recovered": 0.9983337461948395, "frac_alive": 0.7103407382965088, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_29292/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_29292/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..5dabdf6567c54faf12fed7be805ac2a9cad0d8fa --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_29292/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a3dd15ceba46757c236b0b2c88da65300d0a7d876ba12d8b7a974601e148f7b +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_29292/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_29292/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ee0c8409f06918cc03bee77ce90e1494ac64a249 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_29292/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "29292", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 640, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_29292/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_29292/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..4bec5f2efff6e4ba84ca3c5b33d62c605c1bf012 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_29292/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 21.497556304931642, "l1_loss": 1915.4122436523437, "l0": 640.0, "frac_variance_explained": 0.9303101122379303, "cossim": 0.9766062378883362, "l2_ratio": 0.9777574419975281, "relative_reconstruction_bias": 1.001092267036438, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.4637466192245485, "loss_zero": 12.452933025360107, "frac_recovered": 0.9985301256179809, "frac_alive": 0.6819118857383728, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_4882/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_4882/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..f827afe9961a1e13050ff65ef4bdbb8bbece6bc9 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_4882/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d4d75cab21ffaacefcc39edd4dc894994fc7224249080569cfbd5bf1f29bde6 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_4882/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7dbbc38438f17caf2b2b64a4ee181e159c9d35bd --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_4882/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "4882", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 640, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_4882/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..fcc250067cbeb27d34147076ad6eea8e81ee4f5f --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 26.21021385192871, "l1_loss": 1639.9731567382812, "l0": 640.0, "frac_variance_explained": 0.9196356236934662, "cossim": 0.966629022359848, "l2_ratio": 0.9678321003913879, "relative_reconstruction_bias": 1.0004817128181458, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.4677053689956665, "loss_zero": 12.452933025360107, "frac_recovered": 0.9981360375881195, "frac_alive": 0.8402235507965088, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_9764/ae.pt b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_9764/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..ec06b719df0bc27fa6acf731842329a8e0fdcde5 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_9764/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc90062eb6e4231b952b328c449ee0d8a8b3e8f145b6f433ee8ac901c8d970f3 +size 339823336 diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_9764/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_9764/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1b58af69c4861d2c86857bec9f9de7ee9cfaedb0 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_9764/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "9764", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 640, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_9764/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_9764/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..b539be0b8f2c6545be4342c7e6d29fab75ec8934 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_9764/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 23.994207000732423, "l1_loss": 1734.673193359375, "l0": 640.0, "frac_variance_explained": 0.9146796822547912, "cossim": 0.972093015909195, "l2_ratio": 0.973492443561554, "relative_reconstruction_bias": 1.0015430510044099, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.466175389289856, "loss_zero": 12.452933025360107, "frac_recovered": 0.9982897996902466, "frac_alive": 0.7409396767616272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file