diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12/trainer_0/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12/trainer_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..cc54f7cbfe621144607e42f5640dd7d87a9eabc8 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12/trainer_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:208e1a9dfa6a72da57408cdf09d5e956bdc73d22cb40fcef6ac276e0f7c0f40f +size 1208232744 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12/trainer_0/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12/trainer_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..fd3947ec76566c1e00d7740bb0cfca44f437b2a4 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12/trainer_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": 97656, + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 20, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12/trainer_1/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12/trainer_1/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..c7ae60724bf04925c0e86389c3003f79c1165a42 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12/trainer_1/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b62256e9d5194b29791199a91bda4b7a57f1fe2053d9f33a00e2bac6a4103fca +size 1208232744 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12/trainer_1/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12/trainer_1/config.json new file mode 100644 index 0000000000000000000000000000000000000000..4db3de3d66157550be659cdae4b56c13fa454b30 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12/trainer_1/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": 97656, + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 40, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12/trainer_2/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12/trainer_2/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..82d191abd822b609488b2ee96378824a27eb1e12 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12/trainer_2/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1806bd2410e9b3624cd75b80660b0c9b5e1b4de5e573fb7e01f0fadb47827eb +size 1208232744 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12/trainer_2/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12/trainer_2/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f9e28376c91071ebb4864320b9d9e9cdde474da1 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12/trainer_2/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": 97656, + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 80, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12/trainer_3/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12/trainer_3/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..bed16c2d3d974de0a6594a1af81c4faa1de6dba4 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12/trainer_3/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70d494ac3a55427b1c8a71862177a6207389dedeb5216272cb225773fa18108a +size 1208232744 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12/trainer_3/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12/trainer_3/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d520179baf759f27207e258db93ce0d5447c3ab4 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12/trainer_3/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": 97656, + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 160, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12/trainer_4/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12/trainer_4/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..713300c4f3a975b42c766a11cec5878e5daca9d6 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12/trainer_4/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:097f6be632e37ed5ad2db3bb71275074af73381414aed4c958d8c981ba57ff0b +size 1208232744 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12/trainer_4/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12/trainer_4/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ede075e1ef7a2401a10e7c5d3ed9d39050e19ff5 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12/trainer_4/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": 97656, + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 320, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12/trainer_5/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12/trainer_5/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..2d6490b295c4b6fed676caa2de57a9a8d597a2ae --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12/trainer_5/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f08b5edb8d4f08d5624f3f2ed57099068d5793e56a6a0b995b89ad008a0158d2 +size 1208232744 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12/trainer_5/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12/trainer_5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ca356bfdee85c76ac26fc79da7e66d7d0cf467e1 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12/trainer_5/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": 97656, + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 640, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_0/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..b4728728d1950b3c1bc95d51181d9828e021cfe5 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:502a714e42542d494218c7cf9ba4eb50e44f5d991e21f7380afa481874e98186 +size 1208232760 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_0/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..be1f4f95ee612e7981fc125ebc2927ebd3d3f481 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "0", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 20, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_308/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_308/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..99442f5ab6afeef44ecf9005e5b6a16806df8e31 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_308/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a26798a8bcb7a1651ce9ecea96e8a3c028d085c6e076deb10716d8ac54a7e34 +size 1208232776 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_308/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_308/config.json new file mode 100644 index 0000000000000000000000000000000000000000..819a05ba0ffffbf7fdd951651ead5a39f926140e --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_308/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "308", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 20, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_3088/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_3088/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..d6657a9b60a9b648c5090d191d022a4141fbd546 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_3088/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4c1e18c0ebec34395a355c549668618fae357fbf8a711941e2c6a19218a167b +size 1208232848 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_3088/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_3088/config.json new file mode 100644 index 0000000000000000000000000000000000000000..049c02f59cc6094479b8a516f922a6b76246fb54 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_3088/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "3088", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 20, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_30881/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_30881/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..7e5d90c1a199ca69b17c689624a237317943da44 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_30881/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ad2b8ba91efe15d2b0a3b50df6e7d72f20693ff85187e6affecf24c99b3cdaf +size 1208233048 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_30881/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_30881/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ab941c88977e80885d3602048aa4169cc2e8a45a --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_30881/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "30881", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 20, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_97/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_97/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..e643351d9ee79d8c5422cbe240672f73d88c1a36 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_97/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fdc5bfe07945e0e7c747785f6c0f7697e31de06247271407f63f4b43989ce7f +size 1208232768 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_97/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_97/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7bff5b9ac4bda318d5053f5356a839c84221556d --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_97/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "97", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 20, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_976/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_976/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..c3871bd22cb806ba6e96041983490c1fe3fdeafb --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_976/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6b27d354e0c1291a14f2237c2179480763c807b10d357479287996d6bd4a32d +size 1208232776 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_976/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_976/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e0133094751bea889ba5007f8cc0f4c0d9337460 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_976/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "976", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 20, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_9765/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_9765/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..ce571e3a772e0dfbe6f08583172a964ad702906e --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_9765/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42673cf80ea69ba2a20937fade9cb5229e8e9c2473622a93179b73bd04f66192 +size 1208232848 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_9765/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_9765/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1b98f5437ea91cf6d4cae2783728692691de50a8 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_9765/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "9765", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 20, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_0/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..b4728728d1950b3c1bc95d51181d9828e021cfe5 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:502a714e42542d494218c7cf9ba4eb50e44f5d991e21f7380afa481874e98186 +size 1208232760 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_0/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..757a08210fbbf30b0f87582005ad3b690d69c00b --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "0", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 40, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_308/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_308/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..d3805b40d240c8a6d7d41dce97ecf38607af6750 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_308/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f4970ab95412f34dc6d2a51e1a44a2ba646f4fc3db7b66e78d0f8457e5f3de3 +size 1208232776 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_308/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_308/config.json new file mode 100644 index 0000000000000000000000000000000000000000..831a71358fa4499970990a3c802a7cc66cb5e7e6 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_308/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "308", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 40, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_3088/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_3088/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..23db3298a7d5465c5bebace44f630081f16c34ad --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_3088/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68bfc21858c287b3469700526b53b45af93be77ac98f10ae6823ccd93c6b2656 +size 1208232848 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_3088/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_3088/config.json new file mode 100644 index 0000000000000000000000000000000000000000..bbea75e371a72a995e9002e1c7e609f6e997f5f9 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_3088/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "3088", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 40, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_30881/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_30881/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..bb81e963448d29e3713072599a8038b81ee86137 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_30881/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2aa60e665467bcc939083c498b0185bb764f393b12f3e809b83eed112aebaca1 +size 1208233048 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_30881/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_30881/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d67aa50bb0d3cc62b20fdf9b5684aa71721d5c03 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_30881/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "30881", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 40, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_97/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_97/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..d7405a17f5b05347437da356872543e427d29473 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_97/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e977b4a56cc05dff066aeb2814bf968ef19b6ee2597a916859336d280e94fa4c +size 1208232768 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_97/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_97/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e29707cfe2e188dbc192fce2ffe2db4281c7bde1 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_97/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "97", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 40, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_976/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_976/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..a44bfbaccddd09d850f1a94b4328a90bf9f3e9d6 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_976/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f23b1538dc9bca7beffba37ea52766db4fa837f3efaf6020db6a12cc7c8d1a2 +size 1208232776 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_976/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_976/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c875c94124e95c8c5afd054b1382e90ab8d64fce --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_976/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "976", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 40, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_9765/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_9765/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..9b2061de327b725f4e39f6c4c9837062f896788e --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_9765/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9488ef8aa60b2f1e26f0ae852cc39c41ea54755c1b7213af3e3d8b917f894615 +size 1208232848 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_9765/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_9765/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3abbf4dfb3dd914e390dd10a57757112ffe47c95 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_9765/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "9765", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 40, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_0/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..b4728728d1950b3c1bc95d51181d9828e021cfe5 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:502a714e42542d494218c7cf9ba4eb50e44f5d991e21f7380afa481874e98186 +size 1208232760 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_0/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8b7a36064ecbbba369aac15f88a81709d21ad4c3 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "0", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 80, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_308/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_308/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..6f1b0a4309c379fd73efa8c9823c08dd5d242e58 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_308/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5ef1da96268137de3a086f3cc7ad0eb2648ef398566ce418b9d905e07fece03 +size 1208232776 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_308/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_308/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f78e2aee1ac1a648886b0859b2b48fa48f4d632a --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_308/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "308", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 80, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_3088/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_3088/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..271386ebbbe92fca8911cf92b7774b2f2965101a --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_3088/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33aecee459f09e9fb9e136a885097e0c54ab6c0fb0f09dbc5b4c3f04d9fc3e5b +size 1208232848 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_3088/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_3088/config.json new file mode 100644 index 0000000000000000000000000000000000000000..9009b84da1f73309670ef51667190e1b87cc651a --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_3088/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "3088", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 80, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_30881/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_30881/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..366bb4e25f3f3a8ce7a9ef9e7a7958767d8c385d --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_30881/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb384a6445ac3ab10ab4c7118e2849d4db2053a51661d5c13333515ba02740c3 +size 1208233048 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_30881/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_30881/config.json new file mode 100644 index 0000000000000000000000000000000000000000..b227a42b989529f132ca91578290b54a1e99e9e1 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_30881/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "30881", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 80, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_97/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_97/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..c34c09360346a8b03d6646791840900b180018c7 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_97/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9116ed6e64a6bba8b45edda4ca2f67c0e783aaf950a6c00f2bc149e513b1132 +size 1208232768 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_97/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_97/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f70126ed7ba43feeafbc614ee6ceeb156b270e7b --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_97/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "97", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 80, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_976/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_976/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..6b8b7384d8a40e99136777957ed14dd93481b349 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_976/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e725fbf1c0d077a7dce7c71455bf3a9ec7c05ab03962357deb58883c7929d9cc +size 1208232776 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_976/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_976/config.json new file mode 100644 index 0000000000000000000000000000000000000000..b5e834fd376d547e78b5fa3155253c47c37c7915 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_976/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "976", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 80, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_9765/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_9765/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..d7ab1b776ec3870d4769e859ca822316ff7fef38 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_9765/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72d1d5e79063676d467ea317cd6d4ef9eb9b08b56a4a87488ef13f715d6f91e4 +size 1208232848 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_9765/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_9765/config.json new file mode 100644 index 0000000000000000000000000000000000000000..784075da0978b35ab347c34b5bb139896386f1bd --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_9765/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "9765", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 80, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_0/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..b4728728d1950b3c1bc95d51181d9828e021cfe5 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:502a714e42542d494218c7cf9ba4eb50e44f5d991e21f7380afa481874e98186 +size 1208232760 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_0/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c1828625412e2f2395cdf480c76144a7f0aaf3e9 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "0", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 160, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_308/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_308/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..e4b8e1efbd2a79b84d2981e3c38893a9972bc676 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_308/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1eed7130ae11e47ad71273ab249091a9fb3889b614e7532bd2a79802d982dfc +size 1208232776 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_308/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_308/config.json new file mode 100644 index 0000000000000000000000000000000000000000..2a47cae96b414e64e455dbcdc9a0333628db12a2 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_308/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "308", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 160, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_3088/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_3088/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..d230a593d11f2dd7bab51691a0b0891c25b035a4 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_3088/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b316067c487ef07f8ce3b71e0c722958f6b6f4d7db6a56336f6911ec72d1e824 +size 1208232848 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_3088/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_3088/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a36dd31e1a1d2ce2ab23859046e1fbd07a6ca165 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_3088/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "3088", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 160, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_30881/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_30881/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..8d666631615b2147a91be2730d8871331c8e71d0 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_30881/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0293d8f082bea62b7f984968a075547f9d77dc6c202bb469a344658f36872c19 +size 1208233048 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_30881/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_30881/config.json new file mode 100644 index 0000000000000000000000000000000000000000..256b69929b5e84e58cc3d25bdfefeb7eb81995bb --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_30881/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "30881", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 160, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_97/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_97/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..4e6440411bd3a8b5429a122c90f4c52577738b74 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_97/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f550f874c3895b5d101898cc395e893cffc21ef701a0c4b7de7e5ddaec32380e +size 1208232768 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_97/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_97/config.json new file mode 100644 index 0000000000000000000000000000000000000000..cbe3eeebfac54acadb9a42e6357bb797bbafdf70 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_97/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "97", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 160, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_976/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_976/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..09c8acdd7e2c9a4762343a2287d46eb78f91f542 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_976/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75cf14d2835051118a7eec3d7f0352a4a6aa2045a4858f0bca6c5fdbfc103d83 +size 1208232776 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_976/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_976/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ec4ef518187aef23c7672cebd3e8bcf2f79c665f --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_976/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "976", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 160, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_9765/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_9765/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..72e553854d78f10fda3ccf2f2d5c2e8e5da456af --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_9765/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01dfe7d7cec35aefa78054d6f04f87b19ae1957205efbbded738203da151ef22 +size 1208232848 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_9765/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_9765/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a27e53148d66e0785e6b59ee57e0203765f69808 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_9765/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "9765", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 160, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_0/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..b4728728d1950b3c1bc95d51181d9828e021cfe5 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:502a714e42542d494218c7cf9ba4eb50e44f5d991e21f7380afa481874e98186 +size 1208232760 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_0/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ffed1135165020f7cf4bc42e0f1395296eab8600 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "0", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 320, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_308/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_308/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..0d8a30120a46566844f8499d58aa42ace6d069f5 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_308/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed4f91855eb7a84d639a757f8f21534133f2dd6d6e71c5f699d0f36f2ae9e7a6 +size 1208232776 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_308/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_308/config.json new file mode 100644 index 0000000000000000000000000000000000000000..2601c28488ec26976d726bd8d587c2d672843c0a --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_308/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "308", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 320, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_3088/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_3088/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..83ee239c8403d801c03317f9f198a94895b23f53 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_3088/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:245add2c72e1f14ddbd55525e569a663080afeef6f9e2081dd97d82f089f2765 +size 1208232848 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_3088/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_3088/config.json new file mode 100644 index 0000000000000000000000000000000000000000..2220e8fee12be7059cfb001c3c1c77dcf2726205 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_3088/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "3088", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 320, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_30881/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_30881/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..64d2e49e8ddac6188a0cae761f72fd16c4efda49 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_30881/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:421e3da10059d16e4570a7be7aa88c92e23ee9e86fead46f511d5d1a673082a9 +size 1208233048 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_30881/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_30881/config.json new file mode 100644 index 0000000000000000000000000000000000000000..245e2a4069e0df3cccf7d223cf765c871eec3dc6 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_30881/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "30881", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 320, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_97/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_97/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..b435e4593db109b02e56ee626aac61d253181126 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_97/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ddf22f8647dac347b3e886dcd4ea141ec0d9a829c3b2452fea7235d7201d70bd +size 1208232768 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_97/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_97/config.json new file mode 100644 index 0000000000000000000000000000000000000000..9bb4e85901de87b0d4f71ed8d3a9ea12cc8caf3e --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_97/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "97", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 320, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_976/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_976/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..4f368cc33b5ed62d1d72943607d06484fabf354a --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_976/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5358ce567048a8928a23b45852c57dd1bdce0148b16772d836608450e5606e1d +size 1208232776 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_976/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_976/config.json new file mode 100644 index 0000000000000000000000000000000000000000..6af16d2d3ac1b52e5d4c6c700580758ec0bb4452 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_976/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "976", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 320, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_9765/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_9765/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..46e830d9bfc0c404154b0020c00ac4b44026a214 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_9765/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:455d665dbfd5bdbe2891ccb3186ffc81454b46a4fa3ef0eae9ca74e4d6b8db9d +size 1208232848 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_9765/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_9765/config.json new file mode 100644 index 0000000000000000000000000000000000000000..88c8ce4c6e8a7d4b4503fa647be7c7ca2c659ca4 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_9765/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "9765", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 320, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_0/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..b4728728d1950b3c1bc95d51181d9828e021cfe5 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:502a714e42542d494218c7cf9ba4eb50e44f5d991e21f7380afa481874e98186 +size 1208232760 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_0/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1e4d9769e871ca51038ff28ea7153212a91fc0e5 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "0", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 640, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_308/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_308/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..cb3ba71bfea1b120df4e3861d3da34557065752d --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_308/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8f96226ba7d4fe69e88371d63129ac23343c5dac3e291ed36e67da785043892 +size 1208232776 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_308/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_308/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a8fbb31ae901aa80cbecc34b3c01effaaf70ad2e --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_308/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "308", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 640, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_3088/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_3088/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..8b3735f305741faf342870a5fbc8118c4b9838e4 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_3088/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95dae363d5e926a9f04f1a30c1103f7d9c2e3e4c8d1c24803cc9039fa2fc8d66 +size 1208232848 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_3088/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_3088/config.json new file mode 100644 index 0000000000000000000000000000000000000000..5464c8076969dc85f0d31f24a983f5c1bd66eb01 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_3088/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "3088", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 640, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_30881/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_30881/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..2ba31c45d07d0c901d833fe90cbddec89bc5b9ee --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_30881/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2f81ccd053670de587af0d39026aff9435de3f053f99b4bfd4fbd35b1e4a158 +size 1208233048 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_30881/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_30881/config.json new file mode 100644 index 0000000000000000000000000000000000000000..524815873714ccc2619145f2502b76280d863c4b --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_30881/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "30881", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 640, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_97/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_97/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..30956702dc00008d8ec6f6400fe22f2cf5c84add --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_97/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc4bb49c6eeccda66be94d51e4f56e396edd1b120abdcb72ceff6a610ea38764 +size 1208232768 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_97/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_97/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a50040c8fe5216009e2b25e9203a539b277adf6a --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_97/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "97", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 640, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_976/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_976/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..7f1798a3704cba29720e9f2c61ea070e3307f83d --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_976/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f8dea90575fde9dafb27ef0c96b233c1ea5b22bf77974d859e5ee55248f101c +size 1208232776 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_976/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_976/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f05fefef7eb998d7c0e8f22ce6fb09afabeff130 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_976/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "976", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 640, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_9765/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_9765/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..dac767659dcc53556e50807f3ebe4083b1a8a636 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_9765/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d06b8c87727f6ba6a4dd94b011f61bd591cab3c5a73d9ad20e3596dcb29d97f +size 1208232848 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_9765/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_9765/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1f4478d02af2dcbb444d93dad5850de1a9a7b975 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_9765/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "9765", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 640, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19/trainer_0/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19/trainer_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..5b38d3e9e3c9045a9565a268477f58330506e7a4 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19/trainer_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7451216c73959103c320d5699c5a8b0a80f8d0ed34a8ce51d53c483f07cffb2 +size 1189354280 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19/trainer_0/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19/trainer_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..32d464d50de1207203042e1877a702378d3e18e4 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19/trainer_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.00010079052613579392, + "steps": 195312, + "seed": 0, + "activation_dim": 2304, + "dict_size": 64512, + "k": 20, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19/trainer_1/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19/trainer_1/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..2ca1d589c1a8f726239753ccbeed884fdcac9e50 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19/trainer_1/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f169a74b5fb4d130a878ff34efa4595a52d62a0eeb30ce8b78ed62d2f6d9382d +size 1189354280 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19/trainer_1/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19/trainer_1/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1d7c776cadcc84e872ea05a26e837b9911870243 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19/trainer_1/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.00010079052613579392, + "steps": 195312, + "seed": 0, + "activation_dim": 2304, + "dict_size": 64512, + "k": 40, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19/trainer_2/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19/trainer_2/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..d4ae8adf2f161bac384b73c6c10a6c38f9dae648 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19/trainer_2/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1edcdcf734f3468fa2f80a650b8772711d6d662d238e8ac08c6e92daf213beeb +size 1189354280 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19/trainer_2/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19/trainer_2/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e1f142c98aba6a538720838e2d73a90da1fbc107 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19/trainer_2/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.00010079052613579392, + "steps": 195312, + "seed": 0, + "activation_dim": 2304, + "dict_size": 64512, + "k": 80, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19/trainer_3/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19/trainer_3/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..fe13541e7fd637a54db75c824ca00f4deb2410c9 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19/trainer_3/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a71dab5274ba3ddcaad5ba98f16260de46fc053267e433163bba173748effb1 +size 1189354280 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19/trainer_3/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19/trainer_3/config.json new file mode 100644 index 0000000000000000000000000000000000000000..b36a56b7f32f60d40656b956a1c1bc01244957b4 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19/trainer_3/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.00010079052613579392, + "steps": 195312, + "seed": 0, + "activation_dim": 2304, + "dict_size": 64512, + "k": 160, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19/trainer_4/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19/trainer_4/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..867b1f999e64d0a279c89af579a475b656c7a500 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19/trainer_4/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa2ef6eaf037b8a6ebb3b863f81d22c85c6909cd2521e925d82e9eaf46e22c17 +size 1189354280 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19/trainer_4/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19/trainer_4/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c9b4b449f8d158f40e1a3324b86b35298faa0a77 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19/trainer_4/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.00010079052613579392, + "steps": 195312, + "seed": 0, + "activation_dim": 2304, + "dict_size": 64512, + "k": 320, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19/trainer_5/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19/trainer_5/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..3f5c1281d9721cf1a30411b8205e4da65919f5d5 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19/trainer_5/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e813c2972932547eb715039f4fea3f80bc0cc3232d8284444a9e8c5d86864443 +size 1189354280 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19/trainer_5/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19/trainer_5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..65cb4f169dc726d6dc216ee7f4734964a5a166b8 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19/trainer_5/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.00010079052613579392, + "steps": 195312, + "seed": 0, + "activation_dim": 2304, + "dict_size": 64512, + "k": 640, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_0_step_0/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_0_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..c1b20f921383fb62d38c3ae1c0c692f4f27f7fb1 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_0_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9882317ab593dbcdc4c1289214117307d075cff2fe9c60af6b59cb1009c8b014 +size 1189354296 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_0_step_0/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_0_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..bcdcf300a8c7c46181df0f36777b9b1110357265 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_0_step_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.00010079052613579392, + "steps": "0", + "seed": 0, + "activation_dim": 2304, + "dict_size": 64512, + "k": 20, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_0_step_195/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_0_step_195/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..2785c62173d33439c0871584da8f9596a5396357 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_0_step_195/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae513ee96d00f991d178c3175705385db954be2ace3c14c55d8fe5afe53e1240 +size 1189354312 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_0_step_195/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_0_step_195/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8a903ef63d6f557153f5b1dac04284f2291d6ff3 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_0_step_195/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.00010079052613579392, + "steps": "195", + "seed": 0, + "activation_dim": 2304, + "dict_size": 64512, + "k": 20, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_0_step_1953/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_0_step_1953/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a6f73fa1bbc5a9646babb534e73083fd8766ad73 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_0_step_1953/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.00010079052613579392, + "steps": "1953", + "seed": 0, + "activation_dim": 2304, + "dict_size": 64512, + "k": 20, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_0_step_19531/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_0_step_19531/config.json new file mode 100644 index 0000000000000000000000000000000000000000..23119c8ac6e4a0114fcf5fa90318289b0cde4169 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_0_step_19531/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.00010079052613579392, + "steps": "19531", + "seed": 0, + "activation_dim": 2304, + "dict_size": 64512, + "k": 20, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_0_step_617/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_0_step_617/config.json new file mode 100644 index 0000000000000000000000000000000000000000..5e04f05fe587b134b79c0f01a1936636d2a1d983 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_0_step_617/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.00010079052613579392, + "steps": "617", + "seed": 0, + "activation_dim": 2304, + "dict_size": 64512, + "k": 20, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_0_step_6176/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_0_step_6176/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c70307fe72fec2849900bb4fcde41f9fc26ea455 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_0_step_6176/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.00010079052613579392, + "steps": "6176", + "seed": 0, + "activation_dim": 2304, + "dict_size": 64512, + "k": 20, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_0_step_61763/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_0_step_61763/config.json new file mode 100644 index 0000000000000000000000000000000000000000..aa91edeaf8814f58d6fc1a116258a797b0619768 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_0_step_61763/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.00010079052613579392, + "steps": "61763", + "seed": 0, + "activation_dim": 2304, + "dict_size": 64512, + "k": 20, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_0/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..c1b20f921383fb62d38c3ae1c0c692f4f27f7fb1 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9882317ab593dbcdc4c1289214117307d075cff2fe9c60af6b59cb1009c8b014 +size 1189354296 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_0/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..5831a7512a977d3551501796c1c8662569aafcbb --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.00010079052613579392, + "steps": "0", + "seed": 0, + "activation_dim": 2304, + "dict_size": 64512, + "k": 40, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_195/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_195/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..9b210aae9aa35aee110377b8b9e434a787d32665 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_195/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e0f5988fd15c6170506894e6309023c272e4c9aabe7cb807b434088799c6c67 +size 1189354312 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_195/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_195/config.json new file mode 100644 index 0000000000000000000000000000000000000000..897529480126ac429cddd054811bdf02019b1cef --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_195/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.00010079052613579392, + "steps": "195", + "seed": 0, + "activation_dim": 2304, + "dict_size": 64512, + "k": 40, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_1953/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_1953/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..79d744d9442513bfb508b0f79e4967e65bc9a19a --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_1953/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11ecb94de0079b249ace915f6fb558ef187a11b5fd463fb3a65ebe0dff5850ca +size 1189354384 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_1953/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_1953/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a9013d02cf990d82bed6bb9f8fecd5ccf2c21516 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_1953/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.00010079052613579392, + "steps": "1953", + "seed": 0, + "activation_dim": 2304, + "dict_size": 64512, + "k": 40, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_19531/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_19531/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..efe81c55a62bc7a9bda85e0e7a034ed498a2786e --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_19531/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d12aa6d1d0167c0cfa925610f1ea27c8fe56a5a382635f31c9d5e68211dec97a +size 1189354584 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_19531/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_19531/config.json new file mode 100644 index 0000000000000000000000000000000000000000..25dc02b3faa0c472774be9aee5978a2d196b3be2 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_19531/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.00010079052613579392, + "steps": "19531", + "seed": 0, + "activation_dim": 2304, + "dict_size": 64512, + "k": 40, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_617/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_617/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..816e6dd55c4e9e56e8f8f7f9b031298296aa07f2 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_617/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea8ec2b4131dec7f52d2772af0502e3658008b5f0cf35af2529d80256a12a350 +size 1189354312 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_617/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_617/config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ccc3e855d459b1a9489477ebe9d7c0eed2c58b3 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_617/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.00010079052613579392, + "steps": "617", + "seed": 0, + "activation_dim": 2304, + "dict_size": 64512, + "k": 40, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_6176/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_6176/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..b7f856c362da38c3c771d2e4ffecd4c68ebcf627 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_6176/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5169cb2decddc91af20ec1d2aa1681bb7c9d75f74ce4052c72d7d6c6b2305fad +size 1189354384 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_6176/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_6176/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d53491b8edfa669edfa759bd2109571f2d2e67c9 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_6176/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.00010079052613579392, + "steps": "6176", + "seed": 0, + "activation_dim": 2304, + "dict_size": 64512, + "k": 40, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_61763/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_61763/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..fa0b868486fdf16413d4ba0cc7b4aada31d43df2 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_61763/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1d7aeef9a3f42b21329ff32e0f6b3b3092d71e36d7c455c3c402186b1ddf1d9 +size 1189354584 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_61763/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_61763/config.json new file mode 100644 index 0000000000000000000000000000000000000000..4958588cb58c481c6d0dddc5bb5bf2b7eddeaa46 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_61763/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.00010079052613579392, + "steps": "61763", + "seed": 0, + "activation_dim": 2304, + "dict_size": 64512, + "k": 40, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_0/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..c1b20f921383fb62d38c3ae1c0c692f4f27f7fb1 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9882317ab593dbcdc4c1289214117307d075cff2fe9c60af6b59cb1009c8b014 +size 1189354296 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_0/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..51c298759ac313dd133a8dc8a5977886826a6109 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.00010079052613579392, + "steps": "0", + "seed": 0, + "activation_dim": 2304, + "dict_size": 64512, + "k": 80, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_195/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_195/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..c67ef34099dc88020c9c2edcfec06261eb2203d2 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_195/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b5f543a8b790108264e9f11a9bc5f39e4246abca94d7ff43339d7245acc2f0e +size 1189354312 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_195/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_195/config.json new file mode 100644 index 0000000000000000000000000000000000000000..faefab690106b5a4ff1f4e1373ff71e360690cdb --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_195/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.00010079052613579392, + "steps": "195", + "seed": 0, + "activation_dim": 2304, + "dict_size": 64512, + "k": 80, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_1953/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_1953/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..62ac020599e785f88a33ac053ae05a1b2825cdfe --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_1953/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c98dc746b35b2de998cd9f3c2c1bfd483ef2bdc789f60bcb1b041acdde97917 +size 1189354384 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_1953/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_1953/config.json new file mode 100644 index 0000000000000000000000000000000000000000..5f9ad7a501398453415e0c0335f1c3b9b9bdde0c --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_1953/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.00010079052613579392, + "steps": "1953", + "seed": 0, + "activation_dim": 2304, + "dict_size": 64512, + "k": 80, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_19531/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_19531/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..937d83b6a43a55a59a7aaf1c6088058d787d7209 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_19531/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a4ed0c6f1c643f7857f1ed5863f02881d061cb2664272488cbbe6b1a995bd78 +size 1189354584 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_19531/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_19531/config.json new file mode 100644 index 0000000000000000000000000000000000000000..fd9295c5a964e68fa883043cabe98c7556e8f018 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_19531/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.00010079052613579392, + "steps": "19531", + "seed": 0, + "activation_dim": 2304, + "dict_size": 64512, + "k": 80, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_617/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_617/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..080529795a52edaf8f17d5453fb0d651ad2b7f75 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_617/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f5a58bec7b367bed4f09c06fc8f6bcc96f9f78d8fea6812979aaab284ade153 +size 1189354312 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_617/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_617/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e030c82081e27bb2b67663303cd2a7f3685972d3 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_617/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.00010079052613579392, + "steps": "617", + "seed": 0, + "activation_dim": 2304, + "dict_size": 64512, + "k": 80, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_6176/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_6176/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..572c5e994894440783e50ac7879fdb7a8b3e61ab --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_6176/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04d8b0de11a5d09081caf84c4b823e891a6c2a6977acf5e1c9a25f845dc05b7a +size 1189354384 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_6176/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_6176/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1a1e13809963122f8ce29e725c1a12f3c45a8489 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_6176/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.00010079052613579392, + "steps": "6176", + "seed": 0, + "activation_dim": 2304, + "dict_size": 64512, + "k": 80, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_61763/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_61763/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..be1d2747f1fbed3c6da19731e1244ea96c3777b0 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_61763/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c65b8a48410eec893b0937ebb297ab23dabea2eea039ab0b109adacd02c42a06 +size 1189354584 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_61763/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_61763/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d8e022cfa7e263d4c9638c20ada654091e46fd10 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_61763/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.00010079052613579392, + "steps": "61763", + "seed": 0, + "activation_dim": 2304, + "dict_size": 64512, + "k": 80, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_0/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..c1b20f921383fb62d38c3ae1c0c692f4f27f7fb1 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9882317ab593dbcdc4c1289214117307d075cff2fe9c60af6b59cb1009c8b014 +size 1189354296 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_0/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1dda5d9193b80e28f8790f634dbc725b37d4995c --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.00010079052613579392, + "steps": "0", + "seed": 0, + "activation_dim": 2304, + "dict_size": 64512, + "k": 160, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_195/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_195/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..b0282c9e730c35cac4c20ade5a072d4b32d4e3dc --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_195/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71fb799dd4d8d998590171679863d0d7177a214a78563cb687ef62821bf4a7a8 +size 1189354312 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_195/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_195/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1fb8962a752150badd59c46dc544299fb9f80bf2 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_195/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.00010079052613579392, + "steps": "195", + "seed": 0, + "activation_dim": 2304, + "dict_size": 64512, + "k": 160, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_1953/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_1953/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..55137de38390e2e47f7bbfaa17808648d7099dd3 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_1953/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33b132d92e179c7887e837bb409d19ff97240b686b001e2091dac81e5601675b +size 1189354384 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_1953/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_1953/config.json new file mode 100644 index 0000000000000000000000000000000000000000..04deba4556b98a992f559ffc335b7aa0ca4f1db5 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_1953/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.00010079052613579392, + "steps": "1953", + "seed": 0, + "activation_dim": 2304, + "dict_size": 64512, + "k": 160, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_19531/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_19531/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..929195310143619de214f7e2bece4c5a423e42ac --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_19531/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cfbbcd14b061661c6351f3e166f8b13698160bde5b250b67b0b0b07795b0a09 +size 1189354584 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_19531/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_19531/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d2d71cc819c6406ee7726fe152eca8a4f7121c95 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_19531/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.00010079052613579392, + "steps": "19531", + "seed": 0, + "activation_dim": 2304, + "dict_size": 64512, + "k": 160, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_617/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_617/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..5b2f1006cd761a2477f6b7fda02e3281a015e279 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_617/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6224021693529c16c3807f387250ee50a1cee07255b7bfb129072e45ddaf21db +size 1189354312 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_617/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_617/config.json new file mode 100644 index 0000000000000000000000000000000000000000..cbf55e339b2cb3c6427911d3c61a8c3ea6297ff9 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_617/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.00010079052613579392, + "steps": "617", + "seed": 0, + "activation_dim": 2304, + "dict_size": 64512, + "k": 160, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_6176/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_6176/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..8a3cf254d3540370b8f2de10ba95c244d43faa8c --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_6176/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e026a8e2e8a265e55f787540211dc8ff9ca74b534e1e9349ae9f7cba55f05a4 +size 1189354384 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_6176/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_6176/config.json new file mode 100644 index 0000000000000000000000000000000000000000..9f971bd71b2721f5e1fc67c643bdfa215b2478ad --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_6176/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.00010079052613579392, + "steps": "6176", + "seed": 0, + "activation_dim": 2304, + "dict_size": 64512, + "k": 160, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_61763/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_61763/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..8e9584f08dcceb54b61fdfdf22537b6a43373263 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_61763/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8d63b38228f299c9cf43ec402178f21dd6ec564cd91ff3bbd913f96e5b5486b +size 1189354584 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_61763/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_61763/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c3ce1207b26713a03d7036d02669e01f4057238a --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_61763/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.00010079052613579392, + "steps": "61763", + "seed": 0, + "activation_dim": 2304, + "dict_size": 64512, + "k": 160, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_0/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..c1b20f921383fb62d38c3ae1c0c692f4f27f7fb1 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9882317ab593dbcdc4c1289214117307d075cff2fe9c60af6b59cb1009c8b014 +size 1189354296 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_0/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8368acf4a36031819f3b3fe6731b2fc4c2639835 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.00010079052613579392, + "steps": "0", + "seed": 0, + "activation_dim": 2304, + "dict_size": 64512, + "k": 320, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_195/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_195/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..ee95e6fbc1854201ddb70ba4624b320d7b54ae7f --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_195/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6265f4118f240523af0d2fa9a9d7e595109d58a45c62671705466b4e16eef4a6 +size 1189354312 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_195/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_195/config.json new file mode 100644 index 0000000000000000000000000000000000000000..58d5980b15870bcc58a267e23651b24dec15fd1b --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_195/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.00010079052613579392, + "steps": "195", + "seed": 0, + "activation_dim": 2304, + "dict_size": 64512, + "k": 320, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_1953/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_1953/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..dc2408d340654255d69c275e9e4db9b65e3cf314 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_1953/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:547f0cf26b2cbc0e0045eacde8e9bea6f10fdbb53b6602e17adf7dee6e143c6d +size 1189354384 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_1953/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_1953/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a93e839623cf2b453b06a9d2de176ea701c19a2b --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_1953/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.00010079052613579392, + "steps": "1953", + "seed": 0, + "activation_dim": 2304, + "dict_size": 64512, + "k": 320, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_19531/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_19531/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..0d5a928e21c760ff8740eb91845e7d9a316bffa5 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_19531/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29e854c7888f4ff4d99c26160d280a06d9ef75d7f65e82410772159fb4312ced +size 1189354584 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_19531/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_19531/config.json new file mode 100644 index 0000000000000000000000000000000000000000..806ab56acc235eb60c3cd1406a02fa6367f3b543 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_19531/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.00010079052613579392, + "steps": "19531", + "seed": 0, + "activation_dim": 2304, + "dict_size": 64512, + "k": 320, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_617/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_617/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..7b864bc32514b71fab3518472b3812e122fc273b --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_617/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0db784a9a2f3d807be507b5fe2fd4f476a36ef24e2b47729e07c1b7de89dbb7 +size 1189354312 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_617/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_617/config.json new file mode 100644 index 0000000000000000000000000000000000000000..bf8a86aac9d852b6f96a18382df0689999da0cd2 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_617/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.00010079052613579392, + "steps": "617", + "seed": 0, + "activation_dim": 2304, + "dict_size": 64512, + "k": 320, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_6176/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_6176/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..ef450d2abd333f72e65c9e5133c6e8557b0b870e --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_6176/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e248d69606a34cdd29551a14bf37b09d2aacdb4e328760ca18f575809a4bed3f +size 1189354384 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_6176/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_6176/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3d8868d686ad99aa0ce96323fa984ef3eff17cf1 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_6176/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.00010079052613579392, + "steps": "6176", + "seed": 0, + "activation_dim": 2304, + "dict_size": 64512, + "k": 320, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_61763/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_61763/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..c4a8383f9d7814511ae7f01c490318fbaf10a421 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_61763/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4e079da5092a95a27d21ea65a5effa1cd29875878ff8638d1d05657cc9a8c11 +size 1189354584 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_61763/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_61763/config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ff384a0d5e076479185ab4b75dd1dba46c51b47 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_61763/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.00010079052613579392, + "steps": "61763", + "seed": 0, + "activation_dim": 2304, + "dict_size": 64512, + "k": 320, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_0/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..c1b20f921383fb62d38c3ae1c0c692f4f27f7fb1 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9882317ab593dbcdc4c1289214117307d075cff2fe9c60af6b59cb1009c8b014 +size 1189354296 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_0/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..2677e8a0369966694e13ce7d7275757c99657230 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.00010079052613579392, + "steps": "0", + "seed": 0, + "activation_dim": 2304, + "dict_size": 64512, + "k": 640, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_195/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_195/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..cd55dacfe3ebe7356de054ed69b337e1a50e55d9 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_195/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a03f5b69235ebc0d53c815000a5057eaceffc7cbfed4b2a934b566225a214c35 +size 1189354312 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_195/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_195/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7c39d7afee7b831dc4591e6604d2baf5bd6cbeac --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_195/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.00010079052613579392, + "steps": "195", + "seed": 0, + "activation_dim": 2304, + "dict_size": 64512, + "k": 640, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_1953/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_1953/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..e68f4849344b21d3b84c803bd3128cc0cb3b72f5 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_1953/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47e53188eb1f14494a6053f0d85dc1e77ec9640ebfe036723df214d65a752189 +size 1189354384 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_1953/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_1953/config.json new file mode 100644 index 0000000000000000000000000000000000000000..9af9513425336b178f639959ec79546df646b5ab --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_1953/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.00010079052613579392, + "steps": "1953", + "seed": 0, + "activation_dim": 2304, + "dict_size": 64512, + "k": 640, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_19531/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_19531/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..4c00dba8085d5a560dd4da7a0f833b1b73282850 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_19531/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18e68e756e2890f6a56ca5c2d7f01b7a181e9b0eccbe0bac6ba7883ab73e5bad +size 1189354584 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_19531/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_19531/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a6ef092f5da7117e51af89bcb851c56e81007e97 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_19531/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.00010079052613579392, + "steps": "19531", + "seed": 0, + "activation_dim": 2304, + "dict_size": 64512, + "k": 640, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_617/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_617/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..8457ca7d867241392c6147c3d58a3accddb9dd0a --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_617/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bfb718b5f994943cbae3bbeddde1867d24514e32b6bcaeb6d4d9dfe9cb06fbd +size 1189354312 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_617/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_617/config.json new file mode 100644 index 0000000000000000000000000000000000000000..2c4109a47de6efd7f788a9c4dce4a11ad452b62f --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_617/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.00010079052613579392, + "steps": "617", + "seed": 0, + "activation_dim": 2304, + "dict_size": 64512, + "k": 640, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_6176/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_6176/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..fde6cd97bdb4786b7fc410a15576d7d201424dd6 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_6176/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7a24e6855d3e2d1e5c130e6dab38ba0f0cac6a57dbe5090edd496d03c21d7ad +size 1189354384 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_6176/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_6176/config.json new file mode 100644 index 0000000000000000000000000000000000000000..5094c6bace8d5ab73da06dd40903acd6469460f5 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_6176/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.00010079052613579392, + "steps": "6176", + "seed": 0, + "activation_dim": 2304, + "dict_size": 64512, + "k": 640, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_61763/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_61763/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..d9f31dd0c763c3391dbacd457e0d6fac54d2dbdd --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_61763/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d09c8c7ed714713fd1a5c8446aab9cfc2f912c26792dd9039f97b4525f908ce8 +size 1189354584 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_61763/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_61763/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c6389b8b9fc95ce6b611aaeea7be739f8916ea0d --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_61763/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.00010079052613579392, + "steps": "61763", + "seed": 0, + "activation_dim": 2304, + "dict_size": 64512, + "k": 640, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5/trainer_0/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5/trainer_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..7cab96d62604d49455ce8ed78d3c8ca3c2dfadf2 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5/trainer_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df1f8dd975c589688ca2da5694cd7e2c71d5ba9ab45712387bb5262e20c5dcbb +size 1208232744 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5/trainer_0/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5/trainer_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a76da274218c76042cfe522ee0338ec7b7c1daa5 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5/trainer_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": 97656, + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 20, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5/trainer_1/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5/trainer_1/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..5000ff838e9ecefd968e313042de1716c6d89e99 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5/trainer_1/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d04205f6040833d5a3fa2564de85c17b1b82e34273933414daa2502606b79f1 +size 1208232744 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5/trainer_1/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5/trainer_1/config.json new file mode 100644 index 0000000000000000000000000000000000000000..18121a284dd6c6fda4b58592bc5753a9cd5438db --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5/trainer_1/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": 97656, + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 40, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5/trainer_2/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5/trainer_2/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..558b9af8a6e8689b0e278e2a67d976e44cdb876c --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5/trainer_2/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:014b2b89e383c0ef94e4a603e5186b2a0e403d0eb0b38e7ab499163af2405887 +size 1208232744 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5/trainer_2/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5/trainer_2/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a611ea8d427dcdeca9f2bfac5b0034412cdb4417 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5/trainer_2/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": 97656, + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 80, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5/trainer_3/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5/trainer_3/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..15518ac17fbf9e9016c2b9860ed8c4987b4ce890 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5/trainer_3/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c871b230a51ea73e5f6c08ca1db0514ab9dbfafab74e27272bfb17a7ee2d8b4 +size 1208232744 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5/trainer_3/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5/trainer_3/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c2ebbf234295383bb3a94ccc0adf277e30dab53a --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5/trainer_3/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": 97656, + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 160, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5/trainer_4/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5/trainer_4/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..1c410b2e7b88554ebf5bb4dc6004549226480a26 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5/trainer_4/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9393230825a684253ef2795dcd10d67ee62749e2a6734e6ba0e87919e2603140 +size 1208232744 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5/trainer_4/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5/trainer_4/config.json new file mode 100644 index 0000000000000000000000000000000000000000..402eebc50234ab7bf1618517832045484006f3fc --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5/trainer_4/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": 97656, + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 320, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5/trainer_5/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5/trainer_5/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..6e576f47ad2112d9c7487c544e88eec3f5f98a53 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5/trainer_5/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36fffd303edbe8a52d6c0e8f27a425acaaa5a4c24b352160d0a63addd9a019e9 +size 1208232744 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5/trainer_5/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5/trainer_5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f3d27ac2d8a65eaf7fe2818d91fcc2314969986b --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5/trainer_5/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": 97656, + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 640, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_0/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..9f91c26925465dda4babcff5a3a0161738b8920c --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2fa6020d346a61f1248b174c40ae3a225c988426c118ed376952b74cc14a391 +size 1208232760 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_0/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f5f6c17d57c5b99249c4add22b64d208990b20e1 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "0", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 20, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_308/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_308/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..973a74a62b1046890b43720424525b3df1164a8a --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_308/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2936474d9c934443b3d5369b94bdc45053a7f4b2b5ba89cb53fc2651dccce915 +size 1208232776 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_308/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_308/config.json new file mode 100644 index 0000000000000000000000000000000000000000..760ed4b0fa1f98a7fe539f6e25948362891965a5 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_308/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "308", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 20, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_3088/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_3088/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..ae90eb4c3a9d051a6ba74f85a7851afa01f0ac66 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_3088/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e1e3d667af479cce5692ac6aab166258cacf1ed4d480fbd80eeadb32127315d +size 1208232848 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_3088/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_3088/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d3ed18524e1161a09ec9b8451036ab8a1c135fa0 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_3088/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "3088", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 20, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_30881/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_30881/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..b88aee2e97c6e24d8a660b3785762ca0f332e1ba --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_30881/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ae2e3b8be3f90444c4496646fb6aa604f0c2c7809b62a0d8608c47d1e697a2c +size 1208233048 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_30881/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_30881/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a5bbf33b6c21b80f07861025ab91d44ca45ecfd7 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_30881/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "30881", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 20, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_97/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_97/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..ee24e29c6efc45f42561510365053ebf247f7e4c --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_97/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98396469af5a09409acad191e2fb8637e647029ad7fe6764ea11b847b1471e6d +size 1208232768 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_97/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_97/config.json new file mode 100644 index 0000000000000000000000000000000000000000..028c98cf8fee133c17aba70e53b113e923c6c1cc --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_97/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "97", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 20, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_976/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_976/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..2ebae94e9be697b6b1a80a0d86e16d39c98c197f --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_976/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72247aad0086f6772c263e029965661f81a79ea11caae94727f16d95c07d313c +size 1208232776 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_976/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_976/config.json new file mode 100644 index 0000000000000000000000000000000000000000..09063949120a1203154d62c43886b8ce8ebfdd6d --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_976/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "976", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 20, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_9765/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_9765/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..40dd21ac7d496e3636df93a54b447b75bca1c077 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_9765/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8de72270cab9c46aa2cdb3c51a7e986bab6f43b2e5512a1d77f4c43613e20e4e +size 1208232848 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_9765/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_9765/config.json new file mode 100644 index 0000000000000000000000000000000000000000..80b0d76eaf22b0a203d03b6092bb8719ac4a2e25 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_9765/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "9765", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 20, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_0/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..9f91c26925465dda4babcff5a3a0161738b8920c --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2fa6020d346a61f1248b174c40ae3a225c988426c118ed376952b74cc14a391 +size 1208232760 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_0/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..dc776d2046806eb569f5e55f887eae44a103d188 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "0", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 40, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_308/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_308/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..c2c5da148cdcd94dff9f61e014f20bdacc9ff08e --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_308/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:610e1d15ff4fcb05fe0db164879b316f1205732e25850e145091c9cb48bfa09c +size 1208232776 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_308/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_308/config.json new file mode 100644 index 0000000000000000000000000000000000000000..34bcb2f70999246e0fc515625435ee5c2d9187e0 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_308/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "308", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 40, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_3088/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_3088/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..ecef60e1823561fbce88eee519f44413987ec7d2 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_3088/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d9d3b8aef0f52860267de45ac12977fe86826f35cdb6b87a3829673a37c36cb +size 1208232848 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_3088/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_3088/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e7869912100c2521663c3266f9d850afb695f818 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_3088/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "3088", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 40, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_30881/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_30881/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..a6736b1de0b6a87e46adb4e7b9d35cd8c63dfe39 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_30881/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8190d85699dad6aaeef16cad0731e96b63676f479e8f4fe151062e2eece8e4aa +size 1208233048 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_30881/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_30881/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8287051f7f2f5a9631c318026c1529e987b6a662 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_30881/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "30881", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 40, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_97/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_97/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..1a09daf61b17f175b471f27031d2c5a9d5344fc3 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_97/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e5458365646730ae06d8e514b5c728bd501dab0ba227af64729218b2c76dd9d +size 1208232768 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_97/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_97/config.json new file mode 100644 index 0000000000000000000000000000000000000000..87ba0b65e19147eecdda3155f5179689ba82b90b --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_97/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "97", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 40, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_976/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_976/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..b9f645e06d583cbdf81d1eb7eae33793966e6031 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_976/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4a2260a96e754b304bdbfda0175f9dde53527e2c3cc76023e1f21eadc6889fd +size 1208232776 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_976/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_976/config.json new file mode 100644 index 0000000000000000000000000000000000000000..151644f8d99eb21bd9f63019c0d4f16b8228aeb6 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_976/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "976", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 40, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_9765/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_9765/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..2e29a7327d35c4923a9c2aeb038a4026c8174ce5 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_9765/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4fafb152c01023057a05eb8d0e2f2b50263ce78eb201cb1269ce0dbbc5c7585 +size 1208232848 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_9765/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_9765/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ab700373567238ce23cc2bdd3a6c72bb7cdbc176 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_9765/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "9765", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 40, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_0/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..9f91c26925465dda4babcff5a3a0161738b8920c --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2fa6020d346a61f1248b174c40ae3a225c988426c118ed376952b74cc14a391 +size 1208232760 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_0/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3e6415b24fe46d6ea879acccabb8311e690bd47c --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "0", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 80, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_308/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_308/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..421bd8078bf805e8236ee914b61cbdb1cad2df53 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_308/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:819dec7bf756472ec9ad3ce387cd15ff50e18491d98df4775c70db889f84745e +size 1208232776 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_308/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_308/config.json new file mode 100644 index 0000000000000000000000000000000000000000..b8a8a2aae9387b1dea2d90f5bda4891a9ede947b --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_308/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "308", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 80, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_3088/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_3088/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..70eb52aae3a63ea451f01fb16cf4c18c5c8359db --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_3088/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4956f6f43595e7dc95488bf8f4eac64e76b2b22f50d0a488bcc5088a229747c8 +size 1208232848 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_3088/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_3088/config.json new file mode 100644 index 0000000000000000000000000000000000000000..cca52be82b03456fc1d4161b43a1d63c2440b348 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_3088/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "3088", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 80, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_30881/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_30881/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..2dd4cabccb157563ca0fb6b2837313fd59215719 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_30881/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b8ca264d34fc9c813271d4740f8354d920adc3a8bb2575e010e191f6d652af8 +size 1208233048 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_30881/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_30881/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8593f979a7492addb7aead09b0f72d9b6088ffd7 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_30881/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "30881", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 80, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_97/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_97/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..412d83236bf06a8e8cffd1f5f29fe5508f7f31e6 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_97/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:435d42cfbfe4fb8260ba563ecedb62184ff808afd8f7070a8c23b2b1f503838e +size 1208232768 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_97/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_97/config.json new file mode 100644 index 0000000000000000000000000000000000000000..2b8a18cd614d0a945068de20924fc185d109b31e --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_97/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "97", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 80, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_976/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_976/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..dd827d235eeec470656a8c216f730b6dad6582f6 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_976/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8af3df52dba4e0b8ae0f464cf3c7c7c2fc55b12e7e1ebd5e325e49f5d487b69 +size 1208232776 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_976/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_976/config.json new file mode 100644 index 0000000000000000000000000000000000000000..cdf6417992e880fb7b1bd24b58df16b2d2d40eae --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_976/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "976", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 80, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_9765/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_9765/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..42019e57b978ca42f2993bf4c3eac13fd7606455 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_9765/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:025b7d7aa2e0085f3c34e1cff16b900a16cb18bf6e8d8e7984dfead442d22ca2 +size 1208232848 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_9765/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_9765/config.json new file mode 100644 index 0000000000000000000000000000000000000000..2b94e6d5828bb5e8684c0d685d058b70b7bd6fb9 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_9765/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "9765", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 80, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_0/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..9f91c26925465dda4babcff5a3a0161738b8920c --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2fa6020d346a61f1248b174c40ae3a225c988426c118ed376952b74cc14a391 +size 1208232760 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_0/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..9045ad04508b7a5b8d9e53399e41894fb19035a7 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "0", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 160, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_308/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_308/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..17f10a0cee69a3f0b6cb74495c95cd887b79e970 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_308/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1172e3e921375af16476b723059354adca19d37dadbd0e5c022ba9cc633bec2 +size 1208232776 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_308/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_308/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0fd46d917df3414969bde32dbcd712114a7d1ac5 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_308/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "308", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 160, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_3088/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_3088/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..8247d769d39658c211484b3ca54d83ab4adb8ff3 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_3088/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12b637cb1114124801da785a1af70fd65bce57142a6d7bc824308e49caba8a64 +size 1208232848 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_3088/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_3088/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a51bb293523dc5254ae5c7942e06528d4e6ba28b --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_3088/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "3088", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 160, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_30881/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_30881/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..42d5aa81d29b0feef2574461a7739ace879cbb71 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_30881/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:821b99c90698176c46a4ad714484fbe8f816ed7deb4cd1b0fde7b2791d5f0a6e +size 1208233048 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_30881/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_30881/config.json new file mode 100644 index 0000000000000000000000000000000000000000..33a381be79328f832a5e282870ea661046fb9c18 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_30881/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "30881", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 160, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_97/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_97/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..4b35a637f57e27d505396ed100f717949a6e47bb --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_97/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9dbeb3944bfa5aed345d091ecb19eae1873bdc00720b533dc196b0e5cee2def9 +size 1208232768 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_97/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_97/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ea6bfce4be4b380a1003faa9cba83b32be172d7f --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_97/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "97", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 160, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_976/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_976/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..692321cf7418bb7e70110c7937bf33dc8f7d54c1 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_976/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27172f07df06c92c65a02235ee3e4a47b5045307c77f5ca1213c2ca93cf695a4 +size 1208232776 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_976/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_976/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3045e36c7652921768d32125931a6066a44c0692 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_976/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "976", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 160, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_9765/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_9765/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..de2631d782146c4354115ce0e723c02d2fabedd8 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_9765/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c238784c8fa39b48277e1b47048027e553add8e2e0d5f1f46c21e18150c8cdfa +size 1208232848 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_9765/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_9765/config.json new file mode 100644 index 0000000000000000000000000000000000000000..05b8ad7fa8aa6ef048ece7e4be542276a046d97f --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_9765/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "9765", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 160, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_0/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..9f91c26925465dda4babcff5a3a0161738b8920c --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2fa6020d346a61f1248b174c40ae3a225c988426c118ed376952b74cc14a391 +size 1208232760 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_0/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..2d84c7f8eebb88b6a6dbb063bb90f51fd4bad05e --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "0", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 320, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_308/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_308/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..bd417def2ad8d4f9a9192fbccea5513454e35e2d --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_308/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6081a5b6e5a30ecd8aca0d29bf0abce3a76c011ca0ed199b081c2d5ca38c61d +size 1208232776 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_308/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_308/config.json new file mode 100644 index 0000000000000000000000000000000000000000..bd7c5f9cdd5474e69062bd40120b83336c1ea013 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_308/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "308", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 320, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_3088/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_3088/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..1109ef1883b684035d5843eef6e0f6f1bffba88c --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_3088/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edc3845bda73209ad791526f730e4caa8fc27fdbedc7e0e9fb881e49ef11445a +size 1208232848 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_3088/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_3088/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e62f5a40085e57ade88bf796bb9d5a238b92f056 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_3088/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "3088", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 320, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_30881/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_30881/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..c85afef37672616b4a737e3fd053e6f19a2b652f --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_30881/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0923a9e807e7d0a15948d7da2e9afbe095a1764d850e889807b99744816358a7 +size 1208233048 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_30881/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_30881/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0aa4414fbe70ee2762cdb6f6620aee0f63dc5b0a --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_30881/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "30881", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 320, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_97/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_97/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..d941612ab079cb3219480d56b9b9db158c5d6896 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_97/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0edfd6bc01706233b5a034332879fe5ea2c49ca0d42f8c4c1ee249562cdefcf4 +size 1208232768 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_97/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_97/config.json new file mode 100644 index 0000000000000000000000000000000000000000..cbb542011b51df679b800e440b3ac5c4e86257a2 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_97/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "97", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 320, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_976/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_976/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..964de01179fa129dddb51fb7edaf4f70b2a53c68 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_976/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2236f43033c1069cda9177a1233a487b1f23c71a92ab8c6277fa7e0753098f24 +size 1208232776 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_976/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_976/config.json new file mode 100644 index 0000000000000000000000000000000000000000..35d168e1569ba0e0eced1b7e01be04e4a2e38cd7 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_976/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "976", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 320, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_9765/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_9765/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..d63e7208da96d84c3907b8a7d74acdae88d47c56 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_9765/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85e6bbd8e3bf2d890f516d488d66eaa7d887f3ca1ef90ab28a411f4674747957 +size 1208232848 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_9765/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_9765/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8b895f0aecf321f31e6c91c2b9659dd416a67bf6 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_9765/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "9765", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 320, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_0/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..9f91c26925465dda4babcff5a3a0161738b8920c --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2fa6020d346a61f1248b174c40ae3a225c988426c118ed376952b74cc14a391 +size 1208232760 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_0/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ea53b33ad2144d200f8126be541ec3c69c769003 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "0", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 640, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_308/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_308/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..13aba694a9f5820a52bcd952907b90e946145827 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_308/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75a6abf7f8b3922cec39af58fdd26a6430c1a3c8e7a4045ef17aa7e8a565609b +size 1208232776 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_308/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_308/config.json new file mode 100644 index 0000000000000000000000000000000000000000..744869cde9b08744c5a3eb480c681528173a9028 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_308/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "308", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 640, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_3088/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_3088/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..6cdae55abf7b76eecdae06ea7b3dd4f4416a60da --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_3088/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:344408d5a9da83ab4e40e9a0088c345b0a4027bc2664759fc28122eeabc9c766 +size 1208232848 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_3088/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_3088/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1c26e9c3c3ca1cda3d213d3ea04a1c98a40aa971 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_3088/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "3088", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 640, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_30881/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_30881/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..665dbc989ad506f13c5022b20e57278c6c7ee41e --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_30881/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:511c2d9cebc5831b10c25f07676123517d73039146d12201f7f9b171fe045541 +size 1208233048 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_30881/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_30881/config.json new file mode 100644 index 0000000000000000000000000000000000000000..baff67436379a82b3c647e3d5adc87afd997e89d --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_30881/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "30881", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 640, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_97/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_97/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..941c9925e8c3375f68a372b2699d86274cb3bd5a --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_97/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd39ad6897102b5a94bf2ae7b4f54568f348af31ff9f27459f8a15aab9e7a13d +size 1208232768 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_97/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_97/config.json new file mode 100644 index 0000000000000000000000000000000000000000..263819313122c4f2081537dd8b8edddff605d39c --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_97/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "97", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 640, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_976/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_976/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..5afee02f645e46a5b84dbe6b96f725106ae4a10a --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_976/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cefcd92ddc24cbafe039584f0248a9890bcd164736643da22eb0fc7055da0bc +size 1208232776 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_976/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_976/config.json new file mode 100644 index 0000000000000000000000000000000000000000..763c8f27a6e197b0e8a11d7a620253fe239bc366 --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_976/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "976", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 640, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_9765/ae.pt b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_9765/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..d891c550fb07a82cef7cb7c1ea8c0c59693fb4cc --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_9765/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e76fb79765eda9f2834bb29299a1e279a97d9a2c35e61fa41ce618d906b70ec3 +size 1208232848 diff --git a/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_9765/config.json b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_9765/config.json new file mode 100644 index 0000000000000000000000000000000000000000..01ff5b9407287f774963024ebbab6fc6cb37a06d --- /dev/null +++ b/gemma-2-2b_topk_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_9765/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001, + "steps": "9765", + "seed": 0, + "activation_dim": 2304, + "dict_size": 65536, + "k": 640, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12/trainer_0/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12/trainer_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..3ae51192328e5dea0b3f583c18b2adfde59e4301 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12/trainer_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcc6a99edf9c457fcc4d8d6b44b49ddbad792ba4d638588590045d4f8f8fb4e4 +size 1208232744 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12/trainer_0/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12/trainer_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c861f618569cdc92313963cafac175a64a6c5df5 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12/trainer_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12/trainer_1/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12/trainer_1/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..281f83d87802f7f95b7f9327e856b40ecfcea0fb --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12/trainer_1/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c26dd592531e23629074b6936d56a37cf79ce03686e7a5b3c161f6d0646bcca3 +size 1208232744 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12/trainer_1/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12/trainer_1/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ce1c2625537f04aa116b978fc5eabf927eb063f5 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12/trainer_1/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12/trainer_2/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12/trainer_2/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..5315190411ff88159de181e1da7e55d1d6da8ace --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12/trainer_2/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39276307618ee7fece1f9d2006cf423526f18513fc959741a0c2a99ef0f45cbd +size 1208232744 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12/trainer_2/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12/trainer_2/config.json new file mode 100644 index 0000000000000000000000000000000000000000..94548fd73f1298fb9e2c13ff588e1abbf086e4a0 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12/trainer_2/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12/trainer_3/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12/trainer_3/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..343ac4adf4418531df85029a2fe41f45712b7edc --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12/trainer_3/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c269a023a6028e0835edcdf186e00d8ff953422265596eaccd40f9f35224943 +size 1208232744 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12/trainer_3/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12/trainer_3/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c9bb23eb596f70e5b4662766f737acab3c481725 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12/trainer_3/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12/trainer_4/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12/trainer_4/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..7c9fba9b0dda6cd7bf47d0345c01092763bd169a --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12/trainer_4/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8d9a10c6a8bb3d652013f69145f4968b313cce37d02c8aa14bef7c9b6b9c208 +size 1208232744 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12/trainer_4/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12/trainer_4/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ecc27b54e7ac1537c4252a62a0789945a228583c --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12/trainer_4/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12/trainer_5/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12/trainer_5/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..665783ee334e31dbf7effa9aace6f8e19f798aaf --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12/trainer_5/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e55d83a3426d3baa501d2344faf865247677655d281921033fa3831cb4a90b17 +size 1208232744 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12/trainer_5/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12/trainer_5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..9857fac6259e0a0e3f3ad7f9067fe44c23de4cc0 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12/trainer_5/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_0/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..fb86176f53bd0f91d290f741389c63e9b8e51c68 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fae53d8d02be56365c602f46e4b90c0a14c306d10d9495e192af3df0e1df16f +size 1208232760 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_0/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..086a40aab283554819bc856f624ff83b31bb1678 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_0/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12", + "steps": "0" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_195/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_195/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..a8058df7199c9adcc4729e1bdcf23f62b4038796 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_195/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58cb5c3db70b9de87f922a76e7a810ffd26bd577b00f84cf4698cc2233f0c204 +size 1208232776 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_195/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_195/config.json new file mode 100644 index 0000000000000000000000000000000000000000..317b4041d828384845e0515d2b83634337666fec --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_195/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12", + "steps": "195" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_1953/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_1953/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..51aa21460f6e1e91904c8040b47e7bbb767b4e33 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_1953/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d829235736fdaca94dbf214a0d3d3aac3f6b3da9f6728eb62d06869aa081f78 +size 1208232848 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_1953/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_1953/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d46cc4610b32f10415a28430f8ab19b938b197eb --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_1953/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12", + "steps": "1953" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_19531/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_19531/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..4cb8d2d8671807bcbebacd73526846bc99290cfc --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_19531/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ca223625afd4cc09520760e0fe32e64f423383dd1119d16b808c3dbdad2d7ea +size 1208233048 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_19531/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_19531/config.json new file mode 100644 index 0000000000000000000000000000000000000000..bbd6f7195e2d832f9d1577d221d5f4e53054493e --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_19531/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12", + "steps": "19531" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_617/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_617/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..b8c8be18ac317765ee71e0599f0c3c225fa5c716 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_617/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c0b7a2ec4ea9f123c465abb75f33b3b9cce5b6c525a3376244be496dbc040f5 +size 1208232776 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_617/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_617/config.json new file mode 100644 index 0000000000000000000000000000000000000000..70761fa2ccaf204410bfdb25e8f1559cfce784fc --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_617/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12", + "steps": "617" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_6176/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_6176/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..1ec529de42bf296b3466ce937990c884bf089acc --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_6176/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b02cf10b07ca5cfa2ad7ad42840e84614f6b683116bb9e8e29595e64ca1185d +size 1208232848 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_6176/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_6176/config.json new file mode 100644 index 0000000000000000000000000000000000000000..fa6c8807c8a17b3a6ae733d2a460b5f361faf0cf --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_6176/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12", + "steps": "6176" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_61763/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_61763/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..9a179db24fbe1bc1400a5ec5ed191b7f1941433c --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_61763/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45ad1f574cabfead2ca2bb09db4793c3f82835d4c017bdad0e820d230de92d87 +size 1208233048 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_61763/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_61763/config.json new file mode 100644 index 0000000000000000000000000000000000000000..fc726bd30fcb24979269e0d0048e4c7a94db68f7 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_0_step_61763/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12", + "steps": "61763" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_0/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..fb86176f53bd0f91d290f741389c63e9b8e51c68 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fae53d8d02be56365c602f46e4b90c0a14c306d10d9495e192af3df0e1df16f +size 1208232760 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_0/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..95b95258d1535012eea67797922adc44d346d2dd --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_0/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12", + "steps": "0" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_195/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_195/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..1fb2cac68726d7548a89a6b658d244c627549526 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_195/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e9e9144f777df2fda3013c6e5f7d74ef0f169960e16b2f17febdeac6379527b +size 1208232776 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_195/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_195/config.json new file mode 100644 index 0000000000000000000000000000000000000000..73077c6ec67ef0c2415b82395d00452decb77665 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_195/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12", + "steps": "195" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_1953/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_1953/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..10a30af18ffee489a3d69962ed2a1994727aa5fc --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_1953/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7dfd4496e6c2a65af4cc315649fc65e1ec5e2cf813ab8a44a3fa8293481a5c11 +size 1208232848 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_1953/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_1953/config.json new file mode 100644 index 0000000000000000000000000000000000000000..97f3c66dbca18e3c09a27d3af0fcbed57f696e9b --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_1953/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12", + "steps": "1953" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_19531/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_19531/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..7a7a93e5290f6834f764809492f465d9373e177e --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_19531/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5979e305d997b7072cbf2ec5298120e58435f575b7027f9d87aa49f7d9350d0 +size 1208233048 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_19531/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_19531/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3911f470fc1b06dc8850762cbdeeef1ccec16fa7 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_19531/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12", + "steps": "19531" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_617/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_617/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..8ed0e0491b874ea6098c8962a9ed0b1d425ef12e --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_617/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:762383fb6b7c134c1f3456cd55a21d45e50b1f2f114627764f364b687cc63108 +size 1208232776 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_617/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_617/config.json new file mode 100644 index 0000000000000000000000000000000000000000..11eeb286e8128476209ce82e44962971d93677bf --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_617/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12", + "steps": "617" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_6176/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_6176/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..b33733e8195553481cf2ed6b5fbaddca66a96254 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_6176/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06df2a6d96bbd157339985fca0806c0f7f9d32c1538415a93cf9ea74602af8c1 +size 1208232848 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_6176/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_6176/config.json new file mode 100644 index 0000000000000000000000000000000000000000..eb1603c804fae82939bbcd57d9f412f2969472cf --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_6176/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12", + "steps": "6176" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_61763/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_61763/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..eead4c40c2ffad7096f886b4bc9da9035e39d592 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_61763/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6772495bb798059a00196ef70e2e9c0c32f78c56255784a05927bf7348545270 +size 1208233048 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_61763/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_61763/config.json new file mode 100644 index 0000000000000000000000000000000000000000..5291a6715a905c0e0807bfa51d20aedb155b44c2 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_1_step_61763/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12", + "steps": "61763" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_0/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..fb86176f53bd0f91d290f741389c63e9b8e51c68 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fae53d8d02be56365c602f46e4b90c0a14c306d10d9495e192af3df0e1df16f +size 1208232760 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_0/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d8e552328f43fd5694413897c1d313d3a7d49561 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_0/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12", + "steps": "0" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_195/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_195/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..6cebb47db9bf108f11b81e4e835f284aec9d913a --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_195/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e1b0bd1868030e4cd76cfc6aaf00c099bbf8aeeba7c63c2b4f6c9be043bb8a1 +size 1208232776 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_195/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_195/config.json new file mode 100644 index 0000000000000000000000000000000000000000..4f0c2dfd2db77d53c241e99c1bf4ee8ef11bfd0b --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_195/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12", + "steps": "195" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_1953/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_1953/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..b6e6ad5197d5a67fe2ffa6796347a8f19bc67580 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_1953/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb9cb04be6f0c52c4a911942b423c05da295d3a7857648fb4f7882a38fdb12b5 +size 1208232848 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_1953/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_1953/config.json new file mode 100644 index 0000000000000000000000000000000000000000..9130d7b04122273d5c89b2f1b831b77acf681e7e --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_1953/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12", + "steps": "1953" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_19531/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_19531/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..abd0e634f40589b632fd0a6481113502a0d78506 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_19531/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8c906675406b3086ac4fc8368e7b719bb9c33a1c008fd532c661bf5d16e160c +size 1208233048 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_19531/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_19531/config.json new file mode 100644 index 0000000000000000000000000000000000000000..be4fef21359facc2820063cd1fdae4237c99f93e --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_19531/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12", + "steps": "19531" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_617/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_617/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..1d602d2bf1c8d4a3990eb09abca594132ffab516 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_617/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f6b1a0b5833ebea4209545b95aa2c64674e64e638843426d28758056cad274f +size 1208232776 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_617/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_617/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3dddd19dee7d5382b1837dbf94bf66c3b1d93e16 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_617/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12", + "steps": "617" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_6176/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_6176/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..4a9475efe9af590656447fe5030df2e23d77b9ba --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_6176/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b8ed32d425c274f087a4ea98c10123e8fb08c3bb6e18418e07a84003328f314 +size 1208232848 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_6176/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_6176/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f3fcb8bda7f2f03c2275a7cc0280c8e35860c077 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_6176/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12", + "steps": "6176" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_61763/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_61763/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..695ac806408acc87a37a2c6a31787d07e66e25d4 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_61763/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49d92c541b0b53f2c11a7194b3a420ea4691e73fa5991008d65dfcad81794d48 +size 1208233048 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_61763/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_61763/config.json new file mode 100644 index 0000000000000000000000000000000000000000..28efaed8a965eb321c5520624d8d0dc272f0e512 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_2_step_61763/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12", + "steps": "61763" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_0/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..fb86176f53bd0f91d290f741389c63e9b8e51c68 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fae53d8d02be56365c602f46e4b90c0a14c306d10d9495e192af3df0e1df16f +size 1208232760 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_0/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..faf2f1a4660e8fb4ca99faa4ee14bed82f7cb19b --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_0/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12", + "steps": "0" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_195/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_195/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..65af22a37307b5170876220d5683d9c03c5e9d60 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_195/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:700be51c0fd217378d6292156386e96bf62dc353d96bee42c0d4f0e492eba694 +size 1208232776 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_195/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_195/config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ff5a32bd52de4c8656abc61c26e40e72a19c774 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_195/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12", + "steps": "195" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_1953/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_1953/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..212cca480634249d56037d315e2910bf093068b8 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_1953/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79facdc75da9909bb639914e531e9ad751c6f6b6cc3452488798d25f6a413a91 +size 1208232848 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_1953/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_1953/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ac7caacc7d53864442458558e3f5756c49f2505c --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_1953/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12", + "steps": "1953" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_19531/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_19531/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..5477082f44fd70012af7d102c1e7d9698cc88400 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_19531/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89834ee40efdfe09d3514c39f8dde6d36b1325f1611b69b6a01b59780fdea6e0 +size 1208233048 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_19531/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_19531/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7182f9c3eab6e821197ab653629efb7fac41457f --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_19531/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12", + "steps": "19531" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_617/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_617/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..0933e2c5547ddb94b16cdb3467416f0d9f44f523 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_617/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c49732420b5eb976ef83d91cc55c58277b260700fb18573a49564f8d7d58a79b +size 1208232776 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_617/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_617/config.json new file mode 100644 index 0000000000000000000000000000000000000000..4e2e3eba141f934eb6df37d4cbd0da1aad123074 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_617/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12", + "steps": "617" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_6176/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_6176/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..bcdb40772810659ce890e79ac4e365baf3c672b7 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_6176/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88ba9e980cd919848d1f7de63c28de3f3a30fe3826df269d12589557cdc6faad +size 1208232848 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_6176/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_6176/config.json new file mode 100644 index 0000000000000000000000000000000000000000..79941c1fab9a78e534023ce1fc304c2fbad54756 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_6176/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12", + "steps": "6176" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_61763/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_61763/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..1b8aff09f7581baeb6b7ce4a67d8128969ec9b79 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_61763/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0503675987a0383e1ec8db7bd2635c197bd8aa2f91afd7a463f67a218f6e8dec +size 1208233048 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_61763/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_61763/config.json new file mode 100644 index 0000000000000000000000000000000000000000..73036f03286d817a03c1cda9f2d39c6b747a9a18 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_3_step_61763/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12", + "steps": "61763" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_0/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..fb86176f53bd0f91d290f741389c63e9b8e51c68 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fae53d8d02be56365c602f46e4b90c0a14c306d10d9495e192af3df0e1df16f +size 1208232760 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_0/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a81a5b56768706c99eace9c99e999c81b05b0898 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_0/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12", + "steps": "0" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_195/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_195/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..cbce352a190c88de40b03b1dbdce4138ebaa7f24 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_195/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36c0fdb37d2f26a21c97a944be80ad3cce40abe6aa1592591425b968b0279ef2 +size 1208232776 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_195/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_195/config.json new file mode 100644 index 0000000000000000000000000000000000000000..5c569226c1f7be0c6dadcec20e8c7863d35aa332 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_195/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12", + "steps": "195" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_1953/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_1953/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..7fe7957067c7693eeafcc3563496175bf23f3003 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_1953/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:666b490efe0e565c8859a5ba50103818ced574ee09d2568c5025e8e5dac6a352 +size 1208232848 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_1953/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_1953/config.json new file mode 100644 index 0000000000000000000000000000000000000000..91fb685c5501fc28f9b11c82ec88e69f02496e43 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_1953/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12", + "steps": "1953" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_19531/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_19531/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..6510d59da5b238cfd3481ea86b17fd7aa824ca5f --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_19531/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:288c58ddb8e1aab9e05ee9a8d7313c370efc60ab103ad3fd5bfdb5091f9e6690 +size 1208233048 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_19531/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_19531/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e37ea1a9ff96b9ebed313117a9c6940760762482 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_19531/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12", + "steps": "19531" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_617/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_617/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..8f114f7358291440e16b7e109cca4e7adaa240b0 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_617/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07a912c9fbb8377f09ae13ae4dc197cb9deaf26237d44d0d66bbef415a1532fb +size 1208232776 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_617/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_617/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ad4b964e9f670ed7ddbdee0faf1af0f392ef2bd0 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_617/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12", + "steps": "617" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_6176/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_6176/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..b9afc821f1a45fee3edea727d00e23cc06d89bcf --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_6176/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f6b1047805de8f2f6bbc59271d6ed26532ff490d2c6abc939ca8c697fc95d99 +size 1208232848 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_6176/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_6176/config.json new file mode 100644 index 0000000000000000000000000000000000000000..b7fc8cb4f59044d8efe257fd4644a70a61aeef60 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_6176/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12", + "steps": "6176" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_61763/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_61763/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..3d6633e5b9ea68488338df2ac411c1a36af765d9 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_61763/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:027f13899f380a3a859510242181512fc403ab299c55a626740dc6db13bfbbb1 +size 1208233048 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_61763/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_61763/config.json new file mode 100644 index 0000000000000000000000000000000000000000..66fffa7d20db277edb50230c00aa78826239bd5f --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_4_step_61763/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12", + "steps": "61763" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_0/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..fb86176f53bd0f91d290f741389c63e9b8e51c68 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fae53d8d02be56365c602f46e4b90c0a14c306d10d9495e192af3df0e1df16f +size 1208232760 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_0/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..11bfc13a07c3d7fc3a85313aec3c47ec76f746c4 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_0/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12", + "steps": "0" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_195/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_195/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..f6b9d4d5fb843874d141bcd013845bae51531177 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_195/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a9a905fc71b475b46ddcde8c42cd1599faa7ad1258114b1013f75ca06d337e0 +size 1208232776 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_195/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_195/config.json new file mode 100644 index 0000000000000000000000000000000000000000..2929167c8d6a34d0869b3836a980d54364c51ce6 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_195/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12", + "steps": "195" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_1953/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_1953/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..df19539133d77dbc203723c096c94db71a1f6833 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_1953/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8efd43ef9d06aeb96fdb7e351a67e4a0b0df136302435992fdde78d21d3c5d25 +size 1208232848 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_1953/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_1953/config.json new file mode 100644 index 0000000000000000000000000000000000000000..2e6143ddc77a15e5b8c5bc49a1d6b974b676dfb9 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_1953/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12", + "steps": "1953" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_19531/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_19531/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..a1f7502d7b4d5236e36504ee082150685361bd7e --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_19531/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18fab63d0f5535cca43c19a7d330e2fa684623e4c92fc635ce72d2a5c549a366 +size 1208233048 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_19531/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_19531/config.json new file mode 100644 index 0000000000000000000000000000000000000000..65ef4e28f4c352c0946af2213c513ddf8d0402ff --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_19531/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12", + "steps": "19531" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_617/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_617/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..4788dd0c17f62b9a6d3d2f93bf7d9a9e3f3dad1e --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_617/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:acd36ef39143958c360a8e1f479eca4901db48579c07c4cb0e23d4ec9ad89d25 +size 1208232776 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_617/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_617/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d9fc96d0842b86a6a3ffe9cbbd6f794d69a002dd --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_617/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12", + "steps": "617" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_6176/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_6176/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..21ec3b3b61351d5ad166c03f721dda5eb402f93a --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_6176/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ca3d913d5fe4388b0fd78a9045c7e7bbd292990100a55edb60386c69592ec04 +size 1208232848 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_6176/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_6176/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ed4809a386373c50349bd337fffee93cec15cb2a --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_6176/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12", + "steps": "6176" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_61763/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_61763/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..1b2f6c22a56e8de3f709b7a41cb1bc9b7297f08b --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_61763/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d15c51950f7a1b0cb2c878a6812fd81bcab0fe22b0eb571bb93a4d30d49c4e5b +size 1208233048 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_61763/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_61763/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7e7a0ba0d59dea094bb693b7c74cbca066d8c70d --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_12_checkpoints/trainer_5_step_61763/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 12, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_12", + "submodule_name": "resid_post_layer_12", + "steps": "61763" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19/trainer_0/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19/trainer_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..935947b9c001760879a24011b9b121b5d4e311e1 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19/trainer_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ecc56009d3a023539d92618d40c382297b998f945dedd0c18006f4b88a5e1ae +size 1189354280 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19/trainer_0/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19/trainer_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..6bbdbbd12272ff3f17b29da63e4af6c6f1070ca2 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19/trainer_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 64512, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19/trainer_1/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19/trainer_1/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..3544e114ea1166c3f4877eb8df42368f0e8c6459 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19/trainer_1/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56a8aafb2a29031b29217a9a88cadf2548480812cdeec2a0003d6713178ea24d +size 1189354280 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19/trainer_1/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19/trainer_1/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a362c98d1041c88d16489526b203278b545ff3be --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19/trainer_1/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 64512, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19/trainer_2/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19/trainer_2/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..b5f8e77794e3c2910fe3210e9a08a5efb155ade7 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19/trainer_2/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:021abdc1b3415499d9e2b052216972765456db2bc5d1e36aa5d667fe9aac592d +size 1189354280 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19/trainer_2/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19/trainer_2/config.json new file mode 100644 index 0000000000000000000000000000000000000000..02711dec41c30e2b8630fb645a4ec7ae12f8c4bf --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19/trainer_2/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 64512, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19/trainer_3/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19/trainer_3/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..55762a6c8ddf67dda691dfb4f32f2ef8da65b091 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19/trainer_3/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:017e9b7ec7dcfb474a5233128f70125644913ee683c3c50e8b187410550181b9 +size 1189354280 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19/trainer_3/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19/trainer_3/config.json new file mode 100644 index 0000000000000000000000000000000000000000..9fc7e2107b5b93e6deee9560a235e3b19dd6bde2 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19/trainer_3/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 64512, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19/trainer_4/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19/trainer_4/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..3d275a8b8f040da4c772f0400b415437b185296f --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19/trainer_4/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23e257f0618aaeca69ae6d7a05c96b2264b2e6d8a67c79cfd40fda00b6b89032 +size 1189354280 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19/trainer_4/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19/trainer_4/config.json new file mode 100644 index 0000000000000000000000000000000000000000..fa9ef940f3ffa2dae1080f11d9f3b79947e00632 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19/trainer_4/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 64512, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19/trainer_5/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19/trainer_5/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..1dee5bae21f86ff5f3ab895dc3c98e9d42f671d7 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19/trainer_5/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99cfa50909424a98f9fe37b27fe30d5915268fbb2204beb17daab53c0f5d1d30 +size 1189354280 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19/trainer_5/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19/trainer_5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ede283d02eb028b12a4aac5e12980b313019acc0 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19/trainer_5/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 64512, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_0_step_0/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_0_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..4b991da856712f9820327f3e472f300fb853b6c7 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_0_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5582f7711fcd99602601305590bbebede8335bd51206df8d3e8bfed78dfb2b7 +size 1189354296 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_0_step_0/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_0_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..64fb028e53414babbcbd09bce4a84415082fc6c9 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_0_step_0/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 64512, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "0" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_0_step_195/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_0_step_195/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..df42d34dfd74d2a30c0d7c572e4b3614e523ae29 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_0_step_195/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a013ac7394c8f4dd54d826f7c9eff7929f2d04903dc847ca4ce3d141a0e0ab23 +size 1189354312 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_0_step_195/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_0_step_195/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a44e7a4a822040e266749ed540152561ddfea394 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_0_step_195/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 64512, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "195" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_0_step_1953/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_0_step_1953/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..0095f31ebde9f20ff757b9c68d7bdd88eecc2350 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_0_step_1953/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:782de5583818cc104fed0d0a16082538350507114fce04c05c938e730ccdb00a +size 1189354384 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_0_step_1953/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_0_step_1953/config.json new file mode 100644 index 0000000000000000000000000000000000000000..2a3467cc511e3896eff16970bc6c4e35fff62aff --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_0_step_1953/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 64512, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "1953" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_0_step_19531/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_0_step_19531/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..4d2882b17a9b6e9087a5cb2e7fd5bc20246b86b6 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_0_step_19531/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb8a29a613a2d59ef3f116f251247f6490015e4546c1220fef115bfdf5a4e5ce +size 1189354584 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_0_step_19531/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_0_step_19531/config.json new file mode 100644 index 0000000000000000000000000000000000000000..9411074433e1a131507c291e9955e73312d352d8 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_0_step_19531/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 64512, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "19531" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_0_step_617/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_0_step_617/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..b67873573634a2bf7b265acca2fbfb0259c50bc1 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_0_step_617/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee67942436a091256c912805522f3cf56fa8a117592fccd0e3b95d1b821aa1fe +size 1189354312 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_0_step_617/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_0_step_617/config.json new file mode 100644 index 0000000000000000000000000000000000000000..586b82a0ee011ed8cb1e58445a783ab9e201f78a --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_0_step_617/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 64512, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "617" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_0_step_6176/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_0_step_6176/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..944dd723be1e18353843ade728b597dc4d5bdfae --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_0_step_6176/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf6af27525c09cfac82749f04d187e53d6155b9ed24901da01678f4fef2ec287 +size 1189354384 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_0_step_6176/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_0_step_6176/config.json new file mode 100644 index 0000000000000000000000000000000000000000..da49ded9629b0a12ec6483c40c64d71e7c72b25a --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_0_step_6176/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 64512, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "6176" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_0_step_61763/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_0_step_61763/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..8f3886012514232c2890ebf087ef4405ebd34474 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_0_step_61763/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39bfe1c4887914b7d3240a25c8c24a226b5adaea99fc324bba3727e25ab9b57e +size 1189354584 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_0_step_61763/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_0_step_61763/config.json new file mode 100644 index 0000000000000000000000000000000000000000..4a9bf24cd3c617385244215a5efce54dee8e3ccb --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_0_step_61763/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 64512, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "61763" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_0/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..4b991da856712f9820327f3e472f300fb853b6c7 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5582f7711fcd99602601305590bbebede8335bd51206df8d3e8bfed78dfb2b7 +size 1189354296 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_0/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..384eae3a375de5c4926b32313aaed5a10cf8f95f --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_0/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 64512, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "0" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_195/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_195/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..0ed1a822914f2c57200a0d571a457e34dd618712 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_195/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42f364278226593cfc2de149d0c1e99637c7b20117ac54f3dda9d4f321a433f2 +size 1189354312 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_195/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_195/config.json new file mode 100644 index 0000000000000000000000000000000000000000..b63a30babe66ba9d6a87b10dd618374c048bf4fb --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_195/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 64512, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "195" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_1953/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_1953/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..5315802a0ab08b3a1f80ad316b81a53aab3074e9 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_1953/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75dce8c88a6a20616fd9d4dd1d55e7be9d42ba9e33f8729cbb233ad8456a22ea +size 1189354384 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_1953/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_1953/config.json new file mode 100644 index 0000000000000000000000000000000000000000..9ae4bbcf39cac58a18b9ca1292f9787c1fc2cdbc --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_1953/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 64512, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "1953" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_19531/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_19531/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..85c86b761a951a8854427fd03fa196dec59f0824 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_19531/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bcd950730f52697819c08496e231e54df63f35a817550a4e53e6e6941e1c6ee +size 1189354584 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_19531/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_19531/config.json new file mode 100644 index 0000000000000000000000000000000000000000..db3e99ca5bc41a1aa75c7c9eb7e9bcfe442a5499 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_19531/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 64512, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "19531" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_617/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_617/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..6105d88bd29812c68ca75ece783c5d7d6c75dae7 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_617/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34510fd1a73e62045f4c3f9a3435ce282b0dbf54ec20affa559db9415524dfa0 +size 1189354312 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_617/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_617/config.json new file mode 100644 index 0000000000000000000000000000000000000000..287eda1c84bddb74f080d4bea6e1cfb7bfefc2c4 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_617/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 64512, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "617" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_6176/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_6176/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..aa23b82bf9a460b07c229f9bc7515f37cc2163fb --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_6176/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a125f24c10cb67d34921d900114528b5704024663b850bf179cef9a6b46547c7 +size 1189354384 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_6176/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_6176/config.json new file mode 100644 index 0000000000000000000000000000000000000000..05e384d2075f2d901d4ba9a705146eb0b72d6579 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_6176/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 64512, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "6176" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_61763/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_61763/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..7edd05659a97fa1b95c7676bda309de839d444ab --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_61763/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3398d3ad7feba943cf86cecfbc0c966a60b92d7b94e0b299f393000b2a226505 +size 1189354584 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_61763/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_61763/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7751365ec627fdaf4df2fcf9c1eb95915c957ab5 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_1_step_61763/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 64512, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "61763" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_0/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..4b991da856712f9820327f3e472f300fb853b6c7 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5582f7711fcd99602601305590bbebede8335bd51206df8d3e8bfed78dfb2b7 +size 1189354296 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_0/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..cbfefe1812f7bb5ed8c0aa7d672e465ec9beffa6 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_0/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 64512, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "0" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_195/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_195/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..58757743d857d3ad557a4f28f320a3a3083114f6 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_195/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea8da3069451a6063448d6b53938fa6eb00a970db02598e10621f4400eb19357 +size 1189354312 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_195/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_195/config.json new file mode 100644 index 0000000000000000000000000000000000000000..73bdc16df8cbe1924ffeb9d1e433b9b5b6eaffb0 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_195/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 64512, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "195" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_1953/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_1953/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..29ebbda2a703a71786c267392ccc6e5cf5bfd377 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_1953/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10da5acd642ff3f2e6c547669d35464edf3053e5102b33c06f80edfe6a6bf576 +size 1189354384 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_1953/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_1953/config.json new file mode 100644 index 0000000000000000000000000000000000000000..052a01e805389c4b9f188817771b880dba18c1ad --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_1953/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 64512, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "1953" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_19531/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_19531/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..e67d1901af12dd0cacf51e2d3637d00feeb469ef --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_19531/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfe08638fbdf56a5f4488ed2a6b8126f7bd54561e0cab0333538e12534d5f512 +size 1189354584 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_19531/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_19531/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f53dc44ac33f668614a6edefa114b550acca38b1 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_19531/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 64512, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "19531" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_617/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_617/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..920b37999712881aadc2fbca6e1c00170ef97f93 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_617/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fc5720414c79a61c6879c67a4b38c968ba804a7015fa52a15eb56535bc6bae7 +size 1189354312 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_617/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_617/config.json new file mode 100644 index 0000000000000000000000000000000000000000..15de9706a73ec44561486bfa6516534a4d8dd271 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_617/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 64512, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "617" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_6176/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_6176/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..0d9ea52737ba9b406a3f8392b6d62a0a64ba60f1 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_6176/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a86b37f5139d54a7571b4783fb603a01ac6b7ed7c9c7e039edb7ba6f69dba528 +size 1189354384 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_6176/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_6176/config.json new file mode 100644 index 0000000000000000000000000000000000000000..6f423d18a8845ec21c520edbd8457d69b8ebf176 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_6176/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 64512, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "6176" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_61763/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_61763/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..e44098998e8e67347a133846076b1185d2af0024 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_61763/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64d45e8b02c70de4404f9cac435570c4c2c1b2705cafc2932f962aa66c2d74b9 +size 1189354584 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_61763/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_61763/config.json new file mode 100644 index 0000000000000000000000000000000000000000..4ec4e2db6e0177b89ee802c7055f1b14f0dfd48b --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_2_step_61763/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 64512, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "61763" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_0/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..4b991da856712f9820327f3e472f300fb853b6c7 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5582f7711fcd99602601305590bbebede8335bd51206df8d3e8bfed78dfb2b7 +size 1189354296 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_0/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0003b238dc741518a707b7e36ad799fec101d813 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_0/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 64512, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "0" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_195/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_195/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..3dc8a9f2b3d0d3a57bb64648a3e30743d9e24a98 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_195/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82798d4fbccbe9c01d874ea872fe866f6a0111ccaf9c7afb3e9e67376af9eea8 +size 1189354312 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_195/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_195/config.json new file mode 100644 index 0000000000000000000000000000000000000000..326517fbcf2e26d4968952411a73a5bc2a092216 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_195/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 64512, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "195" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_1953/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_1953/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..460a5fcc0a82e65e15207dc355a269839edf1fda --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_1953/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6f00d55c0ebff7600d0e1470f6bd5333beda161969885b3ee5b5575b42e8ffd +size 1189354384 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_1953/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_1953/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8930d0da4096ed8dfee41da5b54d3d2898291dc0 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_1953/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 64512, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "1953" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_19531/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_19531/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..0d317ae9cca6cd84eaa4836431a885b88217a928 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_19531/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57dca333316df747b66e5594b6ae10dc96cd21d73a129895b25a53214fdd3e60 +size 1189354584 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_19531/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_19531/config.json new file mode 100644 index 0000000000000000000000000000000000000000..397a4e56ab6e78b175c713ce656bd0dc9a77369f --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_19531/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 64512, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "19531" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_617/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_617/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..dec0c290789f452c1bcbfddcaec42476a3b033a3 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_617/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9111adc6e171410a804562b22a35fdd844edcf811a011035036d7a07d299715 +size 1189354312 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_617/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_617/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f87c69537931f3a73b5b2c1885cd60ac0b0eaa69 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_617/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 64512, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "617" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_6176/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_6176/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..12ff81e917b4f22088908e2b320230c0a1ac6853 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_6176/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4eef472f02f3418e440ab49f1194dcab7144d74a20b97f99fbdcabf9a16e8b41 +size 1189354384 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_6176/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_6176/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8a8abc3e5c6b02efdd42dae0a307b0eb6551ea87 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_6176/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 64512, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "6176" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_61763/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_61763/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..ed5a96b5056d88ef4b97a0e897e9113c8e280531 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_61763/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84b392c56bbe30a9ea1d295fa104a7ac0b85a8f64eb436a64235b4ee77d0da7e +size 1189354584 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_61763/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_61763/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f6f04d51f2c5be77fdc97711eb79de2b16be0f06 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_3_step_61763/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 64512, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "61763" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_0/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..4b991da856712f9820327f3e472f300fb853b6c7 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5582f7711fcd99602601305590bbebede8335bd51206df8d3e8bfed78dfb2b7 +size 1189354296 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_0/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e02e5cd8a678e5916d3cab87a77d09cc03028d59 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_0/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 64512, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "0" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_195/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_195/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..2b2270d69883f5dd5d7258c611f26ee5f891eea5 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_195/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56d1cdac255a3d8e78a4af72b5a01531d7fafb279631530c4fc04eb799f89ccc +size 1189354312 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_195/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_195/config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a7a067bbaf4b746a8470887cb4b3bd30a382dc7 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_195/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 64512, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "195" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_1953/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_1953/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..137107542333c145621f17e489027c19fc0f6fd3 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_1953/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:462f0f46197f8cf89dc26ca7ba0416424d1a5ef2df77a82b42b680a508c444eb +size 1189354384 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_1953/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_1953/config.json new file mode 100644 index 0000000000000000000000000000000000000000..2fbbc964805fbe83a03a5845834ca903461d0248 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_1953/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 64512, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "1953" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_19531/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_19531/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..9ec3cc2aed4f9fa5c8fa94fc1d240aed63e779ab --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_19531/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca6ac035999a9ffd4f8a4bd94b456de44408b7d02c1e5056d2b63af20e0a5a8b +size 1189354584 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_19531/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_19531/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e6174700b0f1d8d8ade320c6d9775c47551da74f --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_19531/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 64512, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "19531" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_617/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_617/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..c1f438cd532daa368bf31f37439d1b851a2dae12 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_617/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2790690a0630addc3ec9344985041471f1d739c894789988dd0176da3b300489 +size 1189354312 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_617/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_617/config.json new file mode 100644 index 0000000000000000000000000000000000000000..97eb81b99645b81649f546979c076bd431c4e01e --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_617/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 64512, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "617" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_6176/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_6176/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..384ceb592c90739dc5059d371c71a9aeb2d26478 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_6176/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0d3921b1ebb18aa333da62b3b906541eb99548079687888c3070540938db4ca +size 1189354384 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_6176/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_6176/config.json new file mode 100644 index 0000000000000000000000000000000000000000..806b77d13baa4e986ff23932011bc691183e6d97 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_6176/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 64512, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "6176" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_61763/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_61763/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..1014c41fbdba429461acc56ba7de9173dfb09322 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_61763/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc5628b66616095c1285df97ebc6a2dab12f9c93c83178bb64110885470fcc4e +size 1189354584 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_61763/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_61763/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1e760b976b1201491a59346f96d7d8fda1182025 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_4_step_61763/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 64512, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "61763" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_0/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..4b991da856712f9820327f3e472f300fb853b6c7 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5582f7711fcd99602601305590bbebede8335bd51206df8d3e8bfed78dfb2b7 +size 1189354296 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_0/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..89ca105f4524cad55e1daf5f2b74d6fdc0b7b3c5 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_0/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 64512, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "0" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_195/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_195/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..bec50fcc84673f169b13eff787cc483a400619d0 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_195/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f87c1ef7bf807f482456dac6039d72742776c257ba699021dcafe94650cec202 +size 1189354312 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_195/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_195/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ef1d2275aa0193c34a8cd7e0a9ac351b59a0ee53 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_195/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 64512, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "195" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_1953/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_1953/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..4cf99e8695932d720a0dda0aa06f78ff4a6536a3 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_1953/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a619a00c5fea3ec9af9386036f96787522562b99889663090b65cd0b41ad48b2 +size 1189354384 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_1953/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_1953/config.json new file mode 100644 index 0000000000000000000000000000000000000000..6d0a68f7343abac498d8528b1d32b3544df71de1 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_1953/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 64512, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "1953" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_19531/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_19531/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..3487c5b2ee5ac8f96094362f4433b7c3490b661b --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_19531/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:639b5a47d9ad4d843d81a368d149b63e8b5a1f634f9ac222c94668c97e4ab554 +size 1189354584 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_19531/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_19531/config.json new file mode 100644 index 0000000000000000000000000000000000000000..b07be6b7929b0fc2c83cc14530fefce83a0976e7 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_19531/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 64512, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "19531" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_617/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_617/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..1edbbd84d2989f324fbc088b838230ce94ffba41 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_617/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cc3d2d97d6d02bfb787cee90f5dc6db50ea00b7ac3632649849313b42702109 +size 1189354312 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_617/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_617/config.json new file mode 100644 index 0000000000000000000000000000000000000000..b8c2a845464b5dc4c104dab05cbba00f7554ac4f --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_617/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 64512, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "617" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_6176/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_6176/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..6cefdfec92385349618548996e580d8af17a7e6c --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_6176/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:570981a2fc91a335000bc503edd6dcdd695e707eee99ed1aefcbb6041ff2aaac +size 1189354384 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_6176/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_6176/config.json new file mode 100644 index 0000000000000000000000000000000000000000..43e6f9b8ea77ca8e380a7a1edec447991b1198ca --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_6176/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 64512, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "6176" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_61763/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_61763/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..41d3ec0f7741aff1697945197de57cfad28fe890 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_61763/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:761a1c4182b2c4d57bc6ef4e5bd554a7780f32e1d50106000b2131dc92a4e7a6 +size 1189354584 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_61763/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_61763/config.json new file mode 100644 index 0000000000000000000000000000000000000000..fbb7b8117bd0d3fcb3b5cdeb51f0cb4918df4ab0 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_19_checkpoints/trainer_5_step_61763/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 64512, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:1", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "61763" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 2048, + "device": "cuda:1" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5/trainer_0/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5/trainer_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..8c3ff78117b7c079948dcb1d5829dfe71b7edacf --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5/trainer_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96e31bef3f72671c730cccd1a7723a27455b86c31d03f2fd9ea95037997d1494 +size 1208232744 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5/trainer_0/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5/trainer_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f11294bae31c2a66724b63fb52b9d664fcde0fa6 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5/trainer_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5/trainer_1/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5/trainer_1/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..d5608d2ac31df53b8e74590dbcab6666b62cb1f5 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5/trainer_1/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0438848e955a4c07aabb00cafcf5a8e33f77ab14a36a50dbeaa378429587115f +size 1208232744 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5/trainer_1/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5/trainer_1/config.json new file mode 100644 index 0000000000000000000000000000000000000000..efc9e7519be0e19e187a88cc7014edc1cc7e503e --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5/trainer_1/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5/trainer_2/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5/trainer_2/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..18d234bf336bd55f997c7285aeb5ffb1cf785082 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5/trainer_2/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4a1e99325f6779db46ff3626f7b3c86778b34feb25222c268a2452e2e91ada4 +size 1208232744 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5/trainer_2/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5/trainer_2/config.json new file mode 100644 index 0000000000000000000000000000000000000000..b88b1a6b8c834473e567ee1068018736b8de935d --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5/trainer_2/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5/trainer_3/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5/trainer_3/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..327b8e325c8bcece2ccf0dbe4e4fefbc8b2fd483 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5/trainer_3/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebbcb4e7999f6f84035f05a0033465aaec72bc91ecd3d9e1e76b3cbd53ba1bcd +size 1208232744 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5/trainer_3/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5/trainer_3/config.json new file mode 100644 index 0000000000000000000000000000000000000000..98e7808cc0e70b6bc4edbb85ace396a7f8859c5c --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5/trainer_3/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5/trainer_4/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5/trainer_4/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..8d7cb86c073dc3fb455c3dff239ef714f5a2d943 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5/trainer_4/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:267b284e9f8256974be52ddd2264ba5bc2fe56bc19e35d4c22c102f4357cd2c2 +size 1208232744 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5/trainer_4/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5/trainer_4/config.json new file mode 100644 index 0000000000000000000000000000000000000000..37b6390341b0e177dacc9e839a7bd41ea1ab2610 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5/trainer_4/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5/trainer_5/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5/trainer_5/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..4b6cc3b02e4eec55d49e2b29328595b30001ebc6 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5/trainer_5/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:536e47dd3cfdb0fabaa9c8e625192ca87cbec2f9a7c787d5ee483ecf346650e0 +size 1208232744 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5/trainer_5/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5/trainer_5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..73ff5b05851d43cc77702535339dfe21259557a4 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5/trainer_5/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_0/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..35e5bce4b4f88b19e5c7932711b4c1c8bf0328ce --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:128cb0fe4243dbb2ec45b6e104d765dae7fba4e6ed08cdb5e9136bac1d1d8c22 +size 1208232760 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_0/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..4a6a6be2ae0c0faaaa900691d2543751431c2479 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_0/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5", + "steps": "0" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_195/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_195/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..896b25cc095736479e8e375aa43bb8f1d9485add --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_195/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2719944aa66b4b8a560631fcad5c75b33841c9d7f912952964f5d9cb99b5fec +size 1208232776 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_195/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_195/config.json new file mode 100644 index 0000000000000000000000000000000000000000..2c45cc2dc008951a9bc97a9ef03408732449b171 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_195/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5", + "steps": "195" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_1953/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_1953/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..66e9b1ad3bd7fa22cf97142f8932f545a8636b14 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_1953/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fc8256b3a3256b6e5418b9d91b5734339cefeaae85e42c82d4b52008a38b27f +size 1208232848 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_1953/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_1953/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c7f2cb053cd762a6531082f29bef43ccacf0b009 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_1953/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5", + "steps": "1953" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_19531/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_19531/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..b284ba561188960e13d2f2e35dc847a1582f2982 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_19531/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03dee9a5170fbb2615fb26cd612921da7f0418ceb4ba8bdbac70a2ae5d0e9860 +size 1208233048 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_19531/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_19531/config.json new file mode 100644 index 0000000000000000000000000000000000000000..4d2e3cfe0b533b8602cd19746ae08b83911445c5 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_19531/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5", + "steps": "19531" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_617/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_617/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..992467c2be358ef51dfb8d14e62ba30b70199ee9 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_617/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43df85ceb189027f81fe27c41aa57fb75d22bc81dbafd20e64147daf97035cdf +size 1208232776 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_617/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_617/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c8ee9e65d660237a68ee9cb615dcb34403e76505 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_617/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5", + "steps": "617" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_6176/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_6176/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..122baf1b48bc71144ab29614c29e0bd080ac1fca --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_6176/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c732f580ab880879cdde780baee7d26987f9867b7d7f23bca02e8907769fa58 +size 1208232848 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_6176/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_6176/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7fc41580e2b0cb4fedab3da66cd797f2b4d66e43 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_6176/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5", + "steps": "6176" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_61763/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_61763/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..04c9df0e54ec1f4242477be49e8c014029417571 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_61763/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4ae0849c49f1767bb915700aae9995dc9bcff76ff3c63dd820df7ef966c28a6 +size 1208233048 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_61763/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_61763/config.json new file mode 100644 index 0000000000000000000000000000000000000000..de70d0507c3b38b4e87f408edd652de1f56268a5 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_0_step_61763/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5", + "steps": "61763" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_0/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..35e5bce4b4f88b19e5c7932711b4c1c8bf0328ce --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:128cb0fe4243dbb2ec45b6e104d765dae7fba4e6ed08cdb5e9136bac1d1d8c22 +size 1208232760 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_0/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..9b5d569e29c34d1c28371f091e217b2290ffafde --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_0/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5", + "steps": "0" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_195/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_195/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..3ec9d9fde94cc6cfe7f36e797305c3c4a7e726f1 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_195/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94b2d3c50422f04a443b32a439c897098fbbc36aa205194d42848cd016a3b8f7 +size 1208232776 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_195/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_195/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d547bb13872f3fff95bbb5c4d9ddad4ace7a8783 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_195/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5", + "steps": "195" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_1953/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_1953/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..a8bfb7a3456e537e8526bcfcf2b2f6bc31602304 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_1953/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96c629f309743528eae9100f7ad4de436f74372ec3ad4b9df2b8b42766e73928 +size 1208232848 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_1953/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_1953/config.json new file mode 100644 index 0000000000000000000000000000000000000000..94cc62b4447fdb46495178bd661278aea314ca74 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_1953/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5", + "steps": "1953" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_19531/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_19531/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..d03f3c750e3cf6063bec91f54d78c06171e8fa9a --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_19531/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d80023e2d3c4825e43bdf2863afe3be88422263ffbca3e22cf6f1c861197be1 +size 1208233048 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_19531/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_19531/config.json new file mode 100644 index 0000000000000000000000000000000000000000..9be111f8a88319f13d87ea13c98b63f1dce663bd --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_19531/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5", + "steps": "19531" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_617/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_617/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..850a80a58d33d1c5171a581e552a2c21944c9a30 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_617/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2154ad3037938d4584f8f6b38773f58fe3a28a1d95ca64122baa8b7ca6fe681 +size 1208232776 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_617/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_617/config.json new file mode 100644 index 0000000000000000000000000000000000000000..9aa9019a7c7e69a6321ead93f4f4bc71a06fcf10 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_617/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5", + "steps": "617" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_6176/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_6176/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..c56d42777b02f9fbb92d1e4c636c1c9488ecdb62 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_6176/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fc3a384661f113926ac9709e5a0279eb5818f7c95d360459eea20e7783c1929 +size 1208232848 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_6176/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_6176/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e55f8639191b7d10b60faf0f1e4841607af8407e --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_6176/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5", + "steps": "6176" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_61763/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_61763/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..d03af8f4d541219f7a4797f02583bd8e34285ee1 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_61763/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2f5f81d086fdbb0d0c970b1a8592d474224cc8a10771351045fae65e5a11406 +size 1208233048 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_61763/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_61763/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3ee414a34d7223bad090753495bb7b66df22505a --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_1_step_61763/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5", + "steps": "61763" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_0/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..35e5bce4b4f88b19e5c7932711b4c1c8bf0328ce --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:128cb0fe4243dbb2ec45b6e104d765dae7fba4e6ed08cdb5e9136bac1d1d8c22 +size 1208232760 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_0/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1ad7b8cdc7336f95179634a513e599b12903fc25 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_0/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5", + "steps": "0" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_195/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_195/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..0816331c0b1b29f8825a43c0bc8157150f3072ce --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_195/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c82ad7f17773d76485798e5bbfd41d1854d5d28cdce8015e1b537cf671197d1 +size 1208232776 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_195/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_195/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c1116d63353f9972a88f2bfdd114456e7cb2195d --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_195/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5", + "steps": "195" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_1953/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_1953/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..de3f6ea794a70667345a463ba043b8af69a687a7 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_1953/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6cfe58ba4ea5e4aa094a4d7f41bb1d067965ba57ed9c342c81335f62b8863ba8 +size 1208232848 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_1953/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_1953/config.json new file mode 100644 index 0000000000000000000000000000000000000000..6e22d5f9ed79283b6b0c8ab2e7706d7c787ad5cd --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_1953/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5", + "steps": "1953" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_19531/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_19531/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..745aaab27469246655c3d07cc157313de4e11027 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_19531/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64f7c9c06f1e41050ad18309eaf59b11e5d60b9cad6cf323353f67cbf94c6893 +size 1208233048 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_19531/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_19531/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3390109c86375c1da57713692b6e62951db7c9da --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_19531/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5", + "steps": "19531" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_617/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_617/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..d52ce731f08bf2d0a225f3c7230d4b3898c9c317 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_617/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:375a73377c3ab24b5a2a3ea113530d00b47d856c236430df5841022e8680e79d +size 1208232776 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_617/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_617/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7489c9a6a9ebb1bdb87a81cdb9de9ac44cabe6ee --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_617/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5", + "steps": "617" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_6176/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_6176/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..5e531823fa945153211e442da7f1bde150c954e5 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_6176/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:466756eef1a6aff1f13301308339b0e57502a85914d51a874f95531ef442020c +size 1208232848 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_6176/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_6176/config.json new file mode 100644 index 0000000000000000000000000000000000000000..5fb6eb8b31b605fa5035ad68ee183cd95490293b --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_6176/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5", + "steps": "6176" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_61763/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_61763/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..5979ec5292d2cc632878f3b694b39a9f6257798f --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_61763/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a75d62beba010099b105df801d0277ea97ee73b78aa42606ad714a21f2c8fa08 +size 1208233048 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_61763/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_61763/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f236d6c71019ed33408c2f6bf430e6a6e171871a --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_2_step_61763/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5", + "steps": "61763" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_0/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..35e5bce4b4f88b19e5c7932711b4c1c8bf0328ce --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:128cb0fe4243dbb2ec45b6e104d765dae7fba4e6ed08cdb5e9136bac1d1d8c22 +size 1208232760 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_0/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1ef2abdfc4760e9ce760978633b9da3e2fde70b4 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_0/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5", + "steps": "0" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_195/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_195/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..a0eea3be695b9243449ffac6283825fbcada0398 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_195/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fd81969d804d3b87a344c72d08d26dcb8a65c25dc87d818f875d4e2ac09f347 +size 1208232776 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_195/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_195/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7040f519164590a9e16c19f6fcd72a390d370c14 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_195/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5", + "steps": "195" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_1953/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_1953/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..e100945938f1a98af10e2960cfca6c7d15bdff52 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_1953/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7dd1e2ddd31bf59234b04214e8231f913688d48efb36667e84dc5bcee4e39285 +size 1208232848 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_1953/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_1953/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e4e081d590b89a8ff7493b63928ee6a3a35a61b2 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_1953/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5", + "steps": "1953" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_19531/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_19531/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..b64c4a05f3e47cfde22bb9c4cf9c3b686a281a00 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_19531/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:350beb4c71c62ff10017f1024b8888cffb47e03b4a9773357d800de783ae719e +size 1208233048 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_19531/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_19531/config.json new file mode 100644 index 0000000000000000000000000000000000000000..bd808db494c2f17f4d14ea6a9cc81fa393e9cdf4 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_19531/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5", + "steps": "19531" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_617/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_617/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..6b1350298f00ce4f5ba9ffd79d8a9fdf1bba86d0 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_617/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9670c6f6089787461394123fcb0e28bcee2adda9d015d39578dabd3013931702 +size 1208232776 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_617/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_617/config.json new file mode 100644 index 0000000000000000000000000000000000000000..34d4c8bfd404157744a1c0c24f040e99378dd8f3 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_617/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5", + "steps": "617" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_6176/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_6176/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..6f477a50a7c39870b3a63478d68455bfc90bb34c --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_6176/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe0c190a4648b9bf55ce354934aa1b21dd9a0e8900b4c256c7c164f11719656a +size 1208232848 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_6176/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_6176/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ea536936f2ee2378cf0dc5719e16d751488762cc --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_6176/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5", + "steps": "6176" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_61763/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_61763/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..235fefde7dc202d57dbdcfd5eac3dbe148e4a947 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_61763/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a1805452d79ba00cf56f6708d33abbd4113a1a462238fd92bdd3b068bc626fc +size 1208233048 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_61763/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_61763/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1506cf245231242190f00e43eaa7f6a1a0948f2f --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_3_step_61763/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5", + "steps": "61763" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_0/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..35e5bce4b4f88b19e5c7932711b4c1c8bf0328ce --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:128cb0fe4243dbb2ec45b6e104d765dae7fba4e6ed08cdb5e9136bac1d1d8c22 +size 1208232760 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_0/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..b1639ea66042048389e8ac376e6d9f3262db046f --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_0/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5", + "steps": "0" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_195/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_195/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..7fa972e865713183fb4723b886dc4019e7ba3a54 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_195/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bf92a9f845ab6e96cf23b872db200ccb62463b32b462eb08e34623d74b1bae8 +size 1208232776 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_195/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_195/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a8446ba7a6e06229857977103b86cc82cd4691bc --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_195/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5", + "steps": "195" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_1953/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_1953/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..5c90bbf07300bea87bb6dcb378a7f0b8376ab070 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_1953/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:723da694f229e2cc820f256fcc59625311adee59ec8ba407ba9ee5cb07f4cee4 +size 1208232848 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_1953/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_1953/config.json new file mode 100644 index 0000000000000000000000000000000000000000..956ca468bf99f49040ee703161a3caf7a31adfce --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_1953/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5", + "steps": "1953" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_19531/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_19531/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..1b2a3e06e38ac5e0c9f83e4ab6e65fd6be1f8daf --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_19531/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3409bfeec4f6af5b84b5e8069f2480961ecfe47c9db1b4d46463cf323a896549 +size 1208233048 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_19531/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_19531/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a62ecb9505b03070c0fa18360225d1dcd54ca2ba --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_19531/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5", + "steps": "19531" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_617/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_617/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..b86a55cd7f196144c4bf57536f49cd50731bab72 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_617/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2d9d57d103ab9239adc7a5b2ed7eeeee575bbbe180e9410f17e1e35c4079e0e +size 1208232776 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_617/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_617/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d52b827b357feb7ac4797fa855509f812254e538 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_617/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5", + "steps": "617" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_6176/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_6176/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..43583529093f7e59395d12e531b3d231f7993299 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_6176/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:acba9c1e3c7f02bbe3cc7e07975bafeda0770e29b439b9848991f67bca000620 +size 1208232848 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_6176/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_6176/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ee5441ce81b46613f97556728a06bb9f6b02923f --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_6176/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5", + "steps": "6176" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_61763/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_61763/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..d06c931fb787eae696a162adcc2b03d729ff1cf5 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_61763/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d168baccfc05737d21ef2caa6c5b9d86e4ad34a94c12fb1d66d8e4583479990a +size 1208233048 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_61763/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_61763/config.json new file mode 100644 index 0000000000000000000000000000000000000000..230c703b7168f116a22f19b16398e8b113e2c604 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_4_step_61763/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5", + "steps": "61763" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_0/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..35e5bce4b4f88b19e5c7932711b4c1c8bf0328ce --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:128cb0fe4243dbb2ec45b6e104d765dae7fba4e6ed08cdb5e9136bac1d1d8c22 +size 1208232760 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_0/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..70712938dce90bbe62ded1b59bd3c4c8690cd2b0 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_0/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5", + "steps": "0" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_195/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_195/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..213e466b03f8388c53c2d614cb26fccefefbacf5 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_195/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88f0b478c66fffadf534e4f6a7dfbb917e32bddb58f8d9ce4ef74db321115a87 +size 1208232776 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_195/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_195/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1d02330b0d653cc6219d8a720c9655a1d34b1a65 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_195/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5", + "steps": "195" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_1953/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_1953/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..d9f4d3cfba433267dcf9437bd57a5e9bd0f9adaa --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_1953/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2fb50e65f2d332e85f0961d41654887db6fd7402406d803c23756f6ec584c9e +size 1208232848 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_1953/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_1953/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3dd2ceebc2f5276219dfbbcae1ef9a0808a7ab68 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_1953/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5", + "steps": "1953" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_19531/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_19531/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..fa8f6870072dd28af60a1ceeb19f4eb43642c5e8 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_19531/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8cb71e72be70cd78bc523888cd75e9883650cceb6b3cd19ea44fe53ea419887 +size 1208233048 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_19531/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_19531/config.json new file mode 100644 index 0000000000000000000000000000000000000000..edba5e389059c641a76cae4c68fa25c2f21a6d59 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_19531/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5", + "steps": "19531" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_617/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_617/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..bbcb92f7574d0b6fb6d8a4c152ea9c5e518b555f --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_617/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa2a3c3c4bfa963b1fb66c98d0086384ac5925ec62fae1672fd7d3a2cdbcdc2d +size 1208232776 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_617/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_617/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0649fec1ff6703e40bc45da433e0b3d69f6464e5 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_617/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5", + "steps": "617" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_6176/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_6176/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..ff2ea936f55bce303d04fe9d837077e4de241426 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_6176/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86eb270a7595b6b10859a91cde15f979671cf03d9fdd0ce692cc8cc9ea169a61 +size 1208232848 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_6176/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_6176/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3692e64b731ff3e6cb614b7c20bbd1689f60ef84 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_6176/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5", + "steps": "6176" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_61763/ae.pt b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_61763/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..5c965e853a10c6aec489fe8b323a81c06abcd8bf --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_61763/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:289536d9c4d1fd92c8160c6e56f7c8c810d35eafc6f4b88379413fab1ec4cc04 +size 1208233048 diff --git a/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_61763/config.json b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_61763/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7c7e833dd7a0c38a2dddd272314d0a94680b7c90 --- /dev/null +++ b/gemma-2-2b_vanilla_width-2pow16_date-1109/resid_post_layer_5_checkpoints/trainer_5_step_61763/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 65536, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 5, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_5", + "submodule_name": "resid_post_layer_5", + "steps": "61763" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2048, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 2048, + "device": "cuda:0" + } +} \ No newline at end of file