diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_0/ae.pt b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..2fb200bf60d7886fc9e46c371ee93c83a073560b --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08041a12047d806f49b1ca3c1febc9aca731beb68569a444d23fc2daaddf1fc4 +size 67178280 diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_0/config.json b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..88820a664803894b71d960b99c506d6ffc59d995 --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_0/config.json @@ -0,0 +1,30 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.012, + "warmup_steps": 10, + "resample_steps": null, + "sparsity_warmup_steps": 10, + "steps": 488, + "decay_start": 390, + "seed": 0, + "device": "cuda:5", + "layer": 0, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_0", + "submodule_name": "resid_post_layer_0" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 64, + "out_batch_size": 2048, + "device": "cuda:5" + } +} \ No newline at end of file diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_0/eval_results.json b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..515a7fcf4fb376b84a94186e560eda3612ef4e3a --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 1.4167590290307999, "l1_loss": 62.0523624420166, "l0": 1568.3194427490234, "frac_variance_explained": 0.943539559841156, "cossim": 0.9821989126503468, "l2_ratio": 0.9195215627551079, "relative_reconstruction_bias": 0.9397303834557533, "loss_original": 2.988885059952736, "loss_reconstructed": 3.106488525867462, "loss_zero": 8.828418850898743, "frac_recovered": 0.9798414222896099, "frac_alive": 0.48284912109375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_1/ae.pt b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_1/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..1a975230a08f2460240ff9655d32ed95a6bf84f4 --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_1/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f68a2c94c70e951bbc10069ccbc6c9ec24e20cb02f19c2d15ee3a673adf1a300 +size 67178280 diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_1/config.json b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_1/config.json new file mode 100644 index 0000000000000000000000000000000000000000..76690122a0716efeedd9e5b18d5863f54869c02a --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_1/config.json @@ -0,0 +1,30 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.015, + "warmup_steps": 10, + "resample_steps": null, + "sparsity_warmup_steps": 10, + "steps": 488, + "decay_start": 390, + "seed": 0, + "device": "cuda:5", + "layer": 0, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_0", + "submodule_name": "resid_post_layer_0" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 64, + "out_batch_size": 2048, + "device": "cuda:5" + } +} \ No newline at end of file diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_1/eval_results.json b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_1/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..9819d306727302c20ace3b2d0ce9f80f3db94a19 --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_1/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 1.5789328217506409, "l1_loss": 55.72900319099426, "l0": 1330.4508972167969, "frac_variance_explained": 0.9300252571702003, "cossim": 0.9778566658496857, "l2_ratio": 0.9065023027360439, "relative_reconstruction_bias": 0.9307215549051762, "loss_original": 2.988885059952736, "loss_reconstructed": 3.1464501470327377, "loss_zero": 8.828418850898743, "frac_recovered": 0.9729870557785034, "frac_alive": 0.47406005859375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_2/ae.pt b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_2/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..8b3dc1f8a3d518cd85dfddfe4f34e944f2a4890c --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_2/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bed5c2fd61816e5fc0719d9adca7311a85129b8d78272cbf46cb62748ab5548 +size 67178280 diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_2/config.json b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_2/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ce57c3d236b067aa754c36fd52c373fd87685e08 --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_2/config.json @@ -0,0 +1,30 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.02, + "warmup_steps": 10, + "resample_steps": null, + "sparsity_warmup_steps": 10, + "steps": 488, + "decay_start": 390, + "seed": 0, + "device": "cuda:5", + "layer": 0, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_0", + "submodule_name": "resid_post_layer_0" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 64, + "out_batch_size": 2048, + "device": "cuda:5" + } +} \ No newline at end of file diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_2/eval_results.json b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_2/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..92136ca41af6fde23fb3377b53d580e40dd7c842 --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_2/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 1.8266066312789917, "l1_loss": 47.685190200805664, "l0": 1031.1428604125977, "frac_variance_explained": 0.9062778800725937, "cossim": 0.9702106080949306, "l2_ratio": 0.8861087150871754, "relative_reconstruction_bias": 0.9171949252486229, "loss_original": 2.988885059952736, "loss_reconstructed": 3.251991391181946, "loss_zero": 8.828418850898743, "frac_recovered": 0.9548681862652302, "frac_alive": 0.4541015625, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_3/ae.pt b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_3/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..919ec1ef9685f311e184ebe015f37c4805b4428a --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_3/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72edbdb237b1133d7d63248e9dbdc9dd0826fb9fa0fcb64603ee890a4f3e6661 +size 67178280 diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_3/config.json b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_3/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e0a6181e7c8ba614e170645a5f912867fd5102c3 --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_3/config.json @@ -0,0 +1,30 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.03, + "warmup_steps": 10, + "resample_steps": null, + "sparsity_warmup_steps": 10, + "steps": 488, + "decay_start": 390, + "seed": 0, + "device": "cuda:5", + "layer": 0, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_0", + "submodule_name": "resid_post_layer_0" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 64, + "out_batch_size": 2048, + "device": "cuda:5" + } +} \ No newline at end of file diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_3/eval_results.json b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_3/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..20c7b31134f417cb8017167af0ff3022469a19ae --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_3/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 2.280204176902771, "l1_loss": 36.662381649017334, "l0": 639.2051010131836, "frac_variance_explained": 0.8530002571642399, "cossim": 0.952362023293972, "l2_ratio": 0.8507192321121693, "relative_reconstruction_bias": 0.8974457383155823, "loss_original": 2.988885059952736, "loss_reconstructed": 3.4817975610494614, "loss_zero": 8.828418850898743, "frac_recovered": 0.9160263948142529, "frac_alive": 0.43353271484375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_4/ae.pt b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_4/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..8061f76354187120ea050fef079db16cadd201e8 --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_4/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c51a70b6f60418f10aa224b5abc555b4be0f46b8862c98962ca0741e4677d7e5 +size 67178280 diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_4/config.json b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_4/config.json new file mode 100644 index 0000000000000000000000000000000000000000..24af8f4e63c3defdf473b3d8926a1879fe344700 --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_4/config.json @@ -0,0 +1,30 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 10, + "resample_steps": null, + "sparsity_warmup_steps": 10, + "steps": 488, + "decay_start": 390, + "seed": 0, + "device": "cuda:5", + "layer": 0, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_0", + "submodule_name": "resid_post_layer_0" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 64, + "out_batch_size": 2048, + "device": "cuda:5" + } +} \ No newline at end of file diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_4/eval_results.json b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_4/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..c8b33f15cc3cd2fb9e4078bd614de194a5d1c401 --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_4/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 2.630507007241249, "l1_loss": 28.577664136886597, "l0": 394.4646053314209, "frac_variance_explained": 0.8021240644156933, "cossim": 0.9342639222741127, "l2_ratio": 0.8253069184720516, "relative_reconstruction_bias": 0.8880686983466148, "loss_original": 2.988885059952736, "loss_reconstructed": 3.6719041615724564, "loss_zero": 8.828418850898743, "frac_recovered": 0.883676066994667, "frac_alive": 0.41949462890625, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_5/ae.pt b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_5/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..4e8bbff024ce7031b10f2e60166dcc0646a4abc5 --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_5/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ed64d5e83254a4a1680542ca54f585d6989792806da6f1d1d6e084493d1b67f +size 67178280 diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_5/config.json b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..9702e116d28d17e2bcfb76d4a3b419a5a06ae94b --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_5/config.json @@ -0,0 +1,30 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 10, + "resample_steps": null, + "sparsity_warmup_steps": 10, + "steps": 488, + "decay_start": 390, + "seed": 0, + "device": "cuda:5", + "layer": 0, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_0", + "submodule_name": "resid_post_layer_0" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 64, + "out_batch_size": 2048, + "device": "cuda:5" + } +} \ No newline at end of file diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_5/eval_results.json b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_5/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..cc10346236205950211bade1ab442f89059b770c --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_0/trainer_5/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 3.089244395494461, "l1_loss": 18.967594146728516, "l0": 164.81410884857178, "frac_variance_explained": 0.7212224006652832, "cossim": 0.9034432731568813, "l2_ratio": 0.7909861765801907, "relative_reconstruction_bias": 0.8814779929816723, "loss_original": 2.988885059952736, "loss_reconstructed": 4.005634561181068, "loss_zero": 8.828418850898743, "frac_recovered": 0.8266438916325569, "frac_alive": 0.39056396484375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_0/ae.pt b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..6505fbf72803865047223b7fd5c83c8071995f0c --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:820854280e0d77fddf8f509bb79ad6e9a428b4b75d9060bc1ddbcb19b7ab8d89 +size 67178280 diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_0/config.json b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..6ffedcd21f49acfa2d99cba6cdd7e939be1afecc --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_0/config.json @@ -0,0 +1,30 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.012, + "warmup_steps": 10, + "resample_steps": null, + "sparsity_warmup_steps": 10, + "steps": 488, + "decay_start": 390, + "seed": 0, + "device": "cuda:5", + "layer": 1, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_1", + "submodule_name": "resid_post_layer_1" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 64, + "out_batch_size": 2048, + "device": "cuda:5" + } +} \ No newline at end of file diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_0/eval_results.json b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..b4b4d7241c2bbe89a750b5580e77dc4de3ae2a5d --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 1.921069249510765, "l1_loss": 86.7099380493164, "l0": 1889.5925827026367, "frac_variance_explained": 0.9374731332063675, "cossim": 0.9791730977594852, "l2_ratio": 0.9044194445014, "relative_reconstruction_bias": 0.9294404052197933, "loss_original": 2.988885059952736, "loss_reconstructed": 3.0913854241371155, "loss_zero": 8.97075641155243, "frac_recovered": 0.9828116111457348, "frac_alive": 0.47479248046875, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_1/ae.pt b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_1/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..2f432ee08fb7eaa4027f57677833506b3200add3 --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_1/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5b98cc0e9bcfc76d8507051e5bd1366145f34774c39ab5c0d83fdef56df14af +size 67178280 diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_1/config.json b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_1/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c30bd05e19633c9f4f9c77c540e88bdbbfadf320 --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_1/config.json @@ -0,0 +1,30 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.015, + "warmup_steps": 10, + "resample_steps": null, + "sparsity_warmup_steps": 10, + "steps": 488, + "decay_start": 390, + "seed": 0, + "device": "cuda:5", + "layer": 1, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_1", + "submodule_name": "resid_post_layer_1" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 64, + "out_batch_size": 2048, + "device": "cuda:5" + } +} \ No newline at end of file diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_1/eval_results.json b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_1/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..729e6dd2ee630881941ac150bf7feabd959de35a --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_1/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 2.1518948525190353, "l1_loss": 78.03551197052002, "l0": 1625.4286727905273, "frac_variance_explained": 0.9218312874436378, "cossim": 0.9738432057201862, "l2_ratio": 0.8885384723544121, "relative_reconstruction_bias": 0.9186785258352757, "loss_original": 2.988885059952736, "loss_reconstructed": 3.1272957623004913, "loss_zero": 8.97075641155243, "frac_recovered": 0.9767856597900391, "frac_alive": 0.47882080078125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_2/ae.pt b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_2/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..7207f06d15fc77398081072216453f5c17007f74 --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_2/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bdbc4935c9d5298096ee6ea78bfe243612fc4731ea0caf2a1d368f266b17448 +size 67178280 diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_2/config.json b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_2/config.json new file mode 100644 index 0000000000000000000000000000000000000000..95303e104c050a0b7e29ac3c8e407bf43c1b60f4 --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_2/config.json @@ -0,0 +1,30 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.02, + "warmup_steps": 10, + "resample_steps": null, + "sparsity_warmup_steps": 10, + "steps": 488, + "decay_start": 390, + "seed": 0, + "device": "cuda:5", + "layer": 1, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_1", + "submodule_name": "resid_post_layer_1" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 64, + "out_batch_size": 2048, + "device": "cuda:5" + } +} \ No newline at end of file diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_2/eval_results.json b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_2/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..7dd0f7bd9043adf892431c1fd72d95ec6f427b9c --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_2/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 2.5168990939855576, "l1_loss": 66.47064352035522, "l0": 1277.5725173950195, "frac_variance_explained": 0.8933972716331482, "cossim": 0.9639947153627872, "l2_ratio": 0.8639187514781952, "relative_reconstruction_bias": 0.9028670713305473, "loss_original": 2.988885059952736, "loss_reconstructed": 3.2126090973615646, "loss_zero": 8.97075641155243, "frac_recovered": 0.9625394903123379, "frac_alive": 0.47686767578125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_3/ae.pt b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_3/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..c4545eb9e917285d2632cf59a10c8edc584200d0 --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_3/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10b606532e1f82f19fdd1e40b0c23c64f9ebe39ae79d7d587a61e65325d2ab30 +size 67178280 diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_3/config.json b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_3/config.json new file mode 100644 index 0000000000000000000000000000000000000000..5d2f48506bc1e755428730d9ca2ec1e3f8226923 --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_3/config.json @@ -0,0 +1,30 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.03, + "warmup_steps": 10, + "resample_steps": null, + "sparsity_warmup_steps": 10, + "steps": 488, + "decay_start": 390, + "seed": 0, + "device": "cuda:5", + "layer": 1, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_1", + "submodule_name": "resid_post_layer_1" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 64, + "out_batch_size": 2048, + "device": "cuda:5" + } +} \ No newline at end of file diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_3/eval_results.json b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_3/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..1f23cbd8757ccf026622a5a3d56c9d3fa7ab7908 --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_3/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 3.169589176774025, "l1_loss": 49.724985122680664, "l0": 787.4504432678223, "frac_variance_explained": 0.8310047425329685, "cossim": 0.9411170482635498, "l2_ratio": 0.8247413076460361, "relative_reconstruction_bias": 0.8834847621619701, "loss_original": 2.988885059952736, "loss_reconstructed": 3.4419959783554077, "loss_zero": 8.97075641155243, "frac_recovered": 0.9246886782348156, "frac_alive": 0.478271484375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_4/ae.pt b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_4/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..d96ee985c6dad3da6de7ea2e86c67156d29c0208 --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_4/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66a161fb8dfc91099dfd8e610fb705a9dab9c3bd9356622160f322cdaaf6bac4 +size 67178280 diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_4/config.json b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_4/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f5333bcf35656c1648dd67254ba170438c343fc0 --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_4/config.json @@ -0,0 +1,30 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 10, + "resample_steps": null, + "sparsity_warmup_steps": 10, + "steps": 488, + "decay_start": 390, + "seed": 0, + "device": "cuda:5", + "layer": 1, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_1", + "submodule_name": "resid_post_layer_1" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 64, + "out_batch_size": 2048, + "device": "cuda:5" + } +} \ No newline at end of file diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_4/eval_results.json b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_4/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..abb3ead0cbc746cef487eaf9ae07e0fd4136daab --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_4/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 3.70350344479084, "l1_loss": 37.7823760509491, "l0": 474.227840423584, "frac_variance_explained": 0.7687290497124195, "cossim": 0.9165537543594837, "l2_ratio": 0.7957354746758938, "relative_reconstruction_bias": 0.876005794852972, "loss_original": 2.988885059952736, "loss_reconstructed": 3.693199545145035, "loss_zero": 8.97075641155243, "frac_recovered": 0.882872398942709, "frac_alive": 0.48974609375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_5/ae.pt b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_5/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..d8fc9ed03619b00fbe743d1fad3f740f7acb7304 --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_5/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69b8b04d6c4829e44878a7cfc9bb99f3fa16deff3e2edcb5e19d4e4720640ced +size 67178280 diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_5/config.json b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f450d72a8ff54298a203ce4776d8c6b5e3265c51 --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_5/config.json @@ -0,0 +1,30 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 10, + "resample_steps": null, + "sparsity_warmup_steps": 10, + "steps": 488, + "decay_start": 390, + "seed": 0, + "device": "cuda:5", + "layer": 1, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_1", + "submodule_name": "resid_post_layer_1" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 64, + "out_batch_size": 2048, + "device": "cuda:5" + } +} \ No newline at end of file diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_5/eval_results.json b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_5/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..04070e4447e7b73a824a40548dae257eb597496b --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_1/trainer_5/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 4.367744535207748, "l1_loss": 24.294288277626038, "l0": 183.46984004974365, "frac_variance_explained": 0.6762260124087334, "cossim": 0.8779518343508244, "l2_ratio": 0.7569525502622128, "relative_reconstruction_bias": 0.871852483600378, "loss_original": 2.988885059952736, "loss_reconstructed": 4.113821625709534, "loss_zero": 8.97075641155243, "frac_recovered": 0.8127008564770222, "frac_alive": 0.48626708984375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_0/ae.pt b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..521b3f234c137e4c77ce05a8cdca2753ac294c47 --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0cedc3d1f87faf8d918f58ab0874219bf8b4cb884f1745c03bc57247330e6d45 +size 67178280 diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_0/config.json b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d657ad88b0933ae99e413be8b904076397faf5ef --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_0/config.json @@ -0,0 +1,30 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.012, + "warmup_steps": 10, + "resample_steps": null, + "sparsity_warmup_steps": 10, + "steps": 488, + "decay_start": 390, + "seed": 0, + "device": "cuda:5", + "layer": 2, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_2", + "submodule_name": "resid_post_layer_2" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 64, + "out_batch_size": 2048, + "device": "cuda:5" + } +} \ No newline at end of file diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_0/eval_results.json b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..0aecc9880e272b09d84f733d6103ac059ee25f72 --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 3.1716324537992477, "l1_loss": 109.77040338516235, "l0": 1611.395637512207, "frac_variance_explained": 0.9486353807151318, "cossim": 0.9696135818958282, "l2_ratio": 0.8780255950987339, "relative_reconstruction_bias": 0.9444989748299122, "loss_original": 2.988885059952736, "loss_reconstructed": 3.313967987895012, "loss_zero": 9.41981041431427, "frac_recovered": 0.9495267793536186, "frac_alive": 0.532470703125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_1/ae.pt b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_1/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..9e0e53f2c3c9ce269f347e1d69f1732335f86ba2 --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_1/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:309d11c65c3e16ffda7685ea51f4b04ddd935ed7c6a49823a9dea9e8b41f420d +size 67178280 diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_1/config.json b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_1/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d65241454de823ab80551c4bd1bca0560bea93b6 --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_1/config.json @@ -0,0 +1,30 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.015, + "warmup_steps": 10, + "resample_steps": null, + "sparsity_warmup_steps": 10, + "steps": 488, + "decay_start": 390, + "seed": 0, + "device": "cuda:5", + "layer": 2, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_2", + "submodule_name": "resid_post_layer_2" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 64, + "out_batch_size": 2048, + "device": "cuda:5" + } +} \ No newline at end of file diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_1/eval_results.json b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_1/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..3f1f85956a776481b2eaaa52e3dd71679c60e4cf --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_1/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 3.5997580736875534, "l1_loss": 96.4208664894104, "l0": 1306.8117294311523, "frac_variance_explained": 0.9347885400056839, "cossim": 0.9604156315326691, "l2_ratio": 0.8589782565832138, "relative_reconstruction_bias": 0.9393252618610859, "loss_original": 2.988885059952736, "loss_reconstructed": 3.4662882536649704, "loss_zero": 9.41981041431427, "frac_recovered": 0.9260412231087685, "frac_alive": 0.5325927734375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_2/ae.pt b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_2/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..889015a3e474e1cc077c81f56f2de43739905725 --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_2/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c85ab3b7913d8c2116da2c18df2340f628b33b7ad444fe6fac5ed284aa26d12d +size 67178280 diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_2/config.json b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_2/config.json new file mode 100644 index 0000000000000000000000000000000000000000..25e10dd7e0c3b078832e6daa7e5bdbe4eb14d2dc --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_2/config.json @@ -0,0 +1,30 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.02, + "warmup_steps": 10, + "resample_steps": null, + "sparsity_warmup_steps": 10, + "steps": 488, + "decay_start": 390, + "seed": 0, + "device": "cuda:5", + "layer": 2, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_2", + "submodule_name": "resid_post_layer_2" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 64, + "out_batch_size": 2048, + "device": "cuda:5" + } +} \ No newline at end of file diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_2/eval_results.json b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_2/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..4a54f14a00365cfd29199ec8fdf346f42e438400 --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_2/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 4.234767764806747, "l1_loss": 79.00359535217285, "l0": 927.1257019042969, "frac_variance_explained": 0.9083689413964748, "cossim": 0.9438304528594017, "l2_ratio": 0.8312639258801937, "relative_reconstruction_bias": 0.931897010654211, "loss_original": 2.988885059952736, "loss_reconstructed": 3.781919851899147, "loss_zero": 9.41981041431427, "frac_recovered": 0.8772863261401653, "frac_alive": 0.54339599609375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_3/ae.pt b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_3/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..9df1f76880229d2024a4db8f43bbecbb0caf84ed --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_3/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f4f1b6ae72ea4efc055b7106d07c4edb55f74f99ebb2f9f4b2553a958d9d4ea +size 67178280 diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_3/config.json b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_3/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1d87af2f008bcd3c23586551b157e75341297885 --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_3/config.json @@ -0,0 +1,30 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.03, + "warmup_steps": 10, + "resample_steps": null, + "sparsity_warmup_steps": 10, + "steps": 488, + "decay_start": 390, + "seed": 0, + "device": "cuda:5", + "layer": 2, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_2", + "submodule_name": "resid_post_layer_2" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 64, + "out_batch_size": 2048, + "device": "cuda:5" + } +} \ No newline at end of file diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_3/eval_results.json b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_3/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..924f7090b7da6f719b63ac59177ad695eff598bb --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_3/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 5.191680729389191, "l1_loss": 56.57015347480774, "l0": 475.47170066833496, "frac_variance_explained": 0.8640795983374119, "cossim": 0.9118851572275162, "l2_ratio": 0.7894385904073715, "relative_reconstruction_bias": 0.9269732721149921, "loss_original": 2.988885059952736, "loss_reconstructed": 4.469057783484459, "loss_zero": 9.41981041431427, "frac_recovered": 0.7708443030714989, "frac_alive": 0.5369873046875, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_4/ae.pt b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_4/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..0d9a4bcebea62c9edaa563f1b0007b45cef1645c --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_4/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:520643a4d5d7e9668d1db789c4e292fa8cbf0a008d9343fc10af3597b518f147 +size 67178280 diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_4/config.json b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_4/config.json new file mode 100644 index 0000000000000000000000000000000000000000..2365e9cab1972fcff87a8631be3a7f2f2f762ded --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_4/config.json @@ -0,0 +1,30 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 10, + "resample_steps": null, + "sparsity_warmup_steps": 10, + "steps": 488, + "decay_start": 390, + "seed": 0, + "device": "cuda:5", + "layer": 2, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_2", + "submodule_name": "resid_post_layer_2" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 64, + "out_batch_size": 2048, + "device": "cuda:5" + } +} \ No newline at end of file diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_4/eval_results.json b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_4/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..03218e7d44f507e8b65b7fc036de098cf160527b --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_4/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 5.802442759275436, "l1_loss": 43.923553705215454, "l0": 276.28380012512207, "frac_variance_explained": 0.8286938667297363, "cossim": 0.8872785307466984, "l2_ratio": 0.7590875364840031, "relative_reconstruction_bias": 0.9230137951672077, "loss_original": 2.988885059952736, "loss_reconstructed": 4.995560705661774, "loss_zero": 9.41981041431427, "frac_recovered": 0.6891671046614647, "frac_alive": 0.53216552734375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_5/ae.pt b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_5/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..3e5a611f990292add678285f8281278924c88be8 --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_5/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a633a5f4947921984b1d5763880a307dd4c130affe7ed57b259502d7a40dc355 +size 67178280 diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_5/config.json b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0336e130cfa4d5188c6554b2d4b336eadd015d22 --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_5/config.json @@ -0,0 +1,30 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 10, + "resample_steps": null, + "sparsity_warmup_steps": 10, + "steps": 488, + "decay_start": 390, + "seed": 0, + "device": "cuda:5", + "layer": 2, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_2", + "submodule_name": "resid_post_layer_2" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 64, + "out_batch_size": 2048, + "device": "cuda:5" + } +} \ No newline at end of file diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_5/eval_results.json b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_5/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..0cc7da7cb16e9f06920ee83457f06888263fc47f --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_2/trainer_5/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 6.554825067520142, "l1_loss": 31.05599594116211, "l0": 136.27135467529297, "frac_variance_explained": 0.7822760716080666, "cossim": 0.8526212312281132, "l2_ratio": 0.7127167582511902, "relative_reconstruction_bias": 0.9167366027832031, "loss_original": 2.988885059952736, "loss_reconstructed": 5.627534776926041, "loss_zero": 9.41981041431427, "frac_recovered": 0.5911562275141478, "frac_alive": 0.5235595703125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_0/ae.pt b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..33b76c5773a11ee2e1be80568f7ffe06d92e73e9 --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:867a13876cea15fbd26ced986ad10469f8e3fc56eb577b1ac82d64cde548c724 +size 67178280 diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_0/config.json b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0189e21a6bcad7d61c4d72e10387794dd1b93866 --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_0/config.json @@ -0,0 +1,30 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.012, + "warmup_steps": 10, + "resample_steps": null, + "sparsity_warmup_steps": 10, + "steps": 488, + "decay_start": 390, + "seed": 0, + "device": "cuda:5", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 64, + "out_batch_size": 2048, + "device": "cuda:5" + } +} \ No newline at end of file diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_0/eval_results.json b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..9df0eabc4e5caee9b8a8a36b945c424f647d593a --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 3.5034949481487274, "l1_loss": 116.21631669998169, "l0": 1614.8979721069336, "frac_variance_explained": 0.9463904872536659, "cossim": 0.9707099013030529, "l2_ratio": 0.8881971053779125, "relative_reconstruction_bias": 0.9474758766591549, "loss_original": 2.988885059952736, "loss_reconstructed": 3.2336234599351883, "loss_zero": 12.706798493862152, "frac_recovered": 0.9748080372810364, "frac_alive": 0.5294189453125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_1/ae.pt b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_1/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..cabc62c9d90d3c35e9872d2feff76cc4cca82c3b --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_1/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc0058b571ce969260d993771c9bb1981acb34cbab5520cb85c6b18ac53c2d6f +size 67178280 diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_1/config.json b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_1/config.json new file mode 100644 index 0000000000000000000000000000000000000000..989217b6ec1858d8bccc3ef2908a992e7f406b82 --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_1/config.json @@ -0,0 +1,30 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.015, + "warmup_steps": 10, + "resample_steps": null, + "sparsity_warmup_steps": 10, + "steps": 488, + "decay_start": 390, + "seed": 0, + "device": "cuda:5", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 64, + "out_batch_size": 2048, + "device": "cuda:5" + } +} \ No newline at end of file diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_1/eval_results.json b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_1/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..2eca67f1addc173d3a52d5e253e5fbeeda5f9095 --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_1/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 3.9629554450511932, "l1_loss": 101.46468496322632, "l0": 1308.2064056396484, "frac_variance_explained": 0.9322930946946144, "cossim": 0.9621189013123512, "l2_ratio": 0.87125438824296, "relative_reconstruction_bias": 0.9428663663566113, "loss_original": 2.988885059952736, "loss_reconstructed": 3.334225758910179, "loss_zero": 12.706798493862152, "frac_recovered": 0.9644450768828392, "frac_alive": 0.533447265625, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_2/ae.pt b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_2/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..f8ce2168123df70d40b68228307099b7fdf3d666 --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_2/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c4927d051b6cca01387611c1e6ca16f2a23f380aef2ffb88190ff1eb3b0fa1b +size 67178280 diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_2/config.json b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_2/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ce92e7eea36fb6c8fa8e3f5091eec7e5b14fe76a --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_2/config.json @@ -0,0 +1,30 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.02, + "warmup_steps": 10, + "resample_steps": null, + "sparsity_warmup_steps": 10, + "steps": 488, + "decay_start": 390, + "seed": 0, + "device": "cuda:5", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 64, + "out_batch_size": 2048, + "device": "cuda:5" + } +} \ No newline at end of file diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_2/eval_results.json b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_2/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..331900fbbfd56bba31c249eb368037e350fdaeae --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_2/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 4.646789610385895, "l1_loss": 82.44846296310425, "l0": 914.1399726867676, "frac_variance_explained": 0.9069696255028248, "cossim": 0.9467041566967964, "l2_ratio": 0.8475027605891228, "relative_reconstruction_bias": 0.9377563335001469, "loss_original": 2.988885059952736, "loss_reconstructed": 3.5817741751670837, "loss_zero": 12.706798493862152, "frac_recovered": 0.939008891582489, "frac_alive": 0.535400390625, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_3/ae.pt b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_3/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..869aa729151760ed0fa59feb5deac6ef6048037c --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_3/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb28f4a5da9c81091123a94ffc7ee3d25e0757d8260df1d78b5878ee2bf383cb +size 67178280 diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_3/config.json b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_3/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0e26d5d5e11a365ea5d028f8abb050a422d5ae68 --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_3/config.json @@ -0,0 +1,30 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.03, + "warmup_steps": 10, + "resample_steps": null, + "sparsity_warmup_steps": 10, + "steps": 488, + "decay_start": 390, + "seed": 0, + "device": "cuda:5", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 64, + "out_batch_size": 2048, + "device": "cuda:5" + } +} \ No newline at end of file diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_3/eval_results.json b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_3/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..90f1362466d4ac1f75d88f64b9a8ff2b1f92f665 --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_3/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 5.675857901573181, "l1_loss": 57.70899963378906, "l0": 446.59418869018555, "frac_variance_explained": 0.8627602681517601, "cossim": 0.9167746305465698, "l2_ratio": 0.8126122020184994, "relative_reconstruction_bias": 0.9355852827429771, "loss_original": 2.988885059952736, "loss_reconstructed": 4.142080336809158, "loss_zero": 12.706798493862152, "frac_recovered": 0.8815245255827904, "frac_alive": 0.5225830078125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_4/ae.pt b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_4/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..c73163f0b17d4d7275fbd5a7ac0e50a646a95090 --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_4/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d88c23c4cbfde16e1a3fae62caf2634bd25ded88817619a538a7e3be3c965fe +size 67178280 diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_4/config.json b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_4/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0536d16e421444a3cd55f403de207e03021f4f2d --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_4/config.json @@ -0,0 +1,30 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 10, + "resample_steps": null, + "sparsity_warmup_steps": 10, + "steps": 488, + "decay_start": 390, + "seed": 0, + "device": "cuda:5", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 64, + "out_batch_size": 2048, + "device": "cuda:5" + } +} \ No newline at end of file diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_4/eval_results.json b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_4/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..5186e140410c9d3e2787d13083b1911b9019c2dd --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_4/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 6.340411335229874, "l1_loss": 43.92790341377258, "l0": 243.79311084747314, "frac_variance_explained": 0.8235163129866123, "cossim": 0.8934200331568718, "l2_ratio": 0.7865662276744843, "relative_reconstruction_bias": 0.9319948889315128, "loss_original": 2.988885059952736, "loss_reconstructed": 4.628625869750977, "loss_zero": 12.706798493862152, "frac_recovered": 0.8316713199019432, "frac_alive": 0.5096435546875, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_5/ae.pt b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_5/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..8d029c505e6107a32f9ea610fab30da04f14fcb9 --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_5/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0edd35f69783904513b913d3fd2e5ca1ceeec2976ca4625577902555fbe9d96 +size 67178280 diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_5/config.json b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a2d0f2a5a5b1085b8d9e25b5d24a69ba797f33f1 --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_5/config.json @@ -0,0 +1,30 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 10, + "resample_steps": null, + "sparsity_warmup_steps": 10, + "steps": 488, + "decay_start": 390, + "seed": 0, + "device": "cuda:5", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 64, + "out_batch_size": 2048, + "device": "cuda:5" + } +} \ No newline at end of file diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_5/eval_results.json b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_5/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..8d5a7f7b5456c1a7a6dcc25c54c4c50f5e65ffd3 --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_3/trainer_5/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 7.106445550918579, "l1_loss": 31.2361923456192, "l0": 110.58522844314575, "frac_variance_explained": 0.7830671519041061, "cossim": 0.8631722629070282, "l2_ratio": 0.7480260021984577, "relative_reconstruction_bias": 0.9280879609286785, "loss_original": 2.988885059952736, "loss_reconstructed": 5.210390657186508, "loss_zero": 12.706798493862152, "frac_recovered": 0.7719510793685913, "frac_alive": 0.51483154296875, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_0/ae.pt b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..b890173fb734740b7129a846bc50c6437c356187 --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f8198c5bbd49d288342878a7a681c00dd11efe25351684b53a6447d7bedd604 +size 67178280 diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_0/config.json b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..07ff6eae4e5906d8c6939fa62de3549c082dc96e --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_0/config.json @@ -0,0 +1,30 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.012, + "warmup_steps": 10, + "resample_steps": null, + "sparsity_warmup_steps": 10, + "steps": 488, + "decay_start": 390, + "seed": 0, + "device": "cuda:5", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 64, + "out_batch_size": 2048, + "device": "cuda:5" + } +} \ No newline at end of file diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_0/eval_results.json b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..082c87148875e06297016ec4e9811a5899771561 --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 3.4372202306985855, "l1_loss": 142.0617914199829, "l0": 2092.681838989258, "frac_variance_explained": 0.9409727565944195, "cossim": 0.9778236597776413, "l2_ratio": 0.8984446451067924, "relative_reconstruction_bias": 0.9330671615898609, "loss_original": 2.988885059952736, "loss_reconstructed": 3.116362765431404, "loss_zero": 8.795905828475952, "frac_recovered": 0.9780693799257278, "frac_alive": 0.4329833984375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_1/ae.pt b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_1/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..71cef1d31c1fa7755f669a509e966926b69f4693 --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_1/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d206319cc9a96452af2ee6327d20738f1f82d077d8372527a27417ef13044151 +size 67178280 diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_1/config.json b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_1/config.json new file mode 100644 index 0000000000000000000000000000000000000000..40d6f3a8e318f222cc9f79ae0ca2ad3ad7b823ed --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_1/config.json @@ -0,0 +1,30 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.015, + "warmup_steps": 10, + "resample_steps": null, + "sparsity_warmup_steps": 10, + "steps": 488, + "decay_start": 390, + "seed": 0, + "device": "cuda:5", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 64, + "out_batch_size": 2048, + "device": "cuda:5" + } +} \ No newline at end of file diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_1/eval_results.json b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_1/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..7bf6b0bdf994db5d837635383b645fe22ec334ab --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_1/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 3.8961294144392014, "l1_loss": 127.3771653175354, "l0": 1809.7124938964844, "frac_variance_explained": 0.9244818277657032, "cossim": 0.9714625887572765, "l2_ratio": 0.8806721530854702, "relative_reconstruction_bias": 0.9235298186540604, "loss_original": 2.988885059952736, "loss_reconstructed": 3.160843104124069, "loss_zero": 8.795905828475952, "frac_recovered": 0.9704258032143116, "frac_alive": 0.42205810546875, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_2/ae.pt b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_2/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..e064a78358efcc5b8123a7ad5da71152ce721abe --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_2/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:366684a3370eef31ea65b171d214c5594b978092dda52d1e9406c99c67827be5 +size 67178280 diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_2/config.json b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_2/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8d95effa4a61525871f1158e980e77a7bccb7cb9 --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_2/config.json @@ -0,0 +1,30 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.02, + "warmup_steps": 10, + "resample_steps": null, + "sparsity_warmup_steps": 10, + "steps": 488, + "decay_start": 390, + "seed": 0, + "device": "cuda:5", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 64, + "out_batch_size": 2048, + "device": "cuda:5" + } +} \ No newline at end of file diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_2/eval_results.json b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_2/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..4182d573543491150f9bfacb11b11f164e819cf0 --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_2/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 4.613335400819778, "l1_loss": 107.76228666305542, "l0": 1432.6851348876953, "frac_variance_explained": 0.8948646783828735, "cossim": 0.9594850204885006, "l2_ratio": 0.8543104119598866, "relative_reconstruction_bias": 0.9115758650004864, "loss_original": 2.988885059952736, "loss_reconstructed": 3.2515034675598145, "loss_zero": 8.795905828475952, "frac_recovered": 0.9548315294086933, "frac_alive": 0.40576171875, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_3/ae.pt b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_3/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..54ab3237b81974b91b05a8e04854141f32e81fd8 --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_3/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:331af86710fa7aa47e89400df9682eac142408884f197519c26a047bc907cf2a +size 67178280 diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_3/config.json b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_3/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7ed1816f97483488f4267660572003187c3472fb --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_3/config.json @@ -0,0 +1,30 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.03, + "warmup_steps": 10, + "resample_steps": null, + "sparsity_warmup_steps": 10, + "steps": 488, + "decay_start": 390, + "seed": 0, + "device": "cuda:5", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 64, + "out_batch_size": 2048, + "device": "cuda:5" + } +} \ No newline at end of file diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_3/eval_results.json b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_3/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..4d25c079b89ee9ddbd948f2346417c29178f4037 --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_3/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 5.87492099404335, "l1_loss": 77.92073631286621, "l0": 870.7094879150391, "frac_variance_explained": 0.8295186869800091, "cossim": 0.9312763400375843, "l2_ratio": 0.8113289326429367, "relative_reconstruction_bias": 0.898192785680294, "loss_original": 2.988885059952736, "loss_reconstructed": 3.503233313560486, "loss_zero": 8.795905828475952, "frac_recovered": 0.9114748723804951, "frac_alive": 0.3726806640625, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_4/ae.pt b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_4/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..549b625a6bff1c1338d41ecb4d1dabad05170a8c --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_4/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abf855ee683490a6d41e6fae5154466126bda1acb1b2b36bd8fa8032e27ce8de +size 67178280 diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_4/config.json b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_4/config.json new file mode 100644 index 0000000000000000000000000000000000000000..760fb5fd6b0c6a96809b7548e8f2b158ca35b835 --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_4/config.json @@ -0,0 +1,30 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 10, + "resample_steps": null, + "sparsity_warmup_steps": 10, + "steps": 488, + "decay_start": 390, + "seed": 0, + "device": "cuda:5", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 64, + "out_batch_size": 2048, + "device": "cuda:5" + } +} \ No newline at end of file diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_4/eval_results.json b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_4/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..47f27112552160b0c6888b4baaa9cd539c178dd9 --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_4/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 6.856215268373489, "l1_loss": 57.29561996459961, "l0": 498.5478820800781, "frac_variance_explained": 0.767219714820385, "cossim": 0.9014714919030666, "l2_ratio": 0.7819858938455582, "relative_reconstruction_bias": 0.8972665779292583, "loss_original": 2.988885059952736, "loss_reconstructed": 3.851107880473137, "loss_zero": 8.795905828475952, "frac_recovered": 0.8516864478588104, "frac_alive": 0.3365478515625, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_5/ae.pt b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_5/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..ea09c961e650f8d504a93871b0013a23028e695e --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_5/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5941b2c145ef2a3dda61bf59923f58eb1da698f9afab6da584dae74ef9440cda +size 67178280 diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_5/config.json b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..40dc2bc68965cbf4d8fee35dd5dc9076242ce73f --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_5/config.json @@ -0,0 +1,30 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 10, + "resample_steps": null, + "sparsity_warmup_steps": 10, + "steps": 488, + "decay_start": 390, + "seed": 0, + "device": "cuda:5", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 64, + "out_batch_size": 2048, + "device": "cuda:5" + } +} \ No newline at end of file diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_5/eval_results.json b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_5/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..5ebadc3f92230cd2548cd0638abc6db85aaa203b --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_4/trainer_5/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 7.9897752702236176, "l1_loss": 35.976064682006836, "l0": 168.85280323028564, "frac_variance_explained": 0.6834423765540123, "cossim": 0.8579610474407673, "l2_ratio": 0.7446792721748352, "relative_reconstruction_bias": 0.8999854475259781, "loss_original": 2.988885059952736, "loss_reconstructed": 4.48969042301178, "loss_zero": 8.795905828475952, "frac_recovered": 0.7421544790267944, "frac_alive": 0.2723388671875, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_0/ae.pt b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..00bc8157bc027c5df9c2bb99a17183fbde2bf91a --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f0f6542fd12ec9f4d1e54f17f360813928bb2b0ba0bb0194f6720929b179d41 +size 67178280 diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_0/config.json b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..63ae8b3572adccf4f17ab25790acca6e5ebc6f8f --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_0/config.json @@ -0,0 +1,30 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.012, + "warmup_steps": 10, + "resample_steps": null, + "sparsity_warmup_steps": 10, + "steps": 488, + "decay_start": 390, + "seed": 0, + "device": "cuda:5", + "layer": 5, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_5", + "submodule_name": "resid_post_layer_5" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 64, + "out_batch_size": 2048, + "device": "cuda:5" + } +} \ No newline at end of file diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_0/eval_results.json b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..4137fde901f3b83bc5d3925637bdd718004e4ad0 --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 14.34303230047226, "l1_loss": 134.7817931175232, "l0": 66.5282769203186, "frac_variance_explained": 0.6393711157143116, "cossim": 0.9801309332251549, "l2_ratio": 0.9737087674438953, "relative_reconstruction_bias": 0.9934540018439293, "loss_original": 2.988885059952736, "loss_reconstructed": 7.50357323884964, "loss_zero": 8.736501514911652, "frac_recovered": 0.21367329265922308, "frac_alive": 0.5682373046875, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_1/ae.pt b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_1/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..a50b08fb8fa8d98f3de7e4d9bca2e31d34598e9a --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_1/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d6f9e78febc596185e41fd5ca338e1c92175ea651d6725ce1713e25470e8760 +size 67178280 diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_1/config.json b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_1/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a7fd38deef029e7ae3c40caacb1949c3570b3b07 --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_1/config.json @@ -0,0 +1,30 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.015, + "warmup_steps": 10, + "resample_steps": null, + "sparsity_warmup_steps": 10, + "steps": 488, + "decay_start": 390, + "seed": 0, + "device": "cuda:5", + "layer": 5, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_5", + "submodule_name": "resid_post_layer_5" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 64, + "out_batch_size": 2048, + "device": "cuda:5" + } +} \ No newline at end of file diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_1/eval_results.json b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_1/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..6b97d444bc251371ee23340df0bf0486c81846dc --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_1/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 14.386766374111176, "l1_loss": 130.9131121635437, "l0": 50.937511682510376, "frac_variance_explained": 0.6373972222208977, "cossim": 0.9796882271766663, "l2_ratio": 0.9714379422366619, "relative_reconstruction_bias": 0.9918921552598476, "loss_original": 2.988885059952736, "loss_reconstructed": 7.5580668449401855, "loss_zero": 8.736501514911652, "frac_recovered": 0.20432853791862726, "frac_alive": 0.5670166015625, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_2/ae.pt b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_2/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..bd8eefe98a4dbe0ce729aa6c1bf544bf2651ee65 --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_2/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c57500ecb91c5d65bcf573a53fed61c260a05b9d0f9287aa189b0bbda9116e8c +size 67178280 diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_2/config.json b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_2/config.json new file mode 100644 index 0000000000000000000000000000000000000000..61e8aaf69271e67aec7baa3f70d5c7db41f3b745 --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_2/config.json @@ -0,0 +1,30 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.02, + "warmup_steps": 10, + "resample_steps": null, + "sparsity_warmup_steps": 10, + "steps": 488, + "decay_start": 390, + "seed": 0, + "device": "cuda:5", + "layer": 5, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_5", + "submodule_name": "resid_post_layer_5" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 64, + "out_batch_size": 2048, + "device": "cuda:5" + } +} \ No newline at end of file diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_2/eval_results.json b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_2/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..ba6f21759b005572d9fef3c7ba9ea7fa668717a5 --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_2/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 14.467644155025482, "l1_loss": 124.26546907424927, "l0": 39.628663301467896, "frac_variance_explained": 0.6350334212183952, "cossim": 0.979067537933588, "l2_ratio": 0.9690977036952972, "relative_reconstruction_bias": 0.9895152449607849, "loss_original": 2.988885059952736, "loss_reconstructed": 7.644779443740845, "loss_zero": 8.736501514911652, "frac_recovered": 0.1894684201106429, "frac_alive": 0.56439208984375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_3/ae.pt b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_3/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..4efda28d3a755383b9280902d3f0ccbf3ed92507 --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_3/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:315c853903f8bca3d49e6410ffccb1c2e578ce27391f6a5d730cd588841e6c3b +size 67178280 diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_3/config.json b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_3/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a157964a8361d261fadb3f65fde48d7dee6f8215 --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_3/config.json @@ -0,0 +1,30 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.03, + "warmup_steps": 10, + "resample_steps": null, + "sparsity_warmup_steps": 10, + "steps": 488, + "decay_start": 390, + "seed": 0, + "device": "cuda:5", + "layer": 5, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_5", + "submodule_name": "resid_post_layer_5" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 64, + "out_batch_size": 2048, + "device": "cuda:5" + } +} \ No newline at end of file diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_3/eval_results.json b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_3/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..5d5fb0e0c0711901d73126a06db74d0c1743fa39 --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_3/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 14.838936507701874, "l1_loss": 120.4752779006958, "l0": 26.316574931144714, "frac_variance_explained": 0.6176462210714817, "cossim": 0.9775993674993515, "l2_ratio": 0.9620774053037167, "relative_reconstruction_bias": 0.9850547686219215, "loss_original": 2.988885059952736, "loss_reconstructed": 8.233057826757431, "loss_zero": 8.736501514911652, "frac_recovered": 0.08776906947605312, "frac_alive": 0.56182861328125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_4/ae.pt b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_4/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..0a4611c34626cfa4abdea402f2690a64ce3d7000 --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_4/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:747cdeccddd6e23057f379539c127138a826c2563585df67adea7eff71e9e846 +size 67178280 diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_4/config.json b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_4/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c41d9d26e7c46620723de909c1d8960e2dd21095 --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_4/config.json @@ -0,0 +1,30 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 10, + "resample_steps": null, + "sparsity_warmup_steps": 10, + "steps": 488, + "decay_start": 390, + "seed": 0, + "device": "cuda:5", + "layer": 5, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_5", + "submodule_name": "resid_post_layer_5" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 64, + "out_batch_size": 2048, + "device": "cuda:5" + } +} \ No newline at end of file diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_4/eval_results.json b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_4/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..bd13aae665344342deac4c402e0990befd4a4006 --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_4/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 14.916870176792145, "l1_loss": 116.73350954055786, "l0": 19.94594895839691, "frac_variance_explained": 0.6147905439138412, "cossim": 0.9771095104515553, "l2_ratio": 0.957810640335083, "relative_reconstruction_bias": 0.9802930504083633, "loss_original": 2.988885059952736, "loss_reconstructed": 8.315340042114258, "loss_zero": 8.736501514911652, "frac_recovered": 0.07384544261731207, "frac_alive": 0.55828857421875, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_5/ae.pt b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_5/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..0d77946ec90185ffb016eb8fb60db3eed50aec7d --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_5/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0cd6946e090562dd89bfd816f845aa9ba056e227bafc217826e2818c6af2ed80 +size 67178280 diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_5/config.json b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f43fcc29fea964218a50a05ad1464e03cdda187e --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_5/config.json @@ -0,0 +1,30 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 10, + "resample_steps": null, + "sparsity_warmup_steps": 10, + "steps": 488, + "decay_start": 390, + "seed": 0, + "device": "cuda:5", + "layer": 5, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_5", + "submodule_name": "resid_post_layer_5" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 244, + "ctx_len": 1024, + "refresh_batch_size": 64, + "out_batch_size": 2048, + "device": "cuda:5" + } +} \ No newline at end of file diff --git a/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_5/eval_results.json b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_5/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..c447237e2c0af927bb84d79bb0dc56bdddce1526 --- /dev/null +++ b/._run2_EleutherAI_pythia-70m-deduped_standard/resid_post_layer_5/trainer_5/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 15.072476983070374, "l1_loss": 116.40810489654541, "l0": 16.435348987579346, "frac_variance_explained": 0.6133805140852928, "cossim": 0.9764441624283791, "l2_ratio": 0.9474283792078495, "relative_reconstruction_bias": 0.9706622287631035, "loss_original": 2.988885059952736, "loss_reconstructed": 8.466006457805634, "loss_zero": 8.736501514911652, "frac_recovered": 0.04846660397015512, "frac_alive": 0.55609130859375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}} \ No newline at end of file