diff --git a/102/edges.pkl b/102/edges.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..e5ca91be0d15dfebb7961bc819259c28cd200595
--- /dev/null
+++ b/102/edges.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9a2cb02e0518a50c2b53be59ea3c3788ec80c0d3c6c1e70070bd7115abdfae77
+size 113
diff --git a/102/ll_model.pth b/102/ll_model.pth
new file mode 100644
index 0000000000000000000000000000000000000000..9fd62460eaea5b2fe4687165a2c53b48af6cbb36
--- /dev/null
+++ b/102/ll_model.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:feb81e3d0658fbd63f8e19b5ec8b036dd18f5e58ef62d8e0a57048e6ca450c7d
+size 15082
diff --git a/102/ll_model_cfg.pkl b/102/ll_model_cfg.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..d672d99d82ae67a30e8cd5ff8c09075f32934ad2
--- /dev/null
+++ b/102/ll_model_cfg.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ecb69a2f8949b8c7b19adc4c33aed4e99913687a72b169b64a317d2e7878dc97
+size 1093
diff --git a/102/meta.json b/102/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..2a64b9ad15db892f794f6353c546e7821b5869cb
--- /dev/null
+++ b/102/meta.json
@@ -0,0 +1 @@
+{"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 1.0, "behavior_weight": 0.4, "strict_weight": 0.4, "epochs": 1000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train-seed-82-siit-weigth-0.4", "wandb_name": "case-102-seed-82-s-0.4-b-0.4-iit-1", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 82, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}
\ No newline at end of file
diff --git a/104/edges.pkl b/104/edges.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..e5ca91be0d15dfebb7961bc819259c28cd200595
--- /dev/null
+++ b/104/edges.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9a2cb02e0518a50c2b53be59ea3c3788ec80c0d3c6c1e70070bd7115abdfae77
+size 113
diff --git a/104/ll_model.pth b/104/ll_model.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e030c064b3fbe5c5718c40664b86d7e3d566afc1
--- /dev/null
+++ b/104/ll_model.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ddb4db579c13e1bf510ead6a01c343582eedff2f1d799f4d733d72f5f9da50ab
+size 14698
diff --git a/104/ll_model_cfg.pkl b/104/ll_model_cfg.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..971a904427237bd2171a1b8a7b4deb269de7c5ca
--- /dev/null
+++ b/104/ll_model_cfg.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:db76917a80935813594508b1d79b7a0836a20daf67e263161432430b095fd111
+size 1093
diff --git a/104/meta.json b/104/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..e9fe3a76f7042095a23c20779b8e9ab3328b0e0b
--- /dev/null
+++ b/104/meta.json
@@ -0,0 +1 @@
+{"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 1.0, "behavior_weight": 0.4, "strict_weight": 0.7, "epochs": 1000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train-seed-67-siit-weigth-0.7", "wandb_name": "case-104-seed-67-s-0.7-b-0.4-iit-1", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 67, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}
\ No newline at end of file
diff --git a/105/edges.pkl b/105/edges.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..e5ca91be0d15dfebb7961bc819259c28cd200595
--- /dev/null
+++ b/105/edges.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9a2cb02e0518a50c2b53be59ea3c3788ec80c0d3c6c1e70070bd7115abdfae77
+size 113
diff --git a/105/ll_model.pth b/105/ll_model.pth
new file mode 100644
index 0000000000000000000000000000000000000000..503f970e58297932a0f93df298f7cefe1c459d91
--- /dev/null
+++ b/105/ll_model.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8c32e3e97b224cc8d115b70d7661b8b8433bfcd41a9f58d1d7409ce9367ea27a
+size 15018
diff --git a/105/ll_model_cfg.pkl b/105/ll_model_cfg.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..2405e41cd39cba0b81cceb959306e573eff0e267
--- /dev/null
+++ b/105/ll_model_cfg.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:57dece33450716de87f38b12c0955174849706456cc698dd4fcce152feba3cbf
+size 1093
diff --git a/105/meta.json b/105/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..fed30b4cbe3e3c32f9314f9abc1729e4984ff4ec
--- /dev/null
+++ b/105/meta.json
@@ -0,0 +1 @@
+{"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 1.0, "behavior_weight": 0.4, "strict_weight": 0.7, "epochs": 1000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train-seed-67-siit-weigth-0.7", "wandb_name": "case-105-seed-67-s-0.7-b-0.4-iit-1", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 67, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}
\ No newline at end of file
diff --git a/123/edges.pkl b/123/edges.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..e5ca91be0d15dfebb7961bc819259c28cd200595
--- /dev/null
+++ b/123/edges.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9a2cb02e0518a50c2b53be59ea3c3788ec80c0d3c6c1e70070bd7115abdfae77
+size 113
diff --git a/123/ll_model.pth b/123/ll_model.pth
new file mode 100644
index 0000000000000000000000000000000000000000..43c68763ffa23c373508111a5f6af8ef124abded
--- /dev/null
+++ b/123/ll_model.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5b640bb628be87f8ebf41e6bb8b5351a6d69bfa11699bf2e1cea140f6f6f9f95
+size 15082
diff --git a/123/ll_model_cfg.pkl b/123/ll_model_cfg.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..0c277598f6fa5a15b80026b95d17e17c98d63226
--- /dev/null
+++ b/123/ll_model_cfg.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f027ad4de6828c6f5bb7a3f3c8aabd9658526e33b2284eba366977823b89c0a
+size 1093
diff --git a/123/meta.json b/123/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..f7c062b0dfc6206a3c64dd48c5d41d2affbb4e05
--- /dev/null
+++ b/123/meta.json
@@ -0,0 +1 @@
+{"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 1.0, "behavior_weight": 0.4, "strict_weight": 0.7, "epochs": 1000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train-seed-67-siit-weigth-0.7", "wandb_name": "case-123-seed-67-s-0.7-b-0.4-iit-1", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 67, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}
\ No newline at end of file
diff --git a/46/edges.pkl b/46/edges.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..e5ca91be0d15dfebb7961bc819259c28cd200595
--- /dev/null
+++ b/46/edges.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9a2cb02e0518a50c2b53be59ea3c3788ec80c0d3c6c1e70070bd7115abdfae77
+size 113
diff --git a/46/ll_model.pth b/46/ll_model.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e030c064b3fbe5c5718c40664b86d7e3d566afc1
--- /dev/null
+++ b/46/ll_model.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ddb4db579c13e1bf510ead6a01c343582eedff2f1d799f4d733d72f5f9da50ab
+size 14698
diff --git a/46/ll_model_cfg.pkl b/46/ll_model_cfg.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..971a904427237bd2171a1b8a7b4deb269de7c5ca
--- /dev/null
+++ b/46/ll_model_cfg.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:db76917a80935813594508b1d79b7a0836a20daf67e263161432430b095fd111
+size 1093
diff --git a/46/meta.json b/46/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..c88afa06b76d37bdc13f7ca0d9fe3b4f54e0ab83
--- /dev/null
+++ b/46/meta.json
@@ -0,0 +1 @@
+{"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 1.0, "behavior_weight": 0.4, "strict_weight": 0.7, "epochs": 1000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train-seed-67-siit-weigth-0.7", "wandb_name": "case-46-seed-67-s-0.7-b-0.4-iit-1", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 67, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}
\ No newline at end of file
diff --git a/50/edges.pkl b/50/edges.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..e5ca91be0d15dfebb7961bc819259c28cd200595
--- /dev/null
+++ b/50/edges.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9a2cb02e0518a50c2b53be59ea3c3788ec80c0d3c6c1e70070bd7115abdfae77
+size 113
diff --git a/50/ll_model.pth b/50/ll_model.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e030c064b3fbe5c5718c40664b86d7e3d566afc1
--- /dev/null
+++ b/50/ll_model.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ddb4db579c13e1bf510ead6a01c343582eedff2f1d799f4d733d72f5f9da50ab
+size 14698
diff --git a/50/ll_model_cfg.pkl b/50/ll_model_cfg.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..971a904427237bd2171a1b8a7b4deb269de7c5ca
--- /dev/null
+++ b/50/ll_model_cfg.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:db76917a80935813594508b1d79b7a0836a20daf67e263161432430b095fd111
+size 1093
diff --git a/50/meta.json b/50/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..ba9313288c7527f1052a4204310f6d02f3723687
--- /dev/null
+++ b/50/meta.json
@@ -0,0 +1 @@
+{"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 1.0, "behavior_weight": 0.4, "strict_weight": 0.7, "epochs": 1000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train-seed-67-siit-weigth-0.7", "wandb_name": "case-50-seed-67-s-0.7-b-0.4-iit-1", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 67, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}
\ No newline at end of file
diff --git a/52/edges.pkl b/52/edges.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..e5ca91be0d15dfebb7961bc819259c28cd200595
--- /dev/null
+++ b/52/edges.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9a2cb02e0518a50c2b53be59ea3c3788ec80c0d3c6c1e70070bd7115abdfae77
+size 113
diff --git a/52/ll_model.pth b/52/ll_model.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e030c064b3fbe5c5718c40664b86d7e3d566afc1
--- /dev/null
+++ b/52/ll_model.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ddb4db579c13e1bf510ead6a01c343582eedff2f1d799f4d733d72f5f9da50ab
+size 14698
diff --git a/52/ll_model_cfg.pkl b/52/ll_model_cfg.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..971a904427237bd2171a1b8a7b4deb269de7c5ca
--- /dev/null
+++ b/52/ll_model_cfg.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:db76917a80935813594508b1d79b7a0836a20daf67e263161432430b095fd111
+size 1093
diff --git a/52/meta.json b/52/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..32f37ef14bdef255ac0594ead679dc523c48c177
--- /dev/null
+++ b/52/meta.json
@@ -0,0 +1 @@
+{"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 1.0, "behavior_weight": 0.4, "strict_weight": 0.7, "epochs": 1000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train-seed-67-siit-weigth-0.7", "wandb_name": "case-52-seed-67-s-0.7-b-0.4-iit-1", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 67, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}
\ No newline at end of file
diff --git a/53/edges.pkl b/53/edges.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..145ad9019256687adb7de5136c70accdaedd74a3
--- /dev/null
+++ b/53/edges.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:994f3bdfbe5b148e1da38018a24a1567c1d86e5de1c18e9b4d62af358812c709
+size 189
diff --git a/53/ll_model.pth b/53/ll_model.pth
new file mode 100644
index 0000000000000000000000000000000000000000..114b2f59baa956e78999af060d060f1bea5f8863
--- /dev/null
+++ b/53/ll_model.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c16ef496b74897cf166795daa3a019dd6adde99efe3edd12d540676047ff695b
+size 14762
diff --git a/53/ll_model_cfg.pkl b/53/ll_model_cfg.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..a390e3aaa7e1cbd1c647f03e6f121426c8d0b7a6
--- /dev/null
+++ b/53/ll_model_cfg.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:067caa3354f7d42fd2eb39aeacec1719e1e8aa60f9c707b5be1dff2a7a5eac4c
+size 1093
diff --git a/53/meta.json b/53/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..9d32547a0535e4d14e87af2db85a300ad022f38f
--- /dev/null
+++ b/53/meta.json
@@ -0,0 +1 @@
+{"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 1.0, "behavior_weight": 0.4, "strict_weight": 0.7, "epochs": 1000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train-seed-67-siit-weigth-0.7", "wandb_name": "case-53-seed-67-s-0.7-b-0.4-iit-1", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 67, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}
\ No newline at end of file
diff --git a/54/edges.pkl b/54/edges.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..e5ca91be0d15dfebb7961bc819259c28cd200595
--- /dev/null
+++ b/54/edges.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9a2cb02e0518a50c2b53be59ea3c3788ec80c0d3c6c1e70070bd7115abdfae77
+size 113
diff --git a/54/ll_model.pth b/54/ll_model.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e030c064b3fbe5c5718c40664b86d7e3d566afc1
--- /dev/null
+++ b/54/ll_model.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ddb4db579c13e1bf510ead6a01c343582eedff2f1d799f4d733d72f5f9da50ab
+size 14698
diff --git a/54/ll_model_cfg.pkl b/54/ll_model_cfg.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..971a904427237bd2171a1b8a7b4deb269de7c5ca
--- /dev/null
+++ b/54/ll_model_cfg.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:db76917a80935813594508b1d79b7a0836a20daf67e263161432430b095fd111
+size 1093
diff --git a/54/meta.json b/54/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..2eb8ad88eb31ddcf7c649513130bfc2b08874b8c
--- /dev/null
+++ b/54/meta.json
@@ -0,0 +1 @@
+{"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 1.0, "behavior_weight": 0.4, "strict_weight": 0.7, "epochs": 1000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train-seed-67-siit-weigth-0.7", "wandb_name": "case-54-seed-67-s-0.7-b-0.4-iit-1", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 67, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}
\ No newline at end of file
diff --git a/55/edges.pkl b/55/edges.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..e5ca91be0d15dfebb7961bc819259c28cd200595
--- /dev/null
+++ b/55/edges.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9a2cb02e0518a50c2b53be59ea3c3788ec80c0d3c6c1e70070bd7115abdfae77
+size 113
diff --git a/55/ll_model.pth b/55/ll_model.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e030c064b3fbe5c5718c40664b86d7e3d566afc1
--- /dev/null
+++ b/55/ll_model.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ddb4db579c13e1bf510ead6a01c343582eedff2f1d799f4d733d72f5f9da50ab
+size 14698
diff --git a/55/ll_model_cfg.pkl b/55/ll_model_cfg.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..971a904427237bd2171a1b8a7b4deb269de7c5ca
--- /dev/null
+++ b/55/ll_model_cfg.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:db76917a80935813594508b1d79b7a0836a20daf67e263161432430b095fd111
+size 1093
diff --git a/55/meta.json b/55/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..512b3f2f72c28bda037302a0201c8dce880a7d95
--- /dev/null
+++ b/55/meta.json
@@ -0,0 +1 @@
+{"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 1.0, "behavior_weight": 0.4, "strict_weight": 0.7, "epochs": 1000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train-seed-67-siit-weigth-0.7", "wandb_name": "case-55-seed-67-s-0.7-b-0.4-iit-1", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 67, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}
\ No newline at end of file
diff --git a/60/edges.pkl b/60/edges.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..e5ca91be0d15dfebb7961bc819259c28cd200595
--- /dev/null
+++ b/60/edges.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9a2cb02e0518a50c2b53be59ea3c3788ec80c0d3c6c1e70070bd7115abdfae77
+size 113
diff --git a/60/ll_model.pth b/60/ll_model.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e030c064b3fbe5c5718c40664b86d7e3d566afc1
--- /dev/null
+++ b/60/ll_model.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ddb4db579c13e1bf510ead6a01c343582eedff2f1d799f4d733d72f5f9da50ab
+size 14698
diff --git a/60/ll_model_cfg.pkl b/60/ll_model_cfg.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..971a904427237bd2171a1b8a7b4deb269de7c5ca
--- /dev/null
+++ b/60/ll_model_cfg.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:db76917a80935813594508b1d79b7a0836a20daf67e263161432430b095fd111
+size 1093
diff --git a/60/meta.json b/60/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..64e52a2f613f889e507368c96f77838ef2996eca
--- /dev/null
+++ b/60/meta.json
@@ -0,0 +1 @@
+{"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 1.0, "behavior_weight": 0.4, "strict_weight": 0.7, "epochs": 1000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train-seed-67-siit-weigth-0.7", "wandb_name": "case-60-seed-67-s-0.7-b-0.4-iit-1", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 67, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}
\ No newline at end of file
diff --git a/62/edges.pkl b/62/edges.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..e5ca91be0d15dfebb7961bc819259c28cd200595
--- /dev/null
+++ b/62/edges.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9a2cb02e0518a50c2b53be59ea3c3788ec80c0d3c6c1e70070bd7115abdfae77
+size 113
diff --git a/62/ll_model.pth b/62/ll_model.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e88019c946626401aace44160ac7cf225c1f2885
--- /dev/null
+++ b/62/ll_model.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:44b75705cc0241f73d43d60bd6dba9014f5643c80a0da7d58b49ea7bdc1526fe
+size 14698
diff --git a/62/ll_model_cfg.pkl b/62/ll_model_cfg.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..166515f115742cdc958af8ac628de1edcb760d20
--- /dev/null
+++ b/62/ll_model_cfg.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3a2d11862991a5791d7bd91005f7c0f928d522a579988c7000192e53ff05de81
+size 1093
diff --git a/62/meta.json b/62/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..1b80b0dd0d38f6671f932f457a87d3671f0311f0
--- /dev/null
+++ b/62/meta.json
@@ -0,0 +1 @@
+{"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 1.0, "behavior_weight": 0.4, "strict_weight": 0.7, "epochs": 1000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train-seed-67-siit-weigth-0.7", "wandb_name": "case-62-seed-67-s-0.7-b-0.4-iit-1", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 67, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}
\ No newline at end of file
diff --git a/64/edges.pkl b/64/edges.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..e5ca91be0d15dfebb7961bc819259c28cd200595
--- /dev/null
+++ b/64/edges.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9a2cb02e0518a50c2b53be59ea3c3788ec80c0d3c6c1e70070bd7115abdfae77
+size 113
diff --git a/64/ll_model.pth b/64/ll_model.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e030c064b3fbe5c5718c40664b86d7e3d566afc1
--- /dev/null
+++ b/64/ll_model.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ddb4db579c13e1bf510ead6a01c343582eedff2f1d799f4d733d72f5f9da50ab
+size 14698
diff --git a/64/ll_model_cfg.pkl b/64/ll_model_cfg.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..971a904427237bd2171a1b8a7b4deb269de7c5ca
--- /dev/null
+++ b/64/ll_model_cfg.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:db76917a80935813594508b1d79b7a0836a20daf67e263161432430b095fd111
+size 1093
diff --git a/64/meta.json b/64/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..c2a964af1068e56c19a015e7de899d525ad2f412
--- /dev/null
+++ b/64/meta.json
@@ -0,0 +1 @@
+{"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 1.0, "behavior_weight": 0.4, "strict_weight": 0.7, "epochs": 1000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train-seed-67-siit-weigth-0.7", "wandb_name": "case-64-seed-67-s-0.7-b-0.4-iit-1", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 67, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}
\ No newline at end of file
diff --git a/65/edges.pkl b/65/edges.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..e5ca91be0d15dfebb7961bc819259c28cd200595
--- /dev/null
+++ b/65/edges.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9a2cb02e0518a50c2b53be59ea3c3788ec80c0d3c6c1e70070bd7115abdfae77
+size 113
diff --git a/65/ll_model.pth b/65/ll_model.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e030c064b3fbe5c5718c40664b86d7e3d566afc1
--- /dev/null
+++ b/65/ll_model.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ddb4db579c13e1bf510ead6a01c343582eedff2f1d799f4d733d72f5f9da50ab
+size 14698
diff --git a/65/ll_model_cfg.pkl b/65/ll_model_cfg.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..971a904427237bd2171a1b8a7b4deb269de7c5ca
--- /dev/null
+++ b/65/ll_model_cfg.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:db76917a80935813594508b1d79b7a0836a20daf67e263161432430b095fd111
+size 1093
diff --git a/65/meta.json b/65/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..8148a084b92e971a747e741b05ccb81e18088f12
--- /dev/null
+++ b/65/meta.json
@@ -0,0 +1 @@
+{"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 1.0, "behavior_weight": 0.4, "strict_weight": 0.7, "epochs": 1000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train-seed-67-siit-weigth-0.7", "wandb_name": "case-65-seed-67-s-0.7-b-0.4-iit-1", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 67, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}
\ No newline at end of file
diff --git a/70/edges.pkl b/70/edges.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..e5ca91be0d15dfebb7961bc819259c28cd200595
--- /dev/null
+++ b/70/edges.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9a2cb02e0518a50c2b53be59ea3c3788ec80c0d3c6c1e70070bd7115abdfae77
+size 113
diff --git a/70/ll_model.pth b/70/ll_model.pth
new file mode 100644
index 0000000000000000000000000000000000000000..78db9a8280c82cc0928da467a1144f40608d0737
--- /dev/null
+++ b/70/ll_model.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a624a7f2389feabcc0d2cacd91a8b0703b57140d7517115583f951858ac7247f
+size 14698
diff --git a/70/ll_model_cfg.pkl b/70/ll_model_cfg.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..971a904427237bd2171a1b8a7b4deb269de7c5ca
--- /dev/null
+++ b/70/ll_model_cfg.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:db76917a80935813594508b1d79b7a0836a20daf67e263161432430b095fd111
+size 1093
diff --git a/70/meta.json b/70/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..390ffc8997225a2e205b5eff21094bc228c65c14
--- /dev/null
+++ b/70/meta.json
@@ -0,0 +1 @@
+{"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 1.0, "behavior_weight": 0.4, "strict_weight": 0.7, "epochs": 1000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train-seed-67-siit-weigth-0.7", "wandb_name": "case-70-seed-67-s-0.7-b-0.4-iit-1", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 67, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}
\ No newline at end of file
diff --git a/72/edges.pkl b/72/edges.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..e5ca91be0d15dfebb7961bc819259c28cd200595
--- /dev/null
+++ b/72/edges.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9a2cb02e0518a50c2b53be59ea3c3788ec80c0d3c6c1e70070bd7115abdfae77
+size 113
diff --git a/72/ll_model.pth b/72/ll_model.pth
new file mode 100644
index 0000000000000000000000000000000000000000..43c68763ffa23c373508111a5f6af8ef124abded
--- /dev/null
+++ b/72/ll_model.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5b640bb628be87f8ebf41e6bb8b5351a6d69bfa11699bf2e1cea140f6f6f9f95
+size 15082
diff --git a/72/ll_model_cfg.pkl b/72/ll_model_cfg.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..0c277598f6fa5a15b80026b95d17e17c98d63226
--- /dev/null
+++ b/72/ll_model_cfg.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f027ad4de6828c6f5bb7a3f3c8aabd9658526e33b2284eba366977823b89c0a
+size 1093
diff --git a/72/meta.json b/72/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..1f12974a4ea4f99748e56298bae5c6a781658f7c
--- /dev/null
+++ b/72/meta.json
@@ -0,0 +1 @@
+{"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 1.0, "behavior_weight": 0.4, "strict_weight": 0.7, "epochs": 1000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train-seed-67-siit-weigth-0.7", "wandb_name": "case-72-seed-67-s-0.7-b-0.4-iit-1", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 67, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}
\ No newline at end of file
diff --git a/75/edges.pkl b/75/edges.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..e5ca91be0d15dfebb7961bc819259c28cd200595
--- /dev/null
+++ b/75/edges.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9a2cb02e0518a50c2b53be59ea3c3788ec80c0d3c6c1e70070bd7115abdfae77
+size 113
diff --git a/75/ll_model.pth b/75/ll_model.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e030c064b3fbe5c5718c40664b86d7e3d566afc1
--- /dev/null
+++ b/75/ll_model.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ddb4db579c13e1bf510ead6a01c343582eedff2f1d799f4d733d72f5f9da50ab
+size 14698
diff --git a/75/ll_model_cfg.pkl b/75/ll_model_cfg.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..971a904427237bd2171a1b8a7b4deb269de7c5ca
--- /dev/null
+++ b/75/ll_model_cfg.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:db76917a80935813594508b1d79b7a0836a20daf67e263161432430b095fd111
+size 1093
diff --git a/75/meta.json b/75/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..c4866525617060faec2c720e861325869009d0a2
--- /dev/null
+++ b/75/meta.json
@@ -0,0 +1 @@
+{"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 1.0, "behavior_weight": 0.4, "strict_weight": 0.7, "epochs": 1000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train-seed-67-siit-weigth-0.7", "wandb_name": "case-75-seed-67-s-0.7-b-0.4-iit-1", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 67, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}
\ No newline at end of file
diff --git a/77/edges.pkl b/77/edges.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..e5ca91be0d15dfebb7961bc819259c28cd200595
--- /dev/null
+++ b/77/edges.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9a2cb02e0518a50c2b53be59ea3c3788ec80c0d3c6c1e70070bd7115abdfae77
+size 113
diff --git a/77/ll_model.pth b/77/ll_model.pth
new file mode 100644
index 0000000000000000000000000000000000000000..4dcbb972ae5fdaf9f0d657a5d41cf9b763260675
--- /dev/null
+++ b/77/ll_model.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ea8f3e7e90367aac2a071c5d27bcbd12bdec85d0ab29ab6191a94e1d1d27fe48
+size 14698
diff --git a/77/ll_model_cfg.pkl b/77/ll_model_cfg.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..971a904427237bd2171a1b8a7b4deb269de7c5ca
--- /dev/null
+++ b/77/ll_model_cfg.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:db76917a80935813594508b1d79b7a0836a20daf67e263161432430b095fd111
+size 1093
diff --git a/77/meta.json b/77/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..743781c71cb55315b0971dc28413e69c2aafd681
--- /dev/null
+++ b/77/meta.json
@@ -0,0 +1 @@
+{"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 1.0, "behavior_weight": 0.4, "strict_weight": 0.7, "epochs": 1000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train-seed-67-siit-weigth-0.7", "wandb_name": "case-77-seed-67-s-0.7-b-0.4-iit-1", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 67, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}
\ No newline at end of file
diff --git a/8/edges.pkl b/8/edges.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..e5ca91be0d15dfebb7961bc819259c28cd200595
--- /dev/null
+++ b/8/edges.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9a2cb02e0518a50c2b53be59ea3c3788ec80c0d3c6c1e70070bd7115abdfae77
+size 113
diff --git a/8/ll_model.pth b/8/ll_model.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e030c064b3fbe5c5718c40664b86d7e3d566afc1
--- /dev/null
+++ b/8/ll_model.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ddb4db579c13e1bf510ead6a01c343582eedff2f1d799f4d733d72f5f9da50ab
+size 14698
diff --git a/8/ll_model_cfg.pkl b/8/ll_model_cfg.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..971a904427237bd2171a1b8a7b4deb269de7c5ca
--- /dev/null
+++ b/8/ll_model_cfg.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:db76917a80935813594508b1d79b7a0836a20daf67e263161432430b095fd111
+size 1093
diff --git a/8/meta.json b/8/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..84a781d6b4796097264f00fcbfc1b9de043df2e4
--- /dev/null
+++ b/8/meta.json
@@ -0,0 +1 @@
+{"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 1.0, "behavior_weight": 0.4, "strict_weight": 0.7, "epochs": 1000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train-seed-67-siit-weigth-0.7", "wandb_name": "case-8-seed-67-s-0.7-b-0.4-iit-1", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 67, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}
\ No newline at end of file
diff --git a/80/edges.pkl b/80/edges.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..e5ca91be0d15dfebb7961bc819259c28cd200595
--- /dev/null
+++ b/80/edges.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9a2cb02e0518a50c2b53be59ea3c3788ec80c0d3c6c1e70070bd7115abdfae77
+size 113
diff --git a/80/ll_model.pth b/80/ll_model.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e030c064b3fbe5c5718c40664b86d7e3d566afc1
--- /dev/null
+++ b/80/ll_model.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ddb4db579c13e1bf510ead6a01c343582eedff2f1d799f4d733d72f5f9da50ab
+size 14698
diff --git a/80/ll_model_cfg.pkl b/80/ll_model_cfg.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..971a904427237bd2171a1b8a7b4deb269de7c5ca
--- /dev/null
+++ b/80/ll_model_cfg.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:db76917a80935813594508b1d79b7a0836a20daf67e263161432430b095fd111
+size 1093
diff --git a/80/meta.json b/80/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..53fee361eefad099ad2bb7d81c71d3dc73239de1
--- /dev/null
+++ b/80/meta.json
@@ -0,0 +1 @@
+{"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 1.0, "behavior_weight": 0.4, "strict_weight": 0.7, "epochs": 1000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train-seed-67-siit-weigth-0.7", "wandb_name": "case-80-seed-67-s-0.7-b-0.4-iit-1", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 67, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}
\ No newline at end of file
diff --git a/83/edges.pkl b/83/edges.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..e5ca91be0d15dfebb7961bc819259c28cd200595
--- /dev/null
+++ b/83/edges.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9a2cb02e0518a50c2b53be59ea3c3788ec80c0d3c6c1e70070bd7115abdfae77
+size 113
diff --git a/83/ll_model.pth b/83/ll_model.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e030c064b3fbe5c5718c40664b86d7e3d566afc1
--- /dev/null
+++ b/83/ll_model.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ddb4db579c13e1bf510ead6a01c343582eedff2f1d799f4d733d72f5f9da50ab
+size 14698
diff --git a/83/ll_model_cfg.pkl b/83/ll_model_cfg.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..971a904427237bd2171a1b8a7b4deb269de7c5ca
--- /dev/null
+++ b/83/ll_model_cfg.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:db76917a80935813594508b1d79b7a0836a20daf67e263161432430b095fd111
+size 1093
diff --git a/83/meta.json b/83/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..1c48cc00cb84a535e4d01cf863476725b8405128
--- /dev/null
+++ b/83/meta.json
@@ -0,0 +1 @@
+{"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 1.0, "behavior_weight": 0.4, "strict_weight": 0.7, "epochs": 1000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train-seed-67-siit-weigth-0.7", "wandb_name": "case-83-seed-67-s-0.7-b-0.4-iit-1", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 67, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}
\ No newline at end of file
diff --git a/84/edges.pkl b/84/edges.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..e5ca91be0d15dfebb7961bc819259c28cd200595
--- /dev/null
+++ b/84/edges.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9a2cb02e0518a50c2b53be59ea3c3788ec80c0d3c6c1e70070bd7115abdfae77
+size 113
diff --git a/84/ll_model.pth b/84/ll_model.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e030c064b3fbe5c5718c40664b86d7e3d566afc1
--- /dev/null
+++ b/84/ll_model.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ddb4db579c13e1bf510ead6a01c343582eedff2f1d799f4d733d72f5f9da50ab
+size 14698
diff --git a/84/ll_model_cfg.pkl b/84/ll_model_cfg.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..971a904427237bd2171a1b8a7b4deb269de7c5ca
--- /dev/null
+++ b/84/ll_model_cfg.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:db76917a80935813594508b1d79b7a0836a20daf67e263161432430b095fd111
+size 1093
diff --git a/84/meta.json b/84/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..7800f5efd6d1977ef4c0a9bc6677889bb4e653bc
--- /dev/null
+++ b/84/meta.json
@@ -0,0 +1 @@
+{"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 1.0, "behavior_weight": 0.4, "strict_weight": 0.7, "epochs": 1000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train-seed-67-siit-weigth-0.7", "wandb_name": "case-84-seed-67-s-0.7-b-0.4-iit-1", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 67, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}
\ No newline at end of file
diff --git a/85/edges.pkl b/85/edges.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..e5ca91be0d15dfebb7961bc819259c28cd200595
--- /dev/null
+++ b/85/edges.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9a2cb02e0518a50c2b53be59ea3c3788ec80c0d3c6c1e70070bd7115abdfae77
+size 113
diff --git a/85/ll_model.pth b/85/ll_model.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e030c064b3fbe5c5718c40664b86d7e3d566afc1
--- /dev/null
+++ b/85/ll_model.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ddb4db579c13e1bf510ead6a01c343582eedff2f1d799f4d733d72f5f9da50ab
+size 14698
diff --git a/85/ll_model_cfg.pkl b/85/ll_model_cfg.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..971a904427237bd2171a1b8a7b4deb269de7c5ca
--- /dev/null
+++ b/85/ll_model_cfg.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:db76917a80935813594508b1d79b7a0836a20daf67e263161432430b095fd111
+size 1093
diff --git a/85/meta.json b/85/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..38479f25ea2453b4749d6730677cb6cf5edbfc49
--- /dev/null
+++ b/85/meta.json
@@ -0,0 +1 @@
+{"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 1.0, "behavior_weight": 0.4, "strict_weight": 0.7, "epochs": 1000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train-seed-67-siit-weigth-0.7", "wandb_name": "case-85-seed-67-s-0.7-b-0.4-iit-1", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 67, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}
\ No newline at end of file
diff --git a/90/edges.pkl b/90/edges.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..e5ca91be0d15dfebb7961bc819259c28cd200595
--- /dev/null
+++ b/90/edges.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9a2cb02e0518a50c2b53be59ea3c3788ec80c0d3c6c1e70070bd7115abdfae77
+size 113
diff --git a/90/ll_model.pth b/90/ll_model.pth
new file mode 100644
index 0000000000000000000000000000000000000000..891d715075c9e3e221836f9cc8da974f43d552fb
--- /dev/null
+++ b/90/ll_model.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0f83b14a664e9da5bd04aa0a7d77f24cfb7c628f9635efc938700bc99f6e1c46
+size 14698
diff --git a/90/ll_model_cfg.pkl b/90/ll_model_cfg.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..7449c91db5a4af72fc3e64e13d5a17a1c5fe112b
--- /dev/null
+++ b/90/ll_model_cfg.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c85e1f4714f9fca8b018f160458611bf212599e6f9edc83de523721c6027e60f
+size 1093
diff --git a/90/meta.json b/90/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..bd5ac8282f72df1beab184e97acbb314d741180d
--- /dev/null
+++ b/90/meta.json
@@ -0,0 +1 @@
+{"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 1.0, "behavior_weight": 0.4, "strict_weight": 0.4, "epochs": 1000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train-seed-82-siit-weigth-0.4", "wandb_name": "case-90-seed-82-s-0.4-b-0.4-iit-1", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 82, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}
\ No newline at end of file
diff --git a/95/edges.pkl b/95/edges.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..e5ca91be0d15dfebb7961bc819259c28cd200595
--- /dev/null
+++ b/95/edges.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9a2cb02e0518a50c2b53be59ea3c3788ec80c0d3c6c1e70070bd7115abdfae77
+size 113
diff --git a/95/ll_model.pth b/95/ll_model.pth
new file mode 100644
index 0000000000000000000000000000000000000000..1e6cd2f53fb849836ee0bf356500feff31084acb
--- /dev/null
+++ b/95/ll_model.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5665811a8574843411df9c47ec06cb70d1374744dcfbe19a0728d7a54389c68a
+size 14890
diff --git a/95/ll_model_cfg.pkl b/95/ll_model_cfg.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..ecc680f7c23f0e49423b45c5c55f59d1596737be
--- /dev/null
+++ b/95/ll_model_cfg.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2be93b2822d25fdc67eb73f9767ddaa4c0f37adbb43f8442962b907c72b24335
+size 1093
diff --git a/95/meta.json b/95/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..cbaceebffdd3ef6b7dc172a9ea69904f8db085cd
--- /dev/null
+++ b/95/meta.json
@@ -0,0 +1 @@
+{"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 1.0, "behavior_weight": 0.4, "strict_weight": 0.7, "epochs": 1000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train-seed-67-siit-weigth-0.7", "wandb_name": "case-95-seed-67-s-0.7-b-0.4-iit-1", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 67, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}
\ No newline at end of file
diff --git a/97/edges.pkl b/97/edges.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..b927cb5a367468165b1ed0fc593752c9816e5746
--- /dev/null
+++ b/97/edges.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:afdfd64f4c8be661a44ae212318dcb22d1cd61cd023a9f510b0ed3cf9ceceac6
+size 1690
diff --git a/97/ll_model.pth b/97/ll_model.pth
new file mode 100644
index 0000000000000000000000000000000000000000..de54a00622412d60dd4b3fe9696dd6a45dd4e3bb
--- /dev/null
+++ b/97/ll_model.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7edb6ff1572d3b7b43e97ff512fe1bd0c78f47b00724906c72ef175338173028
+size 5869062
diff --git a/97/ll_model_cfg.pkl b/97/ll_model_cfg.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..9060fab742a5cfe522c32511aa7e5cef7bd308dc
--- /dev/null
+++ b/97/ll_model_cfg.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2cb2f82ed87ef19e802ed3400f3b508631496f474aae19e07304666c6d1a06d5
+size 1103
diff --git a/97/meta.json b/97/meta.json
new file mode 100644
index 0000000000000000000000000000000000000000..16bf425a99ba09e8c6af55158712b9aaf4f09e4d
--- /dev/null
+++ b/97/meta.json
@@ -0,0 +1 @@
+{"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 1.0, "behavior_weight": 0.4, "strict_weight": 0.4, "epochs": 1000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train-seed-82-siit-weigth-0.4", "wandb_name": "case-97-seed-82-s-0.4-b-0.4-iit-1", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 82, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}
\ No newline at end of file
diff --git a/benchmark_cases_metadata.csv b/benchmark_cases_metadata.csv
index c5c3d65ca4e26262fb3c49d8e4959f6d4730a043..8a5f499df6f53bbf8c6ab082d630b79c1c917981 100644
--- a/benchmark_cases_metadata.csv
+++ b/benchmark_cases_metadata.csv
@@ -1,48 +1,74 @@
 case_id,url,task_description,max_seq_len,min_seq_len,transformer_cfg_file_url,training_args_file_url,weights_file_url,circuit_file_url,transformer_cfg.n_layers,transformer_cfg.d_model,transformer_cfg.n_ctx,transformer_cfg.d_head,transformer_cfg.model_name,transformer_cfg.n_heads,transformer_cfg.d_mlp,transformer_cfg.act_fn,transformer_cfg.d_vocab,transformer_cfg.eps,transformer_cfg.use_attn_result,transformer_cfg.use_attn_scale,transformer_cfg.use_split_qkv_input,transformer_cfg.use_hook_mlp_in,transformer_cfg.use_attn_in,transformer_cfg.use_local_attn,transformer_cfg.original_architecture,transformer_cfg.from_checkpoint,transformer_cfg.tokenizer_name,transformer_cfg.init_mode,transformer_cfg.normalization_type,transformer_cfg.n_devices,transformer_cfg.attention_dir,transformer_cfg.attn_only,transformer_cfg.seed,transformer_cfg.initializer_range,transformer_cfg.init_weights,transformer_cfg.scale_attn_by_inverse_layer_idx,transformer_cfg.positional_embedding_type,transformer_cfg.final_rms,transformer_cfg.d_vocab_out,transformer_cfg.parallel_attn_mlp,transformer_cfg.n_params,transformer_cfg.use_hook_tokens,transformer_cfg.gated_mlp,transformer_cfg.default_prepend_bos,transformer_cfg.dtype,transformer_cfg.tokenizer_prepends_bos,transformer_cfg.post_embedding_ln,transformer_cfg.rotary_base,transformer_cfg.trust_remote_code,transformer_cfg.rotary_adjacent_pairs,transformer_cfg.load_in_4bit,training_args.output_dir,training_args.atol,training_args.lr,training_args.use_single_loss,training_args.iit_weight,training_args.behavior_weight,training_args.strict_weight,training_args.epochs,training_args.early_stop_accuracy_threshold,training_args.act_fn,training_args.use_wandb,training_args.save_model_to_wandb,training_args.clip_grad_norm,training_args.lr_scheduler,training_args.model_pair,training_args.same_size,training_args.seed,training_args.batch_size,training_args.include_mlp,training_args.detach_while_caching,training_args.scheduler_val_metric,training_args.siit_sampling,training_args.val_iia_sampling,training_args.next_token,training_args.non_ioi_thresh,training_args.use_per_token_check,training_args.num_workers,training_args.early_stop,training_args.scheduler_mode,training_args.val_IIA_sampling,training_args.use_all_tokens_for_behavior,training_args.optimizer_kwargs.betas
-101,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/101,Check if each element is a square of an integer.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/101/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/101/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/101/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/101/edges.pkl,2,4,10,1,custom,4,16,gelu,32,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.12199885626608374,True,False,standard,False,2,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
-103,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/103,Swap consecutive numbers in a list,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/103/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/103/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/103/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/103/edges.pkl,3,24,10,6,custom,4,96,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.10787197799411874,True,False,standard,False,11,False,20736,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
-11,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/11,Counts the number of words in a sequence based on their length.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/edges.pkl,2,12,10,3,custom,4,48,gelu,10,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1460593486680443,True,False,standard,False,5,False,3456,False,False,True,torch.float32,False,False,10000,False,False,True,,0.05,0.01,False,1.0,1.0,0.4,500.0,,gelu,True,True,1.0,,,True,,,True,True,,,,True,,True,,True,,,True,
+52,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/52,Takes the square root of each element.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/52/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/52/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/52/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/52/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1392621247645583,True,False,standard,False,11,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
+7,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/7,Returns the number of times each token occurs in the input.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/7/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/7/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/7/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/7/edges.pkl,2,17,10,4,custom,4,68,gelu,5,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.15689290811054724,True,False,standard,False,10,False,6800,False,False,True,torch.float32,False,False,10000,False,False,True,,0.05,0.01,False,1.0,1.0,0.5,2000.0,,gelu,True,True,0.1,,strict,False,1234.0,256.0,False,True,,,,False,,True,,True,,,True,
+13,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/13,"Analyzes the trend (increasing, decreasing, constant) of numeric tokens.",10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/edges.pkl,2,20,10,5,custom,4,80,gelu,5,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.1460593486680443,True,False,standard,False,3,False,9600,False,False,True,torch.float32,False,False,10000,False,False,True,,0.05,0.01,False,1.0,1.0,0.4,500.0,,gelu,True,True,1.0,,,True,,,True,True,,,,True,,True,,True,,,True,
+63,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/63,Replaces each element with the number of elements less than it in the sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/63/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/63/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/63/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/63/edges.pkl,2,24,10,6,custom,4,96,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.13719886811400708,True,False,standard,False,10,False,13824,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
+60,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/60,Increment each element in the sequence by 1.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/60/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/60/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/60/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/60/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1392621247645583,True,False,standard,False,11,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
+29,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/29,Creates abbreviations for each token in the sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/edges.pkl,2,13,10,3,custom,4,52,gelu,10,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1539600717839002,True,False,standard,False,8,False,3952,False,False,True,torch.float32,False,False,10000,False,False,True,,0.05,0.01,False,1.0,1.0,0.4,2000.0,,gelu,True,True,0.1,,strict,True,,,True,True,,,,True,,True,,True,,,True,
+79,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/79,Check if each number in a sequence is prime,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/79/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/79/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/79/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/79/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.16329931618554522,True,False,standard,False,2,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
+24,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/24,Identifies the first occurrence of each token in a sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/edges.pkl,2,36,10,9,custom,4,144,gelu,5,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1885618083164127,True,False,standard,False,3,False,31104,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
+82,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/82,Halve the elements in the second half of the sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/edges.pkl,4,24,10,6,custom,4,96,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.1059625885652035,True,False,standard,False,16,False,27648,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
+31,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/31,Identify if tokens in the sequence are anagrams of the word 'listen'.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/31/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/31/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/31/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/31/edges.pkl,2,4,10,1,custom,4,16,gelu,11,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.17056057308448835,True,False,standard,False,2,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
+72,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/72,Negate each element in the input sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/72/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/72/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/72/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/72/edges.pkl,2,4,10,1,custom,4,16,gelu,22,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.11202240672224079,True,False,standard,False,20,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
+104,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/104,Apply exponential function to all elements of the input sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/104/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/104/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/104/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/104/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1392621247645583,True,False,standard,False,11,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
+18,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/18,"Classify each token based on its frequency as 'rare', 'common', or 'frequent'.",10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/edges.pkl,2,26,10,6,custom,4,104,gelu,7,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.12344267996967354,True,False,standard,False,3,False,15808,False,False,True,torch.float32,False,False,10000,False,False,True,,0.05,0.001,False,1.0,1.0,1.0,2000.0,,gelu,True,True,0.1,,strict,True,,,True,True,,,,True,,True,,True,,,True,
 110,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/110,"Inserts zeros between each element, removing the latter half of the list.",10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/110/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/110/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/110/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/110/edges.pkl,2,20,10,5,custom,4,80,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.11925695879998878,True,False,standard,False,11,False,9600,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
-111,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/111,Returns the last element of the sequence and pads the rest with zeros.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/111/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/111/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/111/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/111/edges.pkl,3,24,10,6,custom,4,96,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.09847319278346618,True,False,standard,False,11,False,20736,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
-113,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/113,"Inverts the sequence if it is sorted in ascending order, otherwise leaves it unchanged.",10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/113/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/113/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/113/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/113/edges.pkl,7,88,10,22,custom,4,352,gelu,32,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.0512147519731584,True,False,standard,False,30,False,650496,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
+69,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/69,"Assign -1, 0, or 1 to each element of the input sequence based on its sign.",10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/69/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/69/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/69/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/69/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.16329931618554522,True,False,standard,False,2,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
+87,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/87,Binarize a sequence of integers using a threshold.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/87/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/87/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/87/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/87/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.16329931618554522,True,False,standard,False,2,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
+80,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/80,Subtract a constant from each element of the input sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/80/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/80/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/80/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/80/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1392621247645583,True,False,standard,False,11,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
+83,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/83,Triple each element in the sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/83/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/83/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/83/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/83/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1392621247645583,True,False,standard,False,11,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
+45,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/45,Doubles the first half of the sequence,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/45/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/45/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/45/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/45/edges.pkl,3,24,10,6,custom,4,96,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.11094003924504584,True,False,standard,False,16,False,20736,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
 114,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/114,Apply a logarithm base 10 to each element of the input sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/114/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/114/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/114/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/114/edges.pkl,2,4,10,1,custom,4,16,gelu,12,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.14368424162141993,True,False,standard,False,10,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
-122,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/122,Check if each number is divisible by 3.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/122/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/122/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/122/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/122/edges.pkl,2,4,10,1,custom,4,16,gelu,32,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.12199885626608374,True,False,standard,False,2,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
-124,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/124,Check if all elements in a list are equal.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/124/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/124/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/124/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/124/edges.pkl,3,24,10,6,custom,4,96,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.11547005383792516,True,False,standard,False,2,False,20736,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
-129,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/129,Checks if all elements are a multiple of n (set the default at 2).,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/129/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/129/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/129/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/129/edges.pkl,3,4,10,1,custom,4,16,gelu,32,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.10504514628777804,True,False,standard,False,2,False,576,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
-13,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/13,"Analyzes the trend (increasing, decreasing, constant) of numeric tokens.",10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/edges.pkl,2,20,10,5,custom,4,80,gelu,5,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.1460593486680443,True,False,standard,False,3,False,9600,False,False,True,torch.float32,False,False,10000,False,False,True,,0.05,0.01,False,1.0,1.0,0.4,500.0,,gelu,True,True,1.0,,,True,,,True,True,,,,True,,True,,True,,,True,
+33,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/33,Checks if each token's length is odd or even.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/edges.pkl,2,4,10,1,custom,4,16,gelu,10,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.17457431218879393,True,False,standard,False,2,False,384,False,False,True,torch.float32,False,False,10000,False,False,True,,0.05,0.001,False,1.0,1.0,0.4,2000.0,,gelu,True,True,0.1,,,True,,,True,True,,,,True,,True,,True,,,True,
+ioi,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/ioi,Indirect Object Identification (IOI) task.,16,16,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/edges.pkl,6,64,1024,16,gpt2,4,3072,gelu_new,50257,1e-05,False,True,False,False,False,False,GPT2LMHeadModel,False,gpt2,gpt2,LNPre,1,causal,False,,0.02886751345948129,False,False,standard,False,50257,False,84934656,False,False,True,torch.float32,False,False,10000,False,False,True,,0.05,0.001,False,1.0,1.0,0.4,,,,True,True,1.0,,,True,0.0,512.0,True,True,"val/accuracy,val/IIA",individual,,False,0.65,False,0.0,True,max,random,False,"0.9,0.9"
+55,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/55,Applies the hyperbolic sine to each element.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/55/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/55/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/55/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/55/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1392621247645583,True,False,standard,False,11,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
+21,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/21,Extract unique tokens from a string,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/edges.pkl,4,50,10,12,custom,4,200,gelu,5,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.09847319278346618,True,False,standard,False,3,False,118400,False,False,True,torch.float32,False,False,10000,False,False,True,,0.05,0.0005,False,1.0,1.0,0.5,2000.0,,gelu,True,True,0.1,,strict,True,,,True,True,,,,True,,True,,True,,,True,
+4,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/4,Return fraction of previous open tokens minus the fraction of close tokens.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/edges.pkl,2,20,10,5,custom,4,80,gelu,7,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.17056057308448835,True,False,standard,False,1,False,9600,False,False,True,torch.float32,False,False,10000,False,False,True,,0.05,0.001,False,1.0,1.0,0.4,2000.0,,gelu,True,True,0.1,,,True,,,True,True,,,,True,,True,,True,,,True,
+30,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/30,Tags numeric tokens in a sequence based on whether they fall within a given range.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/30/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/30/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/30/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/30/edges.pkl,2,4,10,1,custom,4,16,gelu,32,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.12199885626608374,True,False,standard,False,2,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
+90,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/90,Replaces a specific token with another one.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/90/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/90/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/90/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/90/edges.pkl,2,4,10,1,custom,4,16,gelu,12,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1460593486680443,True,False,standard,False,9,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,82.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
+39,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/39,Returns the fraction of 'x' in the input up to the i-th position for all i.,60,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/39/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/39/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/39/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/39/edges.pkl,2,120,60,30,custom,4,480,gelu,28,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.08432740427115679,True,False,standard,False,1,False,345600,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
+75,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/75,Double each element of the input sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/75/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/75/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/75/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/75/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1392621247645583,True,False,standard,False,11,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
+101,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/101,Check if each element is a square of an integer.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/101/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/101/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/101/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/101/edges.pkl,2,4,10,1,custom,4,16,gelu,32,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.12199885626608374,True,False,standard,False,2,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
+ioi_next_token,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/ioi_next_token,"Indirect Object Identification (IOI) task, trained using next token prediction.",16,16,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/edges.pkl,6,64,1024,16,gpt2,4,3072,gelu_new,50257,1e-05,False,True,False,False,False,False,GPT2LMHeadModel,False,gpt2,gpt2,LNPre,1,causal,False,,0.02886751345948129,True,False,standard,False,50257,False,2457600,False,False,True,torch.float32,False,False,10000,False,False,True,,0.05,0.001,False,1.0,1.0,0.4,,,,True,True,1.0,,,True,,256.0,True,True,"val/accuracy,val/IIA",,,True,0.65,False,0.0,True,max,,True,
+65,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/65,Calculate the cube root of each element in the input sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/65/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/65/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/65/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/65/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1392621247645583,True,False,standard,False,11,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
 14,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/14,Returns the count of 'a' in the input sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/14/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/14/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/14/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/14/edges.pkl,2,8,10,2,custom,4,32,gelu,5,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.15689290811054724,True,False,standard,False,10,False,1536,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
-18,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/18,"Classify each token based on its frequency as 'rare', 'common', or 'frequent'.",10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/edges.pkl,2,26,10,6,custom,4,104,gelu,7,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.12344267996967354,True,False,standard,False,3,False,15808,False,False,True,torch.float32,False,False,10000,False,False,True,,0.05,0.001,False,1.0,1.0,1.0,2000.0,,gelu,True,True,0.1,,strict,True,,,True,True,,,,True,,True,,True,,,True,
+95,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/95,Counts the distinct prime factors of each number in the input list.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/95/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/95/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/95/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/95/edges.pkl,2,4,10,1,custom,4,16,gelu,32,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.12060453783110546,True,False,standard,False,3,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
+84,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/84,Apply the arctangent function to each element of the input sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/84/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/84/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/84/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/84/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1392621247645583,True,False,standard,False,11,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
+93,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/93,Swaps the nth with the n+1th element if n%2==1.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/93/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/93/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/93/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/93/edges.pkl,3,20,10,5,custom,4,80,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.10886621079036347,True,False,standard,False,11,False,14400,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
+37,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/37,Reverses each word in the sequence except for specified exclusions.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/edges.pkl,2,12,10,3,custom,4,48,gelu,10,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1539600717839002,True,False,standard,False,8,False,3456,False,False,True,torch.float32,False,False,10000,False,False,True,,0.05,0.01,False,1.0,1.0,1.0,2000.0,,gelu,True,True,0.1,,strict,True,,,True,True,,,,True,,True,,True,,,True,
+34,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/34,Calculate the ratio of vowels to consonants in each word.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/edges.pkl,2,16,10,4,custom,4,64,gelu,10,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.16329931618554522,True,False,standard,False,5,False,6144,False,False,True,torch.float32,False,False,10000,False,False,True,,0.05,0.01,False,1.0,1.0,1.0,2000.0,,gelu,True,True,0.1,,strict,True,,,True,True,,,,True,,True,,True,,,True,
+26,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/26,Creates a cascading effect by repeating each token in sequence incrementally.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/edges.pkl,2,21,10,5,custom,4,84,gelu,5,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.12344267996967354,True,False,standard,False,27,False,10416,False,False,True,torch.float32,False,False,10000,False,False,True,,0.05,0.01,False,1.0,1.0,0.4,2000.0,,gelu,True,True,0.1,,strict,True,,,True,True,,,,True,,True,,True,,,True,
 19,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/19,Removes consecutive duplicate tokens from a sequence.,15,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/edges.pkl,2,32,15,8,custom,4,128,gelu,5,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.15689290811054724,True,False,standard,False,3,False,24576,False,False,True,torch.float32,False,False,10000,False,False,True,,0.05,0.001,False,1.0,1.0,0.4,2000.0,,gelu,True,True,0.1,,,True,,,True,True,,,,True,,True,,True,,,True,
+53,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/53,Increment elements at odd indices by 1,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/53/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/53/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/53/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/53/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.13151918984428584,True,False,standard,False,12,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
+40,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/40,Sum the last and previous to last digits of a number,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/40/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/40/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/40/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/40/edges.pkl,2,4,10,1,custom,4,16,gelu,31,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.09847319278346618,True,False,standard,False,12,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
+51,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/51,Checks if each element is a Fibonacci number,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/51/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/51/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/51/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/51/edges.pkl,2,4,10,1,custom,4,16,gelu,102,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.07525766947068778,True,False,standard,False,2,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
+122,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/122,Check if each number is divisible by 3.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/122/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/122/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/122/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/122/edges.pkl,2,4,10,1,custom,4,16,gelu,32,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.12199885626608374,True,False,standard,False,2,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
+85,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/85,Square each element of the input sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/85/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/85/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/85/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/85/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1392621247645583,True,False,standard,False,11,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
 2,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/2,Reverse the input sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/edges.pkl,4,56,10,14,custom,4,224,gelu,28,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.07593263966019993,True,False,standard,False,26,False,150528,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
+44,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/44,Replaces each element with the number of elements greater than it in the sequence,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/44/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/44/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/44/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/44/edges.pkl,2,24,10,6,custom,4,96,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.13719886811400708,True,False,standard,False,10,False,13824,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
+113,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/113,"Inverts the sequence if it is sorted in ascending order, otherwise leaves it unchanged.",10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/113/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/113/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/113/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/113/edges.pkl,7,88,10,22,custom,4,352,gelu,32,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.0512147519731584,True,False,standard,False,30,False,650496,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
+77,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/77,Apply the tangent function to each element of the sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/77/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/77/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/77/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/77/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1392621247645583,True,False,standard,False,11,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
 20,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/20,Detect spam messages based on appearance of spam keywords.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/edges.pkl,2,13,10,3,custom,4,52,gelu,14,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.16,True,False,standard,False,2,False,3952,False,False,True,torch.float32,False,False,10000,False,False,True,,0.05,0.01,False,1.0,1.0,0.4,2000.0,,gelu,True,True,0.1,,strict,True,,,True,True,,,,True,,True,,True,,,True,
-21,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/21,Extract unique tokens from a string,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/edges.pkl,4,50,10,12,custom,4,200,gelu,5,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.09847319278346618,True,False,standard,False,3,False,118400,False,False,True,torch.float32,False,False,10000,False,False,True,,0.05,0.0005,False,1.0,1.0,0.5,2000.0,,gelu,True,True,0.1,,strict,True,,,True,True,,,,True,,True,,True,,,True,
-24,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/24,Identifies the first occurrence of each token in a sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/edges.pkl,2,36,10,9,custom,4,144,gelu,5,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1885618083164127,True,False,standard,False,3,False,31104,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
 25,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/25,Normalizes token frequencies in a sequence to a range between 0 and 1.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/25/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/25/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/25/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/25/edges.pkl,2,62,10,15,custom,4,248,gelu,5,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.08295613557843402,True,False,standard,False,56,False,91264,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
-26,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/26,Creates a cascading effect by repeating each token in sequence incrementally.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/edges.pkl,2,21,10,5,custom,4,84,gelu,5,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.12344267996967354,True,False,standard,False,27,False,10416,False,False,True,torch.float32,False,False,10000,False,False,True,,0.05,0.01,False,1.0,1.0,0.4,2000.0,,gelu,True,True,0.1,,strict,True,,,True,True,,,,True,,True,,True,,,True,
-29,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/29,Creates abbreviations for each token in the sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/edges.pkl,2,13,10,3,custom,4,52,gelu,10,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1539600717839002,True,False,standard,False,8,False,3952,False,False,True,torch.float32,False,False,10000,False,False,True,,0.05,0.01,False,1.0,1.0,0.4,2000.0,,gelu,True,True,0.1,,strict,True,,,True,True,,,,True,,True,,True,,,True,
-3,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/3,Returns the fraction of 'x' in the input up to the i-th position for all i.,5,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/edges.pkl,2,12,5,3,custom,4,48,gelu,6,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.22188007849009167,True,False,standard,False,1,False,3456,False,False,True,torch.float32,False,False,10000,False,False,True,,0.05,0.01,False,1.0,1.0,10.0,2000.0,,gelu,True,True,0.1,,strict,False,,,True,True,,,,True,,True,,True,,,True,
-30,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/30,Tags numeric tokens in a sequence based on whether they fall within a given range.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/30/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/30/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/30/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/30/edges.pkl,2,4,10,1,custom,4,16,gelu,32,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.12199885626608374,True,False,standard,False,2,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
-31,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/31,Identify if tokens in the sequence are anagrams of the word 'listen'.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/31/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/31/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/31/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/31/edges.pkl,2,4,10,1,custom,4,16,gelu,11,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.17056057308448835,True,False,standard,False,2,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
-33,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/33,Checks if each token's length is odd or even.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/edges.pkl,2,4,10,1,custom,4,16,gelu,10,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.17457431218879393,True,False,standard,False,2,False,384,False,False,True,torch.float32,False,False,10000,False,False,True,,0.05,0.001,False,1.0,1.0,0.4,2000.0,,gelu,True,True,0.1,,,True,,,True,True,,,,True,,True,,True,,,True,
-34,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/34,Calculate the ratio of vowels to consonants in each word.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/edges.pkl,2,16,10,4,custom,4,64,gelu,10,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.16329931618554522,True,False,standard,False,5,False,6144,False,False,True,torch.float32,False,False,10000,False,False,True,,0.05,0.01,False,1.0,1.0,1.0,2000.0,,gelu,True,True,0.1,,strict,True,,,True,True,,,,True,,True,,True,,,True,
+11,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/11,Counts the number of words in a sequence based on their length.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/edges.pkl,2,12,10,3,custom,4,48,gelu,10,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1460593486680443,True,False,standard,False,5,False,3456,False,False,True,torch.float32,False,False,10000,False,False,True,,0.05,0.01,False,1.0,1.0,0.4,500.0,,gelu,True,True,1.0,,,True,,,True,True,,,,True,,True,,True,,,True,
+54,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/54,Applies the hyperbolic tangent to each element.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/54/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/54/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/54/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/54/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1392621247645583,True,False,standard,False,11,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
+124,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/124,Check if all elements in a list are equal.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/124/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/124/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/124/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/124/edges.pkl,3,24,10,6,custom,4,96,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.11547005383792516,True,False,standard,False,2,False,20736,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
+102,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/102,"Reflects each element within a range (default is [2, 7]).",10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/102/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/102/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/102/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/102/edges.pkl,2,4,10,1,custom,4,16,gelu,42,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1059625885652035,True,False,standard,False,6,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,82.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
+129,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/129,Checks if all elements are a multiple of n (set the default at 2).,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/129/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/129/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/129/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/129/edges.pkl,3,4,10,1,custom,4,16,gelu,32,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.10504514628777804,True,False,standard,False,2,False,576,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
+50,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/50,Applies the hyperbolic cosine to each element,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/50/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/50/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/50/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/50/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1392621247645583,True,False,standard,False,11,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
+123,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/123,Apply arccosine to each element of the input sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/123/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/123/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/123/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/123/edges.pkl,2,4,10,1,custom,4,16,gelu,22,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.11202240672224079,True,False,standard,False,20,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
+86,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/86,"Check if each element is a power of 2. Return 1 if true, otherwise 0.",10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/86/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/86/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/86/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/86/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.16329931618554522,True,False,standard,False,2,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
+105,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/105,Replaces each number with the next prime after that number.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/105/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/105/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/105/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/105/edges.pkl,2,4,10,1,custom,4,16,gelu,32,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.11202240672224079,True,False,standard,False,10,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
+58,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/58,Mirrors the first half of the sequence to the second half.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/58/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/58/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/58/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/58/edges.pkl,3,32,10,8,custom,4,128,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.10415112878465911,True,False,standard,False,11,False,36864,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
+70,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/70,Apply the cosine function to each element of the input sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/70/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/70/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/70/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/70/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1392621247645583,True,False,standard,False,11,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
+62,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/62,Replaces each element with its factorial.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/62/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/62/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/62/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/62/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1414213562373095,True,False,standard,False,10,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
+111,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/111,Returns the last element of the sequence and pads the rest with zeros.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/111/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/111/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/111/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/111/edges.pkl,3,24,10,6,custom,4,96,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.09847319278346618,True,False,standard,False,11,False,20736,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
 35,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/35,Alternates capitalization of each character in words.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/edges.pkl,2,9,10,2,custom,4,36,gelu,10,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1539600717839002,True,False,standard,False,8,False,1872,False,False,True,torch.float32,False,False,10000,False,False,True,,0.05,0.01,False,1.0,1.0,1.0,2000.0,,gelu,True,True,0.1,,strict,True,,,True,True,,,,True,,True,,True,,,True,
+3,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/3,Returns the fraction of 'x' in the input up to the i-th position for all i.,5,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/edges.pkl,2,12,5,3,custom,4,48,gelu,6,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.22188007849009167,True,False,standard,False,1,False,3456,False,False,True,torch.float32,False,False,10000,False,False,True,,0.05,0.01,False,1.0,1.0,10.0,2000.0,,gelu,True,True,0.1,,strict,False,,,True,True,,,,True,,True,,True,,,True,
 36,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/36,"Classifies each token as 'positive', 'negative', or 'neutral' based on emojis.",10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/edges.pkl,2,6,10,1,custom,4,24,gelu,5,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.19402850002906638,True,False,standard,False,3,False,768,False,False,True,torch.float32,False,False,10000,False,False,True,,0.05,0.01,False,1.0,1.0,1.0,2000.0,,gelu,True,True,0.1,,strict,True,,,True,True,,,,True,,True,,True,,,True,
-37,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/37,Reverses each word in the sequence except for specified exclusions.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/edges.pkl,2,12,10,3,custom,4,48,gelu,10,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1539600717839002,True,False,standard,False,8,False,3456,False,False,True,torch.float32,False,False,10000,False,False,True,,0.05,0.01,False,1.0,1.0,1.0,2000.0,,gelu,True,True,0.1,,strict,True,,,True,True,,,,True,,True,,True,,,True,
-39,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/39,Returns the fraction of 'x' in the input up to the i-th position for all i.,60,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/39/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/39/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/39/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/39/edges.pkl,2,120,60,30,custom,4,480,gelu,28,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.08432740427115679,True,False,standard,False,1,False,345600,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
-4,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/4,Return fraction of previous open tokens minus the fraction of close tokens.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/edges.pkl,2,20,10,5,custom,4,80,gelu,7,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.17056057308448835,True,False,standard,False,1,False,9600,False,False,True,torch.float32,False,False,10000,False,False,True,,0.05,0.001,False,1.0,1.0,0.4,2000.0,,gelu,True,True,0.1,,,True,,,True,True,,,,True,,True,,True,,,True,
-40,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/40,Sum the last and previous to last digits of a number,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/40/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/40/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/40/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/40/edges.pkl,2,4,10,1,custom,4,16,gelu,31,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.09847319278346618,True,False,standard,False,12,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
-44,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/44,Replaces each element with the number of elements greater than it in the sequence,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/44/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/44/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/44/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/44/edges.pkl,2,24,10,6,custom,4,96,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.13719886811400708,True,False,standard,False,10,False,13824,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
-45,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/45,Doubles the first half of the sequence,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/45/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/45/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/45/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/45/edges.pkl,3,24,10,6,custom,4,96,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.11094003924504584,True,False,standard,False,16,False,20736,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
-51,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/51,Checks if each element is a Fibonacci number,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/51/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/51/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/51/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/51/edges.pkl,2,4,10,1,custom,4,16,gelu,102,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.07525766947068778,True,False,standard,False,2,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
+64,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/64,Cubes each element in the sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/64/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/64/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/64/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/64/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1392621247645583,True,False,standard,False,11,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
+8,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/8,Identity,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1392621247645583,True,False,standard,False,11,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
+103,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/103,Swap consecutive numbers in a list,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/103/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/103/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/103/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/103/edges.pkl,3,24,10,6,custom,4,96,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.10787197799411874,True,False,standard,False,11,False,20736,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
 56,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/56,Sets every third element to zero.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/56/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/56/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/56/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/56/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.13333333333333333,True,False,standard,False,11,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
-58,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/58,Mirrors the first half of the sequence to the second half.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/58/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/58/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/58/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/58/edges.pkl,3,32,10,8,custom,4,128,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.10415112878465911,True,False,standard,False,11,False,36864,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
-63,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/63,Replaces each element with the number of elements less than it in the sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/63/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/63/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/63/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/63/edges.pkl,2,24,10,6,custom,4,96,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.13719886811400708,True,False,standard,False,10,False,13824,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
-69,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/69,"Assign -1, 0, or 1 to each element of the input sequence based on its sign.",10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/69/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/69/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/69/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/69/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.16329931618554522,True,False,standard,False,2,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
-7,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/7,Returns the number of times each token occurs in the input.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/7/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/7/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/7/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/7/edges.pkl,2,17,10,4,custom,4,68,gelu,5,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.15689290811054724,True,False,standard,False,10,False,6800,False,False,True,torch.float32,False,False,10000,False,False,True,,0.05,0.01,False,1.0,1.0,0.5,2000.0,,gelu,True,True,0.1,,strict,False,1234.0,256.0,False,True,,,,False,,True,,True,,,True,
-79,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/79,Check if each number in a sequence is prime,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/79/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/79/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/79/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/79/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.16329931618554522,True,False,standard,False,2,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
-82,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/82,Halve the elements in the second half of the sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/edges.pkl,4,24,10,6,custom,4,96,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.1059625885652035,True,False,standard,False,16,False,27648,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
-86,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/86,"Check if each element is a power of 2. Return 1 if true, otherwise 0.",10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/86/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/86/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/86/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/86/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.16329931618554522,True,False,standard,False,2,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
-87,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/87,Binarize a sequence of integers using a threshold.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/87/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/87/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/87/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/87/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.16329931618554522,True,False,standard,False,2,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
-93,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/93,Swaps the nth with the n+1th element if n%2==1.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/93/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/93/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/93/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/93/edges.pkl,3,20,10,5,custom,4,80,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.10886621079036347,True,False,standard,False,11,False,14400,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
-ioi,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/ioi,Indirect Object Identification (IOI) task.,16,16,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/edges.pkl,6,64,1024,16,gpt2,4,3072,gelu_new,50257,1e-05,False,True,False,False,False,False,GPT2LMHeadModel,False,gpt2,gpt2,LNPre,1,causal,False,,0.02886751345948129,False,False,standard,False,50257,False,84934656,False,False,True,torch.float32,False,False,10000,False,False,True,,0.05,0.001,False,1.0,1.0,0.4,,,,True,True,1.0,,,True,0.0,512.0,True,True,"val/accuracy,val/IIA",individual,,False,0.65,False,0.0,True,max,random,False,"0.9,0.9"
-ioi_next_token,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/ioi_next_token,"Indirect Object Identification (IOI) task, trained using next token prediction.",16,16,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/edges.pkl,6,64,1024,16,gpt2,4,3072,gelu_new,50257,1e-05,False,True,False,False,False,False,GPT2LMHeadModel,False,gpt2,gpt2,LNPre,1,causal,False,,0.02886751345948129,True,False,standard,False,50257,False,2457600,False,False,True,torch.float32,False,False,10000,False,False,True,,0.05,0.001,False,1.0,1.0,0.4,,,,True,True,1.0,,,True,,256.0,True,True,"val/accuracy,val/IIA",,,True,0.65,False,0.0,True,max,,True,
+97,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/97,Scale a sequence by its maximum element.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/97/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/97/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/97/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/97/edges.pkl,3,200,10,50,custom,4,800,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.05405899027195888,True,False,standard,False,64,False,1440000,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,82.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
+46,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/46,Decrements each element in the sequence by 1,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/46/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/46/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/46/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/46/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1392621247645583,True,False,standard,False,11,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
diff --git a/benchmark_cases_metadata.parquet b/benchmark_cases_metadata.parquet
index d06a0be30f4a35956d142442f5cc6d5bd49a829f..7f48c2aec6c8e1217dbf66604bb4f7f8b2ef09f5 100644
--- a/benchmark_cases_metadata.parquet
+++ b/benchmark_cases_metadata.parquet
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0e070cc1d81773e400a5c30ce2e9b328d370ef2e2f67083ac90fc0638e4d2da8
-size 72322
+oid sha256:729585ec39204363367ad83efc6bf10185f4e191fe490714be2ab026d1c9864c
+size 75187
diff --git a/benchmark_metadata.json b/benchmark_metadata.json
index fd5b25af8111724f95f53daf802b0bbe3b4ac7c2..a21bb7d64a15ec20752b0924a4fd9ab02823894a 100644
--- a/benchmark_metadata.json
+++ b/benchmark_metadata.json
@@ -6,9 +6,9 @@
   "url": "https://huggingface.co/cybershiptrooper/InterpBench",
   "cases": [
     {
-      "case_id": "101",
-      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/101",
-      "task_description": "Check if each element is a square of an integer.",
+      "case_id": "52",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/52",
+      "task_description": "Takes the square root of each element.",
       "vocab": [
         0,
         1,
@@ -20,45 +20,26 @@
         7,
         8,
         9,
-        10,
-        11,
-        12,
-        13,
-        14,
-        15,
-        16,
-        17,
-        18,
-        19,
-        20,
-        21,
-        22,
-        23,
-        24,
-        25,
-        26,
-        27,
-        28,
-        29
+        10
       ],
       "max_seq_len": 10,
       "min_seq_len": 4,
       "files": [
         {
           "file_name": "edges.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/101/edges.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/52/edges.pkl"
         },
         {
           "file_name": "ll_model.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/101/ll_model.pth"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/52/ll_model.pth"
         },
         {
           "file_name": "ll_model_cfg.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/101/ll_model_cfg.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/52/ll_model_cfg.pkl"
         },
         {
           "file_name": "meta.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/101/meta.json"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/52/meta.json"
         }
       ],
       "transformer_cfg": {
@@ -70,7 +51,7 @@
         "n_heads": 4,
         "d_mlp": 16,
         "act_fn": "gelu",
-        "d_vocab": 32,
+        "d_vocab": 13,
         "eps": 1e-05,
         "use_attn_result": true,
         "use_attn_scale": true,
@@ -92,12 +73,12 @@
         "attention_dir": "causal",
         "attn_only": false,
         "seed": 0,
-        "initializer_range": 0.12199885626608374,
+        "initializer_range": 0.1392621247645583,
         "init_weights": true,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
         "final_rms": false,
-        "d_vocab_out": 2,
+        "d_vocab_out": 11,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
         "n_params": 384,
@@ -115,7 +96,7 @@
         "num_experts": null,
         "experts_per_token": null
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/101/ll_model_cfg.pkl",
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/52/ll_model_cfg.pkl",
       "training_args": {
         "output_dir": "/circuits-benchmark/results",
         "atol": 0.05,
@@ -123,7 +104,7 @@
         "use_single_loss": true,
         "iit_weight": 1.0,
         "behavior_weight": 0.4,
-        "strict_weight": 0.4,
+        "strict_weight": 0.7,
         "epochs": 1000,
         "early_stop_accuracy_threshold": 99.9,
         "act_fn": "gelu",
@@ -145,57 +126,49 @@
         "siit_sampling": "sample_all",
         "val_iia_sampling": "all"
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/101/meta.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/101/ll_model.pth",
-      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/101/edges.pkl"
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/52/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/52/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/52/edges.pkl"
     },
     {
-      "case_id": "103",
-      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/103",
-      "task_description": "Swap consecutive numbers in a list",
+      "case_id": "7",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/7",
+      "task_description": "Returns the number of times each token occurs in the input.",
       "vocab": [
-        0,
-        1,
-        2,
-        3,
-        4,
-        5,
-        6,
-        7,
-        8,
-        9,
-        10
+        "a",
+        "b",
+        "c"
       ],
       "max_seq_len": 10,
       "min_seq_len": 4,
       "files": [
         {
           "file_name": "edges.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/103/edges.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/7/edges.pkl"
         },
         {
           "file_name": "ll_model.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/103/ll_model.pth"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/7/ll_model.pth"
         },
         {
           "file_name": "ll_model_cfg.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/103/ll_model_cfg.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/7/ll_model_cfg.pkl"
         },
         {
           "file_name": "meta.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/103/meta.json"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/7/meta.json"
         }
       ],
       "transformer_cfg": {
-        "n_layers": 3,
-        "d_model": 24,
+        "n_layers": 2,
+        "d_model": 17,
         "n_ctx": 10,
-        "d_head": 6,
+        "d_head": 4,
         "model_name": "custom",
         "n_heads": 4,
-        "d_mlp": 96,
+        "d_mlp": 68,
         "act_fn": "gelu",
-        "d_vocab": 13,
+        "d_vocab": 5,
         "eps": 1e-05,
         "use_attn_result": true,
         "use_attn_scale": true,
@@ -217,15 +190,15 @@
         "attention_dir": "bidirectional",
         "attn_only": false,
         "seed": 0,
-        "initializer_range": 0.10787197799411874,
+        "initializer_range": 0.15689290811054724,
         "init_weights": true,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
         "final_rms": false,
-        "d_vocab_out": 11,
+        "d_vocab_out": 10,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
-        "n_params": 20736,
+        "n_params": 6800,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
@@ -235,89 +208,71 @@
         "post_embedding_ln": false,
         "rotary_base": 10000,
         "trust_remote_code": false,
-        "rotary_adjacent_pairs": false,
-        "load_in_4bit": false,
-        "num_experts": null,
-        "experts_per_token": null
+        "rotary_adjacent_pairs": false
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/103/ll_model_cfg.pkl",
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/7/ll_model_cfg.pkl",
       "training_args": {
-        "output_dir": "/circuits-benchmark/results",
         "atol": 0.05,
-        "lr": 0.001,
-        "use_single_loss": true,
+        "lr": 0.01,
+        "use_single_loss": false,
         "iit_weight": 1.0,
-        "behavior_weight": 0.4,
-        "strict_weight": 0.4,
-        "epochs": 1000,
-        "early_stop_accuracy_threshold": 99.9,
+        "behavior_weight": 1.0,
+        "strict_weight": 0.5,
+        "epochs": 2000,
         "act_fn": "gelu",
-        "use_wandb": true,
-        "save_model_to_wandb": true,
         "clip_grad_norm": 0.1,
-        "lr_scheduler": "linear",
+        "lr_scheduler": "",
         "model_pair": "strict",
         "same_size": false,
-        "seed": 67,
+        "seed": 1234,
         "batch_size": 256,
         "include_mlp": false,
-        "detach_while_caching": true,
-        "scheduler_val_metric": [
-          "val/accuracy",
-          "val/IIA",
-          "val/strict_accuracy"
-        ],
-        "siit_sampling": "sample_all",
-        "val_iia_sampling": "all"
+        "next_token": false,
+        "detach_while_caching": true
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/103/meta.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/103/ll_model.pth",
-      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/103/edges.pkl"
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/7/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/7/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/7/edges.pkl"
     },
     {
-      "case_id": "11",
-      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/11",
-      "task_description": "Counts the number of words in a sequence based on their length.",
+      "case_id": "13",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/13",
+      "task_description": "Analyzes the trend (increasing, decreasing, constant) of numeric tokens.",
       "vocab": [
-        "J",
-        "LB",
-        "TPSI",
-        "V",
-        "b",
-        "no",
-        "oCLrZaW",
-        "poiVg"
+        0,
+        1,
+        2
       ],
       "max_seq_len": 10,
       "min_seq_len": 4,
       "files": [
         {
           "file_name": "edges.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/edges.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/edges.pkl"
         },
         {
           "file_name": "ll_model.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/ll_model.pth"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/ll_model.pth"
         },
         {
           "file_name": "ll_model_cfg.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/ll_model_cfg.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/ll_model_cfg.pkl"
         },
         {
           "file_name": "meta.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/meta.json"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/meta.json"
         }
       ],
       "transformer_cfg": {
         "n_layers": 2,
-        "d_model": 12,
+        "d_model": 20,
         "n_ctx": 10,
-        "d_head": 3,
+        "d_head": 5,
         "model_name": "custom",
         "n_heads": 4,
-        "d_mlp": 48,
+        "d_mlp": 80,
         "act_fn": "gelu",
-        "d_vocab": 10,
+        "d_vocab": 5,
         "eps": 1e-05,
         "use_attn_result": true,
         "use_attn_scale": true,
@@ -336,7 +291,7 @@
         "init_mode": "gpt2",
         "normalization_type": null,
         "n_devices": 1,
-        "attention_dir": "causal",
+        "attention_dir": "bidirectional",
         "attn_only": false,
         "seed": 0,
         "initializer_range": 0.1460593486680443,
@@ -344,10 +299,10 @@
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
         "final_rms": false,
-        "d_vocab_out": 5,
+        "d_vocab_out": 3,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
-        "n_params": 3456,
+        "n_params": 9600,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
@@ -359,7 +314,7 @@
         "trust_remote_code": false,
         "rotary_adjacent_pairs": false
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/ll_model_cfg.pkl",
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/ll_model_cfg.pkl",
       "training_args": {
         "atol": 0.05,
         "lr": 0.01,
@@ -372,14 +327,14 @@
         "clip_grad_norm": 1.0,
         "lr_scheduler": ""
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/meta.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/ll_model.pth",
-      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/edges.pkl"
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/edges.pkl"
     },
     {
-      "case_id": "110",
-      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/110",
-      "task_description": "Inserts zeros between each element, removing the latter half of the list.",
+      "case_id": "63",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/63",
+      "task_description": "Replaces each element with the number of elements less than it in the sequence.",
       "vocab": [
         0,
         1,
@@ -398,29 +353,29 @@
       "files": [
         {
           "file_name": "edges.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/110/edges.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/63/edges.pkl"
         },
         {
           "file_name": "ll_model.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/110/ll_model.pth"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/63/ll_model.pth"
         },
         {
           "file_name": "ll_model_cfg.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/110/ll_model_cfg.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/63/ll_model_cfg.pkl"
         },
         {
           "file_name": "meta.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/110/meta.json"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/63/meta.json"
         }
       ],
       "transformer_cfg": {
         "n_layers": 2,
-        "d_model": 20,
+        "d_model": 24,
         "n_ctx": 10,
-        "d_head": 5,
+        "d_head": 6,
         "model_name": "custom",
         "n_heads": 4,
-        "d_mlp": 80,
+        "d_mlp": 96,
         "act_fn": "gelu",
         "d_vocab": 13,
         "eps": 1e-05,
@@ -444,15 +399,15 @@
         "attention_dir": "bidirectional",
         "attn_only": false,
         "seed": 0,
-        "initializer_range": 0.11925695879998878,
+        "initializer_range": 0.13719886811400708,
         "init_weights": true,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
         "final_rms": false,
-        "d_vocab_out": 11,
+        "d_vocab_out": 10,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
-        "n_params": 9600,
+        "n_params": 13824,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
@@ -467,7 +422,7 @@
         "num_experts": null,
         "experts_per_token": null
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/110/ll_model_cfg.pkl",
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/63/ll_model_cfg.pkl",
       "training_args": {
         "output_dir": "/circuits-benchmark/results",
         "atol": 0.05,
@@ -497,14 +452,14 @@
         "siit_sampling": "sample_all",
         "val_iia_sampling": "all"
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/110/meta.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/110/ll_model.pth",
-      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/110/edges.pkl"
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/63/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/63/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/63/edges.pkl"
     },
     {
-      "case_id": "111",
-      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/111",
-      "task_description": "Returns the last element of the sequence and pads the rest with zeros.",
+      "case_id": "60",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/60",
+      "task_description": "Increment each element in the sequence by 1.",
       "vocab": [
         0,
         1,
@@ -523,29 +478,29 @@
       "files": [
         {
           "file_name": "edges.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/111/edges.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/60/edges.pkl"
         },
         {
           "file_name": "ll_model.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/111/ll_model.pth"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/60/ll_model.pth"
         },
         {
           "file_name": "ll_model_cfg.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/111/ll_model_cfg.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/60/ll_model_cfg.pkl"
         },
         {
           "file_name": "meta.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/111/meta.json"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/60/meta.json"
         }
       ],
       "transformer_cfg": {
-        "n_layers": 3,
-        "d_model": 24,
+        "n_layers": 2,
+        "d_model": 4,
         "n_ctx": 10,
-        "d_head": 6,
+        "d_head": 1,
         "model_name": "custom",
         "n_heads": 4,
-        "d_mlp": 96,
+        "d_mlp": 16,
         "act_fn": "gelu",
         "d_vocab": 13,
         "eps": 1e-05,
@@ -566,10 +521,10 @@
         "init_mode": "gpt2",
         "normalization_type": null,
         "n_devices": 1,
-        "attention_dir": "bidirectional",
+        "attention_dir": "causal",
         "attn_only": false,
         "seed": 0,
-        "initializer_range": 0.09847319278346618,
+        "initializer_range": 0.1392621247645583,
         "init_weights": true,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
@@ -577,7 +532,7 @@
         "d_vocab_out": 11,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
-        "n_params": 20736,
+        "n_params": 384,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
@@ -592,7 +547,7 @@
         "num_experts": null,
         "experts_per_token": null
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/111/ll_model_cfg.pkl",
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/60/ll_model_cfg.pkl",
       "training_args": {
         "output_dir": "/circuits-benchmark/results",
         "atol": 0.05,
@@ -600,7 +555,7 @@
         "use_single_loss": true,
         "iit_weight": 1.0,
         "behavior_weight": 0.4,
-        "strict_weight": 0.4,
+        "strict_weight": 0.7,
         "epochs": 1000,
         "early_stop_accuracy_threshold": 99.9,
         "act_fn": "gelu",
@@ -622,76 +577,54 @@
         "siit_sampling": "sample_all",
         "val_iia_sampling": "all"
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/111/meta.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/111/ll_model.pth",
-      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/111/edges.pkl"
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/60/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/60/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/60/edges.pkl"
     },
     {
-      "case_id": "113",
-      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/113",
-      "task_description": "Inverts the sequence if it is sorted in ascending order, otherwise leaves it unchanged.",
+      "case_id": "29",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/29",
+      "task_description": "Creates abbreviations for each token in the sequence.",
       "vocab": [
-        0,
-        1,
-        2,
-        3,
-        4,
-        5,
-        6,
-        7,
-        8,
-        9,
-        10,
-        11,
-        12,
-        13,
-        14,
-        15,
-        16,
-        17,
-        18,
-        19,
-        20,
-        21,
-        22,
-        23,
-        24,
-        25,
-        26,
-        27,
-        28,
-        29
+        "J",
+        "LB",
+        "TPSI",
+        "V",
+        "b",
+        "no",
+        "oCLrZaW",
+        "poiVg"
       ],
       "max_seq_len": 10,
       "min_seq_len": 4,
       "files": [
         {
           "file_name": "edges.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/113/edges.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/edges.pkl"
         },
         {
           "file_name": "ll_model.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/113/ll_model.pth"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/ll_model.pth"
         },
         {
           "file_name": "ll_model_cfg.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/113/ll_model_cfg.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/ll_model_cfg.pkl"
         },
         {
           "file_name": "meta.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/113/meta.json"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/meta.json"
         }
       ],
       "transformer_cfg": {
-        "n_layers": 7,
-        "d_model": 88,
+        "n_layers": 2,
+        "d_model": 13,
         "n_ctx": 10,
-        "d_head": 22,
+        "d_head": 3,
         "model_name": "custom",
         "n_heads": 4,
-        "d_mlp": 352,
+        "d_mlp": 52,
         "act_fn": "gelu",
-        "d_vocab": 32,
+        "d_vocab": 10,
         "eps": 1e-05,
         "use_attn_result": true,
         "use_attn_scale": true,
@@ -710,18 +643,18 @@
         "init_mode": "gpt2",
         "normalization_type": null,
         "n_devices": 1,
-        "attention_dir": "bidirectional",
+        "attention_dir": "causal",
         "attn_only": false,
         "seed": 0,
-        "initializer_range": 0.0512147519731584,
+        "initializer_range": 0.1539600717839002,
         "init_weights": true,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
         "final_rms": false,
-        "d_vocab_out": 30,
+        "d_vocab_out": 8,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
-        "n_params": 650496,
+        "n_params": 3952,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
@@ -731,50 +664,32 @@
         "post_embedding_ln": false,
         "rotary_base": 10000,
         "trust_remote_code": false,
-        "rotary_adjacent_pairs": false,
-        "load_in_4bit": false,
-        "num_experts": null,
-        "experts_per_token": null
+        "rotary_adjacent_pairs": false
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/113/ll_model_cfg.pkl",
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/ll_model_cfg.pkl",
       "training_args": {
-        "output_dir": "/circuits-benchmark/results",
         "atol": 0.05,
-        "lr": 0.001,
-        "use_single_loss": true,
+        "lr": 0.01,
+        "use_single_loss": false,
         "iit_weight": 1.0,
-        "behavior_weight": 0.4,
+        "behavior_weight": 1.0,
         "strict_weight": 0.4,
-        "epochs": 1000,
-        "early_stop_accuracy_threshold": 99.9,
+        "epochs": 2000,
         "act_fn": "gelu",
-        "use_wandb": true,
-        "save_model_to_wandb": true,
         "clip_grad_norm": 0.1,
-        "lr_scheduler": "linear",
-        "model_pair": "strict",
-        "same_size": false,
-        "seed": 67,
-        "batch_size": 256,
-        "include_mlp": false,
-        "detach_while_caching": true,
-        "scheduler_val_metric": [
-          "val/accuracy",
-          "val/IIA",
-          "val/strict_accuracy"
-        ],
-        "siit_sampling": "sample_all",
-        "val_iia_sampling": "all"
+        "lr_scheduler": "",
+        "model_pair": "strict"
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/113/meta.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/113/ll_model.pth",
-      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/113/edges.pkl"
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/edges.pkl"
     },
     {
-      "case_id": "114",
-      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/114",
-      "task_description": "Apply a logarithm base 10 to each element of the input sequence.",
+      "case_id": "79",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/79",
+      "task_description": "Check if each number in a sequence is prime",
       "vocab": [
+        0,
         1,
         2,
         3,
@@ -791,19 +706,19 @@
       "files": [
         {
           "file_name": "edges.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/114/edges.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/79/edges.pkl"
         },
         {
           "file_name": "ll_model.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/114/ll_model.pth"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/79/ll_model.pth"
         },
         {
           "file_name": "ll_model_cfg.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/114/ll_model_cfg.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/79/ll_model_cfg.pkl"
         },
         {
           "file_name": "meta.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/114/meta.json"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/79/meta.json"
         }
       ],
       "transformer_cfg": {
@@ -815,7 +730,7 @@
         "n_heads": 4,
         "d_mlp": 16,
         "act_fn": "gelu",
-        "d_vocab": 12,
+        "d_vocab": 13,
         "eps": 1e-05,
         "use_attn_result": true,
         "use_attn_scale": true,
@@ -837,12 +752,12 @@
         "attention_dir": "causal",
         "attn_only": false,
         "seed": 0,
-        "initializer_range": 0.14368424162141993,
+        "initializer_range": 0.16329931618554522,
         "init_weights": true,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
         "final_rms": false,
-        "d_vocab_out": 10,
+        "d_vocab_out": 2,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
         "n_params": 384,
@@ -860,7 +775,7 @@
         "num_experts": null,
         "experts_per_token": null
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/114/ll_model_cfg.pkl",
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/79/ll_model_cfg.pkl",
       "training_args": {
         "output_dir": "/circuits-benchmark/results",
         "atol": 0.05,
@@ -890,76 +805,49 @@
         "siit_sampling": "sample_all",
         "val_iia_sampling": "all"
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/114/meta.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/114/ll_model.pth",
-      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/114/edges.pkl"
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/79/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/79/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/79/edges.pkl"
     },
     {
-      "case_id": "122",
-      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/122",
-      "task_description": "Check if each number is divisible by 3.",
+      "case_id": "24",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/24",
+      "task_description": "Identifies the first occurrence of each token in a sequence.",
       "vocab": [
-        0,
-        1,
-        2,
-        3,
-        4,
-        5,
-        6,
-        7,
-        8,
-        9,
-        10,
-        11,
-        12,
-        13,
-        14,
-        15,
-        16,
-        17,
-        18,
-        19,
-        20,
-        21,
-        22,
-        23,
-        24,
-        25,
-        26,
-        27,
-        28,
-        29
+        "a",
+        "b",
+        "c"
       ],
       "max_seq_len": 10,
       "min_seq_len": 4,
       "files": [
         {
           "file_name": "edges.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/122/edges.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/edges.pkl"
         },
         {
           "file_name": "ll_model.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/122/ll_model.pth"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/ll_model.pth"
         },
         {
           "file_name": "ll_model_cfg.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/122/ll_model_cfg.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/ll_model_cfg.pkl"
         },
         {
           "file_name": "meta.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/122/meta.json"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/meta.json"
         }
       ],
       "transformer_cfg": {
         "n_layers": 2,
-        "d_model": 4,
+        "d_model": 36,
         "n_ctx": 10,
-        "d_head": 1,
+        "d_head": 9,
         "model_name": "custom",
         "n_heads": 4,
-        "d_mlp": 16,
+        "d_mlp": 144,
         "act_fn": "gelu",
-        "d_vocab": 32,
+        "d_vocab": 5,
         "eps": 1e-05,
         "use_attn_result": true,
         "use_attn_scale": true,
@@ -981,15 +869,15 @@
         "attention_dir": "causal",
         "attn_only": false,
         "seed": 0,
-        "initializer_range": 0.12199885626608374,
+        "initializer_range": 0.1885618083164127,
         "init_weights": true,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
         "final_rms": false,
-        "d_vocab_out": 2,
+        "d_vocab_out": 3,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
-        "n_params": 384,
+        "n_params": 31104,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
@@ -1004,7 +892,7 @@
         "num_experts": null,
         "experts_per_token": null
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/122/ll_model_cfg.pkl",
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/ll_model_cfg.pkl",
       "training_args": {
         "output_dir": "/circuits-benchmark/results",
         "atol": 0.05,
@@ -1034,14 +922,14 @@
         "siit_sampling": "sample_all",
         "val_iia_sampling": "all"
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/122/meta.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/122/ll_model.pth",
-      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/122/edges.pkl"
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/edges.pkl"
     },
     {
-      "case_id": "124",
-      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/124",
-      "task_description": "Check if all elements in a list are equal.",
+      "case_id": "82",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/82",
+      "task_description": "Halve the elements in the second half of the sequence.",
       "vocab": [
         0,
         1,
@@ -1060,23 +948,23 @@
       "files": [
         {
           "file_name": "edges.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/124/edges.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/edges.pkl"
         },
         {
           "file_name": "ll_model.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/124/ll_model.pth"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/ll_model.pth"
         },
         {
           "file_name": "ll_model_cfg.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/124/ll_model_cfg.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/ll_model_cfg.pkl"
         },
         {
           "file_name": "meta.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/124/meta.json"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/meta.json"
         }
       ],
       "transformer_cfg": {
-        "n_layers": 3,
+        "n_layers": 4,
         "d_model": 24,
         "n_ctx": 10,
         "d_head": 6,
@@ -1106,15 +994,15 @@
         "attention_dir": "bidirectional",
         "attn_only": false,
         "seed": 0,
-        "initializer_range": 0.11547005383792516,
+        "initializer_range": 0.1059625885652035,
         "init_weights": true,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
         "final_rms": false,
-        "d_vocab_out": 2,
+        "d_vocab_out": 16,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
-        "n_params": 20736,
+        "n_params": 27648,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
@@ -1129,7 +1017,7 @@
         "num_experts": null,
         "experts_per_token": null
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/124/ll_model_cfg.pkl",
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/ll_model_cfg.pkl",
       "training_args": {
         "output_dir": "/circuits-benchmark/results",
         "atol": 0.05,
@@ -1159,68 +1047,47 @@
         "siit_sampling": "sample_all",
         "val_iia_sampling": "all"
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/124/meta.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/124/ll_model.pth",
-      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/124/edges.pkl"
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/edges.pkl"
     },
     {
-      "case_id": "129",
-      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/129",
-      "task_description": "Checks if all elements are a multiple of n (set the default at 2).",
+      "case_id": "31",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/31",
+      "task_description": "Identify if tokens in the sequence are anagrams of the word 'listen'.",
       "vocab": [
-        0,
-        1,
-        2,
-        3,
-        4,
-        5,
-        6,
-        7,
-        8,
-        9,
-        10,
-        11,
-        12,
-        13,
-        14,
-        15,
-        16,
-        17,
-        18,
-        19,
-        20,
-        21,
-        22,
-        23,
-        24,
-        25,
-        26,
-        27,
-        28,
-        29
-      ],
-      "max_seq_len": 10,
-      "min_seq_len": 4,
-      "files": [
-        {
-          "file_name": "edges.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/129/edges.pkl"
-        },
+        "J",
+        "LB",
+        "TPSI",
+        "V",
+        "b",
+        "listen",
+        "no",
+        "oCLrZaW",
+        "poiVg"
+      ],
+      "max_seq_len": 10,
+      "min_seq_len": 4,
+      "files": [
+        {
+          "file_name": "edges.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/31/edges.pkl"
+        },
         {
           "file_name": "ll_model.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/129/ll_model.pth"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/31/ll_model.pth"
         },
         {
           "file_name": "ll_model_cfg.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/129/ll_model_cfg.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/31/ll_model_cfg.pkl"
         },
         {
           "file_name": "meta.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/129/meta.json"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/31/meta.json"
         }
       ],
       "transformer_cfg": {
-        "n_layers": 3,
+        "n_layers": 2,
         "d_model": 4,
         "n_ctx": 10,
         "d_head": 1,
@@ -1228,7 +1095,7 @@
         "n_heads": 4,
         "d_mlp": 16,
         "act_fn": "gelu",
-        "d_vocab": 32,
+        "d_vocab": 11,
         "eps": 1e-05,
         "use_attn_result": true,
         "use_attn_scale": true,
@@ -1247,10 +1114,10 @@
         "init_mode": "gpt2",
         "normalization_type": null,
         "n_devices": 1,
-        "attention_dir": "bidirectional",
+        "attention_dir": "causal",
         "attn_only": false,
         "seed": 0,
-        "initializer_range": 0.10504514628777804,
+        "initializer_range": 0.17056057308448835,
         "init_weights": true,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
@@ -1258,7 +1125,7 @@
         "d_vocab_out": 2,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
-        "n_params": 576,
+        "n_params": 384,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
@@ -1273,7 +1140,7 @@
         "num_experts": null,
         "experts_per_token": null
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/129/ll_model_cfg.pkl",
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/31/ll_model_cfg.pkl",
       "training_args": {
         "output_dir": "/circuits-benchmark/results",
         "atol": 0.05,
@@ -1303,49 +1170,66 @@
         "siit_sampling": "sample_all",
         "val_iia_sampling": "all"
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/129/meta.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/129/ll_model.pth",
-      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/129/edges.pkl"
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/31/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/31/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/31/edges.pkl"
     },
     {
-      "case_id": "13",
-      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/13",
-      "task_description": "Analyzes the trend (increasing, decreasing, constant) of numeric tokens.",
+      "case_id": "72",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/72",
+      "task_description": "Negate each element in the input sequence.",
       "vocab": [
+        -10,
+        -9,
+        -8,
+        -7,
+        -6,
+        -5,
+        -4,
+        -3,
+        -2,
+        -1,
         0,
         1,
-        2
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8,
+        9
       ],
       "max_seq_len": 10,
       "min_seq_len": 4,
       "files": [
         {
           "file_name": "edges.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/edges.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/72/edges.pkl"
         },
         {
           "file_name": "ll_model.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/ll_model.pth"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/72/ll_model.pth"
         },
         {
           "file_name": "ll_model_cfg.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/ll_model_cfg.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/72/ll_model_cfg.pkl"
         },
         {
           "file_name": "meta.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/meta.json"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/72/meta.json"
         }
       ],
       "transformer_cfg": {
         "n_layers": 2,
-        "d_model": 20,
+        "d_model": 4,
         "n_ctx": 10,
-        "d_head": 5,
+        "d_head": 1,
         "model_name": "custom",
         "n_heads": 4,
-        "d_mlp": 80,
+        "d_mlp": 16,
         "act_fn": "gelu",
-        "d_vocab": 5,
+        "d_vocab": 22,
         "eps": 1e-05,
         "use_attn_result": true,
         "use_attn_scale": true,
@@ -1364,18 +1248,18 @@
         "init_mode": "gpt2",
         "normalization_type": null,
         "n_devices": 1,
-        "attention_dir": "bidirectional",
+        "attention_dir": "causal",
         "attn_only": false,
         "seed": 0,
-        "initializer_range": 0.1460593486680443,
+        "initializer_range": 0.11202240672224079,
         "init_weights": true,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
         "final_rms": false,
-        "d_vocab_out": 3,
+        "d_vocab_out": 20,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
-        "n_params": 9600,
+        "n_params": 384,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
@@ -1385,64 +1269,92 @@
         "post_embedding_ln": false,
         "rotary_base": 10000,
         "trust_remote_code": false,
-        "rotary_adjacent_pairs": false
+        "rotary_adjacent_pairs": false,
+        "load_in_4bit": false,
+        "num_experts": null,
+        "experts_per_token": null
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/ll_model_cfg.pkl",
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/72/ll_model_cfg.pkl",
       "training_args": {
+        "output_dir": "/circuits-benchmark/results",
         "atol": 0.05,
-        "lr": 0.01,
-        "use_single_loss": false,
+        "lr": 0.001,
+        "use_single_loss": true,
         "iit_weight": 1.0,
-        "behavior_weight": 1.0,
-        "strict_weight": 0.4,
-        "epochs": 500,
+        "behavior_weight": 0.4,
+        "strict_weight": 0.7,
+        "epochs": 1000,
+        "early_stop_accuracy_threshold": 99.9,
         "act_fn": "gelu",
-        "clip_grad_norm": 1.0,
-        "lr_scheduler": ""
+        "use_wandb": true,
+        "save_model_to_wandb": true,
+        "clip_grad_norm": 0.1,
+        "lr_scheduler": "linear",
+        "model_pair": "strict",
+        "same_size": false,
+        "seed": 67,
+        "batch_size": 256,
+        "include_mlp": false,
+        "detach_while_caching": true,
+        "scheduler_val_metric": [
+          "val/accuracy",
+          "val/IIA",
+          "val/strict_accuracy"
+        ],
+        "siit_sampling": "sample_all",
+        "val_iia_sampling": "all"
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/meta.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/ll_model.pth",
-      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/edges.pkl"
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/72/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/72/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/72/edges.pkl"
     },
     {
-      "case_id": "14",
-      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/14",
-      "task_description": "Returns the count of 'a' in the input sequence.",
+      "case_id": "104",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/104",
+      "task_description": "Apply exponential function to all elements of the input sequence.",
       "vocab": [
-        "a",
-        "b",
-        "c"
+        0,
+        1,
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8,
+        9,
+        10
       ],
       "max_seq_len": 10,
       "min_seq_len": 4,
       "files": [
         {
           "file_name": "edges.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/14/edges.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/104/edges.pkl"
         },
         {
           "file_name": "ll_model.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/14/ll_model.pth"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/104/ll_model.pth"
         },
         {
           "file_name": "ll_model_cfg.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/14/ll_model_cfg.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/104/ll_model_cfg.pkl"
         },
         {
           "file_name": "meta.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/14/meta.json"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/104/meta.json"
         }
       ],
       "transformer_cfg": {
         "n_layers": 2,
-        "d_model": 8,
+        "d_model": 4,
         "n_ctx": 10,
-        "d_head": 2,
+        "d_head": 1,
         "model_name": "custom",
         "n_heads": 4,
-        "d_mlp": 32,
+        "d_mlp": 16,
         "act_fn": "gelu",
-        "d_vocab": 5,
+        "d_vocab": 13,
         "eps": 1e-05,
         "use_attn_result": true,
         "use_attn_scale": true,
@@ -1461,18 +1373,18 @@
         "init_mode": "gpt2",
         "normalization_type": null,
         "n_devices": 1,
-        "attention_dir": "bidirectional",
+        "attention_dir": "causal",
         "attn_only": false,
         "seed": 0,
-        "initializer_range": 0.15689290811054724,
+        "initializer_range": 0.1392621247645583,
         "init_weights": true,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
         "final_rms": false,
-        "d_vocab_out": 10,
+        "d_vocab_out": 11,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
-        "n_params": 1536,
+        "n_params": 384,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
@@ -1487,7 +1399,7 @@
         "num_experts": null,
         "experts_per_token": null
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/14/ll_model_cfg.pkl",
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/104/ll_model_cfg.pkl",
       "training_args": {
         "output_dir": "/circuits-benchmark/results",
         "atol": 0.05,
@@ -1495,7 +1407,7 @@
         "use_single_loss": true,
         "iit_weight": 1.0,
         "behavior_weight": 0.4,
-        "strict_weight": 0.4,
+        "strict_weight": 0.7,
         "epochs": 1000,
         "early_stop_accuracy_threshold": 99.9,
         "act_fn": "gelu",
@@ -1517,9 +1429,9 @@
         "siit_sampling": "sample_all",
         "val_iia_sampling": "all"
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/14/meta.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/14/ll_model.pth",
-      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/14/edges.pkl"
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/104/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/104/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/104/edges.pkl"
     },
     {
       "case_id": "18",
@@ -1622,44 +1534,52 @@
       "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/edges.pkl"
     },
     {
-      "case_id": "19",
-      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/19",
-      "task_description": "Removes consecutive duplicate tokens from a sequence.",
+      "case_id": "110",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/110",
+      "task_description": "Inserts zeros between each element, removing the latter half of the list.",
       "vocab": [
-        "a",
-        "b",
-        "c"
+        0,
+        1,
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8,
+        9,
+        10
       ],
-      "max_seq_len": 15,
+      "max_seq_len": 10,
       "min_seq_len": 4,
       "files": [
         {
           "file_name": "edges.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/edges.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/110/edges.pkl"
         },
         {
           "file_name": "ll_model.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/ll_model.pth"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/110/ll_model.pth"
         },
         {
           "file_name": "ll_model_cfg.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/ll_model_cfg.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/110/ll_model_cfg.pkl"
         },
         {
           "file_name": "meta.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/meta.json"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/110/meta.json"
         }
       ],
       "transformer_cfg": {
         "n_layers": 2,
-        "d_model": 32,
-        "n_ctx": 15,
-        "d_head": 8,
+        "d_model": 20,
+        "n_ctx": 10,
+        "d_head": 5,
         "model_name": "custom",
         "n_heads": 4,
-        "d_mlp": 128,
+        "d_mlp": 80,
         "act_fn": "gelu",
-        "d_vocab": 5,
+        "d_vocab": 13,
         "eps": 1e-05,
         "use_attn_result": true,
         "use_attn_scale": true,
@@ -1678,18 +1598,18 @@
         "init_mode": "gpt2",
         "normalization_type": null,
         "n_devices": 1,
-        "attention_dir": "causal",
+        "attention_dir": "bidirectional",
         "attn_only": false,
         "seed": 0,
-        "initializer_range": 0.15689290811054724,
+        "initializer_range": 0.11925695879998878,
         "init_weights": true,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
         "final_rms": false,
-        "d_vocab_out": 3,
+        "d_vocab_out": 11,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
-        "n_params": 24576,
+        "n_params": 9600,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
@@ -1699,87 +1619,92 @@
         "post_embedding_ln": false,
         "rotary_base": 10000,
         "trust_remote_code": false,
-        "rotary_adjacent_pairs": false
+        "rotary_adjacent_pairs": false,
+        "load_in_4bit": false,
+        "num_experts": null,
+        "experts_per_token": null
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/ll_model_cfg.pkl",
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/110/ll_model_cfg.pkl",
       "training_args": {
+        "output_dir": "/circuits-benchmark/results",
         "atol": 0.05,
         "lr": 0.001,
-        "use_single_loss": false,
+        "use_single_loss": true,
         "iit_weight": 1.0,
-        "behavior_weight": 1.0,
+        "behavior_weight": 0.4,
         "strict_weight": 0.4,
-        "epochs": 2000,
+        "epochs": 1000,
+        "early_stop_accuracy_threshold": 99.9,
         "act_fn": "gelu",
+        "use_wandb": true,
+        "save_model_to_wandb": true,
         "clip_grad_norm": 0.1,
-        "lr_scheduler": ""
+        "lr_scheduler": "linear",
+        "model_pair": "strict",
+        "same_size": false,
+        "seed": 67,
+        "batch_size": 256,
+        "include_mlp": false,
+        "detach_while_caching": true,
+        "scheduler_val_metric": [
+          "val/accuracy",
+          "val/IIA",
+          "val/strict_accuracy"
+        ],
+        "siit_sampling": "sample_all",
+        "val_iia_sampling": "all"
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/meta.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/ll_model.pth",
-      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/edges.pkl"
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/110/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/110/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/110/edges.pkl"
     },
     {
-      "case_id": "2",
-      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/2",
-      "task_description": "Reverse the input sequence.",
+      "case_id": "69",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/69",
+      "task_description": "Assign -1, 0, or 1 to each element of the input sequence based on its sign.",
       "vocab": [
-        "a",
-        "b",
-        "c",
-        "d",
-        "e",
-        "f",
-        "g",
-        "h",
-        "i",
-        "j",
-        "k",
-        "l",
-        "m",
-        "n",
-        "o",
-        "p",
-        "q",
-        "r",
-        "s",
-        "t",
-        "u",
-        "v",
-        "w",
-        "x",
-        "y",
-        "z"
+        0,
+        1,
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8,
+        9,
+        10
       ],
       "max_seq_len": 10,
       "min_seq_len": 4,
       "files": [
         {
           "file_name": "edges.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/edges.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/69/edges.pkl"
         },
         {
           "file_name": "ll_model.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/ll_model.pth"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/69/ll_model.pth"
         },
         {
           "file_name": "ll_model_cfg.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/ll_model_cfg.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/69/ll_model_cfg.pkl"
         },
         {
           "file_name": "meta.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/meta.json"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/69/meta.json"
         }
       ],
       "transformer_cfg": {
-        "n_layers": 4,
-        "d_model": 56,
+        "n_layers": 2,
+        "d_model": 4,
         "n_ctx": 10,
-        "d_head": 14,
+        "d_head": 1,
         "model_name": "custom",
         "n_heads": 4,
-        "d_mlp": 224,
+        "d_mlp": 16,
         "act_fn": "gelu",
-        "d_vocab": 28,
+        "d_vocab": 13,
         "eps": 1e-05,
         "use_attn_result": true,
         "use_attn_scale": true,
@@ -1798,18 +1723,18 @@
         "init_mode": "gpt2",
         "normalization_type": null,
         "n_devices": 1,
-        "attention_dir": "bidirectional",
+        "attention_dir": "causal",
         "attn_only": false,
         "seed": 0,
-        "initializer_range": 0.07593263966019993,
+        "initializer_range": 0.16329931618554522,
         "init_weights": true,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
         "final_rms": false,
-        "d_vocab_out": 26,
+        "d_vocab_out": 2,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
-        "n_params": 150528,
+        "n_params": 384,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
@@ -1824,7 +1749,7 @@
         "num_experts": null,
         "experts_per_token": null
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/ll_model_cfg.pkl",
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/69/ll_model_cfg.pkl",
       "training_args": {
         "output_dir": "/circuits-benchmark/results",
         "atol": 0.05,
@@ -1854,58 +1779,57 @@
         "siit_sampling": "sample_all",
         "val_iia_sampling": "all"
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/meta.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/ll_model.pth",
-      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/edges.pkl"
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/69/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/69/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/69/edges.pkl"
     },
     {
-      "case_id": "20",
-      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/20",
-      "task_description": "Detect spam messages based on appearance of spam keywords.",
+      "case_id": "87",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/87",
+      "task_description": "Binarize a sequence of integers using a threshold.",
       "vocab": [
-        "J",
-        "LB",
-        "TPSI",
-        "V",
-        "b",
-        "click",
-        "no",
-        "now",
-        "oCLrZaW",
-        "offer",
-        "poiVg",
-        "spam"
+        0,
+        1,
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8,
+        9,
+        10
       ],
       "max_seq_len": 10,
       "min_seq_len": 4,
       "files": [
         {
           "file_name": "edges.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/edges.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/87/edges.pkl"
         },
         {
           "file_name": "ll_model.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/ll_model.pth"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/87/ll_model.pth"
         },
         {
           "file_name": "ll_model_cfg.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/ll_model_cfg.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/87/ll_model_cfg.pkl"
         },
         {
           "file_name": "meta.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/meta.json"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/87/meta.json"
         }
       ],
       "transformer_cfg": {
         "n_layers": 2,
-        "d_model": 13,
+        "d_model": 4,
         "n_ctx": 10,
-        "d_head": 3,
+        "d_head": 1,
         "model_name": "custom",
         "n_heads": 4,
-        "d_mlp": 52,
+        "d_mlp": 16,
         "act_fn": "gelu",
-        "d_vocab": 14,
+        "d_vocab": 13,
         "eps": 1e-05,
         "use_attn_result": true,
         "use_attn_scale": true,
@@ -1927,7 +1851,7 @@
         "attention_dir": "causal",
         "attn_only": false,
         "seed": 0,
-        "initializer_range": 0.16,
+        "initializer_range": 0.16329931618554522,
         "init_weights": true,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
@@ -1935,7 +1859,7 @@
         "d_vocab_out": 2,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
-        "n_params": 3952,
+        "n_params": 384,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
@@ -1945,65 +1869,92 @@
         "post_embedding_ln": false,
         "rotary_base": 10000,
         "trust_remote_code": false,
-        "rotary_adjacent_pairs": false
+        "rotary_adjacent_pairs": false,
+        "load_in_4bit": false,
+        "num_experts": null,
+        "experts_per_token": null
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/ll_model_cfg.pkl",
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/87/ll_model_cfg.pkl",
       "training_args": {
+        "output_dir": "/circuits-benchmark/results",
         "atol": 0.05,
-        "lr": 0.01,
-        "use_single_loss": false,
+        "lr": 0.001,
+        "use_single_loss": true,
         "iit_weight": 1.0,
-        "behavior_weight": 1.0,
+        "behavior_weight": 0.4,
         "strict_weight": 0.4,
-        "epochs": 2000,
+        "epochs": 1000,
+        "early_stop_accuracy_threshold": 99.9,
         "act_fn": "gelu",
+        "use_wandb": true,
+        "save_model_to_wandb": true,
         "clip_grad_norm": 0.1,
-        "lr_scheduler": "",
-        "model_pair": "strict"
+        "lr_scheduler": "linear",
+        "model_pair": "strict",
+        "same_size": false,
+        "seed": 67,
+        "batch_size": 256,
+        "include_mlp": false,
+        "detach_while_caching": true,
+        "scheduler_val_metric": [
+          "val/accuracy",
+          "val/IIA",
+          "val/strict_accuracy"
+        ],
+        "siit_sampling": "sample_all",
+        "val_iia_sampling": "all"
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/meta.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/ll_model.pth",
-      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/edges.pkl"
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/87/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/87/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/87/edges.pkl"
     },
     {
-      "case_id": "21",
-      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/21",
-      "task_description": "Extract unique tokens from a string",
+      "case_id": "80",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/80",
+      "task_description": "Subtract a constant from each element of the input sequence.",
       "vocab": [
-        "a",
-        "b",
-        "c"
+        0,
+        1,
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8,
+        9,
+        10
       ],
       "max_seq_len": 10,
       "min_seq_len": 4,
       "files": [
         {
           "file_name": "edges.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/edges.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/80/edges.pkl"
         },
         {
           "file_name": "ll_model.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/ll_model.pth"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/80/ll_model.pth"
         },
         {
           "file_name": "ll_model_cfg.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/ll_model_cfg.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/80/ll_model_cfg.pkl"
         },
         {
           "file_name": "meta.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/meta.json"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/80/meta.json"
         }
       ],
       "transformer_cfg": {
-        "n_layers": 4,
-        "d_model": 50,
+        "n_layers": 2,
+        "d_model": 4,
         "n_ctx": 10,
-        "d_head": 12,
+        "d_head": 1,
         "model_name": "custom",
         "n_heads": 4,
-        "d_mlp": 200,
+        "d_mlp": 16,
         "act_fn": "gelu",
-        "d_vocab": 5,
+        "d_vocab": 13,
         "eps": 1e-05,
         "use_attn_result": true,
         "use_attn_scale": true,
@@ -2025,15 +1976,15 @@
         "attention_dir": "causal",
         "attn_only": false,
         "seed": 0,
-        "initializer_range": 0.09847319278346618,
+        "initializer_range": 0.1392621247645583,
         "init_weights": true,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
         "final_rms": false,
-        "d_vocab_out": 3,
+        "d_vocab_out": 11,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
-        "n_params": 118400,
+        "n_params": 384,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
@@ -2043,65 +1994,92 @@
         "post_embedding_ln": false,
         "rotary_base": 10000,
         "trust_remote_code": false,
-        "rotary_adjacent_pairs": false
+        "rotary_adjacent_pairs": false,
+        "load_in_4bit": false,
+        "num_experts": null,
+        "experts_per_token": null
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/ll_model_cfg.pkl",
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/80/ll_model_cfg.pkl",
       "training_args": {
+        "output_dir": "/circuits-benchmark/results",
         "atol": 0.05,
-        "lr": 0.0005,
-        "use_single_loss": false,
+        "lr": 0.001,
+        "use_single_loss": true,
         "iit_weight": 1.0,
-        "behavior_weight": 1.0,
-        "strict_weight": 0.5,
-        "epochs": 2000,
+        "behavior_weight": 0.4,
+        "strict_weight": 0.7,
+        "epochs": 1000,
+        "early_stop_accuracy_threshold": 99.9,
         "act_fn": "gelu",
+        "use_wandb": true,
+        "save_model_to_wandb": true,
         "clip_grad_norm": 0.1,
-        "lr_scheduler": "",
-        "model_pair": "strict"
+        "lr_scheduler": "linear",
+        "model_pair": "strict",
+        "same_size": false,
+        "seed": 67,
+        "batch_size": 256,
+        "include_mlp": false,
+        "detach_while_caching": true,
+        "scheduler_val_metric": [
+          "val/accuracy",
+          "val/IIA",
+          "val/strict_accuracy"
+        ],
+        "siit_sampling": "sample_all",
+        "val_iia_sampling": "all"
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/meta.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/ll_model.pth",
-      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/edges.pkl"
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/80/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/80/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/80/edges.pkl"
     },
     {
-      "case_id": "24",
-      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/24",
-      "task_description": "Identifies the first occurrence of each token in a sequence.",
+      "case_id": "83",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/83",
+      "task_description": "Triple each element in the sequence.",
       "vocab": [
-        "a",
-        "b",
-        "c"
+        0,
+        1,
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8,
+        9,
+        10
       ],
       "max_seq_len": 10,
       "min_seq_len": 4,
       "files": [
         {
           "file_name": "edges.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/edges.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/83/edges.pkl"
         },
         {
           "file_name": "ll_model.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/ll_model.pth"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/83/ll_model.pth"
         },
         {
           "file_name": "ll_model_cfg.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/ll_model_cfg.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/83/ll_model_cfg.pkl"
         },
         {
           "file_name": "meta.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/meta.json"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/83/meta.json"
         }
       ],
       "transformer_cfg": {
         "n_layers": 2,
-        "d_model": 36,
+        "d_model": 4,
         "n_ctx": 10,
-        "d_head": 9,
+        "d_head": 1,
         "model_name": "custom",
         "n_heads": 4,
-        "d_mlp": 144,
+        "d_mlp": 16,
         "act_fn": "gelu",
-        "d_vocab": 5,
+        "d_vocab": 13,
         "eps": 1e-05,
         "use_attn_result": true,
         "use_attn_scale": true,
@@ -2123,15 +2101,15 @@
         "attention_dir": "causal",
         "attn_only": false,
         "seed": 0,
-        "initializer_range": 0.1885618083164127,
+        "initializer_range": 0.1392621247645583,
         "init_weights": true,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
         "final_rms": false,
-        "d_vocab_out": 3,
+        "d_vocab_out": 11,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
-        "n_params": 31104,
+        "n_params": 384,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
@@ -2146,7 +2124,7 @@
         "num_experts": null,
         "experts_per_token": null
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/ll_model_cfg.pkl",
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/83/ll_model_cfg.pkl",
       "training_args": {
         "output_dir": "/circuits-benchmark/results",
         "atol": 0.05,
@@ -2154,7 +2132,7 @@
         "use_single_loss": true,
         "iit_weight": 1.0,
         "behavior_weight": 0.4,
-        "strict_weight": 0.4,
+        "strict_weight": 0.7,
         "epochs": 1000,
         "early_stop_accuracy_threshold": 99.9,
         "act_fn": "gelu",
@@ -2176,49 +2154,57 @@
         "siit_sampling": "sample_all",
         "val_iia_sampling": "all"
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/meta.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/ll_model.pth",
-      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/edges.pkl"
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/83/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/83/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/83/edges.pkl"
     },
     {
-      "case_id": "25",
-      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/25",
-      "task_description": "Normalizes token frequencies in a sequence to a range between 0 and 1.",
+      "case_id": "45",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/45",
+      "task_description": "Doubles the first half of the sequence",
       "vocab": [
-        "a",
-        "b",
-        "c"
+        0,
+        1,
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8,
+        9,
+        10
       ],
       "max_seq_len": 10,
       "min_seq_len": 4,
       "files": [
         {
           "file_name": "edges.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/25/edges.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/45/edges.pkl"
         },
         {
           "file_name": "ll_model.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/25/ll_model.pth"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/45/ll_model.pth"
         },
         {
           "file_name": "ll_model_cfg.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/25/ll_model_cfg.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/45/ll_model_cfg.pkl"
         },
         {
           "file_name": "meta.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/25/meta.json"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/45/meta.json"
         }
       ],
       "transformer_cfg": {
-        "n_layers": 2,
-        "d_model": 62,
+        "n_layers": 3,
+        "d_model": 24,
         "n_ctx": 10,
-        "d_head": 15,
+        "d_head": 6,
         "model_name": "custom",
         "n_heads": 4,
-        "d_mlp": 248,
+        "d_mlp": 96,
         "act_fn": "gelu",
-        "d_vocab": 5,
+        "d_vocab": 13,
         "eps": 1e-05,
         "use_attn_result": true,
         "use_attn_scale": true,
@@ -2240,15 +2226,15 @@
         "attention_dir": "bidirectional",
         "attn_only": false,
         "seed": 0,
-        "initializer_range": 0.08295613557843402,
+        "initializer_range": 0.11094003924504584,
         "init_weights": true,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
         "final_rms": false,
-        "d_vocab_out": 56,
+        "d_vocab_out": 16,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
-        "n_params": 91264,
+        "n_params": 20736,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
@@ -2263,7 +2249,7 @@
         "num_experts": null,
         "experts_per_token": null
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/25/ll_model_cfg.pkl",
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/45/ll_model_cfg.pkl",
       "training_args": {
         "output_dir": "/circuits-benchmark/results",
         "atol": 0.05,
@@ -2293,49 +2279,56 @@
         "siit_sampling": "sample_all",
         "val_iia_sampling": "all"
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/25/meta.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/25/ll_model.pth",
-      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/25/edges.pkl"
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/45/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/45/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/45/edges.pkl"
     },
     {
-      "case_id": "26",
-      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/26",
-      "task_description": "Creates a cascading effect by repeating each token in sequence incrementally.",
+      "case_id": "114",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/114",
+      "task_description": "Apply a logarithm base 10 to each element of the input sequence.",
       "vocab": [
-        "a",
-        "b",
-        "c"
+        1,
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8,
+        9,
+        10
       ],
       "max_seq_len": 10,
       "min_seq_len": 4,
       "files": [
         {
           "file_name": "edges.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/edges.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/114/edges.pkl"
         },
         {
           "file_name": "ll_model.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/ll_model.pth"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/114/ll_model.pth"
         },
         {
           "file_name": "ll_model_cfg.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/ll_model_cfg.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/114/ll_model_cfg.pkl"
         },
         {
           "file_name": "meta.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/meta.json"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/114/meta.json"
         }
       ],
       "transformer_cfg": {
         "n_layers": 2,
-        "d_model": 21,
+        "d_model": 4,
         "n_ctx": 10,
-        "d_head": 5,
+        "d_head": 1,
         "model_name": "custom",
         "n_heads": 4,
-        "d_mlp": 84,
+        "d_mlp": 16,
         "act_fn": "gelu",
-        "d_vocab": 5,
+        "d_vocab": 12,
         "eps": 1e-05,
         "use_attn_result": true,
         "use_attn_scale": true,
@@ -2357,15 +2350,15 @@
         "attention_dir": "causal",
         "attn_only": false,
         "seed": 0,
-        "initializer_range": 0.12344267996967354,
+        "initializer_range": 0.14368424162141993,
         "init_weights": true,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
         "final_rms": false,
-        "d_vocab_out": 27,
+        "d_vocab_out": 10,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
-        "n_params": 10416,
+        "n_params": 384,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
@@ -2375,30 +2368,49 @@
         "post_embedding_ln": false,
         "rotary_base": 10000,
         "trust_remote_code": false,
-        "rotary_adjacent_pairs": false
+        "rotary_adjacent_pairs": false,
+        "load_in_4bit": false,
+        "num_experts": null,
+        "experts_per_token": null
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/ll_model_cfg.pkl",
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/114/ll_model_cfg.pkl",
       "training_args": {
+        "output_dir": "/circuits-benchmark/results",
         "atol": 0.05,
-        "lr": 0.01,
-        "use_single_loss": false,
+        "lr": 0.001,
+        "use_single_loss": true,
         "iit_weight": 1.0,
-        "behavior_weight": 1.0,
+        "behavior_weight": 0.4,
         "strict_weight": 0.4,
-        "epochs": 2000,
+        "epochs": 1000,
+        "early_stop_accuracy_threshold": 99.9,
         "act_fn": "gelu",
+        "use_wandb": true,
+        "save_model_to_wandb": true,
         "clip_grad_norm": 0.1,
-        "lr_scheduler": "",
-        "model_pair": "strict"
+        "lr_scheduler": "linear",
+        "model_pair": "strict",
+        "same_size": false,
+        "seed": 67,
+        "batch_size": 256,
+        "include_mlp": false,
+        "detach_while_caching": true,
+        "scheduler_val_metric": [
+          "val/accuracy",
+          "val/IIA",
+          "val/strict_accuracy"
+        ],
+        "siit_sampling": "sample_all",
+        "val_iia_sampling": "all"
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/meta.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/ll_model.pth",
-      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/edges.pkl"
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/114/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/114/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/114/edges.pkl"
     },
     {
-      "case_id": "29",
-      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/29",
-      "task_description": "Creates abbreviations for each token in the sequence.",
+      "case_id": "33",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/33",
+      "task_description": "Checks if each token's length is odd or even.",
       "vocab": [
         "J",
         "LB",
@@ -2414,29 +2426,29 @@
       "files": [
         {
           "file_name": "edges.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/edges.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/edges.pkl"
         },
         {
           "file_name": "ll_model.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/ll_model.pth"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/ll_model.pth"
         },
         {
           "file_name": "ll_model_cfg.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/ll_model_cfg.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/ll_model_cfg.pkl"
         },
         {
           "file_name": "meta.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/meta.json"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/meta.json"
         }
       ],
       "transformer_cfg": {
         "n_layers": 2,
-        "d_model": 13,
+        "d_model": 4,
         "n_ctx": 10,
-        "d_head": 3,
+        "d_head": 1,
         "model_name": "custom",
         "n_heads": 4,
-        "d_mlp": 52,
+        "d_mlp": 16,
         "act_fn": "gelu",
         "d_vocab": 10,
         "eps": 1e-05,
@@ -2460,15 +2472,15 @@
         "attention_dir": "causal",
         "attn_only": false,
         "seed": 0,
-        "initializer_range": 0.1539600717839002,
+        "initializer_range": 0.17457431218879393,
         "init_weights": true,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
         "final_rms": false,
-        "d_vocab_out": 8,
+        "d_vocab_out": 2,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
-        "n_params": 3952,
+        "n_params": 384,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
@@ -2480,10 +2492,10 @@
         "trust_remote_code": false,
         "rotary_adjacent_pairs": false
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/ll_model_cfg.pkl",
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/ll_model_cfg.pkl",
       "training_args": {
         "atol": 0.05,
-        "lr": 0.01,
+        "lr": 0.001,
         "use_single_loss": false,
         "iit_weight": 1.0,
         "behavior_weight": 1.0,
@@ -2491,167 +2503,160 @@
         "epochs": 2000,
         "act_fn": "gelu",
         "clip_grad_norm": 0.1,
-        "lr_scheduler": "",
-        "model_pair": "strict"
+        "lr_scheduler": ""
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/meta.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/ll_model.pth",
-      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/edges.pkl"
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/edges.pkl"
     },
     {
-      "case_id": "3",
-      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/3",
-      "task_description": "Returns the fraction of 'x' in the input up to the i-th position for all i.",
-      "vocab": [
-        "a",
-        "b",
-        "c",
-        "x"
-      ],
-      "max_seq_len": 5,
-      "min_seq_len": 4,
+      "case_id": "ioi",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/ioi",
+      "task_description": "Indirect Object Identification (IOI) task.",
+      "max_seq_len": 16,
+      "min_seq_len": 16,
       "files": [
         {
           "file_name": "edges.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/edges.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/edges.pkl"
         },
         {
           "file_name": "ll_model.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/ll_model.pth"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/ll_model.pth"
         },
         {
           "file_name": "ll_model_cfg.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/ll_model_cfg.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/ll_model_cfg.pkl"
         },
         {
           "file_name": "meta.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/meta.json"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/meta.json"
         }
       ],
       "transformer_cfg": {
-        "n_layers": 2,
-        "d_model": 12,
-        "n_ctx": 5,
-        "d_head": 3,
-        "model_name": "custom",
+        "n_layers": 6,
+        "d_model": 64,
+        "n_ctx": 1024,
+        "d_head": 16,
+        "model_name": "gpt2",
         "n_heads": 4,
-        "d_mlp": 48,
-        "act_fn": "gelu",
-        "d_vocab": 6,
+        "d_mlp": 3072,
+        "act_fn": "gelu_new",
+        "d_vocab": 50257,
         "eps": 1e-05,
-        "use_attn_result": true,
+        "use_attn_result": false,
         "use_attn_scale": true,
-        "use_split_qkv_input": true,
-        "use_hook_mlp_in": true,
+        "use_split_qkv_input": false,
+        "use_hook_mlp_in": false,
         "use_attn_in": false,
         "use_local_attn": false,
-        "original_architecture": null,
+        "original_architecture": "GPT2LMHeadModel",
         "from_checkpoint": false,
         "checkpoint_index": null,
         "checkpoint_label_type": null,
         "checkpoint_value": null,
-        "tokenizer_name": null,
+        "tokenizer_name": "gpt2",
         "window_size": null,
         "attn_types": null,
         "init_mode": "gpt2",
-        "normalization_type": null,
+        "normalization_type": "LNPre",
         "n_devices": 1,
         "attention_dir": "causal",
         "attn_only": false,
-        "seed": 0,
-        "initializer_range": 0.22188007849009167,
-        "init_weights": true,
+        "seed": null,
+        "initializer_range": 0.02886751345948129,
+        "init_weights": false,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
         "final_rms": false,
-        "d_vocab_out": 1,
+        "d_vocab_out": 50257,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
-        "n_params": 3456,
+        "n_params": 84934656,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
         "dtype": "torch.float32",
-        "tokenizer_prepends_bos": null,
+        "tokenizer_prepends_bos": false,
         "n_key_value_heads": null,
         "post_embedding_ln": false,
         "rotary_base": 10000,
         "trust_remote_code": false,
         "rotary_adjacent_pairs": false
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/ll_model_cfg.pkl",
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/ll_model_cfg.pkl",
       "training_args": {
-        "atol": 0.05,
-        "lr": 0.01,
-        "use_single_loss": false,
-        "iit_weight": 1.0,
-        "behavior_weight": 1.0,
-        "strict_weight": 10.0,
-        "epochs": 2000,
-        "act_fn": "gelu",
-        "clip_grad_norm": 0.1,
-        "lr_scheduler": "",
-        "model_pair": "strict",
-        "same_size": false
-      },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/meta.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/ll_model.pth",
-      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/edges.pkl"
+        "next_token": false,
+        "non_ioi_thresh": 0.65,
+        "use_per_token_check": false,
+        "batch_size": 512,
+        "num_workers": 0,
+        "early_stop": true,
+        "lr_scheduler": null,
+        "scheduler_val_metric": [
+          "val/accuracy",
+          "val/IIA"
+        ],
+        "scheduler_mode": "max",
+        "scheduler_kwargs": {},
+        "clip_grad_norm": 1.0,
+        "seed": 0,
+        "lr": 0.001,
+        "detach_while_caching": true,
+        "optimizer_kwargs": {
+          "betas": [
+            0.9,
+            0.9
+          ]
+        },
+        "atol": 0.05,
+        "use_single_loss": false,
+        "iit_weight": 1.0,
+        "behavior_weight": 1.0,
+        "val_IIA_sampling": "random",
+        "use_all_tokens_for_behavior": false,
+        "strict_weight": 0.4,
+        "siit_sampling": "individual"
+      },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/edges.pkl"
     },
     {
-      "case_id": "30",
-      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/30",
-      "task_description": "Tags numeric tokens in a sequence based on whether they fall within a given range.",
+      "case_id": "55",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/55",
+      "task_description": "Applies the hyperbolic sine to each element.",
       "vocab": [
-        "0",
-        "1",
-        "10",
-        "11",
-        "12",
-        "13",
-        "14",
-        "15",
-        "16",
-        "17",
-        "18",
-        "19",
-        "2",
-        "20",
-        "21",
-        "22",
-        "23",
-        "24",
-        "25",
-        "26",
-        "27",
-        "28",
-        "29",
-        "3",
-        "4",
-        "5",
-        "6",
-        "7",
-        "8",
-        "9"
+        0,
+        1,
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8,
+        9,
+        10
       ],
       "max_seq_len": 10,
       "min_seq_len": 4,
       "files": [
         {
           "file_name": "edges.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/30/edges.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/55/edges.pkl"
         },
         {
           "file_name": "ll_model.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/30/ll_model.pth"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/55/ll_model.pth"
         },
         {
           "file_name": "ll_model_cfg.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/30/ll_model_cfg.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/55/ll_model_cfg.pkl"
         },
         {
           "file_name": "meta.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/30/meta.json"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/55/meta.json"
         }
       ],
       "transformer_cfg": {
@@ -2663,7 +2668,7 @@
         "n_heads": 4,
         "d_mlp": 16,
         "act_fn": "gelu",
-        "d_vocab": 32,
+        "d_vocab": 13,
         "eps": 1e-05,
         "use_attn_result": true,
         "use_attn_scale": true,
@@ -2685,12 +2690,12 @@
         "attention_dir": "causal",
         "attn_only": false,
         "seed": 0,
-        "initializer_range": 0.12199885626608374,
+        "initializer_range": 0.1392621247645583,
         "init_weights": true,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
         "final_rms": false,
-        "d_vocab_out": 2,
+        "d_vocab_out": 11,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
         "n_params": 384,
@@ -2708,7 +2713,7 @@
         "num_experts": null,
         "experts_per_token": null
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/30/ll_model_cfg.pkl",
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/55/ll_model_cfg.pkl",
       "training_args": {
         "output_dir": "/circuits-benchmark/results",
         "atol": 0.05,
@@ -2716,7 +2721,7 @@
         "use_single_loss": true,
         "iit_weight": 1.0,
         "behavior_weight": 0.4,
-        "strict_weight": 0.4,
+        "strict_weight": 0.7,
         "epochs": 1000,
         "early_stop_accuracy_threshold": 99.9,
         "act_fn": "gelu",
@@ -2738,55 +2743,49 @@
         "siit_sampling": "sample_all",
         "val_iia_sampling": "all"
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/30/meta.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/30/ll_model.pth",
-      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/30/edges.pkl"
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/55/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/55/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/55/edges.pkl"
     },
     {
-      "case_id": "31",
-      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/31",
-      "task_description": "Identify if tokens in the sequence are anagrams of the word 'listen'.",
+      "case_id": "21",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/21",
+      "task_description": "Extract unique tokens from a string",
       "vocab": [
-        "J",
-        "LB",
-        "TPSI",
-        "V",
+        "a",
         "b",
-        "listen",
-        "no",
-        "oCLrZaW",
-        "poiVg"
+        "c"
       ],
       "max_seq_len": 10,
       "min_seq_len": 4,
       "files": [
         {
           "file_name": "edges.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/31/edges.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/edges.pkl"
         },
         {
           "file_name": "ll_model.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/31/ll_model.pth"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/ll_model.pth"
         },
         {
           "file_name": "ll_model_cfg.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/31/ll_model_cfg.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/ll_model_cfg.pkl"
         },
         {
           "file_name": "meta.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/31/meta.json"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/meta.json"
         }
       ],
       "transformer_cfg": {
-        "n_layers": 2,
-        "d_model": 4,
+        "n_layers": 4,
+        "d_model": 50,
         "n_ctx": 10,
-        "d_head": 1,
+        "d_head": 12,
         "model_name": "custom",
         "n_heads": 4,
-        "d_mlp": 16,
+        "d_mlp": 200,
         "act_fn": "gelu",
-        "d_vocab": 11,
+        "d_vocab": 5,
         "eps": 1e-05,
         "use_attn_result": true,
         "use_attn_scale": true,
@@ -2808,15 +2807,15 @@
         "attention_dir": "causal",
         "attn_only": false,
         "seed": 0,
-        "initializer_range": 0.17056057308448835,
+        "initializer_range": 0.09847319278346618,
         "init_weights": true,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
         "final_rms": false,
-        "d_vocab_out": 2,
+        "d_vocab_out": 3,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
-        "n_params": 384,
+        "n_params": 118400,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
@@ -2826,89 +2825,67 @@
         "post_embedding_ln": false,
         "rotary_base": 10000,
         "trust_remote_code": false,
-        "rotary_adjacent_pairs": false,
-        "load_in_4bit": false,
-        "num_experts": null,
-        "experts_per_token": null
+        "rotary_adjacent_pairs": false
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/31/ll_model_cfg.pkl",
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/ll_model_cfg.pkl",
       "training_args": {
-        "output_dir": "/circuits-benchmark/results",
         "atol": 0.05,
-        "lr": 0.001,
-        "use_single_loss": true,
+        "lr": 0.0005,
+        "use_single_loss": false,
         "iit_weight": 1.0,
-        "behavior_weight": 0.4,
-        "strict_weight": 0.4,
-        "epochs": 1000,
-        "early_stop_accuracy_threshold": 99.9,
+        "behavior_weight": 1.0,
+        "strict_weight": 0.5,
+        "epochs": 2000,
         "act_fn": "gelu",
-        "use_wandb": true,
-        "save_model_to_wandb": true,
         "clip_grad_norm": 0.1,
-        "lr_scheduler": "linear",
-        "model_pair": "strict",
-        "same_size": false,
-        "seed": 67,
-        "batch_size": 256,
-        "include_mlp": false,
-        "detach_while_caching": true,
-        "scheduler_val_metric": [
-          "val/accuracy",
-          "val/IIA",
-          "val/strict_accuracy"
-        ],
-        "siit_sampling": "sample_all",
-        "val_iia_sampling": "all"
+        "lr_scheduler": "",
+        "model_pair": "strict"
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/31/meta.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/31/ll_model.pth",
-      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/31/edges.pkl"
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/edges.pkl"
     },
     {
-      "case_id": "33",
-      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/33",
-      "task_description": "Checks if each token's length is odd or even.",
+      "case_id": "4",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/4",
+      "task_description": "Return fraction of previous open tokens minus the fraction of close tokens.",
       "vocab": [
-        "J",
-        "LB",
-        "TPSI",
-        "V",
+        "(",
+        ")",
+        "a",
         "b",
-        "no",
-        "oCLrZaW",
-        "poiVg"
+        "c"
       ],
       "max_seq_len": 10,
       "min_seq_len": 4,
       "files": [
         {
           "file_name": "edges.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/edges.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/edges.pkl"
         },
         {
           "file_name": "ll_model.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/ll_model.pth"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/ll_model.pth"
         },
         {
           "file_name": "ll_model_cfg.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/ll_model_cfg.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/ll_model_cfg.pkl"
         },
         {
           "file_name": "meta.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/meta.json"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/meta.json"
         }
       ],
       "transformer_cfg": {
         "n_layers": 2,
-        "d_model": 4,
+        "d_model": 20,
         "n_ctx": 10,
-        "d_head": 1,
+        "d_head": 5,
         "model_name": "custom",
         "n_heads": 4,
-        "d_mlp": 16,
+        "d_mlp": 80,
         "act_fn": "gelu",
-        "d_vocab": 10,
+        "d_vocab": 7,
         "eps": 1e-05,
         "use_attn_result": true,
         "use_attn_scale": true,
@@ -2930,15 +2907,15 @@
         "attention_dir": "causal",
         "attn_only": false,
         "seed": 0,
-        "initializer_range": 0.17457431218879393,
+        "initializer_range": 0.17056057308448835,
         "init_weights": true,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
         "final_rms": false,
-        "d_vocab_out": 2,
+        "d_vocab_out": 1,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
-        "n_params": 384,
+        "n_params": 9600,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
@@ -2950,7 +2927,7 @@
         "trust_remote_code": false,
         "rotary_adjacent_pairs": false
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/ll_model_cfg.pkl",
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/ll_model_cfg.pkl",
       "training_args": {
         "atol": 0.05,
         "lr": 0.001,
@@ -2963,52 +2940,3376 @@
         "clip_grad_norm": 0.1,
         "lr_scheduler": ""
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/meta.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/ll_model.pth",
-      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/edges.pkl"
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/edges.pkl"
     },
     {
-      "case_id": "34",
-      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/34",
-      "task_description": "Calculate the ratio of vowels to consonants in each word.",
+      "case_id": "30",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/30",
+      "task_description": "Tags numeric tokens in a sequence based on whether they fall within a given range.",
       "vocab": [
-        "J",
-        "LB",
-        "TPSI",
-        "V",
-        "b",
-        "no",
-        "oCLrZaW",
-        "poiVg"
-      ],
-      "max_seq_len": 10,
-      "min_seq_len": 4,
-      "files": [
-        {
-          "file_name": "edges.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/edges.pkl"
-        },
-        {
-          "file_name": "ll_model.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/ll_model.pth"
-        },
-        {
-          "file_name": "ll_model_cfg.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/ll_model_cfg.pkl"
-        },
-        {
+        "0",
+        "1",
+        "10",
+        "11",
+        "12",
+        "13",
+        "14",
+        "15",
+        "16",
+        "17",
+        "18",
+        "19",
+        "2",
+        "20",
+        "21",
+        "22",
+        "23",
+        "24",
+        "25",
+        "26",
+        "27",
+        "28",
+        "29",
+        "3",
+        "4",
+        "5",
+        "6",
+        "7",
+        "8",
+        "9"
+      ],
+      "max_seq_len": 10,
+      "min_seq_len": 4,
+      "files": [
+        {
+          "file_name": "edges.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/30/edges.pkl"
+        },
+        {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/30/ll_model.pth"
+        },
+        {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/30/ll_model_cfg.pkl"
+        },
+        {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/30/meta.json"
+        }
+      ],
+      "transformer_cfg": {
+        "n_layers": 2,
+        "d_model": 4,
+        "n_ctx": 10,
+        "d_head": 1,
+        "model_name": "custom",
+        "n_heads": 4,
+        "d_mlp": 16,
+        "act_fn": "gelu",
+        "d_vocab": 32,
+        "eps": 1e-05,
+        "use_attn_result": true,
+        "use_attn_scale": true,
+        "use_split_qkv_input": true,
+        "use_hook_mlp_in": true,
+        "use_attn_in": false,
+        "use_local_attn": false,
+        "original_architecture": null,
+        "from_checkpoint": false,
+        "checkpoint_index": null,
+        "checkpoint_label_type": null,
+        "checkpoint_value": null,
+        "tokenizer_name": null,
+        "window_size": null,
+        "attn_types": null,
+        "init_mode": "gpt2",
+        "normalization_type": null,
+        "n_devices": 1,
+        "attention_dir": "causal",
+        "attn_only": false,
+        "seed": 0,
+        "initializer_range": 0.12199885626608374,
+        "init_weights": true,
+        "scale_attn_by_inverse_layer_idx": false,
+        "positional_embedding_type": "standard",
+        "final_rms": false,
+        "d_vocab_out": 2,
+        "parallel_attn_mlp": false,
+        "rotary_dim": null,
+        "n_params": 384,
+        "use_hook_tokens": false,
+        "gated_mlp": false,
+        "default_prepend_bos": true,
+        "dtype": "torch.float32",
+        "tokenizer_prepends_bos": null,
+        "n_key_value_heads": null,
+        "post_embedding_ln": false,
+        "rotary_base": 10000,
+        "trust_remote_code": false,
+        "rotary_adjacent_pairs": false,
+        "load_in_4bit": false,
+        "num_experts": null,
+        "experts_per_token": null
+      },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/30/ll_model_cfg.pkl",
+      "training_args": {
+        "output_dir": "/circuits-benchmark/results",
+        "atol": 0.05,
+        "lr": 0.001,
+        "use_single_loss": true,
+        "iit_weight": 1.0,
+        "behavior_weight": 0.4,
+        "strict_weight": 0.4,
+        "epochs": 1000,
+        "early_stop_accuracy_threshold": 99.9,
+        "act_fn": "gelu",
+        "use_wandb": true,
+        "save_model_to_wandb": true,
+        "clip_grad_norm": 0.1,
+        "lr_scheduler": "linear",
+        "model_pair": "strict",
+        "same_size": false,
+        "seed": 67,
+        "batch_size": 256,
+        "include_mlp": false,
+        "detach_while_caching": true,
+        "scheduler_val_metric": [
+          "val/accuracy",
+          "val/IIA",
+          "val/strict_accuracy"
+        ],
+        "siit_sampling": "sample_all",
+        "val_iia_sampling": "all"
+      },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/30/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/30/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/30/edges.pkl"
+    },
+    {
+      "case_id": "90",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/90",
+      "task_description": "Replaces a specific token with another one.",
+      "vocab": [
+        "-",
+        "J",
+        "LB",
+        "TPSI",
+        "V",
+        "b",
+        "findme",
+        "no",
+        "oCLrZaW",
+        "poiVg"
+      ],
+      "max_seq_len": 10,
+      "min_seq_len": 4,
+      "files": [
+        {
+          "file_name": "edges.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/90/edges.pkl"
+        },
+        {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/90/ll_model.pth"
+        },
+        {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/90/ll_model_cfg.pkl"
+        },
+        {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/90/meta.json"
+        }
+      ],
+      "transformer_cfg": {
+        "n_layers": 2,
+        "d_model": 4,
+        "n_ctx": 10,
+        "d_head": 1,
+        "model_name": "custom",
+        "n_heads": 4,
+        "d_mlp": 16,
+        "act_fn": "gelu",
+        "d_vocab": 12,
+        "eps": 1e-05,
+        "use_attn_result": true,
+        "use_attn_scale": true,
+        "use_split_qkv_input": true,
+        "use_hook_mlp_in": true,
+        "use_attn_in": false,
+        "use_local_attn": false,
+        "original_architecture": null,
+        "from_checkpoint": false,
+        "checkpoint_index": null,
+        "checkpoint_label_type": null,
+        "checkpoint_value": null,
+        "tokenizer_name": null,
+        "window_size": null,
+        "attn_types": null,
+        "init_mode": "gpt2",
+        "normalization_type": null,
+        "n_devices": 1,
+        "attention_dir": "causal",
+        "attn_only": false,
+        "seed": 0,
+        "initializer_range": 0.1460593486680443,
+        "init_weights": true,
+        "scale_attn_by_inverse_layer_idx": false,
+        "positional_embedding_type": "standard",
+        "final_rms": false,
+        "d_vocab_out": 9,
+        "parallel_attn_mlp": false,
+        "rotary_dim": null,
+        "n_params": 384,
+        "use_hook_tokens": false,
+        "gated_mlp": false,
+        "default_prepend_bos": true,
+        "dtype": "torch.float32",
+        "tokenizer_prepends_bos": null,
+        "n_key_value_heads": null,
+        "post_embedding_ln": false,
+        "rotary_base": 10000,
+        "trust_remote_code": false,
+        "rotary_adjacent_pairs": false,
+        "load_in_4bit": false,
+        "num_experts": null,
+        "experts_per_token": null
+      },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/90/ll_model_cfg.pkl",
+      "training_args": {
+        "output_dir": "/circuits-benchmark/results",
+        "atol": 0.05,
+        "lr": 0.001,
+        "use_single_loss": true,
+        "iit_weight": 1.0,
+        "behavior_weight": 0.4,
+        "strict_weight": 0.4,
+        "epochs": 1000,
+        "early_stop_accuracy_threshold": 99.9,
+        "act_fn": "gelu",
+        "use_wandb": true,
+        "save_model_to_wandb": true,
+        "clip_grad_norm": 0.1,
+        "lr_scheduler": "linear",
+        "model_pair": "strict",
+        "same_size": false,
+        "seed": 82,
+        "batch_size": 256,
+        "include_mlp": false,
+        "detach_while_caching": true,
+        "scheduler_val_metric": [
+          "val/accuracy",
+          "val/IIA",
+          "val/strict_accuracy"
+        ],
+        "siit_sampling": "sample_all",
+        "val_iia_sampling": "all"
+      },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/90/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/90/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/90/edges.pkl"
+    },
+    {
+      "case_id": "39",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/39",
+      "task_description": "Returns the fraction of 'x' in the input up to the i-th position for all i.",
+      "vocab": [
+        "a",
+        "b",
+        "c",
+        "d",
+        "e",
+        "f",
+        "g",
+        "h",
+        "i",
+        "j",
+        "k",
+        "l",
+        "m",
+        "n",
+        "o",
+        "p",
+        "q",
+        "r",
+        "s",
+        "t",
+        "u",
+        "v",
+        "w",
+        "x",
+        "y",
+        "z"
+      ],
+      "max_seq_len": 60,
+      "min_seq_len": 4,
+      "files": [
+        {
+          "file_name": "edges.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/39/edges.pkl"
+        },
+        {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/39/ll_model.pth"
+        },
+        {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/39/ll_model_cfg.pkl"
+        },
+        {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/39/meta.json"
+        }
+      ],
+      "transformer_cfg": {
+        "n_layers": 2,
+        "d_model": 120,
+        "n_ctx": 60,
+        "d_head": 30,
+        "model_name": "custom",
+        "n_heads": 4,
+        "d_mlp": 480,
+        "act_fn": "gelu",
+        "d_vocab": 28,
+        "eps": 1e-05,
+        "use_attn_result": true,
+        "use_attn_scale": true,
+        "use_split_qkv_input": true,
+        "use_hook_mlp_in": true,
+        "use_attn_in": false,
+        "use_local_attn": false,
+        "original_architecture": null,
+        "from_checkpoint": false,
+        "checkpoint_index": null,
+        "checkpoint_label_type": null,
+        "checkpoint_value": null,
+        "tokenizer_name": null,
+        "window_size": null,
+        "attn_types": null,
+        "init_mode": "gpt2",
+        "normalization_type": null,
+        "n_devices": 1,
+        "attention_dir": "causal",
+        "attn_only": false,
+        "seed": 0,
+        "initializer_range": 0.08432740427115679,
+        "init_weights": true,
+        "scale_attn_by_inverse_layer_idx": false,
+        "positional_embedding_type": "standard",
+        "final_rms": false,
+        "d_vocab_out": 1,
+        "parallel_attn_mlp": false,
+        "rotary_dim": null,
+        "n_params": 345600,
+        "use_hook_tokens": false,
+        "gated_mlp": false,
+        "default_prepend_bos": true,
+        "dtype": "torch.float32",
+        "tokenizer_prepends_bos": null,
+        "n_key_value_heads": null,
+        "post_embedding_ln": false,
+        "rotary_base": 10000,
+        "trust_remote_code": false,
+        "rotary_adjacent_pairs": false,
+        "load_in_4bit": false,
+        "num_experts": null,
+        "experts_per_token": null
+      },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/39/ll_model_cfg.pkl",
+      "training_args": {
+        "output_dir": "/circuits-benchmark/results",
+        "atol": 0.05,
+        "lr": 0.001,
+        "use_single_loss": true,
+        "iit_weight": 1.0,
+        "behavior_weight": 0.4,
+        "strict_weight": 0.4,
+        "epochs": 1000,
+        "early_stop_accuracy_threshold": 99.9,
+        "act_fn": "gelu",
+        "use_wandb": true,
+        "save_model_to_wandb": true,
+        "clip_grad_norm": 0.1,
+        "lr_scheduler": "linear",
+        "model_pair": "strict",
+        "same_size": false,
+        "seed": 67,
+        "batch_size": 256,
+        "include_mlp": false,
+        "detach_while_caching": true,
+        "scheduler_val_metric": [
+          "val/accuracy",
+          "val/IIA",
+          "val/strict_accuracy"
+        ],
+        "siit_sampling": "sample_all",
+        "val_iia_sampling": "all"
+      },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/39/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/39/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/39/edges.pkl"
+    },
+    {
+      "case_id": "75",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/75",
+      "task_description": "Double each element of the input sequence.",
+      "vocab": [
+        0,
+        1,
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8,
+        9,
+        10
+      ],
+      "max_seq_len": 10,
+      "min_seq_len": 4,
+      "files": [
+        {
+          "file_name": "edges.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/75/edges.pkl"
+        },
+        {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/75/ll_model.pth"
+        },
+        {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/75/ll_model_cfg.pkl"
+        },
+        {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/75/meta.json"
+        }
+      ],
+      "transformer_cfg": {
+        "n_layers": 2,
+        "d_model": 4,
+        "n_ctx": 10,
+        "d_head": 1,
+        "model_name": "custom",
+        "n_heads": 4,
+        "d_mlp": 16,
+        "act_fn": "gelu",
+        "d_vocab": 13,
+        "eps": 1e-05,
+        "use_attn_result": true,
+        "use_attn_scale": true,
+        "use_split_qkv_input": true,
+        "use_hook_mlp_in": true,
+        "use_attn_in": false,
+        "use_local_attn": false,
+        "original_architecture": null,
+        "from_checkpoint": false,
+        "checkpoint_index": null,
+        "checkpoint_label_type": null,
+        "checkpoint_value": null,
+        "tokenizer_name": null,
+        "window_size": null,
+        "attn_types": null,
+        "init_mode": "gpt2",
+        "normalization_type": null,
+        "n_devices": 1,
+        "attention_dir": "causal",
+        "attn_only": false,
+        "seed": 0,
+        "initializer_range": 0.1392621247645583,
+        "init_weights": true,
+        "scale_attn_by_inverse_layer_idx": false,
+        "positional_embedding_type": "standard",
+        "final_rms": false,
+        "d_vocab_out": 11,
+        "parallel_attn_mlp": false,
+        "rotary_dim": null,
+        "n_params": 384,
+        "use_hook_tokens": false,
+        "gated_mlp": false,
+        "default_prepend_bos": true,
+        "dtype": "torch.float32",
+        "tokenizer_prepends_bos": null,
+        "n_key_value_heads": null,
+        "post_embedding_ln": false,
+        "rotary_base": 10000,
+        "trust_remote_code": false,
+        "rotary_adjacent_pairs": false,
+        "load_in_4bit": false,
+        "num_experts": null,
+        "experts_per_token": null
+      },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/75/ll_model_cfg.pkl",
+      "training_args": {
+        "output_dir": "/circuits-benchmark/results",
+        "atol": 0.05,
+        "lr": 0.001,
+        "use_single_loss": true,
+        "iit_weight": 1.0,
+        "behavior_weight": 0.4,
+        "strict_weight": 0.7,
+        "epochs": 1000,
+        "early_stop_accuracy_threshold": 99.9,
+        "act_fn": "gelu",
+        "use_wandb": true,
+        "save_model_to_wandb": true,
+        "clip_grad_norm": 0.1,
+        "lr_scheduler": "linear",
+        "model_pair": "strict",
+        "same_size": false,
+        "seed": 67,
+        "batch_size": 256,
+        "include_mlp": false,
+        "detach_while_caching": true,
+        "scheduler_val_metric": [
+          "val/accuracy",
+          "val/IIA",
+          "val/strict_accuracy"
+        ],
+        "siit_sampling": "sample_all",
+        "val_iia_sampling": "all"
+      },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/75/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/75/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/75/edges.pkl"
+    },
+    {
+      "case_id": "101",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/101",
+      "task_description": "Check if each element is a square of an integer.",
+      "vocab": [
+        0,
+        1,
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8,
+        9,
+        10,
+        11,
+        12,
+        13,
+        14,
+        15,
+        16,
+        17,
+        18,
+        19,
+        20,
+        21,
+        22,
+        23,
+        24,
+        25,
+        26,
+        27,
+        28,
+        29
+      ],
+      "max_seq_len": 10,
+      "min_seq_len": 4,
+      "files": [
+        {
+          "file_name": "edges.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/101/edges.pkl"
+        },
+        {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/101/ll_model.pth"
+        },
+        {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/101/ll_model_cfg.pkl"
+        },
+        {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/101/meta.json"
+        }
+      ],
+      "transformer_cfg": {
+        "n_layers": 2,
+        "d_model": 4,
+        "n_ctx": 10,
+        "d_head": 1,
+        "model_name": "custom",
+        "n_heads": 4,
+        "d_mlp": 16,
+        "act_fn": "gelu",
+        "d_vocab": 32,
+        "eps": 1e-05,
+        "use_attn_result": true,
+        "use_attn_scale": true,
+        "use_split_qkv_input": true,
+        "use_hook_mlp_in": true,
+        "use_attn_in": false,
+        "use_local_attn": false,
+        "original_architecture": null,
+        "from_checkpoint": false,
+        "checkpoint_index": null,
+        "checkpoint_label_type": null,
+        "checkpoint_value": null,
+        "tokenizer_name": null,
+        "window_size": null,
+        "attn_types": null,
+        "init_mode": "gpt2",
+        "normalization_type": null,
+        "n_devices": 1,
+        "attention_dir": "causal",
+        "attn_only": false,
+        "seed": 0,
+        "initializer_range": 0.12199885626608374,
+        "init_weights": true,
+        "scale_attn_by_inverse_layer_idx": false,
+        "positional_embedding_type": "standard",
+        "final_rms": false,
+        "d_vocab_out": 2,
+        "parallel_attn_mlp": false,
+        "rotary_dim": null,
+        "n_params": 384,
+        "use_hook_tokens": false,
+        "gated_mlp": false,
+        "default_prepend_bos": true,
+        "dtype": "torch.float32",
+        "tokenizer_prepends_bos": null,
+        "n_key_value_heads": null,
+        "post_embedding_ln": false,
+        "rotary_base": 10000,
+        "trust_remote_code": false,
+        "rotary_adjacent_pairs": false,
+        "load_in_4bit": false,
+        "num_experts": null,
+        "experts_per_token": null
+      },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/101/ll_model_cfg.pkl",
+      "training_args": {
+        "output_dir": "/circuits-benchmark/results",
+        "atol": 0.05,
+        "lr": 0.001,
+        "use_single_loss": true,
+        "iit_weight": 1.0,
+        "behavior_weight": 0.4,
+        "strict_weight": 0.4,
+        "epochs": 1000,
+        "early_stop_accuracy_threshold": 99.9,
+        "act_fn": "gelu",
+        "use_wandb": true,
+        "save_model_to_wandb": true,
+        "clip_grad_norm": 0.1,
+        "lr_scheduler": "linear",
+        "model_pair": "strict",
+        "same_size": false,
+        "seed": 67,
+        "batch_size": 256,
+        "include_mlp": false,
+        "detach_while_caching": true,
+        "scheduler_val_metric": [
+          "val/accuracy",
+          "val/IIA",
+          "val/strict_accuracy"
+        ],
+        "siit_sampling": "sample_all",
+        "val_iia_sampling": "all"
+      },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/101/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/101/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/101/edges.pkl"
+    },
+    {
+      "case_id": "ioi_next_token",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/ioi_next_token",
+      "task_description": "Indirect Object Identification (IOI) task, trained using next token prediction.",
+      "max_seq_len": 16,
+      "min_seq_len": 16,
+      "files": [
+        {
+          "file_name": "edges.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/edges.pkl"
+        },
+        {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/ll_model.pth"
+        },
+        {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/ll_model_cfg.pkl"
+        },
+        {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/meta.json"
+        }
+      ],
+      "transformer_cfg": {
+        "n_layers": 6,
+        "d_model": 64,
+        "n_ctx": 1024,
+        "d_head": 16,
+        "model_name": "gpt2",
+        "n_heads": 4,
+        "d_mlp": 3072,
+        "act_fn": "gelu_new",
+        "d_vocab": 50257,
+        "eps": 1e-05,
+        "use_attn_result": false,
+        "use_attn_scale": true,
+        "use_split_qkv_input": false,
+        "use_hook_mlp_in": false,
+        "use_attn_in": false,
+        "use_local_attn": false,
+        "original_architecture": "GPT2LMHeadModel",
+        "from_checkpoint": false,
+        "checkpoint_index": null,
+        "checkpoint_label_type": null,
+        "checkpoint_value": null,
+        "tokenizer_name": "gpt2",
+        "window_size": null,
+        "attn_types": null,
+        "init_mode": "gpt2",
+        "normalization_type": "LNPre",
+        "n_devices": 1,
+        "attention_dir": "causal",
+        "attn_only": false,
+        "seed": null,
+        "initializer_range": 0.02886751345948129,
+        "init_weights": true,
+        "scale_attn_by_inverse_layer_idx": false,
+        "positional_embedding_type": "standard",
+        "final_rms": false,
+        "d_vocab_out": 50257,
+        "parallel_attn_mlp": false,
+        "rotary_dim": null,
+        "n_params": 2457600,
+        "use_hook_tokens": false,
+        "gated_mlp": false,
+        "default_prepend_bos": true,
+        "dtype": "torch.float32",
+        "tokenizer_prepends_bos": false,
+        "n_key_value_heads": null,
+        "post_embedding_ln": false,
+        "rotary_base": 10000,
+        "trust_remote_code": false,
+        "rotary_adjacent_pairs": false
+      },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/ll_model_cfg.pkl",
+      "training_args": {
+        "next_token": true,
+        "non_ioi_thresh": 0.65,
+        "use_per_token_check": false,
+        "batch_size": 256,
+        "lr": 0.001,
+        "num_workers": 0,
+        "early_stop": true,
+        "lr_scheduler": null,
+        "scheduler_val_metric": [
+          "val/accuracy",
+          "val/IIA"
+        ],
+        "scheduler_mode": "max",
+        "clip_grad_norm": 1.0,
+        "atol": 0.05,
+        "use_single_loss": false,
+        "iit_weight": 1.0,
+        "behavior_weight": 1.0,
+        "strict_weight": 0.4
+      },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/edges.pkl"
+    },
+    {
+      "case_id": "65",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/65",
+      "task_description": "Calculate the cube root of each element in the input sequence.",
+      "vocab": [
+        0,
+        1,
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8,
+        9,
+        10
+      ],
+      "max_seq_len": 10,
+      "min_seq_len": 4,
+      "files": [
+        {
+          "file_name": "edges.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/65/edges.pkl"
+        },
+        {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/65/ll_model.pth"
+        },
+        {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/65/ll_model_cfg.pkl"
+        },
+        {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/65/meta.json"
+        }
+      ],
+      "transformer_cfg": {
+        "n_layers": 2,
+        "d_model": 4,
+        "n_ctx": 10,
+        "d_head": 1,
+        "model_name": "custom",
+        "n_heads": 4,
+        "d_mlp": 16,
+        "act_fn": "gelu",
+        "d_vocab": 13,
+        "eps": 1e-05,
+        "use_attn_result": true,
+        "use_attn_scale": true,
+        "use_split_qkv_input": true,
+        "use_hook_mlp_in": true,
+        "use_attn_in": false,
+        "use_local_attn": false,
+        "original_architecture": null,
+        "from_checkpoint": false,
+        "checkpoint_index": null,
+        "checkpoint_label_type": null,
+        "checkpoint_value": null,
+        "tokenizer_name": null,
+        "window_size": null,
+        "attn_types": null,
+        "init_mode": "gpt2",
+        "normalization_type": null,
+        "n_devices": 1,
+        "attention_dir": "causal",
+        "attn_only": false,
+        "seed": 0,
+        "initializer_range": 0.1392621247645583,
+        "init_weights": true,
+        "scale_attn_by_inverse_layer_idx": false,
+        "positional_embedding_type": "standard",
+        "final_rms": false,
+        "d_vocab_out": 11,
+        "parallel_attn_mlp": false,
+        "rotary_dim": null,
+        "n_params": 384,
+        "use_hook_tokens": false,
+        "gated_mlp": false,
+        "default_prepend_bos": true,
+        "dtype": "torch.float32",
+        "tokenizer_prepends_bos": null,
+        "n_key_value_heads": null,
+        "post_embedding_ln": false,
+        "rotary_base": 10000,
+        "trust_remote_code": false,
+        "rotary_adjacent_pairs": false,
+        "load_in_4bit": false,
+        "num_experts": null,
+        "experts_per_token": null
+      },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/65/ll_model_cfg.pkl",
+      "training_args": {
+        "output_dir": "/circuits-benchmark/results",
+        "atol": 0.05,
+        "lr": 0.001,
+        "use_single_loss": true,
+        "iit_weight": 1.0,
+        "behavior_weight": 0.4,
+        "strict_weight": 0.7,
+        "epochs": 1000,
+        "early_stop_accuracy_threshold": 99.9,
+        "act_fn": "gelu",
+        "use_wandb": true,
+        "save_model_to_wandb": true,
+        "clip_grad_norm": 0.1,
+        "lr_scheduler": "linear",
+        "model_pair": "strict",
+        "same_size": false,
+        "seed": 67,
+        "batch_size": 256,
+        "include_mlp": false,
+        "detach_while_caching": true,
+        "scheduler_val_metric": [
+          "val/accuracy",
+          "val/IIA",
+          "val/strict_accuracy"
+        ],
+        "siit_sampling": "sample_all",
+        "val_iia_sampling": "all"
+      },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/65/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/65/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/65/edges.pkl"
+    },
+    {
+      "case_id": "14",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/14",
+      "task_description": "Returns the count of 'a' in the input sequence.",
+      "vocab": [
+        "a",
+        "b",
+        "c"
+      ],
+      "max_seq_len": 10,
+      "min_seq_len": 4,
+      "files": [
+        {
+          "file_name": "edges.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/14/edges.pkl"
+        },
+        {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/14/ll_model.pth"
+        },
+        {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/14/ll_model_cfg.pkl"
+        },
+        {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/14/meta.json"
+        }
+      ],
+      "transformer_cfg": {
+        "n_layers": 2,
+        "d_model": 8,
+        "n_ctx": 10,
+        "d_head": 2,
+        "model_name": "custom",
+        "n_heads": 4,
+        "d_mlp": 32,
+        "act_fn": "gelu",
+        "d_vocab": 5,
+        "eps": 1e-05,
+        "use_attn_result": true,
+        "use_attn_scale": true,
+        "use_split_qkv_input": true,
+        "use_hook_mlp_in": true,
+        "use_attn_in": false,
+        "use_local_attn": false,
+        "original_architecture": null,
+        "from_checkpoint": false,
+        "checkpoint_index": null,
+        "checkpoint_label_type": null,
+        "checkpoint_value": null,
+        "tokenizer_name": null,
+        "window_size": null,
+        "attn_types": null,
+        "init_mode": "gpt2",
+        "normalization_type": null,
+        "n_devices": 1,
+        "attention_dir": "bidirectional",
+        "attn_only": false,
+        "seed": 0,
+        "initializer_range": 0.15689290811054724,
+        "init_weights": true,
+        "scale_attn_by_inverse_layer_idx": false,
+        "positional_embedding_type": "standard",
+        "final_rms": false,
+        "d_vocab_out": 10,
+        "parallel_attn_mlp": false,
+        "rotary_dim": null,
+        "n_params": 1536,
+        "use_hook_tokens": false,
+        "gated_mlp": false,
+        "default_prepend_bos": true,
+        "dtype": "torch.float32",
+        "tokenizer_prepends_bos": null,
+        "n_key_value_heads": null,
+        "post_embedding_ln": false,
+        "rotary_base": 10000,
+        "trust_remote_code": false,
+        "rotary_adjacent_pairs": false,
+        "load_in_4bit": false,
+        "num_experts": null,
+        "experts_per_token": null
+      },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/14/ll_model_cfg.pkl",
+      "training_args": {
+        "output_dir": "/circuits-benchmark/results",
+        "atol": 0.05,
+        "lr": 0.001,
+        "use_single_loss": true,
+        "iit_weight": 1.0,
+        "behavior_weight": 0.4,
+        "strict_weight": 0.4,
+        "epochs": 1000,
+        "early_stop_accuracy_threshold": 99.9,
+        "act_fn": "gelu",
+        "use_wandb": true,
+        "save_model_to_wandb": true,
+        "clip_grad_norm": 0.1,
+        "lr_scheduler": "linear",
+        "model_pair": "strict",
+        "same_size": false,
+        "seed": 67,
+        "batch_size": 256,
+        "include_mlp": false,
+        "detach_while_caching": true,
+        "scheduler_val_metric": [
+          "val/accuracy",
+          "val/IIA",
+          "val/strict_accuracy"
+        ],
+        "siit_sampling": "sample_all",
+        "val_iia_sampling": "all"
+      },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/14/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/14/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/14/edges.pkl"
+    },
+    {
+      "case_id": "95",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/95",
+      "task_description": "Counts the distinct prime factors of each number in the input list.",
+      "vocab": [
+        0,
+        1,
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8,
+        9,
+        10,
+        11,
+        12,
+        13,
+        14,
+        15,
+        16,
+        17,
+        18,
+        19,
+        20,
+        21,
+        22,
+        23,
+        24,
+        25,
+        26,
+        27,
+        28,
+        29
+      ],
+      "max_seq_len": 10,
+      "min_seq_len": 4,
+      "files": [
+        {
+          "file_name": "edges.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/95/edges.pkl"
+        },
+        {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/95/ll_model.pth"
+        },
+        {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/95/ll_model_cfg.pkl"
+        },
+        {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/95/meta.json"
+        }
+      ],
+      "transformer_cfg": {
+        "n_layers": 2,
+        "d_model": 4,
+        "n_ctx": 10,
+        "d_head": 1,
+        "model_name": "custom",
+        "n_heads": 4,
+        "d_mlp": 16,
+        "act_fn": "gelu",
+        "d_vocab": 32,
+        "eps": 1e-05,
+        "use_attn_result": true,
+        "use_attn_scale": true,
+        "use_split_qkv_input": true,
+        "use_hook_mlp_in": true,
+        "use_attn_in": false,
+        "use_local_attn": false,
+        "original_architecture": null,
+        "from_checkpoint": false,
+        "checkpoint_index": null,
+        "checkpoint_label_type": null,
+        "checkpoint_value": null,
+        "tokenizer_name": null,
+        "window_size": null,
+        "attn_types": null,
+        "init_mode": "gpt2",
+        "normalization_type": null,
+        "n_devices": 1,
+        "attention_dir": "causal",
+        "attn_only": false,
+        "seed": 0,
+        "initializer_range": 0.12060453783110546,
+        "init_weights": true,
+        "scale_attn_by_inverse_layer_idx": false,
+        "positional_embedding_type": "standard",
+        "final_rms": false,
+        "d_vocab_out": 3,
+        "parallel_attn_mlp": false,
+        "rotary_dim": null,
+        "n_params": 384,
+        "use_hook_tokens": false,
+        "gated_mlp": false,
+        "default_prepend_bos": true,
+        "dtype": "torch.float32",
+        "tokenizer_prepends_bos": null,
+        "n_key_value_heads": null,
+        "post_embedding_ln": false,
+        "rotary_base": 10000,
+        "trust_remote_code": false,
+        "rotary_adjacent_pairs": false,
+        "load_in_4bit": false,
+        "num_experts": null,
+        "experts_per_token": null
+      },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/95/ll_model_cfg.pkl",
+      "training_args": {
+        "output_dir": "/circuits-benchmark/results",
+        "atol": 0.05,
+        "lr": 0.001,
+        "use_single_loss": true,
+        "iit_weight": 1.0,
+        "behavior_weight": 0.4,
+        "strict_weight": 0.7,
+        "epochs": 1000,
+        "early_stop_accuracy_threshold": 99.9,
+        "act_fn": "gelu",
+        "use_wandb": true,
+        "save_model_to_wandb": true,
+        "clip_grad_norm": 0.1,
+        "lr_scheduler": "linear",
+        "model_pair": "strict",
+        "same_size": false,
+        "seed": 67,
+        "batch_size": 256,
+        "include_mlp": false,
+        "detach_while_caching": true,
+        "scheduler_val_metric": [
+          "val/accuracy",
+          "val/IIA",
+          "val/strict_accuracy"
+        ],
+        "siit_sampling": "sample_all",
+        "val_iia_sampling": "all"
+      },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/95/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/95/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/95/edges.pkl"
+    },
+    {
+      "case_id": "84",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/84",
+      "task_description": "Apply the arctangent function to each element of the input sequence.",
+      "vocab": [
+        0,
+        1,
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8,
+        9,
+        10
+      ],
+      "max_seq_len": 10,
+      "min_seq_len": 4,
+      "files": [
+        {
+          "file_name": "edges.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/84/edges.pkl"
+        },
+        {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/84/ll_model.pth"
+        },
+        {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/84/ll_model_cfg.pkl"
+        },
+        {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/84/meta.json"
+        }
+      ],
+      "transformer_cfg": {
+        "n_layers": 2,
+        "d_model": 4,
+        "n_ctx": 10,
+        "d_head": 1,
+        "model_name": "custom",
+        "n_heads": 4,
+        "d_mlp": 16,
+        "act_fn": "gelu",
+        "d_vocab": 13,
+        "eps": 1e-05,
+        "use_attn_result": true,
+        "use_attn_scale": true,
+        "use_split_qkv_input": true,
+        "use_hook_mlp_in": true,
+        "use_attn_in": false,
+        "use_local_attn": false,
+        "original_architecture": null,
+        "from_checkpoint": false,
+        "checkpoint_index": null,
+        "checkpoint_label_type": null,
+        "checkpoint_value": null,
+        "tokenizer_name": null,
+        "window_size": null,
+        "attn_types": null,
+        "init_mode": "gpt2",
+        "normalization_type": null,
+        "n_devices": 1,
+        "attention_dir": "causal",
+        "attn_only": false,
+        "seed": 0,
+        "initializer_range": 0.1392621247645583,
+        "init_weights": true,
+        "scale_attn_by_inverse_layer_idx": false,
+        "positional_embedding_type": "standard",
+        "final_rms": false,
+        "d_vocab_out": 11,
+        "parallel_attn_mlp": false,
+        "rotary_dim": null,
+        "n_params": 384,
+        "use_hook_tokens": false,
+        "gated_mlp": false,
+        "default_prepend_bos": true,
+        "dtype": "torch.float32",
+        "tokenizer_prepends_bos": null,
+        "n_key_value_heads": null,
+        "post_embedding_ln": false,
+        "rotary_base": 10000,
+        "trust_remote_code": false,
+        "rotary_adjacent_pairs": false,
+        "load_in_4bit": false,
+        "num_experts": null,
+        "experts_per_token": null
+      },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/84/ll_model_cfg.pkl",
+      "training_args": {
+        "output_dir": "/circuits-benchmark/results",
+        "atol": 0.05,
+        "lr": 0.001,
+        "use_single_loss": true,
+        "iit_weight": 1.0,
+        "behavior_weight": 0.4,
+        "strict_weight": 0.7,
+        "epochs": 1000,
+        "early_stop_accuracy_threshold": 99.9,
+        "act_fn": "gelu",
+        "use_wandb": true,
+        "save_model_to_wandb": true,
+        "clip_grad_norm": 0.1,
+        "lr_scheduler": "linear",
+        "model_pair": "strict",
+        "same_size": false,
+        "seed": 67,
+        "batch_size": 256,
+        "include_mlp": false,
+        "detach_while_caching": true,
+        "scheduler_val_metric": [
+          "val/accuracy",
+          "val/IIA",
+          "val/strict_accuracy"
+        ],
+        "siit_sampling": "sample_all",
+        "val_iia_sampling": "all"
+      },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/84/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/84/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/84/edges.pkl"
+    },
+    {
+      "case_id": "93",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/93",
+      "task_description": "Swaps the nth with the n+1th element if n%2==1.",
+      "vocab": [
+        0,
+        1,
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8,
+        9,
+        10
+      ],
+      "max_seq_len": 10,
+      "min_seq_len": 4,
+      "files": [
+        {
+          "file_name": "edges.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/93/edges.pkl"
+        },
+        {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/93/ll_model.pth"
+        },
+        {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/93/ll_model_cfg.pkl"
+        },
+        {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/93/meta.json"
+        }
+      ],
+      "transformer_cfg": {
+        "n_layers": 3,
+        "d_model": 20,
+        "n_ctx": 10,
+        "d_head": 5,
+        "model_name": "custom",
+        "n_heads": 4,
+        "d_mlp": 80,
+        "act_fn": "gelu",
+        "d_vocab": 13,
+        "eps": 1e-05,
+        "use_attn_result": true,
+        "use_attn_scale": true,
+        "use_split_qkv_input": true,
+        "use_hook_mlp_in": true,
+        "use_attn_in": false,
+        "use_local_attn": false,
+        "original_architecture": null,
+        "from_checkpoint": false,
+        "checkpoint_index": null,
+        "checkpoint_label_type": null,
+        "checkpoint_value": null,
+        "tokenizer_name": null,
+        "window_size": null,
+        "attn_types": null,
+        "init_mode": "gpt2",
+        "normalization_type": null,
+        "n_devices": 1,
+        "attention_dir": "bidirectional",
+        "attn_only": false,
+        "seed": 0,
+        "initializer_range": 0.10886621079036347,
+        "init_weights": true,
+        "scale_attn_by_inverse_layer_idx": false,
+        "positional_embedding_type": "standard",
+        "final_rms": false,
+        "d_vocab_out": 11,
+        "parallel_attn_mlp": false,
+        "rotary_dim": null,
+        "n_params": 14400,
+        "use_hook_tokens": false,
+        "gated_mlp": false,
+        "default_prepend_bos": true,
+        "dtype": "torch.float32",
+        "tokenizer_prepends_bos": null,
+        "n_key_value_heads": null,
+        "post_embedding_ln": false,
+        "rotary_base": 10000,
+        "trust_remote_code": false,
+        "rotary_adjacent_pairs": false,
+        "load_in_4bit": false,
+        "num_experts": null,
+        "experts_per_token": null
+      },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/93/ll_model_cfg.pkl",
+      "training_args": {
+        "output_dir": "/circuits-benchmark/results",
+        "atol": 0.05,
+        "lr": 0.001,
+        "use_single_loss": true,
+        "iit_weight": 1.0,
+        "behavior_weight": 0.4,
+        "strict_weight": 0.4,
+        "epochs": 1000,
+        "early_stop_accuracy_threshold": 99.9,
+        "act_fn": "gelu",
+        "use_wandb": true,
+        "save_model_to_wandb": true,
+        "clip_grad_norm": 0.1,
+        "lr_scheduler": "linear",
+        "model_pair": "strict",
+        "same_size": false,
+        "seed": 67,
+        "batch_size": 256,
+        "include_mlp": false,
+        "detach_while_caching": true,
+        "scheduler_val_metric": [
+          "val/accuracy",
+          "val/IIA",
+          "val/strict_accuracy"
+        ],
+        "siit_sampling": "sample_all",
+        "val_iia_sampling": "all"
+      },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/93/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/93/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/93/edges.pkl"
+    },
+    {
+      "case_id": "37",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/37",
+      "task_description": "Reverses each word in the sequence except for specified exclusions.",
+      "vocab": [
+        "J",
+        "LB",
+        "TPSI",
+        "V",
+        "b",
+        "no",
+        "oCLrZaW",
+        "poiVg"
+      ],
+      "max_seq_len": 10,
+      "min_seq_len": 4,
+      "files": [
+        {
+          "file_name": "edges.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/edges.pkl"
+        },
+        {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/ll_model.pth"
+        },
+        {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/ll_model_cfg.pkl"
+        },
+        {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/meta.json"
+        }
+      ],
+      "transformer_cfg": {
+        "n_layers": 2,
+        "d_model": 12,
+        "n_ctx": 10,
+        "d_head": 3,
+        "model_name": "custom",
+        "n_heads": 4,
+        "d_mlp": 48,
+        "act_fn": "gelu",
+        "d_vocab": 10,
+        "eps": 1e-05,
+        "use_attn_result": true,
+        "use_attn_scale": true,
+        "use_split_qkv_input": true,
+        "use_hook_mlp_in": true,
+        "use_attn_in": false,
+        "use_local_attn": false,
+        "original_architecture": null,
+        "from_checkpoint": false,
+        "checkpoint_index": null,
+        "checkpoint_label_type": null,
+        "checkpoint_value": null,
+        "tokenizer_name": null,
+        "window_size": null,
+        "attn_types": null,
+        "init_mode": "gpt2",
+        "normalization_type": null,
+        "n_devices": 1,
+        "attention_dir": "causal",
+        "attn_only": false,
+        "seed": 0,
+        "initializer_range": 0.1539600717839002,
+        "init_weights": true,
+        "scale_attn_by_inverse_layer_idx": false,
+        "positional_embedding_type": "standard",
+        "final_rms": false,
+        "d_vocab_out": 8,
+        "parallel_attn_mlp": false,
+        "rotary_dim": null,
+        "n_params": 3456,
+        "use_hook_tokens": false,
+        "gated_mlp": false,
+        "default_prepend_bos": true,
+        "dtype": "torch.float32",
+        "tokenizer_prepends_bos": null,
+        "n_key_value_heads": null,
+        "post_embedding_ln": false,
+        "rotary_base": 10000,
+        "trust_remote_code": false,
+        "rotary_adjacent_pairs": false
+      },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/ll_model_cfg.pkl",
+      "training_args": {
+        "atol": 0.05,
+        "lr": 0.01,
+        "use_single_loss": false,
+        "iit_weight": 1.0,
+        "behavior_weight": 1.0,
+        "strict_weight": 1.0,
+        "epochs": 2000,
+        "act_fn": "gelu",
+        "clip_grad_norm": 0.1,
+        "lr_scheduler": "",
+        "model_pair": "strict"
+      },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/edges.pkl"
+    },
+    {
+      "case_id": "34",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/34",
+      "task_description": "Calculate the ratio of vowels to consonants in each word.",
+      "vocab": [
+        "J",
+        "LB",
+        "TPSI",
+        "V",
+        "b",
+        "no",
+        "oCLrZaW",
+        "poiVg"
+      ],
+      "max_seq_len": 10,
+      "min_seq_len": 4,
+      "files": [
+        {
+          "file_name": "edges.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/edges.pkl"
+        },
+        {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/ll_model.pth"
+        },
+        {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/ll_model_cfg.pkl"
+        },
+        {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/meta.json"
+        }
+      ],
+      "transformer_cfg": {
+        "n_layers": 2,
+        "d_model": 16,
+        "n_ctx": 10,
+        "d_head": 4,
+        "model_name": "custom",
+        "n_heads": 4,
+        "d_mlp": 64,
+        "act_fn": "gelu",
+        "d_vocab": 10,
+        "eps": 1e-05,
+        "use_attn_result": true,
+        "use_attn_scale": true,
+        "use_split_qkv_input": true,
+        "use_hook_mlp_in": true,
+        "use_attn_in": false,
+        "use_local_attn": false,
+        "original_architecture": null,
+        "from_checkpoint": false,
+        "checkpoint_index": null,
+        "checkpoint_label_type": null,
+        "checkpoint_value": null,
+        "tokenizer_name": null,
+        "window_size": null,
+        "attn_types": null,
+        "init_mode": "gpt2",
+        "normalization_type": null,
+        "n_devices": 1,
+        "attention_dir": "causal",
+        "attn_only": false,
+        "seed": 0,
+        "initializer_range": 0.16329931618554522,
+        "init_weights": true,
+        "scale_attn_by_inverse_layer_idx": false,
+        "positional_embedding_type": "standard",
+        "final_rms": false,
+        "d_vocab_out": 5,
+        "parallel_attn_mlp": false,
+        "rotary_dim": null,
+        "n_params": 6144,
+        "use_hook_tokens": false,
+        "gated_mlp": false,
+        "default_prepend_bos": true,
+        "dtype": "torch.float32",
+        "tokenizer_prepends_bos": null,
+        "n_key_value_heads": null,
+        "post_embedding_ln": false,
+        "rotary_base": 10000,
+        "trust_remote_code": false,
+        "rotary_adjacent_pairs": false
+      },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/ll_model_cfg.pkl",
+      "training_args": {
+        "atol": 0.05,
+        "lr": 0.01,
+        "use_single_loss": false,
+        "iit_weight": 1.0,
+        "behavior_weight": 1.0,
+        "strict_weight": 1.0,
+        "epochs": 2000,
+        "act_fn": "gelu",
+        "clip_grad_norm": 0.1,
+        "lr_scheduler": "",
+        "model_pair": "strict"
+      },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/edges.pkl"
+    },
+    {
+      "case_id": "26",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/26",
+      "task_description": "Creates a cascading effect by repeating each token in sequence incrementally.",
+      "vocab": [
+        "a",
+        "b",
+        "c"
+      ],
+      "max_seq_len": 10,
+      "min_seq_len": 4,
+      "files": [
+        {
+          "file_name": "edges.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/edges.pkl"
+        },
+        {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/ll_model.pth"
+        },
+        {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/ll_model_cfg.pkl"
+        },
+        {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/meta.json"
+        }
+      ],
+      "transformer_cfg": {
+        "n_layers": 2,
+        "d_model": 21,
+        "n_ctx": 10,
+        "d_head": 5,
+        "model_name": "custom",
+        "n_heads": 4,
+        "d_mlp": 84,
+        "act_fn": "gelu",
+        "d_vocab": 5,
+        "eps": 1e-05,
+        "use_attn_result": true,
+        "use_attn_scale": true,
+        "use_split_qkv_input": true,
+        "use_hook_mlp_in": true,
+        "use_attn_in": false,
+        "use_local_attn": false,
+        "original_architecture": null,
+        "from_checkpoint": false,
+        "checkpoint_index": null,
+        "checkpoint_label_type": null,
+        "checkpoint_value": null,
+        "tokenizer_name": null,
+        "window_size": null,
+        "attn_types": null,
+        "init_mode": "gpt2",
+        "normalization_type": null,
+        "n_devices": 1,
+        "attention_dir": "causal",
+        "attn_only": false,
+        "seed": 0,
+        "initializer_range": 0.12344267996967354,
+        "init_weights": true,
+        "scale_attn_by_inverse_layer_idx": false,
+        "positional_embedding_type": "standard",
+        "final_rms": false,
+        "d_vocab_out": 27,
+        "parallel_attn_mlp": false,
+        "rotary_dim": null,
+        "n_params": 10416,
+        "use_hook_tokens": false,
+        "gated_mlp": false,
+        "default_prepend_bos": true,
+        "dtype": "torch.float32",
+        "tokenizer_prepends_bos": null,
+        "n_key_value_heads": null,
+        "post_embedding_ln": false,
+        "rotary_base": 10000,
+        "trust_remote_code": false,
+        "rotary_adjacent_pairs": false
+      },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/ll_model_cfg.pkl",
+      "training_args": {
+        "atol": 0.05,
+        "lr": 0.01,
+        "use_single_loss": false,
+        "iit_weight": 1.0,
+        "behavior_weight": 1.0,
+        "strict_weight": 0.4,
+        "epochs": 2000,
+        "act_fn": "gelu",
+        "clip_grad_norm": 0.1,
+        "lr_scheduler": "",
+        "model_pair": "strict"
+      },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/edges.pkl"
+    },
+    {
+      "case_id": "19",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/19",
+      "task_description": "Removes consecutive duplicate tokens from a sequence.",
+      "vocab": [
+        "a",
+        "b",
+        "c"
+      ],
+      "max_seq_len": 15,
+      "min_seq_len": 4,
+      "files": [
+        {
+          "file_name": "edges.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/edges.pkl"
+        },
+        {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/ll_model.pth"
+        },
+        {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/ll_model_cfg.pkl"
+        },
+        {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/meta.json"
+        }
+      ],
+      "transformer_cfg": {
+        "n_layers": 2,
+        "d_model": 32,
+        "n_ctx": 15,
+        "d_head": 8,
+        "model_name": "custom",
+        "n_heads": 4,
+        "d_mlp": 128,
+        "act_fn": "gelu",
+        "d_vocab": 5,
+        "eps": 1e-05,
+        "use_attn_result": true,
+        "use_attn_scale": true,
+        "use_split_qkv_input": true,
+        "use_hook_mlp_in": true,
+        "use_attn_in": false,
+        "use_local_attn": false,
+        "original_architecture": null,
+        "from_checkpoint": false,
+        "checkpoint_index": null,
+        "checkpoint_label_type": null,
+        "checkpoint_value": null,
+        "tokenizer_name": null,
+        "window_size": null,
+        "attn_types": null,
+        "init_mode": "gpt2",
+        "normalization_type": null,
+        "n_devices": 1,
+        "attention_dir": "causal",
+        "attn_only": false,
+        "seed": 0,
+        "initializer_range": 0.15689290811054724,
+        "init_weights": true,
+        "scale_attn_by_inverse_layer_idx": false,
+        "positional_embedding_type": "standard",
+        "final_rms": false,
+        "d_vocab_out": 3,
+        "parallel_attn_mlp": false,
+        "rotary_dim": null,
+        "n_params": 24576,
+        "use_hook_tokens": false,
+        "gated_mlp": false,
+        "default_prepend_bos": true,
+        "dtype": "torch.float32",
+        "tokenizer_prepends_bos": null,
+        "n_key_value_heads": null,
+        "post_embedding_ln": false,
+        "rotary_base": 10000,
+        "trust_remote_code": false,
+        "rotary_adjacent_pairs": false
+      },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/ll_model_cfg.pkl",
+      "training_args": {
+        "atol": 0.05,
+        "lr": 0.001,
+        "use_single_loss": false,
+        "iit_weight": 1.0,
+        "behavior_weight": 1.0,
+        "strict_weight": 0.4,
+        "epochs": 2000,
+        "act_fn": "gelu",
+        "clip_grad_norm": 0.1,
+        "lr_scheduler": ""
+      },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/edges.pkl"
+    },
+    {
+      "case_id": "53",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/53",
+      "task_description": "Increment elements at odd indices by 1",
+      "vocab": [
+        0,
+        1,
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8,
+        9,
+        10
+      ],
+      "max_seq_len": 10,
+      "min_seq_len": 4,
+      "files": [
+        {
+          "file_name": "edges.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/53/edges.pkl"
+        },
+        {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/53/ll_model.pth"
+        },
+        {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/53/ll_model_cfg.pkl"
+        },
+        {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/53/meta.json"
+        }
+      ],
+      "transformer_cfg": {
+        "n_layers": 2,
+        "d_model": 4,
+        "n_ctx": 10,
+        "d_head": 1,
+        "model_name": "custom",
+        "n_heads": 4,
+        "d_mlp": 16,
+        "act_fn": "gelu",
+        "d_vocab": 13,
+        "eps": 1e-05,
+        "use_attn_result": true,
+        "use_attn_scale": true,
+        "use_split_qkv_input": true,
+        "use_hook_mlp_in": true,
+        "use_attn_in": false,
+        "use_local_attn": false,
+        "original_architecture": null,
+        "from_checkpoint": false,
+        "checkpoint_index": null,
+        "checkpoint_label_type": null,
+        "checkpoint_value": null,
+        "tokenizer_name": null,
+        "window_size": null,
+        "attn_types": null,
+        "init_mode": "gpt2",
+        "normalization_type": null,
+        "n_devices": 1,
+        "attention_dir": "causal",
+        "attn_only": false,
+        "seed": 0,
+        "initializer_range": 0.13151918984428584,
+        "init_weights": true,
+        "scale_attn_by_inverse_layer_idx": false,
+        "positional_embedding_type": "standard",
+        "final_rms": false,
+        "d_vocab_out": 12,
+        "parallel_attn_mlp": false,
+        "rotary_dim": null,
+        "n_params": 384,
+        "use_hook_tokens": false,
+        "gated_mlp": false,
+        "default_prepend_bos": true,
+        "dtype": "torch.float32",
+        "tokenizer_prepends_bos": null,
+        "n_key_value_heads": null,
+        "post_embedding_ln": false,
+        "rotary_base": 10000,
+        "trust_remote_code": false,
+        "rotary_adjacent_pairs": false,
+        "load_in_4bit": false,
+        "num_experts": null,
+        "experts_per_token": null
+      },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/53/ll_model_cfg.pkl",
+      "training_args": {
+        "output_dir": "/circuits-benchmark/results",
+        "atol": 0.05,
+        "lr": 0.001,
+        "use_single_loss": true,
+        "iit_weight": 1.0,
+        "behavior_weight": 0.4,
+        "strict_weight": 0.7,
+        "epochs": 1000,
+        "early_stop_accuracy_threshold": 99.9,
+        "act_fn": "gelu",
+        "use_wandb": true,
+        "save_model_to_wandb": true,
+        "clip_grad_norm": 0.1,
+        "lr_scheduler": "linear",
+        "model_pair": "strict",
+        "same_size": false,
+        "seed": 67,
+        "batch_size": 256,
+        "include_mlp": false,
+        "detach_while_caching": true,
+        "scheduler_val_metric": [
+          "val/accuracy",
+          "val/IIA",
+          "val/strict_accuracy"
+        ],
+        "siit_sampling": "sample_all",
+        "val_iia_sampling": "all"
+      },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/53/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/53/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/53/edges.pkl"
+    },
+    {
+      "case_id": "40",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/40",
+      "task_description": "Sum the last and previous to last digits of a number",
+      "vocab": [
+        0,
+        1,
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8,
+        9,
+        10,
+        11,
+        12,
+        13,
+        14,
+        15,
+        16,
+        17,
+        18,
+        19,
+        20,
+        21,
+        22,
+        23,
+        24,
+        25,
+        26,
+        27,
+        28
+      ],
+      "max_seq_len": 10,
+      "min_seq_len": 4,
+      "files": [
+        {
+          "file_name": "edges.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/40/edges.pkl"
+        },
+        {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/40/ll_model.pth"
+        },
+        {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/40/ll_model_cfg.pkl"
+        },
+        {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/40/meta.json"
+        }
+      ],
+      "transformer_cfg": {
+        "n_layers": 2,
+        "d_model": 4,
+        "n_ctx": 10,
+        "d_head": 1,
+        "model_name": "custom",
+        "n_heads": 4,
+        "d_mlp": 16,
+        "act_fn": "gelu",
+        "d_vocab": 31,
+        "eps": 1e-05,
+        "use_attn_result": true,
+        "use_attn_scale": true,
+        "use_split_qkv_input": true,
+        "use_hook_mlp_in": true,
+        "use_attn_in": false,
+        "use_local_attn": false,
+        "original_architecture": null,
+        "from_checkpoint": false,
+        "checkpoint_index": null,
+        "checkpoint_label_type": null,
+        "checkpoint_value": null,
+        "tokenizer_name": null,
+        "window_size": null,
+        "attn_types": null,
+        "init_mode": "gpt2",
+        "normalization_type": null,
+        "n_devices": 1,
+        "attention_dir": "causal",
+        "attn_only": false,
+        "seed": 0,
+        "initializer_range": 0.09847319278346618,
+        "init_weights": true,
+        "scale_attn_by_inverse_layer_idx": false,
+        "positional_embedding_type": "standard",
+        "final_rms": false,
+        "d_vocab_out": 12,
+        "parallel_attn_mlp": false,
+        "rotary_dim": null,
+        "n_params": 384,
+        "use_hook_tokens": false,
+        "gated_mlp": false,
+        "default_prepend_bos": true,
+        "dtype": "torch.float32",
+        "tokenizer_prepends_bos": null,
+        "n_key_value_heads": null,
+        "post_embedding_ln": false,
+        "rotary_base": 10000,
+        "trust_remote_code": false,
+        "rotary_adjacent_pairs": false,
+        "load_in_4bit": false,
+        "num_experts": null,
+        "experts_per_token": null
+      },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/40/ll_model_cfg.pkl",
+      "training_args": {
+        "output_dir": "/circuits-benchmark/results",
+        "atol": 0.05,
+        "lr": 0.001,
+        "use_single_loss": true,
+        "iit_weight": 1.0,
+        "behavior_weight": 0.4,
+        "strict_weight": 0.4,
+        "epochs": 1000,
+        "early_stop_accuracy_threshold": 99.9,
+        "act_fn": "gelu",
+        "use_wandb": true,
+        "save_model_to_wandb": true,
+        "clip_grad_norm": 0.1,
+        "lr_scheduler": "linear",
+        "model_pair": "strict",
+        "same_size": false,
+        "seed": 67,
+        "batch_size": 256,
+        "include_mlp": false,
+        "detach_while_caching": true,
+        "scheduler_val_metric": [
+          "val/accuracy",
+          "val/IIA",
+          "val/strict_accuracy"
+        ],
+        "siit_sampling": "sample_all",
+        "val_iia_sampling": "all"
+      },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/40/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/40/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/40/edges.pkl"
+    },
+    {
+      "case_id": "51",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/51",
+      "task_description": "Checks if each element is a Fibonacci number",
+      "vocab": [
+        0,
+        1,
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8,
+        9,
+        10,
+        11,
+        12,
+        13,
+        14,
+        15,
+        16,
+        17,
+        18,
+        19,
+        20,
+        21,
+        22,
+        23,
+        24,
+        25,
+        26,
+        27,
+        28,
+        29,
+        30,
+        31,
+        32,
+        33,
+        34,
+        35,
+        36,
+        37,
+        38,
+        39,
+        40,
+        41,
+        42,
+        43,
+        44,
+        45,
+        46,
+        47,
+        48,
+        49,
+        50,
+        51,
+        52,
+        53,
+        54,
+        55,
+        56,
+        57,
+        58,
+        59,
+        60,
+        61,
+        62,
+        63,
+        64,
+        65,
+        66,
+        67,
+        68,
+        69,
+        70,
+        71,
+        72,
+        73,
+        74,
+        75,
+        76,
+        77,
+        78,
+        79,
+        80,
+        81,
+        82,
+        83,
+        84,
+        85,
+        86,
+        87,
+        88,
+        89,
+        90,
+        91,
+        92,
+        93,
+        94,
+        95,
+        96,
+        97,
+        98,
+        99
+      ],
+      "max_seq_len": 10,
+      "min_seq_len": 4,
+      "files": [
+        {
+          "file_name": "edges.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/51/edges.pkl"
+        },
+        {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/51/ll_model.pth"
+        },
+        {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/51/ll_model_cfg.pkl"
+        },
+        {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/51/meta.json"
+        }
+      ],
+      "transformer_cfg": {
+        "n_layers": 2,
+        "d_model": 4,
+        "n_ctx": 10,
+        "d_head": 1,
+        "model_name": "custom",
+        "n_heads": 4,
+        "d_mlp": 16,
+        "act_fn": "gelu",
+        "d_vocab": 102,
+        "eps": 1e-05,
+        "use_attn_result": true,
+        "use_attn_scale": true,
+        "use_split_qkv_input": true,
+        "use_hook_mlp_in": true,
+        "use_attn_in": false,
+        "use_local_attn": false,
+        "original_architecture": null,
+        "from_checkpoint": false,
+        "checkpoint_index": null,
+        "checkpoint_label_type": null,
+        "checkpoint_value": null,
+        "tokenizer_name": null,
+        "window_size": null,
+        "attn_types": null,
+        "init_mode": "gpt2",
+        "normalization_type": null,
+        "n_devices": 1,
+        "attention_dir": "causal",
+        "attn_only": false,
+        "seed": 0,
+        "initializer_range": 0.07525766947068778,
+        "init_weights": true,
+        "scale_attn_by_inverse_layer_idx": false,
+        "positional_embedding_type": "standard",
+        "final_rms": false,
+        "d_vocab_out": 2,
+        "parallel_attn_mlp": false,
+        "rotary_dim": null,
+        "n_params": 384,
+        "use_hook_tokens": false,
+        "gated_mlp": false,
+        "default_prepend_bos": true,
+        "dtype": "torch.float32",
+        "tokenizer_prepends_bos": null,
+        "n_key_value_heads": null,
+        "post_embedding_ln": false,
+        "rotary_base": 10000,
+        "trust_remote_code": false,
+        "rotary_adjacent_pairs": false,
+        "load_in_4bit": false,
+        "num_experts": null,
+        "experts_per_token": null
+      },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/51/ll_model_cfg.pkl",
+      "training_args": {
+        "output_dir": "/circuits-benchmark/results",
+        "atol": 0.05,
+        "lr": 0.001,
+        "use_single_loss": true,
+        "iit_weight": 1.0,
+        "behavior_weight": 0.4,
+        "strict_weight": 0.4,
+        "epochs": 1000,
+        "early_stop_accuracy_threshold": 99.9,
+        "act_fn": "gelu",
+        "use_wandb": true,
+        "save_model_to_wandb": true,
+        "clip_grad_norm": 0.1,
+        "lr_scheduler": "linear",
+        "model_pair": "strict",
+        "same_size": false,
+        "seed": 67,
+        "batch_size": 256,
+        "include_mlp": false,
+        "detach_while_caching": true,
+        "scheduler_val_metric": [
+          "val/accuracy",
+          "val/IIA",
+          "val/strict_accuracy"
+        ],
+        "siit_sampling": "sample_all",
+        "val_iia_sampling": "all"
+      },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/51/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/51/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/51/edges.pkl"
+    },
+    {
+      "case_id": "122",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/122",
+      "task_description": "Check if each number is divisible by 3.",
+      "vocab": [
+        0,
+        1,
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8,
+        9,
+        10,
+        11,
+        12,
+        13,
+        14,
+        15,
+        16,
+        17,
+        18,
+        19,
+        20,
+        21,
+        22,
+        23,
+        24,
+        25,
+        26,
+        27,
+        28,
+        29
+      ],
+      "max_seq_len": 10,
+      "min_seq_len": 4,
+      "files": [
+        {
+          "file_name": "edges.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/122/edges.pkl"
+        },
+        {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/122/ll_model.pth"
+        },
+        {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/122/ll_model_cfg.pkl"
+        },
+        {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/122/meta.json"
+        }
+      ],
+      "transformer_cfg": {
+        "n_layers": 2,
+        "d_model": 4,
+        "n_ctx": 10,
+        "d_head": 1,
+        "model_name": "custom",
+        "n_heads": 4,
+        "d_mlp": 16,
+        "act_fn": "gelu",
+        "d_vocab": 32,
+        "eps": 1e-05,
+        "use_attn_result": true,
+        "use_attn_scale": true,
+        "use_split_qkv_input": true,
+        "use_hook_mlp_in": true,
+        "use_attn_in": false,
+        "use_local_attn": false,
+        "original_architecture": null,
+        "from_checkpoint": false,
+        "checkpoint_index": null,
+        "checkpoint_label_type": null,
+        "checkpoint_value": null,
+        "tokenizer_name": null,
+        "window_size": null,
+        "attn_types": null,
+        "init_mode": "gpt2",
+        "normalization_type": null,
+        "n_devices": 1,
+        "attention_dir": "causal",
+        "attn_only": false,
+        "seed": 0,
+        "initializer_range": 0.12199885626608374,
+        "init_weights": true,
+        "scale_attn_by_inverse_layer_idx": false,
+        "positional_embedding_type": "standard",
+        "final_rms": false,
+        "d_vocab_out": 2,
+        "parallel_attn_mlp": false,
+        "rotary_dim": null,
+        "n_params": 384,
+        "use_hook_tokens": false,
+        "gated_mlp": false,
+        "default_prepend_bos": true,
+        "dtype": "torch.float32",
+        "tokenizer_prepends_bos": null,
+        "n_key_value_heads": null,
+        "post_embedding_ln": false,
+        "rotary_base": 10000,
+        "trust_remote_code": false,
+        "rotary_adjacent_pairs": false,
+        "load_in_4bit": false,
+        "num_experts": null,
+        "experts_per_token": null
+      },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/122/ll_model_cfg.pkl",
+      "training_args": {
+        "output_dir": "/circuits-benchmark/results",
+        "atol": 0.05,
+        "lr": 0.001,
+        "use_single_loss": true,
+        "iit_weight": 1.0,
+        "behavior_weight": 0.4,
+        "strict_weight": 0.4,
+        "epochs": 1000,
+        "early_stop_accuracy_threshold": 99.9,
+        "act_fn": "gelu",
+        "use_wandb": true,
+        "save_model_to_wandb": true,
+        "clip_grad_norm": 0.1,
+        "lr_scheduler": "linear",
+        "model_pair": "strict",
+        "same_size": false,
+        "seed": 67,
+        "batch_size": 256,
+        "include_mlp": false,
+        "detach_while_caching": true,
+        "scheduler_val_metric": [
+          "val/accuracy",
+          "val/IIA",
+          "val/strict_accuracy"
+        ],
+        "siit_sampling": "sample_all",
+        "val_iia_sampling": "all"
+      },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/122/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/122/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/122/edges.pkl"
+    },
+    {
+      "case_id": "85",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/85",
+      "task_description": "Square each element of the input sequence.",
+      "vocab": [
+        0,
+        1,
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8,
+        9,
+        10
+      ],
+      "max_seq_len": 10,
+      "min_seq_len": 4,
+      "files": [
+        {
+          "file_name": "edges.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/85/edges.pkl"
+        },
+        {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/85/ll_model.pth"
+        },
+        {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/85/ll_model_cfg.pkl"
+        },
+        {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/85/meta.json"
+        }
+      ],
+      "transformer_cfg": {
+        "n_layers": 2,
+        "d_model": 4,
+        "n_ctx": 10,
+        "d_head": 1,
+        "model_name": "custom",
+        "n_heads": 4,
+        "d_mlp": 16,
+        "act_fn": "gelu",
+        "d_vocab": 13,
+        "eps": 1e-05,
+        "use_attn_result": true,
+        "use_attn_scale": true,
+        "use_split_qkv_input": true,
+        "use_hook_mlp_in": true,
+        "use_attn_in": false,
+        "use_local_attn": false,
+        "original_architecture": null,
+        "from_checkpoint": false,
+        "checkpoint_index": null,
+        "checkpoint_label_type": null,
+        "checkpoint_value": null,
+        "tokenizer_name": null,
+        "window_size": null,
+        "attn_types": null,
+        "init_mode": "gpt2",
+        "normalization_type": null,
+        "n_devices": 1,
+        "attention_dir": "causal",
+        "attn_only": false,
+        "seed": 0,
+        "initializer_range": 0.1392621247645583,
+        "init_weights": true,
+        "scale_attn_by_inverse_layer_idx": false,
+        "positional_embedding_type": "standard",
+        "final_rms": false,
+        "d_vocab_out": 11,
+        "parallel_attn_mlp": false,
+        "rotary_dim": null,
+        "n_params": 384,
+        "use_hook_tokens": false,
+        "gated_mlp": false,
+        "default_prepend_bos": true,
+        "dtype": "torch.float32",
+        "tokenizer_prepends_bos": null,
+        "n_key_value_heads": null,
+        "post_embedding_ln": false,
+        "rotary_base": 10000,
+        "trust_remote_code": false,
+        "rotary_adjacent_pairs": false,
+        "load_in_4bit": false,
+        "num_experts": null,
+        "experts_per_token": null
+      },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/85/ll_model_cfg.pkl",
+      "training_args": {
+        "output_dir": "/circuits-benchmark/results",
+        "atol": 0.05,
+        "lr": 0.001,
+        "use_single_loss": true,
+        "iit_weight": 1.0,
+        "behavior_weight": 0.4,
+        "strict_weight": 0.7,
+        "epochs": 1000,
+        "early_stop_accuracy_threshold": 99.9,
+        "act_fn": "gelu",
+        "use_wandb": true,
+        "save_model_to_wandb": true,
+        "clip_grad_norm": 0.1,
+        "lr_scheduler": "linear",
+        "model_pair": "strict",
+        "same_size": false,
+        "seed": 67,
+        "batch_size": 256,
+        "include_mlp": false,
+        "detach_while_caching": true,
+        "scheduler_val_metric": [
+          "val/accuracy",
+          "val/IIA",
+          "val/strict_accuracy"
+        ],
+        "siit_sampling": "sample_all",
+        "val_iia_sampling": "all"
+      },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/85/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/85/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/85/edges.pkl"
+    },
+    {
+      "case_id": "2",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/2",
+      "task_description": "Reverse the input sequence.",
+      "vocab": [
+        "a",
+        "b",
+        "c",
+        "d",
+        "e",
+        "f",
+        "g",
+        "h",
+        "i",
+        "j",
+        "k",
+        "l",
+        "m",
+        "n",
+        "o",
+        "p",
+        "q",
+        "r",
+        "s",
+        "t",
+        "u",
+        "v",
+        "w",
+        "x",
+        "y",
+        "z"
+      ],
+      "max_seq_len": 10,
+      "min_seq_len": 4,
+      "files": [
+        {
+          "file_name": "edges.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/edges.pkl"
+        },
+        {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/ll_model.pth"
+        },
+        {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/ll_model_cfg.pkl"
+        },
+        {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/meta.json"
+        }
+      ],
+      "transformer_cfg": {
+        "n_layers": 4,
+        "d_model": 56,
+        "n_ctx": 10,
+        "d_head": 14,
+        "model_name": "custom",
+        "n_heads": 4,
+        "d_mlp": 224,
+        "act_fn": "gelu",
+        "d_vocab": 28,
+        "eps": 1e-05,
+        "use_attn_result": true,
+        "use_attn_scale": true,
+        "use_split_qkv_input": true,
+        "use_hook_mlp_in": true,
+        "use_attn_in": false,
+        "use_local_attn": false,
+        "original_architecture": null,
+        "from_checkpoint": false,
+        "checkpoint_index": null,
+        "checkpoint_label_type": null,
+        "checkpoint_value": null,
+        "tokenizer_name": null,
+        "window_size": null,
+        "attn_types": null,
+        "init_mode": "gpt2",
+        "normalization_type": null,
+        "n_devices": 1,
+        "attention_dir": "bidirectional",
+        "attn_only": false,
+        "seed": 0,
+        "initializer_range": 0.07593263966019993,
+        "init_weights": true,
+        "scale_attn_by_inverse_layer_idx": false,
+        "positional_embedding_type": "standard",
+        "final_rms": false,
+        "d_vocab_out": 26,
+        "parallel_attn_mlp": false,
+        "rotary_dim": null,
+        "n_params": 150528,
+        "use_hook_tokens": false,
+        "gated_mlp": false,
+        "default_prepend_bos": true,
+        "dtype": "torch.float32",
+        "tokenizer_prepends_bos": null,
+        "n_key_value_heads": null,
+        "post_embedding_ln": false,
+        "rotary_base": 10000,
+        "trust_remote_code": false,
+        "rotary_adjacent_pairs": false,
+        "load_in_4bit": false,
+        "num_experts": null,
+        "experts_per_token": null
+      },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/ll_model_cfg.pkl",
+      "training_args": {
+        "output_dir": "/circuits-benchmark/results",
+        "atol": 0.05,
+        "lr": 0.001,
+        "use_single_loss": true,
+        "iit_weight": 1.0,
+        "behavior_weight": 0.4,
+        "strict_weight": 0.4,
+        "epochs": 1000,
+        "early_stop_accuracy_threshold": 99.9,
+        "act_fn": "gelu",
+        "use_wandb": true,
+        "save_model_to_wandb": true,
+        "clip_grad_norm": 0.1,
+        "lr_scheduler": "linear",
+        "model_pair": "strict",
+        "same_size": false,
+        "seed": 67,
+        "batch_size": 256,
+        "include_mlp": false,
+        "detach_while_caching": true,
+        "scheduler_val_metric": [
+          "val/accuracy",
+          "val/IIA",
+          "val/strict_accuracy"
+        ],
+        "siit_sampling": "sample_all",
+        "val_iia_sampling": "all"
+      },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/edges.pkl"
+    },
+    {
+      "case_id": "44",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/44",
+      "task_description": "Replaces each element with the number of elements greater than it in the sequence",
+      "vocab": [
+        0,
+        1,
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8,
+        9,
+        10
+      ],
+      "max_seq_len": 10,
+      "min_seq_len": 4,
+      "files": [
+        {
+          "file_name": "edges.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/44/edges.pkl"
+        },
+        {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/44/ll_model.pth"
+        },
+        {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/44/ll_model_cfg.pkl"
+        },
+        {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/44/meta.json"
+        }
+      ],
+      "transformer_cfg": {
+        "n_layers": 2,
+        "d_model": 24,
+        "n_ctx": 10,
+        "d_head": 6,
+        "model_name": "custom",
+        "n_heads": 4,
+        "d_mlp": 96,
+        "act_fn": "gelu",
+        "d_vocab": 13,
+        "eps": 1e-05,
+        "use_attn_result": true,
+        "use_attn_scale": true,
+        "use_split_qkv_input": true,
+        "use_hook_mlp_in": true,
+        "use_attn_in": false,
+        "use_local_attn": false,
+        "original_architecture": null,
+        "from_checkpoint": false,
+        "checkpoint_index": null,
+        "checkpoint_label_type": null,
+        "checkpoint_value": null,
+        "tokenizer_name": null,
+        "window_size": null,
+        "attn_types": null,
+        "init_mode": "gpt2",
+        "normalization_type": null,
+        "n_devices": 1,
+        "attention_dir": "bidirectional",
+        "attn_only": false,
+        "seed": 0,
+        "initializer_range": 0.13719886811400708,
+        "init_weights": true,
+        "scale_attn_by_inverse_layer_idx": false,
+        "positional_embedding_type": "standard",
+        "final_rms": false,
+        "d_vocab_out": 10,
+        "parallel_attn_mlp": false,
+        "rotary_dim": null,
+        "n_params": 13824,
+        "use_hook_tokens": false,
+        "gated_mlp": false,
+        "default_prepend_bos": true,
+        "dtype": "torch.float32",
+        "tokenizer_prepends_bos": null,
+        "n_key_value_heads": null,
+        "post_embedding_ln": false,
+        "rotary_base": 10000,
+        "trust_remote_code": false,
+        "rotary_adjacent_pairs": false,
+        "load_in_4bit": false,
+        "num_experts": null,
+        "experts_per_token": null
+      },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/44/ll_model_cfg.pkl",
+      "training_args": {
+        "output_dir": "/circuits-benchmark/results",
+        "atol": 0.05,
+        "lr": 0.001,
+        "use_single_loss": true,
+        "iit_weight": 1.0,
+        "behavior_weight": 0.4,
+        "strict_weight": 0.4,
+        "epochs": 1000,
+        "early_stop_accuracy_threshold": 99.9,
+        "act_fn": "gelu",
+        "use_wandb": true,
+        "save_model_to_wandb": true,
+        "clip_grad_norm": 0.1,
+        "lr_scheduler": "linear",
+        "model_pair": "strict",
+        "same_size": false,
+        "seed": 67,
+        "batch_size": 256,
+        "include_mlp": false,
+        "detach_while_caching": true,
+        "scheduler_val_metric": [
+          "val/accuracy",
+          "val/IIA",
+          "val/strict_accuracy"
+        ],
+        "siit_sampling": "sample_all",
+        "val_iia_sampling": "all"
+      },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/44/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/44/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/44/edges.pkl"
+    },
+    {
+      "case_id": "113",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/113",
+      "task_description": "Inverts the sequence if it is sorted in ascending order, otherwise leaves it unchanged.",
+      "vocab": [
+        0,
+        1,
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8,
+        9,
+        10,
+        11,
+        12,
+        13,
+        14,
+        15,
+        16,
+        17,
+        18,
+        19,
+        20,
+        21,
+        22,
+        23,
+        24,
+        25,
+        26,
+        27,
+        28,
+        29
+      ],
+      "max_seq_len": 10,
+      "min_seq_len": 4,
+      "files": [
+        {
+          "file_name": "edges.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/113/edges.pkl"
+        },
+        {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/113/ll_model.pth"
+        },
+        {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/113/ll_model_cfg.pkl"
+        },
+        {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/113/meta.json"
+        }
+      ],
+      "transformer_cfg": {
+        "n_layers": 7,
+        "d_model": 88,
+        "n_ctx": 10,
+        "d_head": 22,
+        "model_name": "custom",
+        "n_heads": 4,
+        "d_mlp": 352,
+        "act_fn": "gelu",
+        "d_vocab": 32,
+        "eps": 1e-05,
+        "use_attn_result": true,
+        "use_attn_scale": true,
+        "use_split_qkv_input": true,
+        "use_hook_mlp_in": true,
+        "use_attn_in": false,
+        "use_local_attn": false,
+        "original_architecture": null,
+        "from_checkpoint": false,
+        "checkpoint_index": null,
+        "checkpoint_label_type": null,
+        "checkpoint_value": null,
+        "tokenizer_name": null,
+        "window_size": null,
+        "attn_types": null,
+        "init_mode": "gpt2",
+        "normalization_type": null,
+        "n_devices": 1,
+        "attention_dir": "bidirectional",
+        "attn_only": false,
+        "seed": 0,
+        "initializer_range": 0.0512147519731584,
+        "init_weights": true,
+        "scale_attn_by_inverse_layer_idx": false,
+        "positional_embedding_type": "standard",
+        "final_rms": false,
+        "d_vocab_out": 30,
+        "parallel_attn_mlp": false,
+        "rotary_dim": null,
+        "n_params": 650496,
+        "use_hook_tokens": false,
+        "gated_mlp": false,
+        "default_prepend_bos": true,
+        "dtype": "torch.float32",
+        "tokenizer_prepends_bos": null,
+        "n_key_value_heads": null,
+        "post_embedding_ln": false,
+        "rotary_base": 10000,
+        "trust_remote_code": false,
+        "rotary_adjacent_pairs": false,
+        "load_in_4bit": false,
+        "num_experts": null,
+        "experts_per_token": null
+      },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/113/ll_model_cfg.pkl",
+      "training_args": {
+        "output_dir": "/circuits-benchmark/results",
+        "atol": 0.05,
+        "lr": 0.001,
+        "use_single_loss": true,
+        "iit_weight": 1.0,
+        "behavior_weight": 0.4,
+        "strict_weight": 0.4,
+        "epochs": 1000,
+        "early_stop_accuracy_threshold": 99.9,
+        "act_fn": "gelu",
+        "use_wandb": true,
+        "save_model_to_wandb": true,
+        "clip_grad_norm": 0.1,
+        "lr_scheduler": "linear",
+        "model_pair": "strict",
+        "same_size": false,
+        "seed": 67,
+        "batch_size": 256,
+        "include_mlp": false,
+        "detach_while_caching": true,
+        "scheduler_val_metric": [
+          "val/accuracy",
+          "val/IIA",
+          "val/strict_accuracy"
+        ],
+        "siit_sampling": "sample_all",
+        "val_iia_sampling": "all"
+      },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/113/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/113/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/113/edges.pkl"
+    },
+    {
+      "case_id": "77",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/77",
+      "task_description": "Apply the tangent function to each element of the sequence.",
+      "vocab": [
+        0,
+        1,
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8,
+        9,
+        10
+      ],
+      "max_seq_len": 10,
+      "min_seq_len": 4,
+      "files": [
+        {
+          "file_name": "edges.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/77/edges.pkl"
+        },
+        {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/77/ll_model.pth"
+        },
+        {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/77/ll_model_cfg.pkl"
+        },
+        {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/77/meta.json"
+        }
+      ],
+      "transformer_cfg": {
+        "n_layers": 2,
+        "d_model": 4,
+        "n_ctx": 10,
+        "d_head": 1,
+        "model_name": "custom",
+        "n_heads": 4,
+        "d_mlp": 16,
+        "act_fn": "gelu",
+        "d_vocab": 13,
+        "eps": 1e-05,
+        "use_attn_result": true,
+        "use_attn_scale": true,
+        "use_split_qkv_input": true,
+        "use_hook_mlp_in": true,
+        "use_attn_in": false,
+        "use_local_attn": false,
+        "original_architecture": null,
+        "from_checkpoint": false,
+        "checkpoint_index": null,
+        "checkpoint_label_type": null,
+        "checkpoint_value": null,
+        "tokenizer_name": null,
+        "window_size": null,
+        "attn_types": null,
+        "init_mode": "gpt2",
+        "normalization_type": null,
+        "n_devices": 1,
+        "attention_dir": "causal",
+        "attn_only": false,
+        "seed": 0,
+        "initializer_range": 0.1392621247645583,
+        "init_weights": true,
+        "scale_attn_by_inverse_layer_idx": false,
+        "positional_embedding_type": "standard",
+        "final_rms": false,
+        "d_vocab_out": 11,
+        "parallel_attn_mlp": false,
+        "rotary_dim": null,
+        "n_params": 384,
+        "use_hook_tokens": false,
+        "gated_mlp": false,
+        "default_prepend_bos": true,
+        "dtype": "torch.float32",
+        "tokenizer_prepends_bos": null,
+        "n_key_value_heads": null,
+        "post_embedding_ln": false,
+        "rotary_base": 10000,
+        "trust_remote_code": false,
+        "rotary_adjacent_pairs": false,
+        "load_in_4bit": false,
+        "num_experts": null,
+        "experts_per_token": null
+      },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/77/ll_model_cfg.pkl",
+      "training_args": {
+        "output_dir": "/circuits-benchmark/results",
+        "atol": 0.05,
+        "lr": 0.001,
+        "use_single_loss": true,
+        "iit_weight": 1.0,
+        "behavior_weight": 0.4,
+        "strict_weight": 0.7,
+        "epochs": 1000,
+        "early_stop_accuracy_threshold": 99.9,
+        "act_fn": "gelu",
+        "use_wandb": true,
+        "save_model_to_wandb": true,
+        "clip_grad_norm": 0.1,
+        "lr_scheduler": "linear",
+        "model_pair": "strict",
+        "same_size": false,
+        "seed": 67,
+        "batch_size": 256,
+        "include_mlp": false,
+        "detach_while_caching": true,
+        "scheduler_val_metric": [
+          "val/accuracy",
+          "val/IIA",
+          "val/strict_accuracy"
+        ],
+        "siit_sampling": "sample_all",
+        "val_iia_sampling": "all"
+      },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/77/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/77/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/77/edges.pkl"
+    },
+    {
+      "case_id": "20",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/20",
+      "task_description": "Detect spam messages based on appearance of spam keywords.",
+      "vocab": [
+        "J",
+        "LB",
+        "TPSI",
+        "V",
+        "b",
+        "click",
+        "no",
+        "now",
+        "oCLrZaW",
+        "offer",
+        "poiVg",
+        "spam"
+      ],
+      "max_seq_len": 10,
+      "min_seq_len": 4,
+      "files": [
+        {
+          "file_name": "edges.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/edges.pkl"
+        },
+        {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/ll_model.pth"
+        },
+        {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/ll_model_cfg.pkl"
+        },
+        {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/meta.json"
+        }
+      ],
+      "transformer_cfg": {
+        "n_layers": 2,
+        "d_model": 13,
+        "n_ctx": 10,
+        "d_head": 3,
+        "model_name": "custom",
+        "n_heads": 4,
+        "d_mlp": 52,
+        "act_fn": "gelu",
+        "d_vocab": 14,
+        "eps": 1e-05,
+        "use_attn_result": true,
+        "use_attn_scale": true,
+        "use_split_qkv_input": true,
+        "use_hook_mlp_in": true,
+        "use_attn_in": false,
+        "use_local_attn": false,
+        "original_architecture": null,
+        "from_checkpoint": false,
+        "checkpoint_index": null,
+        "checkpoint_label_type": null,
+        "checkpoint_value": null,
+        "tokenizer_name": null,
+        "window_size": null,
+        "attn_types": null,
+        "init_mode": "gpt2",
+        "normalization_type": null,
+        "n_devices": 1,
+        "attention_dir": "causal",
+        "attn_only": false,
+        "seed": 0,
+        "initializer_range": 0.16,
+        "init_weights": true,
+        "scale_attn_by_inverse_layer_idx": false,
+        "positional_embedding_type": "standard",
+        "final_rms": false,
+        "d_vocab_out": 2,
+        "parallel_attn_mlp": false,
+        "rotary_dim": null,
+        "n_params": 3952,
+        "use_hook_tokens": false,
+        "gated_mlp": false,
+        "default_prepend_bos": true,
+        "dtype": "torch.float32",
+        "tokenizer_prepends_bos": null,
+        "n_key_value_heads": null,
+        "post_embedding_ln": false,
+        "rotary_base": 10000,
+        "trust_remote_code": false,
+        "rotary_adjacent_pairs": false
+      },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/ll_model_cfg.pkl",
+      "training_args": {
+        "atol": 0.05,
+        "lr": 0.01,
+        "use_single_loss": false,
+        "iit_weight": 1.0,
+        "behavior_weight": 1.0,
+        "strict_weight": 0.4,
+        "epochs": 2000,
+        "act_fn": "gelu",
+        "clip_grad_norm": 0.1,
+        "lr_scheduler": "",
+        "model_pair": "strict"
+      },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/edges.pkl"
+    },
+    {
+      "case_id": "25",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/25",
+      "task_description": "Normalizes token frequencies in a sequence to a range between 0 and 1.",
+      "vocab": [
+        "a",
+        "b",
+        "c"
+      ],
+      "max_seq_len": 10,
+      "min_seq_len": 4,
+      "files": [
+        {
+          "file_name": "edges.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/25/edges.pkl"
+        },
+        {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/25/ll_model.pth"
+        },
+        {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/25/ll_model_cfg.pkl"
+        },
+        {
           "file_name": "meta.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/meta.json"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/25/meta.json"
         }
       ],
       "transformer_cfg": {
         "n_layers": 2,
-        "d_model": 16,
+        "d_model": 62,
         "n_ctx": 10,
-        "d_head": 4,
+        "d_head": 15,
         "model_name": "custom",
         "n_heads": 4,
-        "d_mlp": 64,
+        "d_mlp": 248,
+        "act_fn": "gelu",
+        "d_vocab": 5,
+        "eps": 1e-05,
+        "use_attn_result": true,
+        "use_attn_scale": true,
+        "use_split_qkv_input": true,
+        "use_hook_mlp_in": true,
+        "use_attn_in": false,
+        "use_local_attn": false,
+        "original_architecture": null,
+        "from_checkpoint": false,
+        "checkpoint_index": null,
+        "checkpoint_label_type": null,
+        "checkpoint_value": null,
+        "tokenizer_name": null,
+        "window_size": null,
+        "attn_types": null,
+        "init_mode": "gpt2",
+        "normalization_type": null,
+        "n_devices": 1,
+        "attention_dir": "bidirectional",
+        "attn_only": false,
+        "seed": 0,
+        "initializer_range": 0.08295613557843402,
+        "init_weights": true,
+        "scale_attn_by_inverse_layer_idx": false,
+        "positional_embedding_type": "standard",
+        "final_rms": false,
+        "d_vocab_out": 56,
+        "parallel_attn_mlp": false,
+        "rotary_dim": null,
+        "n_params": 91264,
+        "use_hook_tokens": false,
+        "gated_mlp": false,
+        "default_prepend_bos": true,
+        "dtype": "torch.float32",
+        "tokenizer_prepends_bos": null,
+        "n_key_value_heads": null,
+        "post_embedding_ln": false,
+        "rotary_base": 10000,
+        "trust_remote_code": false,
+        "rotary_adjacent_pairs": false,
+        "load_in_4bit": false,
+        "num_experts": null,
+        "experts_per_token": null
+      },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/25/ll_model_cfg.pkl",
+      "training_args": {
+        "output_dir": "/circuits-benchmark/results",
+        "atol": 0.05,
+        "lr": 0.001,
+        "use_single_loss": true,
+        "iit_weight": 1.0,
+        "behavior_weight": 0.4,
+        "strict_weight": 0.4,
+        "epochs": 1000,
+        "early_stop_accuracy_threshold": 99.9,
+        "act_fn": "gelu",
+        "use_wandb": true,
+        "save_model_to_wandb": true,
+        "clip_grad_norm": 0.1,
+        "lr_scheduler": "linear",
+        "model_pair": "strict",
+        "same_size": false,
+        "seed": 67,
+        "batch_size": 256,
+        "include_mlp": false,
+        "detach_while_caching": true,
+        "scheduler_val_metric": [
+          "val/accuracy",
+          "val/IIA",
+          "val/strict_accuracy"
+        ],
+        "siit_sampling": "sample_all",
+        "val_iia_sampling": "all"
+      },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/25/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/25/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/25/edges.pkl"
+    },
+    {
+      "case_id": "11",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/11",
+      "task_description": "Counts the number of words in a sequence based on their length.",
+      "vocab": [
+        "J",
+        "LB",
+        "TPSI",
+        "V",
+        "b",
+        "no",
+        "oCLrZaW",
+        "poiVg"
+      ],
+      "max_seq_len": 10,
+      "min_seq_len": 4,
+      "files": [
+        {
+          "file_name": "edges.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/edges.pkl"
+        },
+        {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/ll_model.pth"
+        },
+        {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/ll_model_cfg.pkl"
+        },
+        {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/meta.json"
+        }
+      ],
+      "transformer_cfg": {
+        "n_layers": 2,
+        "d_model": 12,
+        "n_ctx": 10,
+        "d_head": 3,
+        "model_name": "custom",
+        "n_heads": 4,
+        "d_mlp": 48,
         "act_fn": "gelu",
         "d_vocab": 10,
         "eps": 1e-05,
@@ -3032,7 +6333,7 @@
         "attention_dir": "causal",
         "attn_only": false,
         "seed": 0,
-        "initializer_range": 0.16329931618554522,
+        "initializer_range": 0.1460593486680443,
         "init_weights": true,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
@@ -3040,7 +6341,7 @@
         "d_vocab_out": 5,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
-        "n_params": 6144,
+        "n_params": 3456,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
@@ -3052,68 +6353,195 @@
         "trust_remote_code": false,
         "rotary_adjacent_pairs": false
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/ll_model_cfg.pkl",
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/ll_model_cfg.pkl",
       "training_args": {
         "atol": 0.05,
         "lr": 0.01,
         "use_single_loss": false,
         "iit_weight": 1.0,
         "behavior_weight": 1.0,
-        "strict_weight": 1.0,
-        "epochs": 2000,
+        "strict_weight": 0.4,
+        "epochs": 500,
+        "act_fn": "gelu",
+        "clip_grad_norm": 1.0,
+        "lr_scheduler": ""
+      },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/edges.pkl"
+    },
+    {
+      "case_id": "54",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/54",
+      "task_description": "Applies the hyperbolic tangent to each element.",
+      "vocab": [
+        0,
+        1,
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8,
+        9,
+        10
+      ],
+      "max_seq_len": 10,
+      "min_seq_len": 4,
+      "files": [
+        {
+          "file_name": "edges.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/54/edges.pkl"
+        },
+        {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/54/ll_model.pth"
+        },
+        {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/54/ll_model_cfg.pkl"
+        },
+        {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/54/meta.json"
+        }
+      ],
+      "transformer_cfg": {
+        "n_layers": 2,
+        "d_model": 4,
+        "n_ctx": 10,
+        "d_head": 1,
+        "model_name": "custom",
+        "n_heads": 4,
+        "d_mlp": 16,
+        "act_fn": "gelu",
+        "d_vocab": 13,
+        "eps": 1e-05,
+        "use_attn_result": true,
+        "use_attn_scale": true,
+        "use_split_qkv_input": true,
+        "use_hook_mlp_in": true,
+        "use_attn_in": false,
+        "use_local_attn": false,
+        "original_architecture": null,
+        "from_checkpoint": false,
+        "checkpoint_index": null,
+        "checkpoint_label_type": null,
+        "checkpoint_value": null,
+        "tokenizer_name": null,
+        "window_size": null,
+        "attn_types": null,
+        "init_mode": "gpt2",
+        "normalization_type": null,
+        "n_devices": 1,
+        "attention_dir": "causal",
+        "attn_only": false,
+        "seed": 0,
+        "initializer_range": 0.1392621247645583,
+        "init_weights": true,
+        "scale_attn_by_inverse_layer_idx": false,
+        "positional_embedding_type": "standard",
+        "final_rms": false,
+        "d_vocab_out": 11,
+        "parallel_attn_mlp": false,
+        "rotary_dim": null,
+        "n_params": 384,
+        "use_hook_tokens": false,
+        "gated_mlp": false,
+        "default_prepend_bos": true,
+        "dtype": "torch.float32",
+        "tokenizer_prepends_bos": null,
+        "n_key_value_heads": null,
+        "post_embedding_ln": false,
+        "rotary_base": 10000,
+        "trust_remote_code": false,
+        "rotary_adjacent_pairs": false,
+        "load_in_4bit": false,
+        "num_experts": null,
+        "experts_per_token": null
+      },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/54/ll_model_cfg.pkl",
+      "training_args": {
+        "output_dir": "/circuits-benchmark/results",
+        "atol": 0.05,
+        "lr": 0.001,
+        "use_single_loss": true,
+        "iit_weight": 1.0,
+        "behavior_weight": 0.4,
+        "strict_weight": 0.7,
+        "epochs": 1000,
+        "early_stop_accuracy_threshold": 99.9,
         "act_fn": "gelu",
+        "use_wandb": true,
+        "save_model_to_wandb": true,
         "clip_grad_norm": 0.1,
-        "lr_scheduler": "",
-        "model_pair": "strict"
+        "lr_scheduler": "linear",
+        "model_pair": "strict",
+        "same_size": false,
+        "seed": 67,
+        "batch_size": 256,
+        "include_mlp": false,
+        "detach_while_caching": true,
+        "scheduler_val_metric": [
+          "val/accuracy",
+          "val/IIA",
+          "val/strict_accuracy"
+        ],
+        "siit_sampling": "sample_all",
+        "val_iia_sampling": "all"
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/meta.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/ll_model.pth",
-      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/edges.pkl"
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/54/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/54/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/54/edges.pkl"
     },
     {
-      "case_id": "35",
-      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/35",
-      "task_description": "Alternates capitalization of each character in words.",
+      "case_id": "124",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/124",
+      "task_description": "Check if all elements in a list are equal.",
       "vocab": [
-        "J",
-        "LB",
-        "TPSI",
-        "V",
-        "b",
-        "no",
-        "oCLrZaW",
-        "poiVg"
+        0,
+        1,
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8,
+        9,
+        10
       ],
       "max_seq_len": 10,
       "min_seq_len": 4,
       "files": [
         {
           "file_name": "edges.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/edges.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/124/edges.pkl"
         },
         {
           "file_name": "ll_model.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/ll_model.pth"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/124/ll_model.pth"
         },
         {
           "file_name": "ll_model_cfg.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/ll_model_cfg.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/124/ll_model_cfg.pkl"
         },
         {
           "file_name": "meta.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/meta.json"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/124/meta.json"
         }
       ],
       "transformer_cfg": {
-        "n_layers": 2,
-        "d_model": 9,
+        "n_layers": 3,
+        "d_model": 24,
         "n_ctx": 10,
-        "d_head": 2,
+        "d_head": 6,
         "model_name": "custom",
         "n_heads": 4,
-        "d_mlp": 36,
+        "d_mlp": 96,
         "act_fn": "gelu",
-        "d_vocab": 10,
+        "d_vocab": 13,
         "eps": 1e-05,
         "use_attn_result": true,
         "use_attn_scale": true,
@@ -3132,18 +6560,18 @@
         "init_mode": "gpt2",
         "normalization_type": null,
         "n_devices": 1,
-        "attention_dir": "causal",
+        "attention_dir": "bidirectional",
         "attn_only": false,
         "seed": 0,
-        "initializer_range": 0.1539600717839002,
+        "initializer_range": 0.11547005383792516,
         "init_weights": true,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
         "final_rms": false,
-        "d_vocab_out": 8,
+        "d_vocab_out": 2,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
-        "n_params": 1872,
+        "n_params": 20736,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
@@ -3153,65 +6581,121 @@
         "post_embedding_ln": false,
         "rotary_base": 10000,
         "trust_remote_code": false,
-        "rotary_adjacent_pairs": false
+        "rotary_adjacent_pairs": false,
+        "load_in_4bit": false,
+        "num_experts": null,
+        "experts_per_token": null
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/ll_model_cfg.pkl",
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/124/ll_model_cfg.pkl",
       "training_args": {
+        "output_dir": "/circuits-benchmark/results",
         "atol": 0.05,
-        "lr": 0.01,
-        "use_single_loss": false,
+        "lr": 0.001,
+        "use_single_loss": true,
         "iit_weight": 1.0,
-        "behavior_weight": 1.0,
-        "strict_weight": 1.0,
-        "epochs": 2000,
+        "behavior_weight": 0.4,
+        "strict_weight": 0.4,
+        "epochs": 1000,
+        "early_stop_accuracy_threshold": 99.9,
         "act_fn": "gelu",
+        "use_wandb": true,
+        "save_model_to_wandb": true,
         "clip_grad_norm": 0.1,
-        "lr_scheduler": "",
-        "model_pair": "strict"
+        "lr_scheduler": "linear",
+        "model_pair": "strict",
+        "same_size": false,
+        "seed": 67,
+        "batch_size": 256,
+        "include_mlp": false,
+        "detach_while_caching": true,
+        "scheduler_val_metric": [
+          "val/accuracy",
+          "val/IIA",
+          "val/strict_accuracy"
+        ],
+        "siit_sampling": "sample_all",
+        "val_iia_sampling": "all"
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/meta.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/ll_model.pth",
-      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/edges.pkl"
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/124/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/124/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/124/edges.pkl"
     },
     {
-      "case_id": "36",
-      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/36",
-      "task_description": "Classifies each token as 'positive', 'negative', or 'neutral' based on emojis.",
+      "case_id": "102",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/102",
+      "task_description": "Reflects each element within a range (default is [2, 7]).",
       "vocab": [
-        "\ud83d\udcd8",
-        "\ud83d\ude0a",
-        "\ud83d\ude22"
+        -20,
+        -19,
+        -18,
+        -17,
+        -16,
+        -15,
+        -14,
+        -13,
+        -12,
+        -11,
+        -10,
+        -9,
+        -8,
+        -7,
+        -6,
+        -5,
+        -4,
+        -3,
+        -2,
+        -1,
+        0,
+        1,
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8,
+        9,
+        10,
+        11,
+        12,
+        13,
+        14,
+        15,
+        16,
+        17,
+        18,
+        19
       ],
       "max_seq_len": 10,
       "min_seq_len": 4,
       "files": [
         {
           "file_name": "edges.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/edges.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/102/edges.pkl"
         },
         {
           "file_name": "ll_model.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/ll_model.pth"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/102/ll_model.pth"
         },
         {
           "file_name": "ll_model_cfg.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/ll_model_cfg.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/102/ll_model_cfg.pkl"
         },
         {
           "file_name": "meta.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/meta.json"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/102/meta.json"
         }
       ],
       "transformer_cfg": {
         "n_layers": 2,
-        "d_model": 6,
+        "d_model": 4,
         "n_ctx": 10,
         "d_head": 1,
         "model_name": "custom",
         "n_heads": 4,
-        "d_mlp": 24,
+        "d_mlp": 16,
         "act_fn": "gelu",
-        "d_vocab": 5,
+        "d_vocab": 42,
         "eps": 1e-05,
         "use_attn_result": true,
         "use_attn_scale": true,
@@ -3233,15 +6717,15 @@
         "attention_dir": "causal",
         "attn_only": false,
         "seed": 0,
-        "initializer_range": 0.19402850002906638,
+        "initializer_range": 0.1059625885652035,
         "init_weights": true,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
         "final_rms": false,
-        "d_vocab_out": 3,
+        "d_vocab_out": 6,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
-        "n_params": 768,
+        "n_params": 384,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
@@ -3251,70 +6735,111 @@
         "post_embedding_ln": false,
         "rotary_base": 10000,
         "trust_remote_code": false,
-        "rotary_adjacent_pairs": false
+        "rotary_adjacent_pairs": false,
+        "load_in_4bit": false,
+        "num_experts": null,
+        "experts_per_token": null
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/ll_model_cfg.pkl",
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/102/ll_model_cfg.pkl",
       "training_args": {
+        "output_dir": "/circuits-benchmark/results",
         "atol": 0.05,
-        "lr": 0.01,
-        "use_single_loss": false,
+        "lr": 0.001,
+        "use_single_loss": true,
         "iit_weight": 1.0,
-        "behavior_weight": 1.0,
-        "strict_weight": 1.0,
-        "epochs": 2000,
+        "behavior_weight": 0.4,
+        "strict_weight": 0.4,
+        "epochs": 1000,
+        "early_stop_accuracy_threshold": 99.9,
         "act_fn": "gelu",
+        "use_wandb": true,
+        "save_model_to_wandb": true,
         "clip_grad_norm": 0.1,
-        "lr_scheduler": "",
-        "model_pair": "strict"
+        "lr_scheduler": "linear",
+        "model_pair": "strict",
+        "same_size": false,
+        "seed": 82,
+        "batch_size": 256,
+        "include_mlp": false,
+        "detach_while_caching": true,
+        "scheduler_val_metric": [
+          "val/accuracy",
+          "val/IIA",
+          "val/strict_accuracy"
+        ],
+        "siit_sampling": "sample_all",
+        "val_iia_sampling": "all"
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/meta.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/ll_model.pth",
-      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/edges.pkl"
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/102/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/102/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/102/edges.pkl"
     },
     {
-      "case_id": "37",
-      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/37",
-      "task_description": "Reverses each word in the sequence except for specified exclusions.",
+      "case_id": "129",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/129",
+      "task_description": "Checks if all elements are a multiple of n (set the default at 2).",
       "vocab": [
-        "J",
-        "LB",
-        "TPSI",
-        "V",
-        "b",
-        "no",
-        "oCLrZaW",
-        "poiVg"
+        0,
+        1,
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8,
+        9,
+        10,
+        11,
+        12,
+        13,
+        14,
+        15,
+        16,
+        17,
+        18,
+        19,
+        20,
+        21,
+        22,
+        23,
+        24,
+        25,
+        26,
+        27,
+        28,
+        29
       ],
       "max_seq_len": 10,
       "min_seq_len": 4,
       "files": [
         {
           "file_name": "edges.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/edges.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/129/edges.pkl"
         },
         {
           "file_name": "ll_model.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/ll_model.pth"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/129/ll_model.pth"
         },
         {
           "file_name": "ll_model_cfg.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/ll_model_cfg.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/129/ll_model_cfg.pkl"
         },
         {
           "file_name": "meta.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/meta.json"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/129/meta.json"
         }
       ],
       "transformer_cfg": {
-        "n_layers": 2,
-        "d_model": 12,
+        "n_layers": 3,
+        "d_model": 4,
         "n_ctx": 10,
-        "d_head": 3,
+        "d_head": 1,
         "model_name": "custom",
         "n_heads": 4,
-        "d_mlp": 48,
+        "d_mlp": 16,
         "act_fn": "gelu",
-        "d_vocab": 10,
+        "d_vocab": 32,
         "eps": 1e-05,
         "use_attn_result": true,
         "use_attn_scale": true,
@@ -3333,18 +6858,18 @@
         "init_mode": "gpt2",
         "normalization_type": null,
         "n_devices": 1,
-        "attention_dir": "causal",
+        "attention_dir": "bidirectional",
         "attn_only": false,
         "seed": 0,
-        "initializer_range": 0.1539600717839002,
+        "initializer_range": 0.10504514628777804,
         "init_weights": true,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
         "final_rms": false,
-        "d_vocab_out": 8,
+        "d_vocab_out": 2,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
-        "n_params": 3456,
+        "n_params": 576,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
@@ -3354,88 +6879,92 @@
         "post_embedding_ln": false,
         "rotary_base": 10000,
         "trust_remote_code": false,
-        "rotary_adjacent_pairs": false
+        "rotary_adjacent_pairs": false,
+        "load_in_4bit": false,
+        "num_experts": null,
+        "experts_per_token": null
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/ll_model_cfg.pkl",
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/129/ll_model_cfg.pkl",
       "training_args": {
+        "output_dir": "/circuits-benchmark/results",
         "atol": 0.05,
-        "lr": 0.01,
-        "use_single_loss": false,
+        "lr": 0.001,
+        "use_single_loss": true,
         "iit_weight": 1.0,
-        "behavior_weight": 1.0,
-        "strict_weight": 1.0,
-        "epochs": 2000,
+        "behavior_weight": 0.4,
+        "strict_weight": 0.4,
+        "epochs": 1000,
+        "early_stop_accuracy_threshold": 99.9,
         "act_fn": "gelu",
+        "use_wandb": true,
+        "save_model_to_wandb": true,
         "clip_grad_norm": 0.1,
-        "lr_scheduler": "",
-        "model_pair": "strict"
+        "lr_scheduler": "linear",
+        "model_pair": "strict",
+        "same_size": false,
+        "seed": 67,
+        "batch_size": 256,
+        "include_mlp": false,
+        "detach_while_caching": true,
+        "scheduler_val_metric": [
+          "val/accuracy",
+          "val/IIA",
+          "val/strict_accuracy"
+        ],
+        "siit_sampling": "sample_all",
+        "val_iia_sampling": "all"
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/meta.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/ll_model.pth",
-      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/edges.pkl"
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/129/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/129/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/129/edges.pkl"
     },
     {
-      "case_id": "39",
-      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/39",
-      "task_description": "Returns the fraction of 'x' in the input up to the i-th position for all i.",
+      "case_id": "50",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/50",
+      "task_description": "Applies the hyperbolic cosine to each element",
       "vocab": [
-        "a",
-        "b",
-        "c",
-        "d",
-        "e",
-        "f",
-        "g",
-        "h",
-        "i",
-        "j",
-        "k",
-        "l",
-        "m",
-        "n",
-        "o",
-        "p",
-        "q",
-        "r",
-        "s",
-        "t",
-        "u",
-        "v",
-        "w",
-        "x",
-        "y",
-        "z"
+        0,
+        1,
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8,
+        9,
+        10
       ],
-      "max_seq_len": 60,
+      "max_seq_len": 10,
       "min_seq_len": 4,
       "files": [
         {
           "file_name": "edges.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/39/edges.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/50/edges.pkl"
         },
         {
           "file_name": "ll_model.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/39/ll_model.pth"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/50/ll_model.pth"
         },
         {
           "file_name": "ll_model_cfg.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/39/ll_model_cfg.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/50/ll_model_cfg.pkl"
         },
         {
           "file_name": "meta.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/39/meta.json"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/50/meta.json"
         }
       ],
       "transformer_cfg": {
         "n_layers": 2,
-        "d_model": 120,
-        "n_ctx": 60,
-        "d_head": 30,
+        "d_model": 4,
+        "n_ctx": 10,
+        "d_head": 1,
         "model_name": "custom",
         "n_heads": 4,
-        "d_mlp": 480,
+        "d_mlp": 16,
         "act_fn": "gelu",
-        "d_vocab": 28,
+        "d_vocab": 13,
         "eps": 1e-05,
         "use_attn_result": true,
         "use_attn_scale": true,
@@ -3457,15 +6986,15 @@
         "attention_dir": "causal",
         "attn_only": false,
         "seed": 0,
-        "initializer_range": 0.08432740427115679,
+        "initializer_range": 0.1392621247645583,
         "init_weights": true,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
         "final_rms": false,
-        "d_vocab_out": 1,
+        "d_vocab_out": 11,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
-        "n_params": 345600,
+        "n_params": 384,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
@@ -3480,7 +7009,7 @@
         "num_experts": null,
         "experts_per_token": null
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/39/ll_model_cfg.pkl",
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/50/ll_model_cfg.pkl",
       "training_args": {
         "output_dir": "/circuits-benchmark/results",
         "atol": 0.05,
@@ -3488,7 +7017,7 @@
         "use_single_loss": true,
         "iit_weight": 1.0,
         "behavior_weight": 0.4,
-        "strict_weight": 0.4,
+        "strict_weight": 0.7,
         "epochs": 1000,
         "early_stop_accuracy_threshold": 99.9,
         "act_fn": "gelu",
@@ -3510,51 +7039,66 @@
         "siit_sampling": "sample_all",
         "val_iia_sampling": "all"
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/39/meta.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/39/ll_model.pth",
-      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/39/edges.pkl"
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/50/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/50/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/50/edges.pkl"
     },
     {
-      "case_id": "4",
-      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/4",
-      "task_description": "Return fraction of previous open tokens minus the fraction of close tokens.",
+      "case_id": "123",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/123",
+      "task_description": "Apply arccosine to each element of the input sequence.",
       "vocab": [
-        "(",
-        ")",
-        "a",
-        "b",
-        "c"
+        -1.0,
+        -0.9,
+        -0.8,
+        -0.7,
+        -0.6,
+        -0.5,
+        -0.4,
+        -0.30000000000000004,
+        -0.19999999999999996,
+        -0.09999999999999998,
+        0.0,
+        0.10000000000000009,
+        0.19999999999999996,
+        0.30000000000000004,
+        0.3999999999999999,
+        0.5,
+        0.6000000000000001,
+        0.7,
+        0.8,
+        0.8999999999999999
       ],
       "max_seq_len": 10,
       "min_seq_len": 4,
       "files": [
         {
           "file_name": "edges.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/edges.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/123/edges.pkl"
         },
         {
           "file_name": "ll_model.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/ll_model.pth"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/123/ll_model.pth"
         },
         {
           "file_name": "ll_model_cfg.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/ll_model_cfg.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/123/ll_model_cfg.pkl"
         },
         {
           "file_name": "meta.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/meta.json"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/123/meta.json"
         }
       ],
       "transformer_cfg": {
         "n_layers": 2,
-        "d_model": 20,
+        "d_model": 4,
         "n_ctx": 10,
-        "d_head": 5,
+        "d_head": 1,
         "model_name": "custom",
         "n_heads": 4,
-        "d_mlp": 80,
+        "d_mlp": 16,
         "act_fn": "gelu",
-        "d_vocab": 7,
+        "d_vocab": 22,
         "eps": 1e-05,
         "use_attn_result": true,
         "use_attn_scale": true,
@@ -3576,15 +7120,15 @@
         "attention_dir": "causal",
         "attn_only": false,
         "seed": 0,
-        "initializer_range": 0.17056057308448835,
+        "initializer_range": 0.11202240672224079,
         "init_weights": true,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
         "final_rms": false,
-        "d_vocab_out": 1,
+        "d_vocab_out": 20,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
-        "n_params": 9600,
+        "n_params": 384,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
@@ -3594,29 +7138,49 @@
         "post_embedding_ln": false,
         "rotary_base": 10000,
         "trust_remote_code": false,
-        "rotary_adjacent_pairs": false
+        "rotary_adjacent_pairs": false,
+        "load_in_4bit": false,
+        "num_experts": null,
+        "experts_per_token": null
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/ll_model_cfg.pkl",
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/123/ll_model_cfg.pkl",
       "training_args": {
+        "output_dir": "/circuits-benchmark/results",
         "atol": 0.05,
         "lr": 0.001,
-        "use_single_loss": false,
+        "use_single_loss": true,
         "iit_weight": 1.0,
-        "behavior_weight": 1.0,
-        "strict_weight": 0.4,
-        "epochs": 2000,
+        "behavior_weight": 0.4,
+        "strict_weight": 0.7,
+        "epochs": 1000,
+        "early_stop_accuracy_threshold": 99.9,
         "act_fn": "gelu",
+        "use_wandb": true,
+        "save_model_to_wandb": true,
         "clip_grad_norm": 0.1,
-        "lr_scheduler": ""
+        "lr_scheduler": "linear",
+        "model_pair": "strict",
+        "same_size": false,
+        "seed": 67,
+        "batch_size": 256,
+        "include_mlp": false,
+        "detach_while_caching": true,
+        "scheduler_val_metric": [
+          "val/accuracy",
+          "val/IIA",
+          "val/strict_accuracy"
+        ],
+        "siit_sampling": "sample_all",
+        "val_iia_sampling": "all"
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/meta.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/ll_model.pth",
-      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/edges.pkl"
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/123/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/123/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/123/edges.pkl"
     },
     {
-      "case_id": "40",
-      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/40",
-      "task_description": "Sum the last and previous to last digits of a number",
+      "case_id": "86",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/86",
+      "task_description": "Check if each element is a power of 2. Return 1 if true, otherwise 0.",
       "vocab": [
         0,
         1,
@@ -3628,44 +7192,26 @@
         7,
         8,
         9,
-        10,
-        11,
-        12,
-        13,
-        14,
-        15,
-        16,
-        17,
-        18,
-        19,
-        20,
-        21,
-        22,
-        23,
-        24,
-        25,
-        26,
-        27,
-        28
+        10
       ],
       "max_seq_len": 10,
       "min_seq_len": 4,
       "files": [
         {
           "file_name": "edges.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/40/edges.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/86/edges.pkl"
         },
         {
           "file_name": "ll_model.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/40/ll_model.pth"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/86/ll_model.pth"
         },
         {
           "file_name": "ll_model_cfg.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/40/ll_model_cfg.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/86/ll_model_cfg.pkl"
         },
         {
           "file_name": "meta.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/40/meta.json"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/86/meta.json"
         }
       ],
       "transformer_cfg": {
@@ -3677,7 +7223,7 @@
         "n_heads": 4,
         "d_mlp": 16,
         "act_fn": "gelu",
-        "d_vocab": 31,
+        "d_vocab": 13,
         "eps": 1e-05,
         "use_attn_result": true,
         "use_attn_scale": true,
@@ -3699,12 +7245,12 @@
         "attention_dir": "causal",
         "attn_only": false,
         "seed": 0,
-        "initializer_range": 0.09847319278346618,
+        "initializer_range": 0.16329931618554522,
         "init_weights": true,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
         "final_rms": false,
-        "d_vocab_out": 12,
+        "d_vocab_out": 2,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
         "n_params": 384,
@@ -3722,7 +7268,7 @@
         "num_experts": null,
         "experts_per_token": null
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/40/ll_model_cfg.pkl",
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/86/ll_model_cfg.pkl",
       "training_args": {
         "output_dir": "/circuits-benchmark/results",
         "atol": 0.05,
@@ -3752,14 +7298,14 @@
         "siit_sampling": "sample_all",
         "val_iia_sampling": "all"
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/40/meta.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/40/ll_model.pth",
-      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/40/edges.pkl"
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/86/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/86/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/86/edges.pkl"
     },
     {
-      "case_id": "44",
-      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/44",
-      "task_description": "Replaces each element with the number of elements greater than it in the sequence",
+      "case_id": "105",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/105",
+      "task_description": "Replaces each number with the next prime after that number.",
       "vocab": [
         0,
         1,
@@ -3771,38 +7317,57 @@
         7,
         8,
         9,
-        10
+        10,
+        11,
+        12,
+        13,
+        14,
+        15,
+        16,
+        17,
+        18,
+        19,
+        20,
+        21,
+        22,
+        23,
+        24,
+        25,
+        26,
+        27,
+        28,
+        29
       ],
       "max_seq_len": 10,
       "min_seq_len": 4,
       "files": [
         {
           "file_name": "edges.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/44/edges.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/105/edges.pkl"
         },
         {
           "file_name": "ll_model.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/44/ll_model.pth"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/105/ll_model.pth"
         },
         {
           "file_name": "ll_model_cfg.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/44/ll_model_cfg.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/105/ll_model_cfg.pkl"
         },
         {
           "file_name": "meta.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/44/meta.json"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/105/meta.json"
         }
       ],
       "transformer_cfg": {
         "n_layers": 2,
-        "d_model": 24,
+        "d_model": 4,
         "n_ctx": 10,
-        "d_head": 6,
+        "d_head": 1,
         "model_name": "custom",
         "n_heads": 4,
-        "d_mlp": 96,
+        "d_mlp": 16,
         "act_fn": "gelu",
-        "d_vocab": 13,
+        "d_vocab": 32,
         "eps": 1e-05,
         "use_attn_result": true,
         "use_attn_scale": true,
@@ -3821,10 +7386,10 @@
         "init_mode": "gpt2",
         "normalization_type": null,
         "n_devices": 1,
-        "attention_dir": "bidirectional",
+        "attention_dir": "causal",
         "attn_only": false,
         "seed": 0,
-        "initializer_range": 0.13719886811400708,
+        "initializer_range": 0.11202240672224079,
         "init_weights": true,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
@@ -3832,7 +7397,7 @@
         "d_vocab_out": 10,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
-        "n_params": 13824,
+        "n_params": 384,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
@@ -3847,7 +7412,7 @@
         "num_experts": null,
         "experts_per_token": null
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/44/ll_model_cfg.pkl",
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/105/ll_model_cfg.pkl",
       "training_args": {
         "output_dir": "/circuits-benchmark/results",
         "atol": 0.05,
@@ -3855,7 +7420,7 @@
         "use_single_loss": true,
         "iit_weight": 1.0,
         "behavior_weight": 0.4,
-        "strict_weight": 0.4,
+        "strict_weight": 0.7,
         "epochs": 1000,
         "early_stop_accuracy_threshold": 99.9,
         "act_fn": "gelu",
@@ -3875,16 +7440,16 @@
           "val/strict_accuracy"
         ],
         "siit_sampling": "sample_all",
-        "val_iia_sampling": "all"
-      },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/44/meta.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/44/ll_model.pth",
-      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/44/edges.pkl"
+        "val_iia_sampling": "all"
+      },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/105/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/105/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/105/edges.pkl"
     },
     {
-      "case_id": "45",
-      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/45",
-      "task_description": "Doubles the first half of the sequence",
+      "case_id": "58",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/58",
+      "task_description": "Mirrors the first half of the sequence to the second half.",
       "vocab": [
         0,
         1,
@@ -3903,29 +7468,29 @@
       "files": [
         {
           "file_name": "edges.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/45/edges.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/58/edges.pkl"
         },
         {
           "file_name": "ll_model.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/45/ll_model.pth"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/58/ll_model.pth"
         },
         {
           "file_name": "ll_model_cfg.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/45/ll_model_cfg.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/58/ll_model_cfg.pkl"
         },
         {
           "file_name": "meta.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/45/meta.json"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/58/meta.json"
         }
       ],
       "transformer_cfg": {
         "n_layers": 3,
-        "d_model": 24,
+        "d_model": 32,
         "n_ctx": 10,
-        "d_head": 6,
+        "d_head": 8,
         "model_name": "custom",
         "n_heads": 4,
-        "d_mlp": 96,
+        "d_mlp": 128,
         "act_fn": "gelu",
         "d_vocab": 13,
         "eps": 1e-05,
@@ -3949,15 +7514,15 @@
         "attention_dir": "bidirectional",
         "attn_only": false,
         "seed": 0,
-        "initializer_range": 0.11094003924504584,
+        "initializer_range": 0.10415112878465911,
         "init_weights": true,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
         "final_rms": false,
-        "d_vocab_out": 16,
+        "d_vocab_out": 11,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
-        "n_params": 20736,
+        "n_params": 36864,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
@@ -3972,7 +7537,7 @@
         "num_experts": null,
         "experts_per_token": null
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/45/ll_model_cfg.pkl",
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/58/ll_model_cfg.pkl",
       "training_args": {
         "output_dir": "/circuits-benchmark/results",
         "atol": 0.05,
@@ -4002,14 +7567,14 @@
         "siit_sampling": "sample_all",
         "val_iia_sampling": "all"
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/45/meta.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/45/ll_model.pth",
-      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/45/edges.pkl"
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/58/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/58/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/58/edges.pkl"
     },
     {
-      "case_id": "51",
-      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/51",
-      "task_description": "Checks if each element is a Fibonacci number",
+      "case_id": "70",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/70",
+      "task_description": "Apply the cosine function to each element of the input sequence.",
       "vocab": [
         0,
         1,
@@ -4021,115 +7586,26 @@
         7,
         8,
         9,
-        10,
-        11,
-        12,
-        13,
-        14,
-        15,
-        16,
-        17,
-        18,
-        19,
-        20,
-        21,
-        22,
-        23,
-        24,
-        25,
-        26,
-        27,
-        28,
-        29,
-        30,
-        31,
-        32,
-        33,
-        34,
-        35,
-        36,
-        37,
-        38,
-        39,
-        40,
-        41,
-        42,
-        43,
-        44,
-        45,
-        46,
-        47,
-        48,
-        49,
-        50,
-        51,
-        52,
-        53,
-        54,
-        55,
-        56,
-        57,
-        58,
-        59,
-        60,
-        61,
-        62,
-        63,
-        64,
-        65,
-        66,
-        67,
-        68,
-        69,
-        70,
-        71,
-        72,
-        73,
-        74,
-        75,
-        76,
-        77,
-        78,
-        79,
-        80,
-        81,
-        82,
-        83,
-        84,
-        85,
-        86,
-        87,
-        88,
-        89,
-        90,
-        91,
-        92,
-        93,
-        94,
-        95,
-        96,
-        97,
-        98,
-        99
+        10
       ],
       "max_seq_len": 10,
       "min_seq_len": 4,
       "files": [
         {
           "file_name": "edges.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/51/edges.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/70/edges.pkl"
         },
         {
           "file_name": "ll_model.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/51/ll_model.pth"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/70/ll_model.pth"
         },
         {
           "file_name": "ll_model_cfg.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/51/ll_model_cfg.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/70/ll_model_cfg.pkl"
         },
         {
           "file_name": "meta.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/51/meta.json"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/70/meta.json"
         }
       ],
       "transformer_cfg": {
@@ -4141,7 +7617,7 @@
         "n_heads": 4,
         "d_mlp": 16,
         "act_fn": "gelu",
-        "d_vocab": 102,
+        "d_vocab": 13,
         "eps": 1e-05,
         "use_attn_result": true,
         "use_attn_scale": true,
@@ -4163,12 +7639,12 @@
         "attention_dir": "causal",
         "attn_only": false,
         "seed": 0,
-        "initializer_range": 0.07525766947068778,
+        "initializer_range": 0.1392621247645583,
         "init_weights": true,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
         "final_rms": false,
-        "d_vocab_out": 2,
+        "d_vocab_out": 11,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
         "n_params": 384,
@@ -4186,7 +7662,7 @@
         "num_experts": null,
         "experts_per_token": null
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/51/ll_model_cfg.pkl",
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/70/ll_model_cfg.pkl",
       "training_args": {
         "output_dir": "/circuits-benchmark/results",
         "atol": 0.05,
@@ -4194,7 +7670,7 @@
         "use_single_loss": true,
         "iit_weight": 1.0,
         "behavior_weight": 0.4,
-        "strict_weight": 0.4,
+        "strict_weight": 0.7,
         "epochs": 1000,
         "early_stop_accuracy_threshold": 99.9,
         "act_fn": "gelu",
@@ -4216,14 +7692,14 @@
         "siit_sampling": "sample_all",
         "val_iia_sampling": "all"
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/51/meta.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/51/ll_model.pth",
-      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/51/edges.pkl"
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/70/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/70/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/70/edges.pkl"
     },
     {
-      "case_id": "56",
-      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/56",
-      "task_description": "Sets every third element to zero.",
+      "case_id": "62",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/62",
+      "task_description": "Replaces each element with its factorial.",
       "vocab": [
         0,
         1,
@@ -4242,19 +7718,19 @@
       "files": [
         {
           "file_name": "edges.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/56/edges.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/62/edges.pkl"
         },
         {
           "file_name": "ll_model.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/56/ll_model.pth"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/62/ll_model.pth"
         },
         {
           "file_name": "ll_model_cfg.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/56/ll_model_cfg.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/62/ll_model_cfg.pkl"
         },
         {
           "file_name": "meta.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/56/meta.json"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/62/meta.json"
         }
       ],
       "transformer_cfg": {
@@ -4288,12 +7764,12 @@
         "attention_dir": "causal",
         "attn_only": false,
         "seed": 0,
-        "initializer_range": 0.13333333333333333,
+        "initializer_range": 0.1414213562373095,
         "init_weights": true,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
         "final_rms": false,
-        "d_vocab_out": 11,
+        "d_vocab_out": 10,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
         "n_params": 384,
@@ -4311,7 +7787,7 @@
         "num_experts": null,
         "experts_per_token": null
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/56/ll_model_cfg.pkl",
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/62/ll_model_cfg.pkl",
       "training_args": {
         "output_dir": "/circuits-benchmark/results",
         "atol": 0.05,
@@ -4319,7 +7795,7 @@
         "use_single_loss": true,
         "iit_weight": 1.0,
         "behavior_weight": 0.4,
-        "strict_weight": 0.4,
+        "strict_weight": 0.7,
         "epochs": 1000,
         "early_stop_accuracy_threshold": 99.9,
         "act_fn": "gelu",
@@ -4341,14 +7817,14 @@
         "siit_sampling": "sample_all",
         "val_iia_sampling": "all"
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/56/meta.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/56/ll_model.pth",
-      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/56/edges.pkl"
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/62/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/62/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/62/edges.pkl"
     },
     {
-      "case_id": "58",
-      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/58",
-      "task_description": "Mirrors the first half of the sequence to the second half.",
+      "case_id": "111",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/111",
+      "task_description": "Returns the last element of the sequence and pads the rest with zeros.",
       "vocab": [
         0,
         1,
@@ -4367,29 +7843,29 @@
       "files": [
         {
           "file_name": "edges.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/58/edges.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/111/edges.pkl"
         },
         {
           "file_name": "ll_model.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/58/ll_model.pth"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/111/ll_model.pth"
         },
         {
           "file_name": "ll_model_cfg.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/58/ll_model_cfg.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/111/ll_model_cfg.pkl"
         },
         {
           "file_name": "meta.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/58/meta.json"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/111/meta.json"
         }
       ],
       "transformer_cfg": {
         "n_layers": 3,
-        "d_model": 32,
+        "d_model": 24,
         "n_ctx": 10,
-        "d_head": 8,
+        "d_head": 6,
         "model_name": "custom",
         "n_heads": 4,
-        "d_mlp": 128,
+        "d_mlp": 96,
         "act_fn": "gelu",
         "d_vocab": 13,
         "eps": 1e-05,
@@ -4413,7 +7889,7 @@
         "attention_dir": "bidirectional",
         "attn_only": false,
         "seed": 0,
-        "initializer_range": 0.10415112878465911,
+        "initializer_range": 0.09847319278346618,
         "init_weights": true,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
@@ -4421,7 +7897,7 @@
         "d_vocab_out": 11,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
-        "n_params": 36864,
+        "n_params": 20736,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
@@ -4436,7 +7912,7 @@
         "num_experts": null,
         "experts_per_token": null
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/58/ll_model_cfg.pkl",
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/111/ll_model_cfg.pkl",
       "training_args": {
         "output_dir": "/circuits-benchmark/results",
         "atol": 0.05,
@@ -4466,57 +7942,54 @@
         "siit_sampling": "sample_all",
         "val_iia_sampling": "all"
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/58/meta.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/58/ll_model.pth",
-      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/58/edges.pkl"
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/111/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/111/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/111/edges.pkl"
     },
     {
-      "case_id": "63",
-      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/63",
-      "task_description": "Replaces each element with the number of elements less than it in the sequence.",
+      "case_id": "35",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/35",
+      "task_description": "Alternates capitalization of each character in words.",
       "vocab": [
-        0,
-        1,
-        2,
-        3,
-        4,
-        5,
-        6,
-        7,
-        8,
-        9,
-        10
+        "J",
+        "LB",
+        "TPSI",
+        "V",
+        "b",
+        "no",
+        "oCLrZaW",
+        "poiVg"
       ],
       "max_seq_len": 10,
       "min_seq_len": 4,
       "files": [
         {
           "file_name": "edges.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/63/edges.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/edges.pkl"
         },
         {
           "file_name": "ll_model.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/63/ll_model.pth"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/ll_model.pth"
         },
         {
           "file_name": "ll_model_cfg.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/63/ll_model_cfg.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/ll_model_cfg.pkl"
         },
         {
           "file_name": "meta.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/63/meta.json"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/meta.json"
         }
       ],
       "transformer_cfg": {
         "n_layers": 2,
-        "d_model": 24,
+        "d_model": 9,
         "n_ctx": 10,
-        "d_head": 6,
+        "d_head": 2,
         "model_name": "custom",
         "n_heads": 4,
-        "d_mlp": 96,
+        "d_mlp": 36,
         "act_fn": "gelu",
-        "d_vocab": 13,
+        "d_vocab": 10,
         "eps": 1e-05,
         "use_attn_result": true,
         "use_attn_scale": true,
@@ -4535,18 +8008,18 @@
         "init_mode": "gpt2",
         "normalization_type": null,
         "n_devices": 1,
-        "attention_dir": "bidirectional",
+        "attention_dir": "causal",
         "attn_only": false,
         "seed": 0,
-        "initializer_range": 0.13719886811400708,
+        "initializer_range": 0.1539600717839002,
         "init_weights": true,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
         "final_rms": false,
-        "d_vocab_out": 10,
+        "d_vocab_out": 8,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
-        "n_params": 13824,
+        "n_params": 1872,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
@@ -4556,92 +8029,66 @@
         "post_embedding_ln": false,
         "rotary_base": 10000,
         "trust_remote_code": false,
-        "rotary_adjacent_pairs": false,
-        "load_in_4bit": false,
-        "num_experts": null,
-        "experts_per_token": null
+        "rotary_adjacent_pairs": false
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/63/ll_model_cfg.pkl",
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/ll_model_cfg.pkl",
       "training_args": {
-        "output_dir": "/circuits-benchmark/results",
         "atol": 0.05,
-        "lr": 0.001,
-        "use_single_loss": true,
-        "iit_weight": 1.0,
-        "behavior_weight": 0.4,
-        "strict_weight": 0.4,
-        "epochs": 1000,
-        "early_stop_accuracy_threshold": 99.9,
+        "lr": 0.01,
+        "use_single_loss": false,
+        "iit_weight": 1.0,
+        "behavior_weight": 1.0,
+        "strict_weight": 1.0,
+        "epochs": 2000,
         "act_fn": "gelu",
-        "use_wandb": true,
-        "save_model_to_wandb": true,
         "clip_grad_norm": 0.1,
-        "lr_scheduler": "linear",
-        "model_pair": "strict",
-        "same_size": false,
-        "seed": 67,
-        "batch_size": 256,
-        "include_mlp": false,
-        "detach_while_caching": true,
-        "scheduler_val_metric": [
-          "val/accuracy",
-          "val/IIA",
-          "val/strict_accuracy"
-        ],
-        "siit_sampling": "sample_all",
-        "val_iia_sampling": "all"
+        "lr_scheduler": "",
+        "model_pair": "strict"
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/63/meta.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/63/ll_model.pth",
-      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/63/edges.pkl"
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/edges.pkl"
     },
     {
-      "case_id": "69",
-      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/69",
-      "task_description": "Assign -1, 0, or 1 to each element of the input sequence based on its sign.",
+      "case_id": "3",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/3",
+      "task_description": "Returns the fraction of 'x' in the input up to the i-th position for all i.",
       "vocab": [
-        0,
-        1,
-        2,
-        3,
-        4,
-        5,
-        6,
-        7,
-        8,
-        9,
-        10
+        "a",
+        "b",
+        "c",
+        "x"
       ],
-      "max_seq_len": 10,
+      "max_seq_len": 5,
       "min_seq_len": 4,
       "files": [
         {
           "file_name": "edges.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/69/edges.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/edges.pkl"
         },
         {
           "file_name": "ll_model.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/69/ll_model.pth"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/ll_model.pth"
         },
         {
           "file_name": "ll_model_cfg.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/69/ll_model_cfg.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/ll_model_cfg.pkl"
         },
         {
           "file_name": "meta.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/69/meta.json"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/meta.json"
         }
       ],
       "transformer_cfg": {
         "n_layers": 2,
-        "d_model": 4,
-        "n_ctx": 10,
-        "d_head": 1,
+        "d_model": 12,
+        "n_ctx": 5,
+        "d_head": 3,
         "model_name": "custom",
         "n_heads": 4,
-        "d_mlp": 16,
+        "d_mlp": 48,
         "act_fn": "gelu",
-        "d_vocab": 13,
+        "d_vocab": 6,
         "eps": 1e-05,
         "use_attn_result": true,
         "use_attn_scale": true,
@@ -4663,15 +8110,15 @@
         "attention_dir": "causal",
         "attn_only": false,
         "seed": 0,
-        "initializer_range": 0.16329931618554522,
+        "initializer_range": 0.22188007849009167,
         "init_weights": true,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
         "final_rms": false,
-        "d_vocab_out": 2,
+        "d_vocab_out": 1,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
-        "n_params": 384,
+        "n_params": 3456,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
@@ -4681,82 +8128,64 @@
         "post_embedding_ln": false,
         "rotary_base": 10000,
         "trust_remote_code": false,
-        "rotary_adjacent_pairs": false,
-        "load_in_4bit": false,
-        "num_experts": null,
-        "experts_per_token": null
+        "rotary_adjacent_pairs": false
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/69/ll_model_cfg.pkl",
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/ll_model_cfg.pkl",
       "training_args": {
-        "output_dir": "/circuits-benchmark/results",
         "atol": 0.05,
-        "lr": 0.001,
-        "use_single_loss": true,
+        "lr": 0.01,
+        "use_single_loss": false,
         "iit_weight": 1.0,
-        "behavior_weight": 0.4,
-        "strict_weight": 0.4,
-        "epochs": 1000,
-        "early_stop_accuracy_threshold": 99.9,
+        "behavior_weight": 1.0,
+        "strict_weight": 10.0,
+        "epochs": 2000,
         "act_fn": "gelu",
-        "use_wandb": true,
-        "save_model_to_wandb": true,
         "clip_grad_norm": 0.1,
-        "lr_scheduler": "linear",
+        "lr_scheduler": "",
         "model_pair": "strict",
-        "same_size": false,
-        "seed": 67,
-        "batch_size": 256,
-        "include_mlp": false,
-        "detach_while_caching": true,
-        "scheduler_val_metric": [
-          "val/accuracy",
-          "val/IIA",
-          "val/strict_accuracy"
-        ],
-        "siit_sampling": "sample_all",
-        "val_iia_sampling": "all"
+        "same_size": false
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/69/meta.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/69/ll_model.pth",
-      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/69/edges.pkl"
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/edges.pkl"
     },
     {
-      "case_id": "7",
-      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/7",
-      "task_description": "Returns the number of times each token occurs in the input.",
+      "case_id": "36",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/36",
+      "task_description": "Classifies each token as 'positive', 'negative', or 'neutral' based on emojis.",
       "vocab": [
-        "a",
-        "b",
-        "c"
+        "\ud83d\udcd8",
+        "\ud83d\ude0a",
+        "\ud83d\ude22"
       ],
       "max_seq_len": 10,
       "min_seq_len": 4,
       "files": [
         {
           "file_name": "edges.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/7/edges.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/edges.pkl"
         },
         {
           "file_name": "ll_model.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/7/ll_model.pth"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/ll_model.pth"
         },
         {
           "file_name": "ll_model_cfg.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/7/ll_model_cfg.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/ll_model_cfg.pkl"
         },
         {
           "file_name": "meta.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/7/meta.json"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/meta.json"
         }
       ],
       "transformer_cfg": {
         "n_layers": 2,
-        "d_model": 17,
+        "d_model": 6,
         "n_ctx": 10,
-        "d_head": 4,
+        "d_head": 1,
         "model_name": "custom",
         "n_heads": 4,
-        "d_mlp": 68,
+        "d_mlp": 24,
         "act_fn": "gelu",
         "d_vocab": 5,
         "eps": 1e-05,
@@ -4777,18 +8206,18 @@
         "init_mode": "gpt2",
         "normalization_type": null,
         "n_devices": 1,
-        "attention_dir": "bidirectional",
+        "attention_dir": "causal",
         "attn_only": false,
         "seed": 0,
-        "initializer_range": 0.15689290811054724,
+        "initializer_range": 0.19402850002906638,
         "init_weights": true,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
         "final_rms": false,
-        "d_vocab_out": 10,
+        "d_vocab_out": 3,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
-        "n_params": 6800,
+        "n_params": 768,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
@@ -4800,34 +8229,28 @@
         "trust_remote_code": false,
         "rotary_adjacent_pairs": false
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/7/ll_model_cfg.pkl",
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/ll_model_cfg.pkl",
       "training_args": {
         "atol": 0.05,
         "lr": 0.01,
         "use_single_loss": false,
         "iit_weight": 1.0,
         "behavior_weight": 1.0,
-        "strict_weight": 0.5,
+        "strict_weight": 1.0,
         "epochs": 2000,
         "act_fn": "gelu",
         "clip_grad_norm": 0.1,
         "lr_scheduler": "",
-        "model_pair": "strict",
-        "same_size": false,
-        "seed": 1234,
-        "batch_size": 256,
-        "include_mlp": false,
-        "next_token": false,
-        "detach_while_caching": true
+        "model_pair": "strict"
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/7/meta.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/7/ll_model.pth",
-      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/7/edges.pkl"
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/edges.pkl"
     },
     {
-      "case_id": "79",
-      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/79",
-      "task_description": "Check if each number in a sequence is prime",
+      "case_id": "64",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/64",
+      "task_description": "Cubes each element in the sequence.",
       "vocab": [
         0,
         1,
@@ -4846,19 +8269,19 @@
       "files": [
         {
           "file_name": "edges.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/79/edges.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/64/edges.pkl"
         },
         {
           "file_name": "ll_model.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/79/ll_model.pth"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/64/ll_model.pth"
         },
         {
           "file_name": "ll_model_cfg.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/79/ll_model_cfg.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/64/ll_model_cfg.pkl"
         },
         {
           "file_name": "meta.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/79/meta.json"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/64/meta.json"
         }
       ],
       "transformer_cfg": {
@@ -4892,12 +8315,12 @@
         "attention_dir": "causal",
         "attn_only": false,
         "seed": 0,
-        "initializer_range": 0.16329931618554522,
+        "initializer_range": 0.1392621247645583,
         "init_weights": true,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
         "final_rms": false,
-        "d_vocab_out": 2,
+        "d_vocab_out": 11,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
         "n_params": 384,
@@ -4915,7 +8338,7 @@
         "num_experts": null,
         "experts_per_token": null
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/79/ll_model_cfg.pkl",
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/64/ll_model_cfg.pkl",
       "training_args": {
         "output_dir": "/circuits-benchmark/results",
         "atol": 0.05,
@@ -4923,7 +8346,7 @@
         "use_single_loss": true,
         "iit_weight": 1.0,
         "behavior_weight": 0.4,
-        "strict_weight": 0.4,
+        "strict_weight": 0.7,
         "epochs": 1000,
         "early_stop_accuracy_threshold": 99.9,
         "act_fn": "gelu",
@@ -4945,14 +8368,14 @@
         "siit_sampling": "sample_all",
         "val_iia_sampling": "all"
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/79/meta.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/79/ll_model.pth",
-      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/79/edges.pkl"
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/64/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/64/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/64/edges.pkl"
     },
     {
-      "case_id": "82",
-      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/82",
-      "task_description": "Halve the elements in the second half of the sequence.",
+      "case_id": "8",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/8",
+      "task_description": "Identity",
       "vocab": [
         0,
         1,
@@ -4971,29 +8394,29 @@
       "files": [
         {
           "file_name": "edges.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/edges.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/edges.pkl"
         },
         {
           "file_name": "ll_model.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/ll_model.pth"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/ll_model.pth"
         },
         {
           "file_name": "ll_model_cfg.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/ll_model_cfg.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/ll_model_cfg.pkl"
         },
         {
           "file_name": "meta.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/meta.json"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/meta.json"
         }
       ],
       "transformer_cfg": {
-        "n_layers": 4,
-        "d_model": 24,
+        "n_layers": 2,
+        "d_model": 4,
         "n_ctx": 10,
-        "d_head": 6,
+        "d_head": 1,
         "model_name": "custom",
         "n_heads": 4,
-        "d_mlp": 96,
+        "d_mlp": 16,
         "act_fn": "gelu",
         "d_vocab": 13,
         "eps": 1e-05,
@@ -5014,18 +8437,18 @@
         "init_mode": "gpt2",
         "normalization_type": null,
         "n_devices": 1,
-        "attention_dir": "bidirectional",
+        "attention_dir": "causal",
         "attn_only": false,
         "seed": 0,
-        "initializer_range": 0.1059625885652035,
+        "initializer_range": 0.1392621247645583,
         "init_weights": true,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
         "final_rms": false,
-        "d_vocab_out": 16,
+        "d_vocab_out": 11,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
-        "n_params": 27648,
+        "n_params": 384,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
@@ -5040,7 +8463,7 @@
         "num_experts": null,
         "experts_per_token": null
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/ll_model_cfg.pkl",
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/ll_model_cfg.pkl",
       "training_args": {
         "output_dir": "/circuits-benchmark/results",
         "atol": 0.05,
@@ -5048,7 +8471,7 @@
         "use_single_loss": true,
         "iit_weight": 1.0,
         "behavior_weight": 0.4,
-        "strict_weight": 0.4,
+        "strict_weight": 0.7,
         "epochs": 1000,
         "early_stop_accuracy_threshold": 99.9,
         "act_fn": "gelu",
@@ -5070,14 +8493,14 @@
         "siit_sampling": "sample_all",
         "val_iia_sampling": "all"
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/meta.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/ll_model.pth",
-      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/edges.pkl"
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/edges.pkl"
     },
     {
-      "case_id": "86",
-      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/86",
-      "task_description": "Check if each element is a power of 2. Return 1 if true, otherwise 0.",
+      "case_id": "103",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/103",
+      "task_description": "Swap consecutive numbers in a list",
       "vocab": [
         0,
         1,
@@ -5096,29 +8519,29 @@
       "files": [
         {
           "file_name": "edges.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/86/edges.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/103/edges.pkl"
         },
         {
           "file_name": "ll_model.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/86/ll_model.pth"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/103/ll_model.pth"
         },
         {
           "file_name": "ll_model_cfg.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/86/ll_model_cfg.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/103/ll_model_cfg.pkl"
         },
         {
           "file_name": "meta.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/86/meta.json"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/103/meta.json"
         }
       ],
       "transformer_cfg": {
-        "n_layers": 2,
-        "d_model": 4,
+        "n_layers": 3,
+        "d_model": 24,
         "n_ctx": 10,
-        "d_head": 1,
+        "d_head": 6,
         "model_name": "custom",
         "n_heads": 4,
-        "d_mlp": 16,
+        "d_mlp": 96,
         "act_fn": "gelu",
         "d_vocab": 13,
         "eps": 1e-05,
@@ -5139,18 +8562,18 @@
         "init_mode": "gpt2",
         "normalization_type": null,
         "n_devices": 1,
-        "attention_dir": "causal",
+        "attention_dir": "bidirectional",
         "attn_only": false,
         "seed": 0,
-        "initializer_range": 0.16329931618554522,
+        "initializer_range": 0.10787197799411874,
         "init_weights": true,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
         "final_rms": false,
-        "d_vocab_out": 2,
+        "d_vocab_out": 11,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
-        "n_params": 384,
+        "n_params": 20736,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
@@ -5165,7 +8588,7 @@
         "num_experts": null,
         "experts_per_token": null
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/86/ll_model_cfg.pkl",
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/103/ll_model_cfg.pkl",
       "training_args": {
         "output_dir": "/circuits-benchmark/results",
         "atol": 0.05,
@@ -5195,14 +8618,14 @@
         "siit_sampling": "sample_all",
         "val_iia_sampling": "all"
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/86/meta.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/86/ll_model.pth",
-      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/86/edges.pkl"
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/103/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/103/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/103/edges.pkl"
     },
     {
-      "case_id": "87",
-      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/87",
-      "task_description": "Binarize a sequence of integers using a threshold.",
+      "case_id": "56",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/56",
+      "task_description": "Sets every third element to zero.",
       "vocab": [
         0,
         1,
@@ -5221,19 +8644,19 @@
       "files": [
         {
           "file_name": "edges.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/87/edges.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/56/edges.pkl"
         },
         {
           "file_name": "ll_model.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/87/ll_model.pth"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/56/ll_model.pth"
         },
         {
           "file_name": "ll_model_cfg.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/87/ll_model_cfg.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/56/ll_model_cfg.pkl"
         },
         {
           "file_name": "meta.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/87/meta.json"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/56/meta.json"
         }
       ],
       "transformer_cfg": {
@@ -5267,12 +8690,12 @@
         "attention_dir": "causal",
         "attn_only": false,
         "seed": 0,
-        "initializer_range": 0.16329931618554522,
+        "initializer_range": 0.13333333333333333,
         "init_weights": true,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
         "final_rms": false,
-        "d_vocab_out": 2,
+        "d_vocab_out": 11,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
         "n_params": 384,
@@ -5290,7 +8713,7 @@
         "num_experts": null,
         "experts_per_token": null
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/87/ll_model_cfg.pkl",
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/56/ll_model_cfg.pkl",
       "training_args": {
         "output_dir": "/circuits-benchmark/results",
         "atol": 0.05,
@@ -5320,14 +8743,14 @@
         "siit_sampling": "sample_all",
         "val_iia_sampling": "all"
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/87/meta.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/87/ll_model.pth",
-      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/87/edges.pkl"
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/56/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/56/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/56/edges.pkl"
     },
     {
-      "case_id": "93",
-      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/93",
-      "task_description": "Swaps the nth with the n+1th element if n%2==1.",
+      "case_id": "97",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/97",
+      "task_description": "Scale a sequence by its maximum element.",
       "vocab": [
         0,
         1,
@@ -5346,29 +8769,29 @@
       "files": [
         {
           "file_name": "edges.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/93/edges.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/97/edges.pkl"
         },
         {
           "file_name": "ll_model.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/93/ll_model.pth"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/97/ll_model.pth"
         },
         {
           "file_name": "ll_model_cfg.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/93/ll_model_cfg.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/97/ll_model_cfg.pkl"
         },
         {
           "file_name": "meta.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/93/meta.json"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/97/meta.json"
         }
       ],
       "transformer_cfg": {
         "n_layers": 3,
-        "d_model": 20,
+        "d_model": 200,
         "n_ctx": 10,
-        "d_head": 5,
+        "d_head": 50,
         "model_name": "custom",
         "n_heads": 4,
-        "d_mlp": 80,
+        "d_mlp": 800,
         "act_fn": "gelu",
         "d_vocab": 13,
         "eps": 1e-05,
@@ -5392,15 +8815,15 @@
         "attention_dir": "bidirectional",
         "attn_only": false,
         "seed": 0,
-        "initializer_range": 0.10886621079036347,
+        "initializer_range": 0.05405899027195888,
         "init_weights": true,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
         "final_rms": false,
-        "d_vocab_out": 11,
+        "d_vocab_out": 64,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
-        "n_params": 14400,
+        "n_params": 1440000,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
@@ -5415,7 +8838,7 @@
         "num_experts": null,
         "experts_per_token": null
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/93/ll_model_cfg.pkl",
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/97/ll_model_cfg.pkl",
       "training_args": {
         "output_dir": "/circuits-benchmark/results",
         "atol": 0.05,
@@ -5433,7 +8856,7 @@
         "lr_scheduler": "linear",
         "model_pair": "strict",
         "same_size": false,
-        "seed": 67,
+        "seed": 82,
         "batch_size": 256,
         "include_mlp": false,
         "detach_while_caching": true,
@@ -5445,223 +8868,134 @@
         "siit_sampling": "sample_all",
         "val_iia_sampling": "all"
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/93/meta.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/93/ll_model.pth",
-      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/93/edges.pkl"
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/97/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/97/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/97/edges.pkl"
     },
     {
-      "case_id": "ioi",
-      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/ioi",
-      "task_description": "Indirect Object Identification (IOI) task.",
-      "max_seq_len": 16,
-      "min_seq_len": 16,
-      "files": [
-        {
-          "file_name": "edges.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/edges.pkl"
-        },
-        {
-          "file_name": "ll_model.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/ll_model.pth"
-        },
-        {
-          "file_name": "ll_model_cfg.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/ll_model_cfg.pkl"
-        },
-        {
-          "file_name": "meta.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/meta.json"
-        }
+      "case_id": "46",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/46",
+      "task_description": "Decrements each element in the sequence by 1",
+      "vocab": [
+        0,
+        1,
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8,
+        9,
+        10
       ],
-      "transformer_cfg": {
-        "n_layers": 6,
-        "d_model": 64,
-        "n_ctx": 1024,
-        "d_head": 16,
-        "model_name": "gpt2",
-        "n_heads": 4,
-        "d_mlp": 3072,
-        "act_fn": "gelu_new",
-        "d_vocab": 50257,
-        "eps": 1e-05,
-        "use_attn_result": false,
-        "use_attn_scale": true,
-        "use_split_qkv_input": false,
-        "use_hook_mlp_in": false,
-        "use_attn_in": false,
-        "use_local_attn": false,
-        "original_architecture": "GPT2LMHeadModel",
-        "from_checkpoint": false,
-        "checkpoint_index": null,
-        "checkpoint_label_type": null,
-        "checkpoint_value": null,
-        "tokenizer_name": "gpt2",
-        "window_size": null,
-        "attn_types": null,
-        "init_mode": "gpt2",
-        "normalization_type": "LNPre",
-        "n_devices": 1,
-        "attention_dir": "causal",
-        "attn_only": false,
-        "seed": null,
-        "initializer_range": 0.02886751345948129,
-        "init_weights": false,
-        "scale_attn_by_inverse_layer_idx": false,
-        "positional_embedding_type": "standard",
-        "final_rms": false,
-        "d_vocab_out": 50257,
-        "parallel_attn_mlp": false,
-        "rotary_dim": null,
-        "n_params": 84934656,
-        "use_hook_tokens": false,
-        "gated_mlp": false,
-        "default_prepend_bos": true,
-        "dtype": "torch.float32",
-        "tokenizer_prepends_bos": false,
-        "n_key_value_heads": null,
-        "post_embedding_ln": false,
-        "rotary_base": 10000,
-        "trust_remote_code": false,
-        "rotary_adjacent_pairs": false
-      },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/ll_model_cfg.pkl",
-      "training_args": {
-        "next_token": false,
-        "non_ioi_thresh": 0.65,
-        "use_per_token_check": false,
-        "batch_size": 512,
-        "num_workers": 0,
-        "early_stop": true,
-        "lr_scheduler": null,
-        "scheduler_val_metric": [
-          "val/accuracy",
-          "val/IIA"
-        ],
-        "scheduler_mode": "max",
-        "scheduler_kwargs": {},
-        "clip_grad_norm": 1.0,
-        "seed": 0,
-        "lr": 0.001,
-        "detach_while_caching": true,
-        "optimizer_kwargs": {
-          "betas": [
-            0.9,
-            0.9
-          ]
-        },
-        "atol": 0.05,
-        "use_single_loss": false,
-        "iit_weight": 1.0,
-        "behavior_weight": 1.0,
-        "val_IIA_sampling": "random",
-        "use_all_tokens_for_behavior": false,
-        "strict_weight": 0.4,
-        "siit_sampling": "individual"
-      },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/meta.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/ll_model.pth",
-      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/edges.pkl"
-    },
-    {
-      "case_id": "ioi_next_token",
-      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/ioi_next_token",
-      "task_description": "Indirect Object Identification (IOI) task, trained using next token prediction.",
-      "max_seq_len": 16,
-      "min_seq_len": 16,
+      "max_seq_len": 10,
+      "min_seq_len": 4,
       "files": [
         {
           "file_name": "edges.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/edges.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/46/edges.pkl"
         },
         {
           "file_name": "ll_model.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/ll_model.pth"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/46/ll_model.pth"
         },
         {
           "file_name": "ll_model_cfg.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/ll_model_cfg.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/46/ll_model_cfg.pkl"
         },
         {
           "file_name": "meta.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/meta.json"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/46/meta.json"
         }
       ],
       "transformer_cfg": {
-        "n_layers": 6,
-        "d_model": 64,
-        "n_ctx": 1024,
-        "d_head": 16,
-        "model_name": "gpt2",
+        "n_layers": 2,
+        "d_model": 4,
+        "n_ctx": 10,
+        "d_head": 1,
+        "model_name": "custom",
         "n_heads": 4,
-        "d_mlp": 3072,
-        "act_fn": "gelu_new",
-        "d_vocab": 50257,
+        "d_mlp": 16,
+        "act_fn": "gelu",
+        "d_vocab": 13,
         "eps": 1e-05,
-        "use_attn_result": false,
+        "use_attn_result": true,
         "use_attn_scale": true,
-        "use_split_qkv_input": false,
-        "use_hook_mlp_in": false,
+        "use_split_qkv_input": true,
+        "use_hook_mlp_in": true,
         "use_attn_in": false,
         "use_local_attn": false,
-        "original_architecture": "GPT2LMHeadModel",
+        "original_architecture": null,
         "from_checkpoint": false,
         "checkpoint_index": null,
         "checkpoint_label_type": null,
         "checkpoint_value": null,
-        "tokenizer_name": "gpt2",
+        "tokenizer_name": null,
         "window_size": null,
         "attn_types": null,
         "init_mode": "gpt2",
-        "normalization_type": "LNPre",
+        "normalization_type": null,
         "n_devices": 1,
         "attention_dir": "causal",
         "attn_only": false,
-        "seed": null,
-        "initializer_range": 0.02886751345948129,
+        "seed": 0,
+        "initializer_range": 0.1392621247645583,
         "init_weights": true,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
         "final_rms": false,
-        "d_vocab_out": 50257,
+        "d_vocab_out": 11,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
-        "n_params": 2457600,
+        "n_params": 384,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
         "dtype": "torch.float32",
-        "tokenizer_prepends_bos": false,
+        "tokenizer_prepends_bos": null,
         "n_key_value_heads": null,
         "post_embedding_ln": false,
         "rotary_base": 10000,
         "trust_remote_code": false,
-        "rotary_adjacent_pairs": false
+        "rotary_adjacent_pairs": false,
+        "load_in_4bit": false,
+        "num_experts": null,
+        "experts_per_token": null
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/ll_model_cfg.pkl",
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/46/ll_model_cfg.pkl",
       "training_args": {
-        "next_token": true,
-        "non_ioi_thresh": 0.65,
-        "use_per_token_check": false,
-        "batch_size": 256,
+        "output_dir": "/circuits-benchmark/results",
+        "atol": 0.05,
         "lr": 0.001,
-        "num_workers": 0,
-        "early_stop": true,
-        "lr_scheduler": null,
+        "use_single_loss": true,
+        "iit_weight": 1.0,
+        "behavior_weight": 0.4,
+        "strict_weight": 0.7,
+        "epochs": 1000,
+        "early_stop_accuracy_threshold": 99.9,
+        "act_fn": "gelu",
+        "use_wandb": true,
+        "save_model_to_wandb": true,
+        "clip_grad_norm": 0.1,
+        "lr_scheduler": "linear",
+        "model_pair": "strict",
+        "same_size": false,
+        "seed": 67,
+        "batch_size": 256,
+        "include_mlp": false,
+        "detach_while_caching": true,
         "scheduler_val_metric": [
           "val/accuracy",
-          "val/IIA"
+          "val/IIA",
+          "val/strict_accuracy"
         ],
-        "scheduler_mode": "max",
-        "clip_grad_norm": 1.0,
-        "atol": 0.05,
-        "use_single_loss": false,
-        "iit_weight": 1.0,
-        "behavior_weight": 1.0,
-        "strict_weight": 0.4
+        "siit_sampling": "sample_all",
+        "val_iia_sampling": "all"
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/meta.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/ll_model.pth",
-      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/edges.pkl"
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/46/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/46/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/46/edges.pkl"
     }
   ]
 }
\ No newline at end of file