neuronpedia
commited on
Commit
•
6b1a2e3
1
Parent(s):
64e1a5e
Upload 48 files
Browse files- 0-res_mid_128k-oai/cfg.json +1 -0
- 0-res_mid_128k-oai/metrics.json +1 -0
- 0-res_mid_128k-oai/sae_weights.safetensors +3 -0
- 0-res_mid_128k-oai/sparsity.safetensors +3 -0
- 1-res_mid_128k-oai/cfg.json +1 -0
- 1-res_mid_128k-oai/metrics.json +1 -0
- 1-res_mid_128k-oai/sae_weights.safetensors +3 -0
- 1-res_mid_128k-oai/sparsity.safetensors +3 -0
- 10-res_mid_128k-oai/cfg.json +1 -0
- 10-res_mid_128k-oai/metrics.json +1 -0
- 10-res_mid_128k-oai/sae_weights.safetensors +3 -0
- 10-res_mid_128k-oai/sparsity.safetensors +3 -0
- 11-res_mid_128k-oai/cfg.json +1 -0
- 11-res_mid_128k-oai/metrics.json +1 -0
- 11-res_mid_128k-oai/sae_weights.safetensors +3 -0
- 11-res_mid_128k-oai/sparsity.safetensors +3 -0
- 2-res_mid_128k-oai/cfg.json +1 -0
- 2-res_mid_128k-oai/metrics.json +1 -0
- 2-res_mid_128k-oai/sae_weights.safetensors +3 -0
- 2-res_mid_128k-oai/sparsity.safetensors +3 -0
- 3-res_mid_128k-oai/cfg.json +1 -0
- 3-res_mid_128k-oai/metrics.json +1 -0
- 3-res_mid_128k-oai/sae_weights.safetensors +3 -0
- 3-res_mid_128k-oai/sparsity.safetensors +3 -0
- 4-res_mid_128k-oai/cfg.json +1 -0
- 4-res_mid_128k-oai/metrics.json +1 -0
- 4-res_mid_128k-oai/sae_weights.safetensors +3 -0
- 4-res_mid_128k-oai/sparsity.safetensors +3 -0
- 5-res_mid_128k-oai/cfg.json +1 -0
- 5-res_mid_128k-oai/metrics.json +1 -0
- 5-res_mid_128k-oai/sae_weights.safetensors +3 -0
- 5-res_mid_128k-oai/sparsity.safetensors +3 -0
- 6-res_mid_128k-oai/cfg.json +1 -0
- 6-res_mid_128k-oai/metrics.json +1 -0
- 6-res_mid_128k-oai/sae_weights.safetensors +3 -0
- 6-res_mid_128k-oai/sparsity.safetensors +3 -0
- 7-res_mid_128k-oai/cfg.json +1 -0
- 7-res_mid_128k-oai/metrics.json +1 -0
- 7-res_mid_128k-oai/sae_weights.safetensors +3 -0
- 7-res_mid_128k-oai/sparsity.safetensors +3 -0
- 8-res_mid_128k-oai/cfg.json +1 -0
- 8-res_mid_128k-oai/metrics.json +1 -0
- 8-res_mid_128k-oai/sae_weights.safetensors +3 -0
- 8-res_mid_128k-oai/sparsity.safetensors +3 -0
- 9-res_mid_128k-oai/cfg.json +1 -0
- 9-res_mid_128k-oai/metrics.json +1 -0
- 9-res_mid_128k-oai/sae_weights.safetensors +3 -0
- 9-res_mid_128k-oai/sparsity.safetensors +3 -0
0-res_mid_128k-oai/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"architecture": "standard", "d_in": 768, "d_sae": 131072, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.0.hook_resid_mid", "hook_layer": 0, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
|
0-res_mid_128k-oai/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"metrics/kl_div_with_sae": 0.0038433405570685863, "metrics/kl_div_with_ablation": 12.480283737182617, "metrics/ce_loss_with_sae": 3.603421926498413, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 15.861976623535156, "metrics/kl_div_score": 0.9996920470208848, "metrics/ce_loss_score": 0.9996446734723997, "metrics/l2_norm_in": 32.70796203613281, "metrics/l2_norm_out": 32.60739517211914, "metrics/l2_ratio": 0.9969363212585449, "metrics/l0": 31.98079490661621, "metrics/l1": 44.247344970703125, "metrics/explained_variance": 0.9764951467514038, "metrics/mse": 5.842685222625732, "metrics/total_tokens_evaluated": 6144}
|
0-res_mid_128k-oai/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:53c0b574675a7cb8d08aa9e285188adb4aa93010196c9abe2477b44efb31010b
|
3 |
+
size 805834048
|
0-res_mid_128k-oai/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c97f5d57a4ab3ed04963c38314c1d6190cb348deaf00ed450a55be408377837c
|
3 |
+
size 524368
|
1-res_mid_128k-oai/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"architecture": "standard", "d_in": 768, "d_sae": 131072, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.1.hook_resid_mid", "hook_layer": 1, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
|
1-res_mid_128k-oai/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"metrics/kl_div_with_sae": 0.006731455214321613, "metrics/kl_div_with_ablation": 16.217103958129883, "metrics/ce_loss_with_sae": 3.605462074279785, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 19.600265502929688, "metrics/kl_div_score": 0.999584916318493, "metrics/ce_loss_score": 0.999600187150498, "metrics/l2_norm_in": 56.929866790771484, "metrics/l2_norm_out": 56.6934928894043, "metrics/l2_ratio": 0.9958688616752625, "metrics/l0": 31.9990234375, "metrics/l1": 59.95335388183594, "metrics/explained_variance": 0.9722690582275391, "metrics/mse": 31.145605087280273, "metrics/total_tokens_evaluated": 6144}
|
1-res_mid_128k-oai/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2b70e6a77f0297230ab8316ae3b0b87be64d872f7dcae70d3b4210389a7820d6
|
3 |
+
size 805834048
|
1-res_mid_128k-oai/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:73aaa61cd691b1360f3c29f3b852d40d4c563c5183daca89b952b73efeff6553
|
3 |
+
size 524368
|
10-res_mid_128k-oai/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"architecture": "standard", "d_in": 768, "d_sae": 131072, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.10.hook_resid_mid", "hook_layer": 10, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
|
10-res_mid_128k-oai/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"metrics/kl_div_with_sae": 0.10664453357458115, "metrics/kl_div_with_ablation": 6.193092346191406, "metrics/ce_loss_with_sae": 3.692218780517578, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 9.754217147827148, "metrics/kl_div_score": 0.98278008341985, "metrics/ce_loss_score": 0.9848656566878472, "metrics/l2_norm_in": 224.28759765625, "metrics/l2_norm_out": 218.76922607421875, "metrics/l2_ratio": 0.9688126444816589, "metrics/l0": 31.998046875, "metrics/l1": 47.411495208740234, "metrics/explained_variance": 0.8776161670684814, "metrics/mse": 1806.194091796875, "metrics/total_tokens_evaluated": 6144}
|
10-res_mid_128k-oai/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1d6a0caf038ffa29fe41fbc18beef10ff74f0560966014d21cd994f22e649b5d
|
3 |
+
size 805834048
|
10-res_mid_128k-oai/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4b9dad4028b6d5f23adc11428739cead9a520c0fcb536d02cf63286c0682b999
|
3 |
+
size 524368
|
11-res_mid_128k-oai/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"architecture": "standard", "d_in": 768, "d_sae": 131072, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.11.hook_resid_mid", "hook_layer": 11, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
|
11-res_mid_128k-oai/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"metrics/kl_div_with_sae": 0.1366678923368454, "metrics/kl_div_with_ablation": 13.087514877319336, "metrics/ce_loss_with_sae": 3.7329623699188232, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 16.484846115112305, "metrics/kl_div_score": 0.9895573839939856, "metrics/ce_loss_score": 0.9896088738509167, "metrics/l2_norm_in": 395.5395202636719, "metrics/l2_norm_out": 391.4725036621094, "metrics/l2_ratio": 0.9892591834068298, "metrics/l0": 32.0, "metrics/l1": 31.824054718017578, "metrics/explained_variance": 0.8704243898391724, "metrics/mse": 3098.075927734375, "metrics/total_tokens_evaluated": 6144}
|
11-res_mid_128k-oai/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b297415ca9916b4e6929f7d82eea602acef5c39dd7bed502ac77873a55e72c68
|
3 |
+
size 805834048
|
11-res_mid_128k-oai/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9ab69d29a274fbe5b7360c67fd0c62fffa384ff5979184093cba1e727ccd7427
|
3 |
+
size 524368
|
2-res_mid_128k-oai/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"architecture": "standard", "d_in": 768, "d_sae": 131072, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.2.hook_resid_mid", "hook_layer": 2, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
|
2-res_mid_128k-oai/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"metrics/kl_div_with_sae": 0.014034643769264221, "metrics/kl_div_with_ablation": 12.81351089477539, "metrics/ce_loss_with_sae": 3.6128032207489014, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 16.32787322998047, "metrics/kl_div_score": 0.9989046995874498, "metrics/ce_loss_score": 0.9989206662941394, "metrics/l2_norm_in": 68.90753173828125, "metrics/l2_norm_out": 68.5189208984375, "metrics/l2_ratio": 0.9936020374298096, "metrics/l0": 31.99934959411621, "metrics/l1": 53.266326904296875, "metrics/explained_variance": 0.9691864848136902, "metrics/mse": 49.732627868652344, "metrics/total_tokens_evaluated": 6144}
|
2-res_mid_128k-oai/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1caab24df127183793a8a856b3d147dc2e34b9f8422f0b6b404233f70214efb2
|
3 |
+
size 805834048
|
2-res_mid_128k-oai/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1a32fe5876cb55265ae4de471b9003f0e8fa03d5df356dc9e6e320bc7d618037
|
3 |
+
size 524368
|
3-res_mid_128k-oai/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"architecture": "standard", "d_in": 768, "d_sae": 131072, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.3.hook_resid_mid", "hook_layer": 3, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
|
3-res_mid_128k-oai/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"metrics/kl_div_with_sae": 0.023511391133069992, "metrics/kl_div_with_ablation": 10.10186767578125, "metrics/ce_loss_with_sae": 3.622774839401245, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 13.548822402954102, "metrics/kl_div_score": 0.9976725698764163, "metrics/ce_loss_score": 0.9976170022128419, "metrics/l2_norm_in": 103.71144104003906, "metrics/l2_norm_out": 103.02696228027344, "metrics/l2_ratio": 0.9895378351211548, "metrics/l0": 31.983074188232422, "metrics/l1": 54.82560348510742, "metrics/explained_variance": 0.9752952456474304, "metrics/mse": 89.62007904052734, "metrics/total_tokens_evaluated": 6144}
|
3-res_mid_128k-oai/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:06cbde3bc5a4e83d69f0ce34dd1ba53c843b7c94b40b7d006020732b54026826
|
3 |
+
size 805834048
|
3-res_mid_128k-oai/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0456b0483683bcfe4f1efa9f1ed4f46ca8b5840cba2a95b2bed5c6e3cd830492
|
3 |
+
size 524368
|
4-res_mid_128k-oai/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"architecture": "standard", "d_in": 768, "d_sae": 131072, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.4.hook_resid_mid", "hook_layer": 4, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
|
4-res_mid_128k-oai/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"metrics/kl_div_with_sae": 0.03753046691417694, "metrics/kl_div_with_ablation": 13.249712944030762, "metrics/ce_loss_with_sae": 3.640705108642578, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 16.69910430908203, "metrics/kl_div_score": 0.9971674505649509, "metrics/ce_loss_score": 0.9968213439818392, "metrics/l2_norm_in": 111.40328216552734, "metrics/l2_norm_out": 110.28677368164062, "metrics/l2_ratio": 0.9840068817138672, "metrics/l0": 31.94856834411621, "metrics/l1": 56.49782943725586, "metrics/explained_variance": 0.9639231562614441, "metrics/mse": 153.8262939453125, "metrics/total_tokens_evaluated": 6144}
|
4-res_mid_128k-oai/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5e225c8a8b01cc6f6bc625e9f259e7dc0488c0f483d4584d1a32361b90666b41
|
3 |
+
size 805834048
|
4-res_mid_128k-oai/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:af6f11b2f0ca3f1b787c7c1c11a3fe30c8f58a52c0611ab7f781d67b773325e1
|
3 |
+
size 524368
|
5-res_mid_128k-oai/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"architecture": "standard", "d_in": 768, "d_sae": 131072, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.5.hook_resid_mid", "hook_layer": 5, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
|
5-res_mid_128k-oai/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"metrics/kl_div_with_sae": 0.04741125553846359, "metrics/kl_div_with_ablation": 11.519681930541992, "metrics/ce_loss_with_sae": 3.6441657543182373, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 14.860109329223633, "metrics/kl_div_score": 0.9958843259888311, "metrics/ce_loss_score": 0.9959949394740818, "metrics/l2_norm_in": 119.6514892578125, "metrics/l2_norm_out": 118.05345916748047, "metrics/l2_ratio": 0.9789802432060242, "metrics/l0": 31.9580078125, "metrics/l1": 56.149471282958984, "metrics/explained_variance": 0.9522756338119507, "metrics/mse": 238.1262664794922, "metrics/total_tokens_evaluated": 6144}
|
5-res_mid_128k-oai/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b61e22ab49ca410f4c70b7a95897f3209a598dffcd82248a713bdfea743df862
|
3 |
+
size 805834048
|
5-res_mid_128k-oai/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:57431e5cb89876fb8170d1cc730430f296b725aaab7962e6e5b571cf4718f863
|
3 |
+
size 524368
|
6-res_mid_128k-oai/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"architecture": "standard", "d_in": 768, "d_sae": 131072, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.6.hook_resid_mid", "hook_layer": 6, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
|
6-res_mid_128k-oai/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"metrics/kl_div_with_sae": 0.06079387292265892, "metrics/kl_div_with_ablation": 6.933250427246094, "metrics/ce_loss_with_sae": 3.655242681503296, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 10.522689819335938, "metrics/kl_div_score": 0.9912315480941302, "metrics/ce_loss_score": 0.9918860291994546, "metrics/l2_norm_in": 128.84793090820312, "metrics/l2_norm_out": 126.68817138671875, "metrics/l2_ratio": 0.9742312431335449, "metrics/l0": 31.99609375, "metrics/l1": 56.119468688964844, "metrics/explained_variance": 0.940334677696228, "metrics/mse": 349.56976318359375, "metrics/total_tokens_evaluated": 6144}
|
6-res_mid_128k-oai/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:de3601867837d9a54d22e60ed60e3dcb19cc8a9ea35d2c3f3250051915006162
|
3 |
+
size 805834048
|
6-res_mid_128k-oai/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5c936737aa0fbd6f271e8be7aea29053357e83055b9922fca067bb96eb2958c4
|
3 |
+
size 524368
|
7-res_mid_128k-oai/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"architecture": "standard", "d_in": 768, "d_sae": 131072, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.7.hook_resid_mid", "hook_layer": 7, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
|
7-res_mid_128k-oai/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"metrics/kl_div_with_sae": 0.07359001040458679, "metrics/kl_div_with_ablation": 9.511523246765137, "metrics/ce_loss_with_sae": 3.668210983276367, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 13.054040908813477, "metrics/kl_div_score": 0.9922630678078178, "metrics/ce_loss_score": 0.9926867722998524, "metrics/l2_norm_in": 140.90599060058594, "metrics/l2_norm_out": 138.12875366210938, "metrics/l2_ratio": 0.9708306789398193, "metrics/l0": 31.9970703125, "metrics/l1": 55.75236129760742, "metrics/explained_variance": 0.928668200969696, "metrics/mse": 499.7657165527344, "metrics/total_tokens_evaluated": 6144}
|
7-res_mid_128k-oai/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2e663dba31833e757306887ad8b311e74a3a1cce66474be454c958106768b39c
|
3 |
+
size 805834048
|
7-res_mid_128k-oai/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ffcaab47977c368c98875bffe48f3e4c66ffd455605e791e06f3178c5f5b96fa
|
3 |
+
size 524368
|
8-res_mid_128k-oai/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"architecture": "standard", "d_in": 768, "d_sae": 131072, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.8.hook_resid_mid", "hook_layer": 8, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
|
8-res_mid_128k-oai/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"metrics/kl_div_with_sae": 0.08936788886785507, "metrics/kl_div_with_ablation": 7.897105693817139, "metrics/ce_loss_with_sae": 3.679746389389038, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 11.460872650146484, "metrics/kl_div_score": 0.9886834629884942, "metrics/ce_loss_score": 0.9897375005583807, "metrics/l2_norm_in": 157.34324645996094, "metrics/l2_norm_out": 153.83953857421875, "metrics/l2_ratio": 0.9683517217636108, "metrics/l0": 31.99837303161621, "metrics/l1": 53.45509338378906, "metrics/explained_variance": 0.9138767123222351, "metrics/mse": 732.31787109375, "metrics/total_tokens_evaluated": 6144}
|
8-res_mid_128k-oai/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:60f311c3a33f5fc526a04fc344266b83e16ec90cea69d6059089e0c8790f37ca
|
3 |
+
size 805834048
|
8-res_mid_128k-oai/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:57ecd3cf156b9a328e813f10d378b57ed1a6f37d2604bf0f37e11e75438b301f
|
3 |
+
size 524368
|
9-res_mid_128k-oai/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"architecture": "standard", "d_in": 768, "d_sae": 131072, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.9.hook_resid_mid", "hook_layer": 9, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
|
9-res_mid_128k-oai/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"metrics/kl_div_with_sae": 0.09692329168319702, "metrics/kl_div_with_ablation": 5.3963117599487305, "metrics/ce_loss_with_sae": 3.6957435607910156, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 8.97047233581543, "metrics/kl_div_score": 0.9820389747674404, "metrics/ce_loss_score": 0.9820011853887981, "metrics/l2_norm_in": 181.313720703125, "metrics/l2_norm_out": 176.829345703125, "metrics/l2_ratio": 0.966578483581543, "metrics/l0": 31.9970703125, "metrics/l1": 51.3939323425293, "metrics/explained_variance": 0.8956956267356873, "metrics/mse": 1123.425048828125, "metrics/total_tokens_evaluated": 6144}
|
9-res_mid_128k-oai/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:97d5d90159e873f77ae4b4882a584278e7db18c6c0f50bf8730db80f00fe4d2f
|
3 |
+
size 805834048
|
9-res_mid_128k-oai/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:76ab099ff4942bfbb42909a54aef6a580fd995b4ffaed0ac23c9b7d3ccb222d1
|
3 |
+
size 524368
|