neuronpedia
committed on
Upload 48 files
Browse files
- 0-res_att_128k-oai/cfg.json +1 -0
- 0-res_att_128k-oai/metrics.json +1 -0
- 0-res_att_128k-oai/sae_weights.safetensors +3 -0
- 0-res_att_128k-oai/sparsity.safetensors +3 -0
- 1-res_att_128k-oai/cfg.json +1 -0
- 1-res_att_128k-oai/metrics.json +1 -0
- 1-res_att_128k-oai/sae_weights.safetensors +3 -0
- 1-res_att_128k-oai/sparsity.safetensors +3 -0
- 10-res_att_128k-oai/cfg.json +1 -0
- 10-res_att_128k-oai/metrics.json +1 -0
- 10-res_att_128k-oai/sae_weights.safetensors +3 -0
- 10-res_att_128k-oai/sparsity.safetensors +3 -0
- 11-res_att_128k-oai/cfg.json +1 -0
- 11-res_att_128k-oai/metrics.json +1 -0
- 11-res_att_128k-oai/sae_weights.safetensors +3 -0
- 11-res_att_128k-oai/sparsity.safetensors +3 -0
- 2-res_att_128k-oai/cfg.json +1 -0
- 2-res_att_128k-oai/metrics.json +1 -0
- 2-res_att_128k-oai/sae_weights.safetensors +3 -0
- 2-res_att_128k-oai/sparsity.safetensors +3 -0
- 3-res_att_128k-oai/cfg.json +1 -0
- 3-res_att_128k-oai/metrics.json +1 -0
- 3-res_att_128k-oai/sae_weights.safetensors +3 -0
- 3-res_att_128k-oai/sparsity.safetensors +3 -0
- 4-res_att_128k-oai/cfg.json +1 -0
- 4-res_att_128k-oai/metrics.json +1 -0
- 4-res_att_128k-oai/sae_weights.safetensors +3 -0
- 4-res_att_128k-oai/sparsity.safetensors +3 -0
- 5-res_att_128k-oai/cfg.json +1 -0
- 5-res_att_128k-oai/metrics.json +1 -0
- 5-res_att_128k-oai/sae_weights.safetensors +3 -0
- 5-res_att_128k-oai/sparsity.safetensors +3 -0
- 6-res_att_128k-oai/cfg.json +1 -0
- 6-res_att_128k-oai/metrics.json +1 -0
- 6-res_att_128k-oai/sae_weights.safetensors +3 -0
- 6-res_att_128k-oai/sparsity.safetensors +3 -0
- 7-res_att_128k-oai/cfg.json +1 -0
- 7-res_att_128k-oai/metrics.json +1 -0
- 7-res_att_128k-oai/sae_weights.safetensors +3 -0
- 7-res_att_128k-oai/sparsity.safetensors +3 -0
- 8-res_att_128k-oai/cfg.json +1 -0
- 8-res_att_128k-oai/metrics.json +1 -0
- 8-res_att_128k-oai/sae_weights.safetensors +3 -0
- 8-res_att_128k-oai/sparsity.safetensors +3 -0
- 9-res_att_128k-oai/cfg.json +1 -0
- 9-res_att_128k-oai/metrics.json +1 -0
- 9-res_att_128k-oai/sae_weights.safetensors +3 -0
- 9-res_att_128k-oai/sparsity.safetensors +3 -0
0-res_att_128k-oai/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"architecture": "standard", "d_in": 768, "d_sae": 131072, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.0.hook_attn_out", "hook_layer": 0, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
|
0-res_att_128k-oai/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"metrics/kl_div_with_sae": 0.003102128393948078, "metrics/kl_div_with_ablation": 2.121527671813965, "metrics/ce_loss_with_sae": 3.6000306606292725, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 5.74860143661499, "metrics/kl_div_score": 0.9985377855612434, "metrics/ce_loss_score": 0.9995505673311031, "metrics/l2_norm_in": 32.01313781738281, "metrics/l2_norm_out": 31.91054916381836, "metrics/l2_ratio": 0.9968054294586182, "metrics/l0": 31.989421844482422, "metrics/l1": 39.967445373535156, "metrics/explained_variance": 0.9732184410095215, "metrics/mse": 6.201825141906738, "metrics/total_tokens_evaluated": 6144}
|
0-res_att_128k-oai/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5801db318f259a3e5fe1b5817b9ebba6935b6c9b142e783205a92f4a82beb7b6
|
3 |
+
size 805834048
|
0-res_att_128k-oai/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:414c210bf7d6fbddb053ceaa011fac25ceb9f5a073178d9e5d5d9d18053a06e5
|
3 |
+
size 524368
|
1-res_att_128k-oai/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"architecture": "standard", "d_in": 768, "d_sae": 131072, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.1.hook_attn_out", "hook_layer": 1, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
|
1-res_att_128k-oai/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"metrics/kl_div_with_sae": 0.001699602697044611, "metrics/kl_div_with_ablation": 0.024065840989351273, "metrics/ce_loss_with_sae": 3.60137939453125, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 3.6206326484680176, "metrics/kl_div_score": 0.9293769663899693, "metrics/ce_loss_score": 0.8926743530504184, "metrics/l2_norm_in": 9.714648246765137, "metrics/l2_norm_out": 9.198001861572266, "metrics/l2_ratio": 0.9423195719718933, "metrics/l0": 31.9990234375, "metrics/l1": 78.19619750976562, "metrics/explained_variance": 0.8797982931137085, "metrics/mse": 8.594531059265137, "metrics/total_tokens_evaluated": 6144}
|
1-res_att_128k-oai/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:57d70f600a4846151fd7ea487915802ab8bbb90015c1b0e65460cfe90e46da64
|
3 |
+
size 805834048
|
1-res_att_128k-oai/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e1eabe96ac25facfe3ade666f181005b41973249ec88b0ac22be2f85103668a5
|
3 |
+
size 524368
|
10-res_att_128k-oai/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"architecture": "standard", "d_in": 768, "d_sae": 131072, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.10.hook_attn_out", "hook_layer": 10, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
|
10-res_att_128k-oai/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"metrics/kl_div_with_sae": 0.003733165329322219, "metrics/kl_div_with_ablation": 0.02475181221961975, "metrics/ce_loss_with_sae": 3.601586103439331, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 3.6404881477355957, "metrics/kl_div_score": 0.8491760806765053, "metrics/ce_loss_score": 0.9391284828741302, "metrics/l2_norm_in": 31.82137680053711, "metrics/l2_norm_out": 30.27041244506836, "metrics/l2_ratio": 0.9501370787620544, "metrics/l0": 32.0, "metrics/l1": 52.3072624206543, "metrics/explained_variance": 0.8368248343467712, "metrics/mse": 112.1676254272461, "metrics/total_tokens_evaluated": 6144}
|
10-res_att_128k-oai/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e39ee1ce57e86bb93fe7a87c6b6a37605c32e576aa1cf4c699a92400db705633
|
3 |
+
size 805834048
|
10-res_att_128k-oai/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e4e7b306d4f7029f5a36e85a70a927aaaaeeff535cae93ce9cae14e3b221f5e0
|
3 |
+
size 524368
|
11-res_att_128k-oai/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"architecture": "standard", "d_in": 768, "d_sae": 131072, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.11.hook_attn_out", "hook_layer": 11, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
|
11-res_att_128k-oai/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"metrics/kl_div_with_sae": 0.003717603161931038, "metrics/kl_div_with_ablation": 0.10687470436096191, "metrics/ce_loss_with_sae": 3.6006953716278076, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 3.730869770050049, "metrics/kl_div_score": 0.9652153127893099, "metrics/ce_loss_score": 0.9876273219121214, "metrics/l2_norm_in": 280.86444091796875, "metrics/l2_norm_out": 280.55767822265625, "metrics/l2_ratio": 0.9987168312072754, "metrics/l0": 31.75, "metrics/l1": 20.949716567993164, "metrics/explained_variance": 0.9697019457817078, "metrics/mse": 169.45314025878906, "metrics/total_tokens_evaluated": 6144}
|
11-res_att_128k-oai/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e6eaa35a91eea1a55d199c911f529aaabdb6055e20d83f043ba8055c4f845405
|
3 |
+
size 805834048
|
11-res_att_128k-oai/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:678bdda55d4ad6ee4d75de36b9d5b3f79350f6c5405a71d2b7f1e0c1aecd632e
|
3 |
+
size 524368
|
2-res_att_128k-oai/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"architecture": "standard", "d_in": 768, "d_sae": 131072, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.2.hook_attn_out", "hook_layer": 2, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
|
2-res_att_128k-oai/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"metrics/kl_div_with_sae": 0.001999747008085251, "metrics/kl_div_with_ablation": 0.031004613265395164, "metrics/ce_loss_with_sae": 3.5997862815856934, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 3.626660108566284, "metrics/kl_div_score": 0.9355016303229556, "metrics/ce_loss_score": 0.9738474564556262, "metrics/l2_norm_in": 8.641822814941406, "metrics/l2_norm_out": 8.086132049560547, "metrics/l2_ratio": 0.9341656565666199, "metrics/l0": 31.98714256286621, "metrics/l1": 79.24567413330078, "metrics/explained_variance": 0.8692671060562134, "metrics/mse": 8.59189224243164, "metrics/total_tokens_evaluated": 6144}
|
2-res_att_128k-oai/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c6be094d591c4d3d2c6789824417ba08da1d01f21fb8e2b56a7442f551cb991c
|
3 |
+
size 805834048
|
2-res_att_128k-oai/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:faf997fff4f66a268b08854209d7f8edda454fc1f8d8b500bfef44cba5c94d91
|
3 |
+
size 524368
|
3-res_att_128k-oai/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"architecture": "standard", "d_in": 768, "d_sae": 131072, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.3.hook_attn_out", "hook_layer": 3, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
|
3-res_att_128k-oai/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"metrics/kl_div_with_sae": 0.00250535411760211, "metrics/kl_div_with_ablation": 0.025133918970823288, "metrics/ce_loss_with_sae": 3.601374387741089, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 3.6286609172821045, "metrics/kl_div_score": 0.900319798097923, "metrics/ce_loss_score": 0.9219565637687698, "metrics/l2_norm_in": 8.571012496948242, "metrics/l2_norm_out": 7.854372024536133, "metrics/l2_ratio": 0.9161635637283325, "metrics/l0": 32.0, "metrics/l1": 77.34489440917969, "metrics/explained_variance": 0.8376470804214478, "metrics/mse": 11.842028617858887, "metrics/total_tokens_evaluated": 6144}
|
3-res_att_128k-oai/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:97cb9d4eb7f67689e8f61c614e0ca03de47cbf44e3e356737e3d99d19f4d3807
|
3 |
+
size 805834048
|
3-res_att_128k-oai/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4be4718eddb1f57d87446d7402af2afc7d46700fd05d7178594a7b2a3896806d
|
3 |
+
size 524368
|
4-res_att_128k-oai/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"architecture": "standard", "d_in": 768, "d_sae": 131072, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.4.hook_attn_out", "hook_layer": 4, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
|
4-res_att_128k-oai/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"metrics/kl_div_with_sae": 0.0034840195439755917, "metrics/kl_div_with_ablation": 0.026722650974988937, "metrics/ce_loss_with_sae": 3.6015777587890625, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 3.6321334838867188, "metrics/kl_div_score": 0.8696229821196835, "metrics/ce_loss_score": 0.9240019898919258, "metrics/l2_norm_in": 9.123016357421875, "metrics/l2_norm_out": 8.11795425415039, "metrics/l2_ratio": 0.891417384147644, "metrics/l0": 31.99934959411621, "metrics/l1": 75.46078491210938, "metrics/explained_variance": 0.7981663942337036, "metrics/mse": 17.370716094970703, "metrics/total_tokens_evaluated": 6144}
|
4-res_att_128k-oai/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e9642c61b839b69ffba9d3b957e3536548290d94358610ec2da586e2a6ec16f4
|
3 |
+
size 805834048
|
4-res_att_128k-oai/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ffa846f2de01f62de07be9ed953834d3bd1ea5a456f73f59243972db266de1b9
|
3 |
+
size 524368
|
5-res_att_128k-oai/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"architecture": "standard", "d_in": 768, "d_sae": 131072, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.5.hook_attn_out", "hook_layer": 5, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
|
5-res_att_128k-oai/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"metrics/kl_div_with_sae": 0.003555751871317625, "metrics/kl_div_with_ablation": 0.031378373503685, "metrics/ce_loss_with_sae": 3.6017022132873535, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 3.6277596950531006, "metrics/kl_div_score": 0.8866814473063734, "metrics/ce_loss_score": 0.9080810262886769, "metrics/l2_norm_in": 10.034396171569824, "metrics/l2_norm_out": 8.960555076599121, "metrics/l2_ratio": 0.8944891691207886, "metrics/l0": 31.99837303161621, "metrics/l1": 73.32916259765625, "metrics/explained_variance": 0.8041481375694275, "metrics/mse": 22.21611785888672, "metrics/total_tokens_evaluated": 6144}
|
5-res_att_128k-oai/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c69062f1ae86549c68612dc574d78024749e1b2f6b9cf2338fd74800747aed52
|
3 |
+
size 805834048
|
5-res_att_128k-oai/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:09a893b71eb1bc60fe9d9ee06770f4853c0c145bf61883acbb36d00b3d4daea7
|
3 |
+
size 524368
|
6-res_att_128k-oai/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"architecture": "standard", "d_in": 768, "d_sae": 131072, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.6.hook_attn_out", "hook_layer": 6, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
|
6-res_att_128k-oai/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"metrics/kl_div_with_sae": 0.004435557406395674, "metrics/kl_div_with_ablation": 0.03278880566358566, "metrics/ce_loss_with_sae": 3.6031253337860107, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 3.634286403656006, "metrics/kl_div_score": 0.8647234226246404, "metrics/ce_loss_score": 0.8847093704097312, "metrics/l2_norm_in": 11.67806625366211, "metrics/l2_norm_out": 10.348960876464844, "metrics/l2_ratio": 0.8881208300590515, "metrics/l0": 31.998699188232422, "metrics/l1": 70.52044677734375, "metrics/explained_variance": 0.7761087417602539, "metrics/mse": 31.851516723632812, "metrics/total_tokens_evaluated": 6144}
|
6-res_att_128k-oai/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d57cca0ea3db86f6e07a251c29fa046056c2aced38bfb97fb9c14a72e1235e86
|
3 |
+
size 805834048
|
6-res_att_128k-oai/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3ca493d7aa8d26ea18ce5a93b247b29c43b0f058f4dcde9582c8dd9e569d1dc1
|
3 |
+
size 524368
|
7-res_att_128k-oai/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"architecture": "standard", "d_in": 768, "d_sae": 131072, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.7.hook_attn_out", "hook_layer": 7, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
|
7-res_att_128k-oai/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"metrics/kl_div_with_sae": 0.004355770535767078, "metrics/kl_div_with_ablation": 0.03466065973043442, "metrics/ce_loss_with_sae": 3.602255344390869, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 3.634833812713623, "metrics/kl_div_score": 0.8743309974581235, "metrics/ce_loss_score": 0.9107960567097922, "metrics/l2_norm_in": 13.65020751953125, "metrics/l2_norm_out": 12.425106048583984, "metrics/l2_ratio": 0.9115623235702515, "metrics/l0": 31.99934959411621, "metrics/l1": 66.42544555664062, "metrics/explained_variance": 0.8032481074333191, "metrics/mse": 37.54448699951172, "metrics/total_tokens_evaluated": 6144}
|
7-res_att_128k-oai/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e3d55f6e9221c68f5bce0af83fd51b437c33d0ce4e1fd7f66f14b56072026585
|
3 |
+
size 805834048
|
7-res_att_128k-oai/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:781f838bccc120af03ede7b27ed850d78e9534c5139949122572aebb4d56c260
|
3 |
+
size 524368
|
8-res_att_128k-oai/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"architecture": "standard", "d_in": 768, "d_sae": 131072, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.8.hook_attn_out", "hook_layer": 8, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
|
8-res_att_128k-oai/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"metrics/kl_div_with_sae": 0.005139736924320459, "metrics/kl_div_with_ablation": 0.02938206121325493, "metrics/ce_loss_with_sae": 3.602754592895508, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 3.6258018016815186, "metrics/kl_div_score": 0.8250722817907069, "metrics/ce_loss_score": 0.8619899415037808, "metrics/l2_norm_in": 16.137948989868164, "metrics/l2_norm_out": 14.539435386657715, "metrics/l2_ratio": 0.902074933052063, "metrics/l0": 32.0, "metrics/l1": 66.75920867919922, "metrics/explained_variance": 0.7767739295959473, "metrics/mse": 52.779170989990234, "metrics/total_tokens_evaluated": 6144}
|
8-res_att_128k-oai/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3479c0a21589142e220e0585a8b03d3c9e7be8960f764d23627badf132c98dff
|
3 |
+
size 805834048
|
8-res_att_128k-oai/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d9034f92998777870d690662ab5c26afcd3c34281b50e8c8a17debe53d7c773c
|
3 |
+
size 524368
|
9-res_att_128k-oai/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"architecture": "standard", "d_in": 768, "d_sae": 131072, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.9.hook_attn_out", "hook_layer": 9, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
|
9-res_att_128k-oai/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"metrics/kl_div_with_sae": 0.004128233529627323, "metrics/kl_div_with_ablation": 0.028918448835611343, "metrics/ce_loss_with_sae": 3.601983070373535, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 3.636500835418701, "metrics/kl_div_score": 0.8572456789403016, "metrics/ce_loss_score": 0.922041281628338, "metrics/l2_norm_in": 20.912498474121094, "metrics/l2_norm_out": 19.252647399902344, "metrics/l2_ratio": 0.9233078956604004, "metrics/l0": 31.99934959411621, "metrics/l1": 60.197113037109375, "metrics/explained_variance": 0.800841212272644, "metrics/mse": 70.06503295898438, "metrics/total_tokens_evaluated": 6144}
|
9-res_att_128k-oai/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0cd8c2ec884af8d2cadb1b9c31696a8abb7c5448c6db603881a78bae8b7e93a3
|
3 |
+
size 805834048
|
9-res_att_128k-oai/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:093efc1211187d42323e47b20c49e2a9fbf8a5edebf8dc9cd4fccf3beca64cbd
|
3 |
+
size 524368
|