Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +1 -0
- benchmark_stats.csv +13 -0
- benchmark_stats.html +734 -0
- benchmark_stats.png +3 -0
- v5_128k_layer_0/cfg.json +1 -0
- v5_128k_layer_0/metrics.json +1 -0
- v5_128k_layer_0/sae_weights.safetensors +3 -0
- v5_128k_layer_0/sparsity.safetensors +3 -0
- v5_128k_layer_1/cfg.json +1 -0
- v5_128k_layer_1/metrics.json +1 -0
- v5_128k_layer_1/sae_weights.safetensors +3 -0
- v5_128k_layer_1/sparsity.safetensors +3 -0
- v5_128k_layer_10/cfg.json +1 -0
- v5_128k_layer_10/metrics.json +1 -0
- v5_128k_layer_10/sae_weights.safetensors +3 -0
- v5_128k_layer_10/sparsity.safetensors +3 -0
- v5_128k_layer_11/cfg.json +1 -0
- v5_128k_layer_11/metrics.json +1 -0
- v5_128k_layer_11/sae_weights.safetensors +3 -0
- v5_128k_layer_11/sparsity.safetensors +3 -0
- v5_128k_layer_2/cfg.json +1 -0
- v5_128k_layer_2/metrics.json +1 -0
- v5_128k_layer_2/sae_weights.safetensors +3 -0
- v5_128k_layer_2/sparsity.safetensors +3 -0
- v5_128k_layer_3/cfg.json +1 -0
- v5_128k_layer_3/metrics.json +1 -0
- v5_128k_layer_3/sae_weights.safetensors +3 -0
- v5_128k_layer_3/sparsity.safetensors +3 -0
- v5_128k_layer_4/cfg.json +1 -0
- v5_128k_layer_4/metrics.json +1 -0
- v5_128k_layer_4/sae_weights.safetensors +3 -0
- v5_128k_layer_4/sparsity.safetensors +3 -0
- v5_128k_layer_5/cfg.json +1 -0
- v5_128k_layer_5/metrics.json +1 -0
- v5_128k_layer_5/sae_weights.safetensors +3 -0
- v5_128k_layer_5/sparsity.safetensors +3 -0
- v5_128k_layer_6/cfg.json +1 -0
- v5_128k_layer_6/metrics.json +1 -0
- v5_128k_layer_6/sae_weights.safetensors +3 -0
- v5_128k_layer_6/sparsity.safetensors +3 -0
- v5_128k_layer_7/cfg.json +1 -0
- v5_128k_layer_7/metrics.json +1 -0
- v5_128k_layer_7/sae_weights.safetensors +3 -0
- v5_128k_layer_7/sparsity.safetensors +3 -0
- v5_128k_layer_8/cfg.json +1 -0
- v5_128k_layer_8/metrics.json +1 -0
- v5_128k_layer_8/sae_weights.safetensors +3 -0
- v5_128k_layer_8/sparsity.safetensors +3 -0
- v5_128k_layer_9/cfg.json +1 -0
- v5_128k_layer_9/metrics.json +1 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
benchmark_stats.png filter=lfs diff=lfs merge=lfs -text
|
benchmark_stats.csv
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
,version,d_sae,layer,kl_div_with_sae,kl_div_with_ablation,ce_loss_with_sae,ce_loss_without_sae,ce_loss_with_ablation,kl_div_score,ce_loss_score,l2_norm_in,l2_norm_out,l2_ratio,l0,l1,explained_variance,mse,total_tokens_evaluated,filepath
|
2 |
+
OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_0/metrics.json,5,128,0,0.0038433405570685863,12.480283737182617,3.603421926498413,3.599064588546753,15.861976623535156,0.9996920470208848,0.9996446734723997,32.70796203613281,32.60739517211914,0.9969363212585449,31.98079490661621,44.247344970703125,0.9764951467514038,5.842685222625732,6144.0,OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_0/metrics.json
|
3 |
+
OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_1/metrics.json,5,128,1,0.006731455214321613,16.217103958129883,3.605462074279785,3.599064588546753,19.600265502929688,0.999584916318493,0.999600187150498,56.929866790771484,56.6934928894043,0.9958688616752625,31.9990234375,59.95335388183594,0.9722690582275391,31.145605087280273,6144.0,OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_1/metrics.json
|
4 |
+
OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_2/metrics.json,5,128,2,0.014034643769264221,12.81351089477539,3.6128032207489014,3.599064588546753,16.32787322998047,0.9989046995874498,0.9989206662941394,68.90753173828125,68.5189208984375,0.9936020374298096,31.99934959411621,53.266326904296875,0.9691864848136902,49.732627868652344,6144.0,OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_2/metrics.json
|
5 |
+
OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_3/metrics.json,5,128,3,0.023511391133069992,10.10186767578125,3.622774839401245,3.599064588546753,13.548822402954102,0.9976725698764163,0.9976170022128419,103.71144104003906,103.02696228027344,0.9895378351211548,31.983074188232422,54.82560348510742,0.9752952456474304,89.62007904052734,6144.0,OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_3/metrics.json
|
6 |
+
OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_4/metrics.json,5,128,4,0.03753046691417694,13.249712944030762,3.640705108642578,3.599064588546753,16.69910430908203,0.9971674505649509,0.9968213439818392,111.40328216552734,110.28677368164062,0.9840068817138672,31.94856834411621,56.49782943725586,0.9639231562614441,153.8262939453125,6144.0,OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_4/metrics.json
|
7 |
+
OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_5/metrics.json,5,128,5,0.04741125553846359,11.519681930541992,3.6441657543182373,3.599064588546753,14.860109329223633,0.9958843259888311,0.9959949394740818,119.6514892578125,118.05345916748047,0.9789802432060242,31.9580078125,56.149471282958984,0.9522756338119507,238.1262664794922,6144.0,OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_5/metrics.json
|
8 |
+
OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_6/metrics.json,5,128,6,0.06079387292265892,6.933250427246094,3.655242681503296,3.599064588546753,10.522689819335938,0.9912315480941302,0.9918860291994546,128.84793090820312,126.68817138671875,0.9742312431335449,31.99609375,56.119468688964844,0.940334677696228,349.56976318359375,6144.0,OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_6/metrics.json
|
9 |
+
OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_7/metrics.json,5,128,7,0.07359001040458679,9.511523246765137,3.668210983276367,3.599064588546753,13.054040908813477,0.9922630678078178,0.9926867722998524,140.90599060058594,138.12875366210938,0.9708306789398193,31.9970703125,55.75236129760742,0.928668200969696,499.7657165527344,6144.0,OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_7/metrics.json
|
10 |
+
OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_8/metrics.json,5,128,8,0.08936788886785507,7.897105693817139,3.679746389389038,3.599064588546753,11.460872650146484,0.9886834629884942,0.9897375005583807,157.34324645996094,153.83953857421875,0.9683517217636108,31.99837303161621,53.45509338378906,0.9138767123222351,732.31787109375,6144.0,OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_8/metrics.json
|
11 |
+
OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_9/metrics.json,5,128,9,0.09692329168319702,5.3963117599487305,3.6957435607910156,3.599064588546753,8.97047233581543,0.9820389747674404,0.9820011853887981,181.313720703125,176.829345703125,0.966578483581543,31.9970703125,51.3939323425293,0.8956956267356873,1123.425048828125,6144.0,OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_9/metrics.json
|
12 |
+
OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_10/metrics.json,5,128,10,0.10664453357458115,6.193092346191406,3.692218780517578,3.599064588546753,9.754217147827148,0.98278008341985,0.9848656566878472,224.28759765625,218.76922607421875,0.9688126444816589,31.998046875,47.411495208740234,0.8776161670684814,1806.194091796875,6144.0,OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_10/metrics.json
|
13 |
+
OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_11/metrics.json,5,128,11,0.1366678923368454,13.087514877319336,3.7329623699188232,3.599064588546753,16.484846115112305,0.9895573839939856,0.9896088738509167,395.5395202636719,391.4725036621094,0.9892591834068298,32.0,31.824054718017578,0.8704243898391724,3098.075927734375,6144.0,OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_11/metrics.json
|
benchmark_stats.html
ADDED
@@ -0,0 +1,734 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<style type="text/css">
|
2 |
+
#T_53d0e_row0_col2, #T_53d0e_row0_col3, #T_53d0e_row0_col5, #T_53d0e_row0_col6, #T_53d0e_row0_col10, #T_53d0e_row0_col11, #T_53d0e_row0_col16, #T_53d0e_row0_col17, #T_53d0e_row1_col6, #T_53d0e_row1_col17, #T_53d0e_row2_col6, #T_53d0e_row2_col17, #T_53d0e_row3_col6, #T_53d0e_row3_col17, #T_53d0e_row4_col6, #T_53d0e_row4_col13, #T_53d0e_row4_col17, #T_53d0e_row5_col6, #T_53d0e_row5_col17, #T_53d0e_row6_col6, #T_53d0e_row6_col17, #T_53d0e_row7_col6, #T_53d0e_row7_col17, #T_53d0e_row8_col6, #T_53d0e_row8_col17, #T_53d0e_row9_col4, #T_53d0e_row9_col6, #T_53d0e_row9_col7, #T_53d0e_row9_col8, #T_53d0e_row9_col9, #T_53d0e_row9_col12, #T_53d0e_row9_col17, #T_53d0e_row10_col6, #T_53d0e_row10_col17, #T_53d0e_row11_col6, #T_53d0e_row11_col14, #T_53d0e_row11_col15, #T_53d0e_row11_col17 {
|
3 |
+
background-color: #440154;
|
4 |
+
color: #f1f1f1;
|
5 |
+
}
|
6 |
+
#T_53d0e_row0_col4 {
|
7 |
+
background-color: #31b57b;
|
8 |
+
color: #f1f1f1;
|
9 |
+
}
|
10 |
+
#T_53d0e_row0_col7 {
|
11 |
+
background-color: #2eb37c;
|
12 |
+
color: #f1f1f1;
|
13 |
+
}
|
14 |
+
#T_53d0e_row0_col8, #T_53d0e_row0_col9, #T_53d0e_row0_col12, #T_53d0e_row0_col15, #T_53d0e_row1_col4, #T_53d0e_row1_col7, #T_53d0e_row1_col9, #T_53d0e_row1_col14, #T_53d0e_row11_col2, #T_53d0e_row11_col3, #T_53d0e_row11_col5, #T_53d0e_row11_col10, #T_53d0e_row11_col11, #T_53d0e_row11_col13, #T_53d0e_row11_col16 {
|
15 |
+
background-color: #fde725;
|
16 |
+
color: #000000;
|
17 |
+
}
|
18 |
+
#T_53d0e_row0_col13 {
|
19 |
+
background-color: #28ae80;
|
20 |
+
color: #f1f1f1;
|
21 |
+
}
|
22 |
+
#T_53d0e_row0_col14, #T_53d0e_row8_col9 {
|
23 |
+
background-color: #26828e;
|
24 |
+
color: #f1f1f1;
|
25 |
+
}
|
26 |
+
#T_53d0e_row1_col2 {
|
27 |
+
background-color: #482173;
|
28 |
+
color: #f1f1f1;
|
29 |
+
}
|
30 |
+
#T_53d0e_row1_col3 {
|
31 |
+
background-color: #46085c;
|
32 |
+
color: #f1f1f1;
|
33 |
+
}
|
34 |
+
#T_53d0e_row1_col5 {
|
35 |
+
background-color: #46075a;
|
36 |
+
color: #f1f1f1;
|
37 |
+
}
|
38 |
+
#T_53d0e_row1_col8 {
|
39 |
+
background-color: #fbe723;
|
40 |
+
color: #000000;
|
41 |
+
}
|
42 |
+
#T_53d0e_row1_col10, #T_53d0e_row1_col11, #T_53d0e_row10_col15 {
|
43 |
+
background-color: #481a6c;
|
44 |
+
color: #f1f1f1;
|
45 |
+
}
|
46 |
+
#T_53d0e_row1_col12, #T_53d0e_row10_col13 {
|
47 |
+
background-color: #e7e419;
|
48 |
+
color: #000000;
|
49 |
+
}
|
50 |
+
#T_53d0e_row1_col13 {
|
51 |
+
background-color: #f4e61e;
|
52 |
+
color: #000000;
|
53 |
+
}
|
54 |
+
#T_53d0e_row1_col15, #T_53d0e_row2_col9 {
|
55 |
+
background-color: #e5e419;
|
56 |
+
color: #000000;
|
57 |
+
}
|
58 |
+
#T_53d0e_row1_col16 {
|
59 |
+
background-color: #450457;
|
60 |
+
color: #f1f1f1;
|
61 |
+
}
|
62 |
+
#T_53d0e_row2_col2, #T_53d0e_row5_col13 {
|
63 |
+
background-color: #433e85;
|
64 |
+
color: #f1f1f1;
|
65 |
+
}
|
66 |
+
#T_53d0e_row2_col3, #T_53d0e_row5_col16 {
|
67 |
+
background-color: #481c6e;
|
68 |
+
color: #f1f1f1;
|
69 |
+
}
|
70 |
+
#T_53d0e_row2_col4, #T_53d0e_row10_col5 {
|
71 |
+
background-color: #3dbc74;
|
72 |
+
color: #f1f1f1;
|
73 |
+
}
|
74 |
+
#T_53d0e_row2_col5, #T_53d0e_row10_col4, #T_53d0e_row10_col7, #T_53d0e_row10_col12 {
|
75 |
+
background-color: #481b6d;
|
76 |
+
color: #f1f1f1;
|
77 |
+
}
|
78 |
+
#T_53d0e_row2_col7 {
|
79 |
+
background-color: #40bd72;
|
80 |
+
color: #f1f1f1;
|
81 |
+
}
|
82 |
+
#T_53d0e_row2_col8 {
|
83 |
+
background-color: #e2e418;
|
84 |
+
color: #000000;
|
85 |
+
}
|
86 |
+
#T_53d0e_row2_col10, #T_53d0e_row2_col11 {
|
87 |
+
background-color: #482475;
|
88 |
+
color: #f1f1f1;
|
89 |
+
}
|
90 |
+
#T_53d0e_row2_col12 {
|
91 |
+
background-color: #b5de2b;
|
92 |
+
color: #000000;
|
93 |
+
}
|
94 |
+
#T_53d0e_row2_col13 {
|
95 |
+
background-color: #f6e620;
|
96 |
+
color: #000000;
|
97 |
+
}
|
98 |
+
#T_53d0e_row2_col14 {
|
99 |
+
background-color: #65cb5e;
|
100 |
+
color: #000000;
|
101 |
+
}
|
102 |
+
#T_53d0e_row2_col15 {
|
103 |
+
background-color: #d2e21b;
|
104 |
+
color: #000000;
|
105 |
+
}
|
106 |
+
#T_53d0e_row2_col16 {
|
107 |
+
background-color: #450559;
|
108 |
+
color: #f1f1f1;
|
109 |
+
}
|
110 |
+
#T_53d0e_row3_col2 {
|
111 |
+
background-color: #38588c;
|
112 |
+
color: #f1f1f1;
|
113 |
+
}
|
114 |
+
#T_53d0e_row3_col3, #T_53d0e_row6_col7 {
|
115 |
+
background-color: #46337f;
|
116 |
+
color: #f1f1f1;
|
117 |
+
}
|
118 |
+
#T_53d0e_row3_col4 {
|
119 |
+
background-color: #26818e;
|
120 |
+
color: #f1f1f1;
|
121 |
+
}
|
122 |
+
#T_53d0e_row3_col5 {
|
123 |
+
background-color: #463480;
|
124 |
+
color: #f1f1f1;
|
125 |
+
}
|
126 |
+
#T_53d0e_row3_col7, #T_53d0e_row11_col9 {
|
127 |
+
background-color: #27808e;
|
128 |
+
color: #f1f1f1;
|
129 |
+
}
|
130 |
+
#T_53d0e_row3_col8, #T_53d0e_row3_col9 {
|
131 |
+
background-color: #b2dd2d;
|
132 |
+
color: #000000;
|
133 |
+
}
|
134 |
+
#T_53d0e_row3_col10, #T_53d0e_row3_col11 {
|
135 |
+
background-color: #414287;
|
136 |
+
color: #f1f1f1;
|
137 |
+
}
|
138 |
+
#T_53d0e_row3_col12 {
|
139 |
+
background-color: #60ca60;
|
140 |
+
color: #000000;
|
141 |
+
}
|
142 |
+
#T_53d0e_row3_col13 {
|
143 |
+
background-color: #37b878;
|
144 |
+
color: #f1f1f1;
|
145 |
+
}
|
146 |
+
#T_53d0e_row3_col14, #T_53d0e_row9_col2 {
|
147 |
+
background-color: #86d549;
|
148 |
+
color: #000000;
|
149 |
+
}
|
150 |
+
#T_53d0e_row3_col15 {
|
151 |
+
background-color: #f8e621;
|
152 |
+
color: #000000;
|
153 |
+
}
|
154 |
+
#T_53d0e_row3_col16 {
|
155 |
+
background-color: #460a5d;
|
156 |
+
color: #f1f1f1;
|
157 |
+
}
|
158 |
+
#T_53d0e_row4_col2 {
|
159 |
+
background-color: #2d708e;
|
160 |
+
color: #f1f1f1;
|
161 |
+
}
|
162 |
+
#T_53d0e_row4_col3, #T_53d0e_row6_col12 {
|
163 |
+
background-color: #3b528b;
|
164 |
+
color: #f1f1f1;
|
165 |
+
}
|
166 |
+
#T_53d0e_row4_col4 {
|
167 |
+
background-color: #50c46a;
|
168 |
+
color: #000000;
|
169 |
+
}
|
170 |
+
#T_53d0e_row4_col5 {
|
171 |
+
background-color: #365c8d;
|
172 |
+
color: #f1f1f1;
|
173 |
+
}
|
174 |
+
#T_53d0e_row4_col7, #T_53d0e_row8_col2 {
|
175 |
+
background-color: #52c569;
|
176 |
+
color: #000000;
|
177 |
+
}
|
178 |
+
#T_53d0e_row4_col8 {
|
179 |
+
background-color: #a0da39;
|
180 |
+
color: #000000;
|
181 |
+
}
|
182 |
+
#T_53d0e_row4_col9 {
|
183 |
+
background-color: #95d840;
|
184 |
+
color: #000000;
|
185 |
+
}
|
186 |
+
#T_53d0e_row4_col10, #T_53d0e_row4_col11 {
|
187 |
+
background-color: #3f4889;
|
188 |
+
color: #f1f1f1;
|
189 |
+
}
|
190 |
+
#T_53d0e_row4_col12 {
|
191 |
+
background-color: #1fa187;
|
192 |
+
color: #f1f1f1;
|
193 |
+
}
|
194 |
+
#T_53d0e_row4_col14 {
|
195 |
+
background-color: #addc30;
|
196 |
+
color: #000000;
|
197 |
+
}
|
198 |
+
#T_53d0e_row4_col15 {
|
199 |
+
background-color: #b0dd2f;
|
200 |
+
color: #000000;
|
201 |
+
}
|
202 |
+
#T_53d0e_row4_col16 {
|
203 |
+
background-color: #471365;
|
204 |
+
color: #f1f1f1;
|
205 |
+
}
|
206 |
+
#T_53d0e_row5_col2 {
|
207 |
+
background-color: #25858e;
|
208 |
+
color: #f1f1f1;
|
209 |
+
}
|
210 |
+
#T_53d0e_row5_col3 {
|
211 |
+
background-color: #31668e;
|
212 |
+
color: #f1f1f1;
|
213 |
+
}
|
214 |
+
#T_53d0e_row5_col4 {
|
215 |
+
background-color: #1fa088;
|
216 |
+
color: #f1f1f1;
|
217 |
+
}
|
218 |
+
#T_53d0e_row5_col5 {
|
219 |
+
background-color: #33638d;
|
220 |
+
color: #f1f1f1;
|
221 |
+
}
|
222 |
+
#T_53d0e_row5_col7, #T_53d0e_row10_col14 {
|
223 |
+
background-color: #1e9d89;
|
224 |
+
color: #f1f1f1;
|
225 |
+
}
|
226 |
+
#T_53d0e_row5_col8 {
|
227 |
+
background-color: #70cf57;
|
228 |
+
color: #000000;
|
229 |
+
}
|
230 |
+
#T_53d0e_row5_col9 {
|
231 |
+
background-color: #77d153;
|
232 |
+
color: #000000;
|
233 |
+
}
|
234 |
+
#T_53d0e_row5_col10 {
|
235 |
+
background-color: #3c4f8a;
|
236 |
+
color: #f1f1f1;
|
237 |
+
}
|
238 |
+
#T_53d0e_row5_col11, #T_53d0e_row8_col16, #T_53d0e_row9_col15 {
|
239 |
+
background-color: #3d4e8a;
|
240 |
+
color: #f1f1f1;
|
241 |
+
}
|
242 |
+
#T_53d0e_row5_col12, #T_53d0e_row8_col15, #T_53d0e_row9_col10 {
|
243 |
+
background-color: #297a8e;
|
244 |
+
color: #f1f1f1;
|
245 |
+
}
|
246 |
+
#T_53d0e_row5_col14, #T_53d0e_row6_col14 {
|
247 |
+
background-color: #a5db36;
|
248 |
+
color: #000000;
|
249 |
+
}
|
250 |
+
#T_53d0e_row5_col15 {
|
251 |
+
background-color: #69cd5b;
|
252 |
+
color: #000000;
|
253 |
+
}
|
254 |
+
#T_53d0e_row6_col2 {
|
255 |
+
background-color: #1e9b8a;
|
256 |
+
color: #f1f1f1;
|
257 |
+
}
|
258 |
+
#T_53d0e_row6_col3, #T_53d0e_row11_col8 {
|
259 |
+
background-color: #277f8e;
|
260 |
+
color: #f1f1f1;
|
261 |
+
}
|
262 |
+
#T_53d0e_row6_col4 {
|
263 |
+
background-color: #46327e;
|
264 |
+
color: #f1f1f1;
|
265 |
+
}
|
266 |
+
#T_53d0e_row6_col5, #T_53d0e_row9_col11 {
|
267 |
+
background-color: #2a788e;
|
268 |
+
color: #f1f1f1;
|
269 |
+
}
|
270 |
+
#T_53d0e_row6_col8 {
|
271 |
+
background-color: #1f958b;
|
272 |
+
color: #f1f1f1;
|
273 |
+
}
|
274 |
+
#T_53d0e_row6_col9 {
|
275 |
+
background-color: #1f9f88;
|
276 |
+
color: #f1f1f1;
|
277 |
+
}
|
278 |
+
#T_53d0e_row6_col10, #T_53d0e_row6_col11 {
|
279 |
+
background-color: #39558c;
|
280 |
+
color: #f1f1f1;
|
281 |
+
}
|
282 |
+
#T_53d0e_row6_col13 {
|
283 |
+
background-color: #cde11d;
|
284 |
+
color: #000000;
|
285 |
+
}
|
286 |
+
#T_53d0e_row6_col15 {
|
287 |
+
background-color: #32b67a;
|
288 |
+
color: #f1f1f1;
|
289 |
+
}
|
290 |
+
#T_53d0e_row6_col16 {
|
291 |
+
background-color: #482878;
|
292 |
+
color: #f1f1f1;
|
293 |
+
}
|
294 |
+
#T_53d0e_row7_col2 {
|
295 |
+
background-color: #2ab07f;
|
296 |
+
color: #f1f1f1;
|
297 |
+
}
|
298 |
+
#T_53d0e_row7_col3 {
|
299 |
+
background-color: #1f968b;
|
300 |
+
color: #f1f1f1;
|
301 |
+
}
|
302 |
+
#T_53d0e_row7_col4 {
|
303 |
+
background-color: #2c738e;
|
304 |
+
color: #f1f1f1;
|
305 |
+
}
|
306 |
+
#T_53d0e_row7_col5 {
|
307 |
+
background-color: #21918c;
|
308 |
+
color: #f1f1f1;
|
309 |
+
}
|
310 |
+
#T_53d0e_row7_col7 {
|
311 |
+
background-color: #2b748e;
|
312 |
+
color: #f1f1f1;
|
313 |
+
}
|
314 |
+
#T_53d0e_row7_col8 {
|
315 |
+
background-color: #20a386;
|
316 |
+
color: #f1f1f1;
|
317 |
+
}
|
318 |
+
#T_53d0e_row7_col9 {
|
319 |
+
background-color: #24aa83;
|
320 |
+
color: #f1f1f1;
|
321 |
+
}
|
322 |
+
#T_53d0e_row7_col10 {
|
323 |
+
background-color: #355f8d;
|
324 |
+
color: #f1f1f1;
|
325 |
+
}
|
326 |
+
#T_53d0e_row7_col11 {
|
327 |
+
background-color: #355e8d;
|
328 |
+
color: #f1f1f1;
|
329 |
+
}
|
330 |
+
#T_53d0e_row7_col12 {
|
331 |
+
background-color: #46307e;
|
332 |
+
color: #f1f1f1;
|
333 |
+
}
|
334 |
+
#T_53d0e_row7_col13, #T_53d0e_row9_col13 {
|
335 |
+
background-color: #dae319;
|
336 |
+
color: #000000;
|
337 |
+
}
|
338 |
+
#T_53d0e_row7_col14 {
|
339 |
+
background-color: #9bd93c;
|
340 |
+
color: #000000;
|
341 |
+
}
|
342 |
+
#T_53d0e_row7_col15 {
|
343 |
+
background-color: #1e9c89;
|
344 |
+
color: #f1f1f1;
|
345 |
+
}
|
346 |
+
#T_53d0e_row7_col16 {
|
347 |
+
background-color: #453781;
|
348 |
+
color: #f1f1f1;
|
349 |
+
}
|
350 |
+
#T_53d0e_row8_col3 {
|
351 |
+
background-color: #2db27d;
|
352 |
+
color: #f1f1f1;
|
353 |
+
}
|
354 |
+
#T_53d0e_row8_col4, #T_53d0e_row8_col7 {
|
355 |
+
background-color: #3d4d8a;
|
356 |
+
color: #f1f1f1;
|
357 |
+
}
|
358 |
+
#T_53d0e_row8_col5 {
|
359 |
+
background-color: #21a585;
|
360 |
+
color: #f1f1f1;
|
361 |
+
}
|
362 |
+
#T_53d0e_row8_col8 {
|
363 |
+
background-color: #2c728e;
|
364 |
+
color: #f1f1f1;
|
365 |
+
}
|
366 |
+
#T_53d0e_row8_col10 {
|
367 |
+
background-color: #306a8e;
|
368 |
+
color: #f1f1f1;
|
369 |
+
}
|
370 |
+
#T_53d0e_row8_col11 {
|
371 |
+
background-color: #30698e;
|
372 |
+
color: #f1f1f1;
|
373 |
+
}
|
374 |
+
#T_53d0e_row8_col12 {
|
375 |
+
background-color: #481668;
|
376 |
+
color: #f1f1f1;
|
377 |
+
}
|
378 |
+
#T_53d0e_row8_col13 {
|
379 |
+
background-color: #eae51a;
|
380 |
+
color: #000000;
|
381 |
+
}
|
382 |
+
#T_53d0e_row8_col14 {
|
383 |
+
background-color: #67cc5c;
|
384 |
+
color: #000000;
|
385 |
+
}
|
386 |
+
#T_53d0e_row9_col3 {
|
387 |
+
background-color: #44bf70;
|
388 |
+
color: #f1f1f1;
|
389 |
+
}
|
390 |
+
#T_53d0e_row9_col5 {
|
391 |
+
background-color: #4ac16d;
|
392 |
+
color: #000000;
|
393 |
+
}
|
394 |
+
#T_53d0e_row9_col14 {
|
395 |
+
background-color: #42be71;
|
396 |
+
color: #f1f1f1;
|
397 |
+
}
|
398 |
+
#T_53d0e_row9_col16 {
|
399 |
+
background-color: #2e6f8e;
|
400 |
+
color: #f1f1f1;
|
401 |
+
}
|
402 |
+
#T_53d0e_row10_col2 {
|
403 |
+
background-color: #c2df23;
|
404 |
+
color: #000000;
|
405 |
+
}
|
406 |
+
#T_53d0e_row10_col3 {
|
407 |
+
background-color: #6ccd5a;
|
408 |
+
color: #000000;
|
409 |
+
}
|
410 |
+
#T_53d0e_row10_col8 {
|
411 |
+
background-color: #471063;
|
412 |
+
color: #f1f1f1;
|
413 |
+
}
|
414 |
+
#T_53d0e_row10_col9 {
|
415 |
+
background-color: #453882;
|
416 |
+
color: #f1f1f1;
|
417 |
+
}
|
418 |
+
#T_53d0e_row10_col10 {
|
419 |
+
background-color: #1f978b;
|
420 |
+
color: #f1f1f1;
|
421 |
+
}
|
422 |
+
#T_53d0e_row10_col11 {
|
423 |
+
background-color: #1f948c;
|
424 |
+
color: #f1f1f1;
|
425 |
+
}
|
426 |
+
#T_53d0e_row10_col16 {
|
427 |
+
background-color: #20a486;
|
428 |
+
color: #f1f1f1;
|
429 |
+
}
|
430 |
+
#T_53d0e_row11_col4 {
|
431 |
+
background-color: #48c16e;
|
432 |
+
color: #f1f1f1;
|
433 |
+
}
|
434 |
+
#T_53d0e_row11_col7 {
|
435 |
+
background-color: #46c06f;
|
436 |
+
color: #f1f1f1;
|
437 |
+
}
|
438 |
+
#T_53d0e_row11_col12 {
|
439 |
+
background-color: #5cc863;
|
440 |
+
color: #000000;
|
441 |
+
}
|
442 |
+
</style>
|
443 |
+
<table id="T_53d0e">
|
444 |
+
<thead>
|
445 |
+
<tr>
|
446 |
+
<th class="blank level0" > </th>
|
447 |
+
<th id="T_53d0e_level0_col0" class="col_heading level0 col0" >version</th>
|
448 |
+
<th id="T_53d0e_level0_col1" class="col_heading level0 col1" >d_sae</th>
|
449 |
+
<th id="T_53d0e_level0_col2" class="col_heading level0 col2" >layer</th>
|
450 |
+
<th id="T_53d0e_level0_col3" class="col_heading level0 col3" >kl_div_with_sae</th>
|
451 |
+
<th id="T_53d0e_level0_col4" class="col_heading level0 col4" >kl_div_with_ablation</th>
|
452 |
+
<th id="T_53d0e_level0_col5" class="col_heading level0 col5" >ce_loss_with_sae</th>
|
453 |
+
<th id="T_53d0e_level0_col6" class="col_heading level0 col6" >ce_loss_without_sae</th>
|
454 |
+
<th id="T_53d0e_level0_col7" class="col_heading level0 col7" >ce_loss_with_ablation</th>
|
455 |
+
<th id="T_53d0e_level0_col8" class="col_heading level0 col8" >kl_div_score</th>
|
456 |
+
<th id="T_53d0e_level0_col9" class="col_heading level0 col9" >ce_loss_score</th>
|
457 |
+
<th id="T_53d0e_level0_col10" class="col_heading level0 col10" >l2_norm_in</th>
|
458 |
+
<th id="T_53d0e_level0_col11" class="col_heading level0 col11" >l2_norm_out</th>
|
459 |
+
<th id="T_53d0e_level0_col12" class="col_heading level0 col12" >l2_ratio</th>
|
460 |
+
<th id="T_53d0e_level0_col13" class="col_heading level0 col13" >l0</th>
|
461 |
+
<th id="T_53d0e_level0_col14" class="col_heading level0 col14" >l1</th>
|
462 |
+
<th id="T_53d0e_level0_col15" class="col_heading level0 col15" >explained_variance</th>
|
463 |
+
<th id="T_53d0e_level0_col16" class="col_heading level0 col16" >mse</th>
|
464 |
+
<th id="T_53d0e_level0_col17" class="col_heading level0 col17" >total_tokens_evaluated</th>
|
465 |
+
<th id="T_53d0e_level0_col18" class="col_heading level0 col18" >filepath</th>
|
466 |
+
</tr>
|
467 |
+
</thead>
|
468 |
+
<tbody>
|
469 |
+
<tr>
|
470 |
+
<th id="T_53d0e_level0_row0" class="row_heading level0 row0" >OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_0/metrics.json</th>
|
471 |
+
<td id="T_53d0e_row0_col0" class="data row0 col0" >5</td>
|
472 |
+
<td id="T_53d0e_row0_col1" class="data row0 col1" >128</td>
|
473 |
+
<td id="T_53d0e_row0_col2" class="data row0 col2" >0</td>
|
474 |
+
<td id="T_53d0e_row0_col3" class="data row0 col3" >0.003843</td>
|
475 |
+
<td id="T_53d0e_row0_col4" class="data row0 col4" >12.480284</td>
|
476 |
+
<td id="T_53d0e_row0_col5" class="data row0 col5" >3.603422</td>
|
477 |
+
<td id="T_53d0e_row0_col6" class="data row0 col6" >3.599065</td>
|
478 |
+
<td id="T_53d0e_row0_col7" class="data row0 col7" >15.861977</td>
|
479 |
+
<td id="T_53d0e_row0_col8" class="data row0 col8" >0.999692</td>
|
480 |
+
<td id="T_53d0e_row0_col9" class="data row0 col9" >0.999645</td>
|
481 |
+
<td id="T_53d0e_row0_col10" class="data row0 col10" >32.707962</td>
|
482 |
+
<td id="T_53d0e_row0_col11" class="data row0 col11" >32.607395</td>
|
483 |
+
<td id="T_53d0e_row0_col12" class="data row0 col12" >0.996936</td>
|
484 |
+
<td id="T_53d0e_row0_col13" class="data row0 col13" >31.980795</td>
|
485 |
+
<td id="T_53d0e_row0_col14" class="data row0 col14" >44.247345</td>
|
486 |
+
<td id="T_53d0e_row0_col15" class="data row0 col15" >0.976495</td>
|
487 |
+
<td id="T_53d0e_row0_col16" class="data row0 col16" >5.842685</td>
|
488 |
+
<td id="T_53d0e_row0_col17" class="data row0 col17" >6144.000000</td>
|
489 |
+
<td id="T_53d0e_row0_col18" class="data row0 col18" >OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_0/metrics.json</td>
|
490 |
+
</tr>
|
491 |
+
<tr>
|
492 |
+
<th id="T_53d0e_level0_row1" class="row_heading level0 row1" >OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_1/metrics.json</th>
|
493 |
+
<td id="T_53d0e_row1_col0" class="data row1 col0" >5</td>
|
494 |
+
<td id="T_53d0e_row1_col1" class="data row1 col1" >128</td>
|
495 |
+
<td id="T_53d0e_row1_col2" class="data row1 col2" >1</td>
|
496 |
+
<td id="T_53d0e_row1_col3" class="data row1 col3" >0.006731</td>
|
497 |
+
<td id="T_53d0e_row1_col4" class="data row1 col4" >16.217104</td>
|
498 |
+
<td id="T_53d0e_row1_col5" class="data row1 col5" >3.605462</td>
|
499 |
+
<td id="T_53d0e_row1_col6" class="data row1 col6" >3.599065</td>
|
500 |
+
<td id="T_53d0e_row1_col7" class="data row1 col7" >19.600266</td>
|
501 |
+
<td id="T_53d0e_row1_col8" class="data row1 col8" >0.999585</td>
|
502 |
+
<td id="T_53d0e_row1_col9" class="data row1 col9" >0.999600</td>
|
503 |
+
<td id="T_53d0e_row1_col10" class="data row1 col10" >56.929867</td>
|
504 |
+
<td id="T_53d0e_row1_col11" class="data row1 col11" >56.693493</td>
|
505 |
+
<td id="T_53d0e_row1_col12" class="data row1 col12" >0.995869</td>
|
506 |
+
<td id="T_53d0e_row1_col13" class="data row1 col13" >31.999023</td>
|
507 |
+
<td id="T_53d0e_row1_col14" class="data row1 col14" >59.953354</td>
|
508 |
+
<td id="T_53d0e_row1_col15" class="data row1 col15" >0.972269</td>
|
509 |
+
<td id="T_53d0e_row1_col16" class="data row1 col16" >31.145605</td>
|
510 |
+
<td id="T_53d0e_row1_col17" class="data row1 col17" >6144.000000</td>
|
511 |
+
<td id="T_53d0e_row1_col18" class="data row1 col18" >OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_1/metrics.json</td>
|
512 |
+
</tr>
|
513 |
+
<tr>
|
514 |
+
<th id="T_53d0e_level0_row2" class="row_heading level0 row2" >OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_2/metrics.json</th>
|
515 |
+
<td id="T_53d0e_row2_col0" class="data row2 col0" >5</td>
|
516 |
+
<td id="T_53d0e_row2_col1" class="data row2 col1" >128</td>
|
517 |
+
<td id="T_53d0e_row2_col2" class="data row2 col2" >2</td>
|
518 |
+
<td id="T_53d0e_row2_col3" class="data row2 col3" >0.014035</td>
|
519 |
+
<td id="T_53d0e_row2_col4" class="data row2 col4" >12.813511</td>
|
520 |
+
<td id="T_53d0e_row2_col5" class="data row2 col5" >3.612803</td>
|
521 |
+
<td id="T_53d0e_row2_col6" class="data row2 col6" >3.599065</td>
|
522 |
+
<td id="T_53d0e_row2_col7" class="data row2 col7" >16.327873</td>
|
523 |
+
<td id="T_53d0e_row2_col8" class="data row2 col8" >0.998905</td>
|
524 |
+
<td id="T_53d0e_row2_col9" class="data row2 col9" >0.998921</td>
|
525 |
+
<td id="T_53d0e_row2_col10" class="data row2 col10" >68.907532</td>
|
526 |
+
<td id="T_53d0e_row2_col11" class="data row2 col11" >68.518921</td>
|
527 |
+
<td id="T_53d0e_row2_col12" class="data row2 col12" >0.993602</td>
|
528 |
+
<td id="T_53d0e_row2_col13" class="data row2 col13" >31.999350</td>
|
529 |
+
<td id="T_53d0e_row2_col14" class="data row2 col14" >53.266327</td>
|
530 |
+
<td id="T_53d0e_row2_col15" class="data row2 col15" >0.969186</td>
|
531 |
+
<td id="T_53d0e_row2_col16" class="data row2 col16" >49.732628</td>
|
532 |
+
<td id="T_53d0e_row2_col17" class="data row2 col17" >6144.000000</td>
|
533 |
+
<td id="T_53d0e_row2_col18" class="data row2 col18" >OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_2/metrics.json</td>
|
534 |
+
</tr>
|
535 |
+
<tr>
|
536 |
+
<th id="T_53d0e_level0_row3" class="row_heading level0 row3" >OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_3/metrics.json</th>
|
537 |
+
<td id="T_53d0e_row3_col0" class="data row3 col0" >5</td>
|
538 |
+
<td id="T_53d0e_row3_col1" class="data row3 col1" >128</td>
|
539 |
+
<td id="T_53d0e_row3_col2" class="data row3 col2" >3</td>
|
540 |
+
<td id="T_53d0e_row3_col3" class="data row3 col3" >0.023511</td>
|
541 |
+
<td id="T_53d0e_row3_col4" class="data row3 col4" >10.101868</td>
|
542 |
+
<td id="T_53d0e_row3_col5" class="data row3 col5" >3.622775</td>
|
543 |
+
<td id="T_53d0e_row3_col6" class="data row3 col6" >3.599065</td>
|
544 |
+
<td id="T_53d0e_row3_col7" class="data row3 col7" >13.548822</td>
|
545 |
+
<td id="T_53d0e_row3_col8" class="data row3 col8" >0.997673</td>
|
546 |
+
<td id="T_53d0e_row3_col9" class="data row3 col9" >0.997617</td>
|
547 |
+
<td id="T_53d0e_row3_col10" class="data row3 col10" >103.711441</td>
|
548 |
+
<td id="T_53d0e_row3_col11" class="data row3 col11" >103.026962</td>
|
549 |
+
<td id="T_53d0e_row3_col12" class="data row3 col12" >0.989538</td>
|
550 |
+
<td id="T_53d0e_row3_col13" class="data row3 col13" >31.983074</td>
|
551 |
+
<td id="T_53d0e_row3_col14" class="data row3 col14" >54.825603</td>
|
552 |
+
<td id="T_53d0e_row3_col15" class="data row3 col15" >0.975295</td>
|
553 |
+
<td id="T_53d0e_row3_col16" class="data row3 col16" >89.620079</td>
|
554 |
+
<td id="T_53d0e_row3_col17" class="data row3 col17" >6144.000000</td>
|
555 |
+
<td id="T_53d0e_row3_col18" class="data row3 col18" >OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_3/metrics.json</td>
|
556 |
+
</tr>
|
557 |
+
<tr>
|
558 |
+
<th id="T_53d0e_level0_row4" class="row_heading level0 row4" >OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_4/metrics.json</th>
|
559 |
+
<td id="T_53d0e_row4_col0" class="data row4 col0" >5</td>
|
560 |
+
<td id="T_53d0e_row4_col1" class="data row4 col1" >128</td>
|
561 |
+
<td id="T_53d0e_row4_col2" class="data row4 col2" >4</td>
|
562 |
+
<td id="T_53d0e_row4_col3" class="data row4 col3" >0.037530</td>
|
563 |
+
<td id="T_53d0e_row4_col4" class="data row4 col4" >13.249713</td>
|
564 |
+
<td id="T_53d0e_row4_col5" class="data row4 col5" >3.640705</td>
|
565 |
+
<td id="T_53d0e_row4_col6" class="data row4 col6" >3.599065</td>
|
566 |
+
<td id="T_53d0e_row4_col7" class="data row4 col7" >16.699104</td>
|
567 |
+
<td id="T_53d0e_row4_col8" class="data row4 col8" >0.997167</td>
|
568 |
+
<td id="T_53d0e_row4_col9" class="data row4 col9" >0.996821</td>
|
569 |
+
<td id="T_53d0e_row4_col10" class="data row4 col10" >111.403282</td>
|
570 |
+
<td id="T_53d0e_row4_col11" class="data row4 col11" >110.286774</td>
|
571 |
+
<td id="T_53d0e_row4_col12" class="data row4 col12" >0.984007</td>
|
572 |
+
<td id="T_53d0e_row4_col13" class="data row4 col13" >31.948568</td>
|
573 |
+
<td id="T_53d0e_row4_col14" class="data row4 col14" >56.497829</td>
|
574 |
+
<td id="T_53d0e_row4_col15" class="data row4 col15" >0.963923</td>
|
575 |
+
<td id="T_53d0e_row4_col16" class="data row4 col16" >153.826294</td>
|
576 |
+
<td id="T_53d0e_row4_col17" class="data row4 col17" >6144.000000</td>
|
577 |
+
<td id="T_53d0e_row4_col18" class="data row4 col18" >OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_4/metrics.json</td>
|
578 |
+
</tr>
|
579 |
+
<tr>
|
580 |
+
<th id="T_53d0e_level0_row5" class="row_heading level0 row5" >OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_5/metrics.json</th>
|
581 |
+
<td id="T_53d0e_row5_col0" class="data row5 col0" >5</td>
|
582 |
+
<td id="T_53d0e_row5_col1" class="data row5 col1" >128</td>
|
583 |
+
<td id="T_53d0e_row5_col2" class="data row5 col2" >5</td>
|
584 |
+
<td id="T_53d0e_row5_col3" class="data row5 col3" >0.047411</td>
|
585 |
+
<td id="T_53d0e_row5_col4" class="data row5 col4" >11.519682</td>
|
586 |
+
<td id="T_53d0e_row5_col5" class="data row5 col5" >3.644166</td>
|
587 |
+
<td id="T_53d0e_row5_col6" class="data row5 col6" >3.599065</td>
|
588 |
+
<td id="T_53d0e_row5_col7" class="data row5 col7" >14.860109</td>
|
589 |
+
<td id="T_53d0e_row5_col8" class="data row5 col8" >0.995884</td>
|
590 |
+
<td id="T_53d0e_row5_col9" class="data row5 col9" >0.995995</td>
|
591 |
+
<td id="T_53d0e_row5_col10" class="data row5 col10" >119.651489</td>
|
592 |
+
<td id="T_53d0e_row5_col11" class="data row5 col11" >118.053459</td>
|
593 |
+
<td id="T_53d0e_row5_col12" class="data row5 col12" >0.978980</td>
|
594 |
+
<td id="T_53d0e_row5_col13" class="data row5 col13" >31.958008</td>
|
595 |
+
<td id="T_53d0e_row5_col14" class="data row5 col14" >56.149471</td>
|
596 |
+
<td id="T_53d0e_row5_col15" class="data row5 col15" >0.952276</td>
|
597 |
+
<td id="T_53d0e_row5_col16" class="data row5 col16" >238.126266</td>
|
598 |
+
<td id="T_53d0e_row5_col17" class="data row5 col17" >6144.000000</td>
|
599 |
+
<td id="T_53d0e_row5_col18" class="data row5 col18" >OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_5/metrics.json</td>
|
600 |
+
</tr>
|
601 |
+
<tr>
|
602 |
+
<th id="T_53d0e_level0_row6" class="row_heading level0 row6" >OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_6/metrics.json</th>
|
603 |
+
<td id="T_53d0e_row6_col0" class="data row6 col0" >5</td>
|
604 |
+
<td id="T_53d0e_row6_col1" class="data row6 col1" >128</td>
|
605 |
+
<td id="T_53d0e_row6_col2" class="data row6 col2" >6</td>
|
606 |
+
<td id="T_53d0e_row6_col3" class="data row6 col3" >0.060794</td>
|
607 |
+
<td id="T_53d0e_row6_col4" class="data row6 col4" >6.933250</td>
|
608 |
+
<td id="T_53d0e_row6_col5" class="data row6 col5" >3.655243</td>
|
609 |
+
<td id="T_53d0e_row6_col6" class="data row6 col6" >3.599065</td>
|
610 |
+
<td id="T_53d0e_row6_col7" class="data row6 col7" >10.522690</td>
|
611 |
+
<td id="T_53d0e_row6_col8" class="data row6 col8" >0.991232</td>
|
612 |
+
<td id="T_53d0e_row6_col9" class="data row6 col9" >0.991886</td>
|
613 |
+
<td id="T_53d0e_row6_col10" class="data row6 col10" >128.847931</td>
|
614 |
+
<td id="T_53d0e_row6_col11" class="data row6 col11" >126.688171</td>
|
615 |
+
<td id="T_53d0e_row6_col12" class="data row6 col12" >0.974231</td>
|
616 |
+
<td id="T_53d0e_row6_col13" class="data row6 col13" >31.996094</td>
|
617 |
+
<td id="T_53d0e_row6_col14" class="data row6 col14" >56.119469</td>
|
618 |
+
<td id="T_53d0e_row6_col15" class="data row6 col15" >0.940335</td>
|
619 |
+
<td id="T_53d0e_row6_col16" class="data row6 col16" >349.569763</td>
|
620 |
+
<td id="T_53d0e_row6_col17" class="data row6 col17" >6144.000000</td>
|
621 |
+
<td id="T_53d0e_row6_col18" class="data row6 col18" >OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_6/metrics.json</td>
|
622 |
+
</tr>
|
623 |
+
<tr>
|
624 |
+
<th id="T_53d0e_level0_row7" class="row_heading level0 row7" >OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_7/metrics.json</th>
|
625 |
+
<td id="T_53d0e_row7_col0" class="data row7 col0" >5</td>
|
626 |
+
<td id="T_53d0e_row7_col1" class="data row7 col1" >128</td>
|
627 |
+
<td id="T_53d0e_row7_col2" class="data row7 col2" >7</td>
|
628 |
+
<td id="T_53d0e_row7_col3" class="data row7 col3" >0.073590</td>
|
629 |
+
<td id="T_53d0e_row7_col4" class="data row7 col4" >9.511523</td>
|
630 |
+
<td id="T_53d0e_row7_col5" class="data row7 col5" >3.668211</td>
|
631 |
+
<td id="T_53d0e_row7_col6" class="data row7 col6" >3.599065</td>
|
632 |
+
<td id="T_53d0e_row7_col7" class="data row7 col7" >13.054041</td>
|
633 |
+
<td id="T_53d0e_row7_col8" class="data row7 col8" >0.992263</td>
|
634 |
+
<td id="T_53d0e_row7_col9" class="data row7 col9" >0.992687</td>
|
635 |
+
<td id="T_53d0e_row7_col10" class="data row7 col10" >140.905991</td>
|
636 |
+
<td id="T_53d0e_row7_col11" class="data row7 col11" >138.128754</td>
|
637 |
+
<td id="T_53d0e_row7_col12" class="data row7 col12" >0.970831</td>
|
638 |
+
<td id="T_53d0e_row7_col13" class="data row7 col13" >31.997070</td>
|
639 |
+
<td id="T_53d0e_row7_col14" class="data row7 col14" >55.752361</td>
|
640 |
+
<td id="T_53d0e_row7_col15" class="data row7 col15" >0.928668</td>
|
641 |
+
<td id="T_53d0e_row7_col16" class="data row7 col16" >499.765717</td>
|
642 |
+
<td id="T_53d0e_row7_col17" class="data row7 col17" >6144.000000</td>
|
643 |
+
<td id="T_53d0e_row7_col18" class="data row7 col18" >OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_7/metrics.json</td>
|
644 |
+
</tr>
|
645 |
+
<tr>
|
646 |
+
<th id="T_53d0e_level0_row8" class="row_heading level0 row8" >OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_8/metrics.json</th>
|
647 |
+
<td id="T_53d0e_row8_col0" class="data row8 col0" >5</td>
|
648 |
+
<td id="T_53d0e_row8_col1" class="data row8 col1" >128</td>
|
649 |
+
<td id="T_53d0e_row8_col2" class="data row8 col2" >8</td>
|
650 |
+
<td id="T_53d0e_row8_col3" class="data row8 col3" >0.089368</td>
|
651 |
+
<td id="T_53d0e_row8_col4" class="data row8 col4" >7.897106</td>
|
652 |
+
<td id="T_53d0e_row8_col5" class="data row8 col5" >3.679746</td>
|
653 |
+
<td id="T_53d0e_row8_col6" class="data row8 col6" >3.599065</td>
|
654 |
+
<td id="T_53d0e_row8_col7" class="data row8 col7" >11.460873</td>
|
655 |
+
<td id="T_53d0e_row8_col8" class="data row8 col8" >0.988683</td>
|
656 |
+
<td id="T_53d0e_row8_col9" class="data row8 col9" >0.989738</td>
|
657 |
+
<td id="T_53d0e_row8_col10" class="data row8 col10" >157.343246</td>
|
658 |
+
<td id="T_53d0e_row8_col11" class="data row8 col11" >153.839539</td>
|
659 |
+
<td id="T_53d0e_row8_col12" class="data row8 col12" >0.968352</td>
|
660 |
+
<td id="T_53d0e_row8_col13" class="data row8 col13" >31.998373</td>
|
661 |
+
<td id="T_53d0e_row8_col14" class="data row8 col14" >53.455093</td>
|
662 |
+
<td id="T_53d0e_row8_col15" class="data row8 col15" >0.913877</td>
|
663 |
+
<td id="T_53d0e_row8_col16" class="data row8 col16" >732.317871</td>
|
664 |
+
<td id="T_53d0e_row8_col17" class="data row8 col17" >6144.000000</td>
|
665 |
+
<td id="T_53d0e_row8_col18" class="data row8 col18" >OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_8/metrics.json</td>
|
666 |
+
</tr>
|
667 |
+
<tr>
|
668 |
+
<th id="T_53d0e_level0_row9" class="row_heading level0 row9" >OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_9/metrics.json</th>
|
669 |
+
<td id="T_53d0e_row9_col0" class="data row9 col0" >5</td>
|
670 |
+
<td id="T_53d0e_row9_col1" class="data row9 col1" >128</td>
|
671 |
+
<td id="T_53d0e_row9_col2" class="data row9 col2" >9</td>
|
672 |
+
<td id="T_53d0e_row9_col3" class="data row9 col3" >0.096923</td>
|
673 |
+
<td id="T_53d0e_row9_col4" class="data row9 col4" >5.396312</td>
|
674 |
+
<td id="T_53d0e_row9_col5" class="data row9 col5" >3.695744</td>
|
675 |
+
<td id="T_53d0e_row9_col6" class="data row9 col6" >3.599065</td>
|
676 |
+
<td id="T_53d0e_row9_col7" class="data row9 col7" >8.970472</td>
|
677 |
+
<td id="T_53d0e_row9_col8" class="data row9 col8" >0.982039</td>
|
678 |
+
<td id="T_53d0e_row9_col9" class="data row9 col9" >0.982001</td>
|
679 |
+
<td id="T_53d0e_row9_col10" class="data row9 col10" >181.313721</td>
|
680 |
+
<td id="T_53d0e_row9_col11" class="data row9 col11" >176.829346</td>
|
681 |
+
<td id="T_53d0e_row9_col12" class="data row9 col12" >0.966578</td>
|
682 |
+
<td id="T_53d0e_row9_col13" class="data row9 col13" >31.997070</td>
|
683 |
+
<td id="T_53d0e_row9_col14" class="data row9 col14" >51.393932</td>
|
684 |
+
<td id="T_53d0e_row9_col15" class="data row9 col15" >0.895696</td>
|
685 |
+
<td id="T_53d0e_row9_col16" class="data row9 col16" >1123.425049</td>
|
686 |
+
<td id="T_53d0e_row9_col17" class="data row9 col17" >6144.000000</td>
|
687 |
+
<td id="T_53d0e_row9_col18" class="data row9 col18" >OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_9/metrics.json</td>
|
688 |
+
</tr>
|
689 |
+
<tr>
|
690 |
+
<th id="T_53d0e_level0_row10" class="row_heading level0 row10" >OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_10/metrics.json</th>
|
691 |
+
<td id="T_53d0e_row10_col0" class="data row10 col0" >5</td>
|
692 |
+
<td id="T_53d0e_row10_col1" class="data row10 col1" >128</td>
|
693 |
+
<td id="T_53d0e_row10_col2" class="data row10 col2" >10</td>
|
694 |
+
<td id="T_53d0e_row10_col3" class="data row10 col3" >0.106645</td>
|
695 |
+
<td id="T_53d0e_row10_col4" class="data row10 col4" >6.193092</td>
|
696 |
+
<td id="T_53d0e_row10_col5" class="data row10 col5" >3.692219</td>
|
697 |
+
<td id="T_53d0e_row10_col6" class="data row10 col6" >3.599065</td>
|
698 |
+
<td id="T_53d0e_row10_col7" class="data row10 col7" >9.754217</td>
|
699 |
+
<td id="T_53d0e_row10_col8" class="data row10 col8" >0.982780</td>
|
700 |
+
<td id="T_53d0e_row10_col9" class="data row10 col9" >0.984866</td>
|
701 |
+
<td id="T_53d0e_row10_col10" class="data row10 col10" >224.287598</td>
|
702 |
+
<td id="T_53d0e_row10_col11" class="data row10 col11" >218.769226</td>
|
703 |
+
<td id="T_53d0e_row10_col12" class="data row10 col12" >0.968813</td>
|
704 |
+
<td id="T_53d0e_row10_col13" class="data row10 col13" >31.998047</td>
|
705 |
+
<td id="T_53d0e_row10_col14" class="data row10 col14" >47.411495</td>
|
706 |
+
<td id="T_53d0e_row10_col15" class="data row10 col15" >0.877616</td>
|
707 |
+
<td id="T_53d0e_row10_col16" class="data row10 col16" >1806.194092</td>
|
708 |
+
<td id="T_53d0e_row10_col17" class="data row10 col17" >6144.000000</td>
|
709 |
+
<td id="T_53d0e_row10_col18" class="data row10 col18" >OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_10/metrics.json</td>
|
710 |
+
</tr>
|
711 |
+
<tr>
|
712 |
+
<th id="T_53d0e_level0_row11" class="row_heading level0 row11" >OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_11/metrics.json</th>
|
713 |
+
<td id="T_53d0e_row11_col0" class="data row11 col0" >5</td>
|
714 |
+
<td id="T_53d0e_row11_col1" class="data row11 col1" >128</td>
|
715 |
+
<td id="T_53d0e_row11_col2" class="data row11 col2" >11</td>
|
716 |
+
<td id="T_53d0e_row11_col3" class="data row11 col3" >0.136668</td>
|
717 |
+
<td id="T_53d0e_row11_col4" class="data row11 col4" >13.087515</td>
|
718 |
+
<td id="T_53d0e_row11_col5" class="data row11 col5" >3.732962</td>
|
719 |
+
<td id="T_53d0e_row11_col6" class="data row11 col6" >3.599065</td>
|
720 |
+
<td id="T_53d0e_row11_col7" class="data row11 col7" >16.484846</td>
|
721 |
+
<td id="T_53d0e_row11_col8" class="data row11 col8" >0.989557</td>
|
722 |
+
<td id="T_53d0e_row11_col9" class="data row11 col9" >0.989609</td>
|
723 |
+
<td id="T_53d0e_row11_col10" class="data row11 col10" >395.539520</td>
|
724 |
+
<td id="T_53d0e_row11_col11" class="data row11 col11" >391.472504</td>
|
725 |
+
<td id="T_53d0e_row11_col12" class="data row11 col12" >0.989259</td>
|
726 |
+
<td id="T_53d0e_row11_col13" class="data row11 col13" >32.000000</td>
|
727 |
+
<td id="T_53d0e_row11_col14" class="data row11 col14" >31.824055</td>
|
728 |
+
<td id="T_53d0e_row11_col15" class="data row11 col15" >0.870424</td>
|
729 |
+
<td id="T_53d0e_row11_col16" class="data row11 col16" >3098.075928</td>
|
730 |
+
<td id="T_53d0e_row11_col17" class="data row11 col17" >6144.000000</td>
|
731 |
+
<td id="T_53d0e_row11_col18" class="data row11 col18" >OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_11/metrics.json</td>
|
732 |
+
</tr>
|
733 |
+
</tbody>
|
734 |
+
</table>
|
benchmark_stats.png
ADDED
Git LFS Details
|
v5_128k_layer_0/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"architecture": "standard", "d_in": 768, "d_sae": 131072, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.0.hook_resid_mid", "hook_layer": 0, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
|
v5_128k_layer_0/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"metrics/kl_div_with_sae": 0.0038433405570685863, "metrics/kl_div_with_ablation": 12.480283737182617, "metrics/ce_loss_with_sae": 3.603421926498413, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 15.861976623535156, "metrics/kl_div_score": 0.9996920470208848, "metrics/ce_loss_score": 0.9996446734723997, "metrics/l2_norm_in": 32.70796203613281, "metrics/l2_norm_out": 32.60739517211914, "metrics/l2_ratio": 0.9969363212585449, "metrics/l0": 31.98079490661621, "metrics/l1": 44.247344970703125, "metrics/explained_variance": 0.9764951467514038, "metrics/mse": 5.842685222625732, "metrics/total_tokens_evaluated": 6144}
|
v5_128k_layer_0/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:53c0b574675a7cb8d08aa9e285188adb4aa93010196c9abe2477b44efb31010b
|
3 |
+
size 805834048
|
v5_128k_layer_0/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c97f5d57a4ab3ed04963c38314c1d6190cb348deaf00ed450a55be408377837c
|
3 |
+
size 524368
|
v5_128k_layer_1/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"architecture": "standard", "d_in": 768, "d_sae": 131072, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.1.hook_resid_mid", "hook_layer": 1, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
|
v5_128k_layer_1/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"metrics/kl_div_with_sae": 0.006731455214321613, "metrics/kl_div_with_ablation": 16.217103958129883, "metrics/ce_loss_with_sae": 3.605462074279785, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 19.600265502929688, "metrics/kl_div_score": 0.999584916318493, "metrics/ce_loss_score": 0.999600187150498, "metrics/l2_norm_in": 56.929866790771484, "metrics/l2_norm_out": 56.6934928894043, "metrics/l2_ratio": 0.9958688616752625, "metrics/l0": 31.9990234375, "metrics/l1": 59.95335388183594, "metrics/explained_variance": 0.9722690582275391, "metrics/mse": 31.145605087280273, "metrics/total_tokens_evaluated": 6144}
|
v5_128k_layer_1/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2b70e6a77f0297230ab8316ae3b0b87be64d872f7dcae70d3b4210389a7820d6
|
3 |
+
size 805834048
|
v5_128k_layer_1/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:73aaa61cd691b1360f3c29f3b852d40d4c563c5183daca89b952b73efeff6553
|
3 |
+
size 524368
|
v5_128k_layer_10/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"architecture": "standard", "d_in": 768, "d_sae": 131072, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.10.hook_resid_mid", "hook_layer": 10, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
|
v5_128k_layer_10/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"metrics/kl_div_with_sae": 0.10664453357458115, "metrics/kl_div_with_ablation": 6.193092346191406, "metrics/ce_loss_with_sae": 3.692218780517578, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 9.754217147827148, "metrics/kl_div_score": 0.98278008341985, "metrics/ce_loss_score": 0.9848656566878472, "metrics/l2_norm_in": 224.28759765625, "metrics/l2_norm_out": 218.76922607421875, "metrics/l2_ratio": 0.9688126444816589, "metrics/l0": 31.998046875, "metrics/l1": 47.411495208740234, "metrics/explained_variance": 0.8776161670684814, "metrics/mse": 1806.194091796875, "metrics/total_tokens_evaluated": 6144}
|
v5_128k_layer_10/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1d6a0caf038ffa29fe41fbc18beef10ff74f0560966014d21cd994f22e649b5d
|
3 |
+
size 805834048
|
v5_128k_layer_10/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4b9dad4028b6d5f23adc11428739cead9a520c0fcb536d02cf63286c0682b999
|
3 |
+
size 524368
|
v5_128k_layer_11/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"architecture": "standard", "d_in": 768, "d_sae": 131072, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.11.hook_resid_mid", "hook_layer": 11, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
|
v5_128k_layer_11/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"metrics/kl_div_with_sae": 0.1366678923368454, "metrics/kl_div_with_ablation": 13.087514877319336, "metrics/ce_loss_with_sae": 3.7329623699188232, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 16.484846115112305, "metrics/kl_div_score": 0.9895573839939856, "metrics/ce_loss_score": 0.9896088738509167, "metrics/l2_norm_in": 395.5395202636719, "metrics/l2_norm_out": 391.4725036621094, "metrics/l2_ratio": 0.9892591834068298, "metrics/l0": 32.0, "metrics/l1": 31.824054718017578, "metrics/explained_variance": 0.8704243898391724, "metrics/mse": 3098.075927734375, "metrics/total_tokens_evaluated": 6144}
|
v5_128k_layer_11/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b297415ca9916b4e6929f7d82eea602acef5c39dd7bed502ac77873a55e72c68
|
3 |
+
size 805834048
|
v5_128k_layer_11/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9ab69d29a274fbe5b7360c67fd0c62fffa384ff5979184093cba1e727ccd7427
|
3 |
+
size 524368
|
v5_128k_layer_2/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"architecture": "standard", "d_in": 768, "d_sae": 131072, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.2.hook_resid_mid", "hook_layer": 2, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
|
v5_128k_layer_2/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"metrics/kl_div_with_sae": 0.014034643769264221, "metrics/kl_div_with_ablation": 12.81351089477539, "metrics/ce_loss_with_sae": 3.6128032207489014, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 16.32787322998047, "metrics/kl_div_score": 0.9989046995874498, "metrics/ce_loss_score": 0.9989206662941394, "metrics/l2_norm_in": 68.90753173828125, "metrics/l2_norm_out": 68.5189208984375, "metrics/l2_ratio": 0.9936020374298096, "metrics/l0": 31.99934959411621, "metrics/l1": 53.266326904296875, "metrics/explained_variance": 0.9691864848136902, "metrics/mse": 49.732627868652344, "metrics/total_tokens_evaluated": 6144}
|
v5_128k_layer_2/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1caab24df127183793a8a856b3d147dc2e34b9f8422f0b6b404233f70214efb2
|
3 |
+
size 805834048
|
v5_128k_layer_2/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1a32fe5876cb55265ae4de471b9003f0e8fa03d5df356dc9e6e320bc7d618037
|
3 |
+
size 524368
|
v5_128k_layer_3/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"architecture": "standard", "d_in": 768, "d_sae": 131072, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.3.hook_resid_mid", "hook_layer": 3, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
|
v5_128k_layer_3/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"metrics/kl_div_with_sae": 0.023511391133069992, "metrics/kl_div_with_ablation": 10.10186767578125, "metrics/ce_loss_with_sae": 3.622774839401245, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 13.548822402954102, "metrics/kl_div_score": 0.9976725698764163, "metrics/ce_loss_score": 0.9976170022128419, "metrics/l2_norm_in": 103.71144104003906, "metrics/l2_norm_out": 103.02696228027344, "metrics/l2_ratio": 0.9895378351211548, "metrics/l0": 31.983074188232422, "metrics/l1": 54.82560348510742, "metrics/explained_variance": 0.9752952456474304, "metrics/mse": 89.62007904052734, "metrics/total_tokens_evaluated": 6144}
|
v5_128k_layer_3/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:06cbde3bc5a4e83d69f0ce34dd1ba53c843b7c94b40b7d006020732b54026826
|
3 |
+
size 805834048
|
v5_128k_layer_3/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0456b0483683bcfe4f1efa9f1ed4f46ca8b5840cba2a95b2bed5c6e3cd830492
|
3 |
+
size 524368
|
v5_128k_layer_4/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"architecture": "standard", "d_in": 768, "d_sae": 131072, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.4.hook_resid_mid", "hook_layer": 4, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
|
v5_128k_layer_4/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"metrics/kl_div_with_sae": 0.03753046691417694, "metrics/kl_div_with_ablation": 13.249712944030762, "metrics/ce_loss_with_sae": 3.640705108642578, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 16.69910430908203, "metrics/kl_div_score": 0.9971674505649509, "metrics/ce_loss_score": 0.9968213439818392, "metrics/l2_norm_in": 111.40328216552734, "metrics/l2_norm_out": 110.28677368164062, "metrics/l2_ratio": 0.9840068817138672, "metrics/l0": 31.94856834411621, "metrics/l1": 56.49782943725586, "metrics/explained_variance": 0.9639231562614441, "metrics/mse": 153.8262939453125, "metrics/total_tokens_evaluated": 6144}
|
v5_128k_layer_4/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5e225c8a8b01cc6f6bc625e9f259e7dc0488c0f483d4584d1a32361b90666b41
|
3 |
+
size 805834048
|
v5_128k_layer_4/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:af6f11b2f0ca3f1b787c7c1c11a3fe30c8f58a52c0611ab7f781d67b773325e1
|
3 |
+
size 524368
|
v5_128k_layer_5/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"architecture": "standard", "d_in": 768, "d_sae": 131072, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.5.hook_resid_mid", "hook_layer": 5, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
|
v5_128k_layer_5/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"metrics/kl_div_with_sae": 0.04741125553846359, "metrics/kl_div_with_ablation": 11.519681930541992, "metrics/ce_loss_with_sae": 3.6441657543182373, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 14.860109329223633, "metrics/kl_div_score": 0.9958843259888311, "metrics/ce_loss_score": 0.9959949394740818, "metrics/l2_norm_in": 119.6514892578125, "metrics/l2_norm_out": 118.05345916748047, "metrics/l2_ratio": 0.9789802432060242, "metrics/l0": 31.9580078125, "metrics/l1": 56.149471282958984, "metrics/explained_variance": 0.9522756338119507, "metrics/mse": 238.1262664794922, "metrics/total_tokens_evaluated": 6144}
|
v5_128k_layer_5/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b61e22ab49ca410f4c70b7a95897f3209a598dffcd82248a713bdfea743df862
|
3 |
+
size 805834048
|
v5_128k_layer_5/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:57431e5cb89876fb8170d1cc730430f296b725aaab7962e6e5b571cf4718f863
|
3 |
+
size 524368
|
v5_128k_layer_6/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"architecture": "standard", "d_in": 768, "d_sae": 131072, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.6.hook_resid_mid", "hook_layer": 6, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
|
v5_128k_layer_6/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"metrics/kl_div_with_sae": 0.06079387292265892, "metrics/kl_div_with_ablation": 6.933250427246094, "metrics/ce_loss_with_sae": 3.655242681503296, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 10.522689819335938, "metrics/kl_div_score": 0.9912315480941302, "metrics/ce_loss_score": 0.9918860291994546, "metrics/l2_norm_in": 128.84793090820312, "metrics/l2_norm_out": 126.68817138671875, "metrics/l2_ratio": 0.9742312431335449, "metrics/l0": 31.99609375, "metrics/l1": 56.119468688964844, "metrics/explained_variance": 0.940334677696228, "metrics/mse": 349.56976318359375, "metrics/total_tokens_evaluated": 6144}
|
v5_128k_layer_6/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:de3601867837d9a54d22e60ed60e3dcb19cc8a9ea35d2c3f3250051915006162
|
3 |
+
size 805834048
|
v5_128k_layer_6/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5c936737aa0fbd6f271e8be7aea29053357e83055b9922fca067bb96eb2958c4
|
3 |
+
size 524368
|
v5_128k_layer_7/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"architecture": "standard", "d_in": 768, "d_sae": 131072, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.7.hook_resid_mid", "hook_layer": 7, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
|
v5_128k_layer_7/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"metrics/kl_div_with_sae": 0.07359001040458679, "metrics/kl_div_with_ablation": 9.511523246765137, "metrics/ce_loss_with_sae": 3.668210983276367, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 13.054040908813477, "metrics/kl_div_score": 0.9922630678078178, "metrics/ce_loss_score": 0.9926867722998524, "metrics/l2_norm_in": 140.90599060058594, "metrics/l2_norm_out": 138.12875366210938, "metrics/l2_ratio": 0.9708306789398193, "metrics/l0": 31.9970703125, "metrics/l1": 55.75236129760742, "metrics/explained_variance": 0.928668200969696, "metrics/mse": 499.7657165527344, "metrics/total_tokens_evaluated": 6144}
|
v5_128k_layer_7/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2e663dba31833e757306887ad8b311e74a3a1cce66474be454c958106768b39c
|
3 |
+
size 805834048
|
v5_128k_layer_7/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ffcaab47977c368c98875bffe48f3e4c66ffd455605e791e06f3178c5f5b96fa
|
3 |
+
size 524368
|
v5_128k_layer_8/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"architecture": "standard", "d_in": 768, "d_sae": 131072, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.8.hook_resid_mid", "hook_layer": 8, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
|
v5_128k_layer_8/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"metrics/kl_div_with_sae": 0.08936788886785507, "metrics/kl_div_with_ablation": 7.897105693817139, "metrics/ce_loss_with_sae": 3.679746389389038, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 11.460872650146484, "metrics/kl_div_score": 0.9886834629884942, "metrics/ce_loss_score": 0.9897375005583807, "metrics/l2_norm_in": 157.34324645996094, "metrics/l2_norm_out": 153.83953857421875, "metrics/l2_ratio": 0.9683517217636108, "metrics/l0": 31.99837303161621, "metrics/l1": 53.45509338378906, "metrics/explained_variance": 0.9138767123222351, "metrics/mse": 732.31787109375, "metrics/total_tokens_evaluated": 6144}
|
v5_128k_layer_8/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:60f311c3a33f5fc526a04fc344266b83e16ec90cea69d6059089e0c8790f37ca
|
3 |
+
size 805834048
|
v5_128k_layer_8/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:57ecd3cf156b9a328e813f10d378b57ed1a6f37d2604bf0f37e11e75438b301f
|
3 |
+
size 524368
|
v5_128k_layer_9/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"architecture": "standard", "d_in": 768, "d_sae": 131072, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.9.hook_resid_mid", "hook_layer": 9, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
|
v5_128k_layer_9/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"metrics/kl_div_with_sae": 0.09692329168319702, "metrics/kl_div_with_ablation": 5.3963117599487305, "metrics/ce_loss_with_sae": 3.6957435607910156, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 8.97047233581543, "metrics/kl_div_score": 0.9820389747674404, "metrics/ce_loss_score": 0.9820011853887981, "metrics/l2_norm_in": 181.313720703125, "metrics/l2_norm_out": 176.829345703125, "metrics/l2_ratio": 0.966578483581543, "metrics/l0": 31.9970703125, "metrics/l1": 51.3939323425293, "metrics/explained_variance": 0.8956956267356873, "metrics/mse": 1123.425048828125, "metrics/total_tokens_evaluated": 6144}
|