jbloom commited on
Commit
ceef8ed
1 Parent(s): 14584da

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +1 -0
  2. benchmark_stats.csv +13 -0
  3. benchmark_stats.html +734 -0
  4. benchmark_stats.png +3 -0
  5. v5_128k_layer_0/cfg.json +1 -0
  6. v5_128k_layer_0/metrics.json +1 -0
  7. v5_128k_layer_0/sae_weights.safetensors +3 -0
  8. v5_128k_layer_0/sparsity.safetensors +3 -0
  9. v5_128k_layer_1/cfg.json +1 -0
  10. v5_128k_layer_1/metrics.json +1 -0
  11. v5_128k_layer_1/sae_weights.safetensors +3 -0
  12. v5_128k_layer_1/sparsity.safetensors +3 -0
  13. v5_128k_layer_10/cfg.json +1 -0
  14. v5_128k_layer_10/metrics.json +1 -0
  15. v5_128k_layer_10/sae_weights.safetensors +3 -0
  16. v5_128k_layer_10/sparsity.safetensors +3 -0
  17. v5_128k_layer_11/cfg.json +1 -0
  18. v5_128k_layer_11/metrics.json +1 -0
  19. v5_128k_layer_11/sae_weights.safetensors +3 -0
  20. v5_128k_layer_11/sparsity.safetensors +3 -0
  21. v5_128k_layer_2/cfg.json +1 -0
  22. v5_128k_layer_2/metrics.json +1 -0
  23. v5_128k_layer_2/sae_weights.safetensors +3 -0
  24. v5_128k_layer_2/sparsity.safetensors +3 -0
  25. v5_128k_layer_3/cfg.json +1 -0
  26. v5_128k_layer_3/metrics.json +1 -0
  27. v5_128k_layer_3/sae_weights.safetensors +3 -0
  28. v5_128k_layer_3/sparsity.safetensors +3 -0
  29. v5_128k_layer_4/cfg.json +1 -0
  30. v5_128k_layer_4/metrics.json +1 -0
  31. v5_128k_layer_4/sae_weights.safetensors +3 -0
  32. v5_128k_layer_4/sparsity.safetensors +3 -0
  33. v5_128k_layer_5/cfg.json +1 -0
  34. v5_128k_layer_5/metrics.json +1 -0
  35. v5_128k_layer_5/sae_weights.safetensors +3 -0
  36. v5_128k_layer_5/sparsity.safetensors +3 -0
  37. v5_128k_layer_6/cfg.json +1 -0
  38. v5_128k_layer_6/metrics.json +1 -0
  39. v5_128k_layer_6/sae_weights.safetensors +3 -0
  40. v5_128k_layer_6/sparsity.safetensors +3 -0
  41. v5_128k_layer_7/cfg.json +1 -0
  42. v5_128k_layer_7/metrics.json +1 -0
  43. v5_128k_layer_7/sae_weights.safetensors +3 -0
  44. v5_128k_layer_7/sparsity.safetensors +3 -0
  45. v5_128k_layer_8/cfg.json +1 -0
  46. v5_128k_layer_8/metrics.json +1 -0
  47. v5_128k_layer_8/sae_weights.safetensors +3 -0
  48. v5_128k_layer_8/sparsity.safetensors +3 -0
  49. v5_128k_layer_9/cfg.json +1 -0
  50. v5_128k_layer_9/metrics.json +1 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ benchmark_stats.png filter=lfs diff=lfs merge=lfs -text
benchmark_stats.csv ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ,version,d_sae,layer,kl_div_with_sae,kl_div_with_ablation,ce_loss_with_sae,ce_loss_without_sae,ce_loss_with_ablation,kl_div_score,ce_loss_score,l2_norm_in,l2_norm_out,l2_ratio,l0,l1,explained_variance,mse,total_tokens_evaluated,filepath
2
+ OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_0/metrics.json,5,128,0,0.0038433405570685863,12.480283737182617,3.603421926498413,3.599064588546753,15.861976623535156,0.9996920470208848,0.9996446734723997,32.70796203613281,32.60739517211914,0.9969363212585449,31.98079490661621,44.247344970703125,0.9764951467514038,5.842685222625732,6144.0,OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_0/metrics.json
3
+ OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_1/metrics.json,5,128,1,0.006731455214321613,16.217103958129883,3.605462074279785,3.599064588546753,19.600265502929688,0.999584916318493,0.999600187150498,56.929866790771484,56.6934928894043,0.9958688616752625,31.9990234375,59.95335388183594,0.9722690582275391,31.145605087280273,6144.0,OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_1/metrics.json
4
+ OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_2/metrics.json,5,128,2,0.014034643769264221,12.81351089477539,3.6128032207489014,3.599064588546753,16.32787322998047,0.9989046995874498,0.9989206662941394,68.90753173828125,68.5189208984375,0.9936020374298096,31.99934959411621,53.266326904296875,0.9691864848136902,49.732627868652344,6144.0,OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_2/metrics.json
5
+ OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_3/metrics.json,5,128,3,0.023511391133069992,10.10186767578125,3.622774839401245,3.599064588546753,13.548822402954102,0.9976725698764163,0.9976170022128419,103.71144104003906,103.02696228027344,0.9895378351211548,31.983074188232422,54.82560348510742,0.9752952456474304,89.62007904052734,6144.0,OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_3/metrics.json
6
+ OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_4/metrics.json,5,128,4,0.03753046691417694,13.249712944030762,3.640705108642578,3.599064588546753,16.69910430908203,0.9971674505649509,0.9968213439818392,111.40328216552734,110.28677368164062,0.9840068817138672,31.94856834411621,56.49782943725586,0.9639231562614441,153.8262939453125,6144.0,OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_4/metrics.json
7
+ OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_5/metrics.json,5,128,5,0.04741125553846359,11.519681930541992,3.6441657543182373,3.599064588546753,14.860109329223633,0.9958843259888311,0.9959949394740818,119.6514892578125,118.05345916748047,0.9789802432060242,31.9580078125,56.149471282958984,0.9522756338119507,238.1262664794922,6144.0,OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_5/metrics.json
8
+ OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_6/metrics.json,5,128,6,0.06079387292265892,6.933250427246094,3.655242681503296,3.599064588546753,10.522689819335938,0.9912315480941302,0.9918860291994546,128.84793090820312,126.68817138671875,0.9742312431335449,31.99609375,56.119468688964844,0.940334677696228,349.56976318359375,6144.0,OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_6/metrics.json
9
+ OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_7/metrics.json,5,128,7,0.07359001040458679,9.511523246765137,3.668210983276367,3.599064588546753,13.054040908813477,0.9922630678078178,0.9926867722998524,140.90599060058594,138.12875366210938,0.9708306789398193,31.9970703125,55.75236129760742,0.928668200969696,499.7657165527344,6144.0,OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_7/metrics.json
10
+ OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_8/metrics.json,5,128,8,0.08936788886785507,7.897105693817139,3.679746389389038,3.599064588546753,11.460872650146484,0.9886834629884942,0.9897375005583807,157.34324645996094,153.83953857421875,0.9683517217636108,31.99837303161621,53.45509338378906,0.9138767123222351,732.31787109375,6144.0,OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_8/metrics.json
11
+ OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_9/metrics.json,5,128,9,0.09692329168319702,5.3963117599487305,3.6957435607910156,3.599064588546753,8.97047233581543,0.9820389747674404,0.9820011853887981,181.313720703125,176.829345703125,0.966578483581543,31.9970703125,51.3939323425293,0.8956956267356873,1123.425048828125,6144.0,OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_9/metrics.json
12
+ OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_10/metrics.json,5,128,10,0.10664453357458115,6.193092346191406,3.692218780517578,3.599064588546753,9.754217147827148,0.98278008341985,0.9848656566878472,224.28759765625,218.76922607421875,0.9688126444816589,31.998046875,47.411495208740234,0.8776161670684814,1806.194091796875,6144.0,OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_10/metrics.json
13
+ OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_11/metrics.json,5,128,11,0.1366678923368454,13.087514877319336,3.7329623699188232,3.599064588546753,16.484846115112305,0.9895573839939856,0.9896088738509167,395.5395202636719,391.4725036621094,0.9892591834068298,32.0,31.824054718017578,0.8704243898391724,3098.075927734375,6144.0,OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_11/metrics.json
benchmark_stats.html ADDED
@@ -0,0 +1,734 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <style type="text/css">
2
+ #T_53d0e_row0_col2, #T_53d0e_row0_col3, #T_53d0e_row0_col5, #T_53d0e_row0_col6, #T_53d0e_row0_col10, #T_53d0e_row0_col11, #T_53d0e_row0_col16, #T_53d0e_row0_col17, #T_53d0e_row1_col6, #T_53d0e_row1_col17, #T_53d0e_row2_col6, #T_53d0e_row2_col17, #T_53d0e_row3_col6, #T_53d0e_row3_col17, #T_53d0e_row4_col6, #T_53d0e_row4_col13, #T_53d0e_row4_col17, #T_53d0e_row5_col6, #T_53d0e_row5_col17, #T_53d0e_row6_col6, #T_53d0e_row6_col17, #T_53d0e_row7_col6, #T_53d0e_row7_col17, #T_53d0e_row8_col6, #T_53d0e_row8_col17, #T_53d0e_row9_col4, #T_53d0e_row9_col6, #T_53d0e_row9_col7, #T_53d0e_row9_col8, #T_53d0e_row9_col9, #T_53d0e_row9_col12, #T_53d0e_row9_col17, #T_53d0e_row10_col6, #T_53d0e_row10_col17, #T_53d0e_row11_col6, #T_53d0e_row11_col14, #T_53d0e_row11_col15, #T_53d0e_row11_col17 {
3
+ background-color: #440154;
4
+ color: #f1f1f1;
5
+ }
6
+ #T_53d0e_row0_col4 {
7
+ background-color: #31b57b;
8
+ color: #f1f1f1;
9
+ }
10
+ #T_53d0e_row0_col7 {
11
+ background-color: #2eb37c;
12
+ color: #f1f1f1;
13
+ }
14
+ #T_53d0e_row0_col8, #T_53d0e_row0_col9, #T_53d0e_row0_col12, #T_53d0e_row0_col15, #T_53d0e_row1_col4, #T_53d0e_row1_col7, #T_53d0e_row1_col9, #T_53d0e_row1_col14, #T_53d0e_row11_col2, #T_53d0e_row11_col3, #T_53d0e_row11_col5, #T_53d0e_row11_col10, #T_53d0e_row11_col11, #T_53d0e_row11_col13, #T_53d0e_row11_col16 {
15
+ background-color: #fde725;
16
+ color: #000000;
17
+ }
18
+ #T_53d0e_row0_col13 {
19
+ background-color: #28ae80;
20
+ color: #f1f1f1;
21
+ }
22
+ #T_53d0e_row0_col14, #T_53d0e_row8_col9 {
23
+ background-color: #26828e;
24
+ color: #f1f1f1;
25
+ }
26
+ #T_53d0e_row1_col2 {
27
+ background-color: #482173;
28
+ color: #f1f1f1;
29
+ }
30
+ #T_53d0e_row1_col3 {
31
+ background-color: #46085c;
32
+ color: #f1f1f1;
33
+ }
34
+ #T_53d0e_row1_col5 {
35
+ background-color: #46075a;
36
+ color: #f1f1f1;
37
+ }
38
+ #T_53d0e_row1_col8 {
39
+ background-color: #fbe723;
40
+ color: #000000;
41
+ }
42
+ #T_53d0e_row1_col10, #T_53d0e_row1_col11, #T_53d0e_row10_col15 {
43
+ background-color: #481a6c;
44
+ color: #f1f1f1;
45
+ }
46
+ #T_53d0e_row1_col12, #T_53d0e_row10_col13 {
47
+ background-color: #e7e419;
48
+ color: #000000;
49
+ }
50
+ #T_53d0e_row1_col13 {
51
+ background-color: #f4e61e;
52
+ color: #000000;
53
+ }
54
+ #T_53d0e_row1_col15, #T_53d0e_row2_col9 {
55
+ background-color: #e5e419;
56
+ color: #000000;
57
+ }
58
+ #T_53d0e_row1_col16 {
59
+ background-color: #450457;
60
+ color: #f1f1f1;
61
+ }
62
+ #T_53d0e_row2_col2, #T_53d0e_row5_col13 {
63
+ background-color: #433e85;
64
+ color: #f1f1f1;
65
+ }
66
+ #T_53d0e_row2_col3, #T_53d0e_row5_col16 {
67
+ background-color: #481c6e;
68
+ color: #f1f1f1;
69
+ }
70
+ #T_53d0e_row2_col4, #T_53d0e_row10_col5 {
71
+ background-color: #3dbc74;
72
+ color: #f1f1f1;
73
+ }
74
+ #T_53d0e_row2_col5, #T_53d0e_row10_col4, #T_53d0e_row10_col7, #T_53d0e_row10_col12 {
75
+ background-color: #481b6d;
76
+ color: #f1f1f1;
77
+ }
78
+ #T_53d0e_row2_col7 {
79
+ background-color: #40bd72;
80
+ color: #f1f1f1;
81
+ }
82
+ #T_53d0e_row2_col8 {
83
+ background-color: #e2e418;
84
+ color: #000000;
85
+ }
86
+ #T_53d0e_row2_col10, #T_53d0e_row2_col11 {
87
+ background-color: #482475;
88
+ color: #f1f1f1;
89
+ }
90
+ #T_53d0e_row2_col12 {
91
+ background-color: #b5de2b;
92
+ color: #000000;
93
+ }
94
+ #T_53d0e_row2_col13 {
95
+ background-color: #f6e620;
96
+ color: #000000;
97
+ }
98
+ #T_53d0e_row2_col14 {
99
+ background-color: #65cb5e;
100
+ color: #000000;
101
+ }
102
+ #T_53d0e_row2_col15 {
103
+ background-color: #d2e21b;
104
+ color: #000000;
105
+ }
106
+ #T_53d0e_row2_col16 {
107
+ background-color: #450559;
108
+ color: #f1f1f1;
109
+ }
110
+ #T_53d0e_row3_col2 {
111
+ background-color: #38588c;
112
+ color: #f1f1f1;
113
+ }
114
+ #T_53d0e_row3_col3, #T_53d0e_row6_col7 {
115
+ background-color: #46337f;
116
+ color: #f1f1f1;
117
+ }
118
+ #T_53d0e_row3_col4 {
119
+ background-color: #26818e;
120
+ color: #f1f1f1;
121
+ }
122
+ #T_53d0e_row3_col5 {
123
+ background-color: #463480;
124
+ color: #f1f1f1;
125
+ }
126
+ #T_53d0e_row3_col7, #T_53d0e_row11_col9 {
127
+ background-color: #27808e;
128
+ color: #f1f1f1;
129
+ }
130
+ #T_53d0e_row3_col8, #T_53d0e_row3_col9 {
131
+ background-color: #b2dd2d;
132
+ color: #000000;
133
+ }
134
+ #T_53d0e_row3_col10, #T_53d0e_row3_col11 {
135
+ background-color: #414287;
136
+ color: #f1f1f1;
137
+ }
138
+ #T_53d0e_row3_col12 {
139
+ background-color: #60ca60;
140
+ color: #000000;
141
+ }
142
+ #T_53d0e_row3_col13 {
143
+ background-color: #37b878;
144
+ color: #f1f1f1;
145
+ }
146
+ #T_53d0e_row3_col14, #T_53d0e_row9_col2 {
147
+ background-color: #86d549;
148
+ color: #000000;
149
+ }
150
+ #T_53d0e_row3_col15 {
151
+ background-color: #f8e621;
152
+ color: #000000;
153
+ }
154
+ #T_53d0e_row3_col16 {
155
+ background-color: #460a5d;
156
+ color: #f1f1f1;
157
+ }
158
+ #T_53d0e_row4_col2 {
159
+ background-color: #2d708e;
160
+ color: #f1f1f1;
161
+ }
162
+ #T_53d0e_row4_col3, #T_53d0e_row6_col12 {
163
+ background-color: #3b528b;
164
+ color: #f1f1f1;
165
+ }
166
+ #T_53d0e_row4_col4 {
167
+ background-color: #50c46a;
168
+ color: #000000;
169
+ }
170
+ #T_53d0e_row4_col5 {
171
+ background-color: #365c8d;
172
+ color: #f1f1f1;
173
+ }
174
+ #T_53d0e_row4_col7, #T_53d0e_row8_col2 {
175
+ background-color: #52c569;
176
+ color: #000000;
177
+ }
178
+ #T_53d0e_row4_col8 {
179
+ background-color: #a0da39;
180
+ color: #000000;
181
+ }
182
+ #T_53d0e_row4_col9 {
183
+ background-color: #95d840;
184
+ color: #000000;
185
+ }
186
+ #T_53d0e_row4_col10, #T_53d0e_row4_col11 {
187
+ background-color: #3f4889;
188
+ color: #f1f1f1;
189
+ }
190
+ #T_53d0e_row4_col12 {
191
+ background-color: #1fa187;
192
+ color: #f1f1f1;
193
+ }
194
+ #T_53d0e_row4_col14 {
195
+ background-color: #addc30;
196
+ color: #000000;
197
+ }
198
+ #T_53d0e_row4_col15 {
199
+ background-color: #b0dd2f;
200
+ color: #000000;
201
+ }
202
+ #T_53d0e_row4_col16 {
203
+ background-color: #471365;
204
+ color: #f1f1f1;
205
+ }
206
+ #T_53d0e_row5_col2 {
207
+ background-color: #25858e;
208
+ color: #f1f1f1;
209
+ }
210
+ #T_53d0e_row5_col3 {
211
+ background-color: #31668e;
212
+ color: #f1f1f1;
213
+ }
214
+ #T_53d0e_row5_col4 {
215
+ background-color: #1fa088;
216
+ color: #f1f1f1;
217
+ }
218
+ #T_53d0e_row5_col5 {
219
+ background-color: #33638d;
220
+ color: #f1f1f1;
221
+ }
222
+ #T_53d0e_row5_col7, #T_53d0e_row10_col14 {
223
+ background-color: #1e9d89;
224
+ color: #f1f1f1;
225
+ }
226
+ #T_53d0e_row5_col8 {
227
+ background-color: #70cf57;
228
+ color: #000000;
229
+ }
230
+ #T_53d0e_row5_col9 {
231
+ background-color: #77d153;
232
+ color: #000000;
233
+ }
234
+ #T_53d0e_row5_col10 {
235
+ background-color: #3c4f8a;
236
+ color: #f1f1f1;
237
+ }
238
+ #T_53d0e_row5_col11, #T_53d0e_row8_col16, #T_53d0e_row9_col15 {
239
+ background-color: #3d4e8a;
240
+ color: #f1f1f1;
241
+ }
242
+ #T_53d0e_row5_col12, #T_53d0e_row8_col15, #T_53d0e_row9_col10 {
243
+ background-color: #297a8e;
244
+ color: #f1f1f1;
245
+ }
246
+ #T_53d0e_row5_col14, #T_53d0e_row6_col14 {
247
+ background-color: #a5db36;
248
+ color: #000000;
249
+ }
250
+ #T_53d0e_row5_col15 {
251
+ background-color: #69cd5b;
252
+ color: #000000;
253
+ }
254
+ #T_53d0e_row6_col2 {
255
+ background-color: #1e9b8a;
256
+ color: #f1f1f1;
257
+ }
258
+ #T_53d0e_row6_col3, #T_53d0e_row11_col8 {
259
+ background-color: #277f8e;
260
+ color: #f1f1f1;
261
+ }
262
+ #T_53d0e_row6_col4 {
263
+ background-color: #46327e;
264
+ color: #f1f1f1;
265
+ }
266
+ #T_53d0e_row6_col5, #T_53d0e_row9_col11 {
267
+ background-color: #2a788e;
268
+ color: #f1f1f1;
269
+ }
270
+ #T_53d0e_row6_col8 {
271
+ background-color: #1f958b;
272
+ color: #f1f1f1;
273
+ }
274
+ #T_53d0e_row6_col9 {
275
+ background-color: #1f9f88;
276
+ color: #f1f1f1;
277
+ }
278
+ #T_53d0e_row6_col10, #T_53d0e_row6_col11 {
279
+ background-color: #39558c;
280
+ color: #f1f1f1;
281
+ }
282
+ #T_53d0e_row6_col13 {
283
+ background-color: #cde11d;
284
+ color: #000000;
285
+ }
286
+ #T_53d0e_row6_col15 {
287
+ background-color: #32b67a;
288
+ color: #f1f1f1;
289
+ }
290
+ #T_53d0e_row6_col16 {
291
+ background-color: #482878;
292
+ color: #f1f1f1;
293
+ }
294
+ #T_53d0e_row7_col2 {
295
+ background-color: #2ab07f;
296
+ color: #f1f1f1;
297
+ }
298
+ #T_53d0e_row7_col3 {
299
+ background-color: #1f968b;
300
+ color: #f1f1f1;
301
+ }
302
+ #T_53d0e_row7_col4 {
303
+ background-color: #2c738e;
304
+ color: #f1f1f1;
305
+ }
306
+ #T_53d0e_row7_col5 {
307
+ background-color: #21918c;
308
+ color: #f1f1f1;
309
+ }
310
+ #T_53d0e_row7_col7 {
311
+ background-color: #2b748e;
312
+ color: #f1f1f1;
313
+ }
314
+ #T_53d0e_row7_col8 {
315
+ background-color: #20a386;
316
+ color: #f1f1f1;
317
+ }
318
+ #T_53d0e_row7_col9 {
319
+ background-color: #24aa83;
320
+ color: #f1f1f1;
321
+ }
322
+ #T_53d0e_row7_col10 {
323
+ background-color: #355f8d;
324
+ color: #f1f1f1;
325
+ }
326
+ #T_53d0e_row7_col11 {
327
+ background-color: #355e8d;
328
+ color: #f1f1f1;
329
+ }
330
+ #T_53d0e_row7_col12 {
331
+ background-color: #46307e;
332
+ color: #f1f1f1;
333
+ }
334
+ #T_53d0e_row7_col13, #T_53d0e_row9_col13 {
335
+ background-color: #dae319;
336
+ color: #000000;
337
+ }
338
+ #T_53d0e_row7_col14 {
339
+ background-color: #9bd93c;
340
+ color: #000000;
341
+ }
342
+ #T_53d0e_row7_col15 {
343
+ background-color: #1e9c89;
344
+ color: #f1f1f1;
345
+ }
346
+ #T_53d0e_row7_col16 {
347
+ background-color: #453781;
348
+ color: #f1f1f1;
349
+ }
350
+ #T_53d0e_row8_col3 {
351
+ background-color: #2db27d;
352
+ color: #f1f1f1;
353
+ }
354
+ #T_53d0e_row8_col4, #T_53d0e_row8_col7 {
355
+ background-color: #3d4d8a;
356
+ color: #f1f1f1;
357
+ }
358
+ #T_53d0e_row8_col5 {
359
+ background-color: #21a585;
360
+ color: #f1f1f1;
361
+ }
362
+ #T_53d0e_row8_col8 {
363
+ background-color: #2c728e;
364
+ color: #f1f1f1;
365
+ }
366
+ #T_53d0e_row8_col10 {
367
+ background-color: #306a8e;
368
+ color: #f1f1f1;
369
+ }
370
+ #T_53d0e_row8_col11 {
371
+ background-color: #30698e;
372
+ color: #f1f1f1;
373
+ }
374
+ #T_53d0e_row8_col12 {
375
+ background-color: #481668;
376
+ color: #f1f1f1;
377
+ }
378
+ #T_53d0e_row8_col13 {
379
+ background-color: #eae51a;
380
+ color: #000000;
381
+ }
382
+ #T_53d0e_row8_col14 {
383
+ background-color: #67cc5c;
384
+ color: #000000;
385
+ }
386
+ #T_53d0e_row9_col3 {
387
+ background-color: #44bf70;
388
+ color: #f1f1f1;
389
+ }
390
+ #T_53d0e_row9_col5 {
391
+ background-color: #4ac16d;
392
+ color: #000000;
393
+ }
394
+ #T_53d0e_row9_col14 {
395
+ background-color: #42be71;
396
+ color: #f1f1f1;
397
+ }
398
+ #T_53d0e_row9_col16 {
399
+ background-color: #2e6f8e;
400
+ color: #f1f1f1;
401
+ }
402
+ #T_53d0e_row10_col2 {
403
+ background-color: #c2df23;
404
+ color: #000000;
405
+ }
406
+ #T_53d0e_row10_col3 {
407
+ background-color: #6ccd5a;
408
+ color: #000000;
409
+ }
410
+ #T_53d0e_row10_col8 {
411
+ background-color: #471063;
412
+ color: #f1f1f1;
413
+ }
414
+ #T_53d0e_row10_col9 {
415
+ background-color: #453882;
416
+ color: #f1f1f1;
417
+ }
418
+ #T_53d0e_row10_col10 {
419
+ background-color: #1f978b;
420
+ color: #f1f1f1;
421
+ }
422
+ #T_53d0e_row10_col11 {
423
+ background-color: #1f948c;
424
+ color: #f1f1f1;
425
+ }
426
+ #T_53d0e_row10_col16 {
427
+ background-color: #20a486;
428
+ color: #f1f1f1;
429
+ }
430
+ #T_53d0e_row11_col4 {
431
+ background-color: #48c16e;
432
+ color: #f1f1f1;
433
+ }
434
+ #T_53d0e_row11_col7 {
435
+ background-color: #46c06f;
436
+ color: #f1f1f1;
437
+ }
438
+ #T_53d0e_row11_col12 {
439
+ background-color: #5cc863;
440
+ color: #000000;
441
+ }
442
+ </style>
443
+ <table id="T_53d0e">
444
+ <thead>
445
+ <tr>
446
+ <th class="blank level0" >&nbsp;</th>
447
+ <th id="T_53d0e_level0_col0" class="col_heading level0 col0" >version</th>
448
+ <th id="T_53d0e_level0_col1" class="col_heading level0 col1" >d_sae</th>
449
+ <th id="T_53d0e_level0_col2" class="col_heading level0 col2" >layer</th>
450
+ <th id="T_53d0e_level0_col3" class="col_heading level0 col3" >kl_div_with_sae</th>
451
+ <th id="T_53d0e_level0_col4" class="col_heading level0 col4" >kl_div_with_ablation</th>
452
+ <th id="T_53d0e_level0_col5" class="col_heading level0 col5" >ce_loss_with_sae</th>
453
+ <th id="T_53d0e_level0_col6" class="col_heading level0 col6" >ce_loss_without_sae</th>
454
+ <th id="T_53d0e_level0_col7" class="col_heading level0 col7" >ce_loss_with_ablation</th>
455
+ <th id="T_53d0e_level0_col8" class="col_heading level0 col8" >kl_div_score</th>
456
+ <th id="T_53d0e_level0_col9" class="col_heading level0 col9" >ce_loss_score</th>
457
+ <th id="T_53d0e_level0_col10" class="col_heading level0 col10" >l2_norm_in</th>
458
+ <th id="T_53d0e_level0_col11" class="col_heading level0 col11" >l2_norm_out</th>
459
+ <th id="T_53d0e_level0_col12" class="col_heading level0 col12" >l2_ratio</th>
460
+ <th id="T_53d0e_level0_col13" class="col_heading level0 col13" >l0</th>
461
+ <th id="T_53d0e_level0_col14" class="col_heading level0 col14" >l1</th>
462
+ <th id="T_53d0e_level0_col15" class="col_heading level0 col15" >explained_variance</th>
463
+ <th id="T_53d0e_level0_col16" class="col_heading level0 col16" >mse</th>
464
+ <th id="T_53d0e_level0_col17" class="col_heading level0 col17" >total_tokens_evaluated</th>
465
+ <th id="T_53d0e_level0_col18" class="col_heading level0 col18" >filepath</th>
466
+ </tr>
467
+ </thead>
468
+ <tbody>
469
+ <tr>
470
+ <th id="T_53d0e_level0_row0" class="row_heading level0 row0" >OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_0/metrics.json</th>
471
+ <td id="T_53d0e_row0_col0" class="data row0 col0" >5</td>
472
+ <td id="T_53d0e_row0_col1" class="data row0 col1" >128</td>
473
+ <td id="T_53d0e_row0_col2" class="data row0 col2" >0</td>
474
+ <td id="T_53d0e_row0_col3" class="data row0 col3" >0.003843</td>
475
+ <td id="T_53d0e_row0_col4" class="data row0 col4" >12.480284</td>
476
+ <td id="T_53d0e_row0_col5" class="data row0 col5" >3.603422</td>
477
+ <td id="T_53d0e_row0_col6" class="data row0 col6" >3.599065</td>
478
+ <td id="T_53d0e_row0_col7" class="data row0 col7" >15.861977</td>
479
+ <td id="T_53d0e_row0_col8" class="data row0 col8" >0.999692</td>
480
+ <td id="T_53d0e_row0_col9" class="data row0 col9" >0.999645</td>
481
+ <td id="T_53d0e_row0_col10" class="data row0 col10" >32.707962</td>
482
+ <td id="T_53d0e_row0_col11" class="data row0 col11" >32.607395</td>
483
+ <td id="T_53d0e_row0_col12" class="data row0 col12" >0.996936</td>
484
+ <td id="T_53d0e_row0_col13" class="data row0 col13" >31.980795</td>
485
+ <td id="T_53d0e_row0_col14" class="data row0 col14" >44.247345</td>
486
+ <td id="T_53d0e_row0_col15" class="data row0 col15" >0.976495</td>
487
+ <td id="T_53d0e_row0_col16" class="data row0 col16" >5.842685</td>
488
+ <td id="T_53d0e_row0_col17" class="data row0 col17" >6144.000000</td>
489
+ <td id="T_53d0e_row0_col18" class="data row0 col18" >OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_0/metrics.json</td>
490
+ </tr>
491
+ <tr>
492
+ <th id="T_53d0e_level0_row1" class="row_heading level0 row1" >OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_1/metrics.json</th>
493
+ <td id="T_53d0e_row1_col0" class="data row1 col0" >5</td>
494
+ <td id="T_53d0e_row1_col1" class="data row1 col1" >128</td>
495
+ <td id="T_53d0e_row1_col2" class="data row1 col2" >1</td>
496
+ <td id="T_53d0e_row1_col3" class="data row1 col3" >0.006731</td>
497
+ <td id="T_53d0e_row1_col4" class="data row1 col4" >16.217104</td>
498
+ <td id="T_53d0e_row1_col5" class="data row1 col5" >3.605462</td>
499
+ <td id="T_53d0e_row1_col6" class="data row1 col6" >3.599065</td>
500
+ <td id="T_53d0e_row1_col7" class="data row1 col7" >19.600266</td>
501
+ <td id="T_53d0e_row1_col8" class="data row1 col8" >0.999585</td>
502
+ <td id="T_53d0e_row1_col9" class="data row1 col9" >0.999600</td>
503
+ <td id="T_53d0e_row1_col10" class="data row1 col10" >56.929867</td>
504
+ <td id="T_53d0e_row1_col11" class="data row1 col11" >56.693493</td>
505
+ <td id="T_53d0e_row1_col12" class="data row1 col12" >0.995869</td>
506
+ <td id="T_53d0e_row1_col13" class="data row1 col13" >31.999023</td>
507
+ <td id="T_53d0e_row1_col14" class="data row1 col14" >59.953354</td>
508
+ <td id="T_53d0e_row1_col15" class="data row1 col15" >0.972269</td>
509
+ <td id="T_53d0e_row1_col16" class="data row1 col16" >31.145605</td>
510
+ <td id="T_53d0e_row1_col17" class="data row1 col17" >6144.000000</td>
511
+ <td id="T_53d0e_row1_col18" class="data row1 col18" >OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_1/metrics.json</td>
512
+ </tr>
513
+ <tr>
514
+ <th id="T_53d0e_level0_row2" class="row_heading level0 row2" >OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_2/metrics.json</th>
515
+ <td id="T_53d0e_row2_col0" class="data row2 col0" >5</td>
516
+ <td id="T_53d0e_row2_col1" class="data row2 col1" >128</td>
517
+ <td id="T_53d0e_row2_col2" class="data row2 col2" >2</td>
518
+ <td id="T_53d0e_row2_col3" class="data row2 col3" >0.014035</td>
519
+ <td id="T_53d0e_row2_col4" class="data row2 col4" >12.813511</td>
520
+ <td id="T_53d0e_row2_col5" class="data row2 col5" >3.612803</td>
521
+ <td id="T_53d0e_row2_col6" class="data row2 col6" >3.599065</td>
522
+ <td id="T_53d0e_row2_col7" class="data row2 col7" >16.327873</td>
523
+ <td id="T_53d0e_row2_col8" class="data row2 col8" >0.998905</td>
524
+ <td id="T_53d0e_row2_col9" class="data row2 col9" >0.998921</td>
525
+ <td id="T_53d0e_row2_col10" class="data row2 col10" >68.907532</td>
526
+ <td id="T_53d0e_row2_col11" class="data row2 col11" >68.518921</td>
527
+ <td id="T_53d0e_row2_col12" class="data row2 col12" >0.993602</td>
528
+ <td id="T_53d0e_row2_col13" class="data row2 col13" >31.999350</td>
529
+ <td id="T_53d0e_row2_col14" class="data row2 col14" >53.266327</td>
530
+ <td id="T_53d0e_row2_col15" class="data row2 col15" >0.969186</td>
531
+ <td id="T_53d0e_row2_col16" class="data row2 col16" >49.732628</td>
532
+ <td id="T_53d0e_row2_col17" class="data row2 col17" >6144.000000</td>
533
+ <td id="T_53d0e_row2_col18" class="data row2 col18" >OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_2/metrics.json</td>
534
+ </tr>
535
+ <tr>
536
+ <th id="T_53d0e_level0_row3" class="row_heading level0 row3" >OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_3/metrics.json</th>
537
+ <td id="T_53d0e_row3_col0" class="data row3 col0" >5</td>
538
+ <td id="T_53d0e_row3_col1" class="data row3 col1" >128</td>
539
+ <td id="T_53d0e_row3_col2" class="data row3 col2" >3</td>
540
+ <td id="T_53d0e_row3_col3" class="data row3 col3" >0.023511</td>
541
+ <td id="T_53d0e_row3_col4" class="data row3 col4" >10.101868</td>
542
+ <td id="T_53d0e_row3_col5" class="data row3 col5" >3.622775</td>
543
+ <td id="T_53d0e_row3_col6" class="data row3 col6" >3.599065</td>
544
+ <td id="T_53d0e_row3_col7" class="data row3 col7" >13.548822</td>
545
+ <td id="T_53d0e_row3_col8" class="data row3 col8" >0.997673</td>
546
+ <td id="T_53d0e_row3_col9" class="data row3 col9" >0.997617</td>
547
+ <td id="T_53d0e_row3_col10" class="data row3 col10" >103.711441</td>
548
+ <td id="T_53d0e_row3_col11" class="data row3 col11" >103.026962</td>
549
+ <td id="T_53d0e_row3_col12" class="data row3 col12" >0.989538</td>
550
+ <td id="T_53d0e_row3_col13" class="data row3 col13" >31.983074</td>
551
+ <td id="T_53d0e_row3_col14" class="data row3 col14" >54.825603</td>
552
+ <td id="T_53d0e_row3_col15" class="data row3 col15" >0.975295</td>
553
+ <td id="T_53d0e_row3_col16" class="data row3 col16" >89.620079</td>
554
+ <td id="T_53d0e_row3_col17" class="data row3 col17" >6144.000000</td>
555
+ <td id="T_53d0e_row3_col18" class="data row3 col18" >OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_3/metrics.json</td>
556
+ </tr>
557
+ <tr>
558
+ <th id="T_53d0e_level0_row4" class="row_heading level0 row4" >OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_4/metrics.json</th>
559
+ <td id="T_53d0e_row4_col0" class="data row4 col0" >5</td>
560
+ <td id="T_53d0e_row4_col1" class="data row4 col1" >128</td>
561
+ <td id="T_53d0e_row4_col2" class="data row4 col2" >4</td>
562
+ <td id="T_53d0e_row4_col3" class="data row4 col3" >0.037530</td>
563
+ <td id="T_53d0e_row4_col4" class="data row4 col4" >13.249713</td>
564
+ <td id="T_53d0e_row4_col5" class="data row4 col5" >3.640705</td>
565
+ <td id="T_53d0e_row4_col6" class="data row4 col6" >3.599065</td>
566
+ <td id="T_53d0e_row4_col7" class="data row4 col7" >16.699104</td>
567
+ <td id="T_53d0e_row4_col8" class="data row4 col8" >0.997167</td>
568
+ <td id="T_53d0e_row4_col9" class="data row4 col9" >0.996821</td>
569
+ <td id="T_53d0e_row4_col10" class="data row4 col10" >111.403282</td>
570
+ <td id="T_53d0e_row4_col11" class="data row4 col11" >110.286774</td>
571
+ <td id="T_53d0e_row4_col12" class="data row4 col12" >0.984007</td>
572
+ <td id="T_53d0e_row4_col13" class="data row4 col13" >31.948568</td>
573
+ <td id="T_53d0e_row4_col14" class="data row4 col14" >56.497829</td>
574
+ <td id="T_53d0e_row4_col15" class="data row4 col15" >0.963923</td>
575
+ <td id="T_53d0e_row4_col16" class="data row4 col16" >153.826294</td>
576
+ <td id="T_53d0e_row4_col17" class="data row4 col17" >6144.000000</td>
577
+ <td id="T_53d0e_row4_col18" class="data row4 col18" >OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_4/metrics.json</td>
578
+ </tr>
579
+ <tr>
580
+ <th id="T_53d0e_level0_row5" class="row_heading level0 row5" >OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_5/metrics.json</th>
581
+ <td id="T_53d0e_row5_col0" class="data row5 col0" >5</td>
582
+ <td id="T_53d0e_row5_col1" class="data row5 col1" >128</td>
583
+ <td id="T_53d0e_row5_col2" class="data row5 col2" >5</td>
584
+ <td id="T_53d0e_row5_col3" class="data row5 col3" >0.047411</td>
585
+ <td id="T_53d0e_row5_col4" class="data row5 col4" >11.519682</td>
586
+ <td id="T_53d0e_row5_col5" class="data row5 col5" >3.644166</td>
587
+ <td id="T_53d0e_row5_col6" class="data row5 col6" >3.599065</td>
588
+ <td id="T_53d0e_row5_col7" class="data row5 col7" >14.860109</td>
589
+ <td id="T_53d0e_row5_col8" class="data row5 col8" >0.995884</td>
590
+ <td id="T_53d0e_row5_col9" class="data row5 col9" >0.995995</td>
591
+ <td id="T_53d0e_row5_col10" class="data row5 col10" >119.651489</td>
592
+ <td id="T_53d0e_row5_col11" class="data row5 col11" >118.053459</td>
593
+ <td id="T_53d0e_row5_col12" class="data row5 col12" >0.978980</td>
594
+ <td id="T_53d0e_row5_col13" class="data row5 col13" >31.958008</td>
595
+ <td id="T_53d0e_row5_col14" class="data row5 col14" >56.149471</td>
596
+ <td id="T_53d0e_row5_col15" class="data row5 col15" >0.952276</td>
597
+ <td id="T_53d0e_row5_col16" class="data row5 col16" >238.126266</td>
598
+ <td id="T_53d0e_row5_col17" class="data row5 col17" >6144.000000</td>
599
+ <td id="T_53d0e_row5_col18" class="data row5 col18" >OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_5/metrics.json</td>
600
+ </tr>
601
+ <tr>
602
+ <th id="T_53d0e_level0_row6" class="row_heading level0 row6" >OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_6/metrics.json</th>
603
+ <td id="T_53d0e_row6_col0" class="data row6 col0" >5</td>
604
+ <td id="T_53d0e_row6_col1" class="data row6 col1" >128</td>
605
+ <td id="T_53d0e_row6_col2" class="data row6 col2" >6</td>
606
+ <td id="T_53d0e_row6_col3" class="data row6 col3" >0.060794</td>
607
+ <td id="T_53d0e_row6_col4" class="data row6 col4" >6.933250</td>
608
+ <td id="T_53d0e_row6_col5" class="data row6 col5" >3.655243</td>
609
+ <td id="T_53d0e_row6_col6" class="data row6 col6" >3.599065</td>
610
+ <td id="T_53d0e_row6_col7" class="data row6 col7" >10.522690</td>
611
+ <td id="T_53d0e_row6_col8" class="data row6 col8" >0.991232</td>
612
+ <td id="T_53d0e_row6_col9" class="data row6 col9" >0.991886</td>
613
+ <td id="T_53d0e_row6_col10" class="data row6 col10" >128.847931</td>
614
+ <td id="T_53d0e_row6_col11" class="data row6 col11" >126.688171</td>
615
+ <td id="T_53d0e_row6_col12" class="data row6 col12" >0.974231</td>
616
+ <td id="T_53d0e_row6_col13" class="data row6 col13" >31.996094</td>
617
+ <td id="T_53d0e_row6_col14" class="data row6 col14" >56.119469</td>
618
+ <td id="T_53d0e_row6_col15" class="data row6 col15" >0.940335</td>
619
+ <td id="T_53d0e_row6_col16" class="data row6 col16" >349.569763</td>
620
+ <td id="T_53d0e_row6_col17" class="data row6 col17" >6144.000000</td>
621
+ <td id="T_53d0e_row6_col18" class="data row6 col18" >OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_6/metrics.json</td>
622
+ </tr>
623
+ <tr>
624
+ <th id="T_53d0e_level0_row7" class="row_heading level0 row7" >OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_7/metrics.json</th>
625
+ <td id="T_53d0e_row7_col0" class="data row7 col0" >5</td>
626
+ <td id="T_53d0e_row7_col1" class="data row7 col1" >128</td>
627
+ <td id="T_53d0e_row7_col2" class="data row7 col2" >7</td>
628
+ <td id="T_53d0e_row7_col3" class="data row7 col3" >0.073590</td>
629
+ <td id="T_53d0e_row7_col4" class="data row7 col4" >9.511523</td>
630
+ <td id="T_53d0e_row7_col5" class="data row7 col5" >3.668211</td>
631
+ <td id="T_53d0e_row7_col6" class="data row7 col6" >3.599065</td>
632
+ <td id="T_53d0e_row7_col7" class="data row7 col7" >13.054041</td>
633
+ <td id="T_53d0e_row7_col8" class="data row7 col8" >0.992263</td>
634
+ <td id="T_53d0e_row7_col9" class="data row7 col9" >0.992687</td>
635
+ <td id="T_53d0e_row7_col10" class="data row7 col10" >140.905991</td>
636
+ <td id="T_53d0e_row7_col11" class="data row7 col11" >138.128754</td>
637
+ <td id="T_53d0e_row7_col12" class="data row7 col12" >0.970831</td>
638
+ <td id="T_53d0e_row7_col13" class="data row7 col13" >31.997070</td>
639
+ <td id="T_53d0e_row7_col14" class="data row7 col14" >55.752361</td>
640
+ <td id="T_53d0e_row7_col15" class="data row7 col15" >0.928668</td>
641
+ <td id="T_53d0e_row7_col16" class="data row7 col16" >499.765717</td>
642
+ <td id="T_53d0e_row7_col17" class="data row7 col17" >6144.000000</td>
643
+ <td id="T_53d0e_row7_col18" class="data row7 col18" >OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_7/metrics.json</td>
644
+ </tr>
645
+ <tr>
646
+ <th id="T_53d0e_level0_row8" class="row_heading level0 row8" >OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_8/metrics.json</th>
647
+ <td id="T_53d0e_row8_col0" class="data row8 col0" >5</td>
648
+ <td id="T_53d0e_row8_col1" class="data row8 col1" >128</td>
649
+ <td id="T_53d0e_row8_col2" class="data row8 col2" >8</td>
650
+ <td id="T_53d0e_row8_col3" class="data row8 col3" >0.089368</td>
651
+ <td id="T_53d0e_row8_col4" class="data row8 col4" >7.897106</td>
652
+ <td id="T_53d0e_row8_col5" class="data row8 col5" >3.679746</td>
653
+ <td id="T_53d0e_row8_col6" class="data row8 col6" >3.599065</td>
654
+ <td id="T_53d0e_row8_col7" class="data row8 col7" >11.460873</td>
655
+ <td id="T_53d0e_row8_col8" class="data row8 col8" >0.988683</td>
656
+ <td id="T_53d0e_row8_col9" class="data row8 col9" >0.989738</td>
657
+ <td id="T_53d0e_row8_col10" class="data row8 col10" >157.343246</td>
658
+ <td id="T_53d0e_row8_col11" class="data row8 col11" >153.839539</td>
659
+ <td id="T_53d0e_row8_col12" class="data row8 col12" >0.968352</td>
660
+ <td id="T_53d0e_row8_col13" class="data row8 col13" >31.998373</td>
661
+ <td id="T_53d0e_row8_col14" class="data row8 col14" >53.455093</td>
662
+ <td id="T_53d0e_row8_col15" class="data row8 col15" >0.913877</td>
663
+ <td id="T_53d0e_row8_col16" class="data row8 col16" >732.317871</td>
664
+ <td id="T_53d0e_row8_col17" class="data row8 col17" >6144.000000</td>
665
+ <td id="T_53d0e_row8_col18" class="data row8 col18" >OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_8/metrics.json</td>
666
+ </tr>
667
+ <tr>
668
+ <th id="T_53d0e_level0_row9" class="row_heading level0 row9" >OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_9/metrics.json</th>
669
+ <td id="T_53d0e_row9_col0" class="data row9 col0" >5</td>
670
+ <td id="T_53d0e_row9_col1" class="data row9 col1" >128</td>
671
+ <td id="T_53d0e_row9_col2" class="data row9 col2" >9</td>
672
+ <td id="T_53d0e_row9_col3" class="data row9 col3" >0.096923</td>
673
+ <td id="T_53d0e_row9_col4" class="data row9 col4" >5.396312</td>
674
+ <td id="T_53d0e_row9_col5" class="data row9 col5" >3.695744</td>
675
+ <td id="T_53d0e_row9_col6" class="data row9 col6" >3.599065</td>
676
+ <td id="T_53d0e_row9_col7" class="data row9 col7" >8.970472</td>
677
+ <td id="T_53d0e_row9_col8" class="data row9 col8" >0.982039</td>
678
+ <td id="T_53d0e_row9_col9" class="data row9 col9" >0.982001</td>
679
+ <td id="T_53d0e_row9_col10" class="data row9 col10" >181.313721</td>
680
+ <td id="T_53d0e_row9_col11" class="data row9 col11" >176.829346</td>
681
+ <td id="T_53d0e_row9_col12" class="data row9 col12" >0.966578</td>
682
+ <td id="T_53d0e_row9_col13" class="data row9 col13" >31.997070</td>
683
+ <td id="T_53d0e_row9_col14" class="data row9 col14" >51.393932</td>
684
+ <td id="T_53d0e_row9_col15" class="data row9 col15" >0.895696</td>
685
+ <td id="T_53d0e_row9_col16" class="data row9 col16" >1123.425049</td>
686
+ <td id="T_53d0e_row9_col17" class="data row9 col17" >6144.000000</td>
687
+ <td id="T_53d0e_row9_col18" class="data row9 col18" >OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_9/metrics.json</td>
688
+ </tr>
689
+ <tr>
690
+ <th id="T_53d0e_level0_row10" class="row_heading level0 row10" >OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_10/metrics.json</th>
691
+ <td id="T_53d0e_row10_col0" class="data row10 col0" >5</td>
692
+ <td id="T_53d0e_row10_col1" class="data row10 col1" >128</td>
693
+ <td id="T_53d0e_row10_col2" class="data row10 col2" >10</td>
694
+ <td id="T_53d0e_row10_col3" class="data row10 col3" >0.106645</td>
695
+ <td id="T_53d0e_row10_col4" class="data row10 col4" >6.193092</td>
696
+ <td id="T_53d0e_row10_col5" class="data row10 col5" >3.692219</td>
697
+ <td id="T_53d0e_row10_col6" class="data row10 col6" >3.599065</td>
698
+ <td id="T_53d0e_row10_col7" class="data row10 col7" >9.754217</td>
699
+ <td id="T_53d0e_row10_col8" class="data row10 col8" >0.982780</td>
700
+ <td id="T_53d0e_row10_col9" class="data row10 col9" >0.984866</td>
701
+ <td id="T_53d0e_row10_col10" class="data row10 col10" >224.287598</td>
702
+ <td id="T_53d0e_row10_col11" class="data row10 col11" >218.769226</td>
703
+ <td id="T_53d0e_row10_col12" class="data row10 col12" >0.968813</td>
704
+ <td id="T_53d0e_row10_col13" class="data row10 col13" >31.998047</td>
705
+ <td id="T_53d0e_row10_col14" class="data row10 col14" >47.411495</td>
706
+ <td id="T_53d0e_row10_col15" class="data row10 col15" >0.877616</td>
707
+ <td id="T_53d0e_row10_col16" class="data row10 col16" >1806.194092</td>
708
+ <td id="T_53d0e_row10_col17" class="data row10 col17" >6144.000000</td>
709
+ <td id="T_53d0e_row10_col18" class="data row10 col18" >OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_10/metrics.json</td>
710
+ </tr>
711
+ <tr>
712
+ <th id="T_53d0e_level0_row11" class="row_heading level0 row11" >OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_11/metrics.json</th>
713
+ <td id="T_53d0e_row11_col0" class="data row11 col0" >5</td>
714
+ <td id="T_53d0e_row11_col1" class="data row11 col1" >128</td>
715
+ <td id="T_53d0e_row11_col2" class="data row11 col2" >11</td>
716
+ <td id="T_53d0e_row11_col3" class="data row11 col3" >0.136668</td>
717
+ <td id="T_53d0e_row11_col4" class="data row11 col4" >13.087515</td>
718
+ <td id="T_53d0e_row11_col5" class="data row11 col5" >3.732962</td>
719
+ <td id="T_53d0e_row11_col6" class="data row11 col6" >3.599065</td>
720
+ <td id="T_53d0e_row11_col7" class="data row11 col7" >16.484846</td>
721
+ <td id="T_53d0e_row11_col8" class="data row11 col8" >0.989557</td>
722
+ <td id="T_53d0e_row11_col9" class="data row11 col9" >0.989609</td>
723
+ <td id="T_53d0e_row11_col10" class="data row11 col10" >395.539520</td>
724
+ <td id="T_53d0e_row11_col11" class="data row11 col11" >391.472504</td>
725
+ <td id="T_53d0e_row11_col12" class="data row11 col12" >0.989259</td>
726
+ <td id="T_53d0e_row11_col13" class="data row11 col13" >32.000000</td>
727
+ <td id="T_53d0e_row11_col14" class="data row11 col14" >31.824055</td>
728
+ <td id="T_53d0e_row11_col15" class="data row11 col15" >0.870424</td>
729
+ <td id="T_53d0e_row11_col16" class="data row11 col16" >3098.075928</td>
730
+ <td id="T_53d0e_row11_col17" class="data row11 col17" >6144.000000</td>
731
+ <td id="T_53d0e_row11_col18" class="data row11 col18" >OAI_GPT2Small_v5_128k_resid_post_attn/v5_128k_layer_11/metrics.json</td>
732
+ </tr>
733
+ </tbody>
734
+ </table>
benchmark_stats.png ADDED

Git LFS Details

  • SHA256: aa5364cfaf1a4040a0479c26956a08ef06750a18ba36ad2f443d1a4f19be117e
  • Pointer size: 132 Bytes
  • Size of remote file: 4.61 MB
v5_128k_layer_0/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"architecture": "standard", "d_in": 768, "d_sae": 131072, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.0.hook_resid_mid", "hook_layer": 0, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
v5_128k_layer_0/metrics.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"metrics/kl_div_with_sae": 0.0038433405570685863, "metrics/kl_div_with_ablation": 12.480283737182617, "metrics/ce_loss_with_sae": 3.603421926498413, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 15.861976623535156, "metrics/kl_div_score": 0.9996920470208848, "metrics/ce_loss_score": 0.9996446734723997, "metrics/l2_norm_in": 32.70796203613281, "metrics/l2_norm_out": 32.60739517211914, "metrics/l2_ratio": 0.9969363212585449, "metrics/l0": 31.98079490661621, "metrics/l1": 44.247344970703125, "metrics/explained_variance": 0.9764951467514038, "metrics/mse": 5.842685222625732, "metrics/total_tokens_evaluated": 6144}
v5_128k_layer_0/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53c0b574675a7cb8d08aa9e285188adb4aa93010196c9abe2477b44efb31010b
3
+ size 805834048
v5_128k_layer_0/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c97f5d57a4ab3ed04963c38314c1d6190cb348deaf00ed450a55be408377837c
3
+ size 524368
v5_128k_layer_1/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"architecture": "standard", "d_in": 768, "d_sae": 131072, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.1.hook_resid_mid", "hook_layer": 1, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
v5_128k_layer_1/metrics.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"metrics/kl_div_with_sae": 0.006731455214321613, "metrics/kl_div_with_ablation": 16.217103958129883, "metrics/ce_loss_with_sae": 3.605462074279785, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 19.600265502929688, "metrics/kl_div_score": 0.999584916318493, "metrics/ce_loss_score": 0.999600187150498, "metrics/l2_norm_in": 56.929866790771484, "metrics/l2_norm_out": 56.6934928894043, "metrics/l2_ratio": 0.9958688616752625, "metrics/l0": 31.9990234375, "metrics/l1": 59.95335388183594, "metrics/explained_variance": 0.9722690582275391, "metrics/mse": 31.145605087280273, "metrics/total_tokens_evaluated": 6144}
v5_128k_layer_1/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b70e6a77f0297230ab8316ae3b0b87be64d872f7dcae70d3b4210389a7820d6
3
+ size 805834048
v5_128k_layer_1/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73aaa61cd691b1360f3c29f3b852d40d4c563c5183daca89b952b73efeff6553
3
+ size 524368
v5_128k_layer_10/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"architecture": "standard", "d_in": 768, "d_sae": 131072, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.10.hook_resid_mid", "hook_layer": 10, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
v5_128k_layer_10/metrics.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"metrics/kl_div_with_sae": 0.10664453357458115, "metrics/kl_div_with_ablation": 6.193092346191406, "metrics/ce_loss_with_sae": 3.692218780517578, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 9.754217147827148, "metrics/kl_div_score": 0.98278008341985, "metrics/ce_loss_score": 0.9848656566878472, "metrics/l2_norm_in": 224.28759765625, "metrics/l2_norm_out": 218.76922607421875, "metrics/l2_ratio": 0.9688126444816589, "metrics/l0": 31.998046875, "metrics/l1": 47.411495208740234, "metrics/explained_variance": 0.8776161670684814, "metrics/mse": 1806.194091796875, "metrics/total_tokens_evaluated": 6144}
v5_128k_layer_10/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d6a0caf038ffa29fe41fbc18beef10ff74f0560966014d21cd994f22e649b5d
3
+ size 805834048
v5_128k_layer_10/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b9dad4028b6d5f23adc11428739cead9a520c0fcb536d02cf63286c0682b999
3
+ size 524368
v5_128k_layer_11/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"architecture": "standard", "d_in": 768, "d_sae": 131072, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.11.hook_resid_mid", "hook_layer": 11, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
v5_128k_layer_11/metrics.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"metrics/kl_div_with_sae": 0.1366678923368454, "metrics/kl_div_with_ablation": 13.087514877319336, "metrics/ce_loss_with_sae": 3.7329623699188232, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 16.484846115112305, "metrics/kl_div_score": 0.9895573839939856, "metrics/ce_loss_score": 0.9896088738509167, "metrics/l2_norm_in": 395.5395202636719, "metrics/l2_norm_out": 391.4725036621094, "metrics/l2_ratio": 0.9892591834068298, "metrics/l0": 32.0, "metrics/l1": 31.824054718017578, "metrics/explained_variance": 0.8704243898391724, "metrics/mse": 3098.075927734375, "metrics/total_tokens_evaluated": 6144}
v5_128k_layer_11/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b297415ca9916b4e6929f7d82eea602acef5c39dd7bed502ac77873a55e72c68
3
+ size 805834048
v5_128k_layer_11/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ab69d29a274fbe5b7360c67fd0c62fffa384ff5979184093cba1e727ccd7427
3
+ size 524368
v5_128k_layer_2/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"architecture": "standard", "d_in": 768, "d_sae": 131072, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.2.hook_resid_mid", "hook_layer": 2, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
v5_128k_layer_2/metrics.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"metrics/kl_div_with_sae": 0.014034643769264221, "metrics/kl_div_with_ablation": 12.81351089477539, "metrics/ce_loss_with_sae": 3.6128032207489014, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 16.32787322998047, "metrics/kl_div_score": 0.9989046995874498, "metrics/ce_loss_score": 0.9989206662941394, "metrics/l2_norm_in": 68.90753173828125, "metrics/l2_norm_out": 68.5189208984375, "metrics/l2_ratio": 0.9936020374298096, "metrics/l0": 31.99934959411621, "metrics/l1": 53.266326904296875, "metrics/explained_variance": 0.9691864848136902, "metrics/mse": 49.732627868652344, "metrics/total_tokens_evaluated": 6144}
v5_128k_layer_2/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1caab24df127183793a8a856b3d147dc2e34b9f8422f0b6b404233f70214efb2
3
+ size 805834048
v5_128k_layer_2/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a32fe5876cb55265ae4de471b9003f0e8fa03d5df356dc9e6e320bc7d618037
3
+ size 524368
v5_128k_layer_3/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"architecture": "standard", "d_in": 768, "d_sae": 131072, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.3.hook_resid_mid", "hook_layer": 3, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
v5_128k_layer_3/metrics.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"metrics/kl_div_with_sae": 0.023511391133069992, "metrics/kl_div_with_ablation": 10.10186767578125, "metrics/ce_loss_with_sae": 3.622774839401245, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 13.548822402954102, "metrics/kl_div_score": 0.9976725698764163, "metrics/ce_loss_score": 0.9976170022128419, "metrics/l2_norm_in": 103.71144104003906, "metrics/l2_norm_out": 103.02696228027344, "metrics/l2_ratio": 0.9895378351211548, "metrics/l0": 31.983074188232422, "metrics/l1": 54.82560348510742, "metrics/explained_variance": 0.9752952456474304, "metrics/mse": 89.62007904052734, "metrics/total_tokens_evaluated": 6144}
v5_128k_layer_3/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06cbde3bc5a4e83d69f0ce34dd1ba53c843b7c94b40b7d006020732b54026826
3
+ size 805834048
v5_128k_layer_3/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0456b0483683bcfe4f1efa9f1ed4f46ca8b5840cba2a95b2bed5c6e3cd830492
3
+ size 524368
v5_128k_layer_4/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"architecture": "standard", "d_in": 768, "d_sae": 131072, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.4.hook_resid_mid", "hook_layer": 4, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
v5_128k_layer_4/metrics.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"metrics/kl_div_with_sae": 0.03753046691417694, "metrics/kl_div_with_ablation": 13.249712944030762, "metrics/ce_loss_with_sae": 3.640705108642578, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 16.69910430908203, "metrics/kl_div_score": 0.9971674505649509, "metrics/ce_loss_score": 0.9968213439818392, "metrics/l2_norm_in": 111.40328216552734, "metrics/l2_norm_out": 110.28677368164062, "metrics/l2_ratio": 0.9840068817138672, "metrics/l0": 31.94856834411621, "metrics/l1": 56.49782943725586, "metrics/explained_variance": 0.9639231562614441, "metrics/mse": 153.8262939453125, "metrics/total_tokens_evaluated": 6144}
v5_128k_layer_4/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e225c8a8b01cc6f6bc625e9f259e7dc0488c0f483d4584d1a32361b90666b41
3
+ size 805834048
v5_128k_layer_4/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af6f11b2f0ca3f1b787c7c1c11a3fe30c8f58a52c0611ab7f781d67b773325e1
3
+ size 524368
v5_128k_layer_5/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"architecture": "standard", "d_in": 768, "d_sae": 131072, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.5.hook_resid_mid", "hook_layer": 5, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
v5_128k_layer_5/metrics.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"metrics/kl_div_with_sae": 0.04741125553846359, "metrics/kl_div_with_ablation": 11.519681930541992, "metrics/ce_loss_with_sae": 3.6441657543182373, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 14.860109329223633, "metrics/kl_div_score": 0.9958843259888311, "metrics/ce_loss_score": 0.9959949394740818, "metrics/l2_norm_in": 119.6514892578125, "metrics/l2_norm_out": 118.05345916748047, "metrics/l2_ratio": 0.9789802432060242, "metrics/l0": 31.9580078125, "metrics/l1": 56.149471282958984, "metrics/explained_variance": 0.9522756338119507, "metrics/mse": 238.1262664794922, "metrics/total_tokens_evaluated": 6144}
v5_128k_layer_5/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b61e22ab49ca410f4c70b7a95897f3209a598dffcd82248a713bdfea743df862
3
+ size 805834048
v5_128k_layer_5/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57431e5cb89876fb8170d1cc730430f296b725aaab7962e6e5b571cf4718f863
3
+ size 524368
v5_128k_layer_6/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"architecture": "standard", "d_in": 768, "d_sae": 131072, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.6.hook_resid_mid", "hook_layer": 6, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
v5_128k_layer_6/metrics.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"metrics/kl_div_with_sae": 0.06079387292265892, "metrics/kl_div_with_ablation": 6.933250427246094, "metrics/ce_loss_with_sae": 3.655242681503296, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 10.522689819335938, "metrics/kl_div_score": 0.9912315480941302, "metrics/ce_loss_score": 0.9918860291994546, "metrics/l2_norm_in": 128.84793090820312, "metrics/l2_norm_out": 126.68817138671875, "metrics/l2_ratio": 0.9742312431335449, "metrics/l0": 31.99609375, "metrics/l1": 56.119468688964844, "metrics/explained_variance": 0.940334677696228, "metrics/mse": 349.56976318359375, "metrics/total_tokens_evaluated": 6144}
v5_128k_layer_6/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de3601867837d9a54d22e60ed60e3dcb19cc8a9ea35d2c3f3250051915006162
3
+ size 805834048
v5_128k_layer_6/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c936737aa0fbd6f271e8be7aea29053357e83055b9922fca067bb96eb2958c4
3
+ size 524368
v5_128k_layer_7/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"architecture": "standard", "d_in": 768, "d_sae": 131072, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.7.hook_resid_mid", "hook_layer": 7, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
v5_128k_layer_7/metrics.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"metrics/kl_div_with_sae": 0.07359001040458679, "metrics/kl_div_with_ablation": 9.511523246765137, "metrics/ce_loss_with_sae": 3.668210983276367, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 13.054040908813477, "metrics/kl_div_score": 0.9922630678078178, "metrics/ce_loss_score": 0.9926867722998524, "metrics/l2_norm_in": 140.90599060058594, "metrics/l2_norm_out": 138.12875366210938, "metrics/l2_ratio": 0.9708306789398193, "metrics/l0": 31.9970703125, "metrics/l1": 55.75236129760742, "metrics/explained_variance": 0.928668200969696, "metrics/mse": 499.7657165527344, "metrics/total_tokens_evaluated": 6144}
v5_128k_layer_7/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e663dba31833e757306887ad8b311e74a3a1cce66474be454c958106768b39c
3
+ size 805834048
v5_128k_layer_7/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ffcaab47977c368c98875bffe48f3e4c66ffd455605e791e06f3178c5f5b96fa
3
+ size 524368
v5_128k_layer_8/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"architecture": "standard", "d_in": 768, "d_sae": 131072, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.8.hook_resid_mid", "hook_layer": 8, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
v5_128k_layer_8/metrics.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"metrics/kl_div_with_sae": 0.08936788886785507, "metrics/kl_div_with_ablation": 7.897105693817139, "metrics/ce_loss_with_sae": 3.679746389389038, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 11.460872650146484, "metrics/kl_div_score": 0.9886834629884942, "metrics/ce_loss_score": 0.9897375005583807, "metrics/l2_norm_in": 157.34324645996094, "metrics/l2_norm_out": 153.83953857421875, "metrics/l2_ratio": 0.9683517217636108, "metrics/l0": 31.99837303161621, "metrics/l1": 53.45509338378906, "metrics/explained_variance": 0.9138767123222351, "metrics/mse": 732.31787109375, "metrics/total_tokens_evaluated": 6144}
v5_128k_layer_8/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60f311c3a33f5fc526a04fc344266b83e16ec90cea69d6059089e0c8790f37ca
3
+ size 805834048
v5_128k_layer_8/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57ecd3cf156b9a328e813f10d378b57ed1a6f37d2604bf0f37e11e75438b301f
3
+ size 524368
v5_128k_layer_9/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"architecture": "standard", "d_in": 768, "d_sae": 131072, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.9.hook_resid_mid", "hook_layer": 9, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
v5_128k_layer_9/metrics.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"metrics/kl_div_with_sae": 0.09692329168319702, "metrics/kl_div_with_ablation": 5.3963117599487305, "metrics/ce_loss_with_sae": 3.6957435607910156, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 8.97047233581543, "metrics/kl_div_score": 0.9820389747674404, "metrics/ce_loss_score": 0.9820011853887981, "metrics/l2_norm_in": 181.313720703125, "metrics/l2_norm_out": 176.829345703125, "metrics/l2_ratio": 0.966578483581543, "metrics/l0": 31.9970703125, "metrics/l1": 51.3939323425293, "metrics/explained_variance": 0.8956956267356873, "metrics/mse": 1123.425048828125, "metrics/total_tokens_evaluated": 6144}