Dmitrii committed
Commit b17a7e8
Parent: c98496e

Revision 5&6

.gitignore ADDED
@@ -0,0 +1,2 @@
+.venv
+.venv/**
app.py CHANGED
@@ -14,43 +14,80 @@ TOKENIZER = "microsoft/Phi-3-mini-4k-instruct"
 
 dataset = load_dataset("kisate-team/feature-explanations", split="train")
 
-layers = dataset.unique("layer")
-
-features = {layer:{item["feature"]:item for item in dataset if item["layer"] == layer} for layer in layers}
+def find_revions():
+    revisions = set()
+    for parquet in parquets:
+        if parquet.endswith(".parquet"):
+            parts = parquet.split("-")
+            if len(parts) > 2:
+                revisions.add(int(parts[2][1:]))
+    return sorted(revisions)
+
+def find_layers(revision):
+    layers = set()
+    for parquet in parquets:
+        if parquet.endswith(".parquet"):
+            parts = parquet.split("-")
+            if len(parts) > 2 and int(parts[2][1:]) == revision:
+                layers.add(int(parts[1][1:]))
+    return sorted(layers)
+
+revisions = find_revions()
+layers = {
+    revision: find_layers(revision) for revision in revisions
+}
+
+features = {
+    revision: {
+        layer: {
+            item["feature"]:item for item in dataset if item["layer"] == layer and item["version"] == revision
+        } for layer in layers[revision]
+    } for revision in revisions
+}
+
+# layers = dataset.unique("layer")
 
 nearby = 8
 stride = 0.25
 n_bins = 10
 
-def make_cache_name(layer):
-    return f"{cache_path}/phi-l{layer}-r4-st0.25x128-activations.parquet"
+def make_cache_name(layer, revision):
+    return f"{cache_path}/phi-l{layer}-r{revision}-st0.25x128-activations.parquet"
 
 with gr.Blocks() as demo:
     feature_table = gr.State(None)
 
-    tokenizer_name = gr.Textbox(TOKENIZER)
-    layer_dropdown = gr.Dropdown(layers)
-    feature_dropdown = gr.Dropdown()
+    tokenizer_name = gr.Textbox(TOKENIZER, label="Tokenizer")
+    revision_dropdown = gr.Dropdown(revisions, label="Revision")
+
+    layer_dropdown = gr.Dropdown(layers[4], label="Layer")
 
     def update_features(layer):
         feature_dropdown = gr.Dropdown(features[layer].keys())
         return feature_dropdown
-
-    layer_dropdown.input(update_features, layer_dropdown, feature_dropdown)
-
+
+    def update_layers(revision):
+        layer_dropdown = gr.Dropdown(layers[revision])
+        return layer_dropdown
 
     frequency = gr.Number(0, label="Total frequency (%)")
+
+    # layer_dropdown.input(update_features, layer_dropdown, feature_dropdown)
     # histogram = gr.LinePlot(x="activation", y="freq")
 
-    autoi_expl = gr.Textbox()
-    selfe_expl = gr.Textbox()
+    revision_dropdown.input(update_layers, revision_dropdown, layer_dropdown)
+
+    feature_input = gr.Number(0, label="Feature")
+
+    autoi_expl = gr.Textbox(label="AutoInterp Explanation")
+    selfe_expl = gr.Textbox(label="SelfExplain Explanation")
 
     cm = gr.HighlightedText()
     frame = gr.Highlightedtext()
 
-    def update(layer, feature, tokenizer_name):
+    def update(revision, layer, feature, tokenizer_name):
         tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
-        table = pq.read_table(make_cache_name(layer))
+        table = pq.read_table(make_cache_name(layer, revision))
         table_feat = table.filter(pc.field("feature") == feature).to_pandas()
 
         # freq_t = table_feat[["activation", "freq"]]
@@ -85,19 +122,22 @@ with gr.Blocks() as demo:
         flat_data = []
         color_map_data = []
 
-        autoi_expl = features[layer][feature]["explanation"]
-        selfe_expl = features[layer][feature]["gen_explanations"]
-
-        if selfe_expl is not None:
-            selfe_expl = "\n".join(
-                f"{i+1}. \"{x}\"" for i, x in enumerate(selfe_expl)
-            )
+        if feature in features[revision][layer]:
+            autoi_expl = features[revision][layer][feature]["explanation"]
+            selfe_expl = features[revision][layer][feature]["gen_explanations"]
+            if selfe_expl is not None:
+                selfe_expl = "\n".join(
+                    f"{i+1}. \"{x}\"" for i, x in enumerate(selfe_expl)
+                )
 
+        else:
+            autoi_expl = "No explanation found"
+            selfe_expl = "No explanation found"
         return flat_data, color_map_data, total_freq, autoi_expl, selfe_expl
 
 
-    feature_dropdown.change(update, [layer_dropdown, feature_dropdown, tokenizer_name], [frame, cm, frequency, autoi_expl, selfe_expl])
-    # feature_input.change(update, [dropdown, feature_input, tokenizer_name, token_range], [frame, cm, histogram, frequency])
+    # feature_dropdown.change(update, [layer_dropdown, feature_dropdown, tokenizer_name], [frame, cm, frequency, autoi_expl, selfe_expl])
+    feature_input.change(update, [revision_dropdown, layer_dropdown, feature_input, tokenizer_name], [frame, cm, frequency, autoi_expl, selfe_expl])
 
 
 if __name__ == "__main__":
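
Note: the new revision/layer discovery keys off the cache filename pattern phi-l{layer}-r{revision}-st0.25x128-activations.parquet. The parquets list it iterates over is defined outside the hunks shown here (presumably a listing of weights/caches). A minimal, self-contained sketch of the same parsing, run against the filenames added in this commit; the hard-coded file list and the standalone helper signatures are illustrative, not part of app.py:

    # Hypothetical stand-in for the `parquets` list used in app.py; assumed to
    # be a listing of the weights/caches directory.
    parquets = [
        "phi-l12-r5-st0.25x128-activations.parquet",
        "phi-l12-r6-st0.25x128-activations.parquet",
        "phi-l16-r5-st0.25x128-activations.parquet",
        "phi-l16-r6-st0.25x128-activations.parquet",
        "phi-l20-r5-st0.25x128-activations.parquet",
        "phi-l20-r6-st0.25x128-activations.parquet",
    ]

    def find_revisions(parquets):
        # "phi-l12-r5-..." -> split("-")[2] == "r5" -> 5
        return sorted({int(p.split("-")[2][1:]) for p in parquets if p.endswith(".parquet")})

    def find_layers(parquets, revision):
        # split("-")[1] == "l12" -> 12, restricted to one revision
        return sorted({
            int(p.split("-")[1][1:])
            for p in parquets
            if p.endswith(".parquet") and int(p.split("-")[2][1:]) == revision
        })

    print(find_revisions(parquets))   # [5, 6]
    print(find_layers(parquets, 5))   # [12, 16, 20]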
requirements.txt CHANGED
@@ -2,4 +2,5 @@ pyarrow
 transformers[cpu]
 numpy
 pandas
-datasets
+datasets
+gradio
weights/caches/phi-l12-r5-st0.25x128-activations.parquet ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e391b0bbf9d8b0f5425a5c87cccaa76adf81b8b30c97de441ec1d70abd0aee72
+size 9038528
weights/caches/phi-l12-r6-st0.25x128-activations.parquet ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:55ad17c622910e9574810b391932152ad27cb3715a6db89f469e15b1d3f9af60
+size 13772640
weights/caches/phi-l16-r5-st0.25x128-activations.parquet ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4316ea56954784cd894505174b34ae4830bedeb38fc9acdc440a167e2d9444d9
+size 12012570
weights/caches/phi-l16-r6-st0.25x128-activations.parquet ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9d3f29af97308aa376f3f85ebe05f161d3816e8c23c67b1dcfb54c0bd89e1cd3
+size 10877417
weights/caches/phi-l20-r5-st0.25x128-activations.parquet ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ba0048dbd58dc56a887ed25f44b13a27e79e98739df0233e9ca6caaaa43f132f
+size 18612669
weights/caches/phi-l20-r6-st0.25x128-activations.parquet ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:10c26b2de80b22c20eb7b8d38366bd5ad5199e9042a2a42a8eb07fc8ffab12b6
+size 27539933
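
Note: the six ADDED files above are Git LFS pointer stubs (spec version, sha256 oid, byte size); the parquet payloads are only materialized on an LFS-enabled checkout. Once pulled, app.py's update() callback reads them with pyarrow as shown in the diff. A minimal sketch of that read path, assuming an LFS checkout and using layer 12, revision 5, with an arbitrary feature id:

    import pyarrow.parquet as pq
    import pyarrow.compute as pc

    # Path layout from this commit; cache_path mirrors the app's variable.
    cache_path = "weights/caches"
    path = f"{cache_path}/phi-l12-r5-st0.25x128-activations.parquet"

    table = pq.read_table(path)
    # Keep only rows for one feature, as update() does before building the
    # highlighted-text views; feature id 0 is an arbitrary example.
    table_feat = table.filter(pc.field("feature") == 0).to_pandas()
    print(len(table_feat), table_feat.columns.tolist())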