Martijn van Beers committed on
Commit adf3a47
1 Parent(s): 6395dfb

Move text and examples into separate files

Files changed (6)
  1. app.py +15 -52
  2. description.md +9 -0
  3. entity_description.md +5 -0
  4. entity_examples.csv +3 -0
  5. examples.csv +12 -0
  6. footer.md +13 -0
app.py CHANGED
@@ -1,8 +1,9 @@
 import re
 import sys
+import pathlib
+import csv
 import gradio as gr
 
-# sys.path.append("../")
 sys.path.append("CLIP_explainability/Transformer-MM-Explainability/")
 
 import torch
@@ -24,6 +25,12 @@ clip.clip._MODELS = {
     "ViT-L/14@336px": "https://openaipublic.azureedge.net/clip/models/3035c92b350959924f9f00213499208652fc7ea050643e8b385c2dac08641f02/ViT-L-14-336px.pt",
 }
 
+def iter_file(filename):
+    with pathlib.Path(filename).open("r") as fh:
+        header = next(fh)
+        for line in fh:
+            yield line
+
 colour_map = {
     "N": "#f77189",
     "CARDINAL": "#f7764a",
@@ -91,17 +98,9 @@ def run_demo(*args):
 
 # Default demo:
 
-description = """This demo is a copy of the demo CLIPGroundingExlainability built by Paul Hilders, Danilo de Goede and Piyush Bagad, as part of the course Interpretability and Explainability in AI (MSc AI, UvA, June 2022).
-<br> <br>
-This demo shows attributions scores on both the image and the text input when presenting CLIP with a
-<text,image> pair. Attributions are computed as Gradient-weighted Attention Rollout (Chefer et al.,
-2021), and can be thought of as an estimate of the effective attention CLIP pays to its input when
-computing a multimodal representation. <span style="color:red">Warning:</span> Note that attribution
-methods such as the one from this demo can only give an estimate of the real underlying behavior
-of the model."""
-
+examples = list(csv.reader(iter_file("examples.csv")))
 with gr.Blocks(title="CLIP Grounding Explainability") as iface_default:
-    gr.Markdown(description)
+    gr.Markdown(pathlib.Path("description.md").read_text())
     with gr.Row():
         with gr.Column() as inputs:
             orig = gr.components.Image(type='pil', label="Original Image")
@@ -112,22 +111,7 @@ with gr.Blocks(title="CLIP Grounding Explainability") as iface_default:
         with gr.Column() as outputs:
             image = gr.components.Image(type='pil', label="Output Image")
             text = gr.components.HighlightedText(label="Text importance")
-    gr.Examples(
-        examples=[
-            ["example_images/London.png", "London Eye"],
-            ["example_images/London.png", "Big Ben"],
-            ["example_images/harrypotter.png", "Harry"],
-            ["example_images/harrypotter.png", "Hermione"],
-            ["example_images/harrypotter.png", "Ron"],
-            ["example_images/Amsterdam.png", "Amsterdam canal"],
-            ["example_images/Amsterdam.png", "Old buildings"],
-            ["example_images/Amsterdam.png", "Pink flowers"],
-            ["example_images/dogs_on_bed.png", "Two dogs"],
-            ["example_images/dogs_on_bed.png", "Book"],
-            ["example_images/dogs_on_bed.png", "Cat"]
-        ],
-        inputs=[orig, description]
-    )
+    gr.Examples(examples=examples, inputs=[orig, description])
     default_model.change(update_slider, inputs=default_model, outputs=default_layer)
     submit.click(run_demo, inputs=[orig, description, default_model, default_layer], outputs=[image, text])
 
@@ -181,13 +165,9 @@ def NER_demo(image, text, model_name):
     return labeled_text, gallery_images
 
 
-description_NER = """Automatically generated CLIP grounding explanations for
-noun chunks, retrieved with the spaCy model. <span style="color:red">Warning:</span> Note
-that attribution methods such as the one from this demo can only give an estimate of the real
-underlying behavior of the model."""
-
+entity_examples = list(csv.reader(iter_file("entity_examples.csv")))
 with gr.Blocks(title="Entity Grounding explainability using CLIP") as iface_NER:
-    gr.Markdown(description_NER)
+    gr.Markdown(pathlib.Path("entity_description.md").read_text())
     with gr.Row():
         with gr.Column() as inputs:
             img = gr.Image(type='pil', label="Original Image")
@@ -199,28 +179,11 @@ with gr.Blocks(title="Entity Grounding explainability using CLIP") as iface_NER:
             text = gr.components.HighlightedText(show_legend=True, color_map=colour_map, label="Noun chunks")
             gallery = gr.components.Gallery(type='pil', label="NER Entity explanations")
 
-    gr.Examples(
-        examples=[
-            ["example_images/London.png", "In this image we see Big Ben and the London Eye, on both sides of the river Thames."],
-            ["example_images/harrypotter.png", "Hermione, Harry and Ron in their school uniform"],
-        ],
-        inputs=[img, text],
-    )
+    gr.Examples(examples=entity_examples, inputs=[img, text])
     ner_model.change(update_slider, inputs=ner_model, outputs=ner_layer)
     submit.click(run_demo, inputs=[img, intext, ner_model, ner_layer], outputs=[text, gallery])
 
 demo_tabs = gr.TabbedInterface([iface_default, iface_NER], ["Default", "Entities"])
 with demo_tabs:
-    gr.Markdown("""
-    ### Acknowledgements
-    This demo was developed for the Interpretability & Explainability in AI course at the University of
-    Amsterdam. We would like to express our thanks to Jelle Zuidema, Jaap Jumelet, Tom Kersten, Christos
-    Athanasiadis, Peter Heemskerk, Zhi Zhang, and all the other TAs who helped us during this course.
-
-    ---
-    ### References
-    \[1\]: Chefer, H., Gur, S., & Wolf, L. (2021). Generic attention-model explainability for interpreting bi-modal and encoder-decoder transformers. <br>
-    \[2\]: Abnar, S., & Zuidema, W. (2020). Quantifying attention flow in transformers. arXiv preprint arXiv:2005.00928. <br>
-    \[3\]: [https://samiraabnar.github.io/articles/2020-04/attention_flow](https://samiraabnar.github.io/articles/2020-04/attention_flow) <br>
-    """)
+    gr.Markdown(pathlib.Path("footer.md").read_text())
 demo_tabs.launch(show_error=True)
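
The commit boils the loading pattern down to two moves: UI copy is read from markdown files with pathlib, and example rows come from CSV through the new iter_file generator feeding csv.reader. A minimal standalone sketch of that loading step, assuming the files added in this commit sit next to the script (load_examples is a hypothetical helper for illustration, not a function in app.py):

```python
import csv
import pathlib


def iter_file(filename):
    """Yield the data rows of a CSV file, skipping the header line."""
    with pathlib.Path(filename).open("r") as fh:
        next(fh)  # drop the "image,text" header row
        for line in fh:
            yield line


def load_examples(filename):
    # csv.reader parses each quoted row into [image_path, caption]
    return list(csv.reader(iter_file(filename)))


if __name__ == "__main__":
    description = pathlib.Path("description.md").read_text()
    examples = load_examples("examples.csv")
    print(description.splitlines()[0])
    print(examples[0])  # expected: ['example_images/London.png', 'London Eye']
```

Keeping the copy in description.md, entity_description.md and footer.md means wording tweaks no longer require touching app.py at all.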
description.md ADDED
@@ -0,0 +1,9 @@
+This demo is a copy of the CLIPGroundingExplainability demo built by Paul Hilders, Danilo de Goede and Piyush Bagad as part of the course Interpretability and Explainability in AI (MSc AI, UvA, June 2022).
+
+
+This demo shows attribution scores on both the image and the text input when presenting CLIP with a
+<text,image> pair. Attributions are computed as Gradient-weighted Attention Rollout (Chefer et al.,
+2021), and can be thought of as an estimate of the effective attention CLIP pays to its input when
+computing a multimodal representation. <span style="color:red">Warning:</span> Note that attribution
+methods such as the one from this demo can only give an estimate of the real underlying behavior
+of the model.
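
The attribution method the description names, Gradient-weighted Attention Rollout (Chefer et al., 2021), reduces to a short relevance update per attention layer. Below is a minimal sketch of that rule, assuming the per-layer attention maps and their gradients have already been collected as tensors; it is not the code the demo calls, which lives in the Transformer-MM-Explainability checkout that app.py puts on sys.path:

```python
import torch


def gradient_weighted_rollout(attentions, gradients):
    """Sketch of the Chefer et al. (2021) relevance update.

    attentions, gradients: lists of [heads, tokens, tokens] tensors,
    one pair per layer, with gradients taken w.r.t. the similarity score.
    """
    num_tokens = attentions[0].shape[-1]
    relevance = torch.eye(num_tokens)
    for attn, grad in zip(attentions, gradients):
        # weight attention by its gradient, keep positive evidence,
        # average over heads, then accumulate relevance across layers
        cam = (grad * attn).clamp(min=0).mean(dim=0)
        relevance = relevance + torch.matmul(cam, relevance)
    return relevance
```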
entity_description.md ADDED
@@ -0,0 +1,5 @@
+Automatically generated CLIP grounding explanations for noun chunks,
+retrieved with the spaCy model.
+<span style="color:red">Warning:</span> Note that attribution methods
+such as the one from this demo can only give an estimate of the real
+underlying behavior of the model.
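
The noun chunks this tab grounds are the kind spaCy exposes as doc.noun_chunks, and app.py's colour_map mixes spaCy entity labels such as CARDINAL with a generic "N" key. A minimal sketch of that extraction, assuming the small English pipeline is installed; the exact model the demo loads is not shown in this diff:

```python
import spacy

# Assumes `python -m spacy download en_core_web_sm` has been run;
# the demo may load a different spaCy pipeline.
nlp = spacy.load("en_core_web_sm")

doc = nlp("In this image we see Big Ben and the London Eye, "
          "on both sides of the river Thames.")
for chunk in doc.noun_chunks:
    # label with the entity type when spaCy found one, else a generic "N"
    print(chunk.text, chunk.root.ent_type_ or "N")
```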
entity_examples.csv ADDED
@@ -0,0 +1,3 @@
+image,text
+"example_images/London.png","In this image we see Big Ben and the London Eye, on both sides of the river Thames."
+"example_images/harrypotter.png","Hermione, Harry and Ron in their school uniform"
examples.csv ADDED
@@ -0,0 +1,12 @@
+image,text
+"example_images/London.png","London Eye"
+"example_images/London.png","Big Ben"
+"example_images/harrypotter.png","Harry"
+"example_images/harrypotter.png","Hermione"
+"example_images/harrypotter.png","Ron"
+"example_images/Amsterdam.png","Amsterdam canal"
+"example_images/Amsterdam.png","Old buildings"
+"example_images/Amsterdam.png","Pink flowers"
+"example_images/dogs_on_bed.png","Two dogs"
+"example_images/dogs_on_bed.png","Book"
+"example_images/dogs_on_bed.png","Cat"
footer.md ADDED
@@ -0,0 +1,13 @@
+### Acknowledgements
+
+This demo was developed for the Interpretability & Explainability in AI course at the University of
+Amsterdam. We would like to express our thanks to Jelle Zuidema, Jaap Jumelet, Tom Kersten, Christos
+Athanasiadis, Peter Heemskerk, Zhi Zhang, and all the other TAs who helped us during this course.
+
+---
+
+### References
+
+\[1\]: Chefer, H., Gur, S., & Wolf, L. (2021). Generic attention-model explainability for interpreting bi-modal and encoder-decoder transformers. <br>
+\[2\]: Abnar, S., & Zuidema, W. (2020). Quantifying attention flow in transformers. arXiv preprint arXiv:2005.00928. <br>
+\[3\]: [https://samiraabnar.github.io/articles/2020-04/attention_flow](https://samiraabnar.github.io/articles/2020-04/attention_flow) <br>