Martijn van Beers committed
Commit 7751ada
1 Parent(s): 330a2ff

Use noun_chunks instead of entities
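For context on the change: spaCy's doc.ents only yields spans the statistical NER model recognises, while doc.noun_chunks yields every base noun phrase, so phrases without an entity label still get an explanation. The sketch below illustrates the difference this commit relies on; it assumes the English spaCy pipeline the app already loads as nlp (en_core_web_sm here is an assumption), the sentence is taken from the app's own examples, and the fallback logic is a condensed equivalent of the loop the commit adds, not the commit's exact code.

import spacy

nlp = spacy.load("en_core_web_sm")  # assumption: whichever English model the app loads as `nlp`
doc = nlp("In this image we see Big Ben and the London Eye.")

# Old behaviour: only spans the NER model tags are explained.
print([(ent.text, ent.label_) for ent in doc.ents])

# New behaviour: every noun chunk is explained; the entity label is borrowed
# from the first token that has one, otherwise the generic "N" is used.
for chunk in doc.noun_chunks:
    if len(chunk) == 1 and chunk[0].pos_ == "PRON":  # skip bare pronouns such as "we"
        continue
    label = next((t.ent_type_ for t in chunk if t.ent_type_), "N")
    print(chunk.text, label, chunk.start_char, chunk.end_char)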

Files changed (1)
  1. app.py +82 -33
app.py CHANGED
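With this change the first output of the NER tab becomes a gr.components.HighlightedText driven by colour_map, and NER_demo returns its highlights in the character-offset dictionary format sketched below. The values are purely illustrative; only the keys ('text', 'entities', 'entity', 'start', 'end') come from the added code.

# Hypothetical shape of the value NER_demo returns for the HighlightedText output.
labeled_text = {
    "text": "Big Ben at night",
    "entities": [
        {"entity": "FAC", "start": 0, "end": 7},   # "Big Ben": label borrowed from spaCy
        {"entity": "N", "start": 11, "end": 16},   # "night": generic noun-chunk fallback
    ],
}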
@@ -1,3 +1,4 @@
+import re
 import sys
 import gradio as gr

@@ -25,6 +26,28 @@ clip.clip._MODELS = {
     "ViT-B/16": "https://openaipublic.azureedge.net/clip/models/5806e77cd80f8b59890b7e101eabd078d9fb84e6937f9e85e4ecb61988df416f/ViT-B-16.pt",
 }

+colour_map = {
+    "N": "#f77189",
+    "CARDINAL": "#f7764a",
+    "DATE": "#d98a32",
+    "EVENT": "#bf9632",
+    "FAC": "#a99e31",
+    "GPE": "#90a531",
+    "LANGUAGE": "#68ad31",
+    "LAW": "#32b25e",
+    "LOC": "#34af86",
+    "MONEY": "#35ae9c",
+    "NORP": "#36acac",
+    "ORDINAL": "#37aabd",
+    "ORG": "#39a7d4",
+    "PERCENT": "#539ff4",
+    "PERSON": "#9890f4",
+    "PRODUCT": "#c47ef4",
+    "QUANTITY": "#ef5ff4",
+    "TIME": "#f565d0",
+    "WORK_OF_ART": "#f66baf",
+}
+
 device = "cuda" if torch.cuda.is_available() else "cpu"
 model, preprocess = clip.load("ViT-B/32", device=device, jit=False)

@@ -53,11 +76,16 @@ def run_demo(image, text):


 # Default demo:
-input_img = gr.inputs.Image(type='pil', label="Original Image")
-input_txt = "text"
-inputs = [input_img, input_txt]

-outputs = [gr.inputs.Image(type='pil', label="Output Image"), "highlight"]
+default_inputs = [
+    gr.components.Image(type='pil', label="Original Image"),
+    gr.components.Textbox(label="Image description"),
+]
+
+default_outputs = [
+    gr.components.Image(type='pil', label="Output Image"),
+    gr.components.HighlightedText(label="Text importance"),
+]


 description = """This demo is a copy of the demo CLIPGroundingExlainability built by Paul Hilders, Danilo de Goede and Piyush Bagad, as part of the course Interpretability and Explainability in AI (MSc AI, UvA, June 2022).
@@ -70,8 +98,8 @@ description = """This demo is a copy of the demo CLIPGroundingExlainability buil
 of the model."""

 iface = gr.Interface(fn=run_demo,
-                     inputs=inputs,
-                     outputs=outputs,
+                     inputs=default_inputs,
+                     outputs=default_outputs,
                      title="CLIP Grounding Explainability",
                      description=description,
                      examples=[["example_images/London.png", "London Eye"],
@@ -91,40 +119,58 @@ def add_label_to_img(img, label, add_entity_label=True):
     img = ImageOps.expand(img, border=45, fill=(255,255,255))
     draw = ImageDraw.Draw(img)
     font = ImageFont.truetype("arial.ttf", 24)
-    if add_entity_label:
-        draw.text((5,5), f"Entity: {str(label)}" , align="center", fill=(0, 0, 0), font=font)
+    m = re.match(r".*\((\w+)\)", label)
+    if add_entity_label and m is not None:
+        cat = m.group(1)
+        colours = tuple(map(lambda l: int(''.join(l),16), zip(*[iter(colour_map[cat][1:])]*2)))
+
+        draw.text((5,5), label , align="center", fill=colours, font=font)
     else:
-        draw.text((5,5), str(label), align="center", fill=(0, 0, 0), font=font)
+        draw.text((5,5), label, align="center", fill=(0, 0, 0), font=font)

     return img

 def NER_demo(image, text):
-    # Apply NER to extract named entities, and run the explainability method
-    # for each named entity.
-    highlighed_entities = []
-    for ent in nlp(text).ents:
-        ent_text = ent.text
-        ent_label = ent.label_
-        highlighed_entities.append((ent_text, ent_label))
-
     # As the default image, we run the default demo on the input image and text:
     overlapped, highlighted_text = run_demo(image, text)

-    # Then, we run the demo for each of the named entities:
-    gallery_images = [add_label_to_img(overlapped, "Full explanation", add_entity_label=False)]
-    for ent_text, ent_label in highlighed_entities:
-        overlapped_ent, highlighted_text_ent = run_demo(image, ent_text)
-        overlapped_ent_labelled = add_label_to_img(overlapped_ent, f"{str(ent_text)} ({str(ent_label)})")
-
-        gallery_images.append(overlapped_ent_labelled)
-
-    return highlighed_entities, gallery_images
-
-input_img_NER = gr.inputs.Image(type='pil', label="Original Image")
-input_txt_NER = "text"
-inputs_NER = [input_img_NER, input_txt_NER]
-
-outputs_NER = ["highlight", gr.Gallery(type='pil', label="NER Entity explanations")]
+    gallery_images = [add_label_to_img(overlapped, "Complete sentence", add_entity_label=False)]
+
+    labeled_text = dict(
+        text=text,
+        entities=[],
+    )
+
+    # Then, we run the demo for each of the noun chunks in the text:
+    for chunk in nlp(text).noun_chunks:
+        if len(chunk) == 1 and chunk[0].pos_ == "PRON":
+            continue
+        chunk_text = chunk.text
+        chunk_label = None
+        for t in chunk:
+            if t.ent_type_ != '':
+                chunk_label = t.ent_type_
+                break
+        if chunk_label is None:
+            chunk_label = "N"
+
+        labeled_text['entities'].append({'entity': chunk_label, 'start': chunk.start_char, 'end': chunk.end_char})
+        overlapped, highlighted_text = run_demo(image, chunk_text)
+        overlapped_labelled = add_label_to_img(overlapped, f"{chunk_text} ({chunk_label})")
+        gallery_images.append(overlapped_labelled)
+
+    return labeled_text, gallery_images
+
+inputs_NER = [
+    gr.Image(type='pil', label="Original Image"),
+    gr.components.Textbox(label="Descriptive text"),
+]
+
+#colours = highlighter._style["color_map"]
+outputs_NER = [
+    gr.components.HighlightedText(show_legend=True, color_map=colour_map, label="Noun chunks"),
+    gr.components.Gallery(type='pil', label="NER Entity explanations")
+]

 description_NER = """Automatically generated CLIP grounding explanations for
 named entities, retrieved from the spacy NER model. <span style="color:red">Warning:</span> Note
@@ -136,7 +182,10 @@ iface_NER = gr.Interface(fn=NER_demo,
                          outputs=outputs_NER,
                          title="Named Entity Grounding explainability using CLIP",
                          description=description_NER,
-                         examples=[["example_images/London.png", "In this image we see Big Ben and the London Eye, on both sides of the river Thames."]],
+                         examples=[
+                             ["example_images/London.png", "In this image we see Big Ben and the London Eye, on both sides of the river Thames."],
+                             ["example_images/harrypotter.png", "Hermione, Harry and Ron in their school uniform"],
+                         ],
                          cache_examples=False)

 demo_tabs = gr.TabbedInterface([iface, iface_NER], ["Default", "NER"])
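A note on the densest added line in add_label_to_img: zip(*[iter(colour_map[cat][1:])]*2) walks the hex string (minus the leading '#') two characters at a time, and int(''.join(pair), 16) converts each pair to an integer, so "#f77189" becomes the RGB tuple (247, 113, 137) that PIL's fill argument expects. A more explicit equivalent, as a sketch (the helper name is ours, not part of the commit):

def hex_to_rgb(hex_colour: str) -> tuple[int, int, int]:
    """Convert '#rrggbb' to an (r, g, b) tuple, e.g. '#f77189' -> (247, 113, 137)."""
    h = hex_colour.lstrip("#")
    return tuple(int(h[i:i + 2], 16) for i in range(0, 6, 2))

assert hex_to_rgb("#f77189") == (247, 113, 137)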