Martijn van Beers
Move text and examples into separate files
adf3a47
raw
history blame
No virus
7.61 kB
import re
import sys
import pathlib
import csv
import gradio as gr
sys.path.append("CLIP_explainability/Transformer-MM-Explainability/")
import torch
import CLIP.clip as clip
import spacy
from PIL import Image, ImageFont, ImageDraw, ImageOps
from clip_grounding.utils.image import pad_to_square
from clip_grounding.datasets.png import (
overlay_relevance_map_on_image,
)
from CLIP_explainability.utils import interpret, show_img_heatmap, show_heatmap_on_text
# Replace the `_MODELS` table of the vendored CLIP module with these four
# checkpoints (model name -> download URL).
# NOTE(review): presumably this is the table `clip.load` resolves model names
# against -- confirm in the bundled CLIP package.
clip.clip._MODELS = {
    "ViT-B/32": "https://openaipublic.azureedge.net/clip/models/40d365715913c9da98579312b702a82c18be219cc2a73407c4526f58eba950af/ViT-B-32.pt",
    "ViT-B/16": "https://openaipublic.azureedge.net/clip/models/5806e77cd80f8b59890b7e101eabd078d9fb84e6937f9e85e4ecb61988df416f/ViT-B-16.pt",
    "ViT-L/14": "https://openaipublic.azureedge.net/clip/models/b8cca3fd41ae0c99ba7e8951adf17d267cdb84cd88be6f7c2e0eca1737a03836/ViT-L-14.pt",
    "ViT-L/14@336px": "https://openaipublic.azureedge.net/clip/models/3035c92b350959924f9f00213499208652fc7ea050643e8b385c2dac08641f02/ViT-L-14-336px.pt",
}
def iter_file(filename):
    """Yield the lines of *filename*, skipping the first (header) line.

    Lines are yielded exactly as read, including their trailing newline, so
    the generator can be handed straight to ``csv.reader``.

    Args:
        filename: Path (``str`` or ``pathlib.Path``) of the text file to read.

    Yields:
        Each line after the header, in file order. An empty file, or a file
        that only contains a header, yields nothing.
    """
    with pathlib.Path(filename).open("r") as fh:
        # Supply a default: a bare ``next(fh)`` on an empty file raises
        # StopIteration inside the generator, which Python turns into a
        # RuntimeError (PEP 479).
        next(fh, None)
        yield from fh
# Hex colour for each label shown in the entities tab: "N" is the fallback
# label given to noun chunks without an entity type, the remaining keys are
# the entity-type names attached to tokens by the spaCy pipeline.
colour_map = dict(
    N="#f77189",
    CARDINAL="#f7764a",
    DATE="#d98a32",
    EVENT="#bf9632",
    FAC="#a99e31",
    GPE="#90a531",
    LANGUAGE="#68ad31",
    LAW="#32b25e",
    LOC="#34af86",
    MONEY="#35ae9c",
    NORP="#36acac",
    ORDINAL="#37aabd",
    ORG="#39a7d4",
    PERCENT="#539ff4",
    PERSON="#9890f4",
    PRODUCT="#c47ef4",
    QUANTITY="#ef5ff4",
    TIME="#f565d0",
    WORK_OF_ART="#f66baf",
)
# Run on the GPU when torch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# The spaCy pipeline is loaded through its own installed package rather than
# through `spacy.load("en_core_web_sm")`.
import en_core_web_sm

nlp = en_core_web_sm.load()
# Gradio Section:
def update_slider(model):
    """Return a Gradio update that resets the layer slider for *model*.

    Both the slider maximum and its current value are set to 23 for
    "ViT-L/14" and to 11 for every other model name.
    """
    top_layer = 23 if model == "ViT-L/14" else 11
    return gr.update(maximum=top_layer, value=top_layer)
def run_demo(*args):
    """Run the explainability helpers on one image/text pair.

    Accepted positional forms:
        ``run_demo(image, text)``
            Uses model "ViT-B/32" and vision start layer 11.
        ``run_demo(image, text, model_name)``
            Uses start layer 23 for "ViT-L/14" and 11 otherwise, the same
            defaults ``update_slider`` applies to the layer slider.
        ``run_demo(image, text, model_name, vision_layer)``
            Uses the given start layer (converted with ``int``).

    Returns:
        A pair ``(overlapped, highlighted_text)``: the result of
        ``overlay_relevance_map_on_image`` and a list of
        ``(token_string, score_float)`` tuples, one per decoded text token.

    Raises:
        ValueError: If called with any other number of arguments.
    """
    if len(args) == 4:
        image, text, model_name, vision_layer = args
    elif len(args) == 3:
        # This form is what NER_demo uses; it used to fall through to the
        # ValueError below.
        image, text, model_name = args
        vision_layer = 23 if model_name == "ViT-L/14" else 11
    elif len(args) == 2:
        image, text = args
        model_name = "ViT-B/32"
        vision_layer = 11
    else:
        raise ValueError("Unexpected number of parameters")

    # Gradio sliders may deliver the value as a float.
    vision_layer = int(vision_layer)
    model, preprocess = clip.load(model_name, device=device, jit=False)

    orig_image = pad_to_square(image)
    img = preprocess(orig_image).unsqueeze(0).to(device)
    text_input = clip.tokenize([text]).to(device)

    R_text, R_image = interpret(
        model=model,
        image=img,
        texts=text_input,
        device=device,
        start_layer=vision_layer,
    )

    image_relevance = show_img_heatmap(R_image[0], img, orig_image=orig_image, device=device)
    # NOTE(review): the heatmap is computed against the padded `orig_image`
    # but overlaid on the unpadded `image` -- confirm this is intended.
    overlapped = overlay_relevance_map_on_image(image, image_relevance)

    text_scores, text_tokens_decoded = show_heatmap_on_text(text, text_input, R_text[0])
    highlighted_text = [
        (str(token), float(text_scores[i]))
        for i, token in enumerate(text_tokens_decoded)
    ]

    return overlapped, highlighted_text
# Default demo:
# Example rows for the default tab; the header line of the CSV is skipped.
examples = list(csv.reader(iter_file("examples.csv")))

# NOTE(review): indentation was lost in this file; the nesting below is
# reconstructed from the layout calls -- confirm against the running app.
with gr.Blocks(title="CLIP Grounding Explainability") as iface_default:
    # NOTE(review): `read_text` is passed uncalled; Gradio accepts a callable
    # as a component value -- confirm this is intended, not a missing `()`.
    gr.Markdown(pathlib.Path("description.md").read_text)

    with gr.Row():
        with gr.Column() as inputs:
            orig = gr.components.Image(type="pil", label="Original Image")
            description = gr.components.Textbox(label="Image description")
            default_model = gr.Dropdown(
                label="CLIP Model",
                choices=["ViT-B/16", "ViT-B/32", "ViT-L/14"],
                value="ViT-B/32",
            )
            default_layer = gr.Slider(
                label="Vision start layer",
                minimum=0,
                maximum=11,
                step=1,
                value=11,
            )
            submit = gr.Button("Submit")
        with gr.Column() as outputs:
            image = gr.components.Image(type="pil", label="Output Image")
            text = gr.components.HighlightedText(label="Text importance")

    gr.Examples(examples=examples, inputs=[orig, description])

    # Changing the model resets the layer slider range for that model.
    default_model.change(update_slider, inputs=default_model, outputs=default_layer)
    submit.click(
        run_demo,
        inputs=[orig, description, default_model, default_layer],
        outputs=[image, text],
    )
# NER demo:
def add_label_to_img(img, label, add_entity_label=True):
    """Return *img* with a white border and *label* drawn in the top-left.

    Args:
        img: PIL image to decorate; it is not modified, a bordered copy is
            returned.
        label: Text to draw at position (5, 5) on the 45-pixel border.
        add_entity_label: When true and *label* ends in a parenthesised
            word such as ``"a dog (N)"``, the text is drawn in the colour
            that ``colour_map`` assigns to that word. Otherwise, or when the
            word has no entry in ``colour_map``, the text is black.

    Returns:
        The bordered, labelled PIL image.
    """
    img = ImageOps.expand(img, border=45, fill=(255, 255, 255))
    draw = ImageDraw.Draw(img)

    try:
        font = ImageFont.truetype("arial.ttf", 24)
    except OSError:
        # Font file not available on this machine: degrade to Pillow's
        # built-in font instead of failing the whole request.
        font = ImageFont.load_default()

    fill = (0, 0, 0)
    if add_entity_label:
        # The greedy `.*` makes this pick the last "(word)" group in the label.
        m = re.match(r".*\((\w+)\)", label)
        hex_colour = colour_map.get(m.group(1)) if m is not None else None
        if hex_colour is not None:
            # "#rrggbb" -> (r, g, b)
            fill = tuple(int(hex_colour[i:i + 2], 16) for i in (1, 3, 5))

    draw.text((5, 5), label, align="center", fill=fill, font=font)
    return img
def NER_demo(image, text, model_name, vision_layer=None):
    """Explain the full sentence and each of its noun chunks separately.

    ``run_demo`` is called once for the complete *text* and once for every
    noun chunk spaCy finds in it (chunks consisting of a single pronoun are
    skipped). Each resulting overlay is labelled with ``add_label_to_img``.

    Args:
        image: Input image, passed through to ``run_demo``.
        text: Sentence describing the image.
        model_name: CLIP model name, passed through to ``run_demo``.
        vision_layer: Vision start layer for ``run_demo``. When ``None``,
            23 is used for "ViT-L/14" and 11 otherwise, the same defaults
            ``update_slider`` applies to the layer slider.

    Returns:
        A pair ``(labeled_text, gallery_images)``. ``labeled_text`` is a
        dict with the original ``"text"`` and an ``"entities"`` list of
        ``{"entity", "start", "end"}`` dicts (character offsets of each
        chunk); ``gallery_images`` is the list of labelled overlays,
        starting with the one for the complete sentence.
    """
    if vision_layer is None:
        vision_layer = 23 if model_name == "ViT-L/14" else 11

    # As the default image, we run the default demo on the input image and text.
    # run_demo is called in its four-argument form, which it has always accepted.
    overlapped, _ = run_demo(image, text, model_name, vision_layer)
    gallery_images = [
        add_label_to_img(overlapped, "Complete sentence", add_entity_label=False)
    ]

    labeled_text = {"text": text, "entities": []}

    # Then, we run the demo for each of the noun chunks in the text:
    for chunk in nlp(text).noun_chunks:
        if len(chunk) == 1 and chunk[0].pos_ == "PRON":
            continue

        chunk_text = chunk.text
        # Label the chunk with the first entity type found among its tokens,
        # or "N" when none of them carries one.
        chunk_label = next((t.ent_type_ for t in chunk if t.ent_type_ != ''), "N")

        labeled_text["entities"].append(
            {"entity": chunk_label, "start": chunk.start_char, "end": chunk.end_char}
        )
        overlapped, _ = run_demo(image, chunk_text, model_name, vision_layer)
        gallery_images.append(
            add_label_to_img(overlapped, f"{chunk_text} ({chunk_label})")
        )

    return labeled_text, gallery_images


# Example rows for the entities tab; the header line of the CSV is skipped.
entity_examples = list(csv.reader(iter_file("entity_examples.csv")))

# NOTE(review): indentation was lost in this file; the nesting below is
# reconstructed from the layout calls -- confirm against the running app.
with gr.Blocks(title="Entity Grounding explainability using CLIP") as iface_NER:
    # NOTE(review): `read_text` is passed uncalled; Gradio accepts a callable
    # as a component value -- confirm this is intended, not a missing `()`.
    gr.Markdown(pathlib.Path("entity_description.md").read_text)
    with gr.Row():
        with gr.Column() as inputs:
            img = gr.Image(type='pil', label="Original Image")
            intext = gr.components.Textbox(label="Descriptive text")
            ner_model = gr.Dropdown(label="CLIP Model", choices=['ViT-B/16', 'ViT-B/32', 'ViT-L/14'], value="ViT-B/32")
            ner_layer = gr.Slider(label="Vision start layer", minimum=0, maximum=11, step=1, value=11)
            submit = gr.Button("Submit")
        with gr.Column() as outputs:
            text = gr.components.HighlightedText(show_legend=True, color_map=colour_map, label="Noun chunks")
            gallery = gr.components.Gallery(type='pil', label="NER Entity explanations")

    # Examples must fill the input textbox (`intext`), not the output
    # HighlightedText component (`text`).
    gr.Examples(examples=entity_examples, inputs=[img, intext])

    ner_model.change(update_slider, inputs=ner_model, outputs=ner_layer)
    # This tab's outputs (highlighted noun chunks + gallery) are what NER_demo
    # returns; run_demo returns (image, token scores) and does not fit them.
    submit.click(NER_demo, inputs=[img, intext, ner_model, ner_layer], outputs=[text, gallery])
# Combine both demos into a single app with one tab per interface.
demo_tabs = gr.TabbedInterface(
    interface_list=[iface_default, iface_NER],
    tab_names=["Default", "Entities"],
)

# Components created inside this context are added to the tabbed app itself.
with demo_tabs:
    # NOTE(review): `read_text` is passed uncalled; Gradio accepts a callable
    # as a component value -- confirm this is intended, not a missing `()`.
    gr.Markdown(pathlib.Path("footer.md").read_text)

demo_tabs.launch(show_error=True)