paul hilders committed
Commit 66dfac7
1 Parent(s): c81ac67

Change descriptions and titles

Files changed (1): app.py  +9 -1
app.py CHANGED
@@ -60,11 +60,18 @@ inputs = [input_img, input_txt]
  outputs = [gr.inputs.Image(type='pil', label="Output Image"), "highlight"]
  
  
+ description = """A demonstration based on the Generic Attention-model Explainability method for Interpreting Bi-Modal
+ Transformers by Chefer et al. (2021): https://github.com/hila-chefer/Transformer-MM-Explainability. \n \n
+ This demo shows attribution scores on both the image and the text input when CLIP is presented with a
+ <text,image> pair. Attributions are computed as Gradient-weighted Attention Rollout (Chefer et al.,
+ 2021), and can be thought of as an estimate of the effective attention CLIP pays to its input when
+ computing a multimodal representation."""
+ 
  iface = gr.Interface(fn=run_demo,
                       inputs=inputs,
                       outputs=outputs,
                       title="CLIP Grounding Explainability",
-                      description="A demonstration based on the Generic Attention-model Explainability method for Interpreting Bi-Modal Transformers by Chefer et al. (2021): https://github.com/hila-chefer/Transformer-MM-Explainability.",
+                      description=description,
                       examples=[["example_images/London.png", "London Eye"],
                                 ["example_images/London.png", "Big Ben"],
                                 ["example_images/harrypotter.png", "Harry"],
@@ -121,6 +128,7 @@ outputs_NER = ["highlight", gr.Gallery(type='pil', label="NER Entity explanation
  iface_NER = gr.Interface(fn=NER_demo,
                           inputs=inputs_NER,
                           outputs=outputs_NER,
+                          title="Named Entity Grounding explainability using CLIP",
                           examples=[["example_images/London.png", "In this image we see Big Ben and the London Eye, on both sides of the river Thames."]],
                           cache_examples=False)
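
The new description refers to Gradient-weighted Attention Rollout (Chefer et al., 2021). As a rough illustration of that idea only (not the code in this repository), the sketch below assumes per-layer attention maps and their gradients with respect to the CLIP similarity score are already available as PyTorch tensors; the function and argument names are hypothetical.

import torch

def gradient_weighted_rollout(attentions, gradients):
    # Minimal sketch of Gradient-weighted Attention Rollout (Chefer et al., 2021).
    # `attentions` and `gradients` are illustrative: lists of per-layer
    # [heads, tokens, tokens] tensors, where `gradients` holds the gradient of the
    # CLIP image-text similarity score with respect to each attention map.
    num_tokens = attentions[0].shape[-1]
    rollout = torch.eye(num_tokens)                   # start from the identity: each token attends only to itself
    for attn, grad in zip(attentions, gradients):
        cam = (grad * attn).clamp(min=0).mean(dim=0)  # weight heads by their gradients, keep positive part, average heads
        cam = cam + torch.eye(num_tokens)             # account for the residual connection
        cam = cam / cam.sum(dim=-1, keepdim=True)     # re-normalise rows
        rollout = cam @ rollout                       # accumulate relevance across layers
    return rollout                                    # [tokens, tokens] relevance map

Factoring the blurb out into a module-level description variable, as the diff does, also keeps the gr.Interface call short and lets the text be edited without touching the interface wiring.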
134