Julien Simon committed on
Commit
629288e
1 Parent(s): b25ea0e

Initial version

Browse files
Files changed (2) hide show
  1. app.py +42 -0
  2. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from PIL import Image
3
+ from transformers import BridgeTowerForImageAndTextRetrieval, BridgeTowerProcessor
4
+
5
# Checkpoint identifier passed to both `from_pretrained` calls below, so the
# processor and the model are always loaded from the same source.
model_id = "BridgeTower/bridgetower-large-itm-mlm-gaudi"

# Loaded once at import time and reused by every call to `process`.
processor = BridgeTowerProcessor.from_pretrained(model_id)
model = BridgeTowerForImageAndTextRetrieval.from_pretrained(model_id)
8
+
9
+ # Process an image
10
def process(image, texts):
    """Score each comma-separated candidate text against an image.

    Args:
        image: The image received from the Gradio image input; it is passed
            unchanged to ``processor``.
        texts: One string holding the candidate texts separated by commas.
            Surrounding whitespace is stripped from each candidate and empty
            candidates are ignored.

    Returns:
        A dict mapping each candidate text to its score formatted with two
        decimals, ordered from the highest score to the lowest. The dict is
        empty when ``texts`` contains no non-empty candidate.
    """
    # Function-scope import: the module does not import torch at the top.
    import torch

    raw_scores = {}
    for candidate in texts.split(","):
        candidate = candidate.strip()
        if not candidate:
            # Skip fragments produced by trailing or doubled commas.
            continue
        encoding = processor(image, candidate, return_tensors="pt")
        # Inference only: no gradients are needed for scoring.
        with torch.no_grad():
            outputs = model(**encoding)
        raw_scores[candidate] = outputs.logits[0, 1].item()

    # Rank on the numeric values. Sorting the formatted strings would compare
    # them lexicographically (e.g. "9.50" would rank above "10.20").
    ranked = sorted(raw_scores.items(), key=lambda item: item[1], reverse=True)
    return {candidate: "{:.2f}".format(score) for candidate, score in ranked}
20
+
21
+
22
# Inputs: the image to score against and the candidate texts as one string.
image = gr.Image(label="Image")
texts = gr.Text(label="List of comma-separated texts")

# Output: the mapping returned by `process`, shown as JSON.
scores = gr.JSON(label="Scores")

# Adjacent string literals with an explicit separating space. A backslash
# continuation inside a single literal makes the spacing between the two
# sentences depend on how the following source line is indented.
description = (
    "This Space lets you score a list of texts given an input image. "
    "This can be used to find the most relevant text for a given image, "
    "or for semantic search on images."
)

iface = gr.Interface(
    theme="huggingface",
    description=description,
    fn=process,
    inputs=[image, texts],
    outputs=scores,
    examples=[],
    allow_flagging="never",
)

iface.launch()
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ git+https://github.com/huggingface/transformers
2
+ torch
3
+ requests
4
+ Pillow