h4d35 committed on
Commit
11fbbe6
1 Parent(s): 4dedbfd

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -0
app.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from PIL import Image
3
+ from sentence_transformers import SentenceTransformer, util
4
+
5
# Load the CLIP checkpoint once at import time; the same model instance is
# used to embed both the image and the text of every request.
CLIP_MODEL_NAME = 'clip-ViT-B-32'
model_sentence = SentenceTransformer(CLIP_MODEL_NAME)
7
+
8
def clip_sim_preds(img, text):
    """Return the cosine similarity between an image and a text description.

    Both inputs are embedded with the module-level ``model_sentence`` model
    and the two embeddings are compared with ``util.cos_sim``.

    Args:
        img: The image to score. The app passes a PIL image in RGB mode.
        text: The text description to compare against the image.

    Returns:
        The similarity as a scalar, or the string ``"error"`` when encoding
        or scoring fails (the UI shows this value in its output textbox).
    """
    try:
        img_emb = model_sentence.encode(img)
        # The text is wrapped in a list, i.e. encoded as a batch of one.
        text_emb = model_sentence.encode([text])
        cos_scores = util.cos_sim(img_emb, text_emb)
        # One image against one text gives a single score; unwrap it.
        return cos_scores.item()
    except Exception:
        # Catch Exception rather than using a bare ``except`` so that
        # KeyboardInterrupt/SystemExit still propagate, and log the traceback
        # so failures are diagnosable instead of being silently discarded.
        import logging

        logging.getLogger(__name__).exception(
            "Failed to compute CLIP similarity"
        )
        return "error"
26
+
27
# Build the app: the user uploads an image and enters a text, and the app
# shows the cosine similarity returned by clip_sim_preds.
image_input = gr.inputs.Image(
    invert_colors=False,
    image_mode="RGB",
    type="pil",
    source="upload",
    label=None,
    optional=False,
)
text_input = gr.inputs.Textbox(
    lines=1,
    placeholder=None,
    default="two cats with black stripes on a purple blanket, tv remotes, green collar",
    label="Text",
    optional=False,
)
similarity_output = gr.outputs.Textbox(type="auto", label="Cosine similarity")

demo = gr.Interface(
    clip_sim_preds,
    inputs=[image_input, text_input],
    outputs=[similarity_output],
    theme="huggingface",
    title="Clip Cosine similarity",
    description="Cosine similarity of image/text pair using a multimodal clip model",
    allow_flagging=False,
)
# debug=True is passed straight through to launch(), as in the original call.
demo.launch(debug=True)