samueldomdey committed on
Commit
2ddbd72
1 Parent(s): 10bb746

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +76 -0
app.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import requests
3
+ from PIL import Image
4
+ from sentence_transformers import SentenceTransformer, util
5
+
6
# Load the CLIP checkpoint once at import time; the same instance encodes
# both the image and the text in clip_sim_preds.
model_sentence = SentenceTransformer("clip-ViT-B-32")
8
+
9
+ # functions
10
def download_images(url):
    """Download the image at ``url`` and return it converted to RGB.

    Args:
        url: Address of the image to fetch.

    Returns:
        A 3-tuple ``(image, status_code, flag)``:

        * on success: the RGB-converted PIL image, the HTTP status code of
          the response, and ``0``;
        * on any failure (bad URL, network error, timeout, body that is not
          a readable image): ``("error url", "", -1)``.
    """
    # Function-scope import so the block does not rely on a new file-level import.
    from io import BytesIO

    # Bound before the try so the error path can always log it, even when the
    # request itself fails before a status code exists.
    status_code = None
    try:
        # A single request provides both the status code and the body; the
        # timeout keeps an unresponsive host from hanging the app.
        response = requests.get(url, timeout=3.5)
        status_code = response.status_code
        # response.content is fully read and transfer-decoded (gzip/deflate),
        # and the connection is released once it has been consumed.
        image = Image.open(BytesIO(response.content))
        # Normalise every image to RGB (e.g. PNG files may be RGBA).
        rgb_im = image.convert('RGB')
    except Exception as exc:
        # UI boundary: any download/decoding problem becomes the error tuple
        # instead of crashing the caller. KeyboardInterrupt/SystemExit still
        # propagate because they are not Exception subclasses.
        print("error", status_code, exc)
        return "error url", "", -1
    return rgb_im, status_code, 0
34
+
35
def clip_sim_preds(url, text):
    """Compute the CLIP cosine similarity between an image URL and a text.

    The image is fetched with ``download_images``; image and text are then
    encoded with the module-level ``model_sentence`` and compared with
    ``util.cos_sim``.

    Args:
        url: Address of the image to compare.
        text: Text description to compare against the image.

    Returns:
        The cosine similarity as a Python number on success, the string
        ``"error"`` when the image could not be downloaded, or the string
        ``"error clip_si"`` when encoding or the similarity computation fails.
    """
    # The status code is not needed here; only the flag decides the path.
    image, _status_code, flag = download_images(url)

    # Guard clause: a non-zero flag means no usable image was downloaded.
    if flag != 0:
        return "error"

    try:
        # Encode the image and the text with the same model.
        img_emb = model_sentence.encode(image)
        text_emb = model_sentence.encode([text])
        # Single image vs. single text -> one similarity value.
        cos_scores = util.cos_sim(img_emb, text_emb)
        return cos_scores.item()
    except Exception as exc:
        # Report the cause instead of swallowing it silently; interpreter-exit
        # signals (KeyboardInterrupt/SystemExit) are deliberately not caught.
        print("error clip_sim_preds", exc)
        return "error clip_si"
65
+
66
+
67
# Gradio app: the user supplies an image URL and a text; the app shows the
# cosine similarity computed by clip_sim_preds.
_demo = gr.Interface(
    clip_sim_preds,
    inputs=[
        gr.inputs.Textbox(
            lines=1,
            placeholder=None,
            default="http://images.cocodataset.org/val2017/000000039769.jpg",
            label="URL",
            optional=False,
        ),
        gr.inputs.Textbox(
            lines=1,
            placeholder=None,
            default="two cats with black stripes on a purple blanket, tv remotes, green collar",
            label="Text",
            optional=False,
        ),
    ],
    outputs=[
        gr.outputs.Textbox(type="auto", label="Cosine similarity"),
    ],
    theme="huggingface",
    title="Cosine similarity",
    description="Cosine similarity of image/text pair using a multimodal clip model",
    allow_flagging=False,
)

# Start the server; debug=True is kept exactly as in the original call.
_demo.launch(debug=True)