File size: 2,909 Bytes
2ddbd72
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d9919e8
2ddbd72
 
 
 
 
 
 
af78d78
b6fefc9
 
a6a2f46
b6fefc9
a6a2f46
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import gradio as gr
import requests
from PIL import Image
from sentence_transformers import SentenceTransformer, util

# Instantiate the 'clip-ViT-B-32' SentenceTransformer once at module import;
# clip_sim_preds reuses this single instance to embed both the image and the text.
model_sentence = SentenceTransformer('clip-ViT-B-32')

# functions
def download_images(url, timeout=3.5):
  """Download the image behind *url* and return it converted to RGB.

  The URL is fetched exactly once; the HTTP status code is read from that
  same response instead of issuing a second request.

  Args:
    url: address of the image to fetch.
    timeout: seconds passed to ``requests.get`` as its timeout.

  Returns:
    A 3-tuple ``(image, status_code, flag)``:
      * on success: the RGB ``PIL.Image``, the HTTP status code, and ``0``;
      * on any failure: ``("error url", "", -1)``.
  """
  # Bound up-front so the error path can always report it, even when the
  # request itself fails before a status code exists.
  status_code = None
  try:
    # The context manager releases the streamed connection when done.
    with requests.get(url, stream=True, timeout=timeout) as response:
      status_code = response.status_code
      # Have urllib3 undo gzip/deflate content encoding before PIL reads
      # the raw byte stream.
      response.raw.decode_content = True
      # convert() fully loads the pixel data, so the image stays usable
      # after the connection is closed; it also normalises RGBA PNGs.
      rgb_im = Image.open(response.raw).convert('RGB')
  except Exception as exc:
    # Best-effort boundary: report the failure and signal it via the flag.
    print("error", status_code, exc)
    return "error url", "", -1
  return rgb_im, status_code, 0

def clip_sim_preds(url, text):
  """Compute the CLIP cosine similarity between an image URL and a text.

  Steps:
  1. Download the image via ``download_images``.
  2. Encode the image and the text with ``model_sentence``.
  3. Return the cosine similarity of the two embeddings.

  Args:
    url: address of the image to compare.
    text: description to compare the image against.

  Returns:
    The similarity as a Python number, ``"error"`` when the download
    failed, or ``"error clip_si"`` when encoding/scoring failed.
  """
  image, status_code, flag = download_images(url)
  # download_images signals failure with a non-zero flag
  if flag != 0:
    return "error"

  try:
    # Encode the image and the text description
    img_emb = model_sentence.encode(image)
    text_emb = model_sentence.encode([text])
    # Single image vs. single text -> take the one resulting score
    cos_scores = util.cos_sim(img_emb, text_emb)
    return cos_scores.item()
  except Exception as exc:
    # Keep the UI responsive on model errors, but do not hide the cause
    # (and do not swallow KeyboardInterrupt/SystemExit as a bare except did).
    print("error clip_si", exc)
    return "error clip_si"
    

# Footer HTML shown below the app: points to the upload-based variant.
article = (
    "<p style='text-align: center'>In case the provided URL does not work, "
    "the same model can be used through image upload here instead: "
    "<a href='https://huggingface.co/spaces/samueldomdey/ClipCosineSimilarityUpload' "
    "target='_blank'>https://huggingface.co/spaces/samueldomdey/ClipCosineSimilarityUpload</a></p>"
)

# Input widgets: the image URL and the text to compare it with.
url_box = gr.inputs.Textbox(
    lines=1,
    placeholder=None,
    default="http://images.cocodataset.org/val2017/000000039769.jpg",
    label="URL",
    optional=False,
)
text_box = gr.inputs.Textbox(
    lines=1,
    placeholder=None,
    default="two cats with black stripes on a purple blanket, tv remotes, green collar",
    label="Text",
    optional=False,
)
# Output widget: the computed cosine similarity (or an error string).
similarity_box = gr.outputs.Textbox(type="auto", label="Cosine similarity")

# Build the app around clip_sim_preds and start serving it.
demo = gr.Interface(
    clip_sim_preds,
    inputs=[url_box, text_box],
    outputs=[similarity_box],
    theme="huggingface",
    title="Clip Cosine similarity",
    description="Clip cosine similarity of an image/text pair",
    article=article,
    allow_flagging=False,
)
demo.launch(debug=True)