Spaces:

h4d35
/

CosineSim

Sleeping

App Files Files Community

h4d35 committed on Apr 25, 2022

Commit

11fbbe6

•

1 Parent(s): 4dedbfd

Create app.py

Browse files

Files changed (1) hide show

app.py +36 -0

app.py ADDED Viewed

	@@ -0,0 +1,36 @@

+import gradio as gr
+from PIL import Image
+from sentence_transformers import SentenceTransformer, util
+# define model
+# Load the 'clip-ViT-B-32' SentenceTransformer once at module level;
+# clip_sim_preds uses this single instance to encode both the image and the text.
+model_sentence = SentenceTransformer('clip-ViT-B-32')
+def clip_sim_preds(img, text):
+  '''
+  This function:
+  1. Takes in an IMG/Text/ pair, IMG already as PIl image in RGB form
+  2. Feeds the image/text-pair into the defined clip model
+  3. returns calculated similarities
+  '''
+  try:
+      # Encode an image:
+      img_emb = model_sentence.encode(img)
+      # Encode text descriptions
+      text_emb = model_sentence.encode([text])
+      # Compute cosine similarities
+      cos_scores = util.cos_sim(img_emb, text_emb)
+      # return the predicted similarity
+      return cos_scores.item()
+  except:
+      return "error"
+# define app
+# takes in upload of an image and a corresponding text, computes and returns cosine similarity
+gr.Interface(clip_sim_preds,
+             inputs=[gr.inputs.Image(invert_colors=False, image_mode="RGB", type="pil", source="upload", label=None, optional=False),
+                     gr.inputs.Textbox(lines=1, placeholder=None, default="two cats with black stripes on a purple blanket, tv remotes, green collar", label="Text", optional=False)],
+             outputs=[gr.outputs.Textbox(type="auto", label="Cosine similarity")],
+             theme="huggingface",
+             title="Clip Cosine similarity",
+             description="Cosine similarity of image/text pair using a multimodal clip model",
+             allow_flagging=False,).launch(debug=True)