JackerKun DrishtiSharma committed on
Commit
8fc1ab1
Β·
0 Parent(s):

Duplicate from DrishtiSharma/Text-to-Image-search-using-CLIP

Browse files

Co-authored-by: Drishti Sharma <DrishtiSharma@users.noreply.huggingface.co>

.gitattributes ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bin.* filter=lfs diff=lfs merge=lfs -text
5
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.model filter=lfs diff=lfs merge=lfs -text
12
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
13
+ *.onnx filter=lfs diff=lfs merge=lfs -text
14
+ *.ot filter=lfs diff=lfs merge=lfs -text
15
+ *.parquet filter=lfs diff=lfs merge=lfs -text
16
+ *.pb filter=lfs diff=lfs merge=lfs -text
17
+ *.pt filter=lfs diff=lfs merge=lfs -text
18
+ *.pth filter=lfs diff=lfs merge=lfs -text
19
+ *.rar filter=lfs diff=lfs merge=lfs -text
20
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
21
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
22
+ *.tflite filter=lfs diff=lfs merge=lfs -text
23
+ *.tgz filter=lfs diff=lfs merge=lfs -text
24
+ *.xz filter=lfs diff=lfs merge=lfs -text
25
+ *.zip filter=lfs diff=lfs merge=lfs -text
26
+ *.zstandard filter=lfs diff=lfs merge=lfs -text
27
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
28
+ features.npy filter=lfs diff=lfs merge=lfs -text
29
+ features_debug.npy filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Text To Image Search Using CLIP
3
+ emoji: πŸ‘€
4
+ colorFrom: yellow
5
+ colorTo: purple
6
+ sdk: gradio
7
+ app_file: app.py
8
+ pinned: false
9
+ duplicated_from: DrishtiSharma/Text-to-Image-search-using-CLIP
10
+ ---
11
+
12
+ # Configuration
13
+
14
+ `title`: _string_
15
+ Display title for the Space
16
+
17
+ `emoji`: _string_
18
+ Space emoji (emoji-only character allowed)
19
+
20
+ `colorFrom`: _string_
21
+ Color for Thumbnail gradient (red, yellow, green, blue, indigo, purple, pink, gray)
22
+
23
+ `colorTo`: _string_
24
+ Color for Thumbnail gradient (red, yellow, green, blue, indigo, purple, pink, gray)
25
+
26
+ `sdk`: _string_
27
+ Can be either `gradio` or `streamlit`
28
+
29
+ `sdk_version` : _string_
30
+ Only applicable for `streamlit` SDK.
31
+ See [doc](https://hf.co/docs/hub/spaces) for more info on supported versions.
32
+
33
+ `app_file`: _string_
34
+ Path to your main application file (which contains either `gradio` or `streamlit` Python code).
35
+ Path is relative to the root of the repository.
36
+
37
+ `pinned`: _boolean_
38
+ Whether the Space stays on top of your list.
app.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Acknowledgments — this project is inspired by:
#   1. https://github.com/haltakov/natural-language-image-search by Vladimir Haltakov
#   2. OpenAI's CLIP

# --- Imports ---
import torch
import requests
import numpy as np
import pandas as pd
import gradio as gr
from io import BytesIO
from PIL import Image as PILIMAGE
from transformers import CLIPProcessor, CLIPModel, CLIPTokenizer

# Run on GPU when one is available, otherwise fall back to CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# CLIP model plus its matching processor and tokenizer.
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(device)
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-base-patch32")

# Photo metadata, precomputed CLIP image features, and the id list whose
# row order maps feature rows back to photo records.
photos = pd.read_csv("./photos_debug.tsv000", sep='\t', header=0)
photo_features = np.load("./features_debug.npy")
photo_ids = pd.read_csv("./photo_ids_debug.csv")
photo_ids = list(photo_ids['photo_id'])
33
def find_best_matches(text):
    """Return the three Unsplash images that best match *text*.

    Encodes the query with CLIP, scores it against the precomputed photo
    features by dot product, and downloads the top-3 matching photos.

    Parameters
    ----------
    text : str
        Natural-language search query.

    Returns
    -------
    list[PIL.Image.Image]
        The three best-matching images, best match first.
    """
    # Encode the query with CLIP; inference only, so skip autograd state.
    # (The previous version also ran the tokenizer here, but that result
    # was immediately overwritten by the processor call — removed.)
    with torch.no_grad():
        inputs = processor(text=[text], images=None, return_tensors="pt", padding=True)
        # Move inputs to the model's device, then bring the features back
        # to CPU so numpy can consume them (previously this crashed on GPU).
        inputs = {name: tensor.to(device) for name, tensor in inputs.items()}
        text_encoded = model.get_text_features(**inputs).detach().cpu().numpy()

    # Raw dot-product similarity of the query against every photo feature
    # row (not normalized; ranking is what matters here).
    similarities = (text_encoded @ photo_features.T).squeeze(0)

    # Rank once and take the top 3 (the old code re-sorted the full list
    # on every loop iteration).
    top_indices = np.argsort(similarities)[::-1][:3]

    matched_images = []
    for idx in top_indices:
        photo_id = photo_ids[idx]
        photo_data = photos[photos["photo_id"] == photo_id].iloc[0]
        # 640px-wide rendition; timeout so a stuck CDN request can't hang the app.
        response = requests.get(photo_data["photo_image_url"] + "?w=640", timeout=10)
        img = PILIMAGE.open(BytesIO(response.content))
        matched_images.append(img)
    return matched_images
56
+
57
+
58
# Gradio UI: one text box in, a carousel of the top-3 matching images out.
# (Uses the legacy gr.inputs / gr.outputs API this Space was built against.)
query_box = gr.inputs.Textbox(
    lines=1,
    label="Text query",
    placeholder="Introduce the search text...",
)
example_queries = [
    ["Dog sticking its tongue out"],
    ["Traffic light on the right"],
    ["Honey bee eating honey"],
    ["Leaves of Bryophyllum fallen on the ground"],
    ["Cute Kangaroo"],
    ["Athlete holding a bike in his hands"],
    ["Happy puppy"],
    ["Sad puppy"],
    ["Leopard hiding in the bushes"],
]
iface = gr.Interface(
    fn=find_best_matches,
    inputs=[query_box],
    examples=example_queries,
    theme="grass",
    outputs=gr.outputs.Carousel([gr.outputs.Image(type="pil")]),
    enable_queue=True,
    title="Text to Image search using CLIP",
    description="This application displays TOP THREE images from Unsplash dataset that best match the natural language search query provided by the user.",
).launch()
features.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31ac381e52fa007821a642b5808ac9a6eaf7163322ab340d36bcc3c2a94a38c8
3
+ size 25596032
features_debug.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f375dcc5291457739bccad957f9a418fa7fef87f2c68acc487c58d8bb7672b26
3
+ size 50995328
photo_ids.csv ADDED
The diff for this file is too large to render. See raw diff
 
photo_ids_debug.csv ADDED
The diff for this file is too large to render. See raw diff
 
photos.tsv000 ADDED
The diff for this file is too large to render. See raw diff
 
photos_debug.tsv000 ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ transformers
2
+ torch
3
+ numpy
4
+ ftfy