JackerKun DrishtiSharma committed on
Commit
8fc1ab1
Β·
0 Parent(s):

Duplicate from DrishtiSharma/Text-to-Image-search-using-CLIP

Browse files

Co-authored-by: Drishti Sharma <DrishtiSharma@users.noreply.huggingface.co>

.gitattributes ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bin.* filter=lfs diff=lfs merge=lfs -text
5
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.model filter=lfs diff=lfs merge=lfs -text
12
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
13
+ *.onnx filter=lfs diff=lfs merge=lfs -text
14
+ *.ot filter=lfs diff=lfs merge=lfs -text
15
+ *.parquet filter=lfs diff=lfs merge=lfs -text
16
+ *.pb filter=lfs diff=lfs merge=lfs -text
17
+ *.pt filter=lfs diff=lfs merge=lfs -text
18
+ *.pth filter=lfs diff=lfs merge=lfs -text
19
+ *.rar filter=lfs diff=lfs merge=lfs -text
20
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
21
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
22
+ *.tflite filter=lfs diff=lfs merge=lfs -text
23
+ *.tgz filter=lfs diff=lfs merge=lfs -text
24
+ *.xz filter=lfs diff=lfs merge=lfs -text
25
+ *.zip filter=lfs diff=lfs merge=lfs -text
26
+ *.zstandard filter=lfs diff=lfs merge=lfs -text
27
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
28
+ features.npy filter=lfs diff=lfs merge=lfs -text
29
+ features_debug.npy filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Text To Image Search Using CLIP
3
+ emoji: πŸ‘€
4
+ colorFrom: yellow
5
+ colorTo: purple
6
+ sdk: gradio
7
+ app_file: app.py
8
+ pinned: false
9
+ duplicated_from: DrishtiSharma/Text-to-Image-search-using-CLIP
10
+ ---
11
+
12
+ # Configuration
13
+
14
+ `title`: _string_
15
+ Display title for the Space
16
+
17
+ `emoji`: _string_
18
+ Space emoji (emoji-only character allowed)
19
+
20
+ `colorFrom`: _string_
21
+ Color for Thumbnail gradient (red, yellow, green, blue, indigo, purple, pink, gray)
22
+
23
+ `colorTo`: _string_
24
+ Color for Thumbnail gradient (red, yellow, green, blue, indigo, purple, pink, gray)
25
+
26
+ `sdk`: _string_
27
+ Can be either `gradio` or `streamlit`
28
+
29
+ `sdk_version` : _string_
30
+ Only applicable for `streamlit` SDK.
31
+ See [doc](https://hf.co/docs/hub/spaces) for more info on supported versions.
32
+
33
+ `app_file`: _string_
34
+ Path to your main application file (which contains either `gradio` or `streamlit` Python code).
35
+ Path is relative to the root of the repository.
36
+
37
+ `pinned`: _boolean_
38
+ Whether the Space stays on top of your list.
app.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Acknowledgments — this project is inspired by:
#   1. https://github.com/haltakov/natural-language-image-search by Vladimir Haltakov
#   2. OpenAI's CLIP

# --- Imports ---
import torch
import requests
import numpy as np
import pandas as pd
import gradio as gr
from io import BytesIO
from PIL import Image as PILIMAGE
from transformers import CLIPProcessor, CLIPModel, CLIPTokenizer

# Run on GPU when one is available, otherwise fall back to CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# CLIP model plus its matching processor and tokenizer.
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(device)
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-base-patch32")

# Photo metadata, precomputed CLIP image features, and the id list whose
# row order maps feature rows back to photo records.
photos = pd.read_csv("./photos_debug.tsv000", sep='\t', header=0)
photo_features = np.load("./features_debug.npy")
photo_ids = pd.read_csv("./photo_ids_debug.csv")
photo_ids = list(photo_ids['photo_id'])
33
def find_best_matches(text):
    """Return the three Unsplash images that best match *text*.

    Encodes the query with CLIP, scores it against the precomputed photo
    features by dot product, and downloads the top-3 matching photos.

    Parameters
    ----------
    text : str
        Natural-language search query.

    Returns
    -------
    list[PIL.Image.Image]
        The three best-matching images, best match first.
    """
    # Encode the query with CLIP; inference only, so skip autograd state.
    # (The previous version also ran the tokenizer here, but that result
    # was immediately overwritten by the processor call — removed.)
    with torch.no_grad():
        inputs = processor(text=[text], images=None, return_tensors="pt", padding=True)
        # Move inputs to the model's device, then bring the features back
        # to CPU so numpy can consume them (previously this crashed on GPU).
        inputs = {name: tensor.to(device) for name, tensor in inputs.items()}
        text_encoded = model.get_text_features(**inputs).detach().cpu().numpy()

    # Raw dot-product similarity of the query against every photo feature
    # row (not normalized; ranking is what matters here).
    similarities = (text_encoded @ photo_features.T).squeeze(0)

    # Rank once and take the top 3 (the old code re-sorted the full list
    # on every loop iteration).
    top_indices = np.argsort(similarities)[::-1][:3]

    matched_images = []
    for idx in top_indices:
        photo_id = photo_ids[idx]
        photo_data = photos[photos["photo_id"] == photo_id].iloc[0]
        # 640px-wide rendition; timeout so a stuck CDN request can't hang the app.
        response = requests.get(photo_data["photo_image_url"] + "?w=640", timeout=10)
        img = PILIMAGE.open(BytesIO(response.content))
        matched_images.append(img)
    return matched_images
56
+
57
+
58
# Gradio UI: one text box in, a carousel of the top-3 matching images out.
# (Uses the legacy gr.inputs / gr.outputs API this Space was built against.)
query_box = gr.inputs.Textbox(
    lines=1,
    label="Text query",
    placeholder="Introduce the search text...",
)
example_queries = [
    ["Dog sticking its tongue out"],
    ["Traffic light on the right"],
    ["Honey bee eating honey"],
    ["Leaves of Bryophyllum fallen on the ground"],
    ["Cute Kangaroo"],
    ["Athlete holding a bike in his hands"],
    ["Happy puppy"],
    ["Sad puppy"],
    ["Leopard hiding in the bushes"],
]
iface = gr.Interface(
    fn=find_best_matches,
    inputs=[query_box],
    examples=example_queries,
    theme="grass",
    outputs=gr.outputs.Carousel([gr.outputs.Image(type="pil")]),
    enable_queue=True,
    title="Text to Image search using CLIP",
    description="This application displays TOP THREE images from Unsplash dataset that best match the natural language search query provided by the user.",
).launch()
features.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31ac381e52fa007821a642b5808ac9a6eaf7163322ab340d36bcc3c2a94a38c8
3
+ size 25596032
features_debug.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f375dcc5291457739bccad957f9a418fa7fef87f2c68acc487c58d8bb7672b26
3
+ size 50995328
photo_ids.csv ADDED
The diff for this file is too large to render. See raw diff
 
photo_ids_debug.csv ADDED
The diff for this file is too large to render. See raw diff
 
photos.tsv000 ADDED
The diff for this file is too large to render. See raw diff
 
photos_debug.tsv000 ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ transformers
2
+ torch
3
+ numpy
4
+ ftfy