Spaces:
Runtime error
Runtime error
Ahsen Khaliq
committed on
Commit
•
bec70cc
1
Parent(s):
b7083ae
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from sentence_transformers import SentenceTransformer, util
|
2 |
+
from PIL import Image
|
3 |
+
import glob
|
4 |
+
import torch
|
5 |
+
import pickle
|
6 |
+
import zipfile
|
7 |
+
import os
|
8 |
+
from tqdm.autonotebook import tqdm
|
9 |
+
import gradio as gr
|
10 |
+
|
11 |
+
|
12 |
+
# Here we load the multilingual CLIP model. Note, this model can only encode text.
# If you need embeddings for images, you must load the 'clip-ViT-B-32' model
model = SentenceTransformer('clip-ViT-B-32-multilingual-v1')


# Next, we get about 25k images from Unsplash.
# The download/extract step is skipped entirely when the photos folder
# already exists and is non-empty (e.g. on a warm restart).
img_folder = 'photos/'
if not os.path.exists(img_folder) or len(os.listdir(img_folder)) == 0:
    os.makedirs(img_folder, exist_ok=True)

    photo_filename = 'unsplash-25k-photos.zip'
    if not os.path.exists(photo_filename):  # Download dataset if it does not exist
        # NOTE(review): fetched over plain http — confirm sbert.net redirects to https.
        util.http_get('http://sbert.net/datasets/'+photo_filename, photo_filename)

    # Extract all images into img_folder (member-by-member so tqdm can show progress)
    with zipfile.ZipFile(photo_filename, 'r') as zf:
        for member in tqdm(zf.infolist(), desc='Extracting'):
            zf.extract(member, img_folder)
|
30 |
+
|
31 |
+
# Now, we need to compute the embeddings.
# To speed things up, we distribute pre-computed embeddings.
# Otherwise you can also encode the images yourself.
# To encode an image, you can use the following code:
# from PIL import Image
# img_emb = model.encode(Image.open(filepath))

use_precomputed_embeddings = True

if use_precomputed_embeddings:
    emb_filename = 'unsplash-25k-photos-embeddings.pkl'
    if not os.path.exists(emb_filename):  # Download embeddings if they do not exist
        util.http_get('http://sbert.net/datasets/'+emb_filename, emb_filename)

    # NOTE(review): pickle.load on a downloaded file executes arbitrary code if the
    # source is ever compromised — acceptable only because sbert.net is trusted.
    # The pickle yields (list of bare image filenames, embedding matrix).
    with open(emb_filename, 'rb') as fIn:
        img_names, img_emb = pickle.load(fIn)
    print("Images:", len(img_names))
else:
    # For embedding images, we need the non-multilingual CLIP model
    img_model = SentenceTransformer('clip-ViT-B-32')

    # NOTE(review): in this branch img_names already carry the 'photos/' prefix,
    # while the precomputed pickle stores bare filenames; search() joins
    # img_folder onto each name, which would double-prefix paths here —
    # confirm before flipping use_precomputed_embeddings to False.
    img_names = list(glob.glob('photos/*.jpg'))
    print("Images:", len(img_names))
    img_emb = img_model.encode([Image.open(filepath) for filepath in img_names], batch_size=128, convert_to_tensor=True, show_progress_bar=True)
|
60 |
+
|
61 |
+
# Next, we define a search function.
def search(query):
    """Return the file path of the best-matching image for a text query.

    The query string is encoded with the multilingual CLIP text model and
    compared against all precomputed image embeddings via cosine similarity;
    the path of the single top-ranked image is returned, or ``None`` when
    the corpus is empty.
    """
    # First, we encode the query (which can either be an image or a text string)
    query_emb = model.encode([query], convert_to_tensor=True, show_progress_bar=False)

    # util.semantic_search computes the cosine similarity between the query
    # embedding and all image embeddings, returning the top_k highest-ranked
    # hits for our single query (hence the [0]).
    hits = util.semantic_search(query_emb, img_emb, top_k=1)[0]

    # Explicit guard instead of the misleading `for hit in hits: return ...`
    # pattern, which could only ever run its first iteration and fell through
    # to an implicit None on an empty hit list.
    if not hits:
        return None
    return os.path.join(img_folder, img_names[hits[0]['corpus_id']])
|
73 |
+
|
74 |
+
# Static text shown around the demo UI.
title = "Image Search"
description = "demo for multilingual text2image search for 50+ languages. To use it, simply add your text, or click one of the examples to load them. Read more at the links below."
article = "<p style='text-align: center'><a href='https://www.sbert.net/'>SentenceTransformers Documentation</a> | <a href='https://github.com/UKPLab/sentence-transformers/tree/master/examples/applications/image-search'>Github Repo</a></p>"

# Build and launch the Gradio demo: one textbox in, one image (by file path) out.
# NOTE(review): gr.inputs / gr.outputs are the legacy Gradio 2.x namespaces,
# removed in Gradio 3+ (use gr.Textbox / gr.Image there) — confirm the pinned
# gradio version; this may be the cause of the Space's runtime error.
gr.Interface(
    search,
    gr.inputs.Textbox(label="Input"),
    gr.outputs.Image(type="file", label="Output"),
    title=title,
    description=description,
    article=article,
    # Example queries in several languages (English, German, Spanish,
    # Chinese, Russian, Turkish, Japanese).
    examples=[
        ['Two dogs playing in the snow'],
        ['Eine Katze auf einem Stuhl'],
        ['Muchos peces'],
        ['棕榈树的沙滩'],
        ['Закат на пляже'],
        ['Parkta bir köpek'],
        ['夜のニューヨーク']
    ]
).launch()
|