Ahsen Khaliq committed on
Commit
bec70cc
1 Parent(s): b7083ae

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +94 -0
app.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sentence_transformers import SentenceTransformer, util
2
+ from PIL import Image
3
+ import glob
4
+ import torch
5
+ import pickle
6
+ import zipfile
7
+ import os
8
+ from tqdm.autonotebook import tqdm
9
+ import gradio as gr
10
+
11
+
12
# Load the multilingual CLIP model. NOTE: this model can only encode TEXT.
# Encoding images requires the separate 'clip-ViT-B-32' model (see the
# else-branch below).
model = SentenceTransformer('clip-ViT-B-32-multilingual-v1')

# Next, we get about 25k images from Unsplash.
img_folder = 'photos/'
if not os.path.exists(img_folder) or len(os.listdir(img_folder)) == 0:
    os.makedirs(img_folder, exist_ok=True)

    photo_filename = 'unsplash-25k-photos.zip'
    if not os.path.exists(photo_filename):  # Download dataset if it does not exist
        util.http_get('http://sbert.net/datasets/' + photo_filename, photo_filename)

    # Extract all images
    with zipfile.ZipFile(photo_filename, 'r') as zf:
        for member in tqdm(zf.infolist(), desc='Extracting'):
            zf.extract(member, img_folder)

# Now, we need the image embeddings.
# To speed things up, we distribute pre-computed embeddings.
# Otherwise you can also encode the images yourself. To encode an image:
#   from PIL import Image
#   img_emb = model.encode(Image.open(filepath))
use_precomputed_embeddings = True

if use_precomputed_embeddings:
    emb_filename = 'unsplash-25k-photos-embeddings.pkl'
    if not os.path.exists(emb_filename):  # Download embeddings if they do not exist
        util.http_get('http://sbert.net/datasets/' + emb_filename, emb_filename)

    # NOTE(review): unpickling is only acceptable here because the file comes
    # from the trusted sbert.net host — never pickle.load untrusted data.
    with open(emb_filename, 'rb') as fIn:
        img_names, img_emb = pickle.load(fIn)
    print("Images:", len(img_names))
else:
    # For embedding images, we need the non-multilingual CLIP model.
    img_model = SentenceTransformer('clip-ViT-B-32')

    img_names = list(glob.glob('photos/*.jpg'))
    print("Images:", len(img_names))
    img_emb = img_model.encode(
        [Image.open(filepath) for filepath in img_names],
        batch_size=128,
        convert_to_tensor=True,
        show_progress_bar=True,
    )

# Removed leftover debug code that loaded a second 'clip-ViT-B-32' model at
# startup and computed `one_emb`/`comb_emb`, which were never used anywhere
# in this file — it only added a large, pointless model download/load.
60
+
61
# Next, we define a search function.
def search(query):
    """Return the file path of the image best matching *query*.

    The query string is embedded with the multilingual CLIP text model
    and compared via cosine similarity against the precomputed image
    embeddings; the path of the single top-ranked image is returned
    (None if there are no hits).
    """
    # Encode the query (which can either be an image or a text string).
    query_emb = model.encode([query], convert_to_tensor=True, show_progress_bar=False)

    # util.semantic_search computes the cosine similarity between the query
    # embedding and all image embeddings, returning the top_k ranked images.
    hits = util.semantic_search(query_emb, img_emb, top_k=1)[0]
    if not hits:
        return None
    best = hits[0]
    return os.path.join(img_folder, img_names[best['corpus_id']])
73
+
74
# UI metadata for the Gradio demo page.
title = "Image Search"
description = "demo for multilingual text2image search for 50+ languages. To use it, simply add your text, or click one of the examples to load them. Read more at the links below."
article = "<p style='text-align: center'><a href='https://www.sbert.net/'>SentenceTransformers Documentation</a> | <a href='https://github.com/UKPLab/sentence-transformers/tree/master/examples/applications/image-search'>Github Repo</a></p>"

# Example queries in several languages, showcasing the multilingual model.
example_queries = [
    ['Two dogs playing in the snow'],
    ['Eine Katze auf einem Stuhl'],
    ['Muchos peces'],
    ['棕榈树的沙滩'],
    ['Закат на пляже'],
    ['Parkta bir köpek'],
    ['夜のニューヨーク'],
]

# Build the Gradio interface around `search` and start the web server.
demo = gr.Interface(
    fn=search,
    inputs=gr.inputs.Textbox(label="Input"),
    outputs=gr.outputs.Image(type="file", label="Output"),
    title=title,
    description=description,
    article=article,
    examples=example_queries,
)
demo.launch()