davidmezzetti committed on
Commit
e75a985
1 Parent(s): 2f8c8fc

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +89 -0
app.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Streamlit version of https://colab.research.google.com/github/neuml/txtai/blob/master/examples/13_Similarity_search_with_images.ipynb
3
+ """
4
+
5
+ import glob
6
+ import os
7
+ import sys
8
+ import tarfile
9
+
10
+ import requests
11
+ import streamlit as st
12
+
13
+ from PIL import Image
14
+
15
+ from txtai.embeddings import Embeddings
16
+
17
+
18
def images(directory):
    """
    Generator that loops over each image in a directory.

    Only files directly inside the directory whose names end in ".jpg" or ".png"
    are considered. All ".jpg" matches are yielded before any ".png" matches.

    Args:
        directory: directory with images

    Yields:
        (path, image, None) tuples, where image is the result of Image.open(path)
    """

    # Match on the full extension, dot included, so unrelated files whose names
    # merely end in "jpg" or "png" are never handed to the image loader
    for extension in ("jpg", "png"):
        for path in glob.glob(os.path.join(directory, "*." + extension)):
            yield (path, Image.open(path), None)
28
+
29
+
30
@st.cache(allow_output_mutation=True)
def build(directory):
    """
    Creates an embeddings index over the images found in a directory.

    Args:
        directory: directory with images

    Returns:
        Embeddings index
    """

    # Index the images with the CLIP model
    config = {"method": "sentence-transformers", "path": "clip-ViT-B-32"}
    index = Embeddings(config)
    index.index(images(directory))

    # Swap the configured model path and reload vectors so that queries
    # are encoded with the multilingual model
    index.config["path"] = "sentence-transformers/clip-ViT-B-32-multilingual-v1"
    index.model = index.loadVectors()

    return index
50
+
51
+
52
def app(directory):
    """
    Renders the Streamlit image search page backed by an image embeddings index.

    Args:
        directory: directory with images
    """

    # Embeddings index for the image directory
    embeddings = build(directory)

    st.title("Image search")

    st.markdown("This application shows how images and text can be embedded into the same space to support similarity search. ")
    st.markdown(
        "[sentence-transformers](https://github.com/UKPLab/sentence-transformers/tree/master/examples/applications/image-search) "
        "recently added support for the [OpenAI CLIP model](https://github.com/openai/CLIP). This model embeds text and images into "
        "the same space, enabling image similarity search. txtai can directly utilize these models."
    )

    query = st.text_input("Search query:")
    if not query:
        # Nothing to search for until the user enters a query
        return

    # Take the single best hit; its first element is opened as an image file
    path, _ = embeddings.search(query, 1)[0]
    st.image(Image.open(path))
76
+
77
+
78
if __name__ == "__main__":
    os.environ["TOKENIZERS_PARALLELISM"] = "false"

    # Named "directory" rather than "images": rebinding the module-level name
    # "images" here would replace the images() generator that build() calls
    # with a string and break indexing
    directory = "/tmp/txtai"
    if not os.path.exists(directory):
        # Download and unpack the sample archive when the directory is missing
        url = "https://github.com/neuml/txtai/releases/download/v3.5.0/tests.tar.gz"
        with requests.get(url, stream=True) as response:
            # Fail on an HTTP error instead of feeding an error page to tarfile
            response.raise_for_status()

            # NOTE(review): extractall writes whatever member paths the downloaded
            # archive contains under /tmp - confirm the archive source is trusted
            with tarfile.open(fileobj=response.raw, mode="r|gz") as archive:
                archive.extractall(path="/tmp")

        # Ensure the directory exists only after a successful extraction, so that
        # a failed download is retried on the next run
        os.makedirs(directory, exist_ok=True)

    app(directory)