imagesearch / app.py
davidmezzetti's picture
Update app.py
e205ab6
raw history blame
No virus
2.37 kB
"""
Builds a similarity index for a directory of images
"""
import glob
import os
import sys
import tarfile
import requests
import streamlit as st
from PIL import Image
from txtai.embeddings import Embeddings
def images(directory):
    """
    Generator that yields one row per image file found directly inside a directory.

    Files are selected with the "*jpg" and "*png" glob patterns. All jpg matches are
    yielded before any png match.

    Args:
        directory: directory with images

    Yields:
        (path, image opened from path, None) tuples
    """

    # Resolve both patterns before yielding anything, jpg matches ahead of png matches
    paths = []
    for pattern in ("/*jpg", "/*png"):
        paths.extend(glob.glob(directory + pattern))

    for path in paths:
        yield (path, Image.open(path), None)
@st.cache(allow_output_mutation=True)
def build(directory):
    """
    Creates an embeddings index over the images in a directory.

    Images are indexed with the clip-ViT-B-32 model. After indexing, the configured model
    path is switched to the multilingual CLIP variant and the vectors model is reloaded.

    Args:
        directory: directory with images

    Returns:
        Embeddings index
    """

    config = {"method": "sentence-transformers", "path": "clip-ViT-B-32"}

    index = Embeddings(config)
    index.index(images(directory))

    # Swap in the multilingual model so that queries are supported in multiple languages
    index.config["path"] = "sentence-transformers/clip-ViT-B-32-multilingual-v1"
    index.model = index.loadVectors()

    return index
def app(directory):
    """
    Streamlit page that searches an image embeddings index with a text query.

    Renders the title and introduction, reads a query from a text input and, when a query
    is present, displays the top matching image.

    Args:
        directory: directory with images
    """

    # Embeddings index for the image directory
    embeddings = build(directory)

    st.title("Image search")
    st.markdown("This application shows how images and text can be embedded into the same space to support similarity search. ")

    description = (
        "[sentence-transformers](https://github.com/UKPLab/sentence-transformers/tree/master/examples/applications/image-search) "
        "recently added support for the [OpenAI CLIP model](https://github.com/openai/CLIP). This model embeds text and images into "
        "the same space, enabling image similarity search. txtai can directly utilize these models."
    )
    st.markdown(description)

    query = st.text_input("Search query:")
    if not query:
        # Nothing entered yet, only the static page content is shown
        return

    # Request a single result; its id is opened as an image file and displayed
    results = embeddings.search(query, 1)
    path, _ = results[0]
    st.image(Image.open(path))
if __name__ == "__main__":
    # Script entry point: fetch the sample images on first run, then start the application
    os.environ["TOKENIZERS_PARALLELISM"] = "false"

    # Deliberately not named "images": rebinding that module-level name would replace the
    # images() generator that build() calls with a string and break indexing.
    directory = "/tmp/txtai"

    # Download and unpack the sample data only when the image directory does not exist yet
    if not os.path.exists(directory):
        os.makedirs(directory)

        url = "https://github.com/neuml/txtai/releases/download/v3.5.0/tests.tar.gz"

        # Context managers release the HTTP connection and the tar stream once extraction ends
        with requests.get(url, stream=True) as response:
            # Surface HTTP failures directly instead of as a tar read error
            response.raise_for_status()

            # NOTE(review): extractall trusts the member paths stored in the archive. The URL is
            # a pinned release asset, confirm that is acceptable before pointing this elsewhere.
            # Presumably the archive unpacks into /tmp/txtai - TODO confirm.
            with tarfile.open(fileobj=response.raw, mode="r|gz") as archive:
                archive.extractall(path="/tmp")

    # Create and run application
    app(directory)