Spaces:
				
			
			
	
			
			
					
		Running
		
	
	
	
			
			
	
	
	
	
		
		
					
		Running
		
	| """ | |
| Builds a similarity index for a directory of images | |
| """ | |
| import glob | |
| import os | |
| import sys | |
| import tarfile | |
| import requests | |
| import streamlit as st | |
| from PIL import Image | |
| from txtai.embeddings import Embeddings | |
class Application:
    """
    Main application.

    Builds an image embeddings index for a directory and runs a Streamlit page that
    displays the image best matching a text query.
    """

    def __init__(self, directory):
        """
        Creates a new application.

        Args:
            directory: directory of images
        """

        self.embeddings = self.build(directory)

    def build(self, directory):
        """
        Builds an image embeddings index.

        Args:
            directory: directory with images

        Returns:
            Embeddings index
        """

        # Index the images using the CLIP model
        embeddings = Embeddings({"method": "sentence-transformers", "path": "clip-ViT-B-32"})
        embeddings.index(self.images(directory))

        # Update model to support multilingual queries. The index itself is left as built,
        # only the configured model path is swapped and the vectors model reloaded.
        embeddings.config["path"] = "sentence-transformers/clip-ViT-B-32-multilingual-v1"
        embeddings.model = embeddings.loadvectors()

        return embeddings

    def images(self, directory):
        """
        Generator that loops over each image in a directory.

        Matches are direct children of directory whose names end in "jpg" or "png"
        (case-sensitive). All jpg matches are yielded before png matches.

        Args:
            directory: directory with images

        Yields:
            (path, opened PIL image, None) tuples. The path serves as the index id that
            run() later re-opens. The trailing None is presumably the tags slot of the
            tuples txtai indexes - TODO confirm.
        """

        for path in glob.glob(directory + "/*jpg") + glob.glob(directory + "/*png"):
            yield (path, Image.open(path), None)

    def run(self):
        """
        Runs a Streamlit application.

        Renders the page text and a query input. When a query is entered, the top search
        result is displayed, or a warning if the search returns no results.
        """

        st.title("Image search")

        st.markdown("This application shows how images and text can be embedded into the same space to support similarity search. ")
        st.markdown(
            "[sentence-transformers](https://github.com/UKPLab/sentence-transformers/tree/master/examples/applications/image-search) "
            + "recently added support for the [OpenAI CLIP model](https://github.com/openai/CLIP). This model embeds text and images into "
            + "the same space, enabling image similarity search. txtai can directly utilize these models."
        )

        query = st.text_input("Search query:")
        if query:
            results = self.embeddings.search(query, 1)
            if results:
                # Index ids are the image paths yielded by images()
                index, _ = results[0]
                st.image(Image.open(index))
            else:
                # Avoid an IndexError when nothing was indexed or matched
                st.warning("No matching images found")
# Streamlit re-executes the script on each user interaction, so without caching the
# application (and its embeddings index) would be rebuilt for every query. Prefer
# st.cache_resource and fall back to the legacy st.cache API on Streamlit releases
# that do not provide it.
_cache_application = st.cache_resource if hasattr(st, "cache_resource") else st.cache(allow_output_mutation=True)


@_cache_application
def create(directory):
    """
    Creates and caches a Streamlit application.

    Args:
        directory: directory of images to index

    Returns:
        Application
    """

    return Application(directory)
if __name__ == "__main__":
    os.environ["TOKENIZERS_PARALLELISM"] = "false"

    # Directory of images to index
    files = "/tmp/txtai"
    if not os.path.exists(files):
        # Download and extract the sample data. The response and archive are closed via
        # context managers and HTTP errors are raised before the body is read as a tar stream.
        with requests.get("https://github.com/neuml/txtai/releases/download/v3.5.0/tests.tar.gz", stream=True) as response:
            response.raise_for_status()

            with tarfile.open(fileobj=response.raw, mode="r|gz") as archive:
                # NOTE(review): the archive is extracted without member filtering; it comes
                # from a fixed release URL. Presumably it unpacks into /tmp/txtai - verify.
                archive.extractall(path="/tmp")

        # Only create the directory after the download succeeded. Creating it first would
        # leave an empty directory behind on failure and the download would never be retried.
        os.makedirs(files, exist_ok=True)

    # Create and run application
    app = create(files)
    app.run()

