from sentence_transformers import SentenceTransformer, util
from PIL import Image
import glob
import torch
import pickle
import zipfile
import os
from tqdm.autonotebook import tqdm
import gradio as gr
# Here we load the multilingual CLIP model. Note: this model can only encode text.
# If you need embeddings for images, you must load the 'clip-ViT-B-32' model.
model = SentenceTransformer('clip-ViT-B-32-multilingual-v1')
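# A minimal sketch of what this model does (the German query below is just an illustration):
# it maps text in different languages into the same vector space as the CLIP image embeddings, e.g.
#   query_emb = model.encode(['Two dogs playing in the snow', 'Zwei Hunde spielen im Schnee'])
# yields one vector per query that can be compared against image embeddings via cosine similarity.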
# Next, we get about 25k images from Unsplash
img_folder = 'photos/'
if not os.path.exists(img_folder) or len(os.listdir(img_folder)) == 0:
    os.makedirs(img_folder, exist_ok=True)

    photo_filename = 'unsplash-25k-photos.zip'
    if not os.path.exists(photo_filename):  # Download the dataset if it does not exist
        util.http_get('http://sbert.net/datasets/' + photo_filename, photo_filename)

    # Extract all images
    with zipfile.ZipFile(photo_filename, 'r') as zf:
        for member in tqdm(zf.infolist(), desc='Extracting'):
            zf.extract(member, img_folder)
# Now, we need to compute the embeddings
# To speed things up, we distribute pre-computed embeddings.
# Otherwise, you can also encode the images yourself.
# To encode an image, you can use the following code:
# from PIL import Image
# img_emb = model.encode(Image.open(filepath))
use_precomputed_embeddings = True

if use_precomputed_embeddings:
    emb_filename = 'unsplash-25k-photos-embeddings.pkl'
    if not os.path.exists(emb_filename):  # Download the embeddings if they do not exist
        util.http_get('http://sbert.net/datasets/' + emb_filename, emb_filename)

    with open(emb_filename, 'rb') as fIn:
        # The pickle holds the photo filenames and their pre-computed CLIP image embeddings.
        img_names, img_emb = pickle.load(fIn)
    print("Images:", len(img_names))
else:
    # For embedding images, we need the non-multilingual CLIP model
    img_model = SentenceTransformer('clip-ViT-B-32')

    img_names = list(glob.glob('photos/*.jpg'))
    print("Images:", len(img_names))
    img_emb = img_model.encode([Image.open(filepath) for filepath in img_names], batch_size=128, convert_to_tensor=True, show_progress_bar=True)
    # Keep only the filenames so that search() can join them with img_folder, matching the precomputed case.
    img_names = [os.path.basename(name) for name in img_names]
# Re-encode the first photo with the image CLIP model, alongside its stored embedding
# (apparently a sanity check; neither value is used by the demo below).
filepath = 'photos/' + img_names[0]
one_emb = torch.tensor(img_emb[0])
img_model = SentenceTransformer('clip-ViT-B-32')
comb_emb = img_model.encode(Image.open(filepath), convert_to_tensor=True).cpu()
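# If you want to verify that the pre-computed embeddings match the model's output, a minimal
# sketch (assuming both were produced with 'clip-ViT-B-32') would be:
#   print(util.cos_sim(one_emb, comb_emb))  # expected to be close to 1.0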
# Next, we define a search function.
def search(query):
    # First, we encode the query text (the multilingual model cannot encode images)
    query_emb = model.encode([query], convert_to_tensor=True, show_progress_bar=False)

    # Then, we use util.semantic_search, which computes the cosine similarity
    # between the query embedding and all image embeddings.
    # It returns the top_k highest ranked images; we output the best match.
    hits = util.semantic_search(query_emb, img_emb, top_k=1)[0]
    return os.path.join(img_folder, img_names[hits[0]['corpus_id']])
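# A quick usage sketch: util.semantic_search returns hits as a list of dicts with
# 'corpus_id' and 'score' keys, so calling the function directly, e.g.
#   print(search('Two dogs playing in the snow'))
# should print the path of the best-matching Unsplash photo.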
title = "Image Search"
description = "Demo of multilingual text-to-image search across 50+ languages. To use it, simply enter your text or click one of the examples to load it. Read more at the links below."
article = "<p style='text-align: center'><a href='https://www.sbert.net/'>SentenceTransformers Documentation</a> | <a href='https://github.com/UKPLab/sentence-transformers/tree/master/examples/applications/image-search'>Github Repo</a></p>"
gr.Interface(
    search,
    gr.inputs.Textbox(label="Input"),
    gr.outputs.Image(type="file", label="Output"),
    title=title,
    description=description,
    article=article,
    examples=[
        ['Two dogs playing in the snow'],
        ['Eine Katze auf einem Stuhl'],
        ['Muchos peces'],
        ['棕榈树的沙滩'],
        ['Закат на пляже'],
        ['Parkta bir köpek'],
        ['夜のニューヨーク']
    ]
).launch()