Spaces:
Running
Running
File size: 7,902 Bytes
7084f70 d3f3302 7084f70 c517a0d 7084f70 c517a0d 7084f70 d3f3302 7084f70 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 |
# -*- coding: utf-8 -*-
"""
@author:XuMing(xuming624@qq.com)
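@description: Gradio demo for CLIP image search: loads a prebuilt Faiss index and parquet corpus, then serves text-to-image and image-to-image search.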
"""
import base64
import glob
import json
import os
import pprint
import sys
import zipfile
from io import BytesIO
from pathlib import Path
import faiss
import gradio as gr
import numpy as np
import pandas as pd
import requests
from PIL import Image
from loguru import logger
from tqdm import tqdm
sys.path.append('..')
from similarities.utils.get_file import http_get
from similarities.clip_module import ClipModule
def batch_search_index(
        queries,
        model,
        faiss_index,
        df,
        num_results,
        threshold,
        debug=False,
):
    """
    Search the faiss index for every query of a batch.

    :param queries: list of image paths, image inputs or texts (encoded with ``model``),
        or a ``np.ndarray`` of precomputed embeddings (used as-is, ``model`` is not called)
    :param model: CLIP model, must provide ``encode(queries, normalize_embeddings=True)``
    :param faiss_index: faiss index
    :param df: corpus dataframe; hits are looked up by position with ``df.iloc``
    :param num_results: int, number of results per query (only used when ``threshold`` is None)
    :param threshold: float or None; when not None, ``range_search`` with this threshold
        is used instead of a top-k ``search``
    :param debug: bool, whether to log debug info, default False
    :return: list with one entry per query; each entry is a list of
        ``(row_json, score, row_id)`` tuples sorted by score, descending
    """
    assert queries is not None, "queries should not be None"
    if isinstance(queries, np.ndarray):
        query_features = queries
    else:
        query_features = model.encode(queries, normalize_embeddings=True)

    result = []
    for query, query_feature in zip(queries, query_features):
        # The index is queried one row at a time, as a (1, dim) matrix
        query_feature = query_feature.reshape(1, -1)
        if threshold is not None:
            # range_search returns flat arrays (lims, distances, ids)
            _, d, i = faiss_index.range_search(query_feature, threshold)
            if debug:
                logger.debug(f"Found {i.shape} items with query '{query}' and threshold {threshold}")
        else:
            # search returns (1, k) matrices, keep the single row
            d, i = faiss_index.search(query_feature, num_results)
            i = i[0]
            d = d[0]

        text_scores = []
        for ed, ei in zip(d, i):
            ei = int(ei)
            if ei < 0:
                # faiss fills missing neighbours with id -1; df.iloc[-1] would
                # wrongly return the last corpus row, so drop these slots.
                continue
            # Convert to json, avoid float values error
            item = df.iloc[ei].to_json(force_ascii=False)
            if debug:
                logger.debug(f"Found: {item}, similarity: {ed}, id: {ei}")
            text_scores.append((item, float(ed), ei))
        # Sort by score desc
        text_scores.sort(key=lambda x: x[1], reverse=True)
        result.append(text_scores)
    return result
def preprocess_image(image_input) -> "Image.Image":
    """
    Convert a supported image input into a PIL ``Image.Image`` object.

    :param image_input: one of
        - str starting with ``http``: the image is downloaded
        - str path of an existing ``.png`` / ``.jpg`` / ``.jpeg`` / ``.bmp`` file
          (extension is matched case-insensitively)
        - ``np.ndarray``: converted with ``Image.fromarray``
        - bytes: base64-encoded image data
    :return: the opened PIL image
    :raises ValueError: if the input type or path is not supported
    :raises requests.RequestException: if the download fails, times out or
        the server answers with an HTTP error status
    """
    if isinstance(image_input, str):
        if image_input.startswith('http'):
            # Bound the request so a stalled server cannot hang the caller, and fail
            # loudly on HTTP errors instead of handing an error page to PIL.
            response = requests.get(image_input, timeout=30)
            response.raise_for_status()
            # .content is fully downloaded and content-decoded, unlike the raw stream
            return Image.open(BytesIO(response.content))
        # Lower-case the name so that e.g. "photo.JPG" is accepted as well
        if image_input.lower().endswith((".png", ".jpg", ".jpeg", ".bmp")) and os.path.isfile(image_input):
            return Image.open(image_input)
        raise ValueError(f"Unsupported image input type, image path: {image_input}")
    if isinstance(image_input, np.ndarray):
        return Image.fromarray(image_input)
    if isinstance(image_input, bytes):
        # bytes are expected to hold base64 text, not raw image data
        img_data = base64.b64decode(image_input)
        return Image.open(BytesIO(img_data))
    raise ValueError(f"Unsupported image input type, image input: {image_input}")
def main():
    """
    Boot the CLIP image search demo and launch its Gradio UI.

    Steps:
    1. If ``photos/csv/`` is missing or empty: download ``unsplash-25k-photos.zip``
       (unless already present), extract it into ``photos/`` and write a CSV that
       lists the extracted paths.
    2. Load the faiss index, the CLIP model and the parquet corpus found under
       ``clip_engine_25k/``.
    3. Build the Gradio page (text tab, image tab, submit button, HTML output)
       and launch it.

    :raises AssertionError: if the faiss index file does not exist
    """
    # Local import: only needed to escape values interpolated into the result HTML
    import html

    # we get about 25k images from Unsplash
    img_folder = 'photos/'
    clip_folder = 'photos/csv/'
    if not os.path.exists(clip_folder) or len(os.listdir(clip_folder)) == 0:
        os.makedirs(img_folder, exist_ok=True)

        photo_filename = 'unsplash-25k-photos.zip'
        if not os.path.exists(photo_filename):  # Download dataset if not exist
            http_get('http://sbert.net/datasets/' + photo_filename, photo_filename)

        # Extract all images
        with zipfile.ZipFile(photo_filename, 'r') as zf:
            for member in tqdm(zf.infolist(), desc='Extracting'):
                zf.extract(member, img_folder)

        # List the folder once so both columns are built from the same entries
        image_paths = glob.glob(img_folder + '/*')
        photo_df = pd.DataFrame({'image_path': image_paths,
                                 'image_name': [os.path.basename(x) for x in image_paths]})
        os.makedirs(clip_folder, exist_ok=True)
        photo_df.to_csv(f'{clip_folder}/unsplash-25k-photos.csv', index=False)

    index_dir = 'clip_engine_25k/image_index/'
    index_name = "faiss.index"
    corpus_dir = 'clip_engine_25k/corpus/'
    model_name = "OFA-Sys/chinese-clip-vit-base-patch16"

    logger.info("starting boot of clip server")
    index_file = os.path.join(index_dir, index_name)
    assert os.path.exists(index_file), f"index file {index_file} not exist"

    faiss_index = faiss.read_index(index_file)
    model = ClipModule(model_name_or_path=model_name)
    # The corpus is every parquet file of corpus_dir, concatenated in sorted name order
    df = pd.concat(pd.read_parquet(parquet_file) for parquet_file in sorted(Path(corpus_dir).glob("*.parquet")))
    logger.info(f'Load model success. model: {model_name}, index: {faiss_index}, corpus size: {len(df)}')

    def image_path_to_base64(image_path: str) -> str:
        """Return the content of the file at image_path as a base64 text string."""
        with open(image_path, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode("utf-8")

    def search_image(text="", image=None):
        """
        Run a text or image query and render the hits as an HTML fragment.

        :param text: query text from the text tab
        :param image: file path of the uploaded query image, or None
        :return: HTML string with the query, the result count and one thumbnail per hit
        """
        if not text and not image:
            return "<p>Please provide either text or image input.</p>"
        if text and image is not None:
            return "<p>Please provide either text or image input, not both.</p>"

        is_image_query = image is not None
        q = [preprocess_image(image)] if is_image_query else [text]
        results = batch_search_index(q, model, faiss_index, df, 25, None, debug=False)[0]

        if is_image_query:
            image_src = "data:image/jpeg;base64," + image_path_to_base64(image)
            parts = [f'Query: <img src="{image_src}" width="200" height="200"><br>']
        else:
            # The text is user input: escape it so it cannot inject markup into the page
            parts = [f'Query: {html.escape(text)}<br>']
        parts.append(f'Result Size: {len(results)}<br>')

        for item, similarity_score, _ in results:
            item_dict = json.loads(item)
            image_path = item_dict.get("image_path", "")
            if not image_path:
                continue
            # The tooltip goes into a double-quoted attribute, so quotes must be escaped too
            tip = html.escape(pprint.pformat(item_dict))
            if image_path.startswith("http"):
                image_src = html.escape(image_path)
            else:
                image_src = "data:image/jpeg;base64," + image_path_to_base64(image_path)
            parts.append('<div style="display: inline-block; position: relative; margin: 10px;">')
            parts.append(f'<img src="{image_src}" width="200" height="200" title="{tip}">')
            parts.append('<div style="position: absolute; bottom: 0; right: 0; '
                         'background-color: rgba(255, 255, 255, 0.7); padding: 2px 5px;">')
            parts.append(f'Score: {similarity_score:.4f}')
            parts.append('</div></div>')
        return "".join(parts)

    def reset_user_input():
        """Return the values that clear the text box and the image input."""
        return '', None

    with gr.Blocks() as demo:
        gr.HTML("""<h1 align="center">CLIP Image Search</h1>""")
        gr.Markdown(
            "> Search for similar images using Faiss and Chinese-CLIP. Link to Github: [similarities](https://github.com/shibing624/similarities)")
        with gr.Tab("Text"):
            with gr.Row():
                with gr.Column():
                    input_text = gr.Textbox(lines=2, placeholder="Enter text here...")
        with gr.Tab("Image"):
            with gr.Row():
                with gr.Column():
                    input_image = gr.Image(type="filepath", label="Upload an image")
        # The caption of a button is its value; `label` does not set the visible text
        btn_submit = gr.Button("Submit")
        # Use the component class directly (as for the title above) instead of the
        # deprecated legacy `gr.outputs` namespace
        output = gr.HTML(label="Search results")
        btn_submit.click(search_image, inputs=[input_text, input_image],
                         outputs=output, show_progress=True)
        # Second handler on the same click: clears both inputs
        btn_submit.click(reset_user_input, outputs=[input_text, input_image])
    demo.queue().launch()
# Start the demo only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|