File size: 8,428 Bytes
7084f70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d3f3302
7084f70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fdd48fd
 
 
7084f70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c517a0d
7084f70
 
 
 
c517a0d
7084f70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fdd48fd
 
 
 
7084f70
 
 
 
 
fdd48fd
 
 
 
7084f70
 
 
 
 
 
 
 
d3f3302
 
7084f70
 
fdd48fd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
# -*- coding: utf-8 -*-
"""
@author:XuMing(xuming624@qq.com)
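@description: Gradio demo that searches an image corpus by text or image query, using a CLIP model and a Faiss index.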
"""
import base64
import glob
import json
import os
import pprint
import sys
import zipfile
from io import BytesIO
from pathlib import Path

import faiss
import gradio as gr
import numpy as np
import pandas as pd
import requests
from PIL import Image
from loguru import logger
from tqdm import tqdm

sys.path.append('..')
from similarities.utils.get_file import http_get
from similarities.clip_module import ClipModule


def batch_search_index(
        queries,
        model,
        faiss_index,
        df,
        num_results,
        threshold,
        debug=False,
):
    """
    Search the faiss index for a batch of queries and return the matching corpus rows.

    :param queries: list of image paths, image inputs or texts (encoded with ``model``),
        or an ``np.ndarray`` of precomputed embeddings, one row per query
    :param model: CLIP model exposing ``encode(queries, normalize_embeddings=True)``;
        not used when ``queries`` is already an ``np.ndarray``
    :param faiss_index: faiss index
    :param df: corpus dataframe; rows are looked up by position (``df.iloc``)
    :param num_results: int, number of results per query, used only when ``threshold`` is None
    :param threshold: float or None; when not None a range search with this threshold
        is run instead of a top-``num_results`` search
    :param debug: bool, whether to log debug info, default False
    :return: list with one entry per query; each entry is a list of
        ``(row_json, score, row_index)`` tuples sorted by score, highest first
    """
    assert queries is not None, "queries should not be None"
    if isinstance(queries, np.ndarray):
        query_features = queries
    else:
        query_features = model.encode(queries, normalize_embeddings=True)

    result = []
    for query, query_feature in zip(queries, query_features):
        query_feature = query_feature.reshape(1, -1)
        if threshold is not None:
            _, scores, ids = faiss_index.range_search(query_feature, threshold)
            if debug:
                logger.debug(f"Found {ids.shape} items with query '{query}' and threshold {threshold}")
        else:
            scores, ids = faiss_index.search(query_feature, num_results)
            # A single query was searched, so keep only its row of results
            scores, ids = scores[0], ids[0]

        hits = []
        for score, row_id in zip(scores, ids):
            row_id = int(row_id)
            # faiss pads the result with id -1 when fewer than num_results neighbours
            # exist; df.iloc[-1] would wrongly return the last corpus row for those.
            if row_id < 0:
                continue
            # Convert to json, avoid float values error
            item = df.iloc[row_id].to_json(force_ascii=False)
            if debug:
                logger.debug(f"Found: {item}, similarity: {score}, id: {row_id}")
            hits.append((item, float(score), row_id))
        # Sort by score desc
        hits.sort(key=lambda hit: hit[1], reverse=True)
        result.append(hits)
    return result


def preprocess_image(image_input, timeout: float = 30.0) -> Image.Image:
    """
    Convert an image input to a PIL ``Image.Image`` object.

    :param image_input: one of
        - str starting with ``http``: the image is downloaded with ``requests``
        - str path to an existing local file ending in .png/.jpg/.jpeg/.bmp
          (extension compared case-insensitively)
        - np.ndarray: converted with ``Image.fromarray``
        - bytes: base64-encoded image data
    :param timeout: seconds to wait for the HTTP server before giving up
    :return: the opened PIL image
    :raises ValueError: if the input type or the path is not supported
    :raises requests.RequestException: if the download fails or returns an error status
    """
    if isinstance(image_input, str):
        if image_input.startswith('http'):
            # Bound the wait and fail on HTTP errors instead of handing an error page
            # to PIL; ``content`` is also transparently decoded, unlike ``raw``.
            response = requests.get(image_input, timeout=timeout)
            response.raise_for_status()
            return Image.open(BytesIO(response.content))
        has_image_suffix = image_input.lower().endswith((".png", ".jpg", ".jpeg", ".bmp"))
        if has_image_suffix and os.path.isfile(image_input):
            return Image.open(image_input)
        raise ValueError(f"Unsupported image input type, image path: {image_input}")
    if isinstance(image_input, np.ndarray):
        return Image.fromarray(image_input)
    if isinstance(image_input, bytes):
        # bytes input is treated as base64 text, not as raw image bytes
        img_data = base64.b64decode(image_input)
        return Image.open(BytesIO(img_data))
    raise ValueError(f"Unsupported image input type, image input: {image_input}")


def main():
    """
    Launch the Gradio demo for CLIP image search.

    Steps:
    1. If ``photos/csv/`` is missing or empty, download (when not already present) and
       extract the Unsplash photo archive into ``photos/`` and write a CSV listing the files.
    2. Load the faiss index, the CLIP model and the parquet corpus from ``clip_engine_25k/``.
    3. Build a Gradio Blocks UI with a text tab and an image tab and start the server.

    :raises FileNotFoundError: if the faiss index file or the parquet corpus is missing
    """
    import html  # local import: escapes text embedded in the generated result markup

    text_examples = [["黑猫"], ["坐着的女孩"], ["两只狗拉雪橇"], ["tiger"], ["full Moon"]]
    image_examples = [["photos/YMJ1IiItvPY.jpg"], ["photos/6Fo47c49zEQ.jpg"], ["photos/OM7CvKnhjfs.jpg"],
                      ["photos/lyStEjlKNSw.jpg"], ["photos/mCbo65vkb80.jpg"]]
    # we get about 25k images from Unsplash
    img_folder = 'photos/'
    clip_folder = 'photos/csv/'
    if not os.path.exists(clip_folder) or len(os.listdir(clip_folder)) == 0:
        os.makedirs(img_folder, exist_ok=True)

        photo_filename = 'unsplash-25k-photos.zip'
        if not os.path.exists(photo_filename):  # Download dataset if not exist
            http_get('http://sbert.net/datasets/' + photo_filename, photo_filename)

        # Extract all images
        with zipfile.ZipFile(photo_filename, 'r') as zf:
            for member in tqdm(zf.infolist(), desc='Extracting'):
                zf.extract(member, img_folder)
        # List the folder once so both columns come from the same listing, and keep
        # only files (the csv sub-folder may already exist but be empty).
        image_paths = [path for path in glob.glob(img_folder + '/*') if os.path.isfile(path)]
        df = pd.DataFrame({'image_path': image_paths,
                           'image_name': [os.path.basename(path) for path in image_paths]})
        os.makedirs(clip_folder, exist_ok=True)
        df.to_csv(f'{clip_folder}/unsplash-25k-photos.csv', index=False)

    index_dir = 'clip_engine_25k/image_index/'
    index_name = "faiss.index"
    corpus_dir = 'clip_engine_25k/corpus/'
    model_name = "OFA-Sys/chinese-clip-vit-base-patch16"

    logger.info("starting boot of clip server")
    index_file = os.path.join(index_dir, index_name)
    # Explicit raise instead of assert: asserts are stripped when Python runs with -O
    if not os.path.exists(index_file):
        raise FileNotFoundError(f"index file {index_file} not exist")
    faiss_index = faiss.read_index(index_file)
    model = ClipModule(model_name_or_path=model_name)
    parquet_files = sorted(Path(corpus_dir).glob("*.parquet"))
    if not parquet_files:
        raise FileNotFoundError(f"no parquet corpus files found in {corpus_dir}")
    df = pd.concat(pd.read_parquet(parquet_file) for parquet_file in parquet_files)
    logger.info(f'Load model success. model: {model_name}, index: {faiss_index}, corpus size: {len(df)}')

    def image_path_to_base64(image_path: str) -> str:
        """Read a local file and return its content as a base64 string."""
        with open(image_path, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode("utf-8")

    def search_image(text="", image=None):
        """
        Run a text or image query and render the hits as an HTML fragment.

        :param text: query text from the text box
        :param image: path of the uploaded image (the Image component uses type="filepath")
        :return: HTML string with the query and the result thumbnails, or a hint
            message when neither or both inputs are given
        """
        if not text and not image:
            return "<p>Please provide either text or image input.</p>"

        if text and image is not None:
            return "<p>Please provide either text or image input, not both.</p>"

        parts = []
        if image is not None:
            q = [preprocess_image(image)]
            query_src = "data:image/jpeg;base64," + image_path_to_base64(image)
            parts.append(f'Query: <img src="{query_src}" width="200" height="200"><br>')
        else:
            q = [text]
            # The text comes straight from the user, so escape it before embedding in HTML
            parts.append(f'Query: {html.escape(text)}<br>')
        results = batch_search_index(q, model, faiss_index, df, 25, None, debug=False)[0]

        parts.append(f'Result Size: {len(results)}<br>')
        for item, similarity_score, _ in results:
            item_dict = json.loads(item)
            image_path = item_dict.get("image_path", "")
            if not image_path:
                continue
            if image_path.startswith("http"):
                image_src = html.escape(image_path, quote=True)
            else:
                image_src = "data:image/jpeg;base64," + image_path_to_base64(image_path)
            # The tooltip holds arbitrary corpus metadata; escape quotes so it cannot
            # terminate the title attribute early.
            tip = html.escape(pprint.pformat(item_dict), quote=True)
            parts.append('<div style="display: inline-block; position: relative; margin: 10px;">')
            parts.append(f'<img src="{image_src}" width="200" height="200" title="{tip}">')
            parts.append('<div style="position: absolute; bottom: 0; right: 0; background-color: rgba(255, 255, 255, 0.7); padding: 2px 5px;">')
            parts.append(f'Score: {similarity_score:.4f}')
            parts.append('</div></div>')

        return "".join(parts)

    def reset_user_input():
        """Return empty values for the text box and the image input."""
        return '', None

    with gr.Blocks() as demo:
        gr.HTML("""<h1 align="center">CLIP Image Search</h1>""")
        gr.Markdown(
            "> Search for similar images using Faiss and Chinese-CLIP. Link to Github: [similarities](https://github.com/shibing624/similarities)")
        with gr.Tab("Text"):
            with gr.Row():
                with gr.Column():
                    input_text = gr.Textbox(lines=2, placeholder="Enter text here...")
            gr.Examples(
                examples=text_examples,
                inputs=[input_text],
            )

        with gr.Tab("Image"):
            with gr.Row():
                with gr.Column():
                    input_image = gr.Image(type="filepath", label="Upload an image")
            gr.Examples(
                examples=image_examples,
                inputs=[input_image],
            )

        # The text shown on a button is its value, not a label
        btn_submit = gr.Button("Submit")
        # Use the Blocks component directly; the gr.outputs namespace is deprecated
        output = gr.HTML(label="Search results")
        btn_submit.click(search_image, inputs=[input_text, input_image],
                         outputs=output, show_progress=True)
        btn_submit.click(reset_user_input, outputs=[input_text, input_image])

    demo.queue().launch()


# Start the demo only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()