import jieba
from PIL import Image
from whoosh.analysis import Token, Tokenizer
from whoosh.index import open_dir
from whoosh.qparser import QueryParser


class ChineseTokenizer(Tokenizer):
    """Whoosh tokenizer that segments Chinese text with jieba."""

    def __call__(self, value, positions=False, chars=False, keeporiginal=False,
                 removestops=True, start_pos=0, start_char=0, mode='', **kwargs):
        t = Token(positions, chars, removestops=removestops, mode=mode, **kwargs)
        for w in jieba.cut(value, cut_all=True):
            t.original = t.text = w
            t.boost = 1.0
            # value.find(w) locates the first occurrence of the segment, so
            # repeated words all report the offset of their first appearance.
            if positions:
                t.pos = start_pos + value.find(w)
            if chars:
                t.startchar = start_char + value.find(w)
                t.endchar = start_char + value.find(w) + len(w)
            yield t


def ChineseAnalyzer():
    return ChineseTokenizer()


def search(query, lang='CN', k=10):
    ix = open_dir('indexes')
    # Tokenize the query string and join the tokens with the OR operator.
    if lang == 'CN':
        query_tokens = jieba.cut(query, cut_all=True)
    else:
        query_tokens = query.split()
    or_query = " OR ".join(query_tokens)
    parser = QueryParser("content", ix.schema)
    myquery = parser.parse(or_query)
    with ix.searcher() as searcher:
        results = searcher.search(myquery, limit=k)
        # Extract the file name, description, and score of each top-k hit
        # while the searcher is still open.
        results_list = [(hit['file_name'], hit['content'], hit.score)
                        for hit in results]
    images = []
    for result in results_list:
        print(result)
        image_name = result[0]
        base_name = image_name.split('_img')[0]
        # Construct the image path: images/<base_name>/<image_name>.png
        image_full_path = 'images/' + base_name + '/' + image_name + '.png'
        img = Image.open(image_full_path)
        # The last line of the description is expected to look like "title: ...".
        image_title = result[1].split('\n')[-1].split(':')[1]
        # img.show(title=image_title)
        images.append((img, image_title, result[2]))
    return images


# jieba loads its dictionary lazily; preloading it here keeps the first real
# query from paying the load cost. (jieba.cut("") only returns a lazy generator
# and never touches the dictionary, so initialize() is used instead.)
jieba.initialize()

# results = search("IF-428x接收端阈值")
results = search("简化结构图")
for result in results:
    # result[0] is the image's PIL.Image object, result[1] is the title,
    # result[2] is the relevance score.
    print(result[1], result[2])
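

# search() assumes a Whoosh index already exists under 'indexes' with stored
# 'file_name' and 'content' fields. Below is a minimal sketch of how such an
# index could be built with the ChineseAnalyzer defined above; build_index and
# its (file_name, content) document tuples are hypothetical names, not part of
# the original pipeline.
def build_index(docs, index_dir='indexes'):
    import os
    from whoosh.fields import ID, TEXT, Schema
    from whoosh.index import create_in

    schema = Schema(
        file_name=ID(stored=True),
        content=TEXT(stored=True, analyzer=ChineseAnalyzer()),
    )
    os.makedirs(index_dir, exist_ok=True)
    ix = create_in(index_dir, schema)
    writer = ix.writer()
    for file_name, content in docs:
        # Each document pairs an image's file name with its text description.
        writer.add_document(file_name=file_name, content=content)
    writer.commit()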