# -*- coding: utf-8 -*-
"""
@author:XuMing(xuming624@qq.com)
@description: text similarity example, fine-tuned by CoSENT model
"""
import gradio as gr
from similarities import Similarity

# Chinese sentence-embedding model
sim_model = Similarity(model_name_or_path='shibing624/text2vec-base-chinese')


def load_file(path):
    """Read a UTF-8 text file and return its non-blank lines.

    Args:
        path: Path of the corpus file, one sentence per line.

    Returns:
        A list with one string per non-blank line, without the trailing
        newline. Blank lines (including the empty tail produced by a final
        newline) are skipped so they are not indexed as empty sentences.
    """
    with open(path, 'r', encoding='utf-8') as f:
        lines = (line.rstrip('\n') for line in f)
        return [line for line in lines if line.strip()]


# Index the corpus once at import time so every query reuses it.
sim_model.add_corpus(load_file('corpus.txt'))


def ai_text(query):
    """Search the corpus for the texts most similar to ``query``.

    The result of ``sim_model.most_similar`` is consumed as a mapping of
    query id -> {corpus id: score}; each hit is echoed to stdout and the hits
    of the first query are formatted for display.

    Args:
        query: The text to search for.

    Returns:
        A newline-joined string starting with ``search top5:`` followed by one
        ``text: ... score: ...`` line per hit. When the search yields no
        result, only the header line is returned.
    """
    res = sim_model.most_similar(queries=query, topn=5)
    print(res)
    for hits in res.values():
        print('query:', query)
        print("search top 5:")
        for corpus_id, score in hits.items():
            print(f'\t{sim_model.corpus[corpus_id]}: {score:.4f}')

    # Only the first query's hits are shown; fall back to an empty mapping so
    # an empty result does not raise IndexError.
    first_hits = next(iter(res.values()), {})
    shown = ['search top5:']
    shown.extend(
        f'text: {sim_model.corpus[corpus_id]} score: {score:.4f}'
        for corpus_id, score in first_hits.items()
    )
    return '\n'.join(shown)


def main():
    """Build the Gradio interface around ``ai_text`` and launch it."""
    examples = [
        ['星巴克被嘲笑了'],
        ['西班牙失业率超过50%'],
        ['她在看书'],
        ['一个人弹琴'],
    ]
    # NOTE(review): `gr.inputs` / `gr.outputs` are the legacy component
    # namespaces; newer Gradio releases expose `gr.Textbox` instead. Kept
    # as-is to match the Gradio version this script targets - confirm before
    # upgrading.
    query_box = gr.inputs.Textbox(lines=2, placeholder="Enter Query")
    output_text = gr.outputs.Textbox()
    gr.Interface(
        ai_text,
        inputs=[query_box],
        outputs=[output_text],
        theme="grass",
        title="Chinese Text Semantic Search Model",
        description="Copy or input Chinese text here. Submit and the machine will find the most similarity texts.",
        article="Link to Github REPO",
        examples=examples,
    ).launch()


if __name__ == '__main__':
    main()