"""Gradio app that recommends programming reference books for a search phrase.

The query is tokenized with Janome, embedded with the loaded model's
``infer_vector``, and compared by cosine similarity against the precomputed
document vectors. The best-matching titles are shown together with their
similarity scores.
"""
import pickle

import gradio as gr
import joblib
import numpy as np
from janome.tokenizer import Tokenizer

# Number of recommendations displayed. The output textboxes below are
# generated from this value, so the two can never get out of sync.
RANK_SIZE = 5

# NOTE(security): joblib/pickle deserialization can execute arbitrary code.
# These artifacts must only ever come from a trusted source.
model = joblib.load('./doc2vec.pkl')
with open('document_vecs.pkl', 'rb') as f:
    document_vecs = pickle.load(f)
with open('Title.pkl', 'rb') as f:
    Title = pickle.load(f)

# Constructing a Tokenizer is expensive, so build it once and reuse it for
# every request instead of re-creating it inside sep_by_janome().
_tokenizer = Tokenizer()


def sep_by_janome(text):
    """Split *text* into a list of surface-form tokens using Janome.

    Args:
        text: The raw string to tokenize.

    Returns:
        A list with the surface string of each token, in order.
    """
    return [token.surface for token in _tokenizer.tokenize(text)]


def cos_calc(text):
    """Return the top ``RANK_SIZE`` titles most similar to *text*.

    Args:
        text: The search phrase entered by the user.

    Returns:
        A tuple of ``RANK_SIZE`` strings, best match first. Each string holds
        a title followed by its cosine similarity. If fewer than
        ``RANK_SIZE`` documents are available, the remaining slots are empty
        strings so the number of outputs always matches the UI.
    """
    tokens = sep_by_janome(text)
    input_vec = model.infer_vector(tokens)
    input_norm = np.linalg.norm(input_vec)

    cos_sim = []
    for doc_vec in document_vecs:
        denom = input_norm * np.linalg.norm(doc_vec)
        # A zero-length vector has no direction; score it 0.0 rather than
        # producing NaN, which would otherwise be sorted to the top.
        cos_sim.append(np.dot(input_vec, doc_vec) / denom if denom else 0.0)

    # Use a single ordering for both titles and scores so they cannot
    # disagree with each other.
    ranked = np.argsort(np.array(cos_sim))[::-1][:RANK_SIZE]
    results = [
        f'{Title[idx]} \nコサイン類似度:{cos_sim[idx]}' for idx in ranked
    ]

    # Pad so Gradio always receives exactly RANK_SIZE output values.
    results.extend([''] * (RANK_SIZE - len(results)))
    return tuple(results)


demo = gr.Interface(
    fn=cos_calc,
    title="プログラミング参考書推薦アプリ",
    inputs=gr.Textbox(label="検索ワード"),
    outputs=[gr.Textbox(label=f'{rank}位') for rank in range(1, RANK_SIZE + 1)],
)

demo.launch()