taichi motegi committed on
Commit
0b10eee
1 Parent(s): 43b1557

Upload 3 files

Browse files
Files changed (3) hide show
  1. Title.pkl +3 -0
  2. app.py +61 -0
  3. document_vecs.pkl +3 -0
Title.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee90d5c9c96cd2550a99a2935264fa8c87074ef9fcdcf7b691bc825db8cad2df
3
+ size 10035
app.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import joblib
3
+ import pickle
4
+ import numpy as np
5
+ from janome.tokenizer import Tokenizer
6
+
7
+
8
# Load the pre-trained artifacts once at import time so every request reuses them.
# NOTE(review): joblib/pickle execute arbitrary code while deserializing; these
# files must only ever come from a trusted source (here: the repository itself).
model = joblib.load('./doc2vec.pkl')

# Document vectors that queries are compared against.
with open('document_vecs.pkl', 'rb') as vecs_file:
    document_vecs = pickle.load(vecs_file)

# Titles looked up by the same index as the entries of document_vecs.
with open('Title.pkl', 'rb') as titles_file:
    Title = pickle.load(titles_file)
13
+
14
_tokenizer = None  # shared Janome tokenizer, created lazily on first use


def sep_by_janome(text):
    """Split ``text`` into a list of token surface forms using Janome.

    The tokenizer is created once and reused across calls, because
    constructing a ``Tokenizer`` is far more expensive than tokenizing a
    short query string.

    Args:
        text: The string to tokenize.

    Returns:
        A list with the ``surface`` string of every token, in order.
    """
    global _tokenizer
    if _tokenizer is None:
        _tokenizer = Tokenizer()
    return [token.surface for token in _tokenizer.tokenize(text)]
21
+
22
+
23
+ #コサイン類似度の計算+ランキング化
24
+
25
def cos_calc(text):
    """Rank the stored documents by cosine similarity to ``text``.

    The query is tokenized with ``sep_by_janome``, turned into a vector with
    ``model.infer_vector``, and compared against every vector in
    ``document_vecs``. The best matches are formatted as display strings.

    Args:
        text: The search query entered by the user.

    Returns:
        A tuple of exactly five strings, best match first. Each string holds
        the document title and its cosine similarity. When fewer than five
        documents are available the remaining slots are empty strings, so the
        number of return values always matches the five output components.
    """
    rank_size = 5  # must stay equal to the number of output Textboxes

    tokens = sep_by_janome(text)
    input_vec = model.infer_vector(tokens)
    input_norm = np.linalg.norm(input_vec)

    cos_sim = []
    for doc_vec in document_vecs:
        denom = input_norm * np.linalg.norm(doc_vec)
        # A zero-length vector would give 0/0 = NaN; NaN sorts last in
        # argsort and would therefore jump to rank 1 after the reversal.
        # Treat such a pair as "no similarity" instead.
        cos_sim.append(np.dot(input_vec, doc_vec) / denom if denom else 0.0)

    # One ordering drives both the titles and the scores; slicing (rather
    # than indexing 0..4) tolerates collections with fewer than five items.
    top_indices = np.argsort(np.array(cos_sim))[::-1][:rank_size]

    ranks = [
        f'{Title[idx]} \nコサイン類似度:{cos_sim[idx]}'
        for idx in top_indices
    ]
    # Pad so the caller always receives rank_size values.
    ranks.extend([''] * (rank_size - len(ranks)))

    return tuple(ranks)
51
+
52
# Build the search UI: one query box in, five ranked result boxes out
# (one per value returned by cos_calc), then start the Gradio server.
demo = gr.Interface(
    fn=cos_calc,
    inputs=gr.Textbox(label="検索ワード"),
    outputs=[
        gr.Textbox(label=rank_label)
        for rank_label in ('1位', '2位', '3位', '4位', '5位')
    ],
)

demo.launch()
document_vecs.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a34c2c32ff6772d96321f68b08b680deca82d5b352d44a6e955ad08041def64
3
+ size 148219