atrytone committed on
Commit
c5ff415
1 Parent(s): f9f623d

Upload 3 files

Files changed (3)
  1. Build_VecStore.ipynb +0 -0
  2. NBDT_Data_Recs.ipynb +0 -0
  3. app.py +148 -0
Build_VecStore.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
NBDT_Data_Recs.ipynb ADDED
The diff for this file is too large to render. See raw diff
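The two notebook diffs are too large to render here, but `app.py` below expects a FAISS vector store saved as `nbdt_index` and built with `biodatlab/MIReAD-Neuro` embeddings. A minimal sketch of how such an index could be created with the same LangChain classes (presumably what `Build_VecStore.ipynb` does; the `abstracts` and `metadatas` lists are hypothetical placeholders, not code from this commit):

```python
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

# Same embedding configuration that app.py uses when loading the index.
embedder = HuggingFaceEmbeddings(
    model_name="biodatlab/MIReAD-Neuro",
    model_kwargs={'device': 'cpu'},
    encode_kwargs={'normalize_embeddings': False},
)

# Hypothetical inputs: one abstract per entry, with the metadata fields
# that app.py reads (title, authors, journal, date, link, submitter).
abstracts = ["..."]
metadatas = [{"title": "...", "authors": ["..."], "journal": "...",
              "date": "...", "link": "...", "submitter": "..."}]

vecdb = FAISS.from_texts(abstracts, embedder, metadatas=metadatas)
vecdb.save_local("nbdt_index")
```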
 
app.py ADDED
@@ -0,0 +1,148 @@
+ import gradio as gr
+ from langchain.vectorstores import FAISS
+ from langchain.embeddings import HuggingFaceEmbeddings
+ import torch
+
+
+ def create_miread_embed(sents, bundle):
+     # Embed a list of sentences with a (tokenizer, model) pair on CPU and
+     # return the [CLS] representation from the final hidden layer.
+     tokenizer = bundle[0]
+     model = bundle[1]
+     model.cpu()
+     tokens = tokenizer(sents,
+                        max_length=512,
+                        padding=True,
+                        truncation=True,
+                        return_tensors="pt"
+                        )
+     device = torch.device('cpu')
+     tokens = tokens.to(device)
+     with torch.no_grad():
+         out = model.bert(**tokens)
+     feature = out.last_hidden_state[:, 0, :]
+     return feature.cpu()
+
+
+ def get_matches(query, k):
+     # Retrieve the k nearest abstracts (with FAISS distance scores) for a query.
+     matches = vecdb.similarity_search_with_score(query, k=k)
+     return matches
+
+
+ def inference(query, k=30):
+     matches = get_matches(query, k)
+     j_bucket = {}
+     n_table = []
+     a_table = []
+     scores = [round(match[1].item(), 3) for match in matches]
+     min_score = min(scores)
+     max_score = max(scores)
+
+     def normaliser(x):
+         # Map FAISS distances to a relevance score: the closest match gets 1.0,
+         # larger distances get proportionally smaller scores.
+         return round(1 - (x - min_score) / max_score, 3)
+
+     for i, match in enumerate(matches):
+         doc = match[0]
+         score = normaliser(round(match[1].item(), 3))
+         title = doc.metadata['title']
+         author = doc.metadata['authors'][0]
+         date = doc.metadata.get('date', 'None')
+         link = doc.metadata.get('link', 'None')
+         submitter = doc.metadata.get('submitter', 'None')
+         journal = doc.metadata.get('journal', 'None')
+
+         # For journals: accumulate scores per journal
+         if journal not in j_bucket:
+             j_bucket[journal] = score
+         else:
+             j_bucket[journal] += score
+
+         # For authors
+         record = [i + 1,
+                   score,
+                   author,
+                   title,
+                   link,
+                   date]
+         n_table.append(record)
+
+         # For abstracts
+         record = [i + 1,
+                   title,
+                   author,
+                   submitter,
+                   journal,
+                   date,
+                   link,
+                   score]
+         a_table.append(record)
+
+     j_table = sorted([[journal, score] for journal, score in j_bucket.items()],
+                      key=lambda x: x[1], reverse=True)
+     j_table = [[i + 1, item[0], item[1]] for i, item in enumerate(j_table)]
+     j_output = gr.Dataframe.update(value=j_table, visible=True)
+     n_output = gr.Dataframe.update(value=n_table, visible=True)
+     a_output = gr.Dataframe.update(value=a_table, visible=True)
+
+     return [a_output, j_output, n_output]
+
+
+ model_name = "biodatlab/MIReAD-Neuro"
+ model_kwargs = {'device': 'cpu'}
+ encode_kwargs = {'normalize_embeddings': False}
+ faiss_embedder = HuggingFaceEmbeddings(
+     model_name=model_name,
+     model_kwargs=model_kwargs,
+     encode_kwargs=encode_kwargs
+ )
+
+ vecdb = FAISS.load_local("nbdt_index", faiss_embedder)
+
+
+ with gr.Blocks(theme=gr.themes.Soft()) as demo:
+     gr.Markdown("# NBDT Recommendation Engine for Editors")
+     gr.Markdown("NBDT Recommendation Engine for Editors is an author/abstract/journal recommendation tool built for NBDT journal editors. \
+         It aims to help an editor find reviewers, abstracts, and journals similar to a given submitted abstract. \
+         To get recommendations, paste a `title[SEP]abstract` or `abstract` in the text box below and click \"Find Matches\". \
+         Then, open the Authors, Abstracts, or Journals tab to see the suggested list. \
+         The data in the current demo is selected from 2018 to 2022. We will update the data monthly to keep the publications up to date.")
+
+     abst = gr.Textbox(label="Abstract", lines=10)
+
+     k = gr.Slider(1, 100, step=1, value=50,
+                   label="Number of matches to consider")
+
+     action_btn = gr.Button(value="Find Matches")
+
+     with gr.Tab("Authors"):
+         n_output = gr.Dataframe(
+             headers=['No.', 'Score', 'Name', 'Title', 'Link', 'Date'],
+             datatype=['number', 'number', 'str', 'str', 'str', 'str'],
+             col_count=(6, "fixed"),
+             wrap=True,
+             visible=False
+         )
+     with gr.Tab("Abstracts"):
+         a_output = gr.Dataframe(
+             headers=['No.', 'Title', 'Author', 'Corresponding Author',
+                      'Journal', 'Date', 'Link', 'Score'],
+             datatype=['number', 'str', 'str', 'str',
+                       'str', 'str', 'str', 'number'],
+             col_count=(8, "fixed"),
+             wrap=True,
+             visible=False
+         )
+     with gr.Tab("Journals"):
+         j_output = gr.Dataframe(
+             headers=['No.', 'Name', 'Score'],
+             datatype=['number', 'str', 'number'],
+             col_count=(3, "fixed"),
+             wrap=True,
+             visible=False
+         )
+
+     action_btn.click(fn=inference,
+                      inputs=[abst, k],
+                      outputs=[a_output, j_output, n_output],
+                      api_name="neurojane")
+
+ demo.launch(debug=True, share=True)
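
Because the click handler sets `api_name="neurojane"`, the running app also exposes the matcher as an API endpoint. A minimal sketch of querying it with `gradio_client` (assuming the app is served locally on Gradio's default port; adjust the URL for a deployed Space):

```python
from gradio_client import Client

# Assumption: the app from this commit is running locally via demo.launch().
client = Client("http://127.0.0.1:7860/")

# Arguments mirror the Gradio inputs: the abstract text and the number of matches.
result = client.predict("title[SEP]abstract text here", 30, api_name="/neurojane")
print(result)
```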