atrytone committed on
Commit
f9f623d
1 Parent(s): 90369c2

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -205
app.py DELETED
@@ -1,205 +0,0 @@
1
- import gradio as gr
2
- from langchain.vectorstores import FAISS
3
- from langchain.embeddings import HuggingFaceEmbeddings
4
- from transformers import LlamaTokenizer, LlamaForCausalLM, GenerationConfig, pipeline
5
- import textwrap
6
- import torch
7
-
8
# Fixed instruction prefix for the reviewer-fit prompt. `inference` appends the
# abstract under review, then the candidate abstract, directly after this text.
prompt = (
    'BEGINNING OF CONVERSATION: USER: '
    'I will provide you with two abstracts, I intend to use the author of the second to review the first. Tell me in a few words why or why not the second author is a good fit to review the first paper.\n'
    'Abstract To Be Reviewed: '
)
11
-
12
# Tokenizer for the Koala-7B checkpoint used to generate reviewer reasoning.
tokenizer = LlamaTokenizer.from_pretrained("samwit/koala-7b")

# Load the weights in 8-bit and let `device_map='auto'` decide placement.
# No explicit `device` is passed: `from_pretrained` has no such parameter (the
# stray keyword would be forwarded to the model constructor), and a CPU device
# contradicts 8-bit, automatically placed weights.
base_model = LlamaForCausalLM.from_pretrained(
    "samwit/koala-7b",
    load_in_8bit=True,
    device_map='auto',
)

# Text-generation pipeline shared by every request. The model is already placed
# by `device_map`, so the pipeline must not be asked to move it to a device.
# NOTE(review): `temperature` / `top_p` normally only take effect when sampling
# is enabled - confirm whether `do_sample=True` was intended.
pipe = pipeline(
    "text-generation",
    model=base_model,
    tokenizer=tokenizer,
    max_length=1024,
    temperature=0.7,
    top_p=0.95,
    repetition_penalty=1.15,
)
31
-
32
-
33
def wrap_text_preserve_newlines(text, width=110):
    """Wrap ``text`` to ``width`` columns while keeping its existing line breaks.

    The text is split on ``'\\n'``, each piece is wrapped on its own with
    ``textwrap.fill``, and the pieces are joined back with ``'\\n'``.

    Args:
        text: The string to wrap.
        width: Maximum line width handed to ``textwrap.fill``.

    Returns:
        The re-wrapped string.
    """
    return '\n'.join(
        textwrap.fill(segment, width=width) for segment in text.split('\n')
    )
41
-
42
-
43
def create_miread_embed(sents, bundle):
    """Embed ``sents`` on the CPU using a (tokenizer, model) pair.

    Args:
        sents: Text input passed straight to the tokenizer.
        bundle: Indexable pair; ``bundle[0]`` is the tokenizer callable and
            ``bundle[1]`` is a model exposing ``.cpu()`` and ``.bert(...)``.

    Returns:
        The slice ``last_hidden_state[:, 0, :]`` of the ``.bert`` output, moved
        to the CPU (presumably the first-token embedding per input - confirm
        against the model in use).
    """
    encode = bundle[0]
    encoder = bundle[1]

    # Move the model to the CPU before tokenising, matching where the inputs go.
    encoder.cpu()

    # Inputs are truncated/padded to at most 512 tokens and returned as tensors.
    batch = encode(
        sents,
        max_length=512,
        padding=True,
        truncation=True,
        return_tensors="pt",
    ).to(torch.device('cpu'))

    # Inference only: no gradients are tracked for the forward pass or the slice.
    with torch.no_grad():
        first_position = encoder.bert(**batch).last_hidden_state[:, 0, :]

    return first_position.cpu()
59
-
60
-
61
def get_matches(query, k):
    """Search the module-level ``vecdb`` index for the ``k`` nearest entries.

    Args:
        query: Text to search with.
        k: Number of results to request.

    Returns:
        Whatever ``vecdb.similarity_search_with_score`` returns; ``inference``
        treats each item as a ``(document, score)`` pair.
    """
    return vecdb.similarity_search_with_score(query, k=k)
64
-
65
-
66
def inference(query, k=30):
    """Rank stored abstracts against ``query`` and build the four result tables.

    Args:
        query: Abstract text to search with.
        k: Number of nearest matches to retrieve from the vector store.

    Returns:
        A list ``[a_output, j_output, n_output, r_output]`` of
        ``gr.Dataframe.update`` payloads for the abstracts, journals, authors
        and reviewers tables, in that order. All tables are empty when the
        search returns no matches.
    """
    # Function-scope import so this block does not rely on a module-level one.
    import ast

    # Coerce in case the caller hands over the count as a float.
    matches = get_matches(query, int(k))

    j_bucket = {}
    n_table = []
    a_table = []
    r_table = []

    # float() accepts both NumPy scalars and plain Python floats.
    raw_scores = [round(float(match[1]), 3) for match in matches]
    # Defaults keep an empty result set from raising in min()/max().
    min_score = min(raw_scores, default=0.0)
    max_score = max(raw_scores, default=0.0)

    def normalise(raw):
        # The minimum raw score maps to 1.0; larger raw scores map lower.
        # NOTE(review): the divisor is max_score rather than
        # (max_score - min_score), so the largest raw score does not map to 0;
        # kept as-is to avoid changing the displayed scores.
        if max_score == 0:
            # Avoid dividing by zero when every raw score is 0.
            return 1.0
        return round(1 - (raw - min_score) / max_score, 3)

    for rank, (match, raw) in enumerate(zip(matches, raw_scores), start=1):
        doc = match[0]
        meta = doc.metadata
        score = normalise(raw)

        title = meta['title']
        authors = meta['authors']
        if isinstance(authors, str):
            # Presumably the string form of a Python list; literal_eval parses
            # it without executing arbitrary code the way eval() would.
            authors = ast.literal_eval(authors)
        author = authors[0]
        date = meta.get('date', 'None')
        link = meta.get('link', 'None')
        submitter = meta.get('submitter', 'None')
        journal = meta.get('journal', 'None')

        # For journals: accumulate the normalised score per journal.
        j_bucket[journal] = j_bucket.get(journal, 0) + score

        # For authors
        n_table.append([rank, score, author, title, link, date])

        # For abstracts
        a_table.append(
            [rank, title, author, submitter, journal, date, link, score]
        )

        # For reviewer: ask the language model why this candidate fits.
        # The candidate abstract is the matched document's own text.
        # NOTE(review): Koala's chat format is believed to end the user turn
        # with ' GPT:' - confirm whether it should be appended here.
        output = pipe(
            prompt + query + '\n Candidate Abstract: ' + doc.page_content + '\n'
        )
        r_table.append(
            [rank, score, author, title, output[0]['generated_text'], link, date]
        )

    # Journals ordered by their accumulated score, highest first.
    ranked_journals = sorted(
        j_bucket.items(), key=lambda item: item[1], reverse=True
    )
    j_table = [
        [position, name, total]
        for position, (name, total) in enumerate(ranked_journals, start=1)
    ]

    j_output = gr.Dataframe.update(value=j_table, visible=True)
    n_output = gr.Dataframe.update(value=n_table, visible=True)
    a_output = gr.Dataframe.update(value=a_table, visible=True)
    r_output = gr.Dataframe.update(value=r_table, visible=True)

    return [a_output, j_output, n_output, r_output]
134
-
135
-
136
-
137
# Embedding model used to query the FAISS index; it runs on the CPU and its
# embeddings are left un-normalised.
model_name = "biodatlab/MIReAD-Neuro"
model_kwargs = dict(device='cpu')
encode_kwargs = dict(normalize_embeddings=False)
faiss_embedder = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)

# Vector store loaded from the local "faiss_index" directory; `get_matches`
# searches it.
vecdb = FAISS.load_local("faiss_index", faiss_embedder)
147
-
148
-
149
def _hidden_table(headers, datatype):
    """Create a fixed-column results table that stays hidden until filled."""
    return gr.Dataframe(
        headers=headers,
        datatype=datatype,
        col_count=(len(headers), "fixed"),
        wrap=True,
        visible=False,
    )


# Page layout: title and description, the query inputs, then one tab per
# result table. `inference` fills all four tables when the button is clicked.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# NBDT Recommendation Engine for Editors")
    gr.Markdown(
        "NBDT Recommendation Engine for Editors is a tool for neuroscience authors/abstracts/journalsrecommendation built for NBDT journal editors. "
        "It aims to help an editor to find similar reviewers, abstracts, and journals to a given submitted abstract."
        "To find a recommendation, paste a `title[SEP]abstract` or `abstract` in the text box below and click \"Find Matches\"."
        "Then, you can hover to authors/abstracts/journals tab to find a suggested list."
        "The data in our current demo is selected from 2018 to 2022. We will update the data monthly for an up-to-date publications."
    )

    # Query inputs.
    abst = gr.Textbox(label="Abstract", lines=10)
    k = gr.Slider(1, 100, step=1, value=50, label="Number of matches to consider")
    action_btn = gr.Button(value="Find Matches")

    # Result tables, one per tab.
    with gr.Tab("Authors"):
        n_output = _hidden_table(
            ['No.', 'Score', 'Name', 'Title', 'Link', 'Date'],
            ['number', 'number', 'str', 'str', 'str', 'str'],
        )
    with gr.Tab("Abstracts"):
        a_output = _hidden_table(
            ['No.', 'Title', 'Author', 'Corresponding Author', 'Journal', 'Date', 'Link', 'Score'],
            ['number', 'str', 'str', 'str', 'str', 'str', 'str', 'number'],
        )
    with gr.Tab("Journals"):
        j_output = _hidden_table(
            ['No.', 'Name', 'Score'],
            ['number', 'str', 'number'],
        )
    with gr.Tab("Reviewers New"):
        r_output = _hidden_table(
            ['No.', 'Score', 'Name', 'Title', 'Reasoning', 'Link', 'Date'],
            ['number', 'number', 'str', 'str', 'str', 'str', 'str'],
        )

    # The output order must match the list returned by `inference`.
    action_btn.click(
        fn=inference,
        inputs=[abst, k],
        outputs=[a_output, j_output, n_output, r_output],
        api_name="neurojane",
    )

demo.launch(debug=True)