Spaces:
Sleeping
Sleeping
Commit
•
f079b05
1
Parent(s):
4a7f361
init
Browse files
app.py
ADDED
@@ -0,0 +1,384 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os, json
|
2 |
+
import gradio as gr
|
3 |
+
import pandas as pd
|
4 |
+
|
5 |
+
# Default extraction questions offered as checkboxes in the UI.
QUESTIONS = [
    "What is the DOI of this study?",
    "What is the Citation ID of this study?",
    "What is the First author of this study?",
    "What is the year of this study?",
    "What is the animal type of this study?",
    "What is the exposure age of this study?",
    "Is there any behavior test done in this study?",
    "What's the Intervention 1's name of this study?(anesthetics only)",
    "What's the Intervention 2's name of this study?(anesthetics only)",
    "What's the genetic chain of this study?",
]

# Prompt sent to the chat model, filled with ``template.format(document, questions)``.
# The two bare ``{}`` fields are the only substitution slots (document first, then
# the numbered question list); every doubled brace renders as a literal brace in
# the JSON skeleton shown to the model.
# The quoted example lists in items 7-9 now close with a plain ASCII quote; they
# previously closed with a typographic *opening* quote, leaving the list unbalanced.
template = '''We now have the following <document> in the medical field:

"""
{}
"""
We have some introduction here:

1. DOI: The DOI link for the article, usually can be found in the first line of the .txt file for the article. E.g., “DOI: 10.3892/mmr.2019.10397”.
2. Citation ID: The number in the file name. E.g., “1134”.
3. First author: The last name in the file name. E.g., “Guan”.
4. Year: The year in the file name. E.g., “2019”.
5. Animal type: The rodent type used in the article, should be one of the choices: mice, rats. E.g., “rats”.
6. Exposure age: The age when the animals were exposed to anesthetics, should be mentioned as "PND1", "PND7","postnatal day 7", "Gestational day 21", etc, which should be extract as: 'PND XX' , 'Gestational day xx'. E.g., “PND7”.
7. Behavior test: Whether there is any behavior test in the article, should be one of the choices: "Y", "N". "Y" is chosen if there are any of the behavior tests described and done in the article, which mentioned as: "Open field test", "Morris water task", "fear conditioning test", "Dark/light avoidance"; "passive/active avoidance test"; "elevated maze", "Forced swim test", "Object recognition test", "Social interaction/preference". E.g., “N”.
8. Intervention 1 & Intervention 2: Intervention 1 and Intervention 2 are both anesthetic drugs, which listed as: "isoflurane", "sevoflurane", "desflurane", "ketamine", "propofol", "Midazolam", "Nitrous oxide". If none, put “NA”. E.g., “propofol”.
9. Genetic chain: Genetic chain is the genetic type of the animals being used in the article, here is the examples:
"C57BL/6", "C57BL/6J" should be extracted as "C57BL/6"; "Sprague Dawley", "Sprague-Dawley", "SD" should be extracted as "Sprague Dawley"; "CD-1" should be extracted as "CD-1"; "Wistar/ST" should be extracted as "Wistar/ST"; "Wistar" should be extracted as "Wistar"; "FMR-1 KO" should be extracted as "FMR-1 KO". E.g., “Sprague Dawley”.

We have some <question>s begin with "Question" here:
"""
{}
"""

Please finish the following task:

1. Please select the <original sentences> related to each <question> from the <document>.
2. Please use the <original sentences> to answer the <question>.
3. Please provide <original sentences> coming from the <document>.
4. Output the <answer> in the following json format:

{{
    "Question 1": {{
        "question": {{}},
        "answer": {{}},
        "original sentences": []
    }},
    "Question 2": {{
        "question": {{}},
        "answer": {{}},
        "original sentences": []
    }},
    ...
}}
'''
|
62 |
+
|
63 |
+
|
64 |
+
import requests
|
65 |
+
|
66 |
+
class OpenAI:
    """Minimal client for the OpenAI chat-completions HTTP endpoint.

    The instance keeps an optional running conversation in ``self.history``
    (a list of ``{'role': ..., 'content': ...}`` message dicts). Calling the
    instance sends one user prompt and returns the assistant's reply text.
    """

    URL = 'https://api.openai.com/v1/chat/completions'

    def __init__(self, init_prompt = None):
        """Start with an empty history, or with ``init_prompt`` as a system message."""
        self.history = []
        if init_prompt is not None:
            self.history.append({'role': 'system', 'content': init_prompt})

    def clear_history(self):
        """Drop every stored message (including any initial system prompt)."""
        self.history = []

    def show_history(self):
        """Print the stored conversation, one ``role: content`` line per message."""
        for message in self.history:
            print(f"{message['role']}: {message['content']}")

    def get_raw_history(self):
        """Return the stored message list itself (not a copy)."""
        return self.history

    def __call__(self, prompt, with_history = False, model = 'gpt-3.5-turbo',
                 temperature = 0, api_key = None, timeout = 120):
        """Send ``prompt`` to the chat-completions endpoint and return the reply text.

        Args:
            prompt: Content of the user message.
            with_history: When true, the stored conversation is sent along with
                the prompt, and both the prompt and the reply are stored
                afterwards. When false, the call is stateless and the history
                is left untouched.
            model: Model name passed through to the API.
            temperature: Sampling temperature passed through to the API.
            api_key: Bearer token placed in the ``Authorization`` header.
            timeout: Seconds passed to ``requests.post`` so a stalled
                connection cannot block the caller forever.

        Returns:
            The ``content`` string of the first choice's message.

        Raises:
            RuntimeError: If the response is not JSON or carries no choices
                (for example an authentication or quota error). The API's own
                error message is included when present.
            requests.RequestException: On network failures or timeouts.
        """
        new_message = {'role': 'user', 'content': prompt}
        # Build the outgoing list without mutating the history yet, so a failed
        # request does not leave an unanswered user turn behind.
        if with_history:
            messages = self.history + [new_message]
        else:
            messages = [new_message]

        resp = requests.post(
            self.URL,
            json={
                'model': model,
                'messages': messages,
                'temperature': temperature,
            },
            headers={
                'Authorization': f"Bearer {api_key}"
            },
            timeout=timeout,
        )

        # Decode the body once; it is needed both for the reply and for errors.
        try:
            payload = resp.json()
        except ValueError as err:
            raise RuntimeError(
                f"OpenAI API returned a non-JSON response (HTTP {resp.status_code})"
            ) from err

        try:
            reply = payload['choices'][0]['message']
            content = reply['content']
        except (KeyError, IndexError, TypeError) as err:
            error = payload.get('error') if isinstance(payload, dict) else None
            detail = error.get('message', error) if isinstance(error, dict) else (error or payload)
            raise RuntimeError(
                f"OpenAI API request failed (HTTP {resp.status_code}): {detail}"
            ) from err

        # Only a conversational call records the exchange; stateless calls used
        # to append orphan assistant replies and grow the history without bound.
        if with_history:
            self.history.append(new_message)
            self.history.append(reply)

        return content
|
102 |
+
|
103 |
+
|
104 |
+
class Backend:
    """State holder and event handlers behind the Gradio review UI.

    One uploaded document is processed at a time. After ``process_file`` the
    instance holds the document text, the parsed model response, a cursor over
    the questions, and two DataFrames (the raw answers and the reviewer-corrected
    copy). The remaining methods are callbacks wired to UI buttons.
    """

    _NO_MORE = "No more questions!"
    _JUDGE_HINT = 'Please judge on the generated answer'
    _SAVE_HINT = 'Still need to click the button above to save the results'
    _NEED_DOCUMENT = "You need to submit the document first"

    def __init__(self):
        self.agent = OpenAI()

    def read_file(self, file):
        """Return the text of the uploaded file (``file.name`` is its path on disk)."""
        # Explicit encoding: the locale default is not guaranteed to be UTF-8.
        with open(file.name, 'r', encoding='utf-8') as f:
            return f.read()

    @staticmethod
    def _as_sentence_list(value):
        """Normalise the model's 'original sentences' field to a list of strings.

        The prompt asks for a list, but a bare string is tolerated so it is not
        iterated character by character.
        """
        if value is None:
            return []
        if isinstance(value, (list, tuple)):
            return [str(item) for item in value if item is not None]
        return [str(value)]

    def highlight_text(self, text, highlight_list):
        """Render ``text`` as scrollable HTML with every highlight wrapped in ``<mark>``.

        Args:
            text: Plain document text.
            highlight_list: Strings to highlight wherever they occur verbatim.

        Returns:
            An HTML fragment: escaped text, ``<mark>`` around matches, newlines
            turned into ``<br />``, all inside a fixed-height scrolling ``<div>``.
        """
        import html
        import re

        # The document and the model output are untrusted; escape before
        # embedding so markup in either cannot be injected into the page.
        text = html.escape(text, quote=False)

        # Empty strings are skipped (they would match between every character);
        # longest first so a phrase wins over a shorter phrase it contains.
        needles = sorted(
            {html.escape(hl, quote=False) for hl in self._as_sentence_list(highlight_list) if hl},
            key=len,
            reverse=True,
        )
        if needles:
            # Single pass, so a later highlight can never match inside the
            # <mark> markup inserted for an earlier one.
            pattern = re.compile('|'.join(re.escape(needle) for needle in needles))
            text = pattern.sub(
                lambda m: f'<mark style="background: #8B4513">{m.group(0)}</mark>',
                text,
            )

        # add line break
        text = text.replace('\n', " <br /> ")

        # add scroll bar
        return f'<div style="height: 500px; overflow: auto;">{text}</div>'

    def _load_current_question(self):
        """Load the entry under the cursor into ``self.answer`` / ``self.highlighted_out``.

        Returns:
            ``(question, highlighted_html)`` for the current question.

        Raises:
            gr.Error: If the model response lacks the expected entry or keys.
        """
        key = f'Question {self.curret_question + 1}'
        try:
            entry = self.gpt_result[key]
            question = entry['question']
            answer = entry['answer']
            sentences = self._as_sentence_list(entry['original sentences'])
        except (KeyError, TypeError) as err:
            raise gr.Error(f"The model response has no usable entry for '{key}'") from err

        self.answer = answer
        highlighted_out_html = self.highlight_text(self.text, sentences)
        self.highlighted_out = '\n'.join(sentences)
        return question, highlighted_out_html

    def _reset_judgements(self):
        """Forget the reviewer's Correct/Incorrect choices for the previous question."""
        self.clicked_correct_answer = None
        self.clicked_correct_reference = None

    def process_file(self, file, question, openai_key):
        """Ask the model all selected questions about the uploaded file.

        Args:
            file: Uploaded file object exposing ``.name`` (path on disk).
            question: List of selected question strings.
            openai_key: API key forwarded to the chat client.

        Returns:
            ``(question, answer, highlighted_html, answer, reference_text)`` for
            the first question.

        Raises:
            gr.Error: If no file or question was provided, or the model reply is
                not the expected JSON object.
        """
        if file is None:
            raise gr.Error("You need to upload a .txt file first")

        # Number the questions *after* filtering so the labels are always
        # "Question 1..k" with no gaps; the code below relies on "Question 1".
        selected = [q for q in (question or []) if 'Input question' not in q]
        if not selected:
            raise gr.Error("Please select at least one question")
        question_block = '\n'.join(
            f'Question {number}: {q}' for number, q in enumerate(selected, start=1)
        )

        # get the text
        text = self.read_file(file)

        # make the prompt
        prompt = template.format(text, question_block)

        # interact with openai
        raw = self.agent(prompt, with_history = False, temperature = 0.1,
                         model = 'gpt-3.5-turbo-16k', api_key = openai_key)
        try:
            res = json.loads(raw)
        except (TypeError, ValueError) as err:
            raise gr.Error("The model did not return valid JSON, please try again") from err
        if not isinstance(res, dict) or not res:
            raise gr.Error("The model returned an unexpected answer format, please try again")

        # Commit state only once the response is known to be usable.
        self.text = text
        self.gpt_result = res
        self.curret_question = 0
        self.totel_question = len(res)
        self._reset_judgements()

        # make a dataframe to record everything (one row per question)
        self.ori_answer_df = pd.DataFrame(res).T
        self.answer_df = self.ori_answer_df.copy()

        # default first question
        first_question, highlighted_out_html = self._load_current_question()
        return first_question, self.answer, highlighted_out_html, self.answer, self.highlighted_out

    def process_results(self, answer_correct, correct_answer, reference_correct, correct_reference):
        """Record the reviewer's verdict and corrections for the current question.

        Args:
            answer_correct: Radio value for the answer ("Correct"/"Incorrect").
            correct_answer: Reviewer-supplied answer, used when marked incorrect.
            reference_correct: Radio value for the reference.
            correct_reference: Reviewer-supplied reference, used when marked incorrect.

        Returns:
            The status string ``"Results saved!"``.

        Raises:
            gr.Error: If a judgement is missing, no document was submitted, or
                the cursor is outside the question range.
        """
        answer_ok = getattr(self, 'clicked_correct_answer', None)
        reference_ok = getattr(self, 'clicked_correct_reference', None)

        if answer_ok is None:
            raise gr.Error("You need to judge whether the generated answer is correct first")

        if reference_ok is None:
            raise gr.Error("You need to judge whether the highlighted reference is correct first")

        if not hasattr(self, 'answer_df'):
            raise gr.Error(self._NEED_DOCUMENT)

        if self.curret_question >= self.totel_question or self.curret_question < 0:
            raise gr.Error("No more questions, please return back")

        # record the answer
        row = f'Question {self.curret_question + 1}'
        self.answer_df.loc[row, 'answer_correct'] = answer_correct
        self.answer_df.loc[row, 'reference_correct'] = reference_correct

        # A "Correct" verdict keeps the model output; otherwise store the
        # reviewer's replacement text.
        self.answer_df.loc[row, 'correct_answer'] = self.answer if answer_ok else correct_answer
        self.answer_df.loc[row, 'correct_reference'] = (
            self.highlighted_out if reference_ok else correct_reference
        )

        gr.Info('Results saved!')
        return "Results saved!"

    def _step(self, delta):
        """Move the cursor by ``delta`` and return the eight UI output values.

        The cursor is clamped to one position before the first and one past the
        last question, so repeated clicks at either end do not drift further
        away and a single click in the other direction returns to a question.
        """
        if not hasattr(self, 'gpt_result'):
            raise gr.Error(self._NEED_DOCUMENT)

        self.curret_question = max(-1, min(self.curret_question + delta, self.totel_question))
        self._reset_judgements()

        if not 0 <= self.curret_question < self.totel_question:
            return (self._NO_MORE, self._NO_MORE, self._NO_MORE, self._NO_MORE,
                    self._NO_MORE, self._SAVE_HINT, None, None)

        question, highlighted_out_html = self._load_current_question()
        return (question, self.answer, highlighted_out_html, self._JUDGE_HINT,
                self._JUDGE_HINT, self._SAVE_HINT, None, None)

    def process_next(self):
        """Advance to the next question; see ``_step`` for the returned tuple."""
        return self._step(+1)

    def process_last(self):
        """Go back to the previous question; see ``_step`` for the returned tuple."""
        return self._step(-1)

    def _export(self, attr, path, name):
        """Write the DataFrame stored under ``attr`` to ``path/name`` and return that path."""
        frame = getattr(self, attr, None)
        if frame is None:
            raise gr.Error(self._NEED_DOCUMENT)
        os.makedirs(path, exist_ok = True)
        target = os.path.join(path, name)
        frame.to_excel(target, index = False)
        return target

    def download_answer(self, path = './tmp', name = 'answer.xlsx'):
        """Export the unmodified model answers as an Excel file and return its path."""
        return self._export('ori_answer_df', path, name)

    def download_corrected(self, path = './tmp', name = 'corrected_answer.xlsx'):
        """Export the reviewer-corrected answers as an Excel file and return its path."""
        return self._export('answer_df', path, name)

    def change_correct_answer(self, correctness):
        """Handle the answer radio: remember the verdict and pre-fill the correction box."""
        if correctness == "Correct":
            self.clicked_correct_answer = True
            return "No need to change"
        if not hasattr(self, 'answer'):
            return "No answer yet, you need to submit the document first"
        self.clicked_correct_answer = False
        return self.answer

    def change_correct_reference(self, correctness):
        """Handle the reference radio: remember the verdict and pre-fill the correction box."""
        if correctness == "Correct":
            self.clicked_correct_reference = True
            return "No need to change"
        if not hasattr(self, 'highlighted_out'):
            return "No answer yet, you need to submit the document first"
        self.clicked_correct_reference = False
        return self.highlighted_out
|
271 |
+
|
272 |
+
|
273 |
+
# Build the Gradio interface and wire each control to a Backend handler.
with gr.Blocks(theme="dark") as demo:
    # NOTE(review): a single Backend is created here and its bound methods are
    # used as handlers, while Backend keeps per-document state on the instance.
    # Concurrent visitors would therefore appear to share that state; consider
    # per-session state (e.g. gr.State) - confirm against deployment needs.
    backend = Backend()
    with gr.Row():
        with gr.Row():
            with gr.Group():
                gr.Markdown('<center><h1>Input</h1></center>')
                gr.Markdown('<center><p>Please First Upload the File</p></center>')

                openai_key = gr.Textbox(
                    label='Enter your OpenAI API key here',
                    type='password')

                file = gr.File(label='Upload your .txt file here', file_types=['.txt'])

                questions = gr.CheckboxGroup(choices = QUESTIONS, value = QUESTIONS, label="Questions", info="Please select the question you want to ask")

                btn_submit_txt = gr.Button(value='Submit txt')
                btn_submit_txt.style(full_width=True)

            with gr.Group():
                gr.Markdown('<center><h1>Output</h1></center>')
                gr.Markdown('<center><p>The answer to your question is :</p></center>')
                question_box = gr.Textbox(label='Question')
                answer_box = gr.Textbox(label='Answer')

                # gr.HTML is the component class; the gr.outputs namespace is deprecated.
                highlighted_text = gr.HTML(label="Highlighted Text")
                with gr.Row():
                    btn_last_question = gr.Button(value='Last Question')
                    btn_next_question = gr.Button(value='Next Question')

            with gr.Group():
                gr.Markdown('<center><h1>Correct the Result</h1></center>')
                gr.Markdown('<center><p>Please Correct the Results</p></center>')

                with gr.Row():
                    save_results = gr.Textbox(placeholder = "Still need to click the button above to save the results", label = 'Save Results')
                with gr.Group():
                    gr.Markdown('<center><p>Please Choose: </p></center>')
                    answer_correct = gr.Radio(choices = ["Correct", "Incorrect"], label='Is the Generated Answer Correct?', info="Please select whether the generated text is correct")
                    correct_answer = gr.Textbox(placeholder = "Please judge on the generated answer", label = 'Correct Answer', interactive = True)

                    reference_correct = gr.Radio(choices = ["Correct", "Incorrect"], label="Is the Reference Correct?", info="Please select whether the reference is correct")
                    correct_reference = gr.Textbox(placeholder = "Please judge on the generated answer", label = 'Correct Reference', interactive = True)

                btn_submit_correctness = gr.Button(value='Submit Correctness')
                btn_submit_correctness.style(full_width=True)

            with gr.Group():
                gr.Markdown('<center><h1>Download</h1></center>')
                gr.Markdown('<center><p>Download the processed data and corrected data</p></center>')
                answer_file = gr.File(label='Download processed data', file_types=['.xlsx'])
                btn_download_answer = gr.Button(value='Download processed data')
                btn_download_answer.style(full_width=True)
                corrected_file = gr.File(label='Download corrected data', file_types=['.xlsx'])
                btn_download_corrected = gr.Button(value='Download corrected data')
                btn_download_corrected.style(full_width=True)

    with gr.Row():
        # NOTE(review): this button is rendered but no click handler is
        # attached anywhere below, so pressing it currently does nothing.
        reset = gr.Button(value='Reset')
        reset.style(full_width=True)

    # Answer change: choosing Correct/Incorrect pre-fills the matching correction box
    answer_correct.input(
        backend.change_correct_answer,
        inputs = [answer_correct],
        outputs = [correct_answer],
    )

    reference_correct.input(
        backend.change_correct_reference,
        inputs = [reference_correct],
        outputs = [correct_reference],
    )

    # Submit button: query the model and show the first question
    btn_submit_txt.click(
        backend.process_file,
        inputs=[file, questions, openai_key],
        outputs=[question_box, answer_box, highlighted_text, correct_answer, correct_reference],
    )

    btn_submit_correctness.click( # TODO
        backend.process_results,
        inputs=[answer_correct, correct_answer, reference_correct, correct_reference],
        outputs=[save_results],
    )

    # Switch question button: both handlers return the same eight outputs
    question_outputs = [question_box, answer_box, highlighted_text, correct_answer, correct_reference, save_results, answer_correct, reference_correct]

    btn_last_question.click(
        backend.process_last,
        outputs=question_outputs,
    )

    btn_next_question.click(
        backend.process_next,
        outputs=question_outputs,
    )

    # Download button
    btn_download_answer.click(
        backend.download_answer,
        outputs=[answer_file],
    )

    btn_download_corrected.click(
        backend.download_corrected,
        outputs=[corrected_file],
    )
demo.queue()
demo.launch()
|