oliverwang15 committed on
Commit
478965d
1 Parent(s): e329cea

updates on the new prompt and better features

Browse files
Files changed (6) hide show
  1. .gitignore +1 -0
  2. app.py +36 -283
  3. backend.py +189 -0
  4. openai.py +38 -0
  5. prompt.py +91 -0
  6. template.py +177 -0
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ __pycache__
app.py CHANGED
@@ -1,305 +1,56 @@
 
 
 
1
  import os, json
2
  import gradio as gr
3
  import pandas as pd
4
 
5
- QUESTIONS = [
6
- "What is the DOI of this study?",
7
- "What is the Citation ID of this study?",
8
- "What is the First author of this study?",
9
- "What is the year of this study?",
10
- "What is the animal type of this study?",
11
- "What is the exposure age of this study?",
12
- "Is there any behavior test done in this study?",
13
- "What's the Intervention 1's name of this study?(anesthetics only)",
14
- "What's the Intervention 2's name of this study?(anesthetics only)",
15
- "What's the genetic chain of this study?",
16
- ]
17
-
18
- template = '''We now have a following <document> in the medical field:
19
-
20
- """
21
- {}
22
- """
23
- We have some introduction here:
24
-
25
- 1. DOI: The DOI link for the article, usually can be found in the first line of the .txt file for the article. E.g., “DOI: 10.3892/mmr.2019.10397”.
26
- 2. Citation ID: The number in the file name. E.g., “1134”.
27
- 3. First author: The last name in the file name. E.g., “Guan”.
28
- 4. Year: The year in the file name. E.g., “2019”.
29
- 5. Animal type: The rodent type used in the article, should be one of the choices: mice, rats. E.g., “rats”.
30
- 6. Exposure age: The age when the animals were exposed to anesthetics, should be mentioned as "PND1", "PND7","postnatal day 7", "Gestational day 21", etc, which should be extract as: 'PND XX' , 'Gestational day xx'. E.g., “PND7”.
31
- 7. Behavior test: Whether there is any behavior test in the article, should be one of the choices: "Y", "N". "Y" is chosen if there are any of the behavior tests described and done in the article, which mentioned as: "Open field test", "Morris water task", "fear conditioning test", "Dark/light avoidance"; "passive/active avoidance test"; "elevated maze", "Forced swim test", "Object recognition test", "Social interaction/preference“. E.g., “N”.
32
- 8. Intervention 1 & Intervention 2: Intervention 1 and Intervention 2 are both anesthetic drugs, which listed as: "isoflurane", "sevoflurane", "desflurane", "ketamine", "propofol", "Midazolam", "Nitrous oxide“. If none, put “NA”. E.g., “propofol”.
33
- 9. Genetic chain: Genetic chain is the genetic type of the animals being used in the article, here is the examples:
34
- "C57BL/6", "C57BL/6J" should be extracted as "C57BL/6"; "Sprague Dawley", "Sprague-Dawley", "SD" should be extracted as "Sprague Dawley"; "CD-1" should be extracted as "CD-1"; "Wistar/ST" should be extracted as "Wistar/ST"; "Wistar" should be extracted as "Wistar"; "FMR-1 KO" should be extracted as "FMR-1 KO“. E.g., “Sprague Dawley”.
35
-
36
- We have some <question>s begin with "Question" here:
37
- """
38
- {}
39
- """
40
-
41
- Please finish the following task:
42
-
43
- 1. Please select the <original sentences> related the each <question> from the <document>.
44
- 2. Please use the <original sentences> to answer the <question>.
45
- 3. Please provide <original sentences> coming from the <document>.
46
- 4. Output the <answer> in the following json format:
47
-
48
- {{
49
- "Question 1": {{
50
- "question": {{}},
51
- "answer": {{}},
52
- "original sentences": []
53
- }},
54
- "Question 2": {{
55
- "question": {{}},
56
- "answer": {{}},
57
- "original sentences": []
58
- }},
59
- ...
60
- }}
61
- '''
62
-
63
-
64
- import requests
65
-
66
- class OpenAI:
67
- def __init__(self, init_prompt = None):
68
- self.history = []
69
- if init_prompt is not None:
70
- self.history.append({'role': 'system', 'content': init_prompt})
71
-
72
- def clear_history(self):
73
- self.history = []
74
-
75
- def show_history(self):
76
- for message in self.history:
77
- print(f"{message['role']}: {message['content']}")
78
-
79
- def get_raw_history(self):
80
- return self.history
81
-
82
- def __call__(self, prompt, with_history = False, model = 'gpt-3.5-turbo', temperature = 0, api_key = None):
83
- URL = 'https://api.openai.com/v1/chat/completions'
84
- new_message = {'role': 'user', 'content': prompt}
85
- if with_history:
86
- self.history.append(new_message)
87
- messages = self.history
88
- else:
89
- messages = [new_message]
90
-
91
- resp = requests.post(URL, json={
92
- 'model': model,
93
- 'messages': messages,
94
- 'temperature': temperature,
95
- }, headers={
96
- 'Authorization': f"Bearer {api_key}"
97
- })
98
- # print(resp.json())
99
- self.history.append(resp.json()['choices'][0]['message'])
100
-
101
- return resp.json()['choices'][0]['message']['content']
102
-
103
-
104
- class Backend:
105
- def __init__(self):
106
- self.agent = OpenAI()
107
-
108
- def read_file(self, file):
109
- # read the file
110
- with open(file.name, 'r') as f:
111
- text = f.read()
112
- return text
113
-
114
- def highlight_text(self, text, highlight_list):
115
- # hightlight the reference
116
- for hl in highlight_list:
117
- text = text.replace(hl, f'<mark style="background: #A5D2F1">{hl}</mark>')
118
-
119
- # add line break
120
- text = text.replace('\n', f" <br /> ")
121
-
122
- # add scroll bar
123
- text = f'<div style="height: 500px; overflow: auto;">{text}</div>'
124
-
125
- return text
126
-
127
- def process_file(self, file, question, openai_key):
128
- # get the question
129
- question = [ f'Question {id_ +1 }: {q}' for id_, q in enumerate(question) if 'Input question' not in q]
130
- question = '\n'.join(question)
131
-
132
- # get the text
133
- self.text = self.read_file(file)
134
-
135
- # make the prompt
136
- prompt = template.format(self.text, question)
137
-
138
- # interact with openai
139
- res = self.agent(prompt, with_history = False, temperature = 0.1, model = 'gpt-3.5-turbo-16k', api_key = openai_key)
140
- res = json.loads(res)
141
-
142
- # for multiple questions
143
- self.gpt_result = res
144
- self.curret_question = 0
145
- self.totel_question = len(res.keys())
146
-
147
- # make a dataframe to record everything
148
- self.ori_answer_df = pd.DataFrame(res).T
149
- self.answer_df = pd.DataFrame(res).T
150
-
151
- # default fist question
152
- res = res['Question 1']
153
- question = res['question']
154
- self.answer = res['answer']
155
- self.highlighted_out = res['original sentences']
156
- highlighted_out_html = self.highlight_text(self.text, self.highlighted_out)
157
- self.highlighted_out = '\n'.join(self.highlighted_out)
158
-
159
- return question, self.answer, highlighted_out_html, self.answer, self.highlighted_out
160
-
161
- def process_results(self, answer_correct, correct_answer, reference_correct, correct_reference):
162
- if not hasattr(self, 'clicked_correct_answer'):
163
- raise gr.Error("You need to judge whether the generated answer is correct first")
164
-
165
- if not hasattr(self, 'clicked_correct_reference'):
166
- raise gr.Error("You need to judge whether the highlighted reference is correct first")
167
-
168
- if not hasattr(self, 'answer_df'):
169
- raise gr.Error("You need to submit the document first")
170
 
171
- if self.curret_question >= self.totel_question or self.curret_question < 0:
172
- raise gr.Error("No more questions, please return back")
173
-
174
- # record the answer
175
- self.answer_df.loc[f'Question {self.curret_question + 1}', 'answer_correct'] = answer_correct
176
- self.answer_df.loc[f'Question {self.curret_question + 1}', 'reference_correct'] = reference_correct
177
-
178
- if self.clicked_correct_answer == True:
179
- if hasattr(self, 'answer'):
180
- self.answer_df.loc[f'Question {self.curret_question + 1}', 'correct_answer'] = self.answer
181
- else:
182
- raise gr.Error("You need to submit the document first")
183
- else:
184
- self.answer_df.loc[f'Question {self.curret_question + 1}', 'correct_answer'] = correct_answer
185
-
186
- if self.clicked_correct_reference == True:
187
- if hasattr(self, 'highlighted_out'):
188
- self.answer_df.loc[f'Question {self.curret_question + 1}', 'correct_reference'] = self.highlighted_out
189
- else:
190
- raise gr.Error("You need to submit the document first")
191
- else:
192
- self.answer_df.loc[f'Question {self.curret_question + 1}', 'correct_reference'] = correct_reference
193
-
194
- gr.Info('Results saved!')
195
- return "Results saved!"
196
-
197
- def process_next(self):
198
- self.curret_question += 1
199
- if hasattr(self, 'clicked_correct_answer'):
200
- del self.clicked_correct_answer
201
- if hasattr(self, 'clicked_correct_reference'):
202
- del self.clicked_correct_reference
203
-
204
- if self.curret_question >= self.totel_question:
205
- # self.curret_question -= 1
206
- return "No more questions!", "No more questions!", "No more questions!", 'No more questions!', 'No more questions!', 'Still need to click the button above to save the results', None, None
207
- else:
208
- res = self.gpt_result[f'Question {self.curret_question + 1}']
209
- question = res['question']
210
- self.answer = res['answer']
211
- self.highlighted_out = res['original sentences']
212
- highlighted_out_html = self.highlight_text(self.text, self.highlighted_out)
213
- self.highlighted_out = '\n'.join(self.highlighted_out)
214
- return question, self.answer, highlighted_out_html, 'Please judge on the generated answer', 'Please judge on the generated answer', 'Still need to click the button above to save the results', None, None
215
-
216
- def process_last(self):
217
- self.curret_question -= 1
218
-
219
- if hasattr(self, 'clicked_correct_answer'):
220
- del self.clicked_correct_answer
221
- if hasattr(self, 'clicked_correct_reference'):
222
- del self.clicked_correct_reference
223
-
224
- if self.curret_question < 0:
225
- # self.curret_question += 1
226
- return "No more questions!", "No more questions!", "No more questions!", 'No more questions!', 'No more questions!', 'Still need to click the button above to save the results', None, None
227
- else:
228
- res = self.gpt_result[f'Question {self.curret_question + 1}']
229
- question = res['question']
230
- self.answer = res['answer']
231
- self.highlighted_out = res['original sentences']
232
- highlighted_out_html = self.highlight_text(self.text, self.highlighted_out)
233
- self.highlighted_out = '\n'.join(self.highlighted_out)
234
- return question, self.answer, highlighted_out_html, 'Please judge on the generated answer', 'Please judge on the generated answer', 'Still need to click the button above to save the results', None, None
235
-
236
- def download_answer(self, path = './tmp', name = 'answer.xlsx'):
237
- os.makedirs(path, exist_ok = True)
238
- path = os.path.join(path, name)
239
- self.ori_answer_df.to_excel(path, index = False)
240
-
241
- return path
242
-
243
- def download_corrected(self, path = './tmp', name = 'corrected_answer.xlsx'):
244
- os.makedirs(path, exist_ok = True)
245
- path = os.path.join(path, name)
246
- self.answer_df.to_excel(path, index = False)
247
-
248
- return path
249
-
250
- def change_correct_answer(self, correctness):
251
- if correctness == "Correct":
252
- self.clicked_correct_answer = True
253
- return "No need to change"
254
- else:
255
- if hasattr(self, 'answer'):
256
- self.clicked_correct_answer = False
257
- return self.answer
258
- else:
259
- return "No answer yet, you need to submit the document first"
260
-
261
- def change_correct_reference(self, correctness):
262
- if correctness == "Correct":
263
- self.clicked_correct_reference = True
264
- return "No need to change"
265
- else:
266
- if hasattr(self, 'highlighted_out'):
267
- self.clicked_correct_reference = False
268
- return self.highlighted_out
269
- else:
270
- return "No answer yet, you need to submit the document first"
271
 
 
 
 
 
 
 
 
 
272
 
273
  with gr.Blocks(theme="dark") as demo:
274
  backend = Backend()
275
  with gr.Row():
276
  with gr.Row():
 
277
  with gr.Group():
278
  gr.Markdown(f'<center><h1>Input</h1></center>')
279
  gr.Markdown(f'<center><p>Please First Upload the File</p></center>')
280
 
281
  openai_key = gr.Textbox(
282
- label='Enter your OpenAI API key here',
283
  type='password')
284
-
285
  file = gr.File(label='Upload your .txt file here', file_types=['.txt'])
286
-
287
  questions = gr.CheckboxGroup(choices = QUESTIONS, value = QUESTIONS, label="Questions", info="Please select the question you want to ask")
288
 
289
  btn_submit_txt = gr.Button(value='Submit txt')
290
  btn_submit_txt.style(full_width=True)
291
 
 
292
  with gr.Group():
293
  gr.Markdown(f'<center><h1>Output</h1></center>')
294
  gr.Markdown(f'<center><p>The answer to your question is :</p></center>')
295
  question_box = gr.Textbox(label='Question')
296
  answer_box = gr.Textbox(label='Answer')
 
297
 
298
  highlighted_text = gr.outputs.HTML(label="Highlighted Text")
299
  with gr.Row():
300
  btn_last_question = gr.Button(value='Last Question')
301
  btn_next_question = gr.Button(value='Next Question')
302
-
 
303
  with gr.Group():
304
  gr.Markdown(f'<center><h1>Correct the Result</h1></center>')
305
  gr.Markdown(f'<center><p>Please Correct the Results</p></center>')
@@ -310,13 +61,14 @@ with gr.Blocks(theme="dark") as demo:
310
  gr.Markdown(f'<center><p>Please Choose: </p></center>')
311
  answer_correct = gr.Radio(choices = ["Correct", "Incorrect"], label='Is the Generated Answer Correct?', info="Pease select whether the generated text is correct")
312
  correct_answer = gr.Textbox(placeholder = "Please judge on the generated answer", label = 'Correct Answer', interactive = True)
313
-
314
  reference_correct = gr.Radio(choices = ["Correct", "Incorrect"], label="Is the Reference Correct?", info="Pease select whether the reference is correct")
315
  correct_reference = gr.Textbox(placeholder = "Please judge on the generated answer", label = 'Correct Reference', interactive = True)
316
-
317
  btn_submit_correctness = gr.Button(value='Submit Correctness')
318
  btn_submit_correctness.style(full_width=True)
319
-
 
320
  with gr.Group():
321
  gr.Markdown(f'<center><h1>Download</h1></center>')
322
  gr.Markdown(f'<center><p>Download the processed data and corrected data</p></center>')
@@ -326,8 +78,8 @@ with gr.Blocks(theme="dark") as demo:
326
  corrected_file = gr.File(label='Download corrected data', file_types=['.xlsx'])
327
  btn_download_corrected = gr.Button(value='Download corrected data')
328
  btn_download_corrected.style(full_width=True)
329
-
330
-
331
  with gr.Row():
332
  reset = gr.Button(value='Reset')
333
  reset.style(full_width=True)
@@ -344,41 +96,42 @@ with gr.Blocks(theme="dark") as demo:
344
  inputs = [reference_correct],
345
  outputs = [correct_reference],
346
  )
347
-
348
 
349
  # Submit button
350
  btn_submit_txt.click(
351
  backend.process_file,
352
  inputs=[file, questions, openai_key],
353
- outputs=[question_box, answer_box, highlighted_text, correct_answer, correct_reference],
354
  )
355
-
356
  btn_submit_correctness.click( # TODO
357
  backend.process_results,
358
  inputs=[answer_correct, correct_answer, reference_correct, correct_reference],
359
  outputs=[save_results],
360
  )
361
-
362
  # Switch question button
363
  btn_last_question.click(
364
  backend.process_last,
365
- outputs=[question_box, answer_box, highlighted_text, correct_answer, correct_reference, save_results, answer_correct, reference_correct],
366
  )
367
 
368
  btn_next_question.click(
369
  backend.process_next,
370
- outputs=[question_box, answer_box, highlighted_text, correct_answer, correct_reference, save_results, answer_correct, reference_correct],
371
  )
372
-
373
  # Download button
374
  btn_download_answer.click(
375
  backend.download_answer,
376
  outputs=[answer_file],
377
  )
378
-
379
  btn_download_corrected.click(
380
  backend.download_corrected,
381
  outputs=[corrected_file],
382
  )
 
383
  demo.queue()
384
- demo.launch()
 
1
+ import warnings
2
+ warnings.filterwarnings("ignore")
3
+
4
  import os, json
5
  import gradio as gr
6
  import pandas as pd
7
 
8
+ from backend import Backend
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
+ QUESTIONS = [
12
+ "Animal Type",
13
+ "Exposure Age",
14
+ "Behavior Test",
15
+ "Intervention 1",
16
+ "Intervention 2",
17
+ "Genetic Chain",
18
+ ]
19
 
20
  with gr.Blocks(theme="dark") as demo:
21
  backend = Backend()
22
  with gr.Row():
23
  with gr.Row():
24
+ # Update
25
  with gr.Group():
26
  gr.Markdown(f'<center><h1>Input</h1></center>')
27
  gr.Markdown(f'<center><p>Please First Upload the File</p></center>')
28
 
29
  openai_key = gr.Textbox(
30
+ label='Enter your OpenAI API key here',
31
  type='password')
32
+
33
  file = gr.File(label='Upload your .txt file here', file_types=['.txt'])
34
+
35
  questions = gr.CheckboxGroup(choices = QUESTIONS, value = QUESTIONS, label="Questions", info="Please select the question you want to ask")
36
 
37
  btn_submit_txt = gr.Button(value='Submit txt')
38
  btn_submit_txt.style(full_width=True)
39
 
40
+ # Output
41
  with gr.Group():
42
  gr.Markdown(f'<center><h1>Output</h1></center>')
43
  gr.Markdown(f'<center><p>The answer to your question is :</p></center>')
44
  question_box = gr.Textbox(label='Question')
45
  answer_box = gr.Textbox(label='Answer')
46
+ reference_box = gr.Textbox(label='Reference')
47
 
48
  highlighted_text = gr.outputs.HTML(label="Highlighted Text")
49
  with gr.Row():
50
  btn_last_question = gr.Button(value='Last Question')
51
  btn_next_question = gr.Button(value='Next Question')
52
+
53
+ # Correctness
54
  with gr.Group():
55
  gr.Markdown(f'<center><h1>Correct the Result</h1></center>')
56
  gr.Markdown(f'<center><p>Please Correct the Results</p></center>')
 
61
  gr.Markdown(f'<center><p>Please Choose: </p></center>')
62
  answer_correct = gr.Radio(choices = ["Correct", "Incorrect"], label='Is the Generated Answer Correct?', info="Pease select whether the generated text is correct")
63
  correct_answer = gr.Textbox(placeholder = "Please judge on the generated answer", label = 'Correct Answer', interactive = True)
64
+
65
  reference_correct = gr.Radio(choices = ["Correct", "Incorrect"], label="Is the Reference Correct?", info="Pease select whether the reference is correct")
66
  correct_reference = gr.Textbox(placeholder = "Please judge on the generated answer", label = 'Correct Reference', interactive = True)
67
+
68
  btn_submit_correctness = gr.Button(value='Submit Correctness')
69
  btn_submit_correctness.style(full_width=True)
70
+
71
+ # Download
72
  with gr.Group():
73
  gr.Markdown(f'<center><h1>Download</h1></center>')
74
  gr.Markdown(f'<center><p>Download the processed data and corrected data</p></center>')
 
78
  corrected_file = gr.File(label='Download corrected data', file_types=['.xlsx'])
79
  btn_download_corrected = gr.Button(value='Download corrected data')
80
  btn_download_corrected.style(full_width=True)
81
+
82
+
83
  with gr.Row():
84
  reset = gr.Button(value='Reset')
85
  reset.style(full_width=True)
 
96
  inputs = [reference_correct],
97
  outputs = [correct_reference],
98
  )
99
+
100
 
101
  # Submit button
102
  btn_submit_txt.click(
103
  backend.process_file,
104
  inputs=[file, questions, openai_key],
105
+ outputs=[question_box, answer_box, reference_box, highlighted_text, correct_answer, correct_reference],
106
  )
107
+
108
  btn_submit_correctness.click( # TODO
109
  backend.process_results,
110
  inputs=[answer_correct, correct_answer, reference_correct, correct_reference],
111
  outputs=[save_results],
112
  )
113
+
114
  # Switch question button
115
  btn_last_question.click(
116
  backend.process_last,
117
+ outputs=[question_box, answer_box, reference_box, highlighted_text, correct_answer, correct_reference, save_results, answer_correct, reference_correct],
118
  )
119
 
120
  btn_next_question.click(
121
  backend.process_next,
122
+ outputs=[question_box, answer_box, reference_box, highlighted_text, correct_answer, correct_reference, save_results, answer_correct, reference_correct],
123
  )
124
+
125
  # Download button
126
  btn_download_answer.click(
127
  backend.download_answer,
128
  outputs=[answer_file],
129
  )
130
+
131
  btn_download_corrected.click(
132
  backend.download_corrected,
133
  outputs=[corrected_file],
134
  )
135
+
136
  demo.queue()
137
+ demo.launch(show_error=True, show_tips=True)
backend.py ADDED
@@ -0,0 +1,189 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from prompt import Prompt
2
+ from openai import OpenAI
3
+ from fuzzywuzzy import fuzz
4
+ from fuzzywuzzy import process
5
+
6
+ import gradio as gr
7
+ import pandas as pd
8
+ import os
9
+
10
class Backend:
    """Session state and event handlers behind the Gradio review UI.

    One instance holds the uploaded document, the parsed model answers and
    the reviewer's corrections. The attribute names ``curret_question`` and
    ``totel_question`` keep their historical spelling so that any external
    code reading them continues to work.
    """

    def __init__(self):
        self.agent = OpenAI()
        self.prompt = Prompt()

    # ------------------------------------------------------------------
    # helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _as_sentences(value):
        """Normalise an "original sentences" value to a list of strings.

        The model is asked for a list but may return a single string or
        null; iterating such a value directly would walk it character by
        character (or fail).
        """
        if value is None:
            return []
        if isinstance(value, str):
            return [value]
        return [str(item) for item in value]

    def _question_label(self, index):
        """Return the label for question ``index``.

        Falls back to ``'Question <n>'`` when fewer questions were ticked
        in the UI than answers were returned.
        """
        questions = getattr(self, 'questions', None) or []
        if 0 <= index < len(questions):
            return questions[index]
        return f'Question {index + 1}'

    def _load_question(self, index):
        """Make question ``index`` current.

        Returns:
            ``(question, answer, reference_text, highlighted_html)``.
        """
        res = self.gpt_result[f'Question {index + 1}']
        self.answer = res['answer']
        sentences = self._as_sentences(res['original sentences'])
        highlighted_out_html = self.highlight_text(self.text, sentences)
        self.highlighted_out = '\n'.join(sentences)
        return self._question_label(index), self.answer, self.highlighted_out, highlighted_out_html

    def _reset_judgement(self):
        """Forget the Correct/Incorrect choices made for the previous question."""
        for flag in ('clicked_correct_answer', 'clicked_correct_reference'):
            if hasattr(self, flag):
                delattr(self, flag)

    def _step(self, delta):
        """Move ``delta`` questions forward/backward and build the UI outputs."""
        if not hasattr(self, 'gpt_result'):
            raise gr.Error("You need to submit the document first")

        # Allow exactly one step past either end (that is where the
        # "No more questions!" screen lives) but no further; otherwise
        # repeated clicks would have to be undone one by one.
        target = self.curret_question + delta
        self.curret_question = max(-1, min(self.totel_question, target))
        self._reset_judgement()

        if not 0 <= self.curret_question < self.totel_question:
            return "No more questions!", "No more questions!", "No more questions!", "No more questions!", 'No more questions!', 'No more questions!', 'Still need to click the button above to save the results', None, None

        question, answer, reference, highlighted_out_html = self._load_question(self.curret_question)
        return question, answer, reference, highlighted_out_html, 'Please judge on the generated answer', 'Please judge on the generated answer', 'Still need to click the button above to save the results', None, None

    def _export(self, frame_name, path, name):
        """Write the data frame stored under ``frame_name`` to ``path/name``."""
        if not hasattr(self, frame_name):
            raise gr.Error("You need to submit the document first")
        os.makedirs(path, exist_ok = True)
        path = os.path.join(path, name)
        getattr(self, frame_name).to_excel(path, index = False)
        return path

    # ------------------------------------------------------------------
    # event handlers
    # ------------------------------------------------------------------
    def read_file(self, file):
        """Return the text of the uploaded file.

        Raises:
            gr.Error: if nothing was uploaded.
        """
        if file is None:
            raise gr.Error("You need to upload a file first")
        # Explicit encoding: the platform default is not reliable, and one
        # undecodable byte should not abort the whole upload.
        with open(file.name, 'r', encoding='utf-8', errors='replace') as f:
            return f.read()

    def highlight_text(self, text, highlight_list):
        """Render ``text`` as HTML with the best fuzzy matches highlighted.

        For each entry of ``highlight_list`` the closest sentence of
        ``text`` (by ``fuzz.partial_ratio``) is wrapped in a ``<mark>``
        followed by its match score.
        """
        import html  # local import keeps this block self-contained

        # Candidate sentences: split on '.' and then on line breaks. Blank
        # pieces are dropped -- replacing an empty string would insert
        # markup between every character of the document.
        candidates = [
            piece
            for sentence in text.split('.')
            for piece in sentence.split('\n')
            if piece.strip()
        ]

        # Escape the document so characters such as '<' in "p < 0.05" are
        # displayed instead of being parsed as markup. Escaping works
        # character by character, so an escaped candidate is still a
        # substring of the escaped document.
        page = html.escape(text)

        marked = set()
        for hl in self._as_sentences(highlight_list):
            if not candidates or not hl.strip():
                continue
            # Find the best match using fuzzy matching
            best_match = process.extractOne(hl, candidates, scorer=fuzz.partial_ratio)
            if best_match is None or best_match[0] in marked:
                # Already highlighted: wrapping it again would nest marks.
                continue
            marked.add(best_match[0])
            needle = html.escape(best_match[0])
            page = page.replace(needle, f'<mark style="background: #A5D2F1">{needle}</mark><mark style="background: #FFC0CB"><font color="red"> (match score:{best_match[1]})</font></mark>')

        # add line break
        page = page.replace('\n', " <br /> ")

        # add scroll bar
        return f'<div style="height: 500px; overflow: auto;">{page}</div>'

    def process_file(self, file, questions, openai_key):
        """Send the uploaded document to the model and show the first answer.

        Returns:
            ``(question, answer, reference, highlighted_html,
            correct_answer_prefill, correct_reference_prefill)``.

        Raises:
            gr.Error: if no file was uploaded or the reply is unusable.
        """
        text = self.read_file(file)

        # make the prompt
        prompt = self.prompt.get(text, questions, 'v3')

        # interact with openai
        raw = self.agent(prompt, with_history = False, temperature = 0.1, model = 'gpt-3.5-turbo-16k', api_key = openai_key)
        try:
            res = self.prompt.process_result(raw, 'v3')
        except (ValueError, KeyError, TypeError, AttributeError) as err:
            raise gr.Error("The model reply could not be parsed, please submit the document again") from err
        if not res:
            raise gr.Error("The model reply did not contain any answer, please submit the document again")

        # Commit the new state only after everything succeeded, so a failed
        # request never leaves the new text paired with the old answers.
        self.questions = questions
        self.text = text
        self.gpt_result = res
        self.curret_question = 0
        self.totel_question = len(res)

        # make a dataframe to record everything
        self.ori_answer_df = pd.DataFrame(res).T
        self.answer_df = pd.DataFrame(res).T

        # default first question
        question, answer, reference, highlighted_out_html = self._load_question(0)
        return question, answer, reference, highlighted_out_html, answer, reference

    def process_results(self, answer_correct, correct_answer, reference_correct, correct_reference):
        """Record the reviewer's verdict for the current question.

        Raises:
            gr.Error: if a judgement is missing, no document was submitted,
                or the current position is past either end of the list.
        """
        if not hasattr(self, 'clicked_correct_answer'):
            raise gr.Error("You need to judge whether the generated answer is correct first")

        if not hasattr(self, 'clicked_correct_reference'):
            raise gr.Error("You need to judge whether the highlighted reference is correct first")

        if not hasattr(self, 'answer_df'):
            raise gr.Error("You need to submit the document first")

        if self.curret_question >= self.totel_question or self.curret_question < 0:
            raise gr.Error("No more questions, please return back")

        row = f'Question {self.curret_question + 1}'

        # record the answer
        self.answer_df.loc[row, 'answer_correct'] = answer_correct
        self.answer_df.loc[row, 'reference_correct'] = reference_correct

        # "Correct" keeps the generated value, otherwise take the user's text.
        if self.clicked_correct_answer:
            if not hasattr(self, 'answer'):
                raise gr.Error("You need to submit the document first")
            self.answer_df.loc[row, 'correct_answer'] = self.answer
        else:
            self.answer_df.loc[row, 'correct_answer'] = correct_answer

        if self.clicked_correct_reference:
            if not hasattr(self, 'highlighted_out'):
                raise gr.Error("You need to submit the document first")
            self.answer_df.loc[row, 'correct_reference'] = self.highlighted_out
        else:
            self.answer_df.loc[row, 'correct_reference'] = correct_reference

        gr.Info('Results saved!')
        return "Results saved!"

    def process_next(self):
        """Show the next question (or the end-of-list placeholders)."""
        return self._step(1)

    def process_last(self):
        """Show the previous question (or the end-of-list placeholders)."""
        return self._step(-1)

    def download_answer(self, path = './tmp', name = 'answer.xlsx'):
        """Export the untouched model answers and return the file path."""
        return self._export('ori_answer_df', path, name)

    def download_corrected(self, path = './tmp', name = 'corrected_answer.xlsx'):
        """Export the reviewer-corrected answers and return the file path."""
        return self._export('answer_df', path, name)

    def change_correct_answer(self, correctness):
        """React to the answer radio; return the text for the correction box."""
        if correctness == "Correct":
            self.clicked_correct_answer = True
            return "No need to change"
        if hasattr(self, 'answer'):
            self.clicked_correct_answer = False
            return self.answer
        return "No answer yet, you need to submit the document first"

    def change_correct_reference(self, correctness):
        """React to the reference radio; return the text for the correction box."""
        if correctness == "Correct":
            self.clicked_correct_reference = True
            return "No need to change"
        if hasattr(self, 'highlighted_out'):
            self.clicked_correct_reference = False
            return self.highlighted_out
        return "No answer yet, you need to submit the document first"
openai.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+
3
class OpenAI:
    """Minimal client for the OpenAI chat-completions HTTP endpoint.

    ``self.history`` holds the running conversation as a list of
    ``{'role': ..., 'content': ...}`` messages. It is only sent, and only
    extended, when a call is made with ``with_history=True``.
    """

    URL = 'https://api.openai.com/v1/chat/completions'

    def __init__(self, init_prompt = None):
        """Start an empty conversation, optionally seeded with a system prompt."""
        self.history = []
        if init_prompt is not None:
            self.history.append({'role': 'system', 'content': init_prompt})

    def clear_history(self):
        """Drop every recorded message, including the system prompt."""
        self.history = []

    def show_history(self):
        """Print the conversation as ``role: content`` lines."""
        for message in self.history:
            print(f"{message['role']}: {message['content']}")

    def get_raw_history(self):
        """Return the message list itself (not a copy)."""
        return self.history

    def __call__(self, prompt, with_history = False, model = 'gpt-3.5-turbo', temperature = 0, api_key = None, timeout = 300):
        """Send ``prompt`` to the model and return the reply text.

        Args:
            prompt: user message to send.
            with_history: when true, send the recorded conversation before
                ``prompt`` and record both the prompt and the reply.
            model: chat model name.
            temperature: sampling temperature.
            api_key: OpenAI API key used as the bearer token.
            timeout: seconds to wait for the HTTP request before giving up.

        Raises:
            ValueError: if ``api_key`` is missing.
            RuntimeError: if the API reply is not JSON or carries no choice
                (for example an authentication or quota error).
        """
        if not api_key:
            raise ValueError("An OpenAI API key is required")

        new_message = {'role': 'user', 'content': prompt}
        messages = self.history + [new_message] if with_history else [new_message]

        resp = requests.post(
            self.URL,
            json={
                'model': model,
                'messages': messages,
                'temperature': temperature,
            },
            headers={'Authorization': f"Bearer {api_key}"},
            # Without a timeout a stalled connection blocks the UI forever.
            timeout=timeout,
        )

        # Parse the body once and turn API-side failures into a readable
        # error instead of a bare KeyError on 'choices'.
        try:
            payload = resp.json()
        except ValueError as err:
            raise RuntimeError(f"OpenAI API returned a non-JSON reply (HTTP {resp.status_code})") from err

        try:
            reply = payload['choices'][0]['message']
            content = reply['content']
        except (KeyError, IndexError, TypeError) as err:
            detail = None
            if isinstance(payload, dict) and isinstance(payload.get('error'), dict):
                detail = payload['error'].get('message')
            raise RuntimeError(f"OpenAI API request failed (HTTP {resp.status_code}): {detail or payload}") from err

        # Record the exchange only on success and only for stateful calls:
        # an orphan assistant reply (or an unanswered user message) would
        # corrupt the context of the next with_history call.
        if with_history:
            self.history.append(new_message)
            self.history.append(reply)

        return content
prompt.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from template import TEMPLATE_v1, TEMPLATE_v2, TEMPLATE_v3, QUESTIONS
2
+ import json
3
+
4
class Prompt:
    """Build prompts from the versioned templates and parse the model reply.

    ``self.version`` remembers the most recently used template version
    (``'v1'``, ``'v2'`` or ``'v3'``) and serves as the default for both
    :meth:`get` and :meth:`process_result`.
    """

    def __init__(self) -> None:
        self.template_v1 = TEMPLATE_v1
        self.template_v2 = TEMPLATE_v2
        self.template_v3 = TEMPLATE_v3
        self.version = "v3"

    def combine_questions(self, questions):
        """Join ``questions`` into numbered ``Question <n>: ...`` lines.

        Entries containing ``'Input question'`` are left out.
        """
        questions = [f'Question {id_ + 1}: {q}' for id_, q in enumerate(questions) if 'Input question' not in q]
        return '\n'.join(questions)

    def _get_v1(self, input, questions):
        # Use the combined local text; ``self.questions`` is never assigned.
        return self.template_v1.format(input, self.combine_questions(questions))

    def _get_v2(self, input, questions):
        # Use the combined local text; ``self.questions`` is never assigned.
        return self.template_v2.format(input, self.combine_questions(questions))

    def _get_v3(self, input, questions):
        # The v3 template receives only the document; ``questions`` is unused.
        return self.template_v3.format(input)

    def get(self, input, questions, version = None):
        """Return the prompt for document ``input`` and ``questions``.

        Args:
            input: document text inserted into the template.
            questions: list of question strings (used by v1 and v2).
            version: template version; defaults to the last one used.

        Raises:
            ValueError: if the version is not one of v1, v2, v3.
        """
        builders = {'v1': self._get_v1, 'v2': self._get_v2, 'v3': self._get_v3}
        target = version if version else self.version
        if target not in builders:
            # Validate before storing so a typo cannot poison later calls.
            raise ValueError('Version should be one of {v1, v2, v3}')
        self.version = target
        return builders[target](input, questions)

    def _process_v1(self, res):
        return json.loads(res)

    def _process_v2(self, res):
        return json.loads(res)

    def _process_v3(self, x):
        """Flatten a v3 reply into ``{'Question <n>': {...}}`` entries.

        A top-level value is either one answer (it has an ``'answer'`` key)
        or a group of answers; groups are expanded in order. Groups of any
        size are accepted, not just pairs.
        """
        x = json.loads(x)
        res = {}
        for v in x.values():
            entries = [v] if 'answer' in v else list(v.values())
            for entry in entries:
                res[f'Question {len(res) + 1}'] = {"answer": entry['answer'], "original sentences": entry['original sentences']}
        return res

    def process_result(self, result, version = None):
        """Parse the raw model reply ``result``.

        Args:
            result: JSON text returned by the model.
            version: template version that produced the reply; defaults to
                the last one used.

        Raises:
            ValueError: if the version is not one of v1, v2, v3, or the
                reply is not valid JSON.
        """
        parsers = {'v1': self._process_v1, 'v2': self._process_v2, 'v3': self._process_v3}
        # Fall back to the remembered version instead of rejecting None.
        target = self.version if version is None else version
        if target not in parsers:
            raise ValueError('Version should be one of {v1, v2, v3}')
        if target != self.version:
            self.version = target
            print(f'Version changed to {target}')
        return parsers[target](result)
template.py ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Prompt template v3: a self-contained prompt that embeds its own five
# questions (animal type, exposure age, behavior test, interventions,
# genetic chain) together with per-question definitions and rules.
# It takes exactly one positional str.format() field, the document text;
# every literal brace is doubled so that it survives formatting.
# The JSON skeleton at the end is kept syntactically valid (quoted keys,
# commas between siblings) because the reply modelled on it is later parsed
# as JSON.
TEMPLATE_v3 = '''We now have a following <document> in the medical field:

"""
{}
"""
You are an expert in biomedical research.
You are asked to answer the following <question>s based on the <document>, the <question>s and their <instruction>s and <rule>s are as follows:

- "Question 1":
    - "question": "What is the <animal type> of this study?"
    - "instruction": "This task is to find the <animal type> according to the <document>."
    - "definition":
        - "animal type": "The rodent type used in the article"
    - "rule": "<answer> of <animal type> should be one of the two choices {{mice/rats}} or both"

- "Question 2":
    - "question": "What is the <exposure age> of this study?"
    - "instruction": "This task is to find the <exposure age> according to the <document>."
    - "definition":
        - "exposure age": "The age when the animals were exposed to anesthetics. There are two kinds of <exposure age>: <postnatal day> and <gestational day>"
        - "postnatal day": "<postnatal day> means the days after the animals were born. For example, 'postnatal day <int>' means the animals were born for <int> day. 'postnatal day <int>' is sometimes shortened to 'PND <int>' or 'pnd <int>', which still means 'postnatal day <int>', after birth. 'postnatal day <int>' is sometimes shortened to 'p<int>', which still means 'postnatal day <int>', after birth"
        - "gestational day": "<gestational day> means the days after the animals were pregnant. For example, 'gestational day <int>' means the animals were pregnant for <int> day. 'gestational day <int>' is sometimes abbreviated as 'E <int>', 'E' meaning before birth"
    - "rule": "<answer> of <exposure age> should be expressed as one of {{'postnatal day <int>'/'gestational day <int>'}}. For example: 'postnatal day 7', 'gestational day 21'."

- "Question 3":
    - "question": "Is there any <behavior test> done in this study?"
    - "instruction": "This task is to find whether there are any <behavior test> in the study according to the <document>."
    - "definition":
        - "behavior test": "If there are any of the <behavior tests> described and done in the article, which mentioned as: 'Open field test', 'Morris water task', 'fear conditioning test', 'Dark/light avoidance'; 'passive/active avoidance test'; 'elevated maze', 'Forced swim test', 'Object recognition test', 'Social interaction/preference'."
    - "rule": "<answer> to <behavior test> should be one of the two choices {{Yes/No}}."

- "Question 4":
    - "question": "What's the <intervention>s of this study?"
    - "instruction": "This task is to find the <intervention>s according to the <document>."
    - "definition":
        - "intervention": "The <intervention>s are anesthetic drugs, each of which is one of {{'isoflurane'/'sevoflurane'/'desflurane'/'ketamine'/'propofol'/'Midazolam'/'Nitrous oxide'}}."
    - "rule": "There are one or two <intervention>s in the <document>. Please find them all and answer the <question>. If there is only one <intervention>, the second one is 'None'."

- "Question 5":
    - "question": "What's the <genetic chain> of this study?"
    - "instruction": "This task is to find the <genetic chain> according to the <document>."
    - "definition":
        - "genetic chain": "The <genetic chain> is the genetic type of the animals being used in the article."
    - "rule": "Please do as the following examples: 'C57BL/6', 'C57BL/6J' should be extracted as 'C57BL/6'; 'Sprague Dawley', 'Sprague-Dawley', 'SD' should be extracted as 'Sprague Dawley'; 'CD-1' should be extracted as 'CD-1'; 'Wistar/ST' should be extracted as 'Wistar/ST'; 'Wistar' should be extracted as 'Wistar'; 'FMR-1 KO' should be extracted as 'FMR-1 KO'."


Here are the instructions for all the <question>s:

1. Please select the <original sentences> related to each <question> from the <document>.
2. Please use the <original sentences> to answer the <question> by following the <rule> and <instruction> according to the <definition>.
3. Please provide <original sentences> coming from the <document>.
4. Output the <answer> in the following JSON format:

{{
    "Question 1": {{
        "answer": "{{mice/rats/both}}",
        "original sentences": []
    }},
    "Question 2": {{
        "answer": "{{postnatal day <int>/gestational day <int>}}",
        "original sentences": []
    }},
    "Question 3": {{
        "answer": "{{Yes/No}}",
        "original sentences": []
    }},
    "Question 4": {{
        "{{intervention 1}}": {{
            "answer": "{{intervention 1}}",
            "original sentences": []
        }},
        "{{intervention 2}}": {{
            "answer": "{{intervention 2}}",
            "original sentences": []
        }}
    }},
    "Question 5": {{
        "answer": "{{genetic chain}}",
        "original sentences": []
    }}
}}
'''
84
+
85
# Prompt template v2: the document, a numbered glossary of the fields to
# extract, the question list, and the JSON reply format.
# It takes two positional str.format() fields, in order: the document text
# and the question block. Literal braces are doubled so that they survive
# formatting.
TEMPLATE_v2 = '''We now have a following <document> in the medical field:

"""
{}
"""
We have some introduction here:

1. DOI: The DOI link for the article, usually can be found in the first line of the .txt file for the article. E.g., “DOI: 10.3892/mmr.2019.10397”.
2. Citation ID: The number in the file name. E.g., “1134”.
3. First author: The last name in the file name. E.g., “Guan”.
4. Year: The year in the file name. E.g., “2019”.
5. Animal type: The rodent type used in the article, should be one of the choices: mice, rats. E.g., “rats”.
6. Exposure age: The age when the animals were exposed to anesthetics, should be mentioned as "PND1", "PND7","postnatal day 7", "Gestational day 21", etc, which should be extracted as: 'PND XX' , 'Gestational day xx'. E.g., “PND7”.
7. Behavior test: Whether there is any behavior test in the article, should be one of the choices: "Y", "N". "Y" is chosen if there are any of the behavior tests described and done in the article, which mentioned as: "Open field test", "Morris water task", "fear conditioning test", "Dark/light avoidance"; "passive/active avoidance test"; "elevated maze", "Forced swim test", "Object recognition test", "Social interaction/preference". E.g., “N”.
8. Intervention 1 & Intervention 2: Intervention 1 and Intervention 2 are both anesthetic drugs, which listed as: "isoflurane", "sevoflurane", "desflurane", "ketamine", "propofol", "Midazolam", "Nitrous oxide". If none, put “NA”. E.g., “propofol”.
9. Genetic chain: Genetic chain is the genetic type of the animals being used in the article, here is the examples:
    "C57BL/6", "C57BL/6J" should be extracted as "C57BL/6"; "Sprague Dawley", "Sprague-Dawley", "SD" should be extracted as "Sprague Dawley"; "CD-1" should be extracted as "CD-1"; "Wistar/ST" should be extracted as "Wistar/ST"; "Wistar" should be extracted as "Wistar"; "FMR-1 KO" should be extracted as "FMR-1 KO". E.g., “Sprague Dawley”.

We have some <question>s begin with "Question" here:
"""
{}
"""

Please finish the following task:

1. Please select the <original sentences> related to each <question> from the <document>.
2. Please use the <original sentences> to answer the <question>.
3. Please provide <original sentences> coming from the <document>.
4. Output the <answer> in the following json format:

{{
    "Question 1": {{
        "question": {{}},
        "answer": {{}},
        "original sentences": []
    }},
    "Question 2": {{
        "question": {{}},
        "answer": {{}},
        "original sentences": []
    }},
    ...
}}
'''
129
+
130
# Prompt template v1: the document, the question list, and the JSON reply
# format, with no field glossary.
# It takes two positional str.format() fields, in order: the document text
# and the question block. Literal braces are doubled so that they survive
# formatting.
TEMPLATE_v1 = '''We now have a following <document> in the medical field:

"""
{}
"""

We have some <question>s begin with "Question" here:
"""
{}
"""

Please finish the following task:

1. Please select the <original sentences> related to each <question> from the <document>.
2. Please use the <original sentences> to answer the <question>.
3. Please provide <original sentences> coming from the <document>.
4. Output the <answer> in the following json format:

{{
    "Question 1": {{
        "question": {{}},
        "answer": {{}},
        "original sentences": []
    }},
    "Question 2": {{
        "question": {{}},
        "answer": {{}},
        "original sentences": []
    }},
    ...
}}
'''
162
+
163
# Raw question texts. The trailing "Input question" entry is a placeholder
# that is filtered out below.
QUESTIONS = [
    "What is the DOI of this study?",
    "What is the Citation ID of this study?",
    "What is the First author of this study?",
    "What is the year of this study?",
    "What is the animal type of this study?",
    "What is the exposure age of this study?",
    "Is there any behavior test done in this study?",
    "What's the Intervention 1's name of this study?(anesthetics only)",
    "What's the Intervention 2's name of this study?(anesthetics only)",
    "What's the genetic chain of this study?",
    "Input question",
]
# Rebind QUESTIONS to one newline-separated string. Each kept entry is
# prefixed with its 1-based position in the list above; numbering happens
# before filtering, so positions refer to the unfiltered list.
QUESTIONS = '\n'.join(
    f'Question {position}: {text}'
    for position, text in enumerate(QUESTIONS, start=1)
    if 'Input question' not in text
)