Spaces:
Runtime error
Runtime error
Roland Ding
commited on
Commit
•
82b9d78
1
Parent(s):
4e46bb4
9.9.22.67 mass update of the application
Browse files+ Revised internally to apply usage of langchain and async call
+ Realigned the application to use the new terms and prompts from
search term 13n
+ Added the new chains.py module to support the new langchain
+ revised the application.py to align with all the new backend data
structure.
modified: app.py
modified: application.py
new file: chains.py
modified: features.py
modified: requirements.txt
modified: supplier.py
modified: ui_studies.py
modified: ui_study.py
- app.py +16 -9
- application.py +26 -16
- chains.py +107 -0
- features.py +374 -198
- requirements.txt +2 -1
- supplier.py +13 -14
- ui_studies.py +17 -13
- ui_study.py +81 -59
app.py
CHANGED
@@ -13,23 +13,30 @@ from utility import *
|
|
13 |
|
14 |
from ui_study import *
|
15 |
from ui_studies import *
|
|
|
16 |
|
17 |
|
18 |
examples = []
|
19 |
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
|
|
|
|
|
|
|
|
|
|
29 |
|
30 |
def refresh_data():
|
31 |
return
|
32 |
|
33 |
if __name__ == "__main__":
|
34 |
init_app_data()
|
|
|
35 |
demo.launch()
|
|
|
13 |
|
14 |
from ui_study import *
|
15 |
from ui_studies import *
|
16 |
+
# from application import app_data
|
17 |
|
18 |
|
19 |
examples = []
|
20 |
|
21 |
+
@terminal_print
|
22 |
+
def init_demo():
|
23 |
+
'''
|
24 |
+
initialize the demo data
|
25 |
+
'''
|
26 |
+
study_page = init_study_page()
|
27 |
+
studies_page = init_studies_page()
|
28 |
+
|
29 |
+
return gr.TabbedInterface(
|
30 |
+
[study_page,studies_page],
|
31 |
+
["Clinical Study","Studies"],
|
32 |
+
theme = gr.themes.Soft(primary_hue="sky",secondary_hue="orange"),
|
33 |
+
css = "footer {visibility: hidden}",
|
34 |
+
title="AMRA AI Medi Reader")
|
35 |
|
36 |
def refresh_data():
|
37 |
return
|
38 |
|
39 |
if __name__ == "__main__":
|
40 |
init_app_data()
|
41 |
+
demo = init_demo()
|
42 |
demo.launch()
|
application.py
CHANGED
@@ -1,7 +1,5 @@
|
|
1 |
import os
|
2 |
|
3 |
-
from collections import defaultdict
|
4 |
-
|
5 |
'''
|
6 |
shared environment variables
|
7 |
'''
|
@@ -54,28 +52,40 @@ tables_inst=[
|
|
54 |
f"include all table titles."
|
55 |
]
|
56 |
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
66 |
|
67 |
-
accepted_month_inst=[
|
68 |
-
f"extract the acceptance month of the article from the system text.",
|
69 |
-
f"return the results on a single line as 'Accepted Month: <month>.",
|
70 |
-
]
|
71 |
|
72 |
'''
|
73 |
application default data
|
74 |
'''
|
75 |
app_data = {
|
76 |
"current article":{},
|
77 |
-
"articles":
|
78 |
"prompts":{},
|
79 |
"terms":[],
|
80 |
-
"summary":
|
81 |
}
|
|
|
1 |
import os
|
2 |
|
|
|
|
|
3 |
'''
|
4 |
shared environment variables
|
5 |
'''
|
|
|
52 |
f"include all table titles."
|
53 |
]
|
54 |
|
55 |
+
article_prompts = {
|
56 |
+
"Authors": '''extract all of the authors of the article from the above text.\n
|
57 |
+
Return the results on the same line separated by commas as Authors: Author A, Author B...
|
58 |
+
''',
|
59 |
+
"Acceptance Year": '''extract the acceptance year of the article from the above text.\n
|
60 |
+
Return the results on a single line as Accepted Year: <year>.
|
61 |
+
''',
|
62 |
+
|
63 |
+
"Acceptance Month":'''extract the acceptance month of the article from the above text.\n
|
64 |
+
Return the results on a single line as Accepted Month: <month>.
|
65 |
+
'''
|
66 |
+
}
|
67 |
|
68 |
+
overview_prompts = clinical_prompts = radiological_prompts = other_prompts = {}
|
69 |
+
|
70 |
+
# populate the prompts from .prompt/overview/ folder
|
71 |
+
def update_prompts_from_dir(prompts,path):
|
72 |
+
for file in os.listdir(path):
|
73 |
+
with open(f"{path}/{file}","r") as f:
|
74 |
+
prompts[file.split(".")[0]] = f.read()
|
75 |
+
|
76 |
+
update_prompts_from_dir(overview_prompts,".prompts/overview")
|
77 |
+
update_prompts_from_dir(clinical_prompts,".prompts/clinical")
|
78 |
+
update_prompts_from_dir(radiological_prompts,".prompts/radiologic")
|
79 |
+
update_prompts_from_dir(other_prompts,".prompts/other")
|
80 |
|
|
|
|
|
|
|
|
|
81 |
|
82 |
'''
|
83 |
application default data
|
84 |
'''
|
85 |
app_data = {
|
86 |
"current article":{},
|
87 |
+
"articles":{},
|
88 |
"prompts":{},
|
89 |
"terms":[],
|
90 |
+
"summary":{}
|
91 |
}
|
chains.py
ADDED
@@ -0,0 +1,107 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import asyncio
|
2 |
+
|
3 |
+
from langchain.chat_models import ChatOpenAI
|
4 |
+
from langchain.prompts.chat import ChatPromptTemplate
|
5 |
+
from langchain.schema import BaseOutputParser
|
6 |
+
|
7 |
+
from utility import read_pdf,terminal_print
|
8 |
+
|
9 |
+
class Replacement(BaseOutputParser):
|
10 |
+
"""Parse the output of an LLM call to a comma-separated list."""
|
11 |
+
|
12 |
+
|
13 |
+
def parse(self, text: str, **kwargs):
|
14 |
+
"""Parse the output of an LLM call."""
|
15 |
+
if kwargs:
|
16 |
+
print(kwargs)
|
17 |
+
return text.strip().split(", ")
|
18 |
+
|
19 |
+
@terminal_print # need to review this.
|
20 |
+
async def async_generate(article,name,chain,replacement_term=None):
|
21 |
+
if replacement_term:
|
22 |
+
resp = await chain.ainvoke({"term":replacement_term})
|
23 |
+
else:
|
24 |
+
resp = await chain.ainvoke({"term":""})
|
25 |
+
article[name] = resp.content
|
26 |
+
|
27 |
+
@terminal_print # need to review this.
|
28 |
+
async def execute_concurrent(article,prompts):
|
29 |
+
llm = ChatOpenAI(temperature=0.0,model_name="gpt-3.5-turbo-16k")
|
30 |
+
tasks = []
|
31 |
+
|
32 |
+
prompt_type = article["logic"]
|
33 |
+
prompt_list = list(prompts.keys())
|
34 |
+
print(prompt_list)
|
35 |
+
|
36 |
+
# for name,p in prompts.items():
|
37 |
+
while prompt_list:
|
38 |
+
name = prompt_list.pop(0)
|
39 |
+
p = prompts[name]
|
40 |
+
|
41 |
+
if any([s not in article for s in p["input_list"]]):
|
42 |
+
# prompt_list.append(name)
|
43 |
+
print("skip",name,"due to missing input",p["input_list"])
|
44 |
+
continue
|
45 |
+
|
46 |
+
print("executing",p["assessment_step"],name)
|
47 |
+
input_text = "".join([article[s] for s in p["input_list"]])
|
48 |
+
|
49 |
+
chat_prompt = ChatPromptTemplate.from_messages([
|
50 |
+
("human",input_text),
|
51 |
+
("system",p[prompt_type]),
|
52 |
+
])
|
53 |
+
|
54 |
+
if "reformat_inst" in p:
|
55 |
+
chat_prompt.append(
|
56 |
+
("system",p["reformat_inst"])
|
57 |
+
)
|
58 |
+
|
59 |
+
post_prompt_maping = {}
|
60 |
+
post_replace_term = lambda res,map=post_prompt_maping:replace_term(res,map=map)
|
61 |
+
|
62 |
+
chain = chat_prompt | llm | post_replace_term
|
63 |
+
if "term" in p:
|
64 |
+
tasks.append(async_generate(article,name,chain,replacement_term=p["term"]["term_prompt"])) # in here the name shall be the term_prompt from the terms triggered
|
65 |
+
else:
|
66 |
+
tasks.append(async_generate(article,name,chain)) # in here the name shall be the term_prompt from the terms triggered
|
67 |
+
|
68 |
+
await asyncio.gather(*tasks)
|
69 |
+
|
70 |
+
def replace_term(res,**kwargs):
|
71 |
+
if "map" in kwargs:
|
72 |
+
for key,term in kwargs["map"].items():
|
73 |
+
res.content = res.content.replace(key,term)
|
74 |
+
return res
|
75 |
+
|
76 |
+
if __name__ == "__main__":
|
77 |
+
# lets try the Blood Loss, Operation Time, and Need for ICU in other folder
|
78 |
+
sample_artice = ".samples/Ha SK, 2008.pdf"
|
79 |
+
sample_content,_ = read_pdf(sample_artice)
|
80 |
+
|
81 |
+
llm = ChatOpenAI(temperature=0.0,model_name="gpt-3.5-turbo-16k")
|
82 |
+
# with open(".prompts/other/Need for ICU.txt") as f:
|
83 |
+
# prompt = f.read()
|
84 |
+
# name = "Need for ICU"
|
85 |
+
with open(".prompts/other/Operation Time.txt") as f:
|
86 |
+
prompt = f.read()
|
87 |
+
name = "Operation Time"
|
88 |
+
# with open(".prompts/other/Blood Loss.txt") as f:
|
89 |
+
# prompt = f.read()
|
90 |
+
# name = "Blood Loss"
|
91 |
+
|
92 |
+
post_prompt_maping = {}
|
93 |
+
post_replace_term = lambda res,map=post_prompt_maping:replace_term(res,map=map)
|
94 |
+
|
95 |
+
chain_prompt = ChatPromptTemplate.from_messages([
|
96 |
+
("human",sample_artice),
|
97 |
+
("system",prompt),
|
98 |
+
])
|
99 |
+
|
100 |
+
# experiment with cascading the chain
|
101 |
+
chain = chain_prompt | llm
|
102 |
+
chain2 = chain | post_replace_term
|
103 |
+
|
104 |
+
# lets try remove from chain
|
105 |
+
chain2.last.with_retry = True
|
106 |
+
res = chain2.invoke({"term":name})
|
107 |
+
print(res.content)
|
features.py
CHANGED
@@ -1,29 +1,66 @@
|
|
1 |
# language default packages
|
2 |
from datetime import datetime
|
3 |
-
from collections import defaultdict
|
4 |
|
5 |
# external packages
|
6 |
import gradio as gr
|
7 |
-
import
|
|
|
|
|
|
|
|
|
8 |
|
9 |
# internal packages
|
|
|
10 |
from cloud_db import *
|
11 |
from cloud_storage import *
|
|
|
12 |
from supplier import *
|
13 |
-
|
14 |
-
encoding = tiktoken.get_encoding("cl100k_base")
|
15 |
|
16 |
# get prompts, terms, outputs from the cloud
|
|
|
17 |
def init_app_data():
|
18 |
'''
|
19 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
'''
|
21 |
-
app_data["prompts"] = get_table("prompts")
|
22 |
app_data["terms"] = get_table("terms")
|
23 |
-
|
24 |
-
app_data["
|
25 |
|
26 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
domain,
|
28 |
study_file_obj,
|
29 |
study_content,
|
@@ -39,47 +76,89 @@ def process_study(
|
|
39 |
else:
|
40 |
return "No file or content provided","No file or content provided","No file or content provided"
|
41 |
|
42 |
-
#
|
43 |
-
|
44 |
-
index_discussion = raw_content.lower().index("discussion") if "discussion" in raw_content.lower() else len(raw_content)
|
45 |
-
meta_content = raw_content[:index_discussion]
|
46 |
-
key_content = get_key_content(raw_content)
|
47 |
-
|
48 |
-
authors = send_inst(create_inst(meta_content,authors_inst))
|
49 |
-
accepted_date = send_inst(create_inst(meta_content,accepted_date_inst))
|
50 |
-
tables = send_inst(create_inst(key_content,tables_inst))
|
51 |
|
52 |
-
|
53 |
-
|
54 |
-
selected_prompts = select_prompts(key_content,terms=app_data["terms"],prompts=app_data["prompts"])
|
55 |
-
res = process_prompts(key_content,selected_prompts)
|
56 |
-
|
57 |
-
detail_views = create_detail_views(res)
|
58 |
-
overview = create_overview(res)
|
59 |
-
|
60 |
-
article.update({
|
61 |
-
"meta":{
|
62 |
-
"authors":authors,
|
63 |
-
"accepted_date":accepted_date,
|
64 |
-
},
|
65 |
-
"extractions":res
|
66 |
-
})
|
67 |
|
68 |
-
article
|
69 |
-
{
|
70 |
-
"key_content":key_content,
|
71 |
-
"tables":tables,
|
72 |
-
}
|
73 |
-
)
|
74 |
app_data["current_article"] = article
|
|
|
|
|
75 |
try:
|
76 |
update_article(article)
|
77 |
except Exception as e:
|
78 |
print(e)
|
79 |
# return overview, detail_views
|
80 |
|
|
|
|
|
|
|
|
|
81 |
return overview, detail_views
|
82 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
83 |
def refresh():
|
84 |
'''
|
85 |
this function refresh the application data from the cloud backend
|
@@ -89,64 +168,56 @@ def refresh():
|
|
89 |
article = app_data["current_article"]
|
90 |
if not article:
|
91 |
return "No file or content provided"
|
92 |
-
|
93 |
|
94 |
-
|
|
|
95 |
|
96 |
-
article.update({
|
97 |
-
"extractions":res
|
98 |
-
})
|
99 |
-
|
100 |
-
detail_views = create_detail_views(res)
|
101 |
-
overview = create_overview(res)
|
102 |
update_article(article=article)
|
103 |
|
104 |
return overview, detail_views
|
105 |
|
|
|
106 |
def create_overview(article):
|
107 |
-
md_text = ""
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
112 |
return gr.update(value=md_text)
|
113 |
|
|
|
114 |
def create_detail_views(article):
|
115 |
-
md_text = ""
|
|
|
116 |
|
117 |
# add performance
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
for content in safeties:
|
129 |
-
md_text += f"#### {content['assessment']} - {content['template_name']}\n\n"
|
130 |
-
md_text += content["content"] + "\n\n"
|
131 |
-
|
132 |
-
# add other
|
133 |
-
others = [v for _,v in article.items() if v["assessment"] == "other"]
|
134 |
-
|
135 |
-
md_text += f"### Other\n\n"
|
136 |
-
for title,content in others:
|
137 |
-
md_text += f"#### {content['assessment']} - {content['template_name']}\n\n"
|
138 |
-
md_text += content["content"] + "\n\n"
|
139 |
|
140 |
return gr.update(value=md_text)
|
141 |
|
142 |
-
|
|
|
143 |
'''
|
144 |
this function extract the content between start and end
|
145 |
-
and return the content in between.
|
146 |
-
|
147 |
-
and find all the end and keep the last one showing up in the
|
148 |
-
text. If no start or end is found, the function will return
|
149 |
-
the no text.
|
150 |
|
151 |
Parameters
|
152 |
----------
|
@@ -162,19 +233,32 @@ def get_key_content(text,case_sensitive=False):
|
|
162 |
str
|
163 |
content between start and end
|
164 |
'''
|
165 |
-
if not case_sensitive:
|
166 |
-
|
|
|
167 |
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
|
176 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
177 |
|
|
|
178 |
def get_articles(update_local=True):
|
179 |
'''
|
180 |
this function return the list of articles
|
@@ -191,10 +275,11 @@ def get_articles(update_local=True):
|
|
191 |
'''
|
192 |
articles = get_table("articles")
|
193 |
if update_local:
|
194 |
-
app_data["articles"] = articles
|
195 |
|
196 |
return articles
|
197 |
|
|
|
198 |
def get_article(domain,name):
|
199 |
'''
|
200 |
this function return the article object
|
@@ -215,6 +300,7 @@ def get_article(domain,name):
|
|
215 |
|
216 |
return article
|
217 |
|
|
|
218 |
def add_article(domain,file,add_to_s3=True, add_to_local=True, file_object=True):
|
219 |
'''
|
220 |
this function receive the domain name and file obj
|
@@ -236,29 +322,29 @@ def add_article(domain,file,add_to_s3=True, add_to_local=True, file_object=True)
|
|
236 |
dict
|
237 |
article object
|
238 |
'''
|
239 |
-
if
|
|
|
|
|
|
|
|
|
|
|
240 |
content, _ = read_pdf(file)
|
241 |
filename = file.name.split("\\")[-1]
|
242 |
-
|
243 |
-
|
244 |
-
|
245 |
-
|
246 |
-
|
247 |
|
248 |
article ={
|
249 |
"domain":domain,
|
250 |
"name":filename,
|
251 |
-
"
|
252 |
-
"raw":content
|
253 |
-
},
|
254 |
"upload_time":datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
|
255 |
}
|
256 |
-
|
257 |
-
if add_to_s3 and file_object:
|
258 |
-
upload_fileobj(file,default_s3_bucket,filename)
|
259 |
|
260 |
if add_to_local:
|
261 |
-
app_data["articles"]
|
262 |
|
263 |
res = post_item("articles",article)
|
264 |
if "Error" in res:
|
@@ -267,6 +353,7 @@ def add_article(domain,file,add_to_s3=True, add_to_local=True, file_object=True)
|
|
267 |
|
268 |
return article
|
269 |
|
|
|
270 |
def remove_article(domain,name,remove_from_s3=True, remove_from_local=True):
|
271 |
'''
|
272 |
this function remove the article from the cloud, s3 and local memory
|
@@ -291,12 +378,13 @@ def remove_article(domain,name,remove_from_s3=True, remove_from_local=True):
|
|
291 |
if remove_from_s3:
|
292 |
delete_file(domain,name)
|
293 |
if remove_from_local:
|
294 |
-
|
295 |
pass
|
296 |
delete_item("articles",{"domain":domain,"name":name})
|
297 |
|
298 |
return True
|
299 |
|
|
|
300 |
def update_article(article,file_obj=None,update_local=True):
|
301 |
'''
|
302 |
this function receive the article object and update the article
|
@@ -320,118 +408,206 @@ def update_article(article,file_obj=None,update_local=True):
|
|
320 |
upload_fileobj(file_obj,article["domain"],article["name"])
|
321 |
|
322 |
if update_local:
|
323 |
-
app_data["articles"]
|
324 |
|
325 |
post_item("articles",article)
|
326 |
|
327 |
return article
|
328 |
|
329 |
-
|
330 |
-
|
331 |
-
|
|
|
332 |
|
333 |
-
|
334 |
-
|
335 |
-
|
336 |
-
|
337 |
|
338 |
-
|
339 |
-
|
340 |
-
|
341 |
-
|
342 |
-
|
343 |
-
|
344 |
-
|
345 |
-
|
346 |
-
#
|
347 |
-
|
348 |
-
|
349 |
-
|
350 |
-
|
351 |
-
|
352 |
-
# if "Error" in res:
|
353 |
-
# print(res)
|
354 |
-
# return False
|
355 |
-
# return output
|
356 |
-
|
357 |
-
# def remove_output(domain,name):
|
358 |
-
# res = delete_item("outputs",{"domain":domain,"name":name})
|
359 |
-
# if "Error" in res:
|
360 |
-
# print(res)
|
361 |
-
# return False
|
362 |
-
# return True
|
363 |
-
|
364 |
-
# def update_output(output):
|
365 |
-
# res = put_item("outputs",output)
|
366 |
-
# if "Error" in res:
|
367 |
-
# print(res)
|
368 |
-
# return False
|
369 |
-
# return True
|
370 |
-
|
371 |
-
# identify article state
|
372 |
-
def identify_logic(text):
|
373 |
-
article_logic = [
|
374 |
-
"groups",
|
375 |
-
"levels",
|
376 |
-
"preoperatives"
|
377 |
-
]
|
378 |
|
379 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
380 |
|
381 |
-
|
382 |
-
|
383 |
-
|
384 |
-
|
385 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
386 |
|
387 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
388 |
|
389 |
-
|
390 |
-
|
391 |
-
|
392 |
-
|
393 |
-
|
394 |
|
395 |
-
|
|
|
|
|
396 |
|
|
|
397 |
def keyword_search(keywords,full_text):
|
398 |
keywords_result = {}
|
399 |
for k in keywords:
|
400 |
-
if type(k) is tuple:
|
401 |
-
keywords_result[k]=
|
402 |
else:
|
403 |
-
keywords_result[k]=
|
404 |
return keywords_result
|
405 |
|
406 |
-
|
407 |
-
|
408 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
409 |
|
410 |
-
|
411 |
-
|
412 |
-
text : str
|
413 |
-
text of the article
|
414 |
-
prompts : list
|
415 |
-
list of prompts
|
416 |
|
417 |
-
|
418 |
-
|
419 |
-
|
420 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
421 |
'''
|
422 |
-
|
423 |
-
|
424 |
-
|
425 |
-
|
426 |
-
|
427 |
-
|
428 |
-
|
429 |
-
|
430 |
-
|
431 |
-
|
432 |
-
|
433 |
-
|
434 |
-
|
435 |
-
}
|
436 |
-
|
437 |
-
return res
|
|
|
1 |
# language default packages
|
2 |
from datetime import datetime
|
|
|
3 |
|
4 |
# external packages
|
5 |
import gradio as gr
|
6 |
+
import asyncio
|
7 |
+
|
8 |
+
from langchain.llms import OpenAI
|
9 |
+
from langchain.prompts import PromptTemplate
|
10 |
+
from langchain.chains import LLMChain
|
11 |
|
12 |
# internal packages
|
13 |
+
from chains import *
|
14 |
from cloud_db import *
|
15 |
from cloud_storage import *
|
16 |
+
from cloud_textract import *
|
17 |
from supplier import *
|
18 |
+
from utility import list_dict_to_dict
|
|
|
19 |
|
20 |
# get prompts, terms, outputs from the cloud
|
21 |
+
@terminal_print
|
22 |
def init_app_data():
|
23 |
'''
|
24 |
+
A function to initialize the application data from the cloud backend.
|
25 |
+
All the cloud data was saved in the app_data dictionary.
|
26 |
+
|
27 |
+
Parameters
|
28 |
+
----------
|
29 |
+
None
|
30 |
+
|
31 |
+
Returns
|
32 |
+
-------
|
33 |
+
None
|
34 |
'''
|
35 |
+
app_data["prompts"] = list_dict_to_dict(get_table("prompts"),key="prompt_name")
|
36 |
app_data["terms"] = get_table("terms")
|
37 |
+
app_data["articles"] = list_dict_to_dict(get_table("articles"),key="name")
|
38 |
+
app_data["summary"] = list_dict_to_dict(get_table("summary"),key="term")
|
39 |
|
40 |
+
@terminal_print
|
41 |
+
def get_existing_article(
|
42 |
+
article_name,
|
43 |
+
):
|
44 |
+
'''
|
45 |
+
get_existing_article function receive the article name and return the article object
|
46 |
+
|
47 |
+
Parameters
|
48 |
+
----------
|
49 |
+
article_name : str
|
50 |
+
name of the article
|
51 |
+
|
52 |
+
Returns
|
53 |
+
-------
|
54 |
+
dict
|
55 |
+
article object
|
56 |
+
'''
|
57 |
+
article = app_data["articles"][article_name]
|
58 |
+
app_data["current_article"] = article
|
59 |
+
|
60 |
+
return create_overview(article), create_detail_views(article)
|
61 |
+
|
62 |
+
@terminal_print
|
63 |
+
def process_study( # need revision
|
64 |
domain,
|
65 |
study_file_obj,
|
66 |
study_content,
|
|
|
76 |
else:
|
77 |
return "No file or content provided","No file or content provided","No file or content provided"
|
78 |
|
79 |
+
# update the common article segment from its existing attributes.
|
80 |
+
update_article_segment(article)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
81 |
|
82 |
+
# perform pathway logic and content extraction
|
83 |
+
process_prompts(article=article)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
84 |
|
85 |
+
# set the current article to the completed article object
|
|
|
|
|
|
|
|
|
|
|
86 |
app_data["current_article"] = article
|
87 |
+
|
88 |
+
# update the article to the cloud
|
89 |
try:
|
90 |
update_article(article)
|
91 |
except Exception as e:
|
92 |
print(e)
|
93 |
# return overview, detail_views
|
94 |
|
95 |
+
# create overview and detail markdown views for the article
|
96 |
+
detail_views = create_detail_views(article)
|
97 |
+
overview = create_overview(article)
|
98 |
+
|
99 |
return overview, detail_views
|
100 |
|
101 |
+
@terminal_print
|
102 |
+
def update_article_segment(article):
|
103 |
+
# get the key content between article objective and discussion
|
104 |
+
raw_content = article["raw"]
|
105 |
+
index_discussion = raw_content.lower().index("discussion") if "discussion" in raw_content.lower() else len(raw_content)
|
106 |
+
|
107 |
+
# get the meta data
|
108 |
+
meta_content = raw_content[:index_discussion]
|
109 |
+
abstract, next_content = get_key_content(raw_content,"objective","key") # article Liu does not have objective and key but has introduction.
|
110 |
+
introduction, next_content = get_key_content(next_content,"key","methods")
|
111 |
+
materials_and_methods, next_content = get_key_content(next_content,"methods","results")
|
112 |
+
results, _ = get_key_content(next_content,"results","discussion")
|
113 |
+
|
114 |
+
|
115 |
+
# update the article object
|
116 |
+
article.update({
|
117 |
+
"Abstract": abstract,
|
118 |
+
"Introduction": introduction,
|
119 |
+
"Material and Methods": materials_and_methods,
|
120 |
+
"Results": results,
|
121 |
+
"Meta Content": meta_content,
|
122 |
+
"tables": get_tables(article["name"]),
|
123 |
+
|
124 |
+
})
|
125 |
+
|
126 |
+
# add the key content as an aggregation of the other sections
|
127 |
+
article.update({
|
128 |
+
"key_content": article["Abstract"] + article["Introduction"] + article["Material and Methods"] + article["Results"],
|
129 |
+
})
|
130 |
+
# add the recognized logic to the article
|
131 |
+
article.update(identify_logic(article["key_content"]))
|
132 |
+
# one thing to notice here, due to the fact that update_article_segment function perform direct change on the article object,
|
133 |
+
# there is no need to re-assign the article object to the same variable name
|
134 |
+
|
135 |
+
pre_loop = asyncio.new_event_loop()
|
136 |
+
pre_loop.run_until_complete(get_segments(article,article_prompts))
|
137 |
+
pre_loop.close()
|
138 |
+
|
139 |
+
|
140 |
+
@terminal_print # need to review this.
|
141 |
+
async def gen_segment(article,name,chain):
|
142 |
+
|
143 |
+
resp = await chain.ainvoke({"term":""})
|
144 |
+
article[name] = resp.content #["content"]
|
145 |
+
|
146 |
+
@terminal_print # need to review this.
|
147 |
+
async def get_segments(article,prompts):
|
148 |
+
llm = ChatOpenAI(temperature=0.0,model_name="gpt-3.5-turbo-16k")
|
149 |
+
tasks = []
|
150 |
+
|
151 |
+
for name,p in prompts.items():
|
152 |
+
prompt = ChatPromptTemplate.from_messages([
|
153 |
+
("human",article["Meta Content"]),
|
154 |
+
("system","From the text above "+p),
|
155 |
+
])
|
156 |
+
chain = prompt | llm
|
157 |
+
tasks.append(gen_segment(article,name,chain))
|
158 |
+
|
159 |
+
await asyncio.gather(*tasks)
|
160 |
+
|
161 |
+
@terminal_print
|
162 |
def refresh():
|
163 |
'''
|
164 |
this function refresh the application data from the cloud backend
|
|
|
168 |
article = app_data["current_article"]
|
169 |
if not article:
|
170 |
return "No file or content provided"
|
171 |
+
process_prompts(article)
|
172 |
|
173 |
+
detail_views = create_detail_views(article)
|
174 |
+
overview = create_overview(article)
|
175 |
|
|
|
|
|
|
|
|
|
|
|
|
|
176 |
update_article(article=article)
|
177 |
|
178 |
return overview, detail_views
|
179 |
|
180 |
+
@terminal_print
|
181 |
def create_overview(article):
|
182 |
+
# md_text = ""
|
183 |
+
assessment = "overview"
|
184 |
+
|
185 |
+
md_text = f"## Overview\n\n"
|
186 |
+
overview_components = article["extraction"][assessment]
|
187 |
+
for component in overview_components:
|
188 |
+
md_text += f"#### {assessment} - {component}\n\n"
|
189 |
+
if component in article:
|
190 |
+
md_text += article[component] + "\n\n"
|
191 |
+
else:
|
192 |
+
md_text += "No content found\n\n"
|
193 |
+
# md_text += article[component] + "\n\n"
|
194 |
return gr.update(value=md_text)
|
195 |
|
196 |
+
@terminal_print
|
197 |
def create_detail_views(article):
|
198 |
+
md_text = "## Performance\n\n"
|
199 |
+
assessments = ["clinical","radiologic","safety","other"]
|
200 |
|
201 |
# add performance
|
202 |
+
for a in assessments:
|
203 |
+
if a in article["extraction"]:
|
204 |
+
md_text += f"### {a.capitalize()}\n\n"
|
205 |
+
performance_components = article["extraction"][a]
|
206 |
+
for component in performance_components:
|
207 |
+
md_text += f"#### {a} - {component}\n\n"
|
208 |
+
if component in article:
|
209 |
+
md_text += article[component] + "\n\n"
|
210 |
+
else:
|
211 |
+
md_text += "No content found\n\n"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
212 |
|
213 |
return gr.update(value=md_text)
|
214 |
|
215 |
+
@terminal_print
|
216 |
+
def get_key_content(text:str,start,end:str,case_sensitive:bool=False): # not getting the materials and methods
|
217 |
'''
|
218 |
this function extract the content between start and end
|
219 |
+
and return the content in between. If no start or end is
|
220 |
+
found, the function will return the empty string.
|
|
|
|
|
|
|
221 |
|
222 |
Parameters
|
223 |
----------
|
|
|
233 |
str
|
234 |
content between start and end
|
235 |
'''
|
236 |
+
# if not case_sensitive:
|
237 |
+
text = text.lower()
|
238 |
+
end = end.lower()
|
239 |
|
240 |
+
if type(start) is str:
|
241 |
+
start = start.lower()
|
242 |
+
start_index = text.find(start)
|
243 |
+
else:
|
244 |
+
start_index = start
|
245 |
+
|
246 |
+
end_index = text.find(end)
|
247 |
|
248 |
+
# if the start is not found, set the start as the beginning of the text
|
249 |
+
if start_index == -1:
|
250 |
+
start_index = 0
|
251 |
+
|
252 |
+
# if the end is not found, return the from the start to the end of the text for both
|
253 |
+
# the searched text and the remaining text
|
254 |
+
if end_index == -1:
|
255 |
+
end_index = 0
|
256 |
+
return text[start_index:],text[start_index:]
|
257 |
+
|
258 |
+
# return the searched text and the remaining text
|
259 |
+
return text[start_index:end_index],text[end_index:]
|
260 |
|
261 |
+
@terminal_print
|
262 |
def get_articles(update_local=True):
|
263 |
'''
|
264 |
this function return the list of articles
|
|
|
275 |
'''
|
276 |
articles = get_table("articles")
|
277 |
if update_local:
|
278 |
+
app_data["articles"] = list_dict_to_dict(articles)
|
279 |
|
280 |
return articles
|
281 |
|
282 |
+
@terminal_print
|
283 |
def get_article(domain,name):
|
284 |
'''
|
285 |
this function return the article object
|
|
|
300 |
|
301 |
return article
|
302 |
|
303 |
+
@terminal_print
|
304 |
def add_article(domain,file,add_to_s3=True, add_to_local=True, file_object=True):
|
305 |
'''
|
306 |
this function receive the domain name and file obj
|
|
|
322 |
dict
|
323 |
article object
|
324 |
'''
|
325 |
+
if type(file) is str:
|
326 |
+
content = file
|
327 |
+
filename = file
|
328 |
+
upload_file(file,default_s3_bucket,filename)
|
329 |
+
else:
|
330 |
+
# extract the content from the pdf file
|
331 |
content, _ = read_pdf(file)
|
332 |
filename = file.name.split("\\")[-1]
|
333 |
+
|
334 |
+
# upload the article to s3
|
335 |
+
pdf_obj = open(file.name, 'rb')
|
336 |
+
upload_fileobj(pdf_obj,default_s3_bucket,filename)
|
337 |
+
pdf_obj.close()
|
338 |
|
339 |
article ={
|
340 |
"domain":domain,
|
341 |
"name":filename,
|
342 |
+
"raw":content,
|
|
|
|
|
343 |
"upload_time":datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
|
344 |
}
|
|
|
|
|
|
|
345 |
|
346 |
if add_to_local:
|
347 |
+
app_data["articles"][article["name"]]=article
|
348 |
|
349 |
res = post_item("articles",article)
|
350 |
if "Error" in res:
|
|
|
353 |
|
354 |
return article
|
355 |
|
356 |
+
@terminal_print
|
357 |
def remove_article(domain,name,remove_from_s3=True, remove_from_local=True):
|
358 |
'''
|
359 |
this function remove the article from the cloud, s3 and local memory
|
|
|
378 |
if remove_from_s3:
|
379 |
delete_file(domain,name)
|
380 |
if remove_from_local:
|
381 |
+
del app_data["articles"][name]
|
382 |
pass
|
383 |
delete_item("articles",{"domain":domain,"name":name})
|
384 |
|
385 |
return True
|
386 |
|
387 |
+
@terminal_print
|
388 |
def update_article(article,file_obj=None,update_local=True):
|
389 |
'''
|
390 |
this function receive the article object and update the article
|
|
|
408 |
upload_fileobj(file_obj,article["domain"],article["name"])
|
409 |
|
410 |
if update_local:
|
411 |
+
app_data["articles"][article["name"]] = article
|
412 |
|
413 |
post_item("articles",article)
|
414 |
|
415 |
return article
|
416 |
|
417 |
+
@terminal_print
|
418 |
+
def identify_logic(text,logic_keywords=logic_keywords,case_sensitive=False):
|
419 |
+
'''
|
420 |
+
identify_logic function receive the text and return the logic of the article
|
421 |
|
422 |
+
Parameters
|
423 |
+
----------
|
424 |
+
text : str
|
425 |
+
text of the article
|
426 |
|
427 |
+
Returns
|
428 |
+
-------
|
429 |
+
dict
|
430 |
+
the type of prompt to be used for the article (groups, preoperative, both or none)
|
431 |
+
'''
|
432 |
+
if not case_sensitive:
|
433 |
+
text = text.lower()
|
434 |
+
|
435 |
+
prompt_logic={ # define the logic surfix for the prompt
|
436 |
+
(True,True):"prompt_p_g",
|
437 |
+
(True,False):"prompt_np_g",
|
438 |
+
(False,True):"prompt_p_ng",
|
439 |
+
(False,False):"prompt_np_ng",
|
440 |
+
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
441 |
|
442 |
+
article_observation = (
|
443 |
+
sum([text.count(kw) for kw in logic_keywords["groups"]])>3,
|
444 |
+
sum([text.count(kw) for kw in logic_keywords["preoperatives"]])>=3
|
445 |
+
)
|
446 |
+
|
447 |
+
return {"logic":prompt_logic[article_observation]}
|
448 |
+
|
449 |
+
# lets do it one by one
|
450 |
+
@terminal_print
|
451 |
+
def select_overview_prompts(article):
|
452 |
+
valid_prompts = set()
|
453 |
+
for t in app_data["terms"]:
|
454 |
+
# select overview prompts
|
455 |
+
if validate_term(article,t,"overview"):
|
456 |
+
# add the prompts to the memory
|
457 |
+
valid_prompts.update(t["prompts_list"])
|
458 |
+
article["extraction"]["overview"] = valid_prompts.copy()
|
459 |
+
|
460 |
+
return {p:app_data["prompts"][p] for p in valid_prompts}
|
461 |
+
|
462 |
+
@terminal_print
def select_performance_prompts(article, performance_assessment):
    """Select the prompts needed for one performance-assessment step.

    Parameters
    ----------
    article : dict
        Article object; ``article["extraction"]`` is updated in place with
        the names of the prompts selected for *performance_assessment*.
    performance_assessment : str
        Assessment step name, e.g. ``"clinical"`` or ``"safety"``.

    Returns
    -------
    dict
        Prompt objects keyed by prompt name; each carries the term that
        selected it under the ``"term"`` key.
    """
    # terms whose region/step/keywords match this article
    # (the original also built an unused `search_text` here — dead code,
    # validate_term computes its own key text)
    valid_terms = [t for t in app_data["terms"]
                   if validate_term(article, t, performance_assessment)]

    valid_prompts = {}
    for t in valid_terms:
        if any(p not in valid_prompts for p in t["prompts_list"]):
            for p in t["prompts_list"]:
                # shallow-copy so attaching "term" does not mutate the
                # shared prompt stored in app_data["prompts"] (the original
                # wrote through the alias and polluted the global store)
                prompt = dict(app_data["prompts"][p])
                prompt["term"] = t
                valid_prompts[p] = prompt
                if performance_assessment not in article["extraction"]:
                    article["extraction"][performance_assessment] = []
                article["extraction"][performance_assessment].append(prompt["prompt_name"])

    return valid_prompts
|
486 |
+
|
487 |
+
@terminal_print
def process_prompts(article):
    """Select and execute all extraction prompts for *article*.

    Selects the overview prompts plus one prompt set per performance
    assessment (clinical, radiologic, safety, other), then runs each set
    concurrently — one event loop per set, the sets themselves sequential
    so overview results exist before the performance passes run.

    Parameters
    ----------
    article : dict
        Article object; ``article["extraction"]`` is (re)initialised here
        and the executed prompts write their results onto the article.

    Returns
    -------
    None
        All output is recorded on *article* in place (the original
        docstring wrongly claimed a list was returned).
    """
    article["extraction"] = {}

    overview_prompts = select_overview_prompts(article)
    performance_assessments = ["clinical", "radiologic", "safety", "other"]
    performance_prompts = {
        assessment: select_performance_prompts(article, assessment)
        for assessment in performance_assessments
    }

    # asyncio.run creates and closes a fresh event loop for each call,
    # replacing the manual new_event_loop/run_until_complete/close dance
    asyncio.run(execute_concurrent(article, overview_prompts))
    for assessment in performance_assessments:
        asyncio.run(execute_concurrent(article, performance_prompts[assessment]))
|
524 |
+
|
525 |
|
526 |
+
def validate_term(article, term, assessment_step):
    """Decide whether *term* applies to *article* for *assessment_step*.

    A term matches when its anatomic region fits the article's domain and
    either both the term and the requested step are "overview", or the
    steps match and at least one of the term's comma-separated keywords
    appears in the article's key text (content, authors, acceptance
    month/year and tables, compared case-insensitively).
    """
    # region gate: "all" always passes, otherwise must equal the domain
    region = term["region"]
    if region not in ("all", article["domain"].lower()):
        return False

    step = term["assessment_step"]
    if step == "overview":
        # overview terms never require a keyword hit
        return assessment_step == "overview"

    if step != assessment_step:
        return False

    # performance terms need a keyword hit in the article's key text
    haystack = "".join([
        article["key_content"],
        article["Authors"],
        article["Acceptance Month"],
        article["Acceptance Year"],
        "\n".join(article["tables"]),
    ]).lower()

    return any(kw.strip() in haystack for kw in term["term"].split(","))
|
543 |
|
544 |
+
@terminal_print
def keyword_search(keywords, full_text):
    """Report which keywords occur in *full_text*.

    Parameters
    ----------
    keywords : iterable
        Each entry is either a single keyword string or a tuple/list/set
        of alternatives; a group matches when any alternative is present.
    full_text : str
        Text to search (plain substring match; casing is the caller's job).

    Returns
    -------
    dict
        Keyword (or tuple of alternatives) -> bool.
    """
    keywords_result = {}
    for k in keywords:
        if isinstance(k, (tuple, list, set)):
            # Bugfixes vs the original: (1) the recursive call returned a
            # dict, which is truthy for any non-empty group, so grouped
            # keywords always reported a match; test membership directly.
            # (2) list/set keys are unhashable — store under a tuple key.
            keywords_result[tuple(k)] = any(kw in full_text for kw in k)
        else:
            keywords_result[k] = k in full_text
    return keywords_result
|
553 |
|
554 |
+
@terminal_print
def execute_prompts(article, prompt):
    """Run *prompt* on *article*, first materialising any missing inputs.

    Each entry of ``prompt["input_list"]`` names an article segment; when
    a segment is absent, the prompt that produces it is executed
    recursively (depth-first) before this prompt itself is dispatched.
    """
    for raw_name in prompt["input_list"]:
        name = raw_name.strip()
        if name not in article:
            # recurse to produce the missing dependency first
            # (it might be a good idea to add a recursion-depth limit here)
            execute_prompts(article, app_data["prompts"][name])

    run_executor(article, prompt)
|
563 |
+
|
564 |
+
@terminal_print
def run_gpt(article, prompt):
    """Execute one GPT-backed prompt and store its answer on the article.

    Builds the instruction pair (the logic-specific instruction selected
    by ``article["logic"]`` plus the reformat instruction), joins the
    prompt's input segments into one text, sends everything to the OpenAI
    chat API and writes the response under ``article[prompt["prompt_name"]]``.
    """
    instruction_pair = [prompt[article["logic"]], prompt["reformat_inst"]]

    segments = [article[name.strip()] for name in prompt["input_list"]]
    stream = create_inst("\n".join(segments), instruction_pair)

    print(prompt["prompt_name"])  # progress marker on the terminal

    response = send_inst(stream)
    article[prompt["prompt_name"]] = response
|
580 |
+
|
581 |
+
|
582 |
+
@terminal_print
def f_replacement_term(article, prompt):
    """Apply every configured term replacement to the prompt's input text.

    Reads the first input segment, substitutes each summary term with its
    ``term_replacement`` and stores the final text under the prompt's name.

    Bugfix: the original replaced into the *unmodified* input on every
    iteration, so only the last term's replacement survived; replacements
    now accumulate across all terms.
    """
    result = article[prompt["input_list"][0]]

    for t in app_data["summary"]:
        result = result.replace(t["term"], t["term_replacement"])

    article[prompt["prompt_name"]] = result
|
589 |
+
|
590 |
+
@terminal_print
def f_summary_term(article, prompt):
    """Apply every configured term summary to the prompt's input text.

    Reads the first input segment, substitutes each summary term with its
    ``term_summary`` and stores the final text under the prompt's name.

    Bugfix: the original replaced into the *unmodified* input on every
    iteration, so only the last term's substitution survived; substitutions
    now accumulate across all terms.
    """
    result = article[prompt["input_list"][0]]

    for t in app_data["summary"]:
        result = result.replace(t["term"], t["term_summary"])

    article[prompt["prompt_name"]] = result
|
597 |
+
|
598 |
+
@terminal_print
def run_executor(article, prompt):
    '''
    Dispatch *prompt* to the executor named in its "executed by" field.

    Known executors: the GPT chat model, the term-replacement pass and the
    term-summary pass. Anything else is a silent no-op, matching the
    original match statement's implicit default.
    '''
    executors = {
        "gpt-3.5-turbo-16k": run_gpt,
        "f_replacement_term": f_replacement_term,
        "f_summary_term": f_summary_term,
    }
    executor = executors.get(prompt["executed by"])
    if executor is not None:
        executor(article, prompt)
|
610 |
+
|
611 |
+
|
612 |
+
def add_inst(instructions, prompt):
    """Concatenate *prompt* onto *instructions* and return the result.

    Works for any pair supporting ``+`` (strings, or lists of messages);
    neither argument is mutated.
    """
    combined = instructions + prompt
    return combined
|
|
|
|
|
|
requirements.txt
CHANGED
@@ -5,4 +5,5 @@ boto3
|
|
5 |
requests
|
6 |
openai
|
7 |
pdfminer.six
|
8 |
-
tiktoken
|
|
|
|
5 |
requests
|
6 |
openai
|
7 |
pdfminer.six
|
8 |
+
tiktoken
|
9 |
+
langchain
|
supplier.py
CHANGED
@@ -7,27 +7,26 @@ from utility import terminal_print
|
|
7 |
openai.api_key = openai_api_key
|
8 |
token_encoder = tiktoken.get_encoding("cl100k_base")
|
9 |
|
10 |
-
|
|
|
11 |
max_retry = 5
|
12 |
-
def
|
13 |
import time
|
14 |
count = 0
|
|
|
15 |
|
16 |
while(count < max_retry):
|
17 |
try:
|
18 |
return func(*args,**kwargs)
|
19 |
except Exception as e:
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
count += 1
|
24 |
-
else:
|
25 |
-
raise e
|
26 |
|
27 |
-
return
|
28 |
|
29 |
@terminal_print
|
30 |
-
def execute_prompt(prompt):
|
31 |
'''
|
32 |
execute_prompt function takes two arguments: text and prompt
|
33 |
|
@@ -49,14 +48,14 @@ def execute_prompt(prompt):
|
|
49 |
return res.choices[0]["text"] if res.choices else "<error> failed to generate text</error>"
|
50 |
|
51 |
@terminal_print
|
52 |
-
def format(**kwargs):
|
53 |
if "format" in kwargs:
|
54 |
return kwargs["format"]
|
55 |
return kwargs
|
56 |
|
57 |
|
58 |
@terminal_print
|
59 |
-
def execute_instruction(article, instruction,model="gpt-3.5-turbo-16k",format="markdown"):
|
60 |
'''
|
61 |
execute_instruction function takes three arguments: article, instruction and model
|
62 |
|
@@ -96,7 +95,7 @@ def execute_instruction(article, instruction,model="gpt-3.5-turbo-16k",format="m
|
|
96 |
return res["choices"][0]["message"]["content"]
|
97 |
|
98 |
@terminal_print
|
99 |
-
def create_inst(article, instructions):
|
100 |
msg_stream = [
|
101 |
{
|
102 |
"role":"system",
|
@@ -113,7 +112,7 @@ def create_inst(article, instructions):
|
|
113 |
|
114 |
@terminal_print
|
115 |
@request_retry
|
116 |
-
def send_inst(stream, model="gpt-3.5-turbo-16k",temperature=0):
|
117 |
res= openai.ChatCompletion.create(
|
118 |
model=model,
|
119 |
messages=stream,
|
|
|
7 |
openai.api_key = openai_api_key
|
8 |
token_encoder = tiktoken.get_encoding("cl100k_base")
|
9 |
|
10 |
+
|
11 |
+
def request_retry(func):
    """Decorator: retry *func* up to five times on any exception.

    Each failure is logged and followed by a 5-second back-off; once the
    attempts are exhausted the last exception is re-raised (the original
    silently returned None after the loop, hiding persistent API failures).
    """
    import functools
    import time

    max_retry = 5

    @functools.wraps(func)  # preserve name/docstring for logging & debugging
    def deco_retry(*args, **kwargs):
        last_error = None
        for attempt in range(max_retry):
            try:
                return func(*args, **kwargs)
            except Exception as e:
                last_error = e
                print(f"Error: {e.__class__.__name__}, retrying in 5 seconds...")
                # back off only when another attempt remains
                if attempt < max_retry - 1:
                    time.sleep(5)
        raise last_error

    return deco_retry
|
27 |
|
28 |
@terminal_print
|
29 |
+
def execute_prompt(prompt): # need revision
|
30 |
'''
|
31 |
execute_prompt function takes two arguments: text and prompt
|
32 |
|
|
|
48 |
return res.choices[0]["text"] if res.choices else "<error> failed to generate text</error>"
|
49 |
|
50 |
@terminal_print
def format(**kwargs):  # need revision
    """Return the explicit ``format`` keyword when given, else all kwargs.

    NOTE(review): this shadows the builtin ``format``; consider renaming
    it and its call sites in a follow-up.
    """
    # dict.get only falls back to the default when the key is absent,
    # so falsy "format" values are still returned as-is
    return kwargs.get("format", kwargs)
|
55 |
|
56 |
|
57 |
@terminal_print
|
58 |
+
def execute_instruction(article, instruction,model="gpt-3.5-turbo-16k",format="markdown"): # need revision
|
59 |
'''
|
60 |
execute_instruction function takes three arguments: article, instruction and model
|
61 |
|
|
|
95 |
return res["choices"][0]["message"]["content"]
|
96 |
|
97 |
@terminal_print
|
98 |
+
def create_inst(article, instructions): # need revision
|
99 |
msg_stream = [
|
100 |
{
|
101 |
"role":"system",
|
|
|
112 |
|
113 |
@terminal_print
|
114 |
@request_retry
|
115 |
+
def send_inst(stream, model="gpt-3.5-turbo-16k",temperature=0): # need revision to change to async method
|
116 |
res= openai.ChatCompletion.create(
|
117 |
model=model,
|
118 |
messages=stream,
|
ui_studies.py
CHANGED
@@ -2,6 +2,7 @@ import gradio as gr
|
|
2 |
|
3 |
from application import *
|
4 |
from features import init_app_data
|
|
|
5 |
|
6 |
def refresh():
|
7 |
init_app_data()
|
@@ -19,20 +20,23 @@ def create_md_tables(articles):
|
|
19 |
md_text += "| Domain | File Name | Upload Time | Device |\n| --- | --- | --- | --- |\n"
|
20 |
|
21 |
for article in articles:
|
22 |
-
md_table = f"| {article['domain']} | {article['name']} | {article['upload_time']} | {
|
23 |
md_text += md_table
|
24 |
|
25 |
return md_text
|
26 |
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
|
|
|
|
|
|
|
2 |
|
3 |
from application import *
|
4 |
from features import init_app_data
|
5 |
+
from utility import terminal_print
|
6 |
|
7 |
def refresh():
|
8 |
init_app_data()
|
|
|
20 |
md_text += "| Domain | File Name | Upload Time | Device |\n| --- | --- | --- | --- |\n"
|
21 |
|
22 |
for article in articles:
|
23 |
+
md_table = f"| {article['domain']} | {article['name']} | {article['upload_time']} | {default_region} |\n"
|
24 |
md_text += md_table
|
25 |
|
26 |
return md_text
|
27 |
|
28 |
+
@terminal_print
def init_studies_page():
    '''
    Build the "Studies" tab: a header row with a refresh button above a
    markdown area that lists the uploaded articles.

    Returns
    -------
    gr.Blocks
        The assembled gradio page, for mounting in the tabbed interface.
    '''
    with gr.Blocks() as studies_page:
        with gr.Row():
            gr.Markdown("## Article Lists")
            btn_refresh = gr.Button(value="Refresh",variant="primary")
        gr.HTML("<hr>")

        # the article table is rendered as markdown by the refresh handler
        article_list = gr.Markdown("")

        # clicking refresh re-reads the app data and re-renders the list
        btn_refresh.click(
            fn=refresh,
            outputs=[article_list]
        )
    return studies_page
|
ui_study.py
CHANGED
@@ -17,67 +17,89 @@ def reset():
|
|
17 |
)
|
18 |
|
19 |
# complete user interfaces
|
20 |
-
|
21 |
-
|
22 |
-
with gr.
|
23 |
-
|
24 |
-
|
25 |
-
gr.
|
|
|
|
|
26 |
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
btn_refresh = gr.Button(value="Refresh",variant="primary")
|
41 |
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
# control element definition
|
52 |
-
btn_reset.click(
|
53 |
-
reset,
|
54 |
-
outputs=[
|
55 |
-
domain,
|
56 |
-
upload_study,
|
57 |
-
input_study,
|
58 |
-
overview,
|
59 |
-
detail_views,
|
60 |
-
]
|
61 |
-
)
|
62 |
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
76 |
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
|
|
|
17 |
)
|
18 |
|
19 |
# complete user interfaces
|
20 |
+
@terminal_print
def init_study_page():
    '''
    Build the "Clinical Study" tab: an upload/paste panel for new study
    reports, a selector for existing literature reports, and two markdown
    panes showing the extracted overview and detail views.

    Returns
    -------
    gr.Blocks
        The assembled gradio page, for mounting in the tabbed interface.
    '''
    with gr.Blocks() as study_page:
        # user control panel
        with gr.Row(equal_height=False):
            with gr.Column():
                gr.Markdown("## Studies")
                gr.HTML("<hr>")
                # file upload path (multiple reports allowed)
                upload_study = gr.File(label="Upload a clinical study report",type="file",file_count="multiple")

            with gr.Column():
                # anatomic region drives which terms/prompts apply downstream
                domain = gr.Radio(label="Anatomical Region",choices=anatomic_domains,value=default_region)
                # free-text alternative to the file upload
                input_study = gr.TextArea(label="Or paste a clinical study report content",placeholder="Paste content here...",lines=5)
                with gr.Row():
                    btn_reset = gr.Button(value="Reset",variant="stop")
                    btn_add_study = gr.Button(value="Add",variant="primary")

        gr.HTML("<hr>")
        with gr.Row():
            gr.Markdown("## Literature Report")

        gr.HTML("<hr>")
        with gr.Row(equal_height=False):
            with gr.Column():
                # choices are the article names known at page-build time;
                # the Refresh button is how new uploads become visible
                dropdown = gr.Dropdown(label="Select a literature report",choices=app_data["articles"].keys())
            with gr.Column():
                with gr.Row():
                    btn_get_article = gr.Button(value="Get",variant="primary")
                    btn_refresh = gr.Button(value="Refresh",variant="primary")

        gr.HTML("<hr>")
        # extraction outcome panel
        with gr.Row(equal_height=False):
            with gr.Column():
                overview = gr.Markdown("")
            with gr.Column():
                # tables = gr.Markdown("")
                detail_views = gr.Markdown("")

        # control element definition
        # "Get": load a previously processed article into the result panes
        btn_get_article.click(
            get_existing_article,
            inputs=[
                dropdown,
            ],
            outputs=[
                overview,
                detail_views,
            ]
        )

        # "Reset": clear all inputs and result panes back to defaults
        btn_reset.click(
            reset,
            outputs=[
                domain,
                upload_study,
                input_study,
                overview,
                detail_views,
            ]
        )

        # "Add": run the full extraction pipeline on the new study
        btn_add_study.click(
            process_study,
            inputs=[
                domain,
                upload_study,
                input_study,
            ],
            outputs=[
                overview,
                detail_views,
                # tables
            ],
        )

        # "Refresh": re-render the result panes from current app data
        btn_refresh.click(
            refresh,
            outputs=[
                overview,
                detail_views,
            ],
        )
    return study_page
|