Spaces:
Runtime error
Runtime error
Roland Ding
commited on
Commit
•
82b9d78
1
Parent(s):
4e46bb4
9.9.22.67 mass update of the application
Browse files+ Revised internally to apply usage of langchain and async call
+ Realigned the application to use the new terms and prompts from
search term 13n
+ Added the new chains.py module to support the new langchain
+ revised the application.py to align with all the new backend data
structure.
modified: app.py
modified: application.py
new file: chains.py
modified: features.py
modified: requirements.txt
modified: supplier.py
modified: ui_studies.py
modified: ui_study.py
- app.py +16 -9
- application.py +26 -16
- chains.py +107 -0
- features.py +374 -198
- requirements.txt +2 -1
- supplier.py +13 -14
- ui_studies.py +17 -13
- ui_study.py +81 -59
app.py
CHANGED
@@ -13,23 +13,30 @@ from utility import *
|
|
13 |
|
14 |
from ui_study import *
|
15 |
from ui_studies import *
|
|
|
16 |
|
17 |
|
18 |
examples = []
|
19 |
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
|
|
|
|
|
|
|
|
|
|
29 |
|
30 |
def refresh_data():
|
31 |
return
|
32 |
|
33 |
if __name__ == "__main__":
|
34 |
init_app_data()
|
|
|
35 |
demo.launch()
|
|
|
13 |
|
14 |
from ui_study import *
|
15 |
from ui_studies import *
|
16 |
+
# from application import app_data
|
17 |
|
18 |
|
19 |
examples = []
|
20 |
|
21 |
+
@terminal_print
|
22 |
+
def init_demo():
|
23 |
+
'''
|
24 |
+
initialize the demo data
|
25 |
+
'''
|
26 |
+
study_page = init_study_page()
|
27 |
+
studies_page = init_studies_page()
|
28 |
+
|
29 |
+
return gr.TabbedInterface(
|
30 |
+
[study_page,studies_page],
|
31 |
+
["Clinical Study","Studies"],
|
32 |
+
theme = gr.themes.Soft(primary_hue="sky",secondary_hue="orange"),
|
33 |
+
css = "footer {visibility: hidden}",
|
34 |
+
title="AMRA AI Medi Reader")
|
35 |
|
36 |
def refresh_data():
|
37 |
return
|
38 |
|
39 |
if __name__ == "__main__":
|
40 |
init_app_data()
|
41 |
+
demo = init_demo()
|
42 |
demo.launch()
|
application.py
CHANGED
@@ -1,7 +1,5 @@
|
|
1 |
import os
|
2 |
|
3 |
-
from collections import defaultdict
|
4 |
-
|
5 |
'''
|
6 |
shared environment variables
|
7 |
'''
|
@@ -54,28 +52,40 @@ tables_inst=[
|
|
54 |
f"include all table titles."
|
55 |
]
|
56 |
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
66 |
|
67 |
-
accepted_month_inst=[
|
68 |
-
f"extract the acceptance month of the article from the system text.",
|
69 |
-
f"return the results on a single line as 'Accepted Month: <month>.",
|
70 |
-
]
|
71 |
|
72 |
'''
|
73 |
application default data
|
74 |
'''
|
75 |
app_data = {
|
76 |
"current article":{},
|
77 |
-
"articles":
|
78 |
"prompts":{},
|
79 |
"terms":[],
|
80 |
-
"summary":
|
81 |
}
|
|
|
1 |
import os
|
2 |
|
|
|
|
|
3 |
'''
|
4 |
shared environment variables
|
5 |
'''
|
|
|
52 |
f"include all table titles."
|
53 |
]
|
54 |
|
55 |
+
article_prompts = {
|
56 |
+
"Authors": '''extract all of the authors of the article from the above text.\n
|
57 |
+
Return the results on the same line separated by commas as Authors: Author A, Author B...
|
58 |
+
''',
|
59 |
+
"Acceptance Year": '''extract the acceptance year of the article from the above text.\n
|
60 |
+
Return the results on a single line as Accepted Year: <year>.
|
61 |
+
''',
|
62 |
+
|
63 |
+
"Acceptance Month":'''extract the acceptance month of the article from the above text.\n
|
64 |
+
Return the results on a single line as Accepted Month: <month>.
|
65 |
+
'''
|
66 |
+
}
|
67 |
|
68 |
+
overview_prompts = clinical_prompts = radiological_prompts = other_prompts = {}
|
69 |
+
|
70 |
+
# populate the prompts from .prompt/overview/ folder
|
71 |
+
def update_prompts_from_dir(prompts,path):
|
72 |
+
for file in os.listdir(path):
|
73 |
+
with open(f"{path}/{file}","r") as f:
|
74 |
+
prompts[file.split(".")[0]] = f.read()
|
75 |
+
|
76 |
+
update_prompts_from_dir(overview_prompts,".prompts/overview")
|
77 |
+
update_prompts_from_dir(clinical_prompts,".prompts/clinical")
|
78 |
+
update_prompts_from_dir(radiological_prompts,".prompts/radiologic")
|
79 |
+
update_prompts_from_dir(other_prompts,".prompts/other")
|
80 |
|
|
|
|
|
|
|
|
|
81 |
|
82 |
'''
|
83 |
application default data
|
84 |
'''
|
85 |
app_data = {
|
86 |
"current article":{},
|
87 |
+
"articles":{},
|
88 |
"prompts":{},
|
89 |
"terms":[],
|
90 |
+
"summary":{}
|
91 |
}
|
chains.py
ADDED
@@ -0,0 +1,107 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import asyncio
|
2 |
+
|
3 |
+
from langchain.chat_models import ChatOpenAI
|
4 |
+
from langchain.prompts.chat import ChatPromptTemplate
|
5 |
+
from langchain.schema import BaseOutputParser
|
6 |
+
|
7 |
+
from utility import read_pdf,terminal_print
|
8 |
+
|
9 |
+
class Replacement(BaseOutputParser):
|
10 |
+
"""Parse the output of an LLM call to a comma-separated list."""
|
11 |
+
|
12 |
+
|
13 |
+
def parse(self, text: str, **kwargs):
|
14 |
+
"""Parse the output of an LLM call."""
|
15 |
+
if kwargs:
|
16 |
+
print(kwargs)
|
17 |
+
return text.strip().split(", ")
|
18 |
+
|
19 |
+
@terminal_print # need to review this.
|
20 |
+
async def async_generate(article,name,chain,replacement_term=None):
|
21 |
+
if replacement_term:
|
22 |
+
resp = await chain.ainvoke({"term":replacement_term})
|
23 |
+
else:
|
24 |
+
resp = await chain.ainvoke({"term":""})
|
25 |
+
article[name] = resp.content
|
26 |
+
|
27 |
+
@terminal_print # need to review this.
|
28 |
+
async def execute_concurrent(article,prompts):
|
29 |
+
llm = ChatOpenAI(temperature=0.0,model_name="gpt-3.5-turbo-16k")
|
30 |
+
tasks = []
|
31 |
+
|
32 |
+
prompt_type = article["logic"]
|
33 |
+
prompt_list = list(prompts.keys())
|
34 |
+
print(prompt_list)
|
35 |
+
|
36 |
+
# for name,p in prompts.items():
|
37 |
+
while prompt_list:
|
38 |
+
name = prompt_list.pop(0)
|
39 |
+
p = prompts[name]
|
40 |
+
|
41 |
+
if any([s not in article for s in p["input_list"]]):
|
42 |
+
# prompt_list.append(name)
|
43 |
+
print("skip",name,"due to missing input",p["input_list"])
|
44 |
+
continue
|
45 |
+
|
46 |
+
print("executing",p["assessment_step"],name)
|
47 |
+
input_text = "".join([article[s] for s in p["input_list"]])
|
48 |
+
|
49 |
+
chat_prompt = ChatPromptTemplate.from_messages([
|
50 |
+
("human",input_text),
|
51 |
+
("system",p[prompt_type]),
|
52 |
+
])
|
53 |
+
|
54 |
+
if "reformat_inst" in p:
|
55 |
+
chat_prompt.append(
|
56 |
+
("system",p["reformat_inst"])
|
57 |
+
)
|
58 |
+
|
59 |
+
post_prompt_maping = {}
|
60 |
+
post_replace_term = lambda res,map=post_prompt_maping:replace_term(res,map=map)
|
61 |
+
|
62 |
+
chain = chat_prompt | llm | post_replace_term
|
63 |
+
if "term" in p:
|
64 |
+
tasks.append(async_generate(article,name,chain,replacement_term=p["term"]["term_prompt"])) # in here the name shall be the term_prompt from the terms triggered
|
65 |
+
else:
|
66 |
+
tasks.append(async_generate(article,name,chain)) # in here the name shall be the term_prompt from the terms triggered
|
67 |
+
|
68 |
+
await asyncio.gather(*tasks)
|
69 |
+
|
70 |
+
def replace_term(res,**kwargs):
|
71 |
+
if "map" in kwargs:
|
72 |
+
for key,term in kwargs["map"].items():
|
73 |
+
res.content = res.content.replace(key,term)
|
74 |
+
return res
|
75 |
+
|
76 |
+
if __name__ == "__main__":
|
77 |
+
# lets try the Blood Loss, Operation Time, and Need for ICU in other folder
|
78 |
+
sample_artice = ".samples/Ha SK, 2008.pdf"
|
79 |
+
sample_content,_ = read_pdf(sample_artice)
|
80 |
+
|
81 |
+
llm = ChatOpenAI(temperature=0.0,model_name="gpt-3.5-turbo-16k")
|
82 |
+
# with open(".prompts/other/Need for ICU.txt") as f:
|
83 |
+
# prompt = f.read()
|
84 |
+
# name = "Need for ICU"
|
85 |
+
with open(".prompts/other/Operation Time.txt") as f:
|
86 |
+
prompt = f.read()
|
87 |
+
name = "Operation Time"
|
88 |
+
# with open(".prompts/other/Blood Loss.txt") as f:
|
89 |
+
# prompt = f.read()
|
90 |
+
# name = "Blood Loss"
|
91 |
+
|
92 |
+
post_prompt_maping = {}
|
93 |
+
post_replace_term = lambda res,map=post_prompt_maping:replace_term(res,map=map)
|
94 |
+
|
95 |
+
chain_prompt = ChatPromptTemplate.from_messages([
|
96 |
+
("human",sample_artice),
|
97 |
+
("system",prompt),
|
98 |
+
])
|
99 |
+
|
100 |
+
# experiment with cascading the chain
|
101 |
+
chain = chain_prompt | llm
|
102 |
+
chain2 = chain | post_replace_term
|
103 |
+
|
104 |
+
# lets try remove from chain
|
105 |
+
chain2.last.with_retry = True
|
106 |
+
res = chain2.invoke({"term":name})
|
107 |
+
print(res.content)
|
features.py
CHANGED
@@ -1,29 +1,66 @@
|
|
1 |
# language default packages
|
2 |
from datetime import datetime
|
3 |
-
from collections import defaultdict
|
4 |
|
5 |
# external packages
|
6 |
import gradio as gr
|
7 |
-
import
|
|
|
|
|
|
|
|
|
8 |
|
9 |
# internal packages
|
|
|
10 |
from cloud_db import *
|
11 |
from cloud_storage import *
|
|
|
12 |
from supplier import *
|
13 |
-
|
14 |
-
encoding = tiktoken.get_encoding("cl100k_base")
|
15 |
|
16 |
# get prompts, terms, outputs from the cloud
|
|
|
17 |
def init_app_data():
|
18 |
'''
|
19 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
'''
|
21 |
-
app_data["prompts"] = get_table("prompts")
|
22 |
app_data["terms"] = get_table("terms")
|
23 |
-
|
24 |
-
app_data["
|
25 |
|
26 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
domain,
|
28 |
study_file_obj,
|
29 |
study_content,
|
@@ -39,47 +76,89 @@ def process_study(
|
|
39 |
else:
|
40 |
return "No file or content provided","No file or content provided","No file or content provided"
|
41 |
|
42 |
-
#
|
43 |
-
|
44 |
-
index_discussion = raw_content.lower().index("discussion") if "discussion" in raw_content.lower() else len(raw_content)
|
45 |
-
meta_content = raw_content[:index_discussion]
|
46 |
-
key_content = get_key_content(raw_content)
|
47 |
-
|
48 |
-
authors = send_inst(create_inst(meta_content,authors_inst))
|
49 |
-
accepted_date = send_inst(create_inst(meta_content,accepted_date_inst))
|
50 |
-
tables = send_inst(create_inst(key_content,tables_inst))
|
51 |
|
52 |
-
|
53 |
-
|
54 |
-
selected_prompts = select_prompts(key_content,terms=app_data["terms"],prompts=app_data["prompts"])
|
55 |
-
res = process_prompts(key_content,selected_prompts)
|
56 |
-
|
57 |
-
detail_views = create_detail_views(res)
|
58 |
-
overview = create_overview(res)
|
59 |
-
|
60 |
-
article.update({
|
61 |
-
"meta":{
|
62 |
-
"authors":authors,
|
63 |
-
"accepted_date":accepted_date,
|
64 |
-
},
|
65 |
-
"extractions":res
|
66 |
-
})
|
67 |
|
68 |
-
article
|
69 |
-
{
|
70 |
-
"key_content":key_content,
|
71 |
-
"tables":tables,
|
72 |
-
}
|
73 |
-
)
|
74 |
app_data["current_article"] = article
|
|
|
|
|
75 |
try:
|
76 |
update_article(article)
|
77 |
except Exception as e:
|
78 |
print(e)
|
79 |
# return overview, detail_views
|
80 |
|
|
|
|
|
|
|
|
|
81 |
return overview, detail_views
|
82 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
83 |
def refresh():
|
84 |
'''
|
85 |
this function refresh the application data from the cloud backend
|
@@ -89,64 +168,56 @@ def refresh():
|
|
89 |
article = app_data["current_article"]
|
90 |
if not article:
|
91 |
return "No file or content provided"
|
92 |
-
|
93 |
|
94 |
-
|
|
|
95 |
|
96 |
-
article.update({
|
97 |
-
"extractions":res
|
98 |
-
})
|
99 |
-
|
100 |
-
detail_views = create_detail_views(res)
|
101 |
-
overview = create_overview(res)
|
102 |
update_article(article=article)
|
103 |
|
104 |
return overview, detail_views
|
105 |
|
|
|
106 |
def create_overview(article):
|
107 |
-
md_text = ""
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
112 |
return gr.update(value=md_text)
|
113 |
|
|
|
114 |
def create_detail_views(article):
|
115 |
-
md_text = ""
|
|
|
116 |
|
117 |
# add performance
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
for content in safeties:
|
129 |
-
md_text += f"#### {content['assessment']} - {content['template_name']}\n\n"
|
130 |
-
md_text += content["content"] + "\n\n"
|
131 |
-
|
132 |
-
# add other
|
133 |
-
others = [v for _,v in article.items() if v["assessment"] == "other"]
|
134 |
-
|
135 |
-
md_text += f"### Other\n\n"
|
136 |
-
for title,content in others:
|
137 |
-
md_text += f"#### {content['assessment']} - {content['template_name']}\n\n"
|
138 |
-
md_text += content["content"] + "\n\n"
|
139 |
|
140 |
return gr.update(value=md_text)
|
141 |
|
142 |
-
|
|
|
143 |
'''
|
144 |
this function extract the content between start and end
|
145 |
-
and return the content in between.
|
146 |
-
|
147 |
-
and find all the end and keep the last one showing up in the
|
148 |
-
text. If no start or end is found, the function will return
|
149 |
-
the no text.
|
150 |
|
151 |
Parameters
|
152 |
----------
|
@@ -162,19 +233,32 @@ def get_key_content(text,case_sensitive=False):
|
|
162 |
str
|
163 |
content between start and end
|
164 |
'''
|
165 |
-
if not case_sensitive:
|
166 |
-
|
|
|
167 |
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
|
176 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
177 |
|
|
|
178 |
def get_articles(update_local=True):
|
179 |
'''
|
180 |
this function return the list of articles
|
@@ -191,10 +275,11 @@ def get_articles(update_local=True):
|
|
191 |
'''
|
192 |
articles = get_table("articles")
|
193 |
if update_local:
|
194 |
-
app_data["articles"] = articles
|
195 |
|
196 |
return articles
|
197 |
|
|
|
198 |
def get_article(domain,name):
|
199 |
'''
|
200 |
this function return the article object
|
@@ -215,6 +300,7 @@ def get_article(domain,name):
|
|
215 |
|
216 |
return article
|
217 |
|
|
|
218 |
def add_article(domain,file,add_to_s3=True, add_to_local=True, file_object=True):
|
219 |
'''
|
220 |
this function receive the domain name and file obj
|
@@ -236,29 +322,29 @@ def add_article(domain,file,add_to_s3=True, add_to_local=True, file_object=True)
|
|
236 |
dict
|
237 |
article object
|
238 |
'''
|
239 |
-
if
|
|
|
|
|
|
|
|
|
|
|
240 |
content, _ = read_pdf(file)
|
241 |
filename = file.name.split("\\")[-1]
|
242 |
-
|
243 |
-
|
244 |
-
|
245 |
-
|
246 |
-
|
247 |
|
248 |
article ={
|
249 |
"domain":domain,
|
250 |
"name":filename,
|
251 |
-
"
|
252 |
-
"raw":content
|
253 |
-
},
|
254 |
"upload_time":datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
|
255 |
}
|
256 |
-
|
257 |
-
if add_to_s3 and file_object:
|
258 |
-
upload_fileobj(file,default_s3_bucket,filename)
|
259 |
|
260 |
if add_to_local:
|
261 |
-
app_data["articles"]
|
262 |
|
263 |
res = post_item("articles",article)
|
264 |
if "Error" in res:
|
@@ -267,6 +353,7 @@ def add_article(domain,file,add_to_s3=True, add_to_local=True, file_object=True)
|
|
267 |
|
268 |
return article
|
269 |
|
|
|
270 |
def remove_article(domain,name,remove_from_s3=True, remove_from_local=True):
|
271 |
'''
|
272 |
this function remove the article from the cloud, s3 and local memory
|
@@ -291,12 +378,13 @@ def remove_article(domain,name,remove_from_s3=True, remove_from_local=True):
|
|
291 |
if remove_from_s3:
|
292 |
delete_file(domain,name)
|
293 |
if remove_from_local:
|
294 |
-
|
295 |
pass
|
296 |
delete_item("articles",{"domain":domain,"name":name})
|
297 |
|
298 |
return True
|
299 |
|
|
|
300 |
def update_article(article,file_obj=None,update_local=True):
|
301 |
'''
|
302 |
this function receive the article object and update the article
|
@@ -320,118 +408,206 @@ def update_article(article,file_obj=None,update_local=True):
|
|
320 |
upload_fileobj(file_obj,article["domain"],article["name"])
|
321 |
|
322 |
if update_local:
|
323 |
-
app_data["articles"]
|
324 |
|
325 |
post_item("articles",article)
|
326 |
|
327 |
return article
|
328 |
|
329 |
-
|
330 |
-
|
331 |
-
|
|
|
332 |
|
333 |
-
|
334 |
-
|
335 |
-
|
336 |
-
|
337 |
|
338 |
-
|
339 |
-
|
340 |
-
|
341 |
-
|
342 |
-
|
343 |
-
|
344 |
-
|
345 |
-
|
346 |
-
#
|
347 |
-
|
348 |
-
|
349 |
-
|
350 |
-
|
351 |
-
|
352 |
-
# if "Error" in res:
|
353 |
-
# print(res)
|
354 |
-
# return False
|
355 |
-
# return output
|
356 |
-
|
357 |
-
# def remove_output(domain,name):
|
358 |
-
# res = delete_item("outputs",{"domain":domain,"name":name})
|
359 |
-
# if "Error" in res:
|
360 |
-
# print(res)
|
361 |
-
# return False
|
362 |
-
# return True
|
363 |
-
|
364 |
-
# def update_output(output):
|
365 |
-
# res = put_item("outputs",output)
|
366 |
-
# if "Error" in res:
|
367 |
-
# print(res)
|
368 |
-
# return False
|
369 |
-
# return True
|
370 |
-
|
371 |
-
# identify article state
|
372 |
-
def identify_logic(text):
|
373 |
-
article_logic = [
|
374 |
-
"groups",
|
375 |
-
"levels",
|
376 |
-
"preoperatives"
|
377 |
-
]
|
378 |
|
379 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
380 |
|
381 |
-
|
382 |
-
|
383 |
-
|
384 |
-
|
385 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
386 |
|
387 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
388 |
|
389 |
-
|
390 |
-
|
391 |
-
|
392 |
-
|
393 |
-
|
394 |
|
395 |
-
|
|
|
|
|
396 |
|
|
|
397 |
def keyword_search(keywords,full_text):
|
398 |
keywords_result = {}
|
399 |
for k in keywords:
|
400 |
-
if type(k) is tuple:
|
401 |
-
keywords_result[k]=
|
402 |
else:
|
403 |
-
keywords_result[k]=
|
404 |
return keywords_result
|
405 |
|
406 |
-
|
407 |
-
|
408 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
409 |
|
410 |
-
|
411 |
-
|
412 |
-
text : str
|
413 |
-
text of the article
|
414 |
-
prompts : list
|
415 |
-
list of prompts
|
416 |
|
417 |
-
|
418 |
-
|
419 |
-
|
420 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
421 |
'''
|
422 |
-
|
423 |
-
|
424 |
-
|
425 |
-
|
426 |
-
|
427 |
-
|
428 |
-
|
429 |
-
|
430 |
-
|
431 |
-
|
432 |
-
|
433 |
-
|
434 |
-
|
435 |
-
}
|
436 |
-
|
437 |
-
return res
|
|
|
1 |
# language default packages
|
2 |
from datetime import datetime
|
|
|
3 |
|
4 |
# external packages
|
5 |
import gradio as gr
|
6 |
+
import asyncio
|
7 |
+
|
8 |
+
from langchain.llms import OpenAI
|
9 |
+
from langchain.prompts import PromptTemplate
|
10 |
+
from langchain.chains import LLMChain
|
11 |
|
12 |
# internal packages
|
13 |
+
from chains import *
|
14 |
from cloud_db import *
|
15 |
from cloud_storage import *
|
16 |
+
from cloud_textract import *
|
17 |
from supplier import *
|
18 |
+
from utility import list_dict_to_dict
|
|
|
19 |
|
20 |
# get prompts, terms, outputs from the cloud
|
21 |
+
@terminal_print
|
22 |
def init_app_data():
|
23 |
'''
|
24 |
+
A function to initialize the application data from the cloud backend.
|
25 |
+
All the cloud data was saved in the app_data dictionary.
|
26 |
+
|
27 |
+
Parameters
|
28 |
+
----------
|
29 |
+
None
|
30 |
+
|
31 |
+
Returns
|
32 |
+
-------
|
33 |
+
None
|
34 |
'''
|
35 |
+
app_data["prompts"] = list_dict_to_dict(get_table("prompts"),key="prompt_name")
|
36 |
app_data["terms"] = get_table("terms")
|
37 |
+
app_data["articles"] = list_dict_to_dict(get_table("articles"),key="name")
|
38 |
+
app_data["summary"] = list_dict_to_dict(get_table("summary"),key="term")
|
39 |
|
40 |
+
@terminal_print
|
41 |
+
def get_existing_article(
|
42 |
+
article_name,
|
43 |
+
):
|
44 |
+
'''
|
45 |
+
get_existing_article function receive the article name and return the article object
|
46 |
+
|
47 |
+
Parameters
|
48 |
+
----------
|
49 |
+
article_name : str
|
50 |
+
name of the article
|
51 |
+
|
52 |
+
Returns
|
53 |
+
-------
|
54 |
+
dict
|
55 |
+
article object
|
56 |
+
'''
|
57 |
+
article = app_data["articles"][article_name]
|
58 |
+
app_data["current_article"] = article
|
59 |
+
|
60 |
+
return create_overview(article), create_detail_views(article)
|
61 |
+
|
62 |
+
@terminal_print
|
63 |
+
def process_study( # need revision
|
64 |
domain,
|
65 |
study_file_obj,
|
66 |
study_content,
|
|
|
76 |
else:
|
77 |
return "No file or content provided","No file or content provided","No file or content provided"
|
78 |
|
79 |
+
# update the common article segment from its existing attributes.
|
80 |
+
update_article_segment(article)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
81 |
|
82 |
+
# perform pathway logic and content extraction
|
83 |
+
process_prompts(article=article)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
84 |
|
85 |
+
# set the current article to the completed article object
|
|
|
|
|
|
|
|
|
|
|
86 |
app_data["current_article"] = article
|
87 |
+
|
88 |
+
# update the article to the cloud
|
89 |
try:
|
90 |
update_article(article)
|
91 |
except Exception as e:
|
92 |
print(e)
|
93 |
# return overview, detail_views
|
94 |
|
95 |
+
# create overview and detail markdown views for the article
|
96 |
+
detail_views = create_detail_views(article)
|
97 |
+
overview = create_overview(article)
|
98 |
+
|
99 |
return overview, detail_views
|
100 |
|
101 |
+
@terminal_print
|
102 |
+
def update_article_segment(article):
|
103 |
+
# get the key content between article objective and discussion
|
104 |
+
raw_content = article["raw"]
|
105 |
+
index_discussion = raw_content.lower().index("discussion") if "discussion" in raw_content.lower() else len(raw_content)
|
106 |
+
|
107 |
+
# get the meta data
|
108 |
+
meta_content = raw_content[:index_discussion]
|
109 |
+
abstract, next_content = get_key_content(raw_content,"objective","key") # article Liu does not have objective and key but has introduction.
|
110 |
+
introduction, next_content = get_key_content(next_content,"key","methods")
|
111 |
+
materials_and_methods, next_content = get_key_content(next_content,"methods","results")
|
112 |
+
results, _ = get_key_content(next_content,"results","discussion")
|
113 |
+
|
114 |
+
|
115 |
+
# update the article object
|
116 |
+
article.update({
|
117 |
+
"Abstract": abstract,
|
118 |
+
"Introduction": introduction,
|
119 |
+
"Material and Methods": materials_and_methods,
|
120 |
+
"Results": results,
|
121 |
+
"Meta Content": meta_content,
|
122 |
+
"tables": get_tables(article["name"]),
|
123 |
+
|
124 |
+
})
|
125 |
+
|
126 |
+
# add the key content as an aggregation of the other sections
|
127 |
+
article.update({
|
128 |
+
"key_content": article["Abstract"] + article["Introduction"] + article["Material and Methods"] + article["Results"],
|
129 |
+
})
|
130 |
+
# add the recognized logic to the article
|
131 |
+
article.update(identify_logic(article["key_content"]))
|
132 |
+
# one thing to notice here, due to the fact that update_article_segment function perform direct change on the article object,
|
133 |
+
# there is no need to re-assign the article object to the same variable name
|
134 |
+
|
135 |
+
pre_loop = asyncio.new_event_loop()
|
136 |
+
pre_loop.run_until_complete(get_segments(article,article_prompts))
|
137 |
+
pre_loop.close()
|
138 |
+
|
139 |
+
|
140 |
+
@terminal_print # need to review this.
|
141 |
+
async def gen_segment(article,name,chain):
|
142 |
+
|
143 |
+
resp = await chain.ainvoke({"term":""})
|
144 |
+
article[name] = resp.content #["content"]
|
145 |
+
|
146 |
+
@terminal_print # need to review this.
|
147 |
+
async def get_segments(article,prompts):
|
148 |
+
llm = ChatOpenAI(temperature=0.0,model_name="gpt-3.5-turbo-16k")
|
149 |
+
tasks = []
|
150 |
+
|
151 |
+
for name,p in prompts.items():
|
152 |
+
prompt = ChatPromptTemplate.from_messages([
|
153 |
+
("human",article["Meta Content"]),
|
154 |
+
("system","From the text above "+p),
|
155 |
+
])
|
156 |
+
chain = prompt | llm
|
157 |
+
tasks.append(gen_segment(article,name,chain))
|
158 |
+
|
159 |
+
await asyncio.gather(*tasks)
|
160 |
+
|
161 |
+
@terminal_print
|
162 |
def refresh():
|
163 |
'''
|
164 |
this function refresh the application data from the cloud backend
|
|
|
168 |
article = app_data["current_article"]
|
169 |
if not article:
|
170 |
return "No file or content provided"
|
171 |
+
process_prompts(article)
|
172 |
|
173 |
+
detail_views = create_detail_views(article)
|
174 |
+
overview = create_overview(article)
|
175 |
|
|
|
|
|
|
|
|
|
|
|
|
|
176 |
update_article(article=article)
|
177 |
|
178 |
return overview, detail_views
|
179 |
|
180 |
+
@terminal_print
|
181 |
def create_overview(article):
|
182 |
+
# md_text = ""
|
183 |
+
assessment = "overview"
|
184 |
+
|
185 |
+
md_text = f"## Overview\n\n"
|
186 |
+
overview_components = article["extraction"][assessment]
|
187 |
+
for component in overview_components:
|
188 |
+
md_text += f"#### {assessment} - {component}\n\n"
|
189 |
+
if component in article:
|
190 |
+
md_text += article[component] + "\n\n"
|
191 |
+
else:
|
192 |
+
md_text += "No content found\n\n"
|
193 |
+
# md_text += article[component] + "\n\n"
|
194 |
return gr.update(value=md_text)
|
195 |
|
196 |
+
@terminal_print
|
197 |
def create_detail_views(article):
|
198 |
+
md_text = "## Performance\n\n"
|
199 |
+
assessments = ["clinical","radiologic","safety","other"]
|
200 |
|
201 |
# add performance
|
202 |
+
for a in assessments:
|
203 |
+
if a in article["extraction"]:
|
204 |
+
md_text += f"### {a.capitalize()}\n\n"
|
205 |
+
performance_components = article["extraction"][a]
|
206 |
+
for component in performance_components:
|
207 |
+
md_text += f"#### {a} - {component}\n\n"
|
208 |
+
if component in article:
|
209 |
+
md_text += article[component] + "\n\n"
|
210 |
+
else:
|
211 |
+
md_text += "No content found\n\n"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
212 |
|
213 |
return gr.update(value=md_text)
|
214 |
|
215 |
+
@terminal_print
|
216 |
+
def get_key_content(text:str,start,end:str,case_sensitive:bool=False): # not getting the materials and methods
|
217 |
'''
|
218 |
this function extract the content between start and end
|
219 |
+
and return the content in between. If no start or end is
|
220 |
+
found, the function will return the empty string.
|
|
|
|
|
|
|
221 |
|
222 |
Parameters
|
223 |
----------
|
|
|
233 |
str
|
234 |
content between start and end
|
235 |
'''
|
236 |
+
# if not case_sensitive:
|
237 |
+
text = text.lower()
|
238 |
+
end = end.lower()
|
239 |
|
240 |
+
if type(start) is str:
|
241 |
+
start = start.lower()
|
242 |
+
start_index = text.find(start)
|
243 |
+
else:
|
244 |
+
start_index = start
|
245 |
+
|
246 |
+
end_index = text.find(end)
|
247 |
|
248 |
+
# if the start is not found, set the start as the beginning of the text
|
249 |
+
if start_index == -1:
|
250 |
+
start_index = 0
|
251 |
+
|
252 |
+
# if the end is not found, return the from the start to the end of the text for both
|
253 |
+
# the searched text and the remaining text
|
254 |
+
if end_index == -1:
|
255 |
+
end_index = 0
|
256 |
+
return text[start_index:],text[start_index:]
|
257 |
+
|
258 |
+
# return the searched text and the remaining text
|
259 |
+
return text[start_index:end_index],text[end_index:]
|
260 |
|
261 |
+
@terminal_print
|
262 |
def get_articles(update_local=True):
|
263 |
'''
|
264 |
this function return the list of articles
|
|
|
275 |
'''
|
276 |
articles = get_table("articles")
|
277 |
if update_local:
|
278 |
+
app_data["articles"] = list_dict_to_dict(articles)
|
279 |
|
280 |
return articles
|
281 |
|
282 |
+
@terminal_print
|
283 |
def get_article(domain,name):
|
284 |
'''
|
285 |
this function return the article object
|
|
|
300 |
|
301 |
return article
|
302 |
|
303 |
+
@terminal_print
|
304 |
def add_article(domain,file,add_to_s3=True, add_to_local=True, file_object=True):
|
305 |
'''
|
306 |
this function receive the domain name and file obj
|
|
|
322 |
dict
|
323 |
article object
|
324 |
'''
|
325 |
+
if type(file) is str:
|
326 |
+
content = file
|
327 |
+
filename = file
|
328 |
+
upload_file(file,default_s3_bucket,filename)
|
329 |
+
else:
|
330 |
+
# extract the content from the pdf file
|
331 |
content, _ = read_pdf(file)
|
332 |
filename = file.name.split("\\")[-1]
|
333 |
+
|
334 |
+
# upload the article to s3
|
335 |
+
pdf_obj = open(file.name, 'rb')
|
336 |
+
upload_fileobj(pdf_obj,default_s3_bucket,filename)
|
337 |
+
pdf_obj.close()
|
338 |
|
339 |
article ={
|
340 |
"domain":domain,
|
341 |
"name":filename,
|
342 |
+
"raw":content,
|
|
|
|
|
343 |
"upload_time":datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
|
344 |
}
|
|
|
|
|
|
|
345 |
|
346 |
if add_to_local:
|
347 |
+
app_data["articles"][article["name"]]=article
|
348 |
|
349 |
res = post_item("articles",article)
|
350 |
if "Error" in res:
|
|
|
353 |
|
354 |
return article
|
355 |
|
356 |
+
@terminal_print
|
357 |
def remove_article(domain,name,remove_from_s3=True, remove_from_local=True):
|
358 |
'''
|
359 |
this function remove the article from the cloud, s3 and local memory
|
|
|
378 |
if remove_from_s3:
|
379 |
delete_file(domain,name)
|
380 |
if remove_from_local:
|
381 |
+
del app_data["articles"][name]
|
382 |
pass
|
383 |
delete_item("articles",{"domain":domain,"name":name})
|
384 |
|
385 |
return True
|
386 |
|
387 |
+
@terminal_print
|
388 |
def update_article(article,file_obj=None,update_local=True):
|
389 |
'''
|
390 |
this function receive the article object and update the article
|
|
|
408 |
upload_fileobj(file_obj,article["domain"],article["name"])
|
409 |
|
410 |
if update_local:
|
411 |
+
app_data["articles"][article["name"]] = article
|
412 |
|
413 |
post_item("articles",article)
|
414 |
|
415 |
return article
|
416 |
|
417 |
+
@terminal_print
|
418 |
+
def identify_logic(text,logic_keywords=logic_keywords,case_sensitive=False):
|
419 |
+
'''
|
420 |
+
identify_logic function receive the text and return the logic of the article
|
421 |
|
422 |
+
Parameters
|
423 |
+
----------
|
424 |
+
text : str
|
425 |
+
text of the article
|
426 |
|
427 |
+
Returns
|
428 |
+
-------
|
429 |
+
dict
|
430 |
+
the type of prompt to be used for the article (groups, preoperative, both or none)
|
431 |
+
'''
|
432 |
+
if not case_sensitive:
|
433 |
+
text = text.lower()
|
434 |
+
|
435 |
+
prompt_logic={ # define the logic surfix for the prompt
|
436 |
+
(True,True):"prompt_p_g",
|
437 |
+
(True,False):"prompt_np_g",
|
438 |
+
(False,True):"prompt_p_ng",
|
439 |
+
(False,False):"prompt_np_ng",
|
440 |
+
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
441 |
|
442 |
+
article_observation = (
|
443 |
+
sum([text.count(kw) for kw in logic_keywords["groups"]])>3,
|
444 |
+
sum([text.count(kw) for kw in logic_keywords["preoperatives"]])>=3
|
445 |
+
)
|
446 |
+
|
447 |
+
return {"logic":prompt_logic[article_observation]}
|
448 |
+
|
449 |
+
# lets do it one by one
|
450 |
+
@terminal_print
|
451 |
+
def select_overview_prompts(article):
|
452 |
+
valid_prompts = set()
|
453 |
+
for t in app_data["terms"]:
|
454 |
+
# select overview prompts
|
455 |
+
if validate_term(article,t,"overview"):
|
456 |
+
# add the prompts to the memory
|
457 |
+
valid_prompts.update(t["prompts_list"])
|
458 |
+
article["extraction"]["overview"] = valid_prompts.copy()
|
459 |
+
|
460 |
+
return {p:app_data["prompts"][p] for p in valid_prompts}
|
461 |
+
|
462 |
+
@terminal_print
def select_performance_prompts(article, performance_assessment):
    """Select the prompts needed for one performance-assessment step.

    Parameters
    ----------
    article : dict
        Article object; ``article["extraction"]`` is updated in place with
        the names of the prompts selected for *performance_assessment*.
    performance_assessment : str
        Assessment step name, e.g. ``"clinical"`` or ``"safety"``.

    Returns
    -------
    dict
        Prompt objects keyed by prompt name; each carries the term that
        selected it under the ``"term"`` key.
    """
    # terms whose region/step/keywords match this article
    # (the original also built an unused `search_text` here — dead code,
    # validate_term computes its own key text)
    valid_terms = [t for t in app_data["terms"]
                   if validate_term(article, t, performance_assessment)]

    valid_prompts = {}
    for t in valid_terms:
        if any(p not in valid_prompts for p in t["prompts_list"]):
            for p in t["prompts_list"]:
                # shallow-copy so attaching "term" does not mutate the
                # shared prompt stored in app_data["prompts"] (the original
                # wrote through the alias and polluted the global store)
                prompt = dict(app_data["prompts"][p])
                prompt["term"] = t
                valid_prompts[p] = prompt
                if performance_assessment not in article["extraction"]:
                    article["extraction"][performance_assessment] = []
                article["extraction"][performance_assessment].append(prompt["prompt_name"])

    return valid_prompts
|
486 |
+
|
487 |
+
@terminal_print
def process_prompts(article):
    """Select and execute all extraction prompts for *article*.

    Selects the overview prompts plus one prompt set per performance
    assessment (clinical, radiologic, safety, other), then runs each set
    concurrently — one event loop per set, the sets themselves sequential
    so overview results exist before the performance passes run.

    Parameters
    ----------
    article : dict
        Article object; ``article["extraction"]`` is (re)initialised here
        and the executed prompts write their results onto the article.

    Returns
    -------
    None
        All output is recorded on *article* in place (the original
        docstring wrongly claimed a list was returned).
    """
    article["extraction"] = {}

    overview_prompts = select_overview_prompts(article)
    performance_assessments = ["clinical", "radiologic", "safety", "other"]
    performance_prompts = {
        assessment: select_performance_prompts(article, assessment)
        for assessment in performance_assessments
    }

    # asyncio.run creates and closes a fresh event loop for each call,
    # replacing the manual new_event_loop/run_until_complete/close dance
    asyncio.run(execute_concurrent(article, overview_prompts))
    for assessment in performance_assessments:
        asyncio.run(execute_concurrent(article, performance_prompts[assessment]))
|
524 |
+
|
525 |
|
526 |
+
def validate_term(article, term, assessment_step):
    """Decide whether *term* applies to *article* for *assessment_step*.

    A term matches when its anatomic region fits the article's domain and
    either both the term and the requested step are "overview", or the
    steps match and at least one of the term's comma-separated keywords
    appears in the article's key text (content, authors, acceptance
    month/year and tables, compared case-insensitively).
    """
    # region gate: "all" always passes, otherwise must equal the domain
    region = term["region"]
    if region not in ("all", article["domain"].lower()):
        return False

    step = term["assessment_step"]
    if step == "overview":
        # overview terms never require a keyword hit
        return assessment_step == "overview"

    if step != assessment_step:
        return False

    # performance terms need a keyword hit in the article's key text
    haystack = "".join([
        article["key_content"],
        article["Authors"],
        article["Acceptance Month"],
        article["Acceptance Year"],
        "\n".join(article["tables"]),
    ]).lower()

    return any(kw.strip() in haystack for kw in term["term"].split(","))
|
543 |
|
544 |
+
@terminal_print
def keyword_search(keywords, full_text):
    """Report which keywords occur in *full_text*.

    Parameters
    ----------
    keywords : iterable
        Each entry is either a single keyword string or a tuple/list/set
        of alternatives; a group matches when any alternative is present.
    full_text : str
        Text to search (plain substring match; casing is the caller's job).

    Returns
    -------
    dict
        Keyword (or tuple of alternatives) -> bool.
    """
    keywords_result = {}
    for k in keywords:
        if isinstance(k, (tuple, list, set)):
            # Bugfixes vs the original: (1) the recursive call returned a
            # dict, which is truthy for any non-empty group, so grouped
            # keywords always reported a match; test membership directly.
            # (2) list/set keys are unhashable — store under a tuple key.
            keywords_result[tuple(k)] = any(kw in full_text for kw in k)
        else:
            keywords_result[k] = k in full_text
    return keywords_result
|
553 |
|
554 |
+
@terminal_print
def execute_prompts(article, prompt):
    """Run *prompt* on *article*, first materialising any missing inputs.

    Each entry of ``prompt["input_list"]`` names an article segment; when
    a segment is absent, the prompt that produces it is executed
    recursively (depth-first) before this prompt itself is dispatched.
    """
    for raw_name in prompt["input_list"]:
        name = raw_name.strip()
        if name not in article:
            # recurse to produce the missing dependency first
            # (it might be a good idea to add a recursion-depth limit here)
            execute_prompts(article, app_data["prompts"][name])

    run_executor(article, prompt)
|
563 |
+
|
564 |
+
@terminal_print
def run_gpt(article, prompt):
    """Execute one GPT-backed prompt and store its answer on the article.

    Builds the instruction pair (the logic-specific instruction selected
    by ``article["logic"]`` plus the reformat instruction), joins the
    prompt's input segments into one text, sends everything to the OpenAI
    chat API and writes the response under ``article[prompt["prompt_name"]]``.
    """
    instruction_pair = [prompt[article["logic"]], prompt["reformat_inst"]]

    segments = [article[name.strip()] for name in prompt["input_list"]]
    stream = create_inst("\n".join(segments), instruction_pair)

    print(prompt["prompt_name"])  # progress marker on the terminal

    response = send_inst(stream)
    article[prompt["prompt_name"]] = response
|
580 |
+
|
581 |
+
|
582 |
+
@terminal_print
def f_replacement_term(article, prompt):
    """Apply every configured term replacement to the prompt's input text.

    Reads the first input segment, substitutes each summary term with its
    ``term_replacement`` and stores the final text under the prompt's name.

    Bugfix: the original replaced into the *unmodified* input on every
    iteration, so only the last term's replacement survived; replacements
    now accumulate across all terms.
    """
    result = article[prompt["input_list"][0]]

    for t in app_data["summary"]:
        result = result.replace(t["term"], t["term_replacement"])

    article[prompt["prompt_name"]] = result
|
589 |
+
|
590 |
+
@terminal_print
def f_summary_term(article, prompt):
    """Apply every configured term summary to the prompt's input text.

    Reads the first input segment, substitutes each summary term with its
    ``term_summary`` and stores the final text under the prompt's name.

    Bugfix: the original replaced into the *unmodified* input on every
    iteration, so only the last term's substitution survived; substitutions
    now accumulate across all terms.
    """
    result = article[prompt["input_list"][0]]

    for t in app_data["summary"]:
        result = result.replace(t["term"], t["term_summary"])

    article[prompt["prompt_name"]] = result
|
597 |
+
|
598 |
+
@terminal_print
def run_executor(article, prompt):
    '''
    Dispatch *prompt* to the executor named in its "executed by" field.

    Known executors: the GPT chat model, the term-replacement pass and the
    term-summary pass. Anything else is a silent no-op, matching the
    original match statement's implicit default.
    '''
    executors = {
        "gpt-3.5-turbo-16k": run_gpt,
        "f_replacement_term": f_replacement_term,
        "f_summary_term": f_summary_term,
    }
    executor = executors.get(prompt["executed by"])
    if executor is not None:
        executor(article, prompt)
|
610 |
+
|
611 |
+
|
612 |
+
def add_inst(instructions, prompt):
    """Concatenate *prompt* onto *instructions* and return the result.

    Works for any pair supporting ``+`` (strings, or lists of messages);
    neither argument is mutated.
    """
    combined = instructions + prompt
    return combined
|
|
|
|
|
|
requirements.txt
CHANGED
@@ -5,4 +5,5 @@ boto3
|
|
5 |
requests
|
6 |
openai
|
7 |
pdfminer.six
|
8 |
-
tiktoken
|
|
|
|
5 |
requests
|
6 |
openai
|
7 |
pdfminer.six
|
8 |
+
tiktoken
|
9 |
+
langchain
|
supplier.py
CHANGED
@@ -7,27 +7,26 @@ from utility import terminal_print
|
|
7 |
openai.api_key = openai_api_key
|
8 |
token_encoder = tiktoken.get_encoding("cl100k_base")
|
9 |
|
10 |
-
|
|
|
11 |
max_retry = 5
|
12 |
-
def
|
13 |
import time
|
14 |
count = 0
|
|
|
15 |
|
16 |
while(count < max_retry):
|
17 |
try:
|
18 |
return func(*args,**kwargs)
|
19 |
except Exception as e:
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
count += 1
|
24 |
-
else:
|
25 |
-
raise e
|
26 |
|
27 |
-
return
|
28 |
|
29 |
@terminal_print
|
30 |
-
def execute_prompt(prompt):
|
31 |
'''
|
32 |
execute_prompt function takes two arguments: text and prompt
|
33 |
|
@@ -49,14 +48,14 @@ def execute_prompt(prompt):
|
|
49 |
return res.choices[0]["text"] if res.choices else "<error> failed to generate text</error>"
|
50 |
|
51 |
@terminal_print
|
52 |
-
def format(**kwargs):
|
53 |
if "format" in kwargs:
|
54 |
return kwargs["format"]
|
55 |
return kwargs
|
56 |
|
57 |
|
58 |
@terminal_print
|
59 |
-
def execute_instruction(article, instruction,model="gpt-3.5-turbo-16k",format="markdown"):
|
60 |
'''
|
61 |
execute_instruction function takes three arguments: article, instruction and model
|
62 |
|
@@ -96,7 +95,7 @@ def execute_instruction(article, instruction,model="gpt-3.5-turbo-16k",format="m
|
|
96 |
return res["choices"][0]["message"]["content"]
|
97 |
|
98 |
@terminal_print
|
99 |
-
def create_inst(article, instructions):
|
100 |
msg_stream = [
|
101 |
{
|
102 |
"role":"system",
|
@@ -113,7 +112,7 @@ def create_inst(article, instructions):
|
|
113 |
|
114 |
@terminal_print
|
115 |
@request_retry
|
116 |
-
def send_inst(stream, model="gpt-3.5-turbo-16k",temperature=0):
|
117 |
res= openai.ChatCompletion.create(
|
118 |
model=model,
|
119 |
messages=stream,
|
|
|
7 |
openai.api_key = openai_api_key
|
8 |
token_encoder = tiktoken.get_encoding("cl100k_base")
|
9 |
|
10 |
+
|
11 |
+
def request_retry(func):
    """Decorator: retry *func* up to five times on any exception.

    Each failure is logged and followed by a 5-second back-off; once the
    attempts are exhausted the last exception is re-raised (the original
    silently returned None after the loop, hiding persistent API failures).
    """
    import functools
    import time

    max_retry = 5

    @functools.wraps(func)  # preserve name/docstring for logging & debugging
    def deco_retry(*args, **kwargs):
        last_error = None
        for attempt in range(max_retry):
            try:
                return func(*args, **kwargs)
            except Exception as e:
                last_error = e
                print(f"Error: {e.__class__.__name__}, retrying in 5 seconds...")
                # back off only when another attempt remains
                if attempt < max_retry - 1:
                    time.sleep(5)
        raise last_error

    return deco_retry
|
27 |
|
28 |
@terminal_print
|
29 |
+
def execute_prompt(prompt): # need revision
|
30 |
'''
|
31 |
execute_prompt function takes two arguments: text and prompt
|
32 |
|
|
|
48 |
return res.choices[0]["text"] if res.choices else "<error> failed to generate text</error>"
|
49 |
|
50 |
@terminal_print
def format(**kwargs):  # need revision
    """Return the explicit ``format`` keyword when given, else all kwargs.

    NOTE(review): this shadows the builtin ``format``; consider renaming
    it and its call sites in a follow-up.
    """
    # dict.get only falls back to the default when the key is absent,
    # so falsy "format" values are still returned as-is
    return kwargs.get("format", kwargs)
|
55 |
|
56 |
|
57 |
@terminal_print
|
58 |
+
def execute_instruction(article, instruction,model="gpt-3.5-turbo-16k",format="markdown"): # need revision
|
59 |
'''
|
60 |
execute_instruction function takes three arguments: article, instruction and model
|
61 |
|
|
|
95 |
return res["choices"][0]["message"]["content"]
|
96 |
|
97 |
@terminal_print
|
98 |
+
def create_inst(article, instructions): # need revision
|
99 |
msg_stream = [
|
100 |
{
|
101 |
"role":"system",
|
|
|
112 |
|
113 |
@terminal_print
|
114 |
@request_retry
|
115 |
+
def send_inst(stream, model="gpt-3.5-turbo-16k",temperature=0): # need revision to change to async method
|
116 |
res= openai.ChatCompletion.create(
|
117 |
model=model,
|
118 |
messages=stream,
|
ui_studies.py
CHANGED
@@ -2,6 +2,7 @@ import gradio as gr
|
|
2 |
|
3 |
from application import *
|
4 |
from features import init_app_data
|
|
|
5 |
|
6 |
def refresh():
|
7 |
init_app_data()
|
@@ -19,20 +20,23 @@ def create_md_tables(articles):
|
|
19 |
md_text += "| Domain | File Name | Upload Time | Device |\n| --- | --- | --- | --- |\n"
|
20 |
|
21 |
for article in articles:
|
22 |
-
md_table = f"| {article['domain']} | {article['name']} | {article['upload_time']} | {
|
23 |
md_text += md_table
|
24 |
|
25 |
return md_text
|
26 |
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
|
|
|
|
|
|
|
2 |
|
3 |
from application import *
|
4 |
from features import init_app_data
|
5 |
+
from utility import terminal_print
|
6 |
|
7 |
def refresh():
|
8 |
init_app_data()
|
|
|
20 |
md_text += "| Domain | File Name | Upload Time | Device |\n| --- | --- | --- | --- |\n"
|
21 |
|
22 |
for article in articles:
|
23 |
+
md_table = f"| {article['domain']} | {article['name']} | {article['upload_time']} | {default_region} |\n"
|
24 |
md_text += md_table
|
25 |
|
26 |
return md_text
|
27 |
|
28 |
+
@terminal_print
def init_studies_page():
    '''
    Build the "Studies" tab: a header row with a refresh button above a
    markdown area that lists the uploaded articles.

    Returns
    -------
    gr.Blocks
        The assembled gradio page, for mounting in the tabbed interface.
    '''
    with gr.Blocks() as studies_page:
        with gr.Row():
            gr.Markdown("## Article Lists")
            btn_refresh = gr.Button(value="Refresh",variant="primary")
        gr.HTML("<hr>")

        # the article table is rendered as markdown by the refresh handler
        article_list = gr.Markdown("")

        # clicking refresh re-reads the app data and re-renders the list
        btn_refresh.click(
            fn=refresh,
            outputs=[article_list]
        )
    return studies_page
|
ui_study.py
CHANGED
@@ -17,67 +17,89 @@ def reset():
|
|
17 |
)
|
18 |
|
19 |
# complete user interfaces
|
20 |
-
|
21 |
-
|
22 |
-
with gr.
|
23 |
-
|
24 |
-
|
25 |
-
gr.
|
|
|
|
|
26 |
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
btn_refresh = gr.Button(value="Refresh",variant="primary")
|
41 |
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
# control element definition
|
52 |
-
btn_reset.click(
|
53 |
-
reset,
|
54 |
-
outputs=[
|
55 |
-
domain,
|
56 |
-
upload_study,
|
57 |
-
input_study,
|
58 |
-
overview,
|
59 |
-
detail_views,
|
60 |
-
]
|
61 |
-
)
|
62 |
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
76 |
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
|
|
|
17 |
)
|
18 |
|
19 |
# complete user interfaces
|
20 |
+
@terminal_print
def init_study_page():
    '''
    Build the "Clinical Study" tab: an upload/paste panel for new study
    reports, a selector for existing literature reports, and two markdown
    panes showing the extracted overview and detail views.

    Returns
    -------
    gr.Blocks
        The assembled gradio page, for mounting in the tabbed interface.
    '''
    with gr.Blocks() as study_page:
        # user control panel
        with gr.Row(equal_height=False):
            with gr.Column():
                gr.Markdown("## Studies")
                gr.HTML("<hr>")
                # file upload path (multiple reports allowed)
                upload_study = gr.File(label="Upload a clinical study report",type="file",file_count="multiple")

            with gr.Column():
                # anatomic region drives which terms/prompts apply downstream
                domain = gr.Radio(label="Anatomical Region",choices=anatomic_domains,value=default_region)
                # free-text alternative to the file upload
                input_study = gr.TextArea(label="Or paste a clinical study report content",placeholder="Paste content here...",lines=5)
                with gr.Row():
                    btn_reset = gr.Button(value="Reset",variant="stop")
                    btn_add_study = gr.Button(value="Add",variant="primary")

        gr.HTML("<hr>")
        with gr.Row():
            gr.Markdown("## Literature Report")

        gr.HTML("<hr>")
        with gr.Row(equal_height=False):
            with gr.Column():
                # choices are the article names known at page-build time;
                # the Refresh button is how new uploads become visible
                dropdown = gr.Dropdown(label="Select a literature report",choices=app_data["articles"].keys())
            with gr.Column():
                with gr.Row():
                    btn_get_article = gr.Button(value="Get",variant="primary")
                    btn_refresh = gr.Button(value="Refresh",variant="primary")

        gr.HTML("<hr>")
        # extraction outcome panel
        with gr.Row(equal_height=False):
            with gr.Column():
                overview = gr.Markdown("")
            with gr.Column():
                # tables = gr.Markdown("")
                detail_views = gr.Markdown("")

        # control element definition
        # "Get": load a previously processed article into the result panes
        btn_get_article.click(
            get_existing_article,
            inputs=[
                dropdown,
            ],
            outputs=[
                overview,
                detail_views,
            ]
        )

        # "Reset": clear all inputs and result panes back to defaults
        btn_reset.click(
            reset,
            outputs=[
                domain,
                upload_study,
                input_study,
                overview,
                detail_views,
            ]
        )

        # "Add": run the full extraction pipeline on the new study
        btn_add_study.click(
            process_study,
            inputs=[
                domain,
                upload_study,
                input_study,
            ],
            outputs=[
                overview,
                detail_views,
                # tables
            ],
        )

        # "Refresh": re-render the result panes from current app data
        btn_refresh.click(
            refresh,
            outputs=[
                overview,
                detail_views,
            ],
        )
    return study_page
|