note-demo / app.py
jinee's picture
fix the code
3bb158d
raw
history blame
No virus
13.8 kB
import pandas as pd
# Workbook with the synthetic sample data, and the worksheets the demo reads.
excel_file = './fake_sample_mimic.xlsx'
sheet_names = ['df', 'diagnoses', 'procedure', 'chart', 'medication','note']

# Handing the whole list to read_excel opens the workbook once and returns a
# {sheet name: DataFrame} dict, instead of re-opening the file for every sheet.
data_frames = pd.read_excel(excel_file, sheet_name=sheet_names)

# Module-level aliases, one per worksheet, used by the rest of the script.
df = data_frames["df"]
diagnoses = data_frames["diagnoses"]
procedure = data_frames["procedure"]
chart = data_frames["chart"]
medication = data_frames["medication"]
note = data_frames["note"]
# Reduce the admission, discharge and birth timestamps to calendar dates.
# Each column is parsed as a full timestamp and then truncated to midnight;
# this produces the same datetime values as formatting to '%Y-%m-%d' text and
# parsing that text back, without the intermediate string column.
for _date_column in ("ADMITTIME", "DISCHTIME", "DOB"):
    df[_date_column] = pd.to_datetime(
        df[_date_column], format='%Y-%m-%d %H:%M:%S'
    ).dt.normalize()
def cal_los(row):
    """Return the length of stay for one admission row, in whole days.

    ``row`` is indexed with "ADMITTIME" and "DISCHTIME". Both values go
    through ``pd.to_datetime`` and the result is the ``days`` component of
    discharge minus admission.
    """
    admitted_at, discharged_at = (
        pd.to_datetime(row[key]) for key in ("ADMITTIME", "DISCHTIME")
    )
    return (discharged_at - admitted_at).days
# Length of stay in whole days. ADMITTIME and DISCHTIME are datetime columns
# at this point, so one column subtraction yields the same values as calling
# cal_los on every row, without a Python-level call per row.
df["LOS"] = (df["DISCHTIME"] - df["ADMITTIME"]).dt.days

# Chart events: keep only the calendar date of each timestamp (midnight),
# rather than formatting to text and parsing the text again.
chart["DATETIME"] = pd.to_datetime(
    chart["DATETIME"], format='%Y-%m-%d %H:%M:%S'
).dt.normalize()
# Copy of the chart events ordered by patient, then by date.
chart1 = chart.sort_values(by=["SUBJECT_ID", "DATETIME"])

# Medication start/end: same date-only treatment as the chart events.
for _med_date_column in ("STARTDATE", "ENDDATE"):
    medication[_med_date_column] = pd.to_datetime(
        medication[_med_date_column], format='%Y-%m-%d %H:%M:%S'
    ).dt.normalize()
def cal_duration(row):
    """Return how many whole days one medication row spans.

    ``row`` is indexed with "STARTDATE" and "ENDDATE". Both values go through
    ``pd.to_datetime`` and the result is the ``days`` component of end minus
    start.
    """
    started_on = pd.to_datetime(row["STARTDATE"])
    elapsed = pd.to_datetime(row["ENDDATE"]) - started_on
    return elapsed.days
# Days between medication start and end. Both columns are datetime columns
# here, so one column subtraction matches what cal_duration returns per row
# without a Python-level call for every row.
medication["duration"] = (medication["ENDDATE"] - medication["STARTDATE"]).dt.days

# Notes: keep only the calendar date of each timestamp (midnight), rather than
# formatting to '%Y-%m-%d' text and parsing the text again.
note["DATETIME"] = pd.to_datetime(
    note["DATETIME"], format='%Y-%m-%d %H:%M:%S'
).dt.normalize()
# Per-sample views: every table restricted to the rows of one admission.
_all_tables = (df, diagnoses, procedure, chart, medication, note)

# Sample 1: rows whose HADM_ID is 2222.
(df1_df, df1_diagnoses, df1_procedure,
 df1_chart, df1_medication, df1_note) = (
    table.loc[table["HADM_ID"] == 2222] for table in _all_tables
)

# Sample 2: rows whose HADM_ID is 8888.
(df2_df, df2_diagnoses, df2_procedure,
 df2_chart, df2_medication, df2_note) = (
    table.loc[table["HADM_ID"] == 8888] for table in _all_tables
)
# Sequential text inputs: rows 0 and 1 of the CSV's second column.
sequential_file = pd.read_csv("./sequential.csv")
_sequential_texts = sequential_file.iloc[:, 1]
input_sequential_1 = _sequential_texts.iloc[0]
input_sequential_2 = _sequential_texts.iloc[1]
"""
#For your information: Due to the memory limitations of the Hugging Face Space, the NOTE model is not actually run here; instead, the results (generated_1, generated_2) of inference with the same model are attached. The code below is nevertheless real inference code that can be run after downloading the model, which can be found at "https://huggingface.co/jinee/note".
"""
# from transformers import AutoModelForCausalLM, AutoTokenizer
# model = AutoModelForCausalLM.from_pretrained("jinee/note", load_in_4bit=True, device_map="auto")
# tokenizer = AutoTokenizer.from_pretrained("jinee/note")
# tokenizer.padding_side = 'right'
# tokenizer.add_eos_token = True
# tokenizer.pad_token = tokenizer.eos_token
# tokenizer.add_eos_token, tokenizer.add_bos_token
# from transformers import pipeline
# instruction = '''
# As a doctor, you need to create a discharge summary based on input data.
# Never change the dates or numbers in the input data and use them as is. And please follow the format below for your report.
# Also, never make up information that is not in the input data, and write a report only with information that can be identified from the input data.
# 1. Patient information (SUBJECT_ID, HADM_ID, hospitalization and discharge date, hospitalization period, gender, date of birth, age, allergy)
# 2. Diagnostic information and past history (if applicable)
# 3. Surgery or procedure information
# 4. Significant medication administration during hospitalization and discharge medication history
# 5. Meaningful lab tests during hospitalization
# 6. Summary of significant text records/notes
# 7. Discharge outcomes and treatment plan
# 8. Overall summary of at least 500 characters in lines including the above contents
# '''
# import torch
# def generation(model, tokenizer, input_data):
# torch.cuda.empty_cache()
# pipe = pipeline('text-generation',
# model = model,
# tokenizer = tokenizer,
# torch_dtype=torch.bfloat16,
# device_map = 'auto')
# global instruction
# sequences = pipe(
# f"[INST]{instruction}: {input_data} [/INST]",
# do_sample=True,
# max_new_tokens=1024,
# temperature=0.7,
# top_k=50,
# top_p=0.95,
# early_stopping =True,
# num_return_sequences=1,)
# text = sequences[0]['generated_text']
# start_index = text.find('[/INST]')
# if start_index != -1:
# summary_ = text[start_index + len('[/INST]'):]
# return(summary_)
# else:
# return("'[summary_] 'is not founded.")
# result1 = generation(model, tokenizer, input_sequential_1)
# result2= generation(model, tokenizer, input_sequential_2)
# generated_1 = result1
# generated_2 = result2
import gradio as gr
import pandas as pd
# Texts returned by the "Button 1" handlers of the two sample tabs.
text_set1, text_set2 = input_sequential_1, input_sequential_2
# Pre-computed model outputs returned by the "Button 2" handlers.
# Each summary is written as adjacent string literals inside parentheses so
# that no physical line break sits inside a quoted string (a double-quoted
# literal cannot span lines); the pieces are concatenated as-is.
generated_1 = (
    "Discharge Summary:\n\n1. Patient information: SUBJECT_ID: 1111, HADM_ID: 2222, ADMITTIME: 2139-06-06 00:00:00, DISCHTIME: 2139-06-09 00:00:00, GENDER: M, DOB: 2100-05-31 00:00:00, AGE: 40. Diagnoses: 40301 Hypertensive chronic kidney disease, 486 Pneumonia, 48154 Peritoneal dialysis, 40293 Chronic glomerulonephritis in diseases classified elsewhere / P0.5498 Peritoneal dialysis.\n2. During the hospitalization, the patient was diagnosed with severe cardiomegaly likely related to chronic hypertension and heart failure (2139-06-06). Initial medications included Vancomycin, Lisinopril, Metformin, and Norepinephrine. The patient's vital signs showed an unstable condition with a heart rate of 180 beats per minute, blood pressure of 180/110 mmHg, and respiratory rate of 30 breaths per minute. A critical condition with an enlarged cardiac silhouette was observed on imaging, and urgent cardiac evaluation was recommended due to potential pre-operative risk.\n3. On 2139-06-07, the patient's condition was stabilized, and no medications were administered.\n4. The following day (2139-06-08), the patient underwent valve replacement surgery, and medications included Dobutamine and Meropenem post-surgery. The patient's vital signs improved, with a heart rate of 120 beats per minute, blood pressure of 120/80 mmHg, and respiratory rate of 36.6 breaths per minute. Imaging showed a decreased cardiac silhouette size, indicating an improvement in heart size and function post-surgery.\n5. On 2139-06-09, the patient was discharged, and medications included Acetaminophen, Warfarin, and Amiodarone. Vital signs showed a stable condition with a heart rate of 130 beats per minute, blood pressure of 130/80 mmHg, and respiratory rate of 18 breaths per minute.\n\nOverall, the patient underwent successful valve replacement surgery on 2139-06-08, and no post-operative complications were observed. "
    "The patient was discharged with routine post-surgical follow-up, Acetaminophen, Warfarin, and Amiodarone for further management. The hospitalization period was 3 days (LOS: 3 / D0)."
)
generated_2 = (
    " Discharge Summary:\n\n1. Patient Information:\n - SUBJECT_ID: 7777\n - HADM_ID: 8888\n - Admission and Discharge Dates: March 10, 2021 - March 15, 2021\n - Hospitalization Period: 5 days\n - Gender: Female\n - Date of Birth: September 21, 1939\n - Age: 82\n - Allergy: N/A\n\n2. Diagnostic Information and Past History:\n - Dx1: Other postoperative infection\n - Dx2: Acute edema of lung, unspecified\n - Dx3: Acute diastolic heart failure\n\n3. Surgery or Procedure Information:\n - Px1: Open and other replacement of mitral valve with tissue graft\n - Px2: Excision or destruction of other lesion or tissue of heart, open approach\n - Px3: Open heart surgery with extracorporeal circulation\n - Px4: Coronary arteriography using two catheters\n\n4. Significant Medication Administration during Hospitalization and Discharge Medication History:\n - March 10, 2021: Morphine 10.0 mg, Metoprolol 50.0 mg, Furosemide 40.0 mg\n - March 11, 2021: Heparin 5000.0 IU, Vancomycin 1000.0 mg\n - March 12, 2021: Lisinopril 20.0 mg\n - March 13, 2021: N/A\n - March 14, 2021: N/A\n - March 15, 2021: N/A\n - Discharge Medications: N/A\n\n5. Meaningful Lab Tests during Hospitalization:\n - March 10, 2021: N/A\n - March 11, 2021: Heart rate: 215 beats per minute, Hemoglobin: 95.09 g/dL, Sodium: 180.11 cm2\n - March 12, 2021: Creatinine: 13.85 mg/dL, Glucose: 2151-03-12 note: N/A\n - March 13, 2021: Potassium: 142.94 mEq/L, Sodium: 4.62 mEq/L, 2151-03-13 note: radiology: Findings:\n\n6. Summary of Significant Text Records/Notes:\n - Post-operative chest X-ray shows stable mediastinal contours. Normal heart size with no pericardial effusion. Surgical changes noted in the chest, consistent with recent cardiac surgery. No pneumothorax or significant pleural effusions.\n - Abdominal ultrasound shows post-operative changes without any acute complication.\n\n7. Discharge Outcomes and Treatment Plan:\n - Impression: Stable post-operative chest findings. "
    "No evidence of acute complications related to recent cardiac surgery. Abdominal findings are unremarkable for any acute pathology.\n - Conclusion: Post-operative imaging findings are satisfactory and within expected limits. Continued monitoring and routine post-operative care recommended.\n\n8. Overall Summary:\n The patient is a 82-year-old female with a history of other postoperative infection, acute edema of lung, and acute diastolic heart failure who underwent open and other replacement of mitral valve with tissue graft, excision or destruction of other lesion or tissue of heart, open approach, open heart surgery with extracorporeal circulation, and coronary arteriography using two catheters. During her hospitalization, she received Morphine, Metoprolol, Furosemide, Heparin, Vancomycin, and Lisinopril. Her lab tests showed stable vital signs and no significant abnormalities. The post-operative imaging findings were satisfactory, and the patient was discharged with continued monitoring"
)
def on_click_button1_tab1():
    """Click handler for Sample 1's first button: return ``text_set1``."""
    sequential_text = text_set1
    return sequential_text
def on_click_button1_tab2():
    """Click handler for Sample 2's first button: return ``text_set2``."""
    sequential_text = text_set2
    return sequential_text
def on_click_button2_tab1():
    """Click handler for Sample 1's second button: return ``generated_1``."""
    summary_text = generated_1
    return summary_text
def on_click_button2_tab2():
    """Click handler for Sample 2's second button: return ``generated_2``."""
    summary_text = generated_2
    return summary_text
# Gradio interface layout

# Markdown headings shown above the six tables of a sample tab, in display order.
_SECTION_HEADINGS = (
    "#### Demographic",
    "#### Diagnoses",
    "#### Procedures",
    "#### Chart events",
    "#### medications",
    "#### Notes",
)


def _build_sample_tab(tab_title, frames, show_sequential, show_summary):
    """Lay out one sample tab inside the currently open ``gr.Blocks``.

    The tab shows each frame in ``frames`` under the matching entry of
    ``_SECTION_HEADINGS``, followed by two button/textbox pairs:
    ``show_sequential`` fills the first textbox and ``show_summary`` fills the
    second. Returns ``(button1, textbox1, button2, textbox2)``.
    """
    with gr.Tab(tab_title):
        gr.Markdown("### Sample Fake data")
        for heading, frame in zip(_SECTION_HEADINGS, frames):
            gr.Markdown(heading)
            gr.DataFrame(frame)

        sequential_button = gr.Button("Button 1: Generate a sequential dataset")
        sequential_box = gr.Textbox()
        sequential_button.click(show_sequential, outputs=sequential_box)

        summary_button = gr.Button("Button 2: Generate a summary")
        summary_box = gr.Textbox()
        summary_button.click(show_summary, outputs=summary_box)
    return sequential_button, sequential_box, summary_button, summary_box


with gr.Blocks() as demo:
    # Page header.
    gr.Markdown("# NOTE: Notable generation Of patient Text summaries through Efficient approach based on direct preference optimization")
    gr.Markdown("## We propose NOTE, which generates comprehensive discharge summaries of patients using MIMIC-III. ")
    gr.Markdown("### This page serves as a demo application indicating that our NOTE can be applied in practice.\n ### To enable actual functionality, you will need to download the model and fake data. \n You can find the model at the provided link. LINK: https://huggingface.co/jinee/note")

    # One tab per sample; the component objects keep their module-level names.
    button1_tab1, text1_tab1, button2_tab1, text2_tab1 = _build_sample_tab(
        "Sample 1",
        (df1_df, df1_diagnoses, df1_procedure, df1_chart, df1_medication, df1_note),
        on_click_button1_tab1,
        on_click_button2_tab1,
    )
    button1_tab2, text1_tab2, button2_tab2, text2_tab2 = _build_sample_tab(
        "Sample 2",
        (df2_df, df2_diagnoses, df2_procedure, df2_chart, df2_medication, df2_note),
        on_click_button1_tab2,
        on_click_button2_tab2,
    )

demo.launch(share=True)