Spaces:
Build error
Build error
import streamlit as st | |
import pandas as pd | |
import numpy as np | |
from math import ceil | |
from collections import Counter | |
from string import punctuation | |
import spacy | |
from spacy import displacy | |
from spacy.lang.en import English | |
from spacy.matcher import PhraseMatcher | |
from spacy.tokens import Span | |
from negspacy.negation import Negex | |
#import en_ner_bc5cdr_md | |
import re | |
from streamlit.components.v1 import html | |
# Page configuration is issued before any other Streamlit usage; the Streamlit
# docs require st.set_page_config to be the first Streamlit command in a script.
st.set_page_config(
    page_title='Clinical Note Summarization',
    # page_icon="Notes",
    layout='wide',
)

# Store the initial value of widgets in session state
if "visibility" not in st.session_state:
    st.session_state.visibility = "visible"
    st.session_state.disabled = False

# Load the BC5CDR NER model once and share it.  The three names are kept
# because later code refers to each of them, but they were three separate
# copies of the same model, which tripled load time and memory on every rerun.
#nlp = en_core_web_lg.load()
nlp = spacy.load("en_ner_bc5cdr_md")
nlp0 = nlp
nlp1 = nlp

st.title('Clinical Note Summarization')

# Fix the sidebar width for both its expanded and collapsed states.
st.markdown(
    """
    <style>
    [data-testid="stSidebar"][aria-expanded="true"] > div:first-child {
        width: 400px;
    }
    [data-testid="stSidebar"][aria-expanded="false"] > div:first-child {
        width: 400px;
        margin-left: -230px;
    }
    </style>
    """,
    unsafe_allow_html=True,
)
st.sidebar.markdown('Using transformer model')
## ======== Loading dataset ========
# Admission dataset
df = pd.read_csv('shpi25nov.csv')
# Admission chief complaint and diagnosis
df2 = pd.read_csv('cohort_cc_adm_diag.csv')
# Discharge history
df3 = pd.read_csv('cohort_past_history_12072022.csv')

# Combine the admission data with the chief complaint / diagnosis data,
# then drop the now-redundant frame.
df = pd.merge(df, df2, on=['HADM_ID', 'SUBJECT_ID'])
del df2

# Remove the decimal point from the admission ID.  The pattern is anchored to
# the end of the string so that only a trailing ".0" is stripped; the previous
# plain replace removed ".0" wherever it occurred inside the ID.
df['HADM_ID'] = df['HADM_ID'].astype(str).str.replace(r'\.0$', '', regex=True)
df3['HADM_ID'] = df3['HADM_ID'].astype(str).str.replace(r'\.0$', '', regex=True)

# Rename columns to the names used by the rest of the app.
df.rename(
    columns={
        'SUBJECT_ID': 'Patient_ID',
        'HADM_ID': 'Admission_ID',
        'hpi_input_text': 'Original_Text',
        'hpi_reference_summary': 'Reference_text',
    },
    inplace=True,
)
# NOTE: df3 keeps its 'HADM_ID' column name; only the patient column is renamed.
df3.rename(columns={'SUBJECT_ID': 'Patient_ID'}, inplace=True)
# Filter selection
st.sidebar.header("Search for Patient:")

# ===== Initial filter for patient and admission id =====
# Offer each patient once: df holds one row per (patient, admission) pair after
# the merge, so the raw column would repeat a patient for every admission.
patientid = df['Patient_ID'].unique()
patient = st.sidebar.selectbox('Select Patient ID:', patientid)  # Filter patient

# Admission IDs available for the selected patient
admissionid = df.loc[df['Patient_ID'] == patient, 'Admission_ID']
HospitalAdmission = st.sidebar.selectbox(' ', admissionid)

# Past-history admission IDs for the selected patient (for the History tab)
pastHistoryEpid = df3.loc[df3['Patient_ID'] == patient, 'HADM_ID']

# List of models available
model = st.sidebar.selectbox(
    'Select Model',
    ('BertSummarizer', 'BertGPT2', 't5seq2eq', 't5', 'gensim', 'pysummarizer'),
)

# ===== Display selected patient and admission id on main page =====
# The write() calls return None, so their result is no longer assigned back
# onto patientid / admissionid.
col3, col4 = st.columns(2)
col3.write(f"Patient ID: {patient} ")
col4.write(f"Admission ID: {HospitalAdmission} ")

runtext = ''
inputNote = 'Input note here:'

# Query out the row(s) for the selected patient and admission
original_text = df.query(
    "Patient_ID == @patient & Admission_ID == @HospitalAdmission"
)
original_text2 = original_text['Original_Text'].values
AdmissionChiefCom = original_text['Admission_Chief_Complaint'].values
diagnosis = original_text['DIAGNOSIS'].values
reference_text = original_text['Reference_text'].values
##========= Buttons to the 5 tabs ======== Temp disabled Discharge Plan and Social Notes
## To include Discharge Plan and Social Notes, use the five-column layout
## below instead of the three-column one and uncomment the col3/col4 blocks:
##col1, col2, col3, col4, col5 = st.columns([1,1,1,1,1])
col1, col2, col5 = st.columns([1, 1, 1])
col6, col7, col8 = st.columns([2, 2, 2])

# Each button only changes the label of the note editor for the current rerun.
with st.container():
    with col1:
        btnAdmission = st.button("🏥 Admission")
        if btnAdmission:
            #nav_page('Admission')
            inputNote = "Input Admission Note"
    with col2:
        btnDailyNarrative = st.button('📆Daily Narrative')
        if btnDailyNarrative:
            inputNote = "Input Daily Narrative Note"
    # with col3:
    #     btnDischargePlan = st.button('🗒️Discharge Plan')
    #     if btnDischargePlan:
    #         inputNote = "Input Discharge Plan"
    # with col4:
    #     btnSocialNotes = st.button('📝Social Notes')
    #     if btnSocialNotes:
    #         inputNote = "Input Social Note"
    with col5:
        btnPastHistory = st.button('📇Past History (6 Mths)')
        if btnPastHistory:
            inputNote = "Input History records"

##========= on Past History Tab =========
if btnPastHistory:
    st.text_area('Past History', 'Date of discharge: xxxxxxxxx')
else:
    # original_text2 is a numpy array of the matching notes.  Join its
    # elements instead of calling str() on the array, which put the array
    # repr (brackets, quotes, escaped newlines) into the editable note and
    # therefore into the NER input as well.
    note_text = "\n\n".join(str(note) for note in original_text2)
    runtext = st.text_area(inputNote, note_text, height=300)

with st.container():
    if btnPastHistory:
        with col6:
            st.markdown('**No. of admission past 6 months: xx**')
        with col7:
            # to replace with dropdown list if data is available
            # NOTE(review): some Streamlit releases may reject a text_area
            # height this small - confirm against the deployed version.
            st.text_area('Discharge Disposition:', ' ', height=8)
        with col8:
            #st.date_input('Select Admission Date') # To replace with a dropdown filter instead
            #st.selectbox('Past Episodes',pastHistoryEp)
            pastHistory = st.selectbox(' ', pastHistoryEpid)
##========= END on Past History Tab =========
def genEntities(ann, entity):
    """Render the unique entity texts of one class as a coloured paragraph.

    Args:
        ann: DataFrame with an 'Entity' column (entity text) and a 'Class'
            column (entity label).
        entity: Label to display; 'DISEASE' and 'CHEMICAL' have colours
            defined.

    Nothing is rendered when ``ann`` holds no row of the requested class.

    Raises:
        KeyError: if rows of class ``entity`` exist but no colour is defined
            for it.
    """
    # Local import: the file-level name `html` is bound to Streamlit's
    # components helper, not the standard-library module.
    from html import escape

    # entity colour dict
    #ent_col = {'DISEASE':'#B42D1B', 'CHEMICAL':'#F06292'}
    ent_col = {'DISEASE': 'pink', 'CHEMICAL': 'orange'}

    # Use the frame that was passed in rather than the module-level trans_df.
    texts = ann.loc[ann['Class'] == entity, 'Entity'].unique()
    if len(texts) == 0:
        return

    # Entity text comes from the user-editable note and is written with
    # unsafe_allow_html, so escape it before interpolating into the markup.
    entlist = ",".join(escape(str(text)) for text in texts)
    st.markdown(
        f'<p style="background-color:{ent_col[entity]};color:#080808;font-size:16px;">{entlist}</p>',
        unsafe_allow_html=True,
    )
##======================== Start of NER Tagging ======================== | |
# ====== Old NER ====== | |
# doc = nlp(str(original_text2)) | |
# colors = { "DISEASE": "pink","CHEMICAL": "orange"} | |
# options = {"ents": [ "DISEASE", "CHEMICAL"],"colors": colors} | |
# ent_html = displacy.render(doc, style="ent", options=options) | |
# ====== End of Old NER ====== | |
def lemmatize(note, nlp):
    """Return ``note`` with every token replaced by its lemma.

    Lemmatizing the note lets all forms of a negation cue be captured
    (e.g., deny: denies, denying).

    Args:
        note: Text to lemmatize.
        nlp: Callable that turns text into an iterable of tokens exposing a
            ``lemma_`` attribute.

    Returns:
        The lemmas joined by single spaces.
    """
    return " ".join(token.lemma_ for token in nlp(note))
def get_entity_options():
    """Build the displacy options for the NER visualization.

    Returns:
        A dict with the entity labels to show under "ents" and the colour
        assigned to each label under "colors".
    """
    palette = {'DISEASE': 'pink', 'CHEMICAL': 'orange', "NEG_ENTITY": 'white'}
    # The labels to render are exactly the keys of the palette, in order.
    return {"ents": list(palette), "colors": palette}
def neg_model(nlp_model):
    """Load a spaCy pipeline extended with a negation-detection component.

    Args:
        nlp_model: Name or path of the spaCy model to load.

    Returns:
        The loaded pipeline with the parser disabled and a 'sentencizer'
        and a 'negex' component appended.
    """
    negex_config = {
        "chunk_prefix": ["no"],
    }
    pipeline = spacy.load(nlp_model, disable=['parser'])
    # The parser is disabled above, so add the sentencizer component.
    pipeline.add_pipe('sentencizer')
    pipeline.add_pipe("negex", config=negex_config, last=True)
    return pipeline
def negation_handling(nlp_model, note, neg_model):
    """Collect the text of every entity flagged as negated in ``note``.

    Args:
        nlp_model: Model name passed to ``neg_model``.
        note: Clinical note text.
        neg_model: Factory called as ``neg_model(nlp_model)``; it must return
            a callable pipeline whose documents expose ``ents``, each entity
            carrying a ``_.negex`` flag.

    Returns:
        List of entity texts whose negation flag is set, in order of
        appearance.
    """
    nlp = neg_model(nlp_model)
    results = []
    # Sentence tokenizing based on the "." delimiter.
    for fragment in note.split("."):
        sentence = fragment.strip()
        if not sentence:
            # Empty fragments (e.g. after a trailing ".") cannot contain
            # entities, so skip the pipeline call for them.
            continue
        results.extend(ent.text for ent in nlp(sentence).ents if ent._.negex)
    return results
def match(nlp, terms, label):
    """Build a PhraseMatcher that finds the given negated phrases in a text.

    Args:
        nlp: spaCy pipeline; its vocab and tokenizer are used for the patterns.
        terms: Iterable of phrase strings to match.
        label: Match key under which all phrases are registered.

    Returns:
        A PhraseMatcher with one pattern per term registered under ``label``.
    """
    matcher = PhraseMatcher(nlp.vocab)
    patterns = [nlp.make_doc(text) for text in terms]
    # Pass the patterns as a list (the documented call form) instead of the
    # legacy ``add(label, None, *patterns)`` signature.
    matcher.add(label, patterns)
    return matcher
def overwrite_ent_lbl(matcher, doc):
    """Replace the labels of entities that overlap a matched negative phrase.

    Every accepted match becomes a new span labelled with the match id, and
    any existing entity overlapping that span is dropped.

    Args:
        matcher: Callable returning ``(match_id, start, end)`` triples for
            ``doc``.
        doc: spaCy Doc whose entities are rewritten.

    Returns:
        The same ``doc`` object; its ``ents`` are modified in place.
    """
    claimed_tokens = set()
    kept_entities = list(doc.ents)
    new_entities = []
    for match_id, start, end in matcher(doc):
        span_tokens = range(start, end)
        # Reject a match if ANY of its tokens is already claimed.  Checking
        # only the first and last token let a match that encloses an earlier
        # one through, giving doc.ents overlapping spans.
        if not claimed_tokens.isdisjoint(span_tokens):
            continue
        new_entities.append(Span(doc, start, end, label=match_id))
        # Keep only the existing entities that lie entirely outside the match.
        kept_entities = [
            e for e in kept_entities if e.end <= start or e.start >= end
        ]
        claimed_tokens.update(span_tokens)
    doc.ents = tuple(kept_entities) + tuple(new_entities)
    return doc
def dedupe(items):
    """Yield each distinct item once, in first-seen order.

    Items are compared after conversion to ``str`` and stripping of
    surrounding whitespace; the normalised string is what gets yielded.
    """
    emitted = set()
    for raw in items:
        text = str(raw).strip()
        if text in emitted:
            continue
        emitted.add(text)
        yield text
# Lemmatize the note currently held in the editor.
lem_clinical_note = lemmatize(runtext, nlp0)

# Doc object created with the BC5CDR model.
doc = nlp1(lem_clinical_note)

# Negated concepts in the note, as identified by negspacy, turned into a
# phrase matcher under the "NEG_ENTITY" label.
results0 = negation_handling("en_ner_bc5cdr_md", lem_clinical_note, neg_model)
matcher = match(nlp1, results0, "NEG_ENTITY")

# doc0: doc with the "NEG_ENTITY" label applied.  overwrite_ent_lbl assigns
# doc.ents and returns the same object, so `doc` carries the new labels too.
doc0 = overwrite_ent_lbl(matcher, doc)

# Render the identified named entities of the input text as HTML.
options = get_entity_options()
ent_html = displacy.render(doc0, style='ent', options=options)
##======================== End of NER Tagging ========================
def run_model(input_text):
    """Show the stored summary of the selected model for the current note.

    The summary is not computed here: it is read from the column of the
    module-level ``original_text`` frame that corresponds to the model chosen
    in the module-level ``model`` selectbox.

    Args:
        input_text: Text of the note; currently unused and kept for interface
            compatibility.
    """
    # Model name (as offered in the sidebar) -> column holding its summary.
    summary_columns = {
        "BertSummarizer": "BertSummarizer2s",
        "BertGPT2": "BertGPT2",
        "t5seq2eq": "t5seq2eq",
        "t5": "t5",
        "gensim": "gensim",
        "pysummarizer": "pysummarizer",
    }
    column = summary_columns.get(model)
    if column is None:
        # Previously an unrecognised model left `output` unbound and the
        # final st.success call raised UnboundLocalError.
        st.warning(f"No summary available for model '{model}'.")
        return

    output = original_text[column].values
    st.write('Summary')
    # Join the values so the summary text is shown, not the numpy array repr.
    st.success("\n\n".join(str(summary) for summary in output))
def _as_text(values):
    """Join the elements of a value array into display text."""
    # str() on a numpy array yields its repr (brackets, quotes, escaped
    # newlines); joining the elements shows the actual text instead.
    return "\n\n".join(str(value) for value in values)


col1, col2 = st.columns([1, 1])

with col1:
    if not btnPastHistory:  # to not show summary and reference text for Past History
        # The button's return value is not used: the summary is rendered on
        # every rerun.
        st.button('Summarize')
        run_model(runtext)
        #sentences=runtext.split('.')
        st.text_area('Reference text', _as_text(reference_text), height=150)
    else:
        with st.expander('Full Discharge Summary'):
            # NOTE(review): this reads a 'TEXT' column from df; confirm that
            # df (rather than the past-history frame df3) carries it.
            historyAdmission = df.query(
                "Patient_ID == @patient & Admission_ID == @pastHistory"
            )
            fulldischargesummary = historyAdmission['TEXT'].values
            st.write(_as_text(fulldischargesummary))

##====== Storing the Diseases/Text
# One row per distinct entity text; if a text occurs with several labels the
# last label seen wins.
ent_bc = {ent.text: ent.label_ for ent in doc.ents}
table = {"Entity": list(ent_bc), "Class": list(ent_bc.values())}
trans_df = pd.DataFrame(table)

with col2:
    # The original line ended in a stray ':' (a syntax error).  The button is
    # treated like 'Summarize' above: shown, with the panel always rendered.
    st.button('NER')
    st.markdown('**CHIEF COMPLAINT:**')
    st.write(_as_text(AdmissionChiefCom))
    st.markdown('**ADMISSION DIAGNOSIS:**')
    st.markdown(_as_text(diagnosis))
    st.markdown('**PROBLEM/ISSUE**')
    genEntities(trans_df, 'DISEASE')
    st.markdown('**MEDICATION**')
    genEntities(trans_df, 'CHEMICAL')
    #st.table(trans_df)
    st.markdown('**NER**')
    with st.expander("See NER Details"):
        st.markdown(ent_html, unsafe_allow_html=True)