File size: 9,517 Bytes
946703c 928a017 86f369d 946703c f1f7cd6 0029965 f1f7cd6 946703c 9d31e05 946703c 39ad2bf 946703c 9d31e05 946703c 928a017 39ad2bf b82085d 946703c 39ad2bf 8c9d6ab a5e67b4 8c9d6ab 86f369d 82bf7a9 f61237c 7155227 5c0f1a9 5a1f5ea d32fbac 0b9d4d4 7155227 5a1f5ea 946703c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 |
# -*- coding: utf-8 -*-
"""
Created on Mon Jul 4 08:43:02 2022
@author: dreji18
"""
import streamlit as st
import hydralit_components as hc
import datetime
import time
from Bio_Epidemiology_NER.bio_recognizer import ner_prediction
from Bio_Epidemiology_NER.bio_recognizer import pdf_annotate_streamlit
from functionforDownloadButtons import download_button
import fitz
import pandas as pd
import base64
import tempfile
import os
import streamlit.components.v1 as components
# Page-level Streamlit configuration: wide layout, sidebar collapsed on load.
st.set_page_config(layout='wide', initial_sidebar_state='collapsed',)

# Colour overrides handed to HydraApp below as ``navbar_theme``.
over_theme = {'txc_inactive': '#FFFFFF','menu_background':'#696969','txc_active':'black'}

# app page setup
import hydralit as hy

app = hy.HydraApp(
    title='Biomedical Epidemiology NER App',
    nav_container=None,
    # The original passed the built-in type ``bool`` here.  A class object is
    # always truthy, so it only behaved like ``True`` by accident; pass the
    # boolean value explicitly.
    nav_horizontal=True,
    layout='wide',
    #favicon = "π§",
    use_navbar=True,
    navbar_theme=over_theme,
    navbar_sticky=True,
    navbar_mode='pinned',
    use_loader=True,
    use_cookie_cache=True,
    sidebar_state='auto',
    navbar_animation=True,
    allow_url_nav=False,
    hide_streamlit_markers=True,
    #use_banner_images=["./background.png",None,{'header':"<h1 style='text-align:center;padding: 10px 10px;color:black;font-size:200%;'>Biomedical Epidemiology Entity Recognizer</h1><br>"},None,"./background.png"],
    #banner_spacing=[5,30,60,30,5],
    clear_cross_app_sessions=True,
    session_params=None,
)
# individual pages
@app.addapp(is_home=True)
def my_home():
    """Render the home page: a centred heading, two descriptive paragraphs and an image."""
    overview = """This application presents a generalizable ML pipeline capable of identifying and recognizing many biomedical named entities in texts. In three significant ways, this pipeline improves on previous efforts. First, it can recognize over 50 different entity types, including clinical entities (disease, symptoms, risks, effects, drugs, diabetes, respiration, vital signs, and others), as well as non-clinical entities, such as event-based data, social factors that are not clinical factors but are related to health outcomes. Second, with no code changes, this pipeline is simple to use and adaptable to individual methods for a given data type, task, or domain of application. Third, this pipeline can take any free texts, for example, in the form of text or PDF files and parse them for scientific texts. We hope that this application will provide a more transparent and customizable solution for the healthcare industry, helping to educate and encourage more rigorous applications of ML to biomedical analyses."""
    implications = """The implications of this application in the context of healthcare are multi-facet. For example, these biomedical entity types can help doctors, nurses, and other healthcare professionals align symptoms to diagnosis, treatment, and follow-up. There are also opportunities for policymakers to understand the value that is within electronic and clinical medical records to understand the cost-effectiveness and cost-saving planning. For example, knowing the number of clinically informative, human diagnoses within population groups can assist learning health systems in planning strategies. Tracking social determinants can lead to reducing biases in the health data. This research can also be used to translate the clinical data into knowledge, evidence, and clinical impact."""

    # Page heading, emitted as raw HTML so it can be centred.
    hy.markdown(
        "<h3 style='text-align: center; color: black;'>Biomedical Epidemiology Named Entity Recognition System </h3>",
        unsafe_allow_html=True,
    )

    # Body text: the two paragraphs separated by a blank line.
    st.write(overview)
    st.write("\n")
    st.write(implications)

    hy.image("Epidemiologist.jpeg")
def _limit_to_words(text, max_words):
    r"""Cut *text* right after its ``max_words``-th word.

    A "word" is a run matched by the regex ``\w+`` (the same pattern the page
    uses for counting).  Text that already fits is returned unchanged.

    Args:
        text: The raw user input.
        max_words: Maximum number of words to keep.

    Returns:
        A ``(text, word_count)`` tuple where ``text`` is the possibly
        shortened input and ``word_count`` is the number of words found in the
        *original* input.
    """
    import re  # local import: the file's import header does not bring in ``re``

    words = list(re.finditer(r"\w+", text))
    if len(words) > max_words:
        # Slice at the end offset of the last permitted word so the kept part
        # is a true prefix of the input (punctuation/spacing preserved).
        text = text[: words[max_words - 1].end()] if max_words > 0 else ""
    return text, len(words)


@app.addapp(title='Entity Recognizer', icon="far fa-copy",)
def app2():
    """Render the "Entity Recognizer" page.

    The page has two forms:

    * a text form - the pasted text (limited to ``MAX_WORDS`` words) is passed
      to ``ner_prediction`` and the resulting frame is shown with a CSV
      download button;
    * a PDF form - every page of the uploaded PDF is passed to
      ``ner_prediction``; entities with score > 0.5 and more than two
      characters are tabulated and marked in the PDF with rectangle
      annotations.  The annotated PDF is saved to the system temp directory
      and offered for download.

    Raises:
        Exception: any error raised while processing the PDF is printed and
            re-raised unchanged.
    """
    # NOTE(review): the emoji prefixes in the UI strings below look
    # mis-encoded (mojibake).  They are reproduced unchanged here - confirm
    # the intended characters against the original file.
    MAX_WORDS = 500
    # NOTE(review): a fixed file name in the shared temp directory means
    # concurrent sessions overwrite each other's output - confirm whether
    # that matters for this deployment.
    annotated_pdf_path = os.path.join(tempfile.gettempdir(), "annott.pdf")

    # ------------------------------------------------------------------ text
    hy.subheader("NER from text corpus")
    with hy.form(key="text_form"):
        _, c1, _, c2, c3 = hy.columns([0.07, 1, 0.07, 4, 1.5])
        with c1:
            hy.write("You can paste your biomedical data here. The Named Entity Recognition model will identify the required entities")
            hy.image("medical care logo template social media.png")
        with c2:
            doc = st.text_area(
                "Paste your text below (max 500 words)",
                height=310,
            )
            # The original sliced ``doc[:MAX_WORDS]``, i.e. kept 500
            # *characters*; the warning promises the first 500 *words*.
            doc, word_count = _limit_to_words(doc, MAX_WORDS)
            if word_count > MAX_WORDS:
                st.warning(
                    "β οΈ Your text contains "
                    + str(word_count)
                    + " words."
                    + " Only the first 500 words will be reviewed. Stay tuned as increased allowance is coming! π"
                )
            st.form_submit_button(label="π Get me the data!")

        if doc:
            pred_df = ner_prediction(corpus=doc, compute='cpu')  # pass compute='gpu' if using gpu
            with c3:
                st.dataframe(pred_df)
                # Kept under the same guard as ``pred_df`` so the button is
                # never built from an undefined frame.
                download_button(pred_df, "key-value-content.csv", "π₯ Download (.csv)")

    hy.markdown(" ")
    hy.markdown(" ")
    hy.markdown(" ")

    # ------------------------------------------------------------------- pdf
    hy.subheader("NER from Pdf Reports")
    pdf_ready = False  # set once an annotated PDF has been written
    with hy.form(key="pdf_form"):
        _, c1, _, c2, c3 = hy.columns([0.07, 1, 0.07, 4, 1.5])
        with c1:
            hy.write("You can upload your biomedical report here. The Named Entity Recognition model will identify the required entities")
            hy.image("medical care logo template social media.png")
        with c2:
            uploaded_file = st.file_uploader('Choose your .pdf file', type=["pdf"])
            st.form_submit_button(label="π Get me the data!")

        if uploaded_file is not None:
            try:
                # Context manager closes the document even if processing fails.
                with fitz.open(stream=uploaded_file.read(), filetype="pdf") as document:
                    rows = []
                    # ``page_count`` replaces the deprecated ``pageCount``
                    # alias, matching the snake_case calls used below.
                    for page_no in range(document.page_count):
                        current_page = document[page_no]
                        page_text = document.get_page_text(page_no)
                        entities = ner_prediction(corpus=page_text, compute='cpu')
                        entities = entities.drop_duplicates(subset=["value"], keep='first')

                        for _, row in entities.iterrows():
                            text = row['value']
                            # keep confident hits only; very short strings are
                            # skipped to avoid false positives
                            if not (row["score"] > 0.5 and len(text) > 2):
                                continue
                            rows.append([page_no + 1, row['entity_group'], text, row['score']])

                            # mark every occurrence of the entity on the page
                            for inst in current_page.search_for(text) or []:
                                annot = current_page.add_rect_annot(inst)
                                info = annot.info
                                info["title"] = row['entity_group']
                                annot.set_info(info)
                                annot.update()

                    # Build the frame once instead of growing it row by row.
                    final_df = pd.DataFrame(rows, columns=["Page", "Entity Group", "Value", "Score"])
                    if len(final_df) != 0:
                        final_df['Pdf File'] = uploaded_file.name
                        final_df = final_df[['Entity Group', 'Value', 'Score', 'Page', 'Pdf File']]
                        with c2:
                            st.dataframe(final_df)
                            download_button(final_df, "key-value-pdf.csv", "π₯ Download (.csv)")
                    else:
                        print("No Entities Extracted!!!")

                    document.save(annotated_pdf_path)
                    pdf_ready = True
            except Exception as e:
                print("Error occured: {}".format(e))
                raise  # bare raise keeps the original traceback

    _, _, _, c2, _ = hy.columns([0.07, 1, 0.07, 4, 1.5])
    with c2:
        if pdf_ready:
            with open(annotated_pdf_path, "rb") as pdf_file:
                pdf_bytes = pdf_file.read()
            hy.download_button(label="π₯ Download Annotated PDF",
                               data=pdf_bytes,
                               file_name=uploaded_file.name+"_annotated.pdf",
                               mime='application/octet-stream')
            # NOTE(review): ``components.iframe`` is given a server-side file
            # path as its source; this presumably does not render in the
            # browser.  An earlier (commented-out) attempt embedded the PDF as
            # a base64 ``data:application/pdf`` URI instead - confirm which
            # preview approach is wanted.
            components.iframe(annotated_pdf_path, width=800, height=800)
# start the Hydralit application configured above
app.run()
|