# ando55's picture
# Update app.py
# 24601dc
import streamlit as st
import numpy as np
from pandas import DataFrame
import run_segbot
from functionforDownloadButtons import download_button
import os
import json
# Install the mecab-ipadic-NEologd dictionary into a local checkout.
# Streamlit re-executes this script on every interaction, so the shell is only
# spawned while the checkout directory is missing. (A repeated `git clone`
# into the existing directory fails and, because of `&&`, skips the install
# step anyway.)
if not os.path.isdir("mecab-ipadic-neologd"):
    neologd_status = os.system('git clone --depth 1 https://github.com/neologd/mecab-ipadic-neologd.git && cd mecab-ipadic-neologd && ./bin/install-mecab-ipadic-neologd -n -y -u -p $PWD')
    if neologd_status != 0:
        # os.system does not raise on failure; make it visible in the log.
        print(f"mecab-ipadic-neologd setup exited with status {neologd_status}")
# Page-level settings (browser tab title, icon and wide layout).
# Streamlit expects this to be the first st.* command executed on the page.
st.set_page_config(
    page_title="Clinical Segment Splitter",
    page_icon="🚑",
    layout="wide",
)
def _max_width_(max_width_px: int = 1400) -> None:
    """Inject a CSS rule that caps the width of the main block container.

    Args:
        max_width_px: Value, in pixels, written into the ``max-width`` rule.
            Defaults to 1400.
    """
    max_width_str = f"max-width: {max_width_px}px;"
    # Doubled braces are literal CSS braces inside the f-string.
    st.markdown(
        f"""
<style>
.reportview-container .main .block-container{{
{max_width_str}
}}
</style>
""",
        unsafe_allow_html=True,
    )
#_max_width_()
#c30 = st.columns([1,])
#with c30:
# st.image("logo.png", width=400)
# Page heading; the empty header/markdown calls only add vertical spacing.
st.title("🚑 Clinical Segment Splitter")
st.header("")

# Short description of the app with the paper citation, expanded by default.
with st.expander("ℹ️ - About this app", expanded=True):
    st.write(
        """
- *Clinical segment splitter* app is an implementation of our paper.
>Kenichiro Ando, Takashi Okumura, Mamoru Komachi, Hiromasa Horiguchi, Yuji Matsumoto (2022) [Exploring optimal granularity for extractive summarization of unstructured health records: Analysis of the largest multi-institutional archive of health records in Japan.](https://doi.org/10.1371/journal.pdig.0000099) PLOS Digital Health 1(9): e0000099.
- This app automatically splits Japanese sentences into smaller units representing medical meanings.
"""
    )
    st.markdown("")

st.markdown("")
#st.markdown("## ๐Ÿ“Œ Paste document")
# st.cache is deprecated in current Streamlit releases; st.cache_resource is
# its replacement for shared, unserialised objects such as models. Fall back
# to the legacy decorator when running on a Streamlit that predates it.
_cache_model = (
    st.cache_resource
    if hasattr(st, "cache_resource")
    else st.cache(allow_output_mutation=True)
)


@_cache_model
def model_load():
    """Run ``run_segbot.setup()`` once and reuse the result across reruns.

    Returns:
        The value returned by ``run_segbot.setup()``; the caller unpacks it
        into three names (``model``, ``fm``, ``index``).
    """
    return run_segbot.setup()
model,fm,index = model_load()
with st.form(key="my_form"):
ce, c1, ce, c2, c3 = st.columns([0.07, 1, 0.07, 5, 0.07])
with c1:
ModelType = st.radio(
"Select the sentence split method",
["pySBD (Default)", "full stop & linebreak"],
help="""
We have prepared 2 methods to break input text into sentences.
- The [pySBD](https://github.com/nipunsadvilkar/pySBD) is a more accurate method.
- The full stop & linebreak is naive and has low accuracy, but can be robust to noise.
""",
)
if ModelType == "full stop & linebreak":
split_method="fullstop"
else:
split_method="pySBD"
with c2:
doc = st.text_area(
"Paste your text",
"ใ‚ฐใƒฉใƒ ๆŸ“่‰ฒใ™ใ‚‹ใ‚‚ๆ˜Žใ‚‰ใ‹ใช่ŒใŒ่ฆ‹ใคใ‹ใ‚‰ใšใ€ ้ซ„ๆถฒๅŸน้คŠใงใ‚‚ๅ„ชไฝใช่ŒใฏๅŸน้คŠใ•ใ‚Œใชใ‹ใฃใŸใ€‚็ดฐ่Œๆ€ง้ซ„่†œ็‚Žใซๅฏพใ™ใ‚‹ใ‚ฐใƒฉใƒ ๆŸ“่‰ฒใฎๆ„Ÿๅบฆใฏ60%็จ‹ๅบฆใงใ‚ใ‚Šใ€ๅŸน้คŠใซ้–ขใ—ใฆใ‚‚ๆ„Ÿๅบฆใฏ้ซ˜ใใชใ„ใ€‚ใพใŸ้ซ„ๆถฒไธญใฎ็ณ–ใฏใ‚‚ใ†ๅฐ‘ใ—ๆธ›ใ‚‹ใฎใงใฏใชใ„ใ ใ‚ใ†ใ‹ใ€‚็ขบๅฎš่จบๆ–ญใฏใคใ‹ใชใ„ใ‚‚ใฎใฎใ€ๆœ€ใ‚‚็–‘ใ‚ใ—ใ„็–พๆ‚ฃใงใ‚ใฃใŸใ€‚่ตทๅ› ่ŒใฏMRSA,่…ธๅ†…็ดฐ่Œ็ญ‰ใ‚’ๅบƒๅŸŸใซใ‚ซใƒใƒผใ™ใ‚‹ใŸใ‚ใƒใƒณใ‚ณใƒžใ‚คใ‚ทใƒณ,ใƒกใƒญใƒšใƒใƒ (้ซ„่†œ็‚Ždose)ใจใ—ใŸใ€‚",
height=300,
)
submit_button = st.form_submit_button(label="Go to split โœ‚๏ธŽ")
if not submit_button:
st.stop()
# Split the submitted text with the selected sentence-splitting method.
keywords = run_segbot.generate(doc, model, fm, index, split_method)

st.markdown("## Results")
st.header("")

# Created before the table so the download buttons are laid out above it;
# the two outer columns only pad the three button columns.
cs, c1, c2, c3, cLast = st.columns([2, 1.5, 1.5, 1.5, 2])

st.header("")

# One row per segment, numbered from 1 for display.
df = DataFrame(keywords)
df.index += 1
df.columns = ['Segment']
# The table is deliberately not echoed to stdout: pasted text may be clinical.
st.table(df)

# NOTE(review): all three buttons receive the same `keywords` object; whether
# the file extension changes the serialisation depends on download_button -
# confirm against functionforDownloadButtons.
downloads = (
    (c1, "Data.csv", "📥 Download (.csv)"),
    (c2, "Data.txt", "📥 Download (.txt)"),
    (c3, "Data.json", "📥 Download (.json)"),
)
for column, filename, label in downloads:
    with column:
        download_button(keywords, filename, label)