import streamlit as st
import spacy
from spacy_streamlit import visualize_ner
from support_functions import HealthseaPipe
import operator
def visualize_pipeline() -> None:
    """Render the interactive Healthsea pipeline walkthrough in Streamlit.

    The page lets the user type a review (or pick a predefined one), runs it
    through the ``en_healthsea`` spaCy pipeline and shows, in order: the named
    entities, the per-clause classification, the aggregated health effects,
    and the raw ``doc._.clauses`` / ``doc._.health_effects`` attributes.

    The spaCy pipeline is stored in ``st.session_state["model"]`` so it is
    loaded only when that key is missing. If loading raises ``LookupError``,
    the ``benepar_en3`` resource is downloaded and the load is retried.
    """
    healthsea_pipe = HealthseaPipe()

    # (first colour, second colour) per predicted class.
    # NOTE(review): nothing in this file reads color_code; presumably the
    # clause/effect HTML templates below used it before their markup was
    # lost -- confirm against the original templates.
    color_code = {
        "POSITIVE": ("#3C9E58", "#1B7735"),
        "NEGATIVE": ("#FF166A", "#C0094B"),
        "NEUTRAL": ("#7E7E7E", "#4E4747"),
        "ANAMNESIS": ("#E49A55", "#AD6B2D"),
    }

    example_reviews = [
        "This is great for joint pain.",
        "This help joint pain but causes rashes",
        "I'm diagnosed with gastritis. This product helped!",
        "Made my insomnia worse",
        "Didn't help my energy levels",
    ]

    # Functions
    def kpi(n, text) -> str:
        """Return the snippet for a KPI: ``n`` and ``text`` on separate lines."""
        # NOTE(review): not called anywhere in this function.
        return f"\n{n}\n{text}\n"

    def central_text(text) -> str:
        """Return ``text`` followed by a newline, for use as a heading."""
        return f"{text}\n"

    def format_clause(text, meta, pred) -> str:
        """Return the snippet used to display one classified clause."""
        # NOTE(review): the template contains no placeholders here, so
        # text/meta/pred are unused -- the markup looks stripped; confirm.
        return "\n"

    def format_effect(text, pred) -> str:
        """Return the snippet used to display one aggregated health effect."""
        # NOTE(review): same as format_clause -- arguments are unused here.
        return "\n"

    load_state = st.markdown("#### Loading...")

    # Load model
    load_state.markdown("#### Loading model...")
    if "model" not in st.session_state:
        try:
            nlp = spacy.load("en_healthsea")
        # Download model
        except LookupError:
            import benepar

            load_state.markdown("#### Downloading model...")
            benepar.download("benepar_en3")
            # Retry now that the resource exists. Without this the session
            # state stays empty and the lookup further down raises KeyError.
            nlp = spacy.load("en_healthsea")
        st.session_state["model"] = nlp
    load_state.markdown("#### Loading done!")

    # Pipeline
    st.markdown(
        "This app visualizes the processing steps of the Healthsea pipeline. "
        "You can test it by writing an example review."
    )
    st.markdown("---")
    st.markdown(central_text("⚙️ Pipeline"), unsafe_allow_html=True)

    check = st.checkbox("Use predefined examples")
    if not check:
        text = st.text_input(
            label="Write a review", value="This is great for joint pain!"
        )
    else:
        text = st.selectbox("Predefined example reviews", example_reviews)

    nlp = st.session_state["model"]
    doc = nlp(text)

    # NER
    visualize_ner(
        doc,
        labels=nlp.get_pipe("ner").labels,
        show_table=False,
        title="✨ Named Entity Recognition",
        colors={"CONDITION": "#FF4B76", "BENEFIT": "#629B68"},
    )
    st.markdown(
        "The first processing step is to identify Conditions or Benefits with "
        "Named Entity Recognition. Conditions are diseases, symptoms and general "
        "health problems (e.g. joint pain), while Benefits are positive desired "
        "health aspects (e.g. energy)"
    )
    st.markdown("---")

    # Segmentation, Blinding, Classification
    st.markdown("## 🔮 Segmentation, Blinding, Classification")
    clauses = healthsea_pipe.get_clauses(doc)
    for doc_clause, clause in zip(clauses, doc._.clauses):
        # The predicted class is the category with the highest score.
        classification = max(
            clause["cats"].items(), key=operator.itemgetter(1)
        )[0]
        percentage = round(float(clause["cats"][classification]) * 100, 2)
        meta = f"{clause['ent_name']} ({classification} {percentage}%)"
        st.markdown(
            format_clause(doc_clause.text, meta, classification),
            unsafe_allow_html=True,
        )
        st.markdown("\n")
    st.markdown(
        "The review is segmented into sub-clauses and then classified by a "
        "Text Classification model. We additionally blind the found entities "
        "to improve generalization and also to inform the model about our "
        "current target entity of which we want to get the prediction of.\n"
        "The Text Classification predicts four exclusive classes: 'Positive', "
        "'Negative', 'Neutral', 'Anamnesis', they represent the health effect."
    )
    st.markdown("---")

    # Aggregation
    st.markdown("## 🔗 Aggregation")
    for effect in doc._.health_effects:
        effect_label = doc._.health_effects[effect]["effect"]
        st.markdown(
            format_effect(f"{effect_label} effect on {effect}", effect_label),
            unsafe_allow_html=True,
        )
        st.markdown("\n")
    st.markdown(
        "Multiple classification are aggregated into one final classification."
    )
    st.markdown("---")

    # Indepth
    st.markdown("## 🔧 Pipeline attributes")
    clauses_col, effect_col = st.columns(2)
    clauses_col.markdown("### doc._.clauses")
    for clause in doc._.clauses:
        clauses_col.json(clause)
    effect_col.markdown("### doc._.health_effects")
    effect_col.json(doc._.health_effects)