File size: 5,179 Bytes
69abbc0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99e6b78
69abbc0
99e6b78
 
03bdce4
 
 
99e6b78
 
 
 
 
 
 
 
69abbc0
 
03bdce4
 
69abbc0
 
 
 
 
 
 
 
 
 
03bdce4
 
69abbc0
 
 
 
 
 
 
 
 
 
 
03bdce4
 
69abbc0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
03bdce4
 
 
69abbc0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
03bdce4
 
69abbc0
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
import streamlit as st
import spacy
from spacy_streamlit import visualize_ner
from support_functions import HealthseaPipe
import operator

def visualize_pipeline():
    """Render the Streamlit page that walks through the Healthsea pipeline.

    The page loads the ``en_healthsea`` spaCy pipeline (cached in
    ``st.session_state["model"]``), lets the user type a review or pick one of
    the predefined examples, runs the pipeline on it and then displays, in
    order: the named entities, the classified clauses, the aggregated health
    effects and the raw ``doc._.clauses`` / ``doc._.health_effects``
    attributes.

    The function takes no arguments and returns ``None``; its whole effect is
    the Streamlit output it produces.
    """
    # Imported locally so the block does not depend on a new file-level import.
    from html import escape

    healthsea_pipe = HealthseaPipe()

    # Maps a class label to (background colour, shadow/border colour).
    color_code = {
        "POSITIVE": ("#3C9E58", "#1B7735"),
        "NEGATIVE": ("#FF166A", "#C0094B"),
        "NEUTRAL": ("#7E7E7E", "#4E4747"),
        "ANAMNESIS": ("#E49A55", "#AD6B2D"),
    }

    example_reviews = [
        "This is great for joint pain.",
        "This help joint pain but causes rashes",
        "I'm diagnosed with gastritis. This product helped!",
        "Made my insomnia worse",
        "Didn't help my energy levels",
    ]

    # Functions
    def kpi(n, text):
        """Return the HTML for a KPI tile (not called within this function)."""
        html = f"""
        <div class='kpi'>
            <h1>{n}</h1>
            <span>{text}</span>
        </div>
        """
        return html

    def central_text(text):
        """Return ``text`` wrapped in an ``<h2 class='central_text'>`` tag."""
        html = f"""<h2 class='central_text'>{text}</h2>"""
        return html

    def format_clause(text, meta, pred):
        """Return the HTML box for one clause, coloured by its class ``pred``.

        ``text`` and ``meta`` derive from the user's review, so both are
        HTML-escaped before being embedded in the markup.
        """
        background, accent = color_code[pred]
        html = f"""
        <div>
            <div class="clause" style="background-color:{background} ; box-shadow: 0px 5px {accent}; border-color:{accent};">
                <div class="clause_text">{escape(str(text))}</div>
            </div>
            <div class="clause_meta">
                <div>{escape(str(meta))}</div>
            </div>
        </div>"""
        return html

    def format_effect(text, pred):
        """Return the HTML box for one aggregated effect, coloured by ``pred``.

        ``text`` contains an entity name taken from the user's review, so it
        is HTML-escaped before being embedded in the markup.
        """
        background, accent = color_code[pred]
        html = f"""
        <div>
            <div class="clause" style="background-color:{background} ; box-shadow: 0px 5px {accent}; border-color:{accent};">
                <div class="clause_text">{escape(str(text))}</div>
            </div>
        </div>"""
        return html

    load_state = st.markdown("#### Loading...")
    # Load model
    load_state.markdown("#### Loading model...")
    if "model" not in st.session_state:
        try:
            nlp = spacy.load("en_healthsea")
        # Download model
        except LookupError:
            # NOTE(review): presumably raised when the benepar_en3 resource
            # is not installed yet - confirm against benepar/nltk.
            import nltk
            import benepar

            load_state.markdown("#### Downloading model...")
            benepar.download('benepar_en3')
            # Retry after the download: without this the session state would
            # never receive a model and the lookup below would raise KeyError.
            nlp = spacy.load("en_healthsea")
        st.session_state["model"] = nlp
    load_state.markdown("#### Loading done!")

    # Pipeline
    st.markdown("""This app visualizes the processing steps of the Healthsea pipeline. You can test it by writing an example review.""")

    st.markdown("""---""")

    st.markdown(central_text("⚙️ Pipeline"), unsafe_allow_html=True)

    check = st.checkbox("Use predefined examples")

    if not check:
        text = st.text_input(label="Write a review", value="This is great for joint pain!")
    else:
        text = st.selectbox("Predefined example reviews", example_reviews)

    nlp = st.session_state["model"]
    doc = nlp(text)

    # NER
    visualize_ner(
        doc,
        labels=nlp.get_pipe("ner").labels,
        show_table=False,
        title="✨ Named Entity Recognition",
        colors={"CONDITION": "#FF4B76", "BENEFIT": "#629B68"},
    )

    st.markdown("""The first processing step is to identify Conditions or Benefits with Named Entity Recognition. Conditions are diseases, symptoms and general health problems (e.g. joint pain), while Benefits are positive desired health aspects (e.g. energy)""")

    st.markdown("""---""")

    # Segmentation, Blinding, Classification
    st.markdown("## 🔮 Segmentation, Blinding, Classification")

    clauses = healthsea_pipe.get_clauses(doc)
    for doc_clause, clause in zip(clauses, doc._.clauses):
        # Pick the category with the highest score as the predicted class.
        classification = max(clause["cats"].items(), key=operator.itemgetter(1))[0]
        percentage = round(float(clause["cats"][classification]) * 100, 2)
        meta = f"{clause['ent_name']} ({classification} {percentage}%)"

        st.markdown(
            format_clause(doc_clause.text, meta, classification), unsafe_allow_html=True
        )
        st.markdown("\n")

    st.markdown("""The review is segmented into sub-clauses and then classified by a Text Classification model. We additionally blind the found entities to improve generalization and also to inform the model about our current target entity of which we want to get the prediction of.
    The Text Classification predicts four exclusive classes: 'Positive', 'Negative', 'Neutral', 'Anamnesis', they represent the health effect.""")

    st.markdown("""---""")

    # Aggregation
    st.markdown("## 🔗 Aggregation")

    health_effects = doc._.health_effects
    for effect in health_effects:
        effect_label = health_effects[effect]["effect"]
        st.markdown(
            format_effect(f"{effect_label} effect on {effect}", effect_label),
            unsafe_allow_html=True,
        )
        st.markdown("\n")

    st.markdown("""Multiple classification are aggregated into one final classification.""")

    st.markdown("""---""")
    # Indepth
    st.markdown("## 🔧 Pipeline attributes")
    clauses_col, effect_col = st.columns(2)

    clauses_col.markdown("### doc._.clauses")
    for clause in doc._.clauses:
        clauses_col.json(clause)
    effect_col.markdown("### doc._.health_effects")
    effect_col.json(health_effects)