Spaces:
Runtime error
Runtime error
Init
Browse files- .gitattributes +1 -27
- .gitignore +1 -0
- README.md +4 -4
- app.py +183 -0
- data/img/Jellymation.gif +3 -0
- requirements.txt +6 -0
- style.css +57 -0
- support_functions.py +33 -0
.gitattributes
CHANGED
@@ -1,27 +1 @@
|
|
1 |
-
|
2 |
-
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
-
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
-
*.bin.* filter=lfs diff=lfs merge=lfs -text
|
5 |
-
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
6 |
-
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
-
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
-
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
-
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
-
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
-
*.model filter=lfs diff=lfs merge=lfs -text
|
12 |
-
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
13 |
-
*.onnx filter=lfs diff=lfs merge=lfs -text
|
14 |
-
*.ot filter=lfs diff=lfs merge=lfs -text
|
15 |
-
*.parquet filter=lfs diff=lfs merge=lfs -text
|
16 |
-
*.pb filter=lfs diff=lfs merge=lfs -text
|
17 |
-
*.pt filter=lfs diff=lfs merge=lfs -text
|
18 |
-
*.pth filter=lfs diff=lfs merge=lfs -text
|
19 |
-
*.rar filter=lfs diff=lfs merge=lfs -text
|
20 |
-
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
21 |
-
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
22 |
-
*.tflite filter=lfs diff=lfs merge=lfs -text
|
23 |
-
*.tgz filter=lfs diff=lfs merge=lfs -text
|
24 |
-
*.xz filter=lfs diff=lfs merge=lfs -text
|
25 |
-
*.zip filter=lfs diff=lfs merge=lfs -text
|
26 |
-
*.zstandard filter=lfs diff=lfs merge=lfs -text
|
27 |
-
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
1 |
+
data/img/*.gif filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
.gitignore
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
__pycache__
|
README.md
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
-
emoji:
|
4 |
-
colorFrom:
|
5 |
-
colorTo:
|
6 |
sdk: streamlit
|
7 |
app_file: app.py
|
8 |
pinned: false
|
|
|
1 |
---
|
2 |
+
title: HealthseaPipeline
|
3 |
+
emoji: 🪐
|
4 |
+
colorFrom: yellow
|
5 |
+
colorTo: pink
|
6 |
sdk: streamlit
|
7 |
app_file: app.py
|
8 |
pinned: false
|
app.py
ADDED
@@ -0,0 +1,183 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import spacy
|
3 |
+
from spacy_streamlit import visualize_ner
|
4 |
+
from support_functions import HealthseaPipe
|
5 |
+
import operator
|
6 |
+
|
7 |
+
# Header
# Inline the project stylesheet into the page. The encoding is given
# explicitly so reading the file does not depend on the host's locale.
with open("style.css", encoding="utf-8") as f:
    st.markdown("<style>" + f.read() + "</style>", unsafe_allow_html=True)

# Intro
st.title("Welcome to Healthsea 🪐")

# Two columns: descriptive text on the left, the animation on the right.
intro, jellyfish = st.columns(2)
jellyfish.markdown("\n")

intro.subheader("Create easier access to health✨")

jellyfish.image("data/img/Jellymation.gif")
intro.markdown(
    """Healthsea is an end-to-end spaCy v3 pipeline for analyzing user reviews to supplementary products and extracting their potential effects on health."""
)
intro.markdown(
    """The code for Healthsea is provided in this [github repository](https://github.com/thomashacker/healthsea). Visit our [blog post](https://explosion.ai/) or more about the Healthsea project.
"""
)

st.write(
    """This app visualizes the individual processing steps of the pipeline in which you can write custom reviews to get insights into the functionality of all the different components.
You can visit the [Healthsea Demo app](https://huggingface.co/spaces/edichief/healthsea-pipeline) for exploring the Healthsea processing on productive data.
"""
)

st.markdown("""---""")

# Setup
healthsea_pipe = HealthseaPipe()

# Per predicted class: (background colour, shadow/border colour) used when
# rendering clause and effect boxes.
color_code = {
    "POSITIVE": ("#3C9E58", "#1B7735"),
    "NEGATIVE": ("#FF166A", "#C0094B"),
    "NEUTRAL": ("#7E7E7E", "#4E4747"),
    "ANAMNESIS": ("#E49A55", "#AD6B2D"),
}

# Reviews offered in the "Predefined example reviews" select box.
example_reviews = [
    "This is great for joint pain.",
    "Product helped my joint pain but it also caused rashes.",
    "I'm diagnosed with gastritis. This product helped!",
    "This has made my insomnia even worse.",
    "It didn't help my joint pain.",
]
|
53 |
+
|
54 |
+
# Functions
|
55 |
+
def kpi(n, text):
    """Build the HTML snippet for a KPI tile.

    Args:
        n: Headline value, placed inside the ``<h1>`` element.
        text: Caption, placed inside the ``<span>`` element.

    Returns:
        The markup as a string, wrapped in ``<div class='kpi'>``.
    """
    pieces = (
        "",
        "<div class='kpi'>",
        f"<h1>{n}</h1>",
        f"<span>{text}</span>",
        "</div>",
        "",
    )
    return "\n".join(pieces)
|
63 |
+
|
64 |
+
|
65 |
+
def central_text(text):
    """Wrap *text* in an ``<h2>`` element carrying the ``central_text`` class.

    Args:
        text: Content to place inside the heading.

    Returns:
        The heading markup as a string.
    """
    opening = "<h2 class='central_text'>"
    closing = "</h2>"
    return opening + f"{text}" + closing
|
68 |
+
|
69 |
+
|
70 |
+
def format_clause(text, meta, pred):
    """Build the HTML card for one classified clause.

    The result is meant to be rendered as raw HTML, and ``text``/``meta``
    originate from the review typed by the user, so both are HTML-escaped
    before interpolation. This keeps markup contained in a review from being
    injected into the page.

    Args:
        text: Clause text shown in the coloured box.
        meta: Caption shown in the box underneath.
        pred: Predicted class; selects the colours from ``color_code``.

    Returns:
        The markup as a string.

    Raises:
        KeyError: If ``pred`` is not a key of ``color_code``.
    """
    # Imported locally under its own name so that the file's import block
    # stays untouched and no local variable shadows the ``html`` module.
    from html import escape

    colors = color_code[pred]
    background, edge = colors[0], colors[1]
    # quote=False: the values land in element content, not in attributes.
    safe_text = escape(str(text), quote=False)
    safe_meta = escape(str(meta), quote=False)
    return f"""
<div>
<div class="clause" style="background-color:{background} ; box-shadow: 0px 5px {edge}; border-color:{edge};">
<div class="clause_text">{safe_text}</div>
</div>
<div class="clause_meta">
<div>{safe_meta}</div>
</div>
</div>"""
|
81 |
+
|
82 |
+
|
83 |
+
def format_effect(text, pred):
    """Build the HTML card for one aggregated health effect.

    The result is meant to be rendered as raw HTML, and ``text`` contains an
    entity name taken from the review typed by the user, so it is
    HTML-escaped before interpolation.

    Args:
        text: Description shown in the coloured box.
        pred: Effect class; selects the colours from ``color_code``.

    Returns:
        The markup as a string.

    Raises:
        KeyError: If ``pred`` is not a key of ``color_code``.
    """
    # Imported locally under its own name so that the file's import block
    # stays untouched and no local variable shadows the ``html`` module.
    from html import escape

    colors = color_code[pred]
    background, edge = colors[0], colors[1]
    # quote=False: the value lands in element content, not in an attribute.
    safe_text = escape(str(text), quote=False)
    return f"""
<div>
<div class="clause" style="background-color:{background} ; box-shadow: 0px 5px {edge}; border-color:{edge};">
<div class="clause_text">{safe_text}</div>
</div>
</div>"""
|
91 |
+
|
92 |
+
# Load model
def _ensure_model_cached():
    """Load ``en_healthsea`` into the session state unless it is already there."""
    if "model" in st.session_state:
        return
    st.session_state["model"] = spacy.load("en_healthsea")


load_state = st.info("Loading...")
try:
    load_state.info("Loading model...")
    _ensure_model_cached()
except LookupError:
    # Download model
    # NOTE(review): presumably LookupError signals that the benepar parser
    # data is not installed yet -- confirm against benepar/nltk behaviour.
    import nltk
    import benepar

    load_state.info("Downloading model...")
    benepar.download("benepar_en3")
    _ensure_model_cached()
load_state.success("Loading complete!")
|
110 |
+
|
111 |
+
# Pipeline
st.markdown(central_text("⚙️ Pipeline"), unsafe_allow_html=True)

check = st.checkbox("Use predefined examples")

# Either pick one of the canned reviews or let the user type their own.
if check:
    text = st.selectbox("Predefined example reviews", example_reviews)
else:
    text = st.text_input(label="Write a review", value="This is great for joint pain!")

# Run the cached pipeline on the chosen review.
nlp = st.session_state["model"]
doc = nlp(text)

# NER
ner_labels = nlp.get_pipe("ner").labels
ner_colors = {"CONDITION": "#FF4B76", "BENEFIT": "#629B68"}
visualize_ner(
    doc,
    labels=ner_labels,
    show_table=False,
    title="✨ Named Entity Recognition",
    colors=ner_colors,
)

st.info("""The first processing step is to identify Conditions or Benefits with Named Entity Recognition. Conditions are diseases, symptoms and general health problems (e.g. joint pain), while Benefits are positive desired health aspects (e.g. energy)""")

st.markdown("""---""")
|
136 |
+
|
137 |
+
# Segmentation, Blinding, Classification
st.markdown("## 🔮 Segmentation, Blinding, Classification")

clauses = healthsea_pipe.get_clauses(doc)
for doc_clause, clause in zip(clauses, doc._.clauses):
    # Highest-scoring category and its score, shown as a percentage.
    classification, top_score = max(
        clause["cats"].items(), key=operator.itemgetter(1)
    )
    percentage = round(float(top_score) * 100, 2)
    meta = f"{clause['ent_name']} ({classification} {percentage}%)"

    clause_html = format_clause(doc_clause.text, meta, classification)
    st.markdown(clause_html, unsafe_allow_html=True)
    st.markdown("\n")

st.info("""The review is segmented into sub-clauses and then classified by a Text Classification model. We additionally blind the found entities to improve generalization and also to inform the model about our current target entity of which we want to get the prediction of.
The Text Classification predicts four exclusive classes: 'Positive', 'Negative', 'Neutral', 'Anamnesis', they represent the health effect.""")

st.info("""The 'Anamnesis' class is defined as the current state of health of a reviewer (e.g. 'I am diagnosed with joint pain'). It is used to link stated health effects that are mentioned in later sentences.""")

st.markdown("""---""")
|
157 |
+
|
158 |
+
# Aggregation
st.markdown("## 🔗 Aggregation")

# One coloured box per entity, labelled with its aggregated effect.
for effect, details in doc._.health_effects.items():
    effect_label = details["effect"]
    effect_html = format_effect(f"{effect_label} effect on {effect}", effect_label)
    st.markdown(effect_html, unsafe_allow_html=True)
    st.markdown("\n")

st.info("""Multiple classification are aggregated into one final classification.""")

st.markdown("""---""")
|
174 |
+
|
175 |
+
# Indepth
st.markdown("## 🔧 Pipeline attributes")
clauses_col, effect_col = st.columns(2)

# Column headings first, then the raw attribute dumps beneath them.
clauses_col.markdown("### doc._.clauses")
effect_col.markdown("### doc._.health_effects")

for clause in doc._.clauses:
    clauses_col.json(clause)
effect_col.json(doc._.health_effects)
|
data/img/Jellymation.gif
ADDED
![]() |
Git LFS Details
|
requirements.txt
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
streamlit>=1.2.0
|
2 |
+
spacy-streamlit>=1.0.2
|
3 |
+
spacy>=3.1.4
|
4 |
+
benepar>=0.2.0
|
5 |
+
|
6 |
+
https://huggingface.co/edichief/en_healthsea/resolve/main/en_healthsea-any-py3-none-any.whl
|
style.css
ADDED
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
.kpi{
|
2 |
+
text-align: center;
|
3 |
+
border-style: solid;
|
4 |
+
border-width: 1px;
|
5 |
+
border-radius: 5px;
|
6 |
+
border-color: inherit;
|
7 |
+
}
|
8 |
+
|
9 |
+
.kpi:hover {
|
10 |
+
transform: scale(1.1);
|
11 |
+
}
|
12 |
+
|
13 |
+
.central_text{
|
14 |
+
text-align: center;
|
15 |
+
top: 50%;
|
16 |
+
}
|
17 |
+
|
18 |
+
.clause{
|
19 |
+
text-align: center;
|
20 |
+
border-style: solid;
|
21 |
+
border-width: 1px;
|
22 |
+
border-radius: 5px;
|
23 |
+
border-color: #1B7735;
|
24 |
+
box-shadow: 0px 5px #1B7735;
|
25 |
+
color: white;
|
26 |
+
margin-left: 10%;
|
27 |
+
margin-right: 10%;
|
28 |
+
padding-top: 2%;
|
29 |
+
padding-bottom: 2%;
|
30 |
+
background-color: #3C9E58;
|
31 |
+
z-index: 5;
|
32 |
+
display: block;
|
33 |
+
position: relative;
|
34 |
+
}
|
35 |
+
|
36 |
+
.clause:hover {
|
37 |
+
transform: scale(1.1);
|
38 |
+
}
|
39 |
+
|
40 |
+
.clause_text{
|
41 |
+
font-weight: bold;
|
42 |
+
}
|
43 |
+
|
44 |
+
.clause_meta{
|
45 |
+
text-align: center;
|
46 |
+
border-style: solid;
|
47 |
+
border-width: 1px;
|
48 |
+
border-radius: 5px;
|
49 |
+
border-color: #0c0c0e;
|
50 |
+
margin-left: 10%;
|
51 |
+
margin-right: 10%;
|
52 |
+
padding-top: 2%;
|
53 |
+
padding-bottom: 2%;
|
54 |
+
z-index: 3;
|
55 |
+
display: block;
|
56 |
+
position: relative;
|
57 |
+
}
|
support_functions.py
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from spacy.tokens import Doc
|
2 |
+
|
3 |
+
class HealthseaPipe:
    """Helpers for presenting the output of the Healthsea pipeline."""

    # Get Clauses and their predictions
    def get_clauses(self, doc):
        """Rebuild every clause of *doc* as a standalone ``Doc``.

        For each entry of ``doc._.clauses`` the tokens between its
        ``split_indices`` are copied. When the entry has an entity
        (``has_ent``), the tokens covered by ``ent_indices`` are collapsed
        into a single token holding the entry's ``blinder`` with the ``<``
        and ``>`` characters removed.

        Args:
            doc: A ``Doc`` exposing the ``doc._.clauses`` extension.

        Returns:
            A list with one ``Doc`` per clause, in the order of
            ``doc._.clauses``.
        """
        clauses = []
        for clause in doc._.clauses:
            split_start, split_end = (
                clause["split_indices"][0],
                clause["split_indices"][1],
            )
            has_ent = clause["has_ent"]
            if has_ent:
                # Read the entity bounds once instead of once per token.
                ent_start = clause["ent_indices"][0]
                ent_end = clause["ent_indices"][1]

            words = []
            spaces = []
            for token in doc[split_start:split_end]:
                if has_ent and token.i == ent_start:
                    # The first entity token is replaced by the blinder.
                    words.append(
                        clause["blinder"].replace(">", "").replace("<", "")
                    )
                    spaces.append(True)
                elif has_ent and ent_start <= token.i < ent_end:
                    # Remaining entity tokens are dropped.
                    continue
                else:
                    words.append(token.text)
                    # ``Doc`` documents ``spaces`` as booleans, so convert
                    # the whitespace string explicitly.
                    spaces.append(bool(token.whitespace_))

            clauses.append(Doc(doc.vocab, words=words, spaces=spaces))

        return clauses
|