edichief committed on
Commit
a7a38d6
1 Parent(s): 3a74faa
Files changed (8) hide show
  1. .gitattributes +1 -27
  2. .gitignore +1 -0
  3. README.md +4 -4
  4. app.py +183 -0
  5. data/img/Jellymation.gif +3 -0
  6. requirements.txt +6 -0
  7. style.css +57 -0
  8. support_functions.py +33 -0
.gitattributes CHANGED
@@ -1,27 +1 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bin.* filter=lfs diff=lfs merge=lfs -text
5
- *.bz2 filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.model filter=lfs diff=lfs merge=lfs -text
12
- *.msgpack filter=lfs diff=lfs merge=lfs -text
13
- *.onnx filter=lfs diff=lfs merge=lfs -text
14
- *.ot filter=lfs diff=lfs merge=lfs -text
15
- *.parquet filter=lfs diff=lfs merge=lfs -text
16
- *.pb filter=lfs diff=lfs merge=lfs -text
17
- *.pt filter=lfs diff=lfs merge=lfs -text
18
- *.pth filter=lfs diff=lfs merge=lfs -text
19
- *.rar filter=lfs diff=lfs merge=lfs -text
20
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
21
- *.tar.* filter=lfs diff=lfs merge=lfs -text
22
- *.tflite filter=lfs diff=lfs merge=lfs -text
23
- *.tgz filter=lfs diff=lfs merge=lfs -text
24
- *.xz filter=lfs diff=lfs merge=lfs -text
25
- *.zip filter=lfs diff=lfs merge=lfs -text
26
- *.zstandard filter=lfs diff=lfs merge=lfs -text
27
- *tfevents* filter=lfs diff=lfs merge=lfs -text
1
+ data/img/*.gif filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.gitignore ADDED
@@ -0,0 +1 @@
 
1
+ __pycache__
README.md CHANGED
@@ -1,8 +1,8 @@
1
  ---
2
- title: Healthsea Pipeline
3
- emoji: 💩
4
- colorFrom: green
5
- colorTo: indigo
6
  sdk: streamlit
7
  app_file: app.py
8
  pinned: false
1
  ---
2
+ title: HealthseaPipeline
3
+ emoji: 🪐
4
+ colorFrom: yellow
5
+ colorTo: pink
6
  sdk: streamlit
7
  app_file: app.py
8
  pinned: false
app.py ADDED
@@ -0,0 +1,183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import spacy
3
+ from spacy_streamlit import visualize_ner
4
+ from support_functions import HealthseaPipe
5
+ import operator
6
+
7
# Header: inject the app's custom stylesheet into the page.
# The encoding is given explicitly so reading the file does not depend on
# the platform's default text encoding.
with open("style.css", encoding="utf-8") as f:
    st.markdown("<style>" + f.read() + "</style>", unsafe_allow_html=True)

# Intro: page title followed by a two-column row (text and animation).
st.title("Welcome to Healthsea 🪐")

intro, jellyfish = st.columns(2)
jellyfish.markdown("\n")

intro.subheader("Create easier access to health✨")

jellyfish.image("data/img/Jellymation.gif")
intro.markdown(
    """Healthsea is an end-to-end spaCy v3 pipeline for analyzing user reviews to supplementary products and extracting their potential effects on health."""
)
intro.markdown(
    """The code for Healthsea is provided in this [github repository](https://github.com/thomashacker/healthsea). Visit our [blog post](https://explosion.ai/) for more about the Healthsea project.
"""
)

st.write(
    """This app visualizes the individual processing steps of the pipeline in which you can write custom reviews to get insights into the functionality of all the different components.
You can visit the [Healthsea Demo app](https://huggingface.co/spaces/edichief/healthsea-pipeline) for exploring the Healthsea processing on productive data.
"""
)

# Horizontal rule separating the intro from the rest of the page.
st.markdown("""---""")
35
+
36
# Setup
# Helper whose get_clauses() rebuilds the displayable clause Docs.
healthsea_pipe = HealthseaPipe()

# Colours per predicted label: (background colour, border / shadow colour).
color_code = dict(
    POSITIVE=("#3C9E58", "#1B7735"),
    NEGATIVE=("#FF166A", "#C0094B"),
    NEUTRAL=("#7E7E7E", "#4E4747"),
    ANAMNESIS=("#E49A55", "#AD6B2D"),
)

# Reviews offered when the "predefined examples" checkbox is ticked.
example_reviews = []
example_reviews.append("This is great for joint pain.")
example_reviews.append("Product helped my joint pain but it also caused rashes.")
example_reviews.append("I'm diagnosed with gastritis. This product helped!")
example_reviews.append("This has made my insomnia even worse.")
example_reviews.append("It didn't help my joint pain.")
53
+
54
+ # Functions
55
def kpi(n, text):
    """Build the HTML snippet for a KPI tile.

    Args:
        n: Value shown as the tile's headline (converted with ``str``).
        text: Caption shown below the headline (converted with ``str``).

    Returns:
        An HTML string: a ``div`` with class ``kpi`` holding an ``h1`` and a
        ``span``. Both values are HTML-escaped, so markup characters in them
        are displayed literally instead of being interpreted as tags.
    """
    # Local import: the module does not import ``html`` at file level.
    from html import escape

    headline = escape(str(n), quote=False)
    caption = escape(str(text), quote=False)
    return f"""
<div class='kpi'>
<h1>{headline}</h1>
<span>{caption}</span>
</div>
"""
63
+
64
+
65
def central_text(text):
    """Wrap ``text`` in an ``h2`` element with class ``central_text``.

    Args:
        text: Heading content (converted with ``str``).

    Returns:
        The HTML string for the heading. ``text`` is HTML-escaped so that
        ``<``, ``>`` and ``&`` are shown literally rather than parsed as markup.
    """
    # Local import: the module does not import ``html`` at file level.
    from html import escape

    return f"""<h2 class='central_text'>{escape(str(text), quote=False)}</h2>"""
68
+
69
+
70
def format_clause(text, meta, pred):
    """Build the HTML card for one classified clause.

    Args:
        text: Clause text shown inside the coloured box.
        meta: Description line shown in the box underneath.
        pred: Predicted label; selects the colours from ``color_code``.

    Returns:
        An HTML string containing a ``clause`` box and a ``clause_meta`` box.
        ``text`` and ``meta`` are HTML-escaped because they can contain
        arbitrary review text, which must not be interpreted as markup.
    """
    # Local import: the module does not import ``html`` at file level.
    from html import escape

    # Labels without a colour entry use the neutral colours instead of
    # raising KeyError and aborting the page render.
    palette = color_code.get(pred, color_code["NEUTRAL"])
    fill, edge = palette[0], palette[1]
    safe_text = escape(str(text), quote=False)
    safe_meta = escape(str(meta), quote=False)
    return f"""
<div>
<div class="clause" style="background-color:{fill} ; box-shadow: 0px 5px {edge}; border-color:{edge};">
<div class="clause_text">{safe_text}</div>
</div>
<div class="clause_meta">
<div>{safe_meta}</div>
</div>
</div>"""
81
+
82
+
83
def format_effect(text, pred):
    """Build the HTML card for one aggregated health effect.

    Args:
        text: Text shown inside the coloured box.
        pred: Effect label; selects the colours from ``color_code``.

    Returns:
        An HTML string containing a single ``clause`` box. ``text`` is
        HTML-escaped so that it is displayed literally and never parsed as
        markup.
    """
    # Local import: the module does not import ``html`` at file level.
    from html import escape

    # Labels without a colour entry use the neutral colours instead of
    # raising KeyError and aborting the page render.
    palette = color_code.get(pred, color_code["NEUTRAL"])
    fill, edge = palette[0], palette[1]
    safe_text = escape(str(text), quote=False)
    return f"""
<div>
<div class="clause" style="background-color:{fill} ; box-shadow: 0px 5px {edge}; border-color:{edge};">
<div class="clause_text">{safe_text}</div>
</div>
</div>"""
91
+
92
# Load model
load_state = st.info("Loading...")
load_state.info("Loading model...")

# The pipeline is stored in the session state under "model" and is only
# loaded when that key is not present yet.
if "model" not in st.session_state:
    try:
        # Only the call that can raise the handled error sits inside the try.
        nlp = spacy.load("en_healthsea")
    except LookupError:
        # Download model
        # NOTE(review): presumably raised when the benepar_en3 resource is
        # not available locally - confirm.
        import benepar

        load_state.info("Downloading model...")
        benepar.download("benepar_en3")
        nlp = spacy.load("en_healthsea")
    st.session_state["model"] = nlp

load_state.success("Loading complete!")
110
+
111
# Pipeline
pipeline_heading = central_text("⚙️ Pipeline")
st.markdown(pipeline_heading, unsafe_allow_html=True)

use_examples = st.checkbox("Use predefined examples")

# Either pick one of the canned reviews or let the user type their own.
if use_examples:
    text = st.selectbox("Predefined example reviews", example_reviews)
else:
    text = st.text_input(label="Write a review", value="This is great for joint pain!")

# Run the pipeline stored in the session state on the chosen review.
nlp = st.session_state["model"]
doc = nlp(text)

# NER
ner_labels = nlp.get_pipe("ner").labels
ner_colors = {"CONDITION": "#FF4B76", "BENEFIT": "#629B68"}
visualize_ner(
    doc,
    labels=ner_labels,
    show_table=False,
    title="✨ Named Entity Recognition",
    colors=ner_colors,
)

st.info("""The first processing step is to identify Conditions or Benefits with Named Entity Recognition. Conditions are diseases, symptoms and general health problems (e.g. joint pain), while Benefits are positive desired health aspects (e.g. energy)""")

st.markdown("""---""")
136
+
137
# Segmentation, Blinding, Classification
st.markdown("## 🔮 Segmentation, Blinding, Classification")

# Pair every rebuilt clause Doc with the annotation dict it was built from.
blinded_docs = healthsea_pipe.get_clauses(doc)
for blinded_doc, annotation in zip(blinded_docs, doc._.clauses):
    # Category with the highest score, together with that score.
    top_label, top_score = max(
        annotation["cats"].items(), key=operator.itemgetter(1)
    )
    confidence = round(float(top_score) * 100, 2)
    caption = f"{annotation['ent_name']} ({top_label} {confidence}%)"

    clause_html = format_clause(blinded_doc.text, caption, top_label)
    st.markdown(clause_html, unsafe_allow_html=True)
    st.markdown("\n")

st.info("""The review is segmented into sub-clauses and then classified by a Text Classification model. We additionally blind the found entities to improve generalization and also to inform the model about our current target entity of which we want to get the prediction of.
The Text Classification predicts four exclusive classes: 'Positive', 'Negative', 'Neutral', 'Anamnesis', they represent the health effect.""")

st.info("""The 'Anamnesis' class is defined as the current state of health of a reviewer (e.g. 'I am diagnosed with joint pain'). It is used to link stated health effects that are mentioned in later sentences.""")

st.markdown("""---""")
157
+
158
# Aggregation
st.markdown("## 🔗 Aggregation")

# One card per entry of doc._.health_effects; iterating items() avoids
# looking the same key up again for every field.
for target, details in doc._.health_effects.items():
    effect_label = details["effect"]
    st.markdown(
        format_effect(f"{effect_label} effect on {target}", effect_label),
        unsafe_allow_html=True,
    )
    st.markdown("\n")

st.info("""Multiple classifications are aggregated into one final classification.""")

st.markdown("""---""")
174
+
175
# Indepth: dump the raw custom Doc attributes side by side.
st.markdown("## 🔧 Pipeline attributes")
columns = st.columns(2)
clauses_col = columns[0]
effect_col = columns[1]

# First column: one JSON view per clause annotation.
clauses_col.markdown("### doc._.clauses")
for clause_annotation in doc._.clauses:
    clauses_col.json(clause_annotation)

# Second column: the health effects attribute as a single JSON view.
effect_col.markdown("### doc._.health_effects")
effect_col.json(doc._.health_effects)
data/img/Jellymation.gif ADDED

Git LFS Details

  • SHA256: c796dd42c6b93dbf75ca3045f44ad9471db737f1452fbcdd488c7b531aae79b1
  • Pointer size: 133 Bytes
  • Size of remote file: 25.4 MB
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
1
+ streamlit>=1.2.0
2
+ spacy-streamlit>=1.0.2
3
+ spacy>=3.1.4
4
+ benepar>=0.2.0
5
+
6
+ https://huggingface.co/edichief/en_healthsea/resolve/main/en_healthsea-any-py3-none-any.whl
style.css ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .kpi{
2
+ text-align: center;
3
+ border-style: solid;
4
+ border-width: 1px;
5
+ border-radius: 5px;
6
+ border-color: inherit;
7
+ }
8
+
9
+ .kpi:hover {
10
+ transform: scale(1.1);
11
+ }
12
+
13
+ .central_text{
14
+ text-align: center;
15
+ top: 50%;
16
+ }
17
+
18
+ .clause{
19
+ text-align: center;
20
+ border-style: solid;
21
+ border-width: 1px;
22
+ border-radius: 5px;
23
+ border-color: #1B7735;
24
+ box-shadow: 0px 5px #1B7735;
25
+ color: white;
26
+ margin-left: 10%;
27
+ margin-right: 10%;
28
+ padding-top: 2%;
29
+ padding-bottom: 2%;
30
+ background-color: #3C9E58;
31
+ z-index: 5;
32
+ display: block;
33
+ position: relative;
34
+ }
35
+
36
+ .clause:hover {
37
+ transform: scale(1.1);
38
+ }
39
+
40
+ .clause_text{
41
+ font-weight: bold;
42
+ }
43
+
44
+ .clause_meta{
45
+ text-align: center;
46
+ border-style: solid;
47
+ border-width: 1px;
48
+ border-radius: 5px;
49
+ border-color: #0c0c0e;
50
+ margin-left: 10%;
51
+ margin-right: 10%;
52
+ padding-top: 2%;
53
+ padding-bottom: 2%;
54
+ z-index: 3;
55
+ display: block;
56
+ position: relative;
57
+ }
support_functions.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from spacy.tokens import Doc
2
+
3
class HealthseaPipe:
    """Helper that turns the clause annotations of a Doc into new Docs."""

    # Get Clauses and their predictions
    def get_clauses(self, doc):
        """Build one ``Doc`` per entry in ``doc._.clauses``.

        Each entry is read as a mapping with the keys ``split_indices``,
        ``has_ent``, ``ent_indices`` and ``blinder``. The tokens between the
        two split indices are copied into a new ``Doc``. When ``has_ent`` is
        truthy, the tokens in the ``ent_indices`` range are replaced by a
        single word: the ``blinder`` string with ``<`` and ``>`` removed.

        Args:
            doc: A processed Doc that provides ``doc._.clauses``.

        Returns:
            A list of ``Doc`` objects, in the order of ``doc._.clauses``.
        """
        clauses = []
        for clause in doc._.clauses:
            split = clause["split_indices"]
            has_ent = clause["has_ent"]
            words = []
            spaces = []

            for token in doc[split[0] : split[1]]:
                if has_ent:
                    ent = clause["ent_indices"]
                    if token.i == ent[0]:
                        # The first entity token becomes the blinder word,
                        # always followed by a space.
                        words.append(
                            clause["blinder"].replace(">", "").replace("<", "")
                        )
                        spaces.append(True)
                        continue
                    if ent[0] <= token.i < ent[1]:
                        # Remaining entity tokens are dropped.
                        continue
                words.append(token.text)
                # Doc expects one boolean per word; whitespace_ is a string.
                spaces.append(bool(token.whitespace_))

            clauses.append(Doc(doc.vocab, words=words, spaces=spaces))

        return clauses