Spaces:
Runtime error
Runtime error
Add more info and warnings
Browse files- app.py +3 -6
- visualize_dataset.py +9 -6
- visualize_pipeline.py +10 -8
app.py
CHANGED
@@ -15,17 +15,14 @@ intro.subheader("Create easier access to health✨")
|
|
15 |
|
16 |
jellyfish.image("data/img/Jellymation.gif")
|
17 |
intro.markdown(
|
18 |
-
"Healthsea is a spaCy v3 pipeline that analyzes user reviews to
|
19 |
)
|
20 |
intro.markdown(
|
21 |
-
"""With this app, you're able to explore the
|
22 |
-
You can search for any health aspect, whether it's an disease (e.g. joint pain) or a desired health effect such as (e.g. energy),
|
23 |
-
the app returns a list of the best products and substances. You can also explore the pipeline itself, by writing custom reviews and
|
24 |
-
get an insight into the individual proccessing steps.
|
25 |
"""
|
26 |
)
|
27 |
intro.markdown(
|
28 |
-
"""
|
29 |
"""
|
30 |
)
|
31 |
|
|
|
15 |
|
16 |
jellyfish.image("data/img/Jellymation.gif")
|
17 |
intro.markdown(
|
18 |
+
"Healthsea is a spaCy v3 pipeline that analyzes user-generated reviews of supplementary products by extracting the effects on health stated by the reviewer."
|
19 |
)
|
20 |
intro.markdown(
|
21 |
+
"""With this app, you're able to explore the pipeline and its results on productive data. You can choose between two different apps: `Visualize dataset` and `Visualize pipeline`.
|
|
|
|
|
|
|
22 |
"""
|
23 |
)
|
24 |
intro.markdown(
|
25 |
+
"""The code for Healthsea is provided in this [github repository](https://github.com/thomashacker/healthsea) and if you're interested you can read more about the project in our [blog post]().
|
26 |
"""
|
27 |
)
|
28 |
|
visualize_dataset.py
CHANGED
@@ -52,7 +52,10 @@ def visualize_dataset():
|
|
52 |
|
53 |
# KPI
|
54 |
|
55 |
-
st.
|
|
|
|
|
|
|
56 |
|
57 |
st.markdown("""---""")
|
58 |
|
@@ -70,10 +73,10 @@ def visualize_dataset():
|
|
70 |
# Expander
|
71 |
show_conditions, show_benefits = st.columns(2)
|
72 |
|
73 |
-
with show_conditions.expander("Top
|
74 |
st.write(search_engine.get_all_conditions_df())
|
75 |
|
76 |
-
with show_benefits.expander("Top
|
77 |
st.write(search_engine.get_all_benefits_df())
|
78 |
|
79 |
st.markdown("""---""")
|
@@ -85,7 +88,7 @@ def visualize_dataset():
|
|
85 |
st.markdown("""---""")
|
86 |
st.markdown(central_text("🧃 Products"), unsafe_allow_html=True)
|
87 |
|
88 |
-
st.
|
89 |
|
90 |
# DataFrame
|
91 |
st.write(search_engine.get_products_df(search, n))
|
@@ -115,7 +118,7 @@ def visualize_dataset():
|
|
115 |
current_aspect = search_engine.get_aspect_meta(aspect)
|
116 |
vectors.append((current_aspect["name"], current_aspect["vector"]))
|
117 |
st.markdown("\n")
|
118 |
-
st.
|
119 |
st.write(search_engine.tsne_plot(vectors))
|
120 |
|
121 |
else:
|
@@ -133,7 +136,7 @@ def visualize_dataset():
|
|
133 |
|
134 |
# Substances
|
135 |
st.markdown(central_text("🍯 Substances"), unsafe_allow_html=True)
|
136 |
-
st.
|
137 |
|
138 |
# DataFrame
|
139 |
st.write(search_engine.get_substances_df(search, n))
|
|
|
52 |
|
53 |
# KPI
|
54 |
|
55 |
+
st.info("""This app presents the analyzed dataset of up to one million reviews. You can search for the products and substances with the highest score based on health aspect. The score is based on what reviewers wrote in their reviews.""")
|
56 |
+
st.warning("""Please note that the results produced by Healthsea should not be used as a foundation for solving health problems, nor do we want to advocate that supplement products are able to solve everyone's health issues.
|
57 |
+
Healthsea is a research project that presents a technical approach to analyzing user-generated reviews and acts as a proof-of-concept.""")
|
58 |
+
|
59 |
|
60 |
st.markdown("""---""")
|
61 |
|
|
|
73 |
# Expander
|
74 |
show_conditions, show_benefits = st.columns(2)
|
75 |
|
76 |
+
with show_conditions.expander("Top mentioned Conditions"):
|
77 |
st.write(search_engine.get_all_conditions_df())
|
78 |
|
79 |
+
with show_benefits.expander("Top mentioned Benefits"):
|
80 |
st.write(search_engine.get_all_benefits_df())
|
81 |
|
82 |
st.markdown("""---""")
|
|
|
88 |
st.markdown("""---""")
|
89 |
st.markdown(central_text("🧃 Products"), unsafe_allow_html=True)
|
90 |
|
91 |
+
st.info("""The products are scored based on what reviewers say. Additional variables in the scoring function are product rating, helpful count and whether the review is considered 'fake'. """)
|
92 |
|
93 |
# DataFrame
|
94 |
st.write(search_engine.get_products_df(search, n))
|
|
|
118 |
current_aspect = search_engine.get_aspect_meta(aspect)
|
119 |
vectors.append((current_aspect["name"], current_aspect["vector"]))
|
120 |
st.markdown("\n")
|
121 |
+
st.info("""To improve the search, the table also shows results of other health aspects with a high similarity""")
|
122 |
st.write(search_engine.tsne_plot(vectors))
|
123 |
|
124 |
else:
|
|
|
136 |
|
137 |
# Substances
|
138 |
st.markdown(central_text("🍯 Substances"), unsafe_allow_html=True)
|
139 |
+
st.info("""The scores of the substances are based on the products""")
|
140 |
|
141 |
# DataFrame
|
142 |
st.write(search_engine.get_substances_df(search, n))
|
visualize_pipeline.py
CHANGED
@@ -60,10 +60,10 @@ def visualize_pipeline():
|
|
60 |
</div>"""
|
61 |
return html
|
62 |
|
63 |
-
load_state = st.
|
64 |
# Load model
|
65 |
try:
|
66 |
-
load_state.
|
67 |
if "model" not in st.session_state:
|
68 |
nlp = spacy.load("en_healthsea")
|
69 |
st.session_state["model"] = nlp
|
@@ -72,15 +72,15 @@ def visualize_pipeline():
|
|
72 |
except LookupError:
|
73 |
import nltk
|
74 |
import benepar
|
75 |
-
load_state.
|
76 |
benepar.download('benepar_en3')
|
77 |
if "model" not in st.session_state:
|
78 |
nlp = spacy.load("en_healthsea")
|
79 |
st.session_state["model"] = nlp
|
80 |
-
load_state.
|
81 |
|
82 |
# Pipeline
|
83 |
-
st.
|
84 |
|
85 |
st.markdown("""---""")
|
86 |
|
@@ -105,7 +105,7 @@ def visualize_pipeline():
|
|
105 |
colors={"CONDITION": "#FF4B76", "BENEFIT": "#629B68"},
|
106 |
)
|
107 |
|
108 |
-
st.
|
109 |
|
110 |
st.markdown("""---""")
|
111 |
|
@@ -123,9 +123,11 @@ def visualize_pipeline():
|
|
123 |
)
|
124 |
st.markdown("\n")
|
125 |
|
126 |
-
st.
|
127 |
The Text Classification predicts four exclusive classes: 'Positive', 'Negative', 'Neutral', 'Anamnesis', they represent the health effect.""")
|
128 |
|
|
|
|
|
129 |
st.markdown("""---""")
|
130 |
|
131 |
# Aggregation
|
@@ -141,7 +143,7 @@ def visualize_pipeline():
|
|
141 |
)
|
142 |
st.markdown("\n")
|
143 |
|
144 |
-
st.
|
145 |
|
146 |
st.markdown("""---""")
|
147 |
# Indepth
|
|
|
60 |
</div>"""
|
61 |
return html
|
62 |
|
63 |
+
load_state = st.info("Loading...")
|
64 |
# Load model
|
65 |
try:
|
66 |
+
load_state.info("Loading model...")
|
67 |
if "model" not in st.session_state:
|
68 |
nlp = spacy.load("en_healthsea")
|
69 |
st.session_state["model"] = nlp
|
|
|
72 |
except LookupError:
|
73 |
import nltk
|
74 |
import benepar
|
75 |
+
load_state.info ("Downloading model...")
|
76 |
benepar.download('benepar_en3')
|
77 |
if "model" not in st.session_state:
|
78 |
nlp = spacy.load("en_healthsea")
|
79 |
st.session_state["model"] = nlp
|
80 |
+
load_state.success ("Loading complete!")
|
81 |
|
82 |
# Pipeline
|
83 |
+
st.info("""This app visualizes the processing steps of the Healthsea pipeline. You can test it by writing an example review.""")
|
84 |
|
85 |
st.markdown("""---""")
|
86 |
|
|
|
105 |
colors={"CONDITION": "#FF4B76", "BENEFIT": "#629B68"},
|
106 |
)
|
107 |
|
108 |
+
st.info("""The first processing step is to identify Conditions or Benefits with Named Entity Recognition. Conditions are diseases, symptoms and general health problems (e.g. joint pain), while Benefits are positive desired health aspects (e.g. energy)""")
|
109 |
|
110 |
st.markdown("""---""")
|
111 |
|
|
|
123 |
)
|
124 |
st.markdown("\n")
|
125 |
|
126 |
+
st.info("""The review is segmented into sub-clauses and then classified by a Text Classification model. We additionally blind the found entities to improve generalization and also to inform the model about our current target entity for which we want to get a prediction.
|
127 |
The Text Classification predicts four exclusive classes: 'Positive', 'Negative', 'Neutral', 'Anamnesis', they represent the health effect.""")
|
128 |
|
129 |
+
st.info("""The 'Anamnesis' class is defined as the current state of health of a reviewer (e.g. 'I am diagnosed with joint pain'). It is used to link stated health effects that are mentioned in later sentences.""")
|
130 |
+
|
131 |
st.markdown("""---""")
|
132 |
|
133 |
# Aggregation
|
|
|
143 |
)
|
144 |
st.markdown("\n")
|
145 |
|
146 |
+
st.info("""Multiple classifications are aggregated into one final classification.""")
|
147 |
|
148 |
st.markdown("""---""")
|
149 |
# Indepth
|