edichief committed on
Commit
2db6e66
1 Parent(s): 40351d8

Add more info and warnings

Browse files
Files changed (3) hide show
  1. app.py +3 -6
  2. visualize_dataset.py +9 -6
  3. visualize_pipeline.py +10 -8
app.py CHANGED
@@ -15,17 +15,14 @@ intro.subheader("Create easier access to health✨")
15
 
16
  jellyfish.image("data/img/Jellymation.gif")
17
  intro.markdown(
18
- "Healthsea is a spaCy v3 pipeline that analyzes user reviews to supplement products by extracting their effects on health."
19
  )
20
  intro.markdown(
21
- """With this app, you're able to explore the results of Healthsea on up to 1 million reviews.
22
- You can search for any health aspect, whether it's an disease (e.g. joint pain) or a desired health effect such as (e.g. energy),
23
- the app returns a list of the best products and substances. You can also explore the pipeline itself, by writing custom reviews and
24
- get an insight into the individual proccessing steps.
25
  """
26
  )
27
  intro.markdown(
28
- """You can read more in our [blog post]().
29
  """
30
  )
31
 
 
15
 
16
  jellyfish.image("data/img/Jellymation.gif")
17
  intro.markdown(
18
+ "Healthsea is a spaCy v3 pipeline that analyzes user-generated reviews to supplementary products by extracting the effects on health stated by the reviewer."
19
  )
20
  intro.markdown(
21
+ """With this app, you're able to explore the pipeline and it's result on productive data. You can choose between two different apps `Visualize dataset` and `Visualize pipeline`
 
 
 
22
  """
23
  )
24
  intro.markdown(
25
+ """The code for Healthsea is provided in this [github repository](https://github.com/thomashacker/healthsea) and if you're interested you can read more about the project in our [blog post]().
26
  """
27
  )
28
 
visualize_dataset.py CHANGED
@@ -52,7 +52,10 @@ def visualize_dataset():
52
 
53
  # KPI
54
 
55
- st.markdown("""This app presents the analyzed dataset of up to one million reviews. You can search for the best products and substances to any health aspect based on what reviewers wrote in their reviews.""")
 
 
 
56
 
57
  st.markdown("""---""")
58
 
@@ -70,10 +73,10 @@ def visualize_dataset():
70
  # Expander
71
  show_conditions, show_benefits = st.columns(2)
72
 
73
- with show_conditions.expander("Top 1000 mentioned Conditions"):
74
  st.write(search_engine.get_all_conditions_df())
75
 
76
- with show_benefits.expander("Top 1000 mentioned Benefits"):
77
  st.write(search_engine.get_all_benefits_df())
78
 
79
  st.markdown("""---""")
@@ -85,7 +88,7 @@ def visualize_dataset():
85
  st.markdown("""---""")
86
  st.markdown(central_text("🧃 Products"), unsafe_allow_html=True)
87
 
88
- st.markdown("""The products are scored based on what reviewers say. Additional variables in the scoring function are product rating, helpful count and whether the review is considered 'fake'. """)
89
 
90
  # DataFrame
91
  st.write(search_engine.get_products_df(search, n))
@@ -115,7 +118,7 @@ def visualize_dataset():
115
  current_aspect = search_engine.get_aspect_meta(aspect)
116
  vectors.append((current_aspect["name"], current_aspect["vector"]))
117
  st.markdown("\n")
118
- st.markdown("""To improve the search, the table also shows results of other health aspects with a high similarity""")
119
  st.write(search_engine.tsne_plot(vectors))
120
 
121
  else:
@@ -133,7 +136,7 @@ def visualize_dataset():
133
 
134
  # Substances
135
  st.markdown(central_text("🍯 Substances"), unsafe_allow_html=True)
136
- st.markdown("""The scores of the substances are based on the products""")
137
 
138
  # DataFrame
139
  st.write(search_engine.get_substances_df(search, n))
 
52
 
53
  # KPI
54
 
55
+ st.info("""This app presents the analyzed dataset of up to one million reviews. You can search for the products and substances with the highest score based on health aspect. The score is based on what reviewers wrote in their reviews.""")
56
+ st.warning("""Please note that the results produced by Healthsea should not be used as a foundation for solving health problems neither do we want to advocate that supplement products are able to solve everyone's health issues.
57
+ Healthsea is a research project that presents a technical approach on analyzing user-generated reviews and acts as a proof-of-concept.""")
58
+
59
 
60
  st.markdown("""---""")
61
 
 
73
  # Expander
74
  show_conditions, show_benefits = st.columns(2)
75
 
76
+ with show_conditions.expander("Top mentioned Conditions"):
77
  st.write(search_engine.get_all_conditions_df())
78
 
79
+ with show_benefits.expander("Top mentioned Benefits"):
80
  st.write(search_engine.get_all_benefits_df())
81
 
82
  st.markdown("""---""")
 
88
  st.markdown("""---""")
89
  st.markdown(central_text("🧃 Products"), unsafe_allow_html=True)
90
 
91
+ st.info("""The products are scored based on what reviewers say. Additional variables in the scoring function are product rating, helpful count and whether the review is considered 'fake'. """)
92
 
93
  # DataFrame
94
  st.write(search_engine.get_products_df(search, n))
 
118
  current_aspect = search_engine.get_aspect_meta(aspect)
119
  vectors.append((current_aspect["name"], current_aspect["vector"]))
120
  st.markdown("\n")
121
+ st.info("""To improve the search, the table also shows results of other health aspects with a high similarity""")
122
  st.write(search_engine.tsne_plot(vectors))
123
 
124
  else:
 
136
 
137
  # Substances
138
  st.markdown(central_text("🍯 Substances"), unsafe_allow_html=True)
139
+ st.info("""The scores of the substances are based on the products""")
140
 
141
  # DataFrame
142
  st.write(search_engine.get_substances_df(search, n))
visualize_pipeline.py CHANGED
@@ -60,10 +60,10 @@ def visualize_pipeline():
60
  </div>"""
61
  return html
62
 
63
- load_state = st.markdown ("#### Loading...")
64
  # Load model
65
  try:
66
- load_state.markdown ("#### Loading model...")
67
  if "model" not in st.session_state:
68
  nlp = spacy.load("en_healthsea")
69
  st.session_state["model"] = nlp
@@ -72,15 +72,15 @@ def visualize_pipeline():
72
  except LookupError:
73
  import nltk
74
  import benepar
75
- load_state.markdown ("#### Downloading model...")
76
  benepar.download('benepar_en3')
77
  if "model" not in st.session_state:
78
  nlp = spacy.load("en_healthsea")
79
  st.session_state["model"] = nlp
80
- load_state.markdown ("#### Loading done!")
81
 
82
  # Pipeline
83
- st.markdown("""This app visualizes the processing steps of the Healthsea pipeline. You can test it by writing an example review.""")
84
 
85
  st.markdown("""---""")
86
 
@@ -105,7 +105,7 @@ def visualize_pipeline():
105
  colors={"CONDITION": "#FF4B76", "BENEFIT": "#629B68"},
106
  )
107
 
108
- st.markdown("""The first processing step is to identify Conditions or Benefits with Named Entity Recognition. Conditions are diseases, symptoms and general health problems (e.g. joint pain), while Benefits are positive desired health aspects (e.g. energy)""")
109
 
110
  st.markdown("""---""")
111
 
@@ -123,9 +123,11 @@ def visualize_pipeline():
123
  )
124
  st.markdown("\n")
125
 
126
- st.markdown("""The review is segmented into sub-clauses and then classified by a Text Classification model. We additionally blind the found entities to improve generalization and also to inform the model about our current target entity of which we want to get the prediction of.
127
  The Text Classification predicts four exclusive classes: 'Positive', 'Negative', 'Neutral', 'Anamnesis', they represent the health effect.""")
128
 
 
 
129
  st.markdown("""---""")
130
 
131
  # Aggregation
@@ -141,7 +143,7 @@ def visualize_pipeline():
141
  )
142
  st.markdown("\n")
143
 
144
- st.markdown("""Multiple classification are aggregated into one final classification.""")
145
 
146
  st.markdown("""---""")
147
  # Indepth
 
60
  </div>"""
61
  return html
62
 
63
+ load_state = st.info("Loading...")
64
  # Load model
65
  try:
66
+ load_state.info("Loading model...")
67
  if "model" not in st.session_state:
68
  nlp = spacy.load("en_healthsea")
69
  st.session_state["model"] = nlp
 
72
  except LookupError:
73
  import nltk
74
  import benepar
75
+ load_state.info ("Downloading model...")
76
  benepar.download('benepar_en3')
77
  if "model" not in st.session_state:
78
  nlp = spacy.load("en_healthsea")
79
  st.session_state["model"] = nlp
80
+ load_state.success ("Loading complete!")
81
 
82
  # Pipeline
83
+ st.info("""This app visualizes the processing steps of the Healthsea pipeline. You can test it by writing an example review.""")
84
 
85
  st.markdown("""---""")
86
 
 
105
  colors={"CONDITION": "#FF4B76", "BENEFIT": "#629B68"},
106
  )
107
 
108
+ st.info("""The first processing step is to identify Conditions or Benefits with Named Entity Recognition. Conditions are diseases, symptoms and general health problems (e.g. joint pain), while Benefits are positive desired health aspects (e.g. energy)""")
109
 
110
  st.markdown("""---""")
111
 
 
123
  )
124
  st.markdown("\n")
125
 
126
+ st.info("""The review is segmented into sub-clauses and then classified by a Text Classification model. We additionally blind the found entities to improve generalization and also to inform the model about our current target entity of which we want to get the prediction of.
127
  The Text Classification predicts four exclusive classes: 'Positive', 'Negative', 'Neutral', 'Anamnesis', they represent the health effect.""")
128
 
129
+ st.info("""The 'Anamnesis' class is defined as the current state of health of a reviewer (e.g. 'I am diagnosed with joint pain'). It is used to link stated health effects that are mentioned in later sentences.""")
130
+
131
  st.markdown("""---""")
132
 
133
  # Aggregation
 
143
  )
144
  st.markdown("\n")
145
 
146
+ st.info("""Multiple classification are aggregated into one final classification.""")
147
 
148
  st.markdown("""---""")
149
  # Indepth