File size: 5,631 Bytes
69abbc0
8ab7af3
 
 
69abbc0
 
 
 
 
8ab7af3
69abbc0
 
 
 
 
99e6b78
69abbc0
 
 
8ab7af3
69abbc0
 
448e6c8
69abbc0
 
8ab7af3
 
 
32d8c3b
8ab7af3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69abbc0
8ab7af3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69abbc0
 
8ab7af3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69abbc0
 
8ab7af3
 
 
69abbc0
8ab7af3
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
import streamlit as st
from pathlib import Path
import json
from support_functions import HealthseaSearch

# Header
# Inject the app's custom stylesheet into the page. The file is decoded as
# UTF-8 explicitly so the result does not depend on the platform's locale
# default encoding.
with open("style.css", encoding="utf-8") as f:
    st.markdown("<style>" + f.read() + "</style>", unsafe_allow_html=True)

# Intro
st.title("Welcome to Healthsea 🪐")

# Two-column layout: descriptive text goes into `intro`, the animation into
# `jellyfish`.
intro, jellyfish = st.columns(2)
jellyfish.markdown("\n")  # written into the column before the image

intro.subheader("Create easier access to health✨")

jellyfish.image("data/img/Jellymation.gif")
intro.markdown(
    """Healthsea is an end-to-end spaCy v3 pipeline for analyzing user reviews to supplementary products and extracting their potential effects on health."""
)
intro.markdown(
    """The code for Healthsea is provided in this [github repository](https://github.com/explosion/healthsea). Visit our [blog post](https://explosion.ai/blog/healthsea) for more about the Healthsea project.
    """
)

# Full-width description of what this app shows, with a link to the companion
# pipeline app.
st.write(
    """This app visualizes the results of Healthsea on a dataset of up to 1 million reviews to 10.000 products. You can use the app to search for any health aspect, whether it's a disease (e.g. joint pain) or a positive state of health (e.g. energy), the app returns a list of products and substances.
    You can visit the [Healthsea Pipeline app](https://huggingface.co/spaces/spacy/healthsea-pipeline) for exploring the pipeline itself.
    """
)

# Disclaimer shown to every visitor.
st.warning("""Healthsea is an experimental project and the results should not be used as a foundation for solving health problems. Nor do we want to give the impression that supplements are the answer to anyone's health issues.""")

# Configuration
# Every JSON input sits in the same directory, so build the paths from it.
data_dir = Path("data")
health_aspect_path = data_dir / "health_aspects.json"
product_path = data_dir / "products.json"
condition_path = data_dir / "condition_vectors.json"
benefit_path = data_dir / "benefit_vectors.json"

# Load data
def _read_json(path: Path):
    """Parse the JSON document stored at *path* and return the decoded object."""
    # Decode as UTF-8 explicitly instead of relying on the locale default.
    with open(path, encoding="utf-8") as reader:
        return json.load(reader)


# NOTE(review): `st.cache` is deprecated in newer Streamlit releases in favor
# of `st.cache_data` / `st.cache_resource` — confirm against the pinned
# Streamlit version before upgrading.
@st.cache(allow_output_mutation=True)
def load_data(
    _health_aspect_path: Path,
    _product_path: Path,
    _condition_path: Path,
    _benefit_path: Path,
):
    """Load the four JSON datasets used by the app.

    Args:
        _health_aspect_path: Path to the health-aspects JSON file.
        _product_path: Path to the products JSON file.
        _condition_path: Path to the condition-vectors JSON file.
        _benefit_path: Path to the benefit-vectors JSON file.

    Returns:
        A 4-tuple ``(health_aspects, products, conditions, benefits)`` holding
        the decoded content of each file, in that order.

    Raises:
        OSError: If one of the files cannot be opened.
        json.JSONDecodeError: If one of the files is not valid JSON.
    """
    health_aspects = _read_json(_health_aspect_path)
    products = _read_json(_product_path)
    conditions = _read_json(_condition_path)
    benefits = _read_json(_benefit_path)
    return health_aspects, products, conditions, benefits


# Functions
def kpi(n, text):
    """Build the HTML snippet for a single KPI tile.

    Args:
        n: Value rendered inside the ``kpi_header`` heading.
        text: Caption rendered in the ``<span>`` below the heading.

    Returns:
        The HTML markup as a string. Both arguments are interpolated as-is,
        without HTML escaping.
    """
    return (
        "\n"
        "    <div class='kpi'>\n"
        f"        <h1 class='kpi_header'>{n}</h1>\n"
        f"        <span>{text}</span>\n"
        "    </div>\n"
        "    "
    )


def central_text(text):
    """Wrap *text* in an ``<h2>`` element carrying the ``central_text`` class.

    The text is inserted as-is, without HTML escaping.
    """
    return "<h2 class='central_text'>{}</h2>".format(text)

# Loading data
# Read the four JSON datasets and hand them to the search engine.
datasets = load_data(health_aspect_path, product_path, condition_path, benefit_path)
health_aspects, products, conditions, benefits = datasets
search_engine = HealthseaSearch(health_aspects, products, conditions, benefits)

# KPI
st.markdown("---")
st.markdown(central_text("🎀 Dataset"), unsafe_allow_html=True)

# One column per dataset-level KPI tile.
kpi_products, kpi_reviews, kpi_condition, kpi_benefit = st.columns(4)

def round_to_k(value):
    """Express *value* in thousands, rounded to one decimal, with a "k" suffix."""
    thousands = round(value / 1000, 1)
    return f"{thousands}k"

# (target column, raw count, caption) for each dataset-level KPI tile.
# The review count is a hard-coded constant, not derived from the loaded data.
kpi_tiles = (
    (kpi_products, len(products), "Products"),
    (kpi_reviews, 933240, "Reviews"),
    (kpi_condition, len(conditions), "Conditions"),
    (kpi_benefit, len(benefits), "Benefits"),
)
for column, count, label in kpi_tiles:
    column.markdown(kpi(round_to_k(count), label), unsafe_allow_html=True)

st.markdown("---")

# Expander
show_conditions, show_benefits = st.columns(2)

# (target column, expander title, callable producing the table to display).
expander_specs = (
    (show_conditions, "Top mentioned Conditions", search_engine.get_all_conditions_df),
    (show_benefits, "Top mentioned Benefits", search_engine.get_all_benefits_df),
)
for column, title, build_table in expander_specs:
    with column.expander(title):
        st.write(build_table())

st.markdown("---")

# Search
# Free-text query plus the number of rows requested for the result tables.
search = st.text_input(label="Search for a health aspect", value="joint pain")
n = st.slider("Show top n results", min_value=10, max_value=1000, value=25)

st.markdown("""---""")
st.markdown(central_text("🧃 Products"), unsafe_allow_html=True)

st.info("""The product score is based on the results of Healthsea. Variables used for the score are: health effect prediction, product rating, helpful count and whether the review is considered a 'fake review'. """)

# DataFrame
# Products returned by the search engine for the query, limited by `n`.
st.write(search_engine.get_products_df(search, n))

# KPI & Alias
# Look the searched aspect up once and reuse the record for both KPI tiles.
searched_aspect = search_engine.get_aspect(search)
aspect_alias = searched_aspect["alias"]

kpi_product_mentions, kpi_alias = st.columns(2)

kpi_product_mentions.markdown(
    kpi(len(searched_aspect["products"]), "Products"),
    unsafe_allow_html=True,
)
kpi_alias.markdown(
    kpi(len(aspect_alias), "Similar health aspects"),
    unsafe_allow_html=True,
)

depth = st.slider("Depth", min_value=0, max_value=5, value=2)

# Fresh dict/list accumulators are passed on every run; `depth` comes from the
# slider above.
recursive_alias, recursive_edges = search_engine.get_recursive_alias(
    search, 0, {}, [], depth
)

# (name, vector) pairs for the searched aspect followed by each of its aliases.
# NOTE(review): `vectors` is only consumed by the disabled `pyvis` call below;
# if that call is not coming back, this lookup can likely be dropped — confirm
# that `get_aspect_meta` has no side effects the graph relies on.
vectors = [
    (meta["name"], meta["vector"])
    for meta in map(search_engine.get_aspect_meta, [search, *aspect_alias])
]
st.markdown("\n")
st.info("""Health aspects with a high similarity (>=90%) are clustered together.""")
#search_engine.pyvis(vectors)
search_engine.pyvis2(recursive_alias, recursive_edges)

st.markdown("---")

# Substances
st.markdown(central_text("🍯 Substances"), unsafe_allow_html=True)
st.info("Substance scores are based on product scores")

# DataFrame
substances_df = search_engine.get_substances_df(search, n)
st.write(substances_df)

# Only the first of the two columns receives content; `empty` is never written to.
kpi_substances, empty = st.columns(2)
substance_count = len(search_engine.get_aspect(search)["substance"])
kpi_substances.markdown(kpi(substance_count, "Substances"), unsafe_allow_html=True)