File size: 5,228 Bytes
69abbc0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
03bdce4
 
69abbc0
 
 
 
 
 
 
 
 
 
 
 
 
03bdce4
 
 
 
 
 
 
 
 
 
 
69abbc0
 
 
 
 
 
 
03bdce4
 
69abbc0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
03bdce4
69abbc0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
03bdce4
69abbc0
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
import streamlit as st
from pathlib import Path
import json
from support_functions import HealthseaSearch

def visualize_dataset():
    """Render the dataset exploration page of the Streamlit app.

    Loads the health aspect, product, condition and benefit JSON files from
    the ``data`` directory, builds a ``HealthseaSearch`` from them, and then
    draws, top to bottom: the dataset KPIs, expandable tables of all
    conditions and benefits, the search input and result-count slider, the
    product table for the searched health aspect with its KPIs (plus a t-SNE
    plot when the aspect has aliases), and the substance table.

    Returns:
        None. All output is written to the Streamlit page.
    """
    # Configuration
    health_aspect_path = Path("data/health_aspects.json")
    product_path = Path("data/products.json")
    condition_path = Path("data/condition_vectors.json")
    benefit_path = Path("data/benefit_vectors.json")

    # Total number of reviews; hard-coded here rather than derived from the
    # loaded files. Kept as an int: the float literal 933.240 renders as
    # "933.24" inside an f-string.
    review_count = 933_240

    # Load data
    # NOTE(review): ``st.cache`` is deprecated in recent Streamlit releases in
    # favour of ``st.cache_data``; left unchanged because the installed
    # Streamlit version is not visible from this file.
    @st.cache(allow_output_mutation=True)
    def load_data(
        _health_aspect_path: Path,
        _product_path: Path,
        _condition_path: Path,
        _benefit_path: Path,
    ):
        """Read the four JSON data files and return their parsed contents.

        Returns:
            A tuple ``(health_aspects, products, conditions, benefits)``.
        """
        # JSON is UTF-8; do not depend on the platform's default encoding.
        with open(_health_aspect_path, encoding="utf-8") as reader:
            health_aspects = json.load(reader)
        with open(_product_path, encoding="utf-8") as reader:
            products = json.load(reader)
        with open(_condition_path, encoding="utf-8") as reader:
            conditions = json.load(reader)
        with open(_benefit_path, encoding="utf-8") as reader:
            benefits = json.load(reader)
        return health_aspects, products, conditions, benefits

    # Functions
    def kpi(n, text):
        """Return the HTML snippet for a KPI tile showing ``n`` above ``text``."""
        html = f"""
        <div class='kpi'>
            <h1 class='kpi_header'>{n}</h1>
            <span>{text}</span>
        </div>
        """
        return html

    def central_text(text):
        """Return ``text`` wrapped in an ``<h2 class='central_text'>`` heading."""
        html = f"""<h2 class='central_text'>{text}</h2>"""
        return html

    # Loading data
    health_aspects, products, conditions, benefits = load_data(
        health_aspect_path, product_path, condition_path, benefit_path
    )
    search_engine = HealthseaSearch(health_aspects, products, conditions, benefits)

    # KPI

    st.markdown("""This app presents the analyzed dataset of up to one million reviews. You can search for the best products and substances to any health aspect based on what reviewers wrote in their reviews.""")

    st.markdown("""---""")

    st.markdown(central_text("🎀 Dataset"), unsafe_allow_html=True)

    kpi_products, kpi_reviews, kpi_condition, kpi_benefit = st.columns(4)

    kpi_products.markdown(kpi(len(products), "Products"), unsafe_allow_html=True)
    kpi_reviews.markdown(kpi(f"{review_count:,}", "Reviews"), unsafe_allow_html=True)
    kpi_condition.markdown(kpi(len(conditions), "Conditions"), unsafe_allow_html=True)
    kpi_benefit.markdown(kpi(len(benefits), "Benefits"), unsafe_allow_html=True)

    st.markdown("""---""")

    # Expander
    show_conditions, show_benefits = st.columns(2)

    with show_conditions.expander("Top 1000 mentioned Conditions"):
        st.write(search_engine.get_all_conditions_df())

    with show_benefits.expander("Top 1000 mentioned Benefits"):
        st.write(search_engine.get_all_benefits_df())

    st.markdown("""---""")

    # Search
    search = st.text_input(label="Search for an health aspect", value="joint pain")
    n = st.slider("Show top n results", min_value=10, max_value=1000, value=25)

    st.markdown("""---""")
    st.markdown(central_text("🧃 Products"), unsafe_allow_html=True)

    st.markdown("""The products are scored based on what reviewers say. Additional variables in the scoring function are product rating, helpful count and whether the review is considered 'fake'. """)

    # DataFrame
    st.write(search_engine.get_products_df(search, n))

    # KPI & Alias
    # Look the searched aspect up once and reuse it below.
    # NOTE(review): assumes get_aspect / get_aspect_meta are side-effect-free
    # lookups that return the same data on repeated calls — confirm.
    aspect = search_engine.get_aspect(search)
    aspect_meta = search_engine.get_aspect_meta(search)
    aspect_alias = aspect["alias"]
    has_alias = len(aspect_alias) > 0

    # Mentions and Products are always shown; a third column for the alias
    # count is only added when the aspect has aliases.
    kpi_columns = st.columns(3 if has_alias else 2)
    kpi_columns[0].markdown(
        kpi(aspect_meta["frequency"], "Mentions"),
        unsafe_allow_html=True,
    )
    kpi_columns[1].markdown(
        kpi(len(aspect["products"]), "Products"),
        unsafe_allow_html=True,
    )

    if has_alias:
        kpi_columns[2].markdown(
            kpi(len(aspect_alias), "Similar health aspects"),
            unsafe_allow_html=True,
        )

        # (name, vector) pairs for the searched aspect followed by each alias.
        metas = [aspect_meta] + [
            search_engine.get_aspect_meta(alias) for alias in aspect_alias
        ]
        vectors = [(meta["name"], meta["vector"]) for meta in metas]
        st.markdown("\n")
        st.markdown("""To improve the search, the table also shows results of other health aspects with a high similarity""")
        st.write(search_engine.tsne_plot(vectors))

    st.markdown("""---""")

    # Substances
    st.markdown(central_text("🍯 Substances"), unsafe_allow_html=True)
    st.markdown("""The scores of the substances are based on the products""")

    # DataFrame
    st.write(search_engine.get_substances_df(search, n))
    # The left column is intentionally left empty; only the right one is used.
    _, kpi_substances = st.columns(2)
    kpi_substances.markdown(
        kpi(len(aspect["substance"]), "Substances"),
        unsafe_allow_html=True,
    )