Spaces:
Runtime error
Runtime error
Refactored the page into smaller functions to make it more modular and easier to follow and maintain.
Browse files- pages/020_Data_Browser.py +103 -74
pages/020_Data_Browser.py
CHANGED
@@ -5,100 +5,109 @@ import plotly.express as px
|
|
5 |
from src.st_helpers import st_setup
|
6 |
from src.datatypes import *
|
7 |
|
8 |
-
if st_setup('LLM Arch'):
|
9 |
-
if not DataLoader.loaded:
|
10 |
-
DataLoader.load_data()
|
11 |
|
12 |
-
|
13 |
-
with
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
with sumcol2:
|
36 |
-
cats = Category.all_sorted()
|
37 |
-
|
38 |
-
with st.expander("**Review Counts**"):
|
39 |
-
category_names = [c.name for c in cats]
|
40 |
-
category_review_counts = [sum([p.review_count for p in c.products]) for c in cats]
|
41 |
-
data = zip(category_names, category_review_counts)
|
42 |
-
df = pd.DataFrame(data, columns=["Category", "Review Count"])
|
43 |
-
st.bar_chart(df, x="Category", y="Review Count")
|
44 |
-
|
45 |
-
with st.expander("**Product Ratings**"):
|
46 |
-
data = []
|
47 |
-
for c in cats:
|
48 |
-
for p in c.products:
|
49 |
-
data.append([c.name, p.average_rating])
|
50 |
-
df = pd.DataFrame(data, columns=['Category', 'Mean Product Rating'])
|
51 |
-
fig = px.box(df, x="Category", y="Mean Product Rating")
|
52 |
-
fig.update_xaxes(tickangle=-90)
|
53 |
-
st.plotly_chart(fig, use_container_width=True)
|
54 |
-
|
55 |
-
with st.expander("**Product Prices**"):
|
56 |
-
data = []
|
57 |
-
for c in cats:
|
58 |
-
for p in c.products:
|
59 |
-
data.append([c.name, p.price])
|
60 |
-
df = pd.DataFrame(data, columns=['Category', 'Price'])
|
61 |
-
fig = px.box(df, x="Category", y="Price")
|
62 |
-
fig.update_xaxes(tickangle=-90)
|
63 |
-
st.plotly_chart(fig, use_container_width=True)
|
64 |
-
|
65 |
-
subhead = st.container()
|
66 |
-
|
67 |
-
col1, col2 = st.columns([1, 3])
|
68 |
-
|
69 |
-
with col1:
|
70 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
71 |
st.write('**Category**')
|
72 |
cats = Category.all_sorted()
|
73 |
options = [f"{c.name}" for c in cats]
|
74 |
selection = st.radio("**Category**", options, label_visibility="collapsed")
|
|
|
75 |
|
76 |
-
selected_category = Category.by_name(selection)
|
77 |
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
|
|
|
|
|
|
83 |
features.sort()
|
84 |
-
st.write(f"**{
|
85 |
st.write('; '.join(features))
|
86 |
|
87 |
-
prod_index = [p.id for p in
|
88 |
prod_data = [[p.name, p.price, p.feature_count, ', '.join([str(f) for f in p.features]), p.review_count,
|
89 |
-
p.average_rating, False, p.description] for p in
|
90 |
prod_columns = ['Name', 'Price', 'Feature Count', 'Features', 'Review Count', 'Average Rating', 'Show Reviews?',
|
91 |
'Description']
|
92 |
prod_df = pd.DataFrame(prod_data, index=prod_index, columns=prod_columns)
|
93 |
-
total_reviews = sum([p.review_count for p in
|
94 |
-
st.write(f"**{
|
95 |
edited_df = st.data_editor(prod_df, disabled=(
|
96 |
'Name', 'Price', 'Feature Count', 'Features', 'Review Count', 'Average Rating', 'Description'))
|
97 |
|
98 |
selected_product_count = edited_df['Show Reviews?'].sum()
|
99 |
selected_review_count = edited_df[edited_df['Show Reviews?']]['Review Count'].sum()
|
100 |
|
101 |
-
st.write(f"**{
|
102 |
if selected_review_count > 0:
|
103 |
selected_products = list(edited_df[edited_df['Show Reviews?']].index)
|
104 |
products = Product.for_ids(selected_products)
|
@@ -112,4 +121,24 @@ if st_setup('LLM Arch'):
|
|
112 |
rev_df = pd.DataFrame(rev_data, index=rev_index, columns=rev_columns)
|
113 |
st.dataframe(rev_df, width=10000)
|
114 |
else:
|
115 |
-
st.write("Check boxes in the table above to see reviews for products.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
from src.st_helpers import st_setup
|
6 |
from src.datatypes import *
|
7 |
|
|
|
|
|
|
|
8 |
|
9 |
+
def show_db_selector_and_summary_in_container(container) -> None:
|
10 |
+
with container:
|
11 |
+
dbs = DataLoader.available_dbs()
|
12 |
+
if len(dbs) > 1:
|
13 |
+
st.write(f'**:1234: Database Selector**')
|
14 |
+
idx = dbs.index(DataLoader.active_db)
|
15 |
+
DataLoader.set_db_name(st.selectbox("Connected to:", dbs, index=idx, label_visibility="collapsed"))
|
16 |
+
|
17 |
+
st.write(f'**:1234: Summary Statistics**')
|
18 |
+
summary = f'- **{len(Category.all):,}** categories'
|
19 |
+
summary += f'\n- **{len(Product.all):,}** products'
|
20 |
+
summary += f'\n- **{len(Feature.all):,}** features'
|
21 |
+
summary += f'\n- **{len(Review.all):,}** reviews'
|
22 |
+
st.markdown(summary)
|
23 |
+
if st.button('Force data reload'):
|
24 |
+
DataLoader.load_data(True)
|
25 |
+
st.rerun()
|
26 |
+
|
27 |
+
|
28 |
+
def show_data_summary_charts_in_container(container) -> None:
|
29 |
+
with container:
|
30 |
+
cats = Category.all_sorted()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
|
32 |
+
with st.expander("**Review Counts**"):
|
33 |
+
category_names = [c.name for c in cats]
|
34 |
+
category_review_counts = [sum([p.review_count for p in c.products]) for c in cats]
|
35 |
+
data = zip(category_names, category_review_counts)
|
36 |
+
df = pd.DataFrame(data, columns=["Category", "Review Count"])
|
37 |
+
st.bar_chart(df, x="Category", y="Review Count")
|
38 |
+
|
39 |
+
with st.expander("**Product Ratings**"):
|
40 |
+
data = []
|
41 |
+
for c in cats:
|
42 |
+
for p in c.products:
|
43 |
+
data.append([c.name, p.average_rating])
|
44 |
+
df = pd.DataFrame(data, columns=['Category', 'Mean Product Rating'])
|
45 |
+
fig = px.box(df, x="Category", y="Mean Product Rating")
|
46 |
+
fig.update_xaxes(tickangle=-90)
|
47 |
+
st.plotly_chart(fig, use_container_width=True)
|
48 |
+
|
49 |
+
with st.expander("**Product Prices**"):
|
50 |
+
data = []
|
51 |
+
for c in cats:
|
52 |
+
for p in c.products:
|
53 |
+
data.append([c.name, p.price])
|
54 |
+
df = pd.DataFrame(data, columns=['Category', 'Price'])
|
55 |
+
fig = px.box(df, x="Category", y="Price")
|
56 |
+
fig.update_xaxes(tickangle=-90)
|
57 |
+
st.plotly_chart(fig, use_container_width=True)
|
58 |
+
|
59 |
+
|
60 |
+
def show_top_section() -> None:
|
61 |
+
"""
|
62 |
+
Writes the top section to the streamlit page, showing the currently selected database
|
63 |
+
and some associated summary numbers and charts
|
64 |
+
"""
|
65 |
+
top_section = st.container()
|
66 |
+
with top_section:
|
67 |
+
summary_left, summary_right = st.columns([1, 3])
|
68 |
+
show_db_selector_and_summary_in_container(summary_left)
|
69 |
+
show_data_summary_charts_in_container(summary_right)
|
70 |
+
|
71 |
+
|
72 |
+
def get_user_selected_category(container) -> Category:
|
73 |
+
"""
|
74 |
+
Show a selector to pick a category and return the selected category
|
75 |
+
"""
|
76 |
+
with container:
|
77 |
st.write('**Category**')
|
78 |
cats = Category.all_sorted()
|
79 |
options = [f"{c.name}" for c in cats]
|
80 |
selection = st.radio("**Category**", options, label_visibility="collapsed")
|
81 |
+
return Category.by_name(selection)
|
82 |
|
|
|
83 |
|
84 |
+
def show_category_datatable_in_container(category, container) -> None:
|
85 |
+
"""
|
86 |
+
Displays a category of products in a given container. Shows the products themselves
|
87 |
+
as a table and then if any products are ticked to show the reviews it displays the associated reviews
|
88 |
+
also.
|
89 |
+
"""
|
90 |
+
with container:
|
91 |
+
features = [f.name for f in category.features]
|
92 |
features.sort()
|
93 |
+
st.write(f"**{category.singular_name} Features ({len(features)}):**")
|
94 |
st.write('; '.join(features))
|
95 |
|
96 |
+
prod_index = [p.id for p in category.products]
|
97 |
prod_data = [[p.name, p.price, p.feature_count, ', '.join([str(f) for f in p.features]), p.review_count,
|
98 |
+
p.average_rating, False, p.description] for p in category.products]
|
99 |
prod_columns = ['Name', 'Price', 'Feature Count', 'Features', 'Review Count', 'Average Rating', 'Show Reviews?',
|
100 |
'Description']
|
101 |
prod_df = pd.DataFrame(prod_data, index=prod_index, columns=prod_columns)
|
102 |
+
total_reviews = sum([p.review_count for p in category.products])
|
103 |
+
st.write(f"**{category.name} ({len(prod_index)}). Having {total_reviews} reviews in total:**")
|
104 |
edited_df = st.data_editor(prod_df, disabled=(
|
105 |
'Name', 'Price', 'Feature Count', 'Features', 'Review Count', 'Average Rating', 'Description'))
|
106 |
|
107 |
selected_product_count = edited_df['Show Reviews?'].sum()
|
108 |
selected_review_count = edited_df[edited_df['Show Reviews?']]['Review Count'].sum()
|
109 |
|
110 |
+
st.write(f"**{category.singular_name} Reviews ({selected_review_count} from {selected_product_count} products):**")
|
111 |
if selected_review_count > 0:
|
112 |
selected_products = list(edited_df[edited_df['Show Reviews?']].index)
|
113 |
products = Product.for_ids(selected_products)
|
|
|
121 |
rev_df = pd.DataFrame(rev_data, index=rev_index, columns=rev_columns)
|
122 |
st.dataframe(rev_df, width=10000)
|
123 |
else:
|
124 |
+
st.write("Check boxes in the table above to see reviews for products.")
|
125 |
+
|
126 |
+
|
127 |
+
def show_bottom_section() -> None:
|
128 |
+
# Set up space
|
129 |
+
selected_category_sub_heading = st.container()
|
130 |
+
category_col, datatable_col = st.columns([1, 3])
|
131 |
+
|
132 |
+
# Display into containers
|
133 |
+
selected_category = get_user_selected_category(category_col)
|
134 |
+
with selected_category_sub_heading:
|
135 |
+
st.write(f'### {selected_category.name}')
|
136 |
+
show_category_datatable_in_container(selected_category, datatable_col)
|
137 |
+
|
138 |
+
|
139 |
+
if st_setup('LLM Arch'):
|
140 |
+
if not DataLoader.loaded:
|
141 |
+
DataLoader.load_data()
|
142 |
+
st.write("# Data Browser")
|
143 |
+
show_top_section()
|
144 |
+
show_bottom_section()
|