Spaces:
Runtime error
Runtime error
File size: 6,236 Bytes
4f07f72 53dc0ac cfa39d8 53dc0ac cfa39d8 53dc0ac cfa39d8 53dc0ac cfa39d8 53dc0ac cfa39d8 53dc0ac cfa39d8 53dc0ac cfa39d8 53dc0ac cfa39d8 53dc0ac cfa39d8 53dc0ac cfa39d8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 |
"""
This page allows users to browse the test data set. Mainly this is just to get a sense of the size,
content and composition of the dataset behind the project.
"""
import streamlit as st
import pandas as pd
import plotly.express as px
from src.st_helpers import st_setup
from src.datatypes import *
def show_db_selector_and_summary_in_container(container) -> None:
    """
    Render the database picker and the headline record counts into ``container``.

    The picker is drawn only when more than one database is available. The
    counts of categories, products, features and reviews, plus a button that
    reloads the data, are drawn underneath.
    """
    with container:
        available = DataLoader.available_dbs()
        if len(available) > 1:
            st.write('**:1234: Database Selector**')
            # Pre-select the database that is currently active.
            current_position = available.index(DataLoader.active_db)
            chosen_db = st.selectbox(
                "Connected to:",
                available,
                index=current_position,
                label_visibility="collapsed",
            )
            DataLoader.set_db_name(chosen_db)

        st.write('**:1234: Summary Statistics**')
        bullet_lines = [
            f'- **{len(Category.all):,}** categories',
            f'- **{len(Product.all):,}** products',
            f'- **{len(Feature.all):,}** features',
            f'- **{len(Review.all):,}** reviews',
        ]
        st.markdown('\n'.join(bullet_lines))

        # Reload the data (passing True, per the button's "force" label), then rerun the script.
        if st.button('Force data reload'):
            DataLoader.load_data(True)
            st.rerun()
def show_data_summary_charts_in_container(container) -> None:
    """
    Draw three expandable per-category charts into ``container``: total review
    counts (bar chart), mean product ratings (box plot) and product prices
    (box plot).
    """

    def render_box_plot(rows, value_column):
        # Shared rendering for the two box plots: one box per category, with
        # the x-axis tick labels rotated by -90 degrees.
        frame = pd.DataFrame(rows, columns=['Category', value_column])
        figure = px.box(frame, x="Category", y=value_column)
        figure.update_xaxes(tickangle=-90)
        st.plotly_chart(figure, use_container_width=True)

    with container:
        categories = Category.all_sorted()

        with st.expander("**Review Counts**"):
            # One row per category: its name and the review total over its products.
            review_rows = [
                (cat.name, sum(prod.review_count for prod in cat.products))
                for cat in categories
            ]
            review_frame = pd.DataFrame(review_rows, columns=["Category", "Review Count"])
            st.bar_chart(review_frame, x="Category", y="Review Count")

        with st.expander("**Product Ratings**"):
            # One row per product, labelled with its category.
            rating_rows = [
                [cat.name, prod.average_rating]
                for cat in categories
                for prod in cat.products
            ]
            render_box_plot(rating_rows, 'Mean Product Rating')

        with st.expander("**Product Prices**"):
            price_rows = [
                [cat.name, prod.price]
                for cat in categories
                for prod in cat.products
            ]
            render_box_plot(price_rows, 'Price')
def show_top_section() -> None:
    """
    Render the top section of the page: the database selector and summary
    numbers in a narrow left column, and the summary charts in a right column
    three times as wide.
    """
    with st.container():
        left_col, right_col = st.columns([1, 3])
        show_db_selector_and_summary_in_container(left_col)
        show_data_summary_charts_in_container(right_col)
def get_user_selected_category(container) -> Category:
    """
    Draw a radio-button list of every category into ``container`` and return
    the category whose name the user has picked.
    """
    with container:
        st.write('**Category**')
        category_names = [f"{cat.name}" for cat in Category.all_sorted()]
        # The label is collapsed; the bold heading written above serves as the title.
        picked_name = st.radio("**Category**", category_names, label_visibility="collapsed")
    return Category.by_name(picked_name)
def show_category_datatable_in_container(category, container) -> None:
    """
    Render one category's products into ``container``.

    Shows the category's sorted feature names, then an editable product table
    in which only the 'Show Reviews?' column can be changed. The reviews of
    every ticked product are listed in a second table underneath; when the
    ticked products have no reviews between them (or nothing is ticked), a
    hint is written instead.
    """
    toggle_column = 'Show Reviews?'
    product_columns = ['Name', 'Price', 'Feature Count', 'Features', 'Review Count', 'Average Rating',
                       toggle_column, 'Description']
    # Every column except the toggle is read-only in the editor.
    locked_columns = tuple(col for col in product_columns if col != toggle_column)

    with container:
        feature_names = sorted(feat.name for feat in category.features)
        st.write(f"**{category.singular_name} Features ({len(feature_names)}):**")
        st.write('; '.join(feature_names))

        # Build the product table in one pass, indexed by product id.
        product_ids = []
        product_rows = []
        review_total = 0
        for prod in category.products:
            product_ids.append(prod.id)
            product_rows.append([
                prod.name,
                prod.price,
                prod.feature_count,
                ', '.join(map(str, prod.features)),
                prod.review_count,
                prod.average_rating,
                False,  # the toggle starts unticked for every product
                prod.description,
            ])
            review_total += prod.review_count
        product_table = pd.DataFrame(product_rows, index=product_ids, columns=product_columns)

        st.write(f"**{category.name} ({len(product_ids)}). Having {review_total} reviews in total:**")
        edited_table = st.data_editor(product_table, disabled=locked_columns)

        ticked_product_count = edited_table[toggle_column].sum()
        ticked_rows = edited_table[edited_table[toggle_column]]
        ticked_review_count = ticked_rows['Review Count'].sum()
        st.write(f"**{category.singular_name} Reviews ({ticked_review_count} from {ticked_product_count} products):**")

        if not (ticked_review_count > 0):
            st.write("Check boxes in the table above to see reviews for products.")
            return

        # Pair each review with its product so names and review ids stay aligned.
        review_pairs = [
            (prod, rev)
            for prod in Product.for_ids(list(ticked_rows.index))
            for rev in prod.reviews
        ]
        review_table = pd.DataFrame(
            [[prod.name, rev.rating, rev.review_text] for prod, rev in review_pairs],
            index=[rev.id for _, rev in review_pairs],
            columns=['Product', 'Review Rating', 'Review Text'],
        )
        # NOTE(review): the oversized width presumably makes the table fill the available space — confirm.
        st.dataframe(review_table, width=10000)
def show_bottom_section() -> None:
    """
    Render the bottom section of the page: a heading naming the selected
    category, with the category picker (left column) and that category's
    product table (right column) below it.
    """
    # The heading container is created before the columns, but it can only be
    # filled in after the picker has returned the selected category.
    heading_slot = st.container()
    picker_col, table_col = st.columns([1, 3])

    chosen_category = get_user_selected_category(picker_col)
    heading_slot.write(f'### {chosen_category.name}')
    show_category_datatable_in_container(chosen_category, table_col)
# Page script: nothing below is drawn unless st_setup() returns a truthy value.
if st_setup('LLM Arch'):
    # Load the data only when DataLoader does not already report it as loaded.
    already_loaded = DataLoader.loaded
    if not already_loaded:
        DataLoader.load_data()

    st.write("# Data Browser")
    show_top_section()
    show_bottom_section()
|