"""
This page allows users to browse the test data set. Mainly this is just to get a sense of the size, content
and composition of the dataset behind the project.
"""

import streamlit as st
import pandas as pd
import plotly.express as px

from src.st_helpers import st_setup
from src.datatypes import *  # presumably provides DataLoader, Category, Product, Feature, Review - confirm

# Name of the editable checkbox column in the product table.
_SHOW_REVIEWS_COLUMN = 'Show Reviews?'


def show_db_selector_and_summary_in_container(container) -> None:
    """
    Writes the database selector (only when more than one database is available), the headline
    record counts and a "Force data reload" button into the given container.

    :param container: streamlit container (or column) to render into
    """
    with container:
        dbs = DataLoader.available_dbs()
        if len(dbs) > 1:
            st.write('**:1234: Database Selector**')
            idx = dbs.index(DataLoader.active_db)
            selected_db = st.selectbox("Connected to:", dbs, index=idx, label_visibility="collapsed")
            DataLoader.set_db_name(selected_db)

        st.write('**:1234: Summary Statistics**')
        summary_lines = [
            f'- **{len(Category.all):,}** categories',
            f'- **{len(Product.all):,}** products',
            f'- **{len(Feature.all):,}** features',
            f'- **{len(Review.all):,}** reviews',
        ]
        st.markdown('\n'.join(summary_lines))

        if st.button('Force data reload'):
            DataLoader.load_data(True)
            # Rerun the script so the whole page is redrawn after the reload
            st.rerun()


def _show_category_box_plot(rows, value_column: str) -> None:
    """
    Draws a per-category box plot of one product attribute into the current streamlit context.

    :param rows: list of [category name, value] pairs, one per product
    :param value_column: column title for the value, also used as the y-axis label
    """
    df = pd.DataFrame(rows, columns=['Category', value_column])
    fig = px.box(df, x="Category", y=value_column)
    # Vertical tick labels so the category names do not overlap
    fig.update_xaxes(tickangle=-90)
    st.plotly_chart(fig, use_container_width=True)


def show_data_summary_charts_in_container(container) -> None:
    """
    Writes three expanders into the given container: total review count per category (bar chart),
    and the distribution of mean product rating and of product price per category (box plots).

    :param container: streamlit container (or column) to render into
    """
    with container:
        cats = Category.all_sorted()

        with st.expander("**Review Counts**"):
            df = pd.DataFrame({
                "Category": [c.name for c in cats],
                "Review Count": [sum(p.review_count for p in c.products) for c in cats],
            })
            st.bar_chart(df, x="Category", y="Review Count")

        with st.expander("**Product Ratings**"):
            ratings = [[c.name, p.average_rating] for c in cats for p in c.products]
            _show_category_box_plot(ratings, 'Mean Product Rating')

        with st.expander("**Product Prices**"):
            prices = [[c.name, p.price] for c in cats for p in c.products]
            _show_category_box_plot(prices, 'Price')


def show_top_section() -> None:
    """
    Writes the top section to the streamlit page, showing the currently selected database and some associated
    summary numbers and charts
    """
    top_section = st.container()
    with top_section:
        summary_left, summary_right = st.columns([1, 3])
        show_db_selector_and_summary_in_container(summary_left)
        show_data_summary_charts_in_container(summary_right)


def get_user_selected_category(container) -> Category:
    """
    Show a selector to pick a category and return the selected category

    :param container: streamlit container (or column) to render the selector into
    :return: the result of Category.by_name for the chosen category name
    """
    with container:
        st.write('**Category**')
        options = [c.name for c in Category.all_sorted()]
        selection = st.radio("**Category**", options, label_visibility="collapsed")
    return Category.by_name(selection)


def show_category_datatable_in_container(category: Category, container) -> None:
    """
    Displays a category of products into a given container. Shows the products themselves as a table and then,
    if any products are ticked to show their reviews, displays the associated reviews also.

    :param category: the category whose features, products and reviews are displayed
    :param container: streamlit container (or column) to render into
    """
    with container:
        features = sorted(f.name for f in category.features)
        st.write(f"**{category.singular_name} Features ({len(features)}):**")
        st.write('; '.join(features))

        prod_columns = ['Name', 'Price', 'Feature Count', 'Features', 'Review Count', 'Average Rating',
                        _SHOW_REVIEWS_COLUMN, 'Description']
        prod_index = [p.id for p in category.products]
        # The checkbox column always starts unticked (False)
        prod_data = [[p.name, p.price, p.feature_count, ', '.join(str(f) for f in p.features),
                      p.review_count, p.average_rating, False, p.description]
                     for p in category.products]
        prod_df = pd.DataFrame(prod_data, index=prod_index, columns=prod_columns)

        total_reviews = sum(p.review_count for p in category.products)
        st.write(f"**{category.name} ({len(prod_index)}). Having {total_reviews} reviews in total:**")

        # Every column except the checkbox is read-only; derived from prod_columns so the lists cannot drift
        read_only_columns = tuple(col for col in prod_columns if col != _SHOW_REVIEWS_COLUMN)
        edited_df = st.data_editor(prod_df, disabled=read_only_columns)

        # Coerce to bool so the mask is a valid boolean indexer whatever dtype the column comes back as
        ticked_mask = edited_df[_SHOW_REVIEWS_COLUMN].astype(bool)
        ticked_df = edited_df[ticked_mask]
        selected_product_count = ticked_mask.sum()
        selected_review_count = ticked_df['Review Count'].sum()
        st.write(f"**{category.singular_name} Reviews ({selected_review_count} from {selected_product_count} "
                 f"products):**")

        # Gate on ticked products, not on review count: ticking only products without reviews
        # must not bring back the "check boxes" hint
        if selected_product_count > 0:
            products = Product.for_ids(list(ticked_df.index))
            rev_index = []
            rev_data = []
            for p in products:
                for r in p.reviews:
                    rev_index.append(r.id)
                    rev_data.append([p.name, r.rating, r.review_text])
            rev_df = pd.DataFrame(rev_data, index=rev_index, columns=['Product', 'Review Rating', 'Review Text'])
            # NOTE(review): oversized width is presumably a trick to make the table fill the column - confirm
            st.dataframe(rev_df, width=10000)
        else:
            st.write("Check boxes in the table above to see reviews for products.")


def show_bottom_section() -> None:
    """
    Writes the bottom section to the streamlit page: a category selector on the left and the selected
    category's product table (with optional reviews) on the right, under a heading naming the category.
    """
    # Set up space. The heading container is created first so it sits above the columns, but it can only
    # be filled in once the selector below has been evaluated.
    selected_category_sub_heading = st.container()
    category_col, datatable_col = st.columns([1, 3])

    # Display into containers
    selected_category = get_user_selected_category(category_col)
    with selected_category_sub_heading:
        st.write(f'### {selected_category.name}')
    show_category_datatable_in_container(selected_category, datatable_col)


# Page entry point: the page is only rendered when st_setup returns a truthy value
if st_setup('LLM Arch'):
    if not DataLoader.loaded:
        DataLoader.load_data()

    st.write("# Data Browser")
    show_top_section()
    show_bottom_section()