"""Streamlit "Data Browser" page.

Shows summary statistics and per-category charts for the loaded data, then
lets the user pick a category, inspect its products in an editable table and
list the reviews of the products ticked in that table.
"""
import streamlit as st
import pandas as pd
import plotly.express as px

from src.st_helpers import st_setup
# Wildcard kept as in the original: DataLoader, Category, Product, Feature and
# Review used below are not imported anywhere else in this file.
from src.datatypes import *

# Checkbox column the user ticks to request a product's reviews.
_SHOW_REVIEWS_COLUMN = 'Show Reviews?'

# Column order of the product table; each product row is built in this order.
_PRODUCT_COLUMNS = ['Name', 'Price', 'Feature Count', 'Features', 'Review Count',
                    'Average Rating', _SHOW_REVIEWS_COLUMN, 'Description']


def _render_category_box_plot(categories, value_label, attribute):
    """Draw a box plot of ``product.<attribute>`` grouped by category name.

    Args:
        categories: Iterable of category objects exposing ``name`` and ``products``.
        value_label: Column / y-axis label for the plotted value.
        attribute: Name of the product attribute to plot.
    """
    rows = []
    for category in categories:
        rows.extend([category.name, getattr(product, attribute)]
                    for product in category.products)
    frame = pd.DataFrame(rows, columns=['Category', value_label])
    fig = px.box(frame, x="Category", y=value_label)
    fig.update_xaxes(tickangle=-90)  # rotate the category tick labels
    st.plotly_chart(fig, use_container_width=True)


def _render_statistics():
    """Render the database selector (if several DBs), entity counts and reload button."""
    dbs = DataLoader.available_dbs()
    if len(dbs) > 1:
        st.write('**:1234: Database Selector**')
        active_idx = dbs.index(DataLoader.active_db)
        chosen_db = st.selectbox("Connected to:", dbs, index=active_idx,
                                 label_visibility="collapsed")
        DataLoader.set_db_name(chosen_db)

    st.write('**:1234: Summary Statistics**')
    stats_markdown = '\n'.join([
        f'- **{len(Category.all):,}** categories',
        f'- **{len(Product.all):,}** products',
        f'- **{len(Feature.all):,}** features',
        f'- **{len(Review.all):,}** reviews',
    ])
    st.markdown(stats_markdown)

    if st.button('Force data reload'):
        # NOTE(review): the True argument presumably forces a reload even when
        # data is already loaded — confirm against DataLoader.load_data.
        DataLoader.load_data(True)
        st.rerun()


def _render_charts():
    """Render the per-category review-count, rating and price charts."""
    categories = Category.all_sorted()

    with st.expander("**Review Counts**"):
        rows = [(c.name, sum(p.review_count for p in c.products)) for c in categories]
        frame = pd.DataFrame(rows, columns=["Category", "Review Count"])
        st.bar_chart(frame, x="Category", y="Review Count")

    with st.expander("**Product Ratings**"):
        _render_category_box_plot(categories, 'Mean Product Rating', 'average_rating')

    with st.expander("**Product Prices**"):
        _render_category_box_plot(categories, 'Price', 'price')


def _render_overview():
    """Render the page title with the statistics column and the charts column."""
    with st.container():
        st.write("# Data Browser")
        stats_col, charts_col = st.columns([1, 3])
        with stats_col:
            _render_statistics()
        with charts_col:
            _render_charts()


def _render_reviews(product_ids):
    """Render a table with every review of the products identified by ``product_ids``."""
    review_ids = []
    review_rows = []
    for product in Product.for_ids(product_ids):
        for review in product.reviews:
            review_ids.append(review.id)
            review_rows.append([product.name, review.rating, review.review_text])
    review_df = pd.DataFrame(review_rows, index=review_ids,
                             columns=['Product', 'Review Rating', 'Review Text'])
    # Oversized width kept from the original; presumably meant to make the
    # table fill the available space — confirm before changing.
    st.dataframe(review_df, width=10000)


def _render_category_details(selection, category):
    """Render features, the editable product table and the selected products' reviews.

    Args:
        selection: Name of the chosen category, as shown in the radio list.
        category: Category object returned by ``Category.by_name(selection)``.
    """
    # selection[:-1] drops the last character of the category name; presumably
    # this singularises a plural name — verify against the category data.
    singular = selection[:-1]

    feature_names = sorted(f.name for f in category.features)
    st.write(f"**{singular} Features ({len(feature_names)}):**")
    st.write('; '.join(feature_names))

    # Snapshot once so the index, the rows and the total all see the same products.
    products = list(category.products)
    prod_index = [p.id for p in products]
    prod_rows = [
        [p.name, p.price, p.feature_count, ', '.join(str(f) for f in p.features),
         p.review_count, p.average_rating, False, p.description]
        for p in products
    ]
    prod_df = pd.DataFrame(prod_rows, index=prod_index, columns=_PRODUCT_COLUMNS)
    total_reviews = sum(p.review_count for p in products)
    st.write(f"**{selection} ({len(prod_index)}). Having {total_reviews} reviews in total:**")

    # Everything except the checkbox column is read-only; deriving the tuple
    # from _PRODUCT_COLUMNS keeps it in step with the table definition.
    read_only = tuple(c for c in _PRODUCT_COLUMNS if c != _SHOW_REVIEWS_COLUMN)
    edited_df = st.data_editor(prod_df, disabled=read_only)

    # Cast so the row filter is a boolean mask whatever dtype the column has.
    show_mask = edited_df[_SHOW_REVIEWS_COLUMN].astype(bool)
    selected_rows = edited_df[show_mask]
    selected_product_count = show_mask.sum()
    selected_review_count = selected_rows['Review Count'].sum()
    st.write(f"**{singular} Reviews ({selected_review_count} from {selected_product_count} products):**")

    if selected_review_count > 0:
        _render_reviews(list(selected_rows.index))
    else:
        st.write("Check boxes in the table above to see reviews for products.")


def _render_category_browser():
    """Render the category picker and the details of the chosen category."""
    # Created before the columns so the heading is laid out above them, even
    # though it is filled in only after the radio selection is known.
    subhead = st.container()
    picker_col, details_col = st.columns([1, 3])

    with picker_col:
        st.write('**Category**')
        options = [str(c.name) for c in Category.all_sorted()]
        selection = st.radio("**Category**", options, label_visibility="collapsed")
        if selection is None:
            # st.radio yields None when there is nothing to choose from; the
            # code below would otherwise fail on selection[:-1].
            st.info("No categories available.")
            return
        selected_category = Category.by_name(selection)
        with subhead:
            st.write(f'### {selection}')

    with details_col:
        _render_category_details(selection, selected_category)


if st_setup('LLM Arch'):
    if not DataLoader.loaded:
        DataLoader.load_data()

    _render_overview()
    _render_category_browser()