Spaces:

alfraser
/

llm-arch

Runtime error

File size: 6,236 Bytes

4f07f72
 
 
 
 
53dc0ac
 
 
 
 
 
 
 
cfa39d8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53dc0ac
cfa39d8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53dc0ac
 
 
 
cfa39d8
53dc0ac
 
cfa39d8
 
 
 
 
 
 
 
53dc0ac
cfa39d8
53dc0ac
 
cfa39d8
53dc0ac
cfa39d8
53dc0ac
 
 
cfa39d8
 
53dc0ac
 
 
 
 
 
cfa39d8
53dc0ac
 
 
 
 
 
 
 
 
 
 
 
 
cfa39d8

"""
This page allows users to browse the test data set.  Mainly this is just to get a sense of the size,
content and composition of the dataset behind the project.
"""

import streamlit as st
import pandas as pd
import plotly.express as px

from src.st_helpers import st_setup
from src.datatypes import *


def show_db_selector_and_summary_in_container(container) -> None:
    """
    Renders the database controls and headline counts into the given container: a database
    selector (only when more than one database is available), the number of categories, products,
    features and reviews currently loaded, and a button which forces the data to be reloaded.
    """
    with container:
        available_dbs = DataLoader.available_dbs()
        if len(available_dbs) > 1:
            st.write('**:1234: Database Selector**')
            # Pre-select the database which is currently active
            active_position = available_dbs.index(DataLoader.active_db)
            chosen_db = st.selectbox("Connected to:", available_dbs, index=active_position,
                                     label_visibility="collapsed")
            DataLoader.set_db_name(chosen_db)

        st.write('**:1234: Summary Statistics**')
        entity_counts = [
            (len(Category.all), 'categories'),
            (len(Product.all), 'products'),
            (len(Feature.all), 'features'),
            (len(Review.all), 'reviews'),
        ]
        # One markdown bullet per entity type, counts shown with thousands separators
        st.markdown('\n'.join(f'- **{count:,}** {label}' for count, label in entity_counts))

        if st.button('Force data reload'):
            DataLoader.load_data(True)
            st.rerun()


def show_data_summary_charts_in_container(container) -> None:
    """
    Renders three expanders into the given container, each holding a per-category chart:
    a bar chart of total review counts, a box plot of product average ratings and a box plot
    of product prices.
    """
    with container:
        categories = Category.all_sorted()

        with st.expander("**Review Counts**"):
            # One row per category: its name and the review count summed over its products
            review_rows = [[c.name, sum(p.review_count for p in c.products)] for c in categories]
            review_df = pd.DataFrame(review_rows, columns=["Category", "Review Count"])
            st.bar_chart(review_df, x="Category", y="Review Count")

        # The two box plots only differ in expander title, value column name and the
        # product attribute which is plotted, so they are driven from this table.
        box_plot_specs = (
            ("**Product Ratings**", 'Mean Product Rating', 'average_rating'),
            ("**Product Prices**", 'Price', 'price'),
        )
        for expander_title, value_column, product_attribute in box_plot_specs:
            with st.expander(expander_title):
                # One row per product: its category name and the plotted value
                product_rows = [[c.name, getattr(p, product_attribute)]
                                for c in categories for p in c.products]
                plot_df = pd.DataFrame(product_rows, columns=['Category', value_column])
                figure = px.box(plot_df, x="Category", y=value_column)
                figure.update_xaxes(tickangle=-90)
                st.plotly_chart(figure, use_container_width=True)


def show_top_section() -> None:
    """
    Writes the top section of the page: a narrow left column with the database selector and
    summary numbers, and a wider right column (1:3 split) with the summary charts.
    """
    with st.container():
        left_col, right_col = st.columns([1, 3])
        show_db_selector_and_summary_in_container(left_col)
        show_data_summary_charts_in_container(right_col)


def get_user_selected_category(container) -> Category:
    """
    Renders a radio-button list of all category names (in sorted order) into the given
    container and returns the category matching the name the user has picked.
    """
    with container:
        st.write('**Category**')
        category_names = [f"{category.name}" for category in Category.all_sorted()]
        # The label is collapsed as the heading written above already names the widget
        picked_name = st.radio("**Category**", category_names, label_visibility="collapsed")
        picked_category = Category.by_name(picked_name)
    return picked_category


def show_category_datatable_in_container(category, container) -> None:
    """
    Displays a category of products in the given container.  Shows the category's features, then
    the products themselves as a table in which only the 'Show Reviews?' tick-box column can be
    edited.  If any products are ticked, the reviews of those products are displayed in a second
    table below; otherwise a hint explains how to show reviews.

    :param category: the category whose features, products and reviews are displayed
    :param container: the streamlit container to render into
    """
    review_toggle_column = 'Show Reviews?'

    with container:
        features = sorted(f.name for f in category.features)
        st.write(f"**{category.singular_name} Features ({len(features)}):**")
        st.write('; '.join(features))

        prod_columns = ['Name', 'Price', 'Feature Count', 'Features', 'Review Count', 'Average Rating',
                        review_toggle_column, 'Description']
        prod_index = [p.id for p in category.products]
        prod_data = [[p.name, p.price, p.feature_count, ', '.join(str(f) for f in p.features), p.review_count,
                      p.average_rating, False, p.description] for p in category.products]
        prod_df = pd.DataFrame(prod_data, index=prod_index, columns=prod_columns)
        total_reviews = sum(p.review_count for p in category.products)
        st.write(f"**{category.name} ({len(prod_index)}). Having {total_reviews} reviews in total:**")

        # Every column except the tick-box is read-only.  Deriving the list from prod_columns
        # keeps the two in step if a column is ever added or renamed.
        read_only_columns = tuple(c for c in prod_columns if c != review_toggle_column)
        edited_df = st.data_editor(prod_df, disabled=read_only_columns)

        # The explicit bool cast keeps this a valid boolean row mask even if the column comes
        # back with a non-bool dtype (e.g. for a category without any products).
        show_reviews_mask = edited_df[review_toggle_column].astype(bool)
        selected_product_count = show_reviews_mask.sum()
        selected_review_count = edited_df.loc[show_reviews_mask, 'Review Count'].sum()

        st.write(f"**{category.singular_name} Reviews ({selected_review_count} from {selected_product_count} products):**")
        if selected_product_count == 0:
            st.write("Check boxes in the table above to see reviews for products.")
        elif selected_review_count == 0:
            # Products are ticked but none of them has a review - asking the user to tick
            # boxes here would be misleading.
            st.write("The selected products do not have any reviews.")
        else:
            selected_products = list(edited_df[show_reviews_mask].index)
            products = Product.for_ids(selected_products)
            rev_index = []
            rev_data = []
            for p in products:
                for r in p.reviews:
                    rev_index.append(r.id)
                    rev_data.append([p.name, r.rating, r.review_text])
            rev_columns = ['Product', 'Review Rating', 'Review Text']
            rev_df = pd.DataFrame(rev_data, index=rev_index, columns=rev_columns)
            st.dataframe(rev_df, width=10000)


def show_bottom_section() -> None:
    """
    Writes the bottom section of the page: a sub-heading naming the selected category, with
    the category picker in a narrow left column and the product / review tables in a wider
    right column (1:3 split).
    """
    # The heading container is created before the columns, but can only be filled once the
    # category picker (which lives in the columns) has returned the selection.
    heading_slot = st.container()
    picker_col, table_col = st.columns([1, 3])

    chosen_category = get_user_selected_category(picker_col)
    heading_slot.write(f'### {chosen_category.name}')
    show_category_datatable_in_container(chosen_category, table_col)


# Page script: the page is only rendered when st_setup reports a truthy result
# NOTE(review): presumably st_setup applies shared page configuration - confirm in src.st_helpers
page_is_ready = st_setup('LLM Arch')
if page_is_ready:
    # Load the data only if it has not been loaded already
    if not DataLoader.loaded:
        DataLoader.load_data()

    st.write("# Data Browser")
    show_top_section()
    show_bottom_section()