Spaces:

alfraser
/

llm-arch

Runtime error

App Files Files Community

llm-arch / pages /020_Data_Browser.py

alfraser

Added page level comments to describe the purpose of each page

4f07f72 12 months ago

raw

history blame contribute delete

6.24 kB

	"""
	This page allows users to browse the test data set. Mainly this is just to get a sense of the size,
	content and composition of the dataset behind the project.
	"""

	import streamlit as st
	import pandas as pd
	import plotly.express as px

	from src.st_helpers import st_setup
	from src.datatypes import *


	def show_db_selector_and_summary_in_container(container) -> None:
	    """
	    Writes the database selector (shown only when more than one database is available),
	    the record counts for categories, products, features and reviews, and a button that
	    forces a data reload into the given container.
	    """
	    with container:
	        available = DataLoader.available_dbs()
	        if len(available) > 1:
	            st.write(':1234: Database Selector')
	            # Pre-select the database that is currently active
	            current_position = available.index(DataLoader.active_db)
	            chosen_db = st.selectbox("Connected to:", available, index=current_position,
	                                     label_visibility="collapsed")
	            DataLoader.set_db_name(chosen_db)

	        st.write(':1234: Summary Statistics')
	        bullet_lines = [
	            f'- {len(Category.all):,} categories',
	            f'- {len(Product.all):,} products',
	            f'- {len(Feature.all):,} features',
	            f'- {len(Review.all):,} reviews',
	        ]
	        st.markdown('\n'.join(bullet_lines))

	        if st.button('Force data reload'):
	            DataLoader.load_data(True)
	            # Re-run the script so the page reflects the reloaded data
	            st.rerun()


	def _show_category_box_plot(categories, value_label: str, attribute: str) -> None:
	    """
	    Draws a box plot with one box per category, built from the named attribute of every
	    product in each category. The value_label is used as both the column name and the y axis.
	    """
	    rows = [[c.name, getattr(p, attribute)] for c in categories for p in c.products]
	    frame = pd.DataFrame(rows, columns=['Category', value_label])
	    fig = px.box(frame, x="Category", y=value_label)
	    # Rotate the category tick labels
	    fig.update_xaxes(tickangle=-90)
	    st.plotly_chart(fig, use_container_width=True)


	def show_data_summary_charts_in_container(container) -> None:
	    """
	    Writes three expanders into the given container: a bar chart of the total review count
	    per category, a box plot of product average ratings per category and a box plot of
	    product prices per category.
	    """
	    with container:
	        cats = Category.all_sorted()

	        with st.expander("Review Counts"):
	            # One row per category: (name, sum of its products' review counts)
	            rows = [(c.name, sum(p.review_count for p in c.products)) for c in cats]
	            df = pd.DataFrame(rows, columns=["Category", "Review Count"])
	            st.bar_chart(df, x="Category", y="Review Count")

	        with st.expander("Product Ratings"):
	            _show_category_box_plot(cats, 'Mean Product Rating', 'average_rating')

	        with st.expander("Product Prices"):
	            _show_category_box_plot(cats, 'Price', 'price')


	def show_top_section() -> None:
	    """
	    Writes the top section of the streamlit page: the database selector and summary
	    numbers in a left column, and the summary charts in a right column three times as wide.
	    """
	    with st.container():
	        left_col, right_col = st.columns([1, 3])
	        show_db_selector_and_summary_in_container(left_col)
	        show_data_summary_charts_in_container(right_col)


	def get_user_selected_category(container) -> Category:
	    """
	    Writes a radio selector listing the names of the categories from Category.all_sorted()
	    into the given container and returns the category looked up by the selected name.
	    """
	    with container:
	        st.write('Category')
	        names = [f"{cat.name}" for cat in Category.all_sorted()]
	        # The visible label is the st.write above, so the radio's own label is collapsed
	        picked = st.radio("Category", names, label_visibility="collapsed")
	        return Category.by_name(picked)


	def show_category_datatable_in_container(category, container) -> None:
	    """
	    Displays a category of products into a given container. Shows the category's feature names,
	    then the products themselves as a table with a 'Show Reviews?' tick box per row, and then,
	    if the ticked products have any reviews, a table of those reviews.
	    """
	    show_reviews_column = 'Show Reviews?'
	    with container:
	        feature_names = sorted(f.name for f in category.features)
	        st.write(f"{category.singular_name} Features ({len(feature_names)}):")
	        st.write('; '.join(feature_names))

	        # Product table, indexed by product id, with an unticked 'Show Reviews?' column
	        prod_index = [p.id for p in category.products]
	        prod_columns = ['Name', 'Price', 'Feature Count', 'Features', 'Review Count', 'Average Rating',
	                        show_reviews_column, 'Description']
	        prod_data = [[p.name, p.price, p.feature_count, ', '.join(str(f) for f in p.features), p.review_count,
	                      p.average_rating, False, p.description] for p in category.products]
	        prod_df = pd.DataFrame(prod_data, index=prod_index, columns=prod_columns)
	        total_reviews = sum(p.review_count for p in category.products)
	        st.write(f"{category.name} ({len(prod_index)}). Having {total_reviews} reviews in total:")

	        # Every column except the tick box is read-only; derived from prod_columns so the
	        # two lists cannot drift apart
	        read_only_columns = tuple(c for c in prod_columns if c != show_reviews_column)
	        edited_df = st.data_editor(prod_df, disabled=read_only_columns)

	        # Coerce to bool so the column is always usable as a row mask
	        # NOTE(review): presumably needed when the category has no products and the column
	        # is not bool dtype - confirm against pandas boolean indexing rules
	        show_mask = edited_df[show_reviews_column].astype(bool)
	        selected_df = edited_df[show_mask]
	        selected_product_count = show_mask.sum()
	        selected_review_count = selected_df['Review Count'].sum()

	        st.write(f"{category.singular_name} Reviews ({selected_review_count} from {selected_product_count} products):")
	        if selected_review_count > 0:
	            selected_ids = list(selected_df.index)
	            rev_index = []
	            rev_data = []
	            for p in Product.for_ids(selected_ids):
	                for r in p.reviews:
	                    rev_index.append(r.id)
	                    rev_data.append([p.name, r.rating, r.review_text])
	            rev_df = pd.DataFrame(rev_data, index=rev_index, columns=['Product', 'Review Rating', 'Review Text'])
	            st.dataframe(rev_df, width=10000)
	        else:
	            st.write("Check boxes in the table above to see reviews for products.")


	def show_bottom_section() -> None:
	    """
	    Writes the bottom section of the streamlit page: a heading with the selected category's
	    name, a category selector in a left column and that category's data tables in a right
	    column three times as wide.
	    """
	    # Create the heading container before the columns so it sits above them on the page;
	    # it is filled in later, once the selected category is known
	    heading_slot = st.container()
	    picker_col, table_col = st.columns([1, 3])

	    chosen = get_user_selected_category(picker_col)
	    heading_slot.write(f'### {chosen.name}')
	    show_category_datatable_in_container(chosen, table_col)


	# Page entry point: nothing is rendered unless st_setup returns a truthy value
	if st_setup('LLM Arch'):
	    # Load the data only when DataLoader reports it has not been loaded yet
	    if not DataLoader.loaded:
	        DataLoader.load_data()

	    st.write("# Data Browser")
	    show_top_section()
	    show_bottom_section()