alfraser committed on
Commit
cfa39d8
·
1 Parent(s): dd4879b

Refactored the page into smaller functions to make it more modular and easier to follow and maintain.

Browse files
Files changed (1) hide show
  1. pages/020_Data_Browser.py +103 -74
pages/020_Data_Browser.py CHANGED
@@ -5,100 +5,109 @@ import plotly.express as px
5
  from src.st_helpers import st_setup
6
  from src.datatypes import *
7
 
8
- if st_setup('LLM Arch'):
9
- if not DataLoader.loaded:
10
- DataLoader.load_data()
11
 
12
- summary = st.container()
13
- with summary:
14
- st.write("# Data Browser")
15
-
16
- sumcol1, sumcol2 = st.columns([1, 3])
17
-
18
- with sumcol1:
19
- dbs = DataLoader.available_dbs()
20
- if len(dbs) > 1:
21
- st.write(f'**:1234: Database Selector**')
22
- idx = dbs.index(DataLoader.active_db)
23
- DataLoader.set_db_name(st.selectbox("Connected to:", dbs, index=idx, label_visibility="collapsed"))
24
-
25
- st.write(f'**:1234: Summary Statistics**')
26
- summary = f'- **{len(Category.all):,}** categories'
27
- summary += f'\n- **{len(Product.all):,}** products'
28
- summary += f'\n- **{len(Feature.all):,}** features'
29
- summary += f'\n- **{len(Review.all):,}** reviews'
30
- st.markdown(summary)
31
- if st.button('Force data reload'):
32
- DataLoader.load_data(True)
33
- st.rerun()
34
-
35
- with sumcol2:
36
- cats = Category.all_sorted()
37
-
38
- with st.expander("**Review Counts**"):
39
- category_names = [c.name for c in cats]
40
- category_review_counts = [sum([p.review_count for p in c.products]) for c in cats]
41
- data = zip(category_names, category_review_counts)
42
- df = pd.DataFrame(data, columns=["Category", "Review Count"])
43
- st.bar_chart(df, x="Category", y="Review Count")
44
-
45
- with st.expander("**Product Ratings**"):
46
- data = []
47
- for c in cats:
48
- for p in c.products:
49
- data.append([c.name, p.average_rating])
50
- df = pd.DataFrame(data, columns=['Category', 'Mean Product Rating'])
51
- fig = px.box(df, x="Category", y="Mean Product Rating")
52
- fig.update_xaxes(tickangle=-90)
53
- st.plotly_chart(fig, use_container_width=True)
54
-
55
- with st.expander("**Product Prices**"):
56
- data = []
57
- for c in cats:
58
- for p in c.products:
59
- data.append([c.name, p.price])
60
- df = pd.DataFrame(data, columns=['Category', 'Price'])
61
- fig = px.box(df, x="Category", y="Price")
62
- fig.update_xaxes(tickangle=-90)
63
- st.plotly_chart(fig, use_container_width=True)
64
-
65
- subhead = st.container()
66
-
67
- col1, col2 = st.columns([1, 3])
68
-
69
- with col1:
70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  st.write('**Category**')
72
  cats = Category.all_sorted()
73
  options = [f"{c.name}" for c in cats]
74
  selection = st.radio("**Category**", options, label_visibility="collapsed")
 
75
 
76
- selected_category = Category.by_name(selection)
77
 
78
- with subhead:
79
- st.write(f'### {selection}')
80
-
81
- with col2:
82
- features = [f.name for f in selected_category.features]
 
 
 
83
  features.sort()
84
- st.write(f"**{selection[:-1]} Features ({len(features)}):**")
85
  st.write('; '.join(features))
86
 
87
- prod_index = [p.id for p in selected_category.products]
88
  prod_data = [[p.name, p.price, p.feature_count, ', '.join([str(f) for f in p.features]), p.review_count,
89
- p.average_rating, False, p.description] for p in selected_category.products]
90
  prod_columns = ['Name', 'Price', 'Feature Count', 'Features', 'Review Count', 'Average Rating', 'Show Reviews?',
91
  'Description']
92
  prod_df = pd.DataFrame(prod_data, index=prod_index, columns=prod_columns)
93
- total_reviews = sum([p.review_count for p in selected_category.products])
94
- st.write(f"**{selection} ({len(prod_index)}). Having {total_reviews} reviews in total:**")
95
  edited_df = st.data_editor(prod_df, disabled=(
96
  'Name', 'Price', 'Feature Count', 'Features', 'Review Count', 'Average Rating', 'Description'))
97
 
98
  selected_product_count = edited_df['Show Reviews?'].sum()
99
  selected_review_count = edited_df[edited_df['Show Reviews?']]['Review Count'].sum()
100
 
101
- st.write(f"**{selection[:-1]} Reviews ({selected_review_count} from {selected_product_count} products):**")
102
  if selected_review_count > 0:
103
  selected_products = list(edited_df[edited_df['Show Reviews?']].index)
104
  products = Product.for_ids(selected_products)
@@ -112,4 +121,24 @@ if st_setup('LLM Arch'):
112
  rev_df = pd.DataFrame(rev_data, index=rev_index, columns=rev_columns)
113
  st.dataframe(rev_df, width=10000)
114
  else:
115
- st.write("Check boxes in the table above to see reviews for products.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  from src.st_helpers import st_setup
6
  from src.datatypes import *
7
 
 
 
 
8
 
9
def show_db_selector_and_summary_in_container(container) -> None:
    """
    Draw the database selector and the summary statistics into the given container.

    The selector is only drawn when DataLoader reports more than one available
    database; whatever the select box returns is passed to DataLoader.set_db_name.
    Below that, the number of categories, products, features and reviews is listed,
    followed by a button which calls DataLoader.load_data(True) and reruns the page.
    """
    with container:
        available = DataLoader.available_dbs()
        if len(available) > 1:
            st.write('**:1234: Database Selector**')
            current = available.index(DataLoader.active_db)
            chosen = st.selectbox("Connected to:", available, index=current, label_visibility="collapsed")
            DataLoader.set_db_name(chosen)

        st.write('**:1234: Summary Statistics**')
        # One markdown bullet per record type, counts formatted with thousands separators
        counts = [
            (len(Category.all), 'categories'),
            (len(Product.all), 'products'),
            (len(Feature.all), 'features'),
            (len(Review.all), 'reviews'),
        ]
        st.markdown('\n'.join(f'- **{count:,}** {label}' for count, label in counts))

        if st.button('Force data reload'):
            DataLoader.load_data(True)
            st.rerun()
26
+
27
+
28
def show_data_summary_charts_in_container(container) -> None:
    """
    Draw three expanders of summary charts into the given container:
    a bar chart of total review count per category, a box plot of the average
    rating of each product per category and a box plot of product price per category.
    """
    with container:
        categories = Category.all_sorted()

        with st.expander("**Review Counts**"):
            # One row per category: its name and the summed review count of its products
            review_rows = [(c.name, sum(p.review_count for p in c.products)) for c in categories]
            review_df = pd.DataFrame(review_rows, columns=["Category", "Review Count"])
            st.bar_chart(review_df, x="Category", y="Review Count")

        with st.expander("**Product Ratings**"):
            # One row per product, labelled with the category it belongs to
            rating_rows = [[c.name, p.average_rating] for c in categories for p in c.products]
            rating_df = pd.DataFrame(rating_rows, columns=['Category', 'Mean Product Rating'])
            rating_fig = px.box(rating_df, x="Category", y="Mean Product Rating")
            rating_fig.update_xaxes(tickangle=-90)
            st.plotly_chart(rating_fig, use_container_width=True)

        with st.expander("**Product Prices**"):
            # One row per product, labelled with the category it belongs to
            price_rows = [[c.name, p.price] for c in categories for p in c.products]
            price_df = pd.DataFrame(price_rows, columns=['Category', 'Price'])
            price_fig = px.box(price_df, x="Category", y="Price")
            price_fig.update_xaxes(tickangle=-90)
            st.plotly_chart(price_fig, use_container_width=True)
58
+
59
+
60
def show_top_section() -> None:
    """
    Writes the top section of the streamlit page: a narrow left column holding the
    database selector and summary numbers, and a wider right column holding the
    summary charts.
    """
    with st.container():
        # 1:3 width split between the summary numbers and the charts
        left_col, right_col = st.columns([1, 3])
        show_db_selector_and_summary_in_container(left_col)
        show_data_summary_charts_in_container(right_col)
70
+
71
+
72
def get_user_selected_category(container) -> Category:
    """
    Draw a radio selector listing every category name (in Category.all_sorted() order)
    into the given container, and return the category looked up by the chosen name.
    """
    with container:
        st.write('**Category**')
        names = [f"{cat.name}" for cat in Category.all_sorted()]
        # The visible label is written above, so the radio's own label is collapsed
        chosen_name = st.radio("**Category**", names, label_visibility="collapsed")
        return Category.by_name(chosen_name)
82
 
 
83
 
84
+ def show_category_datatable_in_container(category, container) -> None:
85
+ """
86
+ Displays a category of products into a given container. Shows the products themselves
87
+ as a table and then if any products are ticked to show the reviews it displays the associated reviews
88
+ also.
89
+ """
90
+ with container:
91
+ features = [f.name for f in category.features]
92
  features.sort()
93
+ st.write(f"**{category.singular_name} Features ({len(features)}):**")
94
  st.write('; '.join(features))
95
 
96
+ prod_index = [p.id for p in category.products]
97
  prod_data = [[p.name, p.price, p.feature_count, ', '.join([str(f) for f in p.features]), p.review_count,
98
+ p.average_rating, False, p.description] for p in category.products]
99
  prod_columns = ['Name', 'Price', 'Feature Count', 'Features', 'Review Count', 'Average Rating', 'Show Reviews?',
100
  'Description']
101
  prod_df = pd.DataFrame(prod_data, index=prod_index, columns=prod_columns)
102
+ total_reviews = sum([p.review_count for p in category.products])
103
+ st.write(f"**{category.name} ({len(prod_index)}). Having {total_reviews} reviews in total:**")
104
  edited_df = st.data_editor(prod_df, disabled=(
105
  'Name', 'Price', 'Feature Count', 'Features', 'Review Count', 'Average Rating', 'Description'))
106
 
107
  selected_product_count = edited_df['Show Reviews?'].sum()
108
  selected_review_count = edited_df[edited_df['Show Reviews?']]['Review Count'].sum()
109
 
110
+ st.write(f"**{category.singular_name} Reviews ({selected_review_count} from {selected_product_count} products):**")
111
  if selected_review_count > 0:
112
  selected_products = list(edited_df[edited_df['Show Reviews?']].index)
113
  products = Product.for_ids(selected_products)
 
121
  rev_df = pd.DataFrame(rev_data, index=rev_index, columns=rev_columns)
122
  st.dataframe(rev_df, width=10000)
123
  else:
124
+ st.write("Check boxes in the table above to see reviews for products.")
125
+
126
+
127
def show_bottom_section() -> None:
    """
    Writes the bottom section of the streamlit page: a sub-heading naming the selected
    category, a category selector on the left and the data table for the selected
    category on the right.
    """
    # The heading container is created before the columns so that it sits above them
    # on the page, although it is only filled in once the selected category is known.
    heading_slot = st.container()
    selector_col, table_col = st.columns([1, 3])

    category = get_user_selected_category(selector_col)
    with heading_slot:
        st.write(f'### {category.name}')
    show_category_datatable_in_container(category, table_col)
137
+
138
+
139
# Page entry point: nothing is drawn unless st_setup returns a truthy value
if st_setup('LLM Arch'):
    # Only load the data when DataLoader does not already report it as loaded
    if not DataLoader.loaded:
        DataLoader.load_data()

    st.write("# Data Browser")
    show_top_section()
    show_bottom_section()