# Streamlit dashboard: Price vs. Sales Volume Tracker
# (NOTE: this file was recovered from a Hugging Face Spaces page export;
# the "Spaces: / Sleeping" lines were page-scrape residue, not code.)
# Standard library
import re
import time
import warnings
from datetime import datetime, timedelta

# Data handling
import numpy as np
import pandas as pd
import dask.dataframe as dd

# Visualization & app framework
import streamlit as st
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
# Sales regions and the US states (postal codes) belonging to each.
# Stored region-first for readability, then inverted into the
# state -> region lookup the rest of the app uses.
_REGION_STATES = {
    'WEST': [
        'AK', 'CA', 'CO', 'HI', 'ID', 'MT', 'NV', 'OR', 'UT', 'WA', 'WY',
    ],
    'SOUTHWEST': [
        'AZ', 'NM', 'OK', 'TX',
    ],
    'MIDWEST': [
        'IL', 'IN', 'IA', 'KS', 'MI', 'MN', 'MO', 'NE', 'ND', 'OH', 'SD', 'WI',
    ],
    'SOUTHEAST': [
        'AL', 'AR', 'DE', 'FL', 'GA', 'KY', 'LA', 'MD', 'MS', 'NC', 'SC', 'TN',
        'VA', 'WV',
    ],
    'NORTHEAST': [
        'CT', 'ME', 'MA', 'NH', 'NJ', 'NY', 'PA', 'RI', 'VT',
    ],
}

# state_to_region: e.g. 'CA' -> 'WEST'. States not listed here map to NaN
# when used with Series.map (see load_data).
state_to_region = {
    code: region
    for region, codes in _REGION_STATES.items()
    for code in codes
}
def date_from_week(year, week):
    """Translate fiscal (year, week) pairs into calendar start dates.

    The fiscal year is assumed to begin on August 1st of the *previous*
    calendar year, so week 1 of FY `year` starts on (year-1)-08-01 and each
    subsequent week adds 7 days.

    Args:
        year: pandas Series of fiscal-year numbers (must support `.astype`).
        week: pandas Series of 1-based fiscal week numbers.

    Returns:
        pandas Series of Timestamps — the start date of each fiscal week.
    """
    fiscal_year_start = pd.to_datetime((year - 1).astype(str) + '-08-01')
    week_offset = pd.to_timedelta((week - 1) * 7, unit='days')
    return fiscal_year_start + week_offset
def load_data(active_card):
    """Load the weekly sales CSV and prepare it for the active dashboard card.

    Reads only the columns the active card needs (via dask for memory
    efficiency), optionally pre-aggregates for card2/card3, then derives
    calendar fields (FY, Week, Year, Dt) from 'FyWeek' and a 'Region'
    column from 'State'.

    Args:
        active_card: 'card1' | 'card2' | 'card3' | None. Unknown/None falls
            back to a common column set.

    Returns:
        pandas DataFrame with the loaded columns plus FY, Week, Year, Dt, Region.
    """
    # Fallback column set used before any card has been selected.
    common_cols = ['FyWeek', 'Itemtype', 'Chaincode', 'State', 'SalesVolume', 'UnitPrice', 'Sales']
    # Columns required by each card.
    card_specific_cols = {
        'card1': ['FyWeek', 'State', 'Itemtype', 'Chaincode', 'SalesVolume'],
        'card2': ['FyWeek', 'Fy', 'State', 'Store', 'Address', 'Zipcode', 'City',
                  'Itemtype', 'Chaincode', 'Containercode', 'SalesVolume', 'UnitPrice', 'Sales'],
        'card3': ['FyWeek', 'Fy', 'State', 'Store', 'Itemtype', 'Chaincode',
                  'SalesVolume', 'UnitPrice', 'Sales'],  # PE-coefficient card
    }
    required_columns = card_specific_cols.get(active_card, common_cols)

    # Dtypes chosen for memory efficiency (categories for low-cardinality columns).
    dtype_spec = {
        'FyWeek': 'string',
        'Fy': 'category',
        'Itemtype': 'category',
        'Chaincode': 'category',
        'State': 'category',
        'Store': 'category',
        'Containercode': 'category',
        'Address': 'string',
        'Zipcode': 'float',
        'City': 'category',
        'SalesVolume': 'float',
        'UnitPrice': 'float',
        'Sales': 'float',
    }

    # Read only the necessary columns, then materialize to pandas.
    ddf = dd.read_csv("fy21-24.csv", usecols=required_columns, dtype=dtype_spec)
    df = ddf.compute()

    if active_card in ('card2', 'card3'):
        # BUG FIX: card3 does not load Address/Zipcode/City/Containercode, so
        # grouping on a fixed column list raised KeyError for that card.
        # Group only on the key columns actually present in this load.
        candidate_keys = ['FyWeek', 'Fy', 'Chaincode', 'Store', 'Address', 'Zipcode',
                          'City', 'State', 'Containercode', 'Itemtype']
        group_keys = [col for col in candidate_keys if col in df.columns]
        df = df.groupby(group_keys, observed=True).agg({
            'SalesVolume': 'sum',
            'UnitPrice': 'mean',
            'Sales': 'sum'
        }).reset_index()

    # Derive calendar fields from 'FyWeek' (format: '<FY label> Week <n>').
    df[['FY', 'Week']] = df['FyWeek'].str.split(' Week ', expand=True)
    df['Week'] = df['Week'].astype(int)
    df['Year'] = df['FY'].str[2:].astype(int)  # strip the 'FY' prefix
    df['Dt'] = date_from_week(df['Year'], df['Week'])

    # Region lookup; states missing from the mapping become NaN.
    df['Region'] = df['State'].map(state_to_region)
    return df
# ---- Page header ----
st.image("bonnie.png", width=150)  # company logo
st.title("Price vs. Sales Volume Tracker Dashboard")

# ---- Session-state defaults (persist across Streamlit reruns) ----
_defaults = {
    'active_card': None,             # which analysis card is open
    'selected_item_type': 'CORE',    # default item type for card2
    'selected_feature': 'Chaincode', # default grouping feature
}
for _key, _value in _defaults.items():
    if _key not in st.session_state:
        st.session_state[_key] = _value

# ---- Card selection buttons: clicking one activates that card ----
col1, col2, col3 = st.columns(3)
_card_buttons = [
    (col1, "Sales Volume Trend for Item Category", 'card1'),
    (col2, "Sales Volume & Unit Price Correlation for Item Category and Container Code", 'card2'),
    (col3, "PE Coefficient Calculation for Regions & Item Categories", 'card3'),
]
for _col, _label, _card in _card_buttons:
    with _col:
        if st.button(_label):
            st.session_state['active_card'] = _card

# ---- Load data for the active card, reporting elapsed time ----
start_time = time.time()
df = load_data(st.session_state['active_card'])
time_taken = time.time() - start_time
st.write(f"Data loaded in {time_taken:.2f} seconds")

# Default selected state: first distinct state in the loaded data.
if 'selected_state' not in st.session_state:
    st.session_state['selected_state'] = df['State'].unique()[0]
############################################ CARD #1 ####################################################
if st.session_state['active_card'] == 'card1':
    # Sales-volume trend for a single state, colored by the chosen grouping feature.
    selected_state = st.selectbox('Select State', df['State'].unique())
    selected_feature = st.selectbox('Select Feature for Grouping', ['Chaincode', 'Itemtype'])

    # Filter to the selected state and total volume per (week, group).
    filtered_df = df[df['State'] == selected_state]
    group_data = filtered_df.groupby(['FyWeek', selected_feature], observed=True)['SalesVolume'].sum().reset_index()

    # BUG FIX: the axis-label override previously used the key
    # 'Fiscal Week Short', which is not a column of group_data, so the
    # x-axis label was silently ignored; the column is 'FyWeek'.
    fig = px.bar(
        group_data, x='FyWeek', y='SalesVolume', color=selected_feature,
        title=f'Sales Volume over Fiscal Week in {selected_state} by {selected_feature}',
        labels={'SalesVolume': 'Sales Volume', 'FyWeek': 'Fiscal Week'}
    )
    st.plotly_chart(fig)
##########################################################################################################
| ########################################################################################################## | |
########################################### CARD #2 ####################################################
if st.session_state['active_card'] == 'card2':
    # Item-type choice persists across reruns via session state.
    item_types = df['Itemtype'].unique()
    st.session_state['selected_item_type'] = st.selectbox(
        'Select Item Type', item_types,
        index=list(item_types).index(st.session_state['selected_item_type']))

    # Grouping category and fiscal years to include.
    group_by_option = st.selectbox('Group by', ['Containercode', 'Chaincode', 'State'])
    selected_years = st.multiselect('Select Year(s)', [2021, 2022, 2023, 2024], default=[2021])
    st.subheader(f"Sales Volume & Unit Price Correlation for {group_by_option} in {', '.join(map(str, selected_years))}")

    # Flag the March–June window as 'Promo'. NOTE(review): this column is
    # assigned but not used anywhere below — kept for parity with the
    # original behavior.
    df['Dt'] = pd.to_datetime(df['Dt'], errors='coerce')
    df['Promo'] = np.where(df['Dt'].dt.month.astype(str).isin(['3', '4', '5', '6']), 'Promo', 'NoPromo')
    df["Promo"] = df["Promo"].astype("category")

    # Restrict to the chosen item type and years.
    mask = (df['Itemtype'] == st.session_state['selected_item_type']) & (df['Dt'].dt.year.isin(selected_years))
    filtered_df = df[mask]

    # Keep only the top 3 members of the grouping category by total volume.
    top_3_values = filtered_df.groupby(group_by_option, observed=True)['SalesVolume'].sum().nlargest(3).index
    top_group_data = filtered_df[filtered_df[group_by_option].isin(top_3_values)]

    # Weekly totals / average price per group member.
    agg_df = top_group_data.groupby([group_by_option, 'Year', 'Week', 'Dt'], observed=True).agg({
        'SalesVolume': 'sum',
        'UnitPrice': 'mean'
    }).reset_index()
    agg_df['week-year'] = agg_df['Dt'].dt.strftime('%U-%Y')  # X-axis labels

    # One dual-axis figure per top group member.
    for value in top_3_values:
        value_data = agg_df[agg_df[group_by_option] == value]
        mean_sales_volume = value_data['SalesVolume'].mean()
        mean_unit_price = value_data['UnitPrice'].mean()

        fig = go.Figure()
        fig.add_trace(go.Scatter(
            x=value_data['week-year'],
            y=value_data['SalesVolume'],
            mode='lines+markers',
            name='SalesVolume',
            line=dict(color='blue'),
            hovertemplate='SalesVolume: %{y}<br>Week-Year: %{x}'
        ))
        fig.add_trace(go.Scatter(
            x=value_data['week-year'],
            y=value_data['UnitPrice'],
            mode='lines+markers',
            name='UnitPrice',
            line=dict(color='green'),
            yaxis='y2',  # secondary Y-axis
            hovertemplate='UnitPrice: %{y}<br>Week-Year: %{x}'
        ))

        # Dashed mean reference line for each series (UnitPrice on the
        # secondary axis).
        for mean_value, axis_ref, color in (
            (mean_sales_volume, 'y', 'blue'),
            (mean_unit_price, 'y2', 'green'),
        ):
            fig.add_shape(type="line",
                          x0=value_data['week-year'].min(), x1=value_data['week-year'].max(),
                          y0=mean_value, y1=mean_value,
                          line=dict(color=color, width=2, dash="dash"),
                          xref='x', yref=axis_ref)

        fig.update_layout(
            template='plotly_white',
            title=f"SalesVolume and UnitPrice - {value} ({group_by_option})",
            xaxis_title='Week-Year',
            yaxis_title='Sales Volume',
            yaxis2=dict(title='UnitPrice', overlaying='y', side='right'),
            legend=dict(x=0.9, y=1.15),
            hovermode="x unified",  # show both series in one tooltip
            height=400,
            autosize=False,
            margin=dict(l=1, r=1, t=1, b=1)
        )
        fig.update_xaxes(tickangle=90)
        st.plotly_chart(fig, use_container_width=False)
##########################################################################################################
| ########################################################################################################## | |
########################################### CARD #3 ####################################################
if st.session_state['active_card'] == 'card3':
    # Price-elasticity (PE) coefficient by fiscal year and region.
    item_type_options = df['Itemtype'].unique()
    selected_item_type = st.selectbox("Select Item Type", item_type_options)

    region_options = df['Region'].unique()
    selected_regions = st.multiselect("Select Region(s)", region_options, default=region_options)

    # Restrict to the chosen item type and regions.
    filtered_df = df[(df['Itemtype'] == selected_item_type) & (df['Region'].isin(selected_regions))]

    # Yearly totals per (region, item type).
    agg_df = filtered_df.groupby(['Fy', 'Region', 'Itemtype']).agg({
        'SalesVolume': 'sum',
        'UnitPrice': 'mean'
    }).reset_index()

    # Sort so that pct_change compares consecutive fiscal years within each group.
    agg_df = agg_df.sort_values(by=['Region', 'Itemtype', 'Fy'])

    # Year-over-year % changes (first year of each group is NaN by construction).
    agg_df['SalesVolume_pct_change'] = agg_df.groupby(['Region', 'Itemtype'])['SalesVolume'].pct_change().round(3) * 100
    agg_df['UnitPrice_pct_change'] = agg_df.groupby(['Region', 'Itemtype'])['UnitPrice'].pct_change().round(3) * 100

    # Price Elasticity Coefficient = %ΔVolume / %ΔPrice.
    agg_df['PE_Coeff'] = (agg_df['SalesVolume_pct_change'] / agg_df['UnitPrice_pct_change']).round(2)
    # BUG FIX: a 0% price change makes the ratio ±inf, which dropna() keeps
    # and which distorts the chart; treat it as undefined instead.
    agg_df['PE_Coeff'] = agg_df['PE_Coeff'].replace([np.inf, -np.inf], np.nan)

    # Exclude FY 2025; then drop rows where the coefficient is undefined.
    agg_df_filtered = agg_df[agg_df['Fy'] != 'FY 2025']
    agg_df_filtered = agg_df_filtered.dropna(subset=['PE_Coeff'])

    st.dataframe(agg_df_filtered)
    st.write(agg_df_filtered.shape)

    # Plot the PE coefficient by year, one dash pattern per region.
    fig = px.line(
        agg_df_filtered,
        x='Fy',
        y='PE_Coeff',
        line_dash='Region',
        title=f"Price Elasticity Coefficient (PE) by Year for {selected_item_type}",
        labels={'Fy': 'Fiscal Year', 'PE_Coeff': 'Price Elasticity Coefficient'},
        markers=True
    )
    fig.update_layout(
        height=600,
        width=1000,
    )
    st.plotly_chart(fig, use_container_width=True)
###################################################################################################################
| ################################################################################################################### | |