# Streamlit dashboard: Price vs. Sales Volume Tracker
# (NOTE: this file was recovered from a Hugging Face Spaces page export;
# the "Spaces: / Sleeping" lines were page-scrape residue, not code.)
# Standard library
import re
import time
import warnings
from datetime import datetime, timedelta

# Data handling
import numpy as np
import pandas as pd
import dask.dataframe as dd

# Visualization & app framework
import streamlit as st
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
# Sales regions and the US states (postal codes) belonging to each.
# Stored region-first for readability, then inverted into the
# state -> region lookup the rest of the app uses.
_REGION_STATES = {
    'WEST': [
        'AK', 'CA', 'CO', 'HI', 'ID', 'MT', 'NV', 'OR', 'UT', 'WA', 'WY',
    ],
    'SOUTHWEST': [
        'AZ', 'NM', 'OK', 'TX',
    ],
    'MIDWEST': [
        'IL', 'IN', 'IA', 'KS', 'MI', 'MN', 'MO', 'NE', 'ND', 'OH', 'SD', 'WI',
    ],
    'SOUTHEAST': [
        'AL', 'AR', 'DE', 'FL', 'GA', 'KY', 'LA', 'MD', 'MS', 'NC', 'SC', 'TN',
        'VA', 'WV',
    ],
    'NORTHEAST': [
        'CT', 'ME', 'MA', 'NH', 'NJ', 'NY', 'PA', 'RI', 'VT',
    ],
}

# state_to_region: e.g. 'CA' -> 'WEST'. States not listed here map to NaN
# when used with Series.map (see load_data).
state_to_region = {
    code: region
    for region, codes in _REGION_STATES.items()
    for code in codes
}
def date_from_week(year, week):
    """Translate fiscal (year, week) pairs into calendar start dates.

    The fiscal year is assumed to begin on August 1st of the *previous*
    calendar year, so week 1 of FY `year` starts on (year-1)-08-01 and each
    subsequent week adds 7 days.

    Args:
        year: pandas Series of fiscal-year numbers (must support `.astype`).
        week: pandas Series of 1-based fiscal week numbers.

    Returns:
        pandas Series of Timestamps — the start date of each fiscal week.
    """
    fiscal_year_start = pd.to_datetime((year - 1).astype(str) + '-08-01')
    week_offset = pd.to_timedelta((week - 1) * 7, unit='days')
    return fiscal_year_start + week_offset
def load_data(active_card):
    """Load the weekly sales CSV and prepare it for the active dashboard card.

    Reads only the columns the active card needs (via dask for memory
    efficiency), optionally pre-aggregates for card2/card3, then derives
    calendar fields (FY, Week, Year, Dt) from 'FyWeek' and a 'Region'
    column from 'State'.

    Args:
        active_card: 'card1' | 'card2' | 'card3' | None. Unknown/None falls
            back to a common column set.

    Returns:
        pandas DataFrame with the loaded columns plus FY, Week, Year, Dt, Region.
    """
    # Fallback column set used before any card has been selected.
    common_cols = ['FyWeek', 'Itemtype', 'Chaincode', 'State', 'SalesVolume', 'UnitPrice', 'Sales']
    # Columns required by each card.
    card_specific_cols = {
        'card1': ['FyWeek', 'State', 'Itemtype', 'Chaincode', 'SalesVolume'],
        'card2': ['FyWeek', 'Fy', 'State', 'Store', 'Address', 'Zipcode', 'City',
                  'Itemtype', 'Chaincode', 'Containercode', 'SalesVolume', 'UnitPrice', 'Sales'],
        'card3': ['FyWeek', 'Fy', 'State', 'Store', 'Itemtype', 'Chaincode',
                  'SalesVolume', 'UnitPrice', 'Sales'],  # PE-coefficient card
    }
    required_columns = card_specific_cols.get(active_card, common_cols)

    # Dtypes chosen for memory efficiency (categories for low-cardinality columns).
    dtype_spec = {
        'FyWeek': 'string',
        'Fy': 'category',
        'Itemtype': 'category',
        'Chaincode': 'category',
        'State': 'category',
        'Store': 'category',
        'Containercode': 'category',
        'Address': 'string',
        'Zipcode': 'float',
        'City': 'category',
        'SalesVolume': 'float',
        'UnitPrice': 'float',
        'Sales': 'float',
    }

    # Read only the necessary columns, then materialize to pandas.
    ddf = dd.read_csv("fy21-24.csv", usecols=required_columns, dtype=dtype_spec)
    df = ddf.compute()

    if active_card in ('card2', 'card3'):
        # BUG FIX: card3 does not load Address/Zipcode/City/Containercode, so
        # grouping on a fixed column list raised KeyError for that card.
        # Group only on the key columns actually present in this load.
        candidate_keys = ['FyWeek', 'Fy', 'Chaincode', 'Store', 'Address', 'Zipcode',
                          'City', 'State', 'Containercode', 'Itemtype']
        group_keys = [col for col in candidate_keys if col in df.columns]
        df = df.groupby(group_keys, observed=True).agg({
            'SalesVolume': 'sum',
            'UnitPrice': 'mean',
            'Sales': 'sum'
        }).reset_index()

    # Derive calendar fields from 'FyWeek' (format: '<FY label> Week <n>').
    df[['FY', 'Week']] = df['FyWeek'].str.split(' Week ', expand=True)
    df['Week'] = df['Week'].astype(int)
    df['Year'] = df['FY'].str[2:].astype(int)  # strip the 'FY' prefix
    df['Dt'] = date_from_week(df['Year'], df['Week'])

    # Region lookup; states missing from the mapping become NaN.
    df['Region'] = df['State'].map(state_to_region)
    return df
# ---- Page header ----
st.image("bonnie.png", width=150)  # company logo
st.title("Price vs. Sales Volume Tracker Dashboard")

# ---- Session-state defaults (persist across Streamlit reruns) ----
_defaults = {
    'active_card': None,             # which analysis card is open
    'selected_item_type': 'CORE',    # default item type for card2
    'selected_feature': 'Chaincode', # default grouping feature
}
for _key, _value in _defaults.items():
    if _key not in st.session_state:
        st.session_state[_key] = _value

# ---- Card selection buttons: clicking one activates that card ----
col1, col2, col3 = st.columns(3)
_card_buttons = [
    (col1, "Sales Volume Trend for Item Category", 'card1'),
    (col2, "Sales Volume & Unit Price Correlation for Item Category and Container Code", 'card2'),
    (col3, "PE Coefficient Calculation for Regions & Item Categories", 'card3'),
]
for _col, _label, _card in _card_buttons:
    with _col:
        if st.button(_label):
            st.session_state['active_card'] = _card

# ---- Load data for the active card, reporting elapsed time ----
start_time = time.time()
df = load_data(st.session_state['active_card'])
time_taken = time.time() - start_time
st.write(f"Data loaded in {time_taken:.2f} seconds")

# Default selected state: first distinct state in the loaded data.
if 'selected_state' not in st.session_state:
    st.session_state['selected_state'] = df['State'].unique()[0]
############################################ CARD #1 ####################################################
if st.session_state['active_card'] == 'card1':
    # Sales-volume trend for a single state, colored by the chosen grouping feature.
    selected_state = st.selectbox('Select State', df['State'].unique())
    selected_feature = st.selectbox('Select Feature for Grouping', ['Chaincode', 'Itemtype'])

    # Filter to the selected state and total volume per (week, group).
    filtered_df = df[df['State'] == selected_state]
    group_data = filtered_df.groupby(['FyWeek', selected_feature], observed=True)['SalesVolume'].sum().reset_index()

    # BUG FIX: the axis-label override previously used the key
    # 'Fiscal Week Short', which is not a column of group_data, so the
    # x-axis label was silently ignored; the column is 'FyWeek'.
    fig = px.bar(
        group_data, x='FyWeek', y='SalesVolume', color=selected_feature,
        title=f'Sales Volume over Fiscal Week in {selected_state} by {selected_feature}',
        labels={'SalesVolume': 'Sales Volume', 'FyWeek': 'Fiscal Week'}
    )
    st.plotly_chart(fig)
##########################################################################################################
| ########################################################################################################## | |
########################################### CARD #2 ####################################################
if st.session_state['active_card'] == 'card2':
    # Item-type choice persists across reruns via session state.
    item_types = df['Itemtype'].unique()
    st.session_state['selected_item_type'] = st.selectbox(
        'Select Item Type', item_types,
        index=list(item_types).index(st.session_state['selected_item_type']))

    # Grouping category and fiscal years to include.
    group_by_option = st.selectbox('Group by', ['Containercode', 'Chaincode', 'State'])
    selected_years = st.multiselect('Select Year(s)', [2021, 2022, 2023, 2024], default=[2021])
    st.subheader(f"Sales Volume & Unit Price Correlation for {group_by_option} in {', '.join(map(str, selected_years))}")

    # Flag the March–June window as 'Promo'. NOTE(review): this column is
    # assigned but not used anywhere below — kept for parity with the
    # original behavior.
    df['Dt'] = pd.to_datetime(df['Dt'], errors='coerce')
    df['Promo'] = np.where(df['Dt'].dt.month.astype(str).isin(['3', '4', '5', '6']), 'Promo', 'NoPromo')
    df["Promo"] = df["Promo"].astype("category")

    # Restrict to the chosen item type and years.
    mask = (df['Itemtype'] == st.session_state['selected_item_type']) & (df['Dt'].dt.year.isin(selected_years))
    filtered_df = df[mask]

    # Keep only the top 3 members of the grouping category by total volume.
    top_3_values = filtered_df.groupby(group_by_option, observed=True)['SalesVolume'].sum().nlargest(3).index
    top_group_data = filtered_df[filtered_df[group_by_option].isin(top_3_values)]

    # Weekly totals / average price per group member.
    agg_df = top_group_data.groupby([group_by_option, 'Year', 'Week', 'Dt'], observed=True).agg({
        'SalesVolume': 'sum',
        'UnitPrice': 'mean'
    }).reset_index()
    agg_df['week-year'] = agg_df['Dt'].dt.strftime('%U-%Y')  # X-axis labels

    # One dual-axis figure per top group member.
    for value in top_3_values:
        value_data = agg_df[agg_df[group_by_option] == value]
        mean_sales_volume = value_data['SalesVolume'].mean()
        mean_unit_price = value_data['UnitPrice'].mean()

        fig = go.Figure()
        fig.add_trace(go.Scatter(
            x=value_data['week-year'],
            y=value_data['SalesVolume'],
            mode='lines+markers',
            name='SalesVolume',
            line=dict(color='blue'),
            hovertemplate='SalesVolume: %{y}<br>Week-Year: %{x}'
        ))
        fig.add_trace(go.Scatter(
            x=value_data['week-year'],
            y=value_data['UnitPrice'],
            mode='lines+markers',
            name='UnitPrice',
            line=dict(color='green'),
            yaxis='y2',  # secondary Y-axis
            hovertemplate='UnitPrice: %{y}<br>Week-Year: %{x}'
        ))

        # Dashed mean reference line for each series (UnitPrice on the
        # secondary axis).
        for mean_value, axis_ref, color in (
            (mean_sales_volume, 'y', 'blue'),
            (mean_unit_price, 'y2', 'green'),
        ):
            fig.add_shape(type="line",
                          x0=value_data['week-year'].min(), x1=value_data['week-year'].max(),
                          y0=mean_value, y1=mean_value,
                          line=dict(color=color, width=2, dash="dash"),
                          xref='x', yref=axis_ref)

        fig.update_layout(
            template='plotly_white',
            title=f"SalesVolume and UnitPrice - {value} ({group_by_option})",
            xaxis_title='Week-Year',
            yaxis_title='Sales Volume',
            yaxis2=dict(title='UnitPrice', overlaying='y', side='right'),
            legend=dict(x=0.9, y=1.15),
            hovermode="x unified",  # show both series in one tooltip
            height=400,
            autosize=False,
            margin=dict(l=1, r=1, t=1, b=1)
        )
        fig.update_xaxes(tickangle=90)
        st.plotly_chart(fig, use_container_width=False)
##########################################################################################################
| ########################################################################################################## | |
########################################### CARD #3 ####################################################
if st.session_state['active_card'] == 'card3':
    # Price-elasticity (PE) coefficient by fiscal year and region.
    item_type_options = df['Itemtype'].unique()
    selected_item_type = st.selectbox("Select Item Type", item_type_options)

    region_options = df['Region'].unique()
    selected_regions = st.multiselect("Select Region(s)", region_options, default=region_options)

    # Restrict to the chosen item type and regions.
    filtered_df = df[(df['Itemtype'] == selected_item_type) & (df['Region'].isin(selected_regions))]

    # Yearly totals per (region, item type).
    agg_df = filtered_df.groupby(['Fy', 'Region', 'Itemtype']).agg({
        'SalesVolume': 'sum',
        'UnitPrice': 'mean'
    }).reset_index()

    # Sort so that pct_change compares consecutive fiscal years within each group.
    agg_df = agg_df.sort_values(by=['Region', 'Itemtype', 'Fy'])

    # Year-over-year % changes (first year of each group is NaN by construction).
    agg_df['SalesVolume_pct_change'] = agg_df.groupby(['Region', 'Itemtype'])['SalesVolume'].pct_change().round(3) * 100
    agg_df['UnitPrice_pct_change'] = agg_df.groupby(['Region', 'Itemtype'])['UnitPrice'].pct_change().round(3) * 100

    # Price Elasticity Coefficient = %ΔVolume / %ΔPrice.
    agg_df['PE_Coeff'] = (agg_df['SalesVolume_pct_change'] / agg_df['UnitPrice_pct_change']).round(2)
    # BUG FIX: a 0% price change makes the ratio ±inf, which dropna() keeps
    # and which distorts the chart; treat it as undefined instead.
    agg_df['PE_Coeff'] = agg_df['PE_Coeff'].replace([np.inf, -np.inf], np.nan)

    # Exclude FY 2025; then drop rows where the coefficient is undefined.
    agg_df_filtered = agg_df[agg_df['Fy'] != 'FY 2025']
    agg_df_filtered = agg_df_filtered.dropna(subset=['PE_Coeff'])

    st.dataframe(agg_df_filtered)
    st.write(agg_df_filtered.shape)

    # Plot the PE coefficient by year, one dash pattern per region.
    fig = px.line(
        agg_df_filtered,
        x='Fy',
        y='PE_Coeff',
        line_dash='Region',
        title=f"Price Elasticity Coefficient (PE) by Year for {selected_item_type}",
        labels={'Fy': 'Fiscal Year', 'PE_Coeff': 'Price Elasticity Coefficient'},
        markers=True
    )
    fig.update_layout(
        height=600,
        width=1000,
    )
    st.plotly_chart(fig, use_container_width=True)
###################################################################################################################
| ################################################################################################################### | |