Spaces:

aaronayitey
/

Streamlit-app

Sleeping

Streamlit-app / app.py

Update app.py

23dea5d 8 months ago

8.49 kB

	import streamlit as st
	import pandas as pd
	import seaborn as sns
	import matplotlib.pyplot as plt


	# Machine Learning Modeling
	from sklearn.preprocessing import StandardScaler, MinMaxScaler, OneHotEncoder
	from sklearn.compose import ColumnTransformer
	from sklearn.linear_model import LinearRegression
	from sklearn.model_selection import RandomizedSearchCV
	import xgboost as xgb
	from sklearn.metrics import mean_squared_error
	import joblib

	# Set the page layout to full width
	st.set_page_config(layout="wide")
	# Initialize df as None
	df = None

	st.sidebar.title("Favorita Stores")
	selected_option = st.sidebar.radio("Select to Proceed", ["Data Statistics", "Visuals", "Time Series Analysis", "Forecasting"])

	# Custom CSS styling for the title
	st.markdown(
	"""
	<style>
	.title-text {
	font-size: 28px;
	text-align: center;
	background-color: #3498db;
	color: white;
	padding: 10px 0;
	width: 100%;
	position: sticky;
	top: 0;
	z-index: 1;
	}
	</style>
	""",
	unsafe_allow_html=True
	)

	# Streamlit App Title
	st.markdown('<p class="title-text">Machine Learning App for Sales Prediction</p>', unsafe_allow_html=True)

	# Function to load and process the data
	def load_and_process_data():
	global df
	# Allow the user to upload an Excel file
	uploaded_file = st.file_uploader("Choose an Excel file", type=["xlsx"])
	if uploaded_file is not None:
	# Check if the file is an Excel file
	if uploaded_file.type == 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet':
	# Read the Excel file into a DataFrame
	df = pd.read_excel(uploaded_file)
	# Remove null values
	df.dropna(inplace=True)
	df = df.drop(columns='Unnamed: 0')
	else:
	st.write("Please upload a valid Excel file.")

	# Load and process the data
	load_and_process_data()

	if selected_option == "Data Statistics":
	# Rest of the code for "Data Statistics" option using df
	if df is not None:
	number_sample = st.number_input("Enter sample size to display data", min_value=5, max_value=10, step=1, value=5)
	displayed_data = df.head(number_sample)
	st.write("Sample data", displayed_data)
	st.write("Summary Statistics of float/Integer columns", df.describe())
	object_columns = df.select_dtypes(include='object').columns.tolist()
	selected_column = st.selectbox("Select column of Data Type Object to View Unique values", object_columns)
	if selected_column:
	unique_values = df[selected_column].unique()
	st.write("Unique values are", unique_values)

	elif selected_option == "Visuals":
	# Rest of the code for "Visuals" option using df
	if df is not None:
	object_columns = df.select_dtypes(include='object').columns.tolist()
	selected_column = st.selectbox("Select column of Data Type Object for Visualization", object_columns)
	if selected_column:
	df['date'] = pd.to_datetime(df['date']) # Convert to datetime if applicable
	df_grouped = df.groupby(selected_column)['sales'].sum().head(10)
	df_grouped = df_grouped.sort_values(ascending=False)
	fig, ax = plt.subplots(figsize=(15, 6))
	ax.bar(df_grouped.index, df_grouped.values)
	ax.set_xlabel(selected_column)
	ax.set_ylabel('Sales Count')
	ax.set_title(f'Top 10 Sales Count for {selected_column}')
	st.pyplot(fig) # Pass the figure to st.pyplot()
	elif selected_option == "Time Series Analysis":
	if df is not None:
	# Choose date and sales columns
	timeseriesdata = df[['sales', 'date']]
	timeseriesdata.index = timeseriesdata['date']
	timeseriesdata = timeseriesdata[['sales']] # Keep only the 'sales' column

	# Make date the index
	timeseriesdata = timeseriesdata.resample('D').sum() # Resample to daily sales

	# Resample the data based on user's choice
	resample_method = st.selectbox("Select a resampling method", ['M', 'Q', 'Y'])
	if resample_method:
	resampled_data = timeseriesdata.resample(resample_method).sum()

	# Plot the time series using Seaborn lineplot
	plt.figure(figsize=(15, 6))
	sns.lineplot(data=resampled_data)
	plt.ylabel('Sales')
	plt.title(f'Sales Time Series (Resampled by {resample_method})')
	st.pyplot(plt.gcf())
	else:
	st.write("Please enter these inputs to predict sales. Thank you!")
	# Load the pre-trained model and preprocessor
	model = joblib.load('./xgb_model.joblib')
	preprocessor = joblib.load('./preprocessor.joblib')



	# Create a layout with 2 columns for even distribution
	col1, col2 = st.columns(2)

	# User Inputs - Number
	with col1:
	# Create a date input using st.date_input
	date = st.date_input("Enter Date")

	# Convert the selected date to a string in the desired format (e.g., YYYY-MM-DD)
	formatted_date = date.strftime('%Y-%m-%d')

	# User Inputs - Year
	with col2:
	family = st.selectbox("Select product family", ['CELEBRATION', 'CLEANING', 'DAIRY', 'DELI', 'EGGS', 'FROZEN FOODS',
	'GROCERY I', 'GROCERY II', 'HARDWARE', 'HOME AND KITCHEN I',
	'HOME AND KITCHEN II', 'HOME APPLIANCES', 'HOME CARE',
	'LADIESWEAR', 'LAWN AND GARDEN', 'LINGERIE', 'LIQUOR,WINE,BEER',
	'MAGAZINES', 'MEATS', 'PERSONAL CARE', 'PET SUPPLIES',
	'PLAYERS AND ELECTRONICS', 'POULTRY', 'PREPARED FOODS', 'PRODUCE',
	'SCHOOL AND OFFICE SUPPLIES', 'SEAFOOD', 'AUTOMOTIVE', 'BABY CARE',
	'BEAUTY', 'BEVERAGES', 'BOOKS', 'BREAD/BAKERY'])

	# User Inputs - On Promotion
	with col1:
	onpromotion = st.number_input("Enter Number for onpromotion", min_value=0, step=1)


	# User Inputs - Day of the Week
	with col2:
	city = st.selectbox("Select city", ['Quito', 'Cayambe', 'Latacunga', 'Riobamba', 'Ibarra',
	'Santo Domingo', 'Guaranda', 'Puyo', 'Ambato', 'Guayaquil',
	'Salinas', 'Daule', 'Babahoyo', 'Quevedo', 'Playas', 'Libertad',
	'Cuenca', 'Loja', 'Machala', 'Esmeraldas', 'Manta', 'El Carmen'])

	# User Inputs - Product Category
	with col1:
	oil_prices = st.number_input("Enter oil price", min_value=1, step=1)


	# User Inputs - Day of the Week
	with col2:
	holiday_type = st.selectbox("Select holiday type", ['Holiday', 'Additional', 'Transfer', 'Event', 'Bridge'])

	# User Inputs - Product Category
	with col1:
	sales_lag_1 = st.number_input("Enter Number for sales lag", min_value=0, step=1)


	# User Inputs - Day of the Week
	with col2:
	moving_average = st.number_input("Enter Number for moving average", min_value=0, step=1)

	# Placeholder for Predicted Value

	# Add custom spacing between columns
	st.markdown("<hr>", unsafe_allow_html=True)



	# Predict Button
	if st.button("Predict"):
	# Prepare input data for prediction
	# Prepare input data for prediction
	# Create a DataFrame with all required columns except "sales"
	prediction_placeholder = st.empty()
	input_df = pd.DataFrame({
	"family": [family],
	"onpromotion": [onpromotion],
	"city": [city],
	"oil_prices": [oil_prices],
	"holiday_type": [holiday_type],
	"sales_lag_1": [sales_lag_1],
	"moving_average": [moving_average]
	})

	# Transform the input DataFrame using the preprocessor
	preprocessed_data = preprocessor.transform(input_df)



	# Make a prediction
	prediction = model.predict(preprocessed_data)


	# Display the prediction
	prediction_placeholder.text(f"Predicted Value for sales: {prediction[0]: ,.2f}")

	if prediction >= 0:
	prediction_placeholder.markdown(
	f'Predicted Value for sales: <span style="background-color: green; padding: 2px 5px; border-radius: 5px;">${prediction[0]:,.2f}</span>',
	unsafe_allow_html=True
	)
	else:
	prediction_placeholder.markdown(
	f'Predicted Value for sales: <span style="background-color: red; padding: 2px 5px; border-radius: 5px;">${prediction[0]:,.2f}</span>',
	unsafe_allow_html=True
	)