Spaces:
Sleeping
Sleeping
File size: 8,489 Bytes
38f0dc3 23dea5d 38f0dc3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 |
import streamlit as st
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Machine Learning Modeling
from sklearn.preprocessing import StandardScaler, MinMaxScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import RandomizedSearchCV
import xgboost as xgb
from sklearn.metrics import mean_squared_error
import joblib
# ---------------------------------------------------------------------------
# Page-level setup: layout, shared state, sidebar navigation and title banner.
# ---------------------------------------------------------------------------

# Kept as the first Streamlit call of the script; switches to the wide layout.
st.set_page_config(layout="wide")

# Shared dataset for the data pages. Stays None until
# load_and_process_data() assigns the uploaded workbook to it.
df = None

# Labels of the pages selectable from the sidebar.
_PAGE_LABELS = ["Data Statistics", "Visuals", "Time Series Analysis", "Forecasting"]

st.sidebar.title("Favorita Stores")
selected_option = st.sidebar.radio("Select to Proceed", _PAGE_LABELS)

# Stylesheet for the banner: a centred, sticky, white-on-blue bar.
_TITLE_BANNER_CSS = """
<style>
.title-text {
font-size: 28px;
text-align: center;
background-color: #3498db;
color: white;
padding: 10px 0;
width: 100%;
position: sticky;
top: 0;
z-index: 1;
}
</style>
"""

# Markup for the banner itself; it picks up the .title-text class above.
_TITLE_BANNER_HTML = '<p class="title-text">Machine Learning App for Sales Prediction</p>'

# Inject the stylesheet first, then the banner that uses it.
for _banner_part in (_TITLE_BANNER_CSS, _TITLE_BANNER_HTML):
    st.markdown(_banner_part, unsafe_allow_html=True)
# MIME type the upload must report to be accepted as an .xlsx workbook.
_XLSX_MIME_TYPE = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"


def load_and_process_data():
    """Prompt for an Excel upload and store the cleaned data in global ``df``.

    Renders a file uploader restricted to ``.xlsx`` files. When a file with
    the expected MIME type is supplied it is read with ``pd.read_excel``,
    rows containing null values are dropped, and the ``'Unnamed: 0'`` column
    is removed if the workbook has one. The result is assigned to the
    module-level ``df``.

    When no file is uploaded, ``df`` is left untouched. When the uploaded
    file reports a different MIME type, a hint is written to the page and
    ``df`` is likewise left untouched.
    """
    global df

    uploaded_file = st.file_uploader("Choose an Excel file", type=["xlsx"])
    if uploaded_file is None:
        return

    if uploaded_file.type != _XLSX_MIME_TYPE:
        st.write("Please upload a valid Excel file.")
        return

    frame = pd.read_excel(uploaded_file)
    # Drop incomplete rows first, matching the original cleaning order.
    frame = frame.dropna()
    # errors="ignore": a workbook without an 'Unnamed: 0' column should
    # still load instead of failing with KeyError.
    df = frame.drop(columns="Unnamed: 0", errors="ignore")
# Choices offered by the forecasting form's select boxes.
_PRODUCT_FAMILIES = ['CELEBRATION', 'CLEANING', 'DAIRY', 'DELI', 'EGGS', 'FROZEN FOODS',
                     'GROCERY I', 'GROCERY II', 'HARDWARE', 'HOME AND KITCHEN I',
                     'HOME AND KITCHEN II', 'HOME APPLIANCES', 'HOME CARE',
                     'LADIESWEAR', 'LAWN AND GARDEN', 'LINGERIE', 'LIQUOR,WINE,BEER',
                     'MAGAZINES', 'MEATS', 'PERSONAL CARE', 'PET SUPPLIES',
                     'PLAYERS AND ELECTRONICS', 'POULTRY', 'PREPARED FOODS', 'PRODUCE',
                     'SCHOOL AND OFFICE SUPPLIES', 'SEAFOOD', 'AUTOMOTIVE', 'BABY CARE',
                     'BEAUTY', 'BEVERAGES', 'BOOKS', 'BREAD/BAKERY']
_CITIES = ['Quito', 'Cayambe', 'Latacunga', 'Riobamba', 'Ibarra',
           'Santo Domingo', 'Guaranda', 'Puyo', 'Ambato', 'Guayaquil',
           'Salinas', 'Daule', 'Babahoyo', 'Quevedo', 'Playas', 'Libertad',
           'Cuenca', 'Loja', 'Machala', 'Esmeraldas', 'Manta', 'El Carmen']
_HOLIDAY_TYPES = ['Holiday', 'Additional', 'Transfer', 'Event', 'Bridge']


def _render_data_statistics(data):
    """Show a data sample, summary statistics and one column's unique values.

    Args:
        data: The uploaded DataFrame.
    """
    number_sample = st.number_input(
        "Enter sample size to display data", min_value=5, max_value=10, step=1, value=5
    )
    st.write("Sample data", data.head(number_sample))
    st.write("Summary Statistics of float/Integer columns", data.describe())

    object_columns = data.select_dtypes(include='object').columns.tolist()
    selected_column = st.selectbox(
        "Select column of Data Type Object to View Unique values", object_columns
    )
    # selectbox yields None when there are no object columns to choose from.
    if selected_column:
        st.write("Unique values are", data[selected_column].unique())


def _render_visuals(data):
    """Plot the ten largest sales totals grouped by a chosen object column.

    Args:
        data: The uploaded DataFrame; must contain a ``'sales'`` column.
    """
    object_columns = data.select_dtypes(include='object').columns.tolist()
    selected_column = st.selectbox(
        "Select column of Data Type Object for Visualization", object_columns
    )
    if not selected_column:
        return

    # Sort BEFORE truncating: taking head(10) first would keep the first ten
    # groups in key order rather than the ten with the highest totals.
    top_sales = (
        data.groupby(selected_column)['sales']
        .sum()
        .sort_values(ascending=False)
        .head(10)
    )

    fig, ax = plt.subplots(figsize=(15, 6))
    ax.bar(top_sales.index, top_sales.values)
    ax.set_xlabel(selected_column)
    ax.set_ylabel('Sales Count')
    ax.set_title(f'Top 10 Sales Count for {selected_column}')
    st.pyplot(fig)
    # Release the figure so figures do not pile up across script reruns.
    plt.close(fig)


def _render_time_series(data):
    """Plot total sales resampled to a user-selected calendar frequency.

    Args:
        data: The uploaded DataFrame; must contain ``'sales'`` and ``'date'``.
    """
    # resample() needs a datetime-like index, so convert here instead of
    # relying on another page having converted the column. Building a new
    # index leaves the caller's DataFrame unmodified.
    dates = pd.to_datetime(data['date'])
    daily_sales = data[['sales']].set_index(dates).resample('D').sum()

    resample_method = st.selectbox("Select a resampling method", ['M', 'Q', 'Y'])
    if not resample_method:
        return

    resampled_data = daily_sales.resample(resample_method).sum()

    fig, ax = plt.subplots(figsize=(15, 6))
    sns.lineplot(data=resampled_data, ax=ax)
    ax.set_ylabel('Sales')
    ax.set_title(f'Sales Time Series (Resampled by {resample_method})')
    st.pyplot(fig)
    # Release the figure so figures do not pile up across script reruns.
    plt.close(fig)


def _render_forecasting():
    """Collect feature values and display the model's sales prediction.

    Loads the model and preprocessor from the joblib files in the working
    directory, renders a two-column input form, and on "Predict" transforms
    the inputs and shows the prediction on a green (non-negative) or red
    (negative) background.
    """
    st.write("Please enter these inputs to predict sales. Thank you!")

    model = joblib.load('./xgb_model.joblib')
    preprocessor = joblib.load('./preprocessor.joblib')

    col1, col2 = st.columns(2)

    # Left column: date and numeric inputs, top to bottom.
    with col1:
        # The date is shown in the form but is not among the features
        # passed to the preprocessor below.
        st.date_input("Enter Date")
        onpromotion = st.number_input("Enter Number for onpromotion", min_value=0, step=1)
        oil_prices = st.number_input("Enter oil price", min_value=1, step=1)
        sales_lag_1 = st.number_input("Enter Number for sales lag", min_value=0, step=1)

    # Right column: categorical inputs plus the moving average.
    with col2:
        family = st.selectbox("Select product family", _PRODUCT_FAMILIES)
        city = st.selectbox("Select city", _CITIES)
        holiday_type = st.selectbox("Select holiday type", _HOLIDAY_TYPES)
        moving_average = st.number_input("Enter Number for moving average", min_value=0, step=1)

    # Horizontal rule between the form and the prediction area.
    st.markdown("<hr>", unsafe_allow_html=True)

    if not st.button("Predict"):
        return

    # Single-row frame holding every feature column except "sales".
    input_df = pd.DataFrame({
        "family": [family],
        "onpromotion": [onpromotion],
        "city": [city],
        "oil_prices": [oil_prices],
        "holiday_type": [holiday_type],
        "sales_lag_1": [sales_lag_1],
        "moving_average": [moving_average],
    })
    preprocessed_data = preprocessor.transform(input_df)

    # Compare and format the scalar, not the one-element result array.
    predicted_sales = model.predict(preprocessed_data)[0]
    badge_color = "green" if predicted_sales >= 0 else "red"
    st.markdown(
        f'Predicted Value for sales: <span style="background-color: {badge_color}; padding: 2px 5px; border-radius: 5px;">${predicted_sales:,.2f}</span>',
        unsafe_allow_html=True
    )


# Pages that need an uploaded dataset, keyed by their sidebar label.
_DATA_PAGES = {
    "Data Statistics": _render_data_statistics,
    "Visuals": _render_visuals,
    "Time Series Analysis": _render_time_series,
}

# Load and process the data (assigns the module-level df when a file is given)
load_and_process_data()

if selected_option in _DATA_PAGES:
    # Data pages render nothing further until a workbook has been uploaded.
    if df is not None:
        _DATA_PAGES[selected_option](df)
else:
    # Any other sidebar choice is the forecasting page.
    _render_forecasting()
|