Spaces:
Sleeping
Sleeping
aaronayitey
committed on
Commit
•
38f0dc3
1
Parent(s):
5765cf7
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,217 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import seaborn as sns
|
4 |
+
import matplotlib.pyplot as plt
|
5 |
+
|
6 |
+
|
7 |
+
# Machine Learning Modeling
|
8 |
+
from sklearn.preprocessing import StandardScaler, MinMaxScaler, OneHotEncoder
|
9 |
+
from sklearn.compose import ColumnTransformer
|
10 |
+
from sklearn.linear_model import LinearRegression
|
11 |
+
from sklearn.model_selection import RandomizedSearchCV
|
12 |
+
import xgboost as xgb
|
13 |
+
from sklearn.metrics import mean_squared_error
|
14 |
+
import joblib
|
15 |
+
|
16 |
+
# Use the whole browser width; this is the first Streamlit call in the script.
st.set_page_config(layout="wide")

# Module-level dataset holder; it remains None until an upload is read.
df = None

# Sidebar: app name plus the page selector that drives the rest of the script.
_PAGE_CHOICES = ["Data Statistics", "Visuals", "Time Series Analysis", "Forecasting"]
st.sidebar.title("Favorita Stores")
selected_option = st.sidebar.radio("Select to Proceed", _PAGE_CHOICES)

# Stylesheet for the sticky, centred, white-on-blue title banner.
_TITLE_STYLE = """
<style>
.title-text {
font-size: 28px;
text-align: center;
background-color: #3498db;
color: white;
padding: 10px 0;
width: 100%;
position: sticky;
top: 0;
z-index: 1;
}
</style>
"""
st.markdown(_TITLE_STYLE, unsafe_allow_html=True)

# Title banner, styled by the ``title-text`` class declared above.
_TITLE_HTML = '<p class="title-text">Machine Learning App for Sales Prediction</p>'
st.markdown(_TITLE_HTML, unsafe_allow_html=True)
|
46 |
+
|
47 |
+
# Function to load and process the data
|
48 |
+
def load_and_process_data():
    """Render the Excel uploader and load the uploaded sheet into the global ``df``.

    A Streamlit file uploader restricted to ``.xlsx`` is always rendered.
    When a file reporting the Excel (OOXML spreadsheet) MIME type has been
    uploaded, it is read with ``pd.read_excel``, rows containing null values
    are dropped, the ``'Unnamed: 0'`` column is removed if present, and the
    result is stored in the module-level ``df``.

    ``df`` is left untouched when nothing has been uploaded. When the upload
    reports a different MIME type, a message asking for a valid Excel file is
    written and ``df`` is likewise left untouched.

    Returns:
        None. The loaded frame is published through the global ``df``.
    """
    global df

    # Allow the user to upload an Excel file
    uploaded_file = st.file_uploader("Choose an Excel file", type=["xlsx"])
    if uploaded_file is None:
        return

    # Check if the file is an Excel file
    if uploaded_file.type != 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet':
        st.write("Please upload a valid Excel file.")
        return

    # Read the Excel file into a DataFrame and remove rows with null values
    frame = pd.read_excel(uploaded_file).dropna()

    # errors="ignore": a workbook without the 'Unnamed: 0' column must not
    # abort the load with a KeyError.
    df = frame.drop(columns='Unnamed: 0', errors='ignore')
|
62 |
+
|
63 |
+
# Render the uploader; on a successful read this fills the module-level ``df``.
load_and_process_data()


def _show_statistics(data):
    """Show a sample, ``describe()`` output and unique values of one object column."""
    number_sample = st.number_input("Enter sample size to display data", min_value=5, max_value=10, step=1, value=5)
    st.write("Sample data", data.head(number_sample))
    st.write("Summary Statistics of float/Integer columns", data.describe())

    object_columns = data.select_dtypes(include='object').columns.tolist()
    selected_column = st.selectbox("Select column of Data Type Object to View Unique values", object_columns)
    # selectbox yields None when there are no object columns to offer.
    if selected_column:
        st.write("Unique values are", data[selected_column].unique())


def _show_visuals(data):
    """Bar-chart the ten largest summed ``sales`` values grouped by one object column."""
    object_columns = data.select_dtypes(include='object').columns.tolist()
    selected_column = st.selectbox("Select column of Data Type Object for Visualization", object_columns)
    if not selected_column:
        return

    # Convert to datetime if applicable (mutates the shared frame in place).
    data['date'] = pd.to_datetime(data['date'])

    # Sort BEFORE truncating: taking head(10) first would keep the first ten
    # groups in key order instead of the ten largest totals.
    totals = data.groupby(selected_column)['sales'].sum()
    top_totals = totals.sort_values(ascending=False).head(10)

    fig, ax = plt.subplots(figsize=(15, 6))
    ax.bar(top_totals.index, top_totals.values)
    ax.set_xlabel(selected_column)
    ax.set_ylabel('Sales Count')
    ax.set_title(f'Top 10 Sales Count for {selected_column}')
    st.pyplot(fig)
    # Streamlit reruns the script on every interaction; close the figure so
    # open matplotlib figures do not pile up.
    plt.close(fig)


def _show_time_series(data):
    """Plot total ``sales`` resampled to the user-chosen period (M, Q or Y)."""
    # Keep only the sales column and index it by parsed dates: resample needs
    # a DatetimeIndex, and a leftover datetime column cannot be summed.
    timeseriesdata = data[['sales']].copy()
    timeseriesdata.index = pd.to_datetime(data['date'])
    timeseriesdata = timeseriesdata.resample('D').sum()  # Resample to daily sales

    # Resample the data based on user's choice
    resample_method = st.selectbox("Select a resampling method", ['M', 'Q', 'Y'])
    if not resample_method:
        return
    resampled_data = timeseriesdata.resample(resample_method).sum()

    # Plot the time series using Seaborn lineplot
    fig, ax = plt.subplots(figsize=(15, 6))
    sns.lineplot(data=resampled_data, ax=ax)
    ax.set_ylabel('Sales')
    ax.set_title(f'Sales Time Series (Resampled by {resample_method})')
    st.pyplot(fig)
    plt.close(fig)


def _show_forecast():
    """Collect feature inputs and display a sales prediction from the saved model."""
    st.write("Please enter these inputs to predict sales. Thank you!")

    # Load the pre-trained model and preprocessor
    model = joblib.load('./xgb_model.joblib')
    preprocessor = joblib.load('./preprocessor.joblib')

    # Two columns for an even distribution of the input widgets
    col1, col2 = st.columns(2)

    # Left column: date and numeric inputs
    with col1:
        # NOTE(review): the chosen date is collected but never added to the
        # frame sent to the preprocessor -- confirm whether the model expects
        # date-derived features.
        st.date_input("Enter Date")
        onpromotion = st.number_input("Enter Number for onpromotion", min_value=0, step=1)
        oil_prices = st.number_input("Enter oil price", min_value=1, step=1)
        sales_lag_1 = st.number_input("Enter Number for sales lag", min_value=0, step=1)

    # Right column: categorical inputs and the moving average
    with col2:
        family = st.selectbox("Select product family", [
            'CELEBRATION', 'CLEANING', 'DAIRY', 'DELI', 'EGGS', 'FROZEN FOODS',
            'GROCERY I', 'GROCERY II', 'HARDWARE', 'HOME AND KITCHEN I',
            'HOME AND KITCHEN II', 'HOME APPLIANCES', 'HOME CARE',
            'LADIESWEAR', 'LAWN AND GARDEN', 'LINGERIE', 'LIQUOR,WINE,BEER',
            'MAGAZINES', 'MEATS', 'PERSONAL CARE', 'PET SUPPLIES',
            'PLAYERS AND ELECTRONICS', 'POULTRY', 'PREPARED FOODS', 'PRODUCE',
            'SCHOOL AND OFFICE SUPPLIES', 'SEAFOOD', 'AUTOMOTIVE', 'BABY CARE',
            'BEAUTY', 'BEVERAGES', 'BOOKS', 'BREAD/BAKERY',
        ])
        city = st.selectbox("Select city", [
            'Quito', 'Cayambe', 'Latacunga', 'Riobamba', 'Ibarra',
            'Santo Domingo', 'Guaranda', 'Puyo', 'Ambato', 'Guayaquil',
            'Salinas', 'Daule', 'Babahoyo', 'Quevedo', 'Playas', 'Libertad',
            'Cuenca', 'Loja', 'Machala', 'Esmeraldas', 'Manta', 'El Carmen',
        ])
        holiday_type = st.selectbox("Select holiday type", ['Holiday', 'Additional', 'Transfer', 'Event', 'Bridge'])
        moving_average = st.number_input("Enter Number for moving average", min_value=0, step=1)

    # Horizontal rule between the inputs and the prediction area
    st.markdown("<hr>", unsafe_allow_html=True)

    # Predict Button
    if not st.button("Predict"):
        return

    prediction_placeholder = st.empty()

    # Single-row frame with every model input except the target "sales"
    input_df = pd.DataFrame({
        "family": [family],
        "onpromotion": [onpromotion],
        "city": [city],
        "oil_prices": [oil_prices],
        "holiday_type": [holiday_type],
        "sales_lag_1": [sales_lag_1],
        "moving_average": [moving_average],
    })

    # Transform the input DataFrame using the preprocessor, then predict
    preprocessed_data = preprocessor.transform(input_df)
    prediction = model.predict(preprocessed_data)

    # Work with the single scalar result rather than the whole result array.
    predicted_sales = float(prediction[0])

    # Green badge for non-negative predictions, red for negative ones.
    badge_color = "green" if predicted_sales >= 0 else "red"
    prediction_placeholder.markdown(
        f'Predicted Value for sales: <span style="background-color: {badge_color}; padding: 2px 5px; border-radius: 5px;">${predicted_sales:,.2f}</span>',
        unsafe_allow_html=True
    )


# Dispatch on the sidebar choice. The three data pages need an uploaded
# dataset and render nothing until one is available; any other choice
# (i.e. "Forecasting") shows the prediction form.
if selected_option == "Data Statistics":
    if df is not None:
        _show_statistics(df)
elif selected_option == "Visuals":
    if df is not None:
        _show_visuals(df)
elif selected_option == "Time Series Analysis":
    if df is not None:
        _show_time_series(df)
else:
    _show_forecast()
|