aaronayitey committed on
Commit
38f0dc3
1 Parent(s): 5765cf7

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +217 -0
app.py ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import seaborn as sns
4
+ import matplotlib.pyplot as plt
5
+
6
+
7
+ # Machine Learning Modeling
8
+ from sklearn.preprocessing import StandardScaler, MinMaxScaler, OneHotEncoder
9
+ from sklearn.compose import ColumnTransformer
10
+ from sklearn.linear_model import LinearRegression
11
+ from sklearn.model_selection import RandomizedSearchCV
12
+ import xgboost as xgb
13
+ from sklearn.metrics import mean_squared_error
14
+ import joblib
15
+
16
# Stretch the app across the full browser width.
st.set_page_config(layout="wide")

# Shared dataset holder: stays None until an uploaded workbook has been parsed.
df = None

# Sidebar navigation between the app's sections.
section_choices = ["Data Statistics", "Visuals", "Time Series Analysis", "Forecasting"]
st.sidebar.title("Favorita Stores")
selected_option = st.sidebar.radio("Select to Proceed", section_choices)

# Stylesheet for the sticky, centred title banner rendered below.
title_css = """
<style>
.title-text {
font-size: 28px;
text-align: center;
background-color: #3498db;
color: white;
padding: 10px 0;
width: 100%;
position: sticky;
top: 0;
z-index: 1;
}
</style>
"""
st.markdown(title_css, unsafe_allow_html=True)

# App title, styled through the .title-text class defined above.
title_html = '<p class="title-text">Machine Learning App for Sales Prediction</p>'
st.markdown(title_html, unsafe_allow_html=True)
46
+
47
+ # Function to load and process the data
48
def load_and_process_data():
    """Render the Excel uploader and load the uploaded sheet into the global ``df``.

    When a file with the ``.xlsx`` MIME type is uploaded, it is read with
    ``pd.read_excel``, rows containing any null value are dropped, and the
    ``'Unnamed: 0'`` column (typically a leftover saved index) is removed if
    it is present. ``df`` is left untouched when nothing has been uploaded;
    a message is shown when the upload is not reported as an Excel workbook.

    Returns:
        None. The result is published through the module-level ``df``.
    """
    global df

    excel_mime = 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'

    # Allow the user to upload an Excel file
    uploaded_file = st.file_uploader("Choose an Excel file", type=["xlsx"])
    if uploaded_file is None:
        return

    # The uploader already filters by extension; this re-checks the MIME type
    # reported for the upload.
    if uploaded_file.type != excel_mime:
        st.write("Please upload a valid Excel file.")
        return

    loaded = pd.read_excel(uploaded_file)
    # Remove rows with null values (reassign instead of mutating in place).
    loaded = loaded.dropna()
    # errors="ignore": workbooks saved without an index column have no
    # 'Unnamed: 0', and a plain drop() would raise KeyError for them.
    df = loaded.drop(columns='Unnamed: 0', errors='ignore')
62
+
63
# Load and process the data (shows the uploader and fills the global ``df``).
load_and_process_data()

if selected_option == "Data Statistics":
    # Sample rows, numeric summary, and unique values of a chosen text column.
    if df is not None:
        number_sample = st.number_input(
            "Enter sample size to display data", min_value=5, max_value=10, step=1, value=5
        )
        st.write("Sample data", df.head(number_sample))
        st.write("Summary Statistics of float/Integer columns", df.describe())

        object_columns = df.select_dtypes(include='object').columns.tolist()
        selected_column = st.selectbox(
            "Select column of Data Type Object to View Unique values", object_columns
        )
        # selectbox yields None when there are no object columns to offer.
        if selected_column:
            st.write("Unique values are", df[selected_column].unique())

elif selected_option == "Visuals":
    # Bar chart of total sales for the ten best-selling values of a column.
    if df is not None:
        object_columns = df.select_dtypes(include='object').columns.tolist()
        selected_column = st.selectbox(
            "Select column of Data Type Object for Visualization", object_columns
        )
        if selected_column:
            df['date'] = pd.to_datetime(df['date'])  # Convert to datetime if applicable
            # nlargest picks the ten groups with the highest totals, already
            # sorted descending. Taking head(10) before sorting (as before)
            # only showed the first ten groups in index order.
            df_grouped = df.groupby(selected_column)['sales'].sum().nlargest(10)

            fig, ax = plt.subplots(figsize=(15, 6))
            ax.bar(df_grouped.index, df_grouped.values)
            ax.set_xlabel(selected_column)
            ax.set_ylabel('Sales Count')
            ax.set_title(f'Top 10 Sales Count for {selected_column}')
            st.pyplot(fig)  # Pass the figure to st.pyplot()

elif selected_option == "Time Series Analysis":
    # Line chart of sales aggregated to a user-selected frequency.
    if df is not None:
        # Work on an explicit copy so the index/dtype changes below never
        # write into a slice of ``df``.
        sales_by_date = df[['date', 'sales']].copy()
        # resample() needs a datetime index; the upload may hold plain strings.
        sales_by_date['date'] = pd.to_datetime(sales_by_date['date'])
        # Index by date and keep only the sales column, so the sum is never
        # attempted on the datetime values themselves.
        timeseriesdata = sales_by_date.set_index('date').resample('D').sum()  # daily sales

        # Resample the data based on user's choice
        resample_method = st.selectbox("Select a resampling method", ['M', 'Q', 'Y'])
        if resample_method:
            resampled_data = timeseriesdata.resample(resample_method).sum()

            # Draw on an explicit figure rather than pyplot's global state.
            fig, ax = plt.subplots(figsize=(15, 6))
            sns.lineplot(data=resampled_data, ax=ax)
            ax.set_ylabel('Sales')
            ax.set_title(f'Sales Time Series (Resampled by {resample_method})')
            st.pyplot(fig)

else:
    # "Forecasting": collect feature values and run the saved model on them.
    st.write("Please enter these inputs to predict sales. Thank you!")

    # Load the pre-trained model and preprocessor
    model = joblib.load('./xgb_model.joblib')
    preprocessor = joblib.load('./preprocessor.joblib')

    # Two columns so the inputs are spread evenly across the page.
    col1, col2 = st.columns(2)

    # Left column: date and numeric inputs.
    with col1:
        # NOTE(review): the selected date is collected but is not part of the
        # frame passed to the preprocessor below - confirm whether the model
        # expects a date-derived feature.
        date = st.date_input("Enter Date")
        onpromotion = st.number_input("Enter Number for onpromotion", min_value=0, step=1)
        oil_prices = st.number_input("Enter oil price", min_value=1, step=1)
        sales_lag_1 = st.number_input("Enter Number for sales lag", min_value=0, step=1)

    # Right column: categorical inputs plus the moving average.
    with col2:
        family = st.selectbox(
            "Select product family",
            [
                'CELEBRATION', 'CLEANING', 'DAIRY', 'DELI', 'EGGS', 'FROZEN FOODS',
                'GROCERY I', 'GROCERY II', 'HARDWARE', 'HOME AND KITCHEN I',
                'HOME AND KITCHEN II', 'HOME APPLIANCES', 'HOME CARE',
                'LADIESWEAR', 'LAWN AND GARDEN', 'LINGERIE', 'LIQUOR,WINE,BEER',
                'MAGAZINES', 'MEATS', 'PERSONAL CARE', 'PET SUPPLIES',
                'PLAYERS AND ELECTRONICS', 'POULTRY', 'PREPARED FOODS', 'PRODUCE',
                'SCHOOL AND OFFICE SUPPLIES', 'SEAFOOD', 'AUTOMOTIVE', 'BABY CARE',
                'BEAUTY', 'BEVERAGES', 'BOOKS', 'BREAD/BAKERY',
            ],
        )
        city = st.selectbox(
            "Select city",
            [
                'Quito', 'Cayambe', 'Latacunga', 'Riobamba', 'Ibarra',
                'Santo Domingo', 'Guaranda', 'Puyo', 'Ambato', 'Guayaquil',
                'Salinas', 'Daule', 'Babahoyo', 'Quevedo', 'Playas', 'Libertad',
                'Cuenca', 'Loja', 'Machala', 'Esmeraldas', 'Manta', 'El Carmen',
            ],
        )
        holiday_type = st.selectbox(
            "Select holiday type", ['Holiday', 'Additional', 'Transfer', 'Event', 'Bridge']
        )
        moving_average = st.number_input("Enter Number for moving average", min_value=0, step=1)

    # Horizontal rule separating the inputs from the prediction area.
    st.markdown("<hr>", unsafe_allow_html=True)

    # Predict Button
    if st.button("Predict"):
        prediction_placeholder = st.empty()

        # Single-row frame with every feature column except "sales".
        input_df = pd.DataFrame({
            "family": [family],
            "onpromotion": [onpromotion],
            "city": [city],
            "oil_prices": [oil_prices],
            "holiday_type": [holiday_type],
            "sales_lag_1": [sales_lag_1],
            "moving_average": [moving_average],
        })

        # Transform the input DataFrame using the preprocessor
        preprocessed_data = preprocessor.transform(input_df)

        # One input row gives one prediction; take it as a scalar so the sign
        # check below is made on a number, not on an array.
        predicted_sales = model.predict(preprocessed_data)[0]

        # Green badge for a non-negative forecast, red otherwise.
        badge_colour = "green" if predicted_sales >= 0 else "red"
        prediction_placeholder.markdown(
            f'Predicted Value for sales: <span style="background-color: {badge_colour}; padding: 2px 5px; border-radius: 5px;">${predicted_sales:,.2f}</span>',
            unsafe_allow_html=True
        )