ikoghoemmanuell committed on
Commit
1c4efbc
·
1 Parent(s): 5968696

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +124 -73
app.py CHANGED
@@ -7,25 +7,34 @@ import pandas as pd
7
  import re
8
  from pathlib import Path
9
  from PIL import Image
10
- from datetime import datetime
11
 
12
  # Setting the page configurations
13
- st.set_page_config(page_title="Sales Prediction Forecasting", page_icon=":heavy_dollar_sign:", layout="wide", initial_sidebar_state="auto")
14
 
15
  # Setting the page title
16
  st.title("Grocery Store Sales Time Series Model Prediction")
17
 
 
 
 
18
  # Function to load the dataset
19
@st.cache_resource
def load_data(relative_path):
    """Read the CSV file at ``relative_path`` into a DataFrame.

    The first column of the file is used as the row index. The call is
    wrapped in Streamlit's resource cache, so the file is not re-read on
    every script rerun.

    Args:
        relative_path: Path of the CSV file to load.

    Returns:
        The DataFrame produced by ``pd.read_csv``.
    """
    return pd.read_csv(relative_path, index_col=0)
23
 
 
 
 
24
  # Loading the base dataframe
25
  rpath = r"merged_train_data.csv"
26
  data = load_data(rpath)
27
 
28
- # Load the model and encoder and scaler
 
 
29
  model = pickle.load(open("model.pkl", "rb"))
30
  encoder = pickle.load(open("encoder.pkl", "rb"))
31
  scaler = pickle.load(open("scaler.pkl", "rb"))
@@ -35,6 +44,9 @@ header = st.container()
35
  dataset = st.container()
36
  features_and_output = st.container()
37
 
 
 
 
38
  # Designing the sidebar
39
  st.sidebar.header("Brief overview of the Columns")
40
  st.sidebar.markdown("""
@@ -57,18 +69,26 @@ with dataset:
57
  dataset.write("Further information will preview when take a look at the sidebar")
58
  dataset.write("---")
59
 
 
 
 
60
  # Icon for the page
61
  image = Image.open(r"beautiful image.png")
62
 
63
  # inputs from the user
64
- form = st.form(key="information", clear_on_submit=False)
65
 
66
  # Structuring the header section
67
  with header:
68
  header.write("This an application to build a model that more accurately predicts the unit sales for thousands of items sold at different Favorita stores")
 
69
  header.image(image)
 
70
  header.write("---")
71
 
 
 
 
72
  # Structuring the features and output section
73
  with features_and_output:
74
  features_and_output.subheader("Inputs")
@@ -80,77 +100,108 @@ with features_and_output:
80
  with form:
81
  left_col.markdown("***Combined data on Product and Transaction***")
82
  date = left_col.date_input("Select a date:")
83
- family = left_col.selectbox("Product family:", options=sorted(list(data["family"].unique())))
84
- onpromotion = left_col.number_input("Number of products on promotion:", min_value=data["onpromotion"].min(), value=data["onpromotion"].min())
85
- city = left_col.selectbox("City:", options=sorted(set(data["city"])))
86
-
87
  mid_col.markdown("***Data on Location and type***")
88
- store_nbr = mid_col.selectbox("Store number:", options=sorted(set(data["store_nbr"])))
89
- type_x = mid_col.radio("type_x:", options=sorted(set(data["type_x"])), horizontal=True)
90
- type_y = mid_col.radio("type_y:", options=sorted(set(data["type_y"])), horizontal=True)
91
- cluster = mid_col.select_slider("Store cluster:", options=sorted(set(data["cluster"])))
92
- state = mid_col.selectbox("State:", options=sorted(set(data["state"])))
93
-
94
  right_col.markdown("***Data on Economical Factors***")
95
- oil_price = right_col.number_input("Oil price:", min_value=data["oil_price"].min(), value=data["oil_price"].min())
96
-
97
- # No submission point, directly make prediction and show results
98
- if form:
99
- input_dict = {
100
- 'store_nbr': store_nbr,
101
- 'cluster': cluster,
102
- 'city': city,
103
- 'state': state,
104
- 'family': family,
105
- 'type_x': type_x,
106
- 'type_y': type_y,
107
- 'onpromotion': onpromotion,
108
- 'oil_price': oil_price,
109
- 'date': date
110
- }
111
- input_df = pd.DataFrame([input_dict])
112
 
113
- # Function to extract date features
114
def getDateFeatures(df):
    """Return a copy of ``df`` with calendar features replacing the ``date`` column.

    The input frame is left untouched; every derived column is written to a
    copy, and the ``date`` column itself is removed from the result.

    Args:
        df: DataFrame with a ``date`` column that ``pd.to_datetime`` can parse.

    Returns:
        A new DataFrame holding the original non-date columns followed by
        ``day_of_week``, ``day_of_month``, ``month``, ``year``, ``quarter``,
        the ``is_*`` 0/1 flags, ``days_since_start`` and ``days_until_end``.
    """
    out = df.copy()
    dates = pd.to_datetime(out['date'])

    out['day_of_week'] = dates.dt.dayofweek
    out['day_of_month'] = dates.dt.day
    out['month'] = dates.dt.month
    out['year'] = dates.dt.year
    out['quarter'] = dates.dt.quarter
    # dayofweek is 0 for Monday, so 5 and 6 are Saturday and Sunday.
    out['is_weekend'] = out['day_of_week'].isin([5, 6]).astype(int)

    # Boolean calendar markers stored as 0/1 integers.
    for flag in ('is_month_start', 'is_month_end',
                 'is_quarter_start', 'is_quarter_end',
                 'is_year_start', 'is_year_end'):
        out[flag] = getattr(dates.dt, flag).astype(int)

    # Offsets are measured against 1 January of the earliest year and
    # 31 December of the latest year present in this frame.
    first_day = pd.Timestamp(year=int(dates.dt.year.min()), month=1, day=1)
    last_day = pd.Timestamp(year=int(dates.dt.year.max()), month=12, day=31)
    out['days_since_start'] = (dates - first_day).dt.days
    out['days_until_end'] = (last_day - dates).dt.days

    return out.drop(columns='date')
132
-
133
- # Define the function to make predictions
134
def predict_sales(input_data, input_df):
    """Predict sales for ``input_df`` using the pre-trained scaler, encoder and model.

    Uses the module-level ``scaler``, ``encoder`` and ``model`` objects.

    Args:
        input_data: Unused. Kept so callers that pass ``input_df.values`` as
            the first argument keep working.
        input_df: DataFrame containing the categorical columns ``family``,
            ``city``, ``state``, ``type_y`` and ``type_x``; every other column
            is treated as numeric.

    Returns:
        The value returned by ``model.predict`` for the assembled features.
    """
    import numpy as np  # local import keeps this block self-contained

    def _dense(block):
        # Encoders may return a scipy sparse matrix, an ndarray or a DataFrame.
        return block.toarray() if hasattr(block, "toarray") else np.asarray(block)

    categoric_columns = ['family', 'city', 'state', 'type_y', 'type_x']
    numeric_columns = [col for col in input_df.columns if col not in categoric_columns]

    # Use transform(), never fit_transform(): refitting on the inference row
    # would discard the statistics the scaler learned during training.
    # NOTE(review): assumes the scaler was fitted on these numeric columns in
    # this order - confirm against the training notebook.
    scaled_num = scaler.transform(input_df[numeric_columns])
    encoded_cat = encoder.transform(input_df[categoric_columns])

    # Transformer outputs are arrays/sparse matrices, which pd.concat rejects;
    # stack them column-wise with NumPy instead (numeric first, then encoded).
    features = np.hstack([_dense(scaled_num), _dense(encoded_cat)])

    # The model receives a single 2-D row, as in the original implementation.
    return model.predict(features.reshape(1, -1))
144
-
145
- input_df = getDateFeatures(input_df)
146
- input_df = input_df.drop(columns=['date'], axis=1)
147
-
148
- if st.button('Predict'):
149
- prediction = predict_sales(input_df.values, input_df)
150
- st.success('The predicted sales amount is $' + str(round(prediction[0], 2)))
151
-
152
- # Defining and structuring the footer
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
  footer = st.expander("**Subsequent Information**")
154
  with footer:
155
  if footer.button("Special Thanks"):
156
- footer.markdown("*We want to express our appreciation and gratitude to Emmanuel, Rachel, Mavies, and Richard for their great insights and contributions!*")
 
7
  import re
8
  from pathlib import Path
9
  from PIL import Image
10
+
11
 
12
  # Setting the page configurations
13
+ st.set_page_config(page_title= "Sales Prediction Forecasting", page_icon= ":heavy_dollar_sign:", layout= "wide", initial_sidebar_state= "auto")
14
 
15
  # Setting the page title
16
  st.title("Grocery Store Sales Time Series Model Prediction")
17
 
18
+
19
+
20
+
21
  # Function to load the dataset
22
@st.cache_resource
def load_data(relative_path):
    """Read the CSV file at ``relative_path`` into a DataFrame.

    The first column of the file is used as the row index. The call is
    wrapped in Streamlit's resource cache, so the file is not re-read on
    every script rerun.

    Args:
        relative_path: Path of the CSV file to load.

    Returns:
        The DataFrame produced by ``pd.read_csv``.
    """
    return pd.read_csv(relative_path, index_col=0)
27
 
28
+
29
+
30
+
31
  # Loading the base dataframe
32
  rpath = r"merged_train_data.csv"
33
  data = load_data(rpath)
34
 
35
+
36
+
37
+ # Load the model, encoder and scaler
38
  model = pickle.load(open("model.pkl", "rb"))
39
  encoder = pickle.load(open("encoder.pkl", "rb"))
40
  scaler = pickle.load(open("scaler.pkl", "rb"))
 
44
  dataset = st.container()
45
  features_and_output = st.container()
46
 
47
+
48
+
49
+
50
  # Designing the sidebar
51
  st.sidebar.header("Brief overview of the Columns")
52
  st.sidebar.markdown("""
 
69
  dataset.write("Further information will preview when take a look at the sidebar")
70
  dataset.write("---")
71
 
72
+
73
+
74
+
75
  # Icon for the page
76
  image = Image.open(r"beautiful image.png")
77
 
78
  # inputs from the user
79
+ form = st.form(key="information", clear_on_submit=True)
80
 
81
  # Structuring the header section
82
  with header:
83
  header.write("This an application to build a model that more accurately predicts the unit sales for thousands of items sold at different Favorita stores")
84
+
85
  header.image(image)
86
+
87
  header.write("---")
88
 
89
+
90
+
91
+
92
  # Structuring the features and output section
93
  with features_and_output:
94
  features_and_output.subheader("Inputs")
 
100
  with form:
101
  left_col.markdown("***Combined data on Product and Transaction***")
102
  date = left_col.date_input("Select a date:")
103
+ family = left_col.selectbox("Product family:", options= sorted(list(data["family"].unique())))
104
+ onpromotion = left_col.number_input("Number of products on promotion:", min_value= data["onpromotion"].min(), value= data["onpromotion"].min())
105
+ city = left_col.selectbox("City:", options= sorted(set(data["city"])))
106
+
107
  mid_col.markdown("***Data on Location and type***")
108
+ store_nbr = mid_col.selectbox("Store number:", options= sorted(set(data["store_nbr"])))
109
+ type_x = mid_col.radio("type_x:", options= sorted(set(data["type_x"])), horizontal= True)
110
+ type_y = mid_col.radio("type_y:", options= sorted(set(data["type_y"])), horizontal= True)
111
+ cluster = mid_col.select_slider("Store cluster:", options= sorted(set(data["cluster"])))
112
+ state = mid_col.selectbox("State:", options= sorted(set(data["state"])))
113
+
114
  right_col.markdown("***Data on Economical Factors***")
115
+ oil_price = right_col.number_input("Oil price:", min_value= data["oil_price"].min(), value= data["oil_price"].min())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
 
117
+ # Submission point
118
+ submitted = form.form_submit_button(label= "Submit button")
119
+
120
+ if submitted:
121
+ with features_and_output:
122
+ input_features = {
123
+ "date":[date],
124
+ "store_nbr": [store_nbr],
125
+ "family": [family],
126
+ "onpromotion": [onpromotion],
127
+ "city": [city],
128
+ "state": [state],
129
+ "type_x": [type_x],
130
+ "cluster":[cluster],
131
+ "oil_price": [oil_price],
132
+ "type_y": [type_y],
133
+ }
134
+
135
+
136
+
137
+ # Define the function to make predictions
138
def predict_sales(input_data, input_df):
    """Predict sales for ``input_df`` using the pre-trained scaler, encoder and model.

    Uses the module-level ``scaler``, ``encoder`` and ``model`` objects.

    Args:
        input_data: Unused. Kept so callers that pass ``input_df.values`` as
            the first argument keep working.
        input_df: DataFrame containing the categorical columns ``family``,
            ``city``, ``state``, ``type_y`` and ``type_x``; every other column
            is treated as numeric.

    Returns:
        The value returned by ``model.predict`` for the assembled features.
    """
    import numpy as np  # local import keeps this block self-contained

    def _dense(block):
        # Encoders may return a scipy sparse matrix, an ndarray or a DataFrame.
        return block.toarray() if hasattr(block, "toarray") else np.asarray(block)

    categoric_columns = ['family', 'city', 'state', 'type_y', 'type_x']
    numeric_columns = [col for col in input_df.columns if col not in categoric_columns]

    # Use transform(), never fit_transform(): refitting on the inference row
    # would discard the statistics the scaler learned during training.
    # NOTE(review): assumes the scaler was fitted on these numeric columns in
    # this order - confirm against the training notebook.
    scaled_num = scaler.transform(input_df[numeric_columns])
    encoded_cat = encoder.transform(input_df[categoric_columns])

    # Transformer outputs are arrays/sparse matrices, which pd.concat rejects;
    # stack them column-wise with NumPy instead (numeric first, then encoded).
    features = np.hstack([_dense(scaled_num), _dense(encoded_cat)])

    # The model receives a single 2-D row, as in the original implementation.
    return model.predict(features.reshape(1, -1))
150
+
151
+ #Convert input parameters to a pandas DataFrame
152
+ input_dict = {
153
+ 'store_nbr': store_nbr,
154
+ 'cluster': cluster,
155
+ 'city': city,
156
+ 'state': state,
157
+ 'family': family,
158
+ 'type_x': type_x,
159
+ 'type_y': type_y,
160
+ 'onpromotion': onpromotion,
161
+ 'oil_price': oil_price,
162
+ 'date' : date
163
+ }
164
+ input_df = pd.DataFrame([input_dict])
165
+
166
+
167
def getDateFeatures(df):
    """Return a copy of ``df`` extended with calendar features derived from ``date``.

    The input frame is not modified. The ``date`` column is kept (converted
    to datetimes) so the caller can drop it afterwards.

    The function is intentionally not wrapped in a Streamlit cache: it is a
    cheap transformation of per-request data, and a resource cache would hand
    the same mutable DataFrame object back to every caller.

    Args:
        df: DataFrame with a ``date`` column. Values that cannot be parsed
            become ``NaT`` rather than raising.

    Returns:
        A new DataFrame with the original columns followed by ``month``,
        ``day_of_month``, ``day_of_year``, ``week_of_year``, ``day_of_week``,
        ``year``, ``is_weekend``, ``is_month_start``, ``quarter``,
        ``is_month_end``, ``is_quarter_start``, ``is_quarter_end``,
        ``is_year_start``, ``is_year_end``, ``season``, ``pay_day`` and
        ``earthquake_impact``.
    """
    out = df.copy()
    out['date'] = pd.to_datetime(out['date'], errors='coerce')
    dates = out['date']

    out['month'] = dates.dt.month
    out['day_of_month'] = dates.dt.day
    out['day_of_year'] = dates.dt.dayofyear
    # isocalendar() yields a nullable unsigned integer; store it as float.
    out['week_of_year'] = dates.dt.isocalendar().week.astype(float)
    out['day_of_week'] = dates.dt.dayofweek
    out['year'] = dates.dt.year
    # dayofweek is 0 for Monday, so values above 4 are Saturday and Sunday.
    out['is_weekend'] = np.where(out['day_of_week'] > 4, 1, 0)
    out['is_month_start'] = dates.dt.is_month_start.astype(int)
    out['quarter'] = dates.dt.quarter
    out['is_month_end'] = dates.dt.is_month_end.astype(int)
    out['is_quarter_start'] = dates.dt.is_quarter_start.astype(int)
    out['is_quarter_end'] = dates.dt.is_quarter_end.astype(int)
    out['is_year_start'] = dates.dt.is_year_start.astype(int)
    out['is_year_end'] = dates.dt.is_year_end.astype(int)

    # Season code: 0 for Dec-Feb, 2 for Jun-Aug, 3 for Sep-Nov, 1 otherwise.
    # The result is assigned as a plain array so it is placed positionally;
    # wrapping it in a new Series would align on a 0..n-1 index and produce
    # NaN for frames whose index is not the default range.
    month = out['month']
    out['season'] = np.select(
        [
            month.isin([12, 1, 2]).to_numpy(),
            month.isin([6, 7, 8]).to_numpy(),
            month.isin([9, 10, 11]).to_numpy(),
        ],
        [0, 2, 3],
        default=1,
    ).astype('int8')

    # Flag the 15th and the last day of each month.
    out['pay_day'] = np.where(
        (out['day_of_month'] == 15) | (out['is_month_end'] == 1), 1, 0)

    # Flag dates from 2016-04-16 through 2016-12-31 inclusive.
    quake_days = pd.date_range(start='2016-04-16', end='2016-12-31', freq='D')
    out['earthquake_impact'] = np.where(dates.isin(quake_days), 1, 0)

    return out
194
+ input_df = getDateFeatures(input_df)
195
+ input_df = input_df.drop(columns= ['date'], axis=1)
196
+
197
+ # Make prediction and show results
198
+ if st.button('Predict'):
199
+ prediction = predict_sales(input_df.values, input_df)
200
+ st.success('The predicted sales amount is $' + str(round(prediction[0],2)))
201
+
202
+
203
+ # ----- Defining and structuring the footer
204
  footer = st.expander("**Subsequent Information**")
205
  with footer:
206
  if footer.button("Special Thanks"):
207
+ footer.markdown("*We want to express our appreciation and gratitude to Emmanuel,Racheal, Mavies and Richard for their great insights and contributions!*")