Configuration error
Configuration error
updated name
Browse files
@@ -1,417 +1,416 @@
1 |
import streamlit as st
2 |
import pandas as pd
3 |
import time
4 |
from datetime import datetime
5 |
6 |
import numpy as np
7 |
import pmdarima as pm
8 |
import matplotlib.pyplot as plt
9 |
from pmdarima import auto_arima
10 |
import plotly.graph_objects as go
11 |
12 |
import torch
13 |
from transformers import pipeline, TapasTokenizer, TapasForQuestionAnswering
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
# Preprocessing
23 |
24 |
def merge(B, C, A):
    """Merge two date-sorted frames into ``A``, ordered by ``Date``.

    Args:
        B: Left run with ``Date`` and ``Sales`` columns, sorted by ``Date``.
        C: Right run with the same columns, sorted by ``Date``.
        A: Destination frame; its first ``len(B) + len(C)`` rows are
            overwritten in place.

    Returns:
        ``A`` with ``Date`` converted to datetimes and rows in merged order.

    Raises:
        IndexError: If ``A`` has fewer rows than ``B`` and ``C`` combined.
    """
    # Snapshot both runs before touching A: merge_sort passes iloc slices of
    # A as B and C, so writing first could clobber rows not yet read.
    left_dates = pd.to_datetime(B['Date']).tolist()
    left_sales = B['Sales'].tolist()
    right_dates = pd.to_datetime(C['Date']).tolist()
    right_sales = C['Sales'].tolist()

    total = len(left_dates) + len(right_dates)
    if total > len(A):
        raise IndexError("destination frame is shorter than the merged runs")

    merged_dates = []
    merged_sales = []
    i = j = 0
    while i < len(left_dates) and j < len(right_dates):
        # "<=" takes from the left run on ties, keeping the merge stable.
        if left_dates[i] <= right_dates[j]:
            merged_dates.append(left_dates[i])
            merged_sales.append(left_sales[i])
            i += 1
        else:
            merged_dates.append(right_dates[j])
            merged_sales.append(right_sales[j])
            j += 1

    # At most one of the runs still has elements left.
    merged_dates.extend(left_dates[i:])
    merged_sales.extend(left_sales[i:])
    merged_dates.extend(right_dates[j:])
    merged_sales.extend(right_sales[j:])

    A['Date'] = pd.to_datetime(A['Date'])
    if total:
        # Positional writes on the frame itself; A['Date'].iloc[k] = ... is
        # chained assignment and may only update a temporary copy.
        A.iloc[:total, A.columns.get_loc('Date')] = merged_dates
        A.iloc[:total, A.columns.get_loc('Sales')] = merged_sales
    return A
57 |
58 |
59 |
def merge_sort(dataframe):
    """Sort a frame by ``Date`` using a top-down merge sort.

    Args:
        dataframe: Frame with ``Date`` and ``Sales`` columns.

    Returns:
        The same frame object, with its rows rewritten in ascending ``Date``
        order by ``merge``. Frames of length 0 or 1 are returned untouched.
    """
    if len(dataframe) <= 1:
        return dataframe

    center = len(dataframe) // 2
    # Each half must itself be sorted before merging. Copies keep the halves
    # independent of ``dataframe``, which merge() overwrites in place.
    left = merge_sort(dataframe.iloc[:center].copy())
    right = merge_sort(dataframe.iloc[center:].copy())
    return merge(left, right, dataframe)
71 |
72 |
73 |
def drop(dataframe):
    """Keep only the date/sales columns and discard incomplete rows.

    Args:
        dataframe: Frame whose column labels are searched, case-insensitively,
            for the substrings ``"date"`` and ``"sale"``.

    Returns:
        A new frame holding only the matching columns (original order), with
        every row that contains a missing value removed.
    """
    wanted = ("date", "sale")
    # str() lets non-string labels (e.g. integer headers) be tested instead
    # of raising AttributeError on .lower().
    keep = [
        any(token in str(label).lower() for token in wanted)
        for label in dataframe.columns
    ]
    return dataframe.loc[:, keep].dropna()
82 |
83 |
84 |
def date_format(dataframe):
    """Parse the ``Date`` column from ``MM/DD/YYYY`` text into ``date`` objects.

    Args:
        dataframe: Frame with a string ``Date`` column; modified in place.

    Returns:
        The same frame, with ``Date`` holding ``datetime.date`` values.

    Raises:
        ValueError: If a value does not match ``%m/%d/%Y`` after stripping
            surrounding whitespace.
    """
    # Assign the whole column at once: dataframe['Date'][i] = ... is chained
    # assignment, which pandas may apply to a temporary copy.
    dataframe['Date'] = [
        datetime.strptime(raw.strip(), "%m/%d/%Y").date()
        for raw in dataframe['Date']
    ]
    return dataframe
93 |
94 |
95 |
def group_to_three(dataframe):
    """Average ``Sales`` over consecutive 3-day windows.

    Args:
        dataframe: Frame with ``Date`` and ``Sales`` columns. Its ``Date``
            column is converted to datetimes in place.

    Returns:
        A Series of window means rounded to 2 decimals, indexed by window
        start date. Windows whose mean is missing or exactly 0 are dropped.
    """
    dataframe['Date'] = pd.to_datetime(dataframe['Date'])
    window_means = (
        dataframe.groupby([pd.Grouper(key='Date', freq='3D')])['Sales']
        .mean()
        .round(2)
    )
    # Zero means are turned into NaN so one dropna() removes them together
    # with the empty windows.
    return window_means.replace(0, np.nan).dropna()
101 |
102 |
103 |
def series_to_df_exogenous(series):
    """Build the modelling frame: sales plus two differenced regressors.

    Args:
        series: ``Sales`` series whose index is named ``Date``.

    Returns:
        A frame indexed by ``Date`` with columns ``Sales``,
        ``Sales First Difference`` (lag 1) and ``Seasonal First Difference``
        (lag 12). Rows with any missing value are removed, which drops the
        first 12 rows because the lag-12 difference is undefined there.
    """
    frame = series.to_frame().reset_index().set_index('Date').dropna()
    # Create the eXogenous values
    frame['Sales First Difference'] = frame['Sales'].diff(1)
    frame['Seasonal First Difference'] = frame['Sales'].diff(12)
    return frame.dropna()
113 |
114 |
115 |
def dates_df(dataframe):
    """Return an all-string copy of the frame with human-readable dates.

    Args:
        dataframe: Frame with a datetime index named ``Date``.

    Returns:
        A new frame where ``Date`` is a regular column formatted like
        ``"December 05, 2021"`` and every column is converted to ``str``.
    """
    frame = dataframe.reset_index()
    frame['Date'] = frame['Date'].dt.strftime('%B %d, %Y')
    return frame.astype(str)
120 |
121 |
122 |
def get_forecast_period(period):
    """Convert a number of days into a number of 3-day steps.

    Args:
        period: Number of days.

    Returns:
        ``period / 3`` rounded to the nearest integer with ``round``.
    """
    return round(period / 3)
124 |
125 |
126 |
127 |
def train_test(dataframe, n):
    """Split the modelling frame into training and test parts.

    Args:
        dataframe: Frame whose first column is the target and whose
            remaining columns are the regressors.
        n: Number of trailing rows reserved for testing.

    Returns:
        A tuple ``(training_y, test_y, test_y_series, training_X, test_X,
        future_X)``; ``future_X`` holds the regressors of every row.
    """
    # Use an explicit split position: with n == 0 the slices [:-0] and [-0:]
    # would give an empty training set and a full test set.
    split = max(len(dataframe) - n, 0)

    training_y = dataframe.iloc[:split, 0]
    test_y = dataframe.iloc[split:, 0]
    test_y_series = pd.Series(test_y, index=test_y.index)
    training_X = dataframe.iloc[:split, 1:]
    test_X = dataframe.iloc[split:, 1:]
    future_X = dataframe.iloc[0:, 1:]
    return (training_y, test_y, test_y_series, training_X, test_X, future_X)
135 |
136 |
137 |
def test_fitting(dataframe, Exo, trainY):
    """Fit a seasonal auto-ARIMA model on the training split.

    Args:
        dataframe: Not used by this function; kept so existing callers
            continue to work.
        Exo: Exogenous regressors, passed to ``auto_arima`` as ``X``.
        trainY: Training target, passed to ``auto_arima`` as ``y``.

    Returns:
        The model object returned by ``auto_arima``.
    """
    return auto_arima(
        X=Exo, y=trainY,
        start_p=1, start_q=1,
        max_p=3, max_q=3, m=12,
        start_P=2, start_Q=2, seasonal=True,
        d=None, D=1, trace=True,
        stepwise=True, maxiter=50,
    )
148 |
149 |
def forecast_accuracy(forecast, actual):
    """Compute accuracy metrics for a forecast against observed values.

    Args:
        forecast: One-dimensional sequence of predicted values.
        actual: One-dimensional sequence of observed values, same length.

    Returns:
        A dict with ``'mape'`` (fraction, rounded to 4 decimals), ``'rmse'``
        (rounded to 2 decimals), ``'corr'`` (Pearson correlation) and
        ``'min-max'`` (1 minus the mean of element-wise min/max ratios).
    """
    # Coerce up front so lists and pandas Series work as well as ndarrays.
    forecast = np.asarray(forecast, dtype=float)
    actual = np.asarray(actual, dtype=float)
    errors = forecast - actual

    mape = np.mean(np.abs(errors) / np.abs(actual)).round(4)  # MAPE
    rmse = (np.mean(errors ** 2) ** .5).round(2)  # RMSE
    corr = np.corrcoef(forecast, actual)[0, 1]  # corr
    mins = np.minimum(forecast, actual)
    maxs = np.maximum(forecast, actual)
    minmax = 1 - np.mean(mins / maxs)  # minmax
    return {'mape': mape, 'rmse': rmse, 'corr': corr, 'min-max': minmax}
159 |
160 |
161 |
def sales_growth(dataframe, fittedValues):
    """Tabulate forecasted sales with step-to-step difference and growth.

    Args:
        dataframe: Historical frame with a ``Sales`` column of at least two
            rows; used to fill the first row of the result.
        fittedValues: Forecast Series indexed by date.

    Returns:
        A frame indexed by ``Date`` with columns ``Sales`` (rounded to 2
        decimals), ``Forecasted Sales First Difference`` and
        ``Forecasted Sales Growth`` (percent, rounded to 2 decimals).
    """
    growth = fittedValues.to_frame().reset_index()
    growth.columns = ("Date", "Sales")
    growth = growth.set_index('Date')
    growth['Sales'] = growth['Sales'].round(2)

    # Calculate and create the column for sales difference and growth
    previous = growth['Sales'].shift(1)
    step = growth['Sales'] - previous
    growth['Forecasted Sales First Difference'] = step.round(2)
    growth['Forecasted Sales Growth'] = ((step / previous) * 100).round(2)

    # The first forecast row has no predecessor, so it is filled from the
    # last two historical observations.
    # NOTE(review): the growth here divides by the latest historical value,
    # while the other rows divide by the previous value - confirm intent.
    last_sales = dataframe['Sales'].iloc[-1]
    last_step = last_sales - dataframe['Sales'].iloc[-2]
    diff_col = growth.columns.get_loc('Forecasted Sales First Difference')
    rate_col = growth.columns.get_loc('Forecasted Sales Growth')
    # Write through the frame itself; growth[col].iloc[0] = ... is chained
    # assignment and may only update a temporary copy.
    growth.iloc[0, diff_col] = last_step.round(2)
    growth.iloc[0, rate_col] = ((last_step / last_sales) * 100).round(2)

    return growth
178 |
179 |
180 |
def merge_forecast_data(actual, predicted, future):
    """Align actual, predicted and future sales side by side.

    Args:
        actual: Series of observed sales.
        predicted: Series of predictions.
        future: Series of forecasts; its index is renamed to ``Date``.

    Returns:
        A frame whose first column is the former (combined) index, followed
        by ``Actual Sales``, ``Predicted Sales`` and
        ``Forecasted Future Sales``. Positions a series does not cover hold
        missing values.
    """
    actual_frame = actual.to_frame(name="Actual Sales")
    predicted_frame = predicted.to_frame(name="Predicted Sales")
    future_frame = future.to_frame(name="Forecasted Future Sales").rename_axis('Date')

    # Column-wise concat aligns the three frames on the union of their indexes.
    merged_dataframe = pd.concat(
        [actual_frame, predicted_frame, future_frame], axis=1
    )
    return merged_dataframe.reset_index()
206 |
207 |
def interpret_mape(mape_score):
    """Turn a MAPE fraction into a percentage, a verdict and a colour.

    Args:
        mape_score: MAPE as a fraction (``0.1`` means 10%). Plain floats and
            NumPy scalars are both accepted.

    Returns:
        A tuple ``(score, interpretation, color)`` where ``score`` is the
        percentage rounded to 2 decimals.
    """
    # np.round works for plain floats too, unlike the .round() method.
    score = np.round(mape_score * 100, 2)
    if score < 10:
        interpretation = "Great"
        color = "green"
    elif score < 20:
        interpretation = "Good"
        color = "seagreen"
    elif score < 50:
        interpretation = "Relatively good"
        color = "orange"
    else:
        # Only scores of 50% and above are "Poor"; without this branch the
        # earlier verdicts would be overwritten unconditionally.
        interpretation = "Poor"
        color = "red"
    return score, interpretation, color
222 |
223 |
# TAPAS Model
224 |
225 |
226 |
def load_tapas_model():
    """Build the table-question-answering pipeline.

    Loads the tokenizer and model weights for
    ``google/tapas-large-finetuned-wtq`` and wraps them in a
    ``table-question-answering`` pipeline.

    Returns:
        The pipeline object.
    """
    # NOTE(review): this is called at module level, so unless it is cached
    # elsewhere the model is rebuilt whenever the script runs - confirm.
    model_name = "google/tapas-large-finetuned-wtq"
    tokenizer = TapasTokenizer.from_pretrained(model_name)
    model = TapasForQuestionAnswering.from_pretrained(model_name, local_files_only=False)
    return pipeline("table-question-answering", model=model, tokenizer=tokenizer)
232 |
233 |
pipe = load_tapas_model()
234 |
235 |
def get_answer(table, query):
    """Run the module-level ``pipe`` on a table and a question.

    Args:
        table: Table passed to the pipeline as ``table``.
        query: Question passed to the pipeline as ``query``.

    Returns:
        Whatever the pipeline returns for this table/query pair.
    """
    return pipe(table=table, query=query)
238 |
239 |
def convert_answer(answer):
    """Reduce a pipeline answer to a single value.

    Args:
        answer: Mapping with an ``'answer'`` string, a ``'cells'`` list of
            cell strings and, optionally, an ``'aggregator'`` name.

    Returns:
        The sum or mean of the cells as a float for ``SUM`` / ``AVERAGE``,
        the number of cells for ``COUNT``, and the raw ``'answer'`` string
        otherwise (including ``AVERAGE`` over no cells).

    Raises:
        ValueError: If a cell selected for ``SUM`` or ``AVERAGE`` is not
            numeric once thousands separators are removed.
    """
    # .get(): an answer without aggregation info is returned as-is.
    aggregator = answer.get('aggregator')
    cells = answer.get('cells', [])

    if aggregator == 'COUNT':
        # COUNT means "how many cells were selected"; the cell contents are
        # irrelevant and may not even be integers.
        return len(cells)

    if aggregator in ('SUM', 'AVERAGE'):
        values = [float(value.replace(',', '')) for value in cells]
        if aggregator == 'SUM':
            return sum(values)
        if values:
            return sum(values) / len(values)
        # An average over nothing is undefined; fall through to raw answer.

    return answer['answer']
259 |
260 |
def get_converted_answer(table, query):
    """Ask a question about a table and post-process the answer.

    Args:
        table: Table forwarded to ``get_answer``.
        query: Question forwarded to ``get_answer``.

    Returns:
        The result of ``convert_answer`` applied to the raw answer.
    """
    return convert_answer(get_answer(table, query))
263 |
264 |
# Session States
265 |
# Seed each flag with False only when it is not already in the session state.
if 'uploaded' not in st.session_state:
    st.session_state.uploaded = False

if 'forecasted' not in st.session_state:
    st.session_state.forecasted = False
270 |
271 |
# Web Application
272 |
st.title("Forecasting Dashboard 📈")
273 |
if not st.session_state.uploaded:
274 |
st.subheader("Welcome User, get started forecasting by uploading your file in the sidebar!")
275 |
276 |
# Sidebar Menu
277 |
with st.sidebar:
278 |
279 |
280 |
281 |
282 |
283 |
284 |
285 |
286 |
287 |
288 |
289 |
290 |
291 |
292 |
293 |
294 |
295 |
296 |
297 |
298 |
299 |
df =
300 |
301 |
302 |
303 |
304 |
305 |
306 |
307 |
308 |
309 |
310 |
311 |
312 |
313 |
314 |
315 |
316 |
317 |
318 |
319 |
320 |
321 |
322 |
323 |
324 |
325 |
326 |
327 |
328 |
329 |
330 |
331 |
332 |
333 |
print(df) # debug
334 |
335 |
336 |
337 |
338 |
339 |
340 |
341 |
fitted_series =
342 |
343 |
344 |
345 |
346 |
347 |
348 |
349 |
350 |
351 |
352 |
future_fitted_series =
353 |
354 |
355 |
356 |
357 |
358 |
359 |
360 |
361 |
362 |
363 |
364 |
365 |
366 |
367 |
368 |
369 |
370 |
371 |
372 |
373 |
374 |
375 |
376 |
fig_compare =
377 |
fig_compare.add_trace(go.Scatter(x=merged_data[merged_data.columns[0]], y=merged_data['
378 |
379 |
380 |
381 |
382 |
383 |
fig_forecast =
384 |
fig_forecast.add_trace(go.Scatter(x=merged_data[merged_data.columns[0]], y=merged_data['
385 |
386 |
387 |
388 |
389 |
390 |
391 |
392 |
393 |
394 |
395 |
396 |
397 |
398 |
399 |
400 |
401 |
402 |
403 |
404 |
405 |
406 |
407 |
408 |
409 |
410 |
411 |
412 |
413 |
414 |
415 |
416 |
417 |
st.markdown(hide_st_style, unsafe_allow_html=True)
1 |
import streamlit as st
2 |
import pandas as pd
3 |
import time
4 |
from datetime import datetime
5 |
6 |
import numpy as np
7 |
import pmdarima as pm
8 |
import matplotlib.pyplot as plt
9 |
from pmdarima import auto_arima
10 |
import plotly.graph_objects as go
11 |
12 |
import torch
13 |
from transformers import pipeline, TapasTokenizer, TapasForQuestionAnswering
14 |
15 |
16 |
page_title="Sales Predictor-AI Project",
17 |
18 |
19 |
20 |
21 |
22 |
# Preprocessing
23 |
24 |
def merge(B, C, A):
    """Merge two date-sorted frames into ``A``, ordered by ``Date``.

    Args:
        B: Left run with ``Date`` and ``Sales`` columns, sorted by ``Date``.
        C: Right run with the same columns, sorted by ``Date``.
        A: Destination frame; its first ``len(B) + len(C)`` rows are
            overwritten in place.

    Returns:
        ``A`` with ``Date`` converted to datetimes and rows in merged order.

    Raises:
        IndexError: If ``A`` has fewer rows than ``B`` and ``C`` combined.
    """
    # Snapshot both runs before touching A: merge_sort passes iloc slices of
    # A as B and C, so writing first could clobber rows not yet read.
    left_dates = pd.to_datetime(B['Date']).tolist()
    left_sales = B['Sales'].tolist()
    right_dates = pd.to_datetime(C['Date']).tolist()
    right_sales = C['Sales'].tolist()

    total = len(left_dates) + len(right_dates)
    if total > len(A):
        raise IndexError("destination frame is shorter than the merged runs")

    merged_dates = []
    merged_sales = []
    i = j = 0
    while i < len(left_dates) and j < len(right_dates):
        # "<=" takes from the left run on ties, keeping the merge stable.
        if left_dates[i] <= right_dates[j]:
            merged_dates.append(left_dates[i])
            merged_sales.append(left_sales[i])
            i += 1
        else:
            merged_dates.append(right_dates[j])
            merged_sales.append(right_sales[j])
            j += 1

    # At most one of the runs still has elements left.
    merged_dates.extend(left_dates[i:])
    merged_sales.extend(left_sales[i:])
    merged_dates.extend(right_dates[j:])
    merged_sales.extend(right_sales[j:])

    A['Date'] = pd.to_datetime(A['Date'])
    if total:
        # Positional writes on the frame itself; A['Date'].iloc[k] = ... is
        # chained assignment and may only update a temporary copy.
        A.iloc[:total, A.columns.get_loc('Date')] = merged_dates
        A.iloc[:total, A.columns.get_loc('Sales')] = merged_sales
    return A
57 |
58 |
59 |
def merge_sort(dataframe):
    """Sort a frame by ``Date`` using a top-down merge sort.

    Args:
        dataframe: Frame with ``Date`` and ``Sales`` columns.

    Returns:
        The same frame object, with its rows rewritten in ascending ``Date``
        order by ``merge``. Frames of length 0 or 1 are returned untouched.
    """
    if len(dataframe) <= 1:
        return dataframe

    center = len(dataframe) // 2
    # Each half must itself be sorted before merging. Copies keep the halves
    # independent of ``dataframe``, which merge() overwrites in place.
    left = merge_sort(dataframe.iloc[:center].copy())
    right = merge_sort(dataframe.iloc[center:].copy())
    return merge(left, right, dataframe)
71 |
72 |
73 |
def drop(dataframe):
    """Keep only the date/sales columns and discard incomplete rows.

    Args:
        dataframe: Frame whose column labels are searched, case-insensitively,
            for the substrings ``"date"`` and ``"sale"``.

    Returns:
        A new frame holding only the matching columns (original order), with
        every row that contains a missing value removed.
    """
    wanted = ("date", "sale")
    # str() lets non-string labels (e.g. integer headers) be tested instead
    # of raising AttributeError on .lower().
    keep = [
        any(token in str(label).lower() for token in wanted)
        for label in dataframe.columns
    ]
    return dataframe.loc[:, keep].dropna()
82 |
83 |
84 |
def date_format(dataframe):
    """Parse the ``Date`` column from ``MM/DD/YYYY`` text into ``date`` objects.

    Args:
        dataframe: Frame with a string ``Date`` column; modified in place.

    Returns:
        The same frame, with ``Date`` holding ``datetime.date`` values.

    Raises:
        ValueError: If a value does not match ``%m/%d/%Y`` after stripping
            surrounding whitespace.
    """
    # Assign the whole column at once: dataframe['Date'][i] = ... is chained
    # assignment, which pandas may apply to a temporary copy.
    dataframe['Date'] = [
        datetime.strptime(raw.strip(), "%m/%d/%Y").date()
        for raw in dataframe['Date']
    ]
    return dataframe
93 |
94 |
95 |
def group_to_three(dataframe):
    """Average ``Sales`` over consecutive 3-day windows.

    Args:
        dataframe: Frame with ``Date`` and ``Sales`` columns. Its ``Date``
            column is converted to datetimes in place.

    Returns:
        A Series of window means rounded to 2 decimals, indexed by window
        start date. Windows whose mean is missing or exactly 0 are dropped.
    """
    dataframe['Date'] = pd.to_datetime(dataframe['Date'])
    window_means = (
        dataframe.groupby([pd.Grouper(key='Date', freq='3D')])['Sales']
        .mean()
        .round(2)
    )
    # Zero means are turned into NaN so one dropna() removes them together
    # with the empty windows.
    return window_means.replace(0, np.nan).dropna()
101 |
102 |
103 |
def series_to_df_exogenous(series):
    """Build the modelling frame: sales plus two differenced regressors.

    Args:
        series: ``Sales`` series whose index is named ``Date``.

    Returns:
        A frame indexed by ``Date`` with columns ``Sales``,
        ``Sales First Difference`` (lag 1) and ``Seasonal First Difference``
        (lag 12). Rows with any missing value are removed, which drops the
        first 12 rows because the lag-12 difference is undefined there.
    """
    frame = series.to_frame().reset_index().set_index('Date').dropna()
    # Create the eXogenous values
    frame['Sales First Difference'] = frame['Sales'].diff(1)
    frame['Seasonal First Difference'] = frame['Sales'].diff(12)
    return frame.dropna()
113 |
114 |
115 |
def dates_df(dataframe):
    """Return an all-string copy of the frame with human-readable dates.

    Args:
        dataframe: Frame with a datetime index named ``Date``.

    Returns:
        A new frame where ``Date`` is a regular column formatted like
        ``"December 05, 2021"`` and every column is converted to ``str``.
    """
    frame = dataframe.reset_index()
    frame['Date'] = frame['Date'].dt.strftime('%B %d, %Y')
    return frame.astype(str)
120 |
121 |
122 |
def get_forecast_period(period):
    """Convert a number of days into a number of 3-day steps.

    Args:
        period: Number of days.

    Returns:
        ``period / 3`` rounded to the nearest integer with ``round``.
    """
    return round(period / 3)
124 |
125 |
126 |
127 |
def train_test(dataframe, n):
    """Split the modelling frame into training and test parts.

    Args:
        dataframe: Frame whose first column is the target and whose
            remaining columns are the regressors.
        n: Number of trailing rows reserved for testing.

    Returns:
        A tuple ``(training_y, test_y, test_y_series, training_X, test_X,
        future_X)``; ``future_X`` holds the regressors of every row.
    """
    # Use an explicit split position: with n == 0 the slices [:-0] and [-0:]
    # would give an empty training set and a full test set.
    split = max(len(dataframe) - n, 0)

    training_y = dataframe.iloc[:split, 0]
    test_y = dataframe.iloc[split:, 0]
    test_y_series = pd.Series(test_y, index=test_y.index)
    training_X = dataframe.iloc[:split, 1:]
    test_X = dataframe.iloc[split:, 1:]
    future_X = dataframe.iloc[0:, 1:]
    return (training_y, test_y, test_y_series, training_X, test_X, future_X)
135 |
136 |
137 |
def test_fitting(dataframe, Exo, trainY):
    """Fit a seasonal auto-ARIMA model on the training split.

    Args:
        dataframe: Not used by this function; kept so existing callers
            continue to work.
        Exo: Exogenous regressors, passed to ``auto_arima`` as ``X``.
        trainY: Training target, passed to ``auto_arima`` as ``y``.

    Returns:
        The model object returned by ``auto_arima``.
    """
    return auto_arima(
        X=Exo, y=trainY,
        start_p=1, start_q=1,
        max_p=3, max_q=3, m=12,
        start_P=2, start_Q=2, seasonal=True,
        d=None, D=1, trace=True,
        stepwise=True, maxiter=50,
    )
148 |
149 |
def forecast_accuracy(forecast, actual):
    """Compute accuracy metrics for a forecast against observed values.

    Args:
        forecast: One-dimensional sequence of predicted values.
        actual: One-dimensional sequence of observed values, same length.

    Returns:
        A dict with ``'mape'`` (fraction, rounded to 4 decimals), ``'rmse'``
        (rounded to 2 decimals), ``'corr'`` (Pearson correlation) and
        ``'min-max'`` (1 minus the mean of element-wise min/max ratios).
    """
    # Coerce up front so lists and pandas Series work as well as ndarrays.
    forecast = np.asarray(forecast, dtype=float)
    actual = np.asarray(actual, dtype=float)
    errors = forecast - actual

    mape = np.mean(np.abs(errors) / np.abs(actual)).round(4)  # MAPE
    rmse = (np.mean(errors ** 2) ** .5).round(2)  # RMSE
    corr = np.corrcoef(forecast, actual)[0, 1]  # corr
    mins = np.minimum(forecast, actual)
    maxs = np.maximum(forecast, actual)
    minmax = 1 - np.mean(mins / maxs)  # minmax
    return {'mape': mape, 'rmse': rmse, 'corr': corr, 'min-max': minmax}
159 |
160 |
161 |
def sales_growth(dataframe, fittedValues):
    """Tabulate forecasted sales with step-to-step difference and growth.

    Args:
        dataframe: Historical frame with a ``Sales`` column of at least two
            rows; used to fill the first row of the result.
        fittedValues: Forecast Series indexed by date.

    Returns:
        A frame indexed by ``Date`` with columns ``Sales`` (rounded to 2
        decimals), ``Forecasted Sales First Difference`` and
        ``Forecasted Sales Growth`` (percent, rounded to 2 decimals).
    """
    growth = fittedValues.to_frame().reset_index()
    growth.columns = ("Date", "Sales")
    growth = growth.set_index('Date')
    growth['Sales'] = growth['Sales'].round(2)

    # Calculate and create the column for sales difference and growth
    previous = growth['Sales'].shift(1)
    step = growth['Sales'] - previous
    growth['Forecasted Sales First Difference'] = step.round(2)
    growth['Forecasted Sales Growth'] = ((step / previous) * 100).round(2)

    # The first forecast row has no predecessor, so it is filled from the
    # last two historical observations.
    # NOTE(review): the growth here divides by the latest historical value,
    # while the other rows divide by the previous value - confirm intent.
    last_sales = dataframe['Sales'].iloc[-1]
    last_step = last_sales - dataframe['Sales'].iloc[-2]
    diff_col = growth.columns.get_loc('Forecasted Sales First Difference')
    rate_col = growth.columns.get_loc('Forecasted Sales Growth')
    # Write through the frame itself; growth[col].iloc[0] = ... is chained
    # assignment and may only update a temporary copy.
    growth.iloc[0, diff_col] = last_step.round(2)
    growth.iloc[0, rate_col] = ((last_step / last_sales) * 100).round(2)

    return growth
178 |
179 |
180 |
def merge_forecast_data(actual, predicted, future):
    """Align actual, predicted and future sales side by side.

    Args:
        actual: Series of observed sales.
        predicted: Series of predictions.
        future: Series of forecasts; its index is renamed to ``Date``.

    Returns:
        A frame whose first column is the former (combined) index, followed
        by ``Actual Sales``, ``Predicted Sales`` and
        ``Forecasted Future Sales``. Positions a series does not cover hold
        missing values.
    """
    actual_frame = actual.to_frame(name="Actual Sales")
    predicted_frame = predicted.to_frame(name="Predicted Sales")
    future_frame = future.to_frame(name="Forecasted Future Sales").rename_axis('Date')

    # Column-wise concat aligns the three frames on the union of their indexes.
    merged_dataframe = pd.concat(
        [actual_frame, predicted_frame, future_frame], axis=1
    )
    return merged_dataframe.reset_index()
206 |
207 |
def interpret_mape(mape_score):
    """Turn a MAPE fraction into a percentage, a verdict and a colour.

    Args:
        mape_score: MAPE as a fraction (``0.1`` means 10%). Plain floats and
            NumPy scalars are both accepted.

    Returns:
        A tuple ``(score, interpretation, color)`` where ``score`` is the
        percentage rounded to 2 decimals.
    """
    # np.round works for plain floats too, unlike the .round() method.
    score = np.round(mape_score * 100, 2)
    if score < 10:
        interpretation = "Great"
        color = "green"
    elif score < 20:
        interpretation = "Good"
        color = "seagreen"
    elif score < 50:
        interpretation = "Relatively good"
        color = "orange"
    else:
        # Only scores of 50% and above are "Poor"; without this branch the
        # earlier verdicts would be overwritten unconditionally.
        interpretation = "Poor"
        color = "red"
    return score, interpretation, color
222 |
223 |
# TAPAS Model
224 |
225 |
226 |
def load_tapas_model():
    """Build the table-question-answering pipeline.

    Loads the tokenizer and model weights for
    ``google/tapas-large-finetuned-wtq`` and wraps them in a
    ``table-question-answering`` pipeline.

    Returns:
        The pipeline object.
    """
    # NOTE(review): this is called at module level, so unless it is cached
    # elsewhere the model is rebuilt whenever the script runs - confirm.
    model_name = "google/tapas-large-finetuned-wtq"
    tokenizer = TapasTokenizer.from_pretrained(model_name)
    model = TapasForQuestionAnswering.from_pretrained(model_name, local_files_only=False)
    return pipeline("table-question-answering", model=model, tokenizer=tokenizer)
232 |
233 |
pipe = load_tapas_model()
234 |
235 |
def get_answer(table, query):
    """Run the module-level ``pipe`` on a table and a question.

    Args:
        table: Table passed to the pipeline as ``table``.
        query: Question passed to the pipeline as ``query``.

    Returns:
        Whatever the pipeline returns for this table/query pair.
    """
    return pipe(table=table, query=query)
238 |
239 |
def convert_answer(answer):
    """Reduce a pipeline answer to a single value.

    Args:
        answer: Mapping with an ``'answer'`` string, a ``'cells'`` list of
            cell strings and, optionally, an ``'aggregator'`` name.

    Returns:
        The sum or mean of the cells as a float for ``SUM`` / ``AVERAGE``,
        the number of cells for ``COUNT``, and the raw ``'answer'`` string
        otherwise (including ``AVERAGE`` over no cells).

    Raises:
        ValueError: If a cell selected for ``SUM`` or ``AVERAGE`` is not
            numeric once thousands separators are removed.
    """
    # .get(): an answer without aggregation info is returned as-is.
    aggregator = answer.get('aggregator')
    cells = answer.get('cells', [])

    if aggregator == 'COUNT':
        # COUNT means "how many cells were selected"; the cell contents are
        # irrelevant and may not even be integers.
        return len(cells)

    if aggregator in ('SUM', 'AVERAGE'):
        values = [float(value.replace(',', '')) for value in cells]
        if aggregator == 'SUM':
            return sum(values)
        if values:
            return sum(values) / len(values)
        # An average over nothing is undefined; fall through to raw answer.

    return answer['answer']
259 |
260 |
def get_converted_answer(table, query):
    """Ask a question about a table and post-process the answer.

    Args:
        table: Table forwarded to ``get_answer``.
        query: Question forwarded to ``get_answer``.

    Returns:
        The result of ``convert_answer`` applied to the raw answer.
    """
    return convert_answer(get_answer(table, query))
263 |
264 |
# Session States
265 |
# Seed each flag with False only when it is not already in the session state.
if 'uploaded' not in st.session_state:
    st.session_state.uploaded = False

if 'forecasted' not in st.session_state:
    st.session_state.forecasted = False
270 |
271 |
# Web Application
272 |
st.title("Forecasting Dashboard 📈")
273 |
if not st.session_state.uploaded:
274 |
st.subheader("Welcome User, get started forecasting by uploading your file in the sidebar!")
275 |
276 |
# Sidebar Menu
277 |
with st.sidebar:
278 |
st.title("Forecaster v1.1")
279 |
st.subheader("An intelligent sales forecasting system")
280 |
uploaded_file = st.file_uploader("Upload your store data here to proceed (must atleast contain Date and Sales)", type=["csv"])
281 |
if uploaded_file is not None:
282 |
date_found = False
283 |
sales_found = False
284 |
df = pd.read_csv(uploaded_file, parse_dates=True)
285 |
for column in df.columns:
286 |
if 'Date' in column:
287 |
date_found = True
288 |
if 'Sales' in column:
289 |
sales_found = True
290 |
if(date_found == False or sales_found == False):
291 |
st.error('Please upload a csv containing both Date and Sales...')
292 |
293 |
294 |
st.success("File uploaded successfully!")
295 |
st.write("Your uploaded data:")
296 |
297 |
298 |
df = drop(df)
299 |
df = date_format(df)
300 |
301 |
series = group_to_three(df)
302 |
303 |
st.session_state.uploaded = True
304 |
305 |
with open('sample.csv', 'rb') as f:
306 |
st.download_button("Download our sample CSV", f, file_name='sample.csv')
307 |
308 |
if (st.session_state.uploaded):
309 |
st.subheader("Sales History")
310 |
311 |
312 |
313 |
314 |
period = st.slider('How many days would you like to forecast?', min_value=MIN_DAYS, max_value=MAX_DAYS)
315 |
forecast_period = get_forecast_period(period)
316 |
317 |
forecast_button = st.button(
318 |
'Start Forecasting',
319 |
320 |
321 |
322 |
323 |
if (forecast_button or st.session_state.forecasted):
324 |
df = series_to_df_exogenous(series)
325 |
n_periods = round(len(df) * 0.2)
326 |
print(n_periods) # debug
327 |
328 |
train = train_test(df, n_periods)
329 |
training_y, test_y, test_y_series, training_X, test_X, future_X = train
330 |
train_test_model = test_fitting(df, training_X, training_y)
331 |
332 |
print(df) # debug
333 |
print(len(df)) # debug
334 |
335 |
future_n_periods = forecast_period
336 |
fitted, confint = train_test_model.predict(X=test_X, n_periods=n_periods, return_conf_int=True)
337 |
index_of_fc = test_y_series.index
338 |
339 |
# make series for plotting purpose
340 |
fitted_series = pd.Series(fitted)
341 |
fitted_series.index = index_of_fc
342 |
lower_series = pd.Series(confint[:, 0], index=index_of_fc)
343 |
upper_series = pd.Series(confint[:, 1], index=index_of_fc)
344 |
345 |
#Future predictions
346 |
frequency = '3D'
347 |
future_fitted, confint = train_test_model.predict(X=df.iloc[-future_n_periods:,1:], n_periods=future_n_periods, return_conf_int=True, freq=frequency)
348 |
future_index_of_fc = pd.date_range(df['Sales'].index[-1], periods = future_n_periods, freq=frequency)
349 |
350 |
# make series for future plotting purpose
351 |
future_fitted_series = pd.Series(future_fitted)
352 |
future_fitted_series.index = future_index_of_fc
353 |
# future_lower_series = pd.Series(confint[:, 0], index=future_index_of_fc)
354 |
# future_upper_series = pd.Series(confint[:, 1], index=future_index_of_fc)
355 |
356 |
future_sales_growth = sales_growth(df, future_fitted_series)
357 |
358 |
test_y, predictions = np.array(test_y), np.array(fitted)
359 |
print("Test Y:", test_y) # debug
360 |
print("Prediction:", fitted) # debug
361 |
score = forecast_accuracy(predictions, test_y)
362 |
print("Score:", score) # debug
363 |
mape, interpretation, mape_color = interpret_mape(score['mape'])
364 |
365 |
366 |
367 |
merged_data = merge_forecast_data(df['Sales'], fitted_series, future_fitted_series)
368 |
369 |
col_charts = st.columns(2)
370 |
371 |
print(merged_data) # debug
372 |
373 |
374 |
with col_charts[0]:
375 |
fig_compare = go.Figure()
376 |
fig_compare.add_trace(go.Scatter(x=merged_data[merged_data.columns[0]], y=merged_data['Actual Sales'], mode='lines', name='Actual Sales'))
377 |
fig_compare.add_trace(go.Scatter(x=merged_data[merged_data.columns[0]], y=merged_data['Predicted Sales'], mode='lines', name='Predicted Sales', line=dict(color='#006400')))
378 |
fig_compare.update_layout(title='Historical Sales Data', xaxis_title='Date', yaxis_title='Sales')
379 |
st.plotly_chart(fig_compare, use_container_width=True)
380 |
381 |
with col_charts[1]:
382 |
fig_forecast = go.Figure()
383 |
fig_forecast.add_trace(go.Scatter(x=merged_data[merged_data.columns[0]], y=merged_data['Actual Sales'], mode='lines', name='Actual Sales'))
384 |
fig_forecast.add_trace(go.Scatter(x=merged_data[merged_data.columns[0]], y=merged_data['Forecasted Future Sales'], mode='lines', name='Future Forecasted Sales', line=dict(color=mape_color)))
385 |
fig_forecast.update_layout(title='Forecasted Sales Data', xaxis_title='Date', yaxis_title='Sales')
386 |
st.plotly_chart(fig_forecast, use_container_width=True)
387 |
st.write(f"MAPE score: {mape}% - {interpretation}")
388 |
389 |
df = dates_df(future_sales_growth)
390 |
391 |
col_table = st.columns(2)
392 |
with col_table[0]:
393 |
col_table[0].subheader(f"Forecasted sales in the next {period} days")
394 |
395 |
396 |
with col_table[1]:
397 |
col_table[1] = st.subheader("Question-Answering")
398 |
with st.form("question_form"):
399 |
question = st.text_input('Ask a Question about the Forecasted Data', placeholder="What is the total sales in the month of December?")
400 |
query_button = st.form_submit_button(label='Generate Answer')
401 |
if query_button or question:
402 |
answer = get_converted_answer(df, question)
403 |
if answer is not None:
404 |
st.write("The answer is:", answer)
405 |
406 |
st.write("Answer is not found in table")
407 |
st.session_state.forecasted = True
408 |
409 |
410 |
# Hide Streamlit default style
411 |
hide_st_style = """
412 |
413 |
footer {visibility: hidden;}
414 |
415 |
416 |
st.markdown(hide_st_style, unsafe_allow_html=True)