# salsabilapl's picture
# Create app.py
# 421f9b9
# raw
# history blame
# No virus
# 4.11 kB
from sklearn.metrics import mean_squared_error, mean_absolute_error
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.seasonal import seasonal_decompose, STL
from statsmodels.stats.diagnostic import acorr_ljungbox
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.arima_process import ArmaProcess
from statsmodels.graphics.gofplots import qqplot
from statsmodels.tsa.stattools import adfuller
from tqdm import tqdm_notebook
from itertools import product
from typing import Union
import matplotlib.pyplot as plt
import statsmodels.api as sm
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')
import streamlit as st
import joblib
import base64
# ---------------------------------------------------------------------------
# Page header
# ---------------------------------------------------------------------------
st.title('Sales Forecasting App')
st.write(
    "This app uses the SARIMAX model to predict future sales with exogenous variables (temp avg and humidity avg)."
)

# ---------------------------------------------------------------------------
# Load the dataset and index it by its parsed 'dates' column
# ---------------------------------------------------------------------------
macro_econ_data = pd.read_csv('dataset_all.csv')
macro_econ_data['dates'] = pd.to_datetime(macro_econ_data['dates'])
macro_econ_data = macro_econ_data.set_index("dates")

# Total quantity sold per row is the sum of the three sales-channel columns.
macro_econ_data['sales qty'] = (
    macro_econ_data['Dine in']
    .add(macro_econ_data['Online'])
    .add(macro_econ_data['Take Away'])
)

# ---------------------------------------------------------------------------
# Target series and exogenous regressors
# ---------------------------------------------------------------------------
target = macro_econ_data['sales qty']
exog = macro_econ_data.loc[:, ['humidity avg', 'temp avg']]

# ---------------------------------------------------------------------------
# Train / test split by date label (both bounds are inclusive label slices)
# ---------------------------------------------------------------------------
target_train = target.loc[:'2023-05-31']
target_test = target.loc['2023-06-01':]

# ---------------------------------------------------------------------------
# Show the last date covered by the training data
# ---------------------------------------------------------------------------
st.subheader('Data Model')
last_date = target_train.index[-1]
st.write(last_date)
# Forecast function
@st.cache_resource
def recursive_forecast(
    endog: Union[pd.Series, list],
    exog: Union[pd.Series, list],
    train_len: int,
    horizon: int,
    window: int,
    method: str,
) -> list:
    """Produce rolling out-of-sample forecasts, `window` steps at a time.

    Starting at position ``train_len``, the function repeatedly takes the first
    ``i`` observations as history, forecasts the next ``window`` steps, then
    advances ``i`` by ``window`` until ``horizon`` values have been produced.

    Args:
        endog: Target series; sliced positionally with ``endog[:i]``.
        exog: Exogenous regressors aligned row-for-row with ``endog``. Rows
            ``[:i]`` are used for fitting and rows ``[i:i + steps]`` for
            forecasting, so it must cover ``train_len + horizon`` rows.
        train_len: Number of leading observations in the initial history.
        horizon: Total number of values to forecast.
        window: Number of steps forecast per iteration (per model refit).
        method: ``'last'`` repeats the last observed value; ``'SARIMAX'`` fits
            ``SARIMAX(order=(0,1,1), seasonal_order=(3,1,1,7))`` on the history.

    Returns:
        A list of exactly ``horizon`` forecast values, in chronological order.

    Raises:
        ValueError: If ``method`` is neither ``'last'`` nor ``'SARIMAX'``.
    """
    total_len = train_len + horizon

    if method == 'last':
        pred_last_value = []
        for i in range(train_len, total_len, window):
            # Clamp the final step so no more than `horizon` values are returned.
            steps = min(window, total_len - i)
            last_value = endog[:i].iloc[-1]
            pred_last_value.extend(last_value for _ in range(steps))
        return pred_last_value

    if method == 'SARIMAX':
        pred_SARIMAX = []
        for i in range(train_len, total_len, window):
            steps = min(window, total_len - i)
            model = SARIMAX(
                endog[:i],
                exog[:i],
                order=(0, 1, 1),
                seasonal_order=(3, 1, 1, 7),
                simple_differencing=False,
            )
            res = model.fit(disp=False)
            # get_forecast() predicts *after* the end of the fitted sample and
            # needs the exogenous rows for exactly those future steps.
            # get_prediction() without start/end only yields in-sample values,
            # which would shift every forecast one step into the past.
            forecast = res.get_forecast(steps=steps, exog=exog[i:i + steps])
            pred_SARIMAX.extend(forecast.predicted_mean)
        return pred_SARIMAX

    raise ValueError(f"Unknown forecast method: {method!r} (expected 'last' or 'SARIMAX')")


# Streamlit UI
st.subheader("Predict sales for the next 7 days?")
if st.button("Yes"):
    # Only rows dated within 7 days of the last training date are displayed,
    # so only those rows are forecast (one model refit per forecast row).
    end_date = last_date + pd.DateOffset(days=7)
    actual_selected = target_test.loc[:end_date]

    TRAIN_LEN = len(target_train)
    HORIZON = len(actual_selected)
    WINDOW = 1

    if HORIZON == 0:
        st.warning('No data is available after the training period to forecast.')
    else:
        pred_SARIMAX = recursive_forecast(target, exog, TRAIN_LEN, HORIZON, WINDOW, 'SARIMAX')

        # Create a DataFrame pairing the actual values with the predictions
        pred_df = pd.DataFrame({'actual': actual_selected})
        pred_df['pred_SARIMAX'] = pred_SARIMAX
        # Make sure the index of the DataFrame is datetime
        pred_df.index = pd.to_datetime(pred_df.index)

        # Predictions for the selected rows
        pred_SARIMAX_selected = pred_df['pred_SARIMAX']

        # Show the predictions
        st.subheader('Predicted Sales for the Next 7 Days')
        st.write(pred_SARIMAX_selected)

        # Plot the predictions on an explicit figure rather than global pyplot
        # state, and close it afterwards so figures do not pile up across reruns.
        fig, ax = plt.subplots(figsize=(10, 5))
        ax.plot(pred_SARIMAX_selected.index, pred_SARIMAX_selected, label='Predicted Sales', marker='o')
        ax.set_xlabel('Date')
        ax.set_ylabel('Sales')
        ax.set_title('Predicted Sales for the Next 7 Days')
        ax.grid()
        ax.legend()
        st.pyplot(fig)
        plt.close(fig)

        # Serialize the predictions as CSV for download
        csv = pred_SARIMAX_selected.to_csv(index=True)
        b64 = base64.b64encode(csv.encode()).decode()  # Encode the CSV text as base64
        # Show download link
        st.markdown(f'<a href="data:file/csv;base64,{b64}" download="hasil_prediksi.csv">Download Prediction (CSV)</a>', unsafe_allow_html=True)