first commit
Browse files- app.py +11 -0
- eda.py +97 -0
- model_sarimax.pkl +3 -0
- paragon.png +0 -0
- prediction.py +41 -0
- requirements.txt +7 -0
- sample_dataset_timeseries_noarea.csv +0 -0
app.py
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import eda # python file
|
3 |
+
import prediction # python file
|
4 |
+
|
5 |
+
# Map each sidebar label to the page renderer it opens. The first key is the
# option Streamlit preselects, so the prediction page is the landing page.
_PAGES = {
    'Products Sales Quantities Prediction': prediction.run,
    'EDA': eda.run,
}

navigation = st.sidebar.selectbox('Page Navigation: ', tuple(_PAGES))

# Anything other than 'EDA' falls back to the prediction page.
_PAGES.get(navigation, prediction.run)()
|
11 |
+
|
eda.py
ADDED
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import numpy as np
|
3 |
+
import pandas as pd
|
4 |
+
import seaborn as sns
|
5 |
+
import matplotlib.pyplot as plt
|
6 |
+
|
7 |
+
|
8 |
+
from PIL import Image
|
9 |
+
|
10 |
+
# Page-wide Streamlit settings. This executes when the module is imported,
# i.e. before app.py issues its first widget call.
st.set_page_config(
    page_title='ParagonCorp Products Sales Quantities Forecasting',
    layout='wide',
    initial_sidebar_state='expanded',
)
|
15 |
+
|
16 |
+
def _weekly_quantity(df):
    """Return the total ``quantity`` per week, indexed by week end date.

    Args:
        df: Frame with a ``week_end_date`` column parseable by
            ``pd.to_datetime`` and a ``quantity`` column.

    Returns:
        A Series of weekly sums restricted to 2022-01-02 .. 2023-04-09.
    """
    quantity = pd.Series(
        df['quantity'].to_numpy(),
        index=pd.DatetimeIndex(pd.to_datetime(df['week_end_date'])),
        name='quantity',
    )
    # Label slicing on a DatetimeIndex needs a sorted index; sorting does not
    # change the weekly sums.
    quantity = quantity.sort_index()
    return quantity.loc['2022-01-02':'2023-04-09'].resample('W').sum()


def _plot_trend(weekly):
    """Build a figure of the weekly quantities with a linear trend line."""
    fig, ax = plt.subplots(figsize=(10, 4))

    # A degree-1 fit needs at least two points; skip the trend line otherwise.
    if len(weekly) >= 2:
        steps = np.arange(len(weekly))
        slope, intercept = np.polyfit(steps, weekly.to_numpy(dtype=float), 1)
        ax.plot(weekly.index, slope * steps + intercept,
                label='Linear Trend Line', color='orange', alpha=0.6)

    ax.plot(weekly.index, weekly, marker='o', ms=4,
            label='Product Sales Quantities')

    ax.set_title('Trend of Product Sales Quantities')
    ax.set_xlabel('Date')
    ax.set_ylabel('quantity')
    ax.legend()
    return fig


def run():
    """Render the EDA page.

    Shows the header image and the raw dataset, then either the descriptive
    statistics table or the weekly sales trend chart, depending on the
    select box. ``paragon.png`` and ``sample_dataset_timeseries_noarea.csv``
    are read through relative paths, so they are resolved against the current
    working directory.
    """
    # title
    st.title('Exploratory Data Analysis of the Dataset')
    st.write('by Ahmad Luay Adnani')

    # Add Image
    image = Image.open('paragon.png')
    st.image(image)

    # Description
    st.write('---')
    st.write('# Dataset')
    st.write('Dataset provided by ParagonCorp.')

    # show dataframe
    df = pd.read_csv('sample_dataset_timeseries_noarea.csv')
    st.dataframe(df)

    # EDA
    st.write('---')
    st.write('# Exploratory Data Analysis')
    select_eda = st.selectbox(
        'Select EDA : ',
        ('Statistical Descriptive', 'Trend of Product Sales Quantities'),
    )

    if select_eda == 'Statistical Descriptive':
        # measure of central tendency
        stats = df.describe().T
        st.dataframe(stats)
        st.write('Based on information above:')
        st.write("- The average product sales quantities is 3191.56.")
        st.write('- The range of product sales quantities is between 0 to 774,732.')
    else:
        # Trend of Product Sales Quantities
        fig = _plot_trend(_weekly_quantity(df))

        # Pass the figure explicitly instead of relying on pyplot's global
        # figure, then close it so reruns do not accumulate open figures.
        st.pyplot(fig)
        plt.close(fig)

        st.write('Here we can see the data of product sales quantities for the past 67 weeks. The trend is increasing over the past 67 weeks. There are no pattern on the products sales quantities. There are big spike that occur on 2022-05-08.')


if __name__ == '__main__':
    run()
|
model_sarimax.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:24727d7887914918f860fa8c19bf7775b616494fe6887fd21084dd80b954b661
|
3 |
+
size 2670231
|
paragon.png
ADDED
![]() |
prediction.py
ADDED
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import numpy as np
|
4 |
+
import matplotlib.pyplot as plt
|
5 |
+
import seaborn as sns
|
6 |
+
import pickle
|
7 |
+
|
8 |
+
# Load models
|
9 |
+
|
10 |
+
# Deserialize the forecasting model once, when the module is imported
# (presumably a fitted statsmodels SARIMAX results object - confirm).
# NOTE: unpickling can execute arbitrary code; only ship a trusted model file.
with open('model_sarimax.pkl', 'rb') as model_file:
    model_sarimax = pickle.load(model_file)
|
12 |
+
|
13 |
+
# Number of steps requested from the model on every prediction.
_FORECAST_HORIZON = 18


def run():
    """Render the prediction page.

    Asks for a number of weeks, and on submit shows that many rows of the
    model forecast as a table and as a line chart. The week count is limited
    to 1.._FORECAST_HORIZON so the chart title always matches the number of
    rows actually displayed.
    """
    st.markdown(
        "<h1 style='text-align: center;'>Products Sales Quantities Prediction</h1>",
        unsafe_allow_html=True,
    )

    with st.form(key='Amazon_Customer_Review'):
        n_weeks = st.number_input(
            'Number of weeks to forecast',
            min_value=1,
            max_value=_FORECAST_HORIZON,
            value=5,
            step=1,
        )
        submitted = st.form_submit_button('Predict')

    if submitted:
        # Predict
        # NOTE(review): tail() keeps the LAST n_weeks of the horizon, not the
        # first; this looks deliberate (the leading steps may overlap a
        # hold-out period) - confirm against how the model was trained.
        result = model_sarimax.forecast(_FORECAST_HORIZON).tail(n_weeks)
        result = pd.DataFrame(result)
        st.dataframe(result)

        # Forecast Visualization
        fig, ax = plt.subplots(figsize=(20, 10))
        sns.lineplot(x=result.index, y=result['predicted_mean'], data=result, ax=ax)
        ax.set_title(f'Prediction for the next {n_weeks} weeks', fontsize=20)
        ax.set_xlabel('Date', fontsize=16)
        ax.tick_params(axis='x', labelsize=8)
        ax.set_ylabel('Quantities Difference', fontsize=16)
        st.pyplot(fig)
        # Close the figure so reruns do not accumulate open figures.
        plt.close(fig)


if __name__ == '__main__':
    run()
|
requirements.txt
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
streamlit
|
2 |
+
pandas
|
3 |
+
seaborn
|
4 |
+
matplotlib
|
5 |
+
scikit-learn == 1.0.2
|
6 |
+
numpy
|
7 |
+
statsmodels == 0.13.2
|
sample_dataset_timeseries_noarea.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|