ahmadluay committed on
Commit
e9af0e5
1 Parent(s): ad49447

first commit

Browse files
app.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import eda # python file
3
+ import prediction # python file
4
+
5
# Sidebar selector deciding which page module renders on this rerun.
navigation = st.sidebar.selectbox(
    'Page Navigation: ',
    ('Products Sales Quantities Prediction', 'EDA'),
)

# Any choice other than 'EDA' falls through to the prediction page.
selected_page = eda if navigation == 'EDA' else prediction
selected_page.run()
11
+
eda.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import numpy as np
3
+ import pandas as pd
4
+ import seaborn as sns
5
+ import matplotlib.pyplot as plt
6
+
7
+
8
+ from PIL import Image
9
+
10
# Page-wide Streamlit settings: browser tab title, wide layout, sidebar open.
# NOTE(review): this is module-level code, so it executes when this module is
# imported (e.g. by app.py), not when run() is called.
st.set_page_config(
    layout='wide',
    initial_sidebar_state='expanded',
    page_title='ParagonCorp Products Sales Quantities Forecasting',
)
15
+
16
def _weekly_quantity(df, start='2022-01-02', end='2023-04-09'):
    """Return ``quantity`` summed per week ('W' resampling) within [start, end].

    Args:
        df: Raw dataset with at least ``week_end_date`` and ``quantity`` columns.
        start: First date (inclusive) of the analysed window.
        end: Last date (inclusive) of the analysed window.

    Returns:
        DataFrame indexed by a weekly DatetimeIndex with one ``quantity`` column.
    """
    weekly = df[['week_end_date', 'quantity']].copy()
    weekly['week_end_date'] = pd.to_datetime(weekly['week_end_date'])
    # Label-based date slicing is only reliable on a sorted index, and the
    # CSV row order is not guaranteed, so sort before slicing.
    weekly = weekly.set_index('week_end_date').sort_index()
    return weekly.loc[start:end].resample('W').sum()


def _plot_trend(weekly):
    """Build a figure of weekly quantities with a least-squares linear trend line."""
    positions = np.arange(len(weekly))
    slope, intercept = np.polyfit(positions, weekly['quantity'], 1)

    fig, ax = plt.subplots(figsize=(10, 4))
    ax.plot(weekly.index, slope * positions + intercept,
            label='Linear Trend Line', color='orange', alpha=0.6)
    ax.plot(weekly.index, weekly['quantity'],
            marker='o', ms=4, label='Product Sales Quantities')
    ax.set_title('Trend of Product Sales Quantities')
    ax.set_xlabel('Date')
    ax.set_ylabel('quantity')
    ax.legend()
    return fig


def run():
    """Render the EDA page: banner image, raw dataset, and the selected analysis."""
    # Title
    st.title('Exploratory Data Analysis of the Dataset')
    st.write('by Ahmad Luay Adnani')

    # Banner image
    st.image(Image.open('paragon.png'))

    # Dataset description and raw table
    st.write('---')
    st.write('# Dataset')
    st.write('Dataset provided by ParagonCorp.')
    df = pd.read_csv('sample_dataset_timeseries_noarea.csv')
    st.dataframe(df)

    # EDA section
    st.write('---')
    st.write('# Exploratory Data Analysis')
    select_eda = st.selectbox(
        'Select EDA : ',
        ('Statistical Descriptive', 'Trend of Product Sales Quantities'),
    )

    if select_eda == 'Statistical Descriptive':
        # Summary statistics, one row per numeric column.
        st.dataframe(df.describe().T)
        st.write('Based on information above:')
        st.write("- The average product sales quantities is 3191.56.")
        st.write('- The range of product sales quantities is between 0 to 774,732.')
    else:
        fig = _plot_trend(_weekly_quantity(df))
        # Pass the figure explicitly: the argument-less st.pyplot() relies on
        # matplotlib's global figure, which Streamlit has deprecated.
        st.pyplot(fig)
        # The script reruns on every interaction; close the figure so
        # matplotlib does not accumulate open figures.
        plt.close(fig)

        st.write('Here we can see the data of product sales quantities for the past 67 weeks. The trend is increasing over the past 67 weeks. There are no pattern on the products sales quantities. There are big spike that occur on 2022-05-08.')


if __name__ == '__main__':
    run()
model_sarimax.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24727d7887914918f860fa8c19bf7775b616494fe6887fd21084dd80b954b661
3
+ size 2670231
paragon.png ADDED
prediction.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import matplotlib.pyplot as plt
5
+ import seaborn as sns
6
+ import pickle
7
+
8
+ # Load models
9
+
10
# Deserialize the model once, at import time, so run() can reuse it.
# Presumably a fitted statsmodels SARIMAX results object (run() calls
# .forecast on it) -- TODO confirm.
# NOTE(review): pickle.load executes arbitrary code from the file; only ship a
# model file from a trusted source.
with open('model_sarimax.pkl', 'rb') as model_file:
    model_sarimax = pickle.load(model_file)
12
+
13
def run():
    """Render the prediction page: ask for a number of weeks, then show the forecast.

    Reads the module-level ``model_sarimax`` and displays the selected
    forecast rows as a table and a line chart once the form is submitted.
    """
    # Number of steps requested from the model. The input widget is capped to
    # this so the chart title never promises more weeks than are returned.
    max_weeks = 18

    st.markdown(
        "<h1 style='text-align: center;'>Products Sales Quantities Prediction</h1>",
        unsafe_allow_html=True,
    )

    with st.form(key='Amazon_Customer_Review'):
        # min_value=1: zero weeks would yield an empty table and an empty chart.
        n_weeks = st.number_input(
            'Number of weeks to predict',
            min_value=1, max_value=max_weeks, value=5, step=1,
        )
        submitted = st.form_submit_button('Predict')

    if not submitted:
        return

    # Predict
    # NOTE(review): .tail() keeps the LAST n_weeks of the 18-step forecast,
    # not the first n_weeks after the training data ends. Kept as in the
    # original; confirm against how the model was trained.
    result = pd.DataFrame(model_sarimax.forecast(max_weeks).tail(int(n_weeks)))
    st.dataframe(result)

    # Forecast visualization, drawn on an explicit Axes rather than pyplot
    # global state.
    fig, ax = plt.subplots(figsize=(20, 10))
    sns.lineplot(x=result.index, y=result['predicted_mean'], ax=ax)
    ax.set_title(f'Prediction for the next {n_weeks} weeks', fontsize=20)
    ax.set_xlabel('Date', fontsize=16)
    ax.tick_params(axis='x', labelsize=8)
    ax.set_ylabel('Quantities Difference', fontsize=16)
    st.pyplot(fig)
    # The script reruns on every interaction; close the figure so matplotlib
    # does not accumulate open figures.
    plt.close(fig)


if __name__ == '__main__':
    run()
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ pandas
3
+ seaborn
4
+ matplotlib
5
+ scikit-learn == 1.0.2
6
+ numpy
7
+ statsmodels == 0.13.2
sample_dataset_timeseries_noarea.csv ADDED
The diff for this file is too large to render. See raw diff