fadyabila committed on
Commit
6fc1ba0
1 Parent(s): 3bd6a2a

Submission

Browse files
app.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import eda
3
+ import prediction
4
+
5
# Map each sidebar label to the function that renders that page.
_PAGES = {
    'EDA': eda.run,
    'Quantity Sales Forecasting': prediction.run,
}

navigation = st.sidebar.selectbox('Choose Page : ', tuple(_PAGES))

# Any label other than 'EDA' falls through to the forecasting page.
render_page = eda.run if navigation == 'EDA' else prediction.run
render_page()
eda.py ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import seaborn as sns
4
+ import matplotlib.pyplot as plt
5
+ import plotly.express as px
6
+ from PIL import Image
7
+
8
# Use the wide layout so the visualizations span the full browser width.
_PAGE_SETTINGS = {
    'page_title': 'Sales Forecasting Analysis in Paragon Corp',
    'layout': 'wide',
    'initial_sidebar_state': 'expanded',
}
st.set_page_config(**_PAGE_SETTINGS)
14
+
15
def run():
    """Render the EDA page: introduction, dataset preview and sales charts.

    Reads ``paragon.png`` and ``sample_dataset_timeseries_noarea.csv`` from the
    current working directory and writes every element to the active
    Streamlit page. Returns nothing.
    """
    # Page title and author line.
    st.title('Beauty Products Warehouse Sales Forecasting Analysis in Paragon Corp')
    st.write('### by Fadya Ulya Salsabila')

    # Banner image; the context manager releases the file handle afterwards.
    with Image.open('paragon.png') as image:
        st.image(image, caption='Paragon Corp')

    # Background description.
    st.write('## Background')
    st.write("""
    Paragon is an Indonesian beauty/cosmetics company with the goal of creating the greater good for society through innovation. Products from Paragon are Wardah, Kahf, Make Over, and Emina.
    Targeting the middle class segment for cosmetics is attractive because customers are relatively insensitive to price. Like Paragon, it is a local organization targeting middle class customers in Indonesia.

    Warehouses within a company must be adapted to the conditions and intensity of production in the industry.
    Meanwhile, warehousing is an important line in the trading business, because there are existing industrial and production goods, such as receiving raw materials from suppliers, handling goods, sending goods to their destination.
    The warehouse management system is very important for business continuity, because the warehouse is directly related to sales.
    When the warehouse inventory does not match sales, it will have an impact on losses, either due to failed sales or too much inventory available in the warehouse.
    A warehouse management system whose main purpose is to control all processes that occur in it, such as receiving, storing, processing customer orders, taking orders, checking and packing and shipping.
    With a warehouse management system, we can better control the process of movement and storage, more optimal use of space in the warehouse, increase the effectiveness of the receiving and shipping process and know the amount of stock more accurately from time to time.

    Therefore, in this analysis and modeling, this will predict the number of sales of Paragon's warehouse stock using the `sample_dataset_timeseries_noarea.csv` dataset.
    This prediction will use Regression Model with Time Series and Forecasting Analysis.""")

    # Dataset description.
    st.write('## Dataset')
    st.write("""
    The dataset is from Paragon dataset.
    This dataset contains `102733 rows` and `5 columns`, such as:

    1. `week_number`: contained information about week of specific product sold, (2021-52 to 2023-14).
    2. `week_start_date`: contained information about week start date of specific product sold.
    3. `week_end_date`: contained information about week end date of specific product sold.
    4. `product_item`: contained information about product item/product code (Variabel Bebas).
    5. `quantity`: contained information about quantity of product in respective week.""")

    # Horizontal rule between the introduction and the exploration.
    st.markdown('---')

    st.subheader('EDA for Sales Forecasting')

    st.write(
        ' On this page, the author will do a simple exploration.'
        ' The dataset used is the Quantity Sales dataset.'
        ' This dataset comes from Paragon Corp.')

    # Show the raw data before any column is converted.
    df1 = pd.read_csv('sample_dataset_timeseries_noarea.csv')
    st.dataframe(df1)

    # Count of rows per product item.
    st.write('#### Product Item Plot')
    fig, ax = plt.subplots(figsize=(10, 7))
    sns.countplot(x='product_item', data=df1, palette="PuRd", ax=ax)
    st.pyplot(fig)
    # Streamlit reruns the script on every interaction; close each figure so
    # they do not pile up in matplotlib's global figure registry.
    plt.close(fig)
    st.write(
        ' There are 2309 different product items.'
        ' Where the product item appears the most, such as 67 times and the least appears is 1 time.')

    # Parse both date columns so grouping and the histogram below treat them
    # as dates rather than as plain strings.
    for date_column in ('week_start_date', 'week_end_date'):
        df1[date_column] = pd.to_datetime(df1[date_column])

    # Total quantity per week, grouped on the week start date.
    st.write('#### Quantity Grouped by Weeks')
    weekly_bins = pd.Grouper(key="week_start_date", freq="W-MON")
    quantity_flow = df1.groupby(weekly_bins)["quantity"].sum().reset_index()
    # The text below refers to "the results above", so display the table.
    st.dataframe(quantity_flow)
    st.write(
        ' The column for the number of products sold (`quantity`) is grouped by `week_start_date`, intended to find out how many product stocks have been sold each week.'
        ' Based on the results above, there are 67 sales weeks with a different number of products each week.')

    # Line chart of the weekly totals.
    st.write('#### Quantity Flow')
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(quantity_flow["week_start_date"], quantity_flow["quantity"], color="blue", linestyle="dashed")
    ax.set_title("Quantity Flow")
    ax.set_xlabel("Week Start Date")
    ax.set_ylabel("Quantity")
    ax.grid(True)
    st.pyplot(fig)
    plt.close(fig)
    st.write("""
    The visualization of the trend chart above shows that warehouse stock sales fluctuate.
    Especially in May 2022 it shows that sales have fallen dramatically from April 2022.
    This can be influenced by factors, such as:
    1. Consumer purchasing power
    2. Changing consumer tastes
    3. Trends and level of market competition
    4. Certain events/holidays

    It can be said that in May 2022, when there was a 10-day Idul Fitri holiday, make-up sales decreased.
    This is because during the Eid holiday, many business operations are also on holiday.
    So in the case of Paragon it can be said the same way.
    During the Eid holiday, business people are faced with the challenge of staff adequacy.
    Businesses that continue to operate during Eid, attendance arrangements, standby employee absences, and leave are crucial.
    So that fewer employees work during the Eid holidays and sometimes the related shops also apply 2 days to one week off.
    This causes a decrease in sales.""")

    # Histogram of a column chosen by the user.
    st.write('#### Histogram Based On User Input')
    pilihan = st.selectbox('Choose Column : ', ('quantity', 'week_start_date', 'week_end_date'))
    fig, ax = plt.subplots(figsize=(15, 5))
    sns.histplot(df1[pilihan], bins=30, kde=True, ax=ax)
    st.pyplot(fig)
    plt.close(fig)
120
+
121
# Render the EDA page when this module is executed directly rather than imported.
if __name__ == '__main__':
    run()
linreg_model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e39d88c77894d2962ffd309d55294457dd0c6307cb5bc2abf38d085315ce14f4
3
+ size 435
main.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import eda
3
+ import prediction
4
+
5
# Map each sidebar label to the function that renders that page.
_PAGES = {
    'EDA': eda.run,
    'Quantity Sales Forecasting': prediction.run,
}

navigation = st.sidebar.selectbox('Choose Page : ', tuple(_PAGES))

# Any label other than 'EDA' falls through to the forecasting page.
render_page = eda.run if navigation == 'EDA' else prediction.run
render_page()
paragon.png ADDED
prediction.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pandas as pd
3
+ import matplotlib.pyplot as plt
4
+ from datetime import datetime, timedelta
5
+ from sklearn.linear_model import LinearRegression
6
+ from sklearn.preprocessing import StandardScaler
7
+ import pickle
8
+ import streamlit as st
9
+
10
# Load the fitted regression model and its feature scaler once, at import time.
# pickle.load() needs an open binary file object, not a path string.
# SECURITY: unpickling can execute arbitrary code; only load trusted files.
with open('linreg_model.pkl', 'rb') as model_file:
    model_lr = pickle.load(model_file)

# NOTE(review): scaler.pkl is not among the files added in this commit;
# confirm it is deployed next to this module, otherwise this raises
# FileNotFoundError on import.
with open('scaler.pkl', 'rb') as scaler_file:
    scaler = pickle.load(scaler_file)
14
+
15
def load_history(path='quantity_flow1.csv'):
    """Return the historical weekly quantities as a pandas Series.

    Args:
        path: CSV path or file-like object with a ``quantity`` column.

    Returns:
        The ``quantity`` column. When the file has more than one column, the
        first column is parsed as dates and used as the index; otherwise the
        default integer index is kept.
    """
    frame = pd.read_csv(path)
    if frame.shape[1] > 1:
        # presumably the first column holds the week dates; verify against
        # the notebook that exported this file
        frame = frame.set_index(frame.columns[0])
        frame.index = pd.to_datetime(frame.index)
    # A single-column file must NOT be read with index_col=0: that would turn
    # the only data column into the index and leave no values to forecast from.
    return frame['quantity']


def extend_forecast(history, steps, model, scaler, window=2):
    """Append ``steps`` one-step-ahead predictions to a copy of ``history``.

    Each prediction is made from the last ``window`` values (observed or
    previously predicted), scaled with ``scaler`` and passed to ``model``.

    Args:
        history: Series of past quantities; it is not modified.
        steps: Number of future periods to predict.
        model: Object exposing ``predict(X)``.
        scaler: Object exposing ``transform(X)``.
        window: Number of trailing values used as features.

    Returns:
        A new Series holding the history followed by the rounded predictions.

    Raises:
        ValueError: If ``history`` has fewer than ``window`` values.
    """
    if len(history) < window:
        raise ValueError(
            f"need at least {window} observations, got {len(history)}")

    forecast = history.copy()
    # Advance a date index by one week; a plain integer index by one position.
    if isinstance(forecast.index, pd.DatetimeIndex):
        step = timedelta(weeks=1)
    else:
        step = 1

    for _ in range(steps):
        features = forecast.iloc[-window:].to_numpy(dtype=float).reshape(1, -1)
        predicted = model.predict(scaler.transform(features))
        next_idx = forecast.index[-1] + step
        # .loc appends a new row; plain [] assignment on a DataFrame would
        # have created a new column instead.
        forecast.loc[next_idx] = round(float(np.ravel(predicted)[0]))
    return forecast


def run():
    """Render the forecasting page: a week-count slider and a forecast chart."""
    # Load the historical time series.
    history = load_history()

    st.title("Sales Forecasting")

    # Sidebar input: how many weeks ahead to predict.
    weeks = st.sidebar.slider("Select the number of weeks to forecast", 1, 24, 12)

    # The forecast is drawn first so the observed series is plotted on top
    # of the part the two lines share.
    forecast = extend_forecast(history, weeks, model_lr, scaler)
    fig, ax = plt.subplots(figsize=(20, 5))
    ax.plot(forecast, color='blue', label='forecast')
    ax.plot(history, color='red', label='real')
    ax.legend()
    st.pyplot(fig)
    # Streamlit reruns the script on every interaction; close the figure so
    # it does not stay registered in matplotlib.
    plt.close(fig)
43
+
44
# Render the forecasting page when this module is executed directly rather than imported.
if __name__ == "__main__":
    run()
quantity_flow1.csv ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ quantity
2
+ 128808
3
+ 3839557
4
+ 4138934
5
+ 4149077
6
+ 4824273
7
+ 4008027
8
+ 4311010
9
+ 3882636
10
+ 4256541
11
+ 3883810
12
+ 5047898
13
+ 4303200
14
+ 4980761
15
+ 4620208
16
+ 4553719
17
+ 4921338
18
+ 5768747
19
+ 4684319
20
+ 1334134
21
+ 7172144
22
+ 5505616
23
+ 5394786
24
+ 5817233
25
+ 6260083
26
+ 5759699
27
+ 5709933
28
+ 5556481
29
+ 5275889
30
+ 5682207
31
+ 5824993
32
+ 5900392
33
+ 6143352
34
+ 6256332
35
+ 5021634
36
+ 5918497
37
+ 6112693
38
+ 5237531
39
+ 5554898
40
+ 5343014
41
+ 5912301
42
+ 4907074
43
+ 5269967
44
+ 4229764
45
+ 4878485
46
+ 4661839
47
+ 5234579
48
+ 4861820
49
+ 4465411
50
+ 4953197
51
+ 5717901
52
+ 5259200
53
+ 5768419
54
+ 4677997
55
+ 4111427
56
+ 4623965
57
+ 5265632
58
+ 4364421
59
+ 4703388
60
+ 4267946
61
+ 3731588
62
+ 4701262
63
+ 3732954
64
+ 4775638
65
+ 4787111
66
+ 4757590
67
+ 6001166
68
+ 4172070
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
# Libraries required by the app

streamlit==1.20.0
pandas==1.5.3
seaborn
matplotlib
numpy
scikit-learn==1.2.1
plotly
# eda.py imports PIL, which is provided by the Pillow distribution.
Pillow
# pickle is part of the Python standard library and is not installable with
# pip; listing it here makes `pip install -r requirements.txt` fail.
sample_dataset_timeseries_noarea.csv ADDED
The diff for this file is too large to render. See raw diff