shouzen committed on
Commit
de4dd16
1 Parent(s): 2ac5446

Add app py

Browse files
Files changed (1) hide show
  1. app.py +177 -0
app.py ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import matplotlib.pyplot as plt
3
+ from pylab import rcParams
4
+ from statsmodels.tsa.seasonal import seasonal_decompose
5
+ import streamlit as st
6
+
7
# Page header: project title, the goal statement, and the first section heading.
PAGE_TITLE = 'Project Canada Goose'
PAGE_GOAL = (
    'Mempertahankan brand "canada goose" agar tetap menjadi penjualan tertinggi '
    '(untuk 1 tahun kedepan) dengan metode time series forecasting'
)

st.title(PAGE_TITLE)
st.write(PAGE_GOAL)
st.markdown('# All Data')
10
@st.cache
def load_csv_data():
    """Load the sales CSV and normalise its timestamp columns.

    Reads ``Final_Data_Sales.csv`` from the working directory, converts the
    five timestamp columns to datetimes (values that cannot be parsed become
    ``NaT``) and then back-fills every missing value in the frame from the
    next row that has one.

    Returns:
        pandas.DataFrame: the loaded and back-filled sales table.
    """
    data = pd.read_csv('Final_Data_Sales.csv')

    # Entries that are not real dates (e.g. "0000-0000") are coerced to NaT
    # instead of raising.
    date_columns = ['sold_at', 'created_at', 'shipped_at', 'delivered_at', 'returned_at']
    for column in date_columns:
        data[column] = pd.to_datetime(data[column], errors='coerce')

    # Fill each gap with the value from the following row.  DataFrame.bfill()
    # is the direct replacement for fillna(method='bfill'), whose `method`
    # argument is deprecated in recent pandas releases.
    data = data.bfill()
    return data
24
+
25
# Show a placeholder while the (cached) loader runs, then render the table.
data_load_state = st.text('Loading data...')
data = load_csv_data()
st.dataframe(data)
# Replace the placeholder text once the table has been displayed.
data_load_state.text("Ini adalah data keseluruhan dari data csv")

# Report the (rows, columns) shape of the loaded frame.
total_data = data.shape
st.write(f'Total Datanya adalah : {total_data}')

# Data cleaning: drop every row that still has a missing value.
data = data.dropna()
remaining_rows = len(data)
st.write("Jumlah data setelah menghapus missing value:", remaining_rows)
38
+
39
# Descriptive statistics for the two numeric columns of interest.
st.markdown('## Statistika Deskriptif')
analisis = data[['sale_price', 'cost']].copy()
summary_table = analisis.describe()
st.table(summary_table)
44
+
45
# Share of orders per status (Shipped, Processing, Cancelled, Complete, Returned).
st.markdown("## Perbandingan Shipped, Processing, Cancelled, Complete dan Returned")
status_counts = data['status'].value_counts()
fig1, ax1 = plt.subplots()
# value_counts() orders its result by frequency, so the wedge labels must come
# from that same result's index.  Taking them from data['status'].unique()
# (first-appearance order) can attach the wrong label to a wedge.
ax1.pie(status_counts, labels=status_counts.index, autopct='%.2f%%')
st.pyplot(fig1)
53
+
54
# Best sellers: total sale_price per (product_id, product_brand), highest first.
# NOTE(review): the grouping key includes product_id, so each row is a
# product/brand pair rather than a whole brand - confirm this is intended for
# a table announced as "top 5 brand".
st.markdown("## Brand Terlaris")
st.write("Ini adalah top 5 brand terlaris ")
brand = (
    data[['product_id', 'product_brand', 'sale_price']]
    .groupby(['product_id', 'product_brand'], as_index=False)['sale_price']
    .sum()
    .sort_values('sale_price', ascending=False)
)
st.table(brand.head(5))
61
+
62
# Highest sales by product brand
st.markdown("## Penjualan Tertinggi Berdasarkan Product Brand")


def perbandingan(w, a, x, y, z):
    """Render two side-by-side bar charts of ``sale_price`` on the page.

    Args:
        w: frame plotted in the left chart; must have columns ``a`` and
            ``sale_price``.
        a: name of the column used for the bar positions in both charts.
        x: frame plotted in the right chart; must have columns ``a`` and
            ``sale_price``.
        y: title of the left chart.
        z: title of the right chart.
    """
    # Draw on an explicit figure and hand that figure to Streamlit instead of
    # passing the pyplot module itself.
    fig = plt.figure(figsize=(20, 8))
    # Positions 221 / 222 are the top-left and top-right cells of a 2x2 grid,
    # matching the original layout.
    for position, frame, title in ((221, w, y), (222, x, z)):
        ax = fig.add_subplot(position)
        ax.grid()
        ax.bar(frame[a], frame['sale_price'], label="Sale Price")
        ax.set_title(title)
    st.pyplot(fig)
    # The script body is re-executed on every rerun; close the figure so
    # figures do not pile up in pyplot's registry.
    plt.close(fig)
77
+
78
# Compare the five largest and the five smallest sale_price totals.
product_brand = brand
pb = product_brand[['product_brand', 'sale_price']]
ranked_ascending = pb.sort_values('sale_price')
sh = ranked_ascending.tail(5)  # five highest totals
sl = ranked_ascending.head(5)  # five lowest totals

perbandingan(
    sh,
    'product_brand',
    sl,
    'Penjualan Tertinggi Berdasarkan Product Brand',
    'Penjualan Terendah Berdasarkan Product Brand',
)
84
+
85
# Sale price data restricted to the Canada Goose brand, ordered by creation date.
st.markdown(' # Visualisasi Data Sale Price Khusus Untuk Canada Goose')
cg = data[['created_at', 'product_brand', 'sale_price']].copy()
is_canada_goose = cg['product_brand'] == 'Canada Goose'
cg_f = cg.loc[is_canada_goose].sort_values('created_at')
st.write('Sorting berdasarkan tanggal pada created_at')
st.dataframe(cg_f)
93
+
94
# Resample the Canada Goose sale prices to a monthly series.
st.markdown('## Resampling data perbulan')
st.write('Data sale_price disini ditampilkan dalam perbulan')

cg_e = cg_f[['created_at', 'sale_price']].sort_values('created_at')
# Mean sale price per month ('M' = month-end frequency) to smooth out noise.
monthly_mean = cg_e.set_index('created_at').resample('M').mean()
y = (
    monthly_mean
    .dropna()                       # drop months without a value
    .rename_axis(None, axis=1)      # clear the columns-axis name
    .rename_axis('Date', axis=0)    # index 'created_at' -> 'Date'
)

# Show only the first ten rows.
st.dataframe(y.head(10))
105
+
106
# Classic time series decomposition
st.markdown('## Classic Time Series Decomposition -> 1920')
# User-facing explanation of additive vs. multiplicative decomposition.
# The text previously read "movie average"; corrected to "moving average".
st.markdown('''
Teknik untuk memisahkan time series menjadi trend, seasonal, dan residual menggunakan moving average, ada 2 tipe:

*Additive = Trend + Seasonal + Residual*\n
*Multiplicative = Trend * Seasonal * Residual*\n

Additive dipakai **untuk trend dan seasonal yang tidak terlalu bervariasi**\n
Multiplicative dipakai **untuk trend dan seasonal yang berubah seiring jalannya waktu**
''')
rcParams['figure.figsize'] = 10, 5  # default figure size used by the plot below
# Additive decomposition of the monthly series with a 12-period season.
decomposition = seasonal_decompose(y.copy(), model='additive', period=12)

fig = decomposition.plot()
st.pyplot(fig)
122
+
123
# Model

# Chronological 80% / 20% train/test split.  The cut point is derived from the
# series length so the two parts are always contiguous and non-overlapping;
# the previous fixed slices (y[:28] and y[-7:]) only had that property when
# the series held exactly 35 rows, for which this yields the same 28 / 7 split.
n_train = len(y) * 4 // 5
y_train, y_test = y.iloc[:n_train], y.iloc[n_train:]
126
+
127
st.markdown('# Model')
st.markdown('## ProphetFB Model')
from fbprophet import Prophet  # Prophet model, imported where it is first used

# Prophet expects the date column to be named 'ds' and the value column 'y'.
history = y.reset_index().rename(columns={'Date': 'ds', 'sale_price': 'y'})

m = Prophet()
model = m.fit(history)
# `periods` controls how far ahead to forecast (in months).
future = m.make_future_dataframe(periods=14, freq='M')
forecast = m.predict(future).set_index('ds')
final_forecast = forecast['yhat']
d = history.set_index('ds')

# Actual monthly values against the Prophet prediction.
fig, ax = plt.subplots(figsize=(15, 5))
ax.set_title("Prediksi untuk 1 tahun kedepan dengan ProphetFB Model")
ax.plot(d, label="Actual")
ax.plot(final_forecast, label="Predicted")
ax.legend(loc='upper left')
st.pyplot(fig)
149
+
150
# ARIMA model
st.markdown("## ARIMA Model")
from pmdarima import auto_arima  # imported where it is first used

# Stepwise search for a seasonal ARIMA (season length 12) on the training part.
arima = auto_arima(
    y_train, start_p=1, start_q=1, max_p=3, max_q=3, m=12,
    start_P=0, seasonal=True, d=1, D=1, trace=True,
    error_action='ignore',   # don't want to know if an order does not work
    suppress_warnings=True,  # don't want convergence warnings
    stepwise=True,
)

# Forecast across the test window plus 8 further months.
n_forecast = len(y_test) + 8
# NOTE(review): D / seasonal are kept as in the original call; the model order
# is already fixed by the fit above - confirm predict() actually uses them.
pred = arima.predict(n_forecast, D=1, seasonal=(1, 0, 0))

# The model was fitted on y_train, so its forecasts belong to the months that
# immediately follow the last training month.  The index previously started at
# the last *test* month, which shifted every forecast to the wrong date.
dates = pd.date_range(y_train.index[-1], periods=n_forecast + 1, freq='M')[1:]
# Assign by position: if predict() hands back an indexed Series, building a
# Series from it with a new index would realign by label instead.
pred = pd.Series(getattr(pred, 'values', pred), index=dates)

fig = plt.figure(figsize=(15, 5))
plt.title("Prediksi menurut arima untuk 1 tahun kedepan")
plt.plot(y_train, label="Training")
plt.plot(y_test, label="Test")
plt.plot(pred, label="Pred")
plt.legend(loc='upper left')
st.pyplot(fig)
171
+
172
+
173
+
174
+
175
+
176
+
177
+