shouzen committed on
Commit
2ac5446
1 Parent(s): c279a8b

Delete main.py

Browse files
Files changed (1) hide show
  1. main.py +0 -177
main.py DELETED
@@ -1,177 +0,0 @@
1
- import pandas as pd
2
- import matplotlib.pyplot as plt
3
- from pylab import rcParams
4
- from statsmodels.tsa.seasonal import seasonal_decompose
5
- import streamlit as st
6
-
7
# Page header: project title, the stated goal, and the heading of the raw-data section.
st.title('Project Canada Goose')
st.write(
    'Mempertahankan brand "canada goose" agar tetap menjadi penjualan tertinggi '
    '(untuk 1 tahun kedepan) dengan metode time series forecasting'
)
st.markdown('# All Data')
10
# `st.cache` is deprecated (and removed in recent Streamlit releases); use
# `st.cache_data` when the installed Streamlit provides it, else fall back.
_cache_csv = getattr(st, 'cache_data', None) or st.cache


@_cache_csv
def load_csv_data():
    """Read ``Final_Data_Sales.csv`` and normalise its timestamp columns.

    The five timestamp columns are parsed with ``errors='coerce'`` so that
    values that are not valid datetimes (e.g. ``0000-0000``) become ``NaT``.
    Every missing value in the frame is then back-filled from the next row
    that has a value in the same column.

    Returns:
        pandas.DataFrame: the parsed and back-filled sales data.
    """
    data = pd.read_csv('Final_Data_Sales.csv')

    # Coerce non-datetime values to NaT instead of raising.
    for column in ('sold_at', 'created_at', 'shipped_at',
                   'delivered_at', 'returned_at'):
        data[column] = pd.to_datetime(data[column], errors='coerce')

    # Take missing values from the following row. DataFrame.bfill() is the
    # supported spelling; fillna(method='bfill') uses a deprecated keyword.
    return data.bfill()
24
-
25
# Placeholder text shown while the CSV is being read.
status_line = st.text('Loading data...')
# Load the whole CSV through the cached loader and display it.
data = load_csv_data()
st.dataframe(data)
# Replace the placeholder with a caption once the table has been rendered.
status_line.text("Ini adalah data keseluruhan dari data csv")

# Report the frame's (rows, columns) shape.
total_data = data.shape
st.write(f'Total Datanya adalah : {total_data}')
34
-
35
# Data cleaning: discard every row that still contains a missing value.
data = data.dropna(axis=0, how='any')
st.write("Jumlah data setelah menghapus missing value:", len(data))
38
-
39
# Descriptive statistics for the sale_price and cost columns.
st.markdown('## Statistika Deskriptif')
analisis = data[['sale_price', 'cost']].copy()
summary = analisis.describe()
st.table(summary)
44
-
45
# Share of rows per order status (Shipped, Processing, Cancelled, Complete, Returned).
st.markdown("## Perbandingan Shipped, Processing, Cancelled, Complete dan Returned")
# value_counts() orders by frequency while unique() orders by first
# appearance, so the labels must come from the counts' own index; otherwise
# a wedge can be drawn with another status' name.
status_counts = data['status'].value_counts()
fig1, ax1 = plt.subplots()
ax1.pie(status_counts, labels=status_counts.index, autopct='%.2f%%')
st.pyplot(fig1)
53
-
54
# Best sellers: total sale_price per (product_id, product_brand) pair, highest first.
st.markdown("## Brand Terlaris")
st.write("Ini adalah top 5 brand terlaris ")
# NOTE(review): the grouping key includes product_id, so each row is a single
# product rather than a whole brand -- confirm this is the intended ranking.
brand = (
    data[['product_id', 'product_brand', 'sale_price']]
    .groupby(['product_id', 'product_brand'], as_index=False)['sale_price']
    .sum()
    .sort_values('sale_price', ascending=False)
)
st.table(brand.head(5))
61
-
62
# Highest sales by product brand
st.markdown("## Penjualan Tertinggi Berdasarkan Product Brand")


def perbandingan(w, a, x, y, z):
    """Draw two side-by-side bar charts of ``sale_price`` and show them in Streamlit.

    Args:
        w: table plotted on the left; must provide column ``a`` and
            a ``'sale_price'`` column.
        a: name of the column used for the bar positions in both charts.
        x: table plotted on the right; same columns as ``w``.
        y: title of the left chart.
        z: title of the right chart.
    """
    fig = plt.figure(figsize=(20, 8))

    # Positions 221 / 222 are the two top cells of a 2x2 grid, as before.
    for position, table, title in ((221, w, y), (222, x, z)):
        ax = fig.add_subplot(position)
        ax.grid()
        ax.bar(table[a], table['sale_price'], label="Sale Price")
        ax.set_title(title)

    # Hand Streamlit the Figure itself rather than the pyplot module, so the
    # output does not depend on whatever pyplot's "current figure" is.
    st.pyplot(fig)
77
-
78
# Five highest and five lowest rows by sale_price, each kept in ascending order.
product_brand = brand
pb = product_brand[['product_brand', 'sale_price']]
ranked = pb.sort_values('sale_price')
sh, sl = ranked.tail(5), ranked.head(5)

perbandingan(
    sh,
    'product_brand',
    sl,
    'Penjualan Tertinggi Berdasarkan Product Brand',
    'Penjualan Terendah Berdasarkan Product Brand',
)
84
-
85
# Sale-price rows for the Canada Goose brand only, ordered by creation date.
st.markdown(' # Visualisasi Data Sale Price Khusus Untuk Canada Goose')
cg = data[['created_at', 'product_brand', 'sale_price']].copy()
is_canada_goose = cg['product_brand'] == 'Canada Goose'  # keep Canada Goose rows only
cg_f = cg.loc[is_canada_goose].sort_values('created_at')
st.write('Sorting berdasarkan tanggal pada created_at')
st.dataframe(cg_f)
93
-
94
# Resample the Canada Goose sale prices to one value per month.
st.markdown('## Resampling data perbulan')
st.write('Data sale_price disini ditampilkan dalam perbulan')

# Keep only the timestamp and the price, in chronological order.
cg_e = cg_f[['created_at', 'sale_price']].sort_values('created_at')
# Mean sale price per month ('M' = month-end frequency) to make the series less noisy.
monthly_mean = cg_e.set_index('created_at').resample('M').mean()
# Drop months without a value, then name the index 'Date' instead of 'created_at'.
y = (
    monthly_mean
    .dropna()
    .rename_axis(None, axis=1)
    .rename_axis('Date', axis=0)
)

# Show the first 10 rows only.
st.dataframe(y.head(10))
105
-
106
# Classic time series decomposition: explanatory text followed by the plot.
st.markdown('## Classic Time Series Decomposition -> 1920')
penjelasan = '''
Teknik untuk memisahkan time series menjadi trend, seasonal, dan residual menggunakan movie average, ada 2 tipe:

*Additive = Trend + Seasonal + Residual*\n
*Multiplicative = Trend * Seasonal * Residual*\n

Additive dipakai **untuk trend dan seasonal yang tidak terlalu bervariasi**\n
Multiplicative dipakai **untuk trend dan seasonal yang berubah seiring jalannya waktu**
'''
st.markdown(penjelasan)

# Default figure size used by the decomposition plot below.
rcParams['figure.figsize'] = (10, 5)
# Additive decomposition of the monthly series with a 12-observation period.
decomposition = seasonal_decompose(y.copy(), model='additive', period=12)

fig = decomposition.plot()
st.pyplot(fig)
122
-
123
# Model: chronological split for modelling, 80% train and 20% test.
# The boundary is derived from len(y) instead of being hard-coded as 28 / 7,
# so the two parts never overlap or leave a gap when the number of months
# changes (for 35 months this is still y[:28] and the last 7 rows).
n_train = len(y) * 4 // 5
y_train, y_test = y[:n_train], y[n_train:]
126
-
127
st.markdown('# Model')
st.markdown('## ProphetFB Model')
from fbprophet import Prophet  # Prophet model, imported next to its only use

m = Prophet()
# Turn the 'Date' index back into a column and rename to the 'ds' / 'y'
# column names before fitting.
d = y.copy().reset_index().rename(columns={'Date': 'ds', 'sale_price': 'y'})

model = m.fit(d)
# `periods` sets how far ahead to predict (in month-end steps).
future = m.make_future_dataframe(periods=14, freq='M')
forecast = m.predict(future).set_index('ds')
d = d.set_index('ds')
final_forecast = forecast['yhat']

# Actual monthly values against the fitted / predicted values.
fig, ax = plt.subplots(figsize=(15, 5))
ax.set_title("Prediksi untuk 1 tahun kedepan dengan ProphetFB Model")
ax.plot(d, label="Actual")
ax.plot(final_forecast, label="Predicted")
ax.legend(loc='upper left')
st.pyplot(fig)
149
-
150
# ARIMA model
st.markdown("## ARIMA Model")
from pmdarima import auto_arima

arima = auto_arima(
    y_train, start_p=1, start_q=1, max_p=3, max_q=3, m=12,
    start_P=0, seasonal=True, d=1, D=1, trace=True,
    error_action='ignore',   # don't want to know if an order does not work
    suppress_warnings=True,  # don't want convergence warnings
    stepwise=True,
)

# Forecast across the test window plus 8 further months.
n_forecast = len(y_test) + 8
# Only the horizon is passed: the D= / seasonal= keywords the original gave
# here are not documented predict() parameters (the orders are fixed at fit time).
pred = arima.predict(n_forecast)
# The model was fitted on y_train, so its first forecast belongs to the month
# right after y_train's last one. Starting the labels at y_test's last month
# (as before) shifted the whole curve forward in time.
dates = pd.date_range(y_train.index[-1], periods=n_forecast + 1, freq='M')[1:]
# list(...) drops any index attached to the forecast so that pd.Series assigns
# the values positionally instead of realigning them to `dates` (giving NaN).
pred = pd.Series(list(pred), index=dates)

fig = plt.figure(figsize=(15, 5))
plt.title("Prediksi menurut arima untuk 1 tahun kedepan")
plt.plot(y_train, label="Training")
plt.plot(y_test, label="Test")
plt.plot(pred, label="Pred")
plt.legend(loc='upper left')
st.pyplot(fig)
171
-
172
-
173
-
174
-
175
-
176
-
177
-