Spaces:
Build error
Build error
File size: 6,599 Bytes
de4dd16 8940ea8 fd9f22f de4dd16 0f81a6b 768081b de4dd16 8940ea8 de4dd16 8940ea8 de4dd16 8940ea8 de4dd16 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 |
import matplotlib.pyplot as plt
from pylab import rcParams
from statsmodels.tsa.seasonal import seasonal_decompose
import streamlit as st
import pandas as pd
from datasets import load_dataset
# Download the project dataset from the Hugging Face Hub with authentication
# enabled.
# NOTE(review): `dataset` is not referenced again in the visible script, which
# reads a local CSV instead -- confirm whether this download is still needed.
dataset = load_dataset(
    "shouzen/final_data_sale",
    use_auth_token=True,
)

# Page header: project title, goal statement and the first section heading.
st.title('Project Canada Goose')
st.write(
    'Mempertahankan brand "canada goose" agar tetap menjadi penjualan tertinggi '
    '(untuk 1 tahun kedepan) dengan metode time series forecasting'
)
st.markdown('# All Data')
# NOTE(review): `st.cache` is deprecated in recent Streamlit releases in favour
# of `st.cache_data` -- confirm the pinned Streamlit version before switching.
@st.cache
def load_csv_data():
    """Load the sales CSV and normalise its timestamp columns.

    Reads at most the first 50,000 rows of ``Final_Data_Sales.csv`` in chunks
    of 1,000 rows and concatenates them into a single frame with a fresh
    integer index.

    Returns:
        pandas.DataFrame: the loaded rows, with the five ``*_at`` columns
        converted to datetimes and missing values back-filled from the next
        row that has a value.
    """
    # `iterator=True` + `chunksize` yields a TextFileReader of partial frames.
    chunks = pd.read_csv(
        'Final_Data_Sales.csv', iterator=True, chunksize=1000, nrows=50000
    )
    data = pd.concat(chunks, ignore_index=True)

    # Values that are not real dates (e.g. "0000-0000") are coerced to NaT
    # instead of raising.
    for column in ('sold_at', 'created_at', 'shipped_at',
                   'delivered_at', 'returned_at'):
        data[column] = pd.to_datetime(data[column], errors='coerce')

    # Fill every gap with the value from the following row. `bfill()` is the
    # supported spelling; `fillna(method='bfill')` is deprecated in pandas.
    data = data.bfill()
    return data
# Show a placeholder while the CSV loads, then replace it with a caption.
data_load_state = st.text('Loading data...')
data = load_csv_data()
st.dataframe(data)
data_load_state.text("Ini adalah data keseluruhan dari data csv")

# Report the (rows, columns) shape of the loaded frame.
total_data = data.shape
st.write(f'Total Datanya adalah : {total_data}')

# Data cleaning: drop every row that still contains a missing value.
data = data.dropna()
st.write("Jumlah data setelah menghapus missing value:", data.shape[0])

# Descriptive statistics for the two monetary columns.
st.markdown('## Statistika Deskriptif')
analisis = data[['sale_price', 'cost']].copy()
st.table(analisis.describe())
# Share of orders per status (Shipped, Processing, Cancelled, Complete, Returned).
st.markdown("## Perbandingan Shipped, Processing, Cancelled, Complete dan Returned")

# Take sizes AND labels from the same Series. `value_counts()` is ordered by
# frequency while `unique()` is ordered by first appearance, so mixing the two
# can attach a label to the wrong slice.
status_counts = data['status'].value_counts()

fig1, ax1 = plt.subplots()
ax1.pie(status_counts, labels=status_counts.index, autopct='%.2f%%')
st.pyplot(fig1)
# Best sellers: total sale_price per (product_id, product_brand), highest first.
# NOTE(review): the grouping key includes product_id, so each row is a product
# rather than a whole brand -- confirm this matches the "brand" wording.
st.markdown("## Brand Terlaris")
st.write("Ini adalah top 5 brand terlaris ")
brand = (
    data[['product_id', 'product_brand', 'sale_price']]
    .groupby(['product_id', 'product_brand'], as_index=False)['sale_price']
    .sum()
    .sort_values('sale_price', ascending=False)
)
st.table(brand.iloc[:5])

# Heading for the highest/lowest sales comparison that follows.
st.markdown("## Penjualan Tertinggi Berdasarkan Product Brand")
def perbandingan(w, a, x, y, z):
    """Render two side-by-side bar charts of ``sale_price`` in Streamlit.

    Args:
        w: Table for the left chart; indexed with ``w[a]`` and
            ``w['sale_price']``.
        a: Name of the column used for the bar categories in both tables.
        x: Table for the right chart; indexed with ``x[a]`` and
            ``x['sale_price']``.
        y: Title of the left chart.
        z: Title of the right chart.
    """
    # Draw on an explicit Figure instead of pyplot's implicit "current figure"
    # so the chart does not depend on global matplotlib state.
    fig = plt.figure(figsize=(20, 8))
    # Positions 221 / 222 keep the original layout: top row of a 2x2 grid.
    for position, frame, title in ((221, w, y), (222, x, z)):
        ax = fig.add_subplot(position)
        ax.grid()
        ax.bar(frame[a], frame['sale_price'], label="Sale Price")
        ax.set_title(title)

    st.pyplot(fig)
    # Streamlit re-runs the whole script on every interaction; close the
    # figure so open figures do not accumulate in pyplot's registry.
    plt.close(fig)
# Compare the five highest and five lowest sale_price totals.
product_brand = brand
pb = product_brand[['product_brand', 'sale_price']]

# Sort ascending once: the tail holds the highest totals, the head the lowest.
pb_ascending = pb.sort_values('sale_price')
sh = pb_ascending.tail(5)
sl = pb_ascending.head(5)

perbandingan(
    sh,
    'product_brand',
    sl,
    'Penjualan Tertinggi Berdasarkan Product Brand',
    'Penjualan Terendah Berdasarkan Product Brand',
)
# Sale price data restricted to the Canada Goose brand.
st.markdown(' # Visualisasi Data Sale Price Khusus Untuk Canada Goose')
cg = data[['created_at', 'product_brand', 'sale_price']].copy()
is_canada_goose = cg['product_brand'] == 'Canada Goose'
cg_f = cg.loc[is_canada_goose].sort_values('created_at')  # Canada Goose rows only
st.write('Sorting berdasarkan tanggal pada created_at')
st.dataframe(cg_f)

# Resample to monthly values.
st.markdown('## Resampling data perbulan')
st.write('Data sale_price disini ditampilkan dalam perbulan')
cg_e = cg_f[['created_at', 'sale_price']].sort_values('created_at')

# Mean sale price per month keeps the series less noisy; 'M' is the
# month-end frequency.
monthly_mean = cg_e.set_index('created_at').resample('M').mean()
y = (
    monthly_mean
    .dropna()                       # drop months without any sale
    .rename_axis(None, axis=1)
    .rename_axis('Date', axis=0)    # index 'created_at' -> 'Date'
)
st.dataframe(y.iloc[:10])  # show the first 10 months only
# Classic time series decomposition into trend, seasonal and residual parts.
st.markdown('## Classic Time Series Decomposition -> 1920')

# NOTE(review): "movie average" in the text below is presumably a typo for
# "moving average"; the string is left untouched here because it is
# user-facing output.
decomposition_notes = '''
Teknik untuk memisahkan time series menjadi trend, seasonal, dan residual menggunakan movie average, ada 2 tipe:
*Additive = Trend + Seasonal + Residual*\n
*Multiplicative = Trend * Seasonal * Residual*\n
Additive dipakai **untuk trend dan seasonal yang tidak terlalu bervariasi**\n
Multiplicative dipakai **untuk trend dan seasonal yang berubah seiring jalannya waktu**
'''
st.markdown(decomposition_notes)

# Default figure size for the decomposition plot.
rcParams['figure.figsize'] = (10, 5)

# Additive model with a 12-observation period, i.e. one year of monthly data.
decomposition = seasonal_decompose(y.copy(), period=12, model='additive')
fig = decomposition.plot()
st.pyplot(fig)
# Model
# Chronological 80% / 20% train/test split. The cut point is derived from the
# series length (integer arithmetic) so the two parts never overlap or leave a
# gap; for a 35-row series this is the same 28 / 7 split as before.
split_at = len(y) * 4 // 5
y_train, y_test = y[:split_at], y[split_at:]

st.markdown('# Model')
st.markdown('## ProphetFB Model')

# The library is published as `prophet` since its 1.0 release; fall back to the
# legacy `fbprophet` name for environments that still pin the old package.
try:
    from prophet import Prophet
except ImportError:
    from fbprophet import Prophet

m = Prophet()

# Prophet expects a frame with a 'ds' (date) and a 'y' (value) column.
d = y.copy()
d = d.reset_index()
d = d.rename(columns={'Date': 'ds', 'sale_price': 'y'})
model = m.fit(d)

# `periods` controls how many months past the last observation are predicted.
# NOTE(review): the chart title says one year ahead but 14 months are
# requested -- confirm the intended horizon.
future = m.make_future_dataframe(periods=14, freq='M')
forecast = m.predict(future)
forecast = forecast.set_index('ds')
d = d.set_index('ds')
final_forecast = forecast['yhat']

fig = plt.figure(figsize=(15, 5))
plt.title("Prediksi untuk 1 tahun kedepan dengan ProphetFB Model")
plt.plot(d, label="Actual")
plt.plot(final_forecast, label="Predicted")
plt.legend(loc='upper left')
st.pyplot(fig)
# ARIMA model
st.markdown("## ARIMA Model")
from pmdarima import auto_arima

arima = auto_arima(
    y_train, start_p=1, start_q=1, max_p=3, max_q=3, m=12,
    start_P=0, seasonal=True, d=1, D=1, trace=True,
    error_action='ignore',   # don't want to know if an order does not work
    suppress_warnings=True,  # don't want convergence warnings
    stepwise=True,
)

# Forecast across the test window plus 8 further months.
n_forecast = len(y_test) + 8

# Differencing and seasonal orders are fixed when the model is fitted, so
# `predict` is only given the horizon.
pred = arima.predict(n_forecast)

# The model was fitted on `y_train`, so its first forecast belongs to the month
# right after the last training observation. Build one extra month-end stamp
# and drop the first, which is the last training month itself.
dates = pd.date_range(y_train.index[-1], periods=n_forecast + 1, freq='M')[1:]

# Use the raw values: if `predict` returns a Series, passing it together with
# a new index would re-align on labels and can yield NaN instead of the
# forecasts.
pred = pd.Series(pd.Series(pred).to_numpy(), index=dates)

fig = plt.figure(figsize=(15, 5))
plt.title("Prediksi menurut arima untuk 1 tahun kedepan")
plt.plot(y_train, label="Training")
plt.plot(y_test, label="Test")
plt.plot(pred, label="Pred")
plt.legend(loc='upper left')
st.pyplot(fig)
|