import matplotlib.pyplot as plt from pylab import rcParams from statsmodels.tsa.seasonal import seasonal_decompose import streamlit as st import pandas as pd from datasets import load_dataset dataset = load_dataset("shouzen/final_data_sale", use_auth_token=True) st.title('Project Canada Goose') st.write('Mempertahankan brand "canada goose" agar tetap menjadi penjualan tertinggi (untuk 1 tahun kedepan) dengan metode time series forecasting') st.markdown('# All Data') @st.cache def load_csv_data(): tp = pd.read_csv('Final_Data_Sales.csv', iterator=True, chunksize=1000,nrows=50000) # gives TextFileReader data = pd.concat(tp, ignore_index=True) # Convert data yang bukan datetime yang seperti 0000-0000 ke Datetime agar hasilnya NaT data['sold_at'] = pd.to_datetime(data['sold_at'], errors='coerce') data['created_at'] = pd.to_datetime(data['created_at'], errors='coerce') data['shipped_at'] = pd.to_datetime(data['shipped_at'], errors='coerce') data['delivered_at'] = pd.to_datetime(data['delivered_at'], errors='coerce') data['returned_at'] = pd.to_datetime(data['returned_at'], errors='coerce') # Ambil data date dari data setelahnya. data.fillna(method='bfill', inplace=True) return data data_load_state = st.text('Loading data...') data = load_csv_data() st.dataframe(data) # Notify the reader that the data was successfully loaded. data_load_state.text("Ini adalah data keseluruhan dari data csv") total_data = data.shape st.write(f'Total Datanya adalah : {total_data}') # Data Cleaning data = data.dropna() st.write("Jumlah data setelah menghapus missing value:", len(data)) #Statistika Deskriptif st.markdown('## Statistika Deskriptif') analisis = data.copy() analisis = analisis[['sale_price', 'cost']] st.table(analisis.describe()) #Perbandingan Shipped, Processing, Cancelled, Complete dan Returned st.markdown("## Perbandingan Shipped, Processing, Cancelled, Complete dan Returned") # plt.figure(figsize=(10,5)) # plt.pie(data['status'].value_counts(), labels=data['status'].unique(), autopct='%.2f%%') # plt.show() fig1, ax1 = plt.subplots() ax1.pie(data['status'].value_counts(), labels=data['status'].unique(), autopct='%.2f%%') st.pyplot(fig1) #Brand Terlaris st.markdown("## Brand Terlaris") st.write("Ini adalah top 5 brand terlaris ") brand = data[['product_id','product_brand', 'sale_price']] brand = brand.groupby(['product_id','product_brand'], as_index=False)['sale_price'].sum() brand = brand.sort_values('sale_price', ascending=False) st.table(brand.head(5)) #Penjualan Tertinggi Berdasarkan Product Brand st.markdown("## Penjualan Tertinggi Berdasarkan Product Brand") def perbandingan(w, a, x, y, z): plt.figure(figsize=(20, 8)) plt.subplot(221) plt.grid() plt.bar(w[a], w['sale_price'], label="Sale Price") plt.title(y) plt.subplot(222) plt.grid() plt.bar(x[a], x['sale_price'], label="Sale Price") plt.title(z) st.pyplot(plt) product_brand = brand pb = product_brand[['product_brand', 'sale_price']] sh = pb.sort_values('sale_price').tail(5) sl = pb.sort_values('sale_price').head(5) perbandingan(sh, 'product_brand', sl, 'Penjualan Tertinggi Berdasarkan Product Brand', 'Penjualan Terendah Berdasarkan Product Brand') #Visualisasi Data Sale Price st.markdown(' # Visualisasi Data Sale Price Khusus Untuk Canada Goose') cg = data.copy() cg= cg[['created_at','product_brand','sale_price']] cg_f = cg.loc[cg['product_brand'] == 'Canada Goose'] #Ambil data Canada Goose Saja cg_f = cg_f.sort_values('created_at') st.write('Sorting berdasarkan tanggal pada created_at') st.dataframe(cg_f) #Resampling Data to Monthly st.markdown('## Resampling data perbulan') st.write('Data sale_price disini ditampilkan dalam perbulan') cg_e = cg_f[['created_at','sale_price']] ## Ambil created at dan sale price cg_e = cg_e.sort_values('created_at') y = cg_e.set_index('created_at').resample('M').mean() ## Rata rata sale price /bulan agar data tidak lebih 'noisy' (m yang dimaksud adalah month end frequency) y = y.dropna() #Hapus Value Kosong y = y.rename_axis(None, axis=1).rename_axis('Date', axis=0) #Ubah index yang tadinya 'created_at' menjadi 'Date' st.dataframe(y.head(10)) #Tampilkan 10 data teratas saja # Classic Time Series Decomposition -> 1920 st.markdown('## Classic Time Series Decomposition -> 1920') st.markdown(''' Teknik untuk memisahkan time series menjadi trend, seasonal, dan residual menggunakan movie average, ada 2 tipe: *Additive = Trend + Seasonal + Residual*\n *Multiplicative = Trend * Seasonal * Residual*\n Additive dipakai **untuk trend dan seasonal yang tidak terlalu bervariasi**\n Multiplicative dipakai **untuk trend dan seasonal yang berubah seiring jalannya waktu** ''') rcParams['figure.figsize'] = 10, 5 #Besar Figur decomposition = seasonal_decompose(y.copy(), model='additive',period=12) fig = decomposition.plot() st.pyplot(fig) #Model y_train, y_test = y[:28], y[-7:] # Pisah data untuk keperlaun model dengan 80% train dan 20% test st.markdown('# Model') st.markdown('## ProphetFB Model') from fbprophet import Prophet #Import Prophet FB Model m = Prophet() d = y.copy() d= d.reset_index() d = d.rename(columns={'Date' : 'ds', 'sale_price' : 'y'}) model = m.fit(d) future = m.make_future_dataframe(periods=14, freq='M') #bisa setting periode untuk setting seberapa jauh untuk diprediksi (dalam bulan) forecast = m.predict(future) forecast = forecast.set_index('ds') d = d.set_index('ds') final_forecast = forecast['yhat'] fig = plt.figure(figsize=(15,5)) plt.title("Prediksi untuk 1 tahun kedepan dengan ProphetFB Model") plt.plot(d, label="Actual") plt.plot(final_forecast, label="Predicted") plt.legend(loc = 'upper left') st.pyplot(fig) #Arima Model st.markdown("## ARIMA Model") from pmdarima import auto_arima arima = auto_arima(y_train,start_p=1, start_q=1, max_p=3, max_q=3, m=12, start_P=0, seasonal=True, d=1, D=1, trace=True, error_action='ignore', # don't want to know if an order does not work suppress_warnings=True, # don't want convergence warnings stepwise=True) n_forecast = len(y_test) + 8 pred= arima.predict(n_forecast,D=1,seasonal=(1,0,0)) dates = pd.date_range(y_test.index[-1],periods=n_forecast, freq='M') pred= pd.Series(pred, index=dates) fig = plt.figure(figsize=(15,5)) plt.title("Prediksi menurut arima untuk 1 tahun kedepan") plt.plot(y_train,label="Training") plt.plot(y_test,label="Test") plt.plot(pred,label="Pred") plt.legend(loc = 'upper left') st.pyplot(fig)