Spaces:

ahmadluay
/

paragoncorp_products_sales_quantities_forecasting

Runtime error

App Files Files Community

ahmadluay committed on Apr 15, 2023

Commit

e9af0e5

•

1 Parent(s): ad49447

first commit

Browse files

Files changed (7) hide show

app.py +11 -0
eda.py +97 -0
model_sarimax.pkl +3 -0
paragon.png +0 -0
prediction.py +41 -0
requirements.txt +7 -0
sample_dataset_timeseries_noarea.csv +0 -0

app.py ADDED Viewed

	@@ -0,0 +1,11 @@

+import streamlit as st
+import eda # python file
+import prediction # python file
+navigation = st.sidebar.selectbox('Page Navigation: ',('Products Sales Quantities Prediction','EDA'))
+if navigation == 'EDA':
+    eda.run()
+else:
+    prediction.run()

eda.py ADDED Viewed

	@@ -0,0 +1,97 @@

+import streamlit as st
+import numpy as np
+import pandas as pd
+import seaborn as sns
+import matplotlib.pyplot as plt
+from PIL import Image
+# Page-wide Streamlit settings (browser tab title, wide layout, sidebar open).
+# This call sits at module level, so it executes when this module is imported
+# (app.py imports it before issuing any other Streamlit command), not when
+# run() is called.
+# NOTE(review): Streamlit expects set_page_config to be the first Streamlit
+# command of a script run - confirm this still holds if the import order in
+# app.py ever changes.
+st.set_page_config(
+    page_title='ParagonCorp Products Sales Quantities Forecasting',
+    layout = 'wide',
+    initial_sidebar_state='expanded'
+)
+def run():
+    # title
+    st.title('Exploratory Data Analysis of the Dataset')
+    st.write('by Ahmad Luay Adnani')
+    # Add Image
+    image = Image.open('paragon.png')
+    st.image(image)
+    # Description
+    st.write('---')
+    st.write('# Dataset')
+    st.write('Dataset provided by ParagonCorp.')
+    # show dataframe
+    df = pd.read_csv('sample_dataset_timeseries_noarea.csv')
+    st.dataframe(df)
+    ###
+    # create a copy of the dataframe
+    df_eda = df.copy()
+    # EDA
+    st.write('---')
+    st.write('# Exploratory Data Analysis')
+    select_eda = st.selectbox('Select EDA : ', ('Statistical Descriptive','Trend of Product Sales Quantities'))
+    if select_eda == 'Statistical Descriptive':
+        # measure of central tendency
+        stats = df.describe().T
+        st.dataframe(stats)
+        st.write('Based on information above:')
+        st.write("- The average product sales quantities is 3191.56.")
+        st.write('- The range of product sales quantities is between  0 to 774,732.')
+    else:
+        # Trend of Product Sales Quantities
+        # create a copy of the dataset
+        df_eda = df.copy()
+        # groupby week_end_date
+        df_eda = df_eda.groupby("week_end_date")["quantity"].sum().to_frame().reset_index()
+        # Set the date as index
+        df_eda = df.set_index('week_end_date')
+        # convert index to datetime
+        df_eda.index = pd.to_datetime(df_eda.index)
+        # drop columns
+        df_eda.drop(columns=['week_number','week_start_date','product_item'],inplace=True)
+        # convert index to datetime
+        df_eda.index = pd.to_datetime(df_eda.index)
+        # Select the proper time period for weekly aggregation
+        df_eda = df_eda['2022-01-02':'2023-04-09'].resample('W').sum()
+        # Trend
+        # add linear line
+        m, b = np.polyfit(range(len(df_eda)), df_eda['quantity'], 1)
+        plt.figure(figsize=(10,4))
+        plt.plot(df_eda.index, m*range(len(df_eda)) + b, label='Linear Trend Line',color='orange', alpha=0.6)
+        # plot time series df_eda
+        plt.plot(df_eda.index, df_eda['quantity'], marker = 'o', ms = 4, label='Product Sales Quantities')
+        # add labels and legend
+        plt.title('Trend of Product Sales Quantities')
+        plt.xlabel('Date')
+        plt.ylabel('quantity')
+        plt.legend()
+        # show plot
+        plt.show()
+        st.set_option('deprecation.showPyplotGlobalUse', False)
+        st.pyplot()
+        st.write('Here we can see the data of product sales quantities for the past 67 weeks. The trend is increasing over the past 67 weeks. There are no pattern on the products sales quantities. There are big spike that occur on 2022-05-08.')
+if __name__ == '__main__':
+    run()

model_sarimax.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:24727d7887914918f860fa8c19bf7775b616494fe6887fd21084dd80b954b661
+size 2670231

paragon.png ADDED Viewed

prediction.py ADDED Viewed

	@@ -0,0 +1,41 @@

+import streamlit as st
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+import seaborn as sns
+import pickle
+# Load models
+# The pickled model is read once, at import time, and kept in the module-level
+# name `model_sarimax` that run() uses for forecasting.
+# NOTE(review): pickle.load can execute arbitrary code from the file; this is
+# acceptable only because 'model_sarimax.pkl' ships with the app itself.
+# NOTE(review): presumably the pickle must be loaded with the same statsmodels
+# version it was saved with (see the pin in requirements.txt) - confirm.
+with open('model_sarimax.pkl', 'rb') as file_1:
+  model_sarimax = pickle.load(file_1)
+def run():
+  st.markdown("<h1 style='text-align: center;'>Products Sales Quantities Prediction</h1>", unsafe_allow_html=True)
+  with st.form(key='Amazon_Customer_Review'):
+      input = st.number_input('Title', min_value=0, max_value=99, value=5 ,step=1)
+      submitted = st.form_submit_button('Predict')
+  if submitted:
+      # Predict
+      result = model_sarimax.forecast(18).tail(input)
+      result = pd.DataFrame(result)
+      st.dataframe(result)
+      # Forecast Visualization
+      fig = plt.figure(figsize=(20,10))
+      sns.lineplot(x=result.index, y=result.predicted_mean, data=result)
+      plt.title(f'Prediction for the next {input} weeks', fontsize=20)
+      plt.xlabel('Date', fontsize=16)
+      plt.xticks(fontsize=8)
+      plt.ylabel('Quantities Difference', fontsize=16)
+      st.pyplot(fig)
+if __name__ == '__main__':
+    run()

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+streamlit
+pandas
+seaborn
+matplotlib
+scikit-learn == 1.0.2
+numpy
+statsmodels == 0.13.2

sample_dataset_timeseries_noarea.csv ADDED Viewed

The diff for this file is too large to render. See raw diff