File size: 8,489 Bytes
38f0dc3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23dea5d
 
38f0dc3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
import streamlit as st
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt


# Machine Learning Modeling
from sklearn.preprocessing import StandardScaler, MinMaxScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import RandomizedSearchCV
import xgboost as xgb
from sklearn.metrics import mean_squared_error
import joblib

# --- Page configuration and static header ---------------------------------
# Wide layout; this is the first Streamlit command the script issues.
st.set_page_config(layout="wide")

# Module-level holder for the uploaded dataset. It stays None until
# load_and_process_data() has read a workbook.
df = None

# Sidebar navigation: the selected entry decides which page is drawn below.
_PAGES = ["Data Statistics", "Visuals", "Time Series Analysis", "Forecasting"]
st.sidebar.title("Favorita Stores")
selected_option = st.sidebar.radio("Select to Proceed", _PAGES)

# Stylesheet for the centred, sticky title banner (class "title-text").
_TITLE_CSS = """
    <style>
    .title-text {
        font-size: 28px;
        text-align: center;
        background-color: #3498db;
        color: white;
        padding: 10px 0;
        width: 100%;
        position: sticky;
        top: 0;
        z-index: 1;
    }
    </style>
    """
st.markdown(_TITLE_CSS, unsafe_allow_html=True)

# App title, rendered with the banner class defined in the stylesheet.
_TITLE_HTML = '<p class="title-text">Machine Learning App for Sales Prediction</p>'
st.markdown(_TITLE_HTML, unsafe_allow_html=True)

# Function to load and process the data
def load_and_process_data() -> None:
    """Render the Excel uploader and store the cleaned upload in the global ``df``.

    When a file with the ``.xlsx`` MIME type has been uploaded it is read with
    ``pd.read_excel``, rows containing any null value are dropped, and the
    ``'Unnamed: 0'`` column is removed if the workbook has one. The result is
    assigned to the module-level ``df``.

    When nothing has been uploaded, ``df`` is left untouched. When the upload
    has a different MIME type, a message is written to the page and ``df`` is
    left untouched.
    """
    global df

    # Allow the user to upload an Excel file
    uploaded_file = st.file_uploader("Choose an Excel file", type=["xlsx"])
    if uploaded_file is None:
        return

    # Check if the file is an Excel file
    if uploaded_file.type != 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet':
        st.write("Please upload a valid Excel file.")
        return

    # Build the frame locally and publish it only once it is fully cleaned, so
    # the global never holds a half-processed frame.
    frame = pd.read_excel(uploaded_file)
    frame = frame.dropna()
    # errors='ignore': workbooks saved without the extra index column have no
    # 'Unnamed: 0'; a plain drop() would raise KeyError for them.
    df = frame.drop(columns='Unnamed: 0', errors='ignore')

# Load and process the data. This sets the module-level ``df`` when a valid
# workbook has been uploaded; otherwise ``df`` keeps its previous value.
load_and_process_data()

if selected_option == "Data Statistics":
    # Row preview, numeric summary and unique values of one text column.
    if df is not None:
        number_sample = st.number_input("Enter sample size to display data", min_value=5, max_value=10, step=1, value=5)
        displayed_data = df.head(number_sample)
        st.write("Sample data", displayed_data)
        st.write("Summary Statistics of float/Integer columns", df.describe())
        object_columns = df.select_dtypes(include='object').columns.tolist()
        selected_column = st.selectbox("Select column of Data Type Object to View Unique values", object_columns)
        if selected_column:
            unique_values = df[selected_column].unique()
            st.write("Unique values are", unique_values)

elif selected_option == "Visuals":
    # Bar chart of summed sales for the ten best-selling groups of a text column.
    if df is not None:
        object_columns = df.select_dtypes(include='object').columns.tolist()
        selected_column = st.selectbox("Select column of Data Type Object for Visualization", object_columns)
        if selected_column:
            # Within this branch the parsed dates only affect the chart when
            # 'date' itself is the grouping column.
            df['date'] = pd.to_datetime(df['date'])  # Convert to datetime if applicable
            # Rank by total sales first and keep ten afterwards. Taking
            # head(10) before sorting would keep the first ten groups in key
            # order, not the ten largest. nlargest() returns them in
            # descending order.
            df_grouped = df.groupby(selected_column)['sales'].sum().nlargest(10)
            fig, ax = plt.subplots(figsize=(15, 6))
            ax.bar(df_grouped.index, df_grouped.values)
            ax.set_xlabel(selected_column)
            ax.set_ylabel('Sales Count')
            ax.set_title(f'Top 10 Sales Count for {selected_column}')
            st.pyplot(fig)  # Pass the figure to st.pyplot()

elif selected_option == "Time Series Analysis":
    # Line chart of total sales, resampled to the period the user picks.
    if df is not None:
        # Work on a copy of the two needed columns so ``df`` is not modified.
        timeseriesdata = df[['sales', 'date']].copy()
        # resample() needs a datetime-like index, and the only other place
        # that parses the dates is the Visuals branch, which did not run.
        timeseriesdata['date'] = pd.to_datetime(timeseriesdata['date'])
        # Make date the index and keep only the 'sales' column
        timeseriesdata = timeseriesdata.set_index('date')[['sales']]
        timeseriesdata = timeseriesdata.resample('D').sum()  # Resample to daily sales

        # Resample the data based on user's choice
        resample_method = st.selectbox("Select a resampling method", ['M', 'Q', 'Y'])
        if resample_method:
            resampled_data = timeseriesdata.resample(resample_method).sum()

            # Draw on an explicit figure/axes instead of the global pyplot state.
            fig, ax = plt.subplots(figsize=(15, 6))
            sns.lineplot(data=resampled_data, ax=ax)
            ax.set_ylabel('Sales')
            ax.set_title(f'Sales Time Series (Resampled by {resample_method})')
            st.pyplot(fig)

else:
    # "Forecasting": collect feature values and run the stored model on them.
    st.write("Please enter these inputs to predict sales. Thank you!")
    # Load the pre-trained model and preprocessor.
    # NOTE: joblib.load unpickles the file; only ship trusted artifacts here.
    model = joblib.load('./xgb_model.joblib')
    preprocessor = joblib.load('./preprocessor.joblib')

    # Create a layout with 2 columns for even distribution
    col1, col2 = st.columns(2)

    # Left column: date and numeric inputs (top-to-bottom order as displayed).
    with col1:
        date = st.date_input("Enter Date")
        # Date as a YYYY-MM-DD string.
        # NOTE(review): neither `date` nor `formatted_date` is included in the
        # frame passed to the preprocessor below; confirm that is intended.
        formatted_date = date.strftime('%Y-%m-%d')

        onpromotion = st.number_input("Enter Number for onpromotion", min_value=0, step=1)
        oil_prices = st.number_input("Enter oil price", min_value=1, step=1)
        sales_lag_1 = st.number_input("Enter Number for sales lag", min_value=0, step=1)

    # Right column: categorical inputs plus the moving average.
    with col2:
        family = st.selectbox("Select product family", ['CELEBRATION', 'CLEANING', 'DAIRY', 'DELI', 'EGGS', 'FROZEN FOODS',
           'GROCERY I', 'GROCERY II', 'HARDWARE', 'HOME AND KITCHEN I',
           'HOME AND KITCHEN II', 'HOME APPLIANCES', 'HOME CARE',
           'LADIESWEAR', 'LAWN AND GARDEN', 'LINGERIE', 'LIQUOR,WINE,BEER',
           'MAGAZINES', 'MEATS', 'PERSONAL CARE', 'PET SUPPLIES',
           'PLAYERS AND ELECTRONICS', 'POULTRY', 'PREPARED FOODS', 'PRODUCE',
           'SCHOOL AND OFFICE SUPPLIES', 'SEAFOOD', 'AUTOMOTIVE', 'BABY CARE',
           'BEAUTY', 'BEVERAGES', 'BOOKS', 'BREAD/BAKERY'])
        city = st.selectbox("Select city", ['Quito', 'Cayambe', 'Latacunga', 'Riobamba', 'Ibarra',
           'Santo Domingo', 'Guaranda', 'Puyo', 'Ambato', 'Guayaquil',
           'Salinas', 'Daule', 'Babahoyo', 'Quevedo', 'Playas', 'Libertad',
           'Cuenca', 'Loja', 'Machala', 'Esmeraldas', 'Manta', 'El Carmen'])
        holiday_type = st.selectbox("Select holiday type", ['Holiday', 'Additional', 'Transfer', 'Event', 'Bridge'])
        moving_average = st.number_input("Enter Number for moving average", min_value=0, step=1)

    # Horizontal rule separating the inputs from the prediction area
    st.markdown("<hr>", unsafe_allow_html=True)

    # Predict Button
    if st.button("Predict"):
        # Slot that will hold the rendered prediction.
        prediction_placeholder = st.empty()

        # One-row frame with every feature collected above ("sales" is the
        # target and is therefore not part of it).
        input_df = pd.DataFrame({
            "family": [family],
            "onpromotion": [onpromotion],
            "city": [city],
            "oil_prices": [oil_prices],
            "holiday_type": [holiday_type],
            "sales_lag_1": [sales_lag_1],
            "moving_average": [moving_average]
        })

        # Transform the input DataFrame using the preprocessor
        preprocessed_data = preprocessor.transform(input_df)

        # Make a prediction; one input row, so the first element is the result.
        prediction = model.predict(preprocessed_data)
        predicted_sales = prediction[0]

        # Compare the scalar, not the whole array, and render once: green
        # badge for a non-negative prediction, red for a negative one.
        badge_colour = "green" if predicted_sales >= 0 else "red"
        prediction_placeholder.markdown(
            f'Predicted Value for sales: <span style="background-color: {badge_colour}; padding: 2px 5px; border-radius: 5px;">${predicted_sales:,.2f}</span>',
            unsafe_allow_html=True
        )