# Hugging Face Space: aaronayitey/Streamlit-app (status: Sleeping)
# File size: 8,489 Bytes

import streamlit as st
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt


# Machine Learning Modeling
from sklearn.preprocessing import StandardScaler, MinMaxScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import RandomizedSearchCV
import xgboost as xgb
from sklearn.metrics import mean_squared_error
import joblib

# Pages offered in the sidebar; the branch chain further down keys off these labels.
PAGE_CHOICES = ["Data Statistics", "Visuals", "Time Series Analysis", "Forecasting"]

# CSS for the sticky, centred, blue banner used as the app title.
TITLE_STYLE = """
    <style>
    .title-text {
        font-size: 28px;
        text-align: center;
        background-color: #3498db;
        color: white;
        padding: 10px 0;
        width: 100%;
        position: sticky;
        top: 0;
        z-index: 1;
    }
    </style>
    """

# HTML for the title banner; styled by the .title-text rule above.
TITLE_HTML = '<p class="title-text">Machine Learning App for Sales Prediction</p>'

# Use the wide page layout.
st.set_page_config(layout="wide")

# No dataset is loaded yet; the upload helper replaces this once a file arrives.
df = None

# Sidebar: heading plus the page selector.
st.sidebar.title("Favorita Stores")
selected_option = st.sidebar.radio("Select to Proceed", PAGE_CHOICES)

# Inject the banner styling, then render the banner itself.
st.markdown(TITLE_STYLE, unsafe_allow_html=True)
st.markdown(TITLE_HTML, unsafe_allow_html=True)

# Function to load and process the data
def load_and_process_data():
    """Render the Excel uploader and store the cleaned upload in the global ``df``.

    Shows a file-upload widget restricted to ``.xlsx`` files. When a file with
    the Excel MIME type has been uploaded, it is read into a DataFrame, rows
    containing null values are dropped, and the ``Unnamed: 0`` column is
    removed if the workbook has one. The result is assigned to the
    module-level ``df``.

    If nothing has been uploaded the function does nothing. If the upload has
    a different MIME type, a message is written to the page and ``df`` is left
    unchanged.

    Returns:
        None. The outcome is communicated through the global ``df``.
    """
    global df

    uploaded_file = st.file_uploader("Choose an Excel file", type=["xlsx"])
    if uploaded_file is None:
        return

    # Reject anything the browser did not report as an .xlsx workbook.
    if uploaded_file.type != 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet':
        st.write("Please upload a valid Excel file.")
        return

    cleaned = pd.read_excel(uploaded_file).dropna()
    # 'Unnamed: 0' is presumably an index column left over from an earlier
    # export (TODO confirm). errors='ignore' keeps workbooks without that
    # column from failing with a KeyError.
    df = cleaned.drop(columns='Unnamed: 0', errors='ignore')

# Run the uploader first so the pages below can read df
load_and_process_data()

if selected_option == "Data Statistics":
    # Nothing is rendered on this page until a dataset has been loaded.
    # The df check stays nested so that a missing dataset does not fall
    # through to the other pages of the chain.
    if df is not None:
        # Preview: the first N rows, with N chosen by the user (5 to 10)
        sample_size = st.number_input(
            "Enter sample size to display data",
            min_value=5,
            max_value=10,
            step=1,
            value=5,
        )
        st.write("Sample data", df.head(sample_size))

        # Descriptive statistics as produced by DataFrame.describe()
        st.write("Summary Statistics of float/Integer columns", df.describe())

        # Let the user inspect the distinct values of one object-typed column
        text_columns = df.select_dtypes(include='object').columns.tolist()
        chosen_column = st.selectbox(
            "Select column of Data Type Object to View Unique values",
            text_columns,
        )
        if chosen_column:
            st.write("Unique values are", df[chosen_column].unique())
elif selected_option == "Visuals":
    # Rest of the code for "Visuals" option using df
    if df is not None:
        object_columns = df.select_dtypes(include='object').columns.tolist()
        selected_column = st.selectbox("Select column of Data Type Object for Visualization", object_columns)
        if selected_column:
            df['date'] = pd.to_datetime(df['date'])  # Convert to datetime if applicable
            df_grouped = df.groupby(selected_column)['sales'].sum().head(10)
            df_grouped = df_grouped.sort_values(ascending=False)
            fig, ax = plt.subplots(figsize=(15, 6))
            ax.bar(df_grouped.index, df_grouped.values)
            ax.set_xlabel(selected_column)
            ax.set_ylabel('Sales Count')
            ax.set_title(f'Top 10 Sales Count for {selected_column}')
            st.pyplot(fig)  # Pass the figure to st.pyplot()
elif selected_option == "Time Series Analysis":
    if df is not None:
        # Choose date and sales columns
        timeseriesdata = df[['sales', 'date']]
        timeseriesdata.index = timeseriesdata['date']
        timeseriesdata = timeseriesdata[['sales']]  # Keep only the 'sales' column

        # Make date the index
        timeseriesdata = timeseriesdata.resample('D').sum()  # Resample to daily sales

        # Resample the data based on user's choice
        resample_method = st.selectbox("Select a resampling method", ['M', 'Q', 'Y'])
        if resample_method:
            resampled_data = timeseriesdata.resample(resample_method).sum()

            # Plot the time series using Seaborn lineplot
            plt.figure(figsize=(15, 6))
            sns.lineplot(data=resampled_data)
            plt.ylabel('Sales')
            plt.title(f'Sales Time Series (Resampled by {resample_method})')
            st.pyplot(plt.gcf())
else:
    st.write("Please enter these inputs to predict sales. Thank you!")
    # Load the pre-trained model and preprocessor
    model = joblib.load('./xgb_model.joblib')
    preprocessor = joblib.load('./preprocessor.joblib') 

    

    # Create a layout with 2 columns for even distribution
    col1, col2 = st.columns(2)  

    # User Inputs - Number
    with col1:
        # Create a date input using st.date_input
        date = st.date_input("Enter Date")      

        # Convert the selected date to a string in the desired format (e.g., YYYY-MM-DD)
        formatted_date = date.strftime('%Y-%m-%d')      

    # User Inputs - Year
    with col2:
        family = st.selectbox("Select product family", ['CELEBRATION', 'CLEANING', 'DAIRY', 'DELI', 'EGGS', 'FROZEN FOODS',
           'GROCERY I', 'GROCERY II', 'HARDWARE', 'HOME AND KITCHEN I',
           'HOME AND KITCHEN II', 'HOME APPLIANCES', 'HOME CARE',
           'LADIESWEAR', 'LAWN AND GARDEN', 'LINGERIE', 'LIQUOR,WINE,BEER',
           'MAGAZINES', 'MEATS', 'PERSONAL CARE', 'PET SUPPLIES',
           'PLAYERS AND ELECTRONICS', 'POULTRY', 'PREPARED FOODS', 'PRODUCE',
           'SCHOOL AND OFFICE SUPPLIES', 'SEAFOOD', 'AUTOMOTIVE', 'BABY CARE',
           'BEAUTY', 'BEVERAGES', 'BOOKS', 'BREAD/BAKERY']) 

    # User Inputs - On Promotion
    with col1:
        onpromotion = st.number_input("Enter Number for onpromotion", min_value=0, step=1)  


    # User Inputs - Day of the Week
    with col2:
        city = st.selectbox("Select city", ['Quito', 'Cayambe', 'Latacunga', 'Riobamba', 'Ibarra',
           'Santo Domingo', 'Guaranda', 'Puyo', 'Ambato', 'Guayaquil',
           'Salinas', 'Daule', 'Babahoyo', 'Quevedo', 'Playas', 'Libertad',
           'Cuenca', 'Loja', 'Machala', 'Esmeraldas', 'Manta', 'El Carmen'])    

    # User Inputs - Product Category
    with col1:
        oil_prices = st.number_input("Enter oil price", min_value=1, step=1)    
 

    # User Inputs - Day of the Week
    with col2:
        holiday_type = st.selectbox("Select holiday type", ['Holiday', 'Additional', 'Transfer', 'Event', 'Bridge'])    

    # User Inputs - Product Category
    with col1:
        sales_lag_1 = st.number_input("Enter Number for sales lag", min_value=0, step=1)    


    # User Inputs - Day of the Week
    with col2:
        moving_average = st.number_input("Enter Number for moving average", min_value=0, step=1)    

    # Placeholder for Predicted Value   

    # Add custom spacing between columns
    st.markdown("<hr>", unsafe_allow_html=True) 



    # Predict Button
    if st.button("Predict"):
        # Prepare input data for prediction
        # Prepare input data for prediction
        # Create a DataFrame with all required columns except "sales"
        prediction_placeholder = st.empty()
        input_df = pd.DataFrame({
            "family": [family],
            "onpromotion": [onpromotion],
            "city": [city],
            "oil_prices": [oil_prices],
            "holiday_type": [holiday_type],
            "sales_lag_1": [sales_lag_1],
            "moving_average": [moving_average]
        })

        # Transform the input DataFrame using the preprocessor
        preprocessed_data = preprocessor.transform(input_df)



        # Make a prediction
        prediction = model.predict(preprocessed_data)   

         
        # Display the prediction
        prediction_placeholder.text(f"Predicted Value for sales: {prediction[0]: ,.2f}")  

        if prediction >= 0:
            prediction_placeholder.markdown(
            f'Predicted Value for sales: <span style="background-color: green; padding: 2px 5px; border-radius: 5px;">${prediction[0]:,.2f}</span>',
            unsafe_allow_html=True
        )
        else:
            prediction_placeholder.markdown(
            f'Predicted Value for sales: <span style="background-color: red; padding: 2px 5px; border-radius: 5px;">${prediction[0]:,.2f}</span>',
            unsafe_allow_html=True
        )