File size: 3,928 Bytes
c25cd03
 
 
 
 
 
 
 
 
 
 
 
 
 
ae33b87
c25cd03
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
# app.py

import streamlit as st
import pandas as pd
import numpy as np
import joblib
from eda import (average_sales_by_region, average_sales_and_profit_over_time, 
                 segment_vs_region_distribution, sales_vs_profit_across_segments, 
                 category_composition_for_profit_and_sales)
from prediction import make_prediction

# Load the dataset for EDA
# `st.cache` has been deprecated since Streamlit 1.18 in favour of
# `st.cache_data`. Prefer the new decorator when this Streamlit build has it
# and fall back to the legacy one on older releases (the `or` short-circuits,
# so `st.cache` is only touched when `cache_data` is missing).
_cache_data = getattr(st, "cache_data", None) or st.cache


@_cache_data
def load_data():
    """Read ``superstore_clean.csv`` into a DataFrame.

    The call is wrapped in Streamlit's data cache so the CSV is not parsed
    again on every script rerun.

    Returns:
        pandas.DataFrame: the parsed contents of ``superstore_clean.csv``.
    """
    return pd.read_csv('superstore_clean.csv')

df = load_data()

# Load the pipeline and model for predictions.
# Streamlit re-executes this script on every widget interaction, so the
# pickles go through a resource cache instead of being deserialized on each
# rerun. `st.cache_resource` exists on Streamlit >= 1.18; older releases fall
# back to the legacy `st.cache` with output-mutation checks disabled, because
# fitted estimators are not meant to be hashed.
_cache_resource = getattr(st, "cache_resource", None) or st.cache(
    allow_output_mutation=True
)


@_cache_resource
def _load_artifact(path):
    """Deserialize the joblib artifact stored at ``path``.

    NOTE(security): joblib files are pickles and can execute arbitrary code
    when loaded; only point this at artifacts from a trusted training run.
    """
    return joblib.load(path)


# NOTE(review): neither object is referenced again in the visible part of this
# script (prediction goes through `make_prediction`) — confirm they are needed.
pipeline = _load_artifact('full_pipeline_with_unit_price.pkl')
model = _load_artifact('best_model.pkl')

# Sidebar navigation: the label picked here decides which page is rendered.
_PAGES = ["Home", "EDA", "Make a Prediction"]
_sidebar = st.sidebar
_sidebar.title("Navigation")
selection = _sidebar.radio("Go to", _PAGES)

# Render the page chosen in the sidebar.
if selection == "Home":
    st.title("Welcome to the Superstore Sales Dashboard")

elif selection == "EDA":
    st.title("Exploratory Data Analysis (EDA)")

    # Every section is a header followed by a figure built from the shared
    # dataframe; keeping them as (title, builder) pairs puts the page order in
    # one place instead of five copy-pasted stanzas.
    eda_sections = [
        ("Average Sales by Region", average_sales_by_region),
        ("Average Sales and Profit Over Time", average_sales_and_profit_over_time),
        ("Segment vs. Region Distribution", segment_vs_region_distribution),
        ("Sales vs. Profit Across Different Customer Segments", sales_vs_profit_across_segments),
        ("Category Composition for Profit and Sales", category_composition_for_profit_and_sales),
    ]
    for section_title, build_figure in eda_sections:
        st.header(section_title)
        st.pyplot(build_figure(df))

elif selection == "Make a Prediction":
    st.title("Make a Sales Prediction")

    # Input form. Only sales, quantity, discount and sub_category are passed
    # to the model below; the remaining fields are collected but not used in
    # this script.
    with st.form("input_form"):
        row_id = st.number_input('Row ID', min_value=1, value=1, step=1)
        order_id = st.text_input('Order ID')
        order_date = st.date_input('Order Date')
        ship_date = st.date_input('Ship Date')
        ship_mode = st.selectbox('Ship Mode', ['First Class', 'Second Class', 'Standard Class', 'Same Day'])
        customer_id = st.text_input('Customer ID')
        customer_name = st.text_input('Customer Name')
        segment = st.selectbox('Segment', ['Consumer', 'Corporate', 'Home Office'])
        country = st.text_input('Country', value='United States')
        city = st.text_input('City')
        state = st.text_input('State')
        postal_code = st.text_input('Postal Code')
        region = st.selectbox('Region', ['South', 'West', 'Central', 'East'])
        product_id = st.text_input('Product ID')
        category = st.selectbox('Category', ['Furniture', 'Office Supplies', 'Technology'])
        sub_category = st.selectbox('Sub-Category', ['Bookcases', 'Chairs', 'Labels', 'Tables', 'Storage', 'Furnishings', 'Art', 'Phones', 'Binders', 'Appliances', 'Paper', 'Accessories', 'Envelopes', 'Fasteners', 'Supplies', 'Machines', 'Copiers'])
        product_name = st.text_input('Product Name')
        sales = st.number_input('Sales', value=0.0, format="%.2f")
        quantity = st.number_input('Quantity', value=1, format="%d")
        discount = st.number_input('Discount', value=0.0, format="%.2f")
        profit = st.number_input('Profit', value=0.0, format="%.2f")

        submit_button = st.form_submit_button("Predict")

    if submit_button:
        # Construct the single-row input DataFrame. Modify as necessary to fit
        # the model's expected input.
        input_data = pd.DataFrame(
            [[sales, quantity, discount, sub_category]],
            columns=['sales', 'quantity', 'discount', 'sub_category'],
        )

        # Call prediction function
        raw_prediction = make_prediction(input_data)

        # make_prediction is defined in another module; if it hands back the
        # estimator output unchanged (a 1-element array rather than a scalar),
        # the ':.2f' format below would raise TypeError. np.ravel accepts both
        # scalars and array-likes, so take the first element as a plain float.
        predicted_profit = float(np.ravel(raw_prediction)[0])

        st.write(f'Predicted Profit: {predicted_profit:.2f}')