# app.py
"""Streamlit dashboard for the Superstore dataset.

The app has three pages selected from the sidebar:

* "Home" - a landing title.
* "EDA" - renders the plots provided by the local ``eda`` module.
* "Make a Prediction" - collects order details in a form and displays the
  profit predicted by the model stored in ``best_model.pkl``.
"""
import streamlit as st
import pandas as pd
import numpy as np
import joblib
from datetime import datetime, timedelta

# Assuming the necessary EDA functions are defined in eda.py and imported here
from eda import (
    average_sales_by_region,
    average_sales_and_profit_over_time,
    segment_vs_region_distribution,
    sales_vs_profit_across_segments,
    category_composition_for_profit_and_sales,
)

# Column order used for the frame handed to the model.
# NOTE(review): assumes the model was trained on exactly these columns in
# this order - confirm against the training pipeline.
FEATURE_COLUMNS = ['Sub-Category', 'Product Name', 'Sales', 'Quantity', 'Discount']


@st.cache_resource
def load_model():
    """Load the prediction model from ``best_model.pkl`` once per process.

    NOTE: joblib files are pickles; only load model files from a trusted
    source.
    """
    return joblib.load('best_model.pkl')


@st.cache_data
def load_data():
    """Read the dataset used by the EDA page from ``superstore_clean.csv``."""
    return pd.read_csv('superstore_clean.csv')


def build_input_features(sub_category, product_name, sales, quantity, discount):
    """Return a one-row DataFrame whose columns follow ``FEATURE_COLUMNS``.

    Every column named in ``FEATURE_COLUMNS`` receives a value, so the row
    length always matches the number of column labels.
    """
    row = [sub_category, product_name, sales, quantity, discount]
    return pd.DataFrame([row], columns=FEATURE_COLUMNS)


# Load the model for predictions
model = load_model()

# Load the dataset for EDA
df = load_data()

# Sidebar for navigation
st.sidebar.title("Navigation")
selection = st.sidebar.radio("Go to", ["Home", "EDA", "Make a Prediction"])

if selection == "Home":
    st.title("Welcome to the Superstore Sales Dashboard")

elif selection == "EDA":
    st.title("Exploratory Data Analysis (EDA)")
    # Each helper receives the full dataset; rendering is done by eda.py.
    average_sales_by_region(df)
    average_sales_and_profit_over_time(df)
    segment_vs_region_distribution(df)
    sales_vs_profit_across_segments(df)
    category_composition_for_profit_and_sales(df)

elif selection == "Make a Prediction":
    st.title("Make a Sales Prediction")

    with st.form("input_form"):
        # Capture all inputs as per the original dataset. Only the fields
        # listed in FEATURE_COLUMNS are passed to the model below.
        order_date = st.date_input('Order Date', datetime.now())
        # Assume shipping the next day
        ship_date = st.date_input('Ship Date', datetime.now() + timedelta(days=1))
        ship_mode = st.selectbox(
            'Ship Mode',
            ['First Class', 'Second Class', 'Standard Class', 'Same Day'],
            index=2,  # Default to Standard Class
        )
        segment = st.selectbox(
            'Segment',
            ['Consumer', 'Corporate', 'Home Office'],
            index=0,  # Default to Consumer
        )
        country = st.text_input('Country', value='United States')
        city = st.text_input('City', value='Los Angeles')  # Example city
        state = st.text_input('State', value='California')  # Example state
        postal_code = st.text_input('Postal Code', value='90001')  # Example postal code
        region = st.selectbox(
            'Region',
            ['South', 'West', 'Central', 'East'],
            index=1,  # Default to West
        )
        category = st.selectbox(
            'Category',
            ['Furniture', 'Office Supplies', 'Technology'],
            index=1,  # Default to Office Supplies
        )
        sub_category = st.selectbox(
            'Sub-Category',
            ['Bookcases', 'Chairs', 'Labels', 'Tables', 'Storage',
             'Furnishings', 'Art', 'Phones', 'Binders', 'Appliances',
             'Paper', 'Accessories', 'Envelopes', 'Fasteners', 'Supplies',
             'Machines', 'Copiers'],
            index=10,  # Default to Paper
        )
        product_name = st.text_input('Product Name', value='Staple papers')  # Example product
        sales = st.number_input('Sales', value=100.0, format="%.2f")  # Example sales amount
        quantity = st.number_input('Quantity', value=2, format="%d")  # Example quantity
        discount = st.number_input('Discount', value=0.0, format="%.2f")  # Example discount

        submit_button = st.form_submit_button("Predict")

    if submit_button:
        # Construct the input DataFrame
        input_features = build_input_features(
            sub_category, product_name, sales, quantity, discount
        )

        # Preprocess and predict (You'll need to adjust this part based on
        # how your model expects input). For example, you might need to
        # transform 'input_features' to match the expected input format of
        # your model.
        prediction = model.predict(input_features)

        # predict() yields one value per input row as an array-like; pull out
        # the single scalar so the float format spec can be applied.
        predicted_profit = float(np.ravel(prediction)[0])
        st.write(f'Predicted Profit: {predicted_profit:.2f}')