Spaces:

jskswamy
/

superkart_backend

Sleeping

App Files Community

jskswamy committed on Aug 31

Commit

5ff8440

verified ·

1 Parent(s): 446a732

Uploading files via huggingface api

Browse files

Files changed (4) hide show

Dockerfile +10 -8
README.md +4 -4
app.py +352 -751
requirements.txt +7 -4

Dockerfile CHANGED Viewed

@@ -1,4 +1,4 @@
-# Streamlit Frontend Dockerfile
 FROM python:3.12-slim
 # Set working directory
@@ -12,20 +12,22 @@ RUN pip install --no-cache-dir -r requirements.txt
 # Copy application code
 COPY app.py .
-COPY .streamlit/ .streamlit/
 # Expose port (Hugging Face Spaces uses 7860)
 EXPOSE 7860
-# Set default backend URL (can be overridden with environment variable)
-ENV BACKEND_URL=http://localhost:7860
-# Health check
 HEALTHCHECK --interval=30s \
     --timeout=10s \
     --start-period=5s \
     --retries=3 \
-    CMD curl -f http://localhost:7860/_stcore/health || exit 1
-# Run Streamlit
-CMD ["streamlit", "run", "app.py"]

+# Use Python 3.12 slim image as base
 FROM python:3.12-slim
 # Set working directory
 # Copy application code
 COPY app.py .
+# Copy the trained model file into the image
+COPY ./superkart_model.joblib ./superkart_model.joblib
 # Expose port (Hugging Face Spaces uses 7860)
 EXPOSE 7860
+# Set environment variables
+ENV FLASK_APP=app.py
+ENV FLASK_ENV=production
 HEALTHCHECK --interval=30s \
     --timeout=10s \
     --start-period=5s \
     --retries=3 \
+    CMD curl -f http://localhost:7860/ || exit 1
+# Run the application with gunicorn
+CMD ["gunicorn", "--bind", "0.0.0.0:7860", "--workers", "4", "--timeout", "120", "app:app"]

README.md CHANGED Viewed

@@ -1,8 +1,8 @@
 ---
-title: Superkart Frontend
-emoji: 🌍
-colorFrom: yellow
-colorTo: blue
 sdk: docker
 pinned: false
 ---

 ---
+title: Superkart Backend
+emoji: 🛒
+colorFrom: purple
+colorTo: indigo
 sdk: docker
 pinned: false
 ---

app.py CHANGED Viewed

@@ -1,793 +1,394 @@
 """
-SuperKart Sales Prediction Frontend
-A Streamlit web application for predicting product sales using the SuperKart ML model.
-This frontend provides an intuitive interface for users to input product and store features
-and get sales predictions from the backend API.
 """
-import warnings
-import streamlit as st
-import requests
-import pandas as pd
-import argparse
 import os
-import sys
-from typing import Dict
-# Suppress SyntaxWarnings from Streamlit library
-warnings.filterwarnings("ignore", category=SyntaxWarning)
-# Page configuration
-st.set_page_config(
-    page_title="SuperKart Sales Predictor",
-    page_icon="🛒",
-    layout="wide",
-    initial_sidebar_state="expanded",
-)
-# Custom CSS for better styling
-st.markdown(
     """
-<style>
-    .main-header {
-        font-size: 3rem;
-        color: #1f77b4;
-        text-align: center;
-        margin-bottom: 2rem;
-    }
-    .prediction-box {
-        background-color: #f0f8ff;
-        padding: 20px;
-        border-radius: 10px;
-        border-left: 5px solid #1f77b4;
-        margin: 20px 0;
-    }
-    .success-box {
-        background-color: #d4edda;
-        padding: 15px;
-        border-radius: 5px;
-        border-left: 5px solid #28a745;
-        margin: 10px 0;
-    }
-    .error-box {
-        background-color: #f8d7da;
-        padding: 15px;
-        border-radius: 5px;
-        border-left: 5px solid #dc3545;
-        margin: 10px 0;
-    }
-</style>
-""",
-    unsafe_allow_html=True,
-)
-def get_backend_url():
-    """Get backend URL from command line arguments, environment variables, or default."""
-    # Check if running with Streamlit (sys.argv will contain streamlit run ...)
-    if len(sys.argv) > 1 and "streamlit" in sys.argv[0]:
-        # Parse additional arguments after the script name
-        parser = argparse.ArgumentParser(description="SuperKart Frontend App")
-        parser.add_argument(
-            "--backend-url",
-            type=str,
-            default=os.getenv("BACKEND_URL", "http://localhost:7860"),
-            help="Backend API URL (default: http://localhost:7860)",
-        )
-        # Only parse known args to avoid conflicts with Streamlit args
-        try:
-            known_args, _ = parser.parse_known_args()
-            return known_args.backend_url
-        except (SystemExit, argparse.ArgumentError):
-            pass
-    # Fallback to environment variable or default
-    return os.getenv("BACKEND_URL", "http://localhost:7860")
-# Configuration
-BACKEND_URL = get_backend_url()
-def make_api_request(endpoint: str, data: Dict = None, method: str = "GET") -> Dict:
-    """Make API request to backend service."""
-    try:
-        url = f"{BACKEND_URL}{endpoint}"
-        if method == "GET":
-            response = requests.get(url, timeout=30)
-        elif method == "POST":
-            response = requests.post(url, json=data, timeout=30)
-        response.raise_for_status()
-        return {"success": True, "data": response.json()}
-    except requests.exceptions.ConnectionError:
-        return {
-            "success": False,
-            "error": "Cannot connect to backend API. Please ensure the backend service is running.",
-        }
-    except requests.exceptions.Timeout:
-        return {
-            "success": False,
-            "error": "Request timeout. The backend service is taking too long to respond.",
-        }
-    except requests.exceptions.RequestException as e:
-        return {"success": False, "error": f"API request failed: {str(e)}"}
-def get_feature_info():
-    """Get feature information from backend API."""
-    result = make_api_request("/features")
-    if result["success"]:
-        return result["data"]
-    else:
-        st.error(f"Failed to get feature information: {result['error']}")
-        return None
-def create_input_form():
-    """Create the input form for prediction."""
-    st.header("🔮 Product Sales Prediction")
-    # Get feature information
-    feature_info = get_feature_info()
-    if not feature_info:
-        return None
-    # Create form
-    with st.form("prediction_form"):
-        col1, col2 = st.columns(2)
-        with col1:
-            st.subheader("📦 Product Features")
-            product_weight = st.number_input(
-                "Product Weight (kg)",
-                min_value=0.1,
-                max_value=100.0,
-                value=12.66,
-                step=0.1,
-                help="Weight of the product in kilograms",
-            )
-            product_sugar_content = st.selectbox(
-                "Sugar Content",
-                options=["Low Sugar", "Regular", "No Sugar"],
-                index=0,
-                help="Sugar content level of the product",
-            )
-            product_allocated_area = st.number_input(
-                "Allocated Display Area (Ratio)",
-                min_value=0.0,
-                max_value=1.0,
-                value=0.027,
-                step=0.001,
-                format="%.3f",
-                help="Ratio of allocated display area (0.0 to 1.0)",
-            )
-            product_type = st.selectbox(
-                "Product Type",
-                options=[
-                    "Dairy",
-                    "Soft Drinks",
-                    "Meat",
-                    "Fruits and Vegetables",
-                    "Household",
-                    "Baking Goods",
-                    "Snack Foods",
-                    "Frozen Foods",
-                    "Breakfast",
-                    "Health and Hygiene",
-                    "Hard Drinks",
-                    "Canned",
-                    "Bread",
-                    "Starchy Foods",
-                    "Others",
-                    "Seafood",
-                ],
-                index=7,  # Frozen Foods
-                help="Category of the product",
-            )
-            product_mrp = st.number_input(
-                "Maximum Retail Price ($)",
-                min_value=1.0,
-                max_value=1000.0,
-                value=117.08,
-                step=0.01,
-                format="%.2f",
-                help="Maximum retail price in USD",
-            )
-        with col2:
-            st.subheader("🏪 Store Features")
-            store_establishment_year = st.selectbox(
-                "Store Establishment Year",
-                options=[1987, 1998, 1999, 2009],
-                index=3,  # 2009
-                help="Year when the store was established",
-            )
-            store_size = st.selectbox(
-                "Store Size",
-                options=["Small", "Medium", "High"],
-                index=1,  # Medium
-                help="Size category of the store",
-            )
-            store_location_city_type = st.selectbox(
-                "City Type",
-                options=["Tier 1", "Tier 2", "Tier 3"],
-                index=1,  # Tier 2
-                help="Type of city where the store is located",
-            )
-            store_type = st.selectbox(
-                "Store Type",
-                options=[
-                    "Supermarket Type1",
-                    "Supermarket Type2",
-                    "Supermarket Type3",
-                    "Departmental Store",
-                    "Food Mart",
-                ],
-                index=1,  # Supermarket Type2
-                help="Type/format of the store",
-            )
-        # Submit button
-        submitted = st.form_submit_button("🎯 Predict Sales", type="primary")
-        if submitted:
-            # Prepare input data
-            input_data = {
-                "Product_Weight": product_weight,
-                "Product_Sugar_Content": product_sugar_content,
-                "Product_Allocated_Area": product_allocated_area,
-                "Product_Type": product_type,
-                "Product_MRP": product_mrp,
-                "Store_Establishment_Year": store_establishment_year,
-                "Store_Size": store_size,
-                "Store_Location_City_Type": store_location_city_type,
-                "Store_Type": store_type,
-            }
-            return input_data
-    return None
-def display_prediction_result(prediction_data: Dict):
-    """Display the prediction result with EDA-based insights."""
-    predicted_sales = prediction_data["predicted_sales"]
-    # Main prediction display
-    st.markdown('<div class="prediction-box">', unsafe_allow_html=True)
-    col1, col2, col3 = st.columns([1, 2, 1])
-    with col2:
-        st.markdown(
-            f"""
-        <div style="text-align: center;">
-            <h2>💰 Predicted Sales Revenue</h2>
-            <h1 style="color: #28a745; font-size: 4rem;">${predicted_sales:,.2f}</h1>
-        </div>
-        """,
-            unsafe_allow_html=True,
-        )
-    st.markdown("</div>", unsafe_allow_html=True)
-    # EDA-based insights and business metrics
-    st.subheader("📊 Sales Analysis & Business Insights")
-    # Based on EDA: Sales range $33-$8,000, Mean: $3,464, Median: $3,452, Std: $1,066
-    sales_mean = 3464
-    sales_median = 3452
-    sales_std = 1066
-    sales_q1 = 2762
-    sales_q3 = 4145
-    col1, col2, col3, col4 = st.columns(4)
-    with col1:
-        # Performance vs Mean
-        vs_mean = ((predicted_sales - sales_mean) / sales_mean) * 100
-        delta_color = "normal" if abs(vs_mean) < 10 else "inverse"
-        st.metric(
-            label="📊 vs Dataset Mean",
-            value=f"${predicted_sales:,.2f}",
-            delta=f"{vs_mean:+.1f}%",
-            delta_color=delta_color,
-        )
-    with col2:
-        # Performance vs Median
-        vs_median = ((predicted_sales - sales_median) / sales_median) * 100
-        delta_color = "normal" if abs(vs_median) < 10 else "inverse"
-        st.metric(
-            label="📈 vs Dataset Median",
-            value=f"${sales_median:,.2f}",
-            delta=f"{vs_median:+.1f}%",
-            delta_color=delta_color,
-        )
-    with col3:
-        # Percentile ranking based on EDA quartiles
-        if predicted_sales <= sales_q1:
-            percentile = "Bottom 25%"
-            percentile_color = "🔴"
-        elif predicted_sales <= sales_median:
-            percentile = "25th-50th"
-            percentile_color = "🟡"
-        elif predicted_sales <= sales_q3:
-            percentile = "50th-75th"
-            percentile_color = "🟠"
-        else:
-            percentile = "Top 25%"
-            percentile_color = "🟢"
-        st.metric(
-            label="🎯 Performance Percentile",
-            value=f"{percentile_color} {percentile}",
-            delta=None,
-        )
-    with col4:
-        # Standard deviation analysis
-        z_score = (predicted_sales - sales_mean) / sales_std
-        if abs(z_score) <= 1:
-            volatility = "Normal"
-            vol_color = "🟢"
-        elif abs(z_score) <= 2:
-            volatility = "Moderate"
-            vol_color = "🟡"
-        else:
-            volatility = "High"
-            vol_color = "🔴"
-        st.metric(
-            label="📉 Sales Volatility",
-            value=f"{vol_color} {volatility}",
-            delta=f"σ: {z_score:+.1f}",
-        )
-    # Business insights section
-    st.subheader("💼 Business Recommendations & Next Steps")
-    # Performance Summary Box
-    if predicted_sales >= sales_q3:  # Top 25%
-        performance_level = "⭐ Excellent"
-        performance_color = "#28a745"
-        summary_message = (
-            "This product is predicted to perform in the top 25% of SuperKart sales!"
-        )
-    elif predicted_sales >= sales_median:  # Above median
-        performance_level = "✅ Good"
-        performance_color = "#17a2b8"
-        summary_message = (
-            "This product is predicted to perform above the historical average."
-        )
-    elif predicted_sales >= sales_q1:  # Above bottom quartile
-        performance_level = "⚠️ Below Average"
-        performance_color = "#ffc107"
-        summary_message = (
-            "This product may underperform compared to typical SuperKart sales."
-        )
-    else:  # Bottom 25%
-        performance_level = "🔴 Needs Attention"
-        performance_color = "#dc3545"
-        summary_message = (
-            "This product is predicted to be in the bottom 25% of sales performance."
-        )
-    # Performance summary box
-    st.markdown(
-        f"""
-        <div style="background-color: {performance_color}20; padding: 20px; border-radius: 10px;
-             border-left: 5px solid {performance_color}; margin: 15px 0;">
-            <h4 style="color: {performance_color}; margin: 0 0 10px 0;">
-                {performance_level} Performance Expected
-            </h4>
-            <p style="margin: 0; font-size: 16px;">{summary_message}</p>
-        </div>
-        """,
-        unsafe_allow_html=True,
-    )
-    # Three-column layout for insights
-    col1, col2, col3 = st.columns(3)
-    with col1:
-        st.markdown("#### 💰 Financial Impact")
-        # Revenue tier classification (moved to top for consistency)
-        if predicted_sales >= 5000:
-            tier = "🏆 Premium Tier"
-        elif predicted_sales >= 3000:
-            tier = "🥈 Standard Tier"
-        else:
-            tier = "🥉 Value Tier"
-        st.info(f"**Revenue Classification:** {tier}")
-        # Financial metrics with clear labels
-        profit_margin = 0.2  # 20% profit margin
-        estimated_profit = predicted_sales * profit_margin
-        st.metric("Predicted Revenue", f"${predicted_sales:,.0f}")
-        st.metric("Estimated Profit (20%)", f"${estimated_profit:,.0f}")
-    with col2:
-        st.markdown("#### 📊 Market Position")
-        # Clear market positioning
-        vs_mean_pct = ((predicted_sales - sales_mean) / sales_mean) * 100
-        if vs_mean_pct > 10:
-            position = "🚀 Above Market Average"
-        elif vs_mean_pct > -10:
-            position = "📊 Market Average"
-        else:
-            position = "📉 Below Market Average"
-        st.success(position)
-        st.write(f"**vs Historical Mean:** {vs_mean_pct:+.1f}%")
-        st.write("**Market Range:** \\$33 - \\$8,000")
-        st.write(f"**Your Prediction:** ${predicted_sales:,.0f}")
-    with col3:
-        st.markdown("#### 🎯 Action Items")
-        # Clear, actionable recommendations
-        if predicted_sales < sales_q1:
-            st.warning("**Low Performance Risk**")
-            st.write("**Immediate Actions:**")
-            st.write("• Launch promotional campaign")
-            st.write("• Review pricing strategy")
-            st.write("• Optimize product placement")
-            st.write("• Analyze competitor offerings")
-        elif predicted_sales > sales_q3:
-            st.success("**High Performance Opportunity**")
-            st.write("**Recommended Actions:**")
-            st.write("• Ensure adequate stock levels")
-            st.write("• Consider premium pricing")
-            st.write("• Expand to similar products")
-            st.write("• Allocate prime shelf space")
-        else:
-            st.info("**Standard Performance Expected**")
-            st.write("**Monitor & Optimize:**")
-            st.write("• Track actual vs predicted")
-            st.write("• A/B test marketing approaches")
-            st.write("• Monitor competitor activity")
-            st.write("• Adjust inventory as needed")
-def create_input_summary(input_data: Dict):
-    """Create a summary of input features."""
-    st.subheader("📋 Input Summary")
-    # Create two columns for better layout
-    col1, col2 = st.columns(2)
-    with col1:
-        st.markdown("**Product Information:**")
-        st.write(f"• Weight: {input_data['Product_Weight']} kg")
-        st.write(f"• Sugar Content: {input_data['Product_Sugar_Content']}")
-        st.write(f"• Display Area: {input_data['Product_Allocated_Area']:.3f}")
-        st.write(f"• Type: {input_data['Product_Type']}")
-        st.write(f"• MRP: ${input_data['Product_MRP']:.2f}")
-    with col2:
-        st.markdown("**Store Information:**")
-        st.write(f"• Establishment Year: {input_data['Store_Establishment_Year']}")
-        st.write(f"• Size: {input_data['Store_Size']}")
-        st.write(f"• City Type: {input_data['Store_Location_City_Type']}")
-        st.write(f"• Store Type: {input_data['Store_Type']}")
-def create_batch_prediction():
-    """Create batch prediction interface."""
-    st.header("📊 Batch Prediction")
-    st.markdown("""
-    Upload a CSV file with multiple products to get batch predictions.
-    The CSV should contain all required columns with the same names as in the single prediction form.
-    """)
-    # File uploader
-    uploaded_file = st.file_uploader(
-        "Choose a CSV file",
-        type="csv",
-        help="Upload a CSV file with product and store features",
     )
-    if uploaded_file is not None:
-        try:
-            # Read the CSV file
-            df = pd.read_csv(uploaded_file)
-            # Display the uploaded data
-            st.subheader("📂 Uploaded Data")
-            st.dataframe(df.head(10))
-            if st.button("🚀 Run Batch Prediction", type="primary"):
-                # Convert DataFrame to list of dictionaries
-                predictions_data = df.to_dict("records")
-                # Make batch prediction request
-                result = make_api_request(
-                    "/predict/batch", {"predictions": predictions_data}, "POST"
-                )
-                if result["success"]:
-                    batch_results = result["data"]
-                    # Display results
-                    st.subheader("📈 Batch Prediction Results")
-                    col1, col2, col3 = st.columns(3)
-                    with col1:
-                        st.metric(
-                            "✅ Successful", batch_results["successful_predictions"]
-                        )
-                    with col2:
-                        st.metric("❌ Failed", batch_results["failed_predictions"])
-                    with col3:
-                        st.metric("📊 Total", len(predictions_data))
-                    # Show successful predictions
-                    if batch_results["results"]:
-                        st.subheader("🎯 Successful Predictions")
-                        # Create a user-friendly results DataFrame
-                        display_results = []
-                        for result in batch_results["results"]:
-                            # Extract readable product info
-                            input_features = result["input_features"]
-                            # Determine performance category
-                            sales = result["predicted_sales"]
-                            if sales >= 4145:  # Top 25% (Q3)
-                                category = "🟢 High"
-                            elif sales >= 3452:  # Above median
-                                category = "🟡 Good"
-                            elif sales >= 2762:  # Above Q1
-                                category = "🟠 Average"
-                            else:
-                                category = "🔴 Low"
-                            display_row = {
-                                "Row": result["index"] + 1,
-                                "Product Type": input_features["Product_Type"],
-                                "Weight (kg)": input_features["Product_Weight"],
-                                "MRP ($)": f"${input_features['Product_MRP']:.2f}",
-                                "Store Size": input_features["Store_Size"],
-                                "Store Type": input_features["Store_Type"],
-                                "Predicted Sales": f"${sales:,.2f}",
-                                "Performance": category,
-                            }
-                            display_results.append(display_row)
-                        display_df = pd.DataFrame(display_results)
-                        # Show the clean results table
-                        st.dataframe(
-                            display_df, use_container_width=True, hide_index=True
-                        )
-                        # Summary statistics
-                        sales_values = [
-                            result["predicted_sales"]
-                            for result in batch_results["results"]
-                        ]
-                        col1, col2, col3, col4 = st.columns(4)
-                        with col1:
-                            st.metric("💰 Total Revenue", f"${sum(sales_values):,.0f}")
-                        with col2:
-                            st.metric(
-                                "📊 Average Sale",
-                                f"${sum(sales_values) / len(sales_values):,.0f}",
-                            )
-                        with col3:
-                            high_performers = len(
-                                [s for s in sales_values if s >= 4145]
-                            )
-                            st.metric("🟢 High Performers", f"{high_performers}")
-                        with col4:
-                            low_performers = len([s for s in sales_values if s < 2762])
-                            st.metric("🔴 Needs Attention", f"{low_performers}")
-                        # Download options
-                        col1, col2 = st.columns(2)
-                        with col1:
-                            # Download user-friendly results
-                            csv_display = display_df.to_csv(index=False)
-                            st.download_button(
-                                label="📥 Download Summary Results",
-                                data=csv_display,
-                                file_name="batch_predictions_summary.csv",
-                                mime="text/csv",
-                            )
-                        with col2:
-                            # Download detailed results for technical users
-                            detailed_results = []
-                            for result in batch_results["results"]:
-                                detailed_row = {
-                                    "row_index": result["index"],
-                                    "predicted_sales": result["predicted_sales"],
-                                    **result["input_features"],
-                                }
-                                detailed_results.append(detailed_row)
-                            detailed_df = pd.DataFrame(detailed_results)
-                            csv_detailed = detailed_df.to_csv(index=False)
-                            st.download_button(
-                                label="🔧 Download Detailed Results",
-                                data=csv_detailed,
-                                file_name="batch_predictions_detailed.csv",
-                                mime="text/csv",
-                            )
-                    # Show errors if any
-                    if batch_results["errors"]:
-                        st.subheader("⚠️ Prediction Errors")
-                        errors_df = pd.DataFrame(batch_results["errors"])
-                        st.dataframe(errors_df)
-                else:
-                    st.error(f"Batch prediction failed: {result['error']}")
-        except Exception as e:
-            st.error(f"Error processing file: {str(e)}")
-def main():
-    """Main application function."""
-    # Title and description
-    st.markdown(
-        '<h1 class="main-header">🛒 SuperKart Sales Predictor</h1>',
-        unsafe_allow_html=True,
-    )
-    st.markdown(
-        """
-    <div style="text-align: center; margin-bottom: 2rem;">
-        <p style="font-size: 1.2rem; color: #666;">
-            Predict product sales revenue using machine learning based on product and store characteristics
-        </p>
-    </div>
-    """,
-        unsafe_allow_html=True,
-    )
-    # Check backend health
-    health_result = make_api_request("/")
-    if not health_result["success"]:
-        st.error(
-            f"⚠️ Backend API is not available at `{BACKEND_URL}`. Please ensure the backend service is running."
-        )
-        st.info(
-            """
-            **How to specify a different backend URL:**
-            1. **Command line argument:**
-               ```
-               streamlit run app.py -- --backend-url http://your-backend:5050
-               ```
-            2. **Environment variable:**
-               ```
-               export BACKEND_URL=http://your-backend:5050
-               streamlit run app.py
-               ```
-            """
-        )
-        st.stop()
-    # Sidebar navigation
-    st.sidebar.title("🧭 Navigation")
-    # Display current backend URL and connection status
-    st.sidebar.markdown("---")
-    st.sidebar.markdown("**🔗 Backend Configuration**")
-    st.sidebar.code(BACKEND_URL, language=None)
-    # Show connection status
-    if health_result["success"]:
-        st.sidebar.success("🟢 Connected")
-        if "data" in health_result and "model_loaded" in health_result["data"]:
-            model_status = (
-                "🤖 Model Loaded"
-                if health_result["data"]["model_loaded"]
-                else "⚠️ Model Not Loaded"
-            )
-            st.sidebar.info(model_status)
-    else:
-        st.sidebar.error("🔴 Disconnected")
-    st.sidebar.markdown("---")
-    app_mode = st.sidebar.selectbox(
-        "Choose App Mode",
-        ["Single Prediction", "Batch Prediction", "API Documentation"],
-    )
-    if app_mode == "Single Prediction":
-        # Single prediction interface
-        input_data = create_input_form()
-        if input_data:
-            # Make prediction
-            result = make_api_request("/predict", input_data, "POST")
-            if result["success"]:
-                prediction_data = result["data"]
-                # Display results
-                display_prediction_result(prediction_data)
-                # Show input summary
-                with st.expander("📋 View Input Details", expanded=False):
-                    create_input_summary(input_data)
-                # Success message
-                st.markdown(
-                    '<div class="success-box">✅ Prediction completed successfully!</div>',
-                    unsafe_allow_html=True,
                 )
-            else:
-                st.markdown(
-                    f'<div class="error-box">❌ Prediction failed: {result["error"]}</div>',
-                    unsafe_allow_html=True,
-                )
-    elif app_mode == "Batch Prediction":
-        create_batch_prediction()
-    elif app_mode == "API Documentation":
-        st.header("📚 API Documentation")
-        # Get feature information
-        feature_info = get_feature_info()
-        if feature_info:
-            st.subheader("🔧 Required Features")
-            features_df = pd.DataFrame(
-                [
-                    {"Feature": k, "Description": v}
-                    for k, v in feature_info["feature_descriptions"].items()
-                ]
-            )
-            st.table(features_df)
-            st.subheader("📝 Example Input")
-            st.json(feature_info["example_input"])
-            st.subheader("🌐 API Endpoints")
-            st.markdown("""
-            - **GET /**: Health check
-            - **POST /predict**: Single prediction
-            - **POST /predict/batch**: Batch prediction
-            - **GET /features**: Get feature information
-            """)
-    # Footer
-    st.markdown("---")
-    st.markdown(
-        "<div style='text-align: center; color: #666;'>"
-        "SuperKart Sales Prediction System | Krishnaswamy Subramanian"
-        "</div>",
-        unsafe_allow_html=True,
-    )
 if __name__ == "__main__":
-    main()

 """
+SuperKart Sales Prediction Flask API
+This Flask application provides a REST API for predicting product sales using a pre-trained
+Random Forest model. The API accepts product and store features and returns predicted sales revenue.
 """
 import os
+import joblib
+import pandas as pd
+from flask import Flask, request, jsonify
+from flask_cors import CORS
+import logging
+from typing import Any, Dict
+from pydantic import BaseModel, ValidationError, field_validator
+from datetime import datetime
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+# Initialize Flask app
+app = Flask(__name__)
+CORS(app)  # Enable CORS for frontend integration
+# Global variables for the loaded model and its feature columns
+model = None
+feature_columns = None
+# Define user input features (what user provides)
+USER_INPUT_FEATURES = [
+    "Product_Weight",
+    "Product_Sugar_Content",
+    "Product_Allocated_Area",
+    "Product_Type",
+    "Product_MRP",
+    "Store_Establishment_Year",
+    "Store_Size",
+    "Store_Location_City_Type",
+    "Store_Type",
+]
+# Define model features (what model expects after preprocessing)
+MODEL_FEATURES = [
+    "Product_Weight",
+    "Product_Sugar_Content",
+    "Product_Allocated_Area",
+    "Product_Type",
+    "Product_MRP",
+    "Store_Size",
+    "Store_Location_City_Type",
+    "Store_Type",
+    "Store_Age",
+]
# Closed sets of values accepted by PredictionInput.  They are deliberately
# lists: the list repr is embedded verbatim in the validation error messages.
_ESTABLISHMENT_YEARS = [1987, 1998, 1999, 2009]
_SUGAR_CONTENT_VALUES = ["Low Sugar", "Regular", "No Sugar"]
_PRODUCT_TYPES = [
    "Dairy",
    "Soft Drinks",
    "Meat",
    "Fruits and Vegetables",
    "Household",
    "Baking Goods",
    "Snack Foods",
    "Frozen Foods",
    "Breakfast",
    "Health and Hygiene",
    "Hard Drinks",
    "Canned",
    "Bread",
    "Starchy Foods",
    "Others",
    "Seafood",
]
_STORE_SIZES = ["Small", "Medium", "High"]
_CITY_TYPES = ["Tier 1", "Tier 2", "Tier 3"]
_STORE_TYPES = [
    "Supermarket Type1",
    "Supermarket Type2",
    "Supermarket Type3",
    "Departmental Store",
    "Food Mart",
]


def _require_choice(field_name, value, allowed):
    """Return ``value`` if it is one of ``allowed``; otherwise raise ValueError."""
    if value not in allowed:
        raise ValueError(f"{field_name} must be one of: {allowed}")
    return value


# Pydantic model for input validation
class PredictionInput(BaseModel):
    """Schema and validation rules for a single prediction request.

    Numeric fields are range-checked and categorical fields must match one of
    a fixed set of values.  Range checks are written in the positive form
    ``not lo <= v <= hi`` so that NaN, for which every comparison is False,
    is rejected instead of slipping through to the model.
    """

    Product_Weight: float  # 4.0 to 22.0 inclusive
    Product_Sugar_Content: str  # one of _SUGAR_CONTENT_VALUES
    Product_Allocated_Area: float  # 0 to 1 inclusive
    Product_Type: str  # one of _PRODUCT_TYPES
    Product_MRP: float  # 31.0 to 266.0 inclusive
    Store_Establishment_Year: int  # one of _ESTABLISHMENT_YEARS
    Store_Size: str  # one of _STORE_SIZES
    Store_Location_City_Type: str  # one of _CITY_TYPES
    Store_Type: str  # one of _STORE_TYPES

    @field_validator("Product_Weight")
    @classmethod
    def validate_product_weight(cls, v: float) -> float:
        """Reject non-positive, out-of-range and NaN weights."""
        if v <= 0:
            raise ValueError("Product_Weight must be greater than 0")
        if not 4.0 <= v <= 22.0:
            raise ValueError("Product_Weight must be between 4.0 and 22.0")
        return v

    @field_validator("Product_Allocated_Area")
    @classmethod
    def validate_allocated_area(cls, v: float) -> float:
        """Reject allocated-area values outside [0, 1], including NaN."""
        if not 0 <= v <= 1:
            raise ValueError("Product_Allocated_Area must be between 0 and 1")
        return v

    @field_validator("Product_MRP")
    @classmethod
    def validate_mrp(cls, v: float) -> float:
        """Reject non-positive, out-of-range and NaN prices."""
        if v <= 0:
            raise ValueError("Product_MRP must be greater than 0")
        if not 31.0 <= v <= 266.0:
            raise ValueError("Product_MRP must be between 31.0 and 266.0")
        return v

    @field_validator("Store_Establishment_Year")
    @classmethod
    def validate_establishment_year(cls, v: int) -> int:
        """Accept only the establishment years listed in _ESTABLISHMENT_YEARS."""
        return _require_choice("Store_Establishment_Year", v, _ESTABLISHMENT_YEARS)

    @field_validator("Product_Sugar_Content")
    @classmethod
    def validate_sugar_content(cls, v: str) -> str:
        """Accept only the sugar-content labels in _SUGAR_CONTENT_VALUES."""
        return _require_choice("Product_Sugar_Content", v, _SUGAR_CONTENT_VALUES)

    @field_validator("Product_Type")
    @classmethod
    def validate_product_type(cls, v: str) -> str:
        """Accept only the product categories in _PRODUCT_TYPES."""
        return _require_choice("Product_Type", v, _PRODUCT_TYPES)

    @field_validator("Store_Size")
    @classmethod
    def validate_store_size(cls, v: str) -> str:
        """Accept only the store sizes in _STORE_SIZES."""
        return _require_choice("Store_Size", v, _STORE_SIZES)

    @field_validator("Store_Location_City_Type")
    @classmethod
    def validate_city_type(cls, v: str) -> str:
        """Accept only the city tiers in _CITY_TYPES."""
        return _require_choice("Store_Location_City_Type", v, _CITY_TYPES)

    @field_validator("Store_Type")
    @classmethod
    def validate_store_type(cls, v: str) -> str:
        """Accept only the store types in _STORE_TYPES."""
        return _require_choice("Store_Type", v, _STORE_TYPES)
def load_model(model_path: str):
    """
    Load the serialized model and publish it through the module globals.

    On success the global ``model`` holds the loaded object and
    ``feature_columns`` is set to ``MODEL_FEATURES``.

    Args:
        model_path (str): Location of the joblib file on disk.

    Returns:
        bool: True when the model was loaded; False when the file is missing
        or loading raised. Failures are logged, never propagated.
    """
    global model, feature_columns
    try:
        if os.path.exists(model_path):
            # The artifact is expected to bundle its own preprocessing pipeline.
            model = joblib.load(model_path)
        else:
            raise FileNotFoundError(f"Model file not found at: {model_path}")
        logger.info(f"✅ Model loaded successfully from: {model_path}")
        feature_columns = MODEL_FEATURES
        logger.info(f"📋 Model features: {MODEL_FEATURES}")
        logger.info(f"📋 User input features: {USER_INPUT_FEATURES}")
    except Exception as exc:
        logger.error(f"❌ Error loading model: {str(exc)}")
        return False
    return True
+def convert_establishment_year_to_age(data: Dict[str, Any]) -> Dict[str, Any]:
+    """Convert Store_Establishment_Year to Store_Age."""
+    # Create a copy to avoid modifying the original
+    converted_data = data.copy()
+    # Get current year
+    current_year = datetime.now().year
+    # Convert establishment year to age
+    if "Store_Establishment_Year" in converted_data:
+        establishment_year = converted_data.pop("Store_Establishment_Year")
+        converted_data["Store_Age"] = current_year - establishment_year
+    return converted_data
def preprocess_input(data: Dict[str, Any]) -> pd.DataFrame:
    """Build the single-row DataFrame the model consumes from validated input.

    The establishment year is first converted to a store age, then the row is
    restricted to ``MODEL_FEATURES`` in that exact column order.
    """
    model_row = convert_establishment_year_to_age(data)
    return pd.DataFrame([model_row])[MODEL_FEATURES]
@app.route("/", methods=["GET"])
def health_check():
    """Report that the service is up and whether a model is currently loaded."""
    is_model_loaded = model is not None
    payload = {
        "status": "healthy",
        "message": "SuperKart Sales Prediction API is running",
        "model_loaded": is_model_loaded,
    }
    return jsonify(payload)
@app.route("/predict", methods=["POST"])
def predict():
    """Predict sales for given product and store features.

    Expects a JSON object whose keys are the fields of ``PredictionInput``.

    Returns:
        200 with ``predicted_sales``, ``currency``, ``input_features`` and
        ``status`` on success.
        400 when the body is missing, is not valid JSON, is not a JSON
        object, or fails validation.
        500 when no model is loaded or preprocessing/prediction raises.
    """
    if model is None:
        return jsonify({"error": "Model not loaded. Please check server logs."}), 500
    try:
        # silent=True returns None for a missing, malformed or non-JSON body
        # instead of raising, so client mistakes are answered with 400 below
        # rather than being swallowed by the generic 500 handler.
        data = request.get_json(silent=True)
        if not data:
            return jsonify(
                {
                    "error": "No data provided. Please send JSON data in the request body."
                }
            ), 400
        if not isinstance(data, dict):
            return jsonify(
                {"error": "Request body must be a JSON object of feature values."}
            ), 400
        # Validate input using Pydantic
        try:
            validated = PredictionInput(**data)
        except ValidationError as ve:
            # The error context of custom validators holds the raw ValueError
            # instance, which is not JSON serializable; leave it out so the
            # 400 response can actually be rendered.
            return jsonify(
                {
                    "error": "Input validation failed",
                    "details": ve.errors(include_context=False),
                }
            ), 400
        features = validated.model_dump()
        # Preprocess input data (works on a copy, so `features` is unchanged)
        input_df = preprocess_input(features)
        # Make prediction
        prediction = model.predict(input_df)
        predicted_sales = float(prediction[0])
        # Prepare response
        response = {
            "predicted_sales": round(predicted_sales, 2),
            "currency": "USD",
            "input_features": features,
            "status": "success",
        }
        logger.info(f"✅ Prediction successful: ${predicted_sales:.2f}")
        return jsonify(response)
    except Exception as e:
        # Last-resort boundary: keep the traceback in the server log.
        logger.exception(f"❌ Prediction error: {str(e)}")
        return jsonify({"error": f"Prediction failed: {str(e)}"}), 500
@app.route("/features", methods=["GET"])
def get_features():
    """Describe the request schema: field names, per-field notes and a sample payload."""
    descriptions = {
        "Product_Weight": "Weight of the product (4.0-22.0 kg)",
        "Product_Sugar_Content": "Sugar content (Low Sugar, Regular, No Sugar)",
        "Product_Allocated_Area": "Allocated display area ratio (0.0-1.0)",
        "Product_Type": "Product category (16 types: Dairy, Soft Drinks, Meat, etc.)",
        "Product_MRP": "Maximum retail price (31.0-266.0 USD)",
        "Store_Establishment_Year": "Year store was established (1987, 1998, 1999, 2009)",
        "Store_Size": "Store size (Small, Medium, High)",
        "Store_Location_City_Type": "City type (Tier 1, Tier 2, Tier 3)",
        "Store_Type": "Store type (Supermarket Type1/2/3, Departmental Store, Food Mart)",
    }
    sample_request = {
        "Product_Weight": 12.66,
        "Product_Sugar_Content": "Low Sugar",
        "Product_Allocated_Area": 0.027,
        "Product_Type": "Frozen Foods",
        "Product_MRP": 117.08,
        "Store_Establishment_Year": 2009,
        "Store_Size": "Medium",
        "Store_Location_City_Type": "Tier 2",
        "Store_Type": "Supermarket Type2",
    }
    return jsonify(
        {
            "required_features": USER_INPUT_FEATURES,
            "feature_descriptions": descriptions,
            "example_input": sample_request,
        }
    )
@app.route("/predict/batch", methods=["POST"])
def predict_batch():
    """Predict sales for multiple products at once.

    Expects a JSON object of the form ``{"predictions": [ {...}, ... ]}``
    where each element carries the fields of ``PredictionInput``.  Items are
    processed independently: a bad item is reported under ``errors`` with its
    index and does not prevent the remaining items from being predicted.

    Returns:
        200 with ``successful_predictions``, ``failed_predictions``,
        ``results``, ``errors`` and ``status``.
        400 when the body is missing, is not a JSON object, lacks
        ``predictions``, or ``predictions`` is not an array.
        500 when no model is loaded or an unexpected error occurs.
    """
    if model is None:
        return jsonify({"error": "Model not loaded. Please check server logs."}), 500
    try:
        # silent=True returns None for a missing, malformed or non-JSON body
        # instead of raising, so the client gets a 400 rather than a 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict) or "predictions" not in data:
            return jsonify(
                {
                    "error": 'No data provided. Please send JSON with "predictions" array.'
                }
            ), 400
        predictions_data = data["predictions"]
        if not isinstance(predictions_data, list):
            return jsonify({"error": "Predictions must be an array of objects."}), 400
        results = []
        errors = []
        for i, item in enumerate(predictions_data):
            if not isinstance(item, dict):
                errors.append(
                    {
                        "index": i,
                        "error": "Each prediction must be a JSON object.",
                        "input": item,
                    }
                )
                continue
            try:
                validated = PredictionInput(**item)
                features = validated.model_dump()
                # Preprocess and predict
                input_df = preprocess_input(features)
                prediction = model.predict(input_df)
                predicted_sales = float(prediction[0])
            except ValidationError as ve:
                # Drop the error context: for custom validators it holds the
                # raw ValueError, which would make the final jsonify() fail
                # and turn the whole batch into a 500.
                errors.append(
                    {
                        "index": i,
                        "error": ve.errors(include_context=False),
                        "input": item,
                    }
                )
            except Exception as e:
                errors.append({"index": i, "error": str(e), "input": item})
            else:
                results.append(
                    {
                        "index": i,
                        "predicted_sales": round(predicted_sales, 2),
                        "input_features": features,
                    }
                )
        response = {
            "successful_predictions": len(results),
            "failed_predictions": len(errors),
            "results": results,
            "errors": errors,
            "status": "completed",
        }
        logger.info(
            f"✅ Batch prediction completed: {len(results)} successful, {len(errors)} failed"
        )
        return jsonify(response)
    except Exception as e:
        # Last-resort boundary: keep the traceback in the server log.
        logger.exception(f"❌ Batch prediction error: {str(e)}")
        return jsonify({"error": f"Batch prediction failed: {str(e)}"}), 500
# Load model on module import (for Gunicorn compatibility): Gunicorn imports
# this module and never executes the __main__ branch below.
if not load_model("./superkart_model.joblib"):
    logger.error("❌ Failed to load model. Application may not work properly.")

if __name__ == "__main__":
    # This runs only when script is executed directly (not imported by Gunicorn)
    logger.info("🚀 Starting SuperKart Sales Prediction API...")
    # The server binds to every interface, so the interactive debugger (which
    # allows arbitrary code execution) must never be on by default.  Debug
    # mode is opt-in through the FLASK_DEBUG environment variable.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {
        "1",
        "true",
        "yes",
        "on",
    }
    app.run(host="0.0.0.0", port=7860, debug=debug_enabled)

requirements.txt CHANGED Viewed

@@ -1,5 +1,8 @@
-streamlit==1.29.0
-requests==2.32.3
 pandas==2.2.2
-plotly==5.17.0
-watchdog==6.0.0

+Flask==3.0.0
+flask-cors==4.0.0
+joblib==1.4.2
 pandas==2.2.2
+numpy==2.0.2
+scikit-learn==1.6.1
+gunicorn==21.2.0
+pydantic==2.5.0