import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st
from pyod.models.iforest import IForest
from sklearn.preprocessing import StandardScaler


class NYCTaxiAnomalyDetector:
    def __init__(self, data):
        self.data = data.copy()
        self.scaler = StandardScaler()

    def filter_by_date_range(self, start_date, end_date):
        """
        Filter data by the specified date range.

        :param start_date: Start date of the range
        :param end_date: End date of the range
        :return: Filtered DataFrame
        """
        # Ensure the date column is datetime before comparing
        if not pd.api.types.is_datetime64_any_dtype(self.data["date"]):
            self.data["date"] = pd.to_datetime(self.data["date"])

        filtered_data = self.data[
            (self.data["date"] >= start_date) & (self.data["date"] <= end_date)
        ]
        return filtered_data

    def preprocess_data(self, data, column):
        """
        Preprocess data for anomaly detection.

        :param data: Filtered DataFrame
        :param column: Column to detect anomalies in
        :return: Scaled data and the index of the rows that were kept
        """
        # Work on a copy so the caller's DataFrame is not mutated
        data = data.copy()
        data[column] = pd.to_numeric(data[column], errors="coerce")

        # Drop rows that could not be coerced to numbers
        clean_data = data[column].dropna()

        # Scale to zero mean and unit variance
        scaled_data = self.scaler.fit_transform(clean_data.values.reshape(-1, 1))
        return scaled_data, clean_data.index

    def detect_anomalies(self, data, column, contamination=0.05):
        """
        Detect anomalies using Isolation Forest.

        :param data: Filtered DataFrame
        :param column: Column to detect anomalies in
        :param contamination: Expected proportion of outliers
        :return: DataFrame with anomaly detection results, indexed like `data`
        """
        scaled_data, original_index = self.preprocess_data(data, column)

        # PyOD detectors label points 0 (inlier) / 1 (outlier) after fitting
        clf = IForest(contamination=contamination, random_state=42)
        clf.fit(scaled_data)
        y_pred = clf.labels_

        # Keep the original index so results align with the input rows
        anomaly_results = pd.DataFrame(
            {
                "date": data.loc[original_index, "date"],
                "value": data.loc[original_index, column],
                "is_anomaly": y_pred == 1,
            },
            index=original_index,
        )
        return anomaly_results


class AIContextGenerator:
    def generate_context(self, anomaly_date):
        """
        Generate potential context for an anomaly.

        :param anomaly_date: Date of the anomaly
        :return: List of contextual insights
        """
        # Mock contextual insights -- replace with actual data sources
        contexts = [
            {
                "type": "Weather",
                "description": f"Weather conditions on {anomaly_date.date()}",
                "severity": "High",
            },
            {
                "type": "Event",
                "description": f"City events around {anomaly_date.date()}",
                "severity": "Medium",
            },
            {
                "type": "Economic",
                "description": f"Economic factors on {anomaly_date.date()}",
                "severity": "Low",
            },
        ]
        return contexts


def load_nyc_taxi_data():
    """
    Load and preprocess the NYC Taxi dataset.

    :return: DataFrame with synthetic daily taxi traffic data
    """
    # Synthetic data: one year of daily ride counts, seeded for reproducibility
    rng = np.random.default_rng(42)
    dates = pd.date_range(start="2023-01-01", end="2023-12-31", freq="D")
    base_traffic = rng.normal(5000, 500, len(dates))

    # Inject a few known anomalies
    base_traffic[50] = 10000   # extreme spike
    base_traffic[200] = 500    # extreme drop
    base_traffic[300] = 12000  # another spike

    return pd.DataFrame({"date": dates, "daily_traffic": base_traffic})
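
# A minimal usage sketch of the detection pipeline above, outside the
# Streamlit UI. It assumes the synthetic loader defined in this file and is
# kept as comments so it is not executed when the app runs:
#
#   df = load_nyc_taxi_data()
#   detector = NYCTaxiAnomalyDetector(df)
#   window = detector.filter_by_date_range(
#       pd.Timestamp("2023-02-01"), pd.Timestamp("2023-03-31")
#   )
#   results = detector.detect_anomalies(window, "daily_traffic", contamination=0.05)
#   print(results[results["is_anomaly"]])  # rows flagged by Isolation Forest
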
def main():
    st.set_page_config(
        page_title="NYC Taxi Traffic Anomaly Detection", page_icon="🚕", layout="wide"
    )
    st.title("🚕 NYC Taxi Traffic Anomaly Detection")

    # Load data
    taxi_data = load_nyc_taxi_data()

    # Sidebar configuration
    st.sidebar.header("Anomaly Detection Settings")

    # Date range selection
    st.sidebar.subheader("Date Range")
    min_date = taxi_data["date"].min().date()
    max_date = taxi_data["date"].max().date()

    col1, col2 = st.sidebar.columns(2)
    with col1:
        start_date = st.date_input(
            "Start Date", min_value=min_date, max_value=max_date, value=min_date
        )
    with col2:
        end_date = st.date_input(
            "End Date", min_value=min_date, max_value=max_date, value=max_date
        )

    # Anomaly sensitivity (passed to Isolation Forest as contamination)
    anomaly_threshold = st.sidebar.slider(
        "Anomaly Sensitivity",
        min_value=0.01,
        max_value=0.1,
        value=0.05,
        step=0.01,
        help="Lower values detect fewer but more extreme anomalies",
    )

    # Instantiate detector and filter data by date range
    detector = NYCTaxiAnomalyDetector(taxi_data)
    filtered_data = detector.filter_by_date_range(
        pd.to_datetime(start_date), pd.to_datetime(end_date)
    )

    # Detect anomalies
    anomalies = detector.detect_anomalies(
        filtered_data, "daily_traffic", contamination=anomaly_threshold
    )

    # Visualization
    st.header("Daily Taxi Traffic Trend")
    fig = px.line(
        filtered_data,
        x="date",
        y="daily_traffic",
        title=f"NYC Taxi Daily Traffic ({start_date} to {end_date})",
        labels={"daily_traffic": "Number of Taxi Rides"},
    )

    # Highlight anomalies; the results share the filtered data's index,
    # so selecting the flagged rows by label aligns correctly
    anomaly_points = filtered_data.loc[anomalies.index[anomalies["is_anomaly"]]]
    fig.add_trace(
        go.Scatter(
            x=anomaly_points["date"],
            y=anomaly_points["daily_traffic"],
            mode="markers",
            name="Anomalies",
            marker=dict(color="red", size=10, symbol="star"),
        )
    )
    st.plotly_chart(fig, use_container_width=True)

    # Anomaly details
    st.header("Anomaly Insights")
    if not anomaly_points.empty:
        context_generator = AIContextGenerator()
        for _, anomaly in anomaly_points.iterrows():
            st.subheader(f"Anomaly on {anomaly['date'].date()}")

            col1, col2 = st.columns(2)
            with col1:
                st.metric("Taxi Rides", f"{anomaly['daily_traffic']:.0f}")
            with col2:
                contexts = context_generator.generate_context(anomaly["date"])
                st.write("### Potential Context")
                for context in contexts:
                    st.markdown(
                        f"- **{context['type']}**: {context['description']} "
                        f"(Severity: {context['severity']})"
                    )
    else:
        st.info("No significant anomalies detected with current settings.")


if __name__ == "__main__":
    main()
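
# To launch the dashboard (assuming this file is saved as app.py):
#   streamlit run app.py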