import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler,OrdinalEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.metrics import classification_report, recall_score, precision_score, accuracy_score
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import seaborn as sns
import zipfile
from statsmodels.genmod.generalized_linear_model import GLM
from statsmodels.genmod.families import Gamma
from statsmodels.genmod.families.links import Log
from statsmodels.tools import add_constant
from pygam import LinearGAM, GammaGAM, s, f
import pickle
import streamlit.components.v1 as components
import streamlit as st
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from streamlit_option_menu import option_menu
from pygam import LinearGAM
import lime
from lime import lime_tabular


class HousePriceModel(nn.Module):
    """
    Fully connected regression network: input -> 128 -> 64 -> 32 -> 1.

    Every hidden layer is followed by an in-place LeakyReLU with a negative
    slope of 0.2; the final layer is linear and emits a single value per row.
    The layers live in ``self.model`` (an ``nn.Sequential``), so state-dict
    keys have the form ``model.<index>.weight`` / ``model.<index>.bias``.

    :param input_size: int
        Number of input features per sample.
    """

    def __init__(self, input_size):
        super().__init__()

        # Widths of the input followed by each hidden layer.
        widths = (input_size, 128, 64, 32)

        layers = []
        for fan_in, fan_out in zip(widths, widths[1:]):
            layers.append(nn.Linear(fan_in, fan_out))
            layers.append(nn.LeakyReLU(0.2, inplace=True))

        # Output head: one regression value, no activation.
        layers.append(nn.Linear(widths[-1], 1))

        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """
        Run the input through the sequential stack.

        :param x: torch.Tensor
            Input whose last dimension equals ``input_size``.

        :return: torch.Tensor
            Output of the final linear layer (last dimension of size 1).
        """
        return self.model(x)


def lime_component(X, input_data, dnn_model, gam_model, terms, y_min, y_max):
    """
    Generate and display a LIME (Local Interpretable Model-agnostic Explanations) component.

    A tabular LIME explainer is fitted on ``X`` and asked to explain the neural
    network's prediction for ``input_data``. The local linear surrogate is
    rendered as a markdown formula, followed by LIME's bar chart.

    :param X: pandas.DataFrame
        The feature matrix used to train the explainer.
    :param input_data: list or numpy.array
        The input data point to explain.
    :param dnn_model: torch.nn.Module
        The trained neural network model.
    :param gam_model: object
        The trained Generalized Additive Model (unused here; kept so all
        components share the same signature).
    :param terms: list
        List of feature names.
    :param y_min: float
        Minimum y-axis value (unused here).
    :param y_max: float
        Maximum y-axis value (unused here).

    :return: None
    """
    def nn_prediction(batch):
        # Prediction callback handed to LIME: evaluates the network on a
        # batch of rows without tracking gradients.
        input_tensor = torch.as_tensor(batch, dtype=torch.float32)
        dnn_model.eval()
        with torch.no_grad():
            output = dnn_model(input_tensor)

        return output.cpu().numpy()

    kernel_width = 3
    explainer = lime.lime_tabular.LimeTabularExplainer(
        X.values,
        mode='regression',
        feature_names=terms,
        kernel_width=kernel_width,
    )
    exp = explainer.explain_instance(
        np.array(input_data),
        nn_prediction,
        num_features=len(input_data),
    )

    intercept = exp.intercept[0]
    formula_parts = [f"> _**y =** {intercept:.4f}"]
    # as_list() yields (feature description, weight) pairs in LIME's own
    # order, which is not the order of `terms`. Each weight is therefore
    # labelled with the description LIME returned for it rather than with
    # terms[position], which would attach coefficients to the wrong features.
    for feature, importance in exp.as_list():
        if importance > 0:
            formula_parts.append(f" + {importance:.4f} * **{feature}**")
        else:
            formula_parts.append(f" - {abs(importance):.4f} * **{feature}**")
    formula_parts.append('_')
    st.markdown("".join(formula_parts))

    lime_graph = exp.as_pyplot_figure()
    st.pyplot(lime_graph)
    # Release the figure so repeated Streamlit reruns do not accumulate
    # open matplotlib figures.
    plt.close(lime_graph)
    
        
def spline_component(X, input_data, dnn_model, gam_model, terms, y_min, y_max):
    """
    Generate the spline functions in a component.

    For every entry in ``terms`` the GAM's partial-dependence curve and its
    95% interval are plotted, together with guide lines marking where the
    user's input falls on the curve. Plots alternate between two Streamlit
    columns.

    :param X: pandas.DataFrame
        The feature matrix used to train the explainer (unused here; kept so
        all components share the same signature).
    :param input_data: list or numpy.array
        The input data point to explain.
    :param dnn_model: torch.nn.Module
        The trained neural network model (unused here).
    :param gam_model: object
        The trained Generalized Additive Model
    :param terms: list
        List of feature names.
    :param y_min: float
        Minimum y-axis value
    :param y_max: float
        Maximum y-axis value

    :return: None
    """
    col1, col2 = st.columns(2)
    columns = (col1, col2)

    for i, term_name in enumerate(terms):
        XX = gam_model.generate_X_grid(term=i)
        pdep, confi = gam_model.partial_dependence(term=i, X=XX, width=0.95)
        grid = XX[:, i]
        fig, ax = plt.subplots(figsize=(6, 6))

        # Plot partial dependence
        ax.plot(grid, pdep, label='Partial Dependence')
        ci_lines = ax.plot(grid, confi, c='r', ls='--', label='Confidence Interval')
        # When `confi` has several columns every resulting line shares the
        # label; hide all but the first so the legend shows a single entry.
        for extra_line in ci_lines[1:]:
            extra_line.set_label('_nolegend_')

        # Find y-value corresponding to user input
        user_x = input_data[i]
        user_y = np.interp(user_x, grid, pdep)

        # Plot vertical and horizontal lines.
        # axvline/axhline take their extents as fractions of the axes, so the
        # fractions must be derived from the actual axis limits.
        y_frac = np.clip((user_y - y_min) / (y_max - y_min), 0, 1)
        ax.axvline(x=user_x, color='b', linestyle='--', label='Model Input', ymin=0, ymax=y_frac)
        # Read the x-limits only after the vertical line is added: they carry
        # autoscale margins (and may widen to include user_x), so they differ
        # from the raw grid range.
        x_lo, x_hi = ax.get_xlim()
        x_frac = np.clip((user_x - x_lo) / (x_hi - x_lo), 0, 1)
        ax.axhline(y=user_y, color='b', linestyle='--', xmin=0, xmax=x_frac)
        ax.annotate(f'{user_y:.2f}', (user_x, user_y), textcoords="offset points", xytext=(0, 20), ha='center')

        ax.set_title(f'{term_name}')
        ax.set_xlabel(term_name)
        ax.set_ylabel('Partial Dependence')
        ax.legend(loc='best', fontsize='x-small')

        # Set consistent y-axis limits
        ax.set_ylim(y_min, y_max)
        plt.tight_layout(pad=1.5)

        # Alternate between columns
        with columns[i % 2]:
            st.write(f"Spline function: {term_name}:")
            st.pyplot(fig)

        plt.close(fig)

def prediction_component(X, input_data, dnn_model, gam_model, terms, y_min, y_max, scale=1):
    """
    Generates the main component for the spline functions and LIME based upon the GAM and
    the neural network.

    Both models are evaluated on ``input_data``, their predictions (multiplied
    by ``scale``) are printed, and then the spline and LIME sections are
    rendered.

    :param X: pandas.DataFrame
        The feature matrix used to train the explainer.
    :param input_data: list or numpy.array
        The input data point to explain.
    :param dnn_model: torch.nn.Module
        The trained neural network model.
    :param gam_model: object
        The trained Generalized Additive Model
    :param terms: list
        List of feature names.
    :param y_min: float
        Minimum y-axis value
    :param y_max: float
        Maximum y-axis value
    :param scale: int
        The scale of the model's dataset's price

    :return: None
    """
    input_array = np.array(input_data).reshape(1, -1)

    # Inference only: make sure the network is in eval mode, skip autograd
    # bookkeeping, and convert explicitly to float32 so integer-only inputs
    # do not depend on the legacy FloatTensor constructor's coercion.
    dnn_model.eval()
    with torch.no_grad():
        dnn_output = dnn_model(torch.as_tensor(input_array, dtype=torch.float32))
    # Extract a plain Python float for arithmetic and formatting.
    dnn_prediction = dnn_output[0][0].item()
    gam_prediction = gam_model.predict(input_array)[0]

    st.markdown("# Predictions")
    st.write(f"- _General Additive Model Prediction:_ ${gam_prediction*scale:.2f}")
    st.write(f"- _Neural Network Prediction:_ ${dnn_prediction*scale:.2f}")
    st.markdown("----------------")

    st.markdown("## Spline Functions")
    spline_component(X, input_data, dnn_model, gam_model, terms, y_min, y_max)
    st.markdown("----------------")
    st.markdown("## LIME Explanation for Neural Network")

    lime_component(X, input_data, dnn_model, gam_model, terms, y_min, y_max)


def _load_models(dnn_path, gam_path, input_size=8):
    """Load the neural network weights and the pickled GAM from disk."""
    dnn_model = HousePriceModel(input_size)
    # map_location keeps loading working on CPU-only hosts even when the
    # checkpoint was saved from a GPU.
    dnn_model.load_state_dict(torch.load(dnn_path, map_location='cpu'))
    dnn_model.eval()

    # NOTE: pickle.load executes arbitrary code from the file; only ever
    # point this at model files shipped with the application.
    with open(gam_path, 'rb') as file:
        gam_model = pickle.load(file)

    return dnn_model, gam_model


def _render_model_loss(gam_mse, dnn_mse):
    """Write the "Model Loss" section with both models' mean squared errors."""
    st.markdown("# Model Loss")
    st.write(f"- _Mean Squared Error (Generalized Additive Model):_ {gam_mse:.3f}")
    st.write(f"- _Mean Squared Error (Neural Network Model):_ {dnn_mse:.3f}")
    st.markdown("----------------")


def show():
    """
    Render the home page: dataset selection, sidebar inputs and predictions.

    The user picks one of two model versions. For the chosen version the
    matching neural network and GAM are loaded, the sidebar collects the eight
    model inputs, and pressing "Predict" shows the stored model losses followed
    by the predictions, spline plots and LIME explanation.

    :return: None
    """
    st.markdown("# _Explainable AI for Housing Costs_")
    st.markdown("""
            ## _Models_
            ### California Housing Dataset:
            > This dataset is more concentrated and is far less sparse as the dataset includes a smaller population from 1990 and only in California
            ### HUD Housing Dataset:
            > This dataset is far more sparse and is calculated over the entire country with data from all the different regions of the country 
            """)
    st.markdown("----------------")

    model_versions = ['California Housing Dataset', 'HUD Housing Dataset']
    selected_version = st.selectbox("Select model version", model_versions)
    st.markdown("----------------")

    st.write('Generate model predictions:')
    if selected_version == 'HUD Housing Dataset':
        dnn_model, gam_model = _load_models(
            'models/dnn_model_hud.pth',
            'models/gam_model_hud.pkl',
        )

        # Hard-coded losses displayed alongside the predictions.
        dnn_mse = 77202161664.0000
        gam_mse = 57308274833.9465

        input_data = [None] * 8

        terms = [
            'City/Suburban Status',
            'Census Region',
            'Area median income (average)',
            '# of bedrooms in unit',
            'Age of the house (years)',
            '# of rooms in unit',
            '# of Persons in Household',
            'Monthly utility cost'
        ]

        region_code = {
            'Northeast': 1,
            'Midwest': 2,
            'South': 3,
            'West': 4,
        }

        metro_code = {
            'Central cities of metropolitan areas': 1,
            'Inside metropolitan area, but not in central city': 2,
            'Inside metropolitan area, but not in central city - rural': 3,
            'Outside metropolitan areas, urbanized': 4,
            'Outside metropolitan areas, rural': 5,
        }

        # (index into terms/input_data, default shown in the widget)
        numeric_defaults = [(2, 84200), (3, 4), (4, 9), (5, 8), (6, 3), (7, 300)]

        with st.sidebar:
            st.title("Model Inputs")
            metro_choice = st.selectbox(terms[0], list(metro_code))
            input_data[0] = metro_code[metro_choice]
            # Labelled with terms[1] ('Census Region'); previously this
            # widget reused the City/Suburban Status label.
            region_choice = st.selectbox(terms[1], list(region_code))
            input_data[1] = region_code[region_choice]
            for idx, default in numeric_defaults:
                input_data[idx] = st.number_input(terms[idx], value=default)

        df = pd.read_csv('data/hud_dataset.csv', index_col=False)
        # 'VALUE' is the target column; only the features are needed here.
        X = df.drop(columns=['VALUE'])

        if st.button("Predict"):
            _render_model_loss(gam_mse, dnn_mse)
            prediction_component(X, input_data, dnn_model, gam_model, terms, y_min=-500000, y_max=500000)

    else:
        dnn_model, gam_model = _load_models(
            'models/dnn_model_california.pth',
            'models/gam_model_california.pkl',
        )

        # Hard-coded losses displayed alongside the predictions.
        dnn_mse = 0.9678
        gam_mse = 0.3081

        input_data = [None] * 8

        terms = [
            'Median Income',
            'House Age (years)',
            'Average Rooms',
            'Average Bedrooms',
            'Population (average of census block group per county)',
            'Average Occupancy',
            'Latitude',
            'Longitude'
        ]

        counties = pd.read_csv('data/california_counties.csv')
        distinct_counties = list(counties['County'].unique())

        with st.sidebar:
            st.title("Model Inputs")
            # Labelled "County"; previously this widget reused the
            # 'Median Income' label.
            county = st.selectbox("County", distinct_counties)
            county_vals = counties[counties['County'] == county].values[0]
            # NOTE(review): positional columns 2/3/4 are presumably latitude,
            # longitude and population, and 442 looks like a block-groups-per-
            # county divisor -- confirm against data/california_counties.csv.
            population = county_vals[4] / 442
            lat = county_vals[2]
            lon = county_vals[3]

            # The entered income is divided by 10000 before being fed to the models.
            input_data[0] = st.number_input(terms[0], value=84200) / 10000
            input_data[1] = st.number_input(terms[1], value=4)
            input_data[2] = st.number_input(terms[2], value=9)
            input_data[3] = st.number_input(terms[3], value=8)
            input_data[4] = population
            input_data[5] = st.number_input(terms[5], value=3)
            input_data[6] = lat
            input_data[7] = lon

        housing = fetch_california_housing()
        X = pd.DataFrame(housing.data, columns=housing.feature_names)

        if st.button("Predict"):
            _render_model_loss(gam_mse, dnn_mse)
            prediction_component(X, input_data, dnn_model, gam_model, terms, y_min=-10, y_max=10, scale=100000)
            
if __name__ == '__main__':
    # Script entry point: configure the page, draw the horizontal navigation
    # menu, and dispatch to the selected page.
    st.set_page_config(page_title="Generalized Additive Models", page_icon="🚀")

    nav_options = ["Home", "About"]
    nav_icons = ["house", "book"]
    selected_page = option_menu(
        menu_title=None,
        options=nav_options,
        icons=nav_icons,
        menu_icon="cast",
        default_index=0,
        orientation="horizontal",
    )

    # The two choices are mutually exclusive, so branch order is irrelevant.
    if selected_page == "About":
        st.switch_page("pages/about.py")
    elif selected_page == "Home":
        show()

    st.info("This app is for explaining the problem domain using Generalized Additive Models")