import pandas as pd
import pickle
import streamlit as st
import numpy as np
from huggingface_hub import hf_hub_download
import shap
import matplotlib.pyplot as plt

# Fetch both pickled artifacts from the Hugging Face Hub repository named in
# the Streamlit secrets. The secrets are read once and reused for both calls.
_repo_id = st.secrets["REPO_ID"]
_hf_token = st.secrets["HF_TOKEN"]
model_path = hf_hub_download(repo_id=_repo_id, filename="model.pkl", token=_hf_token)
explainer_path = hf_hub_download(repo_id=_repo_id, filename="explainer.pkl", token=_hf_token)


# load model
# NOTE(security): pickle.load can execute arbitrary code contained in the file.
# Only point REPO_ID at a repository whose contents are trusted.
with open(model_path, "rb") as _model_file:
    model = pickle.load(_model_file)


def model_proba(x):
    """Return column 1 of ``model.predict_proba(x)`` for every row of *x*.

    Column 1 is the value ``predict`` reports as the CVD risk.
    """
    return model.predict_proba(x)[:, 1]


# NOTE(review): model_proba is defined before the explainer is unpickled,
# presumably because the pickled explainer refers to it by name -- confirm
# before reordering these statements.
with open(explainer_path, "rb") as _explainer_file:
    explainer = pickle.load(_explainer_file)

def predict(input):
    """Render the predicted CVD risk and its SHAP explanation for one input.

    Draws a table of the supplied feature values, a header with the risk
    percentage taken from ``model.predict_proba``, a SHAP decision plot
    produced by the module-level ``explainer``, and a short reading guide.

    Args:
        input: Mapping of feature name to numeric value. It is converted
            with ``pd.Series`` and passed to the model as a single row.
            (The name shadows the builtin but is kept so existing callers
            are unaffected.)

    Returns:
        None. All output is written to the Streamlit page.
    """
    col1c, col2c = st.columns([0.3, 0.7])

    features = pd.Series(input)
    # One-column frame (features as rows) for display; transposed below to
    # get the single-row layout handed to the explainer.
    feature_table = features.to_frame(name='value')

    with col1c:
        st.subheader('Input data')
        st.table(feature_table)

    prob = model.predict_proba([features])

    st.header(f'CVD Risk: {prob[0][1]*100:.2f}%')

    with col2c:
        st.subheader('CVD Risk Explanation')
        shap_value = explainer(feature_table.T)
        # show=False stops SHAP from calling plt.show() so the axes can still
        # be relabelled before the figure is handed to Streamlit.
        shap.decision_plot(
            shap_value.base_values,
            shap_value.values,
            feature_names=shap_value.feature_names,
            show=False,
        )
        fig = plt.gcf()
        ax = plt.gca()
        ax.set_xlabel('<-- Feature input decreases risk | Feature input increases risk -->')
        ax.set_ylabel('Feature impact -->')
        st.pyplot(fig)
        # Release the figure; otherwise pyplot keeps one alive per prediction.
        plt.close(fig)

    st.markdown('''                
            * The effect of each input feature's value on the model's result shown relates to THIS instance only.  
            * The straight vertical line is the expected (mean) value of the model.
            * The plotted line shows the effect of each feature in deviating from the expected value.
            ''')
    

# Page heading and introductory blurb: disclaimer, data source and a pointer
# to the explanation method.
APP_TITLE = 'Cardiovascular Disease Risk Prediction DEMO'
APP_INTRO = '''
            This is a CVD risk prediction app for demonstration only, **therefore not for clinical use**.
            
            Output from a simple logistic regression model based on 1000 individuals in India.
            Data source: [Cardiovascular Disease Dataset](https://www.kaggle.com/datasets/jocelyndumlao/cardiovascular-disease-dataset/)

            The CVD risk model prediction is explained using [SHAP](https://shap.readthedocs.io/en/stable/) values.
            '''

st.title(APP_TITLE)
st.markdown(APP_INTRO)

# Input form: eight features collected across two side-by-side columns.
col1, col2 = st.columns(2)

with col1:
    age = st.number_input('Age (years)', min_value=0, max_value=100)

    sex_options = ['Female', 'Male']
    sex = st.radio('Sex', sex_options)

    chestpain_options = ['none', 'non-anginal pain', 'typical angina', 'atypical angina']
    chestpain = st.radio('Chest pain type', chestpain_options)

    restingBP = st.number_input('Resting systolic blood pressure mm HG (94-200)', min_value=0, max_value=200)


with col2:
    # The upper bound matches the range advertised in the label; a cap of 300
    # would make the upper part of that range impossible to enter.
    serumcholestrol = st.number_input('Serum Cholesterol in mg/dl (126-564)', min_value=0, max_value=564)

    fastingbloodsugar_options = ['LESS than 120 mg/dl', 'GREATER than or EQUAL 120 mg/dl']
    fastingbloodsugar = st.radio('Fasting blood sugar', fastingbloodsugar_options)

    maxheartrate = st.number_input('Maximum heart rate achieved BPM (71-202)', min_value=0, max_value=300)

    exerciseangia_options = ['no', 'yes']
    exerciseangia = st.radio('Exercise induced angina', exerciseangia_options)

st.divider()

# Feature values in the form handed to predict(): numeric inputs are passed
# through unchanged, and each radio choice is encoded as its position in the
# corresponding options list.
input = {
    'age': age,
    'gender': sex_options.index(sex),
    'chestpain': chestpain_options.index(chestpain),
    'restingBP': restingBP,
    'serumcholestrol': serumcholestrol,
    'fastingbloodsugar': fastingbloodsugar_options.index(fastingbloodsugar),
    'maxheartrate': maxheartrate,
    'exerciseangia': exerciseangia_options.index(exerciseangia),
}

# Reserve a placeholder in each column first so both buttons are laid out
# above the divider; anything predict() draws appears below it.
col1b, col2b = st.columns(2)
but1 = col1b.empty()
but2 = col2b.empty()

st.divider()

# Half-open [low, high) bounds used when sampling a random example, listed in
# the same feature order as the manual input form.
random_bounds = {
    'age': (35, 90),
    'gender': (0, 2),
    'chestpain': (0, 4),
    'restingBP': (80, 200),
    'serumcholestrol': (100, 600),
    'fastingbloodsugar': (0, 2),
    'maxheartrate': (70, 220),
    'exerciseangia': (0, 2),
}

predict_input_clicked = but1.button('Predict Input', use_container_width=True)
if predict_input_clicked:
    predict(input)

predict_random_clicked = but2.button('Predict Random', use_container_width=True)
if predict_random_clicked:
    predict({
        feature: np.random.randint(low, high)
        for feature, (low, high) in random_bounds.items()
    })