import streamlit as st
import pandas as pd
import numpy as np
import pickle
import base64
import seaborn as sns
import matplotlib.pyplot as plt

# Page introduction rendered as Markdown. ATX headings need a space after the
# '#' markers; without it ("#Context") the line is rendered as plain text
# instead of a heading. Section titles are level-2 headings under the page title.
st.write("""
# Chronic Kidney Disease

## Context
This dataset is originally from UCI Machine Learning Repository. The objective of the dataset is to diagnostically predict whether a patient is having chronic kidney disease or not, based on certain diagnostic measurements included in the dataset.

## Content
The datasets consists of several medical predictor variables and one target variable, Class. Predictor variables includes Blood Pressure(Bp), Albumin(Al), etc.

## Inspiration
Can you build a machine learning model to accurately predict whether or not the patients in the dataset have chronic kidney disease or not?
""")

# Offer the dataset as a download. A relative href such as "new_model.csv"
# points at a path the Streamlit server does not serve, so the file content is
# embedded in the link itself as a base64 data URI instead.
with open('new_model.csv', 'rb') as dataset_file:
    dataset_b64 = base64.b64encode(dataset_file.read()).decode('ascii')

url_dataset = (
    f'<a href="data:text/csv;base64,{dataset_b64}" download="new_model.csv">'
    'Download Dataset CSV File</a>'
)
# unsafe_allow_html is required for the raw <a> tag to be rendered as HTML.
st.markdown(url_dataset, unsafe_allow_html=True)

def user_input_features():
    """Collect one sample from the sidebar sliders.

    One float slider is rendered per column, in the order listed below. No
    initial value is passed, so each slider starts at Streamlit's default
    position.

    Returns:
        pandas.DataFrame: a single-row frame with the columns Bp, Sg, Al, Su,
        Rbc, Bu, Sc, Sod, Pot, Hemo, Wbcc, Rbcc, Htn and Class (in that order).
    """
    # column name -> (minimum, maximum) of the slider
    slider_ranges = {
        'Bp': (50.000, 180.000),
        'Sg': (1.005, 1.025),
        'Al': (0.000, 5.000),
        'Su': (0.000, 5.000),
        'Rbc': (0.000, 1.000),
        'Bu': (1.500, 391.000),
        'Sc': (0.400, 76.000),
        'Sod': (4.500, 163.000),
        'Pot': (2.500, 47.000),
        'Hemo': (3.100, 17.800),
        'Wbcc': (2200.000, 26400.000),
        'Rbcc': (2.100, 8.000),
        'Htn': (0.000, 1.000),
        # NOTE(review): the intro text names 'Class' as the target variable;
        # it is kept here so the returned frame keeps its original columns.
        'Class': (0.000, 1.000),
    }

    # Streamlit's float sliders default to step 0.01 and a two-decimal
    # display. For the narrow Sg range (1.005-1.025) that leaves only three
    # selectable positions and hides the third decimal, so Sg gets a finer
    # step and a three-decimal label.
    slider_overrides = {
        'Sg': {'step': 0.005, 'format': '%.3f'},
    }

    # Dicts preserve insertion order, so sliders appear (and columns are
    # created) in the order declared above.
    data = {
        name: [st.sidebar.slider(name, low, high,
                                 **slider_overrides.get(name, {}))]
        for name, (low, high) in slider_ranges.items()
    }

    return pd.DataFrame(data)

# Single-row frame holding the values currently selected in the sidebar.
input_df = user_input_features()


# Reference dataset: missing values become 0 and the target column is removed.
kidney_raw = pd.read_csv('new_model.csv')
kidney_raw.fillna(0, inplace=True)
kidney = kidney_raw.drop(columns=['Class'])

# Stack the user's row on top of the dataset, then keep only that first row.
# NOTE(review): no encoding step uses the stacked rows, so the concat looks
# redundant; it is kept to preserve the existing behaviour.
df = pd.concat([input_df, kidney], axis=0)

# fillna returns a new frame here; calling it in place on a slice can raise
# pandas' SettingWithCopyWarning.
df = df.iloc[:1].fillna(0)

# Predictor columns handed to the classifier (the 'Class' column is excluded).
features = [
    'Bp',
    'Sg',
    'Al',
    'Su',
    'Rbc',
    'Bu',
    'Sc',
    'Sod',
    'Pot',
    'Hemo',
    'Wbcc',
    'Rbcc',
    'Htn',
]

df = df[features]


st.subheader('User Input features')
st.write(df)

# Load the trained classifier; the context manager closes the file handle.
# NOTE: unpickling can execute arbitrary code, so 'kidney_clf.pkl' must come
# from a trusted source.
with open('kidney_clf.pkl', 'rb') as model_file:
    load_clf = pickle.load(model_file)

prediction = load_clf.predict(df)
prediction_proba = load_clf.predict_proba(df)

# Index 0 -> 'Normal', index 1 -> 'Chronic Kidney'.
# NOTE(review): assumes the classifier's classes are ordered [0, 1] so the
# probability columns line up with these labels - confirm against the model.
kidney_labels = np.array(['Normal', 'Chronic Kidney'])

st.subheader('Prediction')
# predict() returns one value per input row; take the single element
# explicitly, because int() on a 1-D array is deprecated in recent NumPy.
st.write(kidney_labels[int(prediction[0])])

st.subheader('Prediction Probability')
df_prob = pd.DataFrame(
    data=prediction_proba,
    index=['Probability'],
    columns=kidney_labels,
)
st.write(df_prob)