"""Streamlit app that predicts chronic kidney disease from sidebar inputs.

The script collects diagnostic measurements through sidebar sliders, shows
the collected row, loads a pickled classifier from ``kidney_clf.pkl`` and
displays both the predicted label and the class probabilities.
"""
import streamlit as st
import pandas as pd
import numpy as np
import pickle
import base64
import seaborn as sns
import matplotlib.pyplot as plt

# Sidebar sliders, in display order: column name -> (minimum, maximum).
# Streamlit uses the minimum as the initial value when none is given.
SLIDER_RANGES = {
    'Bp': (50.0, 180.0),
    'Sg': (1.005, 1.025),
    'Al': (0.0, 5.0),
    'Su': (0.0, 5.0),
    'Rbc': (0.0, 1.0),
    'Bu': (1.5, 391.0),
    'Sc': (0.4, 76.0),
    'Sod': (4.5, 163.0),
    'Pot': (2.5, 47.0),
    'Hemo': (3.1, 17.8),
    'Wbcc': (2200.0, 26400.0),
    'Rbcc': (2.1, 8.0),
    'Htn': (0.0, 1.0),
    # NOTE(review): 'Class' is the prediction target; its slider value is
    # dropped again before the model is called (see FEATURES below).
    # Kept so the sidebar and the returned frame are unchanged.
    'Class': (0.0, 1.0),
}

# Columns passed to the classifier, in the order used for prediction.
FEATURES = [
    'Bp', 'Sg', 'Al', 'Su', 'Rbc', 'Bu', 'Sc',
    'Sod', 'Pot', 'Hemo', 'Wbcc', 'Rbcc', 'Htn',
]

st.write("""
# Chronic Kidney Disease
#Context
This dataset is originally from UCI Machine Learning Repository. The objective of the dataset is to diagnostically predict whether a patient is having chronic kidney disease or not, based on certain diagnostic measurements included in the dataset.
#Content
The datasets consists of several medical predictor variables and one target variable, Class. Predictor variables includes Blood Pressure(Bp), Albumin(Al), etc.
#Inspiration
Can you build a machine learning model to accurately predict whether or not the patients in the dataset have chronic kidney disease or not?
""")

# NOTE(review): this text contains no link markup even though it is rendered
# with unsafe_allow_html=True -- presumably an <a href=...> tag was lost;
# confirm against the deployed app.
url_dataset = 'Download Dataset CSV File'
st.markdown(url_dataset, unsafe_allow_html=True)


def user_input_features() -> pd.DataFrame:
    """Read every sidebar slider and return the values as a one-row frame.

    Returns:
        A DataFrame with a single row and one column per entry in
        ``SLIDER_RANGES`` (same names, same order).
    """
    data = {
        name: [st.sidebar.slider(name, low, high)]
        for name, (low, high) in SLIDER_RANGES.items()
    }
    return pd.DataFrame(data)


input_df = user_input_features()

kidney_raw = pd.read_csv('new_model.csv').fillna(0)
kidney = kidney_raw.drop(columns=['Class'])

# Stack the user row on top of the dataset, then keep only that first row
# (the user input data).
# NOTE(review): no encoding step follows, so the concat does not appear to
# alter the selected feature values; it is kept to preserve behaviour.
df = pd.concat([input_df, kidney], axis=0)
# Non-inplace fillna on the slice avoids mutating a view of the concat result.
df = df.iloc[:1].fillna(0)
df = df[FEATURES]

st.subheader('User Input features')
st.write(df)

# SECURITY: unpickling executes arbitrary code; only load a model file that
# comes from a trusted source.
with open('kidney_clf.pkl', 'rb') as model_file:
    load_clf = pickle.load(model_file)

prediction = load_clf.predict(df)
prediction_proba = load_clf.predict_proba(df)

kidney_labels = np.array(['Normal', 'Chronic Kidney'])

st.subheader('Prediction')
# predict() returns an array with one entry per input row; index the single
# row explicitly rather than converting the whole array to an int, which is
# deprecated for arrays with ndim > 0.
st.write(kidney_labels[int(prediction[0])])

st.subheader('Prediction Probability')
df_prob = pd.DataFrame(
    data=prediction_proba,
    index=['Probability'],
    columns=kidney_labels,
)
st.write(df_prob)