Spaces:

ashkanpakzad
/

cvdriskdemo

Sleeping

App Files Files Community

ashkanpakzad committed on Mar 14

Commit

f6764e8

•

2 Parent(s): ed6d7a6 25e3660

Merge branch 'main' of https://huggingface.co/spaces/ashkanpakzad/cvdriskdemo

Browse files

Files changed (2) hide show

.gitignore +1 -0
app.py +68 -27

.gitignore ADDED Viewed

	@@ -0,0 +1 @@


1	+ .streamlit

app.py CHANGED Viewed

@@ -1,23 +1,53 @@
 import pandas as pd
-from pathlib import Path
-from sklearn.linear_model import LogisticRegression
 import pickle
 import streamlit as st
 import numpy as np
-# load model
-model = pickle.load(open(Path("model.pkl"), "rb"))
-def predict(input):
-    inputdf = pd.Series(input)
-    st.subheader('Input data')
-    st.table(pd.DataFrame(inputdf))
-    prob = model.predict_proba([inputdf])
-    st.subheader(f'Risk of CVD: {prob[0][1]*100:.2f}%')
 st.title('Cardiovascular Disease Risk Prediction DEMO')
 st.markdown('''
@@ -25,27 +55,36 @@ st.markdown('''
             Output from a simple logistic regression model based on 1000 individuals in India.
             Data source: [Cardiovascular Disease Dataset](https://www.kaggle.com/datasets/jocelyndumlao/cardiovascular-disease-dataset/)
             ''')
-age = st.number_input('Age', 0, 100)
-sex_options = ['Female', 'Male']
-sex = st.radio('Sex', sex_options)
-chestpain_options = ['typical angina', 'atypical angina', 'non-anginal pain', 'none']
-chestpain= st.radio('Chest pain type', chestpain_options)
-restingBP = st.number_input('Resting systolic blood pressure mmHG', 0, 200)
-serumcholestrol = st.number_input('Serum Cholesterol in mg/dl', 0, 300)
-fastingbloodsugar_options = ['< than 120 mg/dl', '>= than 120 mg/dl']
-fastingbloodsugar = st.radio('Fasting blood sugar', fastingbloodsugar_options)
-maxheartrate = st.number_input('Maximum heart rate achieved', 0, 300)
-exerciseangia_options = ['no', 'yes']
-exerciseangia = st.radio('Exercise induced angina', exerciseangia_options)
 input={
     'age': age,
@@ -58,18 +97,20 @@ input={
     'exerciseangia': exerciseangia_options.index(exerciseangia)
 }
-col1, col2 = st.columns(2)
-with col1:
     but1 = st.empty()
-with col2:
     but2 = st.empty()
-if but1.button('Predict Input'):
     predict(input)
-if but2.button('Predict Random'):
     predict({
     'age': np.random.randint(35, 90),
     'gender': np.random.randint(0, 2),

 import pandas as pd
 import pickle
 import streamlit as st
 import numpy as np
+from huggingface_hub import hf_hub_download
+import shap
+import matplotlib.pyplot as plt
+model_path = hf_hub_download(repo_id=st.secrets["REPO_ID"], filename="model.pkl", token=st.secrets["HF_TOKEN"])
+explainer_path = hf_hub_download(repo_id=st.secrets["REPO_ID"], filename="explainer.pkl", token=st.secrets["HF_TOKEN"])
+# load model
+model = pickle.load(open(model_path, "rb"))
+def model_proba(x):
+    return model.predict_proba(x)[:, 1]
+explainer = pickle.load(open(explainer_path, "rb"))
+def predict(input):
+    col1c, col2c= st.columns([0.3, 0.7])
+    inputss = pd.Series(input)
+    inputdf = pd.DataFrame(inputss)
+    inputdf.rename(columns={0: 'value'}, inplace=True)
+    with col1c:
+        st.subheader('Input data')
+        st.table(inputdf)
+        prob = model.predict_proba([inputss])
+    st.header(f'CVD Risk: {prob[0][1]*100:.2f}%')
+    with col2c:
+        st.subheader('CVD Risk Explanation')
+        shap_value = explainer(pd.DataFrame(inputdf).T)
+        shap.decision_plot(shap_value.base_values, shap_value.values, feature_names=shap_value.feature_names)
+        ax = plt.gca()
+        ax.set_xlabel('<-- Feature input decreases risk | Feature input increases risk -->')
+        ax.set_ylabel('Feature impact -->')
+        st.pyplot(plt.gcf())
+    st.markdown('''
+            * The effect of each input feature's value on the model's result shown relates to THIS instance only.
+            * The straight vertical line is the expected (mean) value of the model.
+            * The plotted line shows the effect of each feature in deviating from the expected value.
+            ''')
 st.title('Cardiovascular Disease Risk Prediction DEMO')
 st.markdown('''
             Output from a simple logistic regression model based on 1000 individuals in India.
             Data source: [Cardiovascular Disease Dataset](https://www.kaggle.com/datasets/jocelyndumlao/cardiovascular-disease-dataset/)
+            The CVD risk model prediction is explained using [SHAP](https://shap.readthedocs.io/en/stable/) values.
             ''')
+col1, col2= st.columns(2)
+with col1:
+    age = st.number_input('Age (years)', 0, 100)
+    sex_options = ['Female', 'Male']
+    sex = st.radio('Sex', sex_options)
+    chestpain_options = ['none', 'non-anginal pain', 'typical angina', 'atypical angina']
+    chestpain= st.radio('Chest pain type', chestpain_options)
+    restingBP = st.number_input('Resting systolic blood pressure mm HG (94-200)', 0, 200)
+with col2:
+    serumcholestrol = st.number_input('Serum Cholesterol in mg/dl (126-564)', 0, 300)
+    fastingbloodsugar_options = ['LESS than 120 mg/dl', 'GREATER than or EQUAL 120 mg/dl']
+    fastingbloodsugar = st.radio('Fasting blood sugar', fastingbloodsugar_options)
+    maxheartrate = st.number_input('Maximum heart rate achieved BPM (71-202)', 0, 300)
+    exerciseangia_options = ['no', 'yes']
+    exerciseangia = st.radio('Exercise induced angina', exerciseangia_options)
+st.divider()
 input={
     'age': age,
     'exerciseangia': exerciseangia_options.index(exerciseangia)
 }
+col1b, col2b = st.columns(2)
+with col1b:
     but1 = st.empty()
+with col2b:
     but2 = st.empty()
+st.divider()
+if but1.button('Predict Input', use_container_width=True):
     predict(input)
+if but2.button('Predict Random', use_container_width=True):
     predict({
     'age': np.random.randint(35, 90),
     'gender': np.random.randint(0, 2),