Spaces:
Build error
Build error
Add application files
Browse files- .streamlit/config.toml +5 -0
- README.md +3 -11
- app.py +26 -0
- apps/data.py +23 -0
- apps/home.py +15 -0
- apps/model.py +54 -0
- apps/models/gbm/gbm-model-pickle.sav +0 -0
- apps/models/gbm/gbm-scaler.sav +0 -0
- apps/models/gbm/gbm-xtest.sav +0 -0
- apps/models/gbm/gbm-ytest.sav +0 -0
- apps/pred.py +113 -0
- dataset/healthcare-dataset-stroke-data.csv +0 -0
- favicon.png +0 -0
- multiapp.py +23 -0
- requirements.txt +5 -0
.streamlit/config.toml
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[theme]
|
2 |
+
base="dark"
|
3 |
+
primaryColor="#FF3333"
|
4 |
+
backgroundColor="#3d0404"
|
5 |
+
secondaryBackgroundColor="#1e0203"
|
README.md
CHANGED
@@ -1,12 +1,4 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
emoji: 💩
|
4 |
-
colorFrom: gray
|
5 |
-
colorTo: gray
|
6 |
-
sdk: streamlit
|
7 |
-
sdk_version: 1.10.0
|
8 |
-
app_file: app.py
|
9 |
-
pinned: false
|
10 |
-
---
|
11 |
|
12 |
-
|
|
|
1 |
+
# Stroke Prediction using Machine Learning
|
2 |
+
The aim of this project is to develop a model that predicts whether a patient is likely to have a stroke, based on parameters such as gender, age, various diseases, and smoking status.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
|
4 |
+
Dataset used: https://www.kaggle.com/datasets/fedesoriano/stroke-prediction-dataset
|
app.py
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from multiapp import MultiApp
|
3 |
+
from apps import home, pred, data, model
|
4 |
+
|
5 |
+
# Configure the browser tab (title, icon) and the sidebar's initial state.
st.set_page_config(
    page_title='Stroke Prediction using ML - Mini-Project for 19CS601',
    page_icon='favicon.png',
    initial_sidebar_state='auto',
)

# Hide Streamlit brandings: inject CSS that hides the main menu and footer.
hide_streamlit_style = """
<style>
#MainMenu {visibility: hidden;}
footer {visibility: hidden;}
</style>
"""
st.markdown(hide_streamlit_style, unsafe_allow_html=True)

# Register every page, in the order it is offered by the navigation widget.
app = MultiApp()
for page_title, page_func in (
    ("Home", home.app),
    ("Prediction Service", pred.app),
    ("Dataset Overview", data.app),
    ("Model Overview", model.app),
):
    app.add_app(page_title, page_func)

# The navigation select box is drawn inside the sidebar ...
with st.sidebar:
    sess = app.run()

# ... and the selected page's function is then called outside of it.
app.view(sess)
|
apps/data.py
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
|
4 |
+
def app():
    """Render the "Dataset Overview" page.

    Writes the project blurb to the sidebar, then a short description of the
    stroke dataset with its attribution, and finally the dataset itself as a
    table in which the 0/1 flag columns are shown as "No"/"Yes".

    The CSV path is relative to the current working directory.
    """
    with st.sidebar:
        st.title('Stroke Prediction using Machine Learning')

        st.write('This model which predicts whether a patient is likely to get a stroke based on the parameters like gender, age various diseases and smoking status.')
        st.markdown('_For Machine Learning - 19CS601_')

    st.title('Dataset Overview')

    st.write("The following is the DataFrame of the healthcare dataset for stroke prediction.")
    st.write('This dataset is used to predict whether a patient is likely to get stroke based on the input parameters like gender, age, various diseases, and smoking status. Each row in the data provides relavant information about the patient.')

    st.markdown('Dataset by Federico Soriano Palacios ([__fedesoriano__](https://www.kaggle.com/fedesoriano) on Kaggle)')
    st.markdown('Source: https://www.kaggle.com/datasets/fedesoriano/stroke-prediction-dataset')

    # Forward slashes resolve on Windows, Linux and macOS alike; the previous
    # backslash-separated path only worked on Windows.
    df = pd.read_csv("dataset/healthcare-dataset-stroke-data.csv")

    # Show the binary flag columns as human-readable labels.
    for flag_column in ('hypertension', 'heart_disease', 'stroke'):
        df[flag_column] = df[flag_column].map({0: "No", 1: "Yes"})

    st.write(df)
|
apps/home.py
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from PIL import Image
|
3 |
+
|
4 |
+
def app():
    """Render the landing page.

    Shows the project title, the course note, a one-sentence description of
    the model, and one hint per page that can be picked from the navigation.
    """
    navigation_hints = (
        "* Pick the 'Prediction Service' to check the working of the model.",
        "* Pick the 'Dataset Overview' to know more about the dataset.",
        "* Pick the 'Model Overview' to know more about the model that we have used for predictions.",
    )

    with st.container():
        st.title('Stroke Prediction using Machine Learning')
        st.markdown('For _Machine Learning - 19CS601_')

        st.write('This model which predicts whether a patient is likely to get a stroke based on the parameters like gender, age various diseases and smoking status.')

        # One st.write call per hint, in the same order as before.
        for hint in navigation_hints:
            st.write(hint)
|
apps/model.py
ADDED
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pickle
|
3 |
+
import lightgbm
|
4 |
+
from sklearn.metrics import classification_report,plot_precision_recall_curve,plot_confusion_matrix,precision_recall_fscore_support,plot_roc_curve
|
5 |
+
|
6 |
+
def _load_pickle(path):
    """Unpickle and return the object stored at *path*, closing the file."""
    # NOTE(security): unpickling can execute arbitrary code. This is only
    # acceptable for artefacts shipped with the app, never for uploads.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


def app():
    """Render the "Model Overview" page.

    Loads the pickled model and its held-out test split, predicts on the test
    features, and displays headline metrics, the model object, a confusion
    matrix, ROC and precision-recall curves, and the classification report.

    Artefact paths are relative to the current working directory.
    """
    with st.sidebar:
        st.title('Stroke Prediction using Machine Learning')

        st.write('This model which predicts whether a patient is likely to get a stroke based on the parameters like gender, age various diseases and smoking status.')
        st.markdown('_For Machine Learning - 19CS601_')

    st.title('Model Overview')
    st.write('The model performance of the dataset is presented below.')

    # Retrieve the model and the data needed for the performance metrics.
    # Forward slashes resolve on every OS; the previous backslash-separated
    # paths only worked on Windows.
    model = _load_pickle("apps/models/gbm/gbm-model-pickle.sav")
    X_test = _load_pickle("apps/models/gbm/gbm-xtest.sav")
    Y_test = _load_pickle("apps/models/gbm/gbm-ytest.sav")
    Y_pred = model.predict(X_test)

    st.header('Model performance')

    # Per-class arrays; precision is not displayed on this page.
    _, recall, f1_sc, support = precision_recall_fscore_support(Y_test, Y_pred)
    accuracy = model.score(X_test, Y_test)

    # Index 0 is the first class reported by precision_recall_fscore_support
    # (presumably the "no stroke" class -- TODO confirm against the labels).
    col1, col2, col3, col4 = st.columns(4)
    col1.metric("Accuracy", round(accuracy, 4), "")
    col2.metric("Recall", round(recall[0], 4), "")
    col3.metric("F-measure", round(f1_sc[0], 4), "")
    col4.metric("Support", support[0], "")

    st.subheader("Model type: ")
    st.write(model)

    # The plot_* helpers draw on the global pyplot figure, which st.pyplot()
    # then renders; silence Streamlit's warning about that usage once.
    st.set_option('deprecation.showPyplotGlobalUse', False)

    st.subheader("Confusion Matrix: ")
    plot_confusion_matrix(model, X_test, Y_test, display_labels=['NoStroke', 'Stroke'])
    st.pyplot()

    st.subheader("ROC Curve")
    plot_roc_curve(model, X_test, Y_test)
    st.pyplot()

    st.subheader("Precision-Recall Curve")
    plot_precision_recall_curve(model, X_test, Y_test)
    st.pyplot()

    st.subheader('Other metrics:')
    report = classification_report(Y_test, Y_pred, target_names=None)
    st.code(report)
|
apps/models/gbm/gbm-model-pickle.sav
ADDED
Binary file (257 kB). View file
|
|
apps/models/gbm/gbm-scaler.sav
ADDED
Binary file (886 Bytes). View file
|
|
apps/models/gbm/gbm-xtest.sav
ADDED
Binary file (101 kB). View file
|
|
apps/models/gbm/gbm-ytest.sav
ADDED
Binary file (31.1 kB). View file
|
|
apps/pred.py
ADDED
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import lightgbm
|
3 |
+
import pickle
|
4 |
+
import numpy as np
|
5 |
+
from sklearn.preprocessing import RobustScaler
|
6 |
+
from sklearn.decomposition import PCA
|
7 |
+
# Load the pickled model and the scaler once, when this module is imported.
# Forward slashes resolve on Windows, Linux and macOS alike; the previous
# backslash-separated paths only worked on Windows. Paths are relative to the
# current working directory.
# NOTE(security): unpickling can execute arbitrary code. This is only
# acceptable for artefacts shipped with the app, never for user uploads.
with open("apps/models/gbm/gbm-model-pickle.sav", 'rb') as _model_file:
    model = pickle.load(_model_file)
with open("apps/models/gbm/gbm-scaler.sav", 'rb') as _scaler_file:
    scaler = pickle.load(_scaler_file)
|
9 |
+
|
10 |
+
# Shown beneath every prediction, whatever the outcome.
_PREVENTION_ADVICE = """
The best way to help prevent a stroke is to eat a healthy diet, exercise regularly, and avoid smoking and drinking too much alcohol.
These lifestyle changes can reduce your risk of problems like:
- arteries becoming clogged with fatty substances (atherosclerosis)
- high blood pressure
- high cholesterol levels
If you have already had a stroke, making these changes can help reduce your risk of having another stroke in the future.

"""

# Upper bound accepted by the BMI input widget.
_MAX_BMI = 60.0


def _show_prevention_advice():
    """Write the stroke-prevention advice and its source link."""
    st.write(_PREVENTION_ADVICE)
    st.write("Source: [National Health Service (NHS) - United Kingdom](https://www.nhs.uk/conditions/stroke/prevention/)")


def app():
    """Render the "Prediction Service" page.

    Collects personal and health answers through widgets. When "Submit" is
    pressed, the answers are encoded as integers, passed through the
    module-level ``scaler`` and fed to the module-level ``model``; the
    predicted class and its probability are shown in a "Results" expander.
    """
    with st.sidebar:
        st.title('Stroke Prediction using Machine Learning')

        st.write('This model which predicts whether a patient is likely to get a stroke based on the parameters like gender, age various diseases and smoking status.')
        st.markdown('_For Machine Learning - 19CS601_')

    st.write('It may take a few moments to complete this survey.')

    with st.container():
        st.subheader('Stage 1: Personal Questions')

        ch_gender = st.selectbox(
            'Gender: ',
            ('Male', 'Female', 'Others'))

        ch_age = st.number_input('Age: ', min_value=0, max_value=150, value=18, step=1)

        ch_restype = st.radio(
            'Residence Type: ',
            ('Urban', 'Rural'))

        ch_marital = st.radio(
            'Did you ever get married? ',
            ('Yes', 'No'))

        ch_worktype = st.selectbox(
            'Work type: ',
            ("I'm a child.", "I'm self employed", 'Working for the Private.', 'Working for the Government.', 'Never worked for anyone.'))

        st.subheader('Stage 2: Health Questions')

        # The value is divided by 100 below and defaults to 175, so the unit
        # is centimetres; the label previously claimed metres.
        ch_height = st.number_input('Height (in cm): ', min_value=0.0, max_value=500.0, value=175.0, step=0.1)

        ch_weight = st.number_input('Weight (in kg): ', min_value=0.0, max_value=5000.0, value=75.0, step=0.01)

        # A height of 0 is allowed by the widget; avoid dividing by zero.
        if ch_height > 0:
            calc_bmi = ch_weight / (ch_height / 100) ** 2
        else:
            calc_bmi = 0.0
        # The BMI widget rejects a default outside [min_value, max_value].
        calc_bmi = min(max(calc_bmi, 0.0), _MAX_BMI)

        ch_bmi = st.number_input('BMI: (Optional)', min_value=0.0, max_value=_MAX_BMI, value=calc_bmi, step=0.01)

        ch_agl = st.number_input('Average Glucose Level (in mg/dL): ', min_value=50.0, max_value=300.0, value=50.0, step=0.01)

        ch_smokingstat = st.selectbox(
            'Smoking status: ',
            ('Never smoked', 'Formerly smoked', "I'm an active smoker", 'I prefer not to speak'))

        st.write('Are you currently suffering from these diseases?')

        ch_hypertn = st.checkbox('Hypertension')

        ch_hearttn = st.checkbox('Heart Disease')

        submit = st.button('Submit')

    if not submit:
        return

    # Integer codes for the categorical answers. The fallback passed to
    # .get() reproduces the final "else" of the original conditional chains.
    gender_code = {"Female": 0, "Male": 1}.get(ch_gender, 2)
    marital_code = 1 if ch_marital == "Yes" else 0
    worktype_code = {
        "Never worked for anyone.": 1,
        "I'm a child.": 4,
        "I'm self employed": 3,
        "Working for the Private.": 2,
    }.get(ch_worktype, 0)
    # Bug fix: both branches used to yield 1, so "Rural" was never encoded.
    # 0 for Rural follows the alphabetical ordering the other codes use
    # (NOTE(review): confirm against the encoder used at training time).
    restype_code = 1 if ch_restype == "Urban" else 0
    smoking_code = {
        "I'm an active smoker": 3,
        "Formerly smoked": 1,
        "Never smoked": 2,
    }.get(ch_smokingstat, 0)
    hypertn_code = int(ch_hypertn)
    hearttn_code = int(ch_hearttn)

    # Feature order is kept exactly as it was passed to the scaler before.
    features = scaler.transform([[
        gender_code, ch_age, hypertn_code, hearttn_code, marital_code,
        worktype_code, restype_code, ch_agl, ch_bmi, smoking_code,
    ]])

    prediction = model.predict(features)
    predictval = model.predict_proba(features)
    predicted_class = prediction[0]

    with st.expander("Results"):
        if predicted_class == 0:
            no_stroke_pct = predictval[0][0] * 100
            st.success(f"The model predicts that with the probability of {no_stroke_pct:.2f}%, you won't be suffering from stroke in the future.")
            _show_prevention_advice()

        elif predicted_class == 1:
            stroke_pct = predictval[0][1] * 100
            st.error(f"The model predicts that with the probability of {stroke_pct:.2f}%, you will be suffering from stroke in the future.")
            if predictval[0][1] >= 0.85:
                st.subheader("Please seek medical attention as early as possible to mitigate the stroke disease.")
            _show_prevention_advice()

        else:
            # Neither class 0 nor class 1 was returned: show the scaled
            # feature row to help with debugging.
            st.error('NaN: Unexpected error')
            st.markdown("Debug: Selected input:")
            st.code(features)
|
dataset/healthcare-dataset-stroke-data.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
favicon.png
ADDED
multiapp.py
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Frameworks for running multiple Streamlit applications as a single app.
|
2 |
+
"""
|
3 |
+
import streamlit as st
|
4 |
+
|
5 |
+
class MultiApp:
    """Registry of titled page functions with a select-box navigator.

    Pages are kept in ``self.apps`` as dicts with the keys ``"title"`` and
    ``"function"``, in the order they were added.
    """

    def __init__(self):
        """Start with no registered pages."""
        self.apps = []

    def add_app(self, title, func):
        """Register *func* as the page displayed under *title*."""
        entry = dict(title=title, function=func)
        self.apps.append(entry)

    def run(self):
        """Draw the 'Navigation' select box and return the chosen page entry."""

        def entry_title(entry):
            # Label shown in the select box for a registered page.
            return entry['title']

        return st.selectbox('Navigation', self.apps, format_func=entry_title)

    def view(self, app):
        """Call the page function stored in the entry *app*."""
        render_page = app['function']
        render_page()
|
requirements.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
streamlit==0.71.0
|
2 |
+
pandas==1.1.3
|
3 |
+
scikit-learn==0.23.2
|
4 |
+
numpy==1.19.2
|
5 |
+
lightgbm==3.3.0
|