|
|
|
import numpy as np |
|
import pandas as pd |
|
import seaborn as sns |
|
import matplotlib.pyplot as plt |
|
from sklearn.metrics import roc_curve, roc_auc_score |
|
from sklearn.metrics import confusion_matrix ,classification_report,precision_score, recall_score ,f1_score |
|
from sklearn.model_selection import train_test_split |
|
from sklearn.linear_model import LogisticRegression |
|
from sklearn.neighbors import KNeighborsClassifier |
|
from sklearn.naive_bayes import GaussianNB |
|
from sklearn.svm import SVC |
|
import warnings |
|
warnings.filterwarnings('ignore') |
|
|
|
|
|
# Load the dataset and pick out the numeric and categorical column groups
# used by the exploratory plots and tables that follow.
data = pd.read_csv('datasetforriskofcardiodisease.csv')
data_num = data[['age', 'height', 'weight', 'ap_hi', 'ap_lo']]
data_cat = data[['gender', 'cholesterol', 'gluc', 'smoke', 'alco', 'active']]

# Human-readable axis labels, in the same order as data_num's columns.
xaxis = ['Age', 'Height', 'Weight', 'Systolic Blood Pressure', 'Diastolic Blood Pressure']

# One histogram per numeric column.  Each one gets its own figure: without
# plt.figure() every plt.hist call draws on the same current axes, so the
# histograms pile up and only the last title/labels survive.
for col, axis_label in zip(data_num.columns, xaxis):
    plt.figure()
    plt.hist(data_num[col])
    plt.title(f'Frequency vs. {axis_label}')
    plt.xlabel(axis_label)
    plt.ylabel('Frequency')
|
|
|
|
|
# Per-'cardio'-class aggregate (pivot_table's default aggregation, the mean)
# of each numeric column.  Printed explicitly: as a bare expression the table
# was computed and then thrown away when the file runs as a script.
print(pd.pivot_table(data, index='cardio', values=['age', 'height', 'weight', 'ap_hi', 'ap_lo']))

# Bar chart of value frequencies for each categorical column, one figure per
# column so the charts do not overwrite each other on a shared axes.
for col in data_cat.columns:
    counts = data_cat[col].value_counts()  # computed once, used for both axes
    plt.figure()
    sns.barplot(x=counts.index, y=counts).set_title(col)
|
|
|
|
|
|
|
# For each measure (age, ap_hi, ap_lo) print its pivot table by 'cardio'
# class against every indicator column.  Tables belonging to the same measure
# are separated by a rule of 100 '=' characters; no rule is printed between
# the last table of one measure and the first table of the next.
for measure in ('age', 'ap_hi', 'ap_lo'):
    for position, factor in enumerate(('cholesterol', 'gluc', 'smoke', 'alco', 'active')):
        if position:
            print("=" * 100)
        print(pd.pivot_table(data, index='cardio', columns=factor, values=measure))
|
|
|
# Box plot of every numeric column.  A fresh figure per column keeps the
# plots from stacking on one shared axes (previously each iteration drew on
# whatever axes was current and only the last title remained).
for col in data_num.columns:
    plt.figure()
    sns.boxplot(data_num[col])
    plt.title(col)
|
|
|
|
|
|
|
def outlinefree(dataCol):
    """Return the 1.5 * IQR outlier fences for a numeric column.

    Args:
        dataCol: Array-like of numbers (anything ``np.percentile`` accepts).

    Returns:
        Tuple ``(lower, upper)`` where ``lower = Q1 - 1.5 * IQR`` and
        ``upper = Q3 + 1.5 * IQR``; Q1/Q3 are the 25th/75th percentiles.
    """
    # np.percentile does not need sorted input, so the former
    # ``sorted(dataCol)`` call (whose result was discarded) is dropped.
    q1, q3 = np.percentile(dataCol, [25, 75])
    iqr = q3 - q1
    lower_range = q1 - (1.5 * iqr)
    upper_range = q3 + (1.5 * iqr)
    return lower_range, upper_range
|
|
|
|
|
# 1.5 * IQR fences for the two blood-pressure columns.
lwap_hi, upap_hi = outlinefree(data['ap_hi'])
lwap_lo, upap_lo = outlinefree(data['ap_lo'])

# Cap readings above the upper fence at the fence value.  The clipped Series
# is assigned back on purpose: the previous
# ``data[col].replace(..., inplace=True)`` mutated a temporary Series selected
# from the frame, which leaves ``data`` itself unchanged under pandas
# Copy-on-Write (and the warning about it is hidden by the global filter).
# NOTE(review): the lower fences (lwap_hi / lwap_lo) are computed but never
# applied, so values below them are left untouched - confirm that is intended.
data['ap_hi'] = data['ap_hi'].clip(upper=upap_hi)
data['ap_lo'] = data['ap_lo'].clip(upper=upap_lo)
|
|
|
|
|
# Feature matrix = every column except the last; label = the last column.
# NOTE(review): this assumes the target ('cardio') is the final CSV column
# and that no identifier column precedes the features - confirm.
features = data.iloc[:, :-1].values
label = data.iloc[:, -1].values

# Hold out 25% of the rows for evaluation; the seed fixes the split.
X_train, X_test, y_train, y_test = train_test_split(features, label, test_size=0.25, random_state=102)

# Default-parameter logistic regression.
classimodel = LogisticRegression()
classimodel.fit(X_train, y_train)
trainscore = classimodel.score(X_train, y_train)
testscore = classimodel.score(X_test, y_test)
print("Logistic Regression-----------------------------------------------------\n")
print("test score: {} train score: {}".format(testscore, trainscore), '\n')

y_pred = classimodel.predict(X_test)

# Print the confusion matrix, as the KNN and Naive Bayes sections do; as a
# bare expression it was computed and silently discarded.
print(confusion_matrix(y_test, y_pred))

print(' f1 score: ', f1_score(y_test, y_pred), '\n')
print(' precision score: ', precision_score(y_test, y_pred), '\n')
print(' recall score: ', recall_score(y_test, y_pred), '\n')
print(classification_report(y_test, y_pred))
|
|
|
|
|
# k-nearest-neighbours section: new 75/25 split (seed 193), default-parameter
# classifier, then accuracy, confusion matrix and per-class metrics.
X_train, X_test, y_train, y_test = train_test_split(
    features, label, random_state=193, test_size=0.25
)

classifier = KNeighborsClassifier()
knnmodel = classifier.fit(X_train, y_train)

trainscore = knnmodel.score(X_train, y_train)
testscore = knnmodel.score(X_test, y_test)
print("KNN-----------------------------------------------------\n")
print(f"test score: {testscore} train score: {trainscore}", '\n')

y_predknn = knnmodel.predict(X_test)
print(confusion_matrix(y_test, y_predknn))

# Same three headline metrics, printed in the same order and format.
for metric_label, metric_fn in (
    ("f1_score: ", f1_score),
    ("precision_score: ", precision_score),
    ("recall_score: ", recall_score),
):
    print(metric_label, metric_fn(y_test, y_predknn), '\n')
print(classification_report(y_test, y_predknn))
|
|
|
|
|
# Gaussian Naive Bayes section: new 75/25 split (seed 34), fit, then report
# accuracy, confusion matrix and per-class metrics.
X_train, X_test, y_train, y_test = train_test_split(
    features, label, random_state=34, test_size=0.25
)

NBmodel = GaussianNB()
NBmodel.fit(X_train, y_train)

trainscore = NBmodel.score(X_train, y_train)
testscore = NBmodel.score(X_test, y_test)
print("Naive Bayes-----------------------------------------------------\n")
print(f"test score: {testscore} train score: {trainscore}", '\n')

y_predNB = NBmodel.predict(X_test)
print(confusion_matrix(y_test, y_predNB))

# Same three headline metrics, printed in the same order and format.
for metric_label, metric_fn in (
    ("f1_score: ", f1_score),
    ("precision_score: ", precision_score),
    ("recall_score: ", recall_score),
):
    print(metric_label, metric_fn(y_test, y_predNB), '\n')
print(classification_report(y_test, y_predNB))
|
|
|
|
|
|
|
import xgboost as xgb |
|
from sklearn.metrics import mean_squared_error |
|
import pandas as pd |
|
import numpy as np |
|
|
|
# 75/25 split with seed 102 (the same arguments as the logistic-regression
# section's split).
X_train, X_test, y_train, y_test = train_test_split(features, label, test_size=0.25, random_state=102)

# NOTE(review): XGBRFClassifier is xgboost's random-forest estimator, while
# the banner below says "XGBoost" - confirm XGBClassifier was not intended.
XGmodel = xgb.XGBRFClassifier()
XGmodel.fit(X_train, y_train)
trainscore = XGmodel.score(X_train, y_train)
testscore = XGmodel.score(X_test, y_test)
print("XGBoost-----------------------------------------------------\n")
print("test score: {} train score: {}".format(testscore, trainscore), '\n')

y_predXG = XGmodel.predict(X_test)

# Confusion matrix for THIS model's predictions.  The line used to pass
# y_pred (the logistic-regression predictions) and discard the result.
print(confusion_matrix(y_test, y_predXG))

print("f1_score: ", f1_score(y_test, y_predXG), '\n')
print("precision_score: ", precision_score(y_test, y_predXG), '\n')
print("recall_score: ", recall_score(y_test, y_predXG), '\n')
print(classification_report(y_test, y_predXG), '\n')
|
print("AREA UNDER CURVES-----------------------------------------------------\n")

# For each fitted model, in the order they were trained: score every row of
# the full dataset, print the ROC AUC, then draw the chance diagonal and the
# model's ROC curve on the current axes.
# NOTE(review): `features` / `label` contain the rows each model was fitted
# on, so these AUCs are not held-out estimates - confirm that is intended.
for fitted_model in (classimodel, knnmodel, NBmodel, XGmodel):
    probabilityValues = fitted_model.predict_proba(features)[:, 1]

    auc = roc_auc_score(label, probabilityValues)
    print(auc)

    fpr, tpr, threshold = roc_curve(label, probabilityValues)
    plt.plot([0, 1], [0, 1], linestyle='--')
    plt.plot(fpr, tpr)
|
# An earlier copy of the model-persistence, make_prediction and Gradio code
# used to sit here inside a bare triple-quoted string.  It was never executed
# and duplicated the live implementation that follows, so it was removed.
|
|
|
from sklearn.feature_extraction.text import CountVectorizer |
|
import joblib |
|
import matplotlib |
|
# Select matplotlib's non-interactive Agg backend.
matplotlib.use("agg")

model_file_name = 'XG_best_model.joblib'
# NOTE(review): hard-coded, user-specific Windows folder; it must already
# exist and keep its trailing separator for the concatenation below.
model_folder = 'C:\\Users\\Ben Z\\Downloads\\Models\\'
model_path = model_folder + model_file_name

# Persist the fitted model, then read it back for use by make_prediction.
joblib.dump(XGmodel, model_path)

# Hand the path straight to joblib.load: the previous open(..., 'rb') handle
# was never closed.
loaded_XG_model = joblib.load(model_path)
print(loaded_XG_model)
|
def make_prediction(value1, checkbox1, value2, value3, value4, value5, value6, value7, checkbox3, checkbox4, checkbox5):
    """Classify one set of form inputs with the reloaded model and phrase the result.

    The parameters follow the order of the Gradio inputs wired to this
    function (labels taken from that form).

    Args:
        value1: Age in years; multiplied by 365.25 before reaching the model
            (presumably the training data stores age in days - confirm).
        checkbox1: Gender selection; encoded as 1 if it contains "Male",
            otherwise 0.
        value2, value3: Height and weight.
        value4, value5: Systolic and diastolic blood pressure.
        value6, value7: Cholesterol and glucose.
        checkbox3, checkbox4, checkbox5: Smoking, alcohol and
            physical-activity flags.

    Returns:
        The string "The prediction is: " followed by the risk message.
    """
    # NOTE(review): an empty selection is also encoded as 0, and ticking both
    # options counts as male - confirm this matches the training data's
    # gender coding.
    gender_code = int("Male" in checkbox1)

    row = [
        value1 * 365.25,
        gender_code,
        value2,
        value3,
        value4,
        value5,
        value6,
        value7,
        checkbox3,
        checkbox4,
        checkbox5,
    ]
    # The model expects a 2-D array: one row, one column per feature.
    sample = np.array(row).reshape(1, -1)
    predicted_class = loaded_XG_model.predict(sample)[0]

    if predicted_class == 0:
        info = "You are not currently at risk of a cardiovascular disease! ✅"
    else:
        info = "You are at risk of a cardiovascular disease. I would recommend going to the doctor however, take my advice with a grain of salt as I am an AI model capable of making mistakes. 🚨"
    return f"The prediction is: {info}"
|
|
|
|
|
|
|
|
|
|
|
|
|
import gradio as gr |
|
|
|
|
|
headline = "Cardiovascular Disease Risk Prediction Application"

# Web form for make_prediction.  The components are listed in the exact order
# of make_prediction's positional parameters.
# They use the top-level gr.* component classes: the gr.inputs / gr.outputs
# namespaces are deprecated in Gradio 3 and no longer exist in Gradio 4.
iface = gr.Interface(
    fn=make_prediction,
    inputs=[
        gr.Number(label="Age (Years)"),
        # NOTE(review): a CheckboxGroup allows ticking both or neither option;
        # make_prediction only checks whether "Male" is present.
        gr.CheckboxGroup(label="Gender", choices=["Male", "Female"]),
        gr.Number(label="Height (cm)"),
        gr.Number(label="Weight (kg)"),
        gr.Number(label="Systolic Blood Pressure (mmHg)"),
        gr.Number(label="Diastolic Blood Pressure (mmHg)"),
        gr.Number(label="Cholesterol (per 20mg/dL)"),
        gr.Number(label="Glucose (per 1 mmol/L)"),
        gr.Checkbox(label="I have smoked."),
        gr.Checkbox(label="I drink more alcohol than I should (>2 cups for men and >1 cup for women)."),
        gr.Checkbox(label="I am physically active."),
    ],
    outputs=gr.Textbox(label="Prediction Result"),
    title=headline,
    theme='soft',
)

# Start the local web server only when run as a script (no public share link).
if __name__ == "__main__":
    iface.launch(share=False)