Spaces:
Sleeping
Sleeping
File size: 7,708 Bytes
0c2d844 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 |
import gradio as gr
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.model_selection import RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
# %matplotlib inline
import io
def importdata():
#balance_data = pd.read_csv(io.BytesIO(uploaded['heart_disease_data.csv']))
balance_data = pd.read_csv('heart_disease_data.csv')
# Printing the dataswet shape
print ("Dataset Length: ", len(balance_data))
print ("Dataset Shape: ", balance_data.shape)
# Printing the dataset obseravtions
print ("Dataset: ",balance_data.head())
return balance_data
def splitdatasetL(heart_data, input_data):
X = heart_data.drop(columns='target', axis=1)
Y = heart_data['target']
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, stratify=Y, random_state=2)
model = LogisticRegression()
model.fit(X_train, Y_train)
input_data_as_numpy_array= np.asarray(input_data)
# reshape the numpy array as we are predicting for only on instance
input_data_reshaped = input_data_as_numpy_array.reshape(1,-1)
prediction = model.predict(input_data_reshaped)
return prediction[0]
def splitdataset(balance_data):
# Separating the target variable
X = balance_data.values[:, 0:13]
Y = balance_data.values[:, 13]
# Splitting the dataset into train and test
X_train, X_test, y_train, y_test = train_test_split(
X, Y, test_size = 0.3, random_state = 100)
return X, Y, X_train, X_test, y_train, y_test
def train_using_gini(X_train, X_test, y_train):
clf_gini = DecisionTreeClassifier(criterion = "gini",
random_state = 100,max_depth=3, min_samples_leaf=5)
clf_gini.fit(X_train, y_train)
return clf_gini
def tarin_using_entropy(X_train, X_test, y_train):
clf_entropy = DecisionTreeClassifier(
criterion = "entropy", random_state = 100,
max_depth = 3, min_samples_leaf = 5)
clf_entropy.fit(X_train, y_train)
return clf_entropy
# Function to make predictions
def prediction(X_test, clf_object):
# Predicton on test with giniIndex
y_pred = clf_object.predict(X_test)
print("Predicted values:")
print(y_pred)
return y_pred
def RandomF(X_train, y_train, X_test):
rf_clf = RandomForestClassifier(n_estimators=1000, random_state=42)
rf_clf.fit(X_train, y_train)
pred = rf_clf.predict(X_test)
return pred
def SBM(df, X_test):
X = df.drop('target', axis=1)
y = df['target']
X_train, X_T, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.fit_transform(X_test)
svm = SVC(kernel='rbf', gamma=0.1)
svm.fit(X_train_scaled, y_train)
y_pred = svm.predict(X_test_scaled)
return y_pred
def SBF(new_data):
df = pd.read_csv('heart_disease_data.csv')
X = df.drop('target', axis=1)
y = df['target']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=5)
svm = SVC(kernel='linear')
svm.fit(X_train, y_train)
y_pred = svm.predict(new_data)
print(y_pred)
print(y_pred[0])
print("MEasdasdaGASDASD")
return y_pred[0]
def heart(age, gender, chestpaintype, restingbloodpressure, serumcholestrol, fastingbloodsugar, resting_ecg_result, maximumheartrate, exerciseinduced_angina, oldpeak, slope, ca, thal):
data = importdata()
X, Y, X_train, X_test, y_train, y_test = splitdataset(data)
clf_gini = train_using_gini(X_train, X_test, y_train)
clf_entropy = tarin_using_entropy(X_train, X_test, y_train)
fbs = 1 if fastingbloodsugar > 120 else 0
g = 0 if gender == "Female" else 1
exang = 0 if exerciseinduced_angina == "No" else 1
cp = 0
if chestpaintype == "Typical Angina" :
cp = 0
elif chestpaintype == "Non Typical Angina" :
cp = 1
elif chestpaintype == "Non Anginal Pain" :
cp = 2
else :
cp = 3
ecg = 0
if resting_ecg_result == "0 - Nothing to note" :
ecg = 0
elif resting_ecg_result == "1 - ST-T abnormality" :
ecg = 1
else :
ecg = 2
XX = np.array([age, g, cp, restingbloodpressure, serumcholestrol, fbs, ecg, maximumheartrate, exang, oldpeak, slope, ca, thal])
X_test[1][0] = age
X_test[1][1] = g
X_test[1][2] = cp
X_test[1][3] = restingbloodpressure
X_test[1][4] = serumcholestrol
X_test[1][5] = fbs
X_test[1][6] = ecg
X_test[1][7] = maximumheartrate
X_test[1][8] = exang
X_test[1][9] = oldpeak
X_test[1][10] = slope
X_test[1][11] = ca
X_test[1][12] = thal
new_data = pd.DataFrame({'age':[age],'sex':[g],'cp':[cp],'trestbps':[restingbloodpressure],
'chol': [serumcholestrol],'fbs':[fbs],'restecg': [ecg],
'thalach':[maximumheartrate],'exang':[exang],'oldpeak': [oldpeak],
'slope':[slope], 'ca':[ca], 'thal':[thal]})
y_pred_gini = prediction(X_test, clf_gini)
k = RandomF(X_train, y_train, X_test)
#m = SBM(data, new_data)
m = SBF(new_data)
print("ASDASDASDADS")
print(type(m))
#m = 0
pred = splitdatasetL(data, XX)
if y_pred_gini[1] == 1.0:
SD = "Based on our Decision Tree Machine Learning model which has an accuracy of 82.42%, you have high chances of having heart disease"
else:
SD = "Based on our Decision Tree Machine Learning model which has an accuracy of 82.42%, you are less likely to have heart disease"
if pred == 1:
SL = "Based on our Logistic Regression Machine Learning model which has an accuracy of 81.97%, you have high chances of having heart disease"
else:
SL = "Based on our Logistic Regression Machine Learning model which has an accuracy of 81.97%, you are less likely to have heart disease"
if k[1] == 1:
SR = "Based on our Random Forest Machine Learning model which has an accuracy of 82.42%, you have high chances of having heart disease"
else:
SR = "Based on our Random Forest Machine Learning model which has an accuracy of 82.42%, you are less likely to have heart disease"
if m == 1:
SS = "Based on our SVM Machine Learning model which has an accuracy of 89.01%, you have high chances of having heart disease"
else:
SS = "Based on our SVM Machine Learning model which has an accuracy of 89.01%, you are less likely to have heart disease"
models = ['Logistic Regression','Decision Tree','SVM','Random Forest']
accuracies = [81.97,82.42,89.01,82.42]
fig, ax = plt.subplots(figsize = (40,40))
ax.bar(models, accuracies)
ax.set_xlabel('Models')
ax.set_ylabel('Accuracy')
ax.set_title('Machine Learning Models Accuracy')
return SL, SD, SS, SR, fig
interface = gr.Interface(
fn=heart,
inputs=["number", gr.Radio(["Male", "Female"]),gr.Dropdown(["Typical Angina", "Non Typical Angina", "Non Anginal Pain", "Asymptomatic"]), "number", "number", "number", gr.Dropdown(["0 - Nothing to note", "1 - ST-T abnormality", "2 - Possible or definite left ventricular hypertrophy"]), "number", gr.Radio(["No", "Yes"]), "number" , "number", "number", "number"],
outputs=[gr.outputs.Label(label="Logistic Regression", type="text"),gr.outputs.Label(label="Decision Tree", type="auto"),gr.outputs.Label(label="Random Forest", type="text"),gr.outputs.Label(label="SVM", type="auto"),"plot"],
)
interface.launch() |