|
import pandas as pd |
|
from sklearn.model_selection import train_test_split |
|
import numpy as np |
|
from sklearn.metrics import accuracy_score |
|
import matplotlib.pyplot as plt |
|
import seaborn as sns |
|
|
|
|
|
|
|
# Load the dataset from the current working directory and echo the first
# five rows as a quick sanity check of the parsed columns.
data = pd.read_csv('Cardio_Vascular_Disease_by_Gut_Microbiota.csv')
print(data.head(n=5))
|
|
|
from sklearn.ensemble import RandomForestClassifier |
|
|
|
|
|
# Target is the CVD_Status column; every other column except the
# patient_id identifier is used as a predictor.
y = data['CVD_Status']
X = data.drop(columns=['patient_id', 'CVD_Status'])

# Fit a random forest on the full dataset purely to rank the predictors
# (no hold-out split is involved at this stage).
rf = RandomForestClassifier(random_state=42)
rf.fit(X, y)
importances = rf.feature_importances_

# One row per feature, most important first, so the bar plot reads top-down.
feature_importance_df = pd.DataFrame(
    {'Feature': X.columns, 'Importance': importances}
).sort_values('Importance', ascending=False)

plt.figure(figsize=(10, 6))
sns.barplot(data=feature_importance_df, x='Importance', y='Feature')
plt.title('Feature Importance from Random Forest')
plt.show()
|
|
|
from sklearn.ensemble import GradientBoostingClassifier |
|
from xgboost import XGBClassifier |
|
from lightgbm import LGBMClassifier |
|
from sklearn.metrics import accuracy_score, confusion_matrix |
|
from sklearn.metrics import accuracy_score, confusion_matrix, r2_score, mean_squared_error, mean_absolute_error |
|
from math import sqrt |
|
|
|
|
|
# --- Gradient Boosting: train on an 80/20 split and report hold-out metrics ---
gradient_boosting = GradientBoostingClassifier(random_state=42)

# Fixed seed keeps the split reproducible; the XGBoost and LightGBM sections
# further down reuse these same X_train / X_test / y_train / y_test objects.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

gradient_boosting.fit(X_train, y_train)

# Hard class labels feed accuracy and the confusion matrix. They are
# predicted once; repeating the call on the same X_test adds nothing.
y_pred_gb = gradient_boosting.predict(X_test)
accuracy_gb = accuracy_score(y_test, y_pred_gb)
conf_matrix_gb = confusion_matrix(y_test, y_pred_gb)

# Column 1 of predict_proba: the probability assigned to the second class.
y_pred_prob_gb = gradient_boosting.predict_proba(X_test)[:, 1]

# Regression-style errors between the true labels and that probability.
# NOTE(review): only meaningful if CVD_Status is encoded as numeric 0/1 - confirm.
r2_gb = r2_score(y_test, y_pred_prob_gb)
mse_gb = mean_squared_error(y_test, y_pred_prob_gb)
rmse_gb = sqrt(mse_gb)  # derived from the single MSE computation above
mae_gb = mean_absolute_error(y_test, y_pred_prob_gb)

# Single consolidated report (accuracy and confusion matrix are printed once).
print(f"Gradient Boosting Accuracy: {accuracy_gb * 100:.2f}%")
print(f"R² Score: {r2_gb:.4f}, RMSE: {rmse_gb:.4f}, MSE: {mse_gb:.4f}, MAE: {mae_gb:.4f}")
print(f"Confusion Matrix:\n {conf_matrix_gb}\n")
|
|
|
# --- XGBoost: same train/test split as above, same set of hold-out metrics ---
# NOTE(review): `use_label_encoder` is kept as in the original call; whether
# the installed xgboost release still accepts or needs it should be confirmed.
xgboost = XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)
xgboost.fit(X_train, y_train)

# Hard class labels feed accuracy and the confusion matrix. They are
# predicted once; repeating the call on the same X_test adds nothing.
y_pred_xgb = xgboost.predict(X_test)
accuracy_xgb = accuracy_score(y_test, y_pred_xgb)
conf_matrix_xgb = confusion_matrix(y_test, y_pred_xgb)

# Column 1 of predict_proba: the probability assigned to the second class.
y_pred_prob_xgb = xgboost.predict_proba(X_test)[:, 1]

# Regression-style errors between the true labels and that probability.
# NOTE(review): only meaningful if CVD_Status is encoded as numeric 0/1 - confirm.
r2_xgb = r2_score(y_test, y_pred_prob_xgb)
mse_xgb = mean_squared_error(y_test, y_pred_prob_xgb)
rmse_xgb = sqrt(mse_xgb)  # derived from the single MSE computation above
mae_xgb = mean_absolute_error(y_test, y_pred_prob_xgb)

# Single consolidated report (accuracy and confusion matrix are printed once).
print(f"XGBoost Accuracy: {accuracy_xgb * 100:.2f}%")
print(f"R² Score: {r2_xgb:.4f}, RMSE: {rmse_xgb:.4f}, MSE: {mse_xgb:.4f}, MAE: {mae_xgb:.4f}")
print(f"Confusion Matrix:\n {conf_matrix_xgb}\n")
|
|
|
# --- LightGBM: same train/test split as above, same set of hold-out metrics ---
lightgbm = LGBMClassifier(random_state=42)
lightgbm.fit(X_train, y_train)

# Hard class labels feed accuracy and the confusion matrix. They are
# predicted once; repeating the call on the same X_test adds nothing.
y_pred_lgbm = lightgbm.predict(X_test)
accuracy_lgbm = accuracy_score(y_test, y_pred_lgbm)
conf_matrix_lgbm = confusion_matrix(y_test, y_pred_lgbm)

# Column 1 of predict_proba: the probability assigned to the second class.
y_pred_prob_lgbm = lightgbm.predict_proba(X_test)[:, 1]

# Regression-style errors between the true labels and that probability.
# NOTE(review): only meaningful if CVD_Status is encoded as numeric 0/1 - confirm.
r2_lgbm = r2_score(y_test, y_pred_prob_lgbm)
mse_lgbm = mean_squared_error(y_test, y_pred_prob_lgbm)
rmse_lgbm = sqrt(mse_lgbm)  # derived from the single MSE computation above
mae_lgbm = mean_absolute_error(y_test, y_pred_prob_lgbm)

# Single consolidated report (accuracy and confusion matrix are printed once).
print(f"LightGBM Accuracy: {accuracy_lgbm * 100:.2f}%")
print(f"R² Score: {r2_lgbm:.4f}, RMSE: {rmse_lgbm:.4f}, MSE: {mse_lgbm:.4f}, MAE: {mae_lgbm:.4f}")
print(f"Confusion Matrix:\n {conf_matrix_lgbm}\n")
|
|
|
import joblib |
|
|
|
|
|
|
|
from sklearn.ensemble import GradientBoostingClassifier |
|
|
|
|
|
# Train the model that gets persisted. It uses the same estimator class,
# seed and training split as `gradient_boosting` above; `fit` returns the
# estimator itself, so construction and training are chained.
model = GradientBoostingClassifier(random_state=42).fit(X_train, y_train)

# Serialise the fitted estimator to the working directory for later reuse.
joblib.dump(model, 'trained_model.pkl')
print("Model saved successfully as trained_model.pkl")
|
|
|
|
|
def predict_cvd(Age, Gender, BMI, Blood_pressure, cholesterol, Bacteroides_fragilis, Faecalibacterium_prausnitzii,
                Akkermansia_muciniphila, Ruminococcus_bromii, Microbiome_Diversity):
    """Predict cardiovascular-disease status for a single patient.

    The arguments are assembled into a one-row DataFrame, printed, and passed
    to the module-level ``model``.

    Args:
        Gender: String label; ``'female'`` (any letter case) is encoded as 1,
            every other string as 0.
        Age, BMI, Blood_pressure, cholesterol, Bacteroides_fragilis,
        Faecalibacterium_prausnitzii, Akkermansia_muciniphila,
        Ruminococcus_bromii, Microbiome_Diversity: Feature values for the
            patient, forwarded unchanged.

    Returns:
        ``"Cardiovascular Disease Detected"`` when the first prediction equals
        1, otherwise ``"No Cardiovascular Disease Detected"``.
    """
    gender_code = 1 if Gender.lower() == 'female' else 0

    # NOTE(review): the 'cholesterol' key is lower-case here, whereas the later
    # definition of this function uses 'Cholesterol' - confirm which spelling
    # matches the training columns.
    features = {
        'Age': Age,
        'Gender': gender_code,
        'BMI': BMI,
        'Blood_pressure': Blood_pressure,
        'cholesterol': cholesterol,
        'Bacteroides_fragilis': Bacteroides_fragilis,
        'Faecalibacterium_prausnitzii': Faecalibacterium_prausnitzii,
        'Akkermansia_muciniphila': Akkermansia_muciniphila,
        'Ruminococcus_bromii': Ruminococcus_bromii,
        'Microbiome_Diversity': Microbiome_Diversity,
    }
    # Wrap each scalar in a one-element list to get a single-row frame.
    input_data = pd.DataFrame({column: [value] for column, value in features.items()})

    print(input_data)

    prediction = model.predict(input_data)

    if prediction[0] == 1:
        return "Cardiovascular Disease Detected"
    return "No Cardiovascular Disease Detected"
|
|
|
import gradio as gr |
|
import pandas as pd |
|
import joblib |
|
|
|
|
|
# Restore the estimator written earlier by joblib.dump. joblib files are
# pickle-based, so only load artifacts produced by a trusted source.
model = joblib.load(filename='trained_model.pkl')
|
|
|
|
|
def predict_cvd(Age, Gender, BMI, Blood_pressure, Cholesterol, Bacteroides_fragilis, Faecalibacterium_prausnitzii,
                Akkermansia_muciniphila, Ruminococcus_bromii, Microbiome_Diversity):
    """Predict cardiovascular-disease status for one patient (Gradio callback).

    The arguments are assembled into a one-row DataFrame and passed to the
    module-level ``model``. Failures are reported as a message string instead
    of being raised, so the interface always has text to display.

    Args:
        Gender: String label; ``'female'`` (any letter case) is encoded as 1,
            every other string as 0. A non-string value (for example ``None``
            from a dropdown with nothing selected) yields an error message.
        Age, BMI, Blood_pressure, Cholesterol, Bacteroides_fragilis,
        Faecalibacterium_prausnitzii, Akkermansia_muciniphila,
        Ruminococcus_bromii, Microbiome_Diversity: Feature values for the
            patient, forwarded unchanged.

    Returns:
        ``"Cardiovascular Disease Detected"`` when the first prediction equals
        1, ``"No Cardiovascular Disease Detected"`` otherwise, or a string
        starting with ``"An error occurred:"`` when the input is unusable or
        prediction fails.
    """
    # The Gender dropdown has no default value, so it can deliver None.
    # Answer with an actionable message instead of leaking the raw
    # "'NoneType' object has no attribute 'lower'" error text.
    if not isinstance(Gender, str):
        return "An error occurred: Gender must be selected ('Male' or 'Female')"

    try:
        gender_code = 1 if Gender.lower() == 'female' else 0

        input_data = pd.DataFrame({
            'Age': [Age],
            'Gender': [gender_code],
            'BMI': [BMI],
            'Blood_pressure': [Blood_pressure],
            'Cholesterol': [Cholesterol],
            'Bacteroides_fragilis': [Bacteroides_fragilis],
            'Faecalibacterium_prausnitzii': [Faecalibacterium_prausnitzii],
            'Akkermansia_muciniphila': [Akkermansia_muciniphila],
            'Ruminococcus_bromii': [Ruminococcus_bromii],
            'Microbiome_Diversity': [Microbiome_Diversity],
        })

        prediction = model.predict(input_data)

        if prediction[0] == 1:
            return "Cardiovascular Disease Detected"
        return "No Cardiovascular Disease Detected"
    except Exception as e:
        # UI boundary: deliberately broad so any failure becomes output text.
        return f"An error occurred: {str(e)}"
|
|
|
|
|
# Input widgets, listed in the same order as predict_cvd's parameters.
# The five microbiome sliders share one range, step and default, so they are
# generated from their labels.
inputs = [
    gr.Slider(minimum=18, maximum=100, step=1, value=50, label="Age"),
    gr.Dropdown(choices=['Male', 'Female'], label="Gender"),
    gr.Slider(minimum=10.0, maximum=50.0, step=0.1, value=25.0, label="BMI"),
    gr.Slider(minimum=90, maximum=200, step=1, value=120, label="Blood Pressure"),
    gr.Slider(minimum=100, maximum=300, step=1, value=180, label="Cholesterol"),
    *[
        gr.Slider(minimum=0.0, maximum=10.0, step=0.1, value=5.0, label=slider_label)
        for slider_label in (
            "Bacteroides Fragilis Level",
            "Faecalibacterium Prausnitzii Level",
            "Akkermansia Muciniphila Level",
            "Ruminococcus Bromii Level",
            "Microbiome Diversity",
        )
    ],
]

# Wire the callback to the widgets; its returned string is shown as plain text.
iface = gr.Interface(
    fn=predict_cvd,
    inputs=inputs,
    outputs="text",
    title="Cardiovascular Disease Prediction",
)

# Start the web UI.
iface.launch()