|
|
|
"""diarc.ipynb |
|
|
|
Automatically generated by Colaboratory. |
|
|
|
Original file is located at |
|
https://colab.research.google.com/drive/1Jyccp5Aeml-7oZABbACY2VTE9iQJg9Pe |
|
|
|
# Bismillahir Rahmaanir Raheem |
|
# Almadadh Ya Gause RadiAllahu Ta'alah Anh - Ameen |
|
|
|
# <font color=grey>DIabetes-related Amputation Risk Calculator (DIARC)</font> |
|
<b>_by Zakia Salod_</b> |
|
""" |
|
|
|
!pip install pycaret |
|
|
|
from pycaret.utils import version |
|
version() |
|
|
|
from pycaret.utils import enable_colab |
|
enable_colab() |
|
|
|
import numpy as np |
|
import pandas as pd |
|
import matplotlib.pyplot as plt |
|
import seaborn as sns |
|
|
|
|
|
# Seed NumPy's global random number generator.
np.random.seed(1234)

# Read the raw records from the Excel workbook.
dataset = pd.read_excel('amputation_dataset.xlsx')

# Distribution of the AMPUTATION target before any cleaning:
# printed as counts and drawn as a bar chart.
print(dataset['AMPUTATION'].value_counts())
ax = sns.countplot(data=dataset, x="AMPUTATION")

# Number of rows that repeat an earlier row (first occurrence not counted).
dataset.duplicated(keep='first').sum()

# Drop the repeats, keeping the first occurrence of each row.
dataset = dataset.drop_duplicates(keep='first')

# Distribution of the AMPUTATION target after de-duplication.
print(dataset['AMPUTATION'].value_counts())
ax = sns.countplot(data=dataset, x="AMPUTATION")

# Preview of the first rows of the cleaned data.
dataset.head()
|
|
|
|
|
|
|
# Shuffle every row; the fixed random_state keeps the order reproducible.
shuffled_dataset = dataset.sample(frac=1, random_state=4)

# All rows whose AMPUTATION target is 1 are kept.
amputation_dataset = shuffled_dataset.loc[shuffled_dataset['AMPUTATION'] == 1]

# Undersample the AMPUTATION == 0 rows to exactly as many rows as the
# AMPUTATION == 1 class. The size is derived from the data instead of the
# previous hard-coded 105, so the two classes stay balanced even if the
# input workbook changes.
# NOTE: DataFrame.sample raises ValueError when fewer AMPUTATION == 0 rows
# are available than requested (sampling is without replacement).
non_amputation_dataset = shuffled_dataset.loc[
    shuffled_dataset['AMPUTATION'] == 0
].sample(n=len(amputation_dataset), random_state=42)

# Stack the two classes: AMPUTATION == 1 rows first, then the sampled
# AMPUTATION == 0 rows. The original row index labels are preserved.
dataset = pd.concat([amputation_dataset, non_amputation_dataset])

# Distribution of the AMPUTATION target after balancing.
print(dataset['AMPUTATION'].value_counts())
ax = sns.countplot(x="AMPUTATION", data=dataset)

# Persist the de-duplicated, balanced data. With pandas' defaults the row
# index is written as the first column of the sheet.
dataset.to_excel('amputation_removed_duplicates_and_balanced.xlsx')
|
|
|
# PyCaret's classification functions (setup, get_config, compare_models, ...)
# are brought into the notebook namespace by this wildcard import.
from pycaret.classification import *

# Initialise the classification experiment on `dataset`, predicting the
# AMPUTATION column; session_id fixes the experiment's random seed.
clf = setup(
    data=dataset,
    target='AMPUTATION',
    session_id=42,
)

# Inspect the training features and training labels held by the experiment.
get_config('X_train')
get_config('y_train')

# Compare the candidate models and rank the results by AUC.
best_model = compare_models(sort='AUC')
|
|
|
|
|
|
|
# Every base learner is created from its PyCaret model ID and then wrapped
# in a bagging ensemble. The order of the calls is kept as-is.

# 'nb': Naive Bayes
nb = create_model('nb')
bagged_nb = ensemble_model(nb, method='Bagging')

# 'lr': Logistic Regression
lr = create_model('lr')
bagged_lr = ensemble_model(lr, method='Bagging')

# 'lda': Linear Discriminant Analysis
lda = create_model('lda')
bagged_lda = ensemble_model(lda, method='Bagging')

# 'rf': Random Forest
rf = create_model('rf')
bagged_rf = ensemble_model(rf, method='Bagging')

# 'ada': AdaBoost
ada = create_model('ada')
bagged_ada = ensemble_model(ada, method='Bagging')

# Blend the five bagged estimators into a single model.
bagged_estimators = [bagged_nb, bagged_lr, bagged_lda, bagged_rf, bagged_ada]
blend_specific = blend_models(estimator_list=bagged_estimators)
|
|
|
|
|
# Plot the blended model (PyCaret's default plot for plot_model).
plot_model(blend_specific)

# Hyper-parameter tuning of the blended model.
tuned_blend_specific = tune_model(blend_specific)

# Interactive evaluation of the tuned model.
evaluate_model(tuned_blend_specific)

# Predictions of the tuned model; no data is passed, so PyCaret scores
# the experiment's hold-out split.
tuned_blend_specific_predictions = predict_model(tuned_blend_specific)

# Finalize the tuned model. Per the PyCaret docs this refits it on the
# complete dataset, hold-out split included.
final_tuned_blend_specific = finalize_model(tuned_blend_specific)

# Persist the *finalized* pipeline. Previously the non-finalized
# `tuned_blend_specific` was saved and `final_tuned_blend_specific` was
# never used, so the stored artifact was not the model that finalize_model
# produced. The hold-out evaluation above is unaffected by this change.
save_model(final_tuned_blend_specific, "tuned_blend_specific_model_19112021", verbose=True)

# Inspect the hold-out features and labels held by the experiment.
get_config('X_test')
get_config('y_test')
|
|
|
dataset2 = pd.read_excel('amputation_removed_duplicates_and_balanced.xlsx') |
|
|
|
!pip install pandas-profiling |
|
|
|
from pandas_profiling import ProfileReport |
|
|
|
profile = ProfileReport(dataset2, title="Pandas Profiling Report") |
|
|
|
profile.to_file("amputation_removed_duplicates_and_balanced_report.html") |