|
import numpy as np |
|
from sklearn import datasets |
|
import matplotlib.pyplot as plt |
|
|
|
from sklearn import svm, linear_model |
|
from sklearn.metrics import auc |
|
from sklearn.metrics import RocCurveDisplay |
|
from sklearn.model_selection import StratifiedKFold |
|
import gradio as gr |
|
|
|
from functools import partial |
|
|
|
|
|
|
|
|
|
def auc_analysis(selected_data, n_folds, cls_name): |
|
default_base = {"n_folds": 5} |
|
|
|
|
|
iris = datasets.load_iris() |
|
X_iris, y_iris, target_names_iris = iris.data, iris.target, iris.target_names |
|
X_iris, y_iris, target_names_iris = X_iris[y_iris != 2], y_iris[y_iris != 2], target_names_iris[0:-1] |
|
n_samples_iris, n_features_iris = X_iris.shape |
|
|
|
random_state = np.random.RandomState(0) |
|
X_iris = np.concatenate([X_iris, random_state.randn(n_samples_iris, 200 * n_features_iris)], axis=1) |
|
|
|
dataset_list = { |
|
"Iris": [X_iris, y_iris, target_names_iris] |
|
} |
|
|
|
|
|
params = default_base.copy() |
|
params.update({"n_folds": n_folds}) |
|
X, y, target_names = dataset_list[selected_data] |
|
|
|
|
|
svc_linear = svm.SVC(kernel="linear", probability=True, random_state=random_state) |
|
logistic_regression = linear_model.LogisticRegression() |
|
|
|
classification_models = { |
|
"SVC - linear kernel": svc_linear, |
|
"Logistic Regression": logistic_regression |
|
} |
|
|
|
classifier = classification_models[cls_name] |
|
|
|
|
|
cv = StratifiedKFold(n_splits=params["n_folds"]) |
|
|
|
|
|
tprs = [] |
|
aucs = [] |
|
mean_fpr = np.linspace(0, 1, 100) |
|
|
|
fig, ax = plt.subplots(figsize=(6, 6)) |
|
for fold, (train, test) in enumerate(cv.split(X, y)): |
|
classifier.fit(X[train], y[train]) |
|
viz = RocCurveDisplay.from_estimator( |
|
classifier, |
|
X[test], |
|
y[test], |
|
name=f"ROC fold {fold}", |
|
alpha=0.5, |
|
lw=1, |
|
ax=ax, |
|
) |
|
interp_tpr = np.interp(mean_fpr, viz.fpr, viz.tpr) |
|
interp_tpr[0] = 0.0 |
|
tprs.append(interp_tpr) |
|
aucs.append(viz.roc_auc) |
|
ax.plot([0, 1], [0, 1], "k--", label="chance level (AUC = 0.5)") |
|
|
|
mean_tpr = np.mean(tprs, axis=0) |
|
mean_tpr[-1] = 1.0 |
|
mean_auc = auc(mean_fpr, mean_tpr) |
|
std_auc = np.std(aucs) |
|
ax.plot( |
|
mean_fpr, |
|
mean_tpr, |
|
color="b", |
|
label=r"Mean ROC (AUC = %0.2f $\pm$ %0.2f)" % (mean_auc, std_auc), |
|
lw=2, |
|
alpha=0.8, |
|
) |
|
|
|
std_tpr = np.std(tprs, axis=0) |
|
tprs_upper = np.minimum(mean_tpr + std_tpr, 1) |
|
tprs_lower = np.maximum(mean_tpr - std_tpr, 0) |
|
ax.fill_between( |
|
mean_fpr, |
|
tprs_lower, |
|
tprs_upper, |
|
color="grey", |
|
alpha=0.2, |
|
label=r"$\pm$ 1 std. dev.", |
|
) |
|
|
|
ax.set( |
|
xlim=[-0.05, 1.05], |
|
ylim=[-0.05, 1.05], |
|
xlabel="False Positive Rate", |
|
ylabel="True Positive Rate", |
|
title=f"Mean ROC curve with variability\n(Positive label '{target_names[1]}')", |
|
) |
|
ax.axis("square") |
|
ax.legend(loc="lower right") |
|
|
|
return fig |
|
|
|
|
|
|
|
|
|
def iter_grid(n_rows, n_cols): |
|
|
|
for _ in range(n_rows): |
|
with gr.Row(): |
|
for _ in range(n_cols): |
|
with gr.Column(): |
|
yield |
|
|
|
|
|
input_models = ["SVC - linear kernel", "Logistic Regression"] |
|
|
|
title = "π¬ Receiver Operating Characteristic (ROC) with cross validation" |
|
with gr.Blocks(title=title) as demo: |
|
gr.Markdown(f"## {title}") |
|
gr.Markdown( |
|
"This app demonstrates Receiver Operating Characteristic (ROC) metric estimate variability using " |
|
"cross-validation. It shows the response of ROC and of its variance to different datasets, created from " |
|
"K-fold cross-validation. " |
|
"See the [source](https://scikit-learn.org/stable/auto_examples/model_selection/plot_roc_crossval.html)" |
|
" for more details.") |
|
gr.Markdown(f'Available classification models: {", ".join(input_models)}.') |
|
|
|
with gr.Row(): |
|
with gr.Column(): |
|
input_data = gr.Radio( |
|
choices=["Iris"], |
|
value="Iris", |
|
label="Dataset", |
|
info="Available datasets" |
|
) |
|
with gr.Column(): |
|
n_folds = gr.Radio( |
|
[3, 4, 5, 6, 7, 8, 9], value=4, label="Folds", info="Number of cross-validation splits" |
|
) |
|
|
|
counter = 0 |
|
for _ in iter_grid(len(input_models) // 2 + len(input_models) % 2, 2): |
|
if counter >= len(input_models): |
|
break |
|
input_model = input_models[counter] |
|
plot = gr.Plot(label=input_model) |
|
fn = partial(auc_analysis, cls_name=input_model) |
|
input_data.change(fn=fn, inputs=[input_data, n_folds], outputs=plot) |
|
n_folds.change(fn=fn, inputs=[input_data, n_folds], outputs=plot) |
|
counter += 1 |
|
|
|
if __name__ == "__main__": |
|
demo.launch() |
|
|