Spaces:

sklearn-docs
/

receiver-operating-characteristic-with-cross-validation

Running on CPU Upgrade

App Files Files Community

Zhenev commited on Apr 12, 2023

Commit

4701648

•

1 Parent(s): 13e85c3

Add application file

Browse files

Files changed (1) hide show

app.py +161 -0

app.py ADDED Viewed

	@@ -0,0 +1,161 @@

+import numpy as np
+from sklearn.datasets import load_iris
+import matplotlib.pyplot as plt
+from sklearn import svm, linear_model
+from sklearn.metrics import auc
+from sklearn.metrics import RocCurveDisplay
+from sklearn.model_selection import StratifiedKFold
+import gradio as gr
+from functools import partial
+# Wrap the [Initial Analysis](https://scikit-learn.org/stable/auto_examples/model_selection/plot_roc_crossval.html)
+def auc_analysis(selected_data, n_folds, cls_name):
+    default_base = {"n_folds": 5}
+    # Load and prepare iris data
+    iris = load_iris()
+    X_iris, y_iris, target_names_iris = iris.data, iris.target, iris.target_names
+    X_iris, y_iris, target_names_iris = X_iris[y_iris != 2], y_iris[y_iris != 2], target_names_iris[0:-1]
+    n_samples_iris, n_features_iris = X_iris.shape
+    # Add noisy features to make the problem harder
+    random_state = np.random.RandomState(0)
+    X_iris = np.concatenate([X_iris, random_state.randn(n_samples_iris, 200 * n_features_iris)], axis=1)
+    dataset_list = {
+        "Iris": [X_iris, y_iris, target_names_iris]
+    }
+    # Load selected data
+    params = default_base.copy()
+    params.update({"n_folds": n_folds})
+    X, y, target_names = dataset_list[selected_data]
+    # Define classification model
+    svc_linear = svm.SVC(kernel="linear", probability=True, random_state=random_state)
+    logistic_regression = linear_model.LogisticRegression()
+    classification_models = {
+        "SVC - linear kernel": svc_linear,
+        "Logistic Regression": logistic_regression
+    }
+    classifier = classification_models[cls_name]
+    # Define folds
+    cv = StratifiedKFold(n_splits=params["n_folds"])
+    # ROC analysis
+    tprs = []
+    aucs = []
+    mean_fpr = np.linspace(0, 1, 100)
+    fig, ax = plt.subplots(figsize=(6, 6))
+    for fold, (train, test) in enumerate(cv.split(X, y)):
+        classifier.fit(X[train], y[train])
+        viz = RocCurveDisplay.from_estimator(
+            classifier,
+            X[test],
+            y[test],
+            name=f"ROC fold {fold}",
+            alpha=0.5,
+            lw=1,
+            ax=ax,
+        )
+        interp_tpr = np.interp(mean_fpr, viz.fpr, viz.tpr)
+        interp_tpr[0] = 0.0
+        tprs.append(interp_tpr)
+        aucs.append(viz.roc_auc)
+    ax.plot([0, 1], [0, 1], "k--", label="chance level (AUC = 0.5)")
+    mean_tpr = np.mean(tprs, axis=0)
+    mean_tpr[-1] = 1.0
+    mean_auc = auc(mean_fpr, mean_tpr)
+    std_auc = np.std(aucs)
+    ax.plot(
+        mean_fpr,
+        mean_tpr,
+        color="b",
+        label=r"Mean ROC (AUC = %0.2f $\pm$ %0.2f)" % (mean_auc, std_auc),
+        lw=2,
+        alpha=0.8,
+    )
+    std_tpr = np.std(tprs, axis=0)
+    tprs_upper = np.minimum(mean_tpr + std_tpr, 1)
+    tprs_lower = np.maximum(mean_tpr - std_tpr, 0)
+    ax.fill_between(
+        mean_fpr,
+        tprs_lower,
+        tprs_upper,
+        color="grey",
+        alpha=0.2,
+        label=r"$\pm$ 1 std. dev.",
+    )
+    ax.set(
+        xlim=[-0.05, 1.05],
+        ylim=[-0.05, 1.05],
+        xlabel="False Positive Rate",
+        ylabel="True Positive Rate",
+        title=f"Mean ROC curve with variability\n(Positive label '{target_names[1]}')",
+    )
+    ax.axis("square")
+    ax.legend(loc="lower right")
+    return fig
+# Build the Demo
+def iter_grid(n_rows, n_cols):
+    # create a grid using gradio Block
+    for _ in range(n_rows):
+        with gr.Row():
+            for _ in range(n_cols):
+                with gr.Column():
+                    yield
+input_models = ["SVC - linear kernel", "Logistic Regression"]
+title = "🔬 Receiver Operating Characteristic (ROC) with cross validation"
+with gr.Blocks(title=title) as demo:
+    gr.Markdown(f"## {title}")
+    gr.Markdown(
+        "This app demonstrates Receiver Operating Characteristic (ROC) metric estimate variability using "
+        "cross-validation. It shows the response of ROC and of its variance to different datasets, created from "
+        "K-fold cross-validation. "
+        "See the [source](https://scikit-learn.org/stable/auto_examples/model_selection/plot_roc_crossval.html)"
+        " for more details.")
+    gr.Markdown(f'Available classification models: {", ".join(input_models)}.')
+    with gr.Row():
+        with gr.Column():
+            input_data = gr.Radio(
+                choices=["Iris"],
+                value="Iris",
+                label="Dataset",
+                info="Available datasets"
+            )
+        with gr.Column():
+            n_folds = gr.Radio(
+                [3, 4, 5, 6, 7, 8, 9], value=4, label="Folds", info="Number of cross-validation splits"
+            )
+    counter = 0
+    for _ in iter_grid(len(input_models) // 2 + len(input_models) % 2, 2):
+        if counter >= len(input_models):
+            break
+        input_model = input_models[counter]
+        plot = gr.Plot(label=input_model)
+        fn = partial(auc_analysis, cls_name=input_model)
+        input_data.change(fn=fn, inputs=[input_data, n_folds], outputs=plot)
+        n_folds.change(fn=fn, inputs=[input_data, n_folds], outputs=plot)
+        counter += 1
+if __name__ == "__main__":
+    demo.launch()