# Spaces: sklearn-docs / classification -- Running
# File size: 5,272 Bytes
# bc83f23

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_moons, make_circles, make_classification
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.inspection import DecisionBoundaryDisplay
from sklearn.datasets import make_blobs, make_circles, make_moons
import gradio as gr
import math
from functools import partial



### DATASETS

def normalize(X):
    """Return ``X`` transformed by a ``StandardScaler`` fitted on ``X`` itself."""
    scaler = StandardScaler()
    return scaler.fit_transform(X)


def linearly_separable():
    """Build the noise-shifted two-feature classification dataset.

    Returns
    -------
    tuple
        ``(X, y)`` from ``make_classification`` after ``X`` has been shifted
        by uniform noise scaled by 2.
    """
    features, targets = make_classification(
        n_features=2,
        n_informative=2,
        n_redundant=0,
        n_clusters_per_class=1,
        random_state=1,
    )
    # Seeded generator so the added noise is the same on every call.
    noise_source = np.random.RandomState(2)
    features += 2 * noise_source.uniform(size=features.shape)
    return features, targets

# Dataset name -> ``(X, y)`` pair, generated once when the module is imported.
# train_models() looks datasets up here by name.
DATA_MAPPING = {}
DATA_MAPPING["Moons"] = make_moons(noise=0.3, random_state=0)
DATA_MAPPING["Circles"] = make_circles(noise=0.2, factor=0.5, random_state=1)
DATA_MAPPING["Linearly Separable Random Dataset"] = linearly_separable()


#### MODELS

def get_groundtruth_model(X, labels):
    """Wrap the true labels in a minimal model-like object.

    Parameters
    ----------
    X
        Unused; accepted so the signature takes data and labels together.
    labels
        The true labels to expose.

    Returns
    -------
    Dummy
        An object whose only attribute, ``labels_``, is ``labels`` itself.
    """
    # Stand-in "model": it carries the labels and nothing else.
    class Dummy:
        def __init__(self, y):
            self.labels_ = y

    truth = Dummy(labels)
    return truth
    
# Positional view of the datasets in DATA_MAPPING (moons, circles, linearly
# separable, in that order). Derived from DATA_MAPPING rather than rebuilt so
# the two collections cannot drift apart and each dataset is generated once.
DATASETS = list(DATA_MAPPING.values())
# Display name -> model. Every entry except "Ground Truth" is an estimator
# instance created once at import time; "Ground Truth" maps to the helper
# function itself and is special-cased by name in train_models().
NAME_CLF_MAPPING = {
    "Ground Truth": get_groundtruth_model,
    "Nearest Neighbors": KNeighborsClassifier(n_neighbors=3),
    "Linear SVM": SVC(kernel="linear", C=0.025),
    "RBF SVM": SVC(gamma=2, C=1),
    "Gaussian Process": GaussianProcessClassifier(
        kernel=1.0 * RBF(length_scale=1.0)
    ),
    "Decision Tree": DecisionTreeClassifier(max_depth=5),
    "Random Forest": RandomForestClassifier(
        max_depth=5,
        n_estimators=10,
        max_features=1,
    ),
    "Neural Net": MLPClassifier(alpha=1, max_iter=1000),
    "AdaBoost": AdaBoostClassifier(),
    "Naive Bayes": GaussianNB(),
}



#### PLOT
# Passed as ``figsize`` to every figure created in train_models().
FIGSIZE = (7, 7)
# NOTE(review): ``figure`` and ``i`` are not referenced anywhere else in the
# visible code, and creating ``figure`` opens a 25x10 canvas at import time.
# They look like leftovers -- TODO confirm nothing else uses them, then remove.
figure = plt.figure(figsize=(25, 10))
i = 1




def train_models(selected_data, clf_name):
    """Build the figure for one (dataset, model) pair.

    The selected dataset is standardized and split 60/40 into train and test
    parts with a fixed seed. For ``"Ground Truth"`` the labelled points are
    scattered on the axes; for any other name the mapped classifier is fitted
    on the training part and its decision boundary is drawn.

    Parameters
    ----------
    selected_data : str
        Key into ``DATA_MAPPING``.
    clf_name : str
        Key into ``NAME_CLF_MAPPING``.

    Returns
    -------
    matplotlib.figure.Figure
        A newly created figure of size ``FIGSIZE``.

    Raises
    ------
    KeyError
        If ``clf_name`` or ``selected_data`` is not a known key.
    """
    clf = NAME_CLF_MAPPING[clf_name]

    X, y = DATA_MAPPING[selected_data]
    X = StandardScaler().fit_transform(X)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.4, random_state=42
    )

    if clf_name != "Ground Truth":
        # Fits the shared instance stored in NAME_CLF_MAPPING in place.
        clf.fit(X_train, y_train)

        fig, ax = plt.subplots(figsize=FIGSIZE)
        ax.set_title(clf_name, fontsize=10)
        # from_estimator() already draws onto ``ax``. The previous trailing
        # ``.plot()`` redrew with ax=None, which opens a second figure that
        # was never returned.
        DecisionBoundaryDisplay.from_estimator(
            clf, X, cmap=plt.cm.RdBu, alpha=0.8, ax=ax, eps=0.5
        )
        return fig

    # Ground truth: show the labelled points of the *selected* dataset.
    # The axis limits must come from the same standardized X that is plotted;
    # previously they were taken from the first entry of DATASETS (unscaled
    # moons), so points could fall outside the visible area.
    x_min, x_max = X[:, 0].min() - 0.5, X[:, 0].max() + 0.5
    y_min, y_max = X[:, 1].min() - 0.5, X[:, 1].max() + 0.5
    cm_bright = ListedColormap(["#FF0000", "#0000FF"])

    fig, ax = plt.subplots(figsize=FIGSIZE)
    ax.set_title("Input data")
    # Training points are opaque, testing points semi-transparent.
    ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright, edgecolors="k")
    ax.scatter(
        X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright, alpha=0.6, edgecolors="k"
    )
    ax.set_xlim(x_min, x_max)
    ax.set_ylim(y_min, y_max)
    ax.set_xticks(())
    ax.set_yticks(())
    return fig



        ###########
# Blurb rendered with gr.Markdown below the page title.
description = "Learn how different statistical classifiers perform in different datasets."

def iter_grid(n_rows, n_cols):
    """Yield once per cell of an ``n_rows`` x ``n_cols`` Gradio grid.

    Each ``yield`` happens while a ``gr.Column`` nested inside a ``gr.Row``
    is open, so components the caller creates in the loop body are placed in
    that cell. Nothing is yielded besides ``None``.
    """
    for _row_index in range(n_rows):
        row = gr.Row()
        with row:
            for _col_index in range(n_cols):
                cell = gr.Column()
                with cell:
                    yield

title = "Classification"
with gr.Blocks(title=title) as demo:
    gr.HTML(f"<b>{title}</b>")
    gr.Markdown(description)

    input_models = list(NAME_CLF_MAPPING)
    # Choices come straight from DATA_MAPPING so every option shown is a key
    # train_models() can look up.
    input_data = gr.Radio(
        choices=list(DATA_MAPPING),
        value="Moons"
    )
    counter = 0

    plot_run = gr.Button("Run")

    # One plot per model, laid out on a 2 x 5 grid.
    for _ in iter_grid(2, 5):
        if counter >= len(input_models):
            break

        input_model = input_models[counter]
        plot = gr.Plot(label=input_model)
        # Bind the model name now; the event supplies only the dataset name.
        fn = partial(train_models, clf_name=input_model)
        input_data.change(fn=fn, inputs=[input_data], outputs=plot)
        # The "Run" button previously had no handler, so clicking it did
        # nothing; it now redraws this plot for the currently selected dataset.
        plot_run.click(fn=fn, inputs=[input_data], outputs=plot)
        counter += 1

demo.launch(debug=True)