Spaces:

sklearn-docs
/

classification

Running

App Files Files Community

mervenoyan committed on Jul 27, 2022

Commit

bc83f23

1 Parent(s): faa9c6e

initial commit

Browse files

Files changed (2) hide show

app.py +173 -0
requirements.txt +2 -0

app.py ADDED Viewed

	@@ -0,0 +1,173 @@

+import numpy as np
+import matplotlib.pyplot as plt
+from matplotlib.colors import ListedColormap
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import StandardScaler
+from sklearn.datasets import make_moons, make_circles, make_classification
+from sklearn.neural_network import MLPClassifier
+from sklearn.neighbors import KNeighborsClassifier
+from sklearn.svm import SVC
+from sklearn.gaussian_process import GaussianProcessClassifier
+from sklearn.gaussian_process.kernels import RBF
+from sklearn.tree import DecisionTreeClassifier
+from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
+from sklearn.naive_bayes import GaussianNB
+from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
+from sklearn.inspection import DecisionBoundaryDisplay
+from sklearn.datasets import make_blobs, make_circles, make_moons
+import gradio as gr
+import math
+from functools import partial
+### DATASETS
+def normalize(X):
+    """Return ``X`` transformed by a freshly fitted ``StandardScaler``."""
+    scaler = StandardScaler()
+    return scaler.fit_transform(X)
+def linearly_separable():
+    """Build the two-feature "linearly separable" toy dataset.
+
+    Returns an ``(X, y)`` tuple: the output of ``make_classification`` with
+    uniform jitter (``2 * rng.uniform``) added in place to every feature value.
+    """
+    features, targets = make_classification(
+        n_features=2,
+        n_redundant=0,
+        n_informative=2,
+        random_state=1,
+        n_clusters_per_class=1,
+    )
+    # Seeded generator, so the jitter is the same on every call.
+    jitter = np.random.RandomState(2).uniform(size=features.shape)
+    features += 2 * jitter
+    return features, targets
+# Dataset display name -> precomputed (X, y) pair. The keys are the same
+# strings offered as choices in the dataset radio group further down.
+DATA_MAPPING = {
+    "Moons": make_moons(noise=0.3, random_state=0),
+    "Circles":make_circles(noise=0.2, factor=0.5, random_state=1),
+    "Linearly Separable Random Dataset": linearly_separable(),
+}
+#### MODELS
+def get_groundtruth_model(X, labels):
+    """Return a stand-in "model" object that only carries the true labels.
+
+    ``X`` is accepted for signature symmetry but is not used. The returned
+    object exposes the given ``labels`` unchanged as its ``labels_`` attribute.
+    """
+    # dummy model to show true label distribution
+    class Dummy:
+        def __init__(self, y):
+            # Store the constructor argument itself instead of silently
+            # ignoring it and reading ``labels`` from the enclosing scope.
+            self.labels_ = y
+    return Dummy(labels)
+# Same (X, y) pairs as DATA_MAPPING, in the same order (Moons, Circles,
+# Linearly Separable). Derived from the mapping instead of regenerating each
+# dataset a second time, so the two collections cannot drift apart.
+DATASETS = list(DATA_MAPPING.values())
+# Plot label -> classifier. Every value is a ready-to-fit estimator instance
+# except "Ground Truth", which maps to a factory function; train_models
+# branches on that name before it ever calls ``fit``.
+NAME_CLF_MAPPING = {
+    "Ground Truth": get_groundtruth_model,
+    "Nearest Neighbors": KNeighborsClassifier(3),
+    "Linear SVM": SVC(kernel="linear", C=0.025),
+    "RBF SVM": SVC(gamma=2, C=1),
+    "Gaussian Process": GaussianProcessClassifier(1.0 * RBF(1.0)),
+    "Decision Tree": DecisionTreeClassifier(max_depth=5),
+    "Random Forest": RandomForestClassifier(
+        max_depth=5, n_estimators=10, max_features=1
+    ),
+    "Neural Net": MLPClassifier(alpha=1, max_iter=1000),
+    "AdaBoost": AdaBoostClassifier(),
+    "Naive Bayes": GaussianNB(),
+}
+#### PLOT
+# (width, height) passed as ``figsize`` to each per-model figure.
+FIGSIZE = (7, 7)
+# NOTE(review): ``figure`` and ``i`` are not referenced anywhere else in the
+# visible code; they look like leftovers and may be removable. Confirm first.
+figure = plt.figure(figsize=(25, 10))
+i = 1
+def train_models(selected_data, clf_name):
+    """Build the figure shown for one classifier on one dataset.
+
+    ``selected_data`` is a key of ``DATA_MAPPING`` and ``clf_name`` a key of
+    ``NAME_CLF_MAPPING``; an unknown key raises ``KeyError``. The dataset is
+    standardized and split 60/40 into train/test with a fixed seed.
+
+    For ``"Ground Truth"`` nothing is fitted: the train and test points of the
+    selected dataset are scattered, colored by their true label. For every
+    other name the classifier is fitted on the training split, its decision
+    boundary is drawn, and the test accuracy is written in the lower-right
+    corner of the axes.
+
+    Returns the matplotlib figure.
+    """
+    clf = NAME_CLF_MAPPING[clf_name]
+    X, y = DATA_MAPPING[selected_data]
+    X = normalize(X)
+    X_train, X_test, y_train, y_test = train_test_split(
+        X, y, test_size=0.4, random_state=42
+    )
+    fig, ax = plt.subplots(figsize=FIGSIZE)
+    if clf_name == "Ground Truth":
+        # Axis limits come from the *selected*, standardized dataset, i.e. the
+        # same points that are scattered below (previously they were always
+        # taken from the first, unscaled entry of DATASETS).
+        x_min, x_max = X[:, 0].min() - 0.5, X[:, 0].max() + 0.5
+        y_min, y_max = X[:, 1].min() - 0.5, X[:, 1].max() + 0.5
+        cm_bright = ListedColormap(["#FF0000", "#0000FF"])
+        ax.set_title("Input data")
+        # Plot the training points
+        ax.scatter(
+            X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright, edgecolors="k"
+        )
+        # Plot the testing points (more transparent)
+        ax.scatter(
+            X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright, alpha=0.6, edgecolors="k"
+        )
+        ax.set_xlim(x_min, x_max)
+        ax.set_ylim(y_min, y_max)
+        ax.set_xticks(())
+        ax.set_yticks(())
+        return fig
+    clf.fit(X_train, y_train)
+    score = clf.score(X_test, y_test)
+    ax.set_title(clf_name, fontsize=10)
+    # from_estimator already draws onto ``ax``. Chaining ``.plot()`` without an
+    # ``ax`` would draw a second time into a brand-new stray figure.
+    DecisionBoundaryDisplay.from_estimator(
+        clf, X, cmap=plt.cm.RdBu, alpha=0.8, ax=ax, eps=0.5
+    )
+    # Show the held-out accuracy, which was computed but never displayed.
+    ax.text(
+        0.97,
+        0.03,
+        f"{score:.2f}",
+        transform=ax.transAxes,
+        size=15,
+        horizontalalignment="right",
+        verticalalignment="bottom",
+    )
+    return fig
+# Introductory text rendered as Markdown at the top of the demo page.
+description = "Learn how different statistical classifiers perform in different datasets."
+def iter_grid(n_rows, n_cols):
+    """Yield once per cell of an ``n_rows`` x ``n_cols`` Gradio layout.
+
+    Each ``yield`` happens inside a ``gr.Column`` nested in a ``gr.Row``, so
+    whatever the consumer runs between iterations executes while those two
+    contexts are open. The yielded value itself is ``None``.
+    """
+    for _row in range(n_rows):
+        with gr.Row():
+            for _col in range(n_cols):
+                with gr.Column():
+                    yield None
+title = "Classification"
+# Build the page: a dataset selector, a "Run" button, and a 2 x 5 grid with
+# one plot per entry of NAME_CLF_MAPPING.
+with gr.Blocks(title=title) as demo:
+    gr.HTML(f"<b>{title}</b>")
+    gr.Markdown(description)
+    input_models = list(NAME_CLF_MAPPING)
+    # Choices come straight from DATA_MAPPING so the two cannot disagree.
+    input_data = gr.Radio(
+        choices=list(DATA_MAPPING),
+        value="Moons"
+    )
+    counter = 0
+    plot_run = gr.Button("Run")
+    for _ in iter_grid(2, 5):
+        # Stop once every model has a cell; remaining grid cells stay unused.
+        if counter >= len(input_models):
+            break
+        input_model = input_models[counter]
+        plot = gr.Plot(label=input_model)
+        # Bind the model name now; the dataset name arrives from the radio.
+        fn = partial(train_models, clf_name=input_model)
+        input_data.change(fn=fn, inputs=[input_data], outputs=plot)
+        # The button previously had no handler, so the default selection could
+        # never be plotted; clicking "Run" now renders every plot as well.
+        plot_run.click(fn=fn, inputs=[input_data], outputs=plot)
+        counter += 1
+demo.launch(debug=True)

requirements.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ scikit-learn
2	+ matplotlib