# classification / app.py
# (Hugging Face Spaces page header removed: upload by freddyaboulton, commit dfc2b86, 5.25 kB)
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_moons, make_circles, make_classification
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.inspection import DecisionBoundaryDisplay
from sklearn.datasets import make_blobs, make_circles, make_moons
import gradio as gr
import math
from functools import partial
### DATASETS
def normalize(X):
    """Return X standard-scaled (zero mean, unit variance per feature)."""
    scaler = StandardScaler()
    return scaler.fit_transform(X)
def linearly_separable():
    """Build a 2-feature, linearly separable classification dataset.

    Uniform noise is added on top of the generated features so the
    separation is not perfectly clean. Returns an (X, y) tuple.
    """
    features, labels = make_classification(
        n_features=2,
        n_redundant=0,
        n_informative=2,
        random_state=1,
        n_clusters_per_class=1,
    )
    noise_rng = np.random.RandomState(2)
    features = features + 2 * noise_rng.uniform(size=features.shape)
    return (features, labels)
# UI dataset name -> (X, y) tuple, generated once at import time.
_moons_data = make_moons(noise=0.3, random_state=0)
_circles_data = make_circles(noise=0.2, factor=0.5, random_state=1)
DATA_MAPPING = {
    "Moons": _moons_data,
    "Circles": _circles_data,
    "Linearly Separable Random Dataset": linearly_separable(),
}
#### MODELS
def get_groundtruth_model(X, labels):
    """Return a dummy "model" that exposes the true labels via ``.labels_``.

    Used to render the true label distribution alongside real classifiers.
    ``X`` is accepted only for signature compatibility with estimator
    factories and is not used.

    Parameters
    ----------
    X : array-like
        Ignored.
    labels : array-like
        Ground-truth labels to expose.

    Returns
    -------
    object with a ``labels_`` attribute holding ``labels``.
    """
    class Dummy:
        def __init__(self, y):
            # Bug fix: store the constructor argument instead of silently
            # closing over the enclosing `labels` while ignoring `y`.
            self.labels_ = y
    return Dummy(labels)
# The same three datasets, in a fixed order for iteration.
DATASETS = [
    make_moons(noise=0.3, random_state=0),
    make_circles(noise=0.2, factor=0.5, random_state=1),
    linearly_separable(),
]
# UI classifier name -> estimator instance (or, for "Ground Truth", a factory).
NAME_CLF_MAPPING = {
    "Ground Truth": get_groundtruth_model,
    "Nearest Neighbors": KNeighborsClassifier(3),
    "Linear SVM": SVC(kernel="linear", C=0.025),
    "RBF SVM": SVC(gamma=2, C=1),
    "Gaussian Process": GaussianProcessClassifier(1.0 * RBF(1.0)),
    "Decision Tree": DecisionTreeClassifier(max_depth=5),
    "Random Forest": RandomForestClassifier(
        max_depth=5, n_estimators=10, max_features=1
    ),
    "Neural Net": MLPClassifier(alpha=1, max_iter=1000),
    "AdaBoost": AdaBoostClassifier(),
    "Naive Bayes": GaussianNB(),
}
#### PLOT
# (width, height) in inches for each per-classifier figure created below.
FIGSIZE = 7,7
# NOTE(review): `figure` and `i` are never referenced again in this file —
# they look like leftovers from an earlier grid-plotting version; confirm
# before removing (plt.figure also sets matplotlib's current figure).
figure = plt.figure(figsize=(25, 10))
i = 1
def train_models(selected_data, clf_name):
    """Fit the named classifier on the chosen dataset and return its plot.

    Parameters
    ----------
    selected_data : str
        Key into ``DATA_MAPPING`` ("Moons", "Circles", ...).
    clf_name : str
        Key into ``NAME_CLF_MAPPING``. The special value "Ground Truth"
        plots the raw train/test points instead of a decision boundary.

    Returns
    -------
    matplotlib.figure.Figure
    """
    cm = plt.cm.RdBu
    cm_bright = ListedColormap(["#FF0000", "#0000FF"])
    clf = NAME_CLF_MAPPING[clf_name]
    X, y = DATA_MAPPING[selected_data]
    X = StandardScaler().fit_transform(X)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.4, random_state=42
    )
    # Axis limits from the *normalized* data so the scattered (normalized)
    # points always fall inside the view.
    x_min, x_max = X[:, 0].min() - 0.5, X[:, 0].max() + 0.5
    y_min, y_max = X[:, 1].min() - 0.5, X[:, 1].max() + 0.5
    if clf_name != "Ground Truth":
        clf.fit(X_train, y_train)
        score = clf.score(X_test, y_test)
        fig, ax = plt.subplots(figsize=FIGSIZE)
        # Bug fix: `score` was computed but never displayed.
        ax.set_title(f"{clf_name} (test accuracy: {score:.2f})", fontsize=10)
        DecisionBoundaryDisplay.from_estimator(
            clf, X, cmap=cm, alpha=0.8, ax=ax, eps=0.5
        ).plot()
        return fig
    # "Ground Truth": plot the train/test split of the *selected* dataset.
    # Bug fix: the original looped over every entry of DATASETS but returned
    # on the first iteration, so the axis limits always came from the
    # (unnormalized) Moons data regardless of which dataset was selected.
    fig, ax = plt.subplots(figsize=FIGSIZE)
    ax.set_title("Input data")
    # Training points at full opacity, test points faded.
    ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright, edgecolors="k")
    ax.scatter(
        X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright, alpha=0.6, edgecolors="k"
    )
    ax.set_xlim(x_min, x_max)
    ax.set_ylim(y_min, y_max)
    ax.set_xticks(())
    ax.set_yticks(())
    return fig
###########
# Subtitle shown under the app title in the Gradio UI.
description = "Learn how different statistical classifiers perform in different datasets."
def iter_grid(n_rows, n_cols):
    """Yield once per cell of an ``n_rows`` x ``n_cols`` gradio grid.

    Each ``yield`` happens inside a fresh ``gr.Column`` nested in its
    ``gr.Row``, so whatever the caller instantiates at that iteration is
    rendered into the next grid cell.
    """
    for _row in range(n_rows):
        with gr.Row():
            for _col in range(n_cols):
                with gr.Column():
                    yield
title = "Compare Classifiers!"

with gr.Blocks(title=title) as demo:
    gr.Markdown(f"## {title}")
    gr.Markdown(description)

    input_models = list(NAME_CLF_MAPPING)
    input_data = gr.Radio(
        choices=["Moons", "Circles", "Linearly Separable Random Dataset"],
        value="Moons",
    )

    # Walk the 2x5 grid, one plot per classifier; stop once every
    # classifier has been assigned a cell.
    remaining_models = iter(input_models)
    for _ in iter_grid(2, 5):
        model_name = next(remaining_models, None)
        if model_name is None:
            break
        plot = gr.Plot(label=model_name)
        # partial binds clf_name eagerly, sidestepping the late-binding
        # closure pitfall inside the loop.
        input_data.change(
            fn=partial(train_models, clf_name=model_name),
            inputs=[input_data],
            outputs=plot,
        )

demo.launch(debug=True)