mervenoyan committed on
Commit
bc83f23
1 Parent(s): faa9c6e

initial commit

Browse files
Files changed (2) hide show
  1. app.py +173 -0
  2. requirements.txt +2 -0
app.py ADDED
@@ -0,0 +1,173 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import matplotlib.pyplot as plt
3
+ from matplotlib.colors import ListedColormap
4
+ from sklearn.model_selection import train_test_split
5
+ from sklearn.preprocessing import StandardScaler
6
+ from sklearn.datasets import make_moons, make_circles, make_classification
7
+ from sklearn.neural_network import MLPClassifier
8
+ from sklearn.neighbors import KNeighborsClassifier
9
+ from sklearn.svm import SVC
10
+ from sklearn.gaussian_process import GaussianProcessClassifier
11
+ from sklearn.gaussian_process.kernels import RBF
12
+ from sklearn.tree import DecisionTreeClassifier
13
+ from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
14
+ from sklearn.naive_bayes import GaussianNB
15
+ from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
16
+ from sklearn.inspection import DecisionBoundaryDisplay
17
+ from sklearn.datasets import make_blobs, make_circles, make_moons
18
+ import gradio as gr
19
+ import math
20
+ from functools import partial
21
+
22
+
23
+
24
+ ### DATASETS
25
+
26
def normalize(X):
    """Fit a fresh ``StandardScaler`` on ``X`` and return the transformed data.

    The scaler is created per call and discarded afterwards, so nothing
    learned from ``X`` is kept between calls.
    """
    scaler = StandardScaler()
    return scaler.fit_transform(X)
28
+
29
+
30
def linearly_separable():
    """Build the dataset shown as "Linearly Separable Random Dataset".

    Returns
    -------
    tuple
        ``(X, y)``: a two-feature ``make_classification`` sample (seed 1)
        whose features have been shifted by uniform noise drawn from a
        separately seeded ``RandomState(2)``. Both seeds are fixed, so every
        call produces the same data.
    """
    features, targets = make_classification(
        n_features=2,
        n_redundant=0,
        n_informative=2,
        random_state=1,
        n_clusters_per_class=1,
    )
    noise_source = np.random.RandomState(2)
    # Shift the features in place with uniform noise scaled by 2.
    features += 2 * noise_source.uniform(size=features.shape)
    return features, targets
38
+
39
# Datasets selectable in the UI, keyed by their display name. Each value is
# the (X, y) tuple returned by its generator; all three are built once, at
# import time, with fixed seeds.
DATA_MAPPING = {
    "Moons": make_moons(noise=0.3, random_state=0),
    "Circles": make_circles(noise=0.2, factor=0.5, random_state=1),
    "Linearly Separable Random Dataset": linearly_separable(),
}
44
+
45
+
46
+ #### MODELS
47
+
48
def get_groundtruth_model(X, labels):
    """Return a stand-in "model" that only carries the true labels.

    Parameters
    ----------
    X
        Unused; accepted so the call shape is ``(X, labels)``.
    labels
        The ground-truth labels. They are stored, unmodified, on the
        returned object as ``labels_``.

    Returns
    -------
    Dummy
        An object whose only attribute is ``labels_``.
    """

    class Dummy:
        """Minimal label holder used to show the true label distribution."""

        def __init__(self, y):
            # Store the constructor argument itself. The original ignored
            # ``y`` and silently captured ``labels`` from the enclosing scope.
            self.labels_ = y

    return Dummy(labels)
55
+
56
# The same (X, y) tuples as DATA_MAPPING, in the same order (Moons, Circles,
# linearly separable). Derived from DATA_MAPPING instead of calling the
# generators a second time, so the two collections cannot drift apart.
DATASETS = list(DATA_MAPPING.values())
61
# Display name -> classifier. Insertion order is the order in which the plots
# are laid out in the UI grid. "Ground Truth" maps to a plain function rather
# than an estimator; train_models special-cases that name and never fits it.
# NOTE: the estimator instances are module-level and are re-fitted in place
# every time train_models runs for that name.
NAME_CLF_MAPPING = {
    "Ground Truth": get_groundtruth_model,
    "Nearest Neighbors": KNeighborsClassifier(3),
    "Linear SVM": SVC(kernel="linear", C=0.025),
    "RBF SVM": SVC(gamma=2, C=1),
    "Gaussian Process": GaussianProcessClassifier(1.0 * RBF(1.0)),
    "Decision Tree": DecisionTreeClassifier(max_depth=5),
    "Random Forest": RandomForestClassifier(
        max_depth=5, n_estimators=10, max_features=1
    ),
    "Neural Net": MLPClassifier(alpha=1, max_iter=1000),
    "AdaBoost": AdaBoostClassifier(),
    "Naive Bayes": GaussianNB(),
}
73
+
74
+
75
+
76
+ #### PLOT
77
# Size handed to plt.subplots(figsize=...) for every plot built by train_models.
FIGSIZE = (7, 7)
# NOTE(review): `figure` and `i` are not referenced anywhere else in this
# file; they look like leftovers. Confirm nothing external relies on them
# before removing.
figure = plt.figure(figsize=(25, 10))
i = 1
80
+
81
+
82
+
83
+
84
def train_models(selected_data, clf_name):
    """Build the figure for one (dataset, classifier) cell of the demo.

    Parameters
    ----------
    selected_data : str
        Key into ``DATA_MAPPING`` naming the dataset to use.
    clf_name : str
        Key into ``NAME_CLF_MAPPING``. The special name ``"Ground Truth"``
        fits nothing and instead plots the labelled points themselves.

    Returns
    -------
    matplotlib.figure.Figure
        For a classifier: its decision boundary over the standardized data,
        after fitting on the training split. For ``"Ground Truth"``: a
        scatter of the training points and the (more transparent) test points.

    Raises
    ------
    KeyError
        If ``selected_data`` or ``clf_name`` is not a known key.
    """
    X, y = DATA_MAPPING[selected_data]
    X = StandardScaler().fit_transform(X)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.4, random_state=42
    )

    if clf_name != "Ground Truth":
        clf = NAME_CLF_MAPPING[clf_name]
        clf.fit(X_train, y_train)

        fig, ax = plt.subplots(figsize=FIGSIZE)
        ax.set_title(clf_name, fontsize=10)
        # from_estimator already draws onto ``ax``. The original chained
        # ``.plot()`` with no ``ax``, which redraws the boundary on a new,
        # never-returned figure.
        DecisionBoundaryDisplay.from_estimator(
            clf, X, cmap=plt.cm.RdBu, alpha=0.8, ax=ax, eps=0.5
        )
        return fig

    # Ground truth: show the selected dataset only. The original looped over
    # DATASETS here, replacing X/y and the axis limits with unrelated,
    # unscaled data while still scattering the selected dataset's split.
    x_min, x_max = X[:, 0].min() - 0.5, X[:, 0].max() + 0.5
    y_min, y_max = X[:, 1].min() - 0.5, X[:, 1].max() + 0.5
    cm_bright = ListedColormap(["#FF0000", "#0000FF"])

    fig, ax = plt.subplots(figsize=FIGSIZE)
    ax.set_title("Input data")
    # Training points.
    ax.scatter(
        X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright, edgecolors="k"
    )
    # Testing points, drawn more transparent to tell them apart.
    ax.scatter(
        X_test[:, 0],
        X_test[:, 1],
        c=y_test,
        cmap=cm_bright,
        alpha=0.6,
        edgecolors="k",
    )
    ax.set_xlim(x_min, x_max)
    ax.set_ylim(y_min, y_max)
    ax.set_xticks(())
    ax.set_yticks(())
    return fig
134
+
135
+
136
+
137
+ ###########
138
+ description = "Learn how different statistical classifiers perform in different datasets."
139
+
140
def iter_grid(n_rows, n_cols):
    """Lay out an ``n_rows`` x ``n_cols`` grid of Gradio rows and columns.

    Yields once per cell, while that cell's ``gr.Row`` and ``gr.Column``
    contexts are both open, so the caller can create a component for the cell
    inside its loop body. The yielded value itself is ``None``.
    """
    for _row in range(n_rows):
        with gr.Row():
            for _col in range(n_cols):
                with gr.Column():
                    yield None
147
+
148
title = "Classification"
with gr.Blocks(title=title) as demo:
    gr.HTML(f"<b>{title}</b>")
    gr.Markdown(description)

    input_models = list(NAME_CLF_MAPPING)
    # Offer exactly the datasets train_models can look up.
    input_data = gr.Radio(choices=list(DATA_MAPPING), value="Moons")

    plot_run = gr.Button("Run")

    # One plot per classifier, placed cell by cell in a 2 x 5 grid. The grid
    # iterator comes first in zip() so that, once every cell has been used,
    # it is advanced one last time and closes its Row/Column contexts.
    for _, input_model in zip(iter_grid(2, 5), input_models):
        plot = gr.Plot(label=input_model)
        fn = partial(train_models, clf_name=input_model)
        input_data.change(fn=fn, inputs=[input_data], outputs=plot)
        # The "Run" button was created but never connected to anything, so
        # clicking it did nothing. Wire it so the current selection
        # (including the initial "Moons") can be plotted on demand.
        plot_run.click(fn=fn, inputs=[input_data], outputs=plot)

demo.launch(debug=True)
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
1
+ scikit-learn
2
+ matplotlib