rtik007 committed on
Commit
428b8af
·
verified ·
1 Parent(s): 551187b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +67 -109
app.py CHANGED
@@ -1,45 +1,35 @@
1
  import numpy as np
2
  import matplotlib.pyplot as plt
3
- from threading import Thread
4
- from matplotlib.colors import ListedColormap
5
- from sklearn.datasets import make_moons, make_circles, make_classification
6
- from sklearn.datasets import make_blobs, make_circles, make_moons
7
- import gradio as gr
8
- import math
9
- from functools import partial
10
- import time
11
-
12
- import matplotlib
13
-
14
  from sklearn import svm
15
- from sklearn.datasets import make_moons, make_blobs
16
  from sklearn.covariance import EllipticEnvelope
17
  from sklearn.ensemble import IsolationForest
18
  from sklearn.neighbors import LocalOutlierFactor
19
  from sklearn.linear_model import SGDOneClassSVM
20
  from sklearn.kernel_approximation import Nystroem
21
  from sklearn.pipeline import make_pipeline
 
 
 
22
 
23
- def get_groundtruth_model(X, labels):
24
- # dummy model to show true label distribution
25
- class Dummy:
26
- def __init__(self, y):
27
- self.labels_ = labels
28
-
29
- return Dummy(labels)
30
-
31
- #### PLOT
32
- FIGSIZE = 10,10
33
- figure = plt.figure(figsize=(25, 10))
34
-
35
-
36
- def train_models(input_data, outliers_fraction, n_samples, clf_name):
37
  n_outliers = int(outliers_fraction * n_samples)
38
  n_inliers = n_samples - n_outliers
39
  blobs_params = dict(random_state=0, n_samples=n_inliers, n_features=2)
40
- NAME_CLF_MAPPING = {"Robust covariance": EllipticEnvelope(contamination=outliers_fraction),
41
- "One-Class SVM": svm.OneClassSVM(nu=outliers_fraction, kernel="rbf", gamma=0.1),
42
- "One-Class SVM (SGD)":make_pipeline(
 
 
 
 
 
 
 
 
 
 
43
  Nystroem(gamma=0.1, random_state=42, n_components=150),
44
  SGDOneClassSVM(
45
  nu=outliers_fraction,
@@ -51,110 +41,78 @@ def train_models(input_data, outliers_fraction, n_samples, clf_name):
51
  ),
52
  "Isolation Forest": IsolationForest(contamination=outliers_fraction, random_state=42),
53
  "Local Outlier Factor": LocalOutlierFactor(n_neighbors=35, contamination=outliers_fraction),
54
- }
55
- DATA_MAPPING = {
56
- "Central Blob":make_blobs(centers=[[0, 0], [0, 0]], cluster_std=0.5, **blobs_params)[0],
57
- "Two Blobs": make_blobs(centers=[[2, 2], [-2, -2]], cluster_std=[0.5, 0.5], **blobs_params)[0],
58
- "Blob with Noise": make_blobs(centers=[[2, 2], [-2, -2]], cluster_std=[1.5, 0.3], **blobs_params)[0],
59
- "Moons": 4.0
60
- * (
61
- make_moons(n_samples=n_samples, noise=0.05, random_state=0)[0]
62
- - np.array([0.5, 0.25])
63
- ),
64
- "Noise": 14.0 * (np.random.RandomState(42).rand(n_samples, 2) - 0.5),
65
  }
66
- DATASETS = [
67
- make_blobs(centers=[[0, 0], [0, 0]], cluster_std=0.5, **blobs_params)[0],
68
- make_blobs(centers=[[2, 2], [-2, -2]], cluster_std=[0.5, 0.5], **blobs_params)[0],
69
- make_blobs(centers=[[2, 2], [-2, -2]], cluster_std=[1.5, 0.3], **blobs_params)[0],
70
- 4.0
71
- * (
72
- make_moons(n_samples=n_samples, noise=0.05, random_state=0)[0]
73
- - np.array([0.5, 0.25])
74
- ),
75
- 14.0 * (np.random.RandomState(42).rand(n_samples, 2) - 0.5),
76
- ]
77
 
 
 
 
 
78
  xx, yy = np.meshgrid(np.linspace(-7, 7, 150), np.linspace(-7, 7, 150))
79
  clf = NAME_CLF_MAPPING[clf_name]
80
- plt.figure(figsize=(len(NAME_CLF_MAPPING) * 2 + 4, 12.5))
81
 
82
-
83
- plot_num = 1
84
- rng = np.random.RandomState(42)
85
- X = DATA_MAPPING[input_data]
86
- X = np.concatenate([X, rng.uniform(low=-6, high=6, size=(n_outliers, 2))], axis=0)
87
-
88
  t0 = time.time()
89
- clf.fit(X)
90
- t1 = time.time()
91
- # fit the data and tag outliers
92
  if clf_name == "Local Outlier Factor":
93
  y_pred = clf.fit_predict(X)
94
  else:
95
- y_pred = clf.fit(X).predict(X)
 
 
96
 
97
- # plot the levels lines and the points
98
- if clf_name != "Local Outlier Factor":
 
99
  Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
100
  Z = Z.reshape(xx.shape)
101
- plt.contour(xx, yy, Z, levels=[0], linewidths=10, colors="black")
102
 
103
  colors = np.array(["#377eb8", "#ff7f00"])
104
- plt.scatter(X[:, 0], X[:, 1], s=100, color=colors[(y_pred + 1) // 2])
105
-
106
  plt.xlim(-7, 7)
107
  plt.ylim(-7, 7)
108
  plt.xticks(())
109
  plt.yticks(())
110
- plt.text(
111
- 0.99,
112
- 0.01,
113
- ("%.2fs" % (t1 - t0)).lstrip("0"),
114
- transform=plt.gca().transAxes,
115
- size=60,
116
- horizontalalignment="right",
117
- )
118
- plot_num += 1
119
-
120
- return plt
121
 
122
- description = "Learn how different anomaly detection algorithms perform in different datasets."
 
 
123
 
124
- def iter_grid(n_rows, n_cols):
125
- # create a grid using gradio Block
126
- for _ in range(n_rows):
127
- with gr.Row():
128
- for _ in range(n_cols):
129
- with gr.Column():
130
- yield
131
-
132
- title = "🕵️‍♀️ compare anomaly detection algorithms 🕵️‍♂️"
133
  with gr.Blocks() as demo:
134
  gr.Markdown(f"## {title}")
135
  gr.Markdown(description)
136
-
137
- input_models = ["Robust covariance","One-Class SVM","One-Class SVM (SGD)","Isolation Forest",
138
- "Local Outlier Factor"]
139
  input_data = gr.Radio(
140
  choices=["Central Blob", "Two Blobs", "Blob with Noise", "Moons", "Noise"],
141
- value="Moons"
 
142
  )
143
- n_samples = gr.Slider(minimum=100, maximum=500, step=25, label="Number of Samples")
144
- outliers_fraction = gr.Slider(minimum=0.1, maximum=0.9, step=0.1, label="Fraction of Outliers")
145
- counter = 0
146
-
147
-
148
- for _ in iter_grid(5, 5):
149
- if counter >= len(input_models):
150
- break
151
-
152
- input_model = input_models[counter]
153
- plot = gr.Plot(label=input_model)
154
- fn = partial(train_models, clf_name=input_model)
155
- input_data.change(fn=fn, inputs=[input_data, outliers_fraction, n_samples], outputs=plot)
156
- n_samples.change(fn=fn, inputs=[input_data, outliers_fraction, n_samples], outputs=plot)
157
- outliers_fraction.change(fn=fn, inputs=[input_data, outliers_fraction, n_samples], outputs=plot)
158
- counter += 1
159
 
160
- demo.launch(enable_queue=True, debug=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import numpy as np
2
  import matplotlib.pyplot as plt
 
 
 
 
 
 
 
 
 
 
 
3
  from sklearn import svm
 
4
  from sklearn.covariance import EllipticEnvelope
5
  from sklearn.ensemble import IsolationForest
6
  from sklearn.neighbors import LocalOutlierFactor
7
  from sklearn.linear_model import SGDOneClassSVM
8
  from sklearn.kernel_approximation import Nystroem
9
  from sklearn.pipeline import make_pipeline
10
+ from sklearn.datasets import make_blobs, make_moons
11
+ import gradio as gr
12
+ import time
13
 
14
+ # Function to train models and generate plots
15
+ def train_models(input_data, outliers_fraction, n_samples, clf_name):
16
+ # Prepare data
 
 
 
 
 
 
 
 
 
 
 
17
  n_outliers = int(outliers_fraction * n_samples)
18
  n_inliers = n_samples - n_outliers
19
  blobs_params = dict(random_state=0, n_samples=n_inliers, n_features=2)
20
+
21
+ DATA_MAPPING = {
22
+ "Central Blob": make_blobs(centers=[[0, 0], [0, 0]], cluster_std=0.5, **blobs_params)[0],
23
+ "Two Blobs": make_blobs(centers=[[2, 2], [-2, -2]], cluster_std=[0.5, 0.5], **blobs_params)[0],
24
+ "Blob with Noise": make_blobs(centers=[[2, 2], [-2, -2]], cluster_std=[1.5, 0.3], **blobs_params)[0],
25
+ "Moons": 4.0 * (make_moons(n_samples=n_samples, noise=0.05, random_state=0)[0] - np.array([0.5, 0.25])),
26
+ "Noise": 14.0 * (np.random.RandomState(42).rand(n_samples, 2) - 0.5),
27
+ }
28
+
29
+ NAME_CLF_MAPPING = {
30
+ "Robust covariance": EllipticEnvelope(contamination=outliers_fraction),
31
+ "One-Class SVM": svm.OneClassSVM(nu=outliers_fraction, kernel="rbf", gamma=0.1),
32
+ "One-Class SVM (SGD)": make_pipeline(
33
  Nystroem(gamma=0.1, random_state=42, n_components=150),
34
  SGDOneClassSVM(
35
  nu=outliers_fraction,
 
41
  ),
42
  "Isolation Forest": IsolationForest(contamination=outliers_fraction, random_state=42),
43
  "Local Outlier Factor": LocalOutlierFactor(n_neighbors=35, contamination=outliers_fraction),
 
 
 
 
 
 
 
 
 
 
 
44
  }
 
 
 
 
 
 
 
 
 
 
 
45
 
46
+ X = DATA_MAPPING[input_data]
47
+ rng = np.random.RandomState(42)
48
+ X = np.concatenate([X, rng.uniform(low=-6, high=6, size=(n_outliers, 2))], axis=0)
49
+
50
  xx, yy = np.meshgrid(np.linspace(-7, 7, 150), np.linspace(-7, 7, 150))
51
  clf = NAME_CLF_MAPPING[clf_name]
 
52
 
 
 
 
 
 
 
53
  t0 = time.time()
 
 
 
54
  if clf_name == "Local Outlier Factor":
55
  y_pred = clf.fit_predict(X)
56
  else:
57
+ clf.fit(X)
58
+ y_pred = clf.predict(X)
59
+ t1 = time.time()
60
 
61
+ # Plot
62
+ plt.figure(figsize=(10, 10))
63
+ if clf_name != "Local Outlier Factor":
64
  Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
65
  Z = Z.reshape(xx.shape)
66
+ plt.contour(xx, yy, Z, levels=[0], linewidths=2, colors="black")
67
 
68
  colors = np.array(["#377eb8", "#ff7f00"])
69
+ plt.scatter(X[:, 0], X[:, 1], s=30, color=colors[(y_pred + 1) // 2])
70
+ plt.title(f"{clf_name} ({t1 - t0:.2f}s)")
71
  plt.xlim(-7, 7)
72
  plt.ylim(-7, 7)
73
  plt.xticks(())
74
  plt.yticks(())
75
+ return plt.gcf()
 
 
 
 
 
 
 
 
 
 
76
 
77
# Gradio interface: one shared set of controls drives one plot per detector.
description = "Compare how different anomaly detection algorithms perform on various datasets."
title = "🕵️‍♀️ Compare Anomaly Detection Algorithms 🕵️‍♂️"

with gr.Blocks() as demo:
    gr.Markdown(f"## {title}")
    gr.Markdown(description)

    # Inputs
    input_data = gr.Radio(
        choices=["Central Blob", "Two Blobs", "Blob with Noise", "Moons", "Noise"],
        value="Moons",
        label="Dataset",
    )
    n_samples = gr.Slider(minimum=100, maximum=500, step=25, value=300, label="Number of Samples")
    outliers_fraction = gr.Slider(minimum=0.1, maximum=0.9, step=0.1, value=0.2, label="Fraction of Outliers")

    # Models and their plots: one row (and one gr.Plot) per detector name.
    input_models = ["Robust covariance", "One-Class SVM", "One-Class SVM (SGD)", "Isolation Forest", "Local Outlier Factor"]
    plots = []
    for model_name in input_models:
        with gr.Row():
            plot = gr.Plot(label=model_name)
            plots.append((model_name, plot))

    def update(input_data, outliers_fraction, n_samples):
        """Re-run every detector and return one figure per plot component.

        The returned list is ordered like ``plots`` so that it lines up
        positionally with ``demo_outputs`` below.
        """
        return [
            train_models(input_data, outliers_fraction, n_samples, clf_name)
            for clf_name, _ in plots
        ]

    # Any change to a control refreshes all plots at once.
    inputs = [input_data, outliers_fraction, n_samples]
    demo_outputs = [plot for _, plot in plots]
    for control in (input_data, n_samples, outliers_fraction):
        control.change(fn=update, inputs=inputs, outputs=demo_outputs)

# `launch(enable_queue=True)` is deprecated in Gradio 3.x and rejected by
# Gradio 4; enabling the queue through `queue()` works on both.
demo.queue()
demo.launch(debug=True)