caliex committed on
Commit
85feb97
1 Parent(s): f594e0b

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -0
app.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import numpy as np
3
+ from sklearn.datasets import load_iris
4
+ from sklearn.pipeline import Pipeline
5
+ from sklearn.feature_selection import SelectPercentile, f_classif
6
+ from sklearn.preprocessing import StandardScaler
7
+ from sklearn.svm import SVC
8
+ import matplotlib.pyplot as plt
9
+ from sklearn.model_selection import cross_val_score
10
+
11
def svm_anova_app(percentiles):
    """Plot cross-validated SVM accuracy against the percentile of features kept.

    The iris data is padded with 36 random, non-informative columns. A pipeline
    of ANOVA ``SelectPercentile`` -> ``StandardScaler`` -> ``SVC`` is then
    scored with ``cross_val_score`` once per requested percentile, and the mean
    score (with its standard deviation as the error bar) is plotted.

    Args:
        percentiles: Iterable of percentile values, given as numbers or as
            numeric strings (the checkbox widget supplies strings). ``None``
            or an empty selection produces an empty plot.

    Returns:
        The path of the saved figure, ``"plot.png"``.

    Raises:
        ValueError: If an entry of ``percentiles`` cannot be converted to
            ``float``.
    """
    # The UI hands over strings. Plotting strings makes matplotlib use a
    # categorical axis (positions 0..n-1 in selection order), which does not
    # line up with the numeric 0..100 ticks set below. Convert once and sort
    # so the curve is drawn left to right on a true numeric axis.
    percentile_values = sorted(float(p) for p in (percentiles or []))

    X, y = load_iris(return_X_y=True)

    # Add non-informative features
    rng = np.random.RandomState(0)
    X = np.hstack((X, 2 * rng.random((X.shape[0], 36))))

    # Create a feature-selection transform, a scaler, and an instance of SVM
    clf = Pipeline([
        ("anova", SelectPercentile(f_classif)),
        ("scaler", StandardScaler()),
        ("svc", SVC(gamma="auto")),
    ])

    score_means = []
    score_stds = []

    for percentile in percentile_values:
        clf.set_params(anova__percentile=percentile)
        this_scores = cross_val_score(clf, X, y)
        score_means.append(this_scores.mean())
        score_stds.append(this_scores.std())

    # Draw on an explicit figure rather than pyplot's implicit "current"
    # figure, and always close it so a failed save does not leak the figure.
    fig, ax = plt.subplots()
    try:
        ax.errorbar(percentile_values, score_means, np.array(score_stds))
        ax.set_title("Performance of the SVM-Anova varying the percentile of features selected")
        ax.set_xticks(np.linspace(0, 100, 11, endpoint=True))
        ax.set_xlabel("Percentile")
        ax.set_ylabel("Accuracy Score")
        ax.axis("tight")

        # Save the plot to a file
        fig.savefig("plot.png")
    finally:
        plt.close(fig)

    return "plot.png"
46
+
47
# Percentile options offered in the UI. The checkbox group passes the selected
# entries to svm_anova_app as a list of these strings.
PERCENTILE_CHOICES = [
    '1', '3', '6', '10', '15', '20', '25', '30', '35', '40', '45',
    '50', '55', '60', '65', '70', '75', '80', '85', '90', '95', '100',
]

iface = gr.Interface(
    fn=svm_anova_app,
    # Use the top-level component: the legacy `gr.inputs` namespace is
    # deprecated in Gradio 3.x and no longer available in Gradio 4.
    inputs=gr.CheckboxGroup(choices=PERCENTILE_CHOICES, label="Percentiles"),
    outputs="image",
    title="SVM-Anova Performance",
    description="This example shows how to perform univariate feature selection before running a SVC (support vector classifier) to improve the classification scores. We use the iris dataset (4 features) and add 36 non-informative features. We can find that our model achieves best performance when we select around 10 percent of features. See the original scikit-learn example here: https://scikit-learn.org/stable/auto_examples/svm/plot_svm_anova.html",
)

# Start the web server for the demo.
iface.launch()