NTaylor committed on
Commit ad0428a
1 Parent(s): 97f5149

Upload app.py

Files changed (1)
  1. app.py +104 -0
app.py ADDED
"""
=======================================================
Comparison of LDA and PCA 2D projection of Iris dataset
=======================================================

The Iris dataset represents 3 kinds of Iris flowers (Setosa, Versicolour
and Virginica) with 4 attributes: sepal length, sepal width, petal length
and petal width.

Principal Component Analysis (PCA) applied to this data identifies the
combination of attributes (principal components, or directions in the
feature space) that account for the most variance in the data. Here we
plot the different samples on the first two principal components.

Linear Discriminant Analysis (LDA) tries to identify attributes that
account for the most variance *between classes*. In particular, LDA,
in contrast to PCA, is a supervised method that uses the known class labels.
"""

import matplotlib.pyplot as plt
import gradio as gr
from sklearn import datasets
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

# load data
iris = datasets.load_iris()

X = iris.data
y = iris.target
target_names = iris.target_names

# fit PCA
pca = PCA(n_components=2)
X_r = pca.fit(X).transform(X)

# fit LDA
lda = LinearDiscriminantAnalysis(n_components=2)
X_r2 = lda.fit(X, y).transform(X)

# Percentage of variance explained by each component
print(
    "explained variance ratio (first two components): %s"
    % str(pca.explained_variance_ratio_)
)
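# Optional addition (not part of the original example): LDA exposes the same
# attribute when fitted with the default "svd" solver, so the ratio explained
# by the two discriminant directions can be printed alongside PCA's.
print(
    "explained variance ratio (LDA, first two discriminants): %s"
    % str(lda.explained_variance_ratio_)
)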

# save models using skops
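# A minimal sketch of how the fitted models could be saved with skops
# (an assumption: skops is not listed as a dependency of this demo, so the
# lines are left commented out):
# from skops.io import dump
# dump(pca, "pca.skops")
# dump(lda, "lda.skops")
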

def plot_lda_pca():
    # draw the PCA and LDA projections side by side so both can be compared
    fig, (ax_pca, ax_lda) = plt.subplots(1, 2, facecolor="w", figsize=(10, 5))
    colors = ["navy", "turquoise", "darkorange"]
    lw = 2

    for color, i, target_name in zip(colors, [0, 1, 2], target_names):
        ax_pca.scatter(
            X_r[y == i, 0], X_r[y == i, 1], color=color, alpha=0.8, lw=lw, label=target_name
        )
    ax_pca.legend(loc="best", shadow=False, scatterpoints=1)
    ax_pca.set_title("PCA of IRIS dataset")

    for color, i, target_name in zip(colors, [0, 1, 2], target_names):
        ax_lda.scatter(
            X_r2[y == i, 0], X_r2[y == i, 1], alpha=0.8, color=color, label=target_name
        )
    ax_lda.legend(loc="best", shadow=False, scatterpoints=1)
    ax_lda.set_title("LDA of IRIS dataset")

    return fig


title = "2-D projection of Iris dataset using LDA and PCA"
with gr.Blocks(title=title) as demo:
    gr.Markdown(f"# {title}")
    gr.Markdown(
        " This example shows how Principal Component Analysis (PCA) and Linear Discriminant Analysis (LDA)"
        " project the Iris dataset down to two dimensions. <br>"
        " PCA picks the directions that account for the most variance in the data, without using the labels. <br>"
        " LDA is supervised: it uses the known class labels to find the directions that best separate the classes. <br>"
        " Press **Run** to plot the samples on the two components found by each method. <br>"
        " For further details please see the sklearn docs:"
    )

    gr.Markdown(" **[Demo is based on sklearn docs found here](https://scikit-learn.org/stable/auto_examples/decomposition/plot_pca_vs_lda.html#sphx-glr-auto-examples-decomposition-plot-pca-vs-lda-py)** <br>")

    gr.Markdown(" **Dataset** : The Iris dataset contains 150 samples from 3 classes of Iris flowers (Setosa, Versicolour and Virginica), each described by 4 attributes: sepal length, sepal width, petal length and petal width. <br>")
    gr.Markdown(
        " Because LDA uses the class labels while PCA does not, the LDA projection tends to separate the three species"
        " more clearly than the PCA projection.<br>"
    )

    # with gr.Row():
    #     n_samples = gr.Slider(value=100, minimum=10, maximum=1000, step=10, label="n_samples")
    #     n_components = gr.Slider(value=2, minimum=1, maximum=20, step=1, label="n_components")
    #     n_features = gr.Slider(value=5, minimum=5, maximum=25, step=1, label="n_features")


    # options for n_components

    btn = gr.Button(value="Run")
    btn.click(plot_lda_pca, outputs=gr.Plot(label="PCA vs LDA projection"))


demo.launch()