NTaylor committed on
Commit
b0661c4
·
0 Parent(s):

Duplicate from NTaylor/pca_vs_lda

Browse files
Files changed (4) hide show
  1. .gitattributes +34 -0
  2. README.md +13 -0
  3. app.py +103 -0
  4. requirements.txt +3 -0
.gitattributes ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Pca Vs Lda
3
+ emoji: 🐨
4
+ colorFrom: blue
5
+ colorTo: purple
6
+ sdk: gradio
7
+ sdk_version: 3.27.0
8
+ app_file: app.py
9
+ pinned: false
10
+ duplicated_from: NTaylor/pca_vs_lda
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ =======================================================
3
+ Comparison of LDA and PCA 2D projection of Iris dataset
4
+ =======================================================
5
+
6
+ The Iris dataset represents 3 kind of Iris flowers (Setosa, Versicolour
7
+ and Virginica) with 4 attributes: sepal length, sepal width, petal length
8
+ and petal width.
9
+
10
+ Principal Component Analysis (PCA) applied to this data identifies the
11
+ combination of attributes (principal components, or directions in the
12
+ feature space) that account for the most variance in the data. Here we
13
+ plot the different samples on the 2 first principal components.
14
+
15
+ Linear Discriminant Analysis (LDA) tries to identify attributes that
16
+ account for the most variance *between classes*. In particular,
17
+ LDA, in contrast to PCA, is a supervised method, using known class labels.
18
+
19
+ """
20
+
21
+ import matplotlib.pyplot as plt
22
+ import gradio as gr
23
+ from sklearn import datasets
24
+ from sklearn.decomposition import PCA
25
+ from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
26
+
27
+ # load data
28
+ iris = datasets.load_iris()
29
+
30
+ X = iris.data
31
+ y = iris.target
32
+ target_names = iris.target_names
33
+
34
+ # fit PCA
35
+ pca = PCA(n_components=2)
36
+ X_r = pca.fit(X).transform(X)
37
+
38
+ # fit LDA
39
+ lda = LinearDiscriminantAnalysis(n_components=2)
40
+ X_r2 = lda.fit(X, y).transform(X)
41
+
42
+ # Percentage of variance explained for each components
43
+ print(
44
+ "explained variance ratio (first two components): %s"
45
+ % str(pca.explained_variance_ratio_)
46
+ )
47
+
48
+ # save models using skop
49
+
50
+
51
+ def plot_lda_pca():
52
+
53
+ # fig = plt.figure(1, facecolor="w", figsize=(5,5))
54
+ fig, axes = plt.subplots(2,1, sharey= False, sharex=False, figsize = (8,6))
55
+ colors = ["navy", "turquoise", "darkorange"]
56
+ lw = 2
57
+
58
+ for color, i, target_name in zip(colors, [0, 1, 2], target_names):
59
+ axes[0].scatter(
60
+ X_r[y == i, 0], X_r[y == i, 1], color=color, alpha=0.8, lw=lw, label=target_name
61
+ )
62
+ axes[0].legend(loc="lower right")
63
+ axes[0].set_title("PCA of IRIS dataset")
64
+ for color, i, target_name in zip(colors, [0, 1, 2], target_names):
65
+ axes[1].scatter(
66
+ X_r2[y == i, 0], X_r2[y == i, 1], alpha=0.8, color=color, label=target_name
67
+ )
68
+ plt.legend(loc="best", shadow=False, scatterpoints=1)
69
+ axes[1].legend(loc="lower right")
70
+ axes[1].set_title("LDA of IRIS dataset")
71
+ plt.tight_layout()
72
+
73
+
74
+ return fig
75
+
76
+
77
+ title = "2-D projection of Iris dataset using LDA and PCA"
78
+ with gr.Blocks(title=title) as demo:
79
+ gr.Markdown(f"# {title}")
80
+ gr.Markdown(" This example shows how one can use Prinicipal Components Analysis (PCA) and Linear Discriminant Analysis (LDA) to cluster the Iris dataset based on provided features. <br>"
81
+ " PCA applied to this data identifies the combination of attributes (principal components, or directions in the feature space) that account for the most variance in the data. Here we plot the different samples on the 2 first principal components. <br>"
82
+ " <br>"
83
+
84
+ " For further details please see the sklearn docs:"
85
+ )
86
+
87
+ gr.Markdown(" **[Demo is based on sklearn docs found here](https://scikit-learn.org/stable/auto_examples/decomposition/plot_pca_vs_lda.html#sphx-glr-auto-examples-decomposition-plot-pca-vs-lda-py)** <br>")
88
+
89
+ gr.Markdown(" **Dataset** : The Iris dataset represents 3 kind of Iris flowers (Setosa, Versicolour and Virginica) with 4 attributes: sepal length, sepal width, petal length and petal width. . <br>")
90
+
91
+ # with gr.Row():
92
+ # n_samples = gr.Slider(value=100, minimum=10, maximum=1000, step=10, label="n_samples")
93
+ # n_components = gr.Slider(value=2, minimum=1, maximum=20, step=1, label="n_components")
94
+ # n_features = gr.Slider(value=5, minimum=5, maximum=25, step=1, label="n_features")
95
+
96
+
97
+ # options for n_components
98
+
99
+ btn = gr.Button(value="Run")
100
+ btn.click(plot_lda_pca, outputs= gr.Plot(label='PCA vs LDA clustering') ) #
101
+
102
+
103
+ demo.launch()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ scikit-learn==1.2.2
2
+ matplotlib==3.5.1
3
+ numpy==1.21.6