""" | |
======================================================= | |
Comparison of LDA and PCA 2D projection of Iris dataset | |
======================================================= | |
The Iris dataset represents 3 kind of Iris flowers (Setosa, Versicolour | |
and Virginica) with 4 attributes: sepal length, sepal width, petal length | |
and petal width. | |
Principal Component Analysis (PCA) applied to this data identifies the | |
combination of attributes (principal components, or directions in the | |
feature space) that account for the most variance in the data. Here we | |
plot the different samples on the 2 first principal components. | |
Linear Discriminant Analysis (LDA) tries to identify attributes that | |
account for the most variance *between classes*. In particular, | |
LDA, in contrast to PCA, is a supervised method, using known class labels. | |
""" | |
import matplotlib.pyplot as plt
import gradio as gr
from sklearn import datasets
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

# Load the Iris dataset: 150 samples, 4 features, 3 classes.
iris = datasets.load_iris()
X = iris.data
y = iris.target
target_names = iris.target_names

# Unsupervised projection: keep the two directions of maximum variance.
pca = PCA(n_components=2)
X_r = pca.fit(X).transform(X)

# Supervised projection: maximise between-class separation (uses labels y).
lda = LinearDiscriminantAnalysis(n_components=2)
X_r2 = lda.fit(X, y).transform(X)

# Percentage of variance explained by each of the two PCA components.
print(
    "explained variance ratio (first two components): %s"
    % str(pca.explained_variance_ratio_)
)
# Build the PCA/LDA comparison figure for the Gradio demo below.
def plot_lda_pca():
    """Return a matplotlib figure with the PCA and LDA 2-D projections
    of the Iris dataset drawn side by side.

    Reads the module-level projections ``X_r`` (PCA) and ``X_r2`` (LDA),
    the labels ``y`` and the class names ``target_names``.

    Returns:
        matplotlib.figure.Figure: a fresh figure with two axes
        (left: PCA, right: LDA).
    """
    # Use two separate axes on a *new* figure: the original code plotted
    # both projections onto the same axes of plt.figure(1), so the PCA
    # scatter was buried under the LDA one, its title was overwritten,
    # and repeated button clicks accumulated artists in figure 1.
    fig, (ax_pca, ax_lda) = plt.subplots(1, 2, facecolor="w", figsize=(10, 5))
    colors = ["navy", "turquoise", "darkorange"]
    lw = 2
    for color, i, target_name in zip(colors, [0, 1, 2], target_names):
        ax_pca.scatter(
            X_r[y == i, 0], X_r[y == i, 1],
            color=color, alpha=0.8, lw=lw, label=target_name,
        )
    ax_pca.legend(loc="best", shadow=False, scatterpoints=1)
    ax_pca.set_title("PCA of IRIS dataset")
    for color, i, target_name in zip(colors, [0, 1, 2], target_names):
        ax_lda.scatter(
            X_r2[y == i, 0], X_r2[y == i, 1],
            alpha=0.8, color=color, label=target_name,
        )
    ax_lda.legend(loc="best", shadow=False, scatterpoints=1)
    ax_lda.set_title("LDA of IRIS dataset")
    return fig
title = "2-D projection of Iris dataset using LDA and PCA" | |
with gr.Blocks(title=title) as demo: | |
gr.Markdown(f"# {title}") | |
gr.Markdown(" This example shows how one can use Prinicipal Components Analysis (PCA) and Factor Analysis (FA) for model selection by observing the likelihood of a held-out dataset with added noise <br>" | |
" The number of samples (n_samples) will determine the number of data points to produce. <br>" | |
" The number of components (n_components) will determine the number of components each method will fit to, and will affect the likelihood of the held-out set. <br>" | |
" The number of features (n_components) determine the number of features the toy dataset X variable will have. <br>" | |
" For further details please see the sklearn docs:" | |
) | |
gr.Markdown(" **[Demo is based on sklearn docs found here](https://scikit-learn.org/stable/auto_examples/decomposition/plot_pca_vs_lda.html#sphx-glr-auto-examples-decomposition-plot-pca-vs-lda-py)** <br>") | |
gr.Markdown(" **Dataset** : A toy dataset with corrupted with homoscedastic noise (noise variance is the same for each feature) or heteroscedastic noise (noise variance is the different for each feature) . <br>") | |
gr.Markdown(" Different number of features and number of components affect how well the low rank space is recovered. <br>" | |
" Larger Depth trying to overfit and learn even the finner details of the data.<br>" | |
) | |
# with gr.Row(): | |
# n_samples = gr.Slider(value=100, minimum=10, maximum=1000, step=10, label="n_samples") | |
# n_components = gr.Slider(value=2, minimum=1, maximum=20, step=1, label="n_components") | |
# n_features = gr.Slider(value=5, minimum=5, maximum=25, step=1, label="n_features") | |
# options for n_components | |
btn = gr.Button(value="Run") | |
btn.click(plot_lda_pca, outputs= gr.Plot(label='PCA vs LDA clustering') ) # | |
demo.launch() |