PCAdemo / app.py
Pranav4datasc's picture
Upload 2 files
8e0e6f2 verified
import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
st.title('Principal Component Analysis Demo App 🤖')
st.subheader('Credit: @BenjaminJack')
st.subheader('Raw Data')
iris = px.data.iris().drop('species_id',axis=1)
st.write(iris)
xvar = st.selectbox('Select x-axis',iris.columns[:-1])
yvar = st.selectbox('Select y-axis',iris.columns[:-1])
st.write(px.scatter(iris, x=xvar, y=yvar, color='species'))
iris_scaled = StandardScaler().fit_transform(iris.drop('species',axis=1))
iris_pca = PCA()
iris_transformed = iris_pca.fit_transform(iris_scaled)
col_names = [f'component{i+1}' for i in range(iris_transformed.shape[1])]
st.write(col_names)
iris_transformed_df = pd.DataFrame(iris_transformed, columns=col_names)
iris_transformed_df = pd.concat([iris_transformed_df, iris['species']],axis=1)
st.subheader('Transformed Data')
st.write(iris_transformed_df)
st.subheader('Explore principal components')
xvar = st.selectbox('Select x-axis:', iris_transformed_df.columns[:-1])
yvar = st.selectbox('Select y-axis:', iris_transformed_df.columns[:-1])
st.write(px.scatter(iris_transformed_df,x=xvar, y=yvar, color = 'species'))
st.subheader('Explore Loadings')
loadings = iris_pca.components_.T * np.sqrt(iris_pca.explained_variance_)
loadings_df = pd.DataFrame(loadings,columns=col_names)
loadings_df = pd.concat([loadings_df,
pd.Series(iris.columns[0:4],name='var')],
axis=1)
component =st.selectbox('Select Component:',loadings_df.columns[0:4])
bar_chart = px.bar(loadings_df[['var',component]].sort_values(component),
y='var',
x=component,
orientation='h',
range_x=[-1,1]
)
st.write(bar_chart)
st.write(loadings_df)
#Credits : benjaminjack