|
import pandas as pd |
|
from sklearn.decomposition import PCA |
|
import plotly.express as px |
|
|
|
|
|
def perform_pca(data, n_components):
    """Project ``data`` onto its principal components.

    Args:
        data: Input passed unchanged to ``PCA.fit_transform``.
        n_components: Forwarded unchanged to ``sklearn.decomposition.PCA``.
            An integer count works as before; other values PCA accepts
            (a float variance fraction, ``'mle'`` or ``None``) are now
            supported as well, because the column labels are derived from
            the transformed array instead of from ``range(n_components)``.

    Returns:
        A ``pandas.DataFrame`` with one column per retained component,
        named ``'principal component 1'``, ``'principal component 2'``, ...
        The frame is built without an explicit index, so the index of
        ``data`` is not carried over.
    """
    pca = PCA(n_components=n_components)
    components = pca.fit_transform(data)

    # Count columns from the result itself: ``range(n_components)`` raises
    # TypeError when n_components is a float, a string or None.
    column_names = [
        f'principal component {i + 1}' for i in range(components.shape[1])
    ]
    return pd.DataFrame(data=components, columns=column_names)
|
|
|
def plot_pca(clustered_data, principalDf, df, information_columns):
    """Build a 2-D scatter figure of the first two principal components.

    Args:
        clustered_data: DataFrame with a ``'cluster'`` column; its index
            values are matched against the index of ``df``.
        principalDf: DataFrame containing the ``'principal component 1'``
            and ``'principal component 2'`` columns; joined on index.
        df: DataFrame whose columns are carried into the plotted frame.
        information_columns: Passed to Plotly as ``hover_data``.

    Returns:
        The figure produced by ``plotly.express.scatter``.
    """
    # reset_index() turns the index into a column (named 'index' when the
    # index is unnamed) so it can be used as the right-hand merge key.
    cluster_lookup = clustered_data.reset_index()[['index', 'cluster']]

    # NOTE(review): the index of this first merge result appears to come from
    # the reset (positional) rows of clustered_data, so principalDf is
    # effectively matched by row position -- confirm against the caller.
    plot_frame = df.merge(cluster_lookup, left_index=True, right_on='index')
    plot_frame = plot_frame.merge(principalDf, left_index=True, right_index=True)
    plot_frame = plot_frame.drop(columns=['index'])

    axis_labels = {
        f'principal component {n}': f'Principal Component {n}' for n in (1, 2)
    }
    return px.scatter(
        plot_frame,
        x='principal component 1',
        y='principal component 2',
        color='cluster',
        hover_data=information_columns,
        title='2 Component PCA',
        labels=axis_labels,
        color_continuous_scale='viridis',
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def plot_pca_3D(clustered_data, principalDf, df, information_columns):
    """Build a 3-D scatter figure of the first three principal components.

    Args:
        clustered_data: DataFrame with a ``'cluster'`` column; its index
            values are matched against the index of ``df``.
        principalDf: DataFrame containing the ``'principal component 1'``
            through ``'principal component 3'`` columns; joined on index.
        df: DataFrame whose columns are carried into the plotted frame.
        information_columns: Passed to Plotly as ``hover_data``.

    Returns:
        The figure produced by ``plotly.express.scatter_3d``.
    """
    # Keep only the merge key (the former index) and the cluster label.
    cluster_cols = clustered_data.reset_index()[['index', 'cluster']]

    # Attach cluster labels, then the components, then drop the helper key.
    merged = (
        df.merge(cluster_cols, left_index=True, right_on='index')
        .merge(principalDf, left_index=True, right_index=True)
        .drop(columns=['index'])
    )

    pretty_names = {
        'principal component 1': 'Principal Component 1',
        'principal component 2': 'Principal Component 2',
        'principal component 3': 'Principal Component 3',
    }
    figure = px.scatter_3d(
        merged,
        x='principal component 1',
        y='principal component 2',
        z='principal component 3',
        color='cluster',
        hover_data=information_columns,
        title='3 Component PCA',
        labels=pretty_names,
        color_continuous_scale='viridis',
    )
    return figure