|
import streamlit as st |
|
import pandas as pd |
|
import matplotlib.pyplot as plt |
|
import joblib |
|
from sklearn.decomposition import PCA |
|
from sklearn.preprocessing import StandardScaler |
|
from sklearn.cluster import KMeans, DBSCAN |
|
from scipy.cluster.hierarchy import fcluster, linkage |
|
|
|
|
|
# Pre-fitted artefacts, read from the current working directory in the order
# listed. NOTE: joblib.load unpickles its input, so these files must come
# from a trusted source.
_MODEL_FILES = (
    'scaler.sav',
    'pca_model.sav',
    'kmeans_model.sav',
    'hierarchical_model.sav',
    'dbscan_model.sav',
)

scaler, pca, kmeans, linked, dbscan = map(joblib.load, _MODEL_FILES)
|
|
|
|
|
# Page header: the app title followed by a one-line usage hint.
PAGE_TITLE = "聚類分析 - KMeans, Hierarchical Clustering 和 DBSCAN"
PAGE_HINT = "上傳 CSV 文件並查看聚類結果"

st.title(PAGE_TITLE)
st.write(PAGE_HINT)
|
|
|
|
|
def _plot_clusters(points, labels, heading, title):
    """Draw one PC1/PC2 scatter plot coloured by cluster label.

    Args:
        points: DataFrame holding the ``PC1`` and ``PC2`` columns to plot.
        labels: One cluster label per row of ``points``.
        heading: Text for the Streamlit subheader shown above the chart.
        title: Title drawn on the matplotlib axes.
    """
    st.subheader(heading)
    fig, ax = plt.subplots()
    ax.scatter(points['PC1'], points['PC2'], c=labels, cmap='viridis')
    ax.set_title(title)
    ax.set_xlabel('PC1')
    ax.set_ylabel('PC2')
    st.pyplot(fig)
    # pyplot keeps every figure it creates registered until it is closed.
    # Streamlit re-executes this script on each interaction, so without an
    # explicit close the figures pile up in memory.
    plt.close(fig)


def _hierarchical_labels(points, n_clusters=3):
    """Return flat hierarchical-cluster labels aligned with ``points``.

    The saved linkage matrix describes the observations it was built from,
    so it can only label the uploaded rows when the row counts agree.
    Otherwise the linkage is rebuilt from the uploaded points.

    Args:
        points: DataFrame of PCA-projected observations.
        n_clusters: Maximum number of flat clusters to cut the tree into.

    Returns:
        Array with one cluster label per row of ``points``.
    """
    linkage_matrix = linked
    # A linkage matrix over n observations has n - 1 rows.
    if len(linkage_matrix) + 1 != len(points):
        st.warning(
            "已儲存的階層式模型與上傳資料的筆數不一致，"
            "已改以上傳資料重新計算（ward linkage）。"
        )
        # NOTE(review): the method used to build the saved matrix is not
        # visible in this file; 'ward' is an assumption - confirm it matches
        # the training script.
        linkage_matrix = linkage(points.to_numpy(), method='ward')
    return fcluster(linkage_matrix, n_clusters, criterion='maxclust')


uploaded_file = st.file_uploader("上傳 CSV 文件", type=["csv"])

if uploaded_file is not None:
    data = pd.read_csv(uploaded_file)

    # 'Time' is dropped before scaling; errors='ignore' lets a file that
    # already lacks the column through instead of raising KeyError.
    numerical_data = data.drop(columns=['Time'], errors='ignore')

    # Apply the pre-fitted scaler and PCA, then name the two components.
    scaled_data = scaler.transform(numerical_data)
    pca_data = pca.transform(scaled_data)
    pca_df = pd.DataFrame(pca_data, columns=['PC1', 'PC2'])

    chart_option = st.selectbox(
        "選擇要顯示的聚類結果圖表",
        ("K-means", "Hierarchical Clustering", "DBSCAN")
    )

    # Only the selected clustering is computed, so a rerun does not pay for
    # the two charts that are not shown.
    if chart_option == "K-means":
        _plot_clusters(
            pca_df,
            kmeans.predict(pca_df),
            "K-means_聚類結果",
            'K-means Clustering',
        )
    elif chart_option == "Hierarchical Clustering":
        _plot_clusters(
            pca_df,
            _hierarchical_labels(pca_df),
            "Hierarchical Clustering_階層式聚類結果",
            'Hierarchical Clustering',
        )
    elif chart_option == "DBSCAN":
        # DBSCAN has no predict(); the loaded estimator is refitted on the
        # uploaded points using its stored hyper-parameters.
        _plot_clusters(
            pca_df,
            dbscan.fit_predict(pca_df),
            "DBSCAN_聚類結果",
            'DBSCAN Clustering',
        )
|
|