Spaces:
Sleeping
Sleeping
Roberta2024
commited on
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os

import joblib
import matplotlib.pyplot as plt
import pandas as pd
import streamlit as st
from scipy.cluster.hierarchy import fcluster
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
|
8 |
+
|
9 |
+
# ================== Load the saved models ==================
# The artifacts were exported to a Google Drive folder mounted in Colab.
# That mount point does not exist on other hosts, so the directory can be
# overridden with the MODEL_DIR environment variable; if it is unset and the
# Colab folder is missing, the directory containing this script is used.
# NOTE(security): joblib.load unpickles arbitrary Python objects -- only load
# .sav files that come from a trusted source.
_COLAB_MODEL_DIR = '/content/gdrive/My Drive'
_MODEL_DIR = os.environ.get('MODEL_DIR') or (
    _COLAB_MODEL_DIR
    if os.path.isdir(_COLAB_MODEL_DIR)
    else os.path.dirname(os.path.abspath(__file__))
)

scaler = joblib.load(os.path.join(_MODEL_DIR, 'scaler.sav'))  # standardisation model
pca = joblib.load(os.path.join(_MODEL_DIR, 'pca_model.sav'))  # PCA model
kmeans = joblib.load(os.path.join(_MODEL_DIR, 'kmeans_model.sav'))  # K-means model
# Hierarchical clustering model; passed to fcluster() as the linkage below.
linked = joblib.load(os.path.join(_MODEL_DIR, 'hierarchical_model.sav'))
dbscan = joblib.load(os.path.join(_MODEL_DIR, 'dbscan_model.sav'))  # DBSCAN model
|
15 |
+
|
16 |
+
# 定義繪圖函數
|
17 |
+
def plot_clusters(data, labels, title):
    """Draw a PC1/PC2 scatter plot coloured by cluster label and save it as a PNG.

    Args:
        data: Table-like object exposing 'PC1' and 'PC2' columns.
        labels: One value per row of ``data``; used as the point colours.
        title: Plot title. It is also used to derive the output file name so
            that plots with different titles are written to different files.

    Returns:
        Path of the PNG file that was written.
    """
    # A single fixed file name would make consecutive calls overwrite each
    # other, so every previously returned path would show the latest plot.
    slug = ''.join(ch.lower() if ch.isalnum() else '_' for ch in title).strip('_')
    filename = 'plot_' + slug + '.png' if slug else 'plot.png'

    # Use an explicit figure/axes pair instead of pyplot's implicit
    # "current figure" so the drawing does not depend on global state.
    fig, ax = plt.subplots(figsize=(8, 6))
    try:
        points = ax.scatter(data['PC1'], data['PC2'], c=labels, cmap='viridis', s=50)
        ax.set_title(title)
        ax.set_xlabel('Principal Component 1 (PC1)')
        ax.set_ylabel('Principal Component 2 (PC2)')
        fig.colorbar(points, ax=ax)
        fig.savefig(filename)
    finally:
        # Always release the figure, even when plotting or saving fails.
        plt.close(fig)
    return filename
|
27 |
+
|
28 |
+
# 處理上傳的資料
|
29 |
+
def process_data(file):
    """Cluster an uploaded CSV with the pre-loaded models and plot the results.

    Args:
        file: Path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        A tuple ``(kmeans_labels, hclust_labels, dbscan_labels, kmeans_plot,
        hclust_plot, dbscan_plot)``: the three label arrays followed by the
        values returned by ``plot_clusters`` for each of them.

    Raises:
        ValueError: If the saved linkage yields a different number of labels
            than the uploaded data has rows.
    """
    # Read the new data.
    new_data = pd.read_csv(file)
    # Remove the 'Time' column; tolerate uploads that do not contain it.
    new_numerical_data = new_data.drop(columns=['Time'], errors='ignore')

    # Pre-processing with the already fitted scaler and PCA models.
    scaled_new_data = scaler.transform(new_numerical_data)
    pca_new_data = pca.transform(scaled_new_data)

    # DataFrame holding the principal components (assumes the saved PCA
    # produces exactly two components -- TODO confirm).
    pca_new_df = pd.DataFrame(pca_new_data, columns=['PC1', 'PC2'])

    # Cluster with the loaded models.
    kmeans_new_labels = kmeans.predict(pca_new_df)

    # fcluster() only cuts the saved linkage into flat clusters, so its labels
    # describe the observations that linkage was built from, not the uploaded
    # rows. Fail early with a clear message when the two cannot be paired.
    hclust_new_labels = fcluster(linked, 3, criterion='maxclust')
    if len(hclust_new_labels) != len(pca_new_df):
        raise ValueError(
            'Saved hierarchical linkage describes %d observations but the '
            'uploaded data has %d rows; hierarchical labels cannot be '
            'matched to the upload.'
            % (len(hclust_new_labels), len(pca_new_df))
        )

    # fit_predict re-fits the loaded estimator on the uploaded data
    # (presumably an sklearn DBSCAN, which offers no separate predict()).
    dbscan_new_labels = dbscan.fit_predict(pca_new_df)

    # Visualise the results.
    kmeans_plot = plot_clusters(pca_new_df, kmeans_new_labels, 'K-means Clustering')
    hclust_plot = plot_clusters(pca_new_df, hclust_new_labels, 'Hierarchical Clustering')
    dbscan_plot = plot_clusters(pca_new_df, dbscan_new_labels, 'DBSCAN Clustering')

    return (
        kmeans_new_labels,
        hclust_new_labels,
        dbscan_new_labels,
        kmeans_plot,
        hclust_plot,
        dbscan_plot,
    )
|
53 |
+
|
54 |
+
# Streamlit application
st.title("聚類模型應用")

# File upload widget (CSV only)
uploaded_file = st.file_uploader("上傳 CSV 檔案", type=["csv"])

if uploaded_file is not None:
    (
        kmeans_labels,
        hclust_labels,
        dbscan_labels,
        kmeans_plot,
        hclust_plot,
        dbscan_plot,
    ) = process_data(uploaded_file)

    # Show the label arrays first, one section per algorithm.
    label_sections = (
        ("K-means Labels", kmeans_labels),
        ("Hierarchical Clustering Labels", hclust_labels),
        ("DBSCAN Labels", dbscan_labels),
    )
    for heading, labels in label_sections:
        st.subheader(heading)
        st.text(labels)

    # Then show the saved plot images in the same algorithm order.
    plot_sections = (
        ("K-means Clustering Plot", kmeans_plot),
        ("Hierarchical Clustering Plot", hclust_plot),
        ("DBSCAN Clustering Plot", dbscan_plot),
    )
    for heading, image in plot_sections:
        st.subheader(heading)
        st.image(image)
|