Roberta2024 committed on
Commit
9f4a990
·
verified ·
1 Parent(s): ce0eb71

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +83 -0
app.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import joblib
2
+ import pandas as pd
3
+ import matplotlib.pyplot as plt
4
+ import streamlit as st
5
+ from sklearn.preprocessing import StandardScaler
6
+ from sklearn.decomposition import PCA
7
+ from scipy.cluster.hierarchy import fcluster
8
+
9
# ================== Load the saved models ==================
# The artifacts used to be read only from the fixed directory
# '/content/gdrive/My Drive'. The MODEL_DIR environment variable can now point
# at another directory; when it is unset the original directory is used, so
# existing setups behave exactly as before.
# NOTE(review): joblib.load unpickles its input -- only load trusted files.
import os

_MODEL_DIR = os.environ.get('MODEL_DIR', '/content/gdrive/My Drive')

scaler = joblib.load(os.path.join(_MODEL_DIR, 'scaler.sav'))  # standardization model
pca = joblib.load(os.path.join(_MODEL_DIR, 'pca_model.sav'))  # PCA model
kmeans = joblib.load(os.path.join(_MODEL_DIR, 'kmeans_model.sav'))  # K-means model
linked = joblib.load(os.path.join(_MODEL_DIR, 'hierarchical_model.sav'))  # hierarchical clustering model
dbscan = joblib.load(os.path.join(_MODEL_DIR, 'dbscan_model.sav'))  # DBSCAN model
15
+
16
# Plotting helper
def plot_clusters(data, labels, title):
    """Save a PC1/PC2 scatter plot coloured by cluster label and return its path.

    Args:
        data: Object indexable by 'PC1' and 'PC2' (for example a DataFrame
            with those two columns).
        labels: One value per point; passed to the scatter plot as the colour
            of each point.
        title: Plot title. It is also used to derive the output file name.

    Returns:
        Path of the saved PNG. The name is 'plot_' + a lower-case slug of the
        title + '.png', or 'plot.png' when the title contains no ASCII
        letters or digits.
    """
    import re

    # A single fixed file name would be overwritten by the next call, so every
    # caller holding an earlier return value would end up displaying the most
    # recent plot. Deriving the name from the title keeps differently titled
    # plots in separate files.
    slug = re.sub(r'[^0-9A-Za-z]+', '_', str(title)).strip('_').lower()
    filename = f'plot_{slug}.png' if slug else 'plot.png'

    fig = plt.figure(figsize=(8, 6))
    try:
        plt.scatter(data['PC1'], data['PC2'], c=labels, cmap='viridis', s=50)
        plt.title(title)
        plt.xlabel('Principal Component 1 (PC1)')
        plt.ylabel('Principal Component 2 (PC2)')
        plt.colorbar()
        plt.savefig(filename)
    finally:
        # Close the figure even when plotting fails so figures do not pile up.
        plt.close(fig)
    return filename
27
+
28
# Process the uploaded data
def process_data(file):
    """Cluster an uploaded CSV with the pre-loaded models and plot each result.

    The data is read with pandas, the 'Time' column is dropped when present,
    and the remaining columns go through the loaded scaler and PCA model. The
    two resulting components are then labelled by K-means, hierarchical
    clustering and DBSCAN, and each labelling is plotted with plot_clusters.

    Args:
        file: Path or file-like object accepted by ``pd.read_csv``.

    Returns:
        A 6-tuple: the K-means, hierarchical and DBSCAN labels, followed by
        the three values returned by plot_clusters for those labellings (in
        the same order).
    """
    import numpy as np
    from scipy.cluster.hierarchy import linkage

    # Read the new data.
    new_data = pd.read_csv(file)
    # Remove the 'Time' column; a file that does not have it is accepted too.
    new_numerical_data = new_data.drop(columns=['Time'], errors='ignore')

    # Pre-processing with the saved transformers.
    scaled_new_data = scaler.transform(new_numerical_data)  # standardize
    pca_new_data = pca.transform(scaled_new_data)  # project with the saved PCA

    # DataFrame holding the principal components.
    pca_new_df = pd.DataFrame(pca_new_data, columns=['PC1', 'PC2'])
    n_rows = len(pca_new_df)

    # K-means can label unseen points directly.
    kmeans_new_labels = kmeans.predict(pca_new_df)

    # fcluster returns one label per observation of the linkage it is given
    # (a linkage over n observations has n - 1 rows). The saved linkage can
    # therefore only label an upload with exactly that many rows; for any
    # other size the labels would not line up with the points being plotted.
    if len(linked) + 1 == n_rows:
        # NOTE(review): same size does not prove it is the same data -- the
        # labels still describe the observations the linkage was built from.
        hclust_new_labels = fcluster(linked, 3, criterion='maxclust')
    elif n_rows >= 2:
        # NOTE(review): Ward linkage is an assumption; the method used to
        # build the saved linkage is not visible here -- confirm.
        new_linkage = linkage(pca_new_df.to_numpy(), method='ward')
        hclust_new_labels = fcluster(new_linkage, 3, criterion='maxclust')
    else:
        # A single observation cannot be linked; it forms one cluster.
        hclust_new_labels = np.ones(n_rows, dtype=int)

    # DBSCAN has no predict step, so the loaded estimator is refitted on the
    # new data using its stored parameters.
    dbscan_new_labels = dbscan.fit_predict(pca_new_df)

    # Visualize the results.
    kmeans_plot = plot_clusters(pca_new_df, kmeans_new_labels, 'K-means Clustering')
    hclust_plot = plot_clusters(pca_new_df, hclust_new_labels, 'Hierarchical Clustering')
    dbscan_plot = plot_clusters(pca_new_df, dbscan_new_labels, 'DBSCAN Clustering')

    return kmeans_new_labels, hclust_new_labels, dbscan_new_labels, kmeans_plot, hclust_plot, dbscan_plot
53
+
54
# Streamlit application
st.title("聚類模型應用")

# File upload widget (CSV only)
uploaded_file = st.file_uploader("上傳 CSV 檔案", type=["csv"])

if uploaded_file is not None:
    (
        kmeans_labels,
        hclust_labels,
        dbscan_labels,
        kmeans_plot,
        hclust_plot,
        dbscan_plot,
    ) = process_data(uploaded_file)

    # Show the label arrays first: K-means, hierarchical, then DBSCAN.
    for heading, labels in (
        ("K-means Labels", kmeans_labels),
        ("Hierarchical Clustering Labels", hclust_labels),
        ("DBSCAN Labels", dbscan_labels),
    ):
        st.subheader(heading)
        st.text(labels)

    # Then show the plots in the same order.
    for heading, image in (
        ("K-means Clustering Plot", kmeans_plot),
        ("Hierarchical Clustering Plot", hclust_plot),
        ("DBSCAN Clustering Plot", dbscan_plot),
    ):
        st.subheader(heading)
        st.image(image)