Spaces:
Sleeping
Sleeping
jimmy624135
committed on
Commit
•
417af85
1
Parent(s):
0940338
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import requests
|
4 |
+
import plotly.express as px
|
5 |
+
from sklearn.cluster import KMeans
|
6 |
+
|
7 |
+
# 下載與處理CSV檔案的函數
|
8 |
+
def fetch_and_process_csv(url):
    """Download a CSV file, save it locally, and load it into a DataFrame.

    The response body is written to the current working directory under the
    last path segment of ``url`` and then parsed with ``pandas.read_csv``.
    Missing values in the parsed table are replaced with ``0``.

    Args:
        url: HTTP(S) address of the CSV file to download.

    Returns:
        pandas.DataFrame: The parsed data with missing values filled with 0.

    Raises:
        requests.HTTPError: If the server responds with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
    """
    # A timeout keeps the app from hanging forever on an unresponsive host.
    response = requests.get(url, timeout=30)
    # Fail loudly instead of saving and parsing an HTML error page as CSV.
    response.raise_for_status()

    filename = url.split("/")[-1]
    with open(filename, "wb") as file:
        file.write(response.content)

    df = pd.read_csv(filename)
    return df.fillna(0)
|
16 |
+
|
17 |
+
# 繪製數值型欄位的長條圖
|
18 |
+
def plot_numeric_bar_chart(df, x_column, y_column, title):
    """Draw a bar chart of ``y_column`` against ``x_column`` on the page.

    Args:
        df: Data to plot.
        x_column: Name of the column used for the x axis.
        y_column: Name of the column used for the bar heights.
        title: Chart title.
    """
    st.plotly_chart(px.bar(df, x=x_column, y=y_column, title=title))
|
21 |
+
|
22 |
+
# 繪製數值型欄位的圓餅圖(基於欄位的總和)
|
23 |
+
def plot_numeric_pie_chart(df, column, title):
    """Draw a pie chart with one slice per distinct value of ``column``.

    For a numeric column each slice is sized by the sum of the rows sharing
    that value. For a non-numeric column (e.g. a name column) each slice is
    sized by the number of rows sharing that value.

    Args:
        df: Data to plot.
        column: Name of the column whose distinct values become the slices.
        title: Chart title.
    """
    grouped = df.groupby(column)[column]
    if pd.api.types.is_numeric_dtype(df[column]):
        slice_sizes = grouped.sum()
    else:
        # Summing strings concatenates them, which is not a usable slice
        # size; count the occurrences of each label instead.
        slice_sizes = grouped.size()

    fig = px.pie(names=slice_sizes.index, values=slice_sizes.values, title=title)
    st.plotly_chart(fig)
|
27 |
+
|
28 |
+
# 執行 K-means 分析並繪製結果
|
29 |
+
def perform_kmeans(df, x_column, y_column, n_clusters):
    """Cluster the data with K-means and show a scatter plot plus the centres.

    When ``x_column`` is "公司名稱" (the company-name column) only
    ``y_column`` is used as a clustering feature; otherwise both columns are
    used. The cluster labels are added to a copy of ``df``, so the caller's
    DataFrame is left untouched.

    Args:
        df: Data to cluster; the feature columns must be numeric.
        x_column: Column shown on the x axis.
        y_column: Column shown on the y axis.
        n_clusters: Number of clusters to fit.
    """
    if x_column == "公司名稱":
        # Company names are labels, not numbers, so cluster on Y only.
        feature_columns = [y_column]
    else:
        # dict.fromkeys drops the duplicate when both axes use the same
        # column, avoiding duplicate column names in the outputs below.
        feature_columns = list(dict.fromkeys([x_column, y_column]))

    # KMeans raises when asked for more clusters than there are samples.
    if len(df) < n_clusters:
        st.warning("資料筆數少於群組數,無法進行 K-means 分析。")
        return

    # Work on a copy so the 'Cluster' column does not leak into the caller's
    # DataFrame.
    clustered = df.copy()
    # A fixed seed keeps cluster labels stable across Streamlit reruns.
    kmeans = KMeans(n_clusters=n_clusters, random_state=0)
    clustered['Cluster'] = kmeans.fit_predict(clustered[feature_columns])

    fig = px.scatter(clustered, x=x_column, y=y_column, color='Cluster',
                     title=f"K-means 分析 - {n_clusters} 群組",
                     hover_data=[x_column, y_column])
    st.plotly_chart(fig)

    # Show the fitted cluster centres, one column per clustering feature.
    st.write("群組中心:")
    cluster_centers = pd.DataFrame(kmeans.cluster_centers_, columns=feature_columns)
    st.dataframe(cluster_centers)
|
51 |
+
|
52 |
+
# 主程式
|
53 |
+
def main():
    """Run the Streamlit data-visualisation app.

    Lets the user pick a dataset, the X/Y columns and a chart type in the
    sidebar, then renders a bar chart, a pie chart or a K-means analysis.
    The X axis may be the "公司名稱" column (when present) or any numeric
    column; the Y axis is restricted to numeric columns.
    """
    st.title("數據視覺化工具")
    st.sidebar.title("選擇選項")

    # URLs of the available datasets, keyed by the label shown in the sidebar.
    data_sources = {
        "溫室氣體": "https://mopsfin.twse.com.tw/opendata/t187ap46_L_1.csv",
        "再生能源": "https://mopsfin.twse.com.tw/opendata/t187ap46_O_2.csv",
        "董事會": "https://mopsfin.twse.com.tw/opendata/t187ap46_L_6.csv",
    }

    selected_dataset = st.sidebar.selectbox("選擇資料集", list(data_sources))

    # Download and load the chosen dataset.
    df = fetch_and_process_csv(data_sources[selected_dataset])

    # "number" matches every numeric dtype, not only the platform default
    # int/float widths.
    numeric_columns = df.select_dtypes(include="number").columns.tolist()
    if not numeric_columns:
        st.write("所選的資料集中沒有數值型欄位可供繪製或分析。")
        return

    # The company name is a valid X axis (labels) but must never be offered
    # as a Y value: it is not numeric and would break the charts and K-means.
    if "公司名稱" in df.columns:
        x_options = ["公司名稱"] + numeric_columns
    else:
        x_options = numeric_columns

    selected_x_column = st.sidebar.selectbox("選擇X軸欄位", x_options)
    selected_y_column = st.sidebar.selectbox("選擇Y軸數值型欄位", numeric_columns)

    chart_type = st.sidebar.radio("選擇圖表類型", ["長條圖", "圓餅圖", "K-means 分析"])

    if chart_type == "長條圖":
        plot_numeric_bar_chart(df, selected_x_column, selected_y_column, f"{selected_dataset} - {selected_y_column} 長條圖")
    elif chart_type == "圓餅圖":
        plot_numeric_pie_chart(df, selected_x_column, f"{selected_dataset} - {selected_x_column} 圓餅圖")
    elif chart_type == "K-means 分析":
        # Let the user choose how many clusters to fit.
        n_clusters = st.sidebar.slider("選擇群組數", min_value=2, max_value=10, value=3)
        perform_kmeans(df, selected_x_column, selected_y_column, n_clusters)
|
95 |
+
|
96 |
+
# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    main()
|