Spaces:
Sleeping
Sleeping
import io

import pandas as pd
import plotly.express as px
import requests
import streamlit as st
from sklearn.cluster import KMeans
def fetch_and_process_csv(url):
    """Download a CSV file and return it as a DataFrame with gaps filled.

    The response body is parsed directly from memory, so nothing is written
    to the working directory.

    Args:
        url: HTTP(S) address of the CSV file.

    Returns:
        A pandas DataFrame in which every missing value is replaced by 0.

    Raises:
        requests.HTTPError: The server answered with a 4xx/5xx status.
        requests.RequestException: Connection failure or timeout.
    """
    # A timeout keeps the app from hanging forever on an unresponsive server.
    response = requests.get(url, timeout=30)
    # Fail loudly on an error status instead of parsing an error page as CSV.
    response.raise_for_status()
    df = pd.read_csv(io.BytesIO(response.content))
    # Fill missing values so downstream charts and clustering see no NaN.
    return df.fillna(0)
def plot_numeric_bar_chart(df, x_column, y_column, title):
    """Draw a bar chart of ``y_column`` against ``x_column`` in the Streamlit page.

    Args:
        df: DataFrame holding the data to plot.
        x_column: Name of the column used for the x axis.
        y_column: Name of the column used for the bar heights.
        title: Chart title.
    """
    chart_options = {"x": x_column, "y": y_column, "title": title}
    st.plotly_chart(px.bar(df, **chart_options))
def plot_numeric_pie_chart(df, column, title):
    """Draw a pie chart of ``column`` in the Streamlit page.

    Each slice is one distinct value of ``column``. For a numeric column the
    slice size is the sum of that value's occurrences; for a non-numeric
    column it is the number of occurrences.

    Args:
        df: DataFrame holding the data to plot.
        column: Name of the column to summarise.
        title: Chart title.
    """
    if pd.api.types.is_numeric_dtype(df[column]):
        slice_sizes = df.groupby(column)[column].sum()
    else:
        # Summing text concatenates the strings, which is not a usable slice
        # size, so count occurrences instead. Casting to str keeps the labels
        # homogeneous even if missing cells were filled with 0 upstream.
        slice_sizes = df[column].astype(str).value_counts()
    fig = px.pie(names=slice_sizes.index, values=slice_sizes.values, title=title)
    st.plotly_chart(fig)
def perform_kmeans(df, x_column, y_column, n_clusters, random_state=0):
    """Cluster the rows with K-means and show the result in the Streamlit page.

    Only numeric axes are used as clustering features; a text axis such as
    "公司名稱" is kept for plotting but excluded from the fit. The input
    DataFrame is left unmodified.

    Args:
        df: DataFrame holding the data to cluster.
        x_column: Name of the column plotted on the x axis.
        y_column: Name of the column plotted on the y axis.
        n_clusters: Number of clusters to form.
        random_state: Seed passed to KMeans so that Streamlit reruns produce
            the same clusters for the same data.
    """
    # dict.fromkeys drops a duplicate when both axes are the same column
    # while keeping the x-then-y order.
    feature_columns = [
        name
        for name in dict.fromkeys((x_column, y_column))
        if name != "公司名稱" and pd.api.types.is_numeric_dtype(df[name])
    ]
    if not feature_columns:
        st.write("K-means 分析需要至少一個數值型欄位。")
        return
    # KMeans raises when asked for more clusters than there are samples.
    if len(df) < n_clusters:
        st.write(f"資料筆數({len(df)})少於群組數({n_clusters}),無法進行 K-means 分析。")
        return

    kmeans = KMeans(n_clusters=n_clusters, random_state=random_state)
    labels = kmeans.fit_predict(df[feature_columns])
    # assign() returns a new frame, so the caller's DataFrame gains no column.
    clustered = df.assign(Cluster=labels)

    fig = px.scatter(
        clustered,
        x=x_column,
        y=y_column,
        color='Cluster',
        title=f"K-means 分析 - {n_clusters} 群組",
        hover_data=[x_column, y_column],
    )
    st.plotly_chart(fig)

    # Show the cluster centres, one column per feature used in the fit.
    st.write("群組中心:")
    cluster_centers = pd.DataFrame(kmeans.cluster_centers_, columns=feature_columns)
    st.dataframe(cluster_centers)
def main():
    """Build the Streamlit page: pick a dataset, pick columns, draw a chart.

    The sidebar offers a dataset, an x-axis column (the company-name column
    plus all numeric columns), a numeric y-axis column and a chart type.
    Download or parse failures are reported in the page instead of crashing.
    """
    st.title("數據視覺化工具")
    st.sidebar.title("選擇選項")

    # Source URL for each selectable dataset.
    data_sources = {
        "溫室氣體": "https://mopsfin.twse.com.tw/opendata/t187ap46_L_1.csv",
        "再生能源": "https://mopsfin.twse.com.tw/opendata/t187ap46_O_2.csv",
        "董事會": "https://mopsfin.twse.com.tw/opendata/t187ap46_L_6.csv",
    }

    selected_dataset = st.sidebar.selectbox("選擇資料集", list(data_sources))

    try:
        df = fetch_and_process_csv(data_sources[selected_dataset])
    except (
        requests.RequestException,
        pd.errors.EmptyDataError,
        pd.errors.ParserError,
    ) as error:
        st.error(f"無法載入資料集「{selected_dataset}」:{error}")
        return

    # 'number' selects every numeric dtype regardless of platform integer
    # width, unlike the 'int'/'float' aliases.
    numeric_columns = df.select_dtypes(include="number").columns.tolist()
    if not numeric_columns:
        # Checked before the company-name column is added, so a dataset with
        # only text columns is correctly reported as having nothing to plot.
        st.write("所選的資料集中沒有數值型欄位可供繪製或分析。")
        return

    # The x axis may also be the company name; the y axis must be numeric.
    x_options = numeric_columns
    if "公司名稱" in df.columns:
        x_options = ["公司名稱"] + numeric_columns

    selected_x_column = st.sidebar.selectbox("選擇X軸欄位", x_options)
    selected_y_column = st.sidebar.selectbox("選擇Y軸數值型欄位", numeric_columns)

    chart_type = st.sidebar.radio("選擇圖表類型", ["長條圖", "圓餅圖", "K-means 分析"])

    if chart_type == "長條圖":
        plot_numeric_bar_chart(
            df,
            selected_x_column,
            selected_y_column,
            f"{selected_dataset} - {selected_y_column} 長條圖",
        )
    elif chart_type == "圓餅圖":
        plot_numeric_pie_chart(
            df,
            selected_x_column,
            f"{selected_dataset} - {selected_x_column} 圓餅圖",
        )
    elif chart_type == "K-means 分析":
        # The cluster count is only relevant for this chart type.
        n_clusters = st.sidebar.slider("選擇群組數", min_value=2, max_value=10, value=3)
        perform_kmeans(df, selected_x_column, selected_y_column, n_clusters)
# Build the page only when this file is run as a script, not when it is imported.
if __name__ == "__main__":
    main()