# 0816ESG / app.py
# jimmy624135's picture
# Create app.py
# 417af85 verified
import streamlit as st
import pandas as pd
import requests
import plotly.express as px
from sklearn.cluster import KMeans
# 下載與處理CSV檔案的函數
def fetch_and_process_csv(url):
    """Download the CSV at *url*, save it locally, and load it as a DataFrame.

    The response body is written to the current working directory under the
    last path segment of *url*, then parsed with ``pandas.read_csv``. Missing
    values in the resulting frame are replaced with 0.

    Args:
        url: Address of the CSV file to download.

    Returns:
        A pandas DataFrame in which every missing value has been set to 0.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failure or timeout.
    """
    # A timeout keeps a stalled server from hanging the app indefinitely.
    response = requests.get(url, timeout=30)
    # Fail loudly on an error status instead of parsing an error page as CSV.
    response.raise_for_status()

    filename = url.split("/")[-1]
    with open(filename, "wb") as file:
        file.write(response.content)

    df = pd.read_csv(filename)
    df = df.fillna(0)  # fill missing values
    return df
# 繪製數值型欄位的長條圖
def plot_numeric_bar_chart(df, x_column, y_column, title):
    """Draw a Plotly bar chart of *y_column* against *x_column* in Streamlit.

    Args:
        df: DataFrame holding the data to plot.
        x_column: Name of the column used for the x axis.
        y_column: Name of the column used for the bar heights.
        title: Chart title.
    """
    chart_options = {"x": x_column, "y": y_column, "title": title}
    st.plotly_chart(px.bar(df, **chart_options))
# 繪製數值型欄位的圓餅圖(基於欄位的總和)
def plot_numeric_pie_chart(df, column, title):
    """Draw a pie chart with one slice per distinct value of *column*.

    For a numeric column each slice is sized by the sum of the rows sharing
    that value. For a non-numeric column each slice is sized by the number of
    rows holding that value instead.

    Args:
        df: DataFrame holding the data to plot.
        column: Name of the column whose distinct values become the slices.
        title: Chart title.
    """
    grouped = df.groupby(column)[column]
    if pd.api.types.is_numeric_dtype(df[column]):
        sums = grouped.sum()
    else:
        # Summing text concatenates the strings (or raises on mixed values),
        # which cannot be used as slice sizes; fall back to row counts.
        sums = grouped.size()

    fig = px.pie(names=sums.index, values=sums.values, title=title)
    st.plotly_chart(fig)
# 執行 K-means 分析並繪製結果
def perform_kmeans(df, x_column, y_column, n_clusters):
    """Run K-means on the selected columns and display the result in Streamlit.

    When *x_column* is "公司名稱" only *y_column* is used as a clustering
    feature; otherwise both columns are used. A scatter plot coloured by
    cluster and a table of the cluster centres are rendered.

    Side effect: a 'Cluster' column holding each row's cluster label is
    added to (or overwritten on) *df*.

    Args:
        df: DataFrame holding the data to cluster.
        x_column: Column shown on the x axis.
        y_column: Column shown on the y axis.
        n_clusters: Number of clusters to fit.
    """
    # The company-name column is only an axis label, never a feature.
    if x_column == "公司名稱":
        feature_columns = [y_column]
    else:
        # dict.fromkeys drops the duplicate when both axes use the same
        # column, so the centre table never gets two identically named columns.
        feature_columns = list(dict.fromkeys([x_column, y_column]))

    # KMeans cannot fit text; tell the user instead of raising.
    non_numeric = [
        str(name)
        for name in feature_columns
        if not pd.api.types.is_numeric_dtype(df[name])
    ]
    if non_numeric:
        rejected = "、".join(non_numeric)
        st.warning(f"K-means 分析需要數值型欄位,無法使用:{rejected}")
        return

    # KMeans raises when asked for more clusters than there are samples.
    if len(df) < n_clusters:
        st.warning(f"資料筆數({len(df)})少於群組數({n_clusters}),無法進行 K-means 分析。")
        return

    kmeans = KMeans(n_clusters=n_clusters)
    df['Cluster'] = kmeans.fit_predict(df[feature_columns])

    fig = px.scatter(df, x=x_column, y=y_column, color='Cluster',
                     title=f"K-means 分析 - {n_clusters} 群組",
                     hover_data=[x_column, y_column])
    st.plotly_chart(fig)

    # Show the cluster centres, one column per feature used for fitting.
    st.write("群組中心:")
    cluster_centers = pd.DataFrame(kmeans.cluster_centers_, columns=feature_columns)
    st.dataframe(cluster_centers)
# 主程式
def main():
    """Build the Streamlit page: pick a dataset, pick columns, draw a chart.

    The sidebar offers a dataset selector, x/y column selectors and a chart
    type selector. The chosen dataset is downloaded on every run and passed to
    the matching plotting or clustering function. If the download fails, or
    the dataset has no numeric column, a message is shown instead.
    """
    st.title("數據視覺化工具")
    st.sidebar.title("選擇選項")

    # URLs of the available datasets, keyed by the label shown in the sidebar.
    data_sources = {
        "溫室氣體": "https://mopsfin.twse.com.tw/opendata/t187ap46_L_1.csv",
        "再生能源": "https://mopsfin.twse.com.tw/opendata/t187ap46_O_2.csv",
        "董事會": "https://mopsfin.twse.com.tw/opendata/t187ap46_L_6.csv",
    }

    selected_dataset = st.sidebar.selectbox("選擇資料集", list(data_sources.keys()))

    # Report a failed download in the page rather than as a raw traceback.
    try:
        df = fetch_and_process_csv(data_sources[selected_dataset])
    except requests.RequestException as exc:
        st.error(f"無法下載資料集「{selected_dataset}」:{exc}")
        return

    # 'number' matches every numeric dtype regardless of platform int width.
    numeric_columns = df.select_dtypes(include="number").columns.tolist()
    if not numeric_columns:
        # Checked before the name column is added, so this stays reachable.
        st.write("所選的資料集中沒有數值型欄位可供繪製或分析。")
        return

    # The company name may label the x axis, but the y axis must be numeric.
    x_options = list(numeric_columns)
    if "公司名稱" in df.columns:
        x_options.insert(0, "公司名稱")

    selected_x_column = st.sidebar.selectbox("選擇X軸欄位", x_options)
    selected_y_column = st.sidebar.selectbox("選擇Y軸數值型欄位", numeric_columns)

    chart_type = st.sidebar.radio("選擇圖表類型", ["長條圖", "圓餅圖", "K-means 分析"])

    if chart_type == "長條圖":
        plot_numeric_bar_chart(
            df,
            selected_x_column,
            selected_y_column,
            f"{selected_dataset} - {selected_y_column} 長條圖",
        )
    elif chart_type == "圓餅圖":
        plot_numeric_pie_chart(
            df,
            selected_x_column,
            f"{selected_dataset} - {selected_x_column} 圓餅圖",
        )
    elif chart_type == "K-means 分析":
        n_clusters = st.sidebar.slider("選擇群組數", min_value=2, max_value=10, value=3)
        perform_kmeans(df, selected_x_column, selected_y_column, n_clusters)
# Start the app only when this file is executed as the main module.
if __name__ == "__main__":
    main()