# Spaces: Sleeping (web-page status banner captured along with the script; not part of the program)
import pandas as pd | |
import requests | |
from bs4 import BeautifulSoup | |
import plotly.express as px | |
import plotly.graph_objects as go | |
import streamlit as st | |
# Page title shown at the top of the Streamlit app.
st.title("各區餐廳數量分佈分析")

# The list of restaurant page URLs is maintained in a Google Sheet and is read
# through the sheet's CSV export endpoint.
sheet_id = "1SvHM_eV2hoPcOEB4bOnrUMbUCzslPnmHEn6qI7WuOqk"
urls_df = pd.read_csv(f"https://docs.google.com/spreadsheets/d/{sheet_id}/export?format=csv")

# The URL column is expected to be named "網址". pandas reads blank cells as
# NaN; drop those (and whitespace-only cells) so that only real URL strings
# are handed to requests.get later on.
urls = [
    url.strip()
    for url in urls_df['網址'].dropna().astype(str)
    if url.strip()
]
# Seconds to wait for each restaurant page before giving up; without a timeout
# a single unresponsive server would hang the whole run.
_REQUEST_TIMEOUT = 10

# Layout settings shared by the bar chart and the pie chart.
_BASE_LAYOUT = dict(
    plot_bgcolor='rgba(240,240,240,0.8)',
    paper_bgcolor='white',
    font=dict(family="Arial", size=14),
    hoverlabel=dict(bgcolor="white", font_size=14),
    margin=dict(l=50, r=50, t=80, b=50),
)


def _chart_title(text):
    """Return the centred, top-anchored title settings used by both charts."""
    return {
        'text': text,
        'y': 0.95,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top',
        'font': dict(size=24, family="Arial", color="DarkSlateGray"),
    }


def _text_or_na(tag):
    """Return the stripped text of a parsed tag, or 'N/A' when the tag is None."""
    return tag.text.strip() if tag is not None else 'N/A'


def _scrape_restaurant(url):
    """Fetch one restaurant page and extract its details.

    Args:
        url: Address of the restaurant page to download.

    Returns:
        A dict with the keys 'Title', 'Address', 'Phone' and 'Description'.
        Any field whose element is not present on the page is set to 'N/A'.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an error status.
    """
    response = requests.get(url, timeout=_REQUEST_TIMEOUT)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    # The phone number is carried in the href of the "call" link (tel:...).
    phone_tag = soup.find('a', {'data-event': 'CTA_tel'})
    phone_href = phone_tag.get('href') if phone_tag is not None else None

    return {
        'Title': _text_or_na(
            soup.find('h1', class_='restaurant-details__heading--title')),
        'Address': _text_or_na(
            soup.find('li', class_='restaurant-details__heading--address')),
        'Phone': phone_href.replace('tel:', '') if phone_href else 'N/A',
        'Description': _text_or_na(
            soup.find('div', class_='restaurant-details__description--text')),
    }


def _build_bar_chart(district_counts):
    """Build the bar chart of restaurant counts per district.

    Args:
        district_counts: DataFrame with the columns 'District' and 'Count'.

    Returns:
        The configured plotly figure.
    """
    fig = px.bar(
        district_counts,
        x='District',
        y='Count',
        title='各區餐廳數量分佈',
        color='Count',
        color_continuous_scale=px.colors.sequential.Viridis,
        text='Count',
    )
    fig.update_layout(
        title=_chart_title("各區餐廳數量分佈"),
        xaxis_title="區域",
        yaxis_title="餐廳數量",
        xaxis=dict(tickangle=-45),
        **_BASE_LAYOUT,
    )
    fig.update_traces(
        texttemplate='%{text}',
        textposition='outside',
        marker_line_color='rgb(8,48,107)',
        marker_line_width=1.5,
        opacity=0.8,
    )
    return fig


def _build_pie_chart(district_counts):
    """Build the donut chart of each district's share of restaurants.

    Args:
        district_counts: DataFrame with the columns 'District' and 'Count'.

    Returns:
        The configured plotly figure.
    """
    # Computed once up front; every slice that ties for the largest count is
    # pulled out of the pie.
    top_count = district_counts['Count'].max()
    fig = go.Figure(data=[go.Pie(
        labels=district_counts['District'],
        values=district_counts['Count'],
        hole=.3,
        textinfo='label+percent',
        insidetextorientation='radial',
        textfont_size=14,
        marker=dict(
            colors=px.colors.qualitative.Set3,
            line=dict(color='#000000', width=2),
        ),
        pull=[0.1 if count == top_count else 0
              for count in district_counts['Count']],
    )])
    fig.update_layout(
        title=_chart_title("各區餐廳比例"),
        legend_title="區域",
        **_BASE_LAYOUT,
    )
    return fig


# Scraping only starts when the user presses the button.
if st.button("點擊開始爬取資料並生成圖表"):
    records = []
    with st.spinner('正在爬取資料...'):
        progress = st.progress(0)
        for i, url in enumerate(urls):
            try:
                records.append(_scrape_restaurant(url))
            except requests.RequestException as exc:
                # One unreachable page should not discard the pages that
                # were already scraped; report it and move on.
                st.warning(f"無法取得 {url}:{exc}")
            progress.progress((i + 1) / len(urls))

    if not records:
        # Nothing to aggregate or plot; an empty DataFrame would have no
        # 'Address' column to extract districts from.
        st.warning('沒有成功爬取到任何餐廳資料。')
    else:
        st.success('資料爬取完成!')
        df = pd.DataFrame(records)

        # Take the district as the run of word characters ending in "區".
        # NOTE(review): \w+ is greedy and also matches CJK characters and
        # digits, so a city or postal-code prefix written directly before the
        # district name would be captured too - confirm against real addresses.
        df['District'] = df['Address'].str.extract(r'(\w+區)', expand=False)

        # Number of restaurants per district (rows without a match are not counted).
        district_counts = df['District'].value_counts().reset_index()
        district_counts.columns = ['District', 'Count']

        fig_bar = _build_bar_chart(district_counts)
        fig_pie = _build_pie_chart(district_counts)

        # Charts first, then the aggregated table, then the raw scraped rows.
        st.plotly_chart(fig_bar, use_container_width=True)
        st.plotly_chart(fig_pie, use_container_width=True)
        st.write("各區餐廳數量統計表")
        st.dataframe(district_counts)
        st.write("原始餐廳資料")
        st.dataframe(df)