Spaces:
Sleeping
Sleeping
File size: 4,631 Bytes
c065297 c62d694 89cae94 8200581 c065297 89cae94 c62d694 3bdcd03 c62d694 53e3c95 c62d694 53e3c95 c62d694 5f1a4cd c62d694 53e3c95 c62d694 53e3c95 5f1a4cd c62d694 5f1a4cd c62d694 c065297 5f1a4cd c62d694 5f1a4cd c62d694 c065297 c62d694 a37a50e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 |
import pandas as pd
import requests
from bs4 import BeautifulSoup
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st
# Streamlit application title.
st.title("各區餐廳數量分佈分析")

# Read the restaurant page URLs from a Google Sheet via its CSV export link.
sheet_id = "1SvHM_eV2hoPcOEB4bOnrUMbUCzslPnmHEn6qI7WuOqk"
urls_df = pd.read_csv(
    f"https://docs.google.com/spreadsheets/d/{sheet_id}/export?format=csv"
)

# Convert the URL column to a plain list (the column is assumed to be named
# "網址"). Blank cells are read back as NaN by read_csv; drop them and any
# whitespace-only entries so that only usable strings reach requests.get().
urls = [
    url
    for url in urls_df['網址'].dropna().astype(str).str.strip()
    if url
]
# Upper bound (seconds) for each page request so a stalled server cannot
# hang the app indefinitely.
_REQUEST_TIMEOUT_SECONDS = 15


def _text_or_na(soup, tag, css_class):
    """Return the stripped text of the first ``tag`` with ``css_class``.

    Returns ``'N/A'`` when no such element exists, mirroring the fallback
    already used for the phone number.
    """
    node = soup.find(tag, class_=css_class)
    return node.text.strip() if node else 'N/A'


def _scrape_restaurant(url):
    """Fetch one restaurant page and extract its details.

    Args:
        url: Address of the restaurant page to download.

    Returns:
        A dict with the keys ``Title``, ``Address``, ``Phone`` and
        ``Description``; any field not found on the page is ``'N/A'``.

    Raises:
        requests.RequestException: On connection errors, timeouts, invalid
            URLs or a non-2xx HTTP status.
    """
    response = requests.get(url, timeout=_REQUEST_TIMEOUT_SECONDS)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    # The phone number is carried in the href of the "tel:" call-to-action link.
    phone_tag = soup.find('a', {'data-event': 'CTA_tel'})
    href = phone_tag.get('href') if phone_tag else None
    phone = href.replace('tel:', '') if href else 'N/A'

    return {
        'Title': _text_or_na(soup, 'h1', 'restaurant-details__heading--title'),
        'Address': _text_or_na(soup, 'li', 'restaurant-details__heading--address'),
        'Phone': phone,
        'Description': _text_or_na(soup, 'div', 'restaurant-details__description--text'),
    }


def _build_bar_chart(district_counts):
    """Build the bar chart of restaurant counts per district.

    Args:
        district_counts: DataFrame with ``District`` and ``Count`` columns.

    Returns:
        A Plotly figure.
    """
    fig = px.bar(
        district_counts,
        x='District',
        y='Count',
        title='各區餐廳數量分佈',
        color='Count',
        color_continuous_scale=px.colors.sequential.Viridis,
        text='Count',
    )
    fig.update_layout(
        title={
            'text': "各區餐廳數量分佈",
            'y': 0.95,
            'x': 0.5,
            'xanchor': 'center',
            'yanchor': 'top',
            'font': dict(size=24, family="Arial", color="DarkSlateGray"),
        },
        xaxis_title="區域",
        yaxis_title="餐廳數量",
        xaxis=dict(tickangle=-45),
        plot_bgcolor='rgba(240,240,240,0.8)',
        paper_bgcolor='white',
        font=dict(family="Arial", size=14),
        hoverlabel=dict(bgcolor="white", font_size=14),
        margin=dict(l=50, r=50, t=80, b=50),
    )
    fig.update_traces(
        texttemplate='%{text}',
        textposition='outside',
        marker_line_color='rgb(8,48,107)',
        marker_line_width=1.5,
        opacity=0.8,
    )
    return fig


def _build_pie_chart(district_counts):
    """Build the donut chart of each district's share of restaurants.

    The slice(s) with the highest count are pulled out for emphasis.

    Args:
        district_counts: DataFrame with ``District`` and ``Count`` columns.

    Returns:
        A Plotly figure.
    """
    counts = district_counts['Count']
    # Computed once instead of once per slice.
    top_count = counts.max()
    fig = go.Figure(data=[go.Pie(
        labels=district_counts['District'],
        values=counts,
        hole=.3,
        textinfo='label+percent',
        insidetextorientation='radial',
        textfont_size=14,
        marker=dict(
            colors=px.colors.qualitative.Set3,
            line=dict(color='#000000', width=2),
        ),
        pull=[0.1 if count == top_count else 0 for count in counts],
    )])
    fig.update_layout(
        title={
            'text': "各區餐廳比例",
            'y': 0.95,
            'x': 0.5,
            'xanchor': 'center',
            'yanchor': 'top',
            'font': dict(size=24, family="Arial", color="DarkSlateGray"),
        },
        legend_title="區域",
        plot_bgcolor='rgba(240,240,240,0.8)',
        paper_bgcolor='white',
        font=dict(family="Arial", size=14),
        hoverlabel=dict(bgcolor="white", font_size=14),
        margin=dict(l=50, r=50, t=80, b=50),
    )
    return fig


# Create the button; all scraping and charting happens only after a click.
if st.button("點擊開始爬取資料並生成圖表"):
    # One dict per successfully scraped restaurant.
    df_list = []
    # URLs that could not be fetched, with the reason, for reporting.
    failed_urls = []

    with st.spinner('正在爬取資料...'):
        progress = st.progress(0)
        for i, url in enumerate(urls):
            try:
                df_list.append(_scrape_restaurant(url))
            except requests.RequestException as exc:
                # Skip the broken page but keep going with the rest.
                failed_urls.append(f"{url} ({exc})")
            # Update the progress bar.
            progress.progress((i + 1) / len(urls))

    if failed_urls:
        st.warning(
            "以下網址爬取失敗,已略過:\n"
            + "\n".join(f"- {item}" for item in failed_urls)
        )

    if not df_list:
        # Nothing to aggregate or plot; avoid KeyError on the missing columns.
        st.error('沒有成功爬取到任何餐廳資料,無法生成圖表。')
    else:
        st.success('資料爬取完成!')

        # Combine all scraped records into a single DataFrame.
        df = pd.DataFrame(df_list)

        # Extract the district from the address; rows without a match get NaN
        # and are left out of the counts below.
        # NOTE(review): `\w+` is greedy and also matches CJK characters, so any
        # word characters directly preceding the district (e.g. a city name)
        # are captured too — confirm this is the intended grouping key.
        df['District'] = df['Address'].str.extract(r'(\w+區)', expand=False)

        # Count restaurants per district. Naming the columns explicitly keeps
        # the result independent of the pandas version's default labels.
        district_counts = (
            df['District']
            .value_counts()
            .rename_axis('District')
            .reset_index(name='Count')
        )

        fig_bar = _build_bar_chart(district_counts)
        fig_pie = _build_pie_chart(district_counts)

        # Show the charts in Streamlit.
        st.plotly_chart(fig_bar, use_container_width=True)
        st.plotly_chart(fig_pie, use_container_width=True)

        # Show the per-district statistics.
        st.write("各區餐廳數量統計表")
        st.dataframe(district_counts)

        # Show the raw scraped data.
        st.write("原始餐廳資料")
        st.dataframe(df)