M_Resturant / app.py
Roberta2024's picture
Update app.py
a37a50e verified
import pandas as pd
import requests
from bs4 import BeautifulSoup
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st
# Page title shown at the top of the Streamlit app.
st.title("各區餐廳數量分佈分析")

# The restaurant page URLs are maintained in a Google Sheet and fetched
# through its CSV export endpoint.
sheet_id = "1SvHM_eV2hoPcOEB4bOnrUMbUCzslPnmHEn6qI7WuOqk"
urls_df = pd.read_csv(f"https://docs.google.com/spreadsheets/d/{sheet_id}/export?format=csv")

# The sheet is expected to expose the URLs in a column named "網址".
# Stop with a readable message instead of a KeyError traceback if it is absent.
if '網址' not in urls_df.columns:
    st.error('試算表中找不到「網址」欄位,無法讀取網址清單。')
    st.stop()

# Drop empty cells (read as NaN) and whitespace-only entries so that only
# usable strings reach the scraper.
_url_series = urls_df['網址'].dropna().astype(str).str.strip()
urls = _url_series[_url_series != ''].tolist()
# Seconds to wait for one restaurant page before giving up on it.
_REQUEST_TIMEOUT = 10
# Placeholder stored when a field cannot be found on the page
# (same value the phone field already used).
_MISSING = 'N/A'


def _text_or_missing(soup, tag, css_class):
    """Return the stripped text of the first matching element, or 'N/A' if absent."""
    node = soup.find(tag, class_=css_class)
    return node.text.strip() if node is not None else _MISSING


def _scrape_restaurant(url):
    """Download one restaurant page and return its fields as a dict.

    Returns a dict with the keys 'Title', 'Address', 'Phone' and
    'Description'. Fields whose element is missing are set to 'N/A'.

    Raises requests.RequestException on connection errors, timeouts,
    malformed URLs or non-2xx HTTP responses.
    """
    response = requests.get(url, timeout=_REQUEST_TIMEOUT)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    # The phone number is carried in the href of a "tel:" link.
    phone_tag = soup.find('a', {'data-event': 'CTA_tel'})
    href = phone_tag.get('href') if phone_tag is not None else None
    phone = href.replace('tel:', '') if href else _MISSING

    return {
        'Title': _text_or_missing(soup, 'h1', 'restaurant-details__heading--title'),
        'Address': _text_or_missing(soup, 'li', 'restaurant-details__heading--address'),
        'Phone': phone,
        'Description': _text_or_missing(soup, 'div', 'restaurant-details__description--text'),
    }


def _common_layout(title_text):
    """Return the layout settings shared by the bar chart and the pie chart."""
    return dict(
        title={
            'text': title_text,
            'y': 0.95,
            'x': 0.5,
            'xanchor': 'center',
            'yanchor': 'top',
            'font': dict(size=24, family="Arial", color="DarkSlateGray"),
        },
        plot_bgcolor='rgba(240,240,240,0.8)',
        paper_bgcolor='white',
        font=dict(family="Arial", size=14),
        hoverlabel=dict(bgcolor="white", font_size=14),
        margin=dict(l=50, r=50, t=80, b=50),
    )


def _build_bar_chart(district_counts):
    """Build the bar chart of restaurant counts per district."""
    fig = px.bar(
        district_counts,
        x='District',
        y='Count',
        title='各區餐廳數量分佈',
        color='Count',
        color_continuous_scale=px.colors.sequential.Viridis,
        text='Count',
    )
    fig.update_layout(
        xaxis_title="區域",
        yaxis_title="餐廳數量",
        xaxis=dict(tickangle=-45),
        **_common_layout("各區餐廳數量分佈"),
    )
    fig.update_traces(
        texttemplate='%{text}',
        textposition='outside',
        marker_line_color='rgb(8,48,107)',
        marker_line_width=1.5,
        opacity=0.8,
    )
    return fig


def _build_pie_chart(district_counts):
    """Build the donut chart of each district's share of restaurants."""
    # Computed once: the slice(s) with the highest count are pulled outwards.
    top_count = district_counts['Count'].max()
    fig = go.Figure(data=[go.Pie(
        labels=district_counts['District'],
        values=district_counts['Count'],
        hole=.3,
        textinfo='label+percent',
        insidetextorientation='radial',
        textfont_size=14,
        marker=dict(
            colors=px.colors.qualitative.Set3,
            line=dict(color='#000000', width=2),
        ),
        pull=[0.1 if count == top_count else 0 for count in district_counts['Count']],
    )])
    fig.update_layout(legend_title="區域", **_common_layout("各區餐廳比例"))
    return fig


# Everything below runs only when the user presses the button.
if st.button("點擊開始爬取資料並生成圖表"):
    records = []
    failures = []

    with st.spinner('正在爬取資料...'):
        progress = st.progress(0)
        total = len(urls)
        for done, url in enumerate(urls, start=1):
            try:
                records.append(_scrape_restaurant(url))
            except requests.RequestException as exc:
                # One unreachable page should not abort the whole run;
                # remember it and report it after the loop.
                failures.append((url, exc))
            progress.progress(done / total)

    if failures:
        failure_lines = "\n".join(f"- {url}({exc})" for url, exc in failures)
        st.warning(f"有 {len(failures)} 個網址爬取失敗,已略過:\n{failure_lines}")

    if not records:
        # Without any rows there is no 'Address' column to analyse.
        st.error('沒有成功爬取任何餐廳資料,無法生成圖表。')
    else:
        st.success('資料爬取完成!')

        df = pd.DataFrame(records)

        # Extract the district (text ending in "區") from each address.
        # NOTE(review): `\w+` is greedy and also matches CJK characters, so the
        # captured text can include whatever precedes the district name
        # (e.g. a city prefix) - confirm against real address data.
        df['District'] = df['Address'].str.extract(r'(\w+區)')

        # Count restaurants per district; rows without a match are NaN and
        # are left out by value_counts().
        district_counts = df['District'].value_counts().reset_index()
        district_counts.columns = ['District', 'Count']

        # Render both charts.
        st.plotly_chart(_build_bar_chart(district_counts), use_container_width=True)
        st.plotly_chart(_build_pie_chart(district_counts), use_container_width=True)

        # Per-district summary table.
        st.write("各區餐廳數量統計表")
        st.dataframe(district_counts)

        # Raw scraped rows.
        st.write("原始餐廳資料")
        st.dataframe(df)