Spaces:

Roberta2024
/

M_Resturant

Sleeping

File size: 4,631 Bytes

import pandas as pd
import requests
from bs4 import BeautifulSoup
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st

# Streamlit application title
st.title("各區餐廳數量分佈分析")

# Read the restaurant page URLs from a Google Sheet via its CSV export endpoint
sheet_id = "1SvHM_eV2hoPcOEB4bOnrUMbUCzslPnmHEn6qI7WuOqk"
urls_df = pd.read_csv(f"https://docs.google.com/spreadsheets/d/{sheet_id}/export?format=csv")

# Convert the URL column to a list; the sheet is assumed to have a column
# named "網址". Blank cells are read as NaN and padded cells carry stray
# whitespace; neither is a fetchable URL, so drop/trim them here instead of
# letting the HTTP request fail later.
urls = [
    url
    for url in urls_df['網址'].dropna().astype(str).str.strip()
    if url
]

# Seconds to wait for each restaurant page before giving up, so that a single
# unresponsive server cannot hang the whole crawl.
REQUEST_TIMEOUT = 10


def _text_or_na(tag):
    """Return the stripped text of a parsed tag, or 'N/A' when the tag is missing."""
    return tag.text.strip() if tag is not None else 'N/A'


def _scrape_restaurant(url):
    """Fetch one restaurant page and extract its details.

    Args:
        url: Address of the restaurant page to download.

    Returns:
        A dict with the keys 'Title', 'Address', 'Phone' and 'Description'.
        Any element that is not present on the page is reported as 'N/A'.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    # Reject error pages up front; otherwise they would be parsed like a
    # restaurant page and produce a row made only of 'N/A' values.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    # The phone number is carried in the href of a "tel:" link
    phone_tag = soup.find('a', {'data-event': 'CTA_tel'})
    href = phone_tag.get('href') if phone_tag is not None else None
    phone = href.replace('tel:', '') if href else 'N/A'

    return {
        'Title': _text_or_na(
            soup.find('h1', class_='restaurant-details__heading--title')
        ),
        'Address': _text_or_na(
            soup.find('li', class_='restaurant-details__heading--address')
        ),
        'Phone': phone,
        'Description': _text_or_na(
            soup.find('div', class_='restaurant-details__description--text')
        ),
    }


def _build_bar_chart(district_counts):
    """Return the styled bar chart of restaurant counts per district.

    Args:
        district_counts: DataFrame with the columns 'District' and 'Count'.
    """
    fig = px.bar(
        district_counts,
        x='District',
        y='Count',
        title='各區餐廳數量分佈',
        color='Count',
        color_continuous_scale=px.colors.sequential.Viridis,
        text='Count',
    )
    fig.update_layout(
        title={
            'text': "各區餐廳數量分佈",
            'y': 0.95,
            'x': 0.5,
            'xanchor': 'center',
            'yanchor': 'top',
            'font': dict(size=24, family="Arial", color="DarkSlateGray"),
        },
        xaxis_title="區域",
        yaxis_title="餐廳數量",
        xaxis=dict(tickangle=-45),
        plot_bgcolor='rgba(240,240,240,0.8)',
        paper_bgcolor='white',
        font=dict(family="Arial", size=14),
        hoverlabel=dict(bgcolor="white", font_size=14),
        margin=dict(l=50, r=50, t=80, b=50),
    )
    fig.update_traces(
        texttemplate='%{text}',
        textposition='outside',
        marker_line_color='rgb(8,48,107)',
        marker_line_width=1.5,
        opacity=0.8,
    )
    return fig


def _build_pie_chart(district_counts):
    """Return the styled donut chart of each district's share of restaurants.

    The slice(s) holding the highest count are pulled out of the ring.

    Args:
        district_counts: DataFrame with the columns 'District' and 'Count'.
    """
    # Compute the maximum once rather than once per slice
    top_count = district_counts['Count'].max()
    fig = go.Figure(data=[go.Pie(
        labels=district_counts['District'],
        values=district_counts['Count'],
        hole=.3,
        textinfo='label+percent',
        insidetextorientation='radial',
        textfont_size=14,
        marker=dict(
            colors=px.colors.qualitative.Set3,
            line=dict(color='#000000', width=2),
        ),
        pull=[0.1 if count == top_count else 0 for count in district_counts['Count']],
    )])
    fig.update_layout(
        title={
            'text': "各區餐廳比例",
            'y': 0.95,
            'x': 0.5,
            'xanchor': 'center',
            'yanchor': 'top',
            'font': dict(size=24, family="Arial", color="DarkSlateGray"),
        },
        legend_title="區域",
        plot_bgcolor='rgba(240,240,240,0.8)',
        paper_bgcolor='white',
        font=dict(family="Arial", size=14),
        hoverlabel=dict(bgcolor="white", font_size=14),
        margin=dict(l=50, r=50, t=80, b=50),
    )
    return fig


# Create the button; everything below runs only on the rerun triggered by a click
if st.button("點擊開始爬取資料並生成圖表"):
    with st.spinner('正在爬取資料...'):
        progress = st.progress(0)

        # One dict per successfully scraped restaurant
        df_list = []

        for i, url in enumerate(urls):
            try:
                df_list.append(_scrape_restaurant(url))
            except requests.RequestException as exc:
                # A single unreachable page should not abort the whole crawl;
                # report it and carry on with the remaining URLs.
                st.warning(f'無法爬取 {url}：{exc}')

            # Advance the progress bar whether or not this page succeeded
            progress.progress((i + 1) / len(urls))

        if df_list:
            st.success('資料爬取完成！')

    if not df_list:
        # With no rows the DataFrame would have no 'Address' column and the
        # district extraction below would raise KeyError.
        st.warning('沒有成功爬取到任何餐廳資料，無法生成圖表。')
    else:
        # Combine all scraped rows into a single DataFrame
        df = pd.DataFrame(df_list)

        # Extract the district (text ending in "區") from each address
        df['District'] = df['Address'].str.extract(r'(\w+區)', expand=False)

        # Count restaurants per district; rows without a district are ignored
        district_counts = df['District'].value_counts().reset_index()
        district_counts.columns = ['District', 'Count']

        fig_bar = _build_bar_chart(district_counts)
        fig_pie = _build_pie_chart(district_counts)

        # Show the charts
        st.plotly_chart(fig_bar, use_container_width=True)
        st.plotly_chart(fig_pie, use_container_width=True)

        # Show the per-district summary table
        st.write("各區餐廳數量統計表")
        st.dataframe(district_counts)

        # Show the raw scraped data
        st.write("原始餐廳資料")
        st.dataframe(df)