File size: 4,631 Bytes
c065297
c62d694
 
89cae94
8200581
c065297
89cae94
c62d694
 
3bdcd03
c62d694
 
 
53e3c95
c62d694
 
53e3c95
c62d694
 
 
 
 
 
 
 
 
 
5f1a4cd
 
 
 
 
 
 
 
 
 
 
 
c62d694
 
 
 
53e3c95
c62d694
 
 
 
 
 
 
 
 
53e3c95
5f1a4cd
c62d694
 
 
 
 
 
 
 
 
5f1a4cd
c62d694
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c065297
5f1a4cd
 
 
 
 
 
 
 
 
c62d694
 
 
 
 
 
 
 
 
 
 
 
 
5f1a4cd
c62d694
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c065297
c62d694
 
 
 
 
 
 
 
 
 
a37a50e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import pandas as pd
import requests
from bs4 import BeautifulSoup
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st

# Streamlit application title
st.title("各區餐廳數量分佈分析")

# Read the restaurant page URLs from a Google Sheet, exported as CSV
sheet_id = "1SvHM_eV2hoPcOEB4bOnrUMbUCzslPnmHEn6qI7WuOqk"
urls_df = pd.read_csv(f"https://docs.google.com/spreadsheets/d/{sheet_id}/export?format=csv")

# Convert the URL column to a list (assumes the sheet's URL column is named "網址").
# Blank cells are read as NaN by read_csv; drop them (and whitespace-only
# entries) so that only real URL strings are handed to the scraper.
url_series = urls_df['網址'].dropna().astype(str).str.strip()
urls = url_series[url_series != ''].tolist()

def _text_or_na(tag):
    """Return the stripped text of a parsed HTML tag, or 'N/A' if the tag is missing."""
    return tag.text.strip() if tag is not None else 'N/A'


# Create the button; everything below runs only when it is clicked
if st.button("點擊開始爬取資料並生成圖表"):
    with st.spinner('正在爬取資料...'):
        progress = st.progress(0)

        # One dict per successfully scraped restaurant page
        df_list = []
        # URLs that could not be fetched, reported to the user afterwards
        failed_urls = []

        # Visit every URL and scrape the fields of interest
        for i, url in enumerate(urls):
            try:
                # A timeout keeps one unresponsive server from hanging the app
                response = requests.get(url, timeout=10)
                # Treat HTTP error pages (4xx/5xx) as failures instead of parsing them
                response.raise_for_status()
            except requests.RequestException as exc:
                failed_urls.append(f"{url} ({exc})")
            else:
                soup = BeautifulSoup(response.content, 'html.parser')
                # Any element missing from the page falls back to 'N/A',
                # the same convention used for the phone number
                title = _text_or_na(soup.find('h1', class_='restaurant-details__heading--title'))
                address = _text_or_na(soup.find('li', class_='restaurant-details__heading--address'))
                # Phone number: taken from the "tel:" link when present
                phone_tag = soup.find('a', {'data-event': 'CTA_tel'})
                phone_href = phone_tag.get('href') if phone_tag else None
                phone = phone_href.replace('tel:', '') if phone_href else 'N/A'
                description = _text_or_na(soup.find('div', class_='restaurant-details__description--text'))
                # Record the scraped row
                df_list.append({'Title': title, 'Address': address, 'Phone': phone, 'Description': description})

            # Advance the progress bar whether or not this URL succeeded
            progress.progress((i + 1) / len(urls))

        if failed_urls:
            st.warning("以下網址無法讀取,已略過:\n\n" + "\n\n".join(failed_urls))
        if df_list:
            st.success('資料爬取完成!')

    # Without any rows the DataFrame has no 'Address' column and the
    # analysis below would raise KeyError, so stop this run cleanly instead
    if not df_list:
        st.error("沒有成功爬取任何餐廳資料,無法生成圖表。")
        st.stop()

    # Combine all scraped rows into a single DataFrame
    df = pd.DataFrame(df_list)

    # Extract the district (text ending in "區") from the address
    df['District'] = df['Address'].str.extract(r'(\w+區)')

    # Count the number of restaurants per district
    district_counts = df['District'].value_counts().reset_index()
    district_counts.columns = ['District', 'Count']

    # Build the styled bar chart
    fig_bar = px.bar(
        district_counts,
        x='District',
        y='Count',
        title='各區餐廳數量分佈',
        color='Count',
        color_continuous_scale=px.colors.sequential.Viridis,
        text='Count'
    )

    fig_bar.update_layout(
        title={
            'text': "各區餐廳數量分佈",
            'y': 0.95,
            'x': 0.5,
            'xanchor': 'center',
            'yanchor': 'top',
            'font': dict(size=24, family="Arial", color="DarkSlateGray")
        },
        xaxis_title="區域",
        yaxis_title="餐廳數量",
        xaxis=dict(tickangle=-45),
        plot_bgcolor='rgba(240,240,240,0.8)',
        paper_bgcolor='white',
        font=dict(family="Arial", size=14),
        hoverlabel=dict(bgcolor="white", font_size=14),
        margin=dict(l=50, r=50, t=80, b=50)
    )

    fig_bar.update_traces(
        texttemplate='%{text}',
        textposition='outside',
        marker_line_color='rgb(8,48,107)',
        marker_line_width=1.5,
        opacity=0.8
    )

    # Build the styled donut/pie chart; the slice(s) with the highest count
    # are pulled out. The maximum is computed once rather than per slice.
    top_count = district_counts['Count'].max()
    fig_pie = go.Figure(data=[go.Pie(
        labels=district_counts['District'],
        values=district_counts['Count'],
        hole=.3,
        textinfo='label+percent',
        insidetextorientation='radial',
        textfont_size=14,
        marker=dict(
            colors=px.colors.qualitative.Set3,
            line=dict(color='#000000', width=2)
        ),
        pull=[0.1 if count == top_count else 0 for count in district_counts['Count']]
    )])

    fig_pie.update_layout(
        title={
            'text': "各區餐廳比例",
            'y': 0.95,
            'x': 0.5,
            'xanchor': 'center',
            'yanchor': 'top',
            'font': dict(size=24, family="Arial", color="DarkSlateGray")
        },
        legend_title="區域",
        plot_bgcolor='rgba(240,240,240,0.8)',
        paper_bgcolor='white',
        font=dict(family="Arial", size=14),
        hoverlabel=dict(bgcolor="white", font_size=14),
        margin=dict(l=50, r=50, t=80, b=50)
    )

    # Render both charts in the Streamlit page
    st.plotly_chart(fig_bar, use_container_width=True)
    st.plotly_chart(fig_pie, use_container_width=True)

    # Show the per-district counts table
    st.write("各區餐廳數量統計表")
    st.dataframe(district_counts)

    # Show the raw scraped data
    st.write("原始餐廳資料")
    st.dataframe(df)