File size: 4,934 Bytes
f844013
 
 
 
 
 
8a76c02
 
 
e500adc
 
e62c753
f844013
e62c753
f844013
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e62c753
f844013
 
e62c753
f844013
 
e62c753
f844013
 
e62c753
f844013
e62c753
 
 
f844013
 
e62c753
f844013
 
e62c753
8a76c02
e62c753
8a76c02
e62c753
7002add
 
 
 
e500adc
 
 
 
7002add
e500adc
 
e62c753
e500adc
7002add
e62c753
e500adc
43a78ca
e62c753
f844013
 
 
e62c753
f844013
 
 
 
e62c753
f844013
 
 
 
 
e62c753
f844013
43a78ca
 
 
 
 
 
 
 
 
 
f844013
e62c753
f844013
 
 
e62c753
f844013
 
e62c753
f844013
 
 
 
e62c753
f844013
 
e62c753
f844013
 
e62c753
f844013
 
 
e62c753
f844013
 
 
e62c753
8a76c02
e62c753
8a76c02
 
 
e62c753
 
 
 
8a76c02
 
 
 
 
e62c753
8a76c02
e62c753
8a76c02
 
e62c753
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
import streamlit as st
import requests
from bs4 import BeautifulSoup
import pandas as pd
import plotly.express as px
import base64
import folium
from streamlit_folium import st_folium
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut, GeocoderServiceError
import time
from folium.plugins import MarkerCluster  # 新增此行用於標記聚合

# Helper that paints the app background from a local image file
def set_background(png_file):
    """Use the image stored at ``png_file`` as the Streamlit app background.

    The file is read in binary mode, base64-encoded, and embedded as a
    ``data:image/png`` URL inside a CSS rule for ``.stApp`` that is injected
    into the page through ``st.markdown``.

    Args:
        png_file: Path of the image file to read.
    """
    with open(png_file, "rb") as image_file:
        encoded = base64.b64encode(image_file.read()).decode()

    # Inline the image so no separate static file has to be served.
    css = f"""
        <style>
        .stApp {{
            background: url(data:image/png;base64,{encoded});
            background-size: cover;
        }}
        </style>
        """
    st.markdown(css, unsafe_allow_html=True)

# Apply the background image
set_background('CAT.png')

# Page title
st.title("寵物醫院評分查詢")

# Minimum rating chosen by the user (slider from 1.0 to 5.0, default 3.5)
min_rating = st.slider(
    "請輸入想查詢的最低評分:",
    min_value=1.0,
    max_value=5.0,
    value=3.5,
)

# Pages to scrape
urls = [
    "https://www.tw-animal.com/pet/171211/c000196.html",
    "https://www.tw-animal.com/pet/171211/c000186.html",
    # ... other URLs ...
]

# Rows extracted from the scraped pages are collected here
data_list = []

# Geocoder used to turn addresses into coordinates.
# NOTE(review): Nominatim's usage policy asks for an application-specific
# user agent; confirm this value is acceptable to the service.
geolocator = Nominatim(user_agent="geoapiExercises")

# Simple in-memory cache of geocoding results, keyed by address
geocode_cache = {}

# Geocode an address with caching and retry-on-error
def geocode_address(address, retries=5, delay=5):
    """Look up ``address`` with the module-level geocoder.

    Successful lookups are stored in ``geocode_cache`` and served from there
    on later calls. Only service errors (timeouts / service failures) are
    retried; when the service answers with no match the lookup stops at once,
    because repeating the identical query would only add load on the service.

    Args:
        address: Address string to geocode.
        retries: Maximum number of attempts when the service errors out.
        delay: Seconds to wait between failed attempts.

    Returns:
        The object returned by ``geolocator.geocode`` on success, or ``None``
        when the address could not be geocoded.
    """
    if address in geocode_cache:
        return geocode_cache[address]

    for attempt in range(retries):
        try:
            location = geolocator.geocode(address)
        except (GeocoderTimedOut, GeocoderServiceError) as e:
            st.warning(f"地理編碼錯誤: {e}. 重試中...")
            # Back off before the next attempt, but do not stall the app
            # after the final one.
            if attempt < retries - 1:
                time.sleep(delay)
            continue

        if location:
            geocode_cache[address] = location
            return location

        # The service responded but found nothing; retrying cannot help.
        break

    st.warning(f"無法地理編碼地址: {address}")
    return None

# Scrape the hospital pages when the "開始爬取資料" button is pressed.
#
# Streamlit re-runs the whole script on every widget interaction, and a button
# only reports True during the run triggered by its own click. The scraped
# rows and the "show map" choice are therefore kept in st.session_state so
# that the table, charts and map survive later reruns (for example the rerun
# caused by clicking "顯示地圖" or by interacting with the map itself).
if st.button('開始爬取資料'):
    st.write("正在爬取資料,請稍候...")

    # Visit every URL and extract the hospital fields
    for url in urls:
        try:
            # A timeout keeps one unresponsive page from hanging the app.
            response = requests.get(url, timeout=10)
            response.raise_for_status()
        except requests.RequestException as e:
            st.warning(f"無法取得頁面: {url} ({e})")
            continue

        soup = BeautifulSoup(response.content, 'html.parser')

        title_tag = soup.find('h1', class_='t-intro__title')
        phone_tag = soup.find('a', class_='t-font-large')
        address_tag = soup.find('a', class_='t-font-medium')
        rating_tag = soup.find('span', class_='t-intro__recommand')

        # soup.find returns None when an element is absent; skip such pages
        # instead of failing on .get_text().
        required_tags = (title_tag, phone_tag, address_tag, rating_tag)
        if any(tag is None for tag in required_tags):
            st.warning(f"頁面缺少必要欄位,已略過: {url}")
            continue

        title = title_tag.get_text(strip=True)
        phone = phone_tag.get_text(strip=True)
        address = address_tag.get_text(strip=True)
        try:
            rating = float(rating_tag.get_text(strip=True))
        except ValueError:
            st.warning(f"無法解析評分,已略過: {url}")
            continue

        # Keep only hospitals that reach the threshold and can be geocoded
        if rating >= min_rating:
            location = geocode_address(address)
            if location:
                data_list.append({
                    "標題": title,
                    "手機": phone,
                    "地址": address,
                    "評分": rating,
                    "經度": location.longitude,
                    "緯度": location.latitude
                })

    # Persist the result of this scrape; a new scrape hides the map again
    # until it is requested for the new data.
    st.session_state['hospital_data'] = list(data_list)
    st.session_state['show_map'] = False

# Render whatever the most recent scrape produced
hospital_data = st.session_state.get('hospital_data')
if hospital_data:
    df1 = pd.DataFrame(hospital_data)

    # Derive the region from the address (first whitespace-separated token)
    df1['區域'] = df1['地址'].apply(lambda x: x.split()[0])

    # Group by region, joining the text columns and averaging the rating
    grouped_df = df1.groupby('區域').agg({
        '標題': lambda x: ' | '.join(x),
        '手機': lambda x: ' | '.join(x),
        '地址': lambda x: ' | '.join(x),
        '評分': 'mean'  # average rating
    }).reset_index()

    # Data table
    st.dataframe(df1)

    # Plotly bar chart
    bar_fig = px.bar(grouped_df, x='區域', y='評分', title="各區域寵物醫院統計", labels={'評分':'平均評分', '區域':'區域'})
    st.plotly_chart(bar_fig)

    # Plotly pie chart
    pie_fig = px.pie(grouped_df, names='區域', values='評分', title="各區域寵物醫院比例")
    st.plotly_chart(pie_fig)

    # Remember the map request so the map stays visible across reruns
    if st.button('顯示地圖'):
        st.session_state['show_map'] = True

    if st.session_state.get('show_map'):
        # Folium map centred on the mean position of all hospitals
        map_center = [df1['緯度'].mean(), df1['經度'].mean()]
        pet_map = folium.Map(location=map_center, zoom_start=12)

        # Cluster nearby markers
        marker_cluster = MarkerCluster().add_to(pet_map)

        # One marker per hospital, added to the cluster
        for _, row in df1.iterrows():
            folium.Marker(
                location=[row['緯度'], row['經度']],
                popup=f"{row['標題']} (評分: {row['評分']})",
                tooltip=row['標題']
            ).add_to(marker_cluster)

        # Render the map through streamlit_folium
        st_folium(pet_map, width=700, height=500)