Spaces:
Sleeping
Sleeping
import streamlit as st | |
import requests | |
from bs4 import BeautifulSoup | |
import pandas as pd | |
import plotly.express as px | |
import base64 | |
import folium | |
from streamlit_folium import st_folium | |
from geopy.geocoders import Nominatim | |
from geopy.exc import GeocoderTimedOut, GeocoderServiceError | |
import time | |
from folium.plugins import MarkerCluster # 新增此行用於標記聚合 | |
# Helper that installs a full-page background image.
def set_background(png_file):
    """Use the image file at *png_file* as the app background.

    The file is read as raw bytes, base64-encoded, and embedded as a
    ``data:image/png`` URL inside a ``<style>`` rule targeting ``.stApp``.
    The rule is emitted through ``st.markdown`` with raw HTML enabled.

    Args:
        png_file: Path of the image file to embed.
    """
    with open(png_file, "rb") as image_file:
        b64_payload = base64.b64encode(image_file.read()).decode()

    st.markdown(
        f"""
        <style>
        .stApp {{
            background: url(data:image/png;base64,{b64_payload});
            background-size: cover;
        }}
        </style>
        """,
        unsafe_allow_html=True,
    )
# Apply the background image.
set_background('CAT.png')

# App title.
st.title("寵物醫院評分查詢")

# Minimum rating selected by the user (slider from 1.0 to 5.0, default 3.5).
min_rating = st.slider("請輸入想查詢的最低評分:", 1.0, 5.0, 3.5)

# Hospital pages to scrape.
urls = [
    "https://www.tw-animal.com/pet/171211/c000196.html",
    "https://www.tw-animal.com/pet/171211/c000186.html",
    # ... other URLs ...
]

# Rows collected by the scraper (one dict per hospital).
data_list = []

# Geocoder used to turn addresses into coordinates.
# Nominatim's usage policy requires a User-Agent that identifies this
# application; sample values copied from tutorials should not be used.
# The explicit timeout replaces geopy's short default, which tends to raise
# GeocoderTimedOut against the public server.
geolocator = Nominatim(user_agent="pet-hospital-rating-app", timeout=10)

# Simple in-memory cache: address string -> geocoded location.
geocode_cache = {}
# Geocode an address, with caching and retries on transient errors.
def geocode_address(address, retries=5, delay=5):
    """Return the geocoded location for *address*, or ``None`` on failure.

    Successful lookups are stored in ``geocode_cache`` and served from there
    on later calls. Only transient geocoder errors (timeouts and service
    errors) are retried; a clean "no match" answer ends the lookup at once,
    because repeating the same query would return the same result and only
    add load on the geocoding service.

    Args:
        address: Address string to look up.
        retries: Maximum number of lookup attempts.
        delay: Seconds to wait between attempts after a transient error.

    Returns:
        The location object returned by the geocoder, or ``None`` when the
        address has no match or every attempt failed.
    """
    if address in geocode_cache:
        return geocode_cache[address]

    for attempt in range(1, retries + 1):
        try:
            location = geolocator.geocode(address)
        except (GeocoderTimedOut, GeocoderServiceError) as e:
            if attempt < retries:
                st.warning(f"地理編碼錯誤: {e}. 重試中...")
                time.sleep(delay)
            else:
                # Last attempt: report the error but do not sleep or claim
                # that another retry is coming.
                st.warning(f"地理編碼錯誤: {e}")
        else:
            if location:
                geocode_cache[address] = location
                return location
            # The service answered but found nothing; retrying cannot help.
            break

    st.warning(f"無法地理編碼地址: {address}")
    return None
# Fetch and parse a single hospital page.
def _scrape_hospital(url):
    """Return the hospital's fields scraped from *url*, or ``None``.

    The result is a dict with the keys "標題", "手機", "地址" (strings) and
    "評分" (float). ``None`` is returned, after a warning, when the page cannot
    be fetched, an expected element is missing, or the rating is not numeric.
    """
    try:
        # A timeout keeps one unresponsive page from hanging the whole app.
        response = requests.get(url, timeout=10)
        response.raise_for_status()
    except requests.RequestException as e:
        st.warning(f"無法取得頁面: {url} ({e})")
        return None

    soup = BeautifulSoup(response.content, 'html.parser')
    tags = {
        "標題": soup.find('h1', class_='t-intro__title'),
        "手機": soup.find('a', class_='t-font-large'),
        "地址": soup.find('a', class_='t-font-medium'),
        "評分": soup.find('span', class_='t-intro__recommand'),
    }
    # find() returns None for a missing element; skip the page instead of
    # crashing on .get_text().
    if any(tag is None for tag in tags.values()):
        st.warning(f"頁面缺少必要欄位,已略過: {url}")
        return None

    hospital = {key: tag.get_text(strip=True) for key, tag in tags.items()}
    try:
        hospital["評分"] = float(hospital["評分"])
    except ValueError:
        st.warning(f"無法解析評分,已略過: {url}")
        return None
    return hospital


# Run the scrape when the "開始爬取資料" button is pressed.
if st.button('開始爬取資料'):
    st.write("正在爬取資料,請稍候...")
    for url in urls:
        hospital = _scrape_hospital(url)
        # Keep only hospitals that were parsed and meet the rating threshold.
        if hospital is None or hospital["評分"] < min_rating:
            continue
        location = geocode_address(hospital["地址"])
        if location:
            data_list.append({
                **hospital,
                "經度": location.longitude,
                "緯度": location.latitude,
            })
    # Streamlit reruns the whole script on every interaction and a button is
    # True only for the run triggered by its click, so the results are kept in
    # session state to survive later reruns (e.g. pressing the map button).
    st.session_state['hospital_data'] = list(data_list)
    st.session_state['show_map'] = False
else:
    # Not a scrape run: restore the results of the last scrape, if any.
    # NOTE: these reflect the threshold in effect when the scrape ran.
    data_list.extend(st.session_state.get('hospital_data', []))

# Render results when there is data.
if data_list:
    df1 = pd.DataFrame(data_list)

    # Take the first whitespace-separated token of the address as the region
    # (assumes the region is part of the address); fall back to the whole
    # address when it has no tokens.
    df1['區域'] = df1['地址'].apply(lambda addr: (addr.split() or [addr])[0])

    # Group by region, joining the hospitals of a region and averaging ratings.
    grouped_df = df1.groupby('區域').agg({
        '標題': lambda x: ' | '.join(x),
        '手機': lambda x: ' | '.join(x),
        '地址': lambda x: ' | '.join(x),
        '評分': 'mean',  # average rating
    }).reset_index()

    # Data table.
    st.dataframe(df1)

    # Plotly bar chart.
    bar_fig = px.bar(
        grouped_df,
        x='區域',
        y='評分',
        title="各區域寵物醫院統計",
        labels={'評分': '平均評分', '區域': '區域'},
    )
    st.plotly_chart(bar_fig)

    # Plotly pie chart.
    pie_fig = px.pie(grouped_df, names='區域', values='評分', title="各區域寵物醫院比例")
    st.plotly_chart(pie_fig)

    # Map: remember the click in session state so the map stays visible on
    # the reruns triggered by interacting with it.
    if st.button('顯示地圖'):
        st.session_state['show_map'] = True

    if st.session_state.get('show_map'):
        # Centre the Folium map on the mean coordinates.
        map_center = [df1['緯度'].mean(), df1['經度'].mean()]
        pet_map = folium.Map(location=map_center, zoom_start=12)

        # Cluster nearby markers.
        marker_cluster = MarkerCluster().add_to(pet_map)

        # One marker per hospital.
        for _, row in df1.iterrows():
            folium.Marker(
                location=[row['緯度'], row['經度']],
                popup=f"{row['標題']} (評分: {row['評分']})",
                tooltip=row['標題'],
            ).add_to(marker_cluster)

        # Render the map through streamlit_folium.
        st_folium(pet_map, width=700, height=500)