# PET_STREAMLIT / app.py
# Roberta2024's picture
# Update app.py
# e62c753 verified
import streamlit as st
import requests
from bs4 import BeautifulSoup
import pandas as pd
import plotly.express as px
import base64
import folium
from streamlit_folium import st_folium
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut, GeocoderServiceError
import time
from folium.plugins import MarkerCluster # 新增此行用於標記聚合
# Helper that sets the app's background image.
def set_background(png_file):
    """Use the image at ``png_file`` as the full-page background of the app.

    The file is read as raw bytes, base64-encoded, and embedded as a
    ``data:image/png`` URL inside a ``<style>`` rule targeting ``.stApp``,
    which is injected through ``st.markdown`` with HTML enabled.

    Args:
        png_file: Path of the image file to embed.
    """
    with open(png_file, "rb") as image:
        encoded = base64.b64encode(image.read()).decode()
    # The literal is kept flush-left so the emitted markup carries no
    # leading indentation.
    css = f"""
<style>
.stApp {{
background: url(data:image/png;base64,{encoded});
background-size: cover;
}}
</style>
"""
    st.markdown(css, unsafe_allow_html=True)
# Apply the background image.
set_background('CAT.png')
# App title.
st.title("寵物醫院評分查詢")
# Minimum rating chosen by the user (slider from 1.0 to 5.0, default 3.5).
min_rating = st.slider("請輸入想查詢的最低評分:", 1.0, 5.0, 3.5)
# Hospital detail pages to scrape.
urls = [
    "https://www.tw-animal.com/pet/171211/c000196.html",
    "https://www.tw-animal.com/pet/171211/c000186.html",
    # ... other URLs ...
]
# Accumulates the extracted hospital records.
data_list = []
# Initialise the geocoder. Nominatim's usage policy requires a User-Agent that
# identifies this application; the widely copied tutorial placeholder is
# shared by countless apps and is prone to being rejected by the public
# service. geopy's default timeout is very short, so a longer one is set to
# avoid spurious GeocoderTimedOut errors.
geolocator = Nominatim(user_agent="pet_streamlit_hospital_rating_app", timeout=10)
# Geocoding cache. Streamlit re-runs this script on every widget interaction,
# so a plain module-level dict would be emptied each time; keeping the dict in
# session state lets cached lookups survive reruns within a session.
if "geocode_cache" not in st.session_state:
    st.session_state["geocode_cache"] = {}
geocode_cache = st.session_state["geocode_cache"]
# Geocode an address, with caching and retries on transient service errors.
def geocode_address(address, retries=5, delay=5):
    """Resolve ``address`` to a geocoder location, or ``None`` if it cannot be resolved.

    Results (including "not found") are stored in the module-level
    ``geocode_cache`` so the same address is never looked up twice.

    Args:
        address: Free-form address string passed to ``geolocator.geocode``.
        retries: Maximum number of lookup attempts when the service times out
            or reports an error.
        delay: Seconds to wait between failed attempts.

    Returns:
        The location object returned by the geocoder, or ``None`` when the
        address is unknown or every attempt failed.
    """
    if address in geocode_cache:
        return geocode_cache[address]
    for attempt in range(retries):
        try:
            location = geolocator.geocode(address)
        except (GeocoderTimedOut, GeocoderServiceError) as e:
            if attempt < retries - 1:
                st.warning(f"地理編碼錯誤: {e}. 重試中...")
                time.sleep(delay)
            else:
                # Last attempt: nothing left to retry, so do not claim a retry
                # and do not waste time sleeping.
                st.warning(f"地理編碼錯誤: {e}")
            continue
        # The service answered. A ``None`` answer means "address not found",
        # which is definitive: cache it and stop instead of re-querying.
        geocode_cache[address] = location
        if location is None:
            st.warning(f"無法地理編碼地址: {address}")
        return location
    # Every attempt raised; not cached, so a later call may try again.
    st.warning(f"無法地理編碼地址: {address}")
    return None
# Session-state keys. Streamlit re-runs the whole script on every widget
# interaction and a button is only True during the run triggered by its own
# click, so anything that must outlive one click is stored in session state.
_RESULTS_KEY = "pet_hospital_records"
_SHOW_MAP_KEY = "pet_hospital_show_map"
# Seconds to wait for a hospital page before giving up on it.
_REQUEST_TIMEOUT = 10


def _extract_text(soup, tag, css_class):
    """Return the stripped text of the first matching element, or None if absent."""
    element = soup.find(tag, class_=css_class)
    return element.get_text(strip=True) if element is not None else None


def _scrape_hospital(url):
    """Fetch one hospital page and return its fields as a dict, or None on failure."""
    try:
        response = requests.get(url, timeout=_REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.RequestException as e:
        st.warning(f"無法取得頁面: {url} ({e})")
        return None
    soup = BeautifulSoup(response.content, 'html.parser')
    title = _extract_text(soup, 'h1', 't-intro__title')
    phone = _extract_text(soup, 'a', 't-font-large')
    address = _extract_text(soup, 'a', 't-font-medium')
    rating_text = _extract_text(soup, 'span', 't-intro__recommand')
    # A page whose layout differs must not abort the whole run.
    if None in (title, phone, address, rating_text):
        st.warning(f"頁面缺少必要欄位,已略過: {url}")
        return None
    try:
        rating = float(rating_text)
    except ValueError:
        st.warning(f"無法解析評分「{rating_text}」,已略過: {url}")
        return None
    return {"標題": title, "手機": phone, "地址": address, "評分": rating}


# Runs when the "start scraping" button is pressed.
if st.button('開始爬取資料'):
    st.write("正在爬取資料,請稍候...")
    scraped = []
    # Visit every URL and extract its data.
    for url in urls:
        record = _scrape_hospital(url)
        if record is None:
            continue
        # Only hospitals meeting the rating threshold are geocoded and kept.
        if record["評分"] >= min_rating:
            location = geocode_address(record["地址"])
            if location:
                record["經度"] = location.longitude
                record["緯度"] = location.latitude
                scraped.append(record)
    st.session_state[_RESULTS_KEY] = scraped
    # A fresh scrape starts with the map hidden again.
    st.session_state[_SHOW_MAP_KEY] = False
    if not scraped:
        st.info("沒有符合條件的資料")

# Restore the most recent results so they remain visible on reruns caused by
# other widgets (notably the map button below).
data_list = st.session_state.get(_RESULTS_KEY, [])

# If data was scraped successfully.
if data_list:
    df1 = pd.DataFrame(data_list)
    # Derive the region from the address (assumes the region is the first
    # whitespace-separated token); an address without tokens is kept as-is.
    df1['區域'] = df1['地址'].apply(lambda x: (x.split() or [x])[0])
    # Group by region, merging the hospitals of each region.
    grouped_df = df1.groupby('區域').agg({
        '標題': lambda x: ' | '.join(x),
        '手機': lambda x: ' | '.join(x),
        '地址': lambda x: ' | '.join(x),
        '評分': 'mean'  # average rating
    }).reset_index()
    # Hospitals per region; both group-bys sort by region, so rows line up.
    grouped_df['醫院數'] = df1.groupby('區域').size().to_numpy()
    # Show the data table.
    st.dataframe(df1)
    # Show the Plotly bar chart of average rating per region.
    bar_fig = px.bar(
        grouped_df,
        x='區域',
        y='評分',
        title="各區域寵物醫院統計",
        labels={'評分': '平均評分', '區域': '區域'},
    )
    st.plotly_chart(bar_fig)
    # Show the Plotly pie chart. Its title promises each region's share of
    # hospitals, so the slices are sized by hospital count, not mean rating.
    pie_fig = px.pie(grouped_df, names='區域', values='醫院數', title="各區域寵物醫院比例")
    st.plotly_chart(pie_fig)
    # Show the map. The request is remembered in session state because the
    # button itself is True only for the single run caused by its click.
    if st.button('顯示地圖'):
        st.session_state[_SHOW_MAP_KEY] = True
    if st.session_state.get(_SHOW_MAP_KEY):
        # Create a Folium map centred on the mean position.
        map_center = [df1['緯度'].mean(), df1['經度'].mean()]
        pet_map = folium.Map(location=map_center, zoom_start=12)
        # Create a marker cluster.
        marker_cluster = MarkerCluster().add_to(pet_map)
        # Add one marker per hospital.
        for _, row in df1.iterrows():
            folium.Marker(
                location=[row['緯度'], row['經度']],
                popup=f"{row['標題']} (評分: {row['評分']})",
                tooltip=row['標題']
            ).add_to(marker_cluster)  # added to the marker cluster
        # Render the map with streamlit_folium.
        st_folium(pet_map, width=700, height=500)