Spaces:

Roberta2024
/

PET_APP_Tainan

Sleeping

App Files Files Community

PET_APP_Tainan / app.py

Roberta2024

Update app.py

bd89067 verified 5 months ago

raw

history blame

4.35 kB

	import streamlit as st
	import requests
	from bs4 import BeautifulSoup
	import pandas as pd
	import base64
	import plotly.express as px

	# Build the title-section background from an image file.
	def set_background(image_file):
	    """Inject CSS that paints *image_file* behind the ``.title-section`` block.

	    The file is read, base64-encoded and embedded as a ``data:`` URI inside a
	    ``<style>`` element emitted through ``st.markdown``.

	    Args:
	        image_file: Path of the image to embed.

	    Raises:
	        OSError: If the file cannot be opened or read.
	    """
	    # Function-scope import so this block does not depend on the file header.
	    import mimetypes

	    # Derive the MIME type from the file name instead of assuming PNG; fall
	    # back to the previously hard-coded type when the guess is not an image.
	    mime_type, _ = mimetypes.guess_type(image_file)
	    if not mime_type or not mime_type.startswith("image/"):
	        mime_type = "image/png"

	    # A distinct handle name keeps the path argument from being shadowed.
	    with open(image_file, "rb") as handle:
	        encoded_string = base64.b64encode(handle.read()).decode()

	    # The CSS text stays flush with the source margin so the emitted markup
	    # is unchanged apart from the MIME type.
	    st.markdown(
	        f"""
	<style>
	.title-section {{
	background-image: url(data:{mime_type};base64,{encoded_string});
	background-size: cover;
	background-position: center;
	padding: 100px;
	text-align: center;
	}}
	</style>
	""",
	        unsafe_allow_html=True,
	    )

	# Apply the background image to the title area.
	set_background('ddog.png')

	# Render the page title inside the styled title section.
	title_html = '<div class="title-section"><h1>寵物診所資訊爬蟲</h1></div>'
	st.markdown(title_html, unsafe_allow_html=True)

	# Clinic detail pages to scrape.
	urls = [
	    'https://www.tw-animal.com/pet/171211/c000196.html',
	    'https://www.tw-animal.com/pet/171211/c000186.html',
	    # remaining URLs...
	]

	# Minimum rating chosen by the user; clinics rated below it are filtered out.
	min_rating = st.slider(
	    "請選擇想篩選的最低評分",
	    min_value=0.0,
	    max_value=5.0,
	    value=4.5,
	    step=0.1,
	)

	# Session-state key holding the most recent scrape result.  Streamlit reruns
	# the script on every widget interaction and a button is only True during the
	# run its own click triggered, so a plain variable would no longer exist by
	# the time the LINE button is pressed.
	RESULT_KEY = 'clinic_df'

	# Seconds to wait for any single HTTP request before giving up.
	REQUEST_TIMEOUT = 10

	LINE_NOTIFY_URL = "https://notify-api.line.me/api/notify"


	def _parse_clinic(html):
	    """Extract one clinic record from a detail page.

	    Args:
	        html: HTML text of a clinic page.

	    Returns:
	        A dict with the keys '標題', '手機', '地址', '評分' and '區', or
	        ``None`` when an expected element is missing or the rating text is
	        not a number.
	    """
	    soup = BeautifulSoup(html, 'html.parser')
	    title_tag = soup.find('h1', class_='t-intro__title')
	    phone_tag = soup.find('a', href=lambda href: href and href.startswith('tel:'))
	    address_tag = soup.find('a', class_='t-font-medium')
	    rating_tag = soup.find('span', class_='t-intro__recommand')

	    # ``find`` returns None for an absent element; bail out rather than
	    # raising AttributeError on ``.get_text``.
	    if any(tag is None for tag in (title_tag, phone_tag, address_tag, rating_tag)):
	        return None

	    try:
	        rating = float(rating_tag.get_text(strip=True))
	    except ValueError:
	        return None

	    address = address_tag.get_text(strip=True)
	    # District = everything up to and including the first '區' in the address.
	    district = address.split('區')[0] + '區' if '區' in address else '其他'

	    return {
	        '標題': title_tag.get_text(strip=True),
	        '手機': phone_tag.get_text(strip=True),
	        '地址': address,
	        '評分': rating,
	        '區': district,
	    }


	def send_line_notify(token, msg):
	    """POST *msg* to the LINE Notify endpoint and return the HTTP status code.

	    Args:
	        token: LINE Notify access token, sent as a Bearer credential.
	        msg: Message text to deliver.

	    Returns:
	        The integer HTTP status code of the response.

	    Raises:
	        requests.RequestException: On connection failure or timeout.
	    """
	    # NOTE(review): LINE announced the end of the LINE Notify service
	    # (2025-03-31) — confirm this endpoint is still usable.
	    headers = {
	        "Authorization": "Bearer " + token,
	        "Content-Type": "application/x-www-form-urlencoded",
	    }
	    # Send the message in the form body (matching the declared content type)
	    # rather than the query string, so a long table does not inflate the URL.
	    r = requests.post(
	        LINE_NOTIFY_URL,
	        headers=headers,
	        data={"message": msg},
	        timeout=REQUEST_TIMEOUT,
	    )
	    return r.status_code


	# Start scraping when the user presses the button.
	if st.button('開始爬蟲'):
	    all_data = []
	    skipped = []
	    progress_bar = st.progress(0)
	    status_text = st.empty()

	    # Visit every URL and collect the clinics that meet the rating threshold.
	    for i, url in enumerate(urls):
	        progress = int((i + 1) / len(urls) * 100)
	        progress_bar.progress(progress)
	        status_text.text(f'正在處理第 {i+1} 個網址，共 {len(urls)} 個')

	        # One unreachable or failing page must not abort the whole run.
	        try:
	            response = requests.get(url, timeout=REQUEST_TIMEOUT)
	            response.raise_for_status()
	        except requests.RequestException:
	            skipped.append(url)
	            continue

	        response.encoding = 'utf-8'
	        record = _parse_clinic(response.text)
	        if record is None:
	            skipped.append(url)
	            continue

	        # Keep only the clinics that satisfy the rating filter.
	        if record['評分'] >= min_rating:
	            all_data.append(record)

	    if skipped:
	        skipped_list = '\n'.join(f'- {u}' for u in skipped)
	        st.warning(f'以下網址無法取得或解析，已略過：\n\n{skipped_list}')

	    # Convert to a DataFrame and remember it for later reruns.
	    df = pd.DataFrame(all_data)
	    st.session_state[RESULT_KEY] = df

	    # Show the table, chart and download button when anything matched.
	    if not df.empty:
	        st.dataframe(df)

	        # Bar chart of the number of clinics per district.
	        district_counts = df['區'].value_counts().reset_index()
	        district_counts.columns = ['區', '診所數量']

	        fig = px.bar(district_counts, x='區', y='診所數量', title='各區寵物診所數量')
	        st.plotly_chart(fig)

	        # Offer the result as a CSV download.
	        csv = df.to_csv(index=False)
	        st.download_button(
	            label="下載 CSV 檔案",
	            data=csv,
	            file_name="pet_clinics.csv",
	            mime="text/csv",
	        )
	    else:
	        st.write(f"沒有找到評分大於或等於 {min_rating} 的資料。")

	    # Clear the progress bar and the status message.
	    progress_bar.empty()
	    status_text.empty()

	# LINE Notify section.
	st.header('傳送至 LINE Notify')
	# Mask the token in the UI because it is a credential.
	token = st.text_input("請輸入 LINE Notify 權杖", type="password")
	if st.button('傳送至 LINE'):
	    # Read the result saved by the scrape run; this run did not scrape.
	    df = st.session_state.get(RESULT_KEY)
	    if df is not None and not df.empty:
	        msg = df.to_string(index=False)
	        try:
	            status_code = send_line_notify(token, msg)
	        except requests.RequestException as exc:
	            st.error(f'傳送失敗：{exc}')
	        else:
	            if status_code == 200:
	                st.success('成功傳送至 LINE Notify!')
	            else:
	                st.error('傳送失敗，請檢查您的權杖是否正確。')
	    else:
	        st.warning('沒有資料可以傳送，請先執行爬蟲。')