import requests
from bs4 import BeautifulSoup
import pandas as pd
import folium
from folium.plugins import MarkerCluster, HeatMap
import plotly.graph_objects as go
from geopy.geocoders import Nominatim
import re
import streamlit as st
import streamlit.components.v1 as components

# Streamlit title and description
st.title("Restaurant Data Extractor")
st.write("Extracting restaurant data and displaying it on a map.")

# Read data from Google Sheets
sheet_id = "1xUfnD1WCF5ldqECI8YXIko1gCpaDDCwTztL17kjI42U"
df1 = pd.read_csv(f"https://docs.google.com/spreadsheets/d/{sheet_id}/export?format=csv")
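# Optional: on Streamlit >= 1.18, a cached loader avoids re-downloading the
# sheet on every rerun (sketch; load_sheet is a hypothetical helper name):
# @st.cache_data
# def load_sheet(sid):
#     return pd.read_csv(f"https://docs.google.com/spreadsheets/d/{sid}/export?format=csv")
# df1 = load_sheet(sheet_id)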

# Convert the "網址" (URL) column to a Python list
urls = df1["網址"].tolist()

# Create a DataFrame to store all restaurant data
df = pd.DataFrame(columns=["Store Name", "Address", "Latitude", "Longitude", "Region"])

# Initialize the Nominatim geocoder
geolocator = Nominatim(user_agent="my_app")
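# Nominatim's usage policy allows roughly one request per second; for longer
# URL lists, geopy's RateLimiter can throttle the calls (optional sketch --
# substitute `geocode` for `geolocator.geocode` in the loop below):
# from geopy.extra.rate_limiter import RateLimiter
# geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1)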

# Extract the region (區域) from an address: match everything up to the first
# 區 (district), falling back to 縣 (county) and then 市 (city)
def extract_region(address):
    match = re.search(r'.*?區|.*?縣|.*?市', address)
    if match:
        return match.group(0)
    else:
        return "Unknown"

# Progress bar in Streamlit
progress_bar = st.progress(0)
total_urls = len(urls)

# Iterate through each URL and scrape the restaurant's name and address
for idx, url in enumerate(urls):
    response = requests.get(url, timeout=10)
    soup = BeautifulSoup(response.content, "html.parser")

    try:
        store_name = soup.find("h2", class_="restaurant-details__heading--title").text.strip()
    except AttributeError:
        store_name = None

    try:
        address = soup.find("li", class_="restaurant-details__heading--address").text.strip()
        region = extract_region(address)
    except AttributeError:
        address = None
        region = "Unknown"

    # Geocode the scraped address (skipped when no address was found)
    try:
        location = geolocator.geocode(address) if address else None
        if location:
            latitude = location.latitude
            longitude = location.longitude
        else:
            latitude = None
            longitude = None
    except Exception:
        latitude = None
        longitude = None
    new_row = pd.DataFrame({
        "Store Name": [store_name],
        "Address": [address],
        "Latitude": [latitude],
        "Longitude": [longitude],
        "Region": [region]
    })
    df = pd.concat([df, new_row], ignore_index=True)

    # Update the progress bar
    progress_bar.progress((idx + 1) / total_urls)

# Save the DataFrame to CSV with a UTF-8 BOM so Excel renders Chinese correctly
csv_file = "restaurants_data.csv"
df.to_csv(csv_file, encoding="utf-8-sig", index=False)
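# Note: the CSV bytes could also be built in memory instead of being read back
# from disk (sketch):
# csv_bytes = df.to_csv(index=False).encode("utf-8-sig")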

# Display a download button for the CSV file
st.write(f"Data saved to {csv_file}")
with open(csv_file, "rb") as f:
    st.download_button(
        label="Download restaurant data as CSV",
        data=f.read(),
        file_name=csv_file,
        mime="text/csv"
    )

# Display a map using Folium in Streamlit
st.subheader("Restaurant Locations Map")

# Create a map centered on Tainan
m = folium.Map(location=[23.0, 120.2], zoom_start=12)

# Add a marker cluster to the map
marker_cluster = MarkerCluster().add_to(m)
for index, row in df.iterrows():
    if pd.notnull(row["Latitude"]) and pd.notnull(row["Longitude"]):
        folium.Marker(
            location=[row["Latitude"], row["Longitude"]],
            popup=row["Store Name"],
            tooltip=row["Address"]
        ).add_to(marker_cluster)
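
# The HeatMap plugin imported above can overlay point density on the same map
# (a minimal sketch; each point gets a default weight of 1):
heat_points = df.dropna(subset=["Latitude", "Longitude"])[["Latitude", "Longitude"]].values.tolist()
if heat_points:
    HeatMap(heat_points).add_to(m)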

# Display the map in Streamlit
components.html(m._repr_html_(), height=600)

# Optional: display the DataFrame as a table
st.subheader("Restaurant Data")
st.dataframe(df)
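
# plotly.graph_objects, imported above, can summarize the scraped regions as a
# bar chart (a small sketch built from the Region column computed earlier):
region_counts = df["Region"].value_counts()
fig = go.Figure(go.Bar(x=region_counts.index.tolist(), y=region_counts.values.tolist()))
fig.update_layout(title="Restaurants per Region", xaxis_title="Region", yaxis_title="Count")
st.subheader("Restaurants per Region")
st.plotly_chart(fig)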