# M_Restru / app.py
# (Hugging Face file-viewer residue, kept as a comment so the file parses:
#  Roberta2024 — "Create app.py", commit 3b45348 verified, raw / history / blame, 3.4 kB)
import requests
from bs4 import BeautifulSoup
import pandas as pd
import folium
from folium.plugins import MarkerCluster, HeatMap
import plotly.graph_objects as go
from geopy.geocoders import Nominatim
import re
import streamlit as st
# --- Page header -----------------------------------------------------------
st.title("Restaurant Data Extractor")
st.write("Extracting restaurant data and displaying it on a map.")

# --- Load the list of restaurant page URLs from a public Google Sheet ------
sheet_id = "1xUfnD1WCF5ldqECI8YXIko1gCpaDDCwTztL17kjI42U"
sheet_csv_url = f"https://docs.google.com/spreadsheets/d/{sheet_id}/export?format=csv"
df1 = pd.read_csv(sheet_csv_url)

# The "網址" (URL) column holds one restaurant detail page per row.
urls = df1["網址"].tolist()

# Accumulator for every scraped restaurant record.
df = pd.DataFrame(columns=["Store Name", "Address", "Latitude", "Longitude", "Region"])

# Nominatim geocoder used to turn street addresses into coordinates.
geolocator = Nominatim(user_agent="my_app")
# Function to extract region (區域) from the address using regex
def extract_region(address):
    """Return the first administrative unit in a Taiwanese address.

    Scans *address* for the shortest prefix ending in 區 (district),
    縣 (county) or 市 (city) and returns it, e.g. "台北市" for
    "台北市信義區...".

    The original pattern ``r'(.*?)區|縣|市'`` was buggy: alternation binds
    looser than concatenation, so it parsed as ``((.*?)區)|縣|市`` and a
    match on the 縣/市 branch returned the single character "縣" or "市".

    Parameters:
        address: street address string, or None/empty.

    Returns:
        The matched region string, or "Unknown" when *address* is falsy
        or contains none of the three unit characters.
    """
    if not address:
        return "Unknown"
    # Character class keeps the three unit suffixes inside one lazy match.
    match = re.search(r'.*?[區縣市]', address)
    return match.group(0) if match else "Unknown"
# --- Scrape every restaurant page ------------------------------------------
progress_bar = st.progress(0)
total_urls = len(urls)

# Collect one record per URL and concat ONCE after the loop: the original
# called pd.concat on every iteration, which is quadratic in row count.
records = []
for idx, url in enumerate(urls):
    # Timeout so one dead server cannot hang the whole app forever.
    response = requests.get(url, timeout=30)
    soup = BeautifulSoup(response.content, "html.parser")

    # Store name: a missing element yields None instead of crashing.
    try:
        store_name = soup.find("h2", class_="restaurant-details__heading--title").text.strip()
    except AttributeError:
        store_name = None

    # Address and its derived region; both fall back when the element is absent.
    try:
        address = soup.find("li", class_="restaurant-details__heading--address").text.strip()
        region = extract_region(address)
    except AttributeError:
        address = None
        region = "Unknown"

    # Geocode the address; any geocoder failure (network error, rate limit,
    # unparsable address) degrades to missing coordinates.  The original used
    # a bare ``except:`` which also swallowed KeyboardInterrupt/SystemExit.
    try:
        location = geolocator.geocode(address)
        if location:
            latitude = location.latitude
            longitude = location.longitude
        else:
            latitude = None
            longitude = None
    except Exception:
        latitude = None
        longitude = None

    records.append({
        "Store Name": store_name,
        "Address": address,
        "Latitude": latitude,
        "Longitude": longitude,
        "Region": region,
    })

    # Update progress bar (idx+1 of total_urls pages done).
    progress_bar.progress((idx + 1) / total_urls)

# Single concat preserves the (possibly empty) accumulator's column order.
if records:
    df = pd.concat([df, pd.DataFrame(records)], ignore_index=True)
# --- Persist results and offer a download ----------------------------------
csv_file = "restaurants_data.csv"
# utf-8-sig adds a BOM so Excel auto-detects the encoding of the Chinese text.
df.to_csv(csv_file, encoding="utf-8-sig", index=False)

st.write(f"Data saved to {csv_file}")

# Read the bytes inside a context manager so the handle is closed; the
# original ``open(csv_file, "rb").read()`` leaked an open file object.
with open(csv_file, "rb") as fh:
    csv_bytes = fh.read()

st.download_button(
    label="Download restaurant data as CSV",
    data=csv_bytes,
    file_name=csv_file,
    mime="text/csv"
)
# --- Folium map of all geocoded restaurants --------------------------------
st.subheader("Restaurant Locations Map")

# Centre the view on Tainan; clustered markers keep dense areas readable.
m = folium.Map(location=[23.0, 120.2], zoom_start=12)
marker_cluster = MarkerCluster().add_to(m)

for _, record in df.iterrows():
    # Guard clause: skip rows the geocoder could not resolve.
    if pd.isnull(record["Latitude"]) or pd.isnull(record["Longitude"]):
        continue
    marker = folium.Marker(
        location=[record["Latitude"], record["Longitude"]],
        popup=record["Store Name"],
        tooltip=record["Address"],
    )
    marker.add_to(marker_cluster)

# Embed the rendered Folium HTML inside the Streamlit page.
st.components.v1.html(m._repr_html_(), height=600)

# Show the raw scraped data as an interactive table.
st.subheader("Restaurant Data")
st.dataframe(df)