Roberta2024 committed on
Commit
3b45348
1 Parent(s): 1c4bab9

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +116 -0
app.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ from bs4 import BeautifulSoup
3
+ import pandas as pd
4
+ import folium
5
+ from folium.plugins import MarkerCluster, HeatMap
6
+ import plotly.graph_objects as go
7
+ from geopy.geocoders import Nominatim
8
+ import re
9
+ import streamlit as st
10
+
11
# --- Streamlit page header -------------------------------------------------
st.title("Restaurant Data Extractor")
st.write("Extracting restaurant data and displaying it on a map.")

# --- Load the list of restaurant page URLs from a public Google Sheet ------
sheet_id = "1xUfnD1WCF5ldqECI8YXIko1gCpaDDCwTztL17kjI42U"
export_url = f"https://docs.google.com/spreadsheets/d/{sheet_id}/export?format=csv"
df1 = pd.read_csv(export_url)

# The "網址" (URL) column holds one restaurant page per row.
urls = df1["網址"].tolist()

# Accumulator for the scraped restaurant records.
df = pd.DataFrame(columns=["Store Name", "Address", "Latitude", "Longitude", "Region"])

# Nominatim geocoder used to turn street addresses into lat/lon pairs.
geolocator = Nominatim(user_agent="my_app")
28
# Function to extract the administrative region (區域) from the address
def extract_region(address):
    """Extract the leading administrative region from a Taiwanese address.

    Returns the address text up to and including the first 區 (district),
    縣 (county) or 市 (city) marker, or "Unknown" when no marker is present
    or the address is empty/None.
    """
    if not address:
        return "Unknown"
    # BUGFIX: the original pattern r'(.*?)區|縣|市' alternated at the top
    # level, so a bare 縣 or 市 anywhere in the string matched on its own
    # (e.g. "嘉義縣民雄鄉" yielded just "縣"). Use a character class so the
    # lazy prefix always precedes the marker.
    match = re.search(r'.*?[區縣市]', address)
    return match.group(0) if match else "Unknown"
35
+
36
# Progress bar in Streamlit
progress_bar = st.progress(0)
total_urls = len(urls)

# Scrape each restaurant page. Rows are collected in a list and the
# DataFrame is built once at the end — appending with pd.concat inside
# the loop is quadratic in the number of URLs.
records = []
for idx, url in enumerate(urls):
    # timeout prevents one hung request from stalling the whole app
    response = requests.get(url, timeout=10)
    soup = BeautifulSoup(response.content, "html.parser")

    try:
        store_name = soup.find("h2", class_="restaurant-details__heading--title").text.strip()
    except AttributeError:
        store_name = None  # element missing on the page

    try:
        address = soup.find("li", class_="restaurant-details__heading--address").text.strip()
        region = extract_region(address)
    except AttributeError:
        address = None
        region = "Unknown"

    # Geocode best-effort; any failure just leaves coordinates empty.
    latitude = longitude = None
    try:
        location = geolocator.geocode(address)
        if location:
            latitude = location.latitude
            longitude = location.longitude
    except Exception:  # narrowed from bare `except:` so Ctrl-C still works
        pass

    records.append({
        "Store Name": store_name,
        "Address": address,
        "Latitude": latitude,
        "Longitude": longitude,
        "Region": region,
    })

    # Update progress bar
    progress_bar.progress((idx + 1) / total_urls)

# Single DataFrame construction replaces the per-row pd.concat.
df = pd.DataFrame(records, columns=["Store Name", "Address", "Latitude", "Longitude", "Region"])
81
+
82
# Save the DataFrame to CSV; utf-8-sig adds a BOM so Excel renders CJK text.
csv_file = "restaurants_data.csv"
df.to_csv(csv_file, encoding="utf-8-sig", index=False)

# Display a download button for the CSV file.
st.write(f"Data saved to {csv_file}")
# Read via a context manager — the original `open(...).read()` leaked the
# file handle (never closed).
with open(csv_file, "rb") as fh:
    csv_bytes = fh.read()
st.download_button(
    label="Download restaurant data as CSV",
    data=csv_bytes,
    file_name=csv_file,
    mime="text/csv"
)
94
+
95
# Render the scraped restaurants on an interactive Folium map.
st.subheader("Restaurant Locations Map")

# Base map centred on Tainan.
m = folium.Map(location=[23.0, 120.2], zoom_start=12)

# Cluster nearby markers so dense areas stay readable.
marker_cluster = MarkerCluster().add_to(m)
for _, record in df.iterrows():
    lat, lon = record["Latitude"], record["Longitude"]
    # Only rows that geocoded successfully get a pin.
    if pd.notnull(lat) and pd.notnull(lon):
        marker = folium.Marker(
            location=[lat, lon],
            popup=record["Store Name"],
            tooltip=record["Address"]
        )
        marker.add_to(marker_cluster)

# Embed the rendered map HTML inside the Streamlit page.
st.components.v1.html(m._repr_html_(), height=600)

# Show the raw scraped data alongside the map.
st.subheader("Restaurant Data")
st.dataframe(df)