Spaces:
Build error
Build error
mirainsight
committed on
Commit
•
78a73f4
1
Parent(s):
d814ae2
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,247 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python
|
2 |
+
# coding: utf-8
|
3 |
+
|
4 |
+
import pandas as pd
|
5 |
+
import geopandas as gpd
|
6 |
+
import matplotlib.pyplot as plt
|
7 |
+
import streamlit as st
|
8 |
+
import plotly.express as px
|
9 |
+
import plotly.graph_objects as go
|
10 |
+
|
11 |
+
# Load the district boundaries from the local shapefile.
# NOTE(review): this is an absolute path on the author's machine — confirm it
# exists wherever the app is deployed.
gdf = gpd.read_file(r'C:\Users\hp\Downloads\mys_admb_unhcr_20210211_shp\mys_admbnda_adm2_unhcr_20210211.shp')

# Draw the boundaries coloured by the ADM2_EN column on a 20x20 figure.
gdf.plot(column='ADM2_EN', cmap=None, legend=None, figsize=(20, 20))

# Full column list of the boundary file, kept for reference:
# gdf[["Shape_Leng", "Shape_Area", "ADM2_EN", "ADM2_PCODE", "ADM2_REF", "ADM2ALT1EN", "ADM2ALT2EN", "ADM1_EN", "ADM1_PCODE", "ADM0_EN", "ADM0_PCODE", "date", "validOn", "validTo", "geometry"]]

# Source column -> display name, in the order the columns are kept.
column_labels = {
    "ADM2_EN": "District",
    "ADM1_EN": "State",
    "Shape_Leng": "Shape_Leng",
    "Shape_Area": "Shape_Area",
    "ADM2_PCODE": "District Postcode",
    "ADM1_PCODE": "State Postcode",
    "date": "date",
    "validOn": "validOn",
    "geometry": "geometry",
}
newgdf = gdf[list(column_labels.keys())]
newgdf.columns = list(column_labels.values())
newgdf.plot(column='District', cmap=None, legend=None, figsize=(20, 20))

# Points of interest, plus a narrow copy holding only id, man_made tag and geometry.
poi = gpd.read_file(r'C:\Users\hp\Downloads\hotosm_mys_points_of_interest_points_shp/hotosm_mys_points_of_interest_points.shp')
simple_poi = poi[["osm_id", "man_made", "geometry"]].copy()
|
27 |
+
|
28 |
+
@st.cache(suppress_st_warning=True)
def fit_district_coord_to_state():
    """Assign a district and a state to every point of interest.

    Re-reads the point-of-interest and district-boundary shapefiles, then
    for each point takes the first boundary row whose geometry contains it.

    Returns:
        A ``(districts, states)`` tuple of two lists, one entry per point
        in file order.  A point that lies in no boundary gets the string
        ``"None"`` in both lists.
    """
    st.write("Cache miss: fit_district_coord_to_state() ran")
    poi_temp = gpd.read_file(r'C:\Users\hp\Downloads\hotosm_mys_points_of_interest_points_shp/hotosm_mys_points_of_interest_points.shp')
    gdf_temp = gpd.read_file(r'C:\Users\hp\Downloads\mys_admb_unhcr_20210211_shp\mys_admbnda_adm2_unhcr_20210211.shp')

    # Read the columns by name and scan every row, instead of relying on
    # fixed column positions and a fixed row count.
    boundaries = list(
        zip(gdf_temp["geometry"], gdf_temp["ADM2_EN"], gdf_temp["ADM1_EN"])
    )

    test_dist = []
    test_state = []
    for point in poi_temp["geometry"]:
        for polygon, district, state in boundaries:
            if point.within(polygon):
                test_dist.append(district)
                test_state.append(state)
                break
        else:
            # No boundary contains this point.
            test_dist.append("None")
            test_state.append("None")
    return (test_dist, test_state)
|
49 |
+
|
50 |
+
# Attach the district / state found for each point of interest.
temp = fit_district_coord_to_state()
poi["District"] = temp[0]
poi["State"] = temp[1]

# Points that matched no district boundary (captured before the manual fixes below).
poi_none = poi[poi.District == "None"]

# Boundaries kept for the analysis: three states plus the Seremban district.
# pd.concat replaces DataFrame.append, which pandas 2.0 removed; the row
# order and the fresh 0..n-1 index are the same as the chained appends gave.
kv_gdf = pd.concat(
    [
        gdf[gdf.ADM1_EN == "Selangor"],
        gdf[gdf.ADM1_EN == "W.P. Putrajaya"],
        gdf[gdf.ADM1_EN == "W.P. Kuala Lumpur"],
        gdf[gdf.ADM2_EN == "Seremban"],
    ],
    ignore_index=True,
)

# find missing POI within KV and assign correct District and State
# ID = 2083151347, 5928578725 ; to be changed from None to Klang
# poi[poi.osm_id == 2083151347]  # index of 3848
# poi[poi.osm_id == 5928578725]  # index of 32419
# NOTE(review): these row labels are tied to the current POI file — confirm
# them if the file is refreshed.
poi.at[3848, 'State'] = "Selangor"
poi.at[3848, 'District'] = "Klang"
poi.at[32419, 'State'] = "Selangor"
poi.at[32419, 'District'] = "Klang"

# Points of interest in the same area, selected in the same order as kv_gdf.
kv_poi = pd.concat(
    [
        poi[poi.State == "Selangor"],
        poi[poi.State == "W.P. Putrajaya"],
        poi[poi.State == "W.P. Kuala Lumpur"],
        poi[poi.District == "Seremban"],
    ],
    ignore_index=True,
)

districts = kv_gdf['ADM2_EN'].unique()


def _sorted_tag_values(column):
    """Return the sorted distinct truthy values of one kv_poi column."""
    return sorted(filter(None, kv_poi[column].unique()))


# *_1 hold the raw tag values, *_2 the title-cased labels offered in the UI.
shop_1 = _sorted_tag_values('shop')
shop_2 = [name.title() for name in shop_1]
amenity_1 = _sorted_tag_values('amenity')
amenity_2 = [name.title() for name in amenity_1]
tourism_1 = _sorted_tag_values('tourism')
tourism_2 = [name.title() for name in tourism_1]
man_made_1 = _sorted_tag_values('man_made')
man_made_2 = [name.title() for name in man_made_1]

# Category label -> list of type labels within that category.
types = {
    'Shops': shop_2,
    'Amenity': amenity_2,
    'Tourism': tourism_2,
    'Man Made': man_made_2,
}
category = sorted(['Shops', 'Amenity', 'Tourism', 'Man Made'])

# Hash functions handed to st.cache: every listed type hashes to None.
MY_HASH = {
    pd.DataFrame: lambda _: None,
    str: lambda _: None,
    int: lambda _: None,
    list: lambda _: None,
    gpd.GeoDataFrame: lambda _: None,
}
|
101 |
+
@st.cache(suppress_st_warning=True, hash_funcs=MY_HASH)
def build_df():
    """Tabulate point-of-interest counts per district.

    Works on a copy of the module-level ``kv_poi`` and iterates over the
    distinct ``ADM2_EN`` values of ``kv_gdf``.

    Returns:
        An 8-tuple ``(f_shopt, f_ament, f_tourt, f_manmt, f_shopc, f_amenc,
        f_tourc, f_manmc)``.  The first four are DataFrames with one
        ``value_counts`` column per district (shop, amenity, tourism and
        man_made respectively); the last four are lists with one non-null
        count per district.
    """
    st.write("build_df() ran")
    f_kv_poi = kv_poi.copy()
    f_districts = kv_gdf['ADM2_EN'].unique()
    f_shopc = []
    f_amenc = []
    f_tourc = []
    f_manmc = []
    f_shopt = pd.DataFrame()
    f_ament = pd.DataFrame()
    f_tourt = pd.DataFrame()
    f_manmt = pd.DataFrame()
    for d in f_districts:
        # Filter once per district and reuse the subset for every tally.
        in_district = f_kv_poi[f_kv_poi.District == str(d)]
        summary = in_district.count()
        # Explicit positional access: a plain integer key on a label-indexed
        # Series relies on a deprecated fallback.
        # NOTE(review): positions 11 / 6 / 1 / 13 presumably are the shop,
        # amenity, tourism and man_made columns — confirm, then switch to labels.
        f_shopc.append(summary.iloc[11])
        f_amenc.append(summary.iloc[6])
        f_tourc.append(summary.iloc[1])
        f_manmc.append(summary.iloc[13])
        f_shopt = pd.concat([f_shopt, in_district.shop.value_counts()], axis=1)
        f_ament = pd.concat([f_ament, in_district.amenity.value_counts()], axis=1)
        f_tourt = pd.concat([f_tourt, in_district.tourism.value_counts()], axis=1)
        f_manmt = pd.concat([f_manmt, in_district.man_made.value_counts()], axis=1)
    return (f_shopt, f_ament, f_tourt, f_manmt, f_shopc, f_amenc, f_tourc, f_manmc)
|
126 |
+
|
127 |
+
# Unpack the per-district tables (…t) and per-district counts (…c).
temp_store = build_df()
shopt, ament, tourt, manmt, shopc, amenc, tourc, manmc = temp_store

# Label each table's columns with the district names.
for frame in (shopt, ament, tourt, manmt):
    frame.columns = districts

# Start from an empty frame indexed by district, then add one column per type.
total_df = pd.DataFrame().set_axis(districts, axis=0)
total_df = pd.concat(
    [total_df, shopt.T, ament.T, tourt.T, manmt.T],
    axis=1,
)

# Per-category counts, added in this fixed column order.
total_df['Amenity'] = amenc
total_df['Man Made'] = manmc
total_df['Shops'] = shopc
total_df['Tourism'] = tourc

# Missing counts become 0; every column is then expressed as a percentage of
# its own column total, rounded to two decimals.
total_df = total_df.fillna(0)
column_totals = total_df.sum()
total_df = (100. * total_df / column_totals).round(2)

# Re-index the district geometries by district name and append them.
temp = pd.DataFrame(kv_gdf.geometry).T
temp.columns = districts
total_df = pd.concat([total_df, temp.T], axis=1)

total_df['districts'] = districts
# Title-case every column name (so 'geometry' -> 'Geometry', 'districts' -> 'Districts').
total_df.columns = [label.title() for label in total_df.columns]
total_gdf = gpd.GeoDataFrame(total_df, crs="EPSG:4326", geometry='Geometry')
|
159 |
+
|
160 |
+
|
161 |
+
# Position, within a total_gdf row, of the value shown as "Max" / "Min".
# NOTE(review): presumably the 'Districts' column — confirm, then look it up by label.
_REPORTED_POSITION = 303


def _extreme_rows(column):
    """Return the reported value of the rows with the largest and smallest *column*."""
    values = total_gdf[column]
    # .iloc keeps the lookup positional without relying on the deprecated
    # integer-key fallback of label-indexed Series.
    highest = total_gdf.loc[values.idxmax()].iloc[_REPORTED_POSITION]
    lowest = total_gdf.loc[values.idxmin()].iloc[_REPORTED_POSITION]
    return highest, lowest


def _district_choropleth(frame, value_column, colour_scale, title):
    """Build the district choropleth figure for one percentage column of *frame*."""
    fig = go.Figure(px.choropleth_mapbox(
        frame,
        geojson=frame.Geometry,
        locations=frame.Districts,
        color=value_column,
        center={"lat": 3.140853, "lon": 101.693207},
        mapbox_style="carto-positron",
        zoom=7.5,
        color_continuous_scale=colour_scale,
        opacity=0.7,
        labels={value_column: "%"},
    ))
    fig.update_geos(fitbounds="locations", visible=False)
    fig.update_layout(
        title_text=title,
        paper_bgcolor="#F0F2F6",
        margin={"r": 30, "t": 50, "l": 1, "b": 1},
    )
    return fig


st.title("An Analysis of the Points of Interest (POIs) within Klang Valley ")
st.header("Definition of POIs")
st.write('We have four categories, "Amenity", "Man Made", "Shops", and "Tourism". And with each category, there are multiple types of POIs.')

# insert simple description of certain POIs measured.

st.header("Visualization")
st.write('Please select the category and type to view the choropleth maps.')

# The first option of each select box is a placeholder meaning "nothing chosen yet".
category_option = st.selectbox('Select Category', options=['Select Category'] + list(types.keys()))

if category_option != 'Select Category':
    max_category, min_category = _extreme_rows(category_option)
    st.write(f"You have selected Category **{category_option}**")
    fig_category = _district_choropleth(
        total_gdf,
        category_option,
        "blues",
        f"Choropleth Map for {category_option} in Klang Valley Districts",
    )
    st.plotly_chart(fig_category)
    st.write(f"Max: {max_category}, Min: {min_category}")

    # The type selector is only offered once a category has been chosen.
    type_option = st.selectbox('Select Type', options=['Select Type'] + types[category_option])

    if type_option != 'Select Type':
        st.write(f"You have selected Type **{type_option}** for Category {category_option}.")
        max_type, min_type = _extreme_rows(type_option)

        # Only the selected column plus geometry and district are handed to plotly.
        hi = total_gdf[[type_option, 'Geometry', 'Districts']]
        fig_type = _district_choropleth(
            hi,
            type_option,
            "bupu",
            f"Choropleth Map for {type_option} ({category_option}) in Klang Valley Districts",
        )
        st.plotly_chart(fig_type)
        st.write(f"Max: {max_type}, Min: {min_type}")


st.header("Comments")
st.write('From comparing the district with the most and least respective POI, we can see that Kuala Lumpur is the district with the most POIs.')
st.write('We can also see that Kuala Selangor has the least POIs in general.')
st.write('However, we also can see that the information from these maps are not very useful.')

st.header('Improvements')
st.write('What the visualizations lack is a comparison with other factors. Adding information such as **population density**, **number of roads or rivers**, or other information that would allow a more in depth comparison would result in a better analysis of the POIs.')
st.write('This would allow us to correlate and understand why certain districts have more or less POIs than others.')
st.write('Another point of improvement could be allowing user to zoom in a specific district to see the precise locations of the respective POIs.')

st.header("Questions")
st.write('Which district has the most "Shops"? Which shops are most prevalent in Petaling? Does more people lead to more petrol stations?')
|
247 |
+
|