mirainsight committed on
Commit
78a73f4
1 Parent(s): d814ae2

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +247 -0
app.py ADDED
@@ -0,0 +1,247 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ # coding: utf-8
3
+
4
+ import pandas as pd
5
+ import geopandas as gpd
6
+ import matplotlib.pyplot as plt
7
+ import streamlit as st
8
+ import plotly.express as px
9
+ import plotly.graph_objects as go
10
+
11
# Load the ADM2 boundary shapefile; its ADM2_EN / ADM1_EN columns are
# relabelled "District" / "State" below.
gdf = gpd.read_file(
    r'C:\Users\hp\Downloads\mys_admb_unhcr_20210211_shp\mys_admbnda_adm2_unhcr_20210211.shp'
)

# Plot options shared by both quick-look maps (one colour per district name).
_PLOT_OPTIONS = dict(cmap=None, legend=None, figsize=(20, 20))
gdf.plot(column='ADM2_EN', **_PLOT_OPTIONS)

# Full column list of the boundary file, for reference:
# "Shape_Leng", "Shape_Area", "ADM2_EN", "ADM2_PCODE", "ADM2_REF",
# "ADM2ALT1EN", "ADM2ALT2EN", "ADM1_EN", "ADM1_PCODE", "ADM0_EN",
# "ADM0_PCODE", "date", "validOn", "validTo", "geometry"

# Source column -> display label, in the order the columns are kept.
_COLUMN_LABELS = {
    "ADM2_EN": "District",
    "ADM1_EN": "State",
    "Shape_Leng": "Shape_Leng",
    "Shape_Area": "Shape_Area",
    "ADM2_PCODE": "District Postcode",
    "ADM1_PCODE": "State Postcode",
    "date": "date",
    "validOn": "validOn",
    "geometry": "geometry",
}
newgdf = gdf[list(_COLUMN_LABELS.keys())]
newgdf.columns = list(_COLUMN_LABELS.values())
newgdf.plot(column='District', **_PLOT_OPTIONS)

# Points of interest, plus a trimmed copy holding only id, man_made and geometry.
poi = gpd.read_file(
    r'C:\Users\hp\Downloads\hotosm_mys_points_of_interest_points_shp/hotosm_mys_points_of_interest_points.shp'
)
simple_poi = poi[["osm_id", "man_made", "geometry"]].copy()
27
+
28
@st.cache(suppress_st_warning=True)
def fit_district_coord_to_state():
    """Assign every point of interest to the district and state containing it.

    Reads the POI shapefile and the ADM2 boundary shapefile from disk and, for
    each POI geometry, takes the first boundary row (in file order) whose
    geometry contains the point.

    Returns:
        A ``(districts, states)`` tuple of two lists with one entry per POI,
        in file order.  A POI that lies inside no boundary geometry gets the
        string ``"None"`` in both lists.
    """
    st.write("Cache miss: fit_district_coord_to_state() ran")
    poi_temp = gpd.read_file(r'C:\Users\hp\Downloads\hotosm_mys_points_of_interest_points_shp/hotosm_mys_points_of_interest_points.shp')
    gdf_temp = gpd.read_file(r'C:\Users\hp\Downloads\mys_admb_unhcr_20210211_shp\mys_admbnda_adm2_unhcr_20210211.shp')

    # Look the columns up by name and let the row count come from the data,
    # instead of relying on fixed column positions and a fixed number of rows.
    boundaries = list(
        zip(gdf_temp["geometry"], gdf_temp["ADM2_EN"], gdf_temp["ADM1_EN"])
    )

    test_dist = []
    test_state = []
    for point in poi_temp["geometry"]:
        # First matching boundary wins; fall back to the "None" marker strings.
        district, state = next(
            (
                (district_name, state_name)
                for shape, district_name, state_name in boundaries
                if point.within(shape)
            ),
            ("None", "None"),
        )
        test_dist.append(district)
        test_state.append(state)
    return (test_dist, test_state)
49
+
50
# Attach the district/state found for every POI.
temp = fit_district_coord_to_state()
poi["District"] = temp[0]
poi["State"] = temp[1]

# POIs that fell inside no boundary geometry.
poi_none = poi[poi.District == "None"]

# Boundary rows kept for the analysis: Selangor, W.P. Putrajaya,
# W.P. Kuala Lumpur and the Seremban district, in that order.
# pd.concat replaces DataFrame.append, which pandas 2.0 removed;
# ignore_index=True renumbers the rows exactly as the chained appends did.
kv_gdf = pd.concat(
    [
        gdf[gdf.ADM1_EN == "Selangor"],
        gdf[gdf.ADM1_EN == "W.P. Putrajaya"],
        gdf[gdf.ADM1_EN == "W.P. Kuala Lumpur"],
        gdf[gdf.ADM2_EN == "Seremban"],
    ],
    ignore_index=True,
)

# find missing POI within KV and assign correct District and State
# ID = 2083151347, 5928578725 ; to be changed from None to Klang
# (poi[poi.osm_id == 2083151347] is at index 3848,
#  poi[poi.osm_id == 5928578725] is at index 32419)
poi.at[3848, 'State'] = "Selangor"
poi.at[3848, 'District'] = "Klang"
poi.at[32419, 'State'] = "Selangor"
poi.at[32419, 'District'] = "Klang"

# Same selection as kv_gdf, applied to the POIs.
kv_poi = pd.concat(
    [
        poi[poi.State == "Selangor"],
        poi[poi.State == "W.P. Putrajaya"],
        poi[poi.State == "W.P. Kuala Lumpur"],
        poi[poi.District == "Seremban"],
    ],
    ignore_index=True,
)
73
+
74
+
75
+
76
# District names present in the selected boundary rows.
districts = kv_gdf['ADM2_EN'].unique()

# For each POI column: the sorted distinct truthy values (*_1) and the same
# values title-cased for display (*_2).
shop_1 = sorted(value for value in kv_poi['shop'].unique() if value)
shop_2 = [label.title() for label in shop_1]
amenity_1 = sorted(value for value in kv_poi['amenity'].unique() if value)
amenity_2 = [label.title() for label in amenity_1]
tourism_1 = sorted(value for value in kv_poi['tourism'].unique() if value)
tourism_2 = [label.title() for label in tourism_1]
man_made_1 = sorted(value for value in kv_poi['man_made'].unique() if value)
man_made_2 = [label.title() for label in man_made_1]

# Category name -> display names of the types in that category.
types = dict(
    zip(
        ('Shops', 'Amenity', 'Tourism', 'Man Made'),
        (shop_2, amenity_2, tourism_2, man_made_2),
    )
)
# Category names in alphabetical order.
category = sorted(types)
93
+
94
# hash_funcs table handed to st.cache for build_df(): every listed type is
# mapped to a function that returns None (presumably so Streamlit does not
# hash these objects; confirm against the st.cache documentation).
MY_HASH = {
    pd.DataFrame: lambda _: None,
    str: lambda _: None,
    int: lambda _: None,
    list: lambda _: None,
    gpd.GeoDataFrame: lambda _: None
}


@st.cache(suppress_st_warning=True, hash_funcs=MY_HASH)
def build_df():
    """Summarise the POIs of every selected district.

    Works on a copy of the module-level ``kv_poi`` and walks the districts
    listed in ``kv_gdf['ADM2_EN']``.

    Returns:
        An 8-tuple ``(f_shopt, f_ament, f_tourt, f_manmt, f_shopc, f_amenc,
        f_tourc, f_manmc)``.  The first four are DataFrames with one column
        per district, holding the ``value_counts()`` of the ``shop``,
        ``amenity``, ``tourism`` and ``man_made`` columns.  The last four are
        lists with one non-null count per district.
    """
    st.write("build_df() ran")
    f_kv_poi = kv_poi.copy()
    f_districts = kv_gdf['ADM2_EN'].unique()
    f_shopc = []
    f_amenc = []
    f_tourc = []
    f_manmc = []
    f_shopt = pd.DataFrame()
    f_ament = pd.DataFrame()
    f_tourt = pd.DataFrame()
    f_manmt = pd.DataFrame()
    for d in f_districts:
        # Filter once per district instead of once per statistic.
        in_district = f_kv_poi[f_kv_poi.District == str(d)]
        summary = in_district.count()
        # Explicit positional access: an integer key on this string-indexed
        # Series is positional anyway, and .iloc avoids the deprecated
        # fallback.  Positions 11 / 6 / 1 / 13 are presumably the shop /
        # amenity / tourism / man_made columns -- TODO confirm against the
        # POI shapefile's column order.
        f_shopc.append(summary.iloc[11])
        f_amenc.append(summary.iloc[6])
        f_tourc.append(summary.iloc[1])
        f_manmc.append(summary.iloc[13])
        f_shopt = pd.concat([f_shopt, in_district.shop.value_counts()], axis=1)
        f_ament = pd.concat([f_ament, in_district.amenity.value_counts()], axis=1)
        f_tourt = pd.concat([f_tourt, in_district.tourism.value_counts()], axis=1)
        f_manmt = pd.concat([f_manmt, in_district.man_made.value_counts()], axis=1)
    return (f_shopt, f_ament, f_tourt, f_manmt, f_shopc, f_amenc, f_tourc, f_manmc)
126
+
127
# Unpack the eight results of build_df(): four per-type tables followed by
# four per-district count lists.
temp_store = build_df()
(shopt, ament, tourt, manmt, shopc, amenc, tourc, manmc) = temp_store

# Label the columns of every per-type table with the district names.
for _table in (shopt, ament, tourt, manmt):
    _table.columns = districts

# Start from an empty frame indexed by district, then add one column per POI
# type (tables transposed so that districts become the rows).
total_df = pd.DataFrame().set_axis(districts, axis=0)
total_df = pd.concat(
    [total_df, shopt.T, ament.T, tourt.T, manmt.T],
    axis=1,
)

# Per-category totals; the insertion order is kept as-is because later code
# addresses columns by position.
for _label, _counts in (
    ('Amenity', amenc),
    ('Man Made', manmc),
    ('Shops', shopc),
    ('Tourism', tourc),
):
    total_df[_label] = _counts

# Missing counts become 0, then every column is expressed as a percentage of
# its own column total, rounded to two decimals.
total_df = total_df.fillna(0)
_column_totals = total_df.sum()
total_df = (100. * total_df / _column_totals).round(2)

# Re-key the district geometries by district name and append them.
temp = pd.DataFrame(kv_gdf.geometry).T
temp.columns = districts
total_df = pd.concat([total_df, temp.T], axis=1)

total_df['districts'] = districts
# Title-case every column label ('geometry' -> 'Geometry', ...).
total_df.columns = [label.title() for label in total_df.columns]
total_gdf = gpd.GeoDataFrame(total_df, crs="EPSG:4326", geometry='Geometry')
159
+
160
+
161
# Page title followed by the two introductory sections (heading + paragraph).
st.title("An Analysis of the Points of Interest (POIs) within Klang Valley ")

# TODO: insert simple description of certain POIs measured (after the
# "Definition of POIs" paragraph).
for _heading, _paragraph in (
    (
        "Definition of POIs",
        'We have four categories, "Amenity", "Man Made", "Shops", and "Tourism". And with each category, there are multiple types of POIs.',
    ),
    (
        "Visualization",
        'Please select the category and type to view the choropleth maps.',
    ),
):
    st.header(_heading)
    st.write(_paragraph)
169
+ # # st.write("Select Category and Type")
170
+ # # category_option = st.selectbox('Select Category', category)
171
+ # #type_option = st.selectbox('Select Type', types[str(category_option)])
172
+
173
+ # # adding "select" as the first and default choice
174
+ # category_option = st.selectbox('Select Category', options=['select']+list(types.keys()))
175
+ # # display selectbox 2 if manufacturer is not "select"
176
+ # if category_option != 'select':
177
+ # type_option = st.selectbox('Select Type', options=types[category_option])
178
+ # submitted = st.form_submit_button("Submit")
179
+ # if submitted:
180
+ # st.write('You selected ' + category_option + ' ' + type_option)
181
+
182
def _extreme_districts(frame, column):
    """Return the ``Districts`` values of the rows where *column* is largest and smallest."""
    values = frame[column]
    # Look the district up by column name rather than by a fixed position,
    # which would shift whenever the number of POI-type columns changes.
    return (
        frame.loc[values.idxmax(), "Districts"],
        frame.loc[values.idxmin(), "Districts"],
    )


def _district_choropleth(frame, column, color_scale, title):
    """Build the mapbox choropleth of *column* over the geometries in *frame*."""
    fig = go.Figure(px.choropleth_mapbox(
        frame,
        geojson=frame["Geometry"],
        locations=frame["Districts"],
        color=column,
        center={"lat": 3.140853, "lon": 101.693207},
        mapbox_style="carto-positron",
        zoom=7.5,
        color_continuous_scale=color_scale,
        opacity=0.7,
        labels={column: "%"},
    ))
    fig.update_geos(fitbounds="locations", visible=False)
    fig.update_layout(
        title_text=title,
        paper_bgcolor="#F0F2F6",
        margin={"r": 30, "t": 50, "l": 1, "b": 1},
    )
    return fig


# First choice: the category.  The placeholder entry means "nothing chosen".
category_option = st.selectbox('Select Category', options=['Select Category']+list(types.keys()))

if category_option != 'Select Category':
    max_category, min_category = _extreme_districts(total_gdf, category_option)
    st.write(f"You have selected Category **{category_option}**")
    fig_category = _district_choropleth(
        total_gdf,
        category_option,
        "blues",
        f"Choropleth Map for {category_option} in Klang Valley Districts",
    )
    st.plotly_chart(fig_category)
    st.write(f"Max: {max_category}, Min: {min_category}")

    # Second choice: a type within the chosen category.
    type_option = st.selectbox('Select Type', options=['Select Type']+types[category_option])

    if type_option != 'Select Type':
        st.write(f"You have selected Type **{type_option}** for Category {category_option}.")
        max_type, min_type = _extreme_districts(total_gdf, type_option)

        # Only the chosen type plus what the map needs.
        hi = total_gdf[[type_option, 'Geometry', 'Districts']]
        fig_type = _district_choropleth(
            hi,
            type_option,
            "bupu",
            f"Choropleth Map for {type_option} ({category_option}) in Klang Valley Districts",
        )
        st.plotly_chart(fig_type)
        st.write(f"Max: {max_type}, Min: {min_type}")
233
+
234
+
235
# Closing sections of the page: each heading is followed by its paragraphs,
# rendered in the order listed.
_CLOSING_SECTIONS = (
    (
        "Comments",
        (
            'From comparing the district with the most and least respective POI, we can see that Kuala Lumpur is the district with the most POIs.',
            'We can also see that Kuala Selangor has the least POIs in general.',
            'However, we also can see that the information from these maps are not very useful.',
        ),
    ),
    (
        'Improvements',
        (
            'What the visualizations lack is a comparison with other factors. Adding information such as **population density**, **number of roads or rivers**, or other information that would allow a more in depth comparison would result in a better analysis of the POIs.',
            'This would allow us to correlate and understand why certain districts have more or less POIs than others.',
            'Another point of improvement could be allowing user to zoom in a specific district to see the precise locations of the respective POIs.',
        ),
    ),
    (
        "Questions",
        (
            'Which district has the most "Shops"? Which shops are most prevalent in Petaling? Does more people lead to more petrol stations?',
        ),
    ),
)

for _heading, _paragraphs in _CLOSING_SECTIONS:
    st.header(_heading)
    for _paragraph in _paragraphs:
        st.write(_paragraph)
247
+