Spaces:
Build error
Build error
#!/usr/bin/env python | |
# coding: utf-8 | |
import pandas as pd | |
import geopandas as gpd | |
import matplotlib.pyplot as plt | |
import streamlit as st | |
import plotly.express as px | |
import plotly.graph_objects as go | |
# Load the district boundary polygons (one row per district).
gdf = gpd.read_file(r'C:\Users\hp\Downloads\mys_admb_unhcr_20210211_shp\mys_admbnda_adm2_unhcr_20210211.shp')

# Quick-look map, one colour per district name.
gdf.plot(column='ADM2_EN', cmap=None, legend=None, figsize=(20, 20))

# Full column list of the boundary file, kept for reference:
# gdf[["Shape_Leng", "Shape_Area", "ADM2_EN", "ADM2_PCODE", "ADM2_REF", "ADM2ALT1EN", "ADM2ALT2EN", "ADM1_EN", "ADM1_PCODE", "ADM0_EN", "ADM0_PCODE", "date", "validOn", "validTo", "geometry"]]

# Trimmed copy with reader-friendly names for the district/state columns;
# the remaining selected columns keep their original names.
newgdf = gdf[
    [
        "ADM2_EN",
        "ADM1_EN",
        "Shape_Leng",
        "Shape_Area",
        "ADM2_PCODE",
        "ADM1_PCODE",
        "date",
        "validOn",
        "geometry",
    ]
].rename(
    columns={
        "ADM2_EN": "District",
        "ADM1_EN": "State",
        "ADM2_PCODE": "District Postcode",
        "ADM1_PCODE": "State Postcode",
    }
)
newgdf.plot(column='District', cmap=None, legend=None, figsize=(20, 20))

# Load the points of interest and keep a slim id/man_made/geometry copy.
poi = gpd.read_file(r'C:\Users\hp\Downloads\hotosm_mys_points_of_interest_points_shp/hotosm_mys_points_of_interest_points.shp')
simple_poi = poi.loc[:, ["osm_id", "man_made", "geometry"]].copy()
def fit_district_coord_to_state():
    """Find the district and state that contain each point of interest.

    Both shapefiles are re-read from disk. Every POI geometry is tested
    against the district polygons in file order and the first polygon that
    contains it wins.

    Returns:
        A ``(districts, states)`` tuple of two lists, each with one entry per
        POI row in file order. A POI that lies inside no polygon gets the
        string ``"None"`` in both lists.
    """
    st.write("Cache miss: fit_district_coord_to_state() ran")
    poi_temp = gpd.read_file(r'C:\Users\hp\Downloads\hotosm_mys_points_of_interest_points_shp/hotosm_mys_points_of_interest_points.shp')
    gdf_temp = gpd.read_file(r'C:\Users\hp\Downloads\mys_admb_unhcr_20210211_shp\mys_admbnda_adm2_unhcr_20210211.shp')

    # Pull the lookup columns out once, by name, so the search neither depends
    # on the column order of the file nor on a hard-coded number of districts.
    polygons = list(gdf_temp.geometry)
    district_names = list(gdf_temp["ADM2_EN"])
    state_names = list(gdf_temp["ADM1_EN"])

    test_dist = []
    test_state = []
    for point in poi_temp["geometry"]:
        # Index of the first polygon containing the point, or None if no
        # polygon does.
        hit = next(
            (idx for idx, polygon in enumerate(polygons) if point.within(polygon)),
            None,
        )
        if hit is None:
            test_dist.append("None")
            test_state.append("None")
        else:
            test_dist.append(district_names[hit])
            test_state.append(state_names[hit])
    return (test_dist, test_state)
# Tag every POI with the district/state that contains it; POIs that fall
# outside every polygon carry the string "None".
temp = fit_district_coord_to_state()
poi["District"] = temp[0]
poi["State"] = temp[1]
poi_none = poi[poi.District == "None"]

# Klang Valley boundaries: all of Selangor, Putrajaya and Kuala Lumpur plus
# the Seremban district. DataFrame.append was removed in pandas 2.0, so the
# pieces are stacked with a single concat that keeps the same row order.
kv_gdf = pd.concat(
    [
        gdf[gdf.ADM1_EN == "Selangor"],
        gdf[gdf.ADM1_EN == "W.P. Putrajaya"],
        gdf[gdf.ADM1_EN == "W.P. Kuala Lumpur"],
        gdf[gdf.ADM2_EN == "Seremban"],
    ],
    ignore_index=True,
)

# find missing POI within KV and assign correct District and State
# ID = 2083151347, 5928578725 ; to be changed from None to Klang
#poi[poi.osm_id == 2083151347] # index of 3848
#poi[poi.osm_id == 5928578725] # index of 32419
# NOTE(review): these row labels are tied to the exact POI file that was
# loaded; re-check them whenever the source data is refreshed.
poi.at[3848, 'State'] = "Selangor"
poi.at[3848, 'District'] = "Klang"
poi.at[32419, 'State'] = "Selangor"
poi.at[32419, 'District'] = "Klang"

# Klang Valley POIs, stacked in the same state/district order as kv_gdf.
kv_poi = pd.concat(
    [
        poi[poi.State == "Selangor"],
        poi[poi.State == "W.P. Putrajaya"],
        poi[poi.State == "W.P. Kuala Lumpur"],
        poi[poi.District == "Seremban"],
    ],
    ignore_index=True,
)

districts = kv_gdf['ADM2_EN'].unique()

# Distinct non-empty values of each POI category column, sorted; the *_2
# lists hold the title-cased labels shown in the UI.
shop_1 = sorted(filter(None, kv_poi['shop'].unique()))
shop_2 = [name.title() for name in shop_1]
amenity_1 = sorted(filter(None, kv_poi['amenity'].unique()))
amenity_2 = [name.title() for name in amenity_1]
tourism_1 = sorted(filter(None, kv_poi['tourism'].unique()))
tourism_2 = [name.title() for name in tourism_1]
man_made_1 = sorted(filter(None, kv_poi['man_made'].unique()))
man_made_2 = [name.title() for name in man_made_1]

# Category label -> list of type labels offered in the second select box.
types = {
    'Shops': shop_2,
    'Amenity': amenity_2,
    'Tourism': tourism_2,
    'Man Made': man_made_2,
}
category = sorted(['Shops', 'Amenity', 'Tourism', 'Man Made'])

# Maps each type to a hasher that always returns None.
# NOTE(review): nothing in the visible code reads this; it looks like a
# leftover hash_funcs mapping for Streamlit caching — confirm before removing.
MY_HASH = {
    pd.DataFrame: lambda _: None,
    str: lambda _: None,
    int: lambda _: None,
    list: lambda _: None,
    gpd.GeoDataFrame: lambda _: None,
}
def build_df():
    """Summarise the Klang Valley POIs per district.

    Reads the module-level ``kv_poi`` and ``kv_gdf`` frames and walks the
    districts in the order of ``kv_gdf['ADM2_EN'].unique()``.

    Returns:
        An 8-tuple ``(shop_types, amenity_types, tourism_types,
        man_made_types, shop_counts, amenity_counts, tourism_counts,
        man_made_counts)``. The four ``*_types`` items are DataFrames with
        one row per distinct type value and one column per district (the
        ``value_counts`` of that category in the district). The four
        ``*_counts`` items are lists with one entry per district holding the
        number of POIs that have a non-null value in that category.
    """
    st.write("build_df() ran")
    f_kv_poi = kv_poi.copy()
    f_districts = kv_gdf['ADM2_EN'].unique()

    f_shopc = []
    f_amenc = []
    f_tourc = []
    f_manmc = []
    f_shopt = pd.DataFrame()
    f_ament = pd.DataFrame()
    f_tourt = pd.DataFrame()
    f_manmt = pd.DataFrame()

    for d in f_districts:
        # Filter once per district and reuse the subset for every statistic.
        in_district = f_kv_poi[f_kv_poi.District == str(d)]

        # Non-null count per category, addressed by column name. The earlier
        # version read these from a .count() result by integer position
        # (11, 6, 1, 13), which breaks if the file's column order changes.
        # NOTE(review): assumes those positions were the shop / amenity /
        # tourism / man_made columns — confirm against the POI file.
        f_shopc.append(in_district['shop'].count())
        f_amenc.append(in_district['amenity'].count())
        f_tourc.append(in_district['tourism'].count())
        f_manmc.append(in_district['man_made'].count())

        # One new column per district: frequency of each type value.
        f_shopt = pd.concat([f_shopt, in_district.shop.value_counts()], axis=1)
        f_ament = pd.concat([f_ament, in_district.amenity.value_counts()], axis=1)
        f_tourt = pd.concat([f_tourt, in_district.tourism.value_counts()], axis=1)
        f_manmt = pd.concat([f_manmt, in_district.man_made.value_counts()], axis=1)

    return (f_shopt, f_ament, f_tourt, f_manmt, f_shopc, f_amenc, f_tourc, f_manmc)
# Per-district summaries: four type-frequency tables followed by four lists
# of per-category counts (see build_df for the exact order).
temp_store = build_df()
shopt, ament, tourt, manmt, shopc, amenc, tourc, manmc = temp_store

# build_df adds one column per district, in district order; label them.
shopt.columns = districts
ament.columns = districts
tourt.columns = districts
manmt.columns = districts

# One row per district. The frame is created with the district index directly:
# calling set_axis on an empty frame cannot attach the labels, because pandas
# rejects a new axis whose length differs from the current (zero) length.
total_df = pd.DataFrame(index=districts)
total_df = pd.concat(
    [total_df, shopt.transpose(), ament.transpose(), tourt.transpose(), manmt.transpose()],
    axis=1,
)
total_df['Amenity'] = amenc
total_df['Man Made'] = manmc
total_df['Shops'] = shopc
total_df['Tourism'] = tourc

# A type that never occurs in a district counts as zero.
total_df = total_df.fillna(0)
# Turn every column into each district's percentage share of that column's
# total, rounded to two decimals.
total_df = (100. * total_df / total_df.sum()).round(2)

# Re-label the district polygons by district name so they line up with
# total_df's index, then attach them as an extra column.
temp = pd.DataFrame(kv_gdf.geometry).transpose()
temp.columns = districts
total_df = pd.concat([total_df, temp.transpose()], axis=1)
total_df['districts'] = districts

# Title-case every column name so it matches the labels offered in the UI;
# this also turns 'geometry' into 'Geometry' and 'districts' into 'Districts'.
total_df.columns = [x.title() for x in total_df.columns]
total_gdf = gpd.GeoDataFrame(total_df, crs="EPSG:4326", geometry='Geometry')
def _district_choropleth(frame, value_column, colour_scale, title):
    """Build a district choropleth of ``value_column`` from ``frame``.

    Args:
        frame: GeoDataFrame with a ``Districts`` column and an active
            geometry column, indexed so that its geometry features can be
            matched against the ``Districts`` values.
        value_column: Name of the column used to colour the districts.
        colour_scale: Plotly continuous colour scale name.
        title: Title text shown above the map.

    Returns:
        The configured plotly figure.
    """
    fig = go.Figure(px.choropleth_mapbox(frame,
                                         geojson=frame.geometry,
                                         locations=frame.Districts,
                                         color=value_column,
                                         center={"lat": 3.140853, "lon": 101.693207},
                                         mapbox_style="carto-positron",
                                         zoom=7.5,
                                         color_continuous_scale=colour_scale,
                                         opacity=0.7,
                                         labels={value_column: "%"}
                                         ))
    fig.update_geos(fitbounds="locations", visible=False)
    fig.update_layout(title_text=title,
                      paper_bgcolor="#F0F2F6",
                      margin={"r": 30, "t": 50, "l": 1, "b": 1})
    return fig


st.title ("An Analysis of the Points of Interest (POIs) within Klang Valley ")
st.header("Definition of POIs")
st.write('We have four categories, "Amenity", "Man Made", "Shops", and "Tourism". And with each category, there are multiple types of POIs.')
# TODO: insert simple description of certain POIs measured.
st.header("Visualization")
st.write('Please select the category and type to view the choropleth maps.')

# Two-step selection: the type select box only appears once a real category
# has been chosen, and the type map only once a real type has been chosen.
category_option = st.selectbox('Select Category', options=['Select Category']+list(types.keys()))
if category_option != 'Select Category':
    # Districts with the largest / smallest share for the chosen category.
    # The district name is read from the 'Districts' column by name rather
    # than by a fixed integer position in the row.
    # NOTE(review): the earlier version used position 303, assumed to be the
    # 'Districts' column — confirm.
    category_share = total_gdf[str(category_option)]
    max_category = total_gdf.loc[category_share.idxmax(), 'Districts']
    min_category = total_gdf.loc[category_share.idxmin(), 'Districts']
    st.write(f"You have selected Category **{category_option}**")
    fig_category = _district_choropleth(
        total_gdf,
        str(category_option),
        "blues",
        f"Choropleth Map for {category_option} in Klang Valley Districts",
    )
    st.plotly_chart(fig_category)
    st.write(f"Max: {max_category}, Min: {min_category}")

    type_option = st.selectbox('Select Type', options=['Select Type']+types[category_option])
    if type_option != 'Select Type':
        st.write(f"You have selected Type **{type_option}** for Category {category_option}.")
        type_share = total_gdf[str(type_option)]
        max_type = total_gdf.loc[type_share.idxmax(), 'Districts']
        min_type = total_gdf.loc[type_share.idxmin(), 'Districts']
        # Only the columns the map needs: the value, the shapes and the names.
        hi = total_gdf[[str(type_option), 'Geometry', 'Districts']]
        fig_type = _district_choropleth(
            hi,
            str(type_option),
            "bupu",
            f"Choropleth Map for {type_option} ({category_option}) in Klang Valley Districts",
        )
        st.plotly_chart(fig_type)
        st.write(f"Max: {max_type}, Min: {min_type}")

st.header("Comments")
st.write('From comparing the district with the most and least respective POI, we can see that Kuala Lumpur is the district with the most POIs.')
st.write('We can also see that Kuala Selangor has the least POIs in general.')
st.write('However, we also can see that the information from these maps are not very useful.')
st.header('Improvements')
st.write('What the visualizations lack is a comparison with other factors. Adding information such as **population density**, **number of roads or rivers**, or other information that would allow a more in depth comparison would result in a better analysis of the POIs.')
st.write('This would allow us to correlate and understand why certain districts have more or less POIs than others.')
st.write('Another point of improvement could be allowing user to zoom in a specific district to see the precise locations of the respective POIs.')
st.header("Questions")
st.write('Which district has the most "Shops"? Which shops are most prevalent in Petaling? Does more people lead to more petrol stations?')