la-metro / app.py
David Wisdom
first draft
11cb781
raw history blame
No virus
3.54 kB
import os
# stop tensorflow from printing novels to stdout
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import pickle
import numpy as np
import pandas as pd
import plotly.express as px
import streamlit as st
import tensorflow as tf
import tensorflow_hub as hub
from sklearn.cluster import DBSCAN
def read_stops(p: str):
"""
DOCSTRING
"""
return pd.read_csv(p)
def read_encodings(p: str) -> tf.Tensor:
"""
Unpickle the Universal Sentence Encoder v4 encodings
and return them
This function doesn't make any attempt to patch the security holes in `pickle`.
:param p: Path to the encodings
:returns: A Tensor of the encodings with shape (number of sentences, 512)
"""
with open(p, 'rb') as f:
encodings = pickle.load(f)
return encodings
def cluster_encodings(encodings: tf.Tensor) -> np.ndarray:
"""
DOCSTRING
"""
# I know the hyperparams I want from the EDA I did in the notebook
clusterer = DBSCAN(eps=0.7, min_samples=100).fit(encodings)
return clusterer.labels_
def cluster_lat_lon(df: pd.DataFrame) -> np.ndarray:
"""
DOCSTRING
"""
# I know the hyperparams I want from the EDA I did in the notebook
clusterer = DBSCAN(eps=0.025, min_samples=100).fit(df[['latitude', 'longitude']])
return clusterer.labels_
def plot_example(df: pd.DataFrame, labels: np.ndarray) -> px.Figure:
"""
DOCSTRING
"""
plot_size = 800
labels = labels.astype('str')
fig = px.scatter(df, x='longitude', y='latitude',
hover_name='display_name',
color=labels,
opacity=0.5,
color_discrete_sequence=px.colors.qualitative.Safe,
template='presentation',
width=plot_size,
height=plot_size)
# fig.show()
return fig
def plot_venice_blvd(df: pd.DataFrame, labels: np.ndarray) -> px.Figure:
"""
DOCSTRING
"""
px.set_mapbox_access_token(st.secrets['mapbox_token'])
venice_blvd = {'lat': 34.008350,
'lon': -118.425362}
labels = labels.astype('str')
fig = px.scatter_mapbox(df, lat='latitude', lon='longitude',
color=labels,
hover_name='display_name',
center=venice_blvd,
zoom=12,
color_discrete_sequence=px.colors.qualitative.Dark24)
# fig.show()
return fig
def main(data_path: str, enc_path: str):
df = read_stops(data_path)
# Cluster based on lat/lon
example_labels = cluster_lat_lon(df)
example_fig = plot_example(df, example_labels)
# Cluster based on the name of the stop
encodings = read_encodings(enc_path)
encoding_labels = cluster_encodings(encodings)
venice_fig = plot_venice_blvd(df, encoding_labels)
# Display the plots with Streamlit
st.write('# Example of what DBSCAN does')
st.plotly_chart(example_fig, use_container_width=True)
st.write('# Venice Blvd')
st.plotly_chart(example_fig, use_container_width=True)
if __name__ == '__main__':
import argparse
p = argparse.ArgumentParser()
p.add_argument('--data_path',
nargs='?',
default='data/stops.csv',
help="Path to the dataset of LA Metro stops. Defaults to 'data/stops.csv'")
p.add_argument('--enc_path',
nargs='?',
default='data/encodings.pkl',
help="Path to the pickled encodings. Defaults to 'data/encodings.pkl'")
args = p.parse_args()
main(**vars(args))