davidna22 committed on
Commit
dad00c5
1 Parent(s): 9cb35ba

Upload folder using huggingface_hub

.README ADDED
@@ -0,0 +1,31 @@
# Steps to run

## Pip install requirements

```bash
pip install -r requirements.txt
```

## Follow the Analysis.ipynb notebook for notebook-format results

### Recommended: view the Gradio application for the fullest experience

## Option 1: Run the Gradio app locally

```bash
python /path/to/app.py
```

## Option 2: Access the web application at [https://dna-casestudy.com/](https://dna-casestudy.com/)

## Option 3: Build the Docker container

```bash
cd /path/to/folder
docker build -t my-case-study .
docker run -p 7860:7860 my-case-study
```

## If run locally, the app is served at [http://localhost:7860](http://localhost:7860)
Analysis.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
Dockerfile ADDED
@@ -0,0 +1,10 @@
FROM python:3.10.2

WORKDIR /app

COPY . .
RUN python -m pip install -U pip
RUN pip install -r /app/requirements.txt

EXPOSE 7860
CMD ["python", "app.py"]
README.md CHANGED
@@ -1,12 +1,6 @@
 ---
-title: Dna Casestudy
-emoji: 🐢
-colorFrom: green
-colorTo: yellow
-sdk: gradio
-sdk_version: 4.28.3
+title: dna-casestudy
 app_file: app.py
-pinned: false
+sdk: gradio
+sdk_version: 4.27.0
 ---
-
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
__pycache__/utils.cpython-310.pyc ADDED
Binary file (26.8 kB)
 
app.py ADDED
@@ -0,0 +1,1255 @@
import sys
import inspect
import math
import pandas as pd
import numpy as np
import polars as pl
import seaborn as sns
import matplotlib
import utils
from matplotlib import pyplot as plt
import sklearn
import gradio as gr
from IPython.display import display, HTML
import plotly.figure_factory as ff
from sklearn.impute import SimpleImputer
from utils import create_seasons
from bs4 import BeautifulSoup
from bertopic import BERTopic
import html
import xgboost as xgb
from xgboost import plot_importance
from sklearn.metrics import r2_score, mean_absolute_percentage_error
from utils import find_variable_data, build_temporal_features, create_datetime, map_vals
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.io as pio
import folium
import gc
import json
from utils import MyNaiveImputer

matplotlib.use('agg')

# JS hook that forces Gradio's dark theme via the __theme query parameter
dark_mode = """
function refresh() {
    const url = new URL(window.location);

    if (url.searchParams.get('__theme') !== 'dark') {
        url.searchParams.set('__theme', 'dark');
        window.location.href = url.href;
    }
}
"""

# Imputation Variables
wd_full_local = pd.read_csv("data/weather_aggregated_2010-2018.csv", index_col=0)
wd_full_local = wd_full_local.reset_index()
wd_full_local["Datetime"] = pd.to_datetime(wd_full_local["Datetime"], format="%Y-%m-%d")
wd_full_local = build_temporal_features(wd_full_local, "Datetime")
impute_cols = ['MeanTemp', 'MinTemp', 'MaxTemp', 'DewPoint',
               'Percipitation', 'WindSpeed', 'MaxSustainedWind',
               'Gust', 'Rain', 'SnowDepth', 'SnowIce']

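# Editor's note (assumption): MyNaiveImputer is defined in utils.py, which is not
# shown in this commit view. Per the app text it imputes each variable by
# aggregating historical values over Day Of Year (1-366), covering the 49 days
# missing from 2018 plus 7 future days (hence time_steps=49+7 below). A minimal
# sketch of that idea:
#   day_of_year = wd_full_local["Datetime"].dt.dayofyear
#   doy_means = wd_full_local.groupby(day_of_year)["MeanTemp"].mean()
#   filled = doy_means.loc[missing_days_of_year]   # hypothetical lookup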
my_imputer = MyNaiveImputer(wd_full_local, time_steps=49+7)
imputers = {
    "Mean": my_imputer.impute_all(impute_cols, strategy="mean"),
    "Median": my_imputer.impute_all(impute_cols, strategy="median"),
    "Max": my_imputer.impute_all(impute_cols, strategy="max"),
    "Min": my_imputer.impute_all(impute_cols, strategy="min")
}

# Merged Data Variables
data_merged = pd.read_csv("data/data_merged_full.csv", index_col=0)
data_merged = create_datetime(data_merged, "Datetime", format="%Y-%m-%d")
data_merged["Day Of Week"] = data_merged["Datetime"].dt.day_name()
data_merged["Year String"] = data_merged["Year"].astype(str)
data_merged["Month String"] = data_merged["Datetime"].dt.month_name()
data_merged["Rain Bool"] = data_merged["Rain"].astype(bool)
data_merged["SnowIce Bool"] = data_merged["SnowIce"].astype(bool)
data_merged = data_merged.set_index("Datetime")
weather_full_df = data_merged.loc[data_merged["Year"] <= 2018].copy()
data_merged_eda = data_merged.loc[(data_merged["Year"] <= 2018) & (data_merged["Year"] >= 2016)]

# Feature Preprocessing
data_preprocess = data_merged.loc[(data_merged["Year"] >= 2016)].copy()
data_preprocess["Gust_lin"] = data_preprocess["Gust"].interpolate(method="linear")
data_preprocess["Gust_spline3"] = data_preprocess["Gust"].interpolate(method="spline", order=3)
data_preprocess["Gust_spline5"] = data_preprocess["Gust"].interpolate(method="spline", order=5)
data_preprocess["Gust_quad"] = data_preprocess["Gust"].interpolate(method="quadratic")
data_preprocess["Gust"] = data_preprocess["Gust"].interpolate(method="linear")
data_preprocess["DewPoint_old"] = data_preprocess["DewPoint"]
data_preprocess["DewPoint_diff7d"] = data_preprocess["DewPoint"] - data_preprocess["DewPoint"].shift(7)
data_preprocess["DewPoint"] = data_preprocess["DewPoint_diff7d"]
data_preprocess["MinTemp_old"] = data_preprocess["MinTemp"]
data_preprocess["MinTemp_log"] = data_preprocess["MinTemp"].apply(np.log1p)
data_preprocess["MinTemp_log_diff7d"] = data_preprocess["MinTemp_log"] - data_preprocess["MinTemp_log"].shift(7)
data_preprocess["MinTemp"] = data_preprocess["MinTemp_log_diff7d"]

# Final Preprocessed Variables
data_final = pd.read_csv("data/data_final.csv")
data_final = create_datetime(data_final, "Datetime", format="%Y-%m-%d")
data_final = data_final.set_index("Datetime")
test = data_final[-7:]
dataset = data_final[:-7]
split_point = int(len(data_final[:-7]) * 0.75)
train, val = dataset[:split_point], dataset[split_point:]
X_train, y_train = train.drop(columns="Target"), train["Target"]
X_val, y_val = val.drop(columns="Target"), val["Target"]
X_test, y_test = test.drop(columns="Target"), test["Target"]
forecast_model = xgb.XGBRegressor()
forecast_model.load_model("models/final_model.json")

# Current Predictions (metrics hardcoded from the final training run)
r2_train = 0.8691238468740025
mape_train = 0.04889510400934162
r2_val = 0.6072642783665692
mape_val = 0.6072642783665692

# Initial Variables
reports = {
    "weather_2011-2018": BeautifulSoup(open("reports/weather_data_ts.html"), "html.parser"),
    "weather_2016-2018": BeautifulSoup(open("reports/weather_data_after2016_ts.html"), "html.parser"),
    "service_full": BeautifulSoup(open("reports/311_data_1.html"), "html.parser")
}

iframe_dp_weather, _ = find_variable_data(reports["weather_2011-2018"], "MeanTemp")
iframe_dp_service, _ = find_variable_data(reports["service_full"], "Created Date")

# Code Variables to show in app
load_code = """
# Load Weather Data in pandas
# No need for polars because the data is sufficiently small
weather_data = pd.read_csv("data/weather_NY_2010_2018Nov.csv")

# Load Service data in polars for speed optimization
# Loading directly with polars leads to errors:
# load in pandas, then convert to polars
service_data_pd = pd.read_csv("data/311-2016-2018.csv")
assert service_data_pd["Unique Key"].nunique() == len(service_data_pd)
# This casting is done just because of some errors when loading pl from pandas
service_data_pd["Incident Zip"] = service_data_pd["Incident Zip"].astype("string")
service_data_pd["BBL"] = service_data_pd["BBL"].astype("string")
service_data = pl.DataFrame(service_data_pd)

# Clear some RAM
del service_data_pd
gc.collect()"""


map_code = """
lat_min = service_data["Latitude"].min()
lat_max = service_data["Latitude"].max()
long_min = service_data["Longitude"].min()
long_max = service_data["Longitude"].max()

mincon_lat = weather_data["Latitude"] >= lat_min
maxcon_lat = weather_data["Latitude"] <= lat_max
mincon_long = weather_data["Longitude"] >= long_min
maxcon_long = weather_data["Longitude"] <= long_max
wd_localized = weather_data.loc[mincon_lat & maxcon_lat & mincon_long & maxcon_long]
"""

Closed_Ticket_Code = """
# Fill nulls and typos with the mean time diff (13 days)
service_data = service_data.with_columns(
    Closed_Date_New = pl.when(pl.col("Created Date") - pl.col("Closed Date") > pl.duration(days=1))
        .then(pl.col("Created Date") + pl.duration(days=mean_diff))
        .otherwise(pl.col("Closed Date")).fill_null(pl.col("Created Date") + pl.duration(days=mean_diff))
)

# Check for no null values
assert service_data["Closed_Date_New"].is_null().sum() == 0

# Pairwise GroupBy and Filter
closed_tickets = (
    service_data.group_by(["Closed_Date_New", "Created Date"])
    # Count only tickets where Created Date <= Closed Date
    .agg((pl.when(pl.col("Created Date") <= pl.col("Closed_Date_New")).then(1).otherwise(0)).sum().alias("count"))
    # Sort by the new column Closed_Date_New
    .sort("Closed_Date_New")
    # Keep only Closed Dates inside the time window
    .filter((pl.col("Closed_Date_New").dt.year() >= 2016) & (pl.col("Closed_Date_New").dt.year() < 2019))
    # Final GroupBy on Closed Date after filtering
    .group_by("Closed_Date_New").agg(pl.col("count").sum().alias("num_closed_tickets"))
)

ct_df = closed_tickets.with_columns(
    pl.col("num_closed_tickets")
)
"""

topic_model = BERTopic.load("models/BERTopic")


def plot_imputations(var, data, imputers=imputers):
    plt.close('all')
    fig = plt.figure(figsize=(15, 5))
    plt.plot(data["Datetime"][-800:], data[var][-800:], label="Actual")
    plt.title(f"{var} Imputation")
    for method in imputers:
        plt.plot(imputers[method]["Datetime"], imputers[method][var], label=method)

    plt.legend()

    return gr.update(value=fig)


def plot_timeseries(data, var, data_name="My", all_vars=[], height=800, width=600):
    plt.close('all')
    if var == "":
        return gr.update()

    from utils import plot_timeseries
    fig = plot_timeseries(data, var, data_name, all_vars, height, width)

    return gr.update(value=fig)


def plot_bivariate(data, x, y, subset=None, trendline=True):
    plt.close('all')
    map_var = {
        "Year": "Year String",
        "Season": "Season",
        "Month": "Month String",
        "Day Of Week": "Day Of Week",
        "Weekend": "is_weekend",
        "Holiday": "is_holiday",
        "Rain": "Rain Bool",
        "SnowIce": "SnowIce Bool",
        "None": None,
        "": None,
    }
    subset = map_var[subset]

    from utils import plot_bivariate
    fig = plot_bivariate(data, x, y, subset, trendline)

    return gr.update(value=fig)


def plot_seasonality(data, x, y, show_box=True, show_outliers=False):
    plt.close('all')
    map_var = {
        "Year": "Year String",
        "Season": "Season",
        "Month": "Month String",
        "Day Of Week": "Day Of Week",
        "Weekend": "is_weekend",
        "Holiday": "is_holiday",
        "Rain": "Rain Bool",
        "SnowIce": "SnowIce Bool",
        "None": None,
    }
    x = map_var[x]

    from utils import plot_seasonality
    fig = plot_seasonality(data, x, y, show_box, show_outliers)

    return gr.update(value=fig)


def plot_correlations(data, covar, target="Target", lags=[0, 1, 2, 3, 4, 5, 6, 7, 8, 13, 14, 15, 21], method="pearson"):
    plt.close('all')
    from utils import plot_correlations
    fig = plot_correlations(data, covar, target, lags, method)

    return gr.update(value=fig)


def plot_autocorr(data, var, apply=None):
    plt.close('all')
    from utils import plot_acf, plot_pacf
    time_series = data.loc[:, var].to_frame().copy()
    if apply:
        time_series[var] = time_series[var].apply(apply)
    fig, ax = plt.subplots(2, 1, figsize=(12, 8))
    _ = plot_acf(time_series[var], lags=30, ax=ax[0])
    _ = plot_pacf(time_series[var], lags=30, method="ols-adjusted", ax=ax[1])
    _ = plt.suptitle(f"{var}", y=0.95)

    return gr.update(value=fig)


def plot_all_correlations(data, data_name="weather", method="pearson"):
    plt.close('all')
    from utils import plot_all_correlations
    fig = plot_all_correlations(data, data_name, method)

    return fig


def run_report(report_base, variable_name, report_category="full"):
    report_name = report_base + "_" + report_category
    iframe, _ = find_variable_data(reports[report_name], variable_name)
    return gr.update(value=iframe)


def test_stationary(data, var):
    from utils import test_stationary
    df = test_stationary(data, var)

    return df


def plot_interpolation(data):
    plt.close('all')
    from utils import plot_gust_interpolation
    fig = plot_gust_interpolation(data)

    return fig


def plot_model_feature_importance():
    plt.close('all')
    from utils import plot_final_feature_importance
    fig = plot_final_feature_importance(forecast_model)

    return fig


def plot_final_predictions():
    plt.close('all')
    from utils import predict_recurse
    next_7_day_prediction = predict_recurse(dataset, test, forecast_model)
    fig, _ = plt.subplots(figsize=(15, 5))
    data_final.loc[data_final.index[-7:], "Target"] = next_7_day_prediction
    ax = data_final.loc[data_final.index[-96:-6], "Target"].plot(label="Real", title="311 Service Volume: 7 Day Prediction")
    data_final.loc[data_final.index[-7:], "Target"].plot(label="Forecast", ax=ax)
    ax.legend()

    curr_fig = plt.gcf()
    plt.close()

    return curr_fig


def plot_train_split():
    plt.close('all')
    from utils import plot_train_split
    fig = plot_train_split(train, val)

    return fig


def plot_val_predicitons():
    # NOTE: relies on a preds_val variable that is not defined in this file
    data = val.copy()
    data["Prediction"] = preds_val

    from utils import plot_predictions

    fig = plot_predictions(train, val, preds_val)

    return fig


curr_theme = gr.themes.Default(
    text_size=gr.themes.sizes.text_lg
)

with gr.Blocks(theme=curr_theme, js=dark_mode, css=open("custom.css", "r").read()) as app:
    title = gr.HTML("""<h1 align="center">Point72 Case Study</h1>""")
    with gr.Tabs() as pages:

        with gr.Tab("Overview") as toc_page:
            gr.Markdown("# My Point72 Case Study Results")
            gr.Markdown("""
                * Please follow the tabs sequentially left to right to get the full story of my work
                * There will be many interactive parts where you will be able to test and view different parameters
                * This app may also be built and run locally
                * This app is hosted and served from a cloud server VM Instance
                * Any questions, please email me: davidna22@gmail.com
            """)

        with gr.Tab("Data Preprocessing") as data_preprocessing_page:

            with gr.Tab("Data Loading") as dp_overview:
                gr.HTML("<h1 style=\"text-align: center;\">Loading the Data</h1>")
                gr.Markdown("## Goal: Load the Data as efficiently as possible")
                gr.Markdown("""
                    * Using Pandas alone is **slow and inefficient**.
                    * With small datasets, pandas is great because the API is robust.
                    * With medium datasets, using a library like polars (a Rust-based module with 10x pandas speed) is much faster.
                    * As data gets even larger, multi-processing frameworks like Spark are required.
                    * For this dataset, I use pandas for the weather data and polars for the 311 data. After the aggregation and merge, I revert to pandas for API compatibility.
                """)

                with gr.Accordion("Code", open=False):
                    gr.Code(load_code, language="python")

            with gr.Tab("Location Mapping") as dp_mapping:
                # html.escape lets the saved folium maps be inlined via iframe srcdoc
                src_doc = html.escape(open("figures/map1.html", "r").read())
                iframe1 = f'<iframe width="500px" height="500px" srcdoc="{src_doc}" frameborder="0"></iframe>'
                src_doc = html.escape(open("figures/map2.html", "r").read())
                iframe2 = f'<iframe width="500px" height="500px" srcdoc="{src_doc}" frameborder="0"></iframe>'
                src_doc = html.escape(open("figures/bounded_map.html", "r").read())
                iframe3 = f'<iframe width="500px" height="500px" srcdoc="{src_doc}" frameborder="0"></iframe>'
                src_doc = html.escape(open("figures/final_map.html", "r").read())
                iframe4 = f'<iframe width="500px" height="500px" srcdoc="{src_doc}" frameborder="0"></iframe>'

                gr.HTML("<h1 style=\"text-align: center;\">Location Mapping for Both Datasets</h1>")
                with gr.Row(elem_classes="map-legend"):
                    gr.Markdown("""
                        **Legend:**
                        * <span style=\"color: red\">Red:</span> Weather records
                        * <span style=\"color: #5989ff\">Blue:</span> 311 Service records
                    """, elem_classes="map-legend-text")

                with gr.Row():
                    with gr.Column():
                        gr.HTML("<h1 style=\"text-align: center; margin: 0px;\">Map of New York State</h1>")
                        map1 = gr.HTML(iframe1, elem_classes="map")
                    with gr.Column():
                        gr.HTML("<h1 style=\"text-align: center; margin: 0px;\">Map of New York City</h1>")
                        map2 = gr.HTML(iframe2, elem_classes="map")

                with gr.Row():
                    gr.Markdown("""
                        Juxtaposing these two maps and the approximate distributions of data observations,
                        it's easy to see the problem: the weather dataset covers a larger area than the 311 Service call dataset.
                        Once this problem was diagnosed, the solution was simple. First, find the max coordinate (Lat, Long) bounds
                        of the 311 Service Dataset. Then, filter the weather dataset to only include points that fall within
                        these bounds. This was one of my initial discoveries when analyzing the dataset and crucial to ensure
                        congruity between the two. **Below you can see the bounding box I created and how the new weather data
                        observations fit in this bounding box.**
                    """)

                with gr.Row():
                    with gr.Column():
                        map3 = gr.HTML(iframe3, elem_classes="map")
                    with gr.Column():
                        map4 = gr.HTML(iframe4, elem_classes="map")

                with gr.Accordion("Code", open=False):
                    gr.Code(map_code, language="python")

            with gr.Tab("Variable Pruning") as var_pruning:
                gr.HTML("<h1 style=\"text-align: center;\">How I pruned the datasets</h1>")
                gr.Markdown("## Goal: Remove as many useless features as possible")
                gr.HTML("<h3 style=\"color: darkorange;\">Key Factors for Feature Removal</h3>")
                gr.Markdown("""
                    * Percentage of missing data points
                    * Distribution Imbalance
                    * Irrelevance
                    * Number of distinct categories
                    * Another variable was chosen as replacement <br/><br/>
                    NOTE: Look in the appendix for visualizations of individual variables
                """)
                dropped_var_df = pd.read_excel("data/drop_vars.xlsx")
                gr.Dataframe(
                    dropped_var_df,
                    wrap=True,
                    label="Dropped Variables & Justification (Weather on Bottom)"
                )

            with gr.Tab("Time Aggregation") as time_agg:
                gr.HTML("<h1 style=\"text-align: center;\">Aggregate Data by Date</h1>")
                gr.Markdown("## Goal: Aggregate data by Date")
                gr.HTML("<h3 style=\"color: darkorange;\">Issue 1: 311 Service data is not inherently formatted to provide Created Ticket Counts</h3>")
                gr.HTML("""
                    <ul style="font-size: 18px">
                        <li>Data must be aggregated by day to find ticket counts</li>
                        <li>Covariate features need a special transformation</li>
                        <li>Final Aggregations Mapping</li>
                        <ul style="padding-inline-start: 40px;">
                            <li>Created Date ==> groupby.count ==> Target (Created ticket count)</li>
                            <li>Closed Date ==> Agg* ==> Number of closed tickets (Agg* explained in next tabs)</li>
                            <li>Agency ==> Agg* ==> Number of tickets by Agency (Agg* explained in next tabs)</li>
                            <li>Borough ==> Agg* ==> Number of tickets by Borough (Agg* explained in next tabs)</li>
                            <li>Descriptor ==> Agg* ==> Number of tickets by Descriptor Group/Category (Agg* explained in next tabs)</li>
                        </ul>
                    </ul>""")
                gr.HTML("<h3 style=\"color: darkorange;\">Issue 2: Weather data is not aggregated by day</h3>")
                gr.HTML("""
                    <ul style="font-size: 18px">
                        <li>To merge with 311 Service data, both datasets must be aggregated</li>
                        <li>Additional transformations may be applied only after time aggregation</li>
                        <li>Aggregation function needs to be handled feature by feature</li>
                        <li>Final Aggregation Mapping</li>
                        <ul style="padding-inline-start: 40px;">
                            <li>MaxTemp, MaxSustainedWind ==> groupby.max ==> Variables have an inherent max feature</li>
                            <li>MinTemp ==> groupby.min ==> Variable has an inherent min feature</li>
                            <li>Rain, SnowIce ==> groupby.mean.round ==> Binary variables are first aggregated then rounded back to binary</li>
                            <li>All Other Variables ==> groupby.mean ==> Mean used by default as it is the least lossy pooling method</li>
                        </ul>
                    </ul>""")

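            # Editor's sketch (assumption, not the committed pipeline): the daily
            # aggregation mapping described in the Time Aggregation tab above
            # corresponds roughly to
            #   target = service_data.group_by(pl.col("Created Date").dt.date()).agg(pl.len().alias("Target"))
            # for the 311 ticket counts, and for the weather data something like
            #   weather_daily = weather_data.groupby(weather_data["Datetime"].dt.date).agg(
            #       {"MaxTemp": "max", "MinTemp": "min", "Rain": "mean", "MeanTemp": "mean"})
            # with the binary means rounded back to 0/1 afterwards.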
            with gr.Tab("Weather Data: Imputation") as wd_impute:
                gr.HTML("<h1 style=\"text-align: center;\">Data Imputation</h1>")
                gr.Markdown("## Goal: Impute missing values in Weather Data")
                gr.HTML("<h3 style=\"color: darkorange;\">Issue: Weather data is incomplete, 49 days are missing in 2018</h3>")
                gr.Markdown("#### Proposed Solution: Use a simple imputer to fill these missing days + 7 more days into the \"future\"")
                gr.HTML("""
                    <ul style="font-size: 18px">
                        <li>Use a simple imputer rather than a robust imputation method to reduce model complexity</li>
                        <ul style="padding-inline-start: 40px;">
                            <li>Using a robust imputer = Conducting a multivariate forecast, Very complex & can be slow</li>
                            <li>Using a simple imputer = Low complexity, low latency</li>
                        </ul>
                        <li>Simple imputer applies an aggregate function using Day Of Year (1-366) as the interval</li>
                        <li>4 different Imputation Methods: Mean, Median, Min, Max</li>
                        <li>7 additional days are imputed so the weather data can be used as a future covariate in our model</li>
                        <li>Final Aggregation Mapping</li>
                        <ul style="padding-inline-start: 40px;">
                            <li>WindSpeed, MaxSustainedWind, Gust, SnowDepth => Use Mean => Noisy Variables, Non-Mean/Median methods are too biased, curve best fit with Mean</li>
                            <li>Rain => Use Max => Binary Variables with noise, min/mean/median imputes 0, which does not follow the trend</li>
                            <li>SnowIce => Use Min (impute 0) => Binary variables but mostly 0's, any other imputation is visually inaccurate</li>
                            <li>MeanTemp, MinTemp, MaxTemp, DewPoint, Percipitation => Use Min => Perhaps helping to remove non-stationarity (global warming), Winter is colder now than before, Curve best fits with min</li>
                        </ul>
                    </ul>""")

                gr.Markdown("Use the plots below to see the visual evidence for the reasoning above")
                with gr.Accordion("Show Plots", open=False):
                    impute_data = gr.State(wd_full_local)
                    impute_choices = ["None"]
                    impute_choices.extend(impute_cols)
                    wd_impute_col = gr.Dropdown(
                        choices=impute_choices,
                        value="None",
                        label="Choose a Variable to plot all imputation methods"
                    )

                    wd_impute_plot = gr.Plot()

                    wd_impute_col.change(
                        plot_imputations,
                        [wd_impute_col, impute_data],
                        [wd_impute_plot]
                    )

            with gr.Tab("311: Closed Ticket Counting") as ct_date:
                gr.HTML("<h1 style=\"text-align: center;\">Closed Ticket Feature</h1>")
                gr.Markdown("## The Closed Ticket Feature is built from the Closed Date column, similarly to how Created Date was used to generate the 311 Call Volume target")
                gr.HTML("<h3 style=\"color: darkorange;\">Issue 1: Data Errors, Typos, and/or Null values</h3>")
                gr.HTML("""
                    <ul style="font-size: 18px">
                        <li>Number of Null Values: </li>
                        <li>Number of tickets where Closed Date < Created Date: </li>
                        <ul style="padding-inline-start: 40px;">
                            <li>These values were most likely typos/data recording errors</li>
                            <li>For instance, some of these values dated to 1900</li>
                        </ul>
                        <li>SOLUTION: For every data error, impute with the mean difference (recompute Closed Date based off Created)</li>
                        <li>Mean is calculated as the mean time differential between all valid Closed & Created Dates</li>
                        <li>Mean Time Differential: 13 Days</li>
                    </ul>""")
                gr.HTML("<h3 style=\"color: darkorange;\">Issue 2: Data Leakage - Future into Past</h3>")
                gr.HTML("""
                    <ul style="font-size: 18px">
                        <li>Most of the Closed Date values are 13 days ahead relative to Created Date</li>
                        <li>A GroupBy on Closed Date alone will leak some closed ticket counts into future created dates</li>
                        <li>SOLUTION: GroupBy [Closed Date, Created Date] pairwise, filter so Created Date < Closed Date</li>
                    </ul>""")
                with gr.Accordion("Code", open=False):
                    gr.Code(Closed_Ticket_Code, language="python")

            with gr.Tab("311: Categorical Grouping") as cat_groups:
                # keep the loaded model in app state (renamed so the BERTopic class is not shadowed)
                bertopic_state = gr.State(BERTopic.load("models/BERTopic"))
                gr.HTML("<h1 style=\"text-align: center;\">Categorical Features</h1>")
                gr.HTML("<h3 style=\"color: darkorange;\">Issue 1: Categorical Features have too many categories</h3>")
                gr.Markdown("#### Create a mapping of categories into groups to reduce the total number (Viewable at the bottom of the page)")
                gr.HTML("""
                    <ul style="font-size: 18px">
                        <li>Borough:</li>
                        <ul style="padding-inline-start: 40px;">
                            <li>Only 9 Categories without grouping</li>
                            <li>Four Categories are either typos or just null => Group all into OTHER</li>
                        </ul>
                        <li>Agency:</li>
                        <ul style="padding-inline-start: 40px;">
                            <li>30 Agencies in total are listed</li>
                            <li>Manual Research to group each Agency by Category of what they typically do</li>
                            <li>30 Agencies down to 7 Agency Groupings, based on frequency and research</li>
                        </ul>
                        <li>Complaint Type: Removed because analysis showed complaints were too related to the agency</li>
                        <ul style="padding-inline-start: 40px;">
                            <li>299 unique pairs out of 271 unique complaints => only ~10% difference in distribution</li>
                        </ul>
                        <li>Descriptor: Over 1000+ unique categories. Only way to realistically group is to use NLP</li>
                        <ul style="padding-inline-start: 40px;">
                            <li>Pretrained a BERTopic model to extract topics from the text</li>
                            <li>BERTopic uses TF-IDF & Transformers to extract topics from text</li>
                            <li>BERTopic reduced 1000 categories into 8 groups</li>
                        </ul>
                    </ul>""")

                gr.HTML("<h3 style=\"color: darkorange;\">Issue 2: How do we aggregate these features by day when there are multiple repeated categories per day?</h3>")
                gr.Markdown("#### One Hot Encode and Sum per category")
                gr.HTML("""
                    <ul style="font-size: 18px">
                        <li>Step 1: One hot encode all the features before aggregation</li>
                        <li>Step 2: GroupBy date and Sum for each encoding</li>
                        <ul style="padding-inline-start: 40px;">
                            <li>Example: A categorical group with 4 categories</li>
                            <li>One Sum column per category representing the frequency of that category per day</li>
                        </ul>
                        <li>Main Downside: Highly correlated with Created Ticket data; aggregation method was essentially the same</li>
                        <ul style="padding-inline-start: 40px;">
                            <li>Summing across the four feature categories in the example above would just equal the ticket count</li>
                        </ul>
                        <li>Solution: Leave some categories out of the final vector to reduce bias (Shown in feature engineering stage)</li>
                    </ul>""")

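                # Editor's sketch (assumption): "one-hot encode then sum per day"
                # as described above is typically
                #   ohe = pd.get_dummies(df["Borough"], prefix="Borough")
                #   daily_counts = ohe.groupby(df["Created Date"].dt.date).sum()
                # so each column holds that category's ticket count for the day.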
                with gr.Accordion("View Feature Groups", open=False):
                    with gr.Accordion("Borough", open=False):
                        gr.JSON(json.loads(open("code/Borough.json", "r").read()))

                    with gr.Accordion("Agency", open=False):
                        gr.JSON(json.loads(open("code/Agency.json", "r").read()))

                    with gr.Accordion("Descriptor", open=False):
                        gr.Dataframe(topic_model.get_topic_info().loc[:, ["Count", "Name", "Representation"]])
                        gr.Plot(topic_model.visualize_barchart(list(range(-1, 6, 1))))

            with gr.Tab("All Code") as code_preprocess:
                gr.Markdown("# View Full Code for building Weather Data")
                with gr.Accordion(open=False):
                    gr.Code(open("code/build_weather.py", "r").read())

                gr.Markdown("# View Full Code for building 311 Service Data")
                with gr.Accordion(open=False):
                    gr.Code(open("code/build_service.py", "r").read())

        with gr.Tab("Exploratory Data Analysis", id="eda_page") as eda_page:
            bivar_data = gr.State(data_merged_eda)
            with gr.Tab("Overview", id="eda_overview") as eda_overview:
                gr.Markdown("# The EDA Section is intended to be a set of interactive visualizations")
                gr.Markdown("The tabs are interactive plots and tables that were used to generate the key insights below.")
                gr.HTML("<h3 style=\"color: darkorange;\">Key Insights</h3>")
                gr.HTML("""
                    <ul style="font-size: 18px">
                        <li>Missing Values:</li>
                        <ul style="padding-inline-start: 40px; font-size: 18px;">
                            <li>Gust, if used, may need interpolation to fill missing values</li>
                        </ul>
                        <li>Stationarity</li>
                        <ul style="padding-inline-start: 40px; font-size: 18px;">
                            <li>Weather variables exhibit various levels of non-stationarity (mostly based on trend but some constant)</li>
                            <ul style="padding-inline-start: 60px; font-size: 18px;">
                                <li>Trends are clear for some like Temperature and DewPoint</li>
                                <li>Possible cause of constant non-stationarity are factors such as global warming</li>
                            </ul>
                            <li>311 Calls may exhibit some forms of weekly non-stationarity</li>
                            <ul style="padding-inline-start: 60px; font-size: 18px;">
                                <li>Potentially weekly and monthly non-stationarity</li>
                                <li>Affected by Holidays and Weekends</li>
                                <li>More robust tests needed</li>
                            </ul>
                            <li>Action Item: Test for stationarity and remove</li>
                        </ul>
                        <li>Bivariate Interactions:</li>
                        <ul style="padding-inline-start: 40px; font-size: 18px;">
                            <li>311 Calls have stronger relationships with certain Agency, Borough and Descriptor categories</li>
                            <li>311 calls exhibit weak overall linear relationships with weather</li>
                            <ul style="padding-inline-start: 60px; font-size: 18px;">
                                <li>Monthly and Seasonal relationship is strongest in winter months</li>
                                <li>Month of January: strongest linear relationship between MinTemp, DewPoint</li>
                            </ul>
                        </ul>
                        <li>Seasonality:</li>
                        <ul style="padding-inline-start: 40px; font-size: 18px;">
                            <li>Weather variables exhibit a strong Yearly and Seasonal seasonality</li>
                            <li>311 Service Variables exhibit Weekly Seasonality</li>
                            <li>311 Variables affected strongly by holidays and weekends (fewer 311 calls on weekends and holidays)</li>
                        </ul>
                        <li>Correlation:</li>
                        <ul style="padding-inline-start: 40px; font-size: 18px;">
                            <li>Heavy Collinearity among weather variables (especially Min, Mean, MaxTemp)</li>
                            <li>Varying degrees of correlation among 311 covariates and 311 volume</li>
                        </ul>
                        <li>Lags & Autocorrelation:</li>
                        <ul style="padding-inline-start: 40px; font-size: 18px;">
                            <li>311 Service Calls have the highest correlation with the 7, 14, 21 day (weekly) lags</li>
                            <li>6 and 8 day lags have the second-strongest relationship; the 8 day lag exhibits some negative correlation</li>
                            <li>The 1 day lag exhibits correlation similar to the 6 and 7 day lags</li>
                        </ul>
                    </ul>""")

            with gr.Tab("Univariate", id="eda_univar") as eda_univar:

                with gr.Tab("Weather Data") as eda_uni_weather:
                    eda_univar_weatherdf = gr.State(weather_full_df)
                    gr.Markdown("# Use the Interactive plot below")
                    eda_uni_weather_name = gr.State("Weather")
                    weather_vars = [
                        "", 'MeanTemp', 'DewPoint', 'Percipitation', 'WindSpeed', 'Gust', 'SnowDepth',
                        'MinTemp', 'MaxTemp', 'MaxSustainedWind'
                    ]
                    select_weather_var = gr.Dropdown(
                        choices=weather_vars,
                        value="",
                        label="Select a Variable to View"
                    )

                    weather_uniplot = gr.Plot()

                    select_weather_var.change(
                        plot_timeseries,
                        inputs=[
                            eda_univar_weatherdf,
                            select_weather_var,
                            eda_uni_weather_name
                        ],
                        outputs=[
                            weather_uniplot
                        ]
                    )

                with gr.Tab("311 Service Data") as eda_uni_service:
                    eda_univar_servicedf = gr.State(data_merged_eda)
                    gr.Markdown("# Use the Interactive plot below")
                    gr.Markdown("**NOTE: Target is the count of 311 service records**")
                    eda_uni_service_name = gr.State("Service")
                    service_vars = [
                        "", 'Target', 'num_closed_tickets',
                        # Agency Group Counts
                        'AG_Buildings', 'AG_Environment & Sanitation', 'AG_Health',
                        'AG_Parks', 'AG_Security', 'AG_Transportation',
                        'AG_Other',
                        # Borough Counts
                        'Borough_BRONX', 'Borough_BROOKLYN', 'Borough_MANHATTAN',
                        'Borough_QUEENS', 'Borough_STATEN ISLAND',
                        'Borough_OTHER',
                        # Descriptor Group Counts
                        'DG_damaged_sign_sidewalk_missing',
                        'DG_english_emergency_spanish_chinese',
                        'DG_exemption_commercial_tax_business',
                        'DG_license_complaint_illegal_violation', 'DG_noise_animal_truck_dead',
                        'DG_odor_food_air_smoke', 'DG_order_property_inspection_condition',
                        'DG_water_basin_litter_missed'
                    ]
                    select_service_var = gr.Dropdown(
                        choices=service_vars,
                        value="",
                        label="Select a Variable to View"
                    )

                    service_uniplot = gr.Plot()

                    select_service_var.change(
                        plot_timeseries,
                        inputs=[
                            eda_univar_servicedf,
                            select_service_var,
                            eda_uni_service_name
                        ],
                        outputs=[
                            service_uniplot
                        ]
                    )

            with gr.Tab("Bivariate", id="eda_bivar") as eda_bivar:
                gr.Markdown("# Use the Interactive plot below")
                gr.Markdown("Use this tab to view relationships between the Target variable (number of tickets created daily) and a Covariate")
                with gr.Column():
                    with gr.Row() as bivar_params:
                        bivar_dist_target = gr.Dropdown(
                            choices=["Target"],
                            value="Target",
                            label="Target Variable (One option)"
                        )

                        all_bivars = ['num_closed_tickets', "Agency", "Borough", "Descriptor"]
                        all_bivars.extend(weather_vars)
                        all_bivars = sorted(all_bivars)
                        all_bivars = all_bivars[1:]  # drop the empty "" entry carried over from weather_vars
                        bivar_dist_cov = gr.Dropdown(
                            choices=all_bivars,
                            value="MeanTemp",
                            label="Select Covariate"
                        )
                        bivar_trendline = gr.Dropdown(
                            choices=[True, False],
                            value=True,
                            label="Graph with OLS Trendline"
                        )

                    with gr.Accordion("Add Seasonality", open=False):
                        bivar_subset = gr.Dropdown(
                            choices=["None", "Year", "Season", "Month", "Day Of Week", "Weekend", "Holiday"],
                            value="None",
                            label="Seasonality Options (Disabled for Agency, Borough and Descriptor)"
                        )

                    bivar_submit = gr.Button("Run")
                    bivar_plot = gr.Plot()
                    bivar_submit.click(
                        plot_bivariate,
                        [bivar_data, bivar_dist_cov, bivar_dist_target, bivar_subset, bivar_trendline],
                        bivar_plot
                    )

            with gr.Tab("Seasonality") as bivar_season:
                gr.Markdown("## Exploring the effect of Seasonality")

                with gr.Row() as bivar_season_params:
                    bivar_season_var = gr.Dropdown(
                        choices=["Target", 'MeanTemp', 'DewPoint',
                                 'Percipitation', 'WindSpeed', 'Gust', 'SnowDepth',
                                 'MinTemp', 'MaxTemp', 'MaxSustainedWind'],
                        value="Target",
                        label="Variable"
                    )

                    bivar_season_cov = gr.Dropdown(
                        choices=["Year", "Season", "Month", "Day Of Week", "Weekend", "Holiday", "Rain", "SnowIce"],
                        value="Year",
                        label="Seasonality"
                    )

                    with gr.Column():
                        season_boxplot = gr.Checkbox(value=True, label="Show Boxplot")
                        season_outlier = gr.Checkbox(value=False, label="Show Outliers")

                bivar_season_btn = gr.Button("Run")

                bivar_season_plot = gr.Plot()

                bivar_season_btn.click(
                    plot_seasonality,
                    [bivar_data, bivar_season_cov, bivar_season_var, season_boxplot, season_outlier],
                    [bivar_season_plot]
                )

            with gr.Tab("Correlation") as corr:

                with gr.Tab("Weather Correlations") as corr_weather:
                    gr.Plot(plot_all_correlations(data_merged_eda, "weather", method="pearson"))

                with gr.Tab("311 Service Correlations") as corr_service:
                    gr.Plot(plot_all_correlations(data_merged_eda, "service", method="pearson"))

                with gr.Tab("Lag Correlations") as corr_dynamic:
                    gr.Markdown("## Use this to dynamically view correlations based on Lag")
                    gr.Markdown("By default, we will analyze lags of [0,1,2,3,4,5,6,7,8,13,14,15,21] days for the chosen variable")
                    gr.Markdown("Scroll Down For AutoCorrelation Graphs")
                    with gr.Row():
                        corr_var_choices = [
                            "None", 'Target', 'num_closed_tickets',
                            # Weather Variables
                            'MeanTemp', 'DewPoint', 'Percipitation',
                            'WindSpeed', 'Gust', 'SnowDepth',
                            'MinTemp', 'MaxTemp', 'MaxSustainedWind',
                            # Agency Group Counts
                            'AG_Buildings', 'AG_Environment & Sanitation', 'AG_Health',
                            'AG_Parks', 'AG_Security', 'AG_Transportation',
                            'AG_Other',
                            # Borough Counts
                            'Borough_BRONX', 'Borough_BROOKLYN', 'Borough_MANHATTAN',
                            'Borough_QUEENS', 'Borough_STATEN ISLAND',
                            'Borough_OTHER',
                            # Descriptor Group Counts
                            'DG_damaged_sign_sidewalk_missing',
                            'DG_english_emergency_spanish_chinese',
                            'DG_exemption_commercial_tax_business',
                            'DG_license_complaint_illegal_violation', 'DG_noise_animal_truck_dead',
                            'DG_odor_food_air_smoke', 'DG_order_property_inspection_condition',
                            'DG_water_basin_litter_missed'
                        ]
                        corr_vars = gr.Dropdown(
                            choices=corr_var_choices,
                            value="Target",
                            label="Variable"
                        )

                    corr_btn = gr.Button("Run")
                    corr_plot = gr.Plot()
                    autocorr_plot = gr.Plot()

                    corr_btn.click(
                        plot_correlations,
                        [bivar_data, corr_vars],
                        [corr_plot]
                    )

                    corr_btn.click(
                        plot_autocorr,
                        [bivar_data, corr_vars],
                        [autocorr_plot]
                    )

        with gr.Tab("Feature Engineering") as feature_engineer_page:

            with gr.Tab("Feature Selection") as feature_select:
                gr.HTML("<h1 style=\"text-align: center;\">Select Features Based on EDA</h1>")
                gr.Markdown("### Below is the logic used in our model feature selection")
                gr.HTML("""
                    <ul style="font-size: 18px">
                        <li>Weather Covariates</li>
                        <ul style="padding-inline-start: 30px; font-size: 18px;">
                            <li>Weather variables exhibit various levels of non-stationarity (mostly based on trend but some constant)</li>
                            <li>MeanTemp, MaxTemp: High collinearity with MinTemp. MinTemp has the highest correlation of the 3 => REMOVE</li>
                            <ul style="padding-inline-start: 50px; font-size: 18px;">
                                <li>Possible Reason: High temps, people stay indoors. A/C doesn't break nowadays. Lower Temps lead to building/tech failure more often</li>
                            </ul>
                            <li>Percipitation: Bivariate plot shows weak relationship, outliers no effect on 311 => REMOVE</li>
                            <li>SnowDepth: High number of missing values, low correlation => REMOVE</li>
                            <li>Rain, SnowIce: Binary, plots (look in Seasonality Tab) show weak relationship, SnowIce heavily imbalanced (99% 0's) => REMOVE</li>
                        </ul>
                        <li>311 Service Covariates:</li>
                        <ul style="padding-inline-start: 30px; font-size: 18px;">
                            <li>LOO (Leave One - or many - Out) Encoding:</li>
                            <ul style="padding-inline-start: 50px; font-size: 18px;">
                                <li>Remove weakest features from our categorical covariates</li>
                                <li>Reduces bias and removes multicollinearity inherent to One-Hot Encoding</li>
                                <li>Candidates For Removal:</li>
                                <ul style="padding-inline-start: 70px; font-size: 18px;">
                                    <li>AG_Health, AG_Other: Lowest Correlation, lowest counts => REMOVE</li>
                                    <li>AG_Parks: Lowest Correlation, but low multi-collinearity => KEEP</li>
                                    <li>Borough_OTHER: Weakest Correlation, lowest count => REMOVE</li>
                                    <li>DG_english_emergency, DG_exemption_commercial: Weakest Correlation, lowest counts => REMOVE</li>
                                    <li>DG_odor_food_air_smoke: Lowest Count, but high correlation => KEEP</li>
                                </ul>
                            </ul>
                        </ul>
                    </ul>""")

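                # Editor's sketch (assumption): the LOO-style pruning above amounts to
                # dropping the weakest one-hot columns before modeling, e.g.
                #   loo_drop = ["AG_Health", "AG_Other", "Borough_OTHER",
                #               "DG_english_emergency_spanish_chinese",
                #               "DG_exemption_commercial_tax_business"]
                #   features = features.drop(columns=loo_drop)
                # which also breaks the columns-sum-to-total collinearity of a full one-hot.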
                with gr.Accordion("Show Final Variable List", open=False):
                    gr.JSON(json.loads(open("code/all_vars.json", "r").read()))

            with gr.Tab("Feature Preprocessing") as feature_prep:
                data_feature_prep = gr.State(data_preprocess)
                gr.HTML("<h1 style=\"text-align: center;\">Preprocess Features</h1>")
                gr.HTML("<h3 style=\"color: darkorange;\">Issue 1: Missing Values</h3>")
                gr.HTML("""
                    <ul style="font-size: 18px">
                        <li>Only one variable has missing values to impute: Gust</li>
                        <ul style="padding-inline-start: 30px; font-size: 18px;">
                            <li>Various interpolation methods were tested</li>
                            <li>Methods like Spline and Polynomial over-estimated some values, breaking inherent data ranges</li>
                            <li>Simple linear interpolation turned out to be best</li>
                        </ul>
                        <li>SOLUTION: Interpolate Gust with the Linear method</li>
                    </ul>""")

                with gr.Accordion("Show Interpolation Plots", open=False):
                    gr.Plot(plot_interpolation(data_preprocess))

                gr.HTML("<h3 style=\"color: darkorange;\">Issue 2: Remove Non-Stationarity</h3>")
                gr.HTML("""
                    <ul style="font-size: 18px">
                        <li>Variables that are non-stationary change over time; they have a trend</li>
                        <li>Ideal to transform non-stationary variables for modeling</li>
                        <li>Ignore Categorical Variables (simply to keep model complexity low)</li>
                        <li>Numerical Variables were tested for Non-Stationarity using two methods: ADF and KPSS</li>
                        <ul style="padding-inline-start: 30px; font-size: 18px;">
                            <li>Using ADF and KPSS together can reveal what kind of trend exists in the data</li>
                            <li>Only 1 Case Met: Pass KPSS, Fail ADF = Trend Stationary (most likely by season)</li>
                        </ul>
                        <li>Only Two Variables failed the tests: DewPoint & MinTemp</li>
                        <li>SOLUTION: Use Differencing (7d lag) + Log for MinTemp and Differencing (7d lag) for DewPoint (Log caused many NaNs)</li>
                    </ul>""")

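                # Editor's sketch (assumption; test_stationary lives in utils.py and is
                # not shown): the ADF/KPSS pair can be run with statsmodels, e.g.
                #   from statsmodels.tsa.stattools import adfuller, kpss
                #   adf_p = adfuller(series.dropna())[1]                 # H0: unit root
                #   kpss_p = kpss(series.dropna(), regression="c")[1]    # H0: stationary
                # failing ADF while passing KPSS is the trend-stationary case cited above.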
                with gr.Accordion("View Results Below", open=False):
                    gr.Markdown("### MinTemp (Log) Tests Before and After Transformation")
                    with gr.Row():
                        with gr.Column():
                            gr.Dataframe(test_stationary(data_preprocess, "MinTemp_old"), label="MinTemp No Augments")
                        with gr.Column():
                            gr.Dataframe(test_stationary(data_preprocess, "MinTemp"), label="Log + 7 Day Lag Differencing")

                    gr.Markdown("### DewPoint Tests Before and After Transformation")
                    with gr.Row():
                        with gr.Column():
                            gr.Dataframe(test_stationary(data_preprocess, "DewPoint_old"), label="DewPoint No Augments")
                        with gr.Column():
                            gr.Dataframe(test_stationary(data_preprocess, "DewPoint"), label="7 Day Lag Differencing")

            with gr.Tab("Feature Engineering") as feature_eng:

                with gr.Tab("Past Covariates") as fe_past:
                    gr.HTML("<h1 style=\"text-align: center;\">Past Covariate Features</h1>")
                    gr.Markdown("""
                        * Past Covariates are datapoints that are only related to past information
                        * For instance, using past sales of product B to predict future sales of product A
                        * There are two ways to use past covariates
                        * *Option 1:* Build a multi-variate forecast to predict these variables simultaneously
                        * *Option 2:* Use a sliding window and lags to provide past data (especially for multi-step forecasts)
                    """)
                    gr.Markdown("**I will use Option 2 to avoid building a very complex multi-variate model**")
                    gr.HTML("<h3 style=\"color: darkorange;\">Issue 1: Leaking Future Data into the past</h3>")
                    gr.Markdown("""
                        * By using lags, I can shift my data in a way that avoids leaking future data into the past
                        * For predicting 7 days into the future, I must lag my data by at least 7 days
                        * Use a rolling window that will reset over time
                    """)
                    gr.HTML("<h3 style=\"color: darkorange;\">Issue 2: Curse of Dimensionality</h3>")
                    gr.Markdown("""
                        * Possible to use many variations of lags, rollings and differences to generate many features
                        * Too many features lead to the curse of dimensionality, i.e. Overfitting
                        * Thus, I keep my Feature Set as simple as possible
                    """)
                    gr.Markdown("""
                        ### Feature Set
                        * Lags: 7D, 14D, 21D
                        * Rolling (Shifted 7 Days forward): Mean of 14D (14 because mean(Closed Date - Created Date) = 13 days)
                        * Differencing (7D difference = 7D lag - 14D lag): 7D
                    """)

                    with gr.Accordion("Open to view implementation code", open=False):
                        gr.Code(open("code/past_features.py", "r").read())
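                        # Editor's sketch (assumption; code/past_features.py is not shown
                        # here): lag/rolling/differencing features like those listed above
                        # are typically built as
                        #   df["closed_lag7"] = df["num_closed_tickets"].shift(7)
                        #   df["closed_roll14"] = df["num_closed_tickets"].shift(7).rolling(14).mean()
                        #   df["closed_diff7"] = df["num_closed_tickets"].shift(7) - df["num_closed_tickets"].shift(14)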

                with gr.Tab("Future Covariates") as fe_future:
                    gr.HTML("<h1 style=\"text-align: center;\">Future Covariate Features</h1>")
                    gr.Markdown("""
                        * Future Covariates are data that I have about the future
                        * For instance, I can use the projected revenue of Company A to predict daily sales
                        * For Future Covariates, I do not need to shift variables. I will provide a shift up to 2 days.
                        * I apply a rolling and an expanding window as more features
                        * Also, I use mean and min to follow the logic learned in EDA. Minimum temperature values seem to be more impactful on 311 volume
                    """)
                    gr.HTML("<h3 style=\"color: darkorange;\">Issue 1: Curse of Dimensionality</h3>")
                    gr.Markdown("""
                        * Similar to the Past Covariates, I keep the feature set as small and simple as possible
                        * The more features, the more we may overfit
                    """)
                    gr.Markdown("""
                        ### Feature Set
                        * Lags: 0D, 1D, 2D
                        * Rolling: Mean & Min of last 14D
                        * Expanding Window: Max, Min (min-length of 14)
                        * Differencing already performed to remove trends
                    """)

                    with gr.Accordion("Open to view implementation code", open=False):
                        gr.Code(open("code/future_features.py", "r").read())

                with gr.Tab("Target Variable") as fe_target:
                    gr.HTML("<h1 style=\"text-align: center;\">311 Service Calls Features</h1>")
                    gr.Markdown("""
                        * For feature transformations of our Target, we can follow a similar process as above
                        * Main Difference: Lags shorter than the prediction window need to be recomputed at each iteration
                        * So, for predicting at time (t+1) we need the predicted value at time (t)
                        * For a recursive prediction model, this means the model cannot make batch predictions without iterating
                    """)
                    gr.HTML("<h3 style=\"color: darkorange;\">Issue 1: More variables increase complexity for prediction</h3>")
                    gr.Markdown("""
                        * The more features, the more overfitting & more computation
                        * As I will use a recursive model, these values must be recomputed at each step t+1
                        * In favor of a less complex model, I will choose as few features as possible (excluding rolling features, as they are prone to error on recalculation)
                    """)
                    gr.HTML("<h3 style=\"color: darkorange;\">Issue 2: Leaking Future Data into the past</h3>")
                    gr.Markdown("""
                        * Must be careful about how these features are computed
                        * For instance, for the rolling mean, I shift the data by 1 lag first, then compute the rolling statistic
                        * For differencing, a 7D lag difference is really the 1D - 8D lag. (For t=8, 7D diff = t7-t1 not t8-t2)
                    """)
                    gr.Markdown("""
                        ### Feature Set
                        * Lags: 1D, 6D, 7D, 8D, 14D, 21D (based on highest correlations and weekly seasonality)
                        * Differencing: 7D, 14D
                    """)

                    with gr.Accordion("Open to view implementation code", open=False):
                        gr.Code(open("code/target_features.py", "r").read())

        with gr.Tab("Forecast Model") as model_select_train_page:

            with gr.Tab("Splitting the data") as model_split_tab:
                gr.HTML("<h1 style=\"text-align: center;\">Splitting Time-Series Data</h1>")
                gr.HTML("""
                    <ul style="font-size: 18px">
                        <li>Splitting Time-Series Data is different from splitting other data</li>
                        <li>Rather than splitting on random samples, you split the data by time, keeping order consistent</li>
                        <li>I took a 75% splitting approach, splitting my data at the date that sits at 75% of the data length</li>
                    </ul>""")
                gr.Markdown("#### As an example, I provide a graph showing exactly how I split my data")
                gr.Plot(plot_train_split())

            with gr.Tab("Model Selection") as model_select_tab:
                gr.HTML("<h1 style=\"text-align: center;\">Choosing the Right Model</h1>")
                gr.Markdown("### Types of Forecast Models for Multi-Step Prediction")
                gr.HTML("""
                    <ul style="font-size: 18px">
                        <li>Parallel Models: Train a model for each prediction (one for 1 day ahead, another for 2, etc.)</li>
                        <li>Recursive Models: Model makes a forecast, fills any values it needs for the next prediction, predicts again</li>
                        <ul style="padding-inline-start: 40px; font-size: 18px;">
                            <li>One of the assumptions was to build a model that was reasonable for production</li>
                            <li>Parallel models are hard to maintain as the steps of prediction increase</li>
                        </ul>
                        <li>Decision: Recursive Model</li>
                    </ul>""")
                gr.Markdown("### My Model Choice: XGBoost")
                gr.HTML("""
                    <ul style="font-size: 18px">
                        <li>Reasons for choosing:</li>
                        <ul style="padding-inline-start: 40px; font-size: 18px;">
                            <li>Industry standard for regression</li>
                            <li>Lightweight and relatively fast</li>
                            <li>Many parameters to tune, such as tree depth and regularization</li>
                            <li>Scale invariant - Data does not have to be scaled</li>
                            <li>Allows NaN values and categorical features without encodings (unused in my implementation)</li>
                            <li>Provides key explainability in its feature importance metrics</li>
                        </ul>
                        <li>Decision: Use XGBoost</li>
                    </ul>""")

            with gr.Tab("Model Training") as model_train_tab:
                gr.HTML("<h1 style=\"text-align: center;\">Training the Model</h1>")
                gr.HTML("<h3 style=\"color: darkorange;\">Issue 1: Overfitting</h3>")
                gr.HTML("""
                    <ul style="font-size: 18px">
                        <li>Main Cause: High number of variables and XGBoost's tendency to overfit without tuning</li>
                        <li>While training, effort was made to watch the validation and training sets' relative performance</li>
                        <li>Steps Taken to avoid Overfitting</li>
                        <ul style="padding-inline-start: 40px; font-size: 18px;">
                            <li>Low Learning Rate</li>
                            <li>Low Tree Depth</li>
                            <li>Keeping the Val score relatively close to the Training score</li>
                            <li>Increased l2-lambda parameter, boosting regularization</li>
                            <li>Many trials to get the best set of parameters</li>
                            <li>Implementing Early Stopping</li>
                        </ul>
                    </ul>""")
                gr.HTML("<h3 style=\"color: darkorange;\">Issue 2: Choosing a Metric</h3>")
                gr.HTML("""
                    <ul style="font-size: 18px">
                        <li>Three metrics I considered: MAPE, MAE and MSE</li>
                        <li>MAPE seemed to show the most consistent and visually accurate results</li>
                        <li>Decision: MAPE</li>
                        <li>Justification: 311 Service volume is quite noisy and MAPE better estimates fit to a very noisy curve than the others</li>
                    </ul>""")
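                # Editor's sketch (assumption): the tuning described above maps to
                # XGBoost parameters along the lines of
                #   xgb.XGBRegressor(learning_rate=0.01, max_depth=3, reg_lambda=10.0,
                #                    early_stopping_rounds=50, eval_metric="mape")
                # fit with eval_set=[(X_val, y_val)] so train/val divergence is visible.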

            with gr.Tab("Model Prediction") as model_predict_tab:
                gr.HTML("<h1 style=\"text-align: center;\">Recursive Model Prediction</h1>")
                gr.Markdown("""
                    * Below is the code I wrote to implement the Recursive prediction explained in previous tabs
                    * Predictions are made one step at a time, where the prediction at t depends on the prediction at t-1
                    * To view the final predictions made by the model, see below
                """)
                gr.Code(open("code/recurse_predict.py", "r").read())
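                # Editor's sketch (assumption; code/recurse_predict.py is not shown
                # here): a recursive forecaster generally loops one step at a time,
                #   for step in range(7):
                #       X_next = build_features(history)             # hypothetical helper
                #       y_hat = model.predict(X_next.tail(1))
                #       history = append_prediction(history, y_hat)  # hypothetical helper
                # so each prediction feeds the lag features of the next step.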
1149
+ with gr.Accordion("View 7 Day Model Forecast", open=False):
1150
+ gr.Plot(plot_final_predictions())
1151
+
1152
+
1153
+ with gr.Tab("Model Evaluation") as model_eval_page:
1154
+ gr.HTML("<h1 style=\"text-align: center;\">Forecast Results</h1>")
1155
+ gr.Markdown("Overall, the model seemed to have performed pretty well. The MAPE is also <10% for both Validation and Training sets.")
1156
+ gr.Markdown("The model did suffer from a low validation R2, but this was difficult to resolve without compromising overall performance of the model.")
1157
+ gr.Markdown("The predictions seem to visually pass most backtests, which can be viewed in the graph below.")
1158
+ with gr.Accordion("Model Prediction Scores", open=False):
1159
+ gr.JSON({"Train R2": r2_train, "Train MAPE": mape_train, "Validation R2": r2_val, "Validation MAPE": mape_val})
1160
+ gr.Image("figures/model_performance.png", show_download_button=False)
1161
+
1162
+
1163
+ with gr.Tab("Feature Importance") as model_eval_page:
1164
+ gr.HTML("<h1 style=\"text-align: center;\">Feature Importance</h1>")
1165
+ gr.Markdown("""
1166
+ * Below you can view the feature importance metrics from the XGBoost model (a short sketch of how they are obtained follows this list)
1167
+ * The weather variables appear to have a significant impact on 311 service call volume
1168
+ * Interestingly, some categories also appear to be more impactful than others
1169
+ """)
1170
+ gr.Plot(plot_model_feature_importance())
1171
+
1172
+
1173
+ with gr.Tab("Future Work & Limitations") as future_limitations_page:
1174
+ gr.Markdown("# Future Work")
1175
+ gr.Markdown("""
1176
+ * **Multi-Variate Time Series Forecasting** rather than imputing values naively
1177
+ * Testing more kinds of models such as LightGBM
1178
+ * Robustly testing parameters of the current model using GridSearchCV (see the sketch below)
1179
+ * Comparing performance of my forecast model to others
1180
+ * More Data! Having more 311 Call data may help find other indicators
1181
+ """)
1182
+ gr.Markdown("# Future Deployments")
1183
+ gr.Markdown("""
1184
+ * Containerize the model and load onto an API for ingestion
1185
+ * Containerize data preprocessing and load into a Spark Cluster
1186
+ * Create triggers and view tables to verify data preprocessing
1187
+ * Create functions to monitor model performance
1188
+ """)
1189
+
1190
+ with gr.Tab("Appendix") as future_limitations_page:
1191
+
1192
+ with gr.Tab("Weather Data Analysis") as dp_weather:
1193
+ dp_weather_state = gr.State("weather")
1194
+ with gr.Column():
1195
+ with gr.Row():
1196
+ dp_weather_category = gr.Dropdown(
1197
+ choices=["2011-2018", "2016-2018"],
1198
+ value="2011-2018",
1199
+ label="Time Range"
1200
+ )
1201
+
1202
+ dp_weather_var = gr.Dropdown(
1203
+ choices = ["MeanTemp", "MinTemp", "MaxTemp", "DewPoint", "Percipitation", "WindSpeed", "MaxSustainedWind", "Gust", "Rain", "SnowDepth", "SnowIce"],
1204
+ value = "MeanTemp",
1205
+ label = "Variable"
1206
+ )
1207
+
1208
+ dp_weather_btn = gr.Button("Run")
1209
+
1210
+ dp_weather_report = gr.HTML(value=iframe_dp_weather)
1211
+
1212
+ dp_weather_btn.click(
1213
+ run_report,
1214
+ [dp_weather_state, dp_weather_var, dp_weather_category],
1215
+ dp_weather_report,
1216
+ )
1217
+
1218
+ with gr.Tab("Service Data Analysis") as dp_service:
1219
+ dp_service_state = gr.State("service")
1220
+ dp_service_category = gr.State("full")
1221
+ with gr.Column():
1222
+ dp_service_var = gr.Dropdown(
1223
+ choices = [
1224
+ "Created Date", "Closed Date", "Agency", "Agency Name",
1225
+ "Complaint Type", "Descriptor", "Location Type", "Landmark",
1226
+ "Facility Type", "Status", "Community Board", "Borough",
1227
+ "Open Data Channel Type", "Park Facility Name", "Park Borough",
1228
+ "Vehicle Type", "Taxi Company Borough", "Taxi Pick Up Location",
1229
+ "Bridge Highway Name", "Bridge Highway Direction", "Road ramp",
1230
+ "Bridge Highway Segment"
1231
+ ],
1232
+ value = "Created Date",
1233
+ label = "Select Variable and Run"
1234
+ )
1235
+ dp_service_btn = gr.Button("Run")
1236
+
1237
+ dp_service_report = gr.HTML(value=iframe_dp_service)
1238
+
1239
+ dp_service_btn.click(
1240
+ run_report,
1241
+ [dp_service_state, dp_service_var, dp_service_category],
1242
+ dp_service_report,
1243
+ )
1244
+
1245
+ def main():
1246
+
1247
+
1248
+ app.launch(share=False)
1249
+ return app
1250
+
1251
+
1252
+ if __name__=="__main__":
1253
+
1254
+
1255
+ main()
code/Agency.json ADDED
@@ -0,0 +1,34 @@
1
+ {
2
+ "Agency": {
3
+ "NYPD": "Security",
4
+ "HPD": "Buildings",
5
+ "DOT": "Transportation",
6
+ "DSNY": "Environment & Sanitation",
7
+ "DEP": "Environment & Sanitation",
8
+ "DOB": "Buildings",
9
+ "DOE": "Buildings",
10
+ "DPR": "Parks",
11
+ "DOHMH": "Health",
12
+ "DOF": "Other",
13
+ "DHS": "Security",
14
+ "TLC": "Transportation",
15
+ "HRA": "Other",
16
+ "DCA": "Other",
17
+ "DFTA": "Other",
18
+ "EDC": "Other",
19
+ "DOITT": "Other",
20
+ "DCAS": "Other",
21
+ "NYCEM": "Other",
22
+ "ACS": "Other",
23
+ "3-1-1": "Other",
24
+ "TAX": "Other",
25
+ "DCP": "Other",
26
+ "DORIS": "Other",
27
+ "FDNY": "Other",
28
+ "TAT": "Other",
29
+ "COIB": "Other",
30
+ "CEO": "Other",
31
+ "MOC": "Other",
32
+ "OMB": "Other"
33
+ }
34
+ }
code/Borough.json ADDED
@@ -0,0 +1,12 @@
1
+ {
2
+ "Borough": {
3
+ "BRONX" : "BRONX",
4
+ "BROOKLYN": "BROOKLIN",
5
+ "QUEENS": "QUEENS",
6
+ "STATEN ISLAND": "STATEN ISLAND",
7
+ "2017": "OTHER",
8
+ "2018": "OTHER",
9
+ "undefined": "OTHER",
10
+ "null": "OTHER"
11
+ }
12
+ }
code/all_vars.json ADDED
@@ -0,0 +1,35 @@
1
+ {
2
+ "y": ["Target"],
3
+ "past_covariates": [
4
+ "num_closed_tickets",
5
+ "AG_Buildings", "AG_Environment & Sanitation", "AG_Health",
6
+ "AG_Parks", "AG_Security", "AG_Transportation",
7
+ "AG_Other",
8
+ "Borough_BRONX", "Borough_BROOKLYN", "Borough_MANHATTAN",
9
+ "Borough_QUEENS", "Borough_STATEN ISLAND",
10
+ "Borough_OTHER",
11
+ "DG_damaged_sign_sidewalk_missing",
12
+ "DG_english_emergency_spanish_chinese",
13
+ "DG_exemption_commercial_tax_business",
14
+ "DG_license_complaint_illegal_violation", "DG_noise_animal_truck_dead",
15
+ "DG_odor_food_air_smoke", "DG_order_property_inspection_condition",
16
+ "DG_water_basin_litter_missed"
17
+ ],
18
+ "future_covariates": [
19
+ "DewPoint",
20
+ "WindSpeed",
21
+ "Gust",
22
+ "SnowDepth",
23
+ "MinTemp"
24
+ ],
25
+ "temporal": [
26
+ "Year",
27
+ "Month",
28
+ "Day",
29
+ "DayOfWeek",
30
+ "DayOfYear",
31
+ "is_weekend",
32
+ "is_holiday",
33
+ "Season"
34
+ ]
35
+ }
code/build_service.py ADDED
@@ -0,0 +1,167 @@
1
+ import gc
+ from collections import defaultdict
+
+ import pandas as pd
+ import polars as pl
+ from bertopic import BERTopic
+
+ # NOTE: create_datetime is assumed to live in this project's utils module
+ from utils import create_datetime
+
+ def build_service_data(filename):
2
+ # Loading data directly with polars leads to errors
3
+ # Some rows end up missing for an unknown reason
4
+ # FIX: Load in pandas then convert to polars
5
+ service_data_pd = pd.read_csv(filename)
6
+
7
+ # Quick test to assure the unique key is in fact unique
8
+ assert service_data_pd["Unique Key"].nunique() == len(service_data_pd)
9
+
10
+ # Load from pandas Dataframe
11
+ service_data_pd["Incident Zip"] = service_data_pd["Incident Zip"].astype("string")
12
+ service_data_pd["BBL"] = service_data_pd["BBL"].astype("string")
13
+ service_data = pl.DataFrame(service_data_pd)
14
+
15
+ # Clear some ram
16
+ del service_data_pd
17
+ gc.collect()
18
+
19
+ drop_cols = [
20
+ "Unique Key", "Agency Name", "Location Type", "Incident Zip",
21
+ "Incident Address", "Street Name", "Cross Street 1",
22
+ "Cross Street 2", "Intersection Street 1", "Intersection Street 2",
23
+ "Address Type", "City", "Landmark", "Facility Type",
24
+ "Status", "Due Date", "Resolution Description",
25
+ "Resolution Action Updated Date", "Community Board",
26
+ "BBL", "X Coordinate (State Plane)", "Y Coordinate (State Plane)",
27
+ "Open Data Channel Type", "Park Facility Name", "Park Borough",
28
+ "Vehicle Type", "Taxi Company Borough", "Taxi Pick Up Location",
29
+ "Bridge Highway Name", "Bridge Highway Direction", "Road Ramp",
30
+ "Bridge Highway Segment", "Location", "Created Year"
31
+ ]
32
+
33
+ # Drop columns and create the date variable
34
+ service_data = service_data.drop(drop_cols)
35
+ service_data = create_datetime(service_data, "Created Date")
36
+ service_data = create_datetime(service_data, "Closed Date")
37
+
38
+ # Group by date to get the number of Created tickets (as target)
39
+ sd_grouped = service_data.rename({"Created Date": "Datetime"}).group_by("Datetime").agg(
40
+ pl.len().alias("Target"),
41
+ ).sort(by="Datetime")
42
+
43
+ # Calculate the number of closed tickets
44
+ # Mean diff used to filter service data
45
+ # mean_diff = service_data.with_columns(
46
+ # diff_created_closed = pl.col("Closed Date") - pl.col("Created Date")
47
+ # ).filter((pl.col("Closed Date").dt.year() >= 2016) & (pl.col("Closed Date").dt.year() < 2020))["diff_created_closed"].mean().days
48
+ # Mean diff precalculated as
49
+ mean_diff = 13
50
+
51
+ # Create new Closed date with errors filled using the mean diff above
52
+ service_data = service_data.with_columns(
53
+ Closed_Date_New = pl.when(pl.col("Created Date") - pl.col("Closed Date") > pl.duration(days=1))
54
+ .then(pl.col("Created Date") + pl.duration(days=mean_diff))
55
+ .otherwise(pl.col("Closed Date")).fill_null(pl.col("Created Date") + pl.duration(days=mean_diff))
56
+ )
57
+
58
+ # Keep tickets whose created date is on or before the new closed date to prevent future data leakage in our dataset
59
+ # We want to make sure future data is not accidentally leaked across other points in our data
60
+ closed_tickets = service_data.group_by(["Closed_Date_New", "Created Date"]) \
61
+ .agg((pl.when(pl.col("Created Date") <= pl.col("Closed_Date_New")).then(1).otherwise(0)).sum().alias("count")) \
62
+ .sort("Closed_Date_New") \
63
+ .filter((pl.col("Closed_Date_New").dt.year() >= 2016) & (pl.col("Closed_Date_New").dt.year() < 2019)) \
64
+ .group_by("Closed_Date_New").agg(pl.col("count").sum().alias("num_closed_tickets"))
65
+
66
+ # Rename this column to num closed tickets
67
+ ct_df = closed_tickets.with_columns(
68
+ pl.col("num_closed_tickets")
69
+ )
70
+
71
+ # Concat the new columns into our data
72
+ sd_df = pl.concat([sd_grouped, ct_df.drop("Closed_Date_New")], how="horizontal")
73
+
74
+ assert len(sd_grouped) == len(ct_df)
75
+
76
+ # CATEGORICAL FEATURE MAPPING
77
+ # MAPPING FOR BOROUGH
78
+ Borough_Map = {
79
+ "Unspecified": "OTHER",
80
+ "2017": "OTHER",
81
+ None: "OTHER",
82
+ "2016": "OTHER"
83
+ }
84
+ service_data = service_data.with_columns(
85
+ pl.col("Borough").replace(Borough_Map)
86
+ )
87
+
88
+ # MAPPING FOR AGENCY
89
+ # This mapping was done Manually
90
+ Agency_Map = {
91
+ "NYPD": "Security", "HPD": "Buildings", "DOT": "Transportation",
92
+ "DSNY": "Environment & Sanitation", "DEP": "Environment & Sanitation",
93
+ "DOB": "Buildings", "DOE": "Buildings", "DPR": "Parks",
94
+ "DOHMH": "Health", "DOF": "Other", "DHS": "Security",
95
+ "TLC": "Transportation", "HRA": "Other", "DCA": "Other",
96
+ "DFTA": "Other", "EDC": "Other", "DOITT": "Other", "OMB": "Other",
97
+ "DCAS": "Other", "NYCEM": "Other", "ACS": "Other", "3-1-1": "Other",
98
+ "TAX": "Other", "DCP": "Other", "DORIS": "Other", "FDNY": "Other",
99
+ "TAT": "Other", "COIB": "Other", "CEO": "Other", "MOC": "Other",
100
+ }
101
+
102
+ service_data = service_data.with_columns(
103
+ pl.col("Agency").replace(Agency_Map).alias("AG") # AG Shorthand for Agency Groups
104
+ )
105
+
106
+
107
+ # Mapping for Descriptor using BERTopic
108
+ # Store descriptors as pandas dataframe (polars not supported)
109
+ # Drop any nan values, and we only care about the unique values
110
+ descriptor_docs = service_data["Descriptor"].unique().to_numpy()
111
+
112
+ # Build our topic mapping using the pretrained BERTopic model
113
+ # Load model and get predictions
114
+ topic_model = BERTopic.load("models/BERTopic")
115
+ topics, probs = topic_model.transform(descriptor_docs)
116
+
117
+ # Visualize if wanted
118
+ # topic_model.visualize_barchart(list(range(-1,6,1)))
119
+
120
+ # Create a topic to ID map
121
+ topic_df = topic_model.get_topic_info()
122
+ topic_id_map = {row["Topic"]: row["Name"][2:] for _, row in topic_df.iterrows()}
123
+ topic_id_map[-1] = topic_id_map[-1][1:] # Fix for the -1 topic case
124
+
125
+ # For each document (descriptor string) get a mapping of topics
126
+ doc_to_topic_map = defaultdict(str)
127
+ for topic_id, doc in zip(topics, descriptor_docs):
128
+ topic = topic_id_map[topic_id]
129
+ doc_to_topic_map[doc] = topic
130
+
131
+ service_data = service_data.with_columns(
132
+ pl.col("Descriptor").replace(doc_to_topic_map).alias("DG") # DG Shorthand for descriptor Groups
133
+ )
134
+
135
+
136
+ # One Hot Encode Features
137
+ cat_features = ["AG", "Borough", "DG"]
138
+ service_data = service_data.to_dummies(columns=cat_features)
139
+
140
+
141
+ # Group by Date and create our Category Feature Vector
142
+ cat_df = service_data.rename({"Created Date": "Datetime"}).group_by("Datetime").agg(
143
+ # Categorical Features Sum
144
+ pl.col('^AG_.*$').sum(),
145
+ pl.col('^Borough_.*$').sum(),
146
+ pl.col('^DG_.*$').sum(),
147
+ ).sort(by="Datetime")
148
+
149
+ # Concat our category features to our current dataframe
150
+ sd_df = pl.concat([sd_df, cat_df.drop("Datetime")], how="horizontal")
151
+
152
+ # Now that our dataframe is significantly reduced in size
153
+ # We can finally convert back to a pandas dataframe
154
+ # as pandas is usable across more python packages
155
+ sd_df = sd_df.to_pandas()
156
+
157
+ # Set index to datetime
158
+ sd_df = sd_df.set_index("Datetime")
159
+
160
+ # NOTE we added 7 new rows to our weather df
161
+ # These 7 new rows will essentially be our final pred set
162
+ # The Target for these rows will be null -> indicating it needs to be predicted
163
+ # Add these rows to the service dataframe
164
+ preds_df = pd.DataFrame({'Datetime': pd.date_range(start=sd_df.index[-1], periods=8, freq='D')})[1:]
165
+ sd_df = pd.concat([sd_df, preds_df.set_index("Datetime")], axis=0)
166
+
167
+ return sd_df
code/build_weather.py ADDED
@@ -0,0 +1,98 @@
1
+ import numpy as np
+ import pandas as pd
+
+ # NOTE: these helpers are assumed to live in this project's utils module
+ from utils import create_datetime, build_temporal_features, impute_missing_weather
+
+ # Build all weather data from file
2
+ def build_weather_data(filename):
3
+ # Use pandas to read file
4
+ weather_data = pd.read_csv(filename)
5
+
6
+ # Quickly aggregate Year, Month, Day into a datetime object
7
+ # This is because the 311 data uses datetime
8
+ weather_data["Datetime"] = weather_data["Year"].astype("str") + "-" + weather_data["Month"].astype("str") + "-" + weather_data["Day"].astype("str")
9
+ weather_data = create_datetime(weather_data, "Datetime", format="%Y-%m-%d")
10
+
11
+ # LOCALIZE
12
+ # Pre-recorded min/max values from the service data (so we don't need again)
13
+ lat_min = 40.49804421521046
14
+ lat_max = 40.91294056699566
15
+ long_min = -74.25521082506387
16
+ long_max = -73.70038354802529
17
+
18
+ # Create the conditions for location matching
19
+ mincon_lat = weather_data["Latitude"] >= lat_min
20
+ maxcon_lat = weather_data["Latitude"] <= lat_max
21
+ mincon_long = weather_data["Longitude"] >= long_min
22
+ maxcon_long = weather_data["Longitude"] <= long_max
23
+
24
+ # Localize our data to match the service data
25
+ wd_localized = weather_data.loc[mincon_lat & maxcon_lat & mincon_long & maxcon_long]
26
+ drop_cols = [
27
+ "USAF",
28
+ "WBAN",
29
+ "StationName",
30
+ "State",
31
+ "Latitude",
32
+ "Longitude"
33
+ ]
34
+ wd_localized = wd_localized.drop(columns=drop_cols)
35
+
36
+ # AGGREGATE
37
+ # Map columns with aggregation method
38
+ mean_cols = [
39
+ 'MeanTemp',
40
+ 'DewPoint',
41
+ 'Percipitation',
42
+ 'WindSpeed',
43
+ 'Gust',
44
+ 'SnowDepth',
45
+ ]
46
+ min_cols = [
47
+ 'MinTemp'
48
+ ]
49
+ max_cols = [
50
+ 'MaxTemp',
51
+ 'MaxSustainedWind'
52
+ ]
53
+ round_cols = [
54
+ 'Rain',
55
+ 'SnowIce'
56
+ ]
57
+
58
+ # Perform Aggregation
59
+ mean_df = wd_localized.groupby("Datetime")[mean_cols].mean()
60
+ min_df = wd_localized.groupby("Datetime")[min_cols].min()
61
+ max_df = wd_localized.groupby("Datetime")[max_cols].max()
62
+ round_df = wd_localized.groupby("Datetime")[round_cols].mean().round().astype(np.int8)
63
+ wd_full = pd.concat([mean_df, min_df, max_df, round_df], axis=1)
64
+
65
+ # Add seasonal features
66
+ wd_full = build_temporal_features(wd_full, "Datetime")
67
+ wd_full["Season"] = wd_full["Season"].astype("category")
68
+ wd_full = wd_full.set_index("Datetime")
69
+
70
+ # We will calculate the imputation for the next 7 days after 12/31/2018
71
+ # Along with the 49 missing days
72
+ # This will act as our "Weather Forecast"
73
+ time_steps = 49 + 7
74
+
75
+ # Impute Cols
76
+ impute_cols = [
77
+ 'MeanTemp', 'MinTemp', 'MaxTemp', 'DewPoint',
78
+ 'Percipitation', 'WindSpeed', 'MaxSustainedWind',
79
+ 'Gust', 'Rain', 'SnowDepth', 'SnowIce',
80
+ ]
81
+
82
+ # Mean Vars
83
+ mean_vars = ["WindSpeed", "MaxSustainedWind", "Gust", "SnowDepth"]
84
+ min_vars = ["SnowIce", "MeanTemp", "MinTemp", "MaxTemp", "DewPoint", "Percipitation"]
85
+ max_vars = ["Rain"]
86
+
87
+ # Use the imported function to create the imputed data
88
+ preds_mean = impute_missing_weather(wd_full, strategy="mean", time_steps=time_steps, impute_cols=mean_vars)
89
+ preds_min = impute_missing_weather(wd_full, strategy="min", time_steps=time_steps, impute_cols=min_vars)
90
+ preds_max = impute_missing_weather(wd_full, strategy="max", time_steps=time_steps, impute_cols=max_vars)
91
+ all_preds = pd.concat([preds_mean, preds_min, preds_max], axis=1)
92
+ all_preds = build_temporal_features(all_preds.loc[:, impute_cols], "Datetime")
93
+ all_preds = all_preds.set_index("Datetime")
94
+
95
+ wd_curr = wd_full.loc[wd_full["Year"] >= 2016]
96
+ wd_df = pd.concat([wd_full, all_preds], axis=0, join="outer")
97
+
98
+ return wd_df
code/create_maps.py ADDED
@@ -0,0 +1,177 @@
1
+ import math
2
+ import pandas as pd
3
+ import numpy as np
4
+ import gc
+ import folium
+ import polars as pl
5
+ from utils import map_vals
6
+ from matplotlib import pyplot as plt
7
+
8
+ # NOTE
9
+ # This only needed to be run once to generate the maps
10
+ # Maps are saved in the figures folder and loaded as html
11
+
12
+ service_data_pd = pd.read_csv("data/311-2016-2018.csv")
13
+ service_data_pd["Incident Zip"] = service_data_pd["Incident Zip"].astype("string")
14
+ service_data_pd["BBL"] = service_data_pd["BBL"].astype("string")
15
+ service_data_raw = pl.DataFrame(service_data_pd)
16
+ # service_data_raw = pl.read_csv("data/311-2016-2018.csv", null_values="", infer_schema_length=0)
17
+ # service_data_raw = service_data_raw.with_columns(
18
+ # pl.col("Latitude").cast(pl.Float64),
19
+ # pl.col("Longitude").cast(pl.Float64)
20
+ # )
21
+ # Clear some ram
22
+ del service_data_pd
23
+ gc.collect()
24
+
25
+ weather_data_raw = pd.read_csv("data/weather_NY_2010_2018Nov.csv")
26
+
27
+ def get_map_1():
28
+ fig, weather_map = map_vals(
29
+ weather_data_raw.loc[weather_data_raw["Year"] >= 2016],
30
+ cols=["Latitude", "Longitude"],
31
+ label_cols=["StationName"],
32
+ sample_size=1000,
33
+ color='red',
34
+ radius=3,
35
+ weight=4
36
+ )
37
+ fig, combined_map = map_vals(
38
+ service_data_raw,
39
+ cols=["Latitude", "Longitude"],
40
+ color="blue", submap=weather_map,
41
+ sample_size=1000,
42
+ weight=2,
43
+ radius=1
44
+ )
45
+
46
+ fig.save("figures/map1.html")
47
+
48
+ return fig
49
+
50
+
51
+ def get_map_2():
52
+ fig, service_map = map_vals(
53
+ service_data_raw,
54
+ cols=["Latitude", "Longitude"],
55
+ color="blue",
56
+ weight=2,
57
+ radius=1,
58
+ start_loc=[40.7128, -74.0060],
59
+ sample_size=1000,
60
+ zoom_start=10
61
+ )
62
+ fig, weather_map = map_vals(
63
+ weather_data_raw.loc[weather_data_raw["Year"] >= 2016],
64
+ cols=["Latitude", "Longitude"],
65
+ submap=service_map,
66
+ label_cols=["StationName"],
67
+ color='red',
68
+ radius=5,
69
+ weight=2,
70
+ sample_size=1000,
71
+ )
72
+
73
+ fig.save("figures/map2.html")
74
+
75
+ return fig
76
+
77
+
78
+ def get_bounded_map():
79
+ # Get prerecorded coords for the mins/max to maximize speed here
80
+ # In notebook this is recorded via code
81
+ lat_min = 40.49804421521046
82
+ lat_max = 40.91294056699566
83
+ long_min = -74.25521082506387
84
+ long_max = -73.70038354802529
85
+
86
+ fig = folium.Figure(height=500, width=750)
87
+ service_bounds_map = folium.Map(
88
+ location=[40.7128, -74.0060],
89
+ zoom_start=10,
90
+ tiles='cartodbpositron',
91
+ zoom_control=False,
92
+ scrollWheelZoom=False,
93
+ dragging=False
94
+ )
95
+
96
+ kw = {
97
+ "color": "#F1807E",
98
+ "line_cap": "round",
99
+ "fill": True,
100
+ "fill_color": "blue",
101
+ "weight": 3,
102
+ "popup": "Service Data Coverage Zone",
103
+ }
104
+
105
+ folium.Rectangle(
106
+ bounds=[[lat_min, long_min], [lat_max, long_max]],
107
+ line_join="round",
108
+ dash_array="5 5",
109
+ **kw,
110
+ ).add_to(service_bounds_map)
111
+
112
+ fig.add_child(service_bounds_map)
113
+
114
+ fig.save("figures/bounded_map.html")
115
+
116
+ return fig
117
+
118
+
119
+ def get_final_map():
120
+ lat_min = 40.49804421521046
121
+ lat_max = 40.91294056699566
122
+ long_min = -74.25521082506387
123
+ long_max = -73.70038354802529
124
+
125
+ mincon_lat = weather_data_raw["Latitude"] >= lat_min
126
+ maxcon_lat = weather_data_raw["Latitude"] <= lat_max
127
+ mincon_long = weather_data_raw["Longitude"] >= long_min
128
+ maxcon_long = weather_data_raw["Longitude"] <= long_max
129
+
130
+ service_bounds_map = folium.Map(
131
+ location=[40.7128, -74.0060],
132
+ zoom_start=10,
133
+ tiles='cartodbpositron',
134
+ zoom_control=False,
135
+ scrollWheelZoom=False,
136
+ dragging=False
137
+ )
138
+
139
+ kw = {
140
+ "color": "#F1807E",
141
+ "line_cap": "round",
142
+ "fill": True,
143
+ "fill_color": "blue",
144
+ "weight": 3,
145
+ "popup": "Service Data Coverage Zone",
146
+ }
147
+
148
+ folium.Rectangle(
149
+ bounds=[[lat_min, long_min], [lat_max, long_max]],
150
+ line_join="round",
151
+ dash_array="5 5",
152
+ **kw,
153
+ ).add_to(service_bounds_map)
154
+
155
+ wd_localized = weather_data_raw.loc[mincon_lat & maxcon_lat & mincon_long & maxcon_long]
156
+ fig, wd_local_map = map_vals(
157
+ wd_localized,
158
+ submap=service_bounds_map,
159
+ label_cols=["StationName"],
160
+ color='red',
161
+ radius=5,
162
+ weight=2,
163
+ sample_size=1000,
164
+ )
165
+
166
+ fig.save("figures/final_map.html")
167
+
168
+ return fig
169
+
170
+
171
+ def build_maps():
172
+ get_map_1()
173
+ get_map_2()
174
+ get_bounded_map()
175
+ get_final_map()
176
+
177
+ build_maps()
code/future_features.py ADDED
@@ -0,0 +1,21 @@
1
+ FEATURES["future_covariates_final"] = []
2
+ for col in FEATURES["future_covariates"]:
3
+ new_features = data_preprocess[col].to_frame().copy()
4
+ # Lag Features
5
+ new_features[col+"_L0D"] = new_features[col].shift(0)
6
+ new_features[col+"_L1D"] = new_features[col].shift(1)
7
+ new_features[col+"_L2D"] = new_features[col].shift(2)
8
+
9
+ # Rolling Features (No shift needed for future vars)
10
+ new_features[col+"_RMean14D"] = new_features[col].rolling('14D').mean()
11
+ new_features[col+"_RMin14D"] = new_features[col].rolling('14D').min()
12
+
13
+ # Expanding Window (No shift needed for future vars)
14
+ new_features[col+"_EMean14D"] = new_features[col].expanding(min_periods=14).mean()
15
+ new_features[col+"_EMin14D"] = new_features[col].expanding(min_periods=14).min()
16
+
17
+ FEATURES["future_covariates_final"].extend([col+"_L0D", col+"_L1D", col+"_L2D", col+"_RMean14D", col+"_RMin14D", col+"_EMean14D", col+"_EMin14D"])
18
+ new_features = new_features.drop(columns=col)
19
+ data_preprocess = pd.concat([data_preprocess, new_features], axis=1)
20
+
21
+ assert len(data_preprocess.loc[:, FEATURES["future_covariates_final"]].columns) == len(FEATURES["future_covariates"])*7
code/past_features.py ADDED
@@ -0,0 +1,21 @@
1
+ FEATURES["past_covariates_final"] = []
2
+ for col in FEATURES["past_covariates"]:
3
+ new_features = data_preprocess[col].to_frame().copy()
4
+ # Lag Features
5
+ new_features[col+"_L7D"] = new_features[col].shift(7)
6
+ new_features[col+"_L14D"] = new_features[col].shift(14)
7
+ new_features[col+"_L21D"] = new_features[col].shift(21)
8
+
9
+ # Rolling Features
10
+ # Shift to move the new features into the prediction space (2019-01-01 to 2019-01-07)
11
+ new_features[col+"_RMean14D"] = new_features[col].shift(7).rolling('14D').mean()
12
+
13
+ # Differencing Features
14
+ # Shift to move the new features into the prediction space (2019-01-01 to 2019-01-07)
15
+ new_features[col+"_Diff7D"] = (new_features[col].shift(7) - new_features[col].shift(7).shift(7))
16
+
17
+ FEATURES["past_covariates_final"].extend([col+"_L7D", col+"_L14D", col+"_L21D", col+"_RMean14D", col+"_Diff7D"])
18
+ new_features = new_features.drop(columns=col)
19
+ data_preprocess = pd.concat([data_preprocess, new_features], axis=1)
20
+
21
+ assert len(data_preprocess.loc[:, FEATURES["past_covariates_final"]].columns) == len(FEATURES["past_covariates"])*5
code/recurse_predict.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ def predict_recurse(dataset, test, model, features_to_impute=['Target_L1D', 'Target_Diff7D', 'Target_Diff14D'], last_feature='Target_L6D'):
2
+ n_steps = len(test)
3
+ merged_data = pd.concat([dataset[-14:], test], axis=0)
4
+ all_index = merged_data.index
5
+ X_test = test.drop(columns="Target")
6
+ sd = -6 # Starting point for filling next value
7
+
8
+ # For each step, get the predictions
9
+ for i in range(n_steps-1):
10
+ pred = model.predict(X_test)[i]
11
+ # For the three features needed, compute the new value
12
+ X_test.loc[all_index[sd+i], features_to_impute[0]] = pred
13
+ X_test.loc[all_index[sd+i], features_to_impute[1]] = pred - merged_data.loc[all_index[sd+i-7], features_to_impute[1]]
14
+ X_test.loc[all_index[sd+i], features_to_impute[2]] = pred - merged_data.loc[all_index[sd+i-14], features_to_impute[2]]
15
+
16
+ # In the last iteration compute the Lag6D value
17
+ if i == 5:
18
+ X_test.loc[all_index[sd+i], last_feature] = pred - merged_data.loc[all_index[sd+i-6], last_feature]
19
+
20
+
21
+ final_preds = model.predict(X_test)
22
+ return final_preds
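+
+ # Hypothetical usage, mirroring the app's variable names (an assumption):
+ # final_preds = predict_recurse(train_features_df, test_features_df, final_model)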
code/target_features.py ADDED
@@ -0,0 +1,27 @@
1
+ FEATURES["y_features"] = []
2
+ col = FEATURES["y"][0]
3
+ new_features = data_preprocess[col].to_frame().copy()
4
+
5
+ # Lag Features
6
+ new_features[col+"_L1D"] = new_features[col].shift(1)
7
+ new_features[col+"_L6D"] = new_features[col].shift(6)
8
+ new_features[col+"_L7D"] = new_features[col].shift(7)
9
+ new_features[col+"_L8D"] = new_features[col].shift(8)
10
+ new_features[col+"_L14D"] = new_features[col].shift(14)
11
+
12
+ # Rolling Features
13
+ # After computing shift by 1 to indicate its computed based off a 1 day lag
14
+ new_features[col+"_RMean14D"] = new_features[col].shift(1).rolling(window='14D').mean()
15
+ # The last 6 days, I need the prediction from time t-1
16
+ # For now set to nan
17
+ new_features[col+"_RMean14D"][-6:] = np.nan
18
+
19
+ # Differencing features
20
+ new_features[col+"_Diff7D"] = (new_features[col].shift(1) - new_features[col].shift(1).shift(7))
21
+ new_features[col+"_Diff14D"] = (new_features[col].shift(1) - new_features[col].shift(1).shift(14))
22
+
23
+ new_features = new_features.drop(columns=col)
24
+ FEATURES["y_features"].extend([col+"_L1D", col+"_L6D", col+"_L7D", col+"_L8D", col+"_L14D", col+"_RMean14D", col+"_Diff7D", col+"_Diff14D"])
25
+ data_preprocess = pd.concat([data_preprocess, new_features], axis=1)
26
+
27
+ assert len(data_preprocess.loc[:, FEATURES["y_features"]].columns) == len(FEATURES["y"])*8
custom.css ADDED
@@ -0,0 +1,43 @@
1
+ .gr-describe-tb {
2
+ overflow: hidden !important;
3
+ }
4
+ .row.spacing {
5
+ border: 0px;
6
+ }
7
+ .plot-container {
8
+ width: 100vw
9
+ }
10
+ .map * {
11
+ text-align: -webkit-center;
12
+ }
13
+
14
+ .map-legend * {
15
+ width: fit-content;
16
+ max-width: 215px;
17
+ padding: 5px;
18
+ background: var(--border-color-primary);
19
+ margin-top: -50px
20
+ }
21
+
22
+ .map-legend-text * {
23
+ width: fit-content;
24
+ padding: 0px;
25
+ margin-bottom: 0px;
26
+ font-size: 16px;
27
+ margin-top: 0px;
28
+ }
29
+
30
+
31
+ .prose {
32
+ /* font-size: 16px; */
33
+ }
34
+
35
+ .no-padding * {
36
+ padding: 0px;
37
+ margin: 0px;
38
+ }
39
+
40
+ .low-padding * {
41
+ padding: 2px;
42
+ margin: 0px;
43
+ }
data/data_final.csv ADDED
The diff for this file is too large to render. See raw diff
 
data/data_merged_full.csv ADDED
The diff for this file is too large to render. See raw diff
 
data/docs.csv ADDED
@@ -0,0 +1,1315 @@
1
+ ,docs
2
+ 0,Request Large Bulky Item Collection
3
+ 1,Sewage Odor
4
+ 2,Sidewalk Violation
5
+ 3,Blocked Hydrant
6
+ 4,For One Address
7
+ 5,Blocked Sidewalk
8
+ 6,Commercial Overnight Parking
9
+ 7,Noise: Construction Before/After Hours (NM1)
10
+ 8,Posted Parking Sign Violation
11
+ 9,Congestion/Gridlock
12
+ 10,Ped Multiple Lamps
13
+ 11,Building Shaking/Vibrating/Structural Stability
14
+ 12,Egress - Doors Locked/Blocked/Improper/No Secondary Means
15
+ 13,Working Contrary To Stop Work Order
16
+ 14,E15 Illegal Postering
17
+ 15,E3 Dirty Sidewalk
18
+ 16,Pothole - Highway
19
+ 17,Pothole
20
+ 18,No Access
21
+ 19,Lamppost Base Door/Cover Missing
22
+ 20,Branch Cracked and Will Fall
23
+ 21,Application Renewal
24
+ 22,Illegal Conversion Of Residential Building/Space
25
+ 23,Zoning - Non-Conforming/Illegal Vehicle Storage
26
+ 24,Excessive Water In Basement (WEFB)
27
+ 25,Mouse Sighting
28
+ 26,Administration
29
+ 27,No Receipt
30
+ 28,Site Conditions Endangering Workers
31
+ 29,15I Street Condition Ice-Non Storm
32
+ 30,Food Contaminated
33
+ 31,1 Missed Collection
34
+ 32,E3A Dirty Area/Alleyway
35
+ 33,Street Light Out
36
+ 34,Controller
37
+ 35,Electrical Wiring Defective/Exposed
38
+ 36,With License Plate
39
+ 37,Driver Complaint
40
+ 38,Engine Idling
41
+ 39,Loud Music/Party
42
+ 40,Banging/Pounding
43
+ 41,15S Re-Plow/Spread/Ice-Snow Cond.
44
+ 42,Loud Talking
45
+ 43,Noise: air condition/ventilation equipment (NV1)
46
+ 44,Other (complaint details)
47
+ 45,"Air: Odor/Fumes, Vehicle Idling (AD3)"
48
+ 46,Lead Kit Request (Residential) (L10)
49
+ 47,LOW WATER PRESSURE - WLWP
50
+ 48,Use Indoor
51
+ 49,Cave-in
52
+ 50,Noise: Construction Equipment (NC1)
53
+ 51,15 Street Cond/Dump-Out/Drop-Off
54
+ 52,Sewer Backup (Use Comments) (SA)
55
+ 53,Trees and Sidewalks Program
56
+ 54,Hitting Building
57
+ 55,Water Meter Broken/Leaking - Other (CMO)
58
+ 56,Water Meter Broken/Leaking - Private Residence (CMR)
59
+ 57,Exchange/Refund/Return
60
+ 58,Car/Truck Horn
61
+ 59,St Name - Attached to Pole
62
+ 60,Graffiti
63
+ 61,Dirty Water (WE)
64
+ 62,Hydrant Defective (WC2)
65
+ 63,Other/Unknown
66
+ 64,Hydrant Running (WC3)
67
+ 65,Cons - Contrary/Beyond Approved Plans/Permits
68
+ 66,Sidewalk Shed/Pipe Scafford - Inadequate Defective/None
69
+ 67,Leak (Use Comments) (WA2)
70
+ 68,Possible Water Main Break (Use Comments) (WA1)
71
+ 69,Curb Cut/Driveway/Carport - Illegal
72
+ 70,Plate Condition - Shifted
73
+ 71,1R Missed Recycling-All Materials
74
+ 72,Planted Less Than 2 Years Ago
75
+ 73,Fence - None/Inadequate
76
+ 74,Recycling Electronics
77
+ 75,LED Lense
78
+ 76,Other
79
+ 77,Parking Permit Improper Use
80
+ 78,Other Housing Options
81
+ 79,No Certificate Of Occupancy/Illegal/Contrary To CO
82
+ 80,10 Litter Basket / Request
83
+ 81,Special Events
84
+ 82,E2 Receptacle Violation
85
+ 83,E5 Loose Rubbish
86
+ 84,Boiler - Defective/Inoperative/No Permit
87
+ 85,SRO - Illegal Work/No Permit/Change In Occupancy/Use
88
+ 86,Flood Light Lamp Out
89
+ 87,Entire Tree Has Fallen Down
90
+ 88,Coin or Card Did Not Register
91
+ 89,Tree Leaning/Uprooted
92
+ 90,Line/Marking - Faded
93
+ 91,Plumbing
94
+ 92,"No Parking, Standing, Stopping"
95
+ 93,Branch or Limb Has Fallen Down
96
+ 94,Food Worker Illness
97
+ 95,Use Outside
98
+ 96,Pedestrian Signal
99
+ 97,Plumbing-Defective/Leaking/Not Maintained
100
+ 98,Inadequate or No Heat
101
+ 99,Out of Order
102
+ 100,New Bus Stop Shelter Placement
103
+ 101,Food Spoiled
104
+ 102,Failed Street Repair
105
+ 103,Unclean Condition
106
+ 104,Price Not Posted
107
+ 105,Painted Line/Marking
108
+ 106,Rodents/Insects/Garbage
109
+ 107,Illegal Hotel Rooms In Residential Building
110
+ 108,Air: Other Air Problem (Use Comments) (AZZ)
111
+ 109,"Noise, Barking Dog (NR5)"
112
+ 110,Planted More Than 2 Years Ago
113
+ 111,Catch Basin Clogged/Flooding (Use Comments) (SC)
114
+ 112,Manhole Sunken/Damaged/Raised (SB1)
115
+ 113,Wear & Tear
116
+ 114,E9 Snow / Icy Sidewalk
117
+ 115,No Water (WNW)
118
+ 116,Driver Report
119
+ 117,Double Parked Blocking Vehicle
120
+ 118,"Rough, Pitted or Cracked Roads"
121
+ 119,1RG Missed Recycling Paper
122
+ 120,Traffic Signal Light
123
+ 121,Glassware Broken
124
+ 122,Insurance Information Requested
125
+ 123,Demolition - Unsafe
126
+ 124,Photocell (PEC) Missing
127
+ 125,"Air: Smoke, Chimney or vent (AS1)"
128
+ 126,Assisted Living
129
+ 127,1RO Missed Recycling Organics
130
+ 128,Broken Sidewalk
131
+ 129,Partial Access
132
+ 130,Blocked Bike Lane
133
+ 131,Tattoo Artist Unlicensed
134
+ 132,1 or 2
135
+ 133,E14 ASP/Restricted Parking
136
+ 134,Wood Pole Missing
137
+ 135,Derelict Vehicles
138
+ 136,Veh Signal Head
139
+ 137,Loud Television
140
+ 138,1RB Missed Recycling - M/G/Pl
141
+ 139,LED Pedestrian Unit
142
+ 140,Ped Flasher
143
+ 141,Rat Sighting
144
+ 142,14B Derelict Bicycle
145
+ 143,Street Flooding (SJ)
146
+ 144,Vehicle Signal
147
+ 145,Hydrant Leaking (WC1)
148
+ 146,Hydrant Running Full (WA4)
149
+ 147,Double Parked Blocking Traffic
150
+ 148,2 Bulk-Missed Collection
151
+ 149,Overnight Commercial Storage
152
+ 150,Trunk Damaged
153
+ 151,2R Bulk-Missed Recy Collection
154
+ 152,E1 Improper Disposal
155
+ 153,E8 Canine Violation
156
+ 154,E11 Litter Surveillance
157
+ 155,Snow/Ice
158
+ 156,12 Dead Animals
159
+ 157,E12 Illegal Dumping Surveillance
160
+ 158,Pigeon Waste
161
+ 159,Neglected
162
+ 160,Timer Defect - Fast/Fail
163
+ 161,Blocked - Construction
164
+ 162,Plumbing Work - Illegal/No Permit/Standpipe/Sprinkler
165
+ 163,Glassware Missing
166
+ 164,Lamppost Damaged
167
+ 165,Ventilation
168
+ 166,Street Light Cycling
169
+ 167,Veh Signal Lamp
170
+ 168,Structure - Indoors
171
+ 169,Other School Condition
172
+ 170,Chemical Vapors/Gases/Odors
173
+ 171,Property Refunds and Credits
174
+ 172,Car/Truck Music
175
+ 173,"Air: Odor/Fumes, Restaurant (AD2)"
176
+ 174,Chemical Odor (HD1)
177
+ 175,Manhole Cover Broken/Making Noise (SB)
178
+ 176,Cloudy Or Milky Water (QB1)
179
+ 177,Failure To Maintain
180
+ 178,Litter
181
+ 179,Defective Hardware
182
+ 180,Street Light Lamp Dim
183
+ 181,Gas Hook-Up/Piping - Illegal Or Defective
184
+ 182,Defacement
185
+ 183,Plumbing Problem
186
+ 184,E10 Street Obstruction
187
+ 185,Dead Animal
188
+ 186,Noise: Alarms (NR3)
189
+ 187,E3B Sidewalk Obstruction
190
+ 188,Detached Trailer
191
+ 189,Non-Delivery Goods/Services
192
+ 190,Interest Dispute
193
+ 191,Tree Alive - in Poor Condition
194
+ 192,Condition Attracting Rodents
195
+ 193,Aided/Injury
196
+ 194,One Way
197
+ 195,Property - Other Billing Issue
198
+ 196,Pesticide
199
+ 197,Heating Problem
200
+ 198,Trespassing
201
+ 199,Fixture/Luminaire Out Of Position
202
+ 200,Lamppost Base Door/Cover Open
203
+ 201,Unlicensed
204
+ 202,Broken Curb
205
+ 203,Illegal Tow
206
+ 204,Failure To Retain Water/Improper Drainage- (LL103/89)
207
+ 205,Structural Stability Impacted - New Building Under Construction
208
+ 206,Hitting Power/Phone Lines
209
+ 207,Lamppost Knocked Down
210
+ 208,Vehicle Complaint
211
+ 209,Lamppost Wire Exposed
212
+ 210,Dishwashing/Utensils
213
+ 211,Other (Explain Below)
214
+ 212,Blocking Street
215
+ 213,Canopy Complaint
216
+ 214,Manhole Overflow (Use Comments) (SA1)
217
+ 215,Other Water Problem (Use Comments) (WZZ)
218
+ 216,15R Street Cond/Ref.W Door
219
+ 217,C1 Request Xmas Trees Collection
220
+ 218,Rent Discrepancy
221
+ 219,Food Contains Foreign Object
222
+ 220,Unauthorized Bus Layover
223
+ 221,Veh Signal Sec Door
224
+ 222,Post
225
+ 223,Fixture/Luminaire Door Open
226
+ 224,Chronic Speeding
227
+ 225,Truck Route Violation
228
+ 226,Fixture/Luminaire Hanging
229
+ 227,Suspended (Hanging) Scaffolds - No Pmt/Lic/Dangerous/Accident
230
+ 228,Street Cleaning - ASP
231
+ 229,Illegal. Commercial Use In Resident Zone
232
+ 230,"Building - Vacant, Open And Unguarded"
233
+ 231,Bare Hands in Contact w/ Food
234
+ 232,Adult Establishment
235
+ 233,Sign/Awning/Marquee - Illegal/No Permit
236
+ 234,Privately Owned Public Space/Non-Compliance
237
+ 235,Wall/Retaining Wall - Bulging/Cracked
238
+ 236,Property Value Dispute
239
+ 237,Stop
240
+ 238,Nursing Home
241
+ 239,Electronics/Phones
242
+ 240,False Advertising
243
+ 241,Flashing Hazard
244
+ 242,Unsafe Worksite
245
+ 243,Labor violation
246
+ 244,Public Complaint - Comm Location
247
+ 245,"Unsafe Chemical, Abandoned (HC2)"
248
+ 246,Cable
249
+ 247,Chained
250
+ 248,Tortured
251
+ 249,"Oil Spill On Street, Large (HQL)"
252
+ 250,Noise: Private Carting Noise (NQ1)
253
+ 251,22 Weeds
254
+ 252,Dust from Construction
255
+ 253,Multiple Street Lights Out
256
+ 254,Smoking Ban - Smoking on Construction Site
257
+ 255,After Hours - Licensed Est
258
+ 256,Lamppost Missing
259
+ 257,Pet/Animal
260
+ 258,Toxic Chemical/Material
261
+ 259,Tree Trunk Split
262
+ 260,Metal Protruding - Sign Stump
263
+ 261,Vent/Exhaust - Illegal/Improper
264
+ 262,Sprinkler System - Inadequate
265
+ 263,No Shelter
266
+ 264,Bicycle Chained to Tree
267
+ 265,Bus Stop
268
+ 266,In Car
269
+ 267,Sidewalk Grating - Defective
270
+ 268,General Maintenance
271
+ 269,Rooster
272
+ 270,Damaged/Defective Goods
273
+ 271,Overcharge
274
+ 272,E2A Storage Of Receptacles
275
+ 273,Food Worker Hygiene
276
+ 274,Base Door
277
+ 275,Hydrant Knocked Over/Missing (WC)
278
+ 276,News Gathering
279
+ 277,Sewage Leak
280
+ 278,Dog
281
+ 279,Chronic Stoplight Violation
282
+ 280,Asbestos
283
+ 281,Copy of Approval Order
284
+ 282,Fixture/Luminaire Damaged
285
+ 283,Billing Dispute
286
+ 284,Personal SCHE Exemption
287
+ 285,Safety Netting/Guard Rails - Damaged/Inadequate/None (6 Stories/75 Feet Or Less)
288
+ 286,Excavation Undermining Adjacent Building
289
+ 287,Plate Condition - Anti-Skid
290
+ 288,Plate Condition - Noisy
291
+ 289,Plate Condition - Open
292
+ 290,Car Service Company Complaint
293
+ 291,Damaged Vehicle
294
+ 292,Demand for Cash
295
+ 293,Manhole Cover Missing (Emergency) (SA3)
296
+ 294,Support Bracket
297
+ 295,"Cloudy Or Milky, Other (Use Comments) (QBZ)"
298
+ 296,Affecting Sewer or Foundation
299
+ 297,Signs of Rodents
300
+ 298,Miscellaneous
301
+ 299,Illegal Use Of Hose - Other (CCO)
302
+ 300,Odor In Sewer/Catch Basin (ICB)
303
+ 301,Street Light Dayburning
304
+ 302,Veh Sgnl Mult Lamps
305
+ 303,Odor
306
+ 304,Maintenance Cover
307
+ 305,Dumpster - Construction Waste
308
+ 306,Contract Dispute
309
+ 307,Real Property Tax Assessment/Correction
310
+ 308,E6 Commercial Waste Disposal
311
+ 309,Obstructing Public Use
312
+ 310,Temporary
313
+ 311,Veh Signal Visor
314
+ 312,Plumbing Work - Unlicensed/Illegal/Improper Work In Progress
315
+ 313,Failure to Comply with Vacate Order
316
+ 314,Street Light Feed
317
+ 315,Unleashed Dog in Public
318
+ 316,ID Requirement Not Posted
319
+ 317,Safety Netting/Guard Rails - Damaged/Inadequate/None (Over 6 Stories/75 Feet)
320
+ 318,Sidewalk Staircase
321
+ 319,Debris - Falling Or In Danger Of Falling
322
+ 320,Smoking Violation
323
+ 321,Guard Rail - Street
324
+ 322,Illegal Conversion Of Commercial Bldg/Space To Other Uses
325
+ 323,Lights From Parking Lot Shining On Building
326
+ 324,"Air: Dust, Construction/Demolition (AE4)"
327
+ 325,Asbestos Complaint (B1)
328
+ 326,Car Service Company Report
329
+ 327,Enclosure Cap
330
+ 328,Foreign Attachment On Lamppost
331
+ 329,unknown odor/taste in drinking water (QA6)
332
+ 330,3A Sweeping/Missed
333
+ 331,Dead Branches in Tree
334
+ 332,Sidewalk Collapsed
335
+ 333,Underground
336
+ 334,Over Capacity
337
+ 335,Noise: Jack Hammering (NC2)
338
+ 336,Catch Basin Sunken/Damaged/Raised (SC1)
339
+ 337,21 Collection Truck Noise
340
+ 338,Curb Defect-Metal Protruding
341
+ 339,Too Few on Duty
342
+ 340,Update Tenant Information
343
+ 341,Defective Street Cut (WZZ1)
344
+ 342,Snow or Ice
345
+ 343,Boiler - Fumes/Smoke/Carbon Monoxide
346
+ 344,Damaged/Defective Parts
347
+ 345,Illness Caused by Drinking Water
348
+ 346,Structure - Outdoors
349
+ 347,"Taste/Odor, Chlorine (QA1)"
350
+ 348,Turn Signal
351
+ 349,E1A Litter Basket / Improper Use
352
+ 350,Contact Sign Not Posted
353
+ 351,Graffiti - Bridge
354
+ 352,8 Request to Clean Vacant Lot
355
+ 353,Personal Other Exemption
356
+ 354,Letter Grading
357
+ 355,Food Temperature
358
+ 356,Pedestrian Ramp Defective
359
+ 357,Food Protection
360
+ 358,School Crossing
361
+ 359,Cars Parked on Sidewalk/Street
362
+ 360,Mast Arm
363
+ 361,TAC Report
364
+ 362,ER2 Resident Recyc. (Tenant)
365
+ 363,E13 Throw-Out
366
+ 364,In Prohibited Area
367
+ 365,In Public
368
+ 366,Ped Lamp
369
+ 367,Credit Card Limitations Not Posted
370
+ 368,ELECTRIC/GAS RANGE
371
+ 369,APARTMENT ONLY
372
+ 370,WINDOW GUARD BROKEN/MISSING
373
+ 371,RADIATOR
374
+ 372,STEAM PIPE/RISER
375
+ 373,TOILET
376
+ 374,DOOR
377
+ 375,BASIN/SINK
378
+ 376,FLOOR
379
+ 377,REFRIGERATOR
380
+ 378,Other Animal
381
+ 379,Line/Marking - After Repaving
382
+ 380,Push Button
383
+ 381,DOOR FRAME
384
+ 382,WINDOW FRAME
385
+ 383,WINDOW PANE
386
+ 384,LIGHTING
387
+ 385,NO LIGHTING
388
+ 386,OUTLET/SWITCH
389
+ 387,WIRING
390
+ 388,POWER OUTAGE
391
+ 389,MAINTENANCE
392
+ 390,BELL/BUZZER/INTERCOM
393
+ 391,STAIRS
394
+ 392,CABINET
395
+ 393,COOKING GAS
396
+ 394,JANITOR/SUPER
397
+ 395,MAILBOX
398
+ 396,ENTIRE BUILDING
399
+ 397,Wood Pole Wires Exposed
400
+ 398,Vehicle
401
+ 399,Damaged Telephone
402
+ 400,Open Excavation (WZZ2)
403
+ 401,Sewer Odor (SA2)
404
+ 402,CEILING
405
+ 403,ROOFING
406
+ 404,WALL
407
+ 405,WINDOW/FRAME
408
+ 406,BATHTUB/SHOWER
409
+ 407,Rodents/Mice
410
+ 408,WATER SUPPLY
411
+ 409,CARBON MONOXIDE DETECTOR
412
+ 410,SMOKE DETECTOR
413
+ 411,GARBAGE/RECYCLING STORAGE
414
+ 412,FIRE ESCAPE
415
+ 413,MOLD
416
+ 414,Fire Alarm Lamp Out
417
+ 415,PESTS
418
+ 416,HEAVY FLOW
419
+ 417,SEP - Professional Certification Compliance Audit
420
+ 418,DAMP SPOT
421
+ 419,SLOW LEAK
422
+ 420,St Name - Over Intersection
423
+ 421,BOILER
424
+ 422,Highway Fence
425
+ 423,Bag/Wallet
426
+ 424,Installation/Work Quality
427
+ 425,Veh Signal Lens
428
+ 426,Noise: lawn care equipment (NCL)
429
+ 427,SEWAGE
430
+ 428,PAVEMENT
431
+ 429,DOOR/FRAME
432
+ 430,ROOF DOOR/HATCH
433
+ 431,Lamppost Base Door/Cover Damaged
434
+ 432,Facility Maintenance
435
+ 433,Permit/License/Certificate
436
+ 434,Rodent Sighting
437
+ 435,Allergy Information
438
+ 436,Receipt Incomplete/Not Given
439
+ 437,Street Light Lamp Missing
440
+ 438,Illegal Conversion Of Manufacturing/Industrial Space
441
+ 439,Facility Construction
442
+ 440,VENTILATION SYSTEM
443
+ 441,Clothing Damage
444
+ 442,3B Sweeping/Inadequate
445
+ 443,Unauthorized Tree Pruning
446
+ 444,Concrete In Catch Basin (IEA)
447
+ 445,"Taste/Odor, Chemical (QA2)"
448
+ 446,Flood Light Lamp Cycling
449
+ 447,"\E4 18\""\"" Law\"""""
450
+ 448,Fixture/Luminaire Missing
451
+ 449,3 or More
452
+ 450,Toilet Facility
453
+ 451,GUTTER/LEADER
454
+ 452,Hydrant Locking Device Request (Use Comments) (WC5)
455
+ 453,Water Meter Stolen/Missing - Private Residence (CLR)
456
+ 454,New Con Ed Service Request
457
+ 455,Graffiti or Vandalism
458
+ 456,Illegal Use Of A Hydrant (CIN)
459
+ 457,Cigarette Sale to Minor
460
+ 458,SIGNAGE MISSING
461
+ 459,Fire Globe Missing
462
+ 460,Locker Break-in/Incident
463
+ 461,APS
464
+ 462,Roots Damaged
465
+ 463,ER1 Resident Recyc. (Owner/Manager
466
+ 464,Do Not Enter
467
+ 465,Branches Damaged
468
+ 466,Junction Box
469
+ 467,Food Preparation Location
470
+ 468,Underage - Licensed Est
471
+ 469,Sidewalk Blocked
472
+ 470,Human Capital
473
+ 471,Police Report Requested
474
+ 472,Car Not Available
475
+ 473,Warning Buzzer
476
+ 474,"Education Support, Policy, and Practice"
477
+ 475,Lamppost Leaning
478
+ 476,WiFi/Internet Not Working/Slow
479
+ 477,"Air: Smoke, Vehicular (AA4)"
480
+ 478,Credit Card Stuck in Meter
481
+ 479,PORCH/BALCONY
482
+ 480,Kitchen/Food Prep Area
483
+ 481,RAIN GARDEN DEBRIS (SRGDBR)
484
+ 482,Defective/Missing Curb Piece (SC4)
485
+ 483,Food Worker Activity
486
+ 484,Wastewater Into Catch Basin (IEB)
487
+ 485,SPRINKLER
488
+ 486,Plants- Odor Related Problems (PO1)
489
+ 487,SKYLIGHT
490
+ 488,Yield
491
+ 489,Beach/Pool Water
492
+ 490,Dogs or Cats Sold
493
+ 491,Garbage or Litter
494
+ 492,Plate Missing/Moved-Exposing Hole (WF4)
495
+ 493,Plants- Noise Related Problems (PN1)
496
+ 494,NYPD
497
+ 495,On Messenger
498
+ 496,Dirty/Graffiti
499
+ 497,Playing in Unsuitable Place
500
+ 498,Building
501
+ 499,Injured Wildlife
502
+ 500,6 Overflowing Litter Baskets
503
+ 501,Improper Sale of Items
504
+ 502,Advertising Sign/Billboard/Posters/Flexible Fabric - Illegal
505
+ 503,DOOR TO DUMBWAITER
506
+ 504,Natural Gas In Sewer/Catch Basin (IFB)
507
+ 505,No Permit or License
508
+ 506,Noise: Manufacturing Noise (NK1)
509
+ 507,Broken Glass
510
+ 508,Illegal/Unfair Booting
511
+ 509,Cat
512
+ 510,Speed Limit
513
+ 511,Unauthorized Tree Removal
514
+ 512,Plate Missing/Moved-Exposing Hole (SB4)
515
+ 513,"Air: Odor, Sweet From Unknown Source (AZ1)"
516
+ 514,EEO
517
+ 515,Lighting
518
+ 516,FOIL Request - Request for Records
519
+ 517,Scale Inaccurate/Broken
520
+ 518,"Unsafe Chemical, Storage (HC1)"
521
+ 519,Hours of Operation
522
+ 520,Unsecured Facility
523
+ 521,Safety Equipment/Signs
524
+ 522,Posted Notice Or Order Removed/Tampered With
525
+ 523,"Wasting Faucets,Sinks,Flushometer,Urinal,Etc. - Other (CWO)"
526
+ 524,House/Property Damaged
527
+ 525,Cellar Door Defective
528
+ 526,Multiple St Lts Dayburning
529
+ 527,Crane/Suspension Scaffold - No Permit/License/Cert./Unsafe/Illegal
530
+ 528,Noise: Other Noise Sources (Use Comments) (NZZ)
531
+ 529,Damaged Other
532
+ 530,No Consent Form
533
+ 531,Debt Not Owed
534
+ 532,High Water Pressure (WHP)
535
+ 533,Closed without Notice
536
+ 534,Property
537
+ 535,Real Estate Services
538
+ 536,SEWER
539
+ 537,Hyd Valve Box Cover Missing (WV2)
540
+ 538,Office of Preventive Technical Assistance/OPTA
541
+ 539,Traffic Sign or Signal Blocked
542
+ 540,Dissatisfaction with Provider
543
+ 541,Grass/Weeds
544
+ 542,Catch Basin Grating Missing (SA4)
545
+ 543,Bracket Arm Loose
546
+ 544,Graffiti - Highway
547
+ 545,Initial Application
548
+ 546,Snow Removal Requested
549
+ 547,Non-Compliance w/TTPN 1/00 - Vertical Enlargements
550
+ 548,Landmark Bldg - Illegal Work
551
+ 549,Damaged/Defective
552
+ 550,"Dirt, Debris, Litter Complaint"
553
+ 551,Illegal Tree Removal/Topo. Change in SNAD
554
+ 552,Relocation of Bus Stop Shelter
555
+ 553,Controller Flasher
556
+ 554,Annual Report
557
+ 555,Facilities Management
558
+ 556,Culvert Blocked/Needs Cleaning (SE)
559
+ 557,Sign - In Danger Of Falling
560
+ 558,Loose Plate
561
+ 559,Commercial ICP or ICAP Exemption
562
+ 560,Time Switch
563
+ 561,Fiscal and Business Management
564
+ 562,Crash Cushion Defect
565
+ 563,Glassware Hanging
566
+ 564,"Noise, Other Animals (NR6)"
567
+ 565,Missing/Stump
568
+ 566,ECR Commercial Routing Sticker
569
+ 567,Water Meter Stolen/Missing - Other (CLO)
570
+ 568,Clear Street Light
571
+ 569,Rates Not Posted
572
+ 570,"No Sampling Required, Requested Information (QG2)"
573
+ 571,Damaged Leg or Pole Bent
574
+ 572,Rooftank Leak Or Overflow (CKO)
575
+ 573,"Wasting Faucets,Sinks,Flushometer,Urinal,Etc. - Private Residence (CWR)"
576
+ 574,Smoking
577
+ 575,E30 Transfer Station
578
+ 576,Equipment Not Safe
579
+ 577,Domestic Strays
580
+ 578,Weather Head
581
+ 579,Broken Lock
582
+ 580,Dog Off Leash
583
+ 581,Oil Spill Into Basin/Sewer - Large (IABL)
584
+ 582,Plate Noisy/Sunken/Raised (SB5)
585
+ 583,Personal STAR Exemption
586
+ 584,Handwashing
587
+ 585,"Taste/Odor, Musty/Stale (QA4)"
588
+ 586,Citywide Procurement
589
+ 587,"Taste/Odor, Bitter/Metallic (QA3)"
590
+ 588,Animal Waste
591
+ 589,Parking Card Stuck in Meter
592
+ 590,Door Open with Air Conditioning On
593
+ 591,Inadequate Support Shoring
594
+ 592,Non-Delivery of Papers
595
+ 593,1C Uncollected Xmas Trees
596
+ 594,Personal Exemptions
597
+ 595,Controller Cabinet
598
+ 596,Executive
599
+ 597,Chemical Spill/Release (HA1)
600
+ 598,Tax Commission Rules
601
+ 599,Co-op or Condo Abatement
602
+ 600,Guard Rail - Bridge
603
+ 601,Chemical Spill (IAC)
604
+ 602,Door
605
+ 603,In-Line Fuse Missing
606
+ 604,"Oil Spill On Street, Small (HQS)"
607
+ 605,Drag Racing
608
+ 606,Cellar Door Open/Unprotected
609
+ 607,Bag
610
+ 608,Disclosure Not Provided
611
+ 609,Contrary To LL 58/87(Handicapped Access)
612
+ 610,Lack of Supplies
613
+ 611,Vehicle Report
614
+ 612,Accident - Elevator
615
+ 613,Unauthorized Film Shoot
616
+ 614,Clear Water With Other Particles (Use Comments) (QEZ)
617
+ 615,Catch Basin Search (SC2)
618
+ 616,Sidewalk Grating - Missing
619
+ 617,Dirty/Inadequate Equip./Facility
620
+ 618,Removing Flowers/Plants
621
+ 619,Foundation
622
+ 620,Time Clock Maladjusted
623
+ 621,Unlicensed Day Care
624
+ 622,Graffiti/Litter on Phone
625
+ 623,Sway Bar
626
+ 624,Unlicensed Vendors
627
+ 625,Turtle Under 4 inches Long
628
+ 626,Biking/Rollerblading off Path
629
+ 627,Mandated Reporters
630
+ 628,Waterway-Sewage (IHA)
631
+ 629,Layaway Terms Not Provided
632
+ 630,Grease In Sewer/Catch Basin (IDG)
633
+ 631,ER5 Comm. Recyc. (Bldg Mgmt)
634
+ 632,Police Report Not Requested
635
+ 633,Failure to Post Calorie Information
636
+ 634,FDNY Referral - Pilot
637
+ 635,Wildlife Sighting
638
+ 636,Oil Spill Into Basin/Sewer - Small (IABS)
639
+ 637,Flood Light Lamp Missing
640
+ 638,Noise: Loud Music/Nighttime(Mark Date And Time) (NP1)
641
+ 639,Removing Wildlife
642
+ 640,Detour
643
+ 641,Foreign Attachment On Wood Pole
644
+ 642,Mandatory Tip
645
+ 643,10A Adopt-A-Basket
646
+ 644,Bike Rack Repair
647
+ 645,Honorary
+ 646,Rough Pavement
+ 647,Warning Signal Lamp
+ 648,Equipment Complaint
+ 649,Harassment
+ 650,Other Sewer Problem (Use Comments) (SZZ)
+ 651,"Air: Smoke, Other (Use Comments) (AA5)"
+ 652,Damaged Bench
+ 653,Snow on Overpass
+ 654,Exposure Unnecessary
+ 655,Bracket Arm Broken
+ 656,Door Lock
+ 657,Exit/Route
+ 658,Returns Not Filed
+ 659,Remove Hydrant Locking Device (WC6)
+ 660,Reflector/Louvre
+ 661,Illegal Activity by Phone
+ 662,Fallen Debris from Bridge
+ 663,"Air: Odor, Nail Salon (AD8)"
+ 664,Sign Missing or Defective
+ 665,General Counsel
+ 666,Unauthorized Posting of Signs
+ 667,Wood Pole Knocked Down
+ 668,Conduit
+ 669,"Taste/Odor, Sewer (QA5)"
+ 670,Sign
+ 671,Warning Signal
+ 672,Prohibited Item Sale to Minor
+ 673,Investigative Inspection
+ 674,Hummock
+ 675,Dogs or Cats Not Sold
+ 676,Other Water Problem (Use Comments) (QZZ)
+ 677,Sidewalk Café
+ 678,Newspaper Box Complaint
+ 679,Leaky Roof
+ 680,Concrete Barrier
+ 681,Illness/Injury
+ 682,Pigeon Odor
+ 683,Ticket Scalping
+ 684,Bent/Loose
+ 685,Ped Visor
+ 686,Milk Not Pasteurized
+ 687,Sewage
+ 688,Projects
+ 689,ER6 Comm. Recyc. (Comm. Tenant)
+ 690,Material Storage - Unsafe
+ 691,TAL 2 Wheelchair
+ 692,Noise: Loud Music/Daytime (Mark Date And Time) (NN1)
+ 693,Community Outreach
+ 694,Bracket Arm Missing
+ 695,Relocation of Parking Meter
+ 696,Dry Cleaning Vapors (PERC)
+ 697,Gasoline Spill (IAA)
+ 698,MCI Abatement
+ 699,Required Signage Not Posted
+ 700,Advice Request
+ 701,Beach/Pool/Sauna Unpermitted
+ 702,Illegal Use Of Hose - Private Residence (CCR)
+ 703,Flood Light Lamp Dayburning
+ 704,Control Panel Damaged
+ 705,installation of hydrant side post (WHFP)
+ 706,Non-Disclosure of Fees
+ 707,Flood Light Lamp Dim
+ 708,"Noise, Ice Cream Truck (NR4)"
+ 709,"Air: Odor/Fumes, Dry Cleaners (AD1)"
+ 710,"Clear Water With Organisms (Insects, Worms) (QE2)"
+ 711,"Oil, Grease In Water (QD1)"
+ 712,Nuisance/Truant
+ 713,Air Conditioning Problem
+ 714,Gender Pricing
+ 715,American Flag
+ 716,Water
+ 717,Traffic Camera
+ 718,Highway Flooding (SH)
+ 719,Elevator - Multiple Devices On Property
+ 720,Misleading Appraisal
+ 721,Animal Odor
+ 722,Information Technology
+ 723,Media Inquiries
+ 724,About NYC Opportunity
+ 725,9 Spill/Oil etc
+ 726,Plate Noisy/Sunken/Raised (WF5)
+ 727,Touchscreen/Button Not Working
+ 728,Broken/Defective
+ 729,Street Con Game
+ 730,Fleet
+ 731,Paid in Advance
+ 732,Jewelry
+ 733,Sewer Break (SBR)
+ 734,Broken Water Fountain
+ 735,Sidewalk Pull Box Co
+ 736,Stalled Construction Site
+ 737,Large Number of Mosquitoes
+ 738,Telco Connection Blk
+ 739,In Post Base
+ 740,Personal DHE Exemption
+ 741,Construction
+ 742,Blocking Sidewalk
+ 743,ER4 City Agency (Inst. Recycling)
+ 744,No Dial Tone
+ 745,Lane Control Signal
+ 746,Unrequested Services Provided
+ 747,Tenant Refusal
+ 748,Unsafe Use of Playground
+ 749,MICROWAVE
+ 750,"Dirt, Litter, Debris - Lot"
+ 751,"Noise: Boat(Engine,Music,Etc) (NR10)"
+ 752,Lost/Missing Person
+ 753,Clothing/Glasses
+ 754,High Pressure to Take on Loan/Debt
+ 755,Time Insufficient
+ 756,Blocked - ATM
+ 757,Fire Alarm Lamp Missing
+ 758,Bees/Wasps - Not a beekeper
+ 759,CMU Communication
+ 760,Sodium Warning
+ 761,Facility General
+ 762,Lost Property
+ 763,Capital Construction
+ 764,Business Tax
+ 765,Non-Compliance w/Lightweight Materials
+ 766,Dead End Signal
+ 767,Apply Payment or Credit
+ 768,Payment Not Posted
+ 769,Refund/Credit Info or Status
+ 770,EFT or Online Payment Problem
+ 771,DAMAGE STRUCTURE/RAILING (SRGDM)
+ 772,Request To Open A Hydrant (WC4)
+ 773,Amount Owed Dispute
+ 774,Payment Misapplied
+ 775,Other Agency Charge
+ 776,Bill Received in Error
+ 777,"Smoking Signs - ""No Smoking"" Signs Not Observed on Construction Site"
+ 778,Application Appeal
+ 779,Speed Board Sign
+ 780,OUTLET COVER
+ 781,Material Stored Improperly
+ 782,Other Health Matters
+ 783,Damaged or Missing Ad Box
+ 784,Lack of Safety Equipment
+ 785,Wrong Amount Paid or Withdrawn
+ 786,Missing Complaint Sign
+ 787,Unsanitary Condition
+ 788,RAIN GARDEN FLOODING (SRGFLD)
+ 789,Property Value
+ 790,Commercial Rent Tax- Refund
+ 791,Accident - Cranes/Derricks/Suspension Scaffold
+ 792,Ferret
+ 793,Hangers
+ 794,In Conduit
+ 795,Farm Animal
+ 796,No or Defective Headphones
+ 797,Phone Blocking Sidewalk
+ 798,Equipment Malfunction
+ 799,Beekeeping - Honeybees
+ 800,Damaged Door
+ 801,Guard Rail - Highway
+ 802,Illegal Dumping
+ 803,6R Overflowing Recycling Baskets
+ 804,Ewaste appointment
+ 805,Graffiti/Dirty Condition
+ 806,Application Portability
+ 807,Public Event Seating
+ 808,Inattentive
+ 809,Equipment Maintenance
+ 810,Snake
+ 811,Defective Water Sampling Station (QSS)
+ 812,Fence
+ 813,BBQ Outside Authorized Area
+ 814,Bracket Arm Bent
+ 815,Not Received - Vending Machine
+ 816,E7 Private Carter Spillage
+ 817,Lane Station
+ 818,Injury/Safety
+ 819,Condulet Cover
+ 820,Absent
+ 821,Labor Violation
+ 822,Swimming Pool - Unmaintained
+ 823,Tie Rods
+ 824,Other - Explain Below
+ 825,Elevator - Dangerous Condition/Shaft Open/Unguarded
+ 826,Puddle on Driveway
+ 827,Puddle on Roof
+ 828,Basement
+ 829,Container - Over 5 Gallons
+ 830,Commercial Other Exemption
+ 831,Puddle in Ground
+ 832,Elevator - Single Device On Property/No Alternate Service
+ 833,Lien Sale
+ 834,Sewer or Drain
+ 835,Flooded
+ 836,Box Cover
+ 837,Container - Under 5 Gallons
+ 838,Special Agency Projects/Initiatives
+ 839,Bird Bath
+ 840,Swimming Pool Cover
+ 841,Lighting - Garage
+ 842,Advance Fee
+ 843,Roof Gutters
+ 844,Building Foundation
+ 845,Lost Coin
+ 846,Puddle on Sidewalk
+ 847,Shisha
+ 848,Transducer-Loop
+ 849,Decorative Necklace Lighting
+ 850,Monkey
+ 851,Property Misclassified
+ 852,Flavored Tobacco
+ 853,Taste
+ 854,"Dirt, Litter, Debris - Garage"
+ 855,Fountain - Over 5 Gallons
+ 856,Inaccurate Meter
+ 857,Stop Temporary
+ 858,Language Access Coordinator
+ 859,Ped Lens
+ 860,Tires
+ 861,Damaged Toilet/Sink
+ 862,Broken Fence
+ 863,Seizure of Funds
+ 864,Flower Planters
+ 865,Scale Inaccurate
+ 866,High Grass
+ 867,Minor Received Tattoo
+ 868,Wood Pole Leaning
+ 869,Waterway-Color (IHD)
+ 870,User Unlicensed
+ 871,Supervisory
+ 872,Unauthorized Climbing
+ 873,Complaint
+ 874,Red Lt Camera Feed
+ 875,No Idling
+ 876,RPIE - Filing and Technical Issues
+ 877,General Business Tax - Other
+ 878,Fountain - Under 5 Gallons
+ 879,No Bill of Rights
+ 880,Poison Ivy
+ 881,Spanish Transaction
+ 882,Book/Stationery
+ 883,SCRIE Miscellaneous
+ 884,New Automatic Public Toilet Request
+ 885,Personal Clergy Exemption
+ 886,Color
+ 887,Fire Hydrant Emergency (FHE)
+ 888,Documents/Paperwork Missing
+ 889,City Planning Commission
+ 890,BBS Failure
+ 891,Exposure from Nearby Facility
+ 892,Information on Contracts and Contractors
+ 893,Deck Inspection
+ 894,Gas Utility Referral
+ 895,Sports Equipment
+ 896,Appeals Division
+ 897,Marine Lamp
+ 898,Safety Inspection-Retaining Walls (May 2005)
+ 899,Integrity Complaint Referral
+ 900,Property Misclassification
+ 901,Non-Public Schools
+ 902,On Structure
+ 903,421A Exemption
+ 904,Zoning and Land Use Questions/Information
+ 905,Contamination Risk
+ 906,Energy
+ 907,FENCING
+ 908,Cellar Door New
+ 909,Use of Newly Seeded Lawn
+ 910,Electronic Sign - Overhead
+ 911,ULURP Project Status Questions
+ 912,Pedestrian Sign
+ 913,1RE Recycling Electronics
+ 914,Bikes in Buildings
+ 915,Blank Out Matrix Sgn
+ 916,Broken Window
+ 917,High Pressure Sales
+ 918,Building Permit - None
+ 919,Do Not Block the Box
+ 920,12P Dead Deer
+ 921,Ver Message Sign
+ 922,1RE missed collection for E-waste
+ 923,Illegal Use Of Hose - Private Residence
+ 924,Budget
+ 925,Enforcement Work Order (DOB)
+ 926,Sign Defect - Garage
+ 927,1L Missed Recycling Leaves
+ 928,Debris - Excessive
+ 929,Adjacent Buildings Not Protected
+ 930,After Hours Work - Illegal
+ 931,Accident - Construction/Plumbing
+ 932,Construction - Change Grade/Watercourse
+ 933,Landlord Inquiries
+ 934,Det-Sens Amplifier
+ 935,Wood Pole Damaged
+ 936,Contractor Responsibility/VENDEX
+ 937,Unlicensed/Illegal/Improper Work In Progress
+ 938,Commercial Exemptions
+ 939,General Business Tax - Refund
+ 940,Damaged or Leaking Roof
+ 941,General Bad Condition
+ 942,Detector Sensor
+ 943,Accessibility Accommodations
+ 944,DRY WEATHER DISCHARGE - DWD
+ 945,General Business Tax- Audit
+ 946,Commercial Not For Profit Exemption
+ 947,Restroom Non-Complaince With Local Law 79/16
+ 948,Best - DM Tracking Complaint
+ 949,Best - High-Rise Tracking Complaint
+ 950,SST Tracking Complaint
+ 951,M.A.R.C.H. Program (Interagency)
+ 952,Facade (LL11/98)- Unsafe Notification
+ 953,Inspection Work Order (DOB)
+ 954,Plumbing Enforcement Work Order (DOB)
+ 955,Illegal Conversion No Access Follow - UP
+ 956,Best - Low-Rise Tracking Complaint
+ 957,Construction Enforcement Work Order (DOB)
+ 958,Illegal Activity
+ 959,Excavation Tracking Complaint
+ 960,Sustainability Enforcement Work Order
+ 961,Interior Demo Tracking Complaint
+ 962,Electrical Enforcement Work Order (DOB)
+ 963,Sandy: Building Destroyed
+ 964,Amusement Ride Accident/Incident
+ 965,Complaince Inspection
+ 966,Demolition Notification Received
+ 967,V.E.S.T. Program (DOB & NYPD)
+ 968,Personal Veteran Exemption
+ 969,Depression Maintenance
+ 970,Driver Complaint - Passenger
+ 971,Elevator - Defective/Not Working
+ 972,DRIE Exemption
+ 973,Mailed - Not Reflected
+ 974,The ABCs of Housing
+ 975,Full Term Mobile Food Vendor License
+ 976,Medicaid
+ 977,Food Stamp
+ 978,Cash Assistance
+ 979,Billing Name Incorrect
+ 980,Waive Penalty for Late Payment
+ 981,The ABCs of Housing - Chinese
+ 982,Lost and Found
+ 983,Heat Bulletin
+ 984,The ABCs of Housing - Spanish
+ 985,Homeless Issue
+ 986,Electronic Fund Transfer (EFT) Problem
+ 987,Copy of Account Information
+ 988,Condo or Co-op Abatement
+ 989,Copy of Statement
+ 990,Property Address Incorrect
+ 991,Other Billing Issue
+ 992,Card - DOF Confirmation Number Issued
+ 993,Mitchell-Lama Housing List
+ 994,Billing Address Incorrect
+ 995,Waterway-Oil/Gasoline (IHB)
+ 996,Status of Payment Adjustment
+ 997,Cleanliness
+ 998,Barbershop License
+ 999,Food Service Establishment License
+ 1000,Debt Collection Agency License
+ 1001,Housing Information Guide For Tenants and Owners Notice
+ 1002,The ABCs of Housing - Arabic
+ 1003,Applied to Wrong Ticket
+ 1004,Tax Exemption
+ 1005,Commercial ICIP or ICAP Exemption
+ 1006,Misapplied Payment
+ 1007,Remove Mortgage
+ 1008,Frozen Dessert Manufacturer License
+ 1009,General Inquiry
+ 1010,Image of Ticket
+ 1011,Incorrect Amount Paid
+ 1012,Status of Appeal
+ 1013,Card - No DOF Confirmation Number Issued
+ 1014,Status of PV Refund
+ 1015,Filing and Technical Issues
+ 1016,Full Term Mobile Food Unit Permit
+ 1017,General Street Vendor License
+ 1018,Ready NY - English - Full Size
+ 1019,Condo Billing Issue
+ 1020,Locksmith License
+ 1021,Status of Hearing
+ 1022,General Complaint
+ 1023,Home Ownership Kit
+ 1024,Registration Clearance Request
+ 1025,Commercial Green Roof or Solar Panel Exemption
+ 1026,Driver Compliment
+ 1027,Commercial 421A Exemption
+ 1028,HomeFirst Down Payment Information
+ 1029,Ready NY - Businesses - English
+ 1030,Copy of Notice of Property Value
+ 1031,Seasonal Mobile Food Vendor License
+ 1032,Ready NY Guide - Pocket Sized - English
+ 1033,Newsstand License
+ 1034,ACRIS Incorrect
+ 1035,Secondhand Dealer Firearms License
+ 1036,Catering Establishment License
+ 1037,Cigarette Retail Dealer License
+ 1038,Housing Quality Standards (HQS) Inspections FAQs - English
+ 1039,Commercial CEP or CRP Exemption
+ 1040,Finance Business Center - Not Reflected
+ 1041,Street Fair Vendor License
+ 1042,Stoop Line Stand License
+ 1043,Home Improvement Contractor License
+ 1044,Individual Process Server License
+ 1045,Decision and Order
+ 1046,Card - Charged Twice
+ 1047,Status of Request to file Paper RPIE
+ 1048,Full Term Tattoo License
+ 1049,List of Outstanding Tickets
+ 1050,Disruptive Passenger
+ 1051,Interruption of Essential Services Notice
+ 1052,Commercial J51 Exemption
+ 1053,Secondhand Dealer Auto License
+ 1054,Ready NY My Emergency Plan - English
+ 1055,Winter Health and Safety Tips Guide
+ 1056,Sightseeing Guide License
+ 1057,Home Improvement Salesperson License
+ 1058,The ABCs of Housing - Russian
+ 1059,Delays
+ 1060,General Vendor Distributor License
+ 1061,Senior Citizen Home Assistance Program (SCHAP) Loan
+ 1062,Performance
+ 1063,Employment Agency License
+ 1064,Secondhand Dealer General License
+ 1065,Ready NY - English - Pocket Size
+ 1066,Commercial UDAAP Exemption
+ 1067,Death Certificate Before 1949 Order Form
+ 1068,Birth Certificate Before 1910 Order Form
+ 1069,Garage or Parking Lot License
+ 1070,Marriage Certificate Order Form
+ 1071,Elevator Not Inspected/Illegal/No Permit
+ 1072,Certificate of No Harassment or Exemption - SRO
+ 1073,Dead/Dying Tree
+ 1074,Certificate of No Harassment - Zoning
+ 1075,Hurricane Preparedness - English
+ 1076,Tow Truck Driver License
+ 1077,Temporary Food Service Establishment Permit
+ 1078,Settlement Reduction Not Shown
+ 1079,Ready NY - Arabic - Full Size
+ 1080,Electronic Sign - Portable
+ 1081,Ready NY - Kids - Middle and High School Students
+ 1082,Ready NY - French - Full Size
+ 1083,City Rebate
+ 1084,Restrooms
+ 1085,Non Retail Food Processing Establishment License
+ 1086,Homestead
+ 1087,Ready NY - Kids - Elementary School Students
+ 1088,Ready NY - Flooding
+ 1089,Fallen Debris from Tunnel
+ 1090,Ready NY - Reference Card
+ 1091,Genealogy Research Application
+ 1092,Emergency Notice
+ 1093,Dealer in Devices for Disabled License
+ 1094,Ready NY My Emergency Plan - Spanish
+ 1095,Hurricane Preparedness - Spanish
+ 1096,Hurricane Preparedness - Haitian Creole
+ 1097,Sightseeing Bus License
+ 1098,Hurricane Preparedness - Arabic
+ 1099,Process Server Organization License
+ 1100,Licensed Home Improvement Contractor Bumper Sticker
+ 1101,Ready NY Beat the Heat - English
+ 1102,Temporary Amusement Device License
+ 1103,Ready NY- Pandemic Flu
+ 1104,Pothole - Tunnel
+ 1105,Shelter for Homeless Animals License
+ 1106,Auctioneer License
+ 1107,Marshal - Not Reflected
+ 1108,Ready NY My Emergency Plan - Traditional Chinese
+ 1109,Sidewalk Cafe License
+ 1110,Cabaret License
+ 1111,Locksmith Apprentice License
+ 1112,New Lead Law Rights and Requirements
+ 1113,Ready NY - Spanish - Full Size
+ 1114,Scrap Metal Processor License
+ 1115,Announcements
+ 1116,3 Sweeping/Missed-Inadequate
+ 1117,Electronics and Home Appliance Service Dealer License
+ 1118,Ready NY - Pets - English
+ 1119,Pedicab Driver
+ 1120,Ready NY - Chinese Traditional - Full Size
+ 1121,Summer Heat - English
+ 1122,Laundry License
+ 1123,Summer Heat - Russian
+ 1124,Ready NY My Emergency Plan - Russian
+ 1125,The ABCs of Housing - Korean
+ 1126,Ready NY - Russian - Pocket Size
+ 1127,Strip Paving
+ 1128,Sign Blocked by Tree
+ 1129,Ready NY - Haitian Creole - Full Size
+ 1130,Beach/Pool Closure
+ 1131,Conflict Monitor
+ 1132,Dead End Sign
+ 1133,Seasonal Food Cart Vendor Permit
+ 1134,Amusement Arcade License
+ 1135,Tow Truck Company License
+ 1136,Commercial DAMP Exemption
+ 1137,Waterway-Floatables (IHC)
+ 1138,Pet Store - New License
+ 1139,Ready NY - Chinese Traditional - Pocket Size
+ 1140,The ABCs of Housing - Haitian Creole
+ 1141,Ready NY - Spanish - Pocket Size
+ 1142,Ready NY - Small and Mid-Sized Companies
+ 1143,Commercial 421B Exemption
+ 1144,Booting Company License
+ 1145,Animal Grooming License
+ 1146,Temporary Tattoo License
+ 1147,Commercial 421G Exemption
+ 1148,Scale Dealer or Repairer License
+ 1149,General Vendor Waiting List Application
+ 1150,Ready NY - Russian - Full Size
+ 1151,Ready NY My Emergency Plan - Haitian Creole
+ 1152,Going Out of Business Sale License
+ 1153,Graffiti - Tunnel
+ 1154,Compressed Air License
+ 1155,Det-Sens Cabinet
+ 1156,Ready NY - Chinese Simplified - Full Size
+ 1157,Laundry Jobber License
+ 1158,NO WATER - WNW
+ 1159,Animal Boarding License
+ 1160,Electronics Store License
+ 1161,Auction House License
+ 1162,Hydrotest
+ 1163,Pawn Broker License
+ 1164,Smoke/Odor
+ 1165,Food Source/Protection
+ 1166,Garbage
+ 1167,Equipment
+ 1168,Pool or Billiard Hall License
+ 1169,EXPY Sign Fixt Cover
+ 1170,Personal Crime Victim or Good Samaritan Exemption
+ 1171,Dust Cover
+ 1172,Ready NY My Emergency Plan - Italian
+ 1173,Gaming Cafe License
+ 1174,Portable Amusement Ride License
+ 1175,Curb Violation
+ 1176,Status Call
+ 1177,No Status Call
+ 1178,Summer Heat - Spanish
+ 1179,Electrical - Unlicensed/Illegal/Improper Work In Progress
+ 1180,Messenger
+ 1181,Hurricane Preparedness - Chinese
+ 1182,ABANDONED APARTMENT UNIT
+ 1183,Hurricane Preparedness - Russian
+ 1184,License Violation
+ 1185,Placement
+ 1186,Insects / Pests
+ 1187,Driver Complaint - Non Passenger
+ 1188,Retail Store
+ 1189,Initial
+ 1190,To FDNY Approved System
+ 1191,Ready NY - Korean - Full Size
+ 1192,For Violation
+ 1193,Licensee Complaint
+ 1194,Bodega/Deli/Supermarket
+ 1195,Multi Agency Joint Inspection
+ 1196,Inhalation Therapy Supervising Technician License
+ 1197,Horse Drawn Carriage Driver License
+ 1198,Bowstring Truss Tracking Complaint
+ 1199,Retail Laundry License Application
+ 1200,14 Derelict Vehicles
+ 1201,Retaining Wall Tracking Complaint
+ 1202,Ready NY - Businesses - Spanish
+ 1203,Notice of Housing Code Enforcement Issues
+ 1204,Pathogens License
+ 1205,Tobacco Retail Dealer License Application
+ 1206,Sample Suspected Gas Leak Notice
+ 1207,Street Cave-In *Dep Internal Use Only* (SG1)
+ 1208,Housing Quality Standards (HQS) Inspections FAQs - Spanish
+ 1209,HOUSING QUALITY STANDARDS
+ 1210,For Letter of Defect
+ 1211,New Building
+ 1212,Milk/Dairy Products
+ 1213,Voluntary
+ 1214,Business Opportunities/RFPs
+ 1215,Reinspection
+ 1216,Construction Safety Compliance Action
+ 1217,Amusement Ride
+ 1218,Structurally Compromised Building (LL33/08)
+ 1219,Non-med Compressed Gas - New
+ 1220,Other Hazmats
+ 1221,Cell Phone Store
+ 1222,Existing Building
+ 1223,Re-inspection
+ 1224,Iguana
+ 1225,Because of Violation
+ 1226,Disabled Device Dealer
+ 1227,Debt Collection Agency
+ 1228,Semi-Annual Homeless Shelter Inspection: Electrical
+ 1229,Semi-Annual Homeless Shelter Inspection: Construction
+ 1230,DCP/BSA Compliance Inspection
+ 1231,Semi-Annual Homeless Shelter Inspection: Plumbing
+ 1232,Illegal Commercial Or Manufacturing Use In a C1 Or C2 Zone
+ 1233,Car Dealer - Used
+ 1234,Permission to Publish Contract
+ 1235,Ticket Seller Business License Application
+ 1236,Certificate of No Harassment (CONH) Application
+ 1237,Certificate of No Harassment (CONH) Exemption
+ 1238,Commercial Government Exemption
+ 1239,Tow Truck Exemption License
+ 1240,EXPY Fixture
+ 1241,Industrial Laundry Delivery License Application
+ 1242,Guide Rail
+ 1243,Wireless Antenna
+ 1244,Ready NY My Emergency Plan - Korean
+ 1245,Ready NY My Emergency Plan - Polish
+ 1246,Accident/Explosion - Boiler
+ 1247,Summer Heat - Chinese
+ 1248,Ready NY - Polish - Full Size
+ 1249,Sidewalk Consultation
+ 1250,Noise: Vehicle (NR2)
+ 1251,Con Edison Referral
+ 1252,Documents Not Returned
+ 1253,Initial - Construction
+ 1254,Snow Removal
+ 1255,Snow Emergency
+ 1256,Relocation of Muni Meter
+ 1257,Elevator In (Fdny) Readiness - None
+ 1258,Suspected Street Cut
+ 1259,Overexposure During Treatment
+ 1260,SCRIE Application Denial
+ 1261,Unincorporated Business Tax - Other
+ 1262,"Air: Smoke, Residential (AA1)"
+ 1263,"BUILDING COLLAPSE/FIRE, (ASBESTOS RELATED) *FOR DEP INTERNAL USE ONLY* (HH2)"
+ 1264,Unincorporated Business Tax - Return Filing
+ 1265,No Statement of Job Conditions
+ 1266,Excise Taxes-Refund
+ 1267,Child or Minor Tanning
+ 1268,Injury or Illness from Tanning
+ 1269,Loan Offer
+ 1270,Defective Streetlight
+ 1271,EXPY Sign Reflector
+ 1272,Commercial Rent Tax-Other
+ 1273,Sediment
+ 1274,Workplace - 10 or Less Staff
+ 1275,Failure to Comply with Annual Crane Inspection
+ 1276,Facility Unregistered
+ 1277,"Air: Open Fire, Construction/Demolition (AC4)"
+ 1278,Extra Parts
+ 1279,Unincorporated Business Tax - Refund
+ 1280,Musical Instrument
+ 1281,Green Roof or Solar Panel Exemption
+ 1282,Sign Defect - Lot
+ 1283,Crack Sealing
+ 1284,Cigarette Vending Machine
+ 1285,Marine Globe
+ 1286,Mssg Sign Multi Lamp
+ 1287,Marine Flasher
+ 1288,Technician Unlicensed
+ 1289,Dumpster - Causing Damage
+ 1290,Minor Access
+ 1291,Excise Taxes-Audit
+ 1292,Not Certified
+ 1293,Toy Gun Sale
+ 1294,Fire Alarm Lamp Cycling
+ 1295,High Interest Loan
+ 1296,SCRIE Application Appeal
+ 1297,Cable Television
+ 1298,Mapping Information
+ 1299,NYC.gov Web Site
+ 1300,EZ PASS READER
+ 1301,RPIE
+ 1302,RTMS
+ 1303,Excise Taxes-Other
+ 1304,Facility Complaint
+ 1305,Inspection Requests/Complaints
+ 1306,Building Information/Construction History
+ 1307,Birth/Death Certificates
+ 1308,Communications/Intergovernmental
+ 1309,Ethernet Cable
+ 1310,Mental Health
+ 1311,ALJ Division
+ 1312,Fixture(S)
+ 1313,General Business Tax - Return filing
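The block above is the tail of a code-to-descriptor lookup table for the 311 complaint categories used in the analysis. Below is a minimal sketch of loading such a mapping and decoding an integer-encoded complaint column; the file name `complaint_type_map.csv`, the absence of a header row, and the column names are assumptions, since the file's actual name is not visible in this excerpt:

```python
import pandas as pd

# Assumed file name and layout: two columns (integer code, descriptor), no header row.
mapping = pd.read_csv("data/complaint_type_map.csv", header=None,
                      names=["code", "complaint_type"])
code_to_label = dict(zip(mapping["code"], mapping["complaint_type"]))

# Example: decode a few encoded complaint codes back to readable labels.
records = pd.DataFrame({"complaint_code": [649, 687, 802]})
records["complaint_type"] = records["complaint_code"].map(code_to_label)
print(records)
```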
data/drop_vars.xlsx ADDED
Binary file (10.8 kB). View file
 
data/weather_aggregated_2010-2018.csv ADDED
The diff for this file is too large to render. See raw diff
 
figures/bounded_map.html ADDED
@@ -0,0 +1,95 @@
+ <!DOCTYPE html>
+ <html>
+ <head>
+
+ <meta http-equiv="content-type" content="text/html; charset=UTF-8" />
+
+ <script>
+ L_NO_TOUCH = false;
+ L_DISABLE_3D = false;
+ </script>
+
+ <style>html, body {width: 100%;height: 100%;margin: 0;padding: 0;}</style>
+ <style>#map {position:absolute;top:0;bottom:0;right:0;left:0;}</style>
+ <script src="https://cdn.jsdelivr.net/npm/leaflet@1.9.3/dist/leaflet.js"></script>
+ <script src="https://code.jquery.com/jquery-3.7.1.min.js"></script>
+ <script src="https://cdn.jsdelivr.net/npm/bootstrap@5.2.2/dist/js/bootstrap.bundle.min.js"></script>
+ <script src="https://cdnjs.cloudflare.com/ajax/libs/Leaflet.awesome-markers/2.0.2/leaflet.awesome-markers.js"></script>
+ <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/leaflet@1.9.3/dist/leaflet.css"/>
+ <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap@5.2.2/dist/css/bootstrap.min.css"/>
+ <link rel="stylesheet" href="https://netdna.bootstrapcdn.com/bootstrap/3.0.0/css/bootstrap.min.css"/>
+ <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@fortawesome/fontawesome-free@6.2.0/css/all.min.css"/>
+ <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/Leaflet.awesome-markers/2.0.2/leaflet.awesome-markers.css"/>
+ <link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/python-visualization/folium/folium/templates/leaflet.awesome.rotate.min.css"/>
+
+ <meta name="viewport" content="width=device-width,
+ initial-scale=1.0, maximum-scale=1.0, user-scalable=no" />
+ <style>
+ #map_1bca46dd8c0ecb99e8cf98a8490d26c6 {
+ position: relative;
+ width: 100.0%;
+ height: 100.0%;
+ left: 0.0%;
+ top: 0.0%;
+ }
+ .leaflet-container { font-size: 1rem; }
+ </style>
+
+ </head>
+ <body>
+
+
+ <div class="folium-map" id="map_1bca46dd8c0ecb99e8cf98a8490d26c6" ></div>
+
+ </body>
+ <script>
+
+
+ var map_1bca46dd8c0ecb99e8cf98a8490d26c6 = L.map(
+ "map_1bca46dd8c0ecb99e8cf98a8490d26c6",
+ {
+ center: [40.7128, -74.006],
+ crs: L.CRS.EPSG3857,
+ zoom: 10,
+ zoomControl: false,
+ preferCanvas: false,
+ scrollWheelZoom: false,
+ dragging: false,
+ }
+ );
+
+
+
+
+
+ var tile_layer_5610f1ba4421bfdd6b11b0d3a8230311 = L.tileLayer(
+ "https://{s}.basemaps.cartocdn.com/light_all/{z}/{x}/{y}{r}.png",
+ {"attribution": "\u0026copy; \u003ca href=\"https://www.openstreetmap.org/copyright\"\u003eOpenStreetMap\u003c/a\u003e contributors \u0026copy; \u003ca href=\"https://carto.com/attributions\"\u003eCARTO\u003c/a\u003e", "detectRetina": false, "maxNativeZoom": 20, "maxZoom": 20, "minZoom": 0, "noWrap": false, "opacity": 1, "subdomains": "abcd", "tms": false}
+ );
+
+
+ tile_layer_5610f1ba4421bfdd6b11b0d3a8230311.addTo(map_1bca46dd8c0ecb99e8cf98a8490d26c6);
+
+
+ var rectangle_7a26a5f5f0553f8e9c5a706c1184bf75 = L.rectangle(
+ [[40.49804421521046, -74.25521082506387], [40.91294056699566, -73.70038354802529]],
+ {"bubblingMouseEvents": true, "color": "#F1807E", "dashArray": "5 5", "dashOffset": null, "fill": true, "fillColor": "blue", "fillOpacity": 0.2, "fillRule": "evenodd", "lineCap": "round", "lineJoin": "round", "noClip": false, "opacity": 1.0, "smoothFactor": 1.0, "stroke": true, "weight": 3}
+ ).addTo(map_1bca46dd8c0ecb99e8cf98a8490d26c6);
+
+
+ var popup_c20294d340dae6e3dee1251d70105f4e = L.popup({"maxWidth": "100%"});
+
+
+
+ var html_e2caf4fa03251f2359325a8b2c62d96d = $(`<div id="html_e2caf4fa03251f2359325a8b2c62d96d" style="width: 100.0%; height: 100.0%;">Service Data Coverage Zone</div>`)[0];
+ popup_c20294d340dae6e3dee1251d70105f4e.setContent(html_e2caf4fa03251f2359325a8b2c62d96d);
+
+
+
+ rectangle_7a26a5f5f0553f8e9c5a706c1184bf75.bindPopup(popup_c20294d340dae6e3dee1251d70105f4e)
+ ;
+
+
+
+ </script>
+ </html>
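figures/bounded_map.html above is folium output: a locked CARTO light basemap centered on NYC with a dashed rectangle marking the "Service Data Coverage Zone". Here is a sketch of the folium call that would reproduce it, reconstructed from the rendered Leaflet script rather than taken from the repository's own code:

```python
import folium

# Settings read off the generated HTML above (center, zoom, tiles, rectangle, popup).
m = folium.Map(
    location=[40.7128, -74.006],
    zoom_start=10,
    tiles="CartoDB positron",
    zoom_control=False,
    scroll_wheel_zoom=False,  # extra kwargs are forwarded to the Leaflet map options
    dragging=False,
)
folium.Rectangle(
    bounds=[[40.49804421521046, -74.25521082506387],
            [40.91294056699566, -73.70038354802529]],
    color="#F1807E",
    dash_array="5 5",
    fill=True,
    fill_color="blue",
    fill_opacity=0.2,
    weight=3,
    popup="Service Data Coverage Zone",
).add_to(m)
m.save("figures/bounded_map.html")
```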
figures/final_map.html ADDED
The diff for this file is too large to render. See raw diff
 
figures/map1.html ADDED
The diff for this file is too large to render. See raw diff
 
figures/map2.html ADDED
The diff for this file is too large to render. See raw diff
 
figures/model_performance.png ADDED
models/BERTopic/config.json ADDED
@@ -0,0 +1,17 @@
+ {
+ "calculate_probabilities": false,
+ "language": null,
+ "low_memory": false,
+ "min_topic_size": 10,
+ "n_gram_range": [
+ 1,
+ 1
+ ],
+ "nr_topics": 8,
+ "seed_topic_list": null,
+ "top_n_words": 5,
+ "verbose": true,
+ "zeroshot_min_similarity": 0.7,
+ "zeroshot_topic_list": null,
+ "embedding_model": "sentence-transformers/all-MiniLM-L6-v2"
+ }
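config.json records the hyperparameters of the saved BERTopic model: MiniLM sentence embeddings, a minimum topic size of 10, topics reduced to 8, and 5 words kept per topic. A sketch of the equivalent constructor call and of reloading the fitted model from this directory, inferred from the config rather than copied from the app code:

```python
from bertopic import BERTopic

# Constructor matching the non-default values stored in config.json above.
topic_model = BERTopic(
    embedding_model="sentence-transformers/all-MiniLM-L6-v2",
    min_topic_size=10,
    nr_topics=8,
    top_n_words=5,
    verbose=True,
)

# Reload the fitted model shipped in this folder and inspect its topics.
loaded = BERTopic.load("models/BERTopic")
print(loaded.get_topic_info())
```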
models/BERTopic/ctfidf.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b0e5537b25ef16e60f33d219dbc53128240bdd3ef0677273cbcc337157562112
+ size 14020
models/BERTopic/ctfidf_config.json ADDED
@@ -0,0 +1,408 @@
1
+ {
2
+ "ctfidf_model": {
3
+ "bm25_weighting": false,
4
+ "reduce_frequent_words": false
5
+ },
6
+ "vectorizer_model": {
7
+ "params": {
8
+ "analyzer": "word",
9
+ "binary": false,
10
+ "decode_error": "strict",
11
+ "encoding": "utf-8",
12
+ "input": "content",
13
+ "lowercase": true,
14
+ "max_df": 1.0,
15
+ "max_features": null,
16
+ "min_df": 2,
17
+ "ngram_range": [
18
+ 1,
19
+ 2
20
+ ],
21
+ "stop_words": "english",
22
+ "strip_accents": null,
23
+ "token_pattern": "(?u)\\b\\w\\w+\\b",
24
+ "vocabulary": null
25
+ },
26
+ "vocab": {
27
+ "request": 285,
28
+ "large": 197,
29
+ "collection": 63,
30
+ "posted": 261,
31
+ "parking": 245,
32
+ "sign": 312,
33
+ "violation": 365,
34
+ "working": 376,
35
+ "contrary": 83,
36
+ "stop": 328,
37
+ "work": 374,
38
+ "order": 241,
39
+ "dirty": 110,
40
+ "sidewalk": 311,
41
+ "access": 3,
42
+ "receipt": 273,
43
+ "site": 316,
44
+ "street": 331,
45
+ "condition": 76,
46
+ "ice": 169,
47
+ "non": 232,
48
+ "missed": 224,
49
+ "area": 20,
50
+ "license": 203,
51
+ "engine": 126,
52
+ "idling": 170,
53
+ "cond": 75,
54
+ "lead": 200,
55
+ "residential": 290,
56
+ "sewer": 309,
57
+ "use": 358,
58
+ "comments": 66,
59
+ "water": 370,
60
+ "meter": 221,
61
+ "broken": 34,
62
+ "leaking": 202,
63
+ "private": 263,
64
+ "residence": 288,
65
+ "refund": 277,
66
+ "return": 295,
67
+ "permit": 249,
68
+ "improper": 175,
69
+ "certificate": 50,
70
+ "occupancy": 236,
71
+ "illegal": 171,
72
+ "plumbing": 256,
73
+ "pedestrian": 248,
74
+ "signal": 313,
75
+ "defective": 101,
76
+ "inadequate": 179,
77
+ "heat": 159,
78
+ "new": 230,
79
+ "bus": 36,
80
+ "placement": 254,
81
+ "repair": 284,
82
+ "building": 35,
83
+ "damaged": 93,
84
+ "cracked": 88,
85
+ "bicycle": 28,
86
+ "flooding": 140,
87
+ "overnight": 242,
88
+ "commercial": 67,
89
+ "storage": 329,
90
+ "surveillance": 337,
91
+ "waste": 369,
92
+ "blocked": 30,
93
+ "construction": 79,
94
+ "school": 306,
95
+ "property": 270,
96
+ "cover": 86,
97
+ "noise": 231,
98
+ "gas": 147,
99
+ "problem": 265,
100
+ "delivery": 103,
101
+ "goods": 150,
102
+ "curb": 90,
103
+ "hitting": 162,
104
+ "phone": 252,
105
+ "c1": 40,
106
+ "trees": 351,
107
+ "rent": 283,
108
+ "unauthorized": 354,
109
+ "chronic": 55,
110
+ "hanging": 157,
111
+ "accident": 4,
112
+ "cleaning": 58,
113
+ "asp": 22,
114
+ "establishment": 129,
115
+ "public": 272,
116
+ "space": 320,
117
+ "dispute": 111,
118
+ "home": 163,
119
+ "electronics": 123,
120
+ "chemical": 53,
121
+ "chained": 51,
122
+ "smoking": 319,
123
+ "car": 42,
124
+ "general": 149,
125
+ "maintenance": 214,
126
+ "asbestos": 21,
127
+ "open": 239,
128
+ "missing": 226,
129
+ "emergency": 124,
130
+ "odor": 237,
131
+ "catch": 48,
132
+ "basin": 26,
133
+ "tax": 342,
134
+ "temporary": 345,
135
+ "failure": 134,
136
+ "debris": 98,
137
+ "falling": 136,
138
+ "danger": 95,
139
+ "air": 9,
140
+ "defect": 100,
141
+ "metal": 219,
142
+ "protruding": 271,
143
+ "information": 183,
144
+ "cut": 91,
145
+ "vacant": 360,
146
+ "lot": 211,
147
+ "resident": 289,
148
+ "pipe": 253,
149
+ "toilet": 349,
150
+ "button": 38,
151
+ "wiring": 373,
152
+ "buzzer": 39,
153
+ "vehicle": 361,
154
+ "carbon": 43,
155
+ "monoxide": 227,
156
+ "smoke": 318,
157
+ "audit": 24,
158
+ "damp": 94,
159
+ "leak": 201,
160
+ "st": 324,
161
+ "facility": 133,
162
+ "law": 198,
163
+ "cigarette": 56,
164
+ "sale": 303,
165
+ "minor": 222,
166
+ "pool": 258,
167
+ "graffiti": 151,
168
+ "speed": 322,
169
+ "scale": 305,
170
+ "hours": 165,
171
+ "safety": 301,
172
+ "equipment": 128,
173
+ "signs": 314,
174
+ "notice": 233,
175
+ "box": 31,
176
+ "weeds": 372,
177
+ "grating": 152,
178
+ "removal": 280,
179
+ "requested": 286,
180
+ "controller": 85,
181
+ "flasher": 139,
182
+ "loose": 210,
183
+ "time": 347,
184
+ "switch": 340,
185
+ "stump": 335,
186
+ "sampling": 304,
187
+ "required": 287,
188
+ "head": 158,
189
+ "card": 45,
190
+ "stuck": 333,
191
+ "commission": 68,
192
+ "lack": 195,
193
+ "litter": 207,
194
+ "comm": 65,
195
+ "bldg": 29,
196
+ "basket": 27,
197
+ "fallen": 135,
198
+ "bridge": 33,
199
+ "warning": 367,
200
+ "prohibited": 269,
201
+ "inspection": 187,
202
+ "roof": 298,
203
+ "illness": 174,
204
+ "injury": 185,
205
+ "ticket": 346,
206
+ "clear": 59,
207
+ "insects": 186,
208
+ "highway": 161,
209
+ "multiple": 229,
210
+ "devices": 107,
211
+ "animal": 13,
212
+ "lane": 196,
213
+ "control": 84,
214
+ "dirt": 108,
215
+ "clothing": 61,
216
+ "high": 160,
217
+ "pressure": 262,
218
+ "debt": 99,
219
+ "materials": 218,
220
+ "agency": 8,
221
+ "application": 17,
222
+ "station": 325,
223
+ "unguarded": 355,
224
+ "driveway": 117,
225
+ "gallons": 144,
226
+ "device": 106,
227
+ "service": 307,
228
+ "swimming": 338,
229
+ "coin": 62,
230
+ "tobacco": 348,
231
+ "taste": 341,
232
+ "filing": 138,
233
+ "technical": 343,
234
+ "issues": 192,
235
+ "rights": 297,
236
+ "miscellaneous": 223,
237
+ "color": 64,
238
+ "division": 112,
239
+ "retaining": 293,
240
+ "zoning": 378,
241
+ "lawn": 199,
242
+ "status": 326,
243
+ "enforcement": 125,
244
+ "excessive": 131,
245
+ "contractor": 82,
246
+ "dry": 118,
247
+ "complaince": 71,
248
+ "electrical": 121,
249
+ "amusement": 11,
250
+ "ride": 296,
251
+ "incident": 180,
252
+ "received": 274,
253
+ "program": 268,
254
+ "nypd": 235,
255
+ "issue": 191,
256
+ "electronic": 122,
257
+ "transfer": 350,
258
+ "eft": 120,
259
+ "address": 6,
260
+ "incorrect": 181,
261
+ "wrong": 377,
262
+ "paper": 244,
263
+ "list": 206,
264
+ "passenger": 247,
265
+ "guide": 154,
266
+ "assistance": 23,
267
+ "exemption": 132,
268
+ "sro": 323,
269
+ "truck": 352,
270
+ "driver": 115,
271
+ "city": 57,
272
+ "tunnel": 353,
273
+ "licensed": 204,
274
+ "improvement": 177,
275
+ "sticker": 327,
276
+ "animals": 14,
277
+ "company": 69,
278
+ "waterway": 371,
279
+ "abcs": 1,
280
+ "housing": 167,
281
+ "haitian": 155,
282
+ "creole": 89,
283
+ "apartment": 16,
284
+ "unit": 356,
285
+ "retail": 292,
286
+ "store": 330,
287
+ "initial": 184,
288
+ "fdny": 137,
289
+ "approved": 18,
290
+ "multi": 228,
291
+ "business": 37,
292
+ "annual": 15,
293
+ "related": 278,
294
+ "dep": 104,
295
+ "internal": 189,
296
+ "vending": 362,
297
+ "machine": 213,
298
+ "marine": 217,
299
+ "dumpster": 119,
300
+ "damage": 92,
301
+ "cable": 41,
302
+ "missed collection": 225,
303
+ "street cond": 332,
304
+ "use comments": 359,
305
+ "private residence": 264,
306
+ "improper use": 176,
307
+ "residential building": 291,
308
+ "plumbing work": 257,
309
+ "work illegal": 375,
310
+ "construction site": 80,
311
+ "cover missing": 87,
312
+ "odor sewer": 238,
313
+ "sewer catch": 310,
314
+ "catch basin": 49,
315
+ "danger falling": 96,
316
+ "metal protruding": 220,
317
+ "defective street": 102,
318
+ "carbon monoxide": 44,
319
+ "safety equipment": 302,
320
+ "permit license": 250,
321
+ "grating missing": 153,
322
+ "card stuck": 46,
323
+ "stuck meter": 334,
324
+ "warning signal": 368,
325
+ "clear water": 60,
326
+ "dirt litter": 109,
327
+ "litter debris": 208,
328
+ "open unguarded": 240,
329
+ "swimming pool": 339,
330
+ "amusement ride": 12,
331
+ "address incorrect": 7,
332
+ "incorrect status": 182,
333
+ "driver license": 116,
334
+ "home improvement": 164,
335
+ "improvement contractor": 178,
336
+ "company license": 70,
337
+ "abcs housing": 2,
338
+ "haitian creole": 156,
339
+ "dep internal": 105,
340
+ "internal use": 190,
341
+ "vending machine": 363,
342
+ "unknown": 357,
343
+ "line": 205,
344
+ "knocked": 193,
345
+ "post": 260,
346
+ "wall": 366,
347
+ "excavation": 130,
348
+ "support": 336,
349
+ "foreign": 142,
350
+ "dead": 97,
351
+ "contact": 81,
352
+ "installation": 188,
353
+ "break": 32,
354
+ "house": 166,
355
+ "change": 52,
356
+ "management": 215,
357
+ "conditioning": 77,
358
+ "condo": 78,
359
+ "foundation": 143,
360
+ "referral": 275,
361
+ "route": 299,
362
+ "concrete": 74,
363
+ "panel": 243,
364
+ "complaint": 72,
365
+ "basement": 25,
366
+ "garage": 145,
367
+ "sink": 315,
368
+ "reflected": 276,
369
+ "chinese": 54,
370
+ "spanish": 321,
371
+ "arabic": 19,
372
+ "hqs": 168,
373
+ "english": 127,
374
+ "russian": 300,
375
+ "portable": 259,
376
+ "korean": 194,
377
+ "10": 0,
378
+ "television": 344,
379
+ "retaining wall": 294,
380
+ "parking lot": 246,
381
+ "air conditioning": 10,
382
+ "location": 209,
383
+ "manufacturing": 216,
384
+ "care": 47,
385
+ "activity": 5,
386
+ "low": 212,
387
+ "food": 141,
388
+ "number": 234,
389
+ "remove": 281,
390
+ "pet": 251,
391
+ "compressed": 73,
392
+ "illegal use": 173,
393
+ "illegal improper": 172,
394
+ "sewage": 308,
395
+ "drinking": 113,
396
+ "garbage": 146,
397
+ "small": 317,
398
+ "removing": 282,
399
+ "plants": 255,
400
+ "problem use": 266,
401
+ "drinking water": 114,
402
+ "gas sewer": 148,
403
+ "ventilation": 364,
404
+ "problems": 267,
405
+ "related problems": 279
406
+ }
407
+ }
408
+ }
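ctfidf_config.json above stores the fitted vocabulary together with the parameters of the class-based TF-IDF step and its CountVectorizer: unigrams and bigrams, English stop words removed, and terms kept only if they appear in at least two documents. A sketch of the matching objects as they would be passed to BERTopic, inferred from the stored parameters:

```python
from sklearn.feature_extraction.text import CountVectorizer
from bertopic.vectorizers import ClassTfidfTransformer

vectorizer_model = CountVectorizer(
    ngram_range=(1, 2),   # unigrams and bigrams, as in "ngram_range" above
    min_df=2,             # drop terms seen in fewer than two documents
    stop_words="english",
)
ctfidf_model = ClassTfidfTransformer(
    bm25_weighting=False,
    reduce_frequent_words=False,
)
```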
models/BERTopic/topic_embeddings.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:94f9c82186355ce319ce3da6352c0a285b91e216bdb680ec4e453d2df2f3c3d1
+ size 12376
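The two .safetensors entries are Git LFS pointers (spec version, sha256 oid, byte size), so the diff shows metadata rather than tensor data. Once the actual objects are pulled, the arrays can be read back; a minimal sketch using the safetensors numpy API:

```python
from safetensors.numpy import load_file

# Requires the real LFS object on disk, not just the pointer text shown in the diff.
tensors = load_file("models/BERTopic/topic_embeddings.safetensors")
for name, array in tensors.items():
    print(name, array.shape)
```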
models/BERTopic/topics.json ADDED
@@ -0,0 +1,1671 @@
1
+ {
2
+ "topic_representations": {
3
+ "-1": [
4
+ [
5
+ "order",
6
+ 0.05415367852300953
7
+ ],
8
+ [
9
+ "property",
10
+ 0.05110874633317529
11
+ ],
12
+ [
13
+ "inspection",
14
+ 0.047957198774650545
15
+ ],
16
+ [
17
+ "condition",
18
+ 0.04684968413874401
19
+ ],
20
+ [
21
+ "construction",
22
+ 0.040871670084454234
23
+ ]
24
+ ],
25
+ "0": [
26
+ [
27
+ "damaged",
28
+ 0.12203031954457103
29
+ ],
30
+ [
31
+ "sign",
32
+ 0.10565370415490198
33
+ ],
34
+ [
35
+ "sidewalk",
36
+ 0.09204086251770861
37
+ ],
38
+ [
39
+ "missing",
40
+ 0.08904351211067452
41
+ ],
42
+ [
43
+ "housing",
44
+ 0.08425536080287954
45
+ ]
46
+ ],
47
+ "1": [
48
+ [
49
+ "license",
50
+ 0.2485641290752132
51
+ ],
52
+ [
53
+ "complaint",
54
+ 0.14648917413895213
55
+ ],
56
+ [
57
+ "illegal",
58
+ 0.10854741509204496
59
+ ],
60
+ [
61
+ "violation",
62
+ 0.06196592365898547
63
+ ],
64
+ [
65
+ "permit",
66
+ 0.054220183201612294
67
+ ]
68
+ ],
69
+ "2": [
70
+ [
71
+ "water",
72
+ 0.20043627364808767
73
+ ],
74
+ [
75
+ "basin",
76
+ 0.1360096285478291
77
+ ],
78
+ [
79
+ "litter",
80
+ 0.12766055935466478
81
+ ],
82
+ [
83
+ "missed",
84
+ 0.12411889941590681
85
+ ],
86
+ [
87
+ "sewer",
88
+ 0.11794480155381776
89
+ ]
90
+ ],
91
+ "3": [
92
+ [
93
+ "noise",
94
+ 0.7067969405376407
95
+ ],
96
+ [
97
+ "animal",
98
+ 0.23151186956043018
99
+ ],
100
+ [
101
+ "truck",
102
+ 0.18520949564834413
103
+ ],
104
+ [
105
+ "dead",
106
+ 0.1440316275215734
107
+ ],
108
+ [
109
+ "equipment",
110
+ 0.1267727574626285
111
+ ]
112
+ ],
113
+ "4": [
114
+ [
115
+ "odor",
116
+ 0.40165153174580426
117
+ ],
118
+ [
119
+ "food",
120
+ 0.30714528898208565
121
+ ],
122
+ [
123
+ "air",
124
+ 0.29978554690340886
125
+ ],
126
+ [
127
+ "smoke",
128
+ 0.19547149449356388
129
+ ],
130
+ [
131
+ "taste",
132
+ 0.19547149449356388
133
+ ]
134
+ ],
135
+ "5": [
136
+ [
137
+ "english",
138
+ 0.4504386781775388
139
+ ],
140
+ [
141
+ "emergency",
142
+ 0.379178358375766
143
+ ],
144
+ [
145
+ "spanish",
146
+ 0.3611251470424905
147
+ ],
148
+ [
149
+ "chinese",
150
+ 0.3317092027769569
151
+ ],
152
+ [
153
+ "heat",
154
+ 0.3317092027769569
155
+ ]
156
+ ],
157
+ "6": [
158
+ [
159
+ "exemption",
160
+ 0.693831167446274
161
+ ],
162
+ [
163
+ "commercial",
164
+ 0.49112096865161
165
+ ],
166
+ [
167
+ "tax",
168
+ 0.40939072124701686
169
+ ],
170
+ [
171
+ "business",
172
+ 0.33495604465665013
173
+ ],
174
+ [
175
+ "refund",
176
+ 0.17799030392909884
177
+ ]
178
+ ]
179
+ },
180
+ "topics": [
181
+ -1,
182
+ 4,
183
+ 0,
184
+ 2,
185
+ 0,
186
+ 0,
187
+ 0,
188
+ 3,
189
+ -1,
190
+ 0,
191
+ 0,
192
+ 0,
193
+ 0,
194
+ -1,
195
+ 1,
196
+ -1,
197
+ 0,
198
+ 0,
199
+ -1,
200
+ 0,
201
+ 0,
202
+ 0,
203
+ 1,
204
+ 1,
205
+ 2,
206
+ 3,
207
+ 0,
208
+ -1,
209
+ -1,
210
+ -1,
211
+ 4,
212
+ -1,
213
+ -1,
214
+ 0,
215
+ 0,
216
+ 0,
217
+ -1,
218
+ 1,
219
+ -1,
220
+ 3,
221
+ 3,
222
+ -1,
223
+ 3,
224
+ 3,
225
+ 1,
226
+ 4,
227
+ -1,
228
+ 2,
229
+ 0,
230
+ 0,
231
+ 3,
232
+ -1,
233
+ -1,
234
+ 0,
235
+ 0,
236
+ -1,
237
+ -1,
238
+ -1,
239
+ 3,
240
+ 0,
241
+ 0,
242
+ 2,
243
+ 2,
244
+ 0,
245
+ 2,
246
+ 1,
247
+ 0,
248
+ 2,
249
+ 2,
250
+ 0,
251
+ -1,
252
+ 2,
253
+ -1,
254
+ 0,
255
+ 2,
256
+ 0,
257
+ 0,
258
+ -1,
259
+ 0,
260
+ -1,
261
+ 2,
262
+ 0,
263
+ 2,
264
+ 2,
265
+ 4,
266
+ 1,
267
+ 0,
268
+ 0,
269
+ 1,
270
+ 0,
271
+ 0,
272
+ -1,
273
+ 0,
274
+ 0,
275
+ 4,
276
+ -1,
277
+ -1,
278
+ -1,
279
+ -1,
280
+ 0,
281
+ -1,
282
+ 4,
283
+ -1,
284
+ -1,
285
+ 1,
286
+ 0,
287
+ 3,
288
+ -1,
289
+ 4,
290
+ 3,
291
+ -1,
292
+ 2,
293
+ -1,
294
+ 0,
295
+ -1,
296
+ 2,
297
+ 1,
298
+ 0,
299
+ -1,
300
+ 2,
301
+ 0,
302
+ 0,
303
+ 1,
304
+ -1,
305
+ 0,
306
+ 4,
307
+ 0,
308
+ 2,
309
+ 0,
310
+ -1,
311
+ 0,
312
+ 1,
313
+ -1,
314
+ 0,
315
+ 0,
316
+ -1,
317
+ 0,
318
+ 3,
319
+ 2,
320
+ 0,
321
+ 0,
322
+ 3,
323
+ -1,
324
+ -1,
325
+ 0,
326
+ 2,
327
+ 2,
328
+ 0,
329
+ 2,
330
+ -1,
331
+ 0,
332
+ 2,
333
+ 2,
334
+ 1,
335
+ 2,
336
+ -1,
337
+ 3,
338
+ -1,
339
+ -1,
340
+ -1,
341
+ 0,
342
+ -1,
343
+ -1,
344
+ 0,
345
+ 0,
346
+ 4,
347
+ 0,
348
+ 0,
349
+ 0,
350
+ -1,
351
+ 4,
352
+ -1,
353
+ 3,
354
+ 4,
355
+ 4,
356
+ -1,
357
+ -1,
358
+ 0,
359
+ 2,
360
+ 0,
361
+ 0,
362
+ -1,
363
+ 0,
364
+ -1,
365
+ 0,
366
+ 3,
367
+ 3,
368
+ 0,
369
+ -1,
370
+ -1,
371
+ 1,
372
+ 0,
373
+ 3,
374
+ 0,
375
+ -1,
376
+ 1,
377
+ -1,
378
+ -1,
379
+ -1,
380
+ 0,
381
+ 0,
382
+ 1,
383
+ -1,
384
+ -1,
385
+ 2,
386
+ 0,
387
+ -1,
388
+ 0,
389
+ 1,
390
+ 0,
391
+ -1,
392
+ 0,
393
+ 0,
394
+ 1,
395
+ -1,
396
+ 2,
397
+ 0,
398
+ -1,
399
+ -1,
400
+ 4,
401
+ -1,
402
+ 0,
403
+ 0,
404
+ 0,
405
+ -1,
406
+ 1,
407
+ 0,
408
+ -1,
409
+ -1,
410
+ 1,
411
+ 0,
412
+ 4,
413
+ -1,
414
+ -1,
415
+ -1,
416
+ 0,
417
+ -1,
418
+ 0,
419
+ -1,
420
+ -1,
421
+ -1,
422
+ 0,
423
+ -1,
424
+ 1,
425
+ 1,
426
+ -1,
427
+ 0,
428
+ -1,
429
+ -1,
430
+ 2,
431
+ 3,
432
+ 2,
433
+ -1,
434
+ 0,
435
+ -1,
436
+ 1,
437
+ 0,
438
+ 3,
439
+ -1,
440
+ 0,
441
+ 0,
442
+ 4,
443
+ -1,
444
+ -1,
445
+ 0,
446
+ -1,
447
+ -1,
448
+ 0,
449
+ -1,
450
+ 3,
451
+ 0,
452
+ -1,
453
+ 2,
454
+ 4,
455
+ 0,
456
+ 2,
457
+ 0,
458
+ 2,
459
+ 3,
460
+ 0,
461
+ -1,
462
+ -1,
463
+ 0,
464
+ 1,
465
+ 6,
466
+ 0,
467
+ 0,
468
+ -1,
469
+ -1,
470
+ -1,
471
+ 1,
472
+ 0,
473
+ 1,
474
+ -1,
475
+ 0,
476
+ -1,
477
+ 2,
478
+ 3,
479
+ 0,
480
+ 1,
481
+ -1,
482
+ 0,
483
+ 0,
484
+ 4,
485
+ 0,
486
+ 2,
487
+ 1,
488
+ -1,
489
+ 2,
490
+ 1,
491
+ -1,
492
+ 0,
493
+ 1,
494
+ -1,
495
+ 0,
496
+ 3,
497
+ 1,
498
+ 0,
499
+ 0,
500
+ -1,
501
+ 1,
502
+ 0,
503
+ 1,
504
+ 0,
505
+ -1,
506
+ 1,
507
+ 1,
508
+ -1,
509
+ 0,
510
+ 4,
511
+ -1,
512
+ 0,
513
+ 0,
514
+ 0,
515
+ -1,
516
+ 3,
517
+ -1,
518
+ 3,
519
+ -1,
520
+ -1,
521
+ -1,
522
+ -1,
523
+ -1,
524
+ 4,
525
+ 0,
526
+ 2,
527
+ 0,
528
+ 4,
529
+ 0,
530
+ 2,
531
+ 0,
532
+ 0,
533
+ -1,
534
+ 6,
535
+ -1,
536
+ 4,
537
+ 0,
538
+ 4,
539
+ 0,
540
+ 0,
541
+ 0,
542
+ 1,
543
+ -1,
544
+ 2,
545
+ 1,
546
+ 0,
547
+ 0,
548
+ 1,
549
+ 4,
550
+ 0,
551
+ 0,
552
+ -1,
553
+ -1,
554
+ -1,
555
+ 0,
556
+ 2,
557
+ 0,
558
+ -1,
559
+ 3,
560
+ 0,
561
+ -1,
562
+ 0,
563
+ 0,
564
+ 0,
565
+ 0,
566
+ 0,
567
+ 0,
568
+ -1,
569
+ -1,
570
+ 0,
571
+ -1,
572
+ 0,
573
+ -1,
574
+ 4,
575
+ 0,
576
+ 0,
577
+ 0,
578
+ 0,
579
+ -1,
580
+ 0,
581
+ 0,
582
+ 4,
583
+ 0,
584
+ -1,
585
+ 0,
586
+ 0,
587
+ -1,
588
+ 3,
589
+ 2,
590
+ -1,
591
+ -1,
592
+ 2,
593
+ -1,
594
+ -1,
595
+ 0,
596
+ 3,
597
+ 2,
598
+ -1,
599
+ -1,
600
+ -1,
601
+ -1,
602
+ 4,
603
+ 0,
604
+ -1,
605
+ 0,
606
+ 0,
607
+ 3,
608
+ 2,
609
+ -1,
610
+ 0,
611
+ 0,
612
+ 0,
613
+ -1,
614
+ 1,
615
+ 3,
616
+ 4,
617
+ 1,
618
+ 0,
619
+ 1,
620
+ -1,
621
+ 4,
622
+ 0,
623
+ -1,
624
+ 0,
625
+ 2,
626
+ 4,
627
+ 0,
628
+ -1,
629
+ 0,
630
+ -1,
631
+ -1,
632
+ 0,
633
+ 2,
634
+ -1,
635
+ 0,
636
+ 0,
637
+ 2,
638
+ -1,
639
+ 0,
640
+ 0,
641
+ 0,
642
+ 0,
643
+ 0,
644
+ -1,
645
+ 0,
646
+ 0,
647
+ 0,
648
+ 4,
649
+ 1,
650
+ 0,
651
+ 0,
652
+ 1,
653
+ -1,
654
+ 0,
655
+ 5,
656
+ 0,
657
+ -1,
658
+ 4,
659
+ 1,
660
+ 0,
661
+ 4,
662
+ -1,
663
+ -1,
664
+ 4,
665
+ 2,
666
+ -1,
667
+ 4,
668
+ 0,
669
+ -1,
670
+ -1,
671
+ 3,
672
+ 2,
673
+ -1,
674
+ 3,
675
+ 5,
676
+ -1,
677
+ -1,
678
+ -1,
679
+ 0,
680
+ 3,
681
+ 2,
682
+ -1,
683
+ -1,
684
+ 0,
685
+ 2,
686
+ 1,
687
+ 3,
688
+ 0,
689
+ 1,
690
+ 3,
691
+ -1,
692
+ 0,
693
+ -1,
694
+ 4,
695
+ 0,
696
+ 0,
697
+ 1,
698
+ -1,
699
+ -1,
700
+ -1,
701
+ -1,
702
+ -1,
703
+ -1,
704
+ -1,
705
+ 0,
706
+ 0,
707
+ 0,
708
+ -1,
709
+ 3,
710
+ 0,
711
+ -1,
712
+ 1,
713
+ 2,
714
+ -1,
715
+ -1,
716
+ -1,
717
+ -1,
718
+ -1,
719
+ 0,
720
+ 0,
721
+ -1,
722
+ -1,
723
+ -1,
724
+ 0,
725
+ 0,
726
+ 0,
727
+ -1,
728
+ -1,
729
+ 1,
730
+ 0,
731
+ 2,
732
+ 0,
733
+ -1,
734
+ -1,
735
+ 1,
736
+ 0,
737
+ -1,
738
+ 0,
739
+ -1,
740
+ 6,
741
+ -1,
742
+ 6,
743
+ 0,
744
+ 0,
745
+ 3,
746
+ -1,
747
+ 0,
748
+ -1,
749
+ 0,
750
+ 1,
751
+ -1,
752
+ 0,
753
+ -1,
754
+ -1,
755
+ -1,
756
+ 2,
757
+ -1,
758
+ 3,
759
+ -1,
760
+ 0,
761
+ 3,
762
+ 2,
763
+ -1,
764
+ 6,
765
+ 4,
766
+ 4,
767
+ -1,
768
+ 4,
769
+ 3,
770
+ -1,
771
+ 0,
772
+ 0,
773
+ -1,
774
+ -1,
775
+ 6,
776
+ -1,
777
+ 0,
778
+ 2,
779
+ -1,
780
+ 0,
781
+ 0,
782
+ 2,
783
+ 0,
784
+ 0,
785
+ 2,
786
+ -1,
787
+ 0,
788
+ -1,
789
+ -1,
790
+ -1,
791
+ -1,
792
+ 1,
793
+ -1,
794
+ 1,
795
+ 2,
796
+ 2,
797
+ 0,
798
+ -1,
799
+ 2,
800
+ 0,
801
+ -1,
802
+ 1,
803
+ -1,
804
+ -1,
805
+ 1,
806
+ 3,
807
+ -1,
808
+ 0,
809
+ 2,
810
+ -1,
811
+ 2,
812
+ -1,
813
+ 1,
814
+ 1,
815
+ 0,
816
+ 3,
817
+ 2,
818
+ 0,
819
+ 3,
820
+ 3,
821
+ -1,
822
+ 0,
823
+ -1,
824
+ -1,
825
+ 0,
826
+ 0,
827
+ -1,
828
+ 0,
829
+ 1,
830
+ -1,
831
+ 2,
832
+ 4,
833
+ 0,
834
+ -1,
835
+ -1,
836
+ 0,
837
+ 0,
838
+ 0,
839
+ -1,
840
+ 2,
841
+ 0,
842
+ 1,
843
+ -1,
844
+ 4,
845
+ 0,
846
+ 0,
847
+ 0,
848
+ 0,
849
+ -1,
850
+ 4,
851
+ 0,
852
+ -1,
853
+ -1,
854
+ -1,
855
+ 3,
856
+ 3,
857
+ 2,
858
+ 0,
859
+ 1,
860
+ -1,
861
+ 0,
862
+ -1,
863
+ -1,
864
+ -1,
865
+ 0,
866
+ 0,
867
+ 4,
868
+ 2,
869
+ 0,
870
+ -1,
871
+ -1,
872
+ -1,
873
+ 3,
874
+ -1,
875
+ 0,
876
+ -1,
877
+ 4,
878
+ 2,
879
+ 0,
880
+ 0,
881
+ 0,
882
+ -1,
883
+ 1,
884
+ 0,
885
+ 0,
886
+ 2,
887
+ -1,
888
+ 0,
889
+ 3,
890
+ 4,
891
+ -1,
892
+ 2,
893
+ -1,
894
+ 4,
895
+ -1,
896
+ 0,
897
+ 2,
898
+ 0,
899
+ -1,
900
+ -1,
901
+ -1,
902
+ -1,
903
+ 0,
904
+ 0,
905
+ 5,
906
+ 2,
907
+ -1,
908
+ 0,
909
+ 0,
910
+ 0,
911
+ -1,
912
+ 1,
913
+ -1,
914
+ 2,
915
+ 2,
916
+ 0,
917
+ 0,
918
+ 3,
919
+ 0,
920
+ 0,
921
+ 6,
922
+ 0,
923
+ 0,
924
+ 2,
925
+ -1,
926
+ -1,
927
+ -1,
928
+ -1,
929
+ -1,
930
+ -1,
931
+ -1,
932
+ 3,
933
+ -1,
934
+ -1,
935
+ -1,
936
+ -1,
937
+ 0,
938
+ 0,
939
+ 3,
940
+ -1,
941
+ -1,
942
+ 0,
943
+ -1,
944
+ 0,
945
+ 6,
946
+ -1,
947
+ 0,
948
+ 1,
949
+ 1,
950
+ 1,
951
+ 1,
952
+ 0,
953
+ 2,
954
+ 1,
955
+ 1,
956
+ -1,
957
+ 1,
958
+ -1,
959
+ -1,
960
+ 0,
961
+ 0,
962
+ -1,
963
+ -1,
964
+ 0,
965
+ 0,
966
+ 1,
967
+ 0,
968
+ -1,
969
+ -1,
970
+ -1,
971
+ 6,
972
+ -1,
973
+ 3,
974
+ -1,
975
+ -1,
976
+ 3,
977
+ 0,
978
+ 0,
979
+ 0,
980
+ 3,
981
+ 0,
982
+ 0,
983
+ -1,
984
+ 2,
985
+ 0,
986
+ -1,
987
+ -1,
988
+ 0,
989
+ -1,
990
+ -1,
991
+ 3,
992
+ 2,
993
+ 0,
994
+ -1,
995
+ 0,
996
+ 1,
997
+ -1,
998
+ -1,
999
+ 0,
1000
+ -1,
1001
+ -1,
1002
+ 1,
1003
+ 2,
1004
+ -1,
1005
+ 0,
1006
+ -1,
1007
+ -1,
1008
+ -1,
1009
+ 0,
1010
+ -1,
1011
+ 6,
1012
+ -1,
1013
+ -1,
1014
+ -1,
1015
+ 2,
1016
+ -1,
1017
+ -1,
1018
+ -1,
1019
+ 0,
1020
+ -1,
1021
+ -1,
1022
+ 0,
1023
+ 1,
1024
+ -1,
1025
+ 0,
1026
+ -1,
1027
+ -1,
1028
+ -1,
1029
+ -1,
1030
+ 0,
1031
+ 3,
1032
+ -1,
1033
+ -1,
1034
+ -1,
1035
+ 2,
1036
+ 2,
1037
+ -1,
1038
+ 0,
1039
+ 0,
1040
+ 0,
1041
+ -1,
1042
+ 0,
1043
+ 0,
1044
+ 1,
1045
+ 2,
1046
+ -1,
1047
+ -1,
1048
+ 1,
1049
+ 0,
1050
+ 2,
1051
+ 1,
1052
+ 0,
1053
+ 1,
1054
+ 1,
1055
+ 0,
1056
+ 0,
1057
+ -1,
1058
+ 6,
1059
+ 2,
1060
+ -1,
1061
+ -1,
1062
+ 5,
1063
+ -1,
1064
+ -1,
1065
+ -1,
1066
+ 6,
1067
+ -1,
1068
+ 2,
1069
+ -1,
1070
+ 0,
1071
+ 0,
1072
+ -1,
1073
+ -1,
1074
+ -1,
1075
+ 0,
1076
+ -1,
1077
+ -1,
1078
+ 0,
1079
+ -1,
1080
+ 1,
1081
+ -1,
1082
+ 5,
1083
+ 0,
1084
+ 6,
1085
+ -1,
1086
+ -1,
1087
+ -1,
1088
+ 0,
1089
+ 0,
1090
+ -1,
1091
+ 0,
1092
+ -1,
1093
+ 0,
1094
+ 2,
1095
+ 0,
1096
+ -1,
1097
+ 0,
1098
+ -1,
1099
+ 1,
1100
+ 0,
1101
+ 3,
1102
+ 0,
1103
+ 2,
1104
+ 1,
1105
+ -1,
1106
+ -1,
1107
+ 0,
1108
+ 2,
1109
+ -1,
1110
+ 0,
1111
+ 1,
1112
+ -1,
1113
+ 0,
1114
+ 0,
1115
+ -1,
1116
+ 0,
1117
+ -1,
1118
+ 1,
1119
+ 6,
1120
+ 6,
1121
+ 0,
1122
+ -1,
1123
+ -1,
1124
+ -1,
1125
+ -1,
1126
+ 6,
1127
+ 6,
1128
+ -1,
1129
+ 1,
1130
+ 1,
1131
+ 1,
1132
+ 0,
1133
+ -1,
1134
+ -1,
1135
+ -1,
1136
+ 1,
1137
+ 1,
1138
+ -1,
1139
+ 1,
1140
+ 1,
1141
+ -1,
1142
+ 1,
1143
+ -1,
1144
+ 0,
1145
+ -1,
1146
+ 1,
1147
+ -1,
1148
+ -1,
1149
+ 6,
1150
+ -1,
1151
+ 1,
1152
+ -1,
1153
+ 6,
1154
+ 0,
1155
+ 0,
1156
+ 1,
1157
+ -1,
1158
+ 4,
1159
+ 1,
1160
+ 1,
1161
+ 1,
1162
+ 0,
1163
+ -1,
1164
+ 5,
1165
+ 0,
1166
+ -1,
1167
+ -1,
1168
+ -1,
1169
+ 0,
1170
+ -1,
1171
+ -1,
1172
+ 1,
1173
+ 1,
1174
+ 0,
1175
+ 1,
1176
+ 2,
1177
+ 1,
1178
+ -1,
1179
+ 1,
1180
+ 1,
1181
+ 1,
1182
+ 0,
1183
+ 0,
1184
+ -1,
1185
+ 6,
1186
+ 6,
1187
+ 1,
1188
+ 1,
1189
+ 1,
1190
+ 0,
1191
+ -1,
1192
+ 1,
1193
+ -1,
1194
+ 1,
1195
+ -1,
1196
+ -1,
1197
+ 1,
1198
+ 1,
1199
+ 5,
1200
+ 1,
1201
+ 1,
1202
+ 3,
1203
+ 1,
1204
+ -1,
1205
+ -1,
1206
+ 6,
1207
+ 1,
1208
+ 6,
1209
+ 1,
1210
+ 5,
1211
+ -1,
1212
+ 1,
1213
+ 5,
1214
+ 1,
1215
+ -1,
1216
+ 1,
1217
+ 1,
1218
+ 1,
1219
+ 0,
1220
+ 6,
1221
+ 1,
1222
+ 1,
1223
+ 1,
1224
+ 1,
1225
+ 1,
1226
+ 0,
1227
+ 1,
1228
+ -1,
1229
+ 1,
1230
+ -1,
1231
+ -1,
1232
+ -1,
1233
+ 6,
1234
+ 1,
1235
+ 5,
1236
+ -1,
1237
+ 1,
1238
+ 1,
1239
+ 0,
1240
+ -1,
1241
+ 1,
1242
+ -1,
1243
+ 0,
1244
+ 1,
1245
+ 1,
1246
+ 5,
1247
+ 6,
1248
+ -1,
1249
+ -1,
1250
+ 1,
1251
+ -1,
1252
+ -1,
1253
+ -1,
1254
+ 0,
1255
+ -1,
1256
+ 5,
1257
+ -1,
1258
+ 1,
1259
+ 1,
1260
+ 5,
1261
+ 0,
1262
+ 5,
1263
+ 5,
1264
+ -1,
1265
+ -1,
1266
+ 1,
1267
+ 0,
1268
+ 5,
1269
+ 5,
1270
+ -1,
1271
+ 5,
1272
+ -1,
1273
+ -1,
1274
+ 1,
1275
+ 5,
1276
+ 5,
1277
+ 5,
1278
+ 1,
1279
+ 5,
1280
+ 1,
1281
+ -1,
1282
+ 5,
1283
+ 1,
1284
+ 5,
1285
+ 0,
1286
+ -1,
1287
+ 1,
1288
+ 0,
1289
+ 5,
1290
+ 1,
1291
+ 1,
1292
+ 1,
1293
+ 1,
1294
+ 5,
1295
+ 1,
1296
+ 0,
1297
+ -1,
1298
+ 1,
1299
+ 5,
1300
+ 0,
1301
+ 5,
1302
+ 5,
1303
+ 1,
1304
+ 5,
1305
+ 5,
1306
+ 0,
1307
+ 5,
1308
+ -1,
1309
+ 0,
1310
+ 5,
1311
+ -1,
1312
+ -1,
1313
+ 0,
1314
+ 1,
1315
+ 1,
1316
+ -1,
1317
+ 6,
1318
+ -1,
1319
+ 1,
1320
+ 5,
1321
+ -1,
1322
+ 5,
1323
+ 5,
1324
+ 6,
1325
+ 1,
1326
+ 1,
1327
+ 1,
1328
+ 6,
1329
+ 1,
1330
+ 1,
1331
+ 5,
1332
+ 5,
1333
+ 1,
1334
+ 0,
1335
+ 1,
1336
+ -1,
1337
+ 5,
1338
+ 1,
1339
+ 2,
1340
+ 1,
1341
+ 1,
1342
+ 1,
1343
+ 2,
1344
+ 1,
1345
+ 4,
1346
+ 4,
1347
+ 2,
1348
+ -1,
1349
+ 1,
1350
+ 0,
1351
+ 6,
1352
+ -1,
1353
+ 5,
1354
+ 1,
1355
+ 1,
1356
+ -1,
1357
+ -1,
1358
+ -1,
1359
+ 5,
1360
+ 1,
1361
+ -1,
1362
+ 5,
1363
+ -1,
1364
+ 5,
1365
+ 1,
1366
+ 0,
1367
+ 3,
1368
+ 1,
1369
+ -1,
1370
+ -1,
1371
+ -1,
1372
+ 5,
1373
+ 1,
1374
+ 1,
1375
+ 4,
1376
+ -1,
1377
+ 1,
1378
+ 1,
1379
+ 1,
1380
+ 1,
1381
+ -1,
1382
+ 1,
1383
+ 5,
1384
+ 0,
1385
+ 1,
1386
+ 1,
1387
+ -1,
1388
+ 0,
1389
+ 0,
1390
+ 0,
1391
+ -1,
1392
+ 0,
1393
+ 4,
1394
+ 0,
1395
+ -1,
1396
+ -1,
1397
+ -1,
1398
+ -1,
1399
+ 0,
1400
+ 4,
1401
+ 0,
1402
+ -1,
1403
+ 0,
1404
+ -1,
1405
+ 3,
1406
+ 1,
1407
+ 1,
1408
+ 1,
1409
+ -1,
1410
+ -1,
1411
+ -1,
1412
+ -1,
1413
+ 1,
1414
+ 1,
1415
+ 1,
1416
+ 1,
1417
+ -1,
1418
+ -1,
1419
+ 6,
1420
+ -1,
1421
+ 0,
1422
+ 1,
1423
+ 0,
1424
+ -1,
1425
+ 5,
1426
+ 5,
1427
+ 4,
1428
+ 5,
1429
+ 5,
1430
+ 0,
1431
+ 3,
1432
+ 0,
1433
+ -1,
1434
+ 0,
1435
+ -1,
1436
+ -1,
1437
+ -1,
1438
+ -1,
1439
+ -1,
1440
+ -1,
1441
+ -1,
1442
+ 6,
1443
+ 4,
1444
+ -1,
1445
+ 6,
1446
+ -1,
1447
+ 6,
1448
+ -1,
1449
+ -1,
1450
+ -1,
1451
+ 0,
1452
+ 0,
1453
+ 6,
1454
+ -1,
1455
+ 0,
1456
+ -1,
1457
+ 0,
1458
+ -1,
1459
+ 0,
1460
+ 6,
1461
+ 3,
1462
+ 6,
1463
+ 0,
1464
+ 0,
1465
+ -1,
1466
+ 0,
1467
+ 0,
1468
+ -1,
1469
+ 1,
1470
+ -1,
1471
+ -1,
1472
+ 6,
1473
+ -1,
1474
+ -1,
1475
+ 0,
1476
+ -1,
1477
+ -1,
1478
+ 0,
1479
+ -1,
1480
+ 5,
1481
+ 2,
1482
+ -1,
1483
+ -1,
1484
+ 6,
1485
+ 1,
1486
+ 1,
1487
+ 0,
1488
+ -1,
1489
+ 0,
1490
+ -1,
1491
+ -1,
1492
+ 0,
1493
+ 0,
1494
+ 6
1495
+ ],
1496
+ "topic_sizes": {
1497
+ "-1": 458,
1498
+ "4": 53,
1499
+ "0": 366,
1500
+ "2": 94,
1501
+ "3": 62,
1502
+ "1": 191,
1503
+ "6": 40,
1504
+ "5": 50
1505
+ },
1506
+ "topic_mapper": [
1507
+ [
1508
+ -1,
1509
+ -1,
1510
+ -1,
1511
+ -1
1512
+ ],
1513
+ [
1514
+ 0,
1515
+ 0,
1516
+ 3,
1517
+ 5
1518
+ ],
1519
+ [
1520
+ 1,
1521
+ 1,
1522
+ 2,
1523
+ 3
1524
+ ],
1525
+ [
1526
+ 2,
1527
+ 2,
1528
+ 1,
1529
+ 0
1530
+ ],
1531
+ [
1532
+ 3,
1533
+ 3,
1534
+ 4,
1535
+ 4
1536
+ ],
1537
+ [
1538
+ 4,
1539
+ 4,
1540
+ 4,
1541
+ 4
1542
+ ],
1543
+ [
1544
+ 5,
1545
+ 5,
1546
+ 2,
1547
+ 3
1548
+ ],
1549
+ [
1550
+ 6,
1551
+ 6,
1552
+ 0,
1553
+ 2
1554
+ ],
1555
+ [
1556
+ 7,
1557
+ 7,
1558
+ 0,
1559
+ 2
1560
+ ],
1561
+ [
1562
+ 8,
1563
+ 8,
1564
+ 0,
1565
+ 2
1566
+ ],
1567
+ [
1568
+ 9,
1569
+ 9,
1570
+ 0,
1571
+ 2
1572
+ ],
1573
+ [
1574
+ 10,
1575
+ 10,
1576
+ 1,
1577
+ 0
1578
+ ],
1579
+ [
1580
+ 11,
1581
+ 11,
1582
+ 1,
1583
+ 0
1584
+ ],
1585
+ [
1586
+ 12,
1587
+ 12,
1588
+ 1,
1589
+ 0
1590
+ ],
1591
+ [
1592
+ 13,
1593
+ 13,
1594
+ 1,
1595
+ 0
1596
+ ],
1597
+ [
1598
+ 14,
1599
+ 14,
1600
+ 1,
1601
+ 0
1602
+ ],
1603
+ [
1604
+ 15,
1605
+ 15,
1606
+ 5,
1607
+ 1
1608
+ ],
1609
+ [
1610
+ 16,
1611
+ 16,
1612
+ 1,
1613
+ 0
1614
+ ],
1615
+ [
1616
+ 17,
1617
+ 17,
1618
+ 1,
1619
+ 0
1620
+ ],
1621
+ [
1622
+ 18,
1623
+ 18,
1624
+ 1,
1625
+ 0
1626
+ ],
1627
+ [
1628
+ 19,
1629
+ 19,
1630
+ 5,
1631
+ 1
1632
+ ],
1633
+ [
1634
+ 20,
1635
+ 20,
1636
+ 5,
1637
+ 1
1638
+ ],
1639
+ [
1640
+ 21,
1641
+ 21,
1642
+ 5,
1643
+ 1
1644
+ ],
1645
+ [
1646
+ 22,
1647
+ 22,
1648
+ 6,
1649
+ 6
1650
+ ],
1651
+ [
1652
+ 23,
1653
+ 23,
1654
+ 6,
1655
+ 6
1656
+ ]
1657
+ ],
1658
+ "topic_labels": {
1659
+ "-1": "-1_order_property_inspection_condition",
1660
+ "0": "0_damaged_sign_sidewalk_missing",
1661
+ "1": "1_license_complaint_illegal_violation",
1662
+ "2": "2_water_basin_litter_missed",
1663
+ "3": "3_noise_animal_truck_dead",
1664
+ "4": "4_odor_food_air_smoke",
1665
+ "5": "5_english_emergency_spanish_chinese",
1666
+ "6": "6_exemption_commercial_tax_business"
1667
+ },
1668
+ "custom_labels": null,
1669
+ "_outliers": 1,
1670
+ "topic_aspects": {}
1671
+ }
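topics.json above ends with the per-topic metadata: topic sizes, the topic mapper, and auto-generated labels for each topic (topic -1 is the outlier bucket BERTopic reserves for unassigned documents). A short sketch, grounded directly in the keys visible above, for inspecting those fields:

```python
import json

with open("models/BERTopic/topics.json") as f:
    topics = json.load(f)

# Print each topic's label together with how many documents it covers.
for topic_id, label in topics["topic_labels"].items():
    size = topics["topic_sizes"].get(topic_id, 0)
    print(f"topic {topic_id} ({size} docs): {label}")
```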
models/final_model.json ADDED
The diff for this file is too large to render. See raw diff
 
reports/311_data_1.html ADDED
The diff for this file is too large to render. See raw diff
 
reports/weather_data_after2016_ts.html ADDED
The diff for this file is too large to render. See raw diff
 
reports/weather_data_ts.html ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,26 @@
+ beautifulsoup4==4.12.3
+ bertopic==0.16.1
+ bs4==0.0.2
+ bokeh==3.4.1
+ darts==0.29.0
+ folium==0.16.0
+ gradio==4.27.0
+ ipykernel==6.29.4
+ ipywidgets==8.1.2
+ jupyterlab==4.1.8
+ matplotlib==3.8.4
+ nbformat==5.10.4
+ nltk==3.8.1
+ numpy==1.26.4
+ openpyxl==3.1.2
+ pandas==2.2.2
+ plotly==5.21.0
+ polars==0.20.21
+ prophet==1.1.5
+ pyarrow==16.0.0
+ scikit-learn==1.4.2
+ scipy==1.13.0
+ seaborn==0.13.2
+ --extra-index-url https://download.pytorch.org/whl/cu121
+ torch==2.2.2
+ xgboost==2.0.3
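
Note that the `--extra-index-url` line makes pip resolve `torch==2.2.2` from the CUDA 12.1 wheel index. A quick post-install sanity check (a sketch; GPU availability depends on your machine):

```python
import torch

print(torch.__version__)           # expect a 2.2.2 build
# True only when a CUDA-capable GPU is visible; the XGBoost helper in
# utils.py checks this same flag and falls back to CPU otherwise
print(torch.cuda.is_available())
```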
utils.py ADDED
@@ -0,0 +1,1028 @@
+ import pandas as pd
+ import polars as pl
+ import numpy as np
+ import json
+ import gc
+ import folium
+ import html
+ import gradio as gr  # needed by plot_timeseries() below; missing from the original import block
+ from matplotlib import pyplot as plt
+ import seaborn as sns
+ import xgboost as xgb
+ from xgboost import plot_importance
+ from bs4 import BeautifulSoup
+ import plotly.express as px
+ import plotly.graph_objects as go
+ import plotly.figure_factory as ff
+ from plotly.subplots import make_subplots
+ import plotly.io as pio
+ from statsmodels.graphics.tsaplots import plot_pacf, plot_acf
+ from statsmodels.tsa.stattools import kpss, adfuller
+ from bertopic import BERTopic
+ from collections import defaultdict
+
+ color_pal = sns.color_palette("tab10")
+
+ impute_cols = [
+     'MeanTemp', 'MinTemp', 'MaxTemp', 'DewPoint',
+     'Percipitation', 'WindSpeed', 'MaxSustainedWind',
+     'Gust', 'Rain', 'SnowDepth', 'SnowIce',
+ ]
+
+ def convert_schema_to_polars(schema):
+     pl_schema = {}
+     for k, v in schema.items():
+         if v == "String":
+             pl_schema[k] = pl.String
+         elif v == "Float64":
+             pl_schema[k] = pl.Float64
+         elif v == "Int64":
+             pl_schema[k] = pl.Int64
+     return pl_schema
+
+
+ def create_datetime(data, dt_col, format="%m/%d/%Y %I:%M:%S %p"):
+     # data is either a pandas or a polars DataFrame
+     df_type = "pandas" if isinstance(data, pd.DataFrame) else "polars"
+     if "datetime" in str(data[dt_col].dtype).lower():
+         return data
+
+     if df_type == "pandas":
+         data[dt_col] = pd.to_datetime(data[dt_col], format=format)
+     elif df_type == "polars":
+         data = data.with_columns(
+             pl.col(dt_col).str.strptime(pl.Date, format=format).cast(pl.Datetime)
+         )
+
+     return data
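+
+ # Example usage (hypothetical frame, a minimal sketch):
+ #   df = pd.DataFrame({"Created Date": ["01/02/2018 10:30:00 AM"]})
+ #   df = create_datetime(df, "Created Date")  # column parsed to datetime64[ns]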
+
+
+ def create_seasons(data, dt_col="Datetime", out_col="Season", prefix=""):
+     df_type = "pandas" if isinstance(data, pd.DataFrame) else "polars"
+     out_col = prefix + out_col
+     spring_start = pd.to_datetime("2018-3-20", format="%Y-%m-%d").dayofyear
+     summer_start = pd.to_datetime("2018-6-21", format="%Y-%m-%d").dayofyear
+     autumn_start = pd.to_datetime("2018-9-22", format="%Y-%m-%d").dayofyear
+     winter_start = pd.to_datetime("2018-12-21", format="%Y-%m-%d").dayofyear
+
+     if df_type == "pandas":
+         def map_season(date):
+             if date.dayofyear < spring_start or date.dayofyear >= winter_start:
+                 return "Winter"
+             elif date.dayofyear >= spring_start and date.dayofyear < summer_start:
+                 return "Spring"
+             elif date.dayofyear >= summer_start and date.dayofyear < autumn_start:
+                 return "Summer"
+             elif date.dayofyear >= autumn_start and date.dayofyear < winter_start:
+                 return "Autumn"
+         data[out_col] = data[dt_col].apply(map_season)
+         return data
+
+     elif df_type == "polars":
+         def map_season(date):
+             if date.timetuple().tm_yday < spring_start or date.timetuple().tm_yday >= winter_start:
+                 return "Winter"
+             elif date.timetuple().tm_yday >= spring_start and date.timetuple().tm_yday < summer_start:
+                 return "Spring"
+             elif date.timetuple().tm_yday >= summer_start and date.timetuple().tm_yday < autumn_start:
+                 return "Summer"
+             elif date.timetuple().tm_yday >= autumn_start and date.timetuple().tm_yday < winter_start:
+                 return "Autumn"
+
+         data = data.with_columns(
+             pl.col(dt_col).map_elements(map_season, return_dtype=pl.String).alias(out_col)
+         )
+         return data
+
+
+ def create_weekend(data, dt_col="Datetime", out_col="is_weekend", prefix=""):
+     df_type = "pandas" if isinstance(data, pd.DataFrame) else "polars"
+     out_col = prefix + out_col
+
+     if df_type == "pandas":
+         data[out_col] = (data[dt_col].dt.weekday.isin([5, 6])).astype(np.int8)
+
+     elif df_type == "polars":
+         data = data.with_columns(
+             pl.col(dt_col).dt.weekday().is_in([6, 7]).cast(pl.Int8).alias(out_col)
+         )
+
+     return data
+
+
+ def create_holidays(data, dt_col="Datetime", out_col="is_holiday", prefix=""):
+     df_type = "pandas" if isinstance(data, pd.DataFrame) else "polars"
+     out_col = prefix + out_col
+
+     # New Year's is deliberately excluded, as I expect it to have its own distinct effect
+     HOLIDAYS = [
+         pd.to_datetime("2016-01-18"), pd.to_datetime("2016-02-15"),
+         pd.to_datetime("2016-05-30"), pd.to_datetime("2016-07-04"), pd.to_datetime("2016-09-05"),
+         pd.to_datetime("2016-10-10"), pd.to_datetime("2016-11-11"), pd.to_datetime("2016-11-24"),
+         # Christmas is variable (depends on which day is the observed holiday vs. which day is Xmas)
+         pd.to_datetime("2016-12-24"), pd.to_datetime("2016-12-25"), pd.to_datetime("2016-12-26"),
+
+         pd.to_datetime("2017-01-16"), pd.to_datetime("2017-02-20"),
+         pd.to_datetime("2017-05-29"), pd.to_datetime("2017-07-04"), pd.to_datetime("2017-09-04"),
+         pd.to_datetime("2017-10-09"), pd.to_datetime("2017-11-10"), pd.to_datetime("2017-11-23"),
+         pd.to_datetime("2017-12-24"), pd.to_datetime("2017-12-25"),
+
+         pd.to_datetime("2018-01-15"), pd.to_datetime("2018-02-19"),
+         pd.to_datetime("2018-05-28"), pd.to_datetime("2018-07-04"), pd.to_datetime("2018-09-03"),
+         pd.to_datetime("2018-10-08"), pd.to_datetime("2018-11-12"), pd.to_datetime("2018-11-22"),
+         pd.to_datetime("2018-12-24"), pd.to_datetime("2018-12-25"),
+     ]
+
+     if df_type == "pandas":
+         data[out_col] = (data[dt_col].isin(HOLIDAYS)).astype(np.int8)
+
+     elif df_type == "polars":
+         data = data.with_columns(
+             pl.col(dt_col).dt.datetime().is_in(HOLIDAYS).cast(pl.Int8).alias(out_col)
+         )
+     return data
+
+
+ def build_temporal_features(data, dt_col, prefix=""):
+     df_type = "pandas" if isinstance(data, pd.DataFrame) else "polars"
+     if df_type == "pandas" and data.index.name == dt_col:
+         data = data.reset_index()
+
+     if df_type == "pandas":
+         data[prefix+"Year"] = data[dt_col].dt.year.astype(np.int16)
+         data[prefix+"Month"] = data[dt_col].dt.month.astype(np.int8)
+         data[prefix+"Day"] = data[dt_col].dt.day.astype(np.int8)
+         data[prefix+"DayOfYear"] = data[dt_col].dt.dayofyear.astype(np.int16)
+         data[prefix+"DayOfWeek"] = data[dt_col].dt.dayofweek.astype(np.int8)
+     else:
+         data = data.with_columns(**{
+             prefix+"Year": pl.col(dt_col).dt.year().cast(pl.Int16),
+             prefix+"Month": pl.col(dt_col).dt.month().cast(pl.Int8),
+             prefix+"Day": pl.col(dt_col).dt.day().cast(pl.Int8),
+             prefix+"DayOfYear": pl.col(dt_col).dt.ordinal_day().cast(pl.Int16),
+             prefix+"DayOfWeek": pl.col(dt_col).dt.weekday().cast(pl.Int8)
+         })
+
+     data = create_seasons(data, dt_col, prefix=prefix)
+     data = create_weekend(data, dt_col, prefix=prefix)
+     data = create_holidays(data, dt_col, prefix=prefix)
+     return data
+
+
+ def agg_and_merge_historical(curr_df, hist_df, col, agg_cols=[], ops=["mean", "max", "min"]):
+     merge_dict = {}
+     for agg_col in agg_cols:
+         describe_tb = hist_df.groupby(col)[agg_col].describe().reset_index()
+         if col not in merge_dict:
+             merge_dict[col] = describe_tb[col].values
+         for op in ops:
+             merge_col_name = "historical_" + col + "_" + op + "_" + agg_col
+             if op == "mean":
+                 merge_dict[merge_col_name] = describe_tb["mean"].values
+             elif op == "max":
+                 merge_dict[merge_col_name] = describe_tb["max"].values
+             elif op == "min":
+                 merge_dict[merge_col_name] = describe_tb["min"].values
+             elif op == "median":
+                 merge_dict[merge_col_name] = describe_tb["50%"].values
+             elif op == "std":
+                 merge_dict[merge_col_name] = describe_tb["std"].values
+
+     merge_df = pd.merge(curr_df, pd.DataFrame(merge_dict), on=col, how="left")
+     return merge_df
+
+
+ def map_vals(data, cols=["Latitude", "Longitude"], label_cols=[], color="red", submap=None, weight=3, radius=1, sample_size=10000, start_loc=[42.1657, -74.9481], zoom_start=6):
+     cols = list(cols)  # copy, so extending below never mutates the shared default list
+     df_type = "pandas" if isinstance(data, pd.DataFrame) or isinstance(data, pd.Series) else "polars"
+     fig = folium.Figure(height=500, width=750)
+
+     if submap is None:
+         map_nyc = folium.Map(
+             location=start_loc,
+             zoom_start=zoom_start,
+             tiles='cartodbpositron',
+             zoom_control=False,
+             scrollWheelZoom=False,
+             dragging=False
+         )
+     else:
+         map_nyc = submap
+
+     cols.extend(label_cols)
+     if df_type == "pandas":
+         for idx, row in data.loc[:, cols].dropna().sample(sample_size).iterrows():
+             label = ""
+             lat, long = row.iloc[0,], row.iloc[1,]
+             for i, label_col in enumerate(label_cols):
+                 label += label_col + ": " + str(row.iloc[2+i,]) + "\n"
+
+             label_params = {"popup": label, "tooltip": label} if len(label_cols) > 0 else {}
+             folium.CircleMarker(location=[lat, long], radius=radius, weight=weight, color=color, fill_color=color, fill_opacity=0.7, **label_params).add_to(map_nyc)
+     else:
+         for row in data[:, cols].drop_nulls().sample(sample_size).rows():
+             label = ""
+             lat, long = row[0], row[1]
+             for i, label_col in enumerate(label_cols):
+                 label += label_col + ": " + str(row[2+i]) + "\n"
+
+             label_params = {"popup": label, "tooltip": label} if len(label_cols) > 0 else {}
+             folium.CircleMarker(location=[lat, long], radius=radius, weight=weight, color=color, fill_color=color, fill_opacity=0.7, **label_params).add_to(map_nyc)
+
+     fig.add_child(map_nyc)
+     return fig, map_nyc
+
+
+ def find_variable_data(soup, curr_var="Created Date"):
+     src = "<!doctype html>"
+     # HTML and head start
+     src += "<html lang=\"en\">"
+     src += str(soup.find("head"))
+
+     # Body -> content -> container -> row -> variable
+     src += "<body style=\"background-color: var(--table-odd-background-fill); padding-top: 20px;\">"
+     src += "<div class=\"content\" style=\"padding-left: 150px; padding-right: 150px; border: 0px !important; \">"
+     # src += "<div class=\"container\">"
+     src += "<div class=\"section-items\" style=\"background-color: white;\">"
+     # src += "<div class=\"row spacing\">"
+     variables_html = soup.find_all("div", class_="variable")
+     for var_html in variables_html:
+         if var_html.text[:len(curr_var)] == curr_var:
+             parent = var_html.parent
+             parent['style'] = "border: 0px"
+             src += str(parent)
+             break
+
+     src += "</div></div>"
+
+     # Scripts
+     for script in soup.find_all("script"):
+         src += str(script)
+
+     # End
+     src += "</body>"
+     src += "</html>"
+
+     # src = BeautifulSoup(src, 'html.parser').prettify()
+     src_doc = html.escape(src)
+     iframe = f'<iframe width="100%" height="1200px" srcdoc="{src_doc}" frameborder="0"></iframe>'
+     return iframe, src_doc
+
+
+ def plot_autocorr(data, col, apply=None):
+     time_series = data.loc[:, col].to_frame().copy()
+     if apply:
+         time_series[col] = time_series[col].apply(apply)
+     fig, ax = plt.subplots(2, 1, figsize=(12, 8))
+     _ = plot_acf(time_series[col], lags=30, ax=ax[0])
+     _ = plot_pacf(time_series[col], lags=30, method="ols-adjusted", ax=ax[1])
+     _ = plt.suptitle(f"{col}", y=0.95)
+     return fig
+
+
+ def adf_test(timeseries):
+     dftest = adfuller(timeseries, autolag='AIC')
+     dfoutput = pd.Series(dftest[0:4], index=['Test Statistic', 'p-value', 'Lags Used', 'Number of Observations Used'])
+     dfoutput['Number of Observations Used'] = dfoutput['Number of Observations Used'].astype(np.int64)
+     for key, value in dftest[4].items():
+         dfoutput['Critical Value (%s)' % key] = value
+     return dfoutput
+
+
+ def kpss_test(timeseries):
+     kpsstest = kpss(timeseries, regression='ct')
+     kpss_output = pd.Series(kpsstest[0:3], index=['Test Statistic', 'p-value', 'Lags Used'])
+     for key, value in kpsstest[3].items():
+         kpss_output['Critical Value (%s)' % key] = value
+     return kpss_output
+
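+ # Note: the two tests have opposite null hypotheses. ADF's null is a unit
+ # root (rejecting => stationary); KPSS's null is trend-stationarity
+ # (rejecting => non-stationary). test_stationary() below therefore colors
+ # the two columns with inverted logic.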
+
+ def test_stationary(data, var):
+     adf_df = adf_test(data[var].dropna())
+     kpss_df = kpss_test(data[var].dropna())
+     result_df = adf_df.to_frame(name="Augmented-Dickey-Fuller")
+     result_df["KPSS Test"] = kpss_df
+
+     def pass_hypothesis(col):
+         test_stat, p_val = col.iloc[0], col.iloc[1]
+         one_p, five_p, ten_p = col.iloc[4], col.iloc[5], col.iloc[6]
+         if col.name == "KPSS Test":
+             # KPSS: red shades flag evidence of non-stationarity
+             if test_stat < one_p and p_val < 0.01:
+                 color_fmt = ["background-color: #fc5749; font-weight: bold; color: black"]
+             elif test_stat < five_p and p_val < 0.05:
+                 color_fmt = ["background-color: #F88379; font-weight: bold; color: black"]
+             elif test_stat < ten_p and p_val < 0.1:
+                 color_fmt = ["background-color: #ff9f96; font-weight: bold; color: black"]
+             else:
+                 color_fmt = ["background-color: green; font-weight: bold; color: black"]
+         else:
+             # ADF: green shades flag evidence of stationarity
+             if test_stat < one_p and p_val < 0.01:
+                 color_fmt = ["background-color: green; font-weight: bold; color: black"]
+             elif test_stat < five_p and p_val < 0.05:
+                 color_fmt = ["background-color: greenyellow; font-weight: bold; color: black"]
+             elif test_stat < ten_p and p_val < 0.1:
+                 color_fmt = ["background-color: lightgreen; font-weight: bold; color: black"]
+             else:
+                 color_fmt = ["background-color: #fc5749; font-weight: bold; color: black"]
+
+         color_fmt.extend(['' for _ in col[1:]])
+         return color_fmt
+
+     result_df.loc["Lags Used", :] = result_df.loc["Lags Used", :].astype(np.int32)
+     return result_df.style.apply(pass_hypothesis)
+
+
+ def plot_timeseries(data, var, data_name="My", all_vars=[], height=800, width=600, start_date="2017-12-31", end_date="2018-12-31"):
+     if var == "":
+         return gr.update()  # no variable selected: leave the Gradio plot unchanged
+
+     fig = go.Figure()
+     fig.add_trace(
+         go.Scatter(
+             x=data.index,
+             y=data[var],
+             name=var,
+             customdata=np.dstack((data["Season"].to_numpy(), data.reset_index()["Datetime"].dt.day_name().to_numpy(), data["is_holiday"].astype(bool).to_numpy()))[0],
+             hovertemplate='<br>value:%{y:.3f} <br>Season: %{customdata[0]} <br>Weekday: %{customdata[1]} <br>Is Holiday: %{customdata[2]}',
+         )
+     )
+     fig.update_layout(
+         autosize=True,
+         title=f"{data_name} Time Series by {var}",
+         xaxis_title='Date',
+         yaxis_title=var,
+         hovermode='x unified',
+     )
+
+     fig.update_layout(
+         autosize=True,
+         xaxis=dict(
+             rangeselector=dict(
+                 buttons=list([
+                     dict(count=7, label="1w", step="day", stepmode="backward"),
+                     dict(count=21, label="3w", step="day", stepmode="backward"),
+                     dict(count=1, label="1m", step="month", stepmode="backward"),
+                     dict(count=6, label="6m", step="month", stepmode="backward"),
+                     dict(count=1, label="1y", step="year", stepmode="backward"),
+                     dict(step="all")
+                 ])
+             ),
+             rangeslider=dict(
+                 visible=True,
+             ),
+             type="date",
+             range=(start_date, end_date),
+         ),
+     )
+     return fig
+
+
+ def plot_bivariate(data, x, y, subset=None, trendline=True):
+     title = f"Scatterplot of {x} vs. {y}"
+
+     if subset == "None" or subset is None:
+         subset = None
+         height = 450
+     else:
+         subset_title = subset.replace(" String", "")
+         title += f" By {subset_title}"
+         if subset_title in ["Season", "Year"]:
+             height = 450
+         else:
+             height = 800
+
+     if trendline:
+         trendline = "ols"
+     else:
+         trendline = None
+
+     # Special case to view categorical features
+     if x in ["Agency", "Borough", "Descriptor"]:
+         if x == "Agency":
+             prefix = 'AG'
+         elif x == "Borough":
+             prefix = "Borough"
+         else:
+             prefix = "DG"
+
+         categories = [col for col in data.columns if prefix in col]
+         melt_df = pd.melt(data, id_vars=["Target"], value_vars=categories)
+         fig = px.scatter(
+             melt_df,
+             x="value",
+             y="Target",
+             trendline=trendline,
+             facet_col="variable",
+             facet_col_wrap=4,
+             facet_col_spacing=0.05,
+             title=title
+         )
+         height = 800
+
+     else:
+         fig = px.scatter(
+             data,
+             x=x, y=y,
+             trendline=trendline,
+             facet_col=subset,
+             facet_col_wrap=4,
+             facet_col_spacing=0.05,
+             title=title
+         )
+
+     fig.update_layout(
+         autosize=True,
+         height=height,
+     )
+
+     return fig
+
+
+ def plot_seasonality(data, x, y, show_box=True, show_outliers=False):
+     title = f"{y} by {x}"
+
+     if show_box:
+         if show_outliers:
+             points = "outliers"
+         else:
+             points = "all"
+         fig = px.box(data, x=x, y=y, points=points, title=title, facet_col_wrap=4, facet_col_spacing=0.05,)
+     else:
+         fig = px.strip(data, x=x, y=y, title=title, facet_col_wrap=4, facet_col_spacing=0.05,)
+
+     fig.update_layout(
+         autosize=True,
+         height=600,
+     )
+     return fig
+
+
+ def build_service_data(filename):
+     # Loading data directly with polars leads to errors:
+     # some rows end up missing for an unknown reason.
+     # FIX: load with pandas, then convert to polars.
+     service_data_pd = pd.read_csv(filename)
+
+     # Quick test to assure the unique key is in fact unique
+     assert service_data_pd["Unique Key"].nunique() == len(service_data_pd)
+
+     # Load from pandas DataFrame
+     service_data_pd["Incident Zip"] = service_data_pd["Incident Zip"].astype("string")
+     service_data_pd["BBL"] = service_data_pd["BBL"].astype("string")
+     service_data = pl.DataFrame(service_data_pd)
+
+     # Clear some RAM
+     del service_data_pd
+     gc.collect()
+
+     drop_cols = [
+         "Unique Key", "Agency Name", "Location Type", "Incident Zip",
+         "Incident Address", "Street Name", "Cross Street 1",
+         "Cross Street 2", "Intersection Street 1", "Intersection Street 2",
+         "Address Type", "City", "Landmark", "Facility Type",
+         "Status", "Due Date", "Resolution Description",
+         "Resolution Action Updated Date", "Community Board",
+         "BBL", "X Coordinate (State Plane)", "Y Coordinate (State Plane)",
+         "Open Data Channel Type", "Park Facility Name", "Park Borough",
+         "Vehicle Type", "Taxi Company Borough", "Taxi Pick Up Location",
+         "Bridge Highway Name", "Bridge Highway Direction", "Road Ramp",
+         "Bridge Highway Segment", "Location", "Created Year"
+     ]
+
+     # Drop columns and create the date variables
+     service_data = service_data.drop(drop_cols)
+     service_data = create_datetime(service_data, "Created Date")
+     service_data = create_datetime(service_data, "Closed Date")
+
+     # Group by date to get the number of created tickets (as target)
+     sd_grouped = service_data.rename({"Created Date": "Datetime"}).group_by("Datetime").agg(
+         pl.len().alias("Target"),
+     ).sort(by="Datetime")
+
+     # Calculate the number of closed tickets.
+     # The mean created-to-closed gap is used to repair bad closed dates:
+     # mean_diff = service_data.with_columns(
+     #     diff_created_closed = pl.col("Closed Date") - pl.col("Created Date")
+     # ).filter((pl.col("Closed Date").dt.year() >= 2016) & (pl.col("Closed Date").dt.year() < 2020))["diff_created_closed"].mean().days
+     # Precalculated (13 days) to save time:
+     mean_diff = 13
+
+     # Create a new closed date with errors filled using the mean diff above
+     service_data = service_data.with_columns(
+         Closed_Date_New = pl.when(pl.col("Created Date") - pl.col("Closed Date") > pl.duration(days=1))
+         .then(pl.col("Created Date") + pl.duration(days=mean_diff))
+         .otherwise(pl.col("Closed Date")).fill_null(pl.col("Created Date") + pl.duration(days=mean_diff))
+     )
+
+     # Only count tickets whose created date <= new closed date, so future
+     # information is not accidentally leaked across other points in our data
+     closed_tickets = service_data.group_by(["Closed_Date_New", "Created Date"]) \
+         .agg((pl.when(pl.col("Created Date") <= pl.col("Closed_Date_New")).then(1).otherwise(0)).sum().alias("count")) \
+         .sort("Closed_Date_New") \
+         .filter((pl.col("Closed_Date_New").dt.year() >= 2016) & (pl.col("Closed_Date_New").dt.year() < 2019)) \
+         .group_by("Closed_Date_New").agg(pl.col("count").sum().alias("num_closed_tickets"))
+
+     # Rename this column to num_closed_tickets
+     ct_df = closed_tickets.with_columns(
+         pl.col("num_closed_tickets")
+     )
+
+     # Concat the new columns into our data
+     sd_df = pl.concat([sd_grouped, ct_df.drop("Closed_Date_New")], how="horizontal")
+
+     assert len(sd_grouped) == len(ct_df)
+
+     # CATEGORICAL FEATURE MAPPING
+     # MAPPING FOR BOROUGH
+     Borough_Map = {
+         "Unspecified": "OTHER",
+         "2017": "OTHER",
+         None: "OTHER",
+         "2016": "OTHER"
+     }
+     service_data = service_data.with_columns(
+         pl.col("Borough").replace(Borough_Map)
+     )
+
+     # MAPPING FOR AGENCY
+     # This mapping was done manually
+     Agency_Map = {
+         "NYPD": "Security", "HPD": "Buildings", "DOT": "Transportation",
+         "DSNY": "Environment & Sanitation", "DEP": "Environment & Sanitation",
+         "DOB": "Buildings", "DOE": "Buildings", "DPR": "Parks",
+         "DOHMH": "Health", "DOF": "Other", "DHS": "Security",
+         "TLC": "Transportation", "HRA": "Other", "DCA": "Other",
+         "DFTA": "Other", "EDC": "Other", "DOITT": "Other", "OMB": "Other",
+         "DCAS": "Other", "NYCEM": "Other", "ACS": "Other", "3-1-1": "Other",
+         "TAX": "Other", "DCP": "Other", "DORIS": "Other", "FDNY": "Other",
+         "TAT": "Other", "COIB": "Other", "CEO": "Other", "MOC": "Other",
+     }
+
+     service_data = service_data.with_columns(
+         pl.col("Agency").replace(Agency_Map).alias("AG")  # AG shorthand for Agency Groups
+     )
+
+     # Mapping for Descriptor using BERTopic.
+     # Store the unique descriptor strings as a numpy array
+     # (BERTopic does not accept polars directly)
+     descriptor_docs = service_data["Descriptor"].unique().to_numpy()
+
+     # Build our topic mapping using the pretrained BERTopic model:
+     # load the model and get predictions
+     topic_model = BERTopic.load("models/BERTopic")
+     topics, probs = topic_model.transform(descriptor_docs)
+
+     # Visualize if wanted
+     # topic_model.visualize_barchart(list(range(-1, 6, 1)))
+
+     # Create a topic-to-ID map
+     topic_df = topic_model.get_topic_info()
+     topic_id_map = {row["Topic"]: row["Name"][2:] for _, row in topic_df.iterrows()}
+     topic_id_map[-1] = topic_id_map[-1][1:]  # Fix for the -1 topic case
+
+     # For each document (descriptor string) get a mapping of topics
+     doc_to_topic_map = defaultdict(str)
+     for topic_id, doc in zip(topics, descriptor_docs):
+         topic = topic_id_map[topic_id]
+         doc_to_topic_map[doc] = topic
+
+     service_data = service_data.with_columns(
+         pl.col("Descriptor").replace(doc_to_topic_map).alias("DG")  # DG shorthand for Descriptor Groups
+     )
+
+     # One-hot encode the categorical features
+     cat_features = ["AG", "Borough", "DG"]
+     service_data = service_data.to_dummies(columns=cat_features)
+
+     # Group by date and create our category feature vector
+     cat_df = service_data.rename({"Created Date": "Datetime"}).group_by("Datetime").agg(
+         # Categorical feature sums
+         pl.col('^AG_.*$').sum(),
+         pl.col('^Borough_.*$').sum(),
+         pl.col('^DG_.*$').sum(),
+     ).sort(by="Datetime")
+
+     # Concat our category features to our current dataframe
+     sd_df = pl.concat([sd_df, cat_df.drop("Datetime")], how="horizontal")
+
+     # Now that our dataframe is significantly reduced in size,
+     # we can finally convert back to a pandas dataframe,
+     # as pandas is usable across more python packages
+     sd_df = sd_df.to_pandas()
+
+     # Set index to datetime
+     sd_df = sd_df.set_index("Datetime")
+
+     # NOTE: we added 7 new rows to our weather df.
+     # These 7 new rows will essentially be our final pred set;
+     # the Target for these rows is null -> indicating it needs to be predicted.
+     # Add these rows to the service dataframe
+     preds_df = pd.DataFrame({'Datetime': pd.date_range(start=sd_df.index[-1], periods=8, freq='D')})[1:]
+     sd_df = pd.concat([sd_df, preds_df.set_index("Datetime")], axis=0)
+
+     return sd_df
+
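+ # Example usage (hypothetical path, a minimal sketch):
+ #   sd_df = build_service_data("data/311_service_requests.csv")
+ #   sd_df["Target"]  # daily created-ticket counts, indexed by Datetime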
+
+ # Build all weather data from file
+ def build_weather_data(filename):
+     # Use pandas to read the file
+     weather_data = pd.read_csv(filename)
+
+     # Quickly aggregate Year, Month, Day into a datetime object,
+     # because the 311 data uses datetime
+     weather_data["Datetime"] = weather_data["Year"].astype("str") + "-" + weather_data["Month"].astype("str") + "-" + weather_data["Day"].astype("str")
+     weather_data = create_datetime(weather_data, "Datetime", format="%Y-%m-%d")
+
+     # LOCALIZE
+     # Pre-recorded min/max values from the service data (so we don't need it again)
+     lat_min = 40.49804421521046
+     lat_max = 40.91294056699566
+     long_min = -74.25521082506387
+     long_max = -73.70038354802529
+
+     # Create the conditions for location matching
+     mincon_lat = weather_data["Latitude"] >= lat_min
+     maxcon_lat = weather_data["Latitude"] <= lat_max
+     mincon_long = weather_data["Longitude"] >= long_min
+     maxcon_long = weather_data["Longitude"] <= long_max
+
+     # Localize our data to match the service data
+     wd_localized = weather_data.loc[mincon_lat & maxcon_lat & mincon_long & maxcon_long]
+     drop_cols = [
+         "USAF",
+         "WBAN",
+         "StationName",
+         "State",
+         "Latitude",
+         "Longitude"
+     ]
+     wd_localized = wd_localized.drop(columns=drop_cols)
+
+     # AGGREGATE
+     # Map columns to an aggregation method
+     mean_cols = [
+         'MeanTemp',
+         'DewPoint',
+         'Percipitation',
+         'WindSpeed',
+         'Gust',
+         'SnowDepth',
+     ]
+     min_cols = [
+         'MinTemp'
+     ]
+     max_cols = [
+         'MaxTemp',
+         'MaxSustainedWind'
+     ]
+     round_cols = [
+         'Rain',
+         'SnowIce'
+     ]
+
+     # Perform aggregation
+     mean_df = wd_localized.groupby("Datetime")[mean_cols].mean()
+     min_df = wd_localized.groupby("Datetime")[min_cols].min()
+     max_df = wd_localized.groupby("Datetime")[max_cols].max()
+     round_df = wd_localized.groupby("Datetime")[round_cols].mean().round().astype(np.int8)
+     wd_full = pd.concat([mean_df, min_df, max_df, round_df], axis=1)
+
+     # Add seasonal features
+     wd_full = build_temporal_features(wd_full, "Datetime")
+     wd_full["Season"] = wd_full["Season"].astype("category")
+     wd_full = wd_full.set_index("Datetime")
+
+     # We will calculate the imputation for the next 7 days after 12/31/2018,
+     # along with the 49 missing days.
+     # This will act as our "weather forecast".
+     time_steps = 49 + 7
+
+     # Columns to impute
+     impute_cols = [
+         'MeanTemp', 'MinTemp', 'MaxTemp', 'DewPoint',
+         'Percipitation', 'WindSpeed', 'MaxSustainedWind',
+         'Gust', 'Rain', 'SnowDepth', 'SnowIce',
+     ]
+
+     # Which imputation strategy to use for which variable
+     mean_vars = ["WindSpeed", "MaxSustainedWind", "Gust", "SnowDepth"]
+     min_vars = ["SnowIce", "MeanTemp", "MinTemp", "MaxTemp", "DewPoint", "Percipitation"]
+     max_vars = ["Rain"]
+
+     # Use the imputer defined below to create the imputed data
+     preds_mean = impute_missing_weather(wd_full, strategy="mean", time_steps=time_steps, impute_cols=mean_vars)
+     preds_min = impute_missing_weather(wd_full, strategy="min", time_steps=time_steps, impute_cols=min_vars)
+     preds_max = impute_missing_weather(wd_full, strategy="max", time_steps=time_steps, impute_cols=max_vars)
+     all_preds = pd.concat([preds_mean, preds_min, preds_max], axis=1)
+     all_preds = build_temporal_features(all_preds.loc[:, impute_cols], "Datetime")
+     all_preds = all_preds.set_index("Datetime")
+
+     wd_curr = wd_full.loc[wd_full["Year"] >= 2016]
+     wd_df = pd.concat([wd_full, all_preds], axis=0, join="outer")
+
+     time_vars = ["Year", "Month", "Day", "DayOfWeek", "DayOfYear", "is_weekend", "is_holiday", "Season"]
+     # NOTE: drop() result is not assigned, so these columns are in fact kept
+     # (the plotting helpers above rely on Season / is_holiday being present)
+     wd_df.drop(columns=time_vars)
+
+     return wd_df
+
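+ # Example usage (hypothetical path, a minimal sketch):
+ #   wd_df = build_weather_data("data/weather_data.csv")
+ # The last 56 rows (49 missing days + the 7-day horizon) are naively imputed.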
+
+ class MyNaiveImputer():
+     def __init__(self, data, time_steps=49, freq="D"):
+         self.data = data.reset_index().copy()
+         start_date = self.data["Datetime"].max() + pd.Timedelta(days=1)
+         end_date = start_date + pd.Timedelta(days=time_steps - 1)
+         missing_range = pd.date_range(start_date, end_date, freq=freq)  # was hardcoded to "D", ignoring the freq parameter
+         self.missing_df = pd.DataFrame(missing_range, columns=["Datetime"])
+         self.missing_df = build_temporal_features(self.missing_df, "Datetime")
+
+     def impute(self, col, by="DayOfYear", strategy="mean"):
+         def naive_impute_by(val, impute_X, data, by=by, strategy=strategy):
+             if strategy.lower() == "mean":
+                 func = pd.core.groupby.DataFrameGroupBy.mean
+             elif strategy.lower() == "median":
+                 func = pd.core.groupby.DataFrameGroupBy.median
+             elif strategy.lower() == "max":
+                 func = pd.core.groupby.DataFrameGroupBy.max
+             elif strategy.lower() == "min":
+                 func = pd.core.groupby.DataFrameGroupBy.min
+             grouped = func(data.groupby(by)[impute_X])
+             return grouped[val]
+
+         return self.missing_df["DayOfYear"].apply(naive_impute_by, args=(col, self.data, by, strategy))
+
+     def impute_all(self, cols, by="DayOfYear", strategy="mean"):
+         output_df = self.missing_df.copy()
+         for col in cols:
+             output_df[col] = self.impute(col, by, strategy)
+         return output_df
+
+
+ def impute_missing_weather(data, strategy="mean", time_steps=7, impute_cols=impute_cols):
+     final_imputer = MyNaiveImputer(data, time_steps=time_steps)
+     preds = final_imputer.impute_all(impute_cols, strategy=strategy).set_index("Datetime")
+     return preds
+
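+ # Example usage (minimal sketch): impute one week of wind-type variables
+ #   preds = impute_missing_weather(wd_full, strategy="mean", time_steps=7,
+ #                                  impute_cols=["WindSpeed", "Gust"])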
+
+ def get_feature_importance(data, target, split_date="01-01-2016", print_score=False):
+     import torch
+     device = "cuda" if torch.cuda.is_available() else "cpu"
+
+     train = data.loc[data.index <= pd.to_datetime(split_date)]
+     test = data.loc[data.index > pd.to_datetime(split_date)]
+
+     if type(target) == str:
+         X_train, X_test = train.drop(columns=target), test.drop(columns=target)
+         y_train, y_test = train[target], test[target]
+     else:
+         X_train, X_test = train, test
+         y_train, y_test = target.loc[train.index], target.loc[test.index]
+         target = str(target.name)
+
+     if 'int' in y_train.dtype.name:
+         # Use a binary classifier
+         metric = "logloss"
+         model = xgb.XGBClassifier(
+             base_score=0.25,
+             n_estimators=500,
+             early_stopping_rounds=50,
+             objective='binary:logistic',
+             device=device,
+             max_depth=3,
+             learning_rate=0.01,
+             enable_categorical=True,
+             eval_metric="logloss",
+             importance_type="gain",
+             random_state=22,
+         )
+     else:
+         metric = "MAPE"
+         model = xgb.XGBRegressor(
+             n_estimators=500,
+             early_stopping_rounds=50,
+             objective='reg:squarederror',
+             device=device,
+             max_depth=3,
+             learning_rate=0.01,
+             enable_categorical=True,
+             eval_metric="mape",
+             importance_type="gain",
+             random_state=22,
+         )
+
+     _ = model.fit(
+         X_train, y_train,
+         eval_set=[(X_train, y_train), (X_test, y_test)],
+         verbose=False
+     )
+
+     fig, ax = plt.subplots()
+     ax = plot_importance(model, title=f"Feature Importance for {target}", ax=ax)
+     if print_score:
+         best_score = str(round(100 * model.best_score, 2)) + "%"
+         print(f"Best {metric}: {best_score}")
+     return fig, model
+
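+ # Example usage (minimal sketch, split date matching the holdout used below):
+ #   fig, model = get_feature_importance(data.dropna(subset=["Target"]), "Target",
+ #                                       split_date="04-01-2018", print_score=True)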
+
+ def corr_with_lag(data, target_col, covar, lags=[1], method="pearson"):
+     data_lagged = pd.DataFrame()
+     data_lagged["Target"] = data[target_col]
+     for lag in lags:
+         new_col = f"lag_{lag}D"
+         data_lagged[new_col] = data[covar].shift(lag)
+     return data_lagged.dropna().corr(method=method)
+
+
+ def plot_correlations(data, target, covar, lags=[0, 1, 2, 3, 4, 5, 6, 7, 10, 14, 18, 21], method="pearson"):
+     df_corr = corr_with_lag(data, target, covar, lags, method)
+     mask = np.triu(np.ones_like(df_corr, dtype=bool))
+     z_dim, x_dim = len(df_corr.to_numpy()), len(df_corr.columns)
+     y_dim = x_dim
+     fig = ff.create_annotated_heatmap(
+         z=df_corr.mask(mask).to_numpy(),
+         x=df_corr.columns.tolist(),
+         y=df_corr.columns.tolist(),
+         colorscale=px.colors.diverging.RdBu,
+         zmin=-1,
+         zmax=1,
+         ygap=2,
+         xgap=2,
+         name="",
+         customdata=np.full((x_dim, y_dim, z_dim), covar),
+         hovertemplate='%{customdata[0]}<br>%{x} to %{y}<br>Correlation: %{z:.4f}',
+         showscale=True
+     )
+
+     fig.update_layout(
+         title_text=f"Correlation Heatmap of Lagged {covar}",
+         title_x=0.5,
+         height=600,
+         xaxis_showgrid=False,
+         yaxis_showgrid=False,
+         xaxis_zeroline=False,
+         yaxis_zeroline=False,
+         yaxis_autorange='reversed',
+         template='plotly_white'
+     )
+
+     # Blank out the upper-triangle 'nan' annotations and recolor mid-range values
+     for i in range(len(fig.layout.annotations)):
+         if fig.layout.annotations[i].text == 'nan':
+             fig.layout.annotations[i].text = ""
+         else:
+             corr_i = round(float(fig.layout.annotations[i].text), 3)
+             fig.layout.annotations[i].text = corr_i
+             if (corr_i > 0.2 and corr_i < 0.5) or (corr_i < -0.2 and corr_i > -0.5):
+                 fig.layout.annotations[i].font.color = "white"
+
+     return fig
+
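+ # Example usage (minimal sketch): check how the target tracks a covariate
+ # at daily, weekly and biweekly offsets:
+ #   fig = plot_correlations(data, "Target", "num_closed_tickets", lags=[1, 7, 14])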
+
+ def plot_all_correlations(data, data_name="weather", method="pearson", width=1392, height=600):
+     if data_name == "weather":
+         covars = ["MeanTemp", "MinTemp", "MaxTemp", 'DewPoint', 'Percipitation', 'WindSpeed', 'Gust', 'MaxSustainedWind', "SnowDepth", "SnowIce", "Rain", "Target"]
+     elif data_name == "service":
+         covars = [
+             "num_closed_tickets",
+             # Agency group counts
+             'AG_Buildings', 'AG_Environment & Sanitation', 'AG_Health',
+             'AG_Parks', 'AG_Security', 'AG_Transportation',
+             'AG_Other',
+             # Borough counts
+             'Borough_BRONX', 'Borough_BROOKLYN', 'Borough_MANHATTAN',
+             'Borough_QUEENS', 'Borough_STATEN ISLAND',
+             'Borough_OTHER',
+             # Descriptor group counts
+             'DG_damaged_sign_sidewalk_missing',
+             'DG_english_emergency_spanish_chinese',
+             'DG_exemption_commercial_tax_business',
+             'DG_license_complaint_illegal_violation', 'DG_noise_animal_truck_dead',
+             'DG_odor_food_air_smoke', 'DG_order_property_inspection_condition',
+             'DG_water_basin_litter_missed', "Target"
+         ]
+
+     df_corr = data.loc[:, covars].corr(method=method)
+
+     mask = np.triu(np.ones_like(df_corr, dtype=bool))
+     fig = ff.create_annotated_heatmap(
+         z=df_corr.mask(mask).to_numpy(),
+         x=df_corr.columns.tolist(),
+         y=df_corr.columns.tolist(),
+         colorscale=px.colors.diverging.RdBu,
+         zmin=-1,
+         zmax=1,
+         ygap=2,
+         xgap=2,
+         name="",
+         hovertemplate='%{x}-%{y} <br>Correlation: %{z:.4f}',
+         showscale=True
+     )
+
+     fig.update_layout(
+         title_text=f"Correlation Heatmap of {data_name.title()} Variables & Target",  # was hardcoded to "Weather"
+         title_x=0.5,
+         height=600,
+         width=width,
+         xaxis_showgrid=False,
+         yaxis_showgrid=False,
+         xaxis_zeroline=False,
+         yaxis_zeroline=False,
+         yaxis_autorange='reversed',
+         template='plotly_white'
+     )
+
+     fig.update_annotations(font=dict(color="black"))
+
+     for i in range(len(fig.layout.annotations)):
+         if fig.layout.annotations[i].text == 'nan':
+             fig.layout.annotations[i].text = ""
+         else:
+             corr_i = round(float(fig.layout.annotations[i].text), 3)
+             fig.layout.annotations[i].text = corr_i
+             if corr_i > 0.5 or corr_i < -0.5:
+                 fig.layout.annotations[i].font.color = "white"
+
+     return fig
+
+
+ def plot_gust_interpolation(data):
+     fig, ax = plt.subplots(2, 2, figsize=(15, 12))
+     data["Gust_lin"].plot(ax=ax[0][0], color=color_pal[0], title="linear")
+     data["Gust_spline3"].plot(ax=ax[0][1], color=color_pal[1], title="spline3")
+     data["Gust_spline5"].plot(ax=ax[1][0], color=color_pal[2], title="spline5")
+     data["Gust_quad"].plot(ax=ax[1][1], color=color_pal[3], title="quadratic")
+     curr_fig = plt.gcf()
+     plt.close()
+     return curr_fig
+
+
+ def plot_train_split(train, val):
+     fig = plt.subplots(figsize=(15, 5))
+     ax = train["Target"].plot(label="Training Set")
+     val["Target"].plot(label="Validation Set", ax=ax)
+     ax.axvline('2018-04-01', color='black', ls='--')
+     ax.legend()
+     ax.set_title("Train Test Split (2018-04-01)")
+     curr_fig = plt.gcf()
+     plt.close()
+     return curr_fig
+
+
+ def plot_predictions(train, val, preds):
+     fig = plt.subplots(figsize=(16, 5))
+     ax = train["Target"].plot(label="Training Set")
+     val["Target"].plot(label="Validation Set", ax=ax)
+     val["Prediction"] = preds
+     val["Prediction"].plot(label="Prediction", ax=ax)
+     ax.axvline('2018-04-01', color='black', ls='--')
+     ax.legend()
+     ax.set_title("Model Prediction for 311 Call Volume")
+
+     curr_fig = plt.gcf()
+     plt.close()
+     return curr_fig
+
+ def plot_final_feature_importance(model):
+     fig, ax = plt.subplots(figsize=(12, 6))
+     ax = plot_importance(model, max_num_features=20, title="Feature Importance for 311 Service Calls", ax=ax)
+
+     curr_fig = plt.gcf()
+     plt.close()
+
+     return curr_fig
+
+
+ def predict_recurse(dataset, test, model, features_to_impute=['Target_L1D', 'Target_Diff7D', 'Target_Diff14D'], last_feature='Target_L6D'):
+     n_steps = len(test)
+     merged_data = pd.concat([dataset[-14:], test], axis=0)
+     all_index = merged_data.index
+     X_test = test.drop(columns="Target")
+     sd = -6  # starting offset for filling the next value
+
+     # For each step, get the prediction and feed it forward into the lag features
+     for i in range(n_steps - 1):
+         pred = model.predict(X_test)[i]
+         # For the three features needed, compute the new value
+         X_test.loc[all_index[sd+i], features_to_impute[0]] = pred
+         X_test.loc[all_index[sd+i], features_to_impute[1]] = pred - merged_data.loc[all_index[sd+i-7], features_to_impute[1]]
+         X_test.loc[all_index[sd+i], features_to_impute[2]] = pred - merged_data.loc[all_index[sd+i-14], features_to_impute[2]]
+
+         # On the last iteration, compute the Lag6D value
+         if i == 5:
+             X_test.loc[all_index[sd+i], last_feature] = pred - merged_data.loc[all_index[sd+i-6], last_feature]
+
+     final_preds = model.predict(X_test)
+     return final_preds
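
Taken together, a minimal end-to-end sketch of how the helpers above chain into the modeling workflow (paths are hypothetical; Analysis.ipynb and app.py hold the full version):

```python
import pandas as pd
from utils import build_service_data, build_weather_data, get_feature_importance

# Hypothetical input paths -- substitute the real 311 and weather extracts
sd_df = build_service_data("data/311_data.csv")
wd_df = build_weather_data("data/weather_data.csv")

# Join daily service counts with daily weather, then rank features with XGBoost
data = pd.concat([sd_df, wd_df], axis=1, join="inner")
fig, model = get_feature_importance(data.dropna(subset=["Target"]), "Target",
                                    split_date="04-01-2018", print_score=True)
```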