rishabh-mondal commited on
Commit
423d6f2
·
0 Parent(s):

Initial HF Space

Browse files
.gitattributes ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ data/waterways_points.csv filter=lfs diff=lfs merge=lfs -text
2
+ data/waterways_wkt.csv filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Brick Kiln Compliance Monitor (Gradio / Hugging Face Space)
2
+
3
+ An interactive app to flag **compliant vs non-compliant** brick kilns and visualize them on a map.
4
+ - Upload **Kilns CSV** with `lat, lon`
5
+ - Optional **Hospitals CSV** (`Latitude, Longitude` or `lat, lon`)
6
+ - Optional **Waterways CSV**:
7
+ - points (`lat, lon`) **or**
8
+ - WKT LineString/MultiLineString in `geometry` column
9
+
10
+ ## Thresholds (km)
11
+ - Kiln–Kiln ≥ 1.0 km
12
+ - Kiln–Hospital ≥ 0.8 km
13
+ - Kiln–Water ≥ 0.5 km
14
+
15
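+ All distances are computed with a **BallTree (haversine)** nearest-neighbour search on WGS84 coordinates. Waterway lines are approximated by their vertices, so the kiln–water distance is the distance to the nearest vertex.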
16
+
17
+ ## Deploy on Hugging Face Spaces
18
+ 1. Create new Space → **Gradio** SDK.
19
+ 2. Upload `app.py`, `requirements.txt`, and the `data/` folder used by the bundled demo mode (and this README if you like).
20
+ 3. Click **Deploy**. The app will build and run automatically.
21
+
22
+ ## Run locally
23
+ ```bash
24
+ pip install -r requirements.txt
25
+ python app.py
26
+ ```
app.py ADDED
@@ -0,0 +1,343 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+
4
+ import io
5
+ import os
6
+ from typing import Tuple, List
7
+
8
+ import gradio as gr
9
+ import pandas as pd
10
+ import numpy as np
11
+ from sklearn.neighbors import BallTree
12
+ import folium
13
+ from folium.plugins import MarkerCluster, HeatMap
14
+ import shapely.wkt
15
+ import matplotlib.pyplot as plt
16
+
17
+ # ------------------------------
18
+ # Utilities
19
+ # ------------------------------
20
+
21
+ EARTH_RADIUS_KM = 6371.0088
22
+
23
def _to_radians(latlon: np.ndarray) -> np.ndarray:
    """Convert coordinates from degrees to radians.

    Args:
        latlon: Array-like of angles in degrees. Callers in this file pass an
            ``(n, 2)`` array, but any shape is accepted.

    Returns:
        A new float array of the same shape with every value in radians.
    """
    # np.asarray (instead of .astype) also accepts plain lists/tuples and
    # integer-typed arrays, not only ndarrays.
    return np.radians(np.asarray(latlon, dtype=float))
26
+
27
def _balltree_haversine_min_km(a_latlon_deg: np.ndarray, b_latlon_deg: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    """Find, for every point in A, the great-circle distance to its nearest point in B.

    Args:
        a_latlon_deg: ``(n, 2)`` array of query points, columns ``[lat, lon]`` in degrees.
        b_latlon_deg: ``(m, 2)`` array of reference points, columns ``[lat, lon]`` in degrees.

    Returns:
        ``(min_distance_km, index_in_B)``: one entry per row of A. Both arrays
        are empty when either input is empty.
    """
    if len(a_latlon_deg) == 0 or len(b_latlon_deg) == 0:
        return np.array([]), np.array([], dtype=int)

    # scikit-learn's haversine metric expects each point as
    # [latitude, longitude] in radians, in that order. The inputs are already
    # [lat, lon], so they must NOT be reversed; swapping the columns applies
    # the cosine term to the longitude and yields wrong distances.
    a_rad = _to_radians(a_latlon_deg[:, [0, 1]])
    b_rad = _to_radians(b_latlon_deg[:, [0, 1]])

    tree = BallTree(b_rad, metric="haversine")
    dist_rad, idx = tree.query(a_rad, k=1)

    # Haversine distances come back as angles (radians) on the unit sphere.
    dist_km = dist_rad.ravel() * EARTH_RADIUS_KM
    return dist_km, idx.ravel()
42
+
43
def _lines_to_vertices_df(lines_like: pd.DataFrame) -> pd.DataFrame:
    """Flatten waterway geometries into a cloud of ``(lon, lat)`` vertices.

    If the frame already has ``lon`` and ``lat`` columns, those are returned
    directly (rows with missing values dropped). Otherwise the ``geometry``
    column is read: string values are parsed as WKT, and the coordinates of
    every LineString, MultiLineString and Point are collected. Other geometry
    types and unparsable rows are skipped.

    Args:
        lines_like: Waterways table with either ``lon``/``lat`` columns or a
            ``geometry`` column.

    Returns:
        DataFrame with columns ``["lon", "lat"]``; empty when no usable
        coordinates are found.
    """
    if {"lon", "lat"}.issubset(lines_like.columns):
        return lines_like[["lon", "lat"]].dropna().reset_index(drop=True)
    if "geometry" not in lines_like.columns:
        return pd.DataFrame(columns=["lon", "lat"])

    vertices = []
    for geom in lines_like["geometry"]:
        if isinstance(geom, str):
            try:
                geom = shapely.wkt.loads(geom)
            except Exception:
                # Best effort: a malformed WKT row is skipped, not fatal.
                continue
        if geom is None:
            continue

        gtype = getattr(geom, "geom_type", "")
        if gtype in ("LineString", "Point"):
            parts = [geom]
        elif gtype == "MultiLineString":
            parts = list(geom.geoms)
        else:
            # NaN cells, polygons, collections, ... are not handled here.
            continue

        for part in parts:
            # Index rather than unpack so 3D coordinates (x, y, z) work too;
            # iterating .coords also copes with empty geometries.
            vertices.extend((c[0], c[1]) for c in part.coords)

    return pd.DataFrame(vertices, columns=["lon", "lat"]).dropna().reset_index(drop=True)
72
+
73
def _ensure_cols(df: pd.DataFrame, needed: List[str], name_for_error: str):
    """Validate that *df* contains every column listed in *needed*.

    Args:
        df: Table to check.
        needed: Column names that must be present.
        name_for_error: Human-readable table name used as the error prefix.

    Raises:
        ValueError: If one or more required columns are absent; the message
            lists the missing columns in the order they were requested.
    """
    available = df.columns
    absent = []
    for column in needed:
        if column not in available:
            absent.append(column)
    if not absent:
        return
    raise ValueError(f"{name_for_error}: missing columns {absent}. Expected at least {needed}.")
77
+
78
def _read_csv(file) -> pd.DataFrame:
    """Load a CSV from any of the shapes an upload may arrive in.

    Args:
        file: One of
            * ``None`` -> an empty DataFrame is returned;
            * a path given as ``str`` or ``os.PathLike`` (e.g. ``pathlib.Path``);
            * a dict with a ``"path"`` or ``"name"`` entry pointing at an
              existing file, or a ``"data"`` entry holding the raw bytes;
            * an object whose ``.name`` attribute is the path of an existing file;
            * anything else ``pandas.read_csv`` accepts (e.g. an open buffer).

    Returns:
        The parsed DataFrame, or an empty DataFrame when nothing readable was
        supplied.
    """
    if file is None:
        return pd.DataFrame()

    # Explicit paths. os.PathLike must be handled here: a pathlib.Path also
    # has a ``.name`` attribute, but it is only the basename, so the generic
    # ``.name`` probe below could pick up an unrelated file with the same
    # name in the current working directory.
    if isinstance(file, (str, os.PathLike)):
        return pd.read_csv(file)

    # Older gradio may pass a dict
    if isinstance(file, dict):
        for key in ("path", "name"):
            candidate = file.get(key)
            if isinstance(candidate, str) and os.path.exists(candidate):
                return pd.read_csv(candidate)
        data = file.get("data")
        if data is not None:
            return pd.read_csv(io.BytesIO(data))
        return pd.DataFrame()

    # File-like wrapper exposing the on-disk location via .name
    path = getattr(file, "name", None)
    if isinstance(path, str) and os.path.exists(path):
        return pd.read_csv(path)

    # Last resort: hand the object to pandas. This stays deliberately
    # best-effort: an unreadable object yields an empty frame, which the
    # caller's column validation then reports.
    try:
        return pd.read_csv(file)
    except Exception:
        return pd.DataFrame()
111
+
112
def _center_from_points(latlon: np.ndarray) -> Tuple[float, float]:
    """Return the mean ``(lat, lon)`` of the given points as the map centre.

    Args:
        latlon: ``(n, 2)`` array with latitude in column 0 and longitude in
            column 1.

    Returns:
        ``(mean_lat, mean_lon)`` as Python floats, or a fixed fallback
        location when there are no points.
    """
    if len(latlon) > 0:
        mean_lat = np.mean(latlon[:, 0])
        mean_lon = np.mean(latlon[:, 1])
        return float(mean_lat), float(mean_lon)
    return 28.6, 77.2  # fallback (Delhi-ish)
116
+
117
+ # ------------------------------
118
+ # Core: compute compliance
119
+ # ------------------------------
120
+
121
def compute_compliance(
    kilns_csv,
    hospitals_csv=None,
    waterways_csv=None,
    kiln_km_thresh: float = 1.0,
    hosp_km_thresh: float = 0.8,
    water_km_thresh: float = 0.5,
    add_heatmap: bool = False,
    cluster_points: bool = True
):
    """Flag each kiln as compliant or not and render the result on a map.

    A kiln is compliant when it is at least the given distance away from the
    nearest other kiln, the nearest hospital and the nearest waterway vertex.
    A rule is skipped when its threshold is ``None`` or ``<= 0``, and a rule
    whose distance is unknown (no hospitals/waterways supplied, or only one
    kiln) never makes a kiln non-compliant.

    Args:
        kilns_csv: Kilns table (anything ``_read_csv`` accepts) with ``lat``
            and ``lon`` columns.
        hospitals_csv: Optional hospitals table with ``Latitude``/``Longitude``
            or ``lat``/``lon`` columns.
        waterways_csv: Optional waterways table, as accepted by
            ``_lines_to_vertices_df``.
        kiln_km_thresh: Minimum distance to the nearest other kiln, in km.
        hosp_km_thresh: Minimum distance to the nearest hospital, in km.
        water_km_thresh: Minimum distance to the nearest waterway vertex, in km.
        add_heatmap: Add a kiln-density heatmap layer to the map.
        cluster_points: Group markers with ``MarkerCluster``.

    Returns:
        ``(map_html, summary, csv_bytes)``: the folium map as HTML, a short
        text summary, and the kilns table (with the distance and ``compliant``
        columns added) serialized as CSV bytes.

    Raises:
        ValueError: If the kilns table lacks ``lat`` or ``lon`` columns.
    """
    # Load data
    kilns = _read_csv(kilns_csv)
    _ensure_cols(kilns, ["lat", "lon"], "Kilns CSV")

    # BallTree rejects NaN input, so rows without coordinates are dropped up
    # front (and reported in the summary) instead of crashing the whole run.
    n_loaded = len(kilns)
    kilns = kilns.dropna(subset=["lat", "lon"]).reset_index(drop=True)
    n_skipped = n_loaded - len(kilns)

    hospitals = _read_csv(hospitals_csv) if hospitals_csv else pd.DataFrame()
    waterways = _read_csv(waterways_csv) if waterways_csv else pd.DataFrame()

    kiln_latlon = kilns[["lat", "lon"]].to_numpy(dtype=float)
    n_kilns = len(kilns)

    def _nearest_km(target_latlon: np.ndarray) -> np.ndarray:
        """Distance (km) from every kiln to its nearest target; NaN if there are no usable targets."""
        target_latlon = target_latlon[~np.isnan(target_latlon).any(axis=1)]
        if n_kilns == 0 or len(target_latlon) == 0:
            return np.full(n_kilns, np.nan)
        dist_km, _ = _balltree_haversine_min_km(kiln_latlon, target_latlon)
        return dist_km

    # Nearest other kiln: query k=2 because each point's closest hit is itself.
    if n_kilns >= 2:
        # The haversine metric expects [lat, lon] in radians, in that order;
        # kiln_latlon already has that column order, so it is not reversed.
        rad = _to_radians(kiln_latlon)
        tree = BallTree(rad, metric="haversine")
        dist_rad, _ = tree.query(rad, k=2)
        nearest_km = dist_rad[:, 1] * EARTH_RADIUS_KM
    else:
        nearest_km = np.full(n_kilns, np.nan)

    # Nearest hospital (either column-naming scheme is accepted)
    hosp_km = np.full(n_kilns, np.nan)
    if not hospitals.empty:
        for lat_col, lon_col in (("Latitude", "Longitude"), ("lat", "lon")):
            if {lat_col, lon_col}.issubset(hospitals.columns):
                hosp_km = _nearest_km(hospitals[[lat_col, lon_col]].to_numpy(dtype=float))
                break

    # Nearest water (lines/points -> vertices)
    water_km = np.full(n_kilns, np.nan)
    if not waterways.empty:
        water_pts = _lines_to_vertices_df(waterways)
        if len(water_pts) > 0:
            water_km = _nearest_km(water_pts[["lat", "lon"]].to_numpy(dtype=float))

    # Flags: start compliant, then apply each active rule. NaN distances
    # (rule has no data) are treated as passing.
    flags = np.ones(n_kilns, dtype=bool)
    rules = (
        (nearest_km, kiln_km_thresh),
        (hosp_km, hosp_km_thresh),
        (water_km, water_km_thresh),
    )
    for dist_km, thresh in rules:
        if thresh is not None and thresh > 0:
            flags &= (dist_km >= thresh) | np.isnan(dist_km)

    # Output DF
    out = kilns.copy()
    out["nearest_kiln_km"] = np.round(nearest_km, 4)
    out["nearest_hospital_km"] = np.round(hosp_km, 4)
    out["nearest_water_km"] = np.round(water_km, 4)
    out["compliant"] = flags

    # Summary counts
    total = len(out)
    non_compliant = int((~out["compliant"]).sum())
    compliant = int(out["compliant"].sum())

    # Folium map
    ctr_lat, ctr_lon = _center_from_points(kiln_latlon)
    m = folium.Map(
        location=[ctr_lat, ctr_lon],
        zoom_start=6,
        control_scale=True,
        tiles="CartoDB positron"
    )

    g_compliant = folium.FeatureGroup(name="Compliant kilns", show=True)
    g_noncomp = folium.FeatureGroup(name="Non-compliant kilns", show=True)

    def _tooltip(r) -> str:
        """Build the hover text for one kiln row."""
        # Tooltips are rendered as HTML, where "\n" collapses to a space,
        # so explicit <br> tags are needed for line breaks.
        return (
            "Kiln<br>"
            f"Nearest kiln: {r.get('nearest_kiln_km', np.nan)} km<br>"
            f"Nearest hospital: {r.get('nearest_hospital_km', np.nan)} km<br>"
            f"Nearest water: {r.get('nearest_water_km', np.nan)} km"
        )

    def _add_markers(df: pd.DataFrame, group: folium.FeatureGroup, color: str):
        """Add one circle marker per row of *df* to *group*, clustered if requested."""
        if len(df) == 0:
            return
        target = group
        if cluster_points:
            target = MarkerCluster()
            group.add_child(target)
        for _, r in df.iterrows():
            folium.CircleMarker(
                location=[r["lat"], r["lon"]],
                radius=4,
                color=color,
                fill=True,
                fill_opacity=0.7,
                tooltip=_tooltip(r),
            ).add_to(target)

    _add_markers(out[out["compliant"]], g_compliant, color="#16a34a")  # green
    _add_markers(out[~out["compliant"]], g_noncomp, color="#dc2626")  # red

    m.add_child(g_compliant)
    m.add_child(g_noncomp)

    if add_heatmap and len(out) > 0:
        HeatMap(out[["lat", "lon"]].values.tolist(), name="Kiln density").add_to(m)

    folium.LayerControl(collapsed=False).add_to(m)
    map_html = m._repr_html_()

    # Summary text
    summary = (
        f"Total kilns: {total} | "
        f"Compliant: {compliant} | "
        f"Non-compliant: {non_compliant}\n"
        f"Rules: ≥{kiln_km_thresh} km from nearest kiln, "
        f"≥{hosp_km_thresh} km from hospital, "
        f"≥{water_km_thresh} km from water"
    )
    if n_skipped:
        summary += f"\nSkipped {n_skipped} kiln row(s) with missing coordinates"

    # Also return the combined DF (as bytes) so a static plot can be made
    # without saving to disk.
    buf = io.BytesIO()
    out.to_csv(buf, index=False)
    return map_html, summary, buf.getvalue()
261
+
262
+ # ------------------------------
263
+ # Static visualization (Matplotlib)
264
+ # ------------------------------
265
+
266
def make_scatter_figure(csv_bytes: bytes, title: str = "Kilns: Compliant vs Non-compliant"):
    """Draw a static lon/lat scatter plot of compliant vs non-compliant kilns.

    Args:
        csv_bytes: CSV content with ``lat``, ``lon`` and boolean ``compliant``
            columns (the bytes returned by ``compute_compliance``).
        title: Title shown above the plot.

    Returns:
        The matplotlib ``Figure``. It is detached from pyplot's registry of
        open figures before being returned, so repeated calls in a
        long-running server do not accumulate open figures.
    """
    df = pd.read_csv(io.BytesIO(csv_bytes))
    fig, ax = plt.subplots(figsize=(6.5, 5.5))  # single plot

    comp = df[df["compliant"].eq(True)]
    nonc = df[df["compliant"].eq(False)]

    # Keep default matplotlib colors (no explicit color)
    if len(comp) > 0:
        ax.scatter(comp["lon"], comp["lat"], marker="o", label=f"Compliant (n={len(comp)})")
    if len(nonc) > 0:
        ax.scatter(nonc["lon"], nonc["lat"], marker="x", label=f"Non-compliant (n={len(nonc)})")

    ax.set_xlabel("Longitude")
    ax.set_ylabel("Latitude")
    ax.set_title(title)
    ax.grid(True)
    # legend() warns when there is nothing labelled to show (empty input).
    if len(comp) > 0 or len(nonc) > 0:
        ax.legend()

    # pyplot keeps every figure it creates alive until it is closed; the
    # Figure object itself stays usable (e.g. for savefig) after this call.
    plt.close(fig)
    return fig
285
+
286
+ # ------------------------------
287
+ # Gradio UI
288
+ # ------------------------------
289
+
290
# Gradio layout: inputs (files, thresholds, options) on the left, results
# (map, summary, static plot) on the right.
with gr.Blocks(title="Brick Kiln Compliance Monitor (Gradio)") as demo:
    gr.Markdown(
        "## Automatic Compliance Monitoring for Brick Kilns\n"
        "Upload CSVs, set thresholds, and visualize compliant vs non-compliant kilns on an interactive map.\n"
        "- **Kilns CSV** must include columns: `lat, lon` (WGS84).\n"
        "- Hospitals CSV can have `Latitude, Longitude` or `lat, lon`.\n"
        "- Waterways CSV may be points (`lat, lon`) or WKT LineString/MultiLineString in `geometry`."
    )

    with gr.Row():
        with gr.Column(scale=1):
            use_demo = gr.Checkbox(value=True, label="Use bundled demo data (skip uploads)")

            kilns_csv = gr.File(label="Kilns CSV (required if demo OFF)", file_types=[".csv"])
            hospitals_csv = gr.File(label="Hospitals CSV (optional)", file_types=[".csv"])
            waterways_csv = gr.File(label="Waterways CSV or WKT (optional)", file_types=[".csv"])

            gr.Markdown("### Thresholds (km)")
            kiln_thresh = gr.Number(value=1.0, label="Min distance to nearest kiln (km)")
            hosp_thresh = gr.Number(value=0.8, label="Min distance to hospital (km)")
            water_thresh = gr.Number(value=0.5, label="Min distance to water body (km)")

            add_heatmap = gr.Checkbox(value=False, label="Add heatmap layer")
            cluster_points = gr.Checkbox(value=True, label="Cluster markers for speed")

            run_btn = gr.Button("Compute & Map", variant="primary")

        with gr.Column(scale=2):
            fmap = gr.HTML(label="Interactive Map")
            summary = gr.Textbox(label="Summary", lines=3)
            scatter = gr.Plot(label="Static Visualization: Compliant vs Non-compliant")

    def _run(use_demo_flag, k, h, w, kt, ht, wt, heat, cluster):
        """Button callback: resolve the inputs, run the analysis, return (map, summary, figure)."""
        if use_demo_flag:
            # Demo mode ignores any uploads and uses the files shipped in data/.
            k = "data/kilns_clean.csv"
            h = "data/hospitals.csv" if os.path.exists("data/hospitals.csv") else None
            w = "data/waterways_wkt.csv" if os.path.exists("data/waterways_wkt.csv") else None
            if not os.path.exists(k):
                raise gr.Error(f"Bundled demo data not found: {k}")
        elif k is None:
            raise gr.Error("Please upload a Kilns CSV or enable the bundled demo data.")

        # A cleared Number box arrives as None. Use 0, which
        # compute_compliance treats as "rule disabled" (it only applies
        # thresholds that are > 0).
        kt, ht, wt = (0.0 if v is None else float(v) for v in (kt, ht, wt))

        try:
            map_html, summary_text, csv_bytes = compute_compliance(
                k, h, w, kt, ht, wt, bool(heat), bool(cluster)
            )
        except ValueError as err:
            # e.g. missing lat/lon columns: show the message in the UI
            # instead of a generic failure.
            raise gr.Error(str(err)) from err

        fig = make_scatter_figure(csv_bytes)
        return map_html, summary_text, fig

    run_btn.click(
        _run,
        inputs=[use_demo, kilns_csv, hospitals_csv, waterways_csv,
                kiln_thresh, hosp_thresh, water_thresh, add_heatmap, cluster_points],
        outputs=[fmap, summary, scatter],
    )

if __name__ == "__main__":
    # Change port if needed: demo.launch(server_port=7861)
    demo.launch()
data/hospitals.csv ADDED
The diff for this file is too large to render. See raw diff
 
data/kilns_clean.csv ADDED
The diff for this file is too large to render. See raw diff
 
data/waterways_points.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:087d8bfbfa64ce762e30953277036df434b8b0ea994424a78bda8e97f9de701b
3
+ size 80434198
data/waterways_wkt.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08faa0dfbe33bbd145b5c072424909db74b8d2879750c8977e91af36f0f8a6b9
3
+ size 51995502
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ gradio>=4.31
2
+ pandas>=2.0
3
+ numpy>=1.24
4
+ scikit-learn>=1.3
5
+ shapely>=2.0
6
+ folium>=0.15