Shivansh12 committed on
Commit
03ab89b
·
unverified ·
1 Parent(s): 9f26741

Source File Added

Browse files
Files changed (1) hide show
  1. sourcecode.ipynb +420 -0
sourcecode.ipynb ADDED
@@ -0,0 +1,420 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "id": "d305511a",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "# This Python 3 environment comes with many helpful analytics libraries installed\n",
11
+ "# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python\n",
12
+ "# For example, here's several helpful packages to load\n",
13
+ "\n",
14
+ "import numpy as np # linear algebra\n",
15
+ "import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n",
16
+ "\n",
17
+ "# Input data files are available in the read-only \"../input/\" directory\n",
18
+ "# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory\n",
19
+ "\n",
20
+ "import os\n",
21
+ "for dirname, _, filenames in os.walk('/kaggle/input'):\n",
22
+ " for filename in filenames:\n",
23
+ " print(os.path.join(dirname, filename))\n",
24
+ "\n",
25
+ "# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using \"Save & Run All\" \n",
26
+ "# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session\n",
27
+ "\n",
28
+ "\n",
29
+ "get_ipython().getoutput(\"pip install -q segmentation-models-pytorch albumentations\")\n",
30
+ "\n",
31
+ "\n",
32
+ "import os\n",
33
+ "import cv2\n",
34
+ "import numpy as np\n",
35
+ "import torch\n",
36
+ "import torch.nn as nn\n",
37
+ "from torch.utils.data import Dataset, DataLoader\n",
38
+ "import segmentation_models_pytorch as smp\n",
39
+ "import albumentations as A\n",
40
+ "from albumentations.pytorch import ToTensorV2\n",
41
+ "import matplotlib.pyplot as plt\n",
42
+ "from tqdm import tqdm\n",
43
+ "\n",
44
+ "\n",
# Pick the compute device: CUDA when PyTorch reports one, otherwise the CPU.
_cuda_ok = torch.cuda.is_available()
device = torch.device("cuda" if _cuda_ok else "cpu")
print("Using:", device)


# Root of the dataset mount; the four directories below are derived from it.
BASE_PATH = "/kaggle/input/datasets/balraj98/massachusetts-buildings-dataset"

TRAIN_IMG_PATH, TRAIN_MASK_PATH, VAL_IMG_PATH, VAL_MASK_PATH = (
    os.path.join(BASE_PATH, sub_dir)
    for sub_dir in (
        "tiff/train",
        "tiff/train_labels",
        "tiff/val",
        "tiff/val_labels",
    )
)


# Per-channel normalisation constants shared by both pipelines.
# NOTE(review): these are the usual ImageNet statistics, presumably chosen to
# match the ImageNet-pretrained encoder used later — confirm.
_NORM_MEAN = (0.485, 0.456, 0.406)
_NORM_STD = (0.229, 0.224, 0.225)

# Training pipeline: random flips / 90-degree rotations / brightness-contrast
# jitter, then normalisation and conversion to a tensor.
_train_steps = [
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.5),
    A.RandomRotate90(p=0.5),
    A.RandomBrightnessContrast(p=0.3),
    A.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
    ToTensorV2(),
]
train_transform = A.Compose(_train_steps)

# Validation pipeline: no randomness, only normalisation and tensor conversion.
_val_steps = [
    A.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
    ToTensorV2(),
]
val_transform = A.Compose(_val_steps)
73
+ "\n",
74
+ "\n",
def extract_patches(img, mask, patch_size=256, stride=None):
    """Cut an image and its mask into aligned square patches.

    Patches are taken on a regular grid starting at the top-left corner.
    Only full-size patches are produced: any remainder at the right/bottom
    edge that is narrower than ``patch_size`` is dropped.

    Args:
        img: Array whose first two axes are (height, width).
        mask: Array whose first two axes match ``img``.
        patch_size: Side length of each square patch, in pixels.
        stride: Step between patch origins. ``None`` (the default) means
            ``patch_size``, i.e. non-overlapping tiles; a smaller value
            yields overlapping patches.

    Returns:
        ``(img_patches, mask_patches)`` — two lists of equal length holding
        array slices (views, not copies) in row-major grid order.

    Raises:
        ValueError: If ``patch_size`` or ``stride`` is not positive, or if
            ``img`` and ``mask`` differ in height/width.
    """
    if patch_size <= 0:
        raise ValueError(f"patch_size must be positive, got {patch_size}")

    step = patch_size if stride is None else stride
    if step <= 0:
        raise ValueError(f"stride must be positive, got {step}")

    h, w = img.shape[:2]
    if tuple(mask.shape[:2]) != (h, w):
        # Slicing mismatched arrays would silently yield misaligned pairs.
        raise ValueError(
            f"img and mask sizes differ: {(h, w)} vs {tuple(mask.shape[:2])}"
        )

    img_patches = []
    mask_patches = []

    for top in range(0, h - patch_size + 1, step):
        for left in range(0, w - patch_size + 1, step):
            rows = slice(top, top + patch_size)
            cols = slice(left, left + patch_size)
            img_patches.append(img[rows, cols])
            mask_patches.append(mask[rows, cols])

    return img_patches, mask_patches
90
+ "\n",
91
+ "\n",
class BuildingDataset(Dataset):
    """In-memory dataset of fixed-size image/mask patches.

    Every file in ``img_dir`` is paired with the file at the same position in
    the sorted listing of ``mask_dir``; each pair is read once, cut into
    patches with ``extract_patches`` and kept in memory.

    Args:
        img_dir: Directory containing the input images.
        mask_dir: Directory containing the label masks.
        transform: Optional callable invoked as
            ``transform(image=..., mask=...)`` that returns a mapping with
            ``"image"`` and ``"mask"`` tensors.
        patch_size: Side length of the square patches.

    Raises:
        ValueError: If the two directories hold a different number of files
            (position-based pairing would then be unreliable).
        FileNotFoundError: If OpenCV cannot read one of the files.
    """

    def __init__(self, img_dir, mask_dir, transform=None, patch_size=256):
        self.transform = transform
        self.patch_size = patch_size

        self.img_patches = []
        self.mask_patches = []

        images = sorted(os.listdir(img_dir))
        masks = sorted(os.listdir(mask_dir))

        # zip() would silently truncate to the shorter listing and could pair
        # images with the wrong masks, so fail loudly instead.
        if len(images) != len(masks):
            raise ValueError(
                f"{img_dir} has {len(images)} files but "
                f"{mask_dir} has {len(masks)}; cannot pair them by position"
            )

        for img_name, mask_name in zip(images, masks):
            img_path = os.path.join(img_dir, img_name)
            mask_path = os.path.join(mask_dir, mask_name)

            # cv2.imread returns None instead of raising on unreadable files.
            img = cv2.imread(img_path)
            if img is None:
                raise FileNotFoundError(f"Could not read image: {img_path}")
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

            # Flag 0 loads the mask as a single-channel grayscale image.
            mask = cv2.imread(mask_path, 0)
            if mask is None:
                raise FileNotFoundError(f"Could not read mask: {mask_path}")
            # Binarise: every non-zero pixel becomes 1.0, the rest 0.0.
            mask = (mask > 0).astype(np.float32)

            img_p, mask_p = extract_patches(img, mask, self.patch_size)

            self.img_patches.extend(img_p)
            self.mask_patches.extend(mask_p)

    def __len__(self):
        """Return the total number of stored patches."""
        return len(self.img_patches)

    def __getitem__(self, idx):
        """Return ``(image, mask)`` for patch ``idx``.

        With a transform, its ``"image"`` output is returned as-is and its
        ``"mask"`` output gains a leading channel axis. Without one, the image
        is converted to a channels-first float tensor scaled by 1/255 and the
        mask gains a leading channel axis. The mask is always returned as
        float.
        """
        img = self.img_patches[idx]
        mask = self.mask_patches[idx]

        # Compare against None explicitly: a transform object may define its
        # own truthiness (e.g. via __len__).
        if self.transform is not None:
            augmented = self.transform(image=img, mask=mask)
            img = augmented["image"]
            mask = augmented["mask"].unsqueeze(0)
        else:
            img = torch.tensor(img).permute(2, 0, 1).float() / 255.0
            mask = torch.tensor(mask).unsqueeze(0)

        return img, mask.float()
131
+ "\n",
132
+ "\n",
# Build each dataset exactly once, with its transform pipeline attached.
# The notebook previously re-created both datasets right afterwards WITHOUT
# transforms, which re-read every image and silently disabled augmentation
# and normalisation for the whole run.
train_dataset = BuildingDataset(
    TRAIN_IMG_PATH,
    TRAIN_MASK_PATH,
    transform=train_transform,
    patch_size=256,
)

val_dataset = BuildingDataset(
    VAL_IMG_PATH,
    VAL_MASK_PATH,
    transform=val_transform,
    patch_size=256,
)

# Only the training loader shuffles; validation order stays fixed.
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=8)


# U-Net with an EfficientNet-B3 encoder. activation=None leaves the output as
# raw logits; the loss and the IoU metric apply the sigmoid themselves.
model = smp.Unet(
    encoder_name="efficientnet-b3",
    encoder_weights="imagenet",
    in_channels=3,
    classes=1,
    activation=None
)

model.to(device)


# Single loss/optimizer definition (the earlier duplicate pair was redundant).
loss_fn = smp.losses.DiceLoss(mode='binary', from_logits=True)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

# mode='max' because the scheduler is stepped with validation IoU
# (higher is better): halve the LR after 3 epochs without improvement.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='max', patience=3, factor=0.5
)
176
+ "\n",
177
+ "\n",
def iou_score(pred, mask, threshold=0.5, eps=1e-6):
    """Compute intersection-over-union between logits and a binary mask.

    The logits are passed through a sigmoid and binarised at ``threshold``.
    The score is computed over all elements of the tensors at once (not
    per sample).

    Args:
        pred: Tensor of raw logits.
        mask: Tensor of 0/1 ground-truth values, broadcastable to ``pred``.
        threshold: Probability above which an element counts as foreground.
        eps: Smoothing term added to numerator and denominator, so that two
            empty masks score 1 instead of dividing by zero.

    Returns:
        A scalar tensor holding the IoU.
    """
    probs = torch.sigmoid(pred)
    binary = (probs > threshold).float()
    intersection = (binary * mask).sum()
    union = binary.sum() + mask.sum() - intersection
    return (intersection + eps) / (union + eps)
184
+ "\n",
epochs = 30

best_iou = 0
train_losses = []   # mean training loss per epoch
val_losses = []     # mean validation loss per epoch
ious = []           # mean validation IoU per epoch

for epoch in range(epochs):

    # ---- TRAIN ----
    model.train()
    train_loss = 0

    for imgs, masks in tqdm(train_loader):
        imgs = imgs.to(device)
        masks = masks.to(device)

        preds = model(imgs)
        loss = loss_fn(preds, masks)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

    avg_train_loss = train_loss / len(train_loader)
    train_losses.append(avg_train_loss)

    # ---- VALIDATION ----
    model.eval()
    val_loss = 0
    iou_total = 0

    with torch.no_grad():
        for imgs, masks in val_loader:
            imgs = imgs.to(device)
            masks = masks.to(device)

            preds = model(imgs)
            loss = loss_fn(preds, masks)

            val_loss += loss.item()
            iou_total += iou_score(preds, masks).item()

    # Both averages are means over batches, not over individual samples.
    avg_val_loss = val_loss / len(val_loader)
    avg_iou = iou_total / len(val_loader)

    val_losses.append(avg_val_loss)
    ious.append(avg_iou)

    # The scheduler tracks validation IoU (configured with mode='max').
    scheduler.step(avg_iou)

    print(f"\nEpoch {epoch+1}")
    print(f"Train Loss: {avg_train_loss:.4f}")
    print(f"Val Loss: {avg_val_loss:.4f}")
    print(f"Val IoU: {avg_iou:.4f}")

    # Keep only the weights of the best epoch seen so far.
    if avg_iou > best_iou:
        best_iou = avg_iou
        torch.save(model.state_dict(), "best_model.pth")
        print("Best model saved!")


# ---- Training curves ----
plt.figure(figsize=(12,5))

plt.subplot(1,2,1)
plt.plot(train_losses, label="Train")
plt.plot(val_losses, label="Val")
plt.title("Loss Curve")
plt.legend()

plt.subplot(1,2,2)
plt.plot(ious)
plt.title("IoU Curve")

plt.show()


# ---- Qualitative check on the first validation batch ----
# map_location lets the checkpoint load even when the current device differs
# from the one it was saved on (e.g. saved on GPU, reloaded on CPU).
model.load_state_dict(torch.load("best_model.pth", map_location=device))
model.eval()

imgs, masks = next(iter(val_loader))
imgs = imgs.to(device)

with torch.no_grad():
    preds = model(imgs)

# Logits -> probabilities -> boolean mask for the first sample of the batch.
pred = torch.sigmoid(preds[0]).cpu().numpy().squeeze()
pred = (pred > 0.5)

# Channels-first tensor -> channels-last array for matplotlib.
img = imgs[0].cpu().permute(1,2,0).numpy()

# imshow expects float RGB in [0, 1]. If the loader normalised the image the
# values fall outside that range, so stretch them for display only; `img`
# itself is left untouched for later cells.
img_lo, img_hi = float(img.min()), float(img.max())
if img_lo < 0.0 or img_hi > 1.0:
    img_span = img_hi - img_lo
    img_display = (img - img_lo) / img_span if img_span > 0 else np.zeros_like(img)
else:
    img_display = img

plt.figure(figsize=(10,4))

plt.subplot(1,2,1)
plt.imshow(img_display)
plt.title("Input")

plt.subplot(1,2,2)
plt.imshow(pred, cmap='gray')
plt.title("Prediction")

plt.show()
289
+ ]
290
+ },
291
+ {
292
+ "cell_type": "code",
293
+ "execution_count": 1,
294
+ "id": "4bcc68d4",
295
+ "metadata": {},
296
+ "outputs": [
297
+ {
298
+ "ename": "ModuleNotFoundError",
299
+ "evalue": "No module named 'cv2'",
300
+ "output_type": "error",
301
+ "traceback": [
302
+ "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
303
+ "\u001b[31mModuleNotFoundError\u001b[39m Traceback (most recent call last)",
304
+ "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[1]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mcv2\u001b[39;00m\n\u001b[32m 2\u001b[39m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mnumpy\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mnp\u001b[39;00m\n\u001b[32m 5\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mcreate_zoning_mask\u001b[39m(shape):\n",
305
+ "\u001b[31mModuleNotFoundError\u001b[39m: No module named 'cv2'"
306
+ ]
307
+ }
308
+ ],
309
+ "source": [
310
+ "import cv2\n",
311
+ "import numpy as np\n",
312
+ "\n",
313
+ "\n",
def create_zoning_mask(shape):
    """Build a demo zoning mask whose right half is marked as restricted.

    Args:
        shape: ``(height, width)`` of the mask. Longer shapes such as an
            image's ``(height, width, channels)`` are accepted; only the
            first two entries are used.

    Returns:
        A ``uint8`` array of shape ``(height, width)`` that is 1 in columns
        ``width // 2`` and beyond, and 0 elsewhere.
    """
    # Slice so that passing image.shape (with a channel axis) does not fail
    # on tuple unpacking.
    h, w = shape[:2]
    zoning = np.zeros((h, w), dtype=np.uint8)
    zoning[:, w // 2:] = 1
    return zoning
319
+ "\n",
320
+ "\n",
def get_building_components(binary_mask):
    """Label the connected regions of a building mask.

    The mask is cast to ``uint8`` and handed to ``cv2.connectedComponents``.

    Returns:
        ``(num_labels, labels)`` exactly as returned by OpenCV: the number of
        labels (callers treat label 0 as background) and the per-pixel label
        image.
    """
    mask_u8 = binary_mask.astype(np.uint8)
    component_count, component_map = cv2.connectedComponents(mask_u8)
    return component_count, component_map
324
+ "\n",
325
+ "\n",
def detect_illegal_buildings(building_mask, zoning_mask):
    """Split detected buildings into those touching the restricted zone and the rest.

    Buildings are the connected components of ``building_mask``. A building
    is "illegal" when at least one of its pixels lies where
    ``zoning_mask == 1``.

    Args:
        building_mask: Binary building mask.
        zoning_mask: Array broadcastable to the shape of the label image;
            value 1 marks the restricted zone.

    Returns:
        ``(illegal_buildings, legal_buildings, labels)`` — two lists of
        component labels in ascending order (background label 0 excluded)
        and the label image.

    Raises:
        ValueError: If ``zoning_mask`` cannot be broadcast to the label image.
    """
    num_labels, labels = get_building_components(building_mask)

    # One pass over the image: collect every label present inside the
    # restricted zone, instead of scanning the full image once per building.
    restricted = np.broadcast_to(np.asarray(zoning_mask) == 1, labels.shape)
    flagged = set(np.unique(labels[restricted]).tolist())

    illegal_buildings = []
    legal_buildings = []

    for label in range(1, num_labels):  # skip background (0)
        if label in flagged:
            illegal_buildings.append(label)
        else:
            legal_buildings.append(label)

    return illegal_buildings, legal_buildings, labels
345
+ "\n",
346
+ "\n",
def visualize_illegal(image, labels, illegal_buildings, color=(255, 0, 0)):
    """Return a copy of ``image`` with the given buildings painted over.

    Args:
        image: ``(height, width, 3)`` array; it is not modified.
        labels: ``(height, width)`` label image.
        illegal_buildings: Iterable of label values to highlight.
        color: Value written to every highlighted pixel. Defaults to
            ``(255, 0, 0)``, which is red for an RGB ``uint8`` image.

    Returns:
        A new array with the same shape and dtype as ``image``.
    """
    output = image.copy()

    label_ids = list(illegal_buildings)
    if label_ids:
        # Single membership test over the label image rather than one
        # full-image comparison per building.
        output[np.isin(labels, label_ids)] = color

    return output
355
+ "\n",
356
+ "\n",
# This cell relies on `pred` (predicted mask) and `img` (channels-last image
# array) produced by the inference step of the previous cell.
import matplotlib.pyplot as plt  # used below; this cell does not import it otherwise

# ---- Compute everything first; the plots below depend on these values ----
pred_mask = (pred > 0.5).astype(np.uint8)

zoning_mask = create_zoning_mask(pred_mask.shape)

illegal_buildings, legal_buildings, labels = detect_illegal_buildings(
    pred_mask,
    zoning_mask
)

# Convert the image to uint8 for the overlay. A plain astype(np.uint8) on a
# float image truncates values in [0, 1] to 0 (an all-black picture), so float
# inputs are scaled to 0..255 first; values outside [0, 1] (e.g. a normalised
# image) are min-max stretched before scaling.
if np.issubdtype(img.dtype, np.floating):
    img_lo, img_hi = float(img.min()), float(img.max())
    if img_lo < 0.0 or img_hi > 1.0:
        img_span = img_hi - img_lo
        img_unit = (img - img_lo) / img_span if img_span > 0 else np.zeros_like(img)
    else:
        img_unit = img
    img_uint8 = (img_unit * 255).round().astype(np.uint8)
else:
    img_uint8 = img.astype(np.uint8)

overlay = visualize_illegal(img_uint8, labels, illegal_buildings)

print("Total Buildings:", len(illegal_buildings) + len(legal_buildings))
print("Illegal Buildings:", len(illegal_buildings))
print("Legal Buildings:", len(legal_buildings))

# ---- Side-by-side: mask, zoning, overlay ----
plt.figure(figsize=(12,4))

plt.subplot(1,3,1)
plt.title("Building Mask")
plt.imshow(pred_mask, cmap='gray')

plt.subplot(1,3,2)
plt.title("Zoning Mask")
plt.imshow(zoning_mask, cmap='gray')

plt.subplot(1,3,3)
plt.title("Overlay Result")
plt.imshow(overlay)

plt.show()

# ---- Larger view of the overlay ----
plt.figure(figsize=(8,6))
plt.imshow(overlay)
plt.title("Illegal Buildings Highlighted in Red")
plt.show()


get_ipython().getoutput("ls /kaggle/working")
396
+ ]
397
+ }
398
+ ],
399
+ "metadata": {
400
+ "kernelspec": {
401
+ "display_name": "Python 3",
402
+ "language": "python",
403
+ "name": "python3"
404
+ },
405
+ "language_info": {
406
+ "codemirror_mode": {
407
+ "name": "ipython",
408
+ "version": 3
409
+ },
410
+ "file_extension": ".py",
411
+ "mimetype": "text/x-python",
412
+ "name": "python",
413
+ "nbconvert_exporter": "python",
414
+ "pygments_lexer": "ipython3",
415
+ "version": "3.13.9"
416
+ }
417
+ },
418
+ "nbformat": 4,
419
+ "nbformat_minor": 5
420
+ }