camilleseab committed on
Commit
d699f2f
1 Parent(s): 85a24a8

First working version of app

Browse files
.gitattributes CHANGED
@@ -1 +1,2 @@
 
1
  weights/*.pt filter=lfs diff=lfs merge=lfs -text
 
1
+ weights filter=lfs diff=lfs merge=lfs -text
2
  weights/*.pt filter=lfs diff=lfs merge=lfs -text
environment.yml CHANGED
@@ -5,7 +5,7 @@ dependencies:
5
  - python
6
  - ipykernel
7
  - pip
8
- - ultralytics
9
  - pillow
10
  - python-dotenv
11
  - opencv
 
5
  - python
6
  - ipykernel
7
  - pip
8
+ - ultralytics=8.0.186
9
  - pillow
10
  - python-dotenv
11
  - opencv
notebooks/app.ipynb CHANGED
@@ -2,135 +2,125 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": 1,
6
  "metadata": {},
7
  "outputs": [],
8
  "source": [
9
- "import ipywidgets as widgets"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  ]
11
  },
12
  {
13
  "cell_type": "code",
14
- "execution_count": 2,
15
  "metadata": {},
16
  "outputs": [],
17
  "source": [
 
 
 
18
  "DEFAULTS = {\n",
19
  " 'size': 640,\n",
20
- " 'heading': 140,\n",
21
  " 'pitch': 10,\n",
22
- " 'fov': 50\n",
23
- "}"
 
 
 
 
 
 
 
 
24
  ]
25
  },
26
  {
27
  "cell_type": "code",
28
- "execution_count": 3,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  "metadata": {},
30
  "outputs": [
31
  {
32
- "data": {
33
- "application/vnd.jupyter.widget-view+json": {
34
- "model_id": "57fe1b257f5448208a6e6c07ada26ef8",
35
- "version_major": 2,
36
- "version_minor": 0
37
- },
38
- "text/plain": [
39
- "Text(value='33rd & Loch Raven Baltimore MD', description='Location', layout=Layout(width='600px'), style=TextS…"
40
- ]
41
- },
42
- "metadata": {},
43
- "output_type": "display_data"
44
- },
45
- {
46
- "data": {
47
- "application/vnd.jupyter.widget-view+json": {
48
- "model_id": "6a4f58d9f343425993aa59f5e3271118",
49
- "version_major": 2,
50
- "version_minor": 0
51
- },
52
- "text/plain": [
53
- "IntSlider(value=640, description='Image size', layout=Layout(width='600px'), max=1024, min=100, style=SliderSt…"
54
- ]
55
- },
56
- "metadata": {},
57
- "output_type": "display_data"
58
- },
59
- {
60
- "data": {
61
- "application/vnd.jupyter.widget-view+json": {
62
- "model_id": "36da56e191be4d7fb5c26a3e90145d4b",
63
- "version_major": 2,
64
- "version_minor": 0
65
- },
66
- "text/plain": [
67
- "IntSlider(value=140, description='Heading (rotation)', layout=Layout(width='600px'), max=360, style=SliderStyl…"
68
- ]
69
- },
70
- "metadata": {},
71
- "output_type": "display_data"
72
- },
73
- {
74
- "data": {
75
- "application/vnd.jupyter.widget-view+json": {
76
- "model_id": "cb10a0a70dc04d2690dc41d7eec0bb1f",
77
- "version_major": 2,
78
- "version_minor": 0
79
- },
80
- "text/plain": [
81
- "IntSlider(value=10, description='Pitch (tilt)', layout=Layout(width='600px'), max=40, style=SliderStyle(descri…"
82
- ]
83
- },
84
- "metadata": {},
85
- "output_type": "display_data"
86
- },
87
- {
88
- "data": {
89
- "application/vnd.jupyter.widget-view+json": {
90
- "model_id": "2425ac1c49504ec199bdee5abee9d2e3",
91
- "version_major": 2,
92
- "version_minor": 0
93
- },
94
- "text/plain": [
95
- "IntSlider(value=50, description='Field of view (zoom)', layout=Layout(width='600px'), max=120, min=10, style=S…"
96
- ]
97
- },
98
- "metadata": {},
99
- "output_type": "display_data"
100
- },
101
- {
102
- "data": {
103
- "application/vnd.jupyter.widget-view+json": {
104
- "model_id": "da832f9cbde14700a60901e887b1789c",
105
- "version_major": 2,
106
- "version_minor": 0
107
- },
108
- "text/plain": [
109
- "Button(description='Get image', style=ButtonStyle())"
110
- ]
111
- },
112
- "metadata": {},
113
- "output_type": "display_data"
114
- },
115
- {
116
- "data": {
117
- "application/vnd.jupyter.widget-view+json": {
118
- "model_id": "3d8bec926ef0413db6d9865418dc2045",
119
- "version_major": 2,
120
- "version_minor": 0
121
- },
122
- "text/plain": [
123
- "Output()"
124
- ]
125
- },
126
- "metadata": {},
127
- "output_type": "display_data"
128
  }
129
  ],
130
  "source": [
131
- "output = widgets.Output()\n",
132
  "lbl_style = {'description_width': '200px'}\n",
133
- "layout = widgets.Layout(width='600px')\n",
134
  "location = widgets.Text(value='33rd & Loch Raven Baltimore MD',\n",
135
  " description='Location',\n",
136
  " layout=layout,\n",
@@ -160,46 +150,63 @@
160
  " style=lbl_style,\n",
161
  " description='Field of view (zoom)')\n",
162
  "\n",
163
- "button = widgets.Button(description='Get image')\n",
 
 
 
 
 
 
164
  "\n",
165
- "display(location, size, heading, pitch, fov, button, output)\n",
 
166
  "\n",
167
  "\n",
168
  "def button_click(b):\n",
169
- " output.clear_output()\n",
170
- " with output:\n",
171
- " txt = f'location: {location.value}'\n",
172
- " print(txt)\n",
173
- " return txt\n",
 
 
 
 
 
 
 
174
  "\n",
175
- "sv_img = button.on_click(button_click)\n"
176
  ]
177
  },
178
  {
179
  "cell_type": "code",
180
- "execution_count": null,
181
- "metadata": {},
182
- "outputs": [],
183
- "source": []
184
- },
185
- {
186
- "cell_type": "code",
187
- "execution_count": null,
188
- "metadata": {},
189
- "outputs": [],
190
- "source": []
191
- },
192
- {
193
- "cell_type": "code",
194
- "execution_count": null,
195
  "metadata": {},
196
- "outputs": [],
197
- "source": []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
198
  }
199
  ],
200
  "metadata": {
201
  "kernelspec": {
202
- "display_name": "voila",
203
  "language": "python",
204
  "name": "python3"
205
  },
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": 179,
6
  "metadata": {},
7
  "outputs": [],
8
  "source": [
9
+ "from ultralytics import YOLO, RTDETR\n",
10
+ "from pathlib import Path \n",
11
+ "import cv2\n",
12
+ "from dotenv import load_dotenv\n",
13
+ "import os\n",
14
+ "import requests\n",
15
+ "import urllib\n",
16
+ "from io import BytesIO\n",
17
+ "from PIL import Image, ImageDraw\n",
18
+ "import matplotlib.pyplot as plt\n",
19
+ "import numpy as np\n",
20
+ "import ipywidgets as widgets\n",
21
+ "\n",
22
+ "%matplotlib inline\n",
23
+ "\n",
24
+ "os.chdir('/home/camille/code/surveillance/')\n",
25
+ "_ = load_dotenv()\n"
26
+ ]
27
+ },
28
+ {
29
+ "cell_type": "markdown",
30
+ "metadata": {},
31
+ "source": [
32
+ "# Re-surveilling surveillance\n",
33
+ "\n",
34
+ "This is a demonstration of using object detection models to locate surveillance cameras in Google Street View imagery. There are two models: **YOLO**, which runs relatively quickly but is less accurate, and **RT-DETR**, a newer model type that is slower but better at finding cameras.\n",
35
+ "\n",
36
+ "Enter a location to look up, then adjust the camera parameters. After you click the **Get image** button, any cameras detected by the models will be highlighted."
37
  ]
38
  },
39
  {
40
  "cell_type": "code",
41
+ "execution_count": 180,
42
  "metadata": {},
43
  "outputs": [],
44
  "source": [
45
+ "yolo = YOLO('./weights/yolo_tile_best.pt')\n",
46
+ "detr = RTDETR('./weights/detr_tile_frz_best.pt')\n",
47
+ "\n",
48
  "DEFAULTS = {\n",
49
  " 'size': 640,\n",
50
+ " 'heading': 160,\n",
51
  " 'pitch': 10,\n",
52
+ " 'fov': 50,\n",
53
+ " 'model': 'detr'\n",
54
+ "}\n",
55
+ "\n",
56
+ "rows = 4\n",
57
+ "cols = 3\n",
58
+ "grid = widgets.GridspecLayout(rows, cols, \n",
59
+ " min_height = '400px', \n",
60
+ " grid_gap = '8px',\n",
61
+ " merge = False)\n"
62
  ]
63
  },
64
  {
65
  "cell_type": "code",
66
+ "execution_count": 181,
67
+ "metadata": {},
68
+ "outputs": [],
69
+ "source": [
70
+ "def get_sv_img(\n",
71
+ " location: str, \n",
72
+ " key_name: str = 'GOOGLE_KEY',\n",
73
+ " size: int = DEFAULTS['size'],\n",
74
+ " heading: int = DEFAULTS['heading'],\n",
75
+ " pitch: int = DEFAULTS['pitch'],\n",
76
+ " fov: int = DEFAULTS['fov']\n",
77
+ ") -> Image:\n",
78
+ " img_params = {\n",
79
+ " 'location': location,\n",
80
+ " 'size': f'{size}x{size}',\n",
81
+ " 'heading': heading,\n",
82
+ " 'pitch': pitch,\n",
83
+ " 'fov': fov,\n",
84
+ " 'key': os.getenv(key_name)\n",
85
+ " }\n",
86
+ " r = requests.get('https://maps.googleapis.com/maps/api/streetview', params = urllib.parse.urlencode(img_params))\n",
87
+ " img = Image.open(BytesIO(r.content))\n",
88
+ " return img\n",
89
+ "\n",
90
+ "def make_img_widget(img: Image, label: str, size: int = 480) -> widgets.Image:\n",
91
+ " buff = BytesIO()\n",
92
+ " img.save(buff, format = 'PNG')\n",
93
+ " img_widget = widgets.Image(value = buff.getvalue(), width = size, height = size)\n",
94
+ " grd = widgets.VBox([widgets.Label(label), img_widget])\n",
95
+ " return grd\n",
96
+ "\n",
97
+ "def label_img(img, model) -> Image:\n",
98
+ " pred = model.predict(img, device = 'cpu')[0].plot(labels = False)\n",
99
+ " pred = cv2.cvtColor(pred, cv2.COLOR_BGR2RGB)\n",
100
+ " return Image.fromarray(pred)"
101
+ ]
102
+ },
103
+ {
104
+ "cell_type": "code",
105
+ "execution_count": 182,
106
  "metadata": {},
107
  "outputs": [
108
  {
109
+ "name": "stderr",
110
+ "output_type": "stream",
111
+ "text": [
112
+ "\n",
113
+ "0: 640x640 1 surveillance, 262.8ms\n",
114
+ "Speed: 1.9ms preprocess, 262.8ms inference, 1.1ms postprocess per image at shape (1, 3, 640, 640)\n",
115
+ "\n",
116
+ "0: 640x640 3 surveillances, 988.3ms\n",
117
+ "Speed: 1.8ms preprocess, 988.3ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)\n"
118
+ ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
  }
120
  ],
121
  "source": [
 
122
  "lbl_style = {'description_width': '200px'}\n",
123
+ "layout = widgets.Layout(width='60%')\n",
124
  "location = widgets.Text(value='33rd & Loch Raven Baltimore MD',\n",
125
  " description='Location',\n",
126
  " layout=layout,\n",
 
150
  " style=lbl_style,\n",
151
  " description='Field of view (zoom)')\n",
152
  "\n",
153
+ "button = widgets.Button(description='Get image', button_style = 'primary')\n",
154
+ "\n",
155
+ "mod_choice = widgets.Dropdown(value=DEFAULTS['model'],\n",
156
+ " options=[('RT-DETR', 'detr'), ('YOLOv8', 'yolo')],\n",
157
+ " layout=layout,\n",
158
+ " style=lbl_style,\n",
159
+ " description='Type of model')\n",
160
  "\n",
161
+ "grid[0, :] = widgets.VBox([location, size, heading, pitch, fov, button],\n",
162
+ " layout = widgets.Layout(height = 'auto'))\n",
163
  "\n",
164
  "\n",
165
  "def button_click(b):\n",
166
+ " img = get_sv_img(location=location.value,\n",
167
+ " size=size.value,\n",
168
+ " heading=heading.value,\n",
169
+ " pitch=pitch.value,\n",
170
+ " fov=fov.value)\n",
171
+ " if img is not None:\n",
172
+ " grid[1:, 0] = make_img_widget(img, 'Original image')\n",
173
+ " yolo_pred = label_img(img, yolo)\n",
174
+ " grid[1:, 1] = make_img_widget(yolo_pred, 'YOLO predictions')\n",
175
+ " detr_pred = label_img(img, detr)\n",
176
+ " grid[1:, 2] = make_img_widget(detr_pred, 'RT-DETR predictions')\n",
177
+ " \n",
178
  "\n",
179
+ "button.on_click(button_click)\n"
180
  ]
181
  },
182
  {
183
  "cell_type": "code",
184
+ "execution_count": 184,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
185
  "metadata": {},
186
+ "outputs": [
187
+ {
188
+ "data": {
189
+ "application/vnd.jupyter.widget-view+json": {
190
+ "model_id": "9020055a57294d38b6f3e40659b12a1d",
191
+ "version_major": 2,
192
+ "version_minor": 0
193
+ },
194
+ "text/plain": [
195
+ "GridspecLayout(children=(VBox(children=(Text(value='33rd & Loch Raven Baltimore MD', description='Location', l…"
196
+ ]
197
+ },
198
+ "metadata": {},
199
+ "output_type": "display_data"
200
+ }
201
+ ],
202
+ "source": [
203
+ "display(grid)"
204
+ ]
205
  }
206
  ],
207
  "metadata": {
208
  "kernelspec": {
209
+ "display_name": "cap",
210
  "language": "python",
211
  "name": "python3"
212
  },
weights/detr_full_frz_best.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8646e7c3f92c0e3bc96343cf1fad22bb252ccefb1b4cf44a69abbbadd6545b9d
3
+ size 52028929
weights/detr_tile_frz_best.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7acae2ecb5d1de1bcdf9b51afb305d696a086df4f85b6ecc533901f060eb9683
3
+ size 66137280
weights/yolo_tile_best.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8646e7c3f92c0e3bc96343cf1fad22bb252ccefb1b4cf44a69abbbadd6545b9d
3
+ size 52028929