tonyzzzzzz committed on
Commit
f474bfd
1 Parent(s): 0a84ef5

wip: gradio interactive demo

Browse files
Files changed (3) hide show
  1. app.py +90 -0
  2. mapper/utils/exif.py +358 -0
  3. mapper/utils/sensor_data.json +0 -0
app.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from matplotlib import pyplot as plt
3
+ from mapper.utils.io import read_image
4
+ from mapper.utils.exif import EXIF
5
+ from mapper.utils.wrappers import Camera
6
+ from perspective2d import PerspectiveFields
7
+ import numpy as np
8
+ from typing import Optional, Tuple
9
+
10
# HTML blurb passed as the `description` argument of the Gradio interface.
# NOTE(review): the "Paper" link below may be a placeholder arXiv id - confirm
# it points to the MapItAnywhere paper before publishing the demo.
description = """
<h1 align="center">
<ins>MapItAnywhere (MIA) </ins>
<br>
Empowering Bird’s Eye View Mapping using Large-scale Public Data
<br>
with Neural Matching</h1>
<h3 align="center">
<a href="https://mapitanywhere.github.io" target="_blank">Project Page</a> |
<a href="https://arxiv.org/abs/2109.08203" target="_blank">Paper</a> |
<a href="https://github.com/MapItAnywhere/MapItAnywhere" target="_blank">Code</a>
</h3>
<p align="center">
Mapper generates birds-eye-view maps from first person view monocular images. Try our demo by uploading your own images.
</p>
"""
26
+
27
class ImageCalibrator(PerspectiveFields):
    """PerspectiveFields model wrapped to return a roll/pitch pair and a camera."""

    def __init__(self, version: str = "Paramnet-360Cities-edina-centered"):
        """Build the underlying model for ``version`` and switch it to eval mode."""
        super().__init__(version)
        self.eval()

    def run(
        self,
        image_rgb: np.ndarray,
        focal_length: Optional[float] = None,
        exif: Optional[EXIF] = None,
    ) -> Tuple[Tuple[float, float], Camera]:
        """Estimate roll/pitch and a SIMPLE_PINHOLE camera for one image.

        The focal length is taken, in order of preference, from the
        ``focal_length`` argument, from the EXIF focal ratio (when it is
        non-zero), or from the vertical field of view predicted by the model.

        Args:
            image_rgb: Image array whose first two axes are height and width;
                the last axis is reversed before inference (RGB -> BGR).
            focal_length: Known focal length in pixels, if any.
            exif: EXIF reader used as a fallback source for the focal length.

        Returns:
            ``((roll, pitch), camera)`` as predicted by the model.
        """
        height, width, *_ = image_rgb.shape

        exif_is_needed = exif is not None and focal_length is None
        if exif_is_needed:
            focal_ratio = exif.extract_focal()[1]
            if focal_ratio != 0:
                # The EXIF ratio is relative to the longer image side.
                focal_length = focal_ratio * max(height, width)

        prediction = self.inference(img_bgr=image_rgb[..., ::-1])
        roll = prediction["pred_roll"].item()
        pitch = prediction["pred_pitch"].item()

        if focal_length is None:
            # Last resort: derive the focal length from the predicted vertical
            # field of view (converted from degrees).
            half_vfov = np.deg2rad(prediction["pred_vfov"].item()) / 2
            focal_length = height / 2 / np.tan(half_vfov)

        camera_spec = {
            "model": "SIMPLE_PINHOLE",
            "width": width,
            "height": height,
            "params": [focal_length, width / 2 + 0.5, height / 2 + 0.5],
        }
        return (roll, pitch), Camera.from_dict(camera_spec)
59
+
60
def run(input_img):
    """Gradio callback: calibrate the uploaded image and return a preview plot.

    Args:
        input_img: Uploaded file object; only its ``name`` attribute (the
            path of the file on disk) is used.

    Returns:
        A Matplotlib figure showing the input image with the axes hidden.
        The estimated gravity and camera are only printed for now.
    """
    # Local import keeps this block self-contained. A standalone Figure is not
    # registered with pyplot's global state, so nothing is shared between
    # requests and figures are freed once Gradio has rendered them.
    from matplotlib.figure import Figure

    calibrator = ImageCalibrator().to("cuda")

    image_path = input_img.name

    # The image is kept as a host-side array: ImageCalibrator.run reverses the
    # channel axis with a negative-step slice and Matplotlib draws the array
    # directly, neither of which works on a CUDA tensor.
    # NOTE(review): assumes read_image returns an HxWxC NumPy array - confirm.
    image = read_image(image_path)
    with open(image_path, "rb") as fid:
        exif = EXIF(fid, lambda: image.shape[:2])

    gravity, camera = calibrator.run(image, exif=exif)

    print(f"Gravity: {gravity}")
    print(f"Camera: {camera._data}")

    fig = Figure()
    ax = fig.subplots()
    ax.imshow(image)
    ax.axis("off")

    return fig
80
+
81
# Gradio UI: one image-file input wired to `run`, one plot output.
demo = gr.Interface(
    fn=run,
    inputs=[
        gr.File(file_types=["image"], label="Input Image")
    ],
    outputs=[
        gr.Plot(label="Inputs", format="png")
    ],
    description=description,)
# Launching happens at import time, so running this file starts the app.
demo.launch(share=True)
mapper/utils/exif.py ADDED
@@ -0,0 +1,358 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Copied from opensfm.exif to minimize hard dependencies."""
2
+
3
+ import datetime
4
+ import json
5
+ import logging
6
+ from codecs import decode, encode
7
+ from pathlib import Path
8
+ from typing import Any, Dict, Optional, Tuple
9
+
10
+ import exifread
11
+
12
+ logger: logging.Logger = logging.getLogger(__name__)
13
+
14
# Length in millimetres of the resolution units handled by
# EXIF.get_mm_per_unit.
inch_in_mm = 25.4
cm_in_mm = 10
um_in_mm = 0.001
# Not referenced anywhere in this module.
default_projection = "perspective"
# Upper bound applied to the altitude reported by EXIF.extract_geo
# (presumably metres - TODO confirm).
maximum_altitude = 1e4
19
+
20
+
21
def sensor_data():
    """Load the sensor lookup table stored next to this module.

    Reads ``sensor_data.json`` from the directory containing this file and
    returns its top-level mapping with every key lower-cased; values are
    passed through unchanged.
    """
    table_path = Path(__file__).parent / "sensor_data.json"
    # Decode explicitly as UTF-8 so the lookup does not depend on the
    # platform's locale encoding.
    with table_path.open(encoding="utf-8") as fid:
        data = json.load(fid)
    return {k.lower(): v for k, v in data.items()}
25
+
26
+
27
def eval_frac(value) -> Optional[float]:
    """Convert a rational exposing ``num`` and ``den`` into a float.

    Returns ``None`` instead of raising when the division hits a zero
    denominator.
    """
    try:
        numerator, denominator = float(value.num), float(value.den)
        quotient = numerator / denominator
    except ZeroDivisionError:
        quotient = None
    return quotient
32
+
33
+
34
def gps_to_decimal(values, reference) -> Optional[float]:
    """Turn a (degrees, minutes, seconds) triple of rationals into decimal degrees.

    Args:
        values: Sequence whose first three items are rationals accepted by
            ``eval_frac``.
        reference: Hemisphere reference; anything contained in ``"NE"`` keeps
            the result positive, everything else negates it.

    Returns:
        The signed decimal coordinate, or ``None`` if any component could not
        be evaluated.
    """
    hemisphere_sign = -1 if reference not in "NE" else 1
    degrees, minutes, seconds = (eval_frac(values[i]) for i in (0, 1, 2))
    if degrees is None or minutes is None or seconds is None:
        return None
    return hemisphere_sign * (degrees + minutes / 60 + seconds / 3600)
42
+
43
+
44
def get_tag_as_float(tags, key, index: int = 0) -> Optional[float]:
    """Read one value of an EXIF tag as a float.

    Args:
        tags: Mapping of tag name to tag object (each exposing ``values``).
        key: Name of the tag to read.
        index: Position of the wanted value inside the tag's ``values``.

    Returns:
        The value as a float, or ``None`` when the tag is absent or when it is
        a rational with a zero denominator (which is also logged as an error).
    """
    if key not in tags:
        return None

    raw = tags[key].values[index]
    if not isinstance(raw, exifread.utils.Ratio):
        return float(raw)

    converted = eval_frac(raw)
    if converted is None:
        logger.error(
            'The rational "{2}" of tag "{0:s}" at index {1:d} '
            "caused a division by zero error".format(key, index, raw)
        )
    return converted
59
+
60
+
61
def compute_focal(
    focal_35: Optional[float], focal: Optional[float], sensor_width, sensor_string
) -> Tuple[float, float]:
    """Derive the 35mm-equivalent focal length and the focal ratio.

    A positive ``focal_35`` wins. Otherwise the ratio is ``focal`` divided by
    the sensor width, which is looked up in the sensor table under
    ``sensor_string`` when ``sensor_width`` is missing.

    Returns:
        ``(focal_35, focal_ratio)``; both are ``0.0`` when nothing usable is
        available.
    """
    if focal_35 is not None and focal_35 > 0:
        # 35mm film produces 36x24mm pictures.
        return focal_35, focal_35 / 36.0

    width = sensor_width or sensor_data().get(sensor_string, None)
    if not (width and focal):
        return 0.0, 0.0

    ratio = focal / width
    return 36.0 * ratio, ratio
76
+
77
+
78
def sensor_string(make: str, model: str) -> str:
    """Build the lower-case ``"<make> <model>"`` key used for sensor lookups.

    Unless the make is ``"unknown"``, occurrences of the make inside the model
    are dropped first so the make is not repeated in the key.
    """
    deduped_model = model if make == "unknown" else model.replace(make, "")
    combined = " ".join((make.strip(), deduped_model.strip()))
    return combined.strip().lower()
83
+
84
+
85
def unescape_string(s) -> str:
    """Interpret backslash escape sequences contained in ``s``.

    The text is first encoded as latin-1 (characters outside latin-1 become
    backslash escapes) and the bytes are then decoded with ``unicode-escape``.
    """
    escaped_bytes = encode(s, "latin-1", "backslashreplace")
    return decode(escaped_bytes, "unicode-escape")
87
+
88
+
89
class EXIF:
    """Accessor for the EXIF tags of one image file.

    The tags are parsed once at construction time with ``exifread``; the
    ``extract_*`` methods convert them into plain Python values.
    """

    def __init__(
        self, fileobj, image_size_loader=None, use_exif_size=True, name=None
    ) -> None:
        """Parse the EXIF tags of ``fileobj``.

        Args:
            fileobj: File object handed to ``exifread.process_file``; it is
                rewound to offset 0 afterwards.
            image_size_loader: Optional zero-argument callable returning
                ``(height, width)``. When given it takes precedence over the
                size tags.
            use_exif_size: Whether the size tags may be used when no loader
                is given.
            name: Name used in log messages; defaults to ``fileobj.name``.
        """
        self.image_size_loader = image_size_loader
        self.use_exif_size = use_exif_size
        self.fileobj = fileobj
        self.tags = exifread.process_file(fileobj, details=False)
        fileobj.seek(0)
        self.fileobj_name = self.fileobj.name if name is None else name

    def extract_image_size(self) -> Tuple[int, int]:
        """Return the image size as ``(width, height)``.

        Raises:
            ValueError: If neither the loader nor the EXIF tags provide a size.
        """
        if self.image_size_loader is not None:
            # The loader reports (height, width); the return order is swapped.
            height, width = self.image_size_loader()
        elif (
            self.use_exif_size
            and "EXIF ExifImageWidth" in self.tags
            and "EXIF ExifImageLength" in self.tags
        ):
            width, height = (
                int(self.tags["EXIF ExifImageWidth"].values[0]),
                int(self.tags["EXIF ExifImageLength"].values[0]),
            )
        elif (
            self.use_exif_size
            and "Image ImageWidth" in self.tags
            and "Image ImageLength" in self.tags
        ):
            width, height = (
                int(self.tags["Image ImageWidth"].values[0]),
                int(self.tags["Image ImageLength"].values[0]),
            )
        else:
            raise ValueError("Missing image size in EXIF tags or loader.")
        return width, height

    def _decode_make_model(self, value) -> str:
        """Python 2/3 compatible decoding of make/model field."""
        if hasattr(value, "decode"):
            try:
                return value.decode("utf-8")
            except UnicodeDecodeError:
                return "unknown"
        else:
            return value

    def extract_make(self) -> str:
        """Return the lens make, else the camera make, else ``"unknown"``."""
        if "EXIF LensMake" in self.tags:
            make = self.tags["EXIF LensMake"].values
        elif "Image Make" in self.tags:
            make = self.tags["Image Make"].values
        else:
            make = "unknown"
        return self._decode_make_model(make)

    def extract_model(self) -> str:
        """Return the lens model, else the camera model, else ``"unknown"``."""
        if "EXIF LensModel" in self.tags:
            model = self.tags["EXIF LensModel"].values
        elif "Image Model" in self.tags:
            model = self.tags["Image Model"].values
        else:
            model = "unknown"
        return self._decode_make_model(model)

    def extract_focal(self) -> Tuple[float, float]:
        """Return ``(focal_35, focal_ratio)`` computed from the focal tags."""
        make, model = self.extract_make(), self.extract_model()
        focal_35, focal_ratio = compute_focal(
            get_tag_as_float(self.tags, "EXIF FocalLengthIn35mmFilm"),
            get_tag_as_float(self.tags, "EXIF FocalLength"),
            self.extract_sensor_width(),
            sensor_string(make, model),
        )
        return focal_35, focal_ratio

    def extract_sensor_width(self) -> Optional[float]:
        """Compute the sensor width in mm from image width and resolution.

        Returns ``None`` when the focal-plane resolution tags are missing,
        use an unknown unit, or hold no positive value.
        """
        if (
            "EXIF FocalPlaneResolutionUnit" not in self.tags
            or "EXIF FocalPlaneXResolution" not in self.tags
        ):
            return None
        resolution_unit = self.tags["EXIF FocalPlaneResolutionUnit"].values[0]
        mm_per_unit = self.get_mm_per_unit(resolution_unit)
        if not mm_per_unit:
            return None
        pixels_per_unit = get_tag_as_float(self.tags, "EXIF FocalPlaneXResolution")
        if pixels_per_unit is None:
            return None
        if pixels_per_unit <= 0.0:
            # Unusable X resolution: fall back to the Y resolution.
            pixels_per_unit = get_tag_as_float(self.tags, "EXIF FocalPlaneYResolution")
            if pixels_per_unit is None or pixels_per_unit <= 0.0:
                return None
        units_per_pixel = 1 / pixels_per_unit
        width_in_pixels = self.extract_image_size()[0]
        return width_in_pixels * units_per_pixel * mm_per_unit

    def get_mm_per_unit(self, resolution_unit) -> Optional[float]:
        """Length of a resolution unit in millimeters.

        Uses the values from the EXIF specs in
        https://exiftool.org/TagNames/EXIF.html

        Args:
            resolution_unit: the resolution unit value given in the EXIF

        Returns:
            Millimeters per unit, or ``None`` (with a warning logged) for an
            unknown unit value.
        """
        if resolution_unit == 2:  # inch
            return inch_in_mm
        elif resolution_unit == 3:  # cm
            return cm_in_mm
        elif resolution_unit == 4:  # mm
            return 1
        elif resolution_unit == 5:  # um
            return um_in_mm
        else:
            logger.warning(
                "Unknown EXIF resolution unit value: {}".format(resolution_unit)
            )
            return None

    def extract_orientation(self) -> int:
        """Return the orientation tag, or 1 if it is absent, zero or not an int."""
        orientation = 1
        if "Image Orientation" in self.tags:
            value = self.tags.get("Image Orientation").values[0]
            if isinstance(value, int) and value != 0:
                orientation = value
        return orientation

    def extract_ref_lon_lat(self) -> Tuple[str, str]:
        """Return ``(longitude_ref, latitude_ref)``, defaulting to ``("E", "N")``."""
        if "GPS GPSLatitudeRef" in self.tags:
            reflat = self.tags["GPS GPSLatitudeRef"].values
        else:
            reflat = "N"
        if "GPS GPSLongitudeRef" in self.tags:
            reflon = self.tags["GPS GPSLongitudeRef"].values
        else:
            reflon = "E"
        return reflon, reflat

    def extract_lon_lat(self) -> Tuple[Optional[float], Optional[float]]:
        """Return ``(longitude, latitude)`` in decimal degrees.

        Both are ``None`` unless the latitude and the longitude tags are both
        present.
        """
        # A position needs both coordinates; checking only the latitude would
        # raise KeyError on files that carry no longitude tag.
        if "GPS GPSLatitude" not in self.tags or "GPS GPSLongitude" not in self.tags:
            return None, None
        reflon, reflat = self.extract_ref_lon_lat()
        lat = gps_to_decimal(self.tags["GPS GPSLatitude"].values, reflat)
        lon = gps_to_decimal(self.tags["GPS GPSLongitude"].values, reflon)
        return lon, lat

    def extract_altitude(self) -> Optional[float]:
        """Return the signed GPS altitude, or ``None`` if missing or unreadable."""
        if "GPS GPSAltitude" in self.tags:
            alt_value = self.tags["GPS GPSAltitude"].values[0]
            if isinstance(alt_value, exifread.utils.Ratio):
                altitude = eval_frac(alt_value)
            elif isinstance(alt_value, int):
                altitude = float(alt_value)
            else:
                altitude = None

            # Check if GPSAltitudeRef is equal to 1, which means GPSAltitude
            # should be negative, reference: http://www.exif.org/Exif2-2.PDF#page=53
            if (
                "GPS GPSAltitudeRef" in self.tags
                and self.tags["GPS GPSAltitudeRef"].values[0] == 1
                and altitude is not None
            ):
                altitude = -altitude
        else:
            altitude = None
        return altitude

    def extract_dop(self) -> Optional[float]:
        """Return the GPS dilution of precision, or ``None`` if absent."""
        if "GPS GPSDOP" in self.tags:
            return eval_frac(self.tags["GPS GPSDOP"].values[0])
        return None

    def extract_geo(self) -> Dict[str, Any]:
        """Collect the available GPS information in a dictionary.

        Possible keys are ``latitude``, ``longitude``, ``altitude`` (capped at
        ``maximum_altitude``) and ``dop``; entries that are not available are
        left out.
        """
        altitude = self.extract_altitude()
        dop = self.extract_dop()
        lon, lat = self.extract_lon_lat()
        d = {}

        if lon is not None and lat is not None:
            d["latitude"] = lat
            d["longitude"] = lon
        if altitude is not None:
            d["altitude"] = min(maximum_altitude, altitude)
        if dop is not None:
            d["dop"] = dop
        return d

    def extract_capture_time(self) -> float:
        """Return the capture time in seconds since 1970-01-01.

        The GPS date/time stamp is preferred. Otherwise the first parsable of
        DateTimeOriginal, DateTimeDigitized and DateTime is used, shifted by
        the matching time zone offset tag when that tag is present and valid.
        Returns ``0.0`` when no valid time stamp is found.
        """
        if (
            "GPS GPSDate" in self.tags  # Actually GPSDateStamp
            and "GPS GPSTimeStamp" in self.tags
        ):
            try:
                hours_f = get_tag_as_float(self.tags, "GPS GPSTimeStamp", 0)
                minutes_f = get_tag_as_float(self.tags, "GPS GPSTimeStamp", 1)
                if hours_f is None or minutes_f is None:
                    raise TypeError
                hours = int(hours_f)
                minutes = int(minutes_f)
                seconds = get_tag_as_float(self.tags, "GPS GPSTimeStamp", 2)
                # Formatting seconds=None raises TypeError, handled below.
                gps_timestamp_string = "{0:s} {1:02d}:{2:02d}:{3:02f}".format(
                    self.tags["GPS GPSDate"].values, hours, minutes, seconds
                )
                return (
                    datetime.datetime.strptime(
                        gps_timestamp_string, "%Y:%m:%d %H:%M:%S.%f"
                    )
                    - datetime.datetime(1970, 1, 1)
                ).total_seconds()
            except (TypeError, ValueError):
                logger.info(
                    'The GPS time stamp in image file "{0:s}" is invalid. '
                    "Falling back to DateTime*".format(self.fileobj_name)
                )

        time_strings = [
            ("EXIF DateTimeOriginal", "EXIF SubSecTimeOriginal", "EXIF Tag 0x9011"),
            ("EXIF DateTimeDigitized", "EXIF SubSecTimeDigitized", "EXIF Tag 0x9012"),
            ("Image DateTime", "Image SubSecTime", "Image Tag 0x9010"),
        ]
        for datetime_tag, subsec_tag, offset_tag in time_strings:
            if datetime_tag in self.tags:
                date_time = self.tags[datetime_tag].values
                if subsec_tag in self.tags:
                    subsec_time = self.tags[subsec_tag].values
                else:
                    subsec_time = "0"
                try:
                    s = "{0:s}.{1:s}".format(date_time, subsec_time)
                    d = datetime.datetime.strptime(s, "%Y:%m:%d %H:%M:%S.%f")
                except ValueError:
                    logger.debug(
                        'The "{1:s}" time stamp or "{2:s}" tag is invalid in '
                        'image file "{0:s}"'.format(
                            self.fileobj_name, datetime_tag, subsec_tag
                        )
                    )
                    continue
                # Test for OffsetTimeOriginal | OffsetTimeDigitized | OffsetTime
                if offset_tag in self.tags:
                    offset_time = self.tags[offset_tag].values
                    try:
                        offset_hours = int(offset_time[0:3])
                        offset_minutes = int(offset_time[4:6])
                        # The sign of "+HH:MM" / "-HH:MM" covers the minutes
                        # too; it is read from the string so "-00:30" is
                        # handled as well.
                        if offset_time[0:1] == "-":
                            offset_minutes = -offset_minutes
                        # UTC = local time - offset.
                        d -= datetime.timedelta(
                            hours=offset_hours, minutes=offset_minutes
                        )
                    except (TypeError, ValueError):
                        logger.debug(
                            'The "{0:s}" time zone offset in image file "{1:s}"'
                            " is invalid".format(offset_tag, self.fileobj_name)
                        )
                        logger.debug(
                            'Naively assuming UTC on "{0:s}" in image file '
                            '"{1:s}"'.format(datetime_tag, self.fileobj_name)
                        )
                else:
                    logger.debug(
                        "No GPS time stamp and no time zone offset in image "
                        'file "{0:s}"'.format(self.fileobj_name)
                    )
                    logger.debug(
                        'Naively assuming UTC on "{0:s}" in image file "{1:s}"'.format(
                            datetime_tag, self.fileobj_name
                        )
                    )
                return (d - datetime.datetime(1970, 1, 1)).total_seconds()
        logger.info(
            'Image file "{0:s}" has no valid time stamp'.format(self.fileobj_name)
        )
        return 0.0
mapper/utils/sensor_data.json ADDED
The diff for this file is too large to render. See raw diff