# Copyright (c) Meta Platforms, Inc. and affiliates.

import argparse
import asyncio
import json
import shutil
from collections import defaultdict
from pathlib import Path
from typing import List

import cv2
import numpy as np
from omegaconf import DictConfig, OmegaConf
from opensfm.pygeometry import Camera
from opensfm.pymap import Shot
from opensfm.undistort import (
    perspective_camera_from_fisheye,
    perspective_camera_from_perspective,
)
from tqdm import tqdm
from tqdm.contrib.concurrent import thread_map

from ... import logger
from ...osm.tiling import TileManager
from ...osm.viz import GeoPlotter
from ...utils.geo import BoundaryBox, Projection
from ...utils.io import DATA_URL, download_file, write_json
from ..utils import decompose_rotmat
from .dataset import MapillaryDataModule
from .download import (
    MapillaryDownloader,
    fetch_image_infos,
    fetch_images_pixels,
    image_filename,
    opensfm_shot_from_info,
)
from .utils import (
    CameraUndistorter,
    PanoramaUndistorter,
    keyframe_selection,
    perspective_camera_from_pano,
    scale_camera,
    undistort_shot,
)
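

# Per-location parameters: the query bounding box (given as [lon, lat] pairs,
# reversed to [lat, lon]), optional filters on camera models/types or image
# owners, and the name of the OSM extract used to build the map tiles.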
location_to_params = {
    "sanfrancisco_soma": {
        "bbox": BoundaryBox(
            [-122.410307, 37.770364][::-1], [-122.388772, 37.795545][::-1]
        ),
        "camera_models": ["GoPro Max"],
        "osm_file": "sanfrancisco.osm",
    },
    "sanfrancisco_hayes": {
        "bbox": BoundaryBox(
            [-122.438415, 37.768634][::-1], [-122.410605, 37.783894][::-1]
        ),
        "camera_models": ["GoPro Max"],
        "osm_file": "sanfrancisco.osm",
    },
    "amsterdam": {
        "bbox": BoundaryBox([4.845284, 52.340679][::-1], [4.926147, 52.386299][::-1]),
        "camera_models": ["GoPro Max"],
        "osm_file": "amsterdam.osm",
    },
    "lemans": {
        "bbox": BoundaryBox([0.185752, 47.995125][::-1], [0.224088, 48.014209][::-1]),
        "owners": ["xXOocM1jUB4jaaeukKkmgw"],  # sogefi
        "osm_file": "lemans.osm",
    },
    "berlin": {
        "bbox": BoundaryBox([13.416271, 52.459656][::-1], [13.469829, 52.499195][::-1]),
        "owners": ["LT3ajUxH6qsosamrOHIrFw"],  # supaplex030
        "osm_file": "berlin.osm",
    },
    "montrouge": {
        "bbox": BoundaryBox([2.298958, 48.80874][::-1], [2.332989, 48.825276][::-1]),
        "owners": [
            "XtzGKZX2_VIJRoiJ8IWRNQ",
            "C4ENdWpJdFNf8CvnQd7NrQ",
            "e_ZBE6mFd7CYNjRSpLl-Lg",
        ],  # overflorian, phyks, francois2
        "camera_models": ["LG-R105"],
        "osm_file": "paris.osm",
    },
    "nantes": {
        "bbox": BoundaryBox([-1.585839, 47.198289][::-1], [-1.51318, 47.236161][::-1]),
        "owners": [
            "jGdq3CL-9N-Esvj3mtCWew",
            "s-j5BH9JRIzsgORgaJF3aA",
        ],  # c_mobilite, cartocite
        "osm_file": "nantes.osm",
    },
    "toulouse": {
        "bbox": BoundaryBox([1.429457, 43.591434][::-1], [1.456653, 43.61343][::-1]),
        "owners": ["MNkhq6MCoPsdQNGTMh3qsQ"],  # tyndare
        "osm_file": "toulouse.osm",
    },
    "vilnius": {
        "bbox": BoundaryBox([25.258633, 54.672956][::-1], [25.296094, 54.696755][::-1]),
        "owners": ["bClduFF6Gq16cfwCdhWivw", "u5ukBseATUS8jUbtE43fcO"],  # kedas, vms
        "osm_file": "vilnius.osm",
    },
    "helsinki": {
        "bbox": BoundaryBox(
            [24.8975480117, 60.1449128318][::-1], [24.9816543235, 60.1770977471][::-1]
        ),
        "camera_types": ["spherical", "equirectangular"],
        "osm_file": "helsinki.osm",
    },
    "milan": {
        "bbox": BoundaryBox(
            [9.1732723899, 45.4810977947][::-1],
            [9.2255987917, 45.5284238563][::-1],
        ),
        "camera_types": ["spherical", "equirectangular"],
        "osm_file": "milan.osm",
    },
    "avignon": {
        "bbox": BoundaryBox(
            [4.7887045302, 43.9416178156][::-1], [4.8227015622, 43.9584848909][::-1]
        ),
        "camera_types": ["spherical", "equirectangular"],
        "osm_file": "avignon.osm",
    },
    "paris": {
        "bbox": BoundaryBox([2.306823, 48.833827][::-1], [2.39067, 48.889335][::-1]),
        "camera_types": ["spherical", "equirectangular"],
        "osm_file": "paris.osm",
    },
}
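

# Default processing configuration; individual entries can be overridden from
# the command line via dotlist arguments (see the __main__ block below).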
default_cfg = OmegaConf.create(
    {
        "max_image_size": 512,
        "do_legacy_pano_offset": True,
        "min_dist_between_keyframes": 4,
        "tiling": {
            "tile_size": 128,
            "margin": 128,
            "ppm": 2,
        },
    }
)
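

# Deterministic random offset (in degrees, within ±45) used when cutting
# perspective views out of a panorama, seeded from the SfM cluster id
# (legacy behavior) or from the sequence id.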
def get_pano_offset(image_info: dict, do_legacy: bool = False) -> float:
    if do_legacy:
        seed = int(image_info["sfm_cluster"]["id"])
    else:
        seed = image_info["sequence"].__hash__()
    seed = seed % (2**32 - 1)
    return np.random.RandomState(seed).uniform(-45, 45)
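

# Undistort a single image into one or more perspective views and write them
# to disk; returns the corresponding OpenSfM shots, or None if the raw image
# is missing.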
def process_shot(
    shot: Shot, info: dict, image_path: Path, output_dir: Path, cfg: DictConfig
) -> List[Shot]:
    if not image_path.exists():
        return None
    image_orig = cv2.imread(str(image_path))
    max_size = cfg.max_image_size
    pano_offset = None
    camera = shot.camera
    camera.width, camera.height = image_orig.shape[:2][::-1]
    if camera.is_panorama(camera.projection_type):
        camera_new = perspective_camera_from_pano(camera, max_size)
        undistorter = PanoramaUndistorter(camera, camera_new)
        pano_offset = get_pano_offset(info, cfg.do_legacy_pano_offset)
    elif camera.projection_type in ["fisheye", "perspective"]:
        if camera.projection_type == "fisheye":
            camera_new = perspective_camera_from_fisheye(camera)
        else:
            camera_new = perspective_camera_from_perspective(camera)
        camera_new = scale_camera(camera_new, max_size)
        camera_new.id = camera.id + "_undistorted"
        undistorter = CameraUndistorter(camera, camera_new)
    else:
        raise NotImplementedError(camera.projection_type)
    shots_undist, images_undist = undistort_shot(
        image_orig, shot, undistorter, pano_offset
    )
    for shot, image in zip(shots_undist, images_undist):
        cv2.imwrite(str(output_dir / f"{shot.id}.jpg"), image)
    return shots_undist
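

# Serialize the pose and geo-metadata of one undistorted view into a
# JSON-friendly dictionary.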
def pack_shot_dict(shot: Shot, info: dict) -> dict:
    latlong = info["computed_geometry"]["coordinates"][::-1]
    latlong_gps = info["geometry"]["coordinates"][::-1]
    w_p_c = shot.pose.get_origin()
    w_r_c = shot.pose.get_R_cam_to_world()
    rpy = decompose_rotmat(w_r_c)
    return dict(
        camera_id=shot.camera.id,
        latlong=latlong,
        t_c2w=w_p_c,
        R_c2w=w_r_c,
        roll_pitch_yaw=rpy,
        capture_time=info["captured_at"],
        gps_position=np.r_[latlong_gps, info["altitude"]],
        compass_angle=info["compass_angle"],
        chunk_id=int(info["sfm_cluster"]["id"]),
    )
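

# Convert an OpenSfM perspective camera into a PINHOLE camera dictionary with
# params (fx, fy, cx, cy) in pixel coordinates.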
def pack_camera_dict(camera: Camera) -> dict:
    assert camera.projection_type == "perspective"
    K = camera.get_K_in_pixel_coordinates(camera.width, camera.height)
    return dict(
        id=camera.id,
        model="PINHOLE",
        width=camera.width,
        height=camera.height,
        params=K[[0, 1, 0, 1], [0, 1, 2, 2]],
    )
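

# Process one Mapillary sequence: order images by capture time, select
# keyframes, undistort them in parallel, and group the resulting views and
# cameras by (sub-)sequence id.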
def process_sequence(
    image_ids: List[int],
    image_infos: dict,
    projection: Projection,
    cfg: DictConfig,
    raw_image_dir: Path,
    out_image_dir: Path,
):
    shots = []
    image_ids = sorted(image_ids, key=lambda i: image_infos[i]["captured_at"])
    for i in image_ids:
        _, shot = opensfm_shot_from_info(image_infos[i], projection)
        shots.append(shot)
    if not shots:
        return {}
    shot_idxs = keyframe_selection(shots, min_dist=cfg.min_dist_between_keyframes)
    shots = [shots[i] for i in shot_idxs]

    shots_out = thread_map(
        lambda shot: process_shot(
            shot,
            image_infos[int(shot.id)],
            raw_image_dir / image_filename.format(image_id=shot.id),
            out_image_dir,
            cfg,
        ),
        shots,
        disable=True,
    )
    shots_out = [(i, s) for i, ss in enumerate(shots_out) if ss is not None for s in ss]

    dump = {}
    for index, shot in shots_out:
        i, suffix = shot.id.rsplit("_", 1)
        info = image_infos[int(i)]
        seq_id = info["sequence"]
        is_pano = not suffix.endswith("undistorted")
        if is_pano:
            seq_id += f"_{suffix}"
        if seq_id not in dump:
            dump[seq_id] = dict(views={}, cameras={})
        view = pack_shot_dict(shot, info)
        view["index"] = index
        dump[seq_id]["views"][shot.id] = view
        dump[seq_id]["cameras"][shot.camera.id] = pack_camera_dict(shot.camera)
    return dump
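

# End-to-end processing of one location: fetch image metadata and pixels,
# undistort all sequences, dump the views, build or download the map tiles,
# and write a visualization of the train/val split.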
def process_location(
    location: str,
    data_dir: Path,
    split_path: Path,
    token: str,
    cfg: DictConfig,
    generate_tiles: bool = False,
):
    params = location_to_params[location]
    bbox = params["bbox"]
    projection = Projection(*bbox.center)
    splits = json.loads(split_path.read_text())
    image_ids = [i for split in splits.values() for i in split[location]]

    loc_dir = data_dir / location
    infos_dir = loc_dir / "image_infos"
    raw_image_dir = loc_dir / "images_raw"
    out_image_dir = loc_dir / "images"
    for d in (infos_dir, raw_image_dir, out_image_dir):
        d.mkdir(parents=True, exist_ok=True)

    downloader = MapillaryDownloader(token)
    loop = asyncio.get_event_loop()

    logger.info("Fetching metadata for all images.")
    image_infos, num_fail = loop.run_until_complete(
        fetch_image_infos(image_ids, downloader, infos_dir)
    )
    logger.info("%d failures (%.1f%%).", num_fail, 100 * num_fail / len(image_ids))

    logger.info("Fetching image pixels.")
    image_urls = [(i, info["thumb_2048_url"]) for i, info in image_infos.items()]
    num_fail = loop.run_until_complete(
        fetch_images_pixels(image_urls, downloader, raw_image_dir)
    )
    logger.info("%d failures (%.1f%%).", num_fail, 100 * num_fail / len(image_urls))

    seq_to_image_ids = defaultdict(list)
    for i, info in image_infos.items():
        seq_to_image_ids[info["sequence"]].append(i)
    seq_to_image_ids = dict(seq_to_image_ids)

    dump = {}
    for seq_image_ids in tqdm(seq_to_image_ids.values()):
        dump.update(
            process_sequence(
                seq_image_ids,
                image_infos,
                projection,
                cfg,
                raw_image_dir,
                out_image_dir,
            )
        )
    write_json(loc_dir / "dump.json", dump)

    # Get the view locations
    view_ids = []
    views_latlon = []
    for seq in dump:
        for view_id, view in dump[seq]["views"].items():
            view_ids.append(view_id)
            views_latlon.append(view["latlong"])
    views_latlon = np.stack(views_latlon)
    view_ids = np.array(view_ids)
    views_xy = projection.project(views_latlon)

    tiles_path = loc_dir / MapillaryDataModule.default_cfg["tiles_filename"]
    if generate_tiles:
        logger.info("Creating the map tiles.")
        bbox_data = BoundaryBox(views_xy.min(0), views_xy.max(0))
        bbox_tiling = bbox_data + cfg.tiling.margin
        osm_dir = data_dir / "osm"
        osm_path = osm_dir / params["osm_file"]
        if not osm_path.exists():
            logger.info("Downloading OSM raw data.")
            download_file(DATA_URL + f"/osm/{params['osm_file']}", osm_path)
        if not osm_path.exists():
            raise FileNotFoundError(f"Cannot find OSM data file {osm_path}.")
        tile_manager = TileManager.from_bbox(
            projection,
            bbox_tiling,
            cfg.tiling.ppm,
            tile_size=cfg.tiling.tile_size,
            path=osm_path,
        )
        tile_manager.save(tiles_path)
    else:
        logger.info("Downloading pre-generated map tiles.")
        download_file(DATA_URL + f"/tiles/{location}.pkl", tiles_path)
        tile_manager = TileManager.load(tiles_path)

    # Visualize the data split
    plotter = GeoPlotter()
    view_ids_val = set(splits["val"][location])
    is_val = np.array([int(i.rsplit("_", 1)[0]) in view_ids_val for i in view_ids])
    plotter.points(views_latlon[~is_val], "red", view_ids[~is_val], "train")
    plotter.points(views_latlon[is_val], "green", view_ids[is_val], "val")
    plotter.bbox(bbox, "blue", "query bounding box")
    plotter.bbox(
        projection.unproject(tile_manager.bbox), "black", "tiling bounding box"
    )
    geo_viz_path = loc_dir / f"split_{location}.html"
    plotter.fig.write_html(geo_viz_path)
    logger.info("Wrote split visualization to %s.", geo_viz_path)

    shutil.rmtree(raw_image_dir)
    logger.info("Done processing for location %s.", location)
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--locations", type=str, nargs="+", default=list(location_to_params)
    )
    parser.add_argument("--split_filename", type=str, default="splits_MGL_13loc.json")
    parser.add_argument("--token", type=str, required=True)
    parser.add_argument(
        "--data_dir", type=Path, default=MapillaryDataModule.default_cfg["data_dir"]
    )
    parser.add_argument("--generate_tiles", action="store_true")
    parser.add_argument("dotlist", nargs="*")
    args = parser.parse_args()

    args.data_dir.mkdir(exist_ok=True, parents=True)
    shutil.copy(Path(__file__).parent / args.split_filename, args.data_dir)

    cfg_ = OmegaConf.merge(default_cfg, OmegaConf.from_cli(args.dotlist))
    for location in args.locations:
        logger.info("Starting processing for location %s.", location)
        process_location(
            location,
            args.data_dir,
            args.data_dir / args.split_filename,
            args.token,
            cfg_,
            args.generate_tiles,
        )