Realcat committed • Commit 63f3cf2
Parent(s): 4487d43

fix: eloftr

This view is limited to 50 files because it contains too many changes; long lines and the tail of the last file are truncated by the viewer. See the raw diff for the full change set.
- hloc/extractors/sfd2.py +5 -7
- hloc/matchers/eloftr.py +10 -6
- hloc/matchers/imp.py +5 -6
- third_party/pram/.gitignore +13 -0
- third_party/pram/LICENSE +2 -0
- third_party/pram/README.md +207 -0
- third_party/pram/assets/map_sparsification.gif +3 -0
- third_party/pram/assets/multi_recognition.png +3 -0
- third_party/pram/assets/overview.png +3 -0
- third_party/pram/assets/pipeline1.png +3 -0
- third_party/pram/assets/pram_demo.gif +3 -0
- third_party/pram/assets/sam_openvoc.png +3 -0
- third_party/pram/colmap_utils/camera_intrinsics.py +30 -0
- third_party/pram/colmap_utils/database.py +352 -0
- third_party/pram/colmap_utils/geometry.py +17 -0
- third_party/pram/colmap_utils/io.py +78 -0
- third_party/pram/colmap_utils/parsers.py +73 -0
- third_party/pram/colmap_utils/read_write_model.py +627 -0
- third_party/pram/colmap_utils/utils.py +1 -0
- third_party/pram/configs/config_train_12scenes_sfd2.yaml +102 -0
- third_party/pram/configs/config_train_7scenes_sfd2.yaml +104 -0
- third_party/pram/configs/config_train_aachen_sfd2.yaml +104 -0
- third_party/pram/configs/config_train_cambridge_sfd2.yaml +103 -0
- third_party/pram/configs/config_train_multiset_sfd2.yaml +100 -0
- third_party/pram/configs/datasets/12Scenes.yaml +166 -0
- third_party/pram/configs/datasets/7Scenes.yaml +96 -0
- third_party/pram/configs/datasets/Aachen.yaml +15 -0
- third_party/pram/configs/datasets/CambridgeLandmarks.yaml +67 -0
- third_party/pram/dataset/aachen.py +119 -0
- third_party/pram/dataset/basicdataset.py +477 -0
- third_party/pram/dataset/cambridge_landmarks.py +101 -0
- third_party/pram/dataset/customdataset.py +93 -0
- third_party/pram/dataset/get_dataset.py +89 -0
- third_party/pram/dataset/recdataset.py +95 -0
- third_party/pram/dataset/seven_scenes.py +115 -0
- third_party/pram/dataset/twelve_scenes.py +121 -0
- third_party/pram/dataset/utils.py +31 -0
- third_party/pram/environment.yml +173 -0
- third_party/pram/inference.py +62 -0
- third_party/pram/localization/base_model.py +45 -0
- third_party/pram/localization/camera.py +11 -0
- third_party/pram/localization/extract_features.py +256 -0
- third_party/pram/localization/frame.py +195 -0
- third_party/pram/localization/loc_by_rec_eval.py +299 -0
- third_party/pram/localization/loc_by_rec_online.py +225 -0
- third_party/pram/localization/localizer.py +217 -0
- third_party/pram/localization/match_features.py +156 -0
- third_party/pram/localization/match_features_batch.py +242 -0
- third_party/pram/localization/matchers/__init__.py +3 -0
- third_party/pram/localization/matchers/adagml.py +41 -0
hloc/extractors/sfd2.py (CHANGED, +5 -7)

@@ -1,4 +1,3 @@
-# -*- coding: UTF-8 -*-
 import sys
 from pathlib import Path
 
@@ -7,10 +6,9 @@ import torchvision.transforms as tvf
 from .. import logger
 from ..utils.base_model import BaseModel
 
-
-sys.path.append(str(
-
-from nets.sfd2 import load_sfd2
+tp_path = Path(__file__).parent / "../../third_party"
+sys.path.append(str(tp_path))
+from pram.nets.sfd2 import load_sfd2
 
 
 class SFD2(BaseModel):
@@ -26,8 +24,8 @@ class SFD2(BaseModel):
         self.norm_rgb = tvf.Normalize(
             mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
         )
-
-        self.net = load_sfd2(weight_path=
+        model_path = tp_path / "pram" / "weights" / self.conf["model_name"]
+        self.net = load_sfd2(weight_path=model_path).eval()
 
         logger.info("Load SFD2 model done.")
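The change above replaces a package-internal import with imports routed through the `third_party` prefix. A minimal standalone sketch of that pattern, assuming the vendored `pram` checkout from this commit and an illustrative weight file name (the real name comes from `self.conf["model_name"]`):

```python
import sys
from pathlib import Path

# Resolve third_party/ relative to this file, as the patched extractor does,
# then import through the package prefix so sibling repos cannot shadow
# each other's top-level `nets` modules.
tp_path = Path(__file__).parent / "../../third_party"
sys.path.append(str(tp_path))

from pram.nets.sfd2 import load_sfd2  # requires the vendored pram checkout

model_name = "sfd2.pth"  # hypothetical; hloc passes self.conf["model_name"] here
model_path = tp_path / "pram" / "weights" / model_name
net = load_sfd2(weight_path=model_path).eval()
```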
hloc/matchers/eloftr.py (CHANGED, +10 -6)

@@ -5,18 +5,22 @@ from pathlib import Path
 
 import torch
 
-
-sys.path.append(str(
-
-from src.loftr import LoFTR as ELoFTR_
-from src.loftr import
+tp_path = Path(__file__).parent / "../../third_party"
+sys.path.append(str(tp_path))
 
+from EfficientLoFTR.src.loftr import LoFTR as ELoFTR_
+from EfficientLoFTR.src.loftr import (
+    full_default_cfg,
+    opt_default_cfg,
+    reparameter,
+)
 
 from hloc import logger
 
 from ..utils.base_model import BaseModel
 
 
-class
+class ELoFTR(BaseModel):
     default_conf = {
         "weights": "weights/eloftr_outdoor.ckpt",
         "match_threshold": 0.2,
@@ -40,7 +44,7 @@ class LoFTR(BaseModel):
             _default_cfg["mp"] = True
         elif self.conf["precision"] == "fp16":
             _default_cfg["half"] = True
-        model_path =
+        model_path = tp_path / "EfficientLoFTR" / self.conf["weights"]
         cfg = _default_cfg
         cfg["match_coarse"]["thr"] = conf["match_threshold"]
         # cfg["match_coarse"]["skh_iters"] = conf["sinkhorn_iterations"]
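Besides the rename to `ELoFTR`, the patch resolves the checkpoint from the vendored `EfficientLoFTR` checkout instead of an unspecified location. A small sketch of just that path resolution, using the defaults shown in the diff (`tp_path` stands in for the `Path(__file__)`-relative lookup used in the real module):

```python
from pathlib import Path

# Defaults copied from the diff's default_conf.
conf = {"weights": "weights/eloftr_outdoor.ckpt", "match_threshold": 0.2}
tp_path = Path("third_party")  # assumed repo-relative checkout location

model_path = tp_path / "EfficientLoFTR" / conf["weights"]
print(model_path)  # third_party/EfficientLoFTR/weights/eloftr_outdoor.ckpt
```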
hloc/matchers/imp.py (CHANGED, +5 -6)

@@ -1,4 +1,3 @@
-# -*- coding: UTF-8 -*-
 import sys
 from pathlib import Path
 
@@ -7,10 +6,9 @@ import torch
 from .. import DEVICE, logger
 from ..utils.base_model import BaseModel
 
-
-sys.path.append(str(
-
-from nets.gml import GML
+tp_path = Path(__file__).parent / "../../third_party"
+sys.path.append(str(tp_path))
+from pram.nets.gml import GML
 
 
 class IMP(BaseModel):
@@ -33,7 +31,8 @@ class IMP(BaseModel):
 
     def _init(self, conf):
         self.conf = {**self.default_conf, **conf}
-        weight_path =
+        weight_path = tp_path / "pram" / "weights" / self.conf["model_name"]
+        # self.net = nets.gml(self.conf).eval().to(DEVICE)
         self.net = GML(self.conf).eval().to(DEVICE)
         self.net.load_state_dict(
             torch.load(weight_path, map_location="cpu")["model"], strict=True
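The IMP matcher follows the same convention: weights live under `third_party/pram/weights`, and the checkpoint stores its parameters under a `"model"` key. A hedged sketch of that loading convention (the `conf` here is trimmed to the one key the path needs; `GML` may require more configuration in practice):

```python
import sys
from pathlib import Path

import torch

tp_path = Path("third_party")  # assumed repo-relative checkout location
sys.path.append(str(tp_path))
from pram.nets.gml import GML  # vendored with this commit

conf = {"model_name": "imp_adagml.80.pth"}  # file name taken from the .gitignore below
weight_path = tp_path / "pram" / "weights" / conf["model_name"]

net = GML(conf).eval()
state = torch.load(weight_path, map_location="cpu")  # load onto CPU first
net.load_state_dict(state["model"], strict=True)     # parameters sit under "model"
```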
third_party/pram/.gitignore (ADDED, +13 lines)

.idea
__pycache__
weights/12scenes*
weights/7scenes*
weights/aachen*
weights/cambridgelandmarks*
weights/imp_adagml.80.pth
landmarks
3D-models
log_*
*.log
.nfs*
Pangolin
third_party/pram/LICENSE (ADDED, +2 lines)

This work is licensed under the Creative Commons Attribution-NonCommercial 4.0 International License.
To view a copy of this license, visit http://creativecommons.org/licenses/by-nc/4.0/.
third_party/pram/README.md (ADDED, +207 lines)

## PRAM: Place Recognition Anywhere Model for Efficient Visual Localization

<p align="center">
  <img src="assets/overview.png" width="960">
</p>

Humans localize themselves efficiently in known environments by first recognizing landmarks defined on certain objects and their spatial relationships, and then verifying the location by aligning detailed structures of recognized objects with those in memory. Inspired by this, we propose the place recognition anywhere model (PRAM) to perform visual localization as efficiently as humans do. PRAM consists of two main components: recognition and registration. First, a self-supervised, map-centric landmark definition strategy is adopted, making places in either indoor or outdoor scenes act as unique landmarks. Then, sparse keypoints extracted from images are used as input to a transformer-based deep neural network for landmark recognition; these keypoints enable PRAM to recognize hundreds of landmarks with high time and memory efficiency. Keypoints along with recognized landmark labels are further used for registration between query images and the 3D landmark map. Unlike previous hierarchical methods, PRAM discards global and local descriptors, reducing storage by over 90%. Since PRAM uses recognition and landmark-wise verification to replace global reference search and exhaustive matching, respectively, it runs 2.4 times faster than prior state-of-the-art approaches. Moreover, PRAM opens new directions for visual localization, including multi-modality localization, map-centric feature learning, and hierarchical scene coordinate regression.

* Full paper PDF: [Place Recognition Anywhere Model for Efficient Visual Localization](https://arxiv.org/pdf/2404.07785.pdf).

* Authors: *Fei Xue, Ignas Budvytis, Roberto Cipolla*

* Website: [PRAM](https://feixue94.github.io/pram-project) for videos, slides, recent updates, and datasets.

## Key Features

### 1. Self-supervised landmark definition in 3D space

- No need for segmentation on images
- No inconsistent semantic results from multi-view images
- Not limited to labels of known objects only
- Works anywhere, with known or unknown objects
- Landmark-wise 3D map sparsification

<p align="center">
  <img src="assets/map_sparsification.gif" width="640">
</p>

### 2. Efficient landmark-wise coarse and fine localization

- Recognizes landmarks instead of performing global retrieval
- Local landmark-wise matching instead of exhaustive matching
- No global descriptors (e.g. NetVLAD)
- No reference images with their heavy, repetitive 2D keypoints and descriptors
- Automatic inlier/outlier identification

<p align="center">
  <img src="assets/pipeline1.png" width="640">
</p>

### 3. Sparse recognition

- Sparse SFD2 keypoints as tokens
- No uncertainties of points at boundaries
- Flexible enough to accept multi-modality inputs

### 4. Relocalization and temporal localization

- Per-frame relocalization from scratch
- Tracking of previous frames for higher efficiency

### 5. One model per dataset

- All 7 subscenes in the 7Scenes dataset share one model
- All 12 subscenes in the 12Scenes dataset share one model
- All 5 subscenes in CambridgeLandmarks share one model

### 6. Robust to long-term changes

<p align="center">
  <img src="assets/pram_demo.gif" width="640">
</p>

## Open problems

- Adaptive determination of the number of landmarks
- Using SAM + open vocabulary to generate semantic maps
- Multi-modality localization with other tokenized signals (e.g. text, language, GPS, magnetometer)
- More effective solutions to 3D sparsification

## Preparation

1. Download the 7Scenes, 12Scenes, CambridgeLandmarks, and Aachen datasets (remove redundant depth images, otherwise they will be picked up in the SfM process).
2. Set up the environment.

2.1 Create a virtual environment:

```
conda env create -f environment.yml
(do not activate pram before pangolin is installed)
```

2.2 Compile Pangolin for the installed Python:

```
git clone --recursive https://github.com/stevenlovegrove/Pangolin.git
cd Pangolin
git checkout v0.8

# Install dependencies
./scripts/install_prerequisites.sh recommended

# Compile with your python
cmake -DPython_EXECUTABLE=/your path to/anaconda3/envs/pram/bin/python3 -B build
cmake --build build -t pypangolin_pip_install

conda activate pram
```

## Run the localization with online visualization

1. Download the [3D-models](https://drive.google.com/drive/folders/1DUB073KxAjsc8lxhMpFuxPRf0ZBQS6NS?usp=drive_link), pretrained [models](https://drive.google.com/drive/folders/1E2QvujCevqnyg_CM9FGAa0AxKkt4KbLD?usp=drive_link), and [landmarks](https://drive.google.com/drive/folders/1r9src9bz7k3WYGfaPmKJ9gqxuvdfxZU0?usp=sharing)
2. Put the pretrained models in the `weights` directory
3. Run the demo (e.g. 7Scenes):

```
python3 inference.py --config configs/config_train_7scenes_sfd2.yaml --rec_weight_path weights/7scenes_nc113_birch_segnetvit.199.pth --landmark_path /your path to/landmarks --online
```

## Train the recognition model (e.g. for 7Scenes)

### 1. Run SfM with SFD2, including feature extraction (modify dataset_dir, ref_sfm_dir, and output_dir)

```
./sfm_scripts/reconstruct_7scenes.sh
```

This step produces the SfM results together with the extracted keypoints.

### 2. Generate 3D landmarks

```
python3 -m recognition.recmap --dataset 7Scenes --dataset_dir /your path to/7Scenes --sfm_dir /sfm_path/7Scenes --save_dir /save_path/landmarks
```

This step generates 3D landmarks, creates virtual reference frames, and sparsifies the 3D points of each landmark for all scenes in 7Scenes.

### 3. Train the sparse recognition model (one model per dataset)

```
python3 train.py --config configs/config_train_7scenes_sfd2.yaml
```

Remember to modify the paths in 'config_train_7scenes_sfd2.yaml'. A driver chaining the three steps is sketched after this README listing.

## Your own dataset

1. Run colmap or hloc to obtain the SfM results.
2. Redo the reconstruction with SFD2 keypoints, using the SfM model from step 1 as the reference SfM.
3. Generate 3D landmarks, VRFs, map sparsification, etc. (add DatasetName.yaml to configs/datasets).
4. Train the recognition model.
5. Run the evaluation.

## Previous works can be found here

1. [Efficient large-scale localization by landmark recognition, CVPR 2022](https://github.com/feixue94/lbr)
2. [IMP: Iterative Matching and Pose Estimation with Adaptive Pooling, CVPR 2023](https://github.com/feixue94/imp-release)
3. [SFD2: Semantic-guided Feature Detection and Description, CVPR 2023](https://github.com/feixue94/sfd2)
4. [VRS-NeRF: Visual Relocalization with Sparse Neural Radiance Field, under review](https://github.com/feixue94/vrs-nerf)

## BibTeX Citation

If you use any ideas from the paper or code from this repo, please consider citing:

```
@article{xue2024pram,
  author    = {Fei Xue and Ignas Budvytis and Roberto Cipolla},
  title     = {PRAM: Place Recognition Anywhere Model for Efficient Visual Localization},
  journal   = {arXiv preprint arXiv:2404.07785},
  year      = {2024}
}

@inproceedings{xue2023sfd2,
  author    = {Fei Xue and Ignas Budvytis and Roberto Cipolla},
  title     = {SFD2: Semantic-guided Feature Detection and Description},
  booktitle = {CVPR},
  year      = {2023}
}

@inproceedings{xue2022imp,
  author    = {Fei Xue and Ignas Budvytis and Roberto Cipolla},
  title     = {IMP: Iterative Matching and Pose Estimation with Adaptive Pooling},
  booktitle = {CVPR},
  year      = {2023}
}

@inproceedings{xue2022efficient,
  author    = {Fei Xue and Ignas Budvytis and Daniel Olmeda Reino and Roberto Cipolla},
  title     = {Efficient Large-scale Localization by Global Instance Recognition},
  booktitle = {CVPR},
  year      = {2022}
}
```

## Acknowledgements

Part of the code is from previous excellent works, including [SuperGlue](https://github.com/magicleap/SuperGluePretrainedNetwork) and [hloc](https://github.com/cvg/Hierarchical-Localization). You can find more details in their released repositories if you are interested in their work.
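To chain the three documented 7Scenes training steps into one run, a small driver can shell out the commands exactly as the README gives them. The placeholder paths (`/your path to/...`) still need to be filled in; this wrapper is a sketch, not part of the PRAM repo:

```python
import subprocess

# Commands copied verbatim from the README walkthrough above.
steps = [
    "./sfm_scripts/reconstruct_7scenes.sh",
    "python3 -m recognition.recmap --dataset 7Scenes"
    " --dataset_dir '/your path to/7Scenes' --sfm_dir /sfm_path/7Scenes"
    " --save_dir /save_path/landmarks",
    "python3 train.py --config configs/config_train_7scenes_sfd2.yaml",
]
for cmd in steps:
    subprocess.run(cmd, shell=True, check=True)  # abort on the first failing step
```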
third_party/pram/assets/map_sparsification.gif (ADDED, Git LFS)
third_party/pram/assets/multi_recognition.png (ADDED, Git LFS)
third_party/pram/assets/overview.png (ADDED, Git LFS)
third_party/pram/assets/pipeline1.png (ADDED, Git LFS)
third_party/pram/assets/pram_demo.gif (ADDED, Git LFS)
third_party/pram/assets/sam_openvoc.png (ADDED, Git LFS)
third_party/pram/colmap_utils/camera_intrinsics.py (ADDED, +30 lines)

# -*- coding: UTF-8 -*-
'''=================================================
@Project -> File   localizer -> camera_intrinsics
@IDE    PyCharm
@Author fx221@cam.ac.uk
@Date   15/08/2023 12:33
=================================================='''
import numpy as np


def intrinsics_from_camera(camera_model, params):
    if camera_model in ("SIMPLE_PINHOLE", "SIMPLE_RADIAL", "RADIAL"):
        fx = fy = params[0]
        cx = params[1]
        cy = params[2]
    elif camera_model in ("PINHOLE", "OPENCV", "OPENCV_FISHEYE", "FULL_OPENCV"):
        fx = params[0]
        fy = params[1]
        cx = params[2]
        cy = params[3]
    else:
        raise Exception("Camera model not supported")

    # intrinsics
    K = np.identity(3)
    K[0, 0] = fx
    K[1, 1] = fy
    K[0, 2] = cx
    K[1, 2] = cy
    return K
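For example, a SIMPLE_RADIAL camera stores params as (f, cx, cy, k), and only the first three feed the intrinsic matrix; a short check with made-up values, assuming `intrinsics_from_camera` is imported from the module above:

```python
import numpy as np

K = intrinsics_from_camera("SIMPLE_RADIAL", np.array([869.0, 512.0, 384.0, -0.04]))
print(K)
# [[869.   0. 512.]
#  [  0. 869. 384.]
#  [  0.   0.   1.]]
```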
third_party/pram/colmap_utils/database.py (ADDED, +352 lines)

# Copyright (c) 2018, ETH Zurich and UNC Chapel Hill.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
#     * Redistributions of source code must retain the above copyright
#       notice, this list of conditions and the following disclaimer.
#
#     * Redistributions in binary form must reproduce the above copyright
#       notice, this list of conditions and the following disclaimer in the
#       documentation and/or other materials provided with the distribution.
#
#     * Neither the name of ETH Zurich and UNC Chapel Hill nor the names of
#       its contributors may be used to endorse or promote products derived
#       from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# Author: Johannes L. Schoenberger (jsch-at-demuc-dot-de)

# This script is based on an original implementation by True Price.

import sys
import sqlite3
import numpy as np


IS_PYTHON3 = sys.version_info[0] >= 3

MAX_IMAGE_ID = 2**31 - 1

CREATE_CAMERAS_TABLE = """CREATE TABLE IF NOT EXISTS cameras (
    camera_id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
    model INTEGER NOT NULL,
    width INTEGER NOT NULL,
    height INTEGER NOT NULL,
    params BLOB,
    prior_focal_length INTEGER NOT NULL)"""

CREATE_DESCRIPTORS_TABLE = """CREATE TABLE IF NOT EXISTS descriptors (
    image_id INTEGER PRIMARY KEY NOT NULL,
    rows INTEGER NOT NULL,
    cols INTEGER NOT NULL,
    data BLOB,
    FOREIGN KEY(image_id) REFERENCES images(image_id) ON DELETE CASCADE)"""

CREATE_IMAGES_TABLE = """CREATE TABLE IF NOT EXISTS images (
    image_id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
    name TEXT NOT NULL UNIQUE,
    camera_id INTEGER NOT NULL,
    prior_qw REAL,
    prior_qx REAL,
    prior_qy REAL,
    prior_qz REAL,
    prior_tx REAL,
    prior_ty REAL,
    prior_tz REAL,
    CONSTRAINT image_id_check CHECK(image_id >= 0 and image_id < {}),
    FOREIGN KEY(camera_id) REFERENCES cameras(camera_id))
""".format(MAX_IMAGE_ID)

CREATE_TWO_VIEW_GEOMETRIES_TABLE = """
CREATE TABLE IF NOT EXISTS two_view_geometries (
    pair_id INTEGER PRIMARY KEY NOT NULL,
    rows INTEGER NOT NULL,
    cols INTEGER NOT NULL,
    data BLOB,
    config INTEGER NOT NULL,
    F BLOB,
    E BLOB,
    H BLOB)
"""

CREATE_KEYPOINTS_TABLE = """CREATE TABLE IF NOT EXISTS keypoints (
    image_id INTEGER PRIMARY KEY NOT NULL,
    rows INTEGER NOT NULL,
    cols INTEGER NOT NULL,
    data BLOB,
    FOREIGN KEY(image_id) REFERENCES images(image_id) ON DELETE CASCADE)
"""

CREATE_MATCHES_TABLE = """CREATE TABLE IF NOT EXISTS matches (
    pair_id INTEGER PRIMARY KEY NOT NULL,
    rows INTEGER NOT NULL,
    cols INTEGER NOT NULL,
    data BLOB)"""

CREATE_NAME_INDEX = \
    "CREATE UNIQUE INDEX IF NOT EXISTS index_name ON images(name)"

CREATE_ALL = "; ".join([
    CREATE_CAMERAS_TABLE,
    CREATE_IMAGES_TABLE,
    CREATE_KEYPOINTS_TABLE,
    CREATE_DESCRIPTORS_TABLE,
    CREATE_MATCHES_TABLE,
    CREATE_TWO_VIEW_GEOMETRIES_TABLE,
    CREATE_NAME_INDEX
])


def image_ids_to_pair_id(image_id1, image_id2):
    if image_id1 > image_id2:
        image_id1, image_id2 = image_id2, image_id1
    return image_id1 * MAX_IMAGE_ID + image_id2


def pair_id_to_image_ids(pair_id):
    image_id2 = pair_id % MAX_IMAGE_ID
    image_id1 = (pair_id - image_id2) / MAX_IMAGE_ID
    return image_id1, image_id2


def array_to_blob(array):
    if IS_PYTHON3:
        return array.tostring()
    else:
        return np.getbuffer(array)


def blob_to_array(blob, dtype, shape=(-1,)):
    if IS_PYTHON3:
        return np.fromstring(blob, dtype=dtype).reshape(*shape)
    else:
        return np.frombuffer(blob, dtype=dtype).reshape(*shape)


class COLMAPDatabase(sqlite3.Connection):

    @staticmethod
    def connect(database_path):
        return sqlite3.connect(str(database_path), factory=COLMAPDatabase)

    def __init__(self, *args, **kwargs):
        super(COLMAPDatabase, self).__init__(*args, **kwargs)

        self.create_tables = lambda: self.executescript(CREATE_ALL)
        self.create_cameras_table = \
            lambda: self.executescript(CREATE_CAMERAS_TABLE)
        self.create_descriptors_table = \
            lambda: self.executescript(CREATE_DESCRIPTORS_TABLE)
        self.create_images_table = \
            lambda: self.executescript(CREATE_IMAGES_TABLE)
        self.create_two_view_geometries_table = \
            lambda: self.executescript(CREATE_TWO_VIEW_GEOMETRIES_TABLE)
        self.create_keypoints_table = \
            lambda: self.executescript(CREATE_KEYPOINTS_TABLE)
        self.create_matches_table = \
            lambda: self.executescript(CREATE_MATCHES_TABLE)
        self.create_name_index = lambda: self.executescript(CREATE_NAME_INDEX)

    def add_camera(self, model, width, height, params,
                   prior_focal_length=False, camera_id=None):
        params = np.asarray(params, np.float64)
        cursor = self.execute(
            "INSERT INTO cameras VALUES (?, ?, ?, ?, ?, ?)",
            (camera_id, model, width, height, array_to_blob(params),
             prior_focal_length))
        return cursor.lastrowid

    def add_image(self, name, camera_id,
                  prior_q=np.zeros(4), prior_t=np.zeros(3), image_id=None):
        cursor = self.execute(
            "INSERT INTO images VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
            (image_id, name, camera_id, prior_q[0], prior_q[1], prior_q[2],
             prior_q[3], prior_t[0], prior_t[1], prior_t[2]))
        return cursor.lastrowid

    def add_keypoints(self, image_id, keypoints):
        assert(len(keypoints.shape) == 2)
        assert(keypoints.shape[1] in [2, 4, 6])

        keypoints = np.asarray(keypoints, np.float32)
        self.execute(
            "INSERT INTO keypoints VALUES (?, ?, ?, ?)",
            (image_id,) + keypoints.shape + (array_to_blob(keypoints),))

    def add_descriptors(self, image_id, descriptors):
        descriptors = np.ascontiguousarray(descriptors, np.uint8)
        self.execute(
            "INSERT INTO descriptors VALUES (?, ?, ?, ?)",
            (image_id,) + descriptors.shape + (array_to_blob(descriptors),))

    def add_matches(self, image_id1, image_id2, matches):
        assert(len(matches.shape) == 2)
        assert(matches.shape[1] == 2)

        if image_id1 > image_id2:
            matches = matches[:, ::-1]

        pair_id = image_ids_to_pair_id(image_id1, image_id2)
        matches = np.asarray(matches, np.uint32)
        self.execute(
            "INSERT INTO matches VALUES (?, ?, ?, ?)",
            (pair_id,) + matches.shape + (array_to_blob(matches),))

    def add_two_view_geometry(self, image_id1, image_id2, matches,
                              F=np.eye(3), E=np.eye(3), H=np.eye(3), config=2):
        assert(len(matches.shape) == 2)
        assert(matches.shape[1] == 2)

        if image_id1 > image_id2:
            matches = matches[:, ::-1]

        pair_id = image_ids_to_pair_id(image_id1, image_id2)
        matches = np.asarray(matches, np.uint32)
        F = np.asarray(F, dtype=np.float64)
        E = np.asarray(E, dtype=np.float64)
        H = np.asarray(H, dtype=np.float64)
        self.execute(
            "INSERT INTO two_view_geometries VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
            (pair_id,) + matches.shape + (array_to_blob(matches), config,
             array_to_blob(F), array_to_blob(E), array_to_blob(H)))


def example_usage():
    import os
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--database_path", default="database.db")
    args = parser.parse_args()

    if os.path.exists(args.database_path):
        print("ERROR: database path already exists -- will not modify it.")
        return

    # Open the database.

    db = COLMAPDatabase.connect(args.database_path)

    # For convenience, try creating all the tables upfront.

    db.create_tables()

    # Create dummy cameras.

    model1, width1, height1, params1 = \
        0, 1024, 768, np.array((1024., 512., 384.))
    model2, width2, height2, params2 = \
        2, 1024, 768, np.array((1024., 512., 384., 0.1))

    camera_id1 = db.add_camera(model1, width1, height1, params1)
    camera_id2 = db.add_camera(model2, width2, height2, params2)

    # Create dummy images.

    image_id1 = db.add_image("image1.png", camera_id1)
    image_id2 = db.add_image("image2.png", camera_id1)
    image_id3 = db.add_image("image3.png", camera_id2)
    image_id4 = db.add_image("image4.png", camera_id2)

    # Create dummy keypoints.
    #
    # Note that COLMAP supports:
    #      - 2D keypoints: (x, y)
    #      - 4D keypoints: (x, y, theta, scale)
    #      - 6D affine keypoints: (x, y, a_11, a_12, a_21, a_22)

    num_keypoints = 1000
    keypoints1 = np.random.rand(num_keypoints, 2) * (width1, height1)
    keypoints2 = np.random.rand(num_keypoints, 2) * (width1, height1)
    keypoints3 = np.random.rand(num_keypoints, 2) * (width2, height2)
    keypoints4 = np.random.rand(num_keypoints, 2) * (width2, height2)

    db.add_keypoints(image_id1, keypoints1)
    db.add_keypoints(image_id2, keypoints2)
    db.add_keypoints(image_id3, keypoints3)
    db.add_keypoints(image_id4, keypoints4)

    # Create dummy matches.

    M = 50
    matches12 = np.random.randint(num_keypoints, size=(M, 2))
    matches23 = np.random.randint(num_keypoints, size=(M, 2))
    matches34 = np.random.randint(num_keypoints, size=(M, 2))

    db.add_matches(image_id1, image_id2, matches12)
    db.add_matches(image_id2, image_id3, matches23)
    db.add_matches(image_id3, image_id4, matches34)

    # Commit the data to the file.

    db.commit()

    # Read and check cameras.

    rows = db.execute("SELECT * FROM cameras")

    camera_id, model, width, height, params, prior = next(rows)
    params = blob_to_array(params, np.float64)
    assert camera_id == camera_id1
    assert model == model1 and width == width1 and height == height1
    assert np.allclose(params, params1)

    camera_id, model, width, height, params, prior = next(rows)
    params = blob_to_array(params, np.float64)
    assert camera_id == camera_id2
    assert model == model2 and width == width2 and height == height2
    assert np.allclose(params, params2)

    # Read and check keypoints.

    keypoints = dict(
        (image_id, blob_to_array(data, np.float32, (-1, 2)))
        for image_id, data in db.execute(
            "SELECT image_id, data FROM keypoints"))

    assert np.allclose(keypoints[image_id1], keypoints1)
    assert np.allclose(keypoints[image_id2], keypoints2)
    assert np.allclose(keypoints[image_id3], keypoints3)
    assert np.allclose(keypoints[image_id4], keypoints4)

    # Read and check matches.

    pair_ids = [image_ids_to_pair_id(*pair) for pair in
                ((image_id1, image_id2),
                 (image_id2, image_id3),
                 (image_id3, image_id4))]

    matches = dict(
        (pair_id_to_image_ids(pair_id),
         blob_to_array(data, np.uint32, (-1, 2)))
        for pair_id, data in db.execute("SELECT pair_id, data FROM matches")
    )

    assert np.all(matches[(image_id1, image_id2)] == matches12)
    assert np.all(matches[(image_id2, image_id3)] == matches23)
    assert np.all(matches[(image_id3, image_id4)] == matches34)

    # Clean up.

    db.close()

    if os.path.exists(args.database_path):
        os.remove(args.database_path)


if __name__ == "__main__":
    example_usage()
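The matches and two_view_geometries tables are keyed by a single integer that packs both image ids. A quick round trip of the encoding defined above (ids arbitrary; the functions are assumed in scope from the module):

```python
# image_ids_to_pair_id normalizes the order (smaller id first), so the key is
# independent of the (i, j) ordering; pair_id_to_image_ids inverts it, with
# id1 coming back as a float because of the true division.
pair_id = image_ids_to_pair_id(42, 7)
assert pair_id == image_ids_to_pair_id(7, 42)
print(pair_id_to_image_ids(pair_id))  # (7.0, 42)
```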
third_party/pram/colmap_utils/geometry.py (ADDED, +17 lines)

# -*- coding: UTF-8 -*-
import numpy as np
import pycolmap


def to_homogeneous(p):
    return np.pad(p, ((0, 0),) * (p.ndim - 1) + ((0, 1),), constant_values=1)


def compute_epipolar_errors(j_from_i: pycolmap.Rigid3d, p2d_i, p2d_j):
    j_E_i = j_from_i.essential_matrix()
    l2d_j = to_homogeneous(p2d_i) @ j_E_i.T
    l2d_i = to_homogeneous(p2d_j) @ j_E_i
    dist = np.abs(np.sum(to_homogeneous(p2d_i) * l2d_i, axis=1))
    errors_i = dist / np.linalg.norm(l2d_i[:, :2], axis=1)
    errors_j = dist / np.linalg.norm(l2d_j[:, :2], axis=1)
    return errors_i, errors_j
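A self-contained check of `compute_epipolar_errors`: instead of a real `pycolmap.Rigid3d`, a stub exposing `essential_matrix()` is passed in (type annotations are not enforced at runtime), here the essential matrix [t]_x of a unit translation along x. Points on the same image row satisfy that epipolar constraint, so both errors come out zero. The import path is an assumption about where the module sits:

```python
import numpy as np
from colmap_utils.geometry import compute_epipolar_errors  # needs pycolmap installed

class PoseStub:
    def essential_matrix(self):
        # [t]_x for t = (1, 0, 0): a pure horizontal translation.
        return np.array([[0.0, 0.0, 0.0],
                         [0.0, 0.0, -1.0],
                         [0.0, 1.0, 0.0]])

p2d_i = np.array([[0.5, 0.2]])
p2d_j = np.array([[0.7, 0.2]])  # same row, consistent with the translation
errors_i, errors_j = compute_epipolar_errors(PoseStub(), p2d_i, p2d_j)
print(errors_i, errors_j)  # [0.] [0.]
```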
third_party/pram/colmap_utils/io.py (ADDED, +78 lines)

# -*- coding: UTF-8 -*-
from pathlib import Path
from typing import Tuple

import cv2
import h5py
import numpy as np

from .parsers import names_to_pair, names_to_pair_old


def read_image(path, grayscale=False):
    if grayscale:
        mode = cv2.IMREAD_GRAYSCALE
    else:
        mode = cv2.IMREAD_COLOR
    image = cv2.imread(str(path), mode)
    if image is None:
        raise ValueError(f"Cannot read image {path}.")
    if not grayscale and len(image.shape) == 3:
        image = image[:, :, ::-1]  # BGR to RGB
    return image


def list_h5_names(path):
    names = []
    with h5py.File(str(path), "r", libver="latest") as fd:
        def visit_fn(_, obj):
            if isinstance(obj, h5py.Dataset):
                names.append(obj.parent.name.strip("/"))

        fd.visititems(visit_fn)
    return list(set(names))


def get_keypoints(
    path: Path, name: str, return_uncertainty: bool = False
) -> np.ndarray:
    with h5py.File(str(path), "r", libver="latest") as hfile:
        dset = hfile[name]["keypoints"]
        p = dset.__array__()
        uncertainty = dset.attrs.get("uncertainty")
    if return_uncertainty:
        return p, uncertainty
    return p


def find_pair(hfile: h5py.File, name0: str, name1: str):
    pair = names_to_pair(name0, name1)
    if pair in hfile:
        return pair, False
    pair = names_to_pair(name1, name0)
    if pair in hfile:
        return pair, True
    # older, less efficient format
    pair = names_to_pair_old(name0, name1)
    if pair in hfile:
        return pair, False
    pair = names_to_pair_old(name1, name0)
    if pair in hfile:
        return pair, True
    raise ValueError(
        f"Could not find pair {(name0, name1)}... "
        "Maybe you matched with a different list of pairs? "
    )


def get_matches(path: Path, name0: str, name1: str) -> Tuple[np.ndarray]:
    with h5py.File(str(path), "r", libver="latest") as hfile:
        pair, reverse = find_pair(hfile, name0, name1)
        matches = hfile[pair]["matches0"].__array__()
        scores = hfile[pair]["matching_scores0"].__array__()
    idx = np.where(matches != -1)[0]
    matches = np.stack([idx, matches[idx]], -1)
    if reverse:
        matches = np.flip(matches, -1)
    scores = scores[idx]
    return matches, scores
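In hloc-style pipelines these helpers read the exported feature and match HDF5 files, which use the keypoints / matches0 / matching_scores0 layout seen above. A usage sketch with illustrative file and image names:

```python
from pathlib import Path

# Hypothetical output paths; real names depend on your extraction config.
feats_path = Path("outputs/feats-sfd2.h5")
matches_path = Path("outputs/matches.h5")

kpts = get_keypoints(feats_path, "query/0001.jpg")  # (N, 2) keypoint array
matches, scores = get_matches(matches_path, "query/0001.jpg", "db/0042.jpg")
# matches: (M, 2) index pairs into the two keypoint arrays; scores: (M,)
```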
third_party/pram/colmap_utils/parsers.py (ADDED, +73 lines)

# -*- coding: UTF-8 -*-

from pathlib import Path
import logging
import numpy as np
from collections import defaultdict


def parse_image_lists_with_intrinsics(paths):
    results = []
    files = list(Path(paths.parent).glob(paths.name))
    assert len(files) > 0

    for lfile in files:
        with open(lfile, 'r') as f:
            raw_data = f.readlines()

        logging.info(f'Importing {len(raw_data)} queries in {lfile.name}')
        for data in raw_data:
            data = data.strip('\n').split(' ')
            name, camera_model, width, height = data[:4]
            params = np.array(data[4:], float)
            info = (camera_model, int(width), int(height), params)
            results.append((name, info))

    assert len(results) > 0
    return results


def parse_img_lists_for_extended_cmu_seaons(paths):
    Ks = {
        "c0": "OPENCV 1024 768 868.993378 866.063001 525.942323 420.042529 -0.399431 0.188924 0.000153 0.000571",
        "c1": "OPENCV 1024 768 868.993378 866.063001 525.942323 420.042529 -0.399431 0.188924 0.000153 0.000571"
    }

    results = []
    files = list(Path(paths.parent).glob(paths.name))
    assert len(files) > 0

    for lfile in files:
        with open(lfile, 'r') as f:
            raw_data = f.readlines()

        logging.info(f'Importing {len(raw_data)} queries in {lfile.name}')
        for name in raw_data:
            name = name.strip('\n')
            camera = name.split('_')[2]
            K = Ks[camera].split(' ')
            camera_model, width, height = K[:3]
            params = np.array(K[3:], float)
            # print("camera: ", camera_model, width, height, params)
            info = (camera_model, int(width), int(height), params)
            results.append((name, info))

    assert len(results) > 0
    return results


def parse_retrieval(path):
    retrieval = defaultdict(list)
    with open(path, 'r') as f:
        for p in f.read().rstrip('\n').split('\n'):
            q, r = p.split(' ')
            retrieval[q].append(r)
    return dict(retrieval)


def names_to_pair_old(name0, name1):
    return '_'.join((name0.replace('/', '-'), name1.replace('/', '-')))


def names_to_pair(name0, name1, separator="/"):
    return separator.join((name0.replace("/", "-"), name1.replace("/", "-")))
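`parse_retrieval` expects one `query reference` pair per line and groups repeated query names into a list. A minimal round trip, with `parse_retrieval` assumed imported from the module above:

```python
import tempfile

# Write a tiny retrieval file and parse it back.
with tempfile.NamedTemporaryFile("w", suffix=".txt", delete=False) as f:
    f.write("q1.jpg db1.jpg\nq1.jpg db2.jpg\nq2.jpg db3.jpg")
    path = f.name

print(parse_retrieval(path))
# {'q1.jpg': ['db1.jpg', 'db2.jpg'], 'q2.jpg': ['db3.jpg']}
```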
third_party/pram/colmap_utils/read_write_model.py (ADDED, +627 lines)

# Copyright (c) 2018, ETH Zurich and UNC Chapel Hill.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
#     * Redistributions of source code must retain the above copyright
#       notice, this list of conditions and the following disclaimer.
#
#     * Redistributions in binary form must reproduce the above copyright
#       notice, this list of conditions and the following disclaimer in the
#       documentation and/or other materials provided with the distribution.
#
#     * Neither the name of ETH Zurich and UNC Chapel Hill nor the names of
#       its contributors may be used to endorse or promote products derived
#       from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# Author: Johannes L. Schoenberger (jsch-at-demuc-dot-de)

import os
import sys
import collections
import numpy as np
import struct
import argparse

CameraModel = collections.namedtuple(
    "CameraModel", ["model_id", "model_name", "num_params"])
Camera = collections.namedtuple(
    "Camera", ["id", "model", "width", "height", "params"])
BaseImage = collections.namedtuple(
    "Image", ["id", "qvec", "tvec", "camera_id", "name", "xys", "point3D_ids"])
Point3D = collections.namedtuple(
    "Point3D", ["id", "xyz", "rgb", "error", "image_ids", "point2D_idxs"])


class Image(BaseImage):
    def qvec2rotmat(self):
        return qvec2rotmat(self.qvec)


CAMERA_MODELS = {
    CameraModel(model_id=0, model_name="SIMPLE_PINHOLE", num_params=3),
    CameraModel(model_id=1, model_name="PINHOLE", num_params=4),
    CameraModel(model_id=2, model_name="SIMPLE_RADIAL", num_params=4),
    CameraModel(model_id=3, model_name="RADIAL", num_params=5),
    CameraModel(model_id=4, model_name="OPENCV", num_params=8),
    CameraModel(model_id=5, model_name="OPENCV_FISHEYE", num_params=8),
    CameraModel(model_id=6, model_name="FULL_OPENCV", num_params=12),
    CameraModel(model_id=7, model_name="FOV", num_params=5),
    CameraModel(model_id=8, model_name="SIMPLE_RADIAL_FISHEYE", num_params=4),
    CameraModel(model_id=9, model_name="RADIAL_FISHEYE", num_params=5),
    CameraModel(model_id=10, model_name="THIN_PRISM_FISHEYE", num_params=12)
}
CAMERA_MODEL_IDS = dict([(camera_model.model_id, camera_model)
                         for camera_model in CAMERA_MODELS])
CAMERA_MODEL_NAMES = dict([(camera_model.model_name, camera_model)
                           for camera_model in CAMERA_MODELS])


def read_next_bytes(fid, num_bytes, format_char_sequence, endian_character="<"):
    """Read and unpack the next bytes from a binary file.
    :param fid:
    :param num_bytes: Sum of combination of {2, 4, 8}, e.g. 2, 6, 16, 30, etc.
    :param format_char_sequence: List of {c, e, f, d, h, H, i, I, l, L, q, Q}.
    :param endian_character: Any of {@, =, <, >, !}
    :return: Tuple of read and unpacked values.
    """
    data = fid.read(num_bytes)
    return struct.unpack(endian_character + format_char_sequence, data)


def write_next_bytes(fid, data, format_char_sequence, endian_character="<"):
    """pack and write to a binary file.
    :param fid:
    :param data: data to send, if multiple elements are sent at the same time,
    they should be encapsuled either in a list or a tuple
    :param format_char_sequence: List of {c, e, f, d, h, H, i, I, l, L, q, Q}.
    should be the same length as the data list or tuple
    :param endian_character: Any of {@, =, <, >, !}
    """
    if isinstance(data, (list, tuple)):
        bytes = struct.pack(endian_character + format_char_sequence, *data)
    else:
        bytes = struct.pack(endian_character + format_char_sequence, data)
    fid.write(bytes)


def read_cameras_text(path):
    """
    see: src/base/reconstruction.cc
        void Reconstruction::WriteCamerasText(const std::string& path)
        void Reconstruction::ReadCamerasText(const std::string& path)
    """
    cameras = {}
    with open(path, "r") as fid:
        while True:
            line = fid.readline()
            if not line:
                break
            line = line.strip()
            if len(line) > 0 and line[0] != "#":
                elems = line.split()
                camera_id = int(elems[0])
                model = elems[1]
                width = int(elems[2])
                height = int(elems[3])
                params = np.array(tuple(map(float, elems[4:])))
                cameras[camera_id] = Camera(id=camera_id, model=model,
                                            width=width, height=height,
                                            params=params)
    return cameras


def read_cameras_binary(path_to_model_file):
    """
    see: src/base/reconstruction.cc
        void Reconstruction::WriteCamerasBinary(const std::string& path)
        void Reconstruction::ReadCamerasBinary(const std::string& path)
    """
    cameras = {}
    with open(path_to_model_file, "rb") as fid:
        num_cameras = read_next_bytes(fid, 8, "Q")[0]
        for camera_line_index in range(num_cameras):
            camera_properties = read_next_bytes(
                fid, num_bytes=24, format_char_sequence="iiQQ")
            camera_id = camera_properties[0]
            model_id = camera_properties[1]
            model_name = CAMERA_MODEL_IDS[camera_properties[1]].model_name
            width = camera_properties[2]
            height = camera_properties[3]
            num_params = CAMERA_MODEL_IDS[model_id].num_params
            params = read_next_bytes(fid, num_bytes=8 * num_params,
                                     format_char_sequence="d" * num_params)
            cameras[camera_id] = Camera(id=camera_id,
                                        model=model_name,
                                        width=width,
                                        height=height,
                                        params=np.array(params))
        assert len(cameras) == num_cameras
    return cameras


def write_cameras_text(cameras, path):
    """
    see: src/base/reconstruction.cc
        void Reconstruction::WriteCamerasText(const std::string& path)
        void Reconstruction::ReadCamerasText(const std::string& path)
    """
    HEADER = '# Camera list with one line of data per camera:\n' + \
             '#   CAMERA_ID, MODEL, WIDTH, HEIGHT, PARAMS[]\n' + \
             '# Number of cameras: {}\n'.format(len(cameras))
    with open(path, "w") as fid:
        fid.write(HEADER)
        for _, cam in cameras.items():
            to_write = [cam.id, cam.model, cam.width, cam.height, *cam.params]
            line = " ".join([str(elem) for elem in to_write])
            fid.write(line + "\n")


def write_cameras_binary(cameras, path_to_model_file):
    """
    see: src/base/reconstruction.cc
        void Reconstruction::WriteCamerasBinary(const std::string& path)
        void Reconstruction::ReadCamerasBinary(const std::string& path)
    """
    with open(path_to_model_file, "wb") as fid:
        write_next_bytes(fid, len(cameras), "Q")
        for _, cam in cameras.items():
            model_id = CAMERA_MODEL_NAMES[cam.model].model_id
            camera_properties = [cam.id,
                                 model_id,
                                 cam.width,
                                 cam.height]
            write_next_bytes(fid, camera_properties, "iiQQ")
            for p in cam.params:
                write_next_bytes(fid, float(p), "d")
    return cameras


def read_images_text(path):
    """
    see: src/base/reconstruction.cc
        void Reconstruction::ReadImagesText(const std::string& path)
        void Reconstruction::WriteImagesText(const std::string& path)
    """
    images = {}
    with open(path, "r") as fid:
        while True:
            line = fid.readline()
            if not line:
                break
            line = line.strip()
            if len(line) > 0 and line[0] != "#":
                elems = line.split()
                image_id = int(elems[0])
                qvec = np.array(tuple(map(float, elems[1:5])))
                tvec = np.array(tuple(map(float, elems[5:8])))
                camera_id = int(elems[8])
                image_name = elems[9]
                elems = fid.readline().split()
                xys = np.column_stack([tuple(map(float, elems[0::3])),
                                       tuple(map(float, elems[1::3]))])
                point3D_ids = np.array(tuple(map(int, elems[2::3])))
                images[image_id] = Image(
                    id=image_id, qvec=qvec, tvec=tvec,
                    camera_id=camera_id, name=image_name,
                    xys=xys, point3D_ids=point3D_ids)
    return images


def read_images_binary(path_to_model_file):
    """
    see: src/base/reconstruction.cc
        void Reconstruction::ReadImagesBinary(const std::string& path)
        void Reconstruction::WriteImagesBinary(const std::string& path)
    """
    images = {}
    with open(path_to_model_file, "rb") as fid:
        num_reg_images = read_next_bytes(fid, 8, "Q")[0]
        for image_index in range(num_reg_images):
            binary_image_properties = read_next_bytes(
                fid, num_bytes=64, format_char_sequence="idddddddi")
            image_id = binary_image_properties[0]
            qvec = np.array(binary_image_properties[1:5])
            tvec = np.array(binary_image_properties[5:8])
            camera_id = binary_image_properties[8]
            image_name = ""
            current_char = read_next_bytes(fid, 1, "c")[0]
            while current_char != b"\x00":  # look for the ASCII 0 entry
                image_name += current_char.decode("utf-8")
                current_char = read_next_bytes(fid, 1, "c")[0]
            num_points2D = read_next_bytes(fid, num_bytes=8,
                                           format_char_sequence="Q")[0]
            x_y_id_s = read_next_bytes(fid, num_bytes=24 * num_points2D,
                                       format_char_sequence="ddq" * num_points2D)
            xys = np.column_stack([tuple(map(float, x_y_id_s[0::3])),
                                   tuple(map(float, x_y_id_s[1::3]))])
            point3D_ids = np.array(tuple(map(int, x_y_id_s[2::3])))
            images[image_id] = Image(
                id=image_id, qvec=qvec, tvec=tvec,
                camera_id=camera_id, name=image_name,
                xys=xys, point3D_ids=point3D_ids)
    return images


def write_images_text(images, path):
    """
    see: src/base/reconstruction.cc
        void Reconstruction::ReadImagesText(const std::string& path)
        void Reconstruction::WriteImagesText(const std::string& path)
    """
    if len(images) == 0:
        mean_observations = 0
    else:
        mean_observations = sum((len(img.point3D_ids) for _, img in images.items())) / len(images)
    HEADER = '# Image list with two lines of data per image:\n' + \
             '#   IMAGE_ID, QW, QX, QY, QZ, TX, TY, TZ, CAMERA_ID, NAME\n' + \
             '#   POINTS2D[] as (X, Y, POINT3D_ID)\n' + \
             '# Number of images: {}, mean observations per image: {}\n'.format(len(images), mean_observations)

    with open(path, "w") as fid:
        fid.write(HEADER)
        for _, img in images.items():
            image_header = [img.id, *img.qvec, *img.tvec, img.camera_id, img.name]
            first_line = " ".join(map(str, image_header))
            fid.write(first_line + "\n")

            points_strings = []
            for xy, point3D_id in zip(img.xys, img.point3D_ids):
                points_strings.append(" ".join(map(str, [*xy, point3D_id])))
            fid.write(" ".join(points_strings) + "\n")


def write_images_binary(images, path_to_model_file):
    """
    see: src/base/reconstruction.cc
        void Reconstruction::ReadImagesBinary(const std::string& path)
        void Reconstruction::WriteImagesBinary(const std::string& path)
    """
    with open(path_to_model_file, "wb") as fid:
        write_next_bytes(fid, len(images), "Q")
        for _, img in images.items():
            write_next_bytes(fid, img.id, "i")
            write_next_bytes(fid, img.qvec.tolist(), "dddd")
            write_next_bytes(fid, img.tvec.tolist(), "ddd")
            write_next_bytes(fid, img.camera_id, "i")
            for char in img.name:
                write_next_bytes(fid, char.encode("utf-8"), "c")
            write_next_bytes(fid, b"\x00", "c")
            write_next_bytes(fid, len(img.point3D_ids), "Q")
            for xy, p3d_id in zip(img.xys, img.point3D_ids):
                write_next_bytes(fid, [*xy, p3d_id], "ddq")


def read_points3D_text(path):
    """
    see: src/base/reconstruction.cc
        void Reconstruction::ReadPoints3DText(const std::string& path)
        void Reconstruction::WritePoints3DText(const std::string& path)
    """
    points3D = {}
    with open(path, "r") as fid:
        while True:
            line = fid.readline()
            if not line:
                break
            line = line.strip()
            if len(line) > 0 and line[0] != "#":
                elems = line.split()
                point3D_id = int(elems[0])
                xyz = np.array(tuple(map(float, elems[1:4])))
                rgb = np.array(tuple(map(int, elems[4:7])))
                error = float(elems[7])
                image_ids = np.array(tuple(map(int, elems[8::2])))
                point2D_idxs = np.array(tuple(map(int, elems[9::2])))
                points3D[point3D_id] = Point3D(id=point3D_id, xyz=xyz, rgb=rgb,
                                               error=error, image_ids=image_ids,
                                               point2D_idxs=point2D_idxs)
    return points3D


def read_points3d_binary(path_to_model_file):
    """
    see: src/base/reconstruction.cc
        void Reconstruction::ReadPoints3DBinary(const std::string& path)
        void Reconstruction::WritePoints3DBinary(const std::string& path)
    """
    points3D = {}
    with open(path_to_model_file, "rb") as fid:
        num_points = read_next_bytes(fid, 8, "Q")[0]
        for point_line_index in range(num_points):

(The remainder of read_write_model.py is cut off by the 50-file view; see the raw diff.)
|
345 |
+
binary_point_line_properties = read_next_bytes(
|
346 |
+
fid, num_bytes=43, format_char_sequence="QdddBBBd")
|
347 |
+
point3D_id = binary_point_line_properties[0]
|
348 |
+
xyz = np.array(binary_point_line_properties[1:4])
|
349 |
+
rgb = np.array(binary_point_line_properties[4:7])
|
350 |
+
error = np.array(binary_point_line_properties[7])
|
351 |
+
track_length = read_next_bytes(
|
352 |
+
fid, num_bytes=8, format_char_sequence="Q")[0]
|
353 |
+
track_elems = read_next_bytes(
|
354 |
+
fid, num_bytes=8 * track_length,
|
355 |
+
format_char_sequence="ii" * track_length)
|
356 |
+
image_ids = np.array(tuple(map(int, track_elems[0::2])))
|
357 |
+
point2D_idxs = np.array(tuple(map(int, track_elems[1::2])))
|
358 |
+
points3D[point3D_id] = Point3D(
|
359 |
+
id=point3D_id, xyz=xyz, rgb=rgb,
|
360 |
+
error=error, image_ids=image_ids,
|
361 |
+
point2D_idxs=point2D_idxs)
|
362 |
+
return points3D
|
363 |
+
|
364 |
+
|
365 |
+
def write_points3D_text(points3D, path):
|
366 |
+
"""
|
367 |
+
see: src/base/reconstruction.cc
|
368 |
+
void Reconstruction::ReadPoints3DText(const std::string& path)
|
369 |
+
void Reconstruction::WritePoints3DText(const std::string& path)
|
370 |
+
"""
|
371 |
+
if len(points3D) == 0:
|
372 |
+
mean_track_length = 0
|
373 |
+
else:
|
374 |
+
mean_track_length = sum((len(pt.image_ids) for _, pt in points3D.items())) / len(points3D)
|
375 |
+
HEADER = '# 3D point list with one line of data per point:\n'
|
376 |
+
'# POINT3D_ID, X, Y, Z, R, G, B, ERROR, TRACK[] as (IMAGE_ID, POINT2D_IDX)\n'
|
377 |
+
'# Number of points: {}, mean track length: {}\n'.format(len(points3D), mean_track_length)
|
378 |
+
|
379 |
+
with open(path, "w") as fid:
|
380 |
+
fid.write(HEADER)
|
381 |
+
for _, pt in points3D.items():
|
382 |
+
point_header = [pt.id, *pt.xyz, *pt.rgb, pt.error]
|
383 |
+
fid.write(" ".join(map(str, point_header)) + " ")
|
384 |
+
track_strings = []
|
385 |
+
for image_id, point2D in zip(pt.image_ids, pt.point2D_idxs):
|
386 |
+
track_strings.append(" ".join(map(str, [image_id, point2D])))
|
387 |
+
fid.write(" ".join(track_strings) + "\n")
|
388 |
+
|
389 |
+
|
390 |
+
def write_points3d_binary(points3D, path_to_model_file):
|
391 |
+
"""
|
392 |
+
see: src/base/reconstruction.cc
|
393 |
+
void Reconstruction::ReadPoints3DBinary(const std::string& path)
|
394 |
+
void Reconstruction::WritePoints3DBinary(const std::string& path)
|
395 |
+
"""
|
396 |
+
with open(path_to_model_file, "wb") as fid:
|
397 |
+
write_next_bytes(fid, len(points3D), "Q")
|
398 |
+
for _, pt in points3D.items():
|
399 |
+
write_next_bytes(fid, pt.id, "Q")
|
400 |
+
write_next_bytes(fid, pt.xyz.tolist(), "ddd")
|
401 |
+
write_next_bytes(fid, pt.rgb.tolist(), "BBB")
|
402 |
+
write_next_bytes(fid, pt.error, "d")
|
403 |
+
track_length = pt.image_ids.shape[0]
|
404 |
+
write_next_bytes(fid, track_length, "Q")
|
405 |
+
for image_id, point2D_id in zip(pt.image_ids, pt.point2D_idxs):
|
406 |
+
write_next_bytes(fid, [image_id, point2D_id], "ii")
|
407 |
+
|
408 |
+
|
409 |
+
def read_model(path, ext):
|
410 |
+
if ext == ".txt":
|
411 |
+
cameras = read_cameras_text(os.path.join(path, "cameras" + ext))
|
412 |
+
images = read_images_text(os.path.join(path, "images" + ext))
|
413 |
+
points3D = read_points3D_text(os.path.join(path, "points3D") + ext)
|
414 |
+
else:
|
415 |
+
cameras = read_cameras_binary(os.path.join(path, "cameras" + ext))
|
416 |
+
images = read_images_binary(os.path.join(path, "images" + ext))
|
417 |
+
points3D = read_points3d_binary(os.path.join(path, "points3D") + ext)
|
418 |
+
return cameras, images, points3D
|
419 |
+
|
420 |
+
|
421 |
+
def write_model(cameras, images, points3D, path, ext):
|
422 |
+
if ext == ".txt":
|
423 |
+
write_cameras_text(cameras, os.path.join(path, "cameras" + ext))
|
424 |
+
write_images_text(images, os.path.join(path, "images" + ext))
|
425 |
+
write_points3D_text(points3D, os.path.join(path, "points3D") + ext)
|
426 |
+
else:
|
427 |
+
write_cameras_binary(cameras, os.path.join(path, "cameras" + ext))
|
428 |
+
write_images_binary(images, os.path.join(path, "images" + ext))
|
429 |
+
write_points3d_binary(points3D, os.path.join(path, "points3D") + ext)
|
430 |
+
return cameras, images, points3D
|
431 |
+
|
432 |
+
|
433 |
+
def read_compressed_images_binary(path_to_model_file):
|
434 |
+
"""
|
435 |
+
see: src/base/reconstruction.cc
|
436 |
+
void Reconstruction::ReadImagesBinary(const std::string& path)
|
437 |
+
void Reconstruction::WriteImagesBinary(const std::string& path)
|
438 |
+
"""
|
439 |
+
images = {}
|
440 |
+
with open(path_to_model_file, "rb") as fid:
|
441 |
+
num_reg_images = read_next_bytes(fid, 8, "Q")[0]
|
442 |
+
for image_index in range(num_reg_images):
|
443 |
+
binary_image_properties = read_next_bytes(
|
444 |
+
fid, num_bytes=64, format_char_sequence="idddddddi")
|
445 |
+
image_id = binary_image_properties[0]
|
446 |
+
qvec = np.array(binary_image_properties[1:5])
|
447 |
+
tvec = np.array(binary_image_properties[5:8])
|
448 |
+
camera_id = binary_image_properties[8]
|
449 |
+
image_name = ""
|
450 |
+
current_char = read_next_bytes(fid, 1, "c")[0]
|
451 |
+
while current_char != b"\x00": # look for the ASCII 0 entry
|
452 |
+
image_name += current_char.decode("utf-8")
|
453 |
+
current_char = read_next_bytes(fid, 1, "c")[0]
|
454 |
+
num_points2D = read_next_bytes(fid, num_bytes=8,
|
455 |
+
format_char_sequence="Q")[0]
|
456 |
+
# x_y_id_s = read_next_bytes(fid, num_bytes=24 * num_points2D,
|
457 |
+
# format_char_sequence="ddq" * num_points2D)
|
458 |
+
# xys = np.column_stack([tuple(map(float, x_y_id_s[0::3])),
|
459 |
+
# tuple(map(float, x_y_id_s[1::3]))])
|
460 |
+
x_y_id_s = read_next_bytes(fid, num_bytes=8 * num_points2D,
|
461 |
+
format_char_sequence="q" * num_points2D)
|
462 |
+
point3D_ids = np.array(x_y_id_s)
|
463 |
+
images[image_id] = Image(
|
464 |
+
id=image_id, qvec=qvec, tvec=tvec,
|
465 |
+
camera_id=camera_id, name=image_name,
|
466 |
+
xys=np.array([]), point3D_ids=point3D_ids)
|
467 |
+
return images
|
468 |
+
|
469 |
+
|
470 |
+
def write_compressed_images_binary(images, path_to_model_file):
|
471 |
+
"""
|
472 |
+
see: src/base/reconstruction.cc
|
473 |
+
void Reconstruction::ReadImagesBinary(const std::string& path)
|
474 |
+
void Reconstruction::WriteImagesBinary(const std::string& path)
|
475 |
+
"""
|
476 |
+
with open(path_to_model_file, "wb") as fid:
|
477 |
+
write_next_bytes(fid, len(images), "Q")
|
478 |
+
for _, img in images.items():
|
479 |
+
write_next_bytes(fid, img.id, "i")
|
480 |
+
write_next_bytes(fid, img.qvec.tolist(), "dddd")
|
481 |
+
write_next_bytes(fid, img.tvec.tolist(), "ddd")
|
482 |
+
write_next_bytes(fid, img.camera_id, "i")
|
483 |
+
for char in img.name:
|
484 |
+
write_next_bytes(fid, char.encode("utf-8"), "c")
|
485 |
+
write_next_bytes(fid, b"\x00", "c")
|
486 |
+
write_next_bytes(fid, len(img.point3D_ids), "Q")
|
487 |
+
for p3d_id in img.point3D_ids:
|
488 |
+
write_next_bytes(fid, p3d_id, "q")
|
489 |
+
# for xy, p3d_id in zip(img.xys, img.point3D_ids):
|
490 |
+
# write_next_bytes(fid, [*xy, p3d_id], "ddq")
|
491 |
+
|
492 |
+
|
493 |
+
def read_compressed_points3d_binary(path_to_model_file):
|
494 |
+
"""
|
495 |
+
see: src/base/reconstruction.cc
|
496 |
+
void Reconstruction::ReadPoints3DBinary(const std::string& path)
|
497 |
+
void Reconstruction::WritePoints3DBinary(const std::string& path)
|
498 |
+
"""
|
499 |
+
points3D = {}
|
500 |
+
with open(path_to_model_file, "rb") as fid:
|
501 |
+
num_points = read_next_bytes(fid, 8, "Q")[0]
|
502 |
+
for point_line_index in range(num_points):
|
503 |
+
binary_point_line_properties = read_next_bytes(
|
504 |
+
fid, num_bytes=43, format_char_sequence="QdddBBBd")
|
505 |
+
point3D_id = binary_point_line_properties[0]
|
506 |
+
xyz = np.array(binary_point_line_properties[1:4])
|
507 |
+
rgb = np.array(binary_point_line_properties[4:7])
|
508 |
+
error = np.array(binary_point_line_properties[7])
|
509 |
+
track_length = read_next_bytes(
|
510 |
+
fid, num_bytes=8, format_char_sequence="Q")[0]
|
511 |
+
track_elems = read_next_bytes(
|
512 |
+
fid, num_bytes=4 * track_length,
|
513 |
+
format_char_sequence="i" * track_length)
|
514 |
+
image_ids = np.array(track_elems)
|
515 |
+
# point2D_idxs = np.array(tuple(map(int, track_elems[1::2])))
|
516 |
+
points3D[point3D_id] = Point3D(
|
517 |
+
id=point3D_id, xyz=xyz, rgb=rgb,
|
518 |
+
error=error, image_ids=image_ids,
|
519 |
+
point2D_idxs=np.array([]))
|
520 |
+
return points3D
|
521 |
+
|
522 |
+
|
523 |
+
def write_compressed_points3d_binary(points3D, path_to_model_file):
|
524 |
+
"""
|
525 |
+
see: src/base/reconstruction.cc
|
526 |
+
void Reconstruction::ReadPoints3DBinary(const std::string& path)
|
527 |
+
void Reconstruction::WritePoints3DBinary(const std::string& path)
|
528 |
+
"""
|
529 |
+
with open(path_to_model_file, "wb") as fid:
|
530 |
+
write_next_bytes(fid, len(points3D), "Q")
|
531 |
+
for _, pt in points3D.items():
|
532 |
+
write_next_bytes(fid, pt.id, "Q")
|
533 |
+
write_next_bytes(fid, pt.xyz.tolist(), "ddd")
|
534 |
+
write_next_bytes(fid, pt.rgb.tolist(), "BBB")
|
535 |
+
write_next_bytes(fid, pt.error, "d")
|
536 |
+
track_length = pt.image_ids.shape[0]
|
537 |
+
write_next_bytes(fid, track_length, "Q")
|
538 |
+
# for image_id, point2D_id in zip(pt.image_ids, pt.point2D_idxs):
|
539 |
+
# write_next_bytes(fid, [image_id, point2D_id], "ii")
|
540 |
+
for image_id in pt.image_ids:
|
541 |
+
write_next_bytes(fid, image_id, "i")
|
542 |
+
|
543 |
+
|
544 |
+
def read_compressed_model(path, ext):
|
545 |
+
if ext == ".txt":
|
546 |
+
cameras = read_cameras_text(os.path.join(path, "cameras" + ext))
|
547 |
+
images = read_images_text(os.path.join(path, "images" + ext))
|
548 |
+
points3D = read_points3D_text(os.path.join(path, "points3D") + ext)
|
549 |
+
else:
|
550 |
+
cameras = read_cameras_binary(os.path.join(path, "cameras" + ext))
|
551 |
+
images = read_compressed_images_binary(os.path.join(path, "images" + ext))
|
552 |
+
points3D = read_compressed_points3d_binary(os.path.join(path, "points3D") + ext)
|
553 |
+
return cameras, images, points3D
|
554 |
+
|
555 |
+
|
556 |
+
def qvec2rotmat(qvec):
|
557 |
+
return np.array([
|
558 |
+
[1 - 2 * qvec[2] ** 2 - 2 * qvec[3] ** 2,
|
559 |
+
2 * qvec[1] * qvec[2] - 2 * qvec[0] * qvec[3],
|
560 |
+
2 * qvec[3] * qvec[1] + 2 * qvec[0] * qvec[2]],
|
561 |
+
[2 * qvec[1] * qvec[2] + 2 * qvec[0] * qvec[3],
|
562 |
+
1 - 2 * qvec[1] ** 2 - 2 * qvec[3] ** 2,
|
563 |
+
2 * qvec[2] * qvec[3] - 2 * qvec[0] * qvec[1]],
|
564 |
+
[2 * qvec[3] * qvec[1] - 2 * qvec[0] * qvec[2],
|
565 |
+
2 * qvec[2] * qvec[3] + 2 * qvec[0] * qvec[1],
|
566 |
+
1 - 2 * qvec[1] ** 2 - 2 * qvec[2] ** 2]])
|
567 |
+
|
568 |
+
|
569 |
+
def rotmat2qvec(R):
|
570 |
+
Rxx, Ryx, Rzx, Rxy, Ryy, Rzy, Rxz, Ryz, Rzz = R.flat
|
571 |
+
K = np.array([
|
572 |
+
[Rxx - Ryy - Rzz, 0, 0, 0],
|
573 |
+
[Ryx + Rxy, Ryy - Rxx - Rzz, 0, 0],
|
574 |
+
[Rzx + Rxz, Rzy + Ryz, Rzz - Rxx - Ryy, 0],
|
575 |
+
[Ryz - Rzy, Rzx - Rxz, Rxy - Ryx, Rxx + Ryy + Rzz]]) / 3.0
|
576 |
+
eigvals, eigvecs = np.linalg.eigh(K)
|
577 |
+
qvec = eigvecs[[3, 0, 1, 2], np.argmax(eigvals)]
|
578 |
+
if qvec[0] < 0:
|
579 |
+
qvec *= -1
|
580 |
+
return qvec
|
581 |
+
|
582 |
+
|
583 |
+
def intrinsics_from_camera(camera_model, params):
|
584 |
+
if camera_model in ("SIMPLE_PINHOLE", "SIMPLE_RADIAL", "RADIAL"):
|
585 |
+
fx = fy = params[0]
|
586 |
+
cx = params[1]
|
587 |
+
cy = params[2]
|
588 |
+
elif camera_model in ("PINHOLE", "OPENCV", "OPENCV_FISHEYE", "FULL_OPENCV"):
|
589 |
+
fx = params[0]
|
590 |
+
fy = params[1]
|
591 |
+
cx = params[2]
|
592 |
+
cy = params[3]
|
593 |
+
else:
|
594 |
+
raise Exception("Camera model not supported")
|
595 |
+
|
596 |
+
# intrinsics
|
597 |
+
K = np.identity(3)
|
598 |
+
K[0, 0] = fx
|
599 |
+
K[1, 1] = fy
|
600 |
+
K[0, 2] = cx
|
601 |
+
K[1, 2] = cy
|
602 |
+
return K
|
603 |
+
|
604 |
+
|
605 |
+
def main():
    parser = argparse.ArgumentParser(description='Read and write COLMAP binary and text models')
    parser.add_argument('input_model', help='path to input model folder')
    parser.add_argument('input_format', choices=['.bin', '.txt'],
                        help='input model format')
    parser.add_argument('--output_model', metavar='PATH',
                        help='path to output model folder')
    parser.add_argument('--output_format', choices=['.bin', '.txt'],
                        help='output model format', default='.txt')
    args = parser.parse_args()

    cameras, images, points3D = read_model(path=args.input_model, ext=args.input_format)

    print("num_cameras:", len(cameras))
    print("num_images:", len(images))
    print("num_points3D:", len(points3D))

    if args.output_model is not None:
        write_model(cameras, images, points3D, path=args.output_model, ext=args.output_format)


if __name__ == "__main__":
    main()
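
A minimal usage sketch for the readers above (illustration only, not a file in this commit; `model_dir` is a placeholder path): `read_model` returns dicts of Camera, Image, and Point3D records, and `qvec2rotmat` converts an image's quaternion into its world-to-camera rotation.

import numpy as np
from colmap_utils.read_write_model import read_model, qvec2rotmat

model_dir = '/path/to/sparse/0'  # placeholder: folder holding cameras/images/points3D files
cameras, images, points3D = read_model(path=model_dir, ext='.bin')
for image_id, image in images.items():
    R = qvec2rotmat(image.qvec)   # 3x3 world-to-camera rotation
    t = image.tvec.reshape(3, 1)  # world-to-camera translation
    C = -R.T @ t                  # camera center in world coordinates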
third_party/pram/colmap_utils/utils.py
ADDED
@@ -0,0 +1 @@
# -*- coding: UTF-8 -*-
third_party/pram/configs/config_train_12scenes_sfd2.yaml
ADDED
@@ -0,0 +1,102 @@
dataset: [ '12Scenes' ]

network_1: "segnet"
network: "segnetvit"

local_rank: 0
gpu: [ 0 ]

feature: "sfd2"
save_path: '/scratches/flyer_2/fx221/exp/pram'
landmark_path: "/scratches/flyer_3/fx221/exp/pram/landmarks/sfd2-gml"
dataset_path: "/scratches/flyer_3/fx221/dataset"
config_path: 'configs/datasets'

image_dim: 3
feat_dim: 128
min_inliers: 32
max_inliers: 512
random_inliers: true
max_keypoints: 512
ignore_index: -1
output_dim: 1024
output_dim_: 2048
jitter_params:
  brightness: 0.5
  contrast: 0.5
  saturation: 0.25
  hue: 0.15
  blur: 0

scale_params: [ 0.5, 1.0 ]
pre_load: false
train: true
inlier_th: 0.5
lr: 0.0001
min_lr: 0.00001
optimizer: "adamw"
seg_loss: "cew"
seg_loss_nx: "cei"
cls_loss: "ce"
cls_loss_: "bce"
ac_fn: "relu"
norm_fn: "bn"
workers: 8
layers: 15
log_intervals: 50
eval_n_epoch: 10
do_eval: false

use_mid_feature: true
norm_desc: false
with_score: false
with_aug: true
with_dist: true

batch_size: 32
its_per_epoch: 1000
decay_rate: 0.999992
decay_iter: 60000
epochs: 500

cluster_method: 'birch'

weight_path: null
weight_path_1: '20230719_220620_segnet_L15_T_resnet4x_B32_K1024_relu_bn_od1024_nc193_adamw_cew_md_A_birch/segnet.499.pth'
weight_path_2: '20240202_145337_segnetvit_L15_T_resnet4x_B32_K512_relu_bn_od1024_nc193_adam_cew_md_A_birch/segnetvit.499.pth'

resume_path: null

n_class: 193

eval_max_keypoints: 1024

localization:
  loc_scene_name: [ 'apt1/kitchen' ]
  save_path: '/scratches/flyer_2/fx221/exp/localizer/loc_results'
  seg_k: 20
  threshold: 8
  min_kpts: 128
  min_matches: 4
  min_inliers: 64
  matching_method_: "mnn"
  matching_method_1: "spg"
  matching_method_2: "gm"
  matching_method: "gml"
  matching_method_5: "adagml"
  save: false
  show: true
  show_time: 1
  max_vrf: 1
  with_original: true
  with_extra: false
  with_compress: true
  semantic_matching: true
  do_refinement: true
  refinement_method_: 'matching'
  refinement_method: 'projection'
  pre_filtering_th: 0.95
  covisibility_frame: 20
  refinement_radius: 20
  refinement_nn_ratio: 0.9
  refinement_max_matches: 0
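
A minimal sketch (illustration only, not part of the commit; assumes PyYAML is installed) of how a training config like the one above can be consumed — all keys referenced appear in the file:

import yaml

with open('configs/config_train_12scenes_sfd2.yaml', 'r') as f:
    config = yaml.safe_load(f)

print(config['network'], config['n_class'])              # segnetvit 193
loc = config['localization']                             # nested localization options
print(loc['matching_method'], loc['refinement_method'])  # gml projection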
third_party/pram/configs/config_train_7scenes_sfd2.yaml
ADDED
@@ -0,0 +1,104 @@
dataset: [ '7Scenes' ]

network: "segnetvit"

local_rank: 0
gpu: [ 0 ]
# when using ddp, set gpu: [0,1,2,3]
with_dist: true

feature: "sfd2"
save_path_: '/scratches/flyer_2/fx221/exp/pram'
save_path: '/scratches/flyer_2/fx221/publications/test_pram/exp'
landmark_path_: "/scratches/flyer_3/fx221/exp/pram/landmarks/sfd2-gml"
landmark_path: "/scratches/flyer_2/fx221/publications/test_pram/landmakrs/sfd2-gml"
dataset_path: "/scratches/flyer_3/fx221/dataset"
config_path: 'configs/datasets'

image_dim: 3
feat_dim: 128

min_inliers: 32
max_inliers: 256
random_inliers: 1
max_keypoints: 512
ignore_index: -1
output_dim: 1024
output_dim_: 2048
jitter_params:
  brightness: 0.5
  contrast: 0.5
  saturation: 0.25
  hue: 0.15
  blur: 0

scale_params: [ 0.5, 1.0 ]
pre_load: false
train: true
inlier_th: 0.5
lr: 0.0001
min_lr: 0.00001
cls_loss: "ce"
ac_fn: "relu"
norm_fn: "bn"
workers: 8
layers: 15
log_intervals: 50
eval_n_epoch: 10
do_eval: false

use_mid_feature: true
norm_desc: false
with_cls: false
with_score: false
with_aug: true

batch_size: 32
its_per_epoch: 1000
decay_rate: 0.999992
decay_iter: 80000
epochs: 200

cluster_method: 'birch'

weight_path: null
weight_path_1: '20230724_203230_segnet_L15_S_resnet4x_B32_K1024_relu_bn_od1024_nc113_adam_cew_md_A_birch/segnet.180.pth'
weight_path_2: '20240202_152519_segnetvit_L15_S_resnet4x_B32_K512_relu_bn_od1024_nc113_adamw_cew_md_A_birch/segnetvit.199.pth'

# used for resuming training
resume_path: null

# used for localization
n_class: 113

eval_max_keypoints: 1024

localization:
  loc_scene_name: [ 'chess' ]
  save_path: '/scratches/flyer_2/fx221/exp/localizer/loc_results'

  seg_k: 20
  threshold: 8
  min_kpts: 128
  min_matches: 16
  min_inliers: 32
  matching_method_: "mnn"
  matching_method_1: "spg"
  matching_method_2: "gm"
  matching_method: "gml"
  matching_method_4: "adagml"
  save: false
  show: true
  show_time: 1
  with_original: true
  max_vrf: 1
  with_compress: true
  semantic_matching: true
  do_refinement: true
  pre_filtering_th: 0.95
  refinement_method_: 'matching'
  refinement_method: 'projection'
  covisibility_frame: 20
  refinement_radius: 20
  refinement_nn_ratio: 0.9
  refinement_max_matches: 0
third_party/pram/configs/config_train_aachen_sfd2.yaml
ADDED
@@ -0,0 +1,104 @@
dataset: [ 'Aachen' ]

network_: "segnet"
network: "segnetvit"
local_rank: 0
gpu: [ 0 ]

feature: "sfd2"
save_path: '/scratches/flyer_2/fx221/exp/pram'
landmark_path: "/scratches/flyer_3/fx221/exp/pram/landmarks/sfd2-gml"
dataset_path: "/scratches/flyer_3/fx221/dataset"

config_path: 'configs/datasets'

image_dim: 3
feat_dim: 128

min_inliers: 32
max_inliers: 512
random_inliers: true
max_keypoints: 1024
ignore_index: -1
output_dim: 1024
output_dim_: 2048
jitter_params:
  brightness: 0.5
  contrast: 0.5
  saturation: 0.25
  hue: 0.15
  blur: 0

scale_params: [ 0.5, 1.0 ]
pre_load: false
do_eval: true
train: true
inlier_th: 0.5
lr: 0.0001
min_lr: 0.00001
optimizer: "adam"
seg_loss: "cew"
seg_loss_nx: "cei"
cls_loss: "ce"
cls_loss_: "bce"
ac_fn: "relu"
norm_fn: "bn"
workers: 8
layers: 15
log_intervals: 50
eval_n_epoch: 10

use_mid_feature: true
norm_desc: false
with_sc: false
with_cls: true
with_score: false
with_aug: true
with_dist: true

batch_size: 32
its_per_epoch: 1000
decay_rate: 0.999992
decay_iter: 80000
epochs: 800

cluster_method: 'birch'

weight_path: null
weight_path_1: '20230719_221442_segnet_L15_A_resnet4x_B32_K1024_relu_bn_od1024_nc513_adamw_cew_md_A_birch/segnet.899.pth'
weight_path_2: '20240211_142623_segnetvit_L15_A_resnet4x_B32_K1024_relu_bn_od1024_nc513_adam_cew_md_A_birch/segnetvit.799.pth'
resume_path: null

n_class: 513

eval_max_keypoints: 4096

localization:
  loc_scene_name: [ ]
  save_path: '/scratches/flyer_2/fx221/exp/localizer/loc_results'
  seg_k: 10
  threshold: 12
  min_kpts: 256
  min_matches: 8
  min_inliers: 128
  matching_method_: "mnn"
  matching_method_1: "spg"
  matching_method_2: "gm"
  matching_method: "gml"
  matching_method_4: "adagml"
  save: false
  show: true
  show_time: 1
  with_original: true
  with_extra: false
  max_vrf: 1
  with_compress: true
  semantic_matching: true
  refinement_method_: 'matching'
  refinement_method: 'projection'
  pre_filtering_th: 0.95
  do_refinement: true
  covisibility_frame: 50
  refinement_radius: 30
  refinement_nn_ratio: 0.9
  refinement_max_matches: 0
third_party/pram/configs/config_train_cambridge_sfd2.yaml
ADDED
@@ -0,0 +1,103 @@
dataset: [ 'CambridgeLandmarks' ]

network_: "segnet"
network: "segnetvit"

local_rank: 0
gpu: [ 0 ]

feature: "sfd2"
save_path: '/scratches/flyer_2/fx221/exp/pram'
landmark_path: "/scratches/flyer_3/fx221/exp/pram/landmarks/sfd2-gml"
dataset_path: "/scratches/flyer_3/fx221/dataset"
config_path: 'configs/datasets'

image_dim: 3
feat_dim: 128

min_inliers: 32
max_inliers: 512
random_inliers: 1
max_keypoints: 1024
ignore_index: -1
output_dim: 1024
output_dim_: 2048
jitter_params:
  brightness: 0.5
  contrast: 0.5
  saturation: 0.25
  hue: 0.15
  blur: 0

scale_params: [ 0.5, 1.0 ]
pre_load: false
do_eval: false
train: true
inlier_th: 0.5
lr: 0.0001
min_lr: 0.00001
epochs: 300
seg_loss: "cew"
ac_fn: "relu"
norm_fn: "bn"
workers: 8
layers: 15
log_intervals: 50
eval_n_epoch: 10

use_mid_feature: true
norm_desc: false
with_score: false
with_aug: true
with_dist: true

batch_size: 32
its_per_epoch: 1000
decay_rate: 0.999992
decay_iter: 60000

cluster_method: 'birch'

weight_path: null
weight_path_1: '20230725_144044_segnet_L15_C_resnet4x_B32_K1024_relu_bn_od1024_nc161_adam_cew_md_A_birch/segnet.260.pth'
weight_path_2: '20240204_130323_segnetvit_L15_C_resnet4x_B32_K1024_relu_bn_od1024_nc161_adamw_cew_md_A_birch/segnetvit.399.pth'

resume_path: null

n_class: 161

eval_max_keypoints: 2048

localization:
  loc_scene_name_1: [ 'GreatCourt' ]
  loc_scene_name_2: [ 'KingsCollege' ]
  loc_scene_name: [ 'StMarysChurch' ]
  loc_scene_name_4: [ 'OldHospital' ]
  save_path: '/scratches/flyer_2/fx221/exp/localizer/loc_results'
  seg_k: 30
  threshold: 12
  min_kpts: 256
  min_matches: 16
  min_inliers_gm: 128
  min_inliers: 128
  matching_method_: "mnn"
  matching_method_1: "spg"
  matching_method_2: "gm"
  matching_method: "gml"
  matching_method_4: "adagml"
  show: true
  show_time: 1
  save: false
  with_original: true
  max_vrf: 1
  with_extra: false
  with_compress: true
  semantic_matching: true
  do_refinement: true
  pre_filtering_th: 0.95
  refinement_method_: 'matching'
  refinement_method: 'projection'
  covisibility_frame: 20
  refinement_radius: 20
  refinement_nn_ratio: 0.9
  refinement_max_matches: 0
third_party/pram/configs/config_train_multiset_sfd2.yaml
ADDED
@@ -0,0 +1,100 @@
dataset: [ 'S', 'T', 'C', 'A' ]

network: "segnet"
network_: "gsegnet3"

local_rank: 0
gpu: [ 4 ]

feature: "resnet4x"
save_path: '/scratches/flyer_2/fx221/exp/localizer'
landmark_path: "/scratches/flyer_3/fx221/exp/localizer/resnet4x-20230511-210205-pho-0005-gm"
dataset_path: "/scratches/flyer_3/fx221/dataset"
config_path: 'configs/datasets'

image_dim: 3
min_inliers: 32
max_inliers: 512
random_inliers: 1
max_keypoints: 1024
ignore_index: -1
output_dim: 1024
output_dim_: 2048
jitter_params:
  brightness: 0.5
  contrast: 0.5
  saturation: 0.25
  hue: 0.15
  blur: 0

scale_params: [ 0.5, 1.0 ]
pre_load: false
do_eval: true
train: true
inlier_th: 0.5
lr: 0.0001
min_lr: 0.00001
optimizer: "adam"
seg_loss: "cew"
seg_loss_nx: "cei"
cls_loss: "ce"
cls_loss_: "bce"
sc_loss: 'l1g'
ac_fn: "relu"
norm_fn: "bn"
workers: 8
layers: 15
log_intervals: 50
eval_n_epoch: 10

use_mid_feature: true
norm_desc: false
with_sc: false
with_cls: true
with_score: false
with_aug: true
with_dist: true

batch_size: 32
its_per_epoch: 1000
decay_rate: 0.999992
decay_iter: 150000
epochs: 1500

cluster_method_: 'kmeans'
cluster_method: 'birch'

weight_path_: null
weight_path: '20230805_132653_segnet_L15_STCA_resnet4x_B32_K1024_relu_bn_od1024_nc977_adam_cew_md_A_birch/segnet.485.pth'
resume_path: null

eval: false
#loc: false
loc: true
#n_class: 977
online: false

eval_max_keypoints: 4096

localization:
  loc_scene_name: [ ]
  save_path: '/scratches/flyer_2/fx221/exp/localizer/loc_results'
  dataset: [ 'T' ]
  seg_k: 50
  threshold: 8  # 8 for indoor, 12 for outdoor
  min_kpts: 256
  min_matches: 4
  min_inliers: 64
  matching_method_: "mnn"
  matching_method_1: "spg"
  matching_method: "gm"
  save: false
  show: true
  show_time: 1
  do_refinement: true
  with_original: true
  with_extra: false
  max_vrf: 1
  with_compress: false
  covisibility_frame: 20
  observation_threshold: 3
third_party/pram/configs/datasets/12Scenes.yaml
ADDED
@@ -0,0 +1,166 @@
dataset: '12Scenes'
scenes: [ 'apt1/kitchen',
          'apt1/living',
          'apt2/bed',
          'apt2/kitchen',
          'apt2/living',
          'apt2/luke',
          'office1/gates362',
          'office1/gates381',
          'office1/lounge',
          'office1/manolis',
          'office2/5a',
          'office2/5b'
]

apt1/kitchen:
  n_cluster: 16
  cluster_mode: 'xy'
  cluster_method: 'birch'

  training_sample_ratio: 1
  eval_sample_ratio: 5
  query_path: 'queries_with_intrinsics.txt'
  gt_pose_path: 'queries_poses.txt'
  image_path_prefix: ''


apt1/living:
  n_cluster: 16
  cluster_mode: 'xy'
  cluster_method: 'birch'

  training_sample_ratio: 1
  eval_sample_ratio: 5
  image_path_prefix: ''
  query_path: 'queries_with_intrinsics.txt'
  gt_pose_path: 'queries_poses.txt'

apt2/bed:
  n_cluster: 16
  cluster_mode: 'xy'
  cluster_method: 'birch'

  training_sample_ratio: 1
  eval_sample_ratio: 5
  image_path_prefix: ''

  query_path: 'queries_with_intrinsics.txt'
  gt_pose_path: 'queries_poses.txt'


apt2/kitchen:
  n_cluster: 16
  cluster_mode: 'xy'
  cluster_method: 'birch'

  training_sample_ratio: 1
  eval_sample_ratio: 5
  image_path_prefix: ''

  query_path: 'queries_with_intrinsics.txt'
  gt_pose_path: 'queries_poses.txt'


apt2/living:
  n_cluster: 16
  cluster_mode: 'xy'
  cluster_method: 'birch'

  training_sample_ratio: 1
  eval_sample_ratio: 5
  image_path_prefix: ''

  query_path: 'queries_with_intrinsics.txt'
  gt_pose_path: 'queries_poses.txt'


apt2/luke:
  n_cluster: 16
  cluster_mode: 'xy'
  cluster_method: 'birch'

  training_sample_ratio: 1
  eval_sample_ratio: 5
  image_path_prefix: ''

  query_path: 'queries_with_intrinsics.txt'
  gt_pose_path: 'queries_poses.txt'


office1/gates362:
  n_cluster: 16
  cluster_mode: 'xy'
  cluster_method: 'birch'

  training_sample_ratio: 3
  eval_sample_ratio: 5
  image_path_prefix: ''

  query_path: 'queries_with_intrinsics.txt'
  gt_pose_path: 'queries_poses.txt'


office1/gates381:
  n_cluster: 16
  cluster_mode: 'xy'
  cluster_method: 'birch'

  training_sample_ratio: 3
  eval_sample_ratio: 5
  image_path_prefix: ''

  query_path: 'queries_with_intrinsics.txt'
  gt_pose_path: 'queries_poses.txt'


office1/lounge:
  n_cluster: 16
  cluster_mode: 'xy'
  cluster_method: 'birch'

  training_sample_ratio: 1
  eval_sample_ratio: 5
  image_path_prefix: ''

  query_path: 'queries_with_intrinsics.txt'
  gt_pose_path: 'queries_poses.txt'


office1/manolis:
  n_cluster: 16
  cluster_mode: 'xy'
  cluster_method: 'birch'

  training_sample_ratio: 1
  eval_sample_ratio: 5
  image_path_prefix: ''

  query_path: 'queries_with_intrinsics.txt'
  gt_pose_path: 'queries_poses.txt'


office2/5a:
  n_cluster: 16
  cluster_mode: 'xy'
  cluster_method: 'birch'

  training_sample_ratio: 1
  eval_sample_ratio: 5
  image_path_prefix: ''

  query_path: 'queries_with_intrinsics.txt'
  gt_pose_path: 'queries_poses.txt'


office2/5b:
  n_cluster: 16
  cluster_mode: 'xy'
  cluster_method: 'birch'

  training_sample_ratio: 1
  eval_sample_ratio: 5
  image_path_prefix: ''

  query_path: 'queries_with_intrinsics.txt'
  gt_pose_path: 'queries_poses.txt'
third_party/pram/configs/datasets/7Scenes.yaml
ADDED
@@ -0,0 +1,96 @@
dataset: '7Scenes'
scenes: [ 'chess', 'heads', 'office', 'fire', 'stairs', 'redkitchen', 'pumpkin' ]


chess:
  n_cluster: 16
  cluster_mode: 'xz'
  cluster_method_: 'kmeans'
  cluster_method: 'birch'

  training_sample_ratio: 2
  eval_sample_ratio: 10
  gt_pose_path: 'queries_poses.txt'
  query_path: 'queries_with_intrinsics.txt'
  image_path_prefix: ''


heads:
  n_cluster: 16
  cluster_mode: 'xz'
  cluster_method_: 'kmeans'
  cluster_method: 'birch'

  training_sample_ratio: 1
  eval_sample_ratio: 2
  gt_pose_path: 'queries_poses.txt'
  query_path: 'queries_with_intrinsics.txt'
  image_path_prefix: ''


office:
  n_cluster: 16
  cluster_mode: 'xz'
  cluster_method_: 'kmeans'
  cluster_method: 'birch'

  training_sample_ratio: 3
  eval_sample_ratio: 10
  gt_pose_path: 'queries_poses.txt'
  query_path: 'queries_with_intrinsics.txt'
  image_path_prefix: ''

fire:
  n_cluster: 16
  cluster_mode: 'xz'
  cluster_method_: 'kmeans'
  cluster_method: 'birch'

  training_sample_ratio: 2
  eval_sample_ratio: 5
  gt_pose_path: 'queries_poses.txt'
  query_path: 'queries_with_intrinsics.txt'
  image_path_prefix: ''


stairs:
  n_cluster: 16
  cluster_mode: 'xz'
  cluster_method_: 'kmeans'
  cluster_method: 'birch'

  training_sample_ratio: 1
  eval_sample_ratio: 10
  gt_pose_path: 'queries_poses.txt'
  query_path: 'queries_with_intrinsics.txt'
  image_path_prefix: ''


redkitchen:
  n_cluster: 16
  cluster_mode: 'xz'
  cluster_method_: 'kmeans'
  cluster_method: 'birch'

  training_sample_ratio: 3
  eval_sample_ratio: 10
  gt_pose_path: 'queries_poses.txt'
  query_path: 'queries_with_intrinsics.txt'
  image_path_prefix: ''


pumpkin:
  n_cluster: 16
  cluster_mode: 'xz'
  cluster_method_: 'kmeans'
  cluster_method: 'birch'

  training_sample_ratio: 2
  eval_sample_ratio: 10
  gt_pose_path: 'queries_poses.txt'
  query_path: 'queries_with_intrinsics.txt'
  image_path_prefix: ''
third_party/pram/configs/datasets/Aachen.yaml
ADDED
@@ -0,0 +1,15 @@
dataset: 'Aachen'

scenes: [ 'Aachenv11' ]

Aachenv11:
  n_cluster: 512
  cluster_mode: 'xz'
  cluster_method_: 'kmeans'
  cluster_method: 'birch'
  training_sample_ratio: 1
  eval_sample_ratio: 1
  image_path_prefix: 'images/images_upright'
  query_path_: 'queries_with_intrinsics.txt'
  query_path: 'queries_with_intrinsics_demo.txt'
  gt_pose_path: 'queries_pose_spp_spg.txt'
third_party/pram/configs/datasets/CambridgeLandmarks.yaml
ADDED
@@ -0,0 +1,67 @@
dataset: 'CambridgeLandmarks'
scenes: [ 'GreatCourt', 'KingsCollege', 'OldHospital', 'ShopFacade', 'StMarysChurch' ]

GreatCourt:
  n_cluster: 32
  cluster_mode: 'xy'
  cluster_method: 'birch'

  training_sample_ratio: 1
  eval_sample_ratio: 1
  image_path_prefix: ''
  query_path: 'queries_with_intrinsics.txt'
  gt_pose_path: 'queries_poses.txt'


KingsCollege:
  n_cluster: 32
  cluster_mode: 'xy'
  cluster_method: 'birch'

  training_sample_ratio: 1
  eval_sample_ratio: 1
  image_path_prefix: ''

  query_path: 'queries_with_intrinsics.txt'
  gt_pose_path: 'queries_poses.txt'


OldHospital:
  n_cluster: 32
  cluster_mode: 'xz'
  cluster_method: 'birch'

  training_sample_ratio: 1
  eval_sample_ratio: 1
  image_path_prefix: ''
  query_path: 'queries_with_intrinsics.txt'
  gt_pose_path: 'queries_poses.txt'


ShopFacade:
  n_cluster: 32
  cluster_mode: 'xy'
  cluster_method: 'birch'

  training_sample_ratio: 1
  eval_sample_ratio: 1
  image_path_prefix: ''

  query_path: 'queries_with_intrinsics.txt'
  gt_pose_path: 'queries_poses.txt'


StMarysChurch:
  n_cluster: 32
  cluster_mode: 'xz'
  cluster_method: 'birch'

  training_sample_ratio: 1
  eval_sample_ratio: 1
  image_path_prefix: ''

  query_path: 'queries_with_intrinsics.txt'
  gt_pose_path: 'queries_poses.txt'
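
The dataset configs above are keyed by scene name; a minimal sketch (illustration only, not part of the commit; PyYAML assumed) of iterating the per-scene blocks:

import yaml

with open('configs/datasets/CambridgeLandmarks.yaml', 'r') as f:
    cfg = yaml.safe_load(f)

for scene in cfg['scenes']:
    sc = cfg[scene]  # each scene carries its own clustering and sampling options
    print(scene, sc['n_cluster'], sc['cluster_mode'], sc['cluster_method'])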
third_party/pram/dataset/aachen.py
ADDED
@@ -0,0 +1,119 @@
# -*- coding: UTF-8 -*-
'''=================================================
@Project -> File   pram -> aachen
@IDE    PyCharm
@Author fx221@cam.ac.uk
@Date   29/01/2024 14:33
=================================================='''
import os.path as osp
import numpy as np
import cv2
from colmap_utils.read_write_model import read_model
import torchvision.transforms as tvt
from dataset.basicdataset import BasicDataset


class Aachen(BasicDataset):
    def __init__(self, landmark_path, scene, dataset_path, n_class, seg_mode, seg_method, dataset='Aachen',
                 nfeatures=1024,
                 query_p3d_fn=None,
                 train=True,
                 with_aug=False,
                 min_inliers=0,
                 max_inliers=4096,
                 random_inliers=False,
                 jitter_params=None,
                 scale_params=None,
                 image_dim=3,
                 query_info_path=None,
                 sample_ratio=1, ):
        self.landmark_path = osp.join(landmark_path, scene)
        self.dataset_path = osp.join(dataset_path, scene)
        self.n_class = n_class
        self.dataset = dataset + '/' + scene
        self.nfeatures = nfeatures
        self.with_aug = with_aug
        self.jitter_params = jitter_params
        self.scale_params = scale_params
        self.image_dim = image_dim
        self.train = train
        self.min_inliers = min_inliers
        self.max_inliers = max_inliers if max_inliers < nfeatures else nfeatures
        self.random_inliers = random_inliers
        self.image_prefix = 'images/images_upright'

        train_transforms = []
        if self.with_aug:
            train_transforms.append(tvt.ColorJitter(
                brightness=jitter_params['brightness'],
                contrast=jitter_params['contrast'],
                saturation=jitter_params['saturation'],
                hue=jitter_params['hue']))
            if jitter_params['blur'] > 0:
                train_transforms.append(tvt.GaussianBlur(kernel_size=int(jitter_params['blur'])))
        self.train_transforms = tvt.Compose(train_transforms)

        if train:
            self.cameras, self.images, point3Ds = read_model(path=osp.join(self.landmark_path, '3D-models'), ext='.bin')
            self.name_to_id = {image.name: i for i, image in self.images.items() if len(self.images[i].point3D_ids) > 0}

        # only for testing of query images
        if not self.train:
            data = np.load(query_p3d_fn, allow_pickle=True)[()]
            self.img_p3d = data
        else:
            self.img_p3d = {}

        self.img_fns = []
        if train:
            with open(osp.join(self.dataset_path, 'aachen_db_imglist.txt'), 'r') as f:
                lines = f.readlines()
                for l in lines:
                    l = l.strip()
                    if l not in self.name_to_id.keys():
                        continue
                    self.img_fns.append(l)
        else:
            with open(osp.join(self.dataset_path, 'queries', 'day_time_queries_with_intrinsics.txt'), 'r') as f:
                lines = f.readlines()
                for l in lines:
                    l = l.strip().split()[0]
                    if l not in self.img_p3d.keys():
                        continue
                    self.img_fns.append(l)
            with open(osp.join(self.dataset_path, 'queries', 'night_time_queries_with_intrinsics.txt'), 'r') as f:
                lines = f.readlines()
                for l in lines:
                    l = l.strip().split()[0]
                    if l not in self.img_p3d.keys():
                        continue
                    self.img_fns.append(l)

        print(
            'Load {} images from {} for {}...'.format(len(self.img_fns), self.dataset, 'training' if train else 'eval'))

        data = np.load(osp.join(self.landmark_path,
                                'point3D_cluster_n{:d}_{:s}_{:s}.npy'.format(n_class - 1, seg_mode, seg_method)),
                       allow_pickle=True)[()]
        p3d_id = data['id']
        seg_id = data['label']
        self.p3d_seg = {p3d_id[i]: seg_id[i] for i in range(p3d_id.shape[0])}
        xyzs = data['xyz']
        self.p3d_xyzs = {p3d_id[i]: xyzs[i] for i in range(p3d_id.shape[0])}

        with open(osp.join(self.landmark_path, 'sc_mean_scale.txt'), 'r') as f:
            lines = f.readlines()
            for l in lines:
                l = l.strip().split()
                self.mean_xyz = np.array([float(v) for v in l[:3]])
                self.scale_xyz = np.array([float(v) for v in l[3:]])

        if not train:
            self.query_info = self.read_query_info(path=query_info_path)

        self.nfeatures = nfeatures
        self.feature_dir = osp.join(self.landmark_path, 'feats')
        self.feats = {}

    def read_image(self, image_name):
        return cv2.imread(osp.join(self.dataset_path, 'images/images_upright/', image_name))
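
`Aachen.__init__` above expects the landmark file `point3D_cluster_n{...}.npy` to hold a pickled dict with 'id', 'label', and 'xyz' arrays. A minimal sketch (illustration only, not part of the commit; array contents are dummy values) of producing and re-reading such a payload:

import numpy as np

payload = {
    'id': np.array([11, 42, 57]),          # COLMAP point3D ids (dummy values)
    'label': np.array([0, 3, 3]),          # per-point segment labels (dummy values)
    'xyz': np.zeros((3, 3), dtype=float),  # per-point 3D coordinates (dummy values)
}
np.save('point3D_cluster_n512_xz_birch.npy', payload)

data = np.load('point3D_cluster_n512_xz_birch.npy', allow_pickle=True)[()]
p3d_seg = {i: s for i, s in zip(data['id'], data['label'])}
p3d_xyzs = {i: x for i, x in zip(data['id'], data['xyz'])}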
third_party/pram/dataset/basicdataset.py
ADDED
@@ -0,0 +1,477 @@
# -*- coding: UTF-8 -*-
'''=================================================
@Project -> File   pram -> basicdataset
@IDE    PyCharm
@Author fx221@cam.ac.uk
@Date   29/01/2024 14:27
=================================================='''
import torchvision.transforms.functional as tvf
import torchvision.transforms as tvt
import os.path as osp
import numpy as np
import cv2
from colmap_utils.read_write_model import qvec2rotmat, read_model
from dataset.utils import normalize_size


class BasicDataset:
    def __init__(self,
                 img_list_fn,
                 feature_dir,
                 sfm_path,
                 seg_fn,
                 dataset_path,
                 n_class,
                 dataset,
                 nfeatures=1024,
                 query_p3d_fn=None,
                 train=True,
                 with_aug=False,
                 min_inliers=0,
                 max_inliers=4096,
                 random_inliers=False,
                 jitter_params=None,
                 scale_params=None,
                 image_dim=1,
                 pre_load=False,
                 query_info_path=None,
                 sc_mean_scale_fn=None,
                 ):
        self.n_class = n_class
        self.train = train
        self.min_inliers = min_inliers
        self.max_inliers = max_inliers if max_inliers < nfeatures else nfeatures
        self.random_inliers = random_inliers
        self.dataset_path = dataset_path
        self.with_aug = with_aug
        self.dataset = dataset
        self.jitter_params = jitter_params
        self.scale_params = scale_params
        self.image_dim = image_dim
        self.image_prefix = ''

        train_transforms = []
        if self.with_aug:
            train_transforms.append(tvt.ColorJitter(
                brightness=jitter_params['brightness'],
                contrast=jitter_params['contrast'],
                saturation=jitter_params['saturation'],
                hue=jitter_params['hue']))
            if jitter_params['blur'] > 0:
                train_transforms.append(tvt.GaussianBlur(kernel_size=int(jitter_params['blur'])))
        self.train_transforms = tvt.Compose(train_transforms)

        # only for testing of query images
        if not self.train:
            data = np.load(query_p3d_fn, allow_pickle=True)[()]
            self.img_p3d = data
        else:
            self.img_p3d = {}

        self.img_fns = []
        with open(img_list_fn, 'r') as f:
            lines = f.readlines()
            for l in lines:
                l = l.strip()
                self.img_fns.append(l)
        print('Load {} images from {} for {}...'.format(len(self.img_fns), dataset, 'training' if train else 'eval'))
        self.feats = {}
        if train:
            self.cameras, self.images, point3Ds = read_model(path=sfm_path, ext='.bin')
            self.name_to_id = {image.name: i for i, image in self.images.items()}

            data = np.load(seg_fn, allow_pickle=True)[()]
            p3d_id = data['id']
            seg_id = data['label']
            self.p3d_seg = {p3d_id[i]: seg_id[i] for i in range(p3d_id.shape[0])}
            self.p3d_xyzs = {}

            for pid in self.p3d_seg.keys():
                p3d = point3Ds[pid]
                self.p3d_xyzs[pid] = p3d.xyz

        with open(sc_mean_scale_fn, 'r') as f:
            lines = f.readlines()
            for l in lines:
                l = l.strip().split()
                self.mean_xyz = np.array([float(v) for v in l[:3]])
                self.scale_xyz = np.array([float(v) for v in l[3:]])

        if not train:
            self.query_info = self.read_query_info(path=query_info_path)

        self.nfeatures = nfeatures
        self.feature_dir = feature_dir
        print('Pre loaded {} feats, mean xyz {}, scale xyz {}'.format(len(self.feats.keys()), self.mean_xyz,
                                                                      self.scale_xyz))

    def normalize_p3ds(self, p3ds):
        mean_p3ds = np.ceil(np.mean(p3ds, axis=0))
        p3ds_ = p3ds - mean_p3ds
        dx = np.max(abs(p3ds_[:, 0]))
        dy = np.max(abs(p3ds_[:, 1]))
        dz = np.max(abs(p3ds_[:, 2]))
        scale_p3ds = np.ceil(np.array([dx, dy, dz], dtype=float).reshape(3, ))
        scale_p3ds[scale_p3ds < 1] = 1
        scale_p3ds[scale_p3ds == 0] = 1
        return mean_p3ds, scale_p3ds

    def read_query_info(self, path):
        query_info = {}
        with open(path, 'r') as f:
            lines = f.readlines()
            for l in lines:
                l = l.strip().split()
                image_name = l[0]
                cam_model = l[1]
                h, w = int(l[2]), int(l[3])
                params = np.array([float(v) for v in l[4:]])
                query_info[image_name] = {
                    'width': w,
                    'height': h,
                    'model': cam_model,
                    'params': params,
                }
        return query_info

    def extract_intrinsic_extrinsic_params(self, image_id):
        cam = self.cameras[self.images[image_id].camera_id]
        params = cam.params
        model = cam.model
        if model in ("SIMPLE_PINHOLE", "SIMPLE_RADIAL", "RADIAL"):
            fx = fy = params[0]
            cx = params[1]
            cy = params[2]
        elif model in ("PINHOLE", "OPENCV", "OPENCV_FISHEYE", "FULL_OPENCV"):
            fx = params[0]
            fy = params[1]
            cx = params[2]
            cy = params[3]
        else:
            raise Exception("Camera model not supported")
        K = np.eye(3, dtype=float)
        K[0, 0] = fx
        K[1, 1] = fy
        K[0, 2] = cx
        K[1, 2] = cy

        qvec = self.images[image_id].qvec
        tvec = self.images[image_id].tvec
        R = qvec2rotmat(qvec=qvec)
        P = np.eye(4, dtype=float)
        P[:3, :3] = R
        P[:3, 3] = tvec.reshape(3, )

        return {'K': K, 'P': P}

    def get_item_train(self, idx):
        img_name = self.img_fns[idx]
        if img_name in self.feats.keys():
            feat_data = self.feats[img_name]
        else:
            feat_data = np.load(osp.join(self.feature_dir, img_name.replace('/', '+') + '.npy'), allow_pickle=True)[()]
        # descs = feat_data['descriptors']  # [N, D]
        scores = feat_data['scores']  # [N, 1]
        kpts = feat_data['keypoints']  # [N, 2]
        image_size = feat_data['image_size']

        nfeat = kpts.shape[0]

        # print(img_name, self.name_to_id[img_name])
        p3d_ids = self.images[self.name_to_id[img_name]].point3D_ids
        p3d_xyzs = np.zeros(shape=(nfeat, 3), dtype=float)

        seg_ids = np.zeros(shape=(nfeat,), dtype=int)  # + self.n_class - 1
        for i in range(nfeat):
            p3d = p3d_ids[i]
            if p3d in self.p3d_seg.keys():
                seg_ids[i] = self.p3d_seg[p3d] + 1  # 0 for invalid
                if seg_ids[i] == -1:
                    seg_ids[i] = 0

            if p3d in self.p3d_xyzs.keys():
                p3d_xyzs[i] = self.p3d_xyzs[p3d]

        seg_ids = np.array(seg_ids).reshape(-1, )

        n_inliers = np.sum(seg_ids > 0)
        n_outliers = np.sum(seg_ids == 0)
        inlier_ids = np.where(seg_ids > 0)[0]
        outlier_ids = np.where(seg_ids == 0)[0]

        if n_inliers <= self.min_inliers:
            sel_inliers = n_inliers
            sel_outliers = self.nfeatures - sel_inliers

            out_ids = np.arange(n_outliers)
            np.random.shuffle(out_ids)
            sel_ids = np.hstack([inlier_ids, outlier_ids[out_ids[:self.nfeatures - n_inliers]]])
        else:
            sel_inliers = np.random.randint(self.min_inliers, self.max_inliers)
            if sel_inliers > n_inliers:
                sel_inliers = n_inliers

            if sel_inliers + n_outliers < self.nfeatures:
                sel_inliers = self.nfeatures - n_outliers

            sel_outliers = self.nfeatures - sel_inliers

            in_ids = np.arange(n_inliers)
            np.random.shuffle(in_ids)
            sel_inlier_ids = inlier_ids[in_ids[:sel_inliers]]

            out_ids = np.arange(n_outliers)
            np.random.shuffle(out_ids)
            sel_outlier_ids = outlier_ids[out_ids[:sel_outliers]]

            sel_ids = np.hstack([sel_inlier_ids, sel_outlier_ids])

        # sel_descs = descs[sel_ids]
        sel_scores = scores[sel_ids]
        sel_kpts = kpts[sel_ids]
        sel_seg_ids = seg_ids[sel_ids]
        sel_xyzs = p3d_xyzs[sel_ids]

        shuffle_ids = np.arange(sel_ids.shape[0])
        np.random.shuffle(shuffle_ids)
|
237 |
+
# sel_descs = sel_descs[shuffle_ids]
|
238 |
+
sel_scores = sel_scores[shuffle_ids]
|
239 |
+
sel_kpts = sel_kpts[shuffle_ids]
|
240 |
+
sel_seg_ids = sel_seg_ids[shuffle_ids]
|
241 |
+
sel_xyzs = sel_xyzs[shuffle_ids]
|
242 |
+
|
243 |
+
if sel_kpts.shape[0] < self.nfeatures:
|
244 |
+
# print(sel_descs.shape, sel_kpts.shape, sel_scores.shape, sel_seg_ids.shape, sel_xyzs.shape)
|
245 |
+
valid_sel_ids = np.array([v for v in range(sel_kpts.shape[0]) if sel_seg_ids[v] > 0], dtype=int)
|
246 |
+
# ref_sel_id = np.random.choice(valid_sel_ids, size=1)[0]
|
247 |
+
if valid_sel_ids.shape[0] == 0:
|
248 |
+
valid_sel_ids = np.array([v for v in range(sel_kpts.shape[0])], dtype=int)
|
249 |
+
random_n = self.nfeatures - sel_kpts.shape[0]
|
250 |
+
random_scores = np.random.random((random_n,))
|
251 |
+
random_kpts, random_seg_ids, random_xyzs = self.random_points_from_reference(
|
252 |
+
n=random_n,
|
253 |
+
ref_kpts=sel_kpts[valid_sel_ids],
|
254 |
+
ref_segs=sel_seg_ids[valid_sel_ids],
|
255 |
+
ref_xyzs=sel_xyzs[valid_sel_ids],
|
256 |
+
radius=5,
|
257 |
+
)
|
258 |
+
# sel_descs = np.vstack([sel_descs, random_descs])
|
259 |
+
sel_scores = np.hstack([sel_scores, random_scores])
|
260 |
+
sel_kpts = np.vstack([sel_kpts, random_kpts])
|
261 |
+
sel_seg_ids = np.hstack([sel_seg_ids, random_seg_ids])
|
262 |
+
sel_xyzs = np.vstack([sel_xyzs, random_xyzs])
|
263 |
+
|
264 |
+
gt_n_seg = np.zeros(shape=(self.n_class,), dtype=int)
|
265 |
+
gt_cls = np.zeros(shape=(self.n_class,), dtype=int)
|
266 |
+
gt_cls_dist = np.zeros(shape=(self.n_class,), dtype=float)
|
267 |
+
uids = np.unique(sel_seg_ids).tolist()
|
268 |
+
for uid in uids:
|
269 |
+
if uid == 0:
|
270 |
+
continue
|
271 |
+
gt_cls[uid] = 1
|
272 |
+
gt_n_seg[uid] = np.sum(sel_seg_ids == uid)
|
273 |
+
gt_cls_dist[uid] = np.sum(seg_ids == uid) / np.sum(seg_ids > 0) # [valid_id / total_valid_id]
|
274 |
+
|
275 |
+
param_out = self.extract_intrinsic_extrinsic_params(image_id=self.name_to_id[img_name])
|
276 |
+
|
277 |
+
img = self.read_image(image_name=img_name)
|
278 |
+
image_size = img.shape[:2]
|
279 |
+
if self.image_dim == 1:
|
280 |
+
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
|
281 |
+
else:
|
282 |
+
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
|
283 |
+
if self.with_aug:
|
284 |
+
nh = img.shape[0]
|
285 |
+
nw = img.shape[1]
|
286 |
+
if self.scale_params is not None:
|
287 |
+
do_scale = np.random.random()
|
288 |
+
if do_scale <= 0.25:
|
289 |
+
p = np.random.randint(0, 11)
|
290 |
+
s = self.scale_params[0] + (self.scale_params[1] - self.scale_params[0]) / 10 * p
|
291 |
+
nh = int(img.shape[0] * s)
|
292 |
+
nw = int(img.shape[1] * s)
|
293 |
+
sh = nh / img.shape[0]
|
294 |
+
sw = nw / img.shape[1]
|
295 |
+
sel_kpts[:, 0] = sel_kpts[:, 0] * sw
|
296 |
+
sel_kpts[:, 1] = sel_kpts[:, 1] * sh
|
297 |
+
img = cv2.resize(img, dsize=(nw, nh))
|
298 |
+
|
299 |
+
brightness = np.random.uniform(-self.jitter_params['brightness'], self.jitter_params['brightness']) * 255
|
300 |
+
contrast = 1 + np.random.uniform(-self.jitter_params['contrast'], self.jitter_params['contrast'])
|
301 |
+
img = cv2.addWeighted(img, contrast, img, 0, brightness)
|
302 |
+
img = np.clip(img, a_min=0, a_max=255)
|
303 |
+
if self.image_dim == 1:
|
304 |
+
img = img[..., None]
|
305 |
+
img = img.astype(float) / 255.
|
306 |
+
image_size = np.array([nh, nw], dtype=int)
|
307 |
+
else:
|
308 |
+
if self.image_dim == 1:
|
309 |
+
img = img[..., None].astype(float) / 255.
|
310 |
+
|
311 |
+
output = {
|
312 |
+
# 'descriptors': sel_descs, # may not be used
|
313 |
+
'scores': sel_scores,
|
314 |
+
'keypoints': sel_kpts,
|
315 |
+
'norm_keypoints': normalize_size(x=sel_kpts, size=image_size),
|
316 |
+
'image': [img],
|
317 |
+
'gt_seg': sel_seg_ids,
|
318 |
+
'gt_cls': gt_cls,
|
319 |
+
'gt_cls_dist': gt_cls_dist,
|
320 |
+
'gt_n_seg': gt_n_seg,
|
321 |
+
'file_name': img_name,
|
322 |
+
'prefix_name': self.image_prefix,
|
323 |
+
# 'mean_xyz': self.mean_xyz,
|
324 |
+
# 'scale_xyz': self.scale_xyz,
|
325 |
+
# 'gt_sc': sel_xyzs,
|
326 |
+
# 'gt_norm_sc': (sel_xyzs - self.mean_xyz) / self.scale_xyz,
|
327 |
+
'K': param_out['K'],
|
328 |
+
'gt_P': param_out['P']
|
329 |
+
}
|
330 |
+
return output
|
331 |
+
|
332 |
+
def get_item_test(self, idx):
|
333 |
+
|
334 |
+
# evaluation of recognition only
|
335 |
+
img_name = self.img_fns[idx]
|
336 |
+
feat_data = np.load(osp.join(self.feature_dir, img_name.replace('/', '+') + '.npy'), allow_pickle=True)[()]
|
337 |
+
descs = feat_data['descriptors'] # [N, D]
|
338 |
+
scores = feat_data['scores'] # [N, 1]
|
339 |
+
kpts = feat_data['keypoints'] # [N, 2]
|
340 |
+
image_size = feat_data['image_size']
|
341 |
+
|
342 |
+
nfeat = descs.shape[0]
|
343 |
+
|
344 |
+
if img_name in self.img_p3d.keys():
|
345 |
+
p3d_ids = self.img_p3d[img_name]
|
346 |
+
p3d_xyzs = np.zeros(shape=(nfeat, 3), dtype=float)
|
347 |
+
seg_ids = np.zeros(shape=(nfeat,), dtype=int) # attention! by default invalid!!!
|
348 |
+
for i in range(nfeat):
|
349 |
+
p3d = p3d_ids[i]
|
350 |
+
if p3d in self.p3d_seg.keys():
|
351 |
+
seg_ids[i] = self.p3d_seg[p3d] + 1
|
352 |
+
if seg_ids[i] == -1:
|
353 |
+
seg_ids[i] = 0 # 0 for in valid
|
354 |
+
|
355 |
+
if p3d in self.p3d_xyzs.keys():
|
356 |
+
p3d_xyzs[i] = self.p3d_xyzs[p3d]
|
357 |
+
|
358 |
+
seg_ids = np.array(seg_ids).reshape(-1, )
|
359 |
+
|
360 |
+
if self.nfeatures > 0:
|
361 |
+
sorted_ids = np.argsort(scores)[::-1][:self.nfeatures] # large to small
|
362 |
+
descs = descs[sorted_ids]
|
363 |
+
scores = scores[sorted_ids]
|
364 |
+
kpts = kpts[sorted_ids]
|
365 |
+
p3d_xyzs = p3d_xyzs[sorted_ids]
|
366 |
+
|
367 |
+
seg_ids = seg_ids[sorted_ids]
|
368 |
+
|
369 |
+
gt_n_seg = np.zeros(shape=(self.n_class,), dtype=int)
|
370 |
+
gt_cls = np.zeros(shape=(self.n_class,), dtype=int)
|
371 |
+
gt_cls_dist = np.zeros(shape=(self.n_class,), dtype=float)
|
372 |
+
uids = np.unique(seg_ids).tolist()
|
373 |
+
for uid in uids:
|
374 |
+
if uid == 0:
|
375 |
+
continue
|
376 |
+
gt_cls[uid] = 1
|
377 |
+
gt_n_seg[uid] = np.sum(seg_ids == uid)
|
378 |
+
gt_cls_dist[uid] = np.sum(seg_ids == uid) / np.sum(
|
379 |
+
seg_ids < self.n_class - 1) # [valid_id / total_valid_id]
|
380 |
+
|
381 |
+
gt_cls[0] = 0
|
382 |
+
|
383 |
+
img = self.read_image(image_name=img_name)
|
384 |
+
if self.image_dim == 1:
|
385 |
+
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
|
386 |
+
img = img[..., None].astype(float) / 255.
|
387 |
+
else:
|
388 |
+
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(float) / 255.
|
389 |
+
return {
|
390 |
+
'descriptors': descs,
|
391 |
+
'scores': scores,
|
392 |
+
'keypoints': kpts,
|
393 |
+
'image_size': image_size,
|
394 |
+
'norm_keypoints': normalize_size(x=kpts, size=image_size),
|
395 |
+
'gt_seg': seg_ids,
|
396 |
+
'gt_cls': gt_cls,
|
397 |
+
'gt_cls_dist': gt_cls_dist,
|
398 |
+
'gt_n_seg': gt_n_seg,
|
399 |
+
'file_name': img_name,
|
400 |
+
'prefix_name': self.image_prefix,
|
401 |
+
'image': [img],
|
402 |
+
|
403 |
+
'mean_xyz': self.mean_xyz,
|
404 |
+
'scale_xyz': self.scale_xyz,
|
405 |
+
'gt_sc': p3d_xyzs,
|
406 |
+
'gt_norm_sc': (p3d_xyzs - self.mean_xyz) / self.scale_xyz
|
407 |
+
}
|
408 |
+
|
409 |
+
def __getitem__(self, idx):
|
410 |
+
if self.train:
|
411 |
+
return self.get_item_train(idx=idx)
|
412 |
+
else:
|
413 |
+
return self.get_item_test(idx=idx)
|
414 |
+
|
415 |
+
def __len__(self):
|
416 |
+
return len(self.img_fns)
|
417 |
+
|
418 |
+
def read_image(self, image_name):
|
419 |
+
return cv2.imread(osp.join(self.dataset_path, image_name))
|
420 |
+
|
421 |
+
def jitter_augmentation(self, img, params):
|
422 |
+
brightness, contrast, saturation, hue = params
|
423 |
+
p = np.random.randint(0, 20) / 20
|
424 |
+
b = brightness[0] + (brightness[1] - brightness[0]) / 20 * p
|
425 |
+
img = tvf.adjust_brightness(img=img, brightness_factor=b)
|
426 |
+
|
427 |
+
p = np.random.randint(0, 20) / 20
|
428 |
+
c = contrast[0] + (contrast[1] - contrast[0]) / 20 * p
|
429 |
+
img = tvf.adjust_contrast(img=img, contrast_factor=c)
|
430 |
+
|
431 |
+
p = np.random.randint(0, 20) / 20
|
432 |
+
s = saturation[0] + (saturation[1] - saturation[0]) / 20 * p
|
433 |
+
img = tvf.adjust_saturation(img=img, saturation_factor=s)
|
434 |
+
|
435 |
+
p = np.random.randint(0, 20) / 20
|
436 |
+
h = hue[0] + (hue[1] - hue[0]) / 20 * p
|
437 |
+
img = tvf.adjust_hue(img=img, hue_factor=h)
|
438 |
+
|
439 |
+
return img
|
440 |
+
|
441 |
+
def random_points(self, n, d, h, w):
|
442 |
+
desc = np.random.random((n, d))
|
443 |
+
desc = desc / np.linalg.norm(desc, ord=2, axis=1)[..., None]
|
444 |
+
xs = np.random.randint(0, w - 1, size=(n, 1))
|
445 |
+
ys = np.random.randint(0, h - 1, size=(n, 1))
|
446 |
+
kpts = np.hstack([xs, ys])
|
447 |
+
return desc, kpts
|
448 |
+
|
449 |
+
def random_points_from_reference(self, n, ref_kpts, ref_segs, ref_xyzs, radius=5):
|
450 |
+
n_ref = ref_kpts.shape[0]
|
451 |
+
if n_ref < n:
|
452 |
+
ref_ids = np.random.choice([i for i in range(n_ref)], size=n).tolist()
|
453 |
+
else:
|
454 |
+
ref_ids = [i for i in range(n)]
|
455 |
+
|
456 |
+
new_xs = []
|
457 |
+
new_ys = []
|
458 |
+
# new_descs = []
|
459 |
+
new_segs = []
|
460 |
+
new_xyzs = []
|
461 |
+
for i in ref_ids:
|
462 |
+
nx = np.random.randint(-radius, radius) + ref_kpts[i, 0]
|
463 |
+
ny = np.random.randint(-radius, radius) + ref_kpts[i, 1]
|
464 |
+
|
465 |
+
new_xs.append(nx)
|
466 |
+
new_ys.append(ny)
|
467 |
+
# new_descs.append(ref_descs[i])
|
468 |
+
new_segs.append(ref_segs[i])
|
469 |
+
new_xyzs.append(ref_xyzs[i])
|
470 |
+
|
471 |
+
new_xs = np.array(new_xs).reshape(n, 1)
|
472 |
+
new_ys = np.array(new_ys).reshape(n, 1)
|
473 |
+
new_segs = np.array(new_segs).reshape(n, )
|
474 |
+
new_kpts = np.hstack([new_xs, new_ys])
|
475 |
+
# new_descs = np.array(new_descs).reshape(n, -1)
|
476 |
+
new_xyzs = np.array(new_xyzs)
|
477 |
+
return new_kpts, new_segs, new_xyzs
|
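Note: the fixed-budget inlier/outlier selection in `get_item_train` above is what keeps every training sample at a constant keypoint count. A minimal standalone sketch of the same idea (illustrative only; `sample_fixed_budget` and its arguments are hypothetical names, the class itself uses `self.nfeatures`/`self.min_inliers`/`self.max_inliers`):

import numpy as np

def sample_fixed_budget(seg_ids, budget, min_inliers, max_inliers):
    """Pick `budget` keypoint indices, balancing labeled (>0) and unlabeled (0) points."""
    inlier_ids = np.where(seg_ids > 0)[0]
    outlier_ids = np.where(seg_ids == 0)[0]
    n_in, n_out = inlier_ids.size, outlier_ids.size
    np.random.shuffle(inlier_ids)
    np.random.shuffle(outlier_ids)
    if n_in <= min_inliers:
        # too few labeled points: keep them all, pad with random unlabeled ones
        return np.hstack([inlier_ids, outlier_ids[:budget - n_in]])
    # draw a random inlier count, clamped so the budget stays reachable
    sel_in = min(np.random.randint(min_inliers, max_inliers), n_in)
    sel_in = max(sel_in, budget - n_out)
    return np.hstack([inlier_ids[:sel_in], outlier_ids[:budget - sel_in]])

seg_ids = np.array([0, 1, 2, 0, 3, 0, 1, 2, 0, 0])
ids = sample_fixed_budget(seg_ids, budget=8, min_inliers=2, max_inliers=6)
print(ids.shape)  # (8,); when too few points exist, the class pads via random_points_from_reference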
third_party/pram/dataset/cambridge_landmarks.py
ADDED
@@ -0,0 +1,101 @@
+# -*- coding: UTF-8 -*-
+'''=================================================
+@Project -> File   pram -> cambridge_landmarks
+@IDE       PyCharm
+@Author    fx221@cam.ac.uk
+@Date      29/01/2024 14:41
+=================================================='''
+import os.path as osp
+import numpy as np
+from colmap_utils.read_write_model import read_model
+import torchvision.transforms as tvt
+from dataset.basicdataset import BasicDataset
+
+
+class CambridgeLandmarks(BasicDataset):
+    def __init__(self, landmark_path, scene, dataset_path, n_class, seg_mode, seg_method, dataset='CambridgeLandmarks',
+                 nfeatures=1024,
+                 query_p3d_fn=None,
+                 train=True,
+                 with_aug=False,
+                 min_inliers=0,
+                 max_inliers=4096,
+                 random_inliers=False,
+                 jitter_params=None,
+                 scale_params=None,
+                 image_dim=3,
+                 query_info_path=None,
+                 sample_ratio=1,
+                 ):
+        self.landmark_path = osp.join(landmark_path, scene)
+        self.dataset_path = osp.join(dataset_path, scene)
+        self.n_class = n_class
+        self.dataset = dataset + '/' + scene
+        self.nfeatures = nfeatures
+        self.with_aug = with_aug
+        self.jitter_params = jitter_params
+        self.scale_params = scale_params
+        self.image_dim = image_dim
+        self.train = train
+        self.min_inliers = min_inliers
+        self.max_inliers = max_inliers if max_inliers < nfeatures else nfeatures
+        self.random_inliers = random_inliers
+        self.image_prefix = ''
+        train_transforms = []
+        if self.with_aug:
+            train_transforms.append(tvt.ColorJitter(
+                brightness=jitter_params['brightness'],
+                contrast=jitter_params['contrast'],
+                saturation=jitter_params['saturation'],
+                hue=jitter_params['hue']))
+            if jitter_params['blur'] > 0:
+                train_transforms.append(tvt.GaussianBlur(kernel_size=int(jitter_params['blur'])))
+        self.train_transforms = tvt.Compose(train_transforms)
+
+        if train:
+            self.cameras, self.images, point3Ds = read_model(path=osp.join(self.landmark_path, '3D-models'), ext='.bin')
+            self.name_to_id = {image.name: i for i, image in self.images.items() if len(self.images[i].point3D_ids) > 0}
+
+        # only for testing of query images
+        if not self.train:
+            data = np.load(query_p3d_fn, allow_pickle=True)[()]
+            self.img_p3d = data
+        else:
+            self.img_p3d = {}
+
+        self.img_fns = []
+        with open(osp.join(self.dataset_path, 'dataset_train.txt' if train else 'dataset_test.txt'), 'r') as f:
+            lines = f.readlines()[3:]  # ignore the first 3 lines
+            for l in lines:
+                l = l.strip().split()[0]
+                if train and l not in self.name_to_id.keys():
+                    continue
+                if not train and l not in self.img_p3d.keys():
+                    continue
+                self.img_fns.append(l)
+
+        print('Load {} images from {} for {}...'.format(len(self.img_fns),
+                                                        self.dataset, 'training' if train else 'eval'))
+
+        data = np.load(osp.join(self.landmark_path,
+                                'point3D_cluster_n{:d}_{:s}_{:s}.npy'.format(n_class - 1, seg_mode, seg_method)),
+                       allow_pickle=True)[()]
+        p3d_id = data['id']
+        seg_id = data['label']
+        self.p3d_seg = {p3d_id[i]: seg_id[i] for i in range(p3d_id.shape[0])}
+        xyzs = data['xyz']
+        self.p3d_xyzs = {p3d_id[i]: xyzs[i] for i in range(p3d_id.shape[0])}
+
+        # with open(osp.join(self.landmark_path, 'sc_mean_scale.txt'), 'r') as f:
+        #     lines = f.readlines()
+        #     for l in lines:
+        #         l = l.strip().split()
+        #         self.mean_xyz = np.array([float(v) for v in l[:3]])
+        #         self.scale_xyz = np.array([float(v) for v in l[3:]])
+
+        if not train:
+            self.query_info = self.read_query_info(path=query_info_path)
+
+        self.nfeatures = nfeatures
+        self.feature_dir = osp.join(self.landmark_path, 'feats')
+        self.feats = {}
third_party/pram/dataset/customdataset.py
ADDED
@@ -0,0 +1,93 @@
+# -*- coding: UTF-8 -*-
+'''=================================================
+@Project -> File   pram -> customdataset.py
+@IDE       PyCharm
+@Author    fx221@cam.ac.uk
+@Date      29/01/2024 14:38
+=================================================='''
+import os.path as osp
+import numpy as np
+from colmap_utils.read_write_model import read_model
+import torchvision.transforms as tvt
+from dataset.basicdataset import BasicDataset
+
+
+class CustomDataset(BasicDataset):
+    def __init__(self, landmark_path, scene, dataset_path, n_class, seg_mode, seg_method, dataset,
+                 nfeatures=1024,
+                 query_p3d_fn=None,
+                 train=True,
+                 with_aug=False,
+                 min_inliers=0,
+                 max_inliers=4096,
+                 random_inliers=False,
+                 jitter_params=None,
+                 scale_params=None,
+                 image_dim=3,
+                 query_info_path=None,
+                 sample_ratio=1,
+                 ):
+        self.landmark_path = osp.join(landmark_path, scene)
+        self.dataset_path = osp.join(dataset_path, scene)
+        self.n_class = n_class
+        self.dataset = dataset + '/' + scene
+        self.nfeatures = nfeatures
+        self.with_aug = with_aug
+        self.jitter_params = jitter_params
+        self.scale_params = scale_params
+        self.image_dim = image_dim
+        self.train = train
+        self.min_inliers = min_inliers
+        self.max_inliers = max_inliers if max_inliers < nfeatures else nfeatures
+        self.random_inliers = random_inliers
+        self.image_prefix = ''
+
+        train_transforms = []
+        if self.with_aug:
+            train_transforms.append(tvt.ColorJitter(
+                brightness=jitter_params['brightness'],
+                contrast=jitter_params['contrast'],
+                saturation=jitter_params['saturation'],
+                hue=jitter_params['hue']))
+            if jitter_params['blur'] > 0:
+                train_transforms.append(tvt.GaussianBlur(kernel_size=int(jitter_params['blur'])))
+        self.train_transforms = tvt.Compose(train_transforms)
+
+        if train:
+            self.cameras, self.images, point3Ds = read_model(path=osp.join(self.landmark_path, '3D-models'), ext='.bin')
+            self.name_to_id = {image.name: i for i, image in self.images.items() if len(self.images[i].point3D_ids) > 0}
+
+        # only for testing of query images
+        if not self.train:
+            data = np.load(query_p3d_fn, allow_pickle=True)[()]
+            self.img_p3d = data
+        else:
+            self.img_p3d = {}
+
+        if train:
+            self.img_fns = [self.images[v].name for v in self.images.keys() if
+                            self.images[v].name in self.name_to_id.keys()]
+        else:
+            self.img_fns = []
+            with open(osp.join(self.dataset_path, 'queries_with_intrinsics.txt'), 'r') as f:
+                lines = f.readlines()
+                for l in lines:
+                    self.img_fns.append(l.strip().split()[0])
+        print('Load {} images from {} for {}...'.format(len(self.img_fns),
+                                                        self.dataset, 'training' if train else 'eval'))
+
+        data = np.load(osp.join(self.landmark_path,
+                                'point3D_cluster_n{:d}_{:s}_{:s}.npy'.format(n_class - 1, seg_mode, seg_method)),
+                       allow_pickle=True)[()]
+        p3d_id = data['id']
+        seg_id = data['label']
+        self.p3d_seg = {p3d_id[i]: seg_id[i] for i in range(p3d_id.shape[0])}
+        xyzs = data['xyz']
+        self.p3d_xyzs = {p3d_id[i]: xyzs[i] for i in range(p3d_id.shape[0])}
+
+        if not train:
+            self.query_info = self.read_query_info(path=query_info_path)
+
+        self.nfeatures = nfeatures
+        self.feature_dir = osp.join(self.landmark_path, 'feats')
+        self.feats = {}
third_party/pram/dataset/get_dataset.py
ADDED
@@ -0,0 +1,89 @@
+# -*- coding: UTF-8 -*-
+'''=================================================
+@Project -> File   pram -> get_dataset
+@IDE       PyCharm
+@Author    fx221@cam.ac.uk
+@Date      29/01/2024 14:40
+=================================================='''
+import os.path as osp
+import yaml
+from dataset.aachen import Aachen
+from dataset.twelve_scenes import TwelveScenes
+from dataset.seven_scenes import SevenScenes
+from dataset.cambridge_landmarks import CambridgeLandmarks
+from dataset.customdataset import CustomDataset
+from dataset.recdataset import RecDataset
+
+
+def get_dataset(dataset):
+    if dataset in ['7Scenes', 'S']:
+        return SevenScenes
+    elif dataset in ['12Scenes', 'T']:
+        return TwelveScenes
+    elif dataset in ['Aachen', 'A']:
+        return Aachen
+    elif dataset in ['CambridgeLandmarks', 'C']:
+        return CambridgeLandmarks
+    else:
+        return CustomDataset
+
+
+def compose_datasets(datasets, config, train=True, sample_ratio=None):
+    sub_sets = []
+    for name in datasets:
+        if name == 'S':
+            ds_name = '7Scenes'
+        elif name == 'T':
+            ds_name = '12Scenes'
+        elif name == 'A':
+            ds_name = 'Aachen'
+        elif name == 'R':
+            ds_name = 'RobotCar-Seasons'
+        elif name == 'C':
+            ds_name = 'CambridgeLandmarks'
+        else:
+            ds_name = name
+            # raise '{} dataset does not exist'.format(name)
+        landmark_path = osp.join(config['landmark_path'], ds_name)
+        dataset_path = osp.join(config['dataset_path'], ds_name)
+        scene_config_path = 'configs/datasets/{:s}.yaml'.format(ds_name)
+
+        with open(scene_config_path, 'r') as f:
+            scene_config = yaml.load(f, Loader=yaml.Loader)
+        DSet = get_dataset(dataset=ds_name)
+
+        for scene in scene_config['scenes']:
+            if sample_ratio is None:
+                scene_sample_ratio = scene_config[scene]['training_sample_ratio'] if train else scene_config[scene][
+                    'eval_sample_ratio']
+            else:
+                scene_sample_ratio = sample_ratio
+            scene_set = DSet(landmark_path=landmark_path,
+                             dataset_path=dataset_path,
+                             scene=scene,
+                             seg_mode=scene_config[scene]['cluster_mode'],
+                             seg_method=scene_config[scene]['cluster_method'],
+                             n_class=scene_config[scene]['n_cluster'] + 1,  # including invalid - 0
+                             dataset=ds_name,
+                             train=train,
+                             nfeatures=config['max_keypoints'] if train else config['eval_max_keypoints'],
+                             min_inliers=config['min_inliers'],
+                             max_inliers=config['max_inliers'],
+                             random_inliers=config['random_inliers'],
+                             with_aug=config['with_aug'],
+                             jitter_params=config['jitter_params'],
+                             scale_params=config['scale_params'],
+                             image_dim=config['image_dim'],
+                             query_p3d_fn=osp.join(config['landmark_path'], ds_name, scene,
+                                                   'point3D_query_n{:d}_{:s}_{:s}.npy'.format(
+                                                       scene_config[scene]['n_cluster'],
+                                                       scene_config[scene]['cluster_mode'],
+                                                       scene_config[scene]['cluster_method'])),
+                             query_info_path=osp.join(config['dataset_path'], ds_name, scene,
+                                                      'queries_with_intrinsics.txt'),
+                             sample_ratio=scene_sample_ratio,
+                             )
+
+            sub_sets.append(scene_set)
+
+    return RecDataset(sub_sets=sub_sets)
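Note: a sketch of how `compose_datasets` might be invoked. The config keys mirror those read above; the concrete values here are hypothetical, and the real training configs live under `configs/` in this commit:

from dataset.get_dataset import compose_datasets

# Hypothetical minimal config; real values come from configs/config_train_*.yaml.
config = {
    'landmark_path': '/data/landmarks', 'dataset_path': '/data/datasets',
    'max_keypoints': 1024, 'eval_max_keypoints': 4096,
    'min_inliers': 32, 'max_inliers': 512, 'random_inliers': True,
    'with_aug': True, 'scale_params': [0.5, 1.0], 'image_dim': 3,
    'jitter_params': {'brightness': 0.5, 'contrast': 0.5,
                      'saturation': 0.25, 'hue': 0.15, 'blur': 0},
}
# 'S' -> 7Scenes, 'C' -> CambridgeLandmarks; must run from the pram root so
# that configs/datasets/<name>.yaml resolves relative to the working directory.
train_set = compose_datasets(datasets=['S', 'C'], config=config, train=True)
print(len(train_set), train_set.n_class)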
third_party/pram/dataset/recdataset.py
ADDED
@@ -0,0 +1,95 @@
+# -*- coding: UTF-8 -*-
+'''=================================================
+@Project -> File   pram -> recdataset
+@IDE       PyCharm
+@Author    fx221@cam.ac.uk
+@Date      29/01/2024 14:42
+=================================================='''
+import numpy as np
+from torch.utils.data import Dataset
+
+
+class RecDataset(Dataset):
+    def __init__(self, sub_sets=[]):
+        assert len(sub_sets) >= 1
+
+        self.sub_sets = sub_sets
+        self.names = []
+
+        self.sub_set_index = []
+        self.seg_offsets = []
+        self.sub_set_item_index = []
+        self.dataset_names = []
+        self.scene_names = []
+        start_index_valid_seg = 1  # start from 1, 0 is for invalid
+
+        total_subset = 0
+        for scene_set in sub_sets:  # [0, n_class]
+            name = scene_set.dataset
+            self.names.append(name)
+            n_samples = len(scene_set)
+
+            n_class = scene_set.n_class
+            self.seg_offsets = self.seg_offsets + [start_index_valid_seg for v in range(len(scene_set))]
+            start_index_valid_seg = start_index_valid_seg + n_class - 1
+
+            self.sub_set_index = self.sub_set_index + [total_subset for k in range(n_samples)]
+            self.sub_set_item_index = self.sub_set_item_index + [k for k in range(n_samples)]
+
+            # self.dataset_names = self.dataset_names + [name for k in range(n_samples)]
+            self.scene_names = self.scene_names + [name for k in range(n_samples)]
+            total_subset += 1
+
+        self.n_class = start_index_valid_seg
+
+        print('Load {} images {} segs from {} subsets from {}'.format(len(self.sub_set_item_index), self.n_class,
+                                                                      len(sub_sets), self.names))
+
+    def __len__(self):
+        return len(self.sub_set_item_index)
+
+    def __getitem__(self, idx):
+        subset_idx = self.sub_set_index[idx]
+        item_idx = self.sub_set_item_index[idx]
+        scene_name = self.scene_names[idx]
+
+        out = self.sub_sets[subset_idx][item_idx]
+
+        org_gt_seg = out['gt_seg']
+        org_gt_cls = out['gt_cls']
+        org_gt_cls_dist = out['gt_cls_dist']
+        org_gt_n_seg = out['gt_n_seg']
+        offset = self.seg_offsets[idx]
+        org_n_class = self.sub_sets[subset_idx].n_class
+
+        gt_seg = np.zeros(shape=(org_gt_seg.shape[0],), dtype=int)  # [0, ..., n_features]
+        gt_n_seg = np.zeros(shape=(self.n_class,), dtype=int)
+        gt_cls = np.zeros(shape=(self.n_class,), dtype=int)
+        gt_cls_dist = np.zeros(shape=(self.n_class,), dtype=float)
+
+        # copy invalid segments
+        gt_n_seg[0] = org_gt_n_seg[0]
+        gt_cls[0] = org_gt_cls[0]
+        gt_cls_dist[0] = org_gt_cls_dist[0]
+        # print('org: ', org_n_class, org_gt_seg.shape, org_gt_n_seg.shape, org_gt_seg)
+
+        # copy valid segments
+        gt_seg[org_gt_seg > 0] = org_gt_seg[org_gt_seg > 0] + offset - 1  # [0, ..., 1023]
+        gt_n_seg[offset:offset + org_n_class - 1] = org_gt_n_seg[1:]  # [0, ..., n_seg]
+        gt_cls[offset:offset + org_n_class - 1] = org_gt_cls[1:]  # [0, ..., n_seg]
+        gt_cls_dist[offset:offset + org_n_class - 1] = org_gt_cls_dist[1:]  # [0, ..., n_seg]
+
+        out['gt_seg'] = gt_seg
+        out['gt_cls'] = gt_cls
+        out['gt_cls_dist'] = gt_cls_dist
+        out['gt_n_seg'] = gt_n_seg
+
+        # print('gt: ', org_n_class, gt_seg.shape, gt_n_seg.shape, gt_seg)
+        out['scene_name'] = scene_name
+
+        # out['org_gt_seg'] = org_gt_seg
+        # out['org_gt_n_seg'] = org_gt_n_seg
+        # out['org_gt_cls'] = org_gt_cls
+        # out['org_gt_cls_dist'] = org_gt_cls_dist
+
+        return out
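Note: the offset bookkeeping above merges per-scene label spaces into one global space: label 0 stays "invalid" everywhere, while each scene's valid labels are shifted by a running offset. A small worked example, assuming two sub-sets with `n_class` 4 and 3 (i.e. 3 and 2 valid segments):

import numpy as np

# scene A: n_class = 4 -> valid labels 1..3, offset 1 -> global labels 1..3
# scene B: n_class = 3 -> valid labels 1..2, offset 1 + (4 - 1) = 4 -> global labels 4..5
# merged n_class = 1 + 3 + 2 = 6, with global label 0 still meaning "invalid"
offset = 4
org_gt_seg = np.array([0, 1, 2, 2, 0])  # per-scene labels of a scene-B sample
gt_seg = np.zeros_like(org_gt_seg)
gt_seg[org_gt_seg > 0] = org_gt_seg[org_gt_seg > 0] + offset - 1
print(gt_seg)  # [0 4 5 5 0]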
third_party/pram/dataset/seven_scenes.py
ADDED
@@ -0,0 +1,115 @@
+# -*- coding: UTF-8 -*-
+'''=================================================
+@Project -> File   pram -> seven_scenes
+@IDE       PyCharm
+@Author    fx221@cam.ac.uk
+@Date      29/01/2024 14:36
+=================================================='''
+import os
+import os.path as osp
+import numpy as np
+from colmap_utils.read_write_model import read_model
+import torchvision.transforms as tvt
+from dataset.basicdataset import BasicDataset
+
+
+class SevenScenes(BasicDataset):
+    def __init__(self, landmark_path, scene, dataset_path, n_class, seg_mode, seg_method, dataset='7Scenes',
+                 nfeatures=1024,
+                 query_p3d_fn=None,
+                 train=True,
+                 with_aug=False,
+                 min_inliers=0,
+                 max_inliers=4096,
+                 random_inliers=False,
+                 jitter_params=None,
+                 scale_params=None,
+                 image_dim=3,
+                 query_info_path=None,
+                 sample_ratio=1,
+                 ):
+        self.landmark_path = osp.join(landmark_path, scene)
+        self.dataset_path = osp.join(dataset_path, scene)
+        self.n_class = n_class
+        self.dataset = dataset + '/' + scene
+        self.nfeatures = nfeatures
+        self.with_aug = with_aug
+        self.jitter_params = jitter_params
+        self.scale_params = scale_params
+        self.image_dim = image_dim
+        self.train = train
+        self.min_inliers = min_inliers
+        self.max_inliers = max_inliers if max_inliers < nfeatures else nfeatures
+        self.random_inliers = random_inliers
+        self.image_prefix = ''
+
+        train_transforms = []
+        if self.with_aug:
+            train_transforms.append(tvt.ColorJitter(
+                brightness=jitter_params['brightness'],
+                contrast=jitter_params['contrast'],
+                saturation=jitter_params['saturation'],
+                hue=jitter_params['hue']))
+            if jitter_params['blur'] > 0:
+                train_transforms.append(tvt.GaussianBlur(kernel_size=int(jitter_params['blur'])))
+        self.train_transforms = tvt.Compose(train_transforms)
+
+        if train:
+            self.cameras, self.images, point3Ds = read_model(path=osp.join(self.landmark_path, '3D-models'), ext='.bin')
+            self.name_to_id = {image.name: i for i, image in self.images.items() if len(self.images[i].point3D_ids) > 0}
+
+        # only for testing of query images
+        if not self.train:
+            data = np.load(query_p3d_fn, allow_pickle=True)[()]
+            self.img_p3d = data
+        else:
+            self.img_p3d = {}
+
+        if self.train:
+            split_fn = osp.join(self.dataset_path, 'TrainSplit.txt')
+        else:
+            split_fn = osp.join(self.dataset_path, 'TestSplit.txt')
+
+        self.img_fns = []
+        with open(split_fn, 'r') as f:
+            lines = f.readlines()
+            for l in lines:
+                seq = int(l.strip()[8:])
+                fns = os.listdir(osp.join(self.dataset_path, osp.join('seq-{:02d}'.format(seq))))
+                fns = sorted(fns)
+                nf = 0
+                for fn in fns:
+                    if fn.find('png') >= 0:
+                        if train and 'seq-{:02d}'.format(seq) + '/' + fn not in self.name_to_id.keys():
+                            continue
+                        if not train and 'seq-{:02d}'.format(seq) + '/' + fn not in self.img_p3d.keys():
+                            continue
+                        if nf % sample_ratio == 0:
+                            self.img_fns.append('seq-{:02d}'.format(seq) + '/' + fn)
+                        nf += 1
+
+        print('Load {} images from {} for {}...'.format(len(self.img_fns),
+                                                        self.dataset, 'training' if train else 'eval'))
+
+        data = np.load(osp.join(self.landmark_path,
+                                'point3D_cluster_n{:d}_{:s}_{:s}.npy'.format(n_class - 1, seg_mode, seg_method)),
+                       allow_pickle=True)[()]
+        p3d_id = data['id']
+        seg_id = data['label']
+        self.p3d_seg = {p3d_id[i]: seg_id[i] for i in range(p3d_id.shape[0])}
+        xyzs = data['xyz']
+        self.p3d_xyzs = {p3d_id[i]: xyzs[i] for i in range(p3d_id.shape[0])}
+
+        # with open(osp.join(self.landmark_path, 'sc_mean_scale.txt'), 'r') as f:
+        #     lines = f.readlines()
+        #     for l in lines:
+        #         l = l.strip().split()
+        #         self.mean_xyz = np.array([float(v) for v in l[:3]])
+        #         self.scale_xyz = np.array([float(v) for v in l[3:]])
+
+        if not train:
+            self.query_info = self.read_query_info(path=query_info_path)
+
+        self.nfeatures = nfeatures
+        self.feature_dir = osp.join(self.landmark_path, 'feats')
+        self.feats = {}
third_party/pram/dataset/twelve_scenes.py
ADDED
@@ -0,0 +1,121 @@
+# -*- coding: UTF-8 -*-
+'''=================================================
+@Project -> File   pram -> twelve_scenes
+@IDE       PyCharm
+@Author    fx221@cam.ac.uk
+@Date      29/01/2024 14:37
+=================================================='''
+import os
+import os.path as osp
+import numpy as np
+from colmap_utils.read_write_model import read_model
+import torchvision.transforms as tvt
+from dataset.basicdataset import BasicDataset
+
+
+class TwelveScenes(BasicDataset):
+    def __init__(self, landmark_path, scene, dataset_path, n_class, seg_mode, seg_method, dataset='12Scenes',
+                 nfeatures=1024,
+                 query_p3d_fn=None,
+                 train=True,
+                 with_aug=False,
+                 min_inliers=0,
+                 max_inliers=4096,
+                 random_inliers=False,
+                 jitter_params=None,
+                 scale_params=None,
+                 image_dim=3,
+                 query_info_path=None,
+                 sample_ratio=1,
+                 ):
+        self.landmark_path = osp.join(landmark_path, scene)
+        self.dataset_path = osp.join(dataset_path, scene)
+        self.n_class = n_class
+        self.dataset = dataset + '/' + scene
+        self.nfeatures = nfeatures
+        self.with_aug = with_aug
+        self.jitter_params = jitter_params
+        self.scale_params = scale_params
+        self.image_dim = image_dim
+        self.train = train
+        self.min_inliers = min_inliers
+        self.max_inliers = max_inliers if max_inliers < nfeatures else nfeatures
+        self.random_inliers = random_inliers
+        self.image_prefix = ''
+
+        train_transforms = []
+        if self.with_aug:
+            train_transforms.append(tvt.ColorJitter(
+                brightness=jitter_params['brightness'],
+                contrast=jitter_params['contrast'],
+                saturation=jitter_params['saturation'],
+                hue=jitter_params['hue']))
+            if jitter_params['blur'] > 0:
+                train_transforms.append(tvt.GaussianBlur(kernel_size=int(jitter_params['blur'])))
+        self.train_transforms = tvt.Compose(train_transforms)
+
+        if train:
+            self.cameras, self.images, point3Ds = read_model(path=osp.join(self.landmark_path, '3D-models'), ext='.bin')
+            self.name_to_id = {image.name: i for i, image in self.images.items() if len(self.images[i].point3D_ids) > 0}
+
+        # only for testing of query images
+        if not self.train:
+            data = np.load(query_p3d_fn, allow_pickle=True)[()]
+            self.img_p3d = data
+        else:
+            self.img_p3d = {}
+
+        with open(osp.join(self.dataset_path, 'split.txt'), 'r') as f:
+            l = f.readline()
+            l = l.strip().split(' ')  # sequence0 [frames=357] [start=0 ; end=356], first sequence for testing
+            start_img_id = l[-3].split('=')[-1]
+            end_img_id = l[-1].split('=')[-1][:-1]
+            test_start_img_id = int(start_img_id)
+            test_end_img_id = int(end_img_id)
+
+        self.img_fns = []
+        fns = os.listdir(osp.join(self.dataset_path, 'data'))
+        fns = sorted(fns)
+        nf = 0
+        for fn in fns:
+            if fn.find('jpg') >= 0:  # frame-001098.color.jpg
+                frame_id = int(fn.split('.')[0].split('-')[-1])
+                if not train and frame_id > test_end_img_id:
+                    continue
+                if train and frame_id <= test_end_img_id:
+                    continue
+
+                if train and 'data' + '/' + fn not in self.name_to_id.keys():
+                    continue
+
+                if not train and 'data' + '/' + fn not in self.img_p3d.keys():
+                    continue
+                if nf % sample_ratio == 0:
+                    self.img_fns.append('data' + '/' + fn)
+                nf += 1
+
+        print('Load {} images from {} for {}...'.format(len(self.img_fns),
+                                                        self.dataset, 'training' if train else 'eval'))
+
+        data = np.load(osp.join(self.landmark_path,
+                                'point3D_cluster_n{:d}_{:s}_{:s}.npy'.format(n_class - 1, seg_mode, seg_method)),
+                       allow_pickle=True)[()]
+        p3d_id = data['id']
+        seg_id = data['label']
+        self.p3d_seg = {p3d_id[i]: seg_id[i] for i in range(p3d_id.shape[0])}
+        xyzs = data['xyz']
+        self.p3d_xyzs = {p3d_id[i]: xyzs[i] for i in range(p3d_id.shape[0])}
+
+        # with open(osp.join(self.landmark_path, 'sc_mean_scale.txt'), 'r') as f:
+        #     lines = f.readlines()
+        #     for l in lines:
+        #         l = l.strip().split()
+        #         self.mean_xyz = np.array([float(v) for v in l[:3]])
+        #         self.scale_xyz = np.array([float(v) for v in l[3:]])
+
+        if not train:
+            self.query_info = self.read_query_info(path=query_info_path)
+
+        self.nfeatures = nfeatures
+        self.feature_dir = osp.join(self.landmark_path, 'feats')
+        self.feats = {}
third_party/pram/dataset/utils.py
ADDED
@@ -0,0 +1,31 @@
+# -*- coding: UTF-8 -*-
+'''=================================================
+@Project -> File   pram -> utils
+@IDE       PyCharm
+@Author    fx221@cam.ac.uk
+@Date      29/01/2024 14:31
+=================================================='''
+import torch
+
+
+def normalize_size(x, size, scale=0.7):
+    size = size.reshape([1, 2])
+    norm_fac = size.max() + 0.5
+    return (x - size / 2) / (norm_fac * scale)
+
+
+def collect_batch(batch):
+    out = {}
+    # if len(batch) == 0:
+    #     return batch
+    # else:
+    for k in batch[0].keys():
+        tmp = []
+        for v in batch:
+            tmp.append(v[k])
+        if isinstance(batch[0][k], str) or isinstance(batch[0][k], list):
+            out[k] = tmp
+        else:
+            out[k] = torch.cat([torch.from_numpy(i)[None] for i in tmp], dim=0)
+
+    return out
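Note: `collect_batch` is a drop-in `collate_fn` for a `DataLoader`: numpy fields are stacked into tensors, while strings and lists (such as the `image` field above) are kept as Python lists. A short sketch, assuming the functions from this module are importable:

import numpy as np
from torch.utils.data import DataLoader
from dataset.utils import normalize_size, collect_batch

# normalize_size maps pixel coordinates to roughly [-1, 1] around the image centre
kpts = np.array([[10., 20.], [300., 200.]])
size = np.array([480, 640])  # stored as [h, w] in the samples above
print(normalize_size(x=kpts, size=size))

# loader = DataLoader(train_set, batch_size=4, collate_fn=collect_batch)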
third_party/pram/environment.yml
ADDED
@@ -0,0 +1,173 @@
+name: pram
+channels:
+  - conda-forge
+  - defaults
+dependencies:
+  - _libgcc_mutex=0.1=conda_forge
+  - _openmp_mutex=4.5=2_gnu
+  - binutils_impl_linux-64=2.38=h2a08ee3_1
+  - bzip2=1.0.8=h5eee18b_5
+  - ca-certificates=2024.3.11=h06a4308_0
+  - gcc=12.1.0=h9ea6d83_10
+  - gcc_impl_linux-64=12.1.0=hea43390_17
+  - kernel-headers_linux-64=2.6.32=he073ed8_17
+  - ld_impl_linux-64=2.38=h1181459_1
+  - libffi=3.4.4=h6a678d5_0
+  - libgcc-devel_linux-64=12.1.0=h1ec3361_17
+  - libgcc-ng=13.2.0=h807b86a_5
+  - libgomp=13.2.0=h807b86a_5
+  - libsanitizer=12.1.0=ha89aaad_17
+  - libstdcxx-ng=13.2.0=h7e041cc_5
+  - libuuid=1.41.5=h5eee18b_0
+  - ncurses=6.4=h6a678d5_0
+  - openssl=3.2.1=hd590300_1
+  - pip=23.3.1=py310h06a4308_0
+  - python=3.10.14=h955ad1f_0
+  - readline=8.2=h5eee18b_0
+  - setuptools=68.2.2=py310h06a4308_0
+  - sqlite=3.41.2=h5eee18b_0
+  - sysroot_linux-64=2.12=he073ed8_17
+  - tk=8.6.12=h1ccaba5_0
+  - wheel=0.41.2=py310h06a4308_0
+  - xz=5.4.6=h5eee18b_0
+  - zlib=1.2.13=h5eee18b_0
+  - pip:
+    - addict==2.4.0
+    - aiofiles==23.2.1
+    - aiohttp==3.9.3
+    - aioopenssl==0.6.0
+    - aiosasl==0.5.0
+    - aiosignal==1.3.1
+    - aioxmpp==0.13.3
+    - asttokens==2.4.1
+    - async-timeout==4.0.3
+    - attrs==23.2.0
+    - babel==2.14.0
+    - benbotasync==3.0.2
+    - blinker==1.7.0
+    - certifi==2024.2.2
+    - cffi==1.16.0
+    - charset-normalizer==3.3.2
+    - click==8.1.7
+    - colorama==0.4.6
+    - comm==0.2.2
+    - configargparse==1.7
+    - contourpy==1.2.1
+    - crayons==0.4.0
+    - cryptography==42.0.5
+    - cycler==0.12.1
+    - dash==2.16.1
+    - dash-core-components==2.0.0
+    - dash-html-components==2.0.0
+    - dash-table==5.0.0
+    - decorator==5.1.1
+    - dnspython==2.6.1
+    - einops==0.7.0
+    - exceptiongroup==1.2.0
+    - executing==2.0.1
+    - fastjsonschema==2.19.1
+    - filelock==3.13.3
+    - flask==3.0.2
+    - fonttools==4.50.0
+    - fortniteapiasync==0.1.7
+    - fortnitepy==3.6.9
+    - frozenlist==1.4.1
+    - fsspec==2024.3.1
+    - h5py==3.10.0
+    - html5tagger==1.3.0
+    - httptools==0.6.1
+    - idna==3.6
+    - importlib-metadata==7.1.0
+    - ipython==8.23.0
+    - ipywidgets==8.1.2
+    - itsdangerous==2.1.2
+    - jedi==0.19.1
+    - jinja2==3.1.3
+    - joblib==1.3.2
+    - jsonschema==4.21.1
+    - jsonschema-specifications==2023.12.1
+    - jupyter-core==5.7.2
+    - jupyterlab-widgets==3.0.10
+    - kiwisolver==1.4.5
+    - lxml==4.9.4
+    - markupsafe==2.1.5
+    - matplotlib==3.8.4
+    - matplotlib-inline==0.1.6
+    - mpmath==1.3.0
+    - multidict==6.0.5
+    - nbformat==5.10.4
+    - nest-asyncio==1.6.0
+    - networkx==3.2.1
+    - numpy==1.26.4
+    - nvidia-cublas-cu12==12.1.3.1
+    - nvidia-cuda-cupti-cu12==12.1.105
+    - nvidia-cuda-nvrtc-cu12==12.1.105
+    - nvidia-cuda-runtime-cu12==12.1.105
+    - nvidia-cudnn-cu12==8.9.2.26
+    - nvidia-cufft-cu12==11.0.2.54
+    - nvidia-curand-cu12==10.3.2.106
+    - nvidia-cusolver-cu12==11.4.5.107
+    - nvidia-cusparse-cu12==12.1.0.106
+    - nvidia-nccl-cu12==2.19.3
+    - nvidia-nvjitlink-cu12==12.4.127
+    - nvidia-nvtx-cu12==12.1.105
+    - open3d==0.18.0
+    - opencv-contrib-python==4.5.5.64
+    - packaging==24.0
+    - pandas==2.2.1
+    - parso==0.8.3
+    - pexpect==4.9.0
+    - pillow==10.3.0
+    - platformdirs==4.2.0
+    - plotly==5.20.0
+    - prompt-toolkit==3.0.43
+    - ptyprocess==0.7.0
+    - pure-eval==0.2.2
+    - pyasn1==0.6.0
+    - pyasn1-modules==0.4.0
+    - pybind11==2.12.0
+    - pycolmap==0.6.1
+    - pycparser==2.22
+    - pygments==2.17.2
+    - pyopengl==3.1.7
+    - pyopengl-accelerate==3.1.7
+    - pyopenssl==24.1.0
+    - pyparsing==3.1.2
+    - pyquaternion==0.9.9
+    - python-dateutil==2.9.0.post0
+    - pytz==2024.1
+    - pyyaml==6.0.1
+    - referencing==0.34.0
+    - requests==2.31.0
+    - retrying==1.3.4
+    - rpds-py==0.18.0
+    - sanic==23.12.1
+    - sanic-routing==23.12.0
+    - scikit-learn==1.4.1.post1
+    - scipy==1.13.0
+    - six==1.16.0
+    - sortedcollections==2.1.0
+    - sortedcontainers==2.4.0
+    - stack-data==0.6.3
+    - sympy==1.12
+    - tenacity==8.2.3
+    - threadpoolctl==3.4.0
+    - torch==2.2.2
+    - torchvision==0.17.2
+    - tqdm==4.66.2
+    - tracerite==1.1.1
+    - traitlets==5.14.2
+    - triton==2.2.0
+    - typing-extensions==4.10.0
+    - tzdata==2024.1
+    - tzlocal==5.2
+    - ujson==5.9.0
+    - urllib3==2.2.1
+    - uvloop==0.15.2
+    - wcwidth==0.2.13
+    - websockets==12.0
+    - werkzeug==3.0.2
+    - widgetsnbextension==4.0.10
+    - yaml2==0.0.1
+    - yarl==1.9.4
+    - zipp==3.18.1
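Note: assuming a standard conda installation, this environment should be reproducible with `conda env create -f environment.yml` followed by `conda activate pram` (the environment name declared on the first line).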
third_party/pram/inference.py
ADDED
@@ -0,0 +1,62 @@
+# -*- coding: UTF-8 -*-
+'''=================================================
+@Project -> File   pram -> inference
+@IDE       PyCharm
+@Author    fx221@cam.ac.uk
+@Date      03/04/2024 16:06
+=================================================='''
+import argparse
+import torch
+import torchvision.transforms.transforms as tvt
+import yaml
+from nets.load_segnet import load_segnet
+from nets.sfd2 import load_sfd2
+from dataset.get_dataset import compose_datasets
+
+parser = argparse.ArgumentParser(description='PRAM', formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+parser.add_argument('--config', type=str, required=True, help='config of specifications')
+parser.add_argument('--landmark_path', type=str, required=True, help='path of landmarks')
+parser.add_argument('--feat_weight_path', type=str, default='weights/sfd2_20230511_210205_resnet4x.79.pth')
+parser.add_argument('--rec_weight_path', type=str, required=True, help='recognition weight')
+parser.add_argument('--online', action='store_true', help='online visualization with pangolin')
+
+if __name__ == '__main__':
+    args = parser.parse_args()
+    with open(args.config, 'rt') as f:
+        config = yaml.load(f, Loader=yaml.Loader)
+    config['landmark_path'] = args.landmark_path
+
+    feat_model = load_sfd2(weight_path=args.feat_weight_path).cuda().eval()
+    print('Load SFD2 weight from {:s}'.format(args.feat_weight_path))
+
+    # rec_model = get_model(config=config)
+    rec_model = load_segnet(network=config['network'],
+                            n_class=config['n_class'],
+                            desc_dim=256 if config['use_mid_feature'] else 128,
+                            n_layers=config['layers'],
+                            output_dim=config['output_dim'])
+    state_dict = torch.load(args.rec_weight_path, map_location='cpu')['model']
+    rec_model.load_state_dict(state_dict, strict=True)
+    print('Load recognition weight from {:s}'.format(args.rec_weight_path))
+
+    img_transforms = []
+    img_transforms.append(tvt.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]))
+    img_transforms = tvt.Compose(img_transforms)
+
+    dataset = config['dataset']
+    if not args.online:
+        from localization.loc_by_rec_eval import loc_by_rec_eval
+
+        test_set = compose_datasets(datasets=dataset, config=config, train=False, sample_ratio=1)
+        config['n_class'] = test_set.n_class
+
+        loc_by_rec_eval(rec_model=rec_model.cuda().eval(),
+                        loader=test_set,
+                        local_feat=feat_model.cuda().eval(),
+                        config=config, img_transforms=img_transforms)
+    else:
+        from localization.loc_by_rec_online import loc_by_rec_online
+
+        loc_by_rec_online(rec_model=rec_model.cuda().eval(),
+                          local_feat=feat_model.cuda().eval(),
+                          config=config, img_transforms=img_transforms)
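Note: for reference, a typical offline run would look something like `python inference.py --config configs/config_train_7scenes_sfd2.yaml --landmark_path <landmark_dir> --rec_weight_path <rec.pth>`; passing `--online` switches to the Pangolin-based live viewer instead of batch evaluation. The angle-bracket paths are user-supplied placeholders.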
third_party/pram/localization/base_model.py
ADDED
@@ -0,0 +1,45 @@
+from abc import ABCMeta, abstractmethod
+from torch import nn
+from copy import copy
+import inspect
+
+
+class BaseModel(nn.Module, metaclass=ABCMeta):
+    default_conf = {}
+    required_data_keys = []
+
+    def __init__(self, conf):
+        """Perform some logic and call the _init method of the child model."""
+        super().__init__()
+        self.conf = conf = {**self.default_conf, **conf}
+        self.required_data_keys = copy(self.required_data_keys)
+        self._init(conf)
+
+    def forward(self, data):
+        """Check the data and call the _forward method of the child model."""
+        for key in self.required_data_keys:
+            assert key in data, 'Missing key {} in data'.format(key)
+        return self._forward(data)
+
+    @abstractmethod
+    def _init(self, conf):
+        """To be implemented by the child class."""
+        raise NotImplementedError
+
+    @abstractmethod
+    def _forward(self, data):
+        """To be implemented by the child class."""
+        raise NotImplementedError
+
+
+def dynamic_load(root, model):
+    module_path = f'{root.__name__}.{model}'
+    module = __import__(module_path, fromlist=[''])
+    classes = inspect.getmembers(module, inspect.isclass)
+    # Filter classes defined in the module
+    classes = [c for c in classes if c[1].__module__ == module_path]
+    # Filter classes inherited from BaseModel
+    classes = [c for c in classes if issubclass(c[1], BaseModel)]
+    assert len(classes) == 1, classes
+    return classes[0][1]
+    # return getattr(module, 'Model')
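Note: the contract here is small: subclasses implement `_init`/`_forward`, declare `required_data_keys`, and `dynamic_load` retrieves the single `BaseModel` subclass defined in a module. A minimal hypothetical subclass illustrating the pattern (not part of this commit):

import torch
from localization.base_model import BaseModel

class DotProductMatcher(BaseModel):
    """Hypothetical toy model for illustration only."""
    default_conf = {'temperature': 1.0}
    required_data_keys = ['descriptors0', 'descriptors1']

    def _init(self, conf):
        pass  # nothing to build for this toy model

    def _forward(self, data):
        # similarity matrix between two descriptor sets of shape [D, N]
        sim = torch.einsum('dn,dm->nm', data['descriptors0'], data['descriptors1'])
        return {'scores': sim / self.conf['temperature']}

matcher = DotProductMatcher({'temperature': 0.5})
out = matcher({'descriptors0': torch.randn(128, 10), 'descriptors1': torch.randn(128, 12)})
print(out['scores'].shape)  # torch.Size([10, 12]); forward() checks the required keys first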
third_party/pram/localization/camera.py
ADDED
@@ -0,0 +1,11 @@
+# -*- coding: UTF-8 -*-
+'''=================================================
+@Project -> File   pram -> camera
+@IDE       PyCharm
+@Author    fx221@cam.ac.uk
+@Date      04/03/2024 11:27
+=================================================='''
+import collections
+
+Camera = collections.namedtuple(
+    "Camera", ["id", "model", "width", "height", "params"])
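Note: for illustration, a SIMPLE_PINHOLE entry in the COLMAP convention, where `params` is (f, cx, cy); the numeric values here are made up:

from localization.camera import Camera

cam = Camera(id=0, model='SIMPLE_PINHOLE', width=640, height=480,
             params=[525.0, 320.0, 240.0])  # f, cx, cy (illustrative values)
print(cam.model, cam.params)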
third_party/pram/localization/extract_features.py
ADDED
@@ -0,0 +1,256 @@
# -*- coding: UTF-8 -*-
'''=================================================
@Project -> File   pram -> extract_features.py
@IDE    PyCharm
@Author fx221@cam.ac.uk
@Date   07/02/2024 14:49
=================================================='''
import os
import os.path as osp
import h5py
import numpy as np
import progressbar
import yaml
import torch
import cv2
import torch.utils.data as Data
from tqdm import tqdm
from types import SimpleNamespace
import logging
import pprint
from pathlib import Path
import argparse
from nets.sfd2 import ResNet4x, extract_sfd2_return
from nets.superpoint import SuperPoint, extract_sp_return

confs = {
    'superpoint-n4096': {
        'output': 'feats-superpoint-n4096',
        'model': {
            'name': 'superpoint',
            'outdim': 256,
            'use_stability': False,
            'nms_radius': 3,
            'max_keypoints': 4096,
            'conf_th': 0.005,
            'multiscale': False,
            'scales': [1.0],
            'model_fn': osp.join(os.getcwd(),
                                 "weights/superpoint_v1.pth"),
        },
        'preprocessing': {
            'grayscale': True,
            'resize_max': False,
        },
    },

    'resnet4x-20230511-210205-pho-0005': {
        'output': 'feats-resnet4x-20230511-210205-pho-0005',
        'model': {
            'outdim': 128,
            'name': 'resnet4x',
            'use_stability': False,
            'max_keypoints': 4096,
            'conf_th': 0.005,
            'multiscale': False,
            'scales': [1.0],
            'model_fn': osp.join(os.getcwd(),
                                 "weights/sfd2_20230511_210205_resnet4x.79.pth"),
        },
        'preprocessing': {
            'grayscale': False,
            'resize_max': False,
        },
        'mask': False,
    },

    'sfd2': {
        'output': 'feats-sfd2',
        'model': {
            'outdim': 128,
            'name': 'resnet4x',
            'use_stability': False,
            'max_keypoints': 4096,
            'conf_th': 0.005,
            'multiscale': False,
            'scales': [1.0],
            'model_fn': osp.join(os.getcwd(),
                                 "weights/sfd2_20230511_210205_resnet4x.79.pth"),
        },
        'preprocessing': {
            'grayscale': False,
            'resize_max': False,
        },
        'mask': False,
    },
}


class ImageDataset(Data.Dataset):
    default_conf = {
        'globs': ['*.jpg', '*.png', '*.jpeg', '*.JPG', '*.PNG'],
        'grayscale': False,
        'resize_max': None,
        'resize_force': False,
    }

    def __init__(self, root, conf, image_list=None,
                 mask_root=None):
        self.conf = conf = SimpleNamespace(**{**self.default_conf, **conf})
        self.root = root

        self.paths = []
        if image_list is None:
            for g in conf.globs:
                self.paths += list(Path(root).glob('**/' + g))
            if len(self.paths) == 0:
                raise ValueError(f'Could not find any image in root: {root}.')
            self.paths = [i.relative_to(root) for i in self.paths]
        else:
            with open(image_list, "r") as f:
                lines = f.readlines()
            for l in lines:
                l = l.strip()
                self.paths.append(Path(l))

        logging.info(f'Found {len(self.paths)} images in root {root}.')

        if mask_root is not None:
            self.mask_root = mask_root
        else:
            self.mask_root = None

    def __getitem__(self, idx):
        path = self.paths[idx]
        if self.conf.grayscale:
            mode = cv2.IMREAD_GRAYSCALE
        else:
            mode = cv2.IMREAD_COLOR
        image = cv2.imread(str(self.root / path), mode)
        if image is None:
            raise ValueError(f'Cannot read image {str(path)}.')
        if not self.conf.grayscale:
            image = image[:, :, ::-1]  # BGR to RGB
        image = image.astype(np.float32)
        size = image.shape[:2][::-1]
        w, h = size

        if self.conf.resize_max and (self.conf.resize_force
                                     or max(w, h) > self.conf.resize_max):
            scale = self.conf.resize_max / max(h, w)
            h_new, w_new = int(round(h * scale)), int(round(w * scale))
            image = cv2.resize(
                image, (w_new, h_new), interpolation=cv2.INTER_CUBIC)

        if self.conf.grayscale:
            image = image[None]
        else:
            image = image.transpose((2, 0, 1))  # HxWxC to CxHxW
        image = image / 255.

        data = {
            'name': str(path),
            'image': image,
            'original_size': np.array(size),
        }

        if self.mask_root is not None:
            mask_path = Path(str(path).replace("jpg", "png"))
            if osp.exists(self.mask_root / mask_path):
                mask = cv2.imread(str(self.mask_root / mask_path))
                mask = cv2.resize(mask, dsize=(image.shape[2], image.shape[1]),
                                  interpolation=cv2.INTER_NEAREST)
            else:
                mask = np.zeros(shape=(image.shape[1], image.shape[2], 3), dtype=np.uint8)

            data['mask'] = mask

        return data

    def __len__(self):
        return len(self.paths)


def get_model(model_name, weight_path, outdim=128, **kwargs):
    if model_name == 'superpoint':
        model = SuperPoint(config={
            'descriptor_dim': 256,
            'nms_radius': 4,
            'keypoint_threshold': 0.005,
            'max_keypoints': -1,
            'remove_borders': 4,
            'weight_path': weight_path,
        }).eval()

        extractor = extract_sp_return

    if model_name == 'resnet4x':
        model = ResNet4x(outdim=outdim).eval()
        model.load_state_dict(torch.load(weight_path)['state_dict'], strict=True)
        extractor = extract_sfd2_return

    return model, extractor


@torch.no_grad()
def main(conf, image_dir, export_dir, image_list=None):
    logging.info('Extracting local features with configuration:'
                 f'\n{pprint.pformat(conf)}')
    model, extractor = get_model(model_name=conf['model']['name'], weight_path=conf["model"]["model_fn"],
                                 use_stability=conf['model']['use_stability'], outdim=conf['model']['outdim'])
    model = model.cuda()
    loader = ImageDataset(image_dir,
                          conf['preprocessing'],
                          image_list=image_list,
                          mask_root=None)
    loader = torch.utils.data.DataLoader(loader, num_workers=4)

    os.makedirs(export_dir, exist_ok=True)
    feature_path = Path(export_dir, conf['output'] + '.h5')
    feature_path.parent.mkdir(exist_ok=True, parents=True)
    feature_file = h5py.File(str(feature_path), 'a')

    with tqdm(total=len(loader)) as t:
        for idx, data in enumerate(loader):
            t.update()
            pred = extractor(model, img=data["image"],
                             topK=conf["model"]["max_keypoints"],
                             mask=None,
                             conf_th=conf["model"]["conf_th"],
                             scales=conf["model"]["scales"],
                             )

            pred['descriptors'] = pred['descriptors'].transpose()

            t.set_postfix(npoints=pred['keypoints'].shape[0])

            pred['image_size'] = original_size = data['original_size'][0].numpy()
            if 'keypoints' in pred.keys():
                # rescale keypoints back to the original image resolution
                size = np.array(data['image'].shape[-2:][::-1])
                scales = (original_size / size).astype(np.float32)
                pred['keypoints'] = (pred['keypoints'] + .5) * scales[None] - .5

            grp = feature_file.create_group(data['name'][0])
            for k, v in pred.items():
                grp.create_dataset(k, data=v)

            del pred

    feature_file.close()
    logging.info('Finished exporting features.')

    return feature_path


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--image_dir', type=Path, required=True)
    parser.add_argument('--image_list', type=str, default=None)
    parser.add_argument('--mask_dir', type=Path, default=None)
    parser.add_argument('--export_dir', type=Path, required=True)
    parser.add_argument('--conf', type=str, required=True, choices=list(confs.keys()))
    args = parser.parse_args()
    main(confs[args.conf], args.image_dir, args.export_dir, image_list=args.image_list)
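A minimal invocation sketch; the paths are placeholders and 'sfd2' is one of the confs above:

    from pathlib import Path

    # Either run the argparse entry point:
    #   python localization/extract_features.py --image_dir /data/aachen/images \
    #       --export_dir /data/aachen/outputs --conf sfd2
    # or call main() directly; it writes <export_dir>/feats-sfd2.h5.
    feature_path = main(confs['sfd2'],
                        image_dir=Path('/data/aachen/images'),    # hypothetical
                        export_dir=Path('/data/aachen/outputs'))  # hypothetical
    print('features written to', feature_path)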
third_party/pram/localization/frame.py
ADDED
@@ -0,0 +1,195 @@
# -*- coding: UTF-8 -*-
'''=================================================
@Project -> File   pram -> frame
@IDE    PyCharm
@Author fx221@cam.ac.uk
@Date   01/03/2024 10:08
=================================================='''
from collections import defaultdict

import numpy as np
import torch
import pycolmap

from localization.camera import Camera
from localization.utils import compute_pose_error


class Frame:
    def __init__(self, image: np.ndarray, camera: pycolmap.Camera, id: int, name: str = None, qvec=None, tvec=None,
                 scene_name=None,
                 reference_frame_id=None):
        self.image = image
        self.camera = camera
        self.id = id
        self.name = name
        self.image_size = np.array([camera.height, camera.width])
        self.qvec = qvec
        self.tvec = tvec
        self.scene_name = scene_name
        self.reference_frame_id = reference_frame_id

        self.keypoints = None  # [N, 3]
        self.descriptors = None  # [N, D]
        self.segmentations = None  # [N, C]
        self.seg_scores = None  # [N, C]
        self.seg_ids = None  # [N, 1]
        self.point3D_ids = None  # [N, 1]
        self.xyzs = None

        self.gt_qvec = None
        self.gt_tvec = None

        self.matched_scene_name = None
        self.matched_keypoints = None
        self.matched_keypoint_ids = None
        self.matched_xyzs = None
        self.matched_point3D_ids = None
        self.matched_inliers = None
        self.matched_sids = None
        self.matched_order = None

        self.refinement_reference_frame_ids = None
        self.image_rec = None
        self.image_matching = None
        self.image_inlier = None
        self.reference_frame_name = None
        self.image_matching_tmp = None
        self.image_inlier_tmp = None
        self.reference_frame_name_tmp = None

        self.tracking_status = None

        self.time_feat = 0
        self.time_rec = 0
        self.time_loc = 0
        self.time_ref = 0

    def update_point3ds_old(self):
        pt = torch.from_numpy(self.keypoints[:, :2]).unsqueeze(-1)  # [M 2 1]
        mpt = torch.from_numpy(self.matched_keypoints[:, :2].transpose()).unsqueeze(0)  # [1 2 N]
        dist = torch.sqrt(torch.sum((pt - mpt) ** 2, dim=1))
        values, ids = torch.topk(dist, dim=1, k=1, largest=False)
        values = values[:, 0].numpy()
        ids = ids[:, 0].numpy()
        mask = (values < 1)  # 1 pixel error
        self.point3D_ids = np.zeros(shape=(self.keypoints.shape[0],), dtype=int) - 1
        self.point3D_ids[mask] = self.matched_point3D_ids[ids[mask]]

        inlier_mask = self.matched_inliers
        self.xyzs[mask] = self.matched_xyzs[ids[mask]]
        self.seg_ids[mask] = self.matched_sids[ids[mask]]

    def update_point3ds(self):
        self.xyzs[self.matched_keypoint_ids] = self.matched_xyzs
        self.seg_ids[self.matched_keypoint_ids] = self.matched_sids
        self.point3D_ids[self.matched_keypoint_ids] = self.matched_point3D_ids

    def add_keypoints(self, keypoints: np.ndarray, descriptors: np.ndarray):
        self.keypoints = keypoints
        self.descriptors = descriptors
        self.initialize_localization_variables()

    def add_segmentations(self, segmentations: torch.Tensor, filtering_threshold: float):
        '''
        :param segmentations: [number_points number_labels]
        :return:
        '''
        seg_scores = torch.softmax(segmentations, dim=-1)
        if filtering_threshold > 0:
            scores_background = seg_scores[:, 0]
            non_bg_mask = (scores_background < filtering_threshold)
            print('pre filtering before: ', self.keypoints.shape)
            if torch.sum(non_bg_mask) >= 0.4 * seg_scores.shape[0]:
                self.keypoints = self.keypoints[non_bg_mask.cpu().numpy()]
                self.descriptors = self.descriptors[non_bg_mask.cpu().numpy()]

                # update localization variables
                self.initialize_localization_variables()

                segmentations = segmentations[non_bg_mask]
                seg_scores = seg_scores[non_bg_mask]
            print('pre filtering after: ', self.keypoints.shape)

        # extract initial segmentation info
        self.segmentations = segmentations.cpu().numpy()
        self.seg_scores = seg_scores.cpu().numpy()
        self.seg_ids = segmentations.max(dim=-1)[1].cpu().numpy() - 1  # should start from 0

    def filter_keypoints(self, seg_scores: np.ndarray, filtering_threshold: float):
        scores_background = seg_scores[:, 0]
        non_bg_mask = (scores_background < filtering_threshold)
        print('pre filtering before: ', self.keypoints.shape)
        if np.sum(non_bg_mask) >= 0.4 * seg_scores.shape[0]:
            self.keypoints = self.keypoints[non_bg_mask]
            self.descriptors = self.descriptors[non_bg_mask]
            print('pre filtering after: ', self.keypoints.shape)

            # update localization variables
            self.initialize_localization_variables()
            return non_bg_mask
        else:
            print('pre filtering after: ', self.keypoints.shape)
            return None

    def compute_pose_error(self, pred_qvec=None, pred_tvec=None):
        if pred_qvec is not None and pred_tvec is not None:
            if self.gt_qvec is not None and self.gt_tvec is not None:
                return compute_pose_error(pred_qcw=pred_qvec, pred_tcw=pred_tvec,
                                          gt_qcw=self.gt_qvec, gt_tcw=self.gt_tvec)
            else:
                return 100, 100

        if self.qvec is None or self.tvec is None or self.gt_qvec is None or self.gt_tvec is None:
            return 100, 100
        else:
            err_q, err_t = compute_pose_error(pred_qcw=self.qvec, pred_tcw=self.tvec,
                                              gt_qcw=self.gt_qvec, gt_tcw=self.gt_tvec)
            return err_q, err_t

    def get_intrinsics(self) -> np.ndarray:
        camera_model = self.camera.model.name
        params = self.camera.params
        if camera_model in ("SIMPLE_PINHOLE", "SIMPLE_RADIAL", "RADIAL"):
            fx = fy = params[0]
            cx = params[1]
            cy = params[2]
        elif camera_model in ("PINHOLE", "OPENCV", "OPENCV_FISHEYE", "FULL_OPENCV"):
            fx = params[0]
            fy = params[1]
            cx = params[2]
            cy = params[3]
        else:
            raise Exception("Camera model not supported")

        # intrinsics
        K = np.identity(3)
        K[0, 0] = fx
        K[1, 1] = fy
        K[0, 2] = cx
        K[1, 2] = cy
        return K

    def get_dominate_seg_id(self):
        counts = np.bincount(self.seg_ids[self.seg_ids > 0])
        return np.argmax(counts)

    def clear_localization_track(self):
        self.matched_scene_name = None
        self.matched_keypoints = None
        self.matched_xyzs = None
        self.matched_point3D_ids = None
        self.matched_inliers = None
        self.matched_sids = None

        self.refinement_reference_frame_ids = None

    def initialize_localization_variables(self):
        nkpt = self.keypoints.shape[0]
        self.seg_ids = np.zeros(shape=(nkpt,), dtype=int) - 1
        self.point3D_ids = np.zeros(shape=(nkpt,), dtype=int) - 1
        self.xyzs = np.zeros(shape=(nkpt, 3), dtype=float)
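A sketch of the intended lifecycle, following how loc_by_rec_online.py below drives this class; the arrays are random stand-ins for real detector and recognizer output, and the intrinsics are hypothetical:

    import numpy as np
    import torch
    import pycolmap
    from localization.frame import Frame

    cam = pycolmap.Camera(model='PINHOLE', width=640, height=480,
                          params=[525.0, 525.0, 320.0, 240.0])  # hypothetical intrinsics
    frame = Frame(image=np.zeros((480, 640, 3), np.uint8), camera=cam,
                  id=0, name='query.jpg', scene_name='Aachen/scene')

    # 100 keypoints as (x, y, score) plus 128-D descriptors.
    frame.add_keypoints(keypoints=np.random.rand(100, 3).astype(np.float32),
                        descriptors=np.random.rand(100, 128).astype(np.float32))

    # Per-keypoint logits over background + 32 place labels; keypoints that are
    # confidently background get filtered out before localization.
    logits = torch.randn(frame.keypoints.shape[0], 33)
    frame.add_segmentations(segmentations=logits, filtering_threshold=0.9)
    print(frame.seg_ids.shape, frame.get_intrinsics())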
third_party/pram/localization/loc_by_rec_eval.py
ADDED
@@ -0,0 +1,299 @@
# -*- coding: UTF-8 -*-
'''=================================================
@Project -> File   pram -> loc_by_rec
@IDE    PyCharm
@Author fx221@cam.ac.uk
@Date   08/02/2024 15:26
=================================================='''
import torch
from torch.autograd import Variable
from localization.multimap3d import MultiMap3D
from localization.frame import Frame
import yaml, cv2, time
import numpy as np
import os.path as osp
import threading
import os
from tqdm import tqdm
from recognition.vis_seg import vis_seg_point, generate_color_dic
from tools.metrics import compute_iou, compute_precision
from localization.tracker import Tracker
from localization.utils import read_query_info
from localization.camera import Camera


def loc_by_rec_eval(rec_model, loader, config, local_feat, img_transforms=None):
    n_epoch = int(config['weight_path'].split('.')[1])
    save_fn = osp.join(config['localization']['save_path'],
                       config['weight_path'].split('/')[0] + '_{:d}'.format(n_epoch) + '_{:d}'.format(
                           config['feat_dim']))
    tag = 'k{:d}_th{:d}_mm{:d}_mi{:d}'.format(config['localization']['seg_k'], config['localization']['threshold'],
                                              config['localization']['min_matches'],
                                              config['localization']['min_inliers'])
    if config['localization']['do_refinement']:
        tag += '_op{:d}'.format(config['localization']['covisibility_frame'])
    if config['localization']['with_compress']:
        tag += '_comp'

    save_fn = save_fn + '_' + tag

    save = config['localization']['save']
    if save:
        save_dir = save_fn
        os.makedirs(save_dir, exist_ok=True)
    else:
        save_dir = None

    seg_color = generate_color_dic(n_seg=2000)
    dataset_path = config['dataset_path']
    show = config['localization']['show']
    if show:
        cv2.namedWindow('img', cv2.WINDOW_NORMAL)

    locMap = MultiMap3D(config=config, save_dir=None)
    # start the tracker
    mTracker = Tracker(locMap=locMap, matcher=locMap.matcher, config=config)

    dataset_name = config['dataset'][0]
    all_scene_query_info = {}
    with open(osp.join(config['config_path'], '{:s}.yaml'.format(dataset_name)), 'r') as f:
        scene_config = yaml.load(f, Loader=yaml.Loader)
    scenes = scene_config['scenes']
    for scene in scenes:
        query_path = osp.join(config['dataset_path'], dataset_name, scene, scene_config[scene]['query_path'])
        query_info = read_query_info(query_fn=query_path)
        all_scene_query_info[dataset_name + '/' + scene] = query_info

    tracking = False

    full_log = ''
    failed_cases = []
    success_cases = []
    poses = {}
    err_ths_cnt = [0, 0, 0, 0]

    seg_results = {}
    time_results = {
        'feat': [],
        'rec': [],
        'loc': [],
        'ref': [],
        'total': [],
    }
    n_total = 0

    loc_scene_names = config['localization']['loc_scene_name']
    for bid, pred in tqdm(enumerate(loader), total=len(loader)):
        pred = loader[bid]
        image_name = pred['file_name']
        scene_name = pred['scene_name']  # dataset/scene
        if len(loc_scene_names) > 0:
            skip = True
            for loc_scene in loc_scene_names:
                if scene_name.find(loc_scene) > 0:
                    skip = False
                    break
            if skip:
                continue
        with torch.no_grad():
            for k in pred:
                if k.find('name') >= 0:
                    continue
                if k != 'image0' and k != 'image1' and k != 'depth0' and k != 'depth1':
                    if type(pred[k]) == np.ndarray:
                        pred[k] = Variable(torch.from_numpy(pred[k]).float().cuda())[None]
                    elif type(pred[k]) == torch.Tensor:
                        pred[k] = Variable(pred[k].float().cuda())
                    elif type(pred[k]) == list:
                        continue
                    else:
                        pred[k] = Variable(torch.stack(pred[k]).float().cuda())
        print('scene: ', scene_name, image_name)

        n_total += 1
        with torch.no_grad():
            img = pred['image']
            while isinstance(img, list):
                img = img[0]

            new_im = torch.from_numpy(img).permute(2, 0, 1).cuda().float()
            if img_transforms is not None:
                new_im = img_transforms(new_im)[None]
            else:
                new_im = new_im[None]
            img = (img * 255).astype(np.uint8)

            fn = image_name
            camera_model, width, height, params = all_scene_query_info[scene_name][fn]
            camera = Camera(id=-1, model=camera_model, width=width, height=height, params=params)
            curr_frame = Frame(image=img, camera=camera, id=0, name=fn, scene_name=scene_name)
            gt_sub_map = locMap.sub_maps[curr_frame.scene_name]
            if gt_sub_map.gt_poses is not None and curr_frame.name in gt_sub_map.gt_poses.keys():
                curr_frame.gt_qvec = gt_sub_map.gt_poses[curr_frame.name]['qvec']
                curr_frame.gt_tvec = gt_sub_map.gt_poses[curr_frame.name]['tvec']

            t_start = time.time()
            encoder_out = local_feat.extract_local_global(data={'image': new_im},
                                                          config={
                                                              'max_keypoints': config['eval_max_keypoints'],
                                                          }
                                                          )
            t_feat = time.time() - t_start

            sparse_scores = pred['scores']
            sparse_descs = pred['descriptors']
            sparse_kpts = pred['keypoints']
            gt_seg = pred['gt_seg']

            curr_frame.add_keypoints(keypoints=np.hstack([sparse_kpts[0].cpu().numpy(),
                                                          sparse_scores[0].cpu().numpy().reshape(-1, 1)]),
                                     descriptors=sparse_descs[0].cpu().numpy())
            curr_frame.time_feat = t_feat

            t_start = time.time()
            _, seg_descriptors = local_feat.sample(score_map=encoder_out['score_map'],
                                                   semi_descs=encoder_out['mid_features'],
                                                   kpts=sparse_kpts[0],
                                                   norm_desc=config['norm_desc'])
            rec_out = rec_model({'scores': sparse_scores,
                                 'seg_descriptors': seg_descriptors[None].permute(0, 2, 1),
                                 'keypoints': sparse_kpts,
                                 'image': new_im})
            t_rec = time.time() - t_start
            curr_frame.time_rec = t_rec

            pred = {
                'image_size': np.array([img.shape[1], img.shape[0]])[None],
            }

            pred = {**pred, **rec_out}
            pred_seg = torch.max(pred['prediction'], dim=2)[1]  # [B, N, C]

            pred_seg = pred_seg[0].cpu().numpy()
            kpts = sparse_kpts[0].cpu().numpy()
            img_pred_seg = vis_seg_point(img=img, kpts=kpts, segs=pred_seg, seg_color=seg_color, radius=9)
            show_text = 'kpts: {:d}'.format(kpts.shape[0])
            img_pred_seg = cv2.putText(img=img_pred_seg, text=show_text,
                                       org=(50, 30),
                                       fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                                       fontScale=1, color=(0, 0, 255),
                                       thickness=2, lineType=cv2.LINE_AA)
            curr_frame.image_rec = img_pred_seg

            if show:
                cv2.imshow('img', img)
                key = cv2.waitKey(1)
                if key == ord('q'):
                    exit(0)
                elif key == ord('s'):
                    show_time = -1
                elif key == ord('c'):
                    show_time = 1

            segmentations = pred['prediction'][0]  # [N, C]
            curr_frame.add_segmentations(segmentations=segmentations,
                                         filtering_threshold=config['localization']['pre_filtering_th'])

        # Step 1: run the tracker first
        success = not mTracker.lost and tracking
        if success:
            success = mTracker.run(frame=curr_frame)
        if not success:
            success = locMap.run(q_frame=curr_frame)
        if success:
            curr_frame.update_point3ds()
            if tracking:
                mTracker.lost = False
                mTracker.last_frame = curr_frame

        pred_seg = torch.max(pred['prediction'], dim=-1)[1]
        pred_seg = pred_seg[0].cpu().numpy()
        gt_seg = gt_seg[0].cpu().numpy()
        iou = compute_iou(pred=pred_seg, target=gt_seg, n_class=pred_seg.shape[0],
                          ignored_ids=[0])  # 0 - background
        prec = compute_precision(pred=pred_seg, target=gt_seg, ignored_ids=[0])

        kpts = sparse_kpts[0].cpu().numpy()
        scene = scene_name.split('/')[-1]  # scene id within the dataset
        if scene not in seg_results.keys():
            seg_results[scene] = {
                'day': {
                    'prec': [],
                    'iou': [],
                    'kpts': [],
                },
                'night': {
                    'prec': [],
                    'iou': [],
                    'kpts': [],
                }
            }
        if fn.find('night') >= 0:
            seg_results[scene]['night']['prec'].append(prec)
            seg_results[scene]['night']['iou'].append(iou)
            seg_results[scene]['night']['kpts'].append(kpts.shape[0])
        else:
            seg_results[scene]['day']['prec'].append(prec)
            seg_results[scene]['day']['iou'].append(iou)
            seg_results[scene]['day']['kpts'].append(kpts.shape[0])

        print_text = 'name: {:s}, kpts: {:d}, iou: {:.3f}, prec: {:.3f}'.format(fn, kpts.shape[0], iou, prec)
        print(print_text)

        t_feat = curr_frame.time_feat
        t_rec = curr_frame.time_rec
        t_loc = curr_frame.time_loc
        t_ref = curr_frame.time_ref
        t_total = t_feat + t_rec + t_loc + t_ref
        time_results['feat'].append(t_feat)
        time_results['rec'].append(t_rec)
        time_results['loc'].append(t_loc)
        time_results['ref'].append(t_ref)
        time_results['total'].append(t_total)

        poses[scene + '/' + fn] = (curr_frame.qvec, curr_frame.tvec)
        q_err, t_err = curr_frame.compute_pose_error()
        if q_err <= 5 and t_err <= 0.05:
            err_ths_cnt[0] = err_ths_cnt[0] + 1
        if q_err <= 2 and t_err <= 0.25:
            err_ths_cnt[1] = err_ths_cnt[1] + 1
        if q_err <= 5 and t_err <= 0.5:
            err_ths_cnt[2] = err_ths_cnt[2] + 1
        if q_err <= 10 and t_err <= 5:
            err_ths_cnt[3] = err_ths_cnt[3] + 1

        if success:
            success_cases.append(scene + '/' + fn)
            print_text = 'qname: {:s} localization success {:d}/{:d}, q_err: {:.2f}, t_err: {:.2f}, ' \
                         '{:d}/{:d}/{:d}/{:d}/{:d}, time: {:.2f}/{:.2f}/{:.2f}/{:.2f}/{:.2f}'.format(
                scene + '/' + fn, len(success_cases), n_total, q_err, t_err,
                err_ths_cnt[0], err_ths_cnt[1], err_ths_cnt[2], err_ths_cnt[3], n_total,
                t_feat, t_rec, t_loc, t_ref, t_total)
        else:
            failed_cases.append(scene + '/' + fn)
            print_text = 'qname: {:s} localization fail {:d}/{:d}, q_err: {:.2f}, t_err: {:.2f}, ' \
                         '{:d}/{:d}/{:d}/{:d}/{:d}, time: {:.2f}/{:.2f}/{:.2f}/{:.2f}/{:.2f}'.format(
                scene + '/' + fn, len(failed_cases), n_total, q_err, t_err,
                err_ths_cnt[0], err_ths_cnt[1], err_ths_cnt[2], err_ths_cnt[3], n_total,
                t_feat, t_rec, t_loc, t_ref, t_total)
        print(print_text)
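The four err_ths_cnt buckets above are the usual localization accuracy bands; the same bookkeeping as a self-contained sketch:

    # (rotation deg, translation m) bands as used above; the first is the
    # strict indoor criterion, and the bands are cumulative, not exclusive.
    ERR_BANDS = [(5, 0.05), (2, 0.25), (5, 0.5), (10, 5)]

    def update_band_counts(q_err, t_err, counts):
        for i, (q_th, t_th) in enumerate(ERR_BANDS):
            if q_err <= q_th and t_err <= t_th:
                counts[i] += 1
        return counts

    print(update_band_counts(1.5, 0.2, [0, 0, 0, 0]))  # -> [0, 1, 1, 1]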
third_party/pram/localization/loc_by_rec_online.py
ADDED
@@ -0,0 +1,225 @@
# -*- coding: UTF-8 -*-
'''=================================================
@Project -> File   pram -> loc_by_rec
@IDE    PyCharm
@Author fx221@cam.ac.uk
@Date   08/02/2024 15:26
=================================================='''
import torch
import pycolmap
from localization.multimap3d import MultiMap3D
from localization.frame import Frame
import yaml, cv2, time
import numpy as np
import os.path as osp
import threading
from recognition.vis_seg import vis_seg_point, generate_color_dic
from tools.common import resize_img
from localization.viewer import Viewer
from localization.tracker import Tracker
from localization.utils import read_query_info
from tools.common import puttext_with_background


def loc_by_rec_online(rec_model, config, local_feat, img_transforms=None):
    seg_color = generate_color_dic(n_seg=2000)
    dataset_path = config['dataset_path']
    show = config['localization']['show']
    if show:
        cv2.namedWindow('img', cv2.WINDOW_NORMAL)

    locMap = MultiMap3D(config=config, save_dir=None)
    if config['dataset'][0] in ['Aachen']:
        viewer_config = {'scene': 'outdoor',
                         'image_size_indoor': 4,
                         'image_line_width_indoor': 8, }
    elif config['dataset'][0] in ['C']:
        viewer_config = {'scene': 'outdoor'}
    elif config['dataset'][0] in ['12Scenes', '7Scenes']:
        viewer_config = {'scene': 'indoor', }
    else:
        viewer_config = {'scene': 'outdoor',
                         'image_size_indoor': 0.4,
                         'image_line_width_indoor': 2, }
    # start the viewer on its own thread
    mViewer = Viewer(locMap=locMap, seg_color=seg_color, config=viewer_config)
    mViewer.refinement = locMap.do_refinement
    viewer_thread = threading.Thread(target=mViewer.run)
    viewer_thread.start()

    # start the tracker
    mTracker = Tracker(locMap=locMap, matcher=locMap.matcher, config=config)

    dataset_name = config['dataset'][0]
    all_scene_query_info = {}
    with open(osp.join(config['config_path'], '{:s}.yaml'.format(dataset_name)), 'r') as f:
        scene_config = yaml.load(f, Loader=yaml.Loader)

    # multiple scenes in a single dataset
    err_ths_cnt = [0, 0, 0, 0]

    show_time = -1
    scenes = scene_config['scenes']
    n_total = 0
    for scene in scenes:
        if len(config['localization']['loc_scene_name']) > 0:
            if scene not in config['localization']['loc_scene_name']:
                continue

        query_path = osp.join(config['dataset_path'], dataset_name, scene, scene_config[scene]['query_path'])
        query_info = read_query_info(query_fn=query_path)
        all_scene_query_info[dataset_name + '/' + scene] = query_info
        image_path = osp.join(dataset_path, dataset_name, scene)
        for fn in sorted(query_info.keys()):
            img = cv2.imread(osp.join(image_path, fn))  # BGR

            camera_model, width, height, params = all_scene_query_info[dataset_name + '/' + scene][fn]
            camera = pycolmap.Camera(model=camera_model, width=int(width), height=int(height), params=params)
            curr_frame = Frame(image=img, camera=camera, id=0, name=fn, scene_name=dataset_name + '/' + scene)
            gt_sub_map = locMap.sub_maps[curr_frame.scene_name]
            if gt_sub_map.gt_poses is not None and curr_frame.name in gt_sub_map.gt_poses.keys():
                curr_frame.gt_qvec = gt_sub_map.gt_poses[curr_frame.name]['qvec']
                curr_frame.gt_tvec = gt_sub_map.gt_poses[curr_frame.name]['tvec']

            with torch.no_grad():
                if config['image_dim'] == 1:
                    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
                    img_cuda = torch.from_numpy(img_gray / 255)[None].cuda().float()
                else:
                    img_cuda = torch.from_numpy(img / 255).permute(2, 0, 1).cuda().float()
                if img_transforms is not None:
                    img_cuda = img_transforms(img_cuda)[None]
                else:
                    img_cuda = img_cuda[None]

                t_start = time.time()
                encoder_out = local_feat.extract_local_global(data={'image': img_cuda},
                                                              config={'min_keypoints': 128,
                                                                      'max_keypoints': config['eval_max_keypoints'],
                                                                      }
                                                              )
                t_feat = time.time() - t_start
                scores_cuda = encoder_out['scores'][0][None]
                kpts_cuda = encoder_out['keypoints'][0][None]
                descriptors_cuda = encoder_out['descriptors'][0][None].permute(0, 2, 1)

                curr_frame.add_keypoints(keypoints=np.hstack([kpts_cuda[0].cpu().numpy(),
                                                              scores_cuda[0].cpu().numpy().reshape(-1, 1)]),
                                         descriptors=descriptors_cuda[0].cpu().numpy())
                curr_frame.time_feat = t_feat

                t_start = time.time()
                _, seg_descriptors = local_feat.sample(score_map=encoder_out['score_map'],
                                                       semi_descs=encoder_out['mid_features'],
                                                       kpts=kpts_cuda[0],
                                                       norm_desc=config['norm_desc'])
                rec_out = rec_model({'scores': scores_cuda,
                                     'seg_descriptors': seg_descriptors[None].permute(0, 2, 1),
                                     'keypoints': kpts_cuda,
                                     'image': img_cuda})
                t_rec = time.time() - t_start
                curr_frame.time_rec = t_rec

                pred = {
                    'scores': scores_cuda,
                    'keypoints': kpts_cuda,
                    'descriptors': descriptors_cuda,
                    'image_size': np.array([img.shape[1], img.shape[0]])[None],
                }

                pred = {**pred, **rec_out}
                pred_seg = torch.max(pred['prediction'], dim=2)[1]  # [B, N, C]

                pred_seg = pred_seg[0].cpu().numpy()
                kpts = kpts_cuda[0].cpu().numpy()
                segmentations = pred['prediction'][0]  # [N, C]
                curr_frame.add_segmentations(segmentations=segmentations,
                                             filtering_threshold=config['localization']['pre_filtering_th'])

                img_pred_seg = vis_seg_point(img=img, kpts=curr_frame.keypoints,
                                             segs=curr_frame.seg_ids + 1, seg_color=seg_color, radius=9)
                show_text = 'kpts: {:d}'.format(kpts.shape[0])
                img_pred_seg = cv2.putText(img=img_pred_seg,
                                           text=show_text,
                                           org=(50, 30),
                                           fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                                           fontScale=1, color=(0, 0, 255),
                                           thickness=2, lineType=cv2.LINE_AA)
                curr_frame.image_rec = img_pred_seg

                if show:
                    img_text = puttext_with_background(image=img, text='Press C - continue | S - pause | Q - exit',
                                                       org=(30, 50),
                                                       bg_color=(255, 255, 255),
                                                       text_color=(0, 0, 255),
                                                       fontScale=1, thickness=2)
                    cv2.imshow('img', img_text)
                    key = cv2.waitKey(show_time)
                    if key == ord('q'):
                        exit(0)
                    elif key == ord('s'):
                        show_time = -1
                    elif key == ord('c'):
                        show_time = 1

                # Step 1: run the tracker first
                success = not mTracker.lost and mViewer.tracking
                if success:
                    success = mTracker.run(frame=curr_frame)
                    if success:
                        mViewer.update(curr_frame=curr_frame)

                if not success:
                    success = locMap.run(q_frame=curr_frame)
                    if success:
                        mViewer.update(curr_frame=curr_frame)

                if success:
                    curr_frame.update_point3ds()
                    if mViewer.tracking:
                        mTracker.lost = False
                        mTracker.last_frame = curr_frame

                time.sleep(50 / 1000)
                locMap.do_refinement = mViewer.refinement

                n_total = n_total + 1
                q_err, t_err = curr_frame.compute_pose_error()
                if q_err <= 5 and t_err <= 0.05:
                    err_ths_cnt[0] = err_ths_cnt[0] + 1
                if q_err <= 2 and t_err <= 0.25:
                    err_ths_cnt[1] = err_ths_cnt[1] + 1
                if q_err <= 5 and t_err <= 0.5:
                    err_ths_cnt[2] = err_ths_cnt[2] + 1
                if q_err <= 10 and t_err <= 5:
                    err_ths_cnt[3] = err_ths_cnt[3] + 1
                time_total = curr_frame.time_feat + curr_frame.time_rec + curr_frame.time_loc + curr_frame.time_ref
                print_text = 'qname: {:s} localization {:b}, q_err: {:.2f}, t_err: {:.2f}, ' \
                             '{:d}/{:d}/{:d}/{:d}/{:d}, time: {:.2f}/{:.2f}/{:.2f}/{:.2f}/{:.2f}'.format(
                    scene + '/' + fn, success, q_err, t_err,
                    err_ths_cnt[0], err_ths_cnt[1], err_ths_cnt[2], err_ths_cnt[3], n_total,
                    curr_frame.time_feat, curr_frame.time_rec, curr_frame.time_loc, curr_frame.time_ref, time_total)
                print(print_text)

    mViewer.terminate()
    viewer_thread.join()
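The viewer runs on its own thread while localization stays on the main thread; a stripped-down sketch of the start/terminate handshake used above (DummyViewer is a stand-in for the Viewer in localization/viewer.py):

    import threading
    import time

    class DummyViewer:
        def __init__(self):
            self.alive = True

        def run(self):
            while self.alive:      # would redraw the map and current pose here
                time.sleep(0.05)

        def terminate(self):
            self.alive = False

    viewer = DummyViewer()
    viewer_thread = threading.Thread(target=viewer.run)
    viewer_thread.start()
    # ... localize frames and call viewer.update(curr_frame=...) here ...
    viewer.terminate()
    viewer_thread.join()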
third_party/pram/localization/localizer.py
ADDED
@@ -0,0 +1,217 @@
# -*- coding: UTF-8 -*-
'''=================================================
@Project -> File   pram -> hloc
@IDE    PyCharm
@Author fx221@cam.ac.uk
@Date   07/02/2024 16:45
=================================================='''

import os
import os.path as osp
from tqdm import tqdm
import argparse
import time
import logging
import h5py
import numpy as np
from pathlib import Path
from colmap_utils.read_write_model import read_model
from colmap_utils.parsers import parse_image_lists_with_intrinsics
# localization
from localization.match_features_batch import confs
from localization.base_model import dynamic_load
from localization import matchers
from localization.utils import compute_pose_error, read_gt_pose, read_retrieval_results
from localization.pose_estimator import pose_estimator_hloc, pose_estimator_iterative


def run(args):
    if args.gt_pose_fn is not None:
        gt_poses = read_gt_pose(path=args.gt_pose_fn)
    else:
        gt_poses = {}
    retrievals = read_retrieval_results(args.retrieval)

    save_root = args.save_root  # path to save
    os.makedirs(save_root, exist_ok=True)
    matcher_name = args.matcher_method  # matching method
    print('matcher: ', confs[args.matcher_method]['model']['name'])
    Model = dynamic_load(matchers, confs[args.matcher_method]['model']['name'])
    matcher = Model(confs[args.matcher_method]['model']).eval().cuda()

    local_feat_name = args.features.as_posix().split("/")[-1].split(".")[0]  # name of local features
    save_fn = '{:s}_{:s}'.format(local_feat_name, matcher_name)
    if args.use_hloc:
        save_fn = 'hloc_' + save_fn
    save_fn = osp.join(save_root, save_fn)

    queries = parse_image_lists_with_intrinsics(args.queries)
    _, db_images, points3D = read_model(str(args.reference_sfm), '.bin')
    db_name_to_id = {image.name: i for i, image in db_images.items()}
    feature_file = h5py.File(args.features, 'r')

    tag = ''
    if args.do_covisible_opt:
        tag = tag + "_o" + str(int(args.obs_thresh)) + 'op' + str(int(args.covisibility_frame))
        tag = tag + "th" + str(int(args.opt_thresh))
        if args.iters > 0:
            tag = tag + "i" + str(int(args.iters))

    log_fn = save_fn + tag
    vis_dir = save_fn + tag
    results = save_fn + tag

    full_log_fn = log_fn + '_full.log'
    loc_log_fn = log_fn + '_loc.npy'
    results = Path(results + '.txt')
    vis_dir = Path(vis_dir)
    if vis_dir is not None:
        Path(vis_dir).mkdir(exist_ok=True)
    print("save_fn: ", log_fn)

    logging.info('Starting localization...')
    poses = {}
    failed_cases = []
    n_total = 0
    n_failed = 0
    full_log_info = ''
    loc_results = {}

    error_ths = ((0.25, 2), (0.5, 5), (5, 10))
    success = [0, 0, 0]
    total_loc_time = []

    for qname, qinfo in tqdm(queries):
        kpq = feature_file[qname]['keypoints'].__array__()
        n_total += 1
        time_start = time.time()

        if qname in retrievals.keys():
            cans = retrievals[qname]
            db_ids = [db_name_to_id[v] for v in cans]
        else:
            cans = []
            db_ids = []
        time_coarse = time.time()

        if args.use_hloc:
            output = pose_estimator_hloc(qname=qname, qinfo=qinfo, db_ids=db_ids, db_images=db_images,
                                         points3D=points3D,
                                         feature_file=feature_file,
                                         thresh=args.ransac_thresh,
                                         image_dir=args.image_dir,
                                         matcher=matcher,
                                         log_info='',
                                         query_img_prefix='',
                                         db_img_prefix='')
        else:  # should be faster and more accurate than hloc
            t_start = time.time()
            output = pose_estimator_iterative(qname=qname,
                                              qinfo=qinfo,
                                              matcher=matcher,
                                              db_ids=db_ids,
                                              db_images=db_images,
                                              points3D=points3D,
                                              feature_file=feature_file,
                                              thresh=args.ransac_thresh,
                                              image_dir=args.image_dir,
                                              do_covisibility_opt=args.do_covisible_opt,
                                              covisibility_frame=args.covisibility_frame,
                                              log_info='',
                                              inlier_th=args.inlier_thresh,
                                              obs_th=args.obs_thresh,
                                              opt_th=args.opt_thresh,
                                              gt_qvec=gt_poses[qname]['qvec'] if qname in gt_poses.keys() else None,
                                              gt_tvec=gt_poses[qname]['tvec'] if qname in gt_poses.keys() else None,
                                              query_img_prefix='',
                                              db_img_prefix='database',
                                              )
        time_full = time.time()

        qvec = output['qvec']
        tvec = output['tvec']
        loc_time = time_full - time_start
        total_loc_time.append(loc_time)

        poses[qname] = (qvec, tvec)
        print_text = "All {:d}/{:d} failed cases, time[cs/fn]: {:.2f}/{:.2f}".format(
            n_failed, n_total,
            time_coarse - time_start,
            time_full - time_coarse,
        )

        if qname in gt_poses.keys():
            gt_qvec = gt_poses[qname]['qvec']
            gt_tvec = gt_poses[qname]['tvec']

            q_error, t_error = compute_pose_error(pred_qcw=qvec, pred_tcw=tvec, gt_qcw=gt_qvec, gt_tcw=gt_tvec)

            for error_idx, th in enumerate(error_ths):
                if t_error <= th[0] and q_error <= th[1]:
                    success[error_idx] += 1
            print_text += (
                ', q_error:{:.2f} t_error:{:.2f} {:d}/{:d}/{:d}/{:d}, time: {:.2f}, {:d}pts'.format(
                    q_error, t_error, success[0], success[1], success[2], n_total, loc_time, kpq.shape[0]))
        if output['num_inliers'] == 0:
            failed_cases.append(qname)

        loc_results[qname] = {
            'keypoints_query': output['keypoints_query'],
            'points3D_ids': output['points3D_ids'],
        }
        full_log_info = full_log_info + output['log_info']
        full_log_info += (print_text + "\n")
        print(print_text)

    logs_path = f'{results}.failed'
    with open(logs_path, 'w') as f:
        for v in failed_cases:
            print(v)
            f.write(v + "\n")

    logging.info(f'Localized {len(poses)} / {len(queries)} images.')
    logging.info(f'Writing poses to {results}...')
    print('Mean loc time: {:.2f}...'.format(np.mean(total_loc_time)))
    with open(results, 'w') as f:
        for q in poses:
            qvec, tvec = poses[q]
            qvec = ' '.join(map(str, qvec))
            tvec = ' '.join(map(str, tvec))
            name = q
            f.write(f'{name} {qvec} {tvec}\n')

    with open(full_log_fn, 'w') as f:
        f.write(full_log_info)

    np.save(loc_log_fn, loc_results)
    print('Save logs to ', loc_log_fn)
    logging.info('Done!')


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--image_dir', type=str, required=True)
    parser.add_argument('--dataset', type=str, required=True)
    parser.add_argument('--reference_sfm', type=Path, required=True)
    parser.add_argument('--queries', type=Path, required=True)
    parser.add_argument('--features', type=Path, required=True)
    parser.add_argument('--ransac_thresh', type=float, default=12)
    parser.add_argument('--covisibility_frame', type=int, default=50)
    parser.add_argument('--do_covisible_opt', action='store_true')
    parser.add_argument('--use_hloc', action='store_true')
    parser.add_argument('--matcher_method', type=str, default="NNM")
    parser.add_argument('--inlier_thresh', type=int, default=50)
    parser.add_argument('--obs_thresh', type=float, default=3)
    parser.add_argument('--opt_thresh', type=float, default=12)
    parser.add_argument('--iters', type=int, default=0)  # referenced when building the tag above
    parser.add_argument('--save_root', type=str, required=True)
    parser.add_argument('--retrieval', type=Path, default=None)
    parser.add_argument('--gt_pose_fn', type=str, default=None)

    args = parser.parse_args()
    os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
    run(args=args)
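run() writes one line per query to the results .txt; a small parser sketch for downstream evaluation, assuming the usual qw qx qy qz tx ty tz order (the file name below is hypothetical):

    import numpy as np

    def read_pose_results(path):
        # One line per query: name, 4 quaternion values, 3 translation values.
        poses = {}
        with open(path, 'r') as f:
            for line in f:
                parts = line.strip().split(' ')
                if len(parts) != 8:
                    continue
                poses[parts[0]] = (np.array(parts[1:5], dtype=float),
                                   np.array(parts[5:8], dtype=float))
        return poses

    # poses = read_pose_results('outputs/feats-sfd2_adagml.txt')  # hypothetical path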
third_party/pram/localization/match_features.py
ADDED
@@ -0,0 +1,156 @@
import argparse
import torch
from pathlib import Path
import h5py
import logging
from tqdm import tqdm
import pprint

import localization.matchers as matchers
from localization.base_model import dynamic_load
from colmap_utils.parsers import names_to_pair

confs = {
    'gm': {
        'output': 'gm',
        'model': {
            'name': 'gm',
            'weight_path': 'weights/imp_gm.900.pth',
            'sinkhorn_iterations': 20,
        },
    },
    'gml': {
        'output': 'gml',
        'model': {
            'name': 'gml',
            'weight_path': 'weights/imp_gml.920.pth',
            'sinkhorn_iterations': 20,
        },
    },

    'adagml': {
        'output': 'adagml',
        'model': {
            'name': 'adagml',
            'weight_path': 'weights/imp_adagml.80.pth',
            'sinkhorn_iterations': 20,
        },
    },

    'superglue': {
        'output': 'superglue',
        'model': {
            'name': 'superglue',
            'weights': 'outdoor',
            'sinkhorn_iterations': 20,
            'weight_path': 'weights/superglue_outdoor.pth',
        },
    },
    'NNM': {
        'output': 'NNM',
        'model': {
            'name': 'nearest_neighbor',
            'do_mutual_check': True,
            'distance_threshold': None,
        },
    },
}


@torch.no_grad()
def main(conf, pairs, features, export_dir, exhaustive=False):
    logging.info('Matching local features with configuration:'
                 f'\n{pprint.pformat(conf)}')

    feature_path = Path(export_dir, features + '.h5')
    assert feature_path.exists(), feature_path
    feature_file = h5py.File(str(feature_path), 'r')
    pairs_name = pairs.stem
    if not exhaustive:
        assert pairs.exists(), pairs
        with open(pairs, 'r') as f:
            pair_list = f.read().rstrip('\n').split('\n')
    elif exhaustive:
        logging.info(f'Writing exhaustive match pairs to {pairs}.')
        assert not pairs.exists(), pairs

        # get the list of images from the feature file
        images = []
        feature_file.visititems(
            lambda name, obj: images.append(obj.parent.name.strip('/'))
            if isinstance(obj, h5py.Dataset) else None)
        images = list(set(images))

        pair_list = [' '.join((images[i], images[j]))
                     for i in range(len(images)) for j in range(i)]
        with open(str(pairs), 'w') as f:
            f.write('\n'.join(pair_list))

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    Model = dynamic_load(matchers, conf['model']['name'])
    model = Model(conf['model']).eval().to(device)

    match_name = f'{features}-{conf["output"]}-{pairs_name}'
    match_path = Path(export_dir, match_name + '.h5')

    match_file = h5py.File(str(match_path), 'a')

    matched = set()
    for pair in tqdm(pair_list, smoothing=.1):
        name0, name1 = pair.split(' ')
        pair = names_to_pair(name0, name1)

        # Avoid recomputing duplicates to save time
        if len({(name0, name1), (name1, name0)} & matched) \
                or pair in match_file:
            continue

        data = {}
        feats0, feats1 = feature_file[name0], feature_file[name1]
        for k in feats0.keys():
            if k == 'descriptors':
                data[k + '0'] = feats0[k][()].transpose()  # [N D]
            else:
                data[k + '0'] = feats0[k][()]
        for k in feats1.keys():
            if k == 'descriptors':
                data[k + '1'] = feats1[k][()].transpose()  # [N D]
            else:
                data[k + '1'] = feats1[k][()]
        data = {k: torch.from_numpy(v)[None].float().to(device)
                for k, v in data.items()}

        # some matchers might expect an image but only use its size
        data['image0'] = torch.empty((1, 1,) + tuple(feats0['image_size'])[::-1])
        data['image1'] = torch.empty((1, 1,) + tuple(feats1['image_size'])[::-1])

        pred = model(data)
        grp = match_file.create_group(pair)
        matches = pred['matches0'][0].cpu().short().numpy()
        grp.create_dataset('matches0', data=matches)

        if 'matching_scores0' in pred:
            scores = pred['matching_scores0'][0].cpu().half().numpy()
            grp.create_dataset('matching_scores0', data=scores)

        matched |= {(name0, name1), (name1, name0)}

    match_file.close()
    logging.info('Finished exporting matches.')

    return match_path


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--export_dir', type=Path, required=True)
    parser.add_argument('--features', type=str, required=True)
    parser.add_argument('--pairs', type=Path, required=True)
    parser.add_argument('--conf', type=str, required=True, choices=list(confs.keys()))
    parser.add_argument('--exhaustive', action='store_true')
    args = parser.parse_args()
    main(confs[args.conf], args.pairs, args.features, args.export_dir,
         exhaustive=args.exhaustive)
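A minimal call sketch with placeholder paths; features is the stem of the .h5 written by extract_features.py, and the output lands next to it as <features>-NNM-<pairs stem>.h5:

    from pathlib import Path

    match_path = main(confs['NNM'],
                      pairs=Path('/data/aachen/pairs.txt'),     # hypothetical
                      features='feats-sfd2',
                      export_dir=Path('/data/aachen/outputs'))  # hypothetical
    print('matches written to', match_path)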
third_party/pram/localization/match_features_batch.py
ADDED
@@ -0,0 +1,242 @@
import argparse
import torch
from pathlib import Path
import h5py
import logging
from tqdm import tqdm
import pprint
from queue import Queue
from threading import Thread
from functools import partial
from typing import Dict, List, Optional, Tuple, Union

import localization.matchers as matchers
from localization.base_model import dynamic_load
from colmap_utils.parsers import names_to_pair, names_to_pair_old, parse_retrieval

confs = {
    'gm': {
        'output': 'gm',
        'model': {
            'name': 'gm',
            'weight_path': 'weights/imp_gm.900.pth',
            'sinkhorn_iterations': 20,
        },
    },
    'gml': {
        'output': 'gml',
        'model': {
            'name': 'gml',
            'weight_path': 'weights/imp_gml.920.pth',
            'sinkhorn_iterations': 20,
        },
    },
    'adagml': {
        'output': 'adagml',
        'model': {
            'name': 'adagml',
            'weight_path': 'weights/imp_adagml.80.pth',
            'sinkhorn_iterations': 20,
        },
    },
    'superglue': {
        'output': 'superglue',
        'model': {
            'name': 'superglue',
            'weights': 'outdoor',
            'sinkhorn_iterations': 20,
            'weight_path': 'weights/superglue_outdoor.pth',
        },
    },
    'NNM': {
        'output': 'NNM',
        'model': {
            'name': 'nearest_neighbor',
            'do_mutual_check': True,
            'distance_threshold': None,
        },
    },
}


# Small thread pool that moves HDF5 writes off the matching loop.
class WorkQueue:
    def __init__(self, work_fn, num_threads=1):
        self.queue = Queue(num_threads)
        self.threads = [
            Thread(target=self.thread_fn, args=(work_fn,)) for _ in range(num_threads)
        ]
        for thread in self.threads:
            thread.start()

    def join(self):
        for thread in self.threads:
            self.queue.put(None)
        for thread in self.threads:
            thread.join()

    def thread_fn(self, work_fn):
        item = self.queue.get()
        while item is not None:
            work_fn(item)
            item = self.queue.get()

    def put(self, data):
        self.queue.put(data)


class FeaturePairsDataset(torch.utils.data.Dataset):
    def __init__(self, pairs, feature_path_q, feature_path_r):
        self.pairs = pairs
        self.feature_path_q = feature_path_q
        self.feature_path_r = feature_path_r

    def __getitem__(self, idx):
        name0, name1 = self.pairs[idx]
        data = {}
        with h5py.File(self.feature_path_q, "r") as fd:
            grp = fd[name0]
            for k, v in grp.items():
                data[k + "0"] = torch.from_numpy(v.__array__()).float()
                if k == 'descriptors':
                    data[k + '0'] = data[k + '0'].t()  # [N, D]
            # some matchers might expect an image but only use its size
            data["image0"] = torch.empty((1,) + tuple(grp["image_size"])[::-1])
        with h5py.File(self.feature_path_r, "r") as fd:
            grp = fd[name1]
            for k, v in grp.items():
                data[k + "1"] = torch.from_numpy(v.__array__()).float()
                if k == 'descriptors':
                    data[k + '1'] = data[k + '1'].t()  # [N, D]
            data["image1"] = torch.empty((1,) + tuple(grp["image_size"])[::-1])
        return data

    def __len__(self):
        return len(self.pairs)


# Write one pair's matches (and optional scores) into the HDF5 match file.
def writer_fn(inp, match_path):
    pair, pred = inp
    with h5py.File(str(match_path), "a", libver="latest") as fd:
        if pair in fd:
            del fd[pair]
        grp = fd.create_group(pair)
        matches = pred["matches0"][0].cpu().short().numpy()
        grp.create_dataset("matches0", data=matches)
        if "matching_scores0" in pred:
            scores = pred["matching_scores0"][0].cpu().half().numpy()
            grp.create_dataset("matching_scores0", data=scores)


def main(
        conf: Dict,
        pairs: Path,
        features: Union[Path, str],
        export_dir: Optional[Path] = None,
        matches: Optional[Path] = None,
        features_ref: Optional[Path] = None,
        overwrite: bool = False,
) -> Path:
    if isinstance(features, Path) or Path(features).exists():
        features_q = features
        if matches is None:
            raise ValueError(
                "Either provide both features and matches as Path or both as names.")
    else:
        if export_dir is None:
            raise ValueError(
                f"Provide an export_dir if features is not a file path: {features}.")
        features_q = Path(export_dir, features + ".h5")
        if matches is None:
            matches = Path(export_dir, f'{features}-{conf["output"]}-{pairs.stem}.h5')

    if features_ref is None:
        features_ref = features_q
    match_from_paths(conf, pairs, matches, features_q, features_ref, overwrite)

    return matches


def find_unique_new_pairs(pairs_all: List[Tuple[str, str]], match_path: Path = None):
    """Avoid recomputing duplicate pairs to save time."""
    pairs = set()
    for i, j in pairs_all:
        if (j, i) not in pairs:
            pairs.add((i, j))
    pairs = list(pairs)
    if match_path is not None and match_path.exists():
        with h5py.File(str(match_path), "r", libver="latest") as fd:
            pairs_filtered = []
            for i, j in pairs:
                if (
                    names_to_pair(i, j) in fd
                    or names_to_pair(j, i) in fd
                    or names_to_pair_old(i, j) in fd
                    or names_to_pair_old(j, i) in fd
                ):
                    continue
                pairs_filtered.append((i, j))
        return pairs_filtered
    return pairs


@torch.no_grad()
def match_from_paths(
        conf: Dict,
        pairs_path: Path,
        match_path: Path,
        feature_path_q: Path,
        feature_path_ref: Path,
        overwrite: bool = False,
) -> None:
    logging.info(
        f"Matching local features with configuration:\n{pprint.pformat(conf)}")

    if not feature_path_q.exists():
        raise FileNotFoundError(f"Query feature file {feature_path_q}.")
    if not feature_path_ref.exists():
        raise FileNotFoundError(f"Reference feature file {feature_path_ref}.")
    match_path.parent.mkdir(exist_ok=True, parents=True)

    assert pairs_path.exists(), pairs_path
    pairs = parse_retrieval(pairs_path)
    pairs = [(q, r) for q, rs in pairs.items() for r in rs]
    pairs = find_unique_new_pairs(pairs, None if overwrite else match_path)
    if len(pairs) == 0:
        logging.info("Skipping the matching.")
        return

    device = "cuda" if torch.cuda.is_available() else "cpu"
    Model = dynamic_load(matchers, conf["model"]["name"])
    model = Model(conf["model"]).eval().to(device)

    dataset = FeaturePairsDataset(pairs, feature_path_q, feature_path_ref)
    loader = torch.utils.data.DataLoader(
        dataset, num_workers=4, batch_size=1, shuffle=False, pin_memory=True
    )
    writer_queue = WorkQueue(partial(writer_fn, match_path=match_path), 5)

    for idx, data in enumerate(tqdm(loader, smoothing=0.1)):
        data = {
            k: v if k.startswith("image") else v.to(device, non_blocking=True)
            for k, v in data.items()
        }
        pred = model(data)
        pair = names_to_pair(*pairs[idx])
        writer_queue.put((pair, pred))
    writer_queue.join()
    logging.info("Finished exporting matches.")


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--export_dir', type=Path, required=True)
    parser.add_argument('--features', type=str, required=True)
    parser.add_argument('--pairs', type=Path, required=True)
    parser.add_argument('--conf', type=str, required=True, choices=list(confs.keys()))
    args = parser.parse_args()
    main(confs[args.conf], args.pairs, args.features, args.export_dir)
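
Usage note: the batch variant reads pairs from a retrieval file instead of enumerating them, streams feature pairs through a DataLoader, and writes matches from background threads via WorkQueue. A hypothetical Python-side call; the file names below are placeholders, not defined by this repo:

    from pathlib import Path
    from localization.match_features_batch import confs, main

    match_path = main(
        confs['adagml'],
        pairs=Path('outputs/pairs-netvlad-top20.txt'),  # hloc-style retrieval pairs (assumed name)
        features='feats-sfd2',                          # resolved to outputs/feats-sfd2.h5
        export_dir=Path('outputs'),
    )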
third_party/pram/localization/matchers/__init__.py
ADDED
@@ -0,0 +1,3 @@
def get_matcher(matcher):
    mod = __import__(f'{__name__}.{matcher}', fromlist=[''])
    return getattr(mod, 'Model')
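
Usage note: get_matcher resolves a matcher module by name and returns its Model attribute, so each module under localization/matchers/ is expected to export Model (for the adagml module below, this presumes an alias such as Model = AdaGML, which is not shown in this diff). A sketch, assuming BaseModel subclasses torch.nn.Module as in hloc:

    from localization.matchers import get_matcher

    Model = get_matcher('adagml')  # imports localization.matchers.adagml
    model = Model({'weight_path': 'weights/imp_adagml.80.pth'}).eval()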
third_party/pram/localization/matchers/adagml.py
ADDED
@@ -0,0 +1,41 @@
# -*- coding: UTF-8 -*-
'''=================================================
@Project -> File   pram -> adagml
@IDE     PyCharm
@Author  fx221@cam.ac.uk
@Date    11/02/2024 14:34
=================================================='''
import torch
from localization.base_model import BaseModel
from nets.adagml import AdaGML as GMatcher


class AdaGML(BaseModel):
    default_config = {
        'descriptor_dim': 128,
        'hidden_dim': 256,
        'weights': 'indoor',
        'keypoint_encoder': [32, 64, 128, 256],
        'GNN_layers': ['self', 'cross'] * 9,  # 9 alternating self/cross blocks (18 layers)
        'sinkhorn_iterations': 20,
        'match_threshold': 0.2,
        'with_pose': False,
        'n_layers': 9,
        'n_min_tokens': 256,
        'with_sinkhorn': True,
        'weight_path': None,
    }

    required_inputs = [
        'image0', 'keypoints0', 'scores0', 'descriptors0',
        'image1', 'keypoints1', 'scores1', 'descriptors1',
    ]

    def _init(self, conf):
        self.net = GMatcher(config=conf).eval()
        state_dict = torch.load(conf['weight_path'], map_location='cpu')['model']
        self.net.load_state_dict(state_dict, strict=True)

    def _forward(self, data):
        with torch.no_grad():
            return self.net(data)
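
Usage note: a minimal sketch of the input dict this wrapper consumes. The batch dimension and tensor layouts are assumptions inferred from the 128-dim descriptors above and from how match_features.py builds its data dict ([N, D] descriptors); BaseModel's actual input validation lives elsewhere in this repo:

    import torch

    n = 500  # arbitrary number of keypoints, for illustration
    data = {
        'image0': torch.empty(1, 1, 480, 640),   # matchers may only read the size
        'keypoints0': torch.rand(1, n, 2) * 480,
        'scores0': torch.rand(1, n),
        'descriptors0': torch.rand(1, n, 128),   # descriptor_dim = 128 (layout assumed)
        'image1': torch.empty(1, 1, 480, 640),
        'keypoints1': torch.rand(1, n, 2) * 480,
        'scores1': torch.rand(1, n),
        'descriptors1': torch.rand(1, n, 128),
    }
    # pred = AdaGML({'weight_path': 'weights/imp_adagml.80.pth'})(data)
    # pred['matches0'] / pred['matching_scores0'] are what the scripts above store.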