diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..b5f733e4c01e5fc1a32d76774ba4c74b71d13dd1
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+checkpoints
+src/output
+visualization/visualizer
+flagged
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000000000000000000000000000000000000..6e57769e4a564791650a292e73fcd64c8100dc4b
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2021 ZhiGang Jiang
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/Post-Porcessing.md b/Post-Porcessing.md
new file mode 100644
index 0000000000000000000000000000000000000000..f9633df488ffe2dd4050d78786dba69f6b0d3cae
--- /dev/null
+++ b/Post-Porcessing.md
@@ -0,0 +1,35 @@
+# Post-Processing
+## Steps
+
+1. Simplify the polygon with the [DP algorithm](https://en.wikipedia.org/wiki/Ramer%E2%80%93Douglas%E2%80%93Peucker_algorithm) (a sketch follows the step list)
+
+![img.png](src/fig/post_processing/img_0.png)
+
+2. Detect occlusion by calculating how much of each box is filled with 1
+
+![img.png](src/fig/post_processing/img_1.png)
+
+3. Fill in the reasonable sampling sections
+
+![img.png](src/fig/post_processing/img_2.png)
+
+4. Output the processed polygon
+
+![img.png](src/fig/post_processing/img_3.png)
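+
+As an illustration of step 1, here is a minimal numpy sketch of Ramer-Douglas-Peucker simplification (illustrative only; the actual implementation in this repo may differ in details, and `epsilon` is the distance tolerance in floorplan coordinates):
+
+```python
+import numpy as np
+
+def rdp(points: np.ndarray, epsilon: float) -> np.ndarray:
+    """Simplify an open polyline (shape [N, 2]) with the DP/RDP algorithm."""
+    start, end = points[0], points[-1]
+    chord = end - start
+    # perpendicular distance of every point to the start->end chord
+    dist = np.abs(chord[0] * (points[:, 1] - start[1])
+                  - chord[1] * (points[:, 0] - start[0])) / (np.linalg.norm(chord) + 1e-12)
+    i = int(np.argmax(dist))
+    if dist[i] > epsilon:
+        # keep the farthest point and recurse on both halves
+        return np.vstack([rdp(points[:i + 1], epsilon)[:-1], rdp(points[i:], epsilon)])
+    return np.vstack([start, end])
+```
+
+For a closed polygon one would first split it at two extreme vertices and simplify each half.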
+
+## Performance
+It works; here is a performance comparison on the MatterportLayout dataset:
+
+| Method | 2D IoU(%) | 3D IoU(%) | RMSE | $\mathbf{\delta_{1}}$ |
+|--|--|--|--|--|
+| without post-proc | 83.52 | 81.11 | 0.204 | 0.951 |
+| original post-proc | 83.12 | 80.71 | 0.230 | 0.936 |
+| optimized post-proc | 83.48 | 81.08 | 0.214 | 0.940 |
+
+Original:
+
+![img.png](src/fig/post_processing/original.png)
+
+Optimized:
+
+![img.png](src/fig/post_processing/optimized.png)
diff --git a/app.py b/app.py
new file mode 100644
index 0000000000000000000000000000000000000000..5dcda14ababe0b3a646054a142811146d5845dbf
--- /dev/null
+++ b/app.py
@@ -0,0 +1,139 @@
+'''
+@author: Zhigang Jiang
+@time: 2022/05/23
+@description:
+'''
+
+import gradio as gr
+import numpy as np
+import os
+import torch
+
+from PIL import Image
+
+from utils.logger import get_logger
+from config.defaults import get_config
+from inference import preprocess, run_one_inference
+from models.build import build_model
+from argparse import Namespace
+import gdown
+
+
+def down_ckpt(model_cfg, ckpt_dir):
+    model_ids = [
+        ['src/config/mp3d.yaml', '1o97oAmd-yEP5bQrM0eAWFPLq27FjUDbh'],
+        ['src/config/zind.yaml', '1PzBj-dfDfH_vevgSkRe5kczW0GVl_43I'],
+        ['src/config/pano.yaml', '1JoeqcPbm_XBPOi6O9GjjWi3_rtyPZS8m'],
+        ['src/config/s2d3d.yaml', '1PfJzcxzUsbwwMal7yTkBClIFgn8IdEzI'],
+        ['src/config/ablation_study/full.yaml', '1U16TxUkvZlRwJNaJnq9nAUap-BhCVIha']
+    ]
+
+    for model_id in model_ids:
+        if model_id[0] != model_cfg:
+            continue
+        path = os.path.join(ckpt_dir, 'best.pkl')
+        if not os.path.exists(path):
+            logger.info(f"Downloading {model_id}")
+            os.makedirs(ckpt_dir, exist_ok=True)
+            gdown.download(f"https://drive.google.com/uc?id={model_id[1]}", path, False)
+
+
+def greet(img_path, pre_processing, weight_name, post_processing, visualization, mesh_format, mesh_resolution):
+    args.pre_processing = pre_processing
+    args.post_processing = post_processing
+    if weight_name == 'mp3d':
+        model = mp3d_model
+    elif weight_name == 'zind':
+        model = zind_model
+    else:
+        logger.error("unknown pre-trained weight name")
+        raise NotImplementedError
+
+    img_name = os.path.basename(img_path).split('.')[0]
+    img = np.array(Image.open(img_path).resize((1024, 512), Image.Resampling.BICUBIC))[..., :3]
+
+    vp_cache_path = 'src/demo/default_vp.txt'
+    if args.pre_processing:
+        vp_cache_path = os.path.join('src/output', f'{img_name}_vp.txt')
+        logger.info("pre-processing ...")
+        img, vp = preprocess(img, vp_cache_path=vp_cache_path)
+
+    img = (img / 255.0).astype(np.float32)
+    run_one_inference(img, model, args, img_name,
+                      logger=logger, show=False,
+                      show_depth='depth-normal-gradient' in visualization,
+                      show_floorplan='2d-floorplan' in visualization,
+                      mesh_format=mesh_format, mesh_resolution=int(mesh_resolution))
+
+    return [os.path.join(args.output_dir, f"{img_name}_pred.png"),
+            os.path.join(args.output_dir, f"{img_name}_3d{mesh_format}"),
+            os.path.join(args.output_dir, f"{img_name}_3d{mesh_format}"),
+            vp_cache_path,
+            os.path.join(args.output_dir, f"{img_name}_pred.json")]
+
+
+def get_model(args):
+    config = get_config(args)
+    down_ckpt(args.cfg, config.CKPT.DIR)
+    if ('cuda' in args.device or 'cuda' in config.TRAIN.DEVICE) and not torch.cuda.is_available():
+        logger.info(f'The {args.device} is not available, will use cpu ...')
+        config.defrost()
+        args.device = "cpu"
+        config.TRAIN.DEVICE = "cpu"
+        config.freeze()
+    model, _, _, _ = build_model(config, logger)
+    return model
+
+
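+# Hypothetical standalone usage (illustrative only, not executed by the app):
+# once the module-level `logger`, `args` and weight globals exist (see the
+# __main__ block below), one inference can run without the Gradio UI, e.g.:
+#   args.cfg = 'src/config/mp3d.yaml'
+#   mp3d_model = get_model(args)
+#   greet('src/demo/pano_demo1.png', True, 'mp3d', 'manhattan',
+#         ['depth-normal-gradient', '2d-floorplan'], '.gltf', '256')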
+if __name__ == '__main__':
+    logger = get_logger()
+    args = Namespace(device='cuda', output_dir='src/output', visualize_3d=False, output_3d=True)
+    os.makedirs(args.output_dir, exist_ok=True)
+
+    args.cfg = 'src/config/mp3d.yaml'
+    mp3d_model = get_model(args)
+
+    args.cfg = 'src/config/zind.yaml'
+    zind_model = get_model(args)
+
+    description = "This is a demo of the project " \
+                  "LGT-Net. " \
+                  "It uses the Geometry-Aware Transformer Network to predict the 3D room layout of an RGB panorama."
+
+    demo = gr.Interface(fn=greet,
+                        inputs=[gr.Image(type='filepath', label='input rgb panorama', value='src/demo/pano_demo1.png'),
+                                gr.Checkbox(label='pre-processing', value=True),
+                                gr.Radio(['mp3d', 'zind'],
+                                         label='pre-trained weight',
+                                         value='mp3d'),
+                                gr.Radio(['manhattan', 'atalanta', 'original'],
+                                         label='post-processing method',
+                                         value='manhattan'),
+                                gr.CheckboxGroup(['depth-normal-gradient', '2d-floorplan'],
+                                                 label='2d-visualization',
+                                                 value=['depth-normal-gradient', '2d-floorplan']),
+                                gr.Radio(['.gltf', '.obj', '.glb'],
+                                         label='output format of 3d mesh',
+                                         value='.gltf'),
+                                gr.Radio(['128', '256', '512', '1024'],
+                                         label='output resolution of 3d mesh',
+                                         value='256'),
+                                ],
+                        outputs=[gr.Image(label='predicted result 2d-visualization', type='filepath'),
+                                 gr.Model3D(label='3d mesh reconstruction', clear_color=[1.0, 1.0, 1.0, 1.0]),
+                                 gr.File(label='3d mesh file'),
+                                 gr.File(label='vanishing point information'),
+                                 gr.File(label='layout json')],
+                        examples=[
+                            ['src/demo/pano_demo1.png', True, 'mp3d', 'manhattan', ['depth-normal-gradient', '2d-floorplan'], '.gltf', '256'],
+                            ['src/demo/mp3d_demo1.png', False, 'mp3d', 'manhattan', ['depth-normal-gradient', '2d-floorplan'], '.gltf', '256'],
+                            ['src/demo/mp3d_demo2.png', False, 'mp3d', 'manhattan', ['depth-normal-gradient', '2d-floorplan'], '.gltf', '256'],
+                            ['src/demo/mp3d_demo3.png', False, 'mp3d', 'manhattan', ['depth-normal-gradient', '2d-floorplan'], '.gltf', '256'],
+                            ['src/demo/zind_demo1.png', True, 'zind', 'manhattan', ['depth-normal-gradient', '2d-floorplan'], '.gltf', '256'],
+                            ['src/demo/zind_demo2.png', False, 'zind', 'atalanta', ['depth-normal-gradient', '2d-floorplan'], '.gltf', '256'],
+                            ['src/demo/zind_demo3.png', True, 'zind', 'manhattan', ['depth-normal-gradient', '2d-floorplan'], '.gltf', '256'],
+                            ['src/demo/other_demo1.png', False, 'mp3d', 'manhattan', ['depth-normal-gradient', '2d-floorplan'], '.gltf', '256'],
+                            ['src/demo/other_demo2.png', True, 'mp3d', 'manhattan', ['depth-normal-gradient', '2d-floorplan'], '.gltf', '256'],
+                        ], title='LGT-Net', allow_flagging="never", cache_examples=False, description=description)
+
+    demo.launch(debug=True, enable_queue=False)
diff --git a/config/__init__.py b/config/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..5ccaa23be821afe11edb098d1179bba4330fb95f
--- /dev/null
+++ b/config/__init__.py
@@ -0,0 +1,4 @@
+"""
+@Date: 2021/07/17
+@description:
+"""
diff --git a/config/defaults.py b/config/defaults.py
new file mode 100644
index 0000000000000000000000000000000000000000..5cab407dfe0cba098c1edc172283c7d4b729b389
--- /dev/null
+++ b/config/defaults.py
@@ -0,0 +1,289 @@
+"""
+@Date: 2021/07/17
+@description:
+"""
+import os
+import logging
+from yacs.config import CfgNode as CN
+
+_C = CN()
+_C.DEBUG = False
+_C.MODE = 'train'
+_C.VAL_NAME = 'val'
+_C.TAG = 'default'
+_C.COMMENT = 'add some comments to help you understand'
+_C.SHOW_BAR = True
+_C.SAVE_EVAL = False
+_C.MODEL = CN()
+_C.MODEL.NAME = 'model_name'
+_C.MODEL.SAVE_BEST = True
+_C.MODEL.SAVE_LAST = True +_C.MODEL.ARGS = [] +_C.MODEL.FINE_TUNE = [] + +# ----------------------------------------------------------------------------- +# Training settings +# ----------------------------------------------------------------------------- +_C.TRAIN = CN() +_C.TRAIN.SCRATCH = False +_C.TRAIN.START_EPOCH = 0 +_C.TRAIN.EPOCHS = 300 +_C.TRAIN.DETERMINISTIC = False +_C.TRAIN.SAVE_FREQ = 5 + +_C.TRAIN.BASE_LR = 5e-4 + +_C.TRAIN.WARMUP_EPOCHS = 20 +_C.TRAIN.WEIGHT_DECAY = 0 +_C.TRAIN.WARMUP_LR = 5e-7 +_C.TRAIN.MIN_LR = 5e-6 +# Clip gradient norm +_C.TRAIN.CLIP_GRAD = 5.0 +# Auto resume from latest checkpoint +_C.TRAIN.RESUME_LAST = True +# Gradient accumulation steps +# could be overwritten by command line argument +_C.TRAIN.ACCUMULATION_STEPS = 0 +# Whether to use gradient checkpointing to save memory +# could be overwritten by command line argument +_C.TRAIN.USE_CHECKPOINT = False +# 'cpu' or 'cuda:0, 1, 2, 3' or 'cuda' +_C.TRAIN.DEVICE = 'cuda' + +# LR scheduler +_C.TRAIN.LR_SCHEDULER = CN() +_C.TRAIN.LR_SCHEDULER.NAME = '' +_C.TRAIN.LR_SCHEDULER.ARGS = [] + + +# Optimizer +_C.TRAIN.OPTIMIZER = CN() +_C.TRAIN.OPTIMIZER.NAME = 'adam' +# Optimizer Epsilon +_C.TRAIN.OPTIMIZER.EPS = 1e-8 +# Optimizer Betas +_C.TRAIN.OPTIMIZER.BETAS = (0.9, 0.999) +# SGD momentum +_C.TRAIN.OPTIMIZER.MOMENTUM = 0.9 + +# Criterion +_C.TRAIN.CRITERION = CN() +# Boundary loss (Horizon-Net) +_C.TRAIN.CRITERION.BOUNDARY = CN() +_C.TRAIN.CRITERION.BOUNDARY.NAME = 'boundary' +_C.TRAIN.CRITERION.BOUNDARY.LOSS = 'BoundaryLoss' +_C.TRAIN.CRITERION.BOUNDARY.WEIGHT = 0.0 +_C.TRAIN.CRITERION.BOUNDARY.WEIGHTS = [] +_C.TRAIN.CRITERION.BOUNDARY.NEED_ALL = True +# Up and Down depth loss (LED2-Net) +_C.TRAIN.CRITERION.LEDDepth = CN() +_C.TRAIN.CRITERION.LEDDepth.NAME = 'led_depth' +_C.TRAIN.CRITERION.LEDDepth.LOSS = 'LEDLoss' +_C.TRAIN.CRITERION.LEDDepth.WEIGHT = 0.0 +_C.TRAIN.CRITERION.LEDDepth.WEIGHTS = [] +_C.TRAIN.CRITERION.LEDDepth.NEED_ALL = True +# Depth loss +_C.TRAIN.CRITERION.DEPTH = CN() +_C.TRAIN.CRITERION.DEPTH.NAME = 'depth' +_C.TRAIN.CRITERION.DEPTH.LOSS = 'L1Loss' +_C.TRAIN.CRITERION.DEPTH.WEIGHT = 0.0 +_C.TRAIN.CRITERION.DEPTH.WEIGHTS = [] +_C.TRAIN.CRITERION.DEPTH.NEED_ALL = False +# Ratio(Room Height) loss +_C.TRAIN.CRITERION.RATIO = CN() +_C.TRAIN.CRITERION.RATIO.NAME = 'ratio' +_C.TRAIN.CRITERION.RATIO.LOSS = 'L1Loss' +_C.TRAIN.CRITERION.RATIO.WEIGHT = 0.0 +_C.TRAIN.CRITERION.RATIO.WEIGHTS = [] +_C.TRAIN.CRITERION.RATIO.NEED_ALL = False +# Grad(Normal) loss +_C.TRAIN.CRITERION.GRAD = CN() +_C.TRAIN.CRITERION.GRAD.NAME = 'grad' +_C.TRAIN.CRITERION.GRAD.LOSS = 'GradLoss' +_C.TRAIN.CRITERION.GRAD.WEIGHT = 0.0 +_C.TRAIN.CRITERION.GRAD.WEIGHTS = [1.0, 1.0] +_C.TRAIN.CRITERION.GRAD.NEED_ALL = True +# Object loss +_C.TRAIN.CRITERION.OBJECT = CN() +_C.TRAIN.CRITERION.OBJECT.NAME = 'object' +_C.TRAIN.CRITERION.OBJECT.LOSS = 'ObjectLoss' +_C.TRAIN.CRITERION.OBJECT.WEIGHT = 0.0 +_C.TRAIN.CRITERION.OBJECT.WEIGHTS = [] +_C.TRAIN.CRITERION.OBJECT.NEED_ALL = True +# Heatmap loss +_C.TRAIN.CRITERION.CHM = CN() +_C.TRAIN.CRITERION.CHM.NAME = 'corner_heat_map' +_C.TRAIN.CRITERION.CHM.LOSS = 'HeatmapLoss' +_C.TRAIN.CRITERION.CHM.WEIGHT = 0.0 +_C.TRAIN.CRITERION.CHM.WEIGHTS = [] +_C.TRAIN.CRITERION.CHM.NEED_ALL = False + +_C.TRAIN.VIS_MERGE = True +_C.TRAIN.VIS_WEIGHT = 1024 +# ----------------------------------------------------------------------------- +# Output settings +# ----------------------------------------------------------------------------- +_C.CKPT = CN() +_C.CKPT.PYTORCH = './' +_C.CKPT.ROOT = 
"./checkpoints" +_C.CKPT.DIR = os.path.join(_C.CKPT.ROOT, _C.MODEL.NAME, _C.TAG) +_C.CKPT.RESULT_DIR = os.path.join(_C.CKPT.DIR, 'results', _C.MODE) + +_C.LOGGER = CN() +_C.LOGGER.DIR = os.path.join(_C.CKPT.DIR, "logs") +_C.LOGGER.LEVEL = logging.DEBUG + +# ----------------------------------------------------------------------------- +# Misc +# ----------------------------------------------------------------------------- +# Mixed precision opt level, if O0, no amp is used ('O0', 'O1', 'O2'), Please confirm your device support FP16(Half). +# overwritten by command line argument +_C.AMP_OPT_LEVEL = 'O1' +# Path to output folder, overwritten by command line argument +_C.OUTPUT = '' +# Tag of experiment, overwritten by command line argument +_C.TAG = 'default' +# Frequency to save checkpoint +_C.SAVE_FREQ = 1 +# Frequency to logging info +_C.PRINT_FREQ = 10 +# Fixed random seed +_C.SEED = 0 +# Perform evaluation only, overwritten by command line argument +_C.EVAL_MODE = False +# Test throughput only, overwritten by command line argument +_C.THROUGHPUT_MODE = False + +# ----------------------------------------------------------------------------- +# FIX +# ----------------------------------------------------------------------------- +_C.LOCAL_RANK = 0 +_C.WORLD_SIZE = 0 + +# ----------------------------------------------------------------------------- +# Data settings +# ----------------------------------------------------------------------------- +_C.DATA = CN() +# Sub dataset of pano_s2d3d +_C.DATA.SUBSET = None +# Dataset name +_C.DATA.DATASET = 'mp3d' +# Path to dataset, could be overwritten by command line argument +_C.DATA.DIR = '' +# Max wall number +_C.DATA.WALL_NUM = 0 # all +# Panorama image size +_C.DATA.SHAPE = [512, 1024] +# Really camera height +_C.DATA.CAMERA_HEIGHT = 1.6 +# Pin CPU memory in DataLoader for more efficient (sometimes) transfer to GPU. 
+_C.DATA.PIN_MEMORY = True
+# Debug use: quickly test model performance
+_C.DATA.FOR_TEST_INDEX = None
+
+# Batch size for a single GPU, could be overwritten by command line argument
+_C.DATA.BATCH_SIZE = 8
+# Number of data loading threads
+_C.DATA.NUM_WORKERS = 8
+
+# Training augment
+_C.DATA.AUG = CN()
+# Flip the panorama horizontally
+_C.DATA.AUG.FLIP = True
+# Pano Stretch Data Augmentation by HorizonNet
+_C.DATA.AUG.STRETCH = True
+# Rotate the panorama horizontally
+_C.DATA.AUG.ROTATE = True
+# Gamma adjusting
+_C.DATA.AUG.GAMMA = True
+
+_C.DATA.KEYS = []
+
+
+_C.EVAL = CN()
+_C.EVAL.POST_PROCESSING = None
+_C.EVAL.NEED_CPE = False
+_C.EVAL.NEED_F1 = False
+_C.EVAL.NEED_RMSE = False
+_C.EVAL.FORCE_CUBE = False
+
+
+def merge_from_file(cfg_path):
+    config = _C.clone()
+    config.merge_from_file(cfg_path)
+    return config
+
+
+def get_config(args=None):
+    config = _C.clone()
+    if args:
+        if 'cfg' in args and args.cfg:
+            config.merge_from_file(args.cfg)
+
+        if 'mode' in args and args.mode:
+            config.MODE = args.mode
+
+        if 'debug' in args and args.debug:
+            config.DEBUG = args.debug
+
+        if 'hidden_bar' in args and args.hidden_bar:
+            config.SHOW_BAR = False
+
+        if 'bs' in args and args.bs:
+            config.DATA.BATCH_SIZE = args.bs
+
+        if 'save_eval' in args and args.save_eval:
+            config.SAVE_EVAL = True
+
+        if 'val_name' in args and args.val_name:
+            config.VAL_NAME = args.val_name
+
+        if 'post_processing' in args and args.post_processing:
+            config.EVAL.POST_PROCESSING = args.post_processing
+
+        if 'need_cpe' in args and args.need_cpe:
+            config.EVAL.NEED_CPE = args.need_cpe
+
+        if 'need_f1' in args and args.need_f1:
+            config.EVAL.NEED_F1 = args.need_f1
+
+        if 'need_rmse' in args and args.need_rmse:
+            config.EVAL.NEED_RMSE = args.need_rmse
+
+        if 'force_cube' in args and args.force_cube:
+            config.EVAL.FORCE_CUBE = args.force_cube
+
+        if 'wall_num' in args and args.wall_num:
+            config.DATA.WALL_NUM = args.wall_num
+
+    model_args = config.MODEL.ARGS[0]  # avoid clobbering the `args` parameter
+    config.CKPT.DIR = os.path.join(config.CKPT.ROOT, f"{model_args['decoder_name']}_{model_args['output_name']}_Net",
+                                   config.TAG, 'debug' if config.DEBUG else '')
+    config.CKPT.RESULT_DIR = os.path.join(config.CKPT.DIR, 'results', config.MODE)
+    config.LOGGER.DIR = os.path.join(config.CKPT.DIR, "logs")
+
+    # counts physical CPU packages via /proc/cpuinfo (Linux only)
+    core_number = os.popen("grep 'physical id' /proc/cpuinfo | sort | uniq | wc -l").read()
+
+    try:
+        core_number = int(core_number)
+        config.DATA.NUM_WORKERS = core_number * 2
+        print(f"System core number: {core_number}, num_workers: {config.DATA.NUM_WORKERS}")
+    except ValueError:
+        print(f"Can't get system core number, will use config: {config.DATA.NUM_WORKERS}")
+    config.freeze()
+    return config
+
+
+def get_rank_config(cfg, local_rank, world_size):
+    local_rank = 0 if local_rank is None else local_rank
+    config = cfg.clone()
+    config.defrost()
+    if world_size > 1:
+        ids = config.TRAIN.DEVICE.split(':')[-1].split(',') if ':' in config.TRAIN.DEVICE else range(world_size)
+        config.TRAIN.DEVICE = f'cuda:{ids[local_rank]}'
+
+    config.LOCAL_RANK = local_rank
+    config.WORLD_SIZE = world_size
+    config.SEED = config.SEED + local_rank
+
+    config.freeze()
+    return config
diff --git a/convert_ckpt.py b/convert_ckpt.py
new file mode 100644
index 0000000000000000000000000000000000000000..5ab42bef760ecc52ba363540bb05b005ecbfccd1
--- /dev/null
+++ b/convert_ckpt.py
@@ -0,0 +1,61 @@
+"""
+@date: 2021/11/22
+@description: Convert a training ckpt into an inference ckpt
+"""
+import argparse
+import os
+
+import torch
+
+from config.defaults import merge_from_file
+
+
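+# Note (illustrative): the converted file stores only `checkpoint['net']`, so a
+# hypothetical consumer can load it without any training state, e.g.:
+#   net = torch.load('checkpoints/<decoder>_<output>_Net/<tag>/best.pkl',
+#                    map_location='cpu')
+# (whether `net` is a state_dict or a full module depends on how training saved it)
+
+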
+def parse_option():
+    parser = argparse.ArgumentParser(description='Convert a training ckpt into an inference ckpt')
+    parser.add_argument('--cfg',
+                        type=str,
+                        required=True,
+                        metavar='FILE',
+                        help='path of config file')
+
+    parser.add_argument('--output_path',
+                        type=str,
+                        help='path of output ckpt')
+
+    args = parser.parse_args()
+
+    print("arguments:")
+    for arg in vars(args):
+        print(arg, ":", getattr(args, arg))
+    print("-" * 50)
+    return args
+
+
+def convert_ckpt():
+    args = parse_option()
+    config = merge_from_file(args.cfg)
+    ck_dir = os.path.join("checkpoints",
+                          f"{config.MODEL.ARGS[0]['decoder_name']}_{config.MODEL.ARGS[0]['output_name']}_Net",
+                          config.TAG)
+    print(f"Processing {ck_dir}")
+    model_paths = [name for name in os.listdir(ck_dir) if '_best_' in name]
+    if len(model_paths) == 0:
+        print("No best ckpt found")
+        return
+    model_path = os.path.join(ck_dir, model_paths[0])
+    print(f"Loading {model_path}")
+    checkpoint = torch.load(model_path, map_location=torch.device('cuda:0'))
+    net = checkpoint['net']
+    # fall back to a default location when no output path is given
+    output_path = args.output_path if args.output_path else os.path.join(ck_dir, 'best.pkl')
+    print(f"Save on: {output_path}")
+    os.makedirs(os.path.dirname(output_path), exist_ok=True)
+    torch.save(net, output_path)
+
+
+if __name__ == '__main__':
+    convert_ckpt()
diff --git a/dataset/__init__.py b/dataset/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/dataset/build.py b/dataset/build.py
new file mode 100644
index 0000000000000000000000000000000000000000..6460ad7debbc459b72815b1199d8381c281daf52
--- /dev/null
+++ b/dataset/build.py
@@ -0,0 +1,115 @@
+"""
+@Date: 2021/07/18
+@description:
+"""
+import numpy as np
+import torch.utils.data
+from dataset.mp3d_dataset import MP3DDataset
+from dataset.pano_s2d3d_dataset import PanoS2D3DDataset
+from dataset.pano_s2d3d_mix_dataset import PanoS2D3DMixDataset
+from dataset.zind_dataset import ZindDataset
+
+
+def build_loader(config, logger):
+    name = config.DATA.DATASET
+    ddp = config.WORLD_SIZE > 1
+    train_dataset = None
+    train_data_loader = None
+    if config.MODE == 'train':
+        train_dataset = build_dataset(mode='train', config=config, logger=logger)
+
+    val_dataset = build_dataset(mode=config.VAL_NAME if config.MODE != 'test' else 'test', config=config, logger=logger)
+
+    train_sampler = None
+    val_sampler = None
+    if ddp:
+        if train_dataset:
+            train_sampler = torch.utils.data.DistributedSampler(train_dataset, shuffle=True)
+        val_sampler = torch.utils.data.DistributedSampler(val_dataset, shuffle=False)
+
+    batch_size = config.DATA.BATCH_SIZE
+    num_workers = 0 if config.DEBUG else config.DATA.NUM_WORKERS
+    pin_memory = config.DATA.PIN_MEMORY
+    if train_dataset:
+        logger.info(f'Train data loader batch size: {batch_size}')
+        train_data_loader = torch.utils.data.DataLoader(
+            train_dataset, sampler=train_sampler,
+            batch_size=batch_size,
+            # shuffle and sampler are mutually exclusive in PyTorch, so shuffle
+            # only when no DistributedSampler is supplied
+            shuffle=train_sampler is None,
+            num_workers=num_workers,
+            pin_memory=pin_memory,
+            drop_last=True,
+        )
+    # shrink to the largest batch size <= batch_size that evenly divides len(val_dataset)
+    batch_size = batch_size - (len(val_dataset) % np.arange(batch_size, 0, -1)).tolist().index(0)
+    logger.info(f'Val data loader batch size: {batch_size}')
+    val_data_loader = torch.utils.data.DataLoader(
+        val_dataset, sampler=val_sampler,
+        batch_size=batch_size,
+        shuffle=False,
+        num_workers=num_workers,
+        pin_memory=pin_memory,
+        drop_last=False
+    )
+    logger.info(f'Build data loader: num_workers:{num_workers} pin_memory:{pin_memory}')
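+    # Worked example (illustrative) of the val batch-size shrink above:
+    # with len(val_dataset)=100 and batch_size=8, 100 % [8,7,6,5,...] gives
+    # [4,2,4,0,...]; the first zero sits at divisor 5, so the val loader uses
+    # batch_size=5 and evaluation drops no samples.
+    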
return train_data_loader, val_data_loader + + +def build_dataset(mode, config, logger): + name = config.DATA.DATASET + if name == 'mp3d': + dataset = MP3DDataset( + root_dir=config.DATA.DIR, + mode=mode, + shape=config.DATA.SHAPE, + max_wall_num=config.DATA.WALL_NUM, + aug=config.DATA.AUG if mode == 'train' else None, + camera_height=config.DATA.CAMERA_HEIGHT, + logger=logger, + for_test_index=config.DATA.FOR_TEST_INDEX, + keys=config.DATA.KEYS + ) + elif name == 'pano_s2d3d': + dataset = PanoS2D3DDataset( + root_dir=config.DATA.DIR, + mode=mode, + shape=config.DATA.SHAPE, + max_wall_num=config.DATA.WALL_NUM, + aug=config.DATA.AUG if mode == 'train' else None, + camera_height=config.DATA.CAMERA_HEIGHT, + logger=logger, + for_test_index=config.DATA.FOR_TEST_INDEX, + subset=config.DATA.SUBSET, + keys=config.DATA.KEYS + ) + elif name == 'pano_s2d3d_mix': + dataset = PanoS2D3DMixDataset( + root_dir=config.DATA.DIR, + mode=mode, + shape=config.DATA.SHAPE, + max_wall_num=config.DATA.WALL_NUM, + aug=config.DATA.AUG if mode == 'train' else None, + camera_height=config.DATA.CAMERA_HEIGHT, + logger=logger, + for_test_index=config.DATA.FOR_TEST_INDEX, + subset=config.DATA.SUBSET, + keys=config.DATA.KEYS + ) + elif name == 'zind': + dataset = ZindDataset( + root_dir=config.DATA.DIR, + mode=mode, + shape=config.DATA.SHAPE, + max_wall_num=config.DATA.WALL_NUM, + aug=config.DATA.AUG if mode == 'train' else None, + camera_height=config.DATA.CAMERA_HEIGHT, + logger=logger, + for_test_index=config.DATA.FOR_TEST_INDEX, + is_simple=True, + is_ceiling_flat=False, + keys=config.DATA.KEYS, + vp_align=config.EVAL.POST_PROCESSING is not None and 'manhattan' in config.EVAL.POST_PROCESSING + ) + else: + raise NotImplementedError(f"Unknown dataset: {name}") + + return dataset diff --git a/dataset/communal/__init__.py b/dataset/communal/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8ea6021ad3c5c3d080e03089095aec34106e5541 --- /dev/null +++ b/dataset/communal/__init__.py @@ -0,0 +1,4 @@ +""" +@Date: 2021/09/22 +@description: +""" diff --git a/dataset/communal/base_dataset.py b/dataset/communal/base_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..a4256581b25518957066f3b4e3c343bbcdc6f9a1 --- /dev/null +++ b/dataset/communal/base_dataset.py @@ -0,0 +1,127 @@ +""" +@Date: 2021/07/26 +@description: +""" +import numpy as np +import torch + +from utils.boundary import corners2boundary, visibility_corners, get_heat_map +from utils.conversion import xyz2depth, uv2xyz, uv2pixel +from dataset.communal.data_augmentation import PanoDataAugmentation + + +class BaseDataset(torch.utils.data.Dataset): + def __init__(self, mode, shape=None, max_wall_num=999, aug=None, camera_height=1.6, patch_num=256, keys=None): + if keys is None or len(keys) == 0: + keys = ['image', 'depth', 'ratio', 'id', 'corners'] + if shape is None: + shape = [512, 1024] + + assert mode == 'train' or mode == 'val' or mode == 'test' or mode is None, 'unknown mode!' 
+        self.mode = mode
+        self.keys = keys
+        self.shape = shape
+        self.pano_aug = None if aug is None or mode == 'val' else PanoDataAugmentation(aug)
+        self.camera_height = camera_height
+        self.max_wall_num = max_wall_num
+        self.patch_num = patch_num
+        self.data = None
+
+    def __len__(self):
+        return len(self.data)
+
+    @staticmethod
+    def get_depth(corners, plan_y=1, length=256, visible=True):
+        visible_floor_boundary = corners2boundary(corners, length=length, visible=visible)
+        # The horizon-depth relative to plan_y
+        visible_depth = xyz2depth(uv2xyz(visible_floor_boundary, plan_y), plan_y)
+        return visible_depth
+
+    def process_data(self, label, image, patch_num):
+        """
+        :param label:
+        :param image:
+        :param patch_num:
+        :return:
+        """
+        corners = label['corners']
+        if self.pano_aug is not None:
+            corners, image = self.pano_aug.execute_aug(corners, image if 'image' in self.keys else None)
+        eps = 1e-3
+        corners[:, 1] = np.clip(corners[:, 1], 0.5 + eps, 1 - eps)
+
+        output = {}
+        if 'image' in self.keys:
+            image = image.transpose(2, 0, 1)
+            output['image'] = image
+
+        visible_corners = None
+        if 'corner_class' in self.keys or 'depth' in self.keys:
+            visible_corners = visibility_corners(corners)
+
+        if 'depth' in self.keys:
+            depth = self.get_depth(visible_corners, length=patch_num, visible=False)
+            assert len(depth) == patch_num, f"{label['id']}, {len(depth)}, {self.pano_aug.parameters}, {corners}"
+            output['depth'] = depth
+
+        if 'ratio' in self.keys:
+            # Why use the ratio? When floor_height = plan_y = 1, we only need to predict the ceiling height (the ratio).
+            output['ratio'] = label['ratio']
+
+        if 'id' in self.keys:
+            output['id'] = label['id']
+
+        if 'corners' in self.keys:
+            # all corners, for evaluating Full_IoU
+            assert len(label['corners']) <= 32, f"len(label['corners']): {len(label['corners'])}"
+            output['corners'] = np.zeros((32, 2), dtype=np.float32)
+            output['corners'][:len(label['corners'])] = label['corners']
+
+        if 'corner_heat_map' in self.keys:
+            output['corner_heat_map'] = get_heat_map(visible_corners[..., 0])
+
+        if 'object' in self.keys and 'objects' in label:
+            output['object_heat_map'] = np.zeros((3, patch_num), dtype=np.float32)
+            output['object_size'] = np.zeros((3, patch_num), dtype=np.float32)  # width, height, bottom_height
+            for i, obj_type in enumerate(label['objects']):
+                if len(label['objects'][obj_type]) == 0:
+                    continue
+
+                u_s = []
+                for obj in label['objects'][obj_type]:
+                    center_u = obj['center_u']
+                    u_s.append(center_u)
+                    center_pixel_u = uv2pixel(np.array([center_u]), w=patch_num, axis=0)[0]
+                    output['object_size'][0, center_pixel_u] = obj['width_u']
+                    output['object_size'][1, center_pixel_u] = obj['height_v']
+                    output['object_size'][2, center_pixel_u] = obj['boundary_v']
+                output['object_heat_map'][i] = get_heat_map(np.array(u_s))
+
+        return output
+
+
+if __name__ == '__main__':
+    from dataset.communal.read import read_image, read_label
+    from visualization.boundary import draw_boundaries
+    from utils.boundary import depth2boundaries
+    from tqdm import trange
+
+    # np.random.seed(0)
+    dataset = BaseDataset(mode=None)  # mode is required; None skips train/val/test handling
+    dataset.pano_aug = PanoDataAugmentation(aug={
+        'STRETCH': True,
+        'ROTATE': True,
+        'FLIP': True,
+    })
+    # pano_img = read_image("../src/demo.png")
+    # label = read_label("../src/demo.json")
+    pano_img_path = "../../src/dataset/mp3d/image/yqstnuAEVhm_6589ad7a5a0444b59adbf501c0f0fe53.png"
+    label_path = "../../src/dataset/mp3d/label/yqstnuAEVhm_6589ad7a5a0444b59adbf501c0f0fe53.json"
+    pano_img = read_image(pano_img_path)
+    label = read_label(label_path)
+
+    # batch test
+    
for i in trange(1): + output = dataset.process_data(label, pano_img, 256) + boundary_list = depth2boundaries(output['ratio'], output['depth'], step=None) + draw_boundaries(output['image'].transpose(1, 2, 0), boundary_list=boundary_list, show=True) diff --git a/dataset/communal/data_augmentation.py b/dataset/communal/data_augmentation.py new file mode 100644 index 0000000000000000000000000000000000000000..d4656acf518be6972276e7cb4e42dcf402a79c98 --- /dev/null +++ b/dataset/communal/data_augmentation.py @@ -0,0 +1,279 @@ +""" +@Date: 2021/07/27 +@description: +""" +import numpy as np +import cv2 +import functools + +from utils.conversion import pixel2lonlat, lonlat2pixel, uv2lonlat, lonlat2uv, pixel2uv + + +@functools.lru_cache() +def prepare_stretch(w, h): + lon = pixel2lonlat(np.array(range(w)), w=w, axis=0) + lat = pixel2lonlat(np.array(range(h)), h=h, axis=1) + sin_lon = np.sin(lon) + cos_lon = np.cos(lon) + tan_lat = np.tan(lat) + return sin_lon, cos_lon, tan_lat + + +def pano_stretch_image(pano_img, kx, ky, kz): + """ + Note that this is the inverse mapping, which refers to Equation 3 in HorizonNet paper (the coordinate system in + the paper is different from here, xz needs to be swapped) + :param pano_img: a panorama image, shape must be [h,w,c] + :param kx: stretching along left-right direction + :param ky: stretching along up-down direction + :param kz: stretching along front-back direction + :return: + """ + w = pano_img.shape[1] + h = pano_img.shape[0] + + sin_lon, cos_lon, tan_lat = prepare_stretch(w, h) + + n_lon = np.arctan2(sin_lon * kz / kx, cos_lon) + n_lat = np.arctan(tan_lat[..., None] * np.sin(n_lon) / sin_lon * kx / ky) + n_pu = lonlat2pixel(n_lon, w=w, axis=0, need_round=False) + n_pv = lonlat2pixel(n_lat, h=h, axis=1, need_round=False) + + pixel_map = np.empty((h, w, 2), dtype=np.float32) + pixel_map[..., 0] = n_pu + pixel_map[..., 1] = n_pv + map1 = pixel_map[..., 0] + map2 = pixel_map[..., 1] + # using wrap mode because it is continues at left or right of panorama + new_img = cv2.remap(pano_img, map1, map2, interpolation=cv2.INTER_LINEAR, borderMode=cv2.BORDER_WRAP) + return new_img + + +def pano_stretch_conner(corners, kx, ky, kz): + """ + :param corners: + :param kx: stretching along left-right direction + :param ky: stretching along up-down direction + :param kz: stretching along front-back direction + :return: + """ + + lonlat = uv2lonlat(corners) + sin_lon = np.sin(lonlat[..., 0:1]) + cos_lon = np.cos(lonlat[..., 0:1]) + tan_lat = np.tan(lonlat[..., 1:2]) + + n_lon = np.arctan2(sin_lon * kx / kz, cos_lon) + + a = np.bitwise_or(corners[..., 0] == 0.5, corners[..., 0] == 1) + b = np.bitwise_not(a) + w = np.zeros_like(n_lon) + w[b] = np.sin(n_lon[b]) / sin_lon[b] + w[a] = kx / kz + + n_lat = np.arctan(tan_lat * w / kx * ky) + + lst = [n_lon, n_lat] + lonlat = np.concatenate(lst, axis=-1) + new_corners = lonlat2uv(lonlat) + return new_corners + + +def pano_stretch(pano_img, corners, kx, ky, kz): + """ + :param pano_img: a panorama image, shape must be [h,w,c] + :param corners: + :param kx: stretching along left-right direction + :param ky: stretching along up-down direction + :param kz: stretching along front-back direction + :return: + """ + new_img = pano_stretch_image(pano_img, kx, ky, kz) + new_corners = pano_stretch_conner(corners, kx, ky, kz) + return new_img, new_corners + + +class PanoDataAugmentation: + def __init__(self, aug): + self.aug = aug + self.parameters = {} + + def need_aug(self, name): + return name in self.aug and self.aug[name] + + def 
execute_space_aug(self, corners, image):
+        if image is None:
+            # keep the (corners, image) contract even when no image is provided
+            return corners, image
+
+        if self.aug is None:
+            return corners, image
+        w = image.shape[1]
+        h = image.shape[0]
+
+        if self.need_aug('STRETCH'):
+            kx = np.random.uniform(1, 2)
+            kx = 1 / kx if np.random.randint(2) == 0 else kx
+            # we found that the ky transform may cause IoU to drop (HorizonNet also applies only the x and z transforms)
+            # ky = np.random.uniform(1, 2)
+            # ky = 1 / ky if np.random.randint(2) == 0 else ky
+            ky = 1
+            kz = np.random.uniform(1, 2)
+            kz = 1 / kz if np.random.randint(2) == 0 else kz
+            image, corners = pano_stretch(image, corners, kx, ky, kz)
+            self.parameters['STRETCH'] = {'kx': kx, 'ky': ky, 'kz': kz}
+        else:
+            self.parameters['STRETCH'] = None
+
+        if self.need_aug('ROTATE'):
+            d_pu = np.random.randint(w)
+            image = np.roll(image, d_pu, axis=1)
+            corners[..., 0] = (corners[..., 0] + pixel2uv(np.array([d_pu]), w, h)) % pixel2uv(np.array([w]), w, h)
+            self.parameters['ROTATE'] = d_pu
+        else:
+            self.parameters['ROTATE'] = None
+
+        if self.need_aug('FLIP') and np.random.randint(2) == 0:
+            image = np.flip(image, axis=1).copy()
+            corners[..., 0] = pixel2uv(np.array([w]), w, h) - corners[..., 0]
+            corners = corners[::-1]
+            self.parameters['FLIP'] = True
+        else:
+            self.parameters['FLIP'] = None
+
+        return corners, image
+
+    def execute_visual_aug(self, image):
+        if self.need_aug('GAMMA'):
+            p = np.random.uniform(1, 2)
+            if np.random.randint(2) == 0:
+                p = 1 / p
+            image = image ** p
+            self.parameters['GAMMA'] = p
+        else:
+            self.parameters['GAMMA'] = None
+
+        # The following visual augmentation methods are implemented but not yet tested
+        if self.need_aug('HUE') or self.need_aug('SATURATION'):
+            image = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)
+
+            if self.need_aug('HUE') and np.random.randint(2) == 0:
+                p = np.random.uniform(-0.1, 0.1)
+                image[..., 0] = np.mod(image[..., 0] + p * 180, 180)
+                self.parameters['HUE'] = p
+            else:
+                self.parameters['HUE'] = None
+
+            if self.need_aug('SATURATION') and np.random.randint(2) == 0:
+                p = np.random.uniform(0.5, 1.5)
+                image[..., 1] = np.clip(image[..., 1] * p, 0, 1)
+                self.parameters['SATURATION'] = p
+            else:
+                self.parameters['SATURATION'] = None
+
+            image = cv2.cvtColor(image, cv2.COLOR_HSV2RGB)
+
+        if self.need_aug('CONTRAST') and np.random.randint(2) == 0:
+            p = np.random.uniform(0.9, 1.1)
+            mean = image.mean(axis=0).mean(axis=0)
+            image = (image - mean) * p + mean
+            image = np.clip(image, 0, 1)
+            self.parameters['CONTRAST'] = p
+        else:
+            self.parameters['CONTRAST'] = None
+
+        return image
+
+    def execute_aug(self, corners, image):
+        corners, image = self.execute_space_aug(corners, image)
+        if image is not None:
+            image = self.execute_visual_aug(image)
+        return corners, image
+
+
+if __name__ == '__main__1':
+    from tqdm import trange
+    from visualization.floorplan import draw_floorplan
+    from dataset.communal.read import read_image, read_label
+    from utils.time_watch import TimeWatch
+    from utils.conversion import uv2xyz
+    from utils.boundary import corners2boundary
+
+    np.random.seed(123)
+    pano_img_path = "../../src/dataset/mp3d/image/TbHJrupSAjP_f320ae084f3a447da3e8ab11dd5f9320.png"
+    label_path = "../../src/dataset/mp3d/label/TbHJrupSAjP_f320ae084f3a447da3e8ab11dd5f9320.json"
+    pano_img = read_image(pano_img_path)
+    label = read_label(label_path)
+
+    corners = label['corners']
+    ratio = label['ratio']
+
+    pano_aug = PanoDataAugmentation(aug={
+        'STRETCH': True,
+        'ROTATE': True,
+        'FLIP': True,
+        'GAMMA': True,
+        # 'HUE': True,
+        # 'SATURATION': True,
+        # 'CONTRAST': True
+    })
+
+    # 
draw_floorplan(corners, show=True, marker_color=0.5, center_color=0.8, plan_y=1.6, show_radius=8) + # draw_boundaries(pano_img, corners_list=[corners], show=True, length=1024, ratio=ratio) + + w = TimeWatch("test") + for i in trange(50000): + new_corners, new_pano_img = pano_aug.execute_aug(corners.copy(), pano_img.copy()) + # draw_floorplan(uv2xyz(new_corners, plan_y=1.6)[..., ::2], show=True, marker_color=0.5, center_color=0.8, + # show_radius=8) + # draw_boundaries(new_pano_img, corners_list=[new_corners], show=True, length=1024, ratio=ratio) + + +if __name__ == '__main__': + from utils.boundary import corners2boundary + from visualization.floorplan import draw_floorplan + from utils.boundary import visibility_corners + + corners = np.array([[0.7664539, 0.7416811], + [0.06641078, 0.6521386], + [0.30997428, 0.57855356], + [0.383300784, 0.58726823], + [0.383300775, 0.8005296], + [0.5062902, 0.74822706]]) + corners = visibility_corners(corners) + print(corners) + # draw_floorplan(uv2xyz(corners, plan_y=1.6)[..., ::2], show=True, marker_color=0.5, center_color=0.8, + # show_radius=8) + visible_floor_boundary = corners2boundary(corners, length=256, visible=True) + # visible_depth = xyz2depth(uv2xyz(visible_floor_boundary, 1), 1) + print(len(visible_floor_boundary)) + + +if __name__ == '__main__0': + from visualization.floorplan import draw_floorplan + + from dataset.communal.read import read_image, read_label + from utils.time_watch import TimeWatch + from utils.conversion import uv2xyz + + # np.random.seed(1234) + pano_img_path = "../../src/dataset/mp3d/image/VVfe2KiqLaN_35b41dcbfcf84f96878f6ca28c70e5af.png" + label_path = "../../src/dataset/mp3d/label/VVfe2KiqLaN_35b41dcbfcf84f96878f6ca28c70e5af.json" + pano_img = read_image(pano_img_path) + label = read_label(label_path) + + corners = label['corners'] + ratio = label['ratio'] + + # draw_floorplan(corners, show=True, marker_color=0.5, center_color=0.8, plan_y=1.6, show_radius=8) + + w = TimeWatch() + for i in range(5): + kx = np.random.uniform(1, 2) + kx = 1 / kx if np.random.randint(2) == 0 else kx + ky = np.random.uniform(1, 2) + ky = 1 / ky if np.random.randint(2) == 0 else ky + kz = np.random.uniform(1, 2) + kz = 1 / kz if np.random.randint(2) == 0 else kz + new_corners = pano_stretch_conner(corners.copy(), kx, ky, kz) + draw_floorplan(uv2xyz(new_corners, plan_y=1.6)[..., ::2], show=True, marker_color=0.5, center_color=0.8, + show_radius=8) diff --git a/dataset/communal/read.py b/dataset/communal/read.py new file mode 100644 index 0000000000000000000000000000000000000000..1098a9838110b48eac32c84909ae7407bbcc719f --- /dev/null +++ b/dataset/communal/read.py @@ -0,0 +1,214 @@ +""" +@Date: 2021/07/28 +@description: +""" +import os +import numpy as np +import cv2 +import json +from PIL import Image +from utils.conversion import xyz2uv, pixel2uv +from utils.height import calc_ceil_ratio + + +def read_image(image_path, shape=None): + if shape is None: + shape = [512, 1024] + img = np.array(Image.open(image_path)).astype(np.float32) / 255 + if img.shape[0] != shape[0] or img.shape[1] != shape[1]: + img = cv2.resize(img, dsize=tuple(shape[::-1]), interpolation=cv2.INTER_AREA) + + return np.array(img) + + +def read_label(label_path, data_type='MP3D'): + + if data_type == 'MP3D': + with open(label_path, 'r') as f: + label = json.load(f) + point_idx = [one['pointsIdx'][0] for one in label['layoutWalls']['walls']] + camera_height = label['cameraHeight'] + room_height = label['layoutHeight'] + camera_ceiling_height = room_height - camera_height + 
ratio = camera_ceiling_height / camera_height + + xyz = [one['xyz'] for one in label['layoutPoints']['points']] + assert len(xyz) == len(point_idx), "len(xyz) != len(point_idx)" + xyz = [xyz[i] for i in point_idx] + xyz = np.asarray(xyz, dtype=np.float32) + xyz[:, 2] *= -1 + xyz[:, 1] = camera_height + corners = xyz2uv(xyz) + elif data_type == 'Pano_S2D3D': + with open(label_path, 'r') as f: + lines = [line for line in f.readlines() if + len([c for c in line.split(' ') if c[0].isnumeric()]) > 1] + + corners_list = np.array([line.strip().split() for line in lines], np.float32) + uv_list = pixel2uv(corners_list) + ceil_uv = uv_list[::2] + floor_uv = uv_list[1::2] + ratio = calc_ceil_ratio([ceil_uv, floor_uv], mode='mean') + corners = floor_uv + else: + return None + + output = { + 'ratio': np.array([ratio], dtype=np.float32), + 'corners': corners, + 'id': os.path.basename(label_path).split('.')[0] + } + return output + + +def move_not_simple_image(data_dir, simple_panos): + import shutil + for house_index in os.listdir(data_dir): + house_path = os.path.join(data_dir, house_index) + if not os.path.isdir(house_path) or house_index == 'visualization': + continue + + floor_plan_path = os.path.join(house_path, 'floor_plans') + if os.path.exists(floor_plan_path): + print(f'move:{floor_plan_path}') + dst_floor_plan_path = floor_plan_path.replace('zind', 'zind2') + os.makedirs(dst_floor_plan_path, exist_ok=True) + shutil.move(floor_plan_path, dst_floor_plan_path) + + panos_path = os.path.join(house_path, 'panos') + for pano in os.listdir(panos_path): + pano_path = os.path.join(panos_path, pano) + pano_index = '_'.join(pano.split('.')[0].split('_')[-2:]) + if f'{house_index}_{pano_index}' not in simple_panos and os.path.exists(pano_path): + print(f'move:{pano_path}') + dst_pano_path = pano_path.replace('zind', 'zind2') + os.makedirs(os.path.dirname(dst_pano_path), exist_ok=True) + shutil.move(pano_path, dst_pano_path) + + +def read_zind(partition_path, simplicity_path, data_dir, mode, is_simple=True, + layout_type='layout_raw', is_ceiling_flat=False, plan_y=1): + with open(simplicity_path, 'r') as f: + simple_tag = json.load(f) + simple_panos = {} + for k in simple_tag.keys(): + if not simple_tag[k]: + continue + split = k.split('_') + house_index = split[0] + pano_index = '_'.join(split[-2:]) + simple_panos[f'{house_index}_{pano_index}'] = True + + # move_not_simple_image(data_dir, simple_panos) + + pano_list = [] + with open(partition_path, 'r') as f1: + house_list = json.load(f1)[mode] + + for house_index in house_list: + with open(os.path.join(data_dir, house_index, f"zind_data.json"), 'r') as f2: + data = json.load(f2) + + panos = [] + merger = data['merger'] + for floor in merger.values(): + for complete_room in floor.values(): + for partial_room in complete_room.values(): + for pano_index in partial_room: + pano = partial_room[pano_index] + pano['index'] = pano_index + panos.append(pano) + + for pano in panos: + if layout_type not in pano: + continue + pano_index = pano['index'] + + if is_simple and f'{house_index}_{pano_index}' not in simple_panos.keys(): + continue + + if is_ceiling_flat and not pano['is_ceiling_flat']: + continue + + layout = pano[layout_type] + # corners + corner_xz = np.array(layout['vertices']) + corner_xz[..., 0] = -corner_xz[..., 0] + corner_xyz = np.insert(corner_xz, 1, pano['camera_height'], axis=1) + corners = xyz2uv(corner_xyz).astype(np.float32) + + # ratio + ratio = np.array([(pano['ceiling_height'] - pano['camera_height']) / pano['camera_height']], 
dtype=np.float32) + + # Ours future work: detection window, door, opening + objects = { + 'windows': [], + 'doors': [], + 'openings': [], + } + for label_index, wdo_type in enumerate(["windows", "doors", "openings"]): + if wdo_type not in layout: + continue + + wdo_vertices = np.array(layout[wdo_type]) + if len(wdo_vertices) == 0: + continue + + assert len(wdo_vertices) % 3 == 0 + + for i in range(0, len(wdo_vertices), 3): + # In the Zind dataset, the camera height is 1, and the default camera height in our code is also 1, + # so the xyz coordinate here can be used directly + # Since we're taking the opposite z-axis, we're changing the order of left and right + + left_bottom_xyz = np.array( + [-wdo_vertices[i + 1][0], -wdo_vertices[i + 2][0], wdo_vertices[i + 1][1]]) + right_bottom_xyz = np.array( + [-wdo_vertices[i][0], -wdo_vertices[i + 2][0], wdo_vertices[i][1]]) + center_bottom_xyz = (left_bottom_xyz + right_bottom_xyz) / 2 + + center_top_xyz = center_bottom_xyz.copy() + center_top_xyz[1] = -wdo_vertices[i + 2][1] + + center_boundary_xyz = center_bottom_xyz.copy() + center_boundary_xyz[1] = plan_y + + uv = xyz2uv(np.array([left_bottom_xyz, right_bottom_xyz, + center_bottom_xyz, center_top_xyz, + center_boundary_xyz])) + + left_bottom_uv = uv[0] + right_bottom_uv = uv[1] + width_u = abs(right_bottom_uv[0] - left_bottom_uv[0]) + width_u = 1 - width_u if width_u > 0.5 else width_u + assert width_u > 0, width_u + + center_bottom_uv = uv[2] + center_top_uv = uv[3] + height_v = center_bottom_uv[1] - center_top_uv[1] + + if height_v < 0: + continue + + center_boundary_uv = uv[4] + boundary_v = center_boundary_uv[1] - center_bottom_uv[1] if wdo_type == 'windows' else 0 + boundary_v = 0 if boundary_v < 0 else boundary_v + + center_u = center_bottom_uv[0] + + objects[wdo_type].append({ + 'width_u': width_u, + 'height_v': height_v, + 'boundary_v': boundary_v, + 'center_u': center_u + }) + + pano_list.append({ + 'img_path': os.path.join(data_dir, house_index, pano['image_path']), + 'corners': corners, + 'objects': objects, + 'ratio': ratio, + 'id': f'{house_index}_{pano_index}', + 'is_inside': pano['is_inside'] + }) + return pano_list diff --git a/dataset/mp3d_dataset.py b/dataset/mp3d_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..66042e79d8794c1f57dd280b4ade9e4f24e5ba8e --- /dev/null +++ b/dataset/mp3d_dataset.py @@ -0,0 +1,110 @@ +""" +@date: 2021/6/25 +@description: +""" +import os +import json + +from dataset.communal.read import read_image, read_label +from dataset.communal.base_dataset import BaseDataset +from utils.logger import get_logger + + +class MP3DDataset(BaseDataset): + def __init__(self, root_dir, mode, shape=None, max_wall_num=0, aug=None, camera_height=1.6, logger=None, + split_list=None, patch_num=256, keys=None, for_test_index=None): + super().__init__(mode, shape, max_wall_num, aug, camera_height, patch_num, keys) + + if logger is None: + logger = get_logger() + self.root_dir = root_dir + + split_dir = os.path.join(root_dir, 'split') + label_dir = os.path.join(root_dir, 'label') + img_dir = os.path.join(root_dir, 'image') + + if split_list is None: + with open(os.path.join(split_dir, f"{mode}.txt"), 'r') as f: + split_list = [x.rstrip().split() for x in f] + + split_list.sort() + if for_test_index is not None: + split_list = split_list[:for_test_index] + + self.data = [] + invalid_num = 0 + for name in split_list: + name = "_".join(name) + img_path = os.path.join(img_dir, f"{name}.png") + label_path = os.path.join(label_dir, f"{name}.json") + 
+ if not os.path.exists(img_path): + logger.warning(f"{img_path} not exists") + invalid_num += 1 + continue + if not os.path.exists(label_path): + logger.warning(f"{label_path} not exists") + invalid_num += 1 + continue + + with open(label_path, 'r') as f: + label = json.load(f) + + if self.max_wall_num >= 10: + if label['layoutWalls']['num'] < self.max_wall_num: + invalid_num += 1 + continue + elif self.max_wall_num != 0 and label['layoutWalls']['num'] != self.max_wall_num: + invalid_num += 1 + continue + + # print(label['layoutWalls']['num']) + self.data.append([img_path, label_path]) + + logger.info( + f"Build dataset mode: {self.mode} max_wall_num: {self.max_wall_num} valid: {len(self.data)} invalid: {invalid_num}") + + def __getitem__(self, idx): + rgb_path, label_path = self.data[idx] + label = read_label(label_path, data_type='MP3D') + image = read_image(rgb_path, self.shape) + output = self.process_data(label, image, self.patch_num) + return output + + +if __name__ == "__main__": + import numpy as np + from PIL import Image + + from tqdm import tqdm + from visualization.boundary import draw_boundaries + from visualization.floorplan import draw_floorplan + from utils.boundary import depth2boundaries + from utils.conversion import uv2xyz + + modes = ['test', 'val'] + for i in range(1): + for mode in modes: + print(mode) + mp3d_dataset = MP3DDataset(root_dir='../src/dataset/mp3d', mode=mode, aug={ + 'STRETCH': True, + 'ROTATE': True, + 'FLIP': True, + 'GAMMA': True + }) + save_dir = f'../src/dataset/mp3d/visualization/{mode}' + if not os.path.isdir(save_dir): + os.makedirs(save_dir) + + bar = tqdm(mp3d_dataset, ncols=100) + for data in bar: + bar.set_description(f"Processing {data['id']}") + boundary_list = depth2boundaries(data['ratio'], data['depth'], step=None) + pano_img = draw_boundaries(data['image'].transpose(1, 2, 0), boundary_list=boundary_list, show=True) + Image.fromarray((pano_img * 255).astype(np.uint8)).save( + os.path.join(save_dir, f"{data['id']}_boundary.png")) + + floorplan = draw_floorplan(uv2xyz(boundary_list[0])[..., ::2], show=True, + marker_color=None, center_color=0.8, show_radius=None) + Image.fromarray((floorplan.squeeze() * 255).astype(np.uint8)).save( + os.path.join(save_dir, f"{data['id']}_floorplan.png")) diff --git a/dataset/pano_s2d3d_dataset.py b/dataset/pano_s2d3d_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..b6939fea1a08e5f1c1eb985b85fc739be0c53b04 --- /dev/null +++ b/dataset/pano_s2d3d_dataset.py @@ -0,0 +1,107 @@ +""" +@date: 2021/6/16 +@description: +""" +import math +import os +import numpy as np + +from dataset.communal.read import read_image, read_label +from dataset.communal.base_dataset import BaseDataset +from utils.logger import get_logger + + +class PanoS2D3DDataset(BaseDataset): + def __init__(self, root_dir, mode, shape=None, max_wall_num=0, aug=None, camera_height=1.6, logger=None, + split_list=None, patch_num=256, keys=None, for_test_index=None, subset=None): + super().__init__(mode, shape, max_wall_num, aug, camera_height, patch_num, keys) + + if logger is None: + logger = get_logger() + self.root_dir = root_dir + + if mode is None: + return + label_dir = os.path.join(root_dir, 'valid' if mode == 'val' else mode, 'label_cor') + img_dir = os.path.join(root_dir, 'valid' if mode == 'val' else mode, 'img') + + if split_list is None: + split_list = [name.split('.')[0] for name in os.listdir(label_dir) if + not name.startswith('.') and name.endswith('txt')] + + split_list.sort() + + assert subset == 'pano' 
or subset == 's2d3d' or subset is None, 'error subset' + if subset == 'pano': + split_list = [name for name in split_list if 'pano_' in name] + logger.info(f"Use PanoContext Dataset") + elif subset == 's2d3d': + split_list = [name for name in split_list if 'camera_' in name] + logger.info(f"Use Stanford2D3D Dataset") + + if for_test_index is not None: + split_list = split_list[:for_test_index] + + self.data = [] + invalid_num = 0 + for name in split_list: + img_path = os.path.join(img_dir, f"{name}.png") + label_path = os.path.join(label_dir, f"{name}.txt") + + if not os.path.exists(img_path): + logger.warning(f"{img_path} not exists") + invalid_num += 1 + continue + if not os.path.exists(label_path): + logger.warning(f"{label_path} not exists") + invalid_num += 1 + continue + + with open(label_path, 'r') as f: + lines = [line for line in f.readlines() if + len([c for c in line.split(' ') if c[0].isnumeric()]) > 1] + if len(lines) % 2 != 0: + invalid_num += 1 + continue + self.data.append([img_path, label_path]) + + logger.info( + f"Build dataset mode: {self.mode} valid: {len(self.data)} invalid: {invalid_num}") + + def __getitem__(self, idx): + rgb_path, label_path = self.data[idx] + label = read_label(label_path, data_type='Pano_S2D3D') + image = read_image(rgb_path, self.shape) + output = self.process_data(label, image, self.patch_num) + return output + + +if __name__ == '__main__': + + modes = ['test', 'val', 'train'] + for i in range(1): + for mode in modes: + print(mode) + mp3d_dataset = PanoS2D3DDataset(root_dir='../src/dataset/pano_s2d3d', mode=mode, aug={ + # 'STRETCH': True, + # 'ROTATE': True, + # 'FLIP': True, + # 'GAMMA': True + }) + continue + save_dir = f'../src/dataset/pano_s2d3d/visualization/{mode}' + if not os.path.isdir(save_dir): + os.makedirs(save_dir) + + bar = tqdm(mp3d_dataset, ncols=100) + for data in bar: + bar.set_description(f"Processing {data['id']}") + boundary_list = depth2boundaries(data['ratio'], data['depth'], step=None) + pano_img = draw_boundaries(data['image'].transpose(1, 2, 0), boundary_list=boundary_list, show=False) + Image.fromarray((pano_img * 255).astype(np.uint8)).save( + os.path.join(save_dir, f"{data['id']}_boundary.png")) + + floorplan = draw_floorplan(uv2xyz(boundary_list[0])[..., ::2], show=False, + marker_color=None, center_color=0.8, show_radius=None) + Image.fromarray((floorplan.squeeze() * 255).astype(np.uint8)).save( + os.path.join(save_dir, f"{data['id']}_floorplan.png")) diff --git a/dataset/pano_s2d3d_mix_dataset.py b/dataset/pano_s2d3d_mix_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..d8f8444b20f89b1c1b1ad274c7c7d0274ef5aa2f --- /dev/null +++ b/dataset/pano_s2d3d_mix_dataset.py @@ -0,0 +1,91 @@ +""" +@date: 2021/6/16 +@description: +""" + +import os + +from dataset.pano_s2d3d_dataset import PanoS2D3DDataset +from utils.logger import get_logger + + +class PanoS2D3DMixDataset(PanoS2D3DDataset): + def __init__(self, root_dir, mode, shape=None, max_wall_num=0, aug=None, camera_height=1.6, logger=None, + split_list=None, patch_num=256, keys=None, for_test_index=None, subset=None): + assert subset == 's2d3d' or subset == 'pano', 'error subset' + super().__init__(root_dir, None, shape, max_wall_num, aug, camera_height, logger, + split_list, patch_num, keys, None, subset) + if logger is None: + logger = get_logger() + self.mode = mode + if mode == 'train': + if subset == 'pano': + s2d3d_train_data = PanoS2D3DDataset(root_dir, 'train', shape, max_wall_num, aug, camera_height, logger, + split_list, 
patch_num, keys, None, 's2d3d').data + s2d3d_val_data = PanoS2D3DDataset(root_dir, 'val', shape, max_wall_num, aug, camera_height, logger, + split_list, patch_num, keys, None, 's2d3d').data + s2d3d_test_data = PanoS2D3DDataset(root_dir, 'test', shape, max_wall_num, aug, camera_height, logger, + split_list, patch_num, keys, None, 's2d3d').data + s2d3d_all_data = s2d3d_train_data + s2d3d_val_data + s2d3d_test_data + + pano_train_data = PanoS2D3DDataset(root_dir, 'train', shape, max_wall_num, aug, camera_height, logger, + split_list, patch_num, keys, None, 'pano').data + self.data = s2d3d_all_data + pano_train_data + elif subset == 's2d3d': + pano_train_data = PanoS2D3DDataset(root_dir, 'train', shape, max_wall_num, aug, camera_height, logger, + split_list, patch_num, keys, None, 'pano').data + pano_val_data = PanoS2D3DDataset(root_dir, 'val', shape, max_wall_num, aug, camera_height, logger, + split_list, patch_num, keys, None, 'pano').data + pano_test_data = PanoS2D3DDataset(root_dir, 'test', shape, max_wall_num, aug, camera_height, logger, + split_list, patch_num, keys, None, 'pano').data + pano_all_data = pano_train_data + pano_val_data + pano_test_data + + s2d3d_train_data = PanoS2D3DDataset(root_dir, 'train', shape, max_wall_num, aug, camera_height, logger, + split_list, patch_num, keys, None, 's2d3d').data + self.data = pano_all_data + s2d3d_train_data + else: + self.data = PanoS2D3DDataset(root_dir, mode, shape, max_wall_num, aug, camera_height, logger, + split_list, patch_num, keys, None, subset).data + + if for_test_index is not None: + self.data = self.data[:for_test_index] + logger.info(f"Build dataset mode: {self.mode} valid: {len(self.data)}") + + +if __name__ == '__main__': + import numpy as np + from PIL import Image + + from tqdm import tqdm + from visualization.boundary import draw_boundaries + from visualization.floorplan import draw_floorplan + from utils.boundary import depth2boundaries + from utils.conversion import uv2xyz + + modes = ['test', 'val', 'train'] + for i in range(1): + for mode in modes: + print(mode) + mp3d_dataset = PanoS2D3DMixDataset(root_dir='../src/dataset/pano_s2d3d', mode=mode, aug={ + # 'STRETCH': True, + # 'ROTATE': True, + # 'FLIP': True, + # 'GAMMA': True + }, subset='pano') + continue + save_dir = f'../src/dataset/pano_s2d3d/visualization1/{mode}' + if not os.path.isdir(save_dir): + os.makedirs(save_dir) + + bar = tqdm(mp3d_dataset, ncols=100) + for data in bar: + bar.set_description(f"Processing {data['id']}") + boundary_list = depth2boundaries(data['ratio'], data['depth'], step=None) + pano_img = draw_boundaries(data['image'].transpose(1, 2, 0), boundary_list=boundary_list, show=False) + Image.fromarray((pano_img * 255).astype(np.uint8)).save( + os.path.join(save_dir, f"{data['id']}_boundary.png")) + + floorplan = draw_floorplan(uv2xyz(boundary_list[0])[..., ::2], show=False, + marker_color=None, center_color=0.8, show_radius=None) + Image.fromarray((floorplan.squeeze() * 255).astype(np.uint8)).save( + os.path.join(save_dir, f"{data['id']}_floorplan.png")) diff --git a/dataset/zind_dataset.py b/dataset/zind_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..20258763fcfa6bc130e33a3889d5e88018d4708e --- /dev/null +++ b/dataset/zind_dataset.py @@ -0,0 +1,138 @@ +""" +@Date: 2021/09/22 +@description: +""" +import os +import json +import math +import numpy as np + +from dataset.communal.read import read_image, read_label, read_zind +from dataset.communal.base_dataset import BaseDataset +from utils.logger import 
get_logger +from preprocessing.filter import filter_center, filter_boundary, filter_self_intersection +from utils.boundary import calc_rotation + + +class ZindDataset(BaseDataset): + def __init__(self, root_dir, mode, shape=None, max_wall_num=0, aug=None, camera_height=1.6, logger=None, + split_list=None, patch_num=256, keys=None, for_test_index=None, + is_simple=True, is_ceiling_flat=False, vp_align=False): + # if keys is None: + # keys = ['image', 'depth', 'ratio', 'id', 'corners', 'corner_heat_map', 'object'] + super().__init__(mode, shape, max_wall_num, aug, camera_height, patch_num, keys) + if logger is None: + logger = get_logger() + self.root_dir = root_dir + self.vp_align = vp_align + + data_dir = os.path.join(root_dir) + img_dir = os.path.join(root_dir, 'image') + + pano_list = read_zind(partition_path=os.path.join(data_dir, f"zind_partition.json"), + simplicity_path=os.path.join(data_dir, f"room_shape_simplicity_labels.json"), + data_dir=data_dir, mode=mode, is_simple=is_simple, is_ceiling_flat=is_ceiling_flat) + + if for_test_index is not None: + pano_list = pano_list[:for_test_index] + if split_list: + pano_list = [pano for pano in pano_list if pano['id'] in split_list] + self.data = [] + invalid_num = 0 + for pano in pano_list: + if not os.path.exists(pano['img_path']): + logger.warning(f"{pano['img_path']} not exists") + invalid_num += 1 + continue + + if not filter_center(pano['corners']): + # logger.warning(f"{pano['id']} camera center not in layout") + # invalid_num += 1 + continue + + if self.max_wall_num >= 10: + if len(pano['corners']) < self.max_wall_num: + invalid_num += 1 + continue + elif self.max_wall_num != 0 and len(pano['corners']) != self.max_wall_num: + invalid_num += 1 + continue + + if not filter_boundary(pano['corners']): + logger.warning(f"{pano['id']} boundary cross") + invalid_num += 1 + continue + + if not filter_self_intersection(pano['corners']): + logger.warning(f"{pano['id']} self_intersection") + invalid_num += 1 + continue + + self.data.append(pano) + + logger.info( + f"Build dataset mode: {self.mode} max_wall_num: {self.max_wall_num} valid: {len(self.data)} invalid: {invalid_num}") + + def __getitem__(self, idx): + pano = self.data[idx] + rgb_path = pano['img_path'] + label = pano + image = read_image(rgb_path, self.shape) + + if self.vp_align: + # Equivalent to vanishing point alignment step + rotation = calc_rotation(corners=label['corners']) + shift = math.modf(rotation / (2 * np.pi) + 1)[0] + image = np.roll(image, round(shift * self.shape[1]), axis=1) + label['corners'][:, 0] = np.modf(label['corners'][:, 0] + shift)[0] + + output = self.process_data(label, image, self.patch_num) + return output + + +if __name__ == "__main__": + import numpy as np + from PIL import Image + + from tqdm import tqdm + from visualization.boundary import draw_boundaries, draw_object + from visualization.floorplan import draw_floorplan + from utils.boundary import depth2boundaries, calc_rotation + from utils.conversion import uv2xyz + from models.other.init_env import init_env + + init_env(123) + + modes = ['val'] + for i in range(1): + for mode in modes: + print(mode) + mp3d_dataset = ZindDataset(root_dir='../src/dataset/zind', mode=mode, aug={ + 'STRETCH': False, + 'ROTATE': False, + 'FLIP': False, + 'GAMMA': False + }) + # continue + # save_dir = f'../src/dataset/zind/visualization/{mode}' + # if not os.path.isdir(save_dir): + # os.makedirs(save_dir) + + bar = tqdm(mp3d_dataset, ncols=100) + for data in bar: + # if data['id'] != '1079_pano_18': + # continue + 
bar.set_description(f"Processing {data['id']}") + boundary_list = depth2boundaries(data['ratio'], data['depth'], step=None) + + pano_img = draw_boundaries(data['image'].transpose(1, 2, 0), boundary_list=boundary_list, show=True) + # Image.fromarray((pano_img * 255).astype(np.uint8)).save( + # os.path.join(save_dir, f"{data['id']}_boundary.png")) + # draw_object(pano_img, heat_maps=data['object_heat_map'], depth=data['depth'], + # size=data['object_size'], show=True) + # pass + # + floorplan = draw_floorplan(uv2xyz(boundary_list[0])[..., ::2], show=True, + marker_color=None, center_color=0.2) + # Image.fromarray((floorplan.squeeze() * 255).astype(np.uint8)).save( + # os.path.join(save_dir, f"{data['id']}_floorplan.png")) diff --git a/evaluation/__init__.py b/evaluation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1bf9d8dfba501e83ea5738ff98228c5756949a47 --- /dev/null +++ b/evaluation/__init__.py @@ -0,0 +1,4 @@ +""" +@date: 2021/6/29 +@description: +""" diff --git a/evaluation/accuracy.py b/evaluation/accuracy.py new file mode 100644 index 0000000000000000000000000000000000000000..754a33502a3b89e9b3ff41b14e4d4ca76f7fa8d4 --- /dev/null +++ b/evaluation/accuracy.py @@ -0,0 +1,249 @@ +""" +@date: 2021/8/4 +@description: +""" +import numpy as np +import cv2 +import scipy + +from evaluation.f1_score import f1_score_2d +from loss import GradLoss +from utils.boundary import corners2boundaries, layout2depth +from utils.conversion import depth2xyz, uv2xyz, get_u, depth2uv, xyz2uv, uv2pixel +from utils.height import calc_ceil_ratio +from evaluation.iou import calc_IoU, calc_Iou_height +from visualization.boundary import draw_boundaries +from visualization.floorplan import draw_iou_floorplan +from visualization.grad import show_grad + + +def calc_accuracy(dt, gt, visualization=False, h=512): + visb_iou_2ds = [] + visb_iou_3ds = [] + full_iou_2ds = [] + full_iou_3ds = [] + iou_heights = [] + + visb_iou_floodplans = [] + full_iou_floodplans = [] + pano_bds = [] + + if 'depth' not in dt.keys(): + dt['depth'] = gt['depth'] + + for i in range(len(gt['depth'])): + # print(i) + dt_xyz = dt['processed_xyz'][i] if 'processed_xyz' in dt else depth2xyz(np.abs(dt['depth'][i])) + visb_gt_xyz = depth2xyz(np.abs(gt['depth'][i])) + corners = gt['corners'][i] + full_gt_corners = corners[corners[..., 0] + corners[..., 1] != 0] # Take effective corners + full_gt_xyz = uv2xyz(full_gt_corners) + + dt_xz = dt_xyz[..., ::2] + visb_gt_xz = visb_gt_xyz[..., ::2] + full_gt_xz = full_gt_xyz[..., ::2] + + gt_ratio = gt['ratio'][i][0] + + if 'ratio' not in dt.keys(): + if 'boundary' in dt.keys(): + w = len(dt['boundary'][i]) + boundary = np.clip(dt['boundary'][i], 0.0001, 0.4999) + depth = np.clip(dt['depth'][i], 0.001, 9999) + dt_ceil_boundary = np.concatenate([get_u(w, is_np=True)[..., None], boundary], axis=-1) + dt_floor_boundary = depth2uv(depth) + dt_ratio = calc_ceil_ratio(boundaries=[dt_ceil_boundary, dt_floor_boundary]) + else: + dt_ratio = gt_ratio + else: + dt_ratio = dt['ratio'][i][0] + + visb_iou_2d, visb_iou_3d = calc_IoU(dt_xz, visb_gt_xz, dt_height=1 + dt_ratio, gt_height=1 + gt_ratio) + full_iou_2d, full_iou_3d = calc_IoU(dt_xz, full_gt_xz, dt_height=1 + dt_ratio, gt_height=1 + gt_ratio) + iou_height = calc_Iou_height(dt_height=1 + dt_ratio, gt_height=1 + gt_ratio) + + visb_iou_2ds.append(visb_iou_2d) + visb_iou_3ds.append(visb_iou_3d) + full_iou_2ds.append(full_iou_2d) + full_iou_3ds.append(full_iou_3d) + iou_heights.append(iou_height) + + if visualization: + pano_img = 
cv2.resize(gt['image'][i].transpose(1, 2, 0), (h*2, h)) + # visb_iou_floodplans.append(draw_iou_floorplan(dt_xz, visb_gt_xz, iou_2d=visb_iou_2d, iou_3d=visb_iou_3d, side_l=h)) + # full_iou_floodplans.append(draw_iou_floorplan(dt_xz, full_gt_xz, iou_2d=full_iou_2d, iou_3d=full_iou_3d, side_l=h)) + visb_iou_floodplans.append(draw_iou_floorplan(dt_xz, visb_gt_xz, side_l=h)) + full_iou_floodplans.append(draw_iou_floorplan(dt_xz, full_gt_xz, side_l=h)) + gt_boundaries = corners2boundaries(gt_ratio, corners_xyz=full_gt_xyz, step=None, length=1024, visible=False) + dt_boundaries = corners2boundaries(dt_ratio, corners_xyz=dt_xyz, step=None, visible=False, + length=1024)#visb_gt_xyz.shape[0] if dt_xyz.shape[0] != visb_gt_xyz.shape[0] else None) + + pano_bd = draw_boundaries(pano_img, boundary_list=gt_boundaries, boundary_color=[0, 0, 1]) + pano_bd = draw_boundaries(pano_bd, boundary_list=dt_boundaries, boundary_color=[0, 1, 0]) + pano_bds.append(pano_bd) + + visb_iou_2d = np.array(visb_iou_2ds).mean() + visb_iou_3d = np.array(visb_iou_3ds).mean() + full_iou_2d = np.array(full_iou_2ds).mean() + full_iou_3d = np.array(full_iou_3ds).mean() + iou_height = np.array(iou_heights).mean() + + if visualization: + visb_iou_floodplans = np.array(visb_iou_floodplans).transpose(0, 3, 1, 2) # NCHW + full_iou_floodplans = np.array(full_iou_floodplans).transpose(0, 3, 1, 2) # NCHW + pano_bds = np.array(pano_bds).transpose(0, 3, 1, 2) + return [visb_iou_2d, visb_iou_3d, visb_iou_floodplans],\ + [full_iou_2d, full_iou_3d, full_iou_floodplans], iou_height, pano_bds, full_iou_2ds + + +def calc_ce(dt, gt): + w = 1024 + h = 512 + ce_s = [] + for i in range(len(gt['corners'])): + floor_gt_corners = gt['corners'][i] + # Take effective corners + floor_gt_corners = floor_gt_corners[floor_gt_corners[..., 0] + floor_gt_corners[..., 1] != 0] + floor_gt_corners = np.roll(floor_gt_corners, -np.argmin(floor_gt_corners[..., 0]), 0) + gt_ratio = gt['ratio'][i][0] + ceil_gt_corners = corners2boundaries(gt_ratio, corners_uv=floor_gt_corners, step=None)[1] + gt_corners = np.concatenate((floor_gt_corners, ceil_gt_corners)) + gt_corners = uv2pixel(gt_corners, w, h) + + floor_dt_corners = xyz2uv(dt['processed_xyz'][i]) + floor_dt_corners = np.roll(floor_dt_corners, -np.argmin(floor_dt_corners[..., 0]), 0) + dt_ratio = dt['ratio'][i][0] + ceil_dt_corners = corners2boundaries(dt_ratio, corners_uv=floor_dt_corners, step=None)[1] + dt_corners = np.concatenate((floor_dt_corners, ceil_dt_corners)) + dt_corners = uv2pixel(dt_corners, w, h) + + mse = np.sqrt(((gt_corners - dt_corners) ** 2).sum(1)).mean() + ce = 100 * mse / np.sqrt(w ** 2 + h ** 2) + ce_s.append(ce) + + return np.array(ce_s).mean() + + +def calc_pe(dt, gt): + w = 1024 + h = 512 + pe_s = [] + for i in range(len(gt['corners'])): + floor_gt_corners = gt['corners'][i] + # Take effective corners + floor_gt_corners = floor_gt_corners[floor_gt_corners[..., 0] + floor_gt_corners[..., 1] != 0] + floor_gt_corners = np.roll(floor_gt_corners, -np.argmin(floor_gt_corners[..., 0]), 0) + gt_ratio = gt['ratio'][i][0] + gt_floor_boundary, gt_ceil_boundary = corners2boundaries(gt_ratio, corners_uv=floor_gt_corners, length=w) + gt_floor_boundary = uv2pixel(gt_floor_boundary, w, h) + gt_ceil_boundary = uv2pixel(gt_ceil_boundary, w, h) + + floor_dt_corners = xyz2uv(dt['processed_xyz'][i]) + floor_dt_corners = np.roll(floor_dt_corners, -np.argmin(floor_dt_corners[..., 0]), 0) + dt_ratio = dt['ratio'][i][0] + dt_floor_boundary, dt_ceil_boundary = corners2boundaries(dt_ratio, 
corners_uv=floor_dt_corners, length=w) + dt_floor_boundary = uv2pixel(dt_floor_boundary, w, h) + dt_ceil_boundary = uv2pixel(dt_ceil_boundary, w, h) + + gt_surface = np.zeros((h, w), dtype=np.int32) + gt_surface[gt_ceil_boundary[..., 1], np.arange(w)] = 1 + gt_surface[gt_floor_boundary[..., 1], np.arange(w)] = 1 + gt_surface = np.cumsum(gt_surface, axis=0) + + dt_surface = np.zeros((h, w), dtype=np.int32) + dt_surface[dt_ceil_boundary[..., 1], np.arange(w)] = 1 + dt_surface[dt_floor_boundary[..., 1], np.arange(w)] = 1 + dt_surface = np.cumsum(dt_surface, axis=0) + + pe = 100 * (dt_surface != gt_surface).sum() / (h * w) + pe_s.append(pe) + return np.array(pe_s).mean() + + +def calc_rmse_delta_1(dt, gt): + rmse_s = [] + delta_1_s = [] + for i in range(len(gt['depth'])): + gt_boundaries = corners2boundaries(gt['ratio'][i], corners_xyz=depth2xyz(gt['depth'][i]), step=None, + visible=False) + dt_xyz = dt['processed_xyz'][i] if 'processed_xyz' in dt else depth2xyz(np.abs(dt['depth'][i])) + + dt_boundaries = corners2boundaries(dt['ratio'][i], corners_xyz=dt_xyz, step=None, + length=256 if 'processed_xyz' in dt else None, + visible=True if 'processed_xyz' in dt else False) + gt_layout_depth = layout2depth(gt_boundaries, show=False) + dt_layout_depth = layout2depth(dt_boundaries, show=False) + + rmse = ((gt_layout_depth - dt_layout_depth) ** 2).mean() ** 0.5 + threshold = np.maximum(gt_layout_depth / dt_layout_depth, dt_layout_depth / gt_layout_depth) + delta_1 = (threshold < 1.25).mean() + rmse_s.append(rmse) + delta_1_s.append(delta_1) + return np.array(rmse_s).mean(), np.array(delta_1_s).mean() + + +def calc_f1_score(dt, gt, threshold=10): + w = 1024 + h = 512 + f1_s = [] + precision_s = [] + recall_s = [] + for i in range(len(gt['corners'])): + floor_gt_corners = gt['corners'][i] + # Take effective corners + floor_gt_corners = floor_gt_corners[floor_gt_corners[..., 0] + floor_gt_corners[..., 1] != 0] + floor_gt_corners = np.roll(floor_gt_corners, -np.argmin(floor_gt_corners[..., 0]), 0) + gt_ratio = gt['ratio'][i][0] + ceil_gt_corners = corners2boundaries(gt_ratio, corners_uv=floor_gt_corners, step=None)[1] + gt_corners = np.concatenate((floor_gt_corners, ceil_gt_corners)) + gt_corners = uv2pixel(gt_corners, w, h) + + floor_dt_corners = xyz2uv(dt['processed_xyz'][i]) + floor_dt_corners = np.roll(floor_dt_corners, -np.argmin(floor_dt_corners[..., 0]), 0) + dt_ratio = dt['ratio'][i][0] + ceil_dt_corners = corners2boundaries(dt_ratio, corners_uv=floor_dt_corners, step=None)[1] + dt_corners = np.concatenate((floor_dt_corners, ceil_dt_corners)) + dt_corners = uv2pixel(dt_corners, w, h) + + Fs, Ps, Rs = f1_score_2d(gt_corners, dt_corners, [threshold]) + f1_s.append(Fs[0]) + precision_s.append(Ps[0]) + recall_s.append(Rs[0]) + + return np.array(f1_s).mean(), np.array(precision_s).mean(), np.array(recall_s).mean() + + +def show_heat_map(dt, gt, vis_w=1024): + dt_heat_map = dt['corner_heat_map'].detach().cpu().numpy() + gt_heat_map = gt['corner_heat_map'].detach().cpu().numpy() + dt_heat_map_imgs = [] + gt_heat_map_imgs = [] + for i in range(len(gt['depth'])): + dt_heat_map_img = dt_heat_map[..., np.newaxis].repeat(3, axis=-1).repeat(20, axis=0) + gt_heat_map_img = gt_heat_map[..., np.newaxis].repeat(3, axis=-1).repeat(20, axis=0) + dt_heat_map_imgs.append(cv2.resize(dt_heat_map_img, (vis_w, dt_heat_map_img.shape[0])).transpose(2, 0, 1)) + gt_heat_map_imgs.append(cv2.resize(gt_heat_map_img, (vis_w, dt_heat_map_img.shape[0])).transpose(2, 0, 1)) + return dt_heat_map_imgs, gt_heat_map_imgs + + +def 
show_depth_normal_grad(dt, gt, device, vis_w=1024): + grad_conv = GradLoss().to(device).grad_conv + gt_grad_imgs = [] + dt_grad_imgs = [] + + if 'depth' not in dt.keys(): + dt['depth'] = gt['depth'] + + if vis_w == 1024: + h = 5 + else: + h = int(vis_w / (12 * 10)) + + for i in range(len(gt['depth'])): + gt_grad_img = show_grad(gt['depth'][i], grad_conv, h) + dt_grad_img = show_grad(dt['depth'][i], grad_conv, h) + vis_h = dt_grad_img.shape[0] * (vis_w // dt_grad_img.shape[1]) + gt_grad_imgs.append(cv2.resize(gt_grad_img, (vis_w, vis_h), interpolation=cv2.INTER_NEAREST).transpose(2, 0, 1)) + dt_grad_imgs.append(cv2.resize(dt_grad_img, (vis_w, vis_h), interpolation=cv2.INTER_NEAREST).transpose(2, 0, 1)) + + return gt_grad_imgs, dt_grad_imgs diff --git a/evaluation/analyse_layout_type.py b/evaluation/analyse_layout_type.py new file mode 100644 index 0000000000000000000000000000000000000000..00549d50e1d2a5435b66ad8fac0b2d143d6685d6 --- /dev/null +++ b/evaluation/analyse_layout_type.py @@ -0,0 +1,83 @@ +""" +@Date: 2022/01/31 +@description: +ZInd: +{'test': {'mw': 2789, 'aw': 381}, 'train': {'mw': 21228, 'aw': 3654}, 'val': {'mw': 2647, 'aw': 433}} + +""" +import numpy as np +import matplotlib.pyplot as plt +import json + +from tqdm import tqdm +from evaluation.iou import calc_IoU_2D +from visualization.floorplan import draw_floorplan +from visualization.boundary import draw_boundaries +from utils.conversion import depth2xyz, uv2xyz + + +def analyse_layout_type(dataset, show=False): + bar = tqdm(dataset, total=len(dataset), ncols=100) + manhattan = 0 + atlanta = 0 + corner_type = {} + for data in bar: + bar.set_description(f"Processing {data['id']}") + corners = data['corners'] + corners = corners[corners[..., 0] + corners[..., 1] != 0] # Take effective corners + corners_count = str(len(corners)) if len(corners) < 10 else "10" + if corners_count not in corner_type: + corner_type[corners_count] = 0 + corner_type[corners_count] += 1 + + all_xz = uv2xyz(corners)[..., ::2] + + c = len(all_xz) + flag = False + for i in range(c - 1): + l1 = all_xz[i + 1] - all_xz[i] + l2 = all_xz[(i + 2) % c] - all_xz[i + 1] + a = (np.linalg.norm(l1)*np.linalg.norm(l2)) + if a == 0: + continue + dot = np.dot(l1, l2)/a + if 0.9 > abs(dot) > 0.1: + # cos-1(0.1)=84.26 > angle > cos-1(0.9)=25.84 or + # cos-1(-0.9)=154.16 > angle > cos-1(-0.1)=95.74 + flag = True + break + if flag: + atlanta += 1 + else: + manhattan += 1 + + if flag and show: + draw_floorplan(all_xz, show=True) + draw_boundaries(data['image'].transpose(1, 2, 0), [corners], ratio=data['ratio'], show=True) + + corner_type = dict(sorted(corner_type.items(), key=lambda item: int(item[0]))) + return {'manhattan': manhattan, "atlanta": atlanta, "corner_type": corner_type} + + +def execute_analyse_layout_type(root_dir, dataset, modes=None): + if modes is None: + modes = ["train", "val", "test"] + + iou2d_d = {} + for mode in modes: + print("mode: {}".format(mode)) + types = analyse_layout_type(dataset(root_dir, mode), show=False) + iou2d_d[mode] = types + print(json.dumps(types, indent=4)) + return iou2d_d + + +if __name__ == '__main__': + from dataset.zind_dataset import ZindDataset + from dataset.mp3d_dataset import MP3DDataset + + iou2d_d = execute_analyse_layout_type(root_dir='../src/dataset/mp3d', + dataset=MP3DDataset) + # iou2d_d = execute_analyse_layout_type(root_dir='../src/dataset/zind', + # dataset=ZindDataset) + print(json.dumps(iou2d_d, indent=4)) diff --git a/evaluation/eval_visible_iou.py b/evaluation/eval_visible_iou.py new file mode 100644 index 
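`analyse_layout_type` above classifies a layout as Atlanta when some pair of adjacent walls meets at an angle that is clearly neither 0° nor 90°: the normalized dot product is the cosine of the turning angle, so `0.1 < |dot| < 0.9` corresponds to roughly 26°-84° (or 96°-154°). A self-contained sketch of the same test on made-up floor plans:

```python
import numpy as np

def is_atlanta(xz):
    c = len(xz)
    for i in range(c - 1):
        l1 = xz[i + 1] - xz[i]
        l2 = xz[(i + 2) % c] - xz[i + 1]
        norm = np.linalg.norm(l1) * np.linalg.norm(l2)
        if norm == 0:
            continue
        dot = np.dot(l1, l2) / norm          # cosine of the turning angle between walls
        if 0.9 > abs(dot) > 0.1:
            return True
    return False

rect = np.array([[0, 0], [2, 0], [2, 1], [0, 1]], dtype=float)     # right angles only
slanted = np.array([[0, 0], [2, 0], [3, 1], [0, 1]], dtype=float)  # one 45-degree wall

print(is_atlanta(rect))     # False -> counted as Manhattan
print(is_atlanta(slanted))  # True  -> counted as Atlanta
```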
0000000000000000000000000000000000000000..be1883d816d784e68f31420c0f85c64485f653ca --- /dev/null +++ b/evaluation/eval_visible_iou.py @@ -0,0 +1,56 @@ +""" +@Date: 2021/08/02 +@description: +The 2D IoU between the visible boundary and the full boundary; e.g. on the MP3D dataset it evaluates to +{'train': 0.9775843958583535, 'test': 0.9828616219607289, 'val': 0.9883810438132491}, +indicating that the best performance of our approach is bounded above by 98.29% 2D IoU on the test split. +""" +import numpy as np +import matplotlib.pyplot as plt + +from tqdm import tqdm +from evaluation.iou import calc_IoU_2D +from visualization.floorplan import draw_iou_floorplan +from utils.conversion import depth2xyz, uv2xyz + + +def eval_dataset_visible_IoU(dataset, show=False): + bar = tqdm(dataset, total=len(dataset), ncols=100) + iou2ds = [] + for data in bar: + bar.set_description(f"Processing {data['id']}") + corners = data['corners'] + corners = corners[corners[..., 0] + corners[..., 1] != 0] # Take effective corners + all_xz = uv2xyz(corners)[..., ::2] + visible_xz = depth2xyz(data['depth'])[..., ::2] + iou2d = calc_IoU_2D(all_xz, visible_xz) + iou2ds.append(iou2d) + if show: + layout_floorplan = draw_iou_floorplan(all_xz, visible_xz, iou2d=iou2d) + plt.imshow(layout_floorplan) + plt.show() + + mean_iou2d = np.array(iou2ds).mean() + return mean_iou2d + + +def execute_eval_dataset_visible_IoU(root_dir, dataset, modes=None): + if modes is None: + modes = ["train", "test", "val"] + + iou2d_d = {} + for mode in modes: + print("mode: {}".format(mode)) + iou2d = eval_dataset_visible_IoU(dataset(root_dir, mode, patch_num=1024, + keys=['depth', 'visible_corners', 'corners', 'id']), show=False) + iou2d_d[mode] = iou2d + return iou2d_d + + +if __name__ == '__main__': + from dataset.mp3d_dataset import MP3DDataset + + iou2d_d = execute_eval_dataset_visible_IoU(root_dir='../src/dataset/mp3d', + dataset=MP3DDataset, + modes=['train', 'test', 'val']) + print(iou2d_d) diff --git a/evaluation/f1_score.py b/evaluation/f1_score.py new file mode 100644 index 0000000000000000000000000000000000000000..92db643188cdc560a64688ca1af0cbfc048b8cba --- /dev/null +++ b/evaluation/f1_score.py @@ -0,0 +1,78 @@ +""" +@author: Zhigang Jiang +@time: 2022/01/28 +@description: +Holistic 3D Vision Challenge on General Room Layout Estimation Track Evaluation Package +Reference: https://github.com/bertjiazheng/indoor-layout-evaluation +""" + +from scipy.optimize import linear_sum_assignment +from scipy.spatial.distance import cdist +import numpy as np + +HEIGHT, WIDTH = 512, 1024 +MAX_DISTANCE = np.sqrt(HEIGHT**2 + WIDTH**2) + + +def f1_score_2d(gt_corners, dt_corners, thresholds): + distances = cdist(gt_corners, dt_corners) + return eval_junctions(distances, thresholds=thresholds) + + +def eval_junctions(distances, thresholds=5): + thresholds = thresholds if isinstance(thresholds, (tuple, list)) else [thresholds] + + num_gts, num_preds = distances.shape + + # filter the matches between ceiling-wall and floor-wall junctions + mask = np.zeros_like(distances, dtype=bool) # np.bool was removed in NumPy >= 1.24 + mask[:num_gts//2, :num_preds//2] = True + mask[num_gts//2:, num_preds//2:] = True + distances[~mask] = np.inf + + # F-measure under different thresholds + Fs = [] + Ps = [] + Rs = [] + for threshold in thresholds: + distances_temp = distances.copy() + + # filter the mismatched pairs + distances_temp[distances_temp > threshold] = np.inf + + # keep the rows and columns that contain non-inf elements + distances_temp = distances_temp[:, 
np.any(np.isfinite(distances_temp), axis=0)] + + if np.prod(distances_temp.shape) == 0: + Fs.append(0) + Ps.append(0) + Rs.append(0) + continue + + distances_temp = distances_temp[np.any(np.isfinite(distances_temp), axis=1), :] + + # solve the bipartite graph matching problem + row_ind, col_ind = linear_sum_assignment_with_inf(distances_temp) + true_positive = np.sum(np.isfinite(distances_temp[row_ind, col_ind])) + + # compute precision and recall + precision = true_positive / num_preds + recall = true_positive / num_gts + + # compute F measure + Fs.append(2 * precision * recall / (precision + recall)) + Ps.append(precision) + Rs.append(recall) + + return Fs, Ps, Rs + + +def linear_sum_assignment_with_inf(cost_matrix): + """ + Deal with linear_sum_assignment with inf according to + https://github.com/scipy/scipy/issues/6900#issuecomment-451735634 + """ + cost_matrix = np.copy(cost_matrix) + cost_matrix[np.isinf(cost_matrix)] = MAX_DISTANCE + return linear_sum_assignment(cost_matrix) \ No newline at end of file diff --git a/evaluation/iou.py b/evaluation/iou.py new file mode 100644 index 0000000000000000000000000000000000000000..0e4004302f50b9a55561be617d80051b55e0ff44 --- /dev/null +++ b/evaluation/iou.py @@ -0,0 +1,148 @@ +""" +@date: 2021/6/29 +@description: +The method with "_floorplan" suffix is only for comparison, which is used for calculation in LED2-net. +However, the floorplan is affected by show_radius. Setting too large will result in the decrease of accuracy, +and setting too small will result in the failure of calculation beyond the range. +""" +import numpy as np +from shapely.geometry import Polygon + + +def calc_inter_area(dt_xz, gt_xz): + """ + :param dt_xz: Prediction boundaries can also be corners, format: [[x1, z1], [x2, z2], ...] + :param gt_xz: Ground truth boundaries can also be corners, format: [[x1, z1], [x2, z2], ...] + :return: + """ + dt_polygon = Polygon(dt_xz) + gt_polygon = Polygon(gt_xz) + + dt_area = dt_polygon.area + gt_area = gt_polygon.area + inter_area = dt_polygon.intersection(gt_polygon).area + return dt_area, gt_area, inter_area + + +def calc_IoU_2D(dt_xz, gt_xz): + """ + :param dt_xz: Prediction boundaries can also be corners, format: [[x1, z1], [x2, z2], ...] + :param gt_xz: Ground truth boundaries can also be corners, format: [[x1, z1], [x2, z2], ...] + :return: + """ + dt_area, gt_area, inter_area = calc_inter_area(dt_xz, gt_xz) + iou_2d = inter_area / (gt_area + dt_area - inter_area) + return iou_2d + + +def calc_IoU_3D(dt_xz, gt_xz, dt_height, gt_height): + """ + :param dt_xz: Prediction boundaries can also be corners, format: [[x1, z1], [x2, z2], ...] + :param gt_xz: Ground truth boundaries can also be corners, format: [[x1, z1], [x2, z2], ...] + :param dt_height: + :param gt_height: + :return: + """ + dt_area, gt_area, inter_area = calc_inter_area(dt_xz, gt_xz) + dt_volume = dt_area * dt_height + gt_volume = gt_area * gt_height + inter_volume = inter_area * min(dt_height, gt_height) + iou_3d = inter_volume / (dt_volume + gt_volume - inter_volume) + return iou_3d + + +def calc_IoU(dt_xz, gt_xz, dt_height, gt_height): + """ + :param dt_xz: Prediction boundaries can also be corners, format: [[x1, z1], [x2, z2], ...] + :param gt_xz: Ground truth boundaries can also be corners, format: [[x1, z1], [x2, z2], ...] 
+ :param dt_height: + :param gt_height: + :return: + """ + dt_area, gt_area, inter_area = calc_inter_area(dt_xz, gt_xz) + iou_2d = inter_area / (gt_area + dt_area - inter_area) + + dt_volume = dt_area * dt_height + gt_volume = gt_area * gt_height + inter_volume = inter_area * min(dt_height, gt_height) + iou_3d = inter_volume / (dt_volume + gt_volume - inter_volume) + + return iou_2d, iou_3d + + +def calc_Iou_height(dt_height, gt_height): + return min(dt_height, gt_height) / max(dt_height, gt_height) + + +# the following is for testing only +def calc_inter_area_floorplan(dt_floorplan, gt_floorplan): + intersect = np.sum(np.logical_and(dt_floorplan, gt_floorplan)) + dt_area = np.sum(dt_floorplan) + gt_area = np.sum(gt_floorplan) + return dt_area, gt_area, intersect + + +def calc_IoU_2D_floorplan(dt_floorplan, gt_floorplan): + dt_area, gt_area, inter_area = calc_inter_area_floorplan(dt_floorplan, gt_floorplan) + iou_2d = inter_area / (gt_area + dt_area - inter_area) + return iou_2d + + +def calc_IoU_3D_floorplan(dt_floorplan, gt_floorplan, dt_height, gt_height): + dt_area, gt_area, inter_area = calc_inter_area_floorplan(dt_floorplan, gt_floorplan) + dt_volume = dt_area * dt_height + gt_volume = gt_area * gt_height + inter_volume = inter_area * min(dt_height, gt_height) + iou_3d = inter_volume / (dt_volume + gt_volume - inter_volume) + return iou_3d + + +def calc_IoU_floorplan(dt_floorplan, gt_floorplan, dt_height, gt_height): + dt_area, gt_area, inter_area = calc_inter_area_floorplan(dt_floorplan, gt_floorplan) + iou_2d = inter_area / (gt_area + dt_area - inter_area) + + dt_volume = dt_area * dt_height + gt_volume = gt_area * gt_height + inter_volume = inter_area * min(dt_height, gt_height) + iou_3d = inter_volume / (dt_volume + gt_volume - inter_volume) + return iou_2d, iou_3d + + +if __name__ == '__main__': + from visualization.floorplan import draw_floorplan, draw_iou_floorplan + from visualization.boundary import draw_boundaries, corners2boundaries + from utils.conversion import uv2xyz + from utils.height import height2ratio + + # dummy data + dt_floor_corners = np.array([[0.2, 0.7], + [0.4, 0.7], + [0.6, 0.7], + [0.8, 0.7]]) + dt_height = 2.8 + + gt_floor_corners = np.array([[0.3, 0.7], + [0.5, 0.7], + [0.7, 0.7], + [0.9, 0.7]]) + gt_height = 3.2 + + dt_xz = uv2xyz(dt_floor_corners)[..., ::2] + gt_xz = uv2xyz(gt_floor_corners)[..., ::2] + + dt_floorplan = draw_floorplan(dt_xz, show=False, show_radius=1) + gt_floorplan = draw_floorplan(gt_xz, show=False, show_radius=1) + # dt_floorplan = draw_floorplan(dt_xz, show=False, show_radius=2) + # gt_floorplan = draw_floorplan(gt_xz, show=False, show_radius=2) + + iou_2d, iou_3d = calc_IoU_floorplan(dt_floorplan, gt_floorplan, dt_height, gt_height) + print('use floor plan image:', iou_2d, iou_3d) + + iou_2d, iou_3d = calc_IoU(dt_xz, gt_xz, dt_height, gt_height) + print('use floor plan polygon:', iou_2d, iou_3d) + + draw_iou_floorplan(dt_xz, gt_xz, show=True, iou_2d=iou_2d, iou_3d=iou_3d) + pano_bd = draw_boundaries(np.zeros([512, 1024, 3]), corners_list=[dt_floor_corners], + boundary_color=[0, 0, 1], ratio=height2ratio(dt_height), draw_corners=False) + pano_bd = draw_boundaries(pano_bd, corners_list=[gt_floor_corners], + boundary_color=[0, 1, 0], ratio=height2ratio(gt_height), show=True, draw_corners=False) diff --git a/inference.py b/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..729fd8c17b8673647b4757f8600d8ef785b55cb8 --- /dev/null +++ b/inference.py @@ -0,0 +1,261 @@ +""" +@Date: 2021/09/19 +@description: 
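As a quick sanity check of the polygon-based IoU defined in `evaluation/iou.py` above: shapely intersects the two floor polygons exactly, and the 3D IoU extrudes the areas into volumes, sharing `min(dt_height, gt_height)` as the intersection height. A hand-checkable sketch with two unit squares overlapping by half (the heights are invented):

```python
import numpy as np
from shapely.geometry import Polygon

dt_xz = np.array([[0.0, 0.0], [1.0, 0.0], [1.0, 1.0], [0.0, 1.0]])  # unit square
gt_xz = dt_xz + np.array([0.5, 0.0])                                # shifted half a square
dt_h, gt_h = 3.0, 2.0

dt_poly, gt_poly = Polygon(dt_xz), Polygon(gt_xz)
inter = dt_poly.intersection(gt_poly).area                 # 0.5
iou_2d = inter / (dt_poly.area + gt_poly.area - inter)     # 0.5 / 1.5 = 1/3

inter_vol = inter * min(dt_h, gt_h)                        # 0.5 * 2 = 1.0
iou_3d = inter_vol / (dt_poly.area * dt_h + gt_poly.area * gt_h - inter_vol)  # 1 / 4

print(iou_2d, iou_3d)   # 0.333... 0.25
```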
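For reference, the script below is driven entirely by `parse_option`: an invocation along the lines of `python inference.py --cfg <path/to/config.yaml> --img_glob '<images>/*.png' --post_processing manhattan --output_3d` runs prediction, writes `<name>_pred.png` and `<name>_pred.json` to the output directory, and exports a mesh; the config path and image glob here are placeholders, not fixed repo paths.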
+""" +import json +import os +import argparse +import cv2 +import numpy as np +import torch +import matplotlib.pyplot as plt +import glob + +from tqdm import tqdm +from PIL import Image +from config.defaults import merge_from_file, get_config +from dataset.mp3d_dataset import MP3DDataset +from dataset.zind_dataset import ZindDataset +from models.build import build_model +from loss import GradLoss +from postprocessing.post_process import post_process +from preprocessing.pano_lsd_align import panoEdgeDetection, rotatePanorama +from utils.boundary import corners2boundaries, layout2depth +from utils.conversion import depth2xyz +from utils.logger import get_logger +from utils.misc import tensor2np_d, tensor2np +from evaluation.accuracy import show_grad +from models.lgt_net import LGT_Net +from utils.writer import xyz2json +from visualization.boundary import draw_boundaries +from visualization.floorplan import draw_floorplan, draw_iou_floorplan +from visualization.obj3d import create_3d_obj + + +def parse_option(): + parser = argparse.ArgumentParser(description='Panorama Layout Transformer training and evaluation script') + parser.add_argument('--img_glob', + type=str, + required=True, + help='image glob path') + + parser.add_argument('--cfg', + type=str, + required=True, + metavar='FILE', + help='path of config file') + + parser.add_argument('--post_processing', + type=str, + default='manhattan', + choices=['manhattan', 'atalanta', 'original'], + help='post-processing type') + + parser.add_argument('--output_dir', + type=str, + default='src/output', + help='path of output') + + parser.add_argument('--visualize_3d', action='store_true', + help='visualize_3d') + + parser.add_argument('--output_3d', action='store_true', + help='output_3d') + + parser.add_argument('--device', + type=str, + default='cuda', + help='device') + + args = parser.parse_args() + args.mode = 'test' + + print("arguments:") + for arg in vars(args): + print(arg, ":", getattr(args, arg)) + print("-" * 50) + return args + + +def visualize_2d(img, dt, show_depth=True, show_floorplan=True, show=False, save_path=None): + dt_np = tensor2np_d(dt) + dt_depth = dt_np['depth'][0] + dt_xyz = depth2xyz(np.abs(dt_depth)) + dt_ratio = dt_np['ratio'][0][0] + dt_boundaries = corners2boundaries(dt_ratio, corners_xyz=dt_xyz, step=None, visible=False, length=img.shape[1]) + vis_img = draw_boundaries(img, boundary_list=dt_boundaries, boundary_color=[0, 1, 0]) + + if 'processed_xyz' in dt: + dt_boundaries = corners2boundaries(dt_ratio, corners_xyz=dt['processed_xyz'][0], step=None, visible=False, + length=img.shape[1]) + vis_img = draw_boundaries(vis_img, boundary_list=dt_boundaries, boundary_color=[1, 0, 0]) + + if show_depth: + dt_grad_img = show_depth_normal_grad(dt) + grad_h = dt_grad_img.shape[0] + vis_merge = [ + vis_img[0:-grad_h, :, :], + dt_grad_img, + ] + vis_img = np.concatenate(vis_merge, axis=0) + # vis_img = dt_grad_img.transpose(1, 2, 0)[100:] + + if show_floorplan: + if 'processed_xyz' in dt: + floorplan = draw_iou_floorplan(dt['processed_xyz'][0][..., ::2], dt_xyz[..., ::2], + dt_board_color=[1, 0, 0, 1], gt_board_color=[0, 1, 0, 1]) + else: + floorplan = show_alpha_floorplan(dt_xyz, border_color=[0, 1, 0, 1]) + + vis_img = np.concatenate([vis_img, floorplan[:, 60:-60, :]], axis=1) + if show: + plt.imshow(vis_img) + plt.show() + if save_path: + result = Image.fromarray((vis_img * 255).astype(np.uint8)) + result.save(save_path) + return vis_img + + +def preprocess(img_ori, q_error=0.7, refine_iter=3, vp_cache_path=None): + # Align 
images with VP + if vp_cache_path is not None and os.path.exists(vp_cache_path): + with open(vp_cache_path) as f: + vp = [[float(v) for v in line.rstrip().split(' ')] for line in f.readlines()] + vp = np.array(vp) + else: + # VP detection and line segment extraction + _, vp, _, _, _, _, _ = panoEdgeDetection(img_ori, + qError=q_error, + refineIter=refine_iter) + i_img = rotatePanorama(img_ori, vp[2::-1]) + + if vp_cache_path is not None: + with open(vp_cache_path, 'w') as f: + for i in range(3): + f.write('%.6f %.6f %.6f\n' % (vp[i, 0], vp[i, 1], vp[i, 2])) + + return i_img, vp + + +def show_depth_normal_grad(dt): + grad_conv = GradLoss().to(dt['depth'].device).grad_conv + dt_grad_img = show_grad(dt['depth'][0], grad_conv, 50) + dt_grad_img = cv2.resize(dt_grad_img, (1024, 60), interpolation=cv2.INTER_NEAREST) + return dt_grad_img + + +def show_alpha_floorplan(dt_xyz, side_l=512, border_color=None): + if border_color is None: + border_color = [1, 0, 0, 1] + fill_color = [0.2, 0.2, 0.2, 0.2] + dt_floorplan = draw_floorplan(xz=dt_xyz[..., ::2], fill_color=fill_color, + border_color=border_color, side_l=side_l, show=False, center_color=[1, 0, 0, 1]) + dt_floorplan = Image.fromarray((dt_floorplan * 255).astype(np.uint8), mode='RGBA') + back = np.zeros([side_l, side_l, len(fill_color)], dtype=np.float32) # np.float was removed in NumPy >= 1.24 + back[..., :] = [0.8, 0.8, 0.8, 1] + back = Image.fromarray((back * 255).astype(np.uint8), mode='RGBA') + iou_floorplan = Image.alpha_composite(back, dt_floorplan).convert("RGB") + dt_floorplan = np.array(iou_floorplan) / 255.0 + return dt_floorplan + + +def save_pred_json(xyz, ratio, save_path): + # xyz[..., -1] = -xyz[..., -1] + json_data = xyz2json(xyz, ratio) + with open(save_path, 'w') as f: + f.write(json.dumps(json_data, indent=4) + '\n') + return json_data + + +def inference(): + if len(img_paths) == 0: + logger.error('No images found') + return + + bar = tqdm(img_paths, ncols=100) + for img_path in bar: + if not os.path.isfile(img_path): + logger.error(f'{img_path} is not a file') + continue + name = os.path.basename(img_path).split('.')[0] + bar.set_description(name) + img = np.array(Image.open(img_path).resize((1024, 512), Image.Resampling.BICUBIC))[..., :3] + if args.post_processing is not None and 'manhattan' in args.post_processing: + bar.set_description("Preprocessing") + img, vp = preprocess(img, vp_cache_path=os.path.join(args.output_dir, f"{name}_vp.txt")) + + img = (img / 255.0).astype(np.float32) + run_one_inference(img, model, args, name, logger) + + +def inference_dataset(dataset): + bar = tqdm(dataset, ncols=100) + for data in bar: + bar.set_description(data['id']) + run_one_inference(data['image'].transpose(1, 2, 0), model, args, name=data['id'], logger=logger) + + +@torch.no_grad() +def run_one_inference(img, model, args, name, logger, show=True, show_depth=True, + show_floorplan=True, mesh_format='.gltf', mesh_resolution=512): + model.eval() + logger.info("model inference...") + dt = model(torch.from_numpy(img.transpose(2, 0, 1)[None]).to(args.device)) + if args.post_processing != 'original': + logger.info(f"post-processing, type:{args.post_processing}...") + dt['processed_xyz'] = post_process(tensor2np(dt['depth']), type_name=args.post_processing) + + visualize_2d(img, dt, + show_depth=show_depth, + show_floorplan=show_floorplan, + show=show, + save_path=os.path.join(args.output_dir, f"{name}_pred.png")) + output_xyz = dt['processed_xyz'][0] if 'processed_xyz' in dt else depth2xyz(tensor2np(dt['depth'][0])) + + logger.info(f"saving predicted layout json...") + json_data = save_pred_json(output_xyz, 
tensor2np(dt['ratio'][0])[0], + save_path=os.path.join(args.output_dir, f"{name}_pred.json")) + # if args.visualize_3d: + # from visualization.visualizer.visualizer import visualize_3d + # visualize_3d(json_data, (img * 255).astype(np.uint8)) + + if args.visualize_3d or args.output_3d: + dt_boundaries = corners2boundaries(tensor2np(dt['ratio'][0])[0], corners_xyz=output_xyz, step=None, + length=mesh_resolution if 'processed_xyz' in dt else None, + visible=True if 'processed_xyz' in dt else False) + dt_layout_depth = layout2depth(dt_boundaries, show=False) + + logger.info(f"creating 3d mesh ...") + create_3d_obj(cv2.resize(img, dt_layout_depth.shape[::-1]), dt_layout_depth, + save_path=os.path.join(args.output_dir, f"{name}_3d{mesh_format}") if args.output_3d else None, + mesh=True, show=args.visualize_3d) + + +if __name__ == '__main__': + logger = get_logger() + args = parse_option() + config = get_config(args) + + if ('cuda' in args.device or 'cuda' in config.TRAIN.DEVICE) and not torch.cuda.is_available(): + logger.info(f'The {args.device} is not available, will use cpu ...') + config.defrost() + args.device = "cpu" + config.TRAIN.DEVICE = "cpu" + config.freeze() + + model, _, _, _ = build_model(config, logger) + os.makedirs(args.output_dir, exist_ok=True) + img_paths = sorted(glob.glob(args.img_glob)) + + inference() + + # dataset = MP3DDataset(root_dir='./src/dataset/mp3d', mode='test', split_list=[ + # ['7y3sRwLe3Va', '155fac2d50764bf09feb6c8f33e8fb76'], + # ['e9zR4mvMWw7', 'c904c55a5d0e420bbd6e4e030b9fe5b4'], + # ]) + # dataset = ZindDataset(root_dir='./src/dataset/zind', mode='test', split_list=[ + # '1169_pano_21', + # '0583_pano_59', + # ], vp_align=True) + # inference_dataset(dataset) diff --git a/loss/__init__.py b/loss/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..d51224b899be267ea6cbcc30ce35f990ab90f63e --- /dev/null +++ b/loss/__init__.py @@ -0,0 +1,10 @@ +""" +@date: 2021/7/19 +@description: +""" + +from torch.nn import L1Loss +from loss.led_loss import LEDLoss +from loss.grad_loss import GradLoss +from loss.boundary_loss import BoundaryLoss +from loss.object_loss import ObjectLoss, HeatmapLoss diff --git a/loss/boundary_loss.py b/loss/boundary_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..86049218de0f273b3d053641a13c92458c577759 --- /dev/null +++ b/loss/boundary_loss.py @@ -0,0 +1,51 @@ +""" +@Date: 2021/08/12 +@description: For HorizonNet, using latitudes to calculate loss. 
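The latitude used by this loss follows from panorama geometry: with the camera height normalized to 1, a floor-boundary point at horizontal distance d lies at latitude arctan(1/d), and the ceiling boundary is its counterpart scaled by the ceiling ratio. A pure-numpy sketch of that relation (distances and ratio are invented; the actual sign convention lives in the repo's `xyz2lonlat`):

```python
import numpy as np

depth = np.array([1.0, 2.0, 4.0])    # horizontal distance of floor-boundary points
ratio = 0.6                          # ceiling height relative to camera height

floor_lat = np.arctan2(1.0, depth)   # floor boundary: below the horizon
ceil_lat = np.arctan2(ratio, depth)  # ceiling boundary: above the horizon

print(np.degrees(floor_lat))         # ~[45.   26.57 14.04]
print(np.degrees(ceil_lat))          # ~[30.96 16.7   8.53]
```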
+""" +import torch +import torch.nn as nn +from utils.conversion import depth2xyz, xyz2lonlat + + +class BoundaryLoss(nn.Module): + def __init__(self): + super().__init__() + self.loss = nn.L1Loss() + + def forward(self, gt, dt): + gt_floor_xyz = depth2xyz(gt['depth']) + gt_ceil_xyz = gt_floor_xyz.clone() + gt_ceil_xyz[..., 1] = -gt['ratio'] + + gt_floor_boundary = xyz2lonlat(gt_floor_xyz)[..., -1:] + gt_ceil_boundary = xyz2lonlat(gt_ceil_xyz)[..., -1:] + + gt_boundary = torch.cat([gt_floor_boundary, gt_ceil_boundary], dim=-1).permute(0, 2, 1) + dt_boundary = dt['boundary'] + + loss = self.loss(gt_boundary, dt_boundary) + return loss + + +if __name__ == '__main__': + import numpy as np + from dataset.mp3d_dataset import MP3DDataset + + mp3d_dataset = MP3DDataset(root_dir='../src/dataset/mp3d', mode='train') + gt = mp3d_dataset.__getitem__(0) + + gt['depth'] = torch.from_numpy(gt['depth'][np.newaxis]) # batch size is 1 + gt['ratio'] = torch.from_numpy(gt['ratio'][np.newaxis]) # batch size is 1 + + dummy_dt = { + 'depth': gt['depth'].clone(), + 'boundary': torch.cat([ + xyz2lonlat(depth2xyz(gt['depth']))[..., -1:], + xyz2lonlat(depth2xyz(gt['depth'], plan_y=-gt['ratio']))[..., -1:] + ], dim=-1).permute(0, 2, 1) + } + # dummy_dt['boundary'][:, :, :20] /= 1.2 # some different + + boundary_loss = BoundaryLoss() + loss = boundary_loss(gt, dummy_dt) + print(loss) diff --git a/loss/grad_loss.py b/loss/grad_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..f77bef42e0575584a3aea34da0926a8363863c11 --- /dev/null +++ b/loss/grad_loss.py @@ -0,0 +1,57 @@ +""" +@Date: 2021/08/12 +@description: +""" + +import torch +import torch.nn as nn +import numpy as np + +from visualization.grad import get_all + + +class GradLoss(nn.Module): + def __init__(self): + super().__init__() + self.loss = nn.L1Loss() + self.cos = nn.CosineSimilarity(dim=-1, eps=0) + + self.grad_conv = nn.Conv1d(1, 1, kernel_size=3, stride=1, padding=0, bias=False, padding_mode='circular') + self.grad_conv.weight = nn.Parameter(torch.tensor([[[1, 0, -1]]]).float()) + self.grad_conv.weight.requires_grad = False + + def forward(self, gt, dt): + gt_direction, _, gt_angle_grad = get_all(gt['depth'], self.grad_conv) + dt_direction, _, dt_angle_grad = get_all(dt['depth'], self.grad_conv) + + normal_loss = (1 - self.cos(gt_direction, dt_direction)).mean() + grad_loss = self.loss(gt_angle_grad, dt_angle_grad) + return [normal_loss, grad_loss] + + +if __name__ == '__main__': + from dataset.mp3d_dataset import MP3DDataset + from utils.boundary import depth2boundaries + from utils.conversion import uv2xyz + from visualization.boundary import draw_boundaries + from visualization.floorplan import draw_floorplan + + def show_boundary(image, depth, ratio): + boundary_list = depth2boundaries(ratio, depth, step=None) + draw_boundaries(image.transpose(1, 2, 0), boundary_list=boundary_list, show=True) + draw_floorplan(uv2xyz(boundary_list[0])[..., ::2], show=True, center_color=0.8) + + mp3d_dataset = MP3DDataset(root_dir='../src/dataset/mp3d', mode='train', patch_num=256) + gt = mp3d_dataset.__getitem__(1) + gt['depth'] = torch.from_numpy(gt['depth'][np.newaxis]) # batch size is 1 + dummy_dt = { + 'depth': gt['depth'].clone(), + } + # dummy_dt['depth'][..., 20] *= 3 # some different + + # show_boundary(gt['image'], gt['depth'][0].numpy(), gt['ratio']) + # show_boundary(gt['image'], dummy_dt['depth'][0].numpy(), gt['ratio']) + + grad_loss = GradLoss() + loss = grad_loss(gt, dummy_dt) + print(loss) diff --git a/loss/led_loss.py 
b/loss/led_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..6d64700fe4796d50cb48122936bde23f66c86773 --- /dev/null +++ b/loss/led_loss.py @@ -0,0 +1,47 @@ +""" +@Date: 2021/08/12 +@description: +""" +import torch +import torch.nn as nn + + +class LEDLoss(nn.Module): + def __init__(self): + super().__init__() + self.loss = nn.L1Loss() + + def forward(self, gt, dt): + camera_height = 1.6 + gt_depth = gt['depth'] * camera_height + + dt_ceil_depth = dt['ceil_depth'] * camera_height * gt['ratio'] + dt_floor_depth = dt['depth'] * camera_height + + ceil_loss = self.loss(gt_depth, dt_ceil_depth) + floor_loss = self.loss(gt_depth, dt_floor_depth) + + loss = floor_loss + ceil_loss + + return loss + + +if __name__ == '__main__': + import numpy as np + from dataset.mp3d_dataset import MP3DDataset + + mp3d_dataset = MP3DDataset(root_dir='../src/dataset/mp3d', mode='train') + gt = mp3d_dataset.__getitem__(0) + + gt['depth'] = torch.from_numpy(gt['depth'][np.newaxis]) # batch size is 1 + gt['ratio'] = torch.from_numpy(gt['ratio'][np.newaxis]) # batch size is 1 + + dummy_dt = { + 'depth': gt['depth'].clone(), + 'ceil_depth': gt['depth'] / gt['ratio'] + } + # dummy_dt['depth'][..., :20] *= 3 # introduce some difference + + led_loss = LEDLoss() + loss = led_loss(gt, dummy_dt) + print(loss) diff --git a/loss/object_loss.py b/loss/object_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..eda7c1c15feed4586e6262326ce06ece98f885ef --- /dev/null +++ b/loss/object_loss.py @@ -0,0 +1,42 @@ +""" +@Date: 2021/08/12 +@description: +""" +import torch +import torch.nn as nn +from loss.grad_loss import GradLoss + + +class ObjectLoss(nn.Module): + def __init__(self): + super().__init__() + self.heat_map_loss = HeatmapLoss(reduction='mean') # FocalLoss(reduction='mean') + self.l1_loss = nn.SmoothL1Loss() + + def forward(self, gt, dt): + # TODO:: + return 0 + + +class HeatmapLoss(nn.Module): + def __init__(self, weight=None, alpha=2, beta=4, reduction='mean'): + super(HeatmapLoss, self).__init__() + self.alpha = alpha + self.beta = beta + self.reduction = reduction + + def forward(self, targets, inputs): + center_id = (targets == 1.0).float() + other_id = (targets != 1.0).float() + center_loss = -center_id * (1.0 - inputs) ** self.alpha * torch.log(inputs + 1e-14) + other_loss = -other_id * (1 - targets) ** self.beta * inputs ** self.alpha * torch.log(1.0 - inputs + 1e-14) + loss = center_loss + other_loss + + batch_size = loss.size(0) + if self.reduction == 'mean': + loss = torch.sum(loss) / batch_size + + if self.reduction == 'sum': + loss = torch.sum(loss) # 'sum' keeps the total loss; only 'mean' averages over the batch + + return loss diff --git a/main.py b/main.py new file mode 100644 index 0000000000000000000000000000000000000000..d5722a3ef0161f4f269c8e1beec2bb5d18ebe69e --- /dev/null +++ b/main.py @@ -0,0 +1,401 @@ +""" +@Date: 2021/07/17 +@description: +""" +import sys +import os +import shutil +import argparse +import numpy as np +import json +import torch +import torch.nn.parallel +import torch.optim +import torch.multiprocessing as mp +import torch.utils.data +import torch.utils.data.distributed +import torch.cuda + +from PIL import Image +from tqdm import tqdm +from torch.utils.tensorboard import SummaryWriter +from config.defaults import get_config, get_rank_config +from models.other.criterion import calc_criterion +from models.build import build_model +from models.other.init_env import init_env +from utils.logger import build_logger +from utils.misc import tensor2np_d, tensor2np +from dataset.build import build_loader 
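`HeatmapLoss` above is a penalty-reduced focal loss in the CornerNet style: cells whose target equals 1 contribute `(1 - p)^alpha * log(p)`, and every other cell is down-weighted by `(1 - target)^beta` so near-corner cells are penalized softly. A tiny numeric check with invented values:

```python
import torch

targets = torch.tensor([[0.0, 0.6, 1.0]])  # gaussian-smoothed corner heat map (one row)
inputs = torch.tensor([[0.1, 0.4, 0.7]])   # predicted corner probabilities
alpha, beta = 2, 4

center = (targets == 1.0).float()
center_loss = -center * (1 - inputs) ** alpha * torch.log(inputs + 1e-14)
other_loss = -(1 - center) * (1 - targets) ** beta * inputs ** alpha * torch.log(1 - inputs + 1e-14)

print((center_loss + other_loss).sum() / targets.size(0))  # tensor(0.0352); the true corner dominates
```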
+from evaluation.accuracy import calc_accuracy, show_heat_map, calc_ce, calc_pe, calc_rmse_delta_1, \ + show_depth_normal_grad, calc_f1_score +from postprocessing.post_process import post_process + +try: + from apex import amp +except ImportError: + amp = None + + +def parse_option(): + debug = True if sys.gettrace() else False + parser = argparse.ArgumentParser(description='Panorama Layout Transformer training and evaluation script') + parser.add_argument('--cfg', + type=str, + metavar='FILE', + help='path to config file') + + parser.add_argument('--mode', + type=str, + default='train', + choices=['train', 'val', 'test'], + help='train/val/test mode') + + parser.add_argument('--val_name', + type=str, + choices=['val', 'test'], + help='val name') + + parser.add_argument('--bs', type=int, + help='batch size') + + parser.add_argument('--save_eval', action='store_true', + help='save eval result') + + parser.add_argument('--post_processing', type=str, + choices=['manhattan', 'atalanta', 'manhattan_old'], + help='type of postprocessing ') + + parser.add_argument('--need_cpe', action='store_true', + help='need to evaluate corner error and pixel error') + + parser.add_argument('--need_f1', action='store_true', + help='need to evaluate f1-score of corners') + + parser.add_argument('--need_rmse', action='store_true', + help='need to evaluate root mean squared error and delta error') + + parser.add_argument('--force_cube', action='store_true', + help='force cube shape when eval') + + parser.add_argument('--wall_num', type=int, + help='wall number') + + args = parser.parse_args() + args.debug = debug + print("arguments:") + for arg in vars(args): + print(arg, ":", getattr(args, arg)) + print("-" * 50) + return args + + +def main(): + args = parse_option() + config = get_config(args) + + if config.TRAIN.SCRATCH and os.path.exists(config.CKPT.DIR) and config.MODE == 'train': + print(f"Train from scratch, delete checkpoint dir: {config.CKPT.DIR}") + f = [int(f.split('_')[-1].split('.')[0]) for f in os.listdir(config.CKPT.DIR) if 'pkl' in f] + if len(f) > 0: + last_epoch = np.array(f).max() + if last_epoch > 10: + c = input(f"delete it (last_epoch: {last_epoch})?(Y/N)\n") + if c != 'y' and c != 'Y': + exit(0) + + shutil.rmtree(config.CKPT.DIR, ignore_errors=True) + + os.makedirs(config.CKPT.DIR, exist_ok=True) + os.makedirs(config.CKPT.RESULT_DIR, exist_ok=True) + os.makedirs(config.LOGGER.DIR, exist_ok=True) + + if ':' in config.TRAIN.DEVICE: + nprocs = len(config.TRAIN.DEVICE.split(':')[-1].split(',')) + if 'cuda' in config.TRAIN.DEVICE: + if not torch.cuda.is_available(): + print(f"Cuda is not available(config is: {config.TRAIN.DEVICE}), will use cpu ...") + config.defrost() + config.TRAIN.DEVICE = "cpu" + config.freeze() + nprocs = 1 + + if config.MODE == 'train': + with open(os.path.join(config.CKPT.DIR, "config.yaml"), "w") as f: + f.write(config.dump(allow_unicode=True)) + + if config.TRAIN.DEVICE == 'cpu' or nprocs < 2: + print(f"Use single process, device:{config.TRAIN.DEVICE}") + main_worker(0, config, 1) + else: + print(f"Use {nprocs} processes ...") + mp.spawn(main_worker, nprocs=nprocs, args=(config, nprocs), join=True) + + +def main_worker(local_rank, cfg, world_size): + config = get_rank_config(cfg, local_rank, world_size) + logger = build_logger(config) + writer = SummaryWriter(config.CKPT.DIR) + logger.info(f"Comment: {config.COMMENT}") + cur_pid = os.getpid() + logger.info(f"Current process id: {cur_pid}") + torch.hub._hub_dir = config.CKPT.PYTORCH + logger.info(f"Pytorch hub dir: 
{torch.hub._hub_dir}") + init_env(config.SEED, config.TRAIN.DETERMINISTIC, config.DATA.NUM_WORKERS) + + model, optimizer, criterion, scheduler = build_model(config, logger) + train_data_loader, val_data_loader = build_loader(config, logger) + + if 'cuda' in config.TRAIN.DEVICE: + torch.cuda.set_device(config.TRAIN.DEVICE) + + if config.MODE == 'train': + train(model, train_data_loader, val_data_loader, optimizer, criterion, config, logger, writer, scheduler) + else: + iou_results, other_results = val_an_epoch(model, val_data_loader, + criterion, config, logger, writer=None, + epoch=config.TRAIN.START_EPOCH) + results = dict(iou_results, **other_results) + if config.SAVE_EVAL: + save_path = os.path.join(config.CKPT.RESULT_DIR, f"result.json") + with open(save_path, 'w+') as f: + json.dump(results, f, indent=4) + + +def save(model, optimizer, epoch, iou_d, logger, writer, config): + model.save(optimizer, epoch, accuracy=iou_d['full_3d'], logger=logger, acc_d=iou_d, config=config) + for k in model.acc_d: + writer.add_scalar(f"BestACC/{k}", model.acc_d[k]['acc'], epoch) + + +def train(model, train_data_loader, val_data_loader, optimizer, criterion, config, logger, writer, scheduler): + for epoch in range(config.TRAIN.START_EPOCH, config.TRAIN.EPOCHS): + logger.info("=" * 200) + train_an_epoch(model, train_data_loader, optimizer, criterion, config, logger, writer, epoch) + epoch_iou_d, _ = val_an_epoch(model, val_data_loader, criterion, config, logger, writer, epoch) + + if config.LOCAL_RANK == 0: + ddp = config.WORLD_SIZE > 1 + save(model.module if ddp else model, optimizer, epoch, epoch_iou_d, logger, writer, config) + + if scheduler is not None: + if scheduler.min_lr is not None and optimizer.param_groups[0]['lr'] <= scheduler.min_lr: + continue + scheduler.step() + writer.close() + + +def train_an_epoch(model, train_data_loader, optimizer, criterion, config, logger, writer, epoch=0): + logger.info(f'Start Train Epoch {epoch}/{config.TRAIN.EPOCHS - 1}') + model.train() + + if len(config.MODEL.FINE_TUNE) > 0: + model.feature_extractor.eval() + + optimizer.zero_grad() + + data_len = len(train_data_loader) + start_i = data_len * epoch * config.WORLD_SIZE + bar = enumerate(train_data_loader) + if config.LOCAL_RANK == 0 and config.SHOW_BAR: + bar = tqdm(bar, total=data_len, ncols=200) + + device = config.TRAIN.DEVICE + epoch_loss_d = {} + for i, gt in bar: + imgs = gt['image'].to(device, non_blocking=True) + gt['depth'] = gt['depth'].to(device, non_blocking=True) + gt['ratio'] = gt['ratio'].to(device, non_blocking=True) + if 'corner_heat_map' in gt: + gt['corner_heat_map'] = gt['corner_heat_map'].to(device, non_blocking=True) + if config.AMP_OPT_LEVEL != "O0" and 'cuda' in device: + imgs = imgs.type(torch.float16) + gt['depth'] = gt['depth'].type(torch.float16) + gt['ratio'] = gt['ratio'].type(torch.float16) + dt = model(imgs) + loss, batch_loss_d, epoch_loss_d = calc_criterion(criterion, gt, dt, epoch_loss_d) + if config.LOCAL_RANK == 0 and config.SHOW_BAR: + bar.set_postfix(batch_loss_d) + + optimizer.zero_grad() + if config.AMP_OPT_LEVEL != "O0" and 'cuda' in device: + with amp.scale_loss(loss, optimizer) as scaled_loss: + scaled_loss.backward() + else: + loss.backward() + optimizer.step() + + global_step = start_i + i * config.WORLD_SIZE + config.LOCAL_RANK + for key, val in batch_loss_d.items(): + writer.add_scalar(f'TrainBatchLoss/{key}', val, global_step) + + if config.LOCAL_RANK != 0: + return + + epoch_loss_d = dict(zip(epoch_loss_d.keys(), [np.array(epoch_loss_d[k]).mean() for k in 
epoch_loss_d.keys()])) + s = 'TrainEpochLoss: ' + for key, val in epoch_loss_d.items(): + writer.add_scalar(f'TrainEpochLoss/{key}', val, epoch) + s += f" {key}={val}" + logger.info(s) + writer.add_scalar('LearningRate', optimizer.param_groups[0]['lr'], epoch) + logger.info(f"LearningRate: {optimizer.param_groups[0]['lr']}") + + +@torch.no_grad() +def val_an_epoch(model, val_data_loader, criterion, config, logger, writer, epoch=0): + model.eval() + logger.info(f'Start Validate Epoch {epoch}/{config.TRAIN.EPOCHS - 1}') + data_len = len(val_data_loader) + start_i = data_len * epoch * config.WORLD_SIZE + bar = enumerate(val_data_loader) + if config.LOCAL_RANK == 0 and config.SHOW_BAR: + bar = tqdm(bar, total=data_len, ncols=200) + device = config.TRAIN.DEVICE + epoch_loss_d = {} + epoch_iou_d = { + 'visible_2d': [], + 'visible_3d': [], + 'full_2d': [], + 'full_3d': [], + 'height': [] + } + + epoch_other_d = { + 'ce': [], + 'pe': [], + 'f1': [], + 'precision': [], + 'recall': [], + 'rmse': [], + 'delta_1': [] + } + + show_index = np.random.randint(0, data_len) + for i, gt in bar: + imgs = gt['image'].to(device, non_blocking=True) + gt['depth'] = gt['depth'].to(device, non_blocking=True) + gt['ratio'] = gt['ratio'].to(device, non_blocking=True) + if 'corner_heat_map' in gt: + gt['corner_heat_map'] = gt['corner_heat_map'].to(device, non_blocking=True) + dt = model(imgs) + + vis_w = config.TRAIN.VIS_WEIGHT + visualization = False # (config.LOCAL_RANK == 0 and i == show_index) or config.SAVE_EVAL + + loss, batch_loss_d, epoch_loss_d = calc_criterion(criterion, gt, dt, epoch_loss_d) + + if config.EVAL.POST_PROCESSING is not None: + depth = tensor2np(dt['depth']) + dt['processed_xyz'] = post_process(depth, type_name=config.EVAL.POST_PROCESSING, + need_cube=config.EVAL.FORCE_CUBE) + + if config.EVAL.FORCE_CUBE and config.EVAL.NEED_CPE: + ce = calc_ce(tensor2np_d(dt), tensor2np_d(gt)) + pe = calc_pe(tensor2np_d(dt), tensor2np_d(gt)) + + epoch_other_d['ce'].append(ce) + epoch_other_d['pe'].append(pe) + + if config.EVAL.NEED_F1: + f1, precision, recall = calc_f1_score(tensor2np_d(dt), tensor2np_d(gt)) + epoch_other_d['f1'].append(f1) + epoch_other_d['precision'].append(precision) + epoch_other_d['recall'].append(recall) + + if config.EVAL.NEED_RMSE: + rmse, delta_1 = calc_rmse_delta_1(tensor2np_d(dt), tensor2np_d(gt)) + epoch_other_d['rmse'].append(rmse) + epoch_other_d['delta_1'].append(delta_1) + + visb_iou, full_iou, iou_height, pano_bds, full_iou_2ds = calc_accuracy(tensor2np_d(dt), tensor2np_d(gt), + visualization, h=vis_w // 2) + epoch_iou_d['visible_2d'].append(visb_iou[0]) + epoch_iou_d['visible_3d'].append(visb_iou[1]) + epoch_iou_d['full_2d'].append(full_iou[0]) + epoch_iou_d['full_3d'].append(full_iou[1]) + epoch_iou_d['height'].append(iou_height) + + if config.LOCAL_RANK == 0 and config.SHOW_BAR: + bar.set_postfix(batch_loss_d) + + global_step = start_i + i * config.WORLD_SIZE + config.LOCAL_RANK + + if writer: + for key, val in batch_loss_d.items(): + writer.add_scalar(f'ValBatchLoss/{key}', val, global_step) + + if not visualization: + continue + + gt_grad_imgs, dt_grad_imgs = show_depth_normal_grad(dt, gt, device, vis_w) + + dt_heat_map_imgs = None + gt_heat_map_imgs = None + if 'corner_heat_map' in gt: + dt_heat_map_imgs, gt_heat_map_imgs = show_heat_map(dt, gt, vis_w) + + if config.TRAIN.VIS_MERGE or config.SAVE_EVAL: + imgs = [] + for j in range(len(pano_bds)): + # floorplan = np.concatenate([visb_iou[2][j], full_iou[2][j]], axis=-1) + floorplan = full_iou[2][j] + margin_w = 
int(floorplan.shape[-1] * (60/512)) + floorplan = floorplan[:, :, margin_w:-margin_w] + + grad_h = dt_grad_imgs[0].shape[1] + vis_merge = [ + gt_grad_imgs[j], + pano_bds[j][:, grad_h:-grad_h], + dt_grad_imgs[j] + ] + if 'corner_heat_map' in gt: + vis_merge = [dt_heat_map_imgs[j], gt_heat_map_imgs[j]] + vis_merge + img = np.concatenate(vis_merge, axis=-2) + + img = np.concatenate([img, ], axis=-1) + # img = gt_grad_imgs[j] + imgs.append(img) + if writer: + writer.add_images('VIS/Merge', np.array(imgs), global_step) + + if config.SAVE_EVAL: + for k in range(len(imgs)): + img = imgs[k] * 255.0 + save_path = os.path.join(config.CKPT.RESULT_DIR, f"{gt['id'][k]}_{full_iou_2ds[k]:.5f}.png") + Image.fromarray(img.transpose(1, 2, 0).astype(np.uint8)).save(save_path) + + elif writer: + writer.add_images('IoU/Visible_Floorplan', visb_iou[2], global_step) + writer.add_images('IoU/Full_Floorplan', full_iou[2], global_step) + writer.add_images('IoU/Boundary', pano_bds, global_step) + writer.add_images('Grad/gt', gt_grad_imgs, global_step) + writer.add_images('Grad/dt', dt_grad_imgs, global_step) + + if config.LOCAL_RANK != 0: + return + + epoch_loss_d = dict(zip(epoch_loss_d.keys(), [np.array(epoch_loss_d[k]).mean() for k in epoch_loss_d.keys()])) + s = 'ValEpochLoss: ' + for key, val in epoch_loss_d.items(): + if writer: + writer.add_scalar(f'ValEpochLoss/{key}', val, epoch) + s += f" {key}={val}" + logger.info(s) + + epoch_iou_d = dict(zip(epoch_iou_d.keys(), [np.array(epoch_iou_d[k]).mean() for k in epoch_iou_d.keys()])) + s = 'ValEpochIoU: ' + for key, val in epoch_iou_d.items(): + if writer: + writer.add_scalar(f'ValEpochIoU/{key}', val, epoch) + s += f" {key}={val}" + logger.info(s) + epoch_other_d = dict(zip(epoch_other_d.keys(), + [np.array(epoch_other_d[k]).mean() if len(epoch_other_d[k]) > 0 else 0 for k in + epoch_other_d.keys()])) + + logger.info(f'other acc: {epoch_other_d}') + return epoch_iou_d, epoch_other_d + + +if __name__ == '__main__': + main() diff --git a/models/__init__.py b/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..6ea2bf591294feef8e5c6547a05e7ccd9a5a3697 --- /dev/null +++ b/models/__init__.py @@ -0,0 +1 @@ +from models.lgt_net import LGT_Net diff --git a/models/base_model.py b/models/base_model.py new file mode 100644 index 0000000000000000000000000000000000000000..644002752a9157ba72ffa6e27fb2b860990aa1f0 --- /dev/null +++ b/models/base_model.py @@ -0,0 +1,150 @@ +""" +@Date: 2021/07/17 +@description: +""" +import os +import torch +import torch.nn as nn +import datetime + + +class BaseModule(nn.Module): + def __init__(self, ckpt_dir=None): + super().__init__() + + self.ckpt_dir = ckpt_dir + + if ckpt_dir: + if not os.path.exists(ckpt_dir): + os.makedirs(ckpt_dir) + else: + self.model_lst = [x for x in sorted(os.listdir(self.ckpt_dir)) if x.endswith('.pkl')] + + self.last_model_path = None + self.best_model_path = None + self.best_accuracy = -float('inf') + self.acc_d = {} + + def show_parameter_number(self, logger): + total = sum(p.numel() for p in self.parameters()) + trainable = sum(p.numel() for p in self.parameters() if p.requires_grad) + logger.info('{} parameter total:{:,}, trainable:{:,}'.format(self._get_name(), total, trainable)) + + def load(self, device, logger, optimizer=None, best=False): + if len(self.model_lst) == 0: + logger.info('*'*50) + logger.info("Empty model folder! 
Using initial weights") + logger.info('*'*50) + return 0 + + last_model_lst = list(filter(lambda n: '_last_' in n, self.model_lst)) + best_model_lst = list(filter(lambda n: '_best_' in n, self.model_lst)) + + if len(last_model_lst) == 0 and len(best_model_lst) == 0: + logger.info('*'*50) + ckpt_path = os.path.join(self.ckpt_dir, self.model_lst[0]) + logger.info(f"Load: {ckpt_path}") + checkpoint = torch.load(ckpt_path, map_location=torch.device(device)) + self.load_state_dict(checkpoint, strict=False) + logger.info('*'*50) + return 0 + + checkpoint = None + if len(last_model_lst) > 0: + self.last_model_path = os.path.join(self.ckpt_dir, last_model_lst[-1]) + checkpoint = torch.load(self.last_model_path, map_location=torch.device(device)) + self.best_accuracy = checkpoint['accuracy'] + self.acc_d = checkpoint['acc_d'] + + if len(best_model_lst) > 0: + self.best_model_path = os.path.join(self.ckpt_dir, best_model_lst[-1]) + best_checkpoint = torch.load(self.best_model_path, map_location=torch.device(device)) + self.best_accuracy = best_checkpoint['accuracy'] + self.acc_d = best_checkpoint['acc_d'] + if best: + checkpoint = best_checkpoint + + for k in self.acc_d: + if isinstance(self.acc_d[k], float): + self.acc_d[k] = { + 'acc': self.acc_d[k], + 'epoch': checkpoint['epoch'] + } + + if checkpoint is None: + logger.error("Invalid checkpoint") + return + + self.load_state_dict(checkpoint['net'], strict=False) + if optimizer and not best: # when loading the best checkpoint, start with a fresh optimizer (e.g. switching from Adam to SGD) + logger.info('Load optimizer') + optimizer.load_state_dict(checkpoint['optimizer']) + for state in optimizer.state.values(): + for k, v in state.items(): + if torch.is_tensor(v): + state[k] = v.to(device) + + logger.info('*'*50) + if best: + logger.info(f"Load best: {self.best_model_path}") + else: + logger.info(f"Load last: {self.last_model_path}") + + logger.info(f"Best accuracy: {self.best_accuracy}") + logger.info(f"Last epoch: {checkpoint['epoch'] + 1}") + logger.info('*'*50) + return checkpoint['epoch'] + 1 + + def update_acc(self, acc_d, epoch, logger): + logger.info("-" * 100) + for k in acc_d: + if k not in self.acc_d.keys() or acc_d[k] > self.acc_d[k]['acc']: + self.acc_d[k] = { + 'acc': acc_d[k], + 'epoch': epoch + } + logger.info(f"Update ACC: {k} {self.acc_d[k]['acc']:.4f}({self.acc_d[k]['epoch']}-{epoch})") + logger.info("-" * 100) + + def save(self, optim, epoch, accuracy, logger, replace=True, acc_d=None, config=None): + """ + + :param config: + :param optim: + :param epoch: + :param accuracy: + :param logger: + :param replace: + :param acc_d: other evaluation metrics, e.g. visible_2/3d, full_2/3d, rmse... 
+ :return: + """ + if acc_d: + self.update_acc(acc_d, epoch, logger) + name = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S_last_{:.4f}_{}'.format(accuracy, epoch)) + name = f"model_{name}.pkl" + checkpoint = { + 'net': self.state_dict(), + 'optimizer': optim.state_dict(), + 'epoch': epoch, + 'accuracy': accuracy, + 'acc_d': acc_d + } + # FIXME:: delete always true + if (True or config.MODEL.SAVE_LAST) and epoch % config.TRAIN.SAVE_FREQ == 0: + if replace and self.last_model_path and os.path.exists(self.last_model_path): + os.remove(self.last_model_path) + self.last_model_path = os.path.join(self.ckpt_dir, name) + torch.save(checkpoint, self.last_model_path) + logger.info(f"Saved last model: {self.last_model_path}") + + if accuracy > self.best_accuracy: + self.best_accuracy = accuracy + # FIXME:: delete always true + if True or config.MODEL.SAVE_BEST: + if replace and self.best_model_path and os.path.exists(self.best_model_path): + os.remove(self.best_model_path) + self.best_model_path = os.path.join(self.ckpt_dir, name.replace('last', 'best')) + torch.save(checkpoint, self.best_model_path) + logger.info("#" * 100) + logger.info(f"Saved best model: {self.best_model_path}") + logger.info("#" * 100) \ No newline at end of file diff --git a/models/build.py b/models/build.py new file mode 100644 index 0000000000000000000000000000000000000000..6ebcd54ece723e6b42585aa97add633915071545 --- /dev/null +++ b/models/build.py @@ -0,0 +1,81 @@ +""" +@Date: 2021/07/18 +@description: +""" +import os +import models +import torch.distributed as dist +import torch + +from torch.nn import init +from torch.optim import lr_scheduler +from utils.time_watch import TimeWatch +from models.other.optimizer import build_optimizer +from models.other.criterion import build_criterion + + +def build_model(config, logger): + name = config.MODEL.NAME + w = TimeWatch(f"Build model: {name}", logger) + + ddp = config.WORLD_SIZE > 1 + if ddp: + logger.info(f"use ddp") + dist.init_process_group("nccl", init_method='tcp://127.0.0.1:23456', rank=config.LOCAL_RANK, + world_size=config.WORLD_SIZE) + + device = config.TRAIN.DEVICE + logger.info(f"Creating model: {name} to device:{device}, args:{config.MODEL.ARGS[0]}") + + net = getattr(models, name) + ckpt_dir = os.path.abspath(os.path.join(config.CKPT.DIR, os.pardir)) if config.DEBUG else config.CKPT.DIR + if len(config.MODEL.ARGS) != 0: + model = net(ckpt_dir=ckpt_dir, **config.MODEL.ARGS[0]) + else: + model = net(ckpt_dir=ckpt_dir) + logger.info(f'model dropout: {model.dropout_d}') + model = model.to(device) + optimizer = None + scheduler = None + + if config.MODE == 'train': + optimizer = build_optimizer(config, model, logger) + + config.defrost() + config.TRAIN.START_EPOCH = model.load(device, logger, optimizer, best=config.MODE != 'train' or not config.TRAIN.RESUME_LAST) + config.freeze() + + if config.MODE == 'train' and len(config.MODEL.FINE_TUNE) > 0: + for param in model.parameters(): + param.requires_grad = False + for layer in config.MODEL.FINE_TUNE: + logger.info(f'Fine-tune: {layer}') + getattr(model, layer).requires_grad_(requires_grad=True) + getattr(model, layer).reset_parameters() + + model.show_parameter_number(logger) + + if config.MODE == 'train': + if len(config.TRAIN.LR_SCHEDULER.NAME) > 0: + if 'last_epoch' not in config.TRAIN.LR_SCHEDULER.ARGS[0].keys(): + config.TRAIN.LR_SCHEDULER.ARGS[0]['last_epoch'] = config.TRAIN.START_EPOCH - 1 + + scheduler = getattr(lr_scheduler, config.TRAIN.LR_SCHEDULER.NAME)(optimizer=optimizer, + 
**config.TRAIN.LR_SCHEDULER.ARGS[0]) + logger.info(f"Use scheduler: name:{config.TRAIN.LR_SCHEDULER.NAME} args: {config.TRAIN.LR_SCHEDULER.ARGS[0]}") + logger.info(f"Current scheduler last lr: {scheduler.get_last_lr()}") + else: + scheduler = None + + if config.AMP_OPT_LEVEL != "O0" and 'cuda' in device: + import apex + logger.info(f"use amp:{config.AMP_OPT_LEVEL}") + model, optimizer = apex.amp.initialize(model, optimizer, opt_level=config.AMP_OPT_LEVEL, verbosity=0) + if ddp: + model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[config.TRAIN.DEVICE], + broadcast_buffers=True) # use rank:0 bn + + criterion = build_criterion(config, logger) + if optimizer is not None: + logger.info(f"Finally lr: {optimizer.param_groups[0]['lr']}") + return model, optimizer, criterion, scheduler diff --git a/models/lgt_net.py b/models/lgt_net.py new file mode 100644 index 0000000000000000000000000000000000000000..63b53f83fb1232f4f4667b3429767c9f28c49f20 --- /dev/null +++ b/models/lgt_net.py @@ -0,0 +1,213 @@ +import torch.nn +import torch +import torch.nn as nn +import models.modules as modules +import numpy as np + +from models.base_model import BaseModule +from models.modules.horizon_net_feature_extractor import HorizonNetFeatureExtractor +from models.modules.patch_feature_extractor import PatchFeatureExtractor +from utils.conversion import uv2depth, get_u, lonlat2depth, get_lon, lonlat2uv +from utils.height import calc_ceil_ratio +from utils.misc import tensor2np + + +class LGT_Net(BaseModule): + def __init__(self, ckpt_dir=None, backbone='resnet50', dropout=0.0, output_name='LGT', + decoder_name='Transformer', win_size=8, depth=6, + ape=None, rpe=None, corner_heat_map=False, rpe_pos=1): + super().__init__(ckpt_dir) + + self.patch_num = 256 + self.patch_dim = 1024 + self.decoder_name = decoder_name + self.output_name = output_name + self.corner_heat_map = corner_heat_map + self.dropout_d = dropout + + if backbone == 'patch': + self.feature_extractor = PatchFeatureExtractor(patch_num=self.patch_num, input_shape=[3, 512, 1024]) + else: + # feature extractor + self.feature_extractor = HorizonNetFeatureExtractor(backbone) + + if 'Transformer' in self.decoder_name: + # transformer encoder + transformer_dim = self.patch_dim + transformer_layers = depth + transformer_heads = 8 + transformer_head_dim = transformer_dim // transformer_heads + transformer_ff_dim = 2048 + rpe = None if rpe == 'None' else rpe + self.transformer = getattr(modules, decoder_name)(dim=transformer_dim, depth=transformer_layers, + heads=transformer_heads, dim_head=transformer_head_dim, + mlp_dim=transformer_ff_dim, win_size=win_size, + dropout=self.dropout_d, patch_num=self.patch_num, + ape=ape, rpe=rpe, rpe_pos=rpe_pos) + elif self.decoder_name == 'LSTM': + self.bi_rnn = nn.LSTM(input_size=self.feature_extractor.c_last, + hidden_size=self.patch_dim // 2, + num_layers=2, + dropout=self.dropout_d, + batch_first=False, + bidirectional=True) + self.drop_out = nn.Dropout(self.dropout_d) + else: + raise NotImplementedError("Only support *Transformer and LSTM") + + if self.output_name == 'LGT': + # omnidirectional-geometry aware output + self.linear_depth_output = nn.Linear(in_features=self.patch_dim, out_features=1) + self.linear_ratio = nn.Linear(in_features=self.patch_dim, out_features=1) + self.linear_ratio_output = nn.Linear(in_features=self.patch_num, out_features=1) + elif self.output_name == 'LED' or self.output_name == 'Horizon': + # horizon-depth or latitude output + self.linear = nn.Linear(in_features=self.patch_dim, 
out_features=2) + else: + raise NotImplementedError("Unknown output") + + if self.corner_heat_map: + # corners heat map output + self.linear_corner_heat_map_output = nn.Linear(in_features=self.patch_dim, out_features=1) + + self.name = f"{self.decoder_name}_{self.output_name}_Net" + + def lgt_output(self, x): + """ + :param x: [ b, 256(patch_num), 1024(d)] + :return: { + 'depth': [b, 256(patch_num & d)] + 'ratio': [b, 1(d)] + } + """ + depth = self.linear_depth_output(x) # [b, 256(patch_num), 1(d)] + depth = depth.view(-1, self.patch_num) # [b, 256(patch_num & d)] + + # ratio represent room height + ratio = self.linear_ratio(x) # [b, 256(patch_num), 1(d)] + ratio = ratio.view(-1, self.patch_num) # [b, 256(patch_num & d)] + ratio = self.linear_ratio_output(ratio) # [b, 1(d)] + output = { + 'depth': depth, + 'ratio': ratio + } + return output + + def led_output(self, x): + """ + :param x: [ b, 256(patch_num), 1024(d)] + :return: { + 'depth': [b, 256(patch_num)] + 'ceil_depth': [b, 256(patch_num)] + 'ratio': [b, 1(d)] + } + """ + bon = self.linear(x) # [b, 256(patch_num), 2(d)] + bon = bon.permute(0, 2, 1) # [b, 2(d), 256(patch_num)] + bon = torch.sigmoid(bon) + + ceil_v = bon[:, 0, :] * -0.5 + 0.5 # [b, 256(patch_num)] + floor_v = bon[:, 1, :] * 0.5 + 0.5 # [b, 256(patch_num)] + u = get_u(w=self.patch_num, is_np=False, b=ceil_v.shape[0]).to(ceil_v.device) + ceil_boundary = torch.stack((u, ceil_v), axis=-1) # [b, 256(patch_num), 2] + floor_boundary = torch.stack((u, floor_v), axis=-1) # [b, 256(patch_num), 2] + output = { + 'depth': uv2depth(floor_boundary), # [b, 256(patch_num)] + 'ceil_depth': uv2depth(ceil_boundary), # [b, 256(patch_num)] + } + # print(output['depth'].mean()) + if not self.training: + # [b, 1(d)] + output['ratio'] = calc_ceil_ratio([tensor2np(ceil_boundary), tensor2np(floor_boundary)], mode='lsq').reshape(-1, 1) + return output + + def horizon_output(self, x): + """ + :param x: [ b, 256(patch_num), 1024(d)] + :return: { + 'floor_boundary': [b, 256(patch_num)] + 'ceil_boundary': [b, 256(patch_num)] + } + """ + bon = self.linear(x) # [b, 256(patch_num), 2(d)] + bon = bon.permute(0, 2, 1) # [b, 2(d), 256(patch_num)] + + output = { + 'boundary': bon + } + if not self.training: + lon = get_lon(w=self.patch_num, is_np=False, b=bon.shape[0]).to(bon.device) + floor_lat = torch.clip(bon[:, 0, :], 1e-4, np.pi / 2) + ceil_lat = torch.clip(bon[:, 1, :], -np.pi / 2, -1e-4) + floor_lonlat = torch.stack((lon, floor_lat), axis=-1) # [b, 256(patch_num), 2] + ceil_lonlat = torch.stack((lon, ceil_lat), axis=-1) # [b, 256(patch_num), 2] + output['depth'] = lonlat2depth(floor_lonlat) + output['ratio'] = calc_ceil_ratio([tensor2np(lonlat2uv(ceil_lonlat)), + tensor2np(lonlat2uv(floor_lonlat))], mode='mean').reshape(-1, 1) + return output + + def forward(self, x): + """ + :param x: [b, 3(d), 512(h), 1024(w)] + :return: { + 'depth': [b, 256(patch_num & d)] + 'ratio': [b, 1(d)] + } + """ + + # feature extractor + x = self.feature_extractor(x) # [b 1024(d) 256(w)] + + if 'Transformer' in self.decoder_name: + # transformer decoder + x = x.permute(0, 2, 1) # [b 256(patch_num) 1024(d)] + x = self.transformer(x) # [b 256(patch_num) 1024(d)] + elif self.decoder_name == 'LSTM': + # lstm decoder + x = x.permute(2, 0, 1) # [256(patch_num), b, 1024(d)] + self.bi_rnn.flatten_parameters() + x, _ = self.bi_rnn(x) # [256(patch_num & seq_len), b, 1024(d)] + x = x.permute(1, 0, 2) # [b, 256(patch_num), 1024(d)] + x = self.drop_out(x) + + output = None + if self.output_name == 'LGT': + # plt output + output = 
self.lgt_output(x) + + elif self.output_name == 'LED': + # led output + output = self.led_output(x) + + elif self.output_name == 'Horizon': + # led output + output = self.horizon_output(x) + + if self.corner_heat_map: + corner_heat_map = self.linear_corner_heat_map_output(x) # [b, 256(patch_num), 1] + corner_heat_map = corner_heat_map.view(-1, self.patch_num) + corner_heat_map = torch.sigmoid(corner_heat_map) + output['corner_heat_map'] = corner_heat_map + + return output + + +if __name__ == '__main__': + from PIL import Image + import numpy as np + from models.other.init_env import init_env + + init_env(0, deterministic=True) + + net = LGT_Net() + + total = sum(p.numel() for p in net.parameters()) + trainable = sum(p.numel() for p in net.parameters() if p.requires_grad) + print('parameter total:{:,}, trainable:{:,}'.format(total, trainable)) + + img = np.array(Image.open("../src/demo.png")).transpose((2, 0, 1)) + input = torch.Tensor([img]) # 1 3 512 1024 + output = net(input) + + print(output['depth'].shape) # 1 256 + print(output['ratio'].shape) # 1 1 diff --git a/models/modules/__init__.py b/models/modules/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..78be267dda39cc4b64b79f79400fac9f638887f0 --- /dev/null +++ b/models/modules/__init__.py @@ -0,0 +1,8 @@ +""" +@Date: 2021/09/01 +@description: +""" + +from models.modules.swin_transformer import Swin_Transformer +from models.modules.swg_transformer import SWG_Transformer +from models.modules.transformer import Transformer diff --git a/models/modules/conv_transformer.py b/models/modules/conv_transformer.py new file mode 100644 index 0000000000000000000000000000000000000000..6fcbfe4acfc2a30e12eafd2ed74a6e7b5d25641d --- /dev/null +++ b/models/modules/conv_transformer.py @@ -0,0 +1,128 @@ +import torch +import torch.nn.functional as F + +from torch import nn, einsum +from einops import rearrange + + +class PreNorm(nn.Module): + def __init__(self, dim, fn): + super().__init__() + self.norm = nn.LayerNorm(dim) + self.fn = fn + + def forward(self, x, **kwargs): + return self.fn(self.norm(x), **kwargs) + + +class GELU(nn.Module): + def forward(self, input): + return F.gelu(input) + + +class Attend(nn.Module): + + def __init__(self, dim=None): + super().__init__() + self.dim = dim + + def forward(self, input): + return F.softmax(input, dim=self.dim, dtype=input.dtype) + + +class FeedForward(nn.Module): + def __init__(self, dim, hidden_dim, dropout=0.): + super().__init__() + self.net = nn.Sequential( + nn.Linear(dim, hidden_dim), + GELU(), + nn.Dropout(dropout), + nn.Linear(hidden_dim, dim), + nn.Dropout(dropout) + ) + + def forward(self, x): + return self.net(x) + + +class Attention(nn.Module): + def __init__(self, dim, heads=8, dim_head=64, dropout=0.): + super().__init__() + inner_dim = dim_head * heads + project_out = not (heads == 1 and dim_head == dim) + + self.heads = heads + self.scale = dim_head ** -0.5 + + self.attend = Attend(dim=-1) + self.to_qkv = nn.Linear(dim, inner_dim * 3, bias=False) + + self.to_out = nn.Sequential( + nn.Linear(inner_dim, dim), + nn.Dropout(dropout) + ) if project_out else nn.Identity() + + def forward(self, x): + b, n, _, h = *x.shape, self.heads + qkv = self.to_qkv(x).chunk(3, dim=-1) + q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> b h n d', h=h), qkv) + dots = einsum('b h i d, b h j d -> b h i j', q, k) * self.scale + attn = self.attend(dots) + out = einsum('b h i j, b h j d -> b h i d', attn, v) + out = rearrange(out, 'b h n d -> b n (h d)') + return self.to_out(out) + 
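+
+# Note on the Conv block below: it is a circular 1-D convolution. The sequence is
+# wrapped by concatenating the last token before the first (and vice versa), then a
+# kernel-3 Conv1d with padding=0 is applied, so the length is preserved while the
+# two horizontal ends of the panorama are treated as neighbours.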
+ +class Conv(nn.Module): + def __init__(self, dim, dropout=0.): + super().__init__() + self.dim = dim + self.net = nn.Sequential( + nn.Conv1d(dim, dim, kernel_size=3, stride=1, padding=0), + nn.Dropout(dropout) + ) + + def forward(self, x): + x = x.transpose(1, 2) + x = torch.cat([x[..., -1:], x, x[..., :1]], dim=-1) + x = self.net(x) + return x.transpose(1, 2) + + +class ConvTransformer(nn.Module): + def __init__(self, dim, depth, heads, dim_head, mlp_dim, dropout=0.): + super().__init__() + self.layers = nn.ModuleList([]) + for _ in range(depth): + self.layers.append(nn.ModuleList([ + PreNorm(dim, Attention(dim, heads=heads, dim_head=dim_head, dropout=dropout)), + PreNorm(dim, FeedForward(dim, mlp_dim, dropout=dropout)), + PreNorm(dim, Conv(dim, dropout=dropout)) + ])) + + def forward(self, x): + for attn, ff, cov in self.layers: + x = attn(x) + x + x = ff(x) + x + x = cov(x) + x + return x + + +if __name__ == '__main__': + token_dim = 1024 + toke_len = 256 + + transformer = ConvTransformer(dim=token_dim, + depth=6, + heads=16, + dim_head=64, + mlp_dim=2048, + dropout=0.1) + + total = sum(p.numel() for p in transformer.parameters()) + trainable = sum(p.numel() for p in transformer.parameters() if p.requires_grad) + print('parameter total:{:,}, trainable:{:,}'.format(total, trainable)) + + input = torch.randn(1, toke_len, token_dim) + output = transformer(input) + print(output.shape) diff --git a/models/modules/horizon_net_feature_extractor.py b/models/modules/horizon_net_feature_extractor.py new file mode 100644 index 0000000000000000000000000000000000000000..328e7942ef7a1441e124681fe3c7868e5b60f6be --- /dev/null +++ b/models/modules/horizon_net_feature_extractor.py @@ -0,0 +1,267 @@ +""" +@author: +@Date: 2021/07/17 +@description: Use the feature extractor proposed by HorizonNet +""" + +import numpy as np +import math +import torch +import torch.nn as nn +import torch.nn.functional as F +import torchvision.models as models +import functools +from models.base_model import BaseModule + +ENCODER_RESNET = [ + 'resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152', + 'resnext50_32x4d', 'resnext101_32x8d' +] +ENCODER_DENSENET = [ + 'densenet121', 'densenet169', 'densenet161', 'densenet201' +] + + +def lr_pad(x, padding=1): + ''' Pad left/right-most to each other instead of zero padding ''' + return torch.cat([x[..., -padding:], x, x[..., :padding]], dim=3) + + +class LR_PAD(nn.Module): + ''' Pad left/right-most to each other instead of zero padding ''' + + def __init__(self, padding=1): + super(LR_PAD, self).__init__() + self.padding = padding + + def forward(self, x): + return lr_pad(x, self.padding) + + +def wrap_lr_pad(net): + for name, m in net.named_modules(): + if not isinstance(m, nn.Conv2d): + continue + if m.padding[1] == 0: + continue + w_pad = int(m.padding[1]) + m.padding = (m.padding[0], 0) # weight padding is 0, LR_PAD then use valid padding will keep dim of weight + names = name.split('.') + + root = functools.reduce(lambda o, i: getattr(o, i), [net] + names[:-1]) + setattr( + root, names[-1], + nn.Sequential(LR_PAD(w_pad), m) + ) + + +''' +Encoder +''' + + +class Resnet(nn.Module): + def __init__(self, backbone='resnet50', pretrained=True): + super(Resnet, self).__init__() + assert backbone in ENCODER_RESNET + self.encoder = getattr(models, backbone)(pretrained=pretrained) + del self.encoder.fc, self.encoder.avgpool + + def forward(self, x): + features = [] + x = self.encoder.conv1(x) + x = self.encoder.bn1(x) + x = self.encoder.relu(x) + x = self.encoder.maxpool(x) + + 
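+        # the stem above (stride-2 conv + stride-2 max-pool) has already brought the
+        # input down to 1/4 resolution; layer1 keeps that scale and layers 2-4 halve
+        # it further, yielding the 1/4, 1/8, 1/16 and 1/32 pyramid noted below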
x = self.encoder.layer1(x)
+        features.append(x)  # 1/4
+        x = self.encoder.layer2(x)
+        features.append(x)  # 1/8
+        x = self.encoder.layer3(x)
+        features.append(x)  # 1/16
+        x = self.encoder.layer4(x)
+        features.append(x)  # 1/32
+        return features
+
+    def list_blocks(self):
+        lst = [m for m in self.encoder.children()]
+        block0 = lst[:4]
+        block1 = lst[4:5]
+        block2 = lst[5:6]
+        block3 = lst[6:7]
+        block4 = lst[7:8]
+        return block0, block1, block2, block3, block4
+
+
+class Densenet(nn.Module):
+    def __init__(self, backbone='densenet169', pretrained=True):
+        super(Densenet, self).__init__()
+        assert backbone in ENCODER_DENSENET
+        self.encoder = getattr(models, backbone)(pretrained=pretrained)
+        self.final_relu = nn.ReLU(inplace=True)
+        del self.encoder.classifier
+
+    def forward(self, x):
+        lst = []
+        for m in self.encoder.features.children():
+            x = m(x)
+            lst.append(x)
+        features = [lst[4], lst[6], lst[8], self.final_relu(lst[11])]
+        return features
+
+    def list_blocks(self):
+        lst = [m for m in self.encoder.features.children()]
+        block0 = lst[:4]
+        block1 = lst[4:6]
+        block2 = lst[6:8]
+        block3 = lst[8:10]
+        block4 = lst[10:]
+        return block0, block1, block2, block3, block4
+
+
+'''
+Decoder
+'''
+
+
+class ConvCompressH(nn.Module):
+    ''' Reduce feature height by a factor of two '''
+
+    def __init__(self, in_c, out_c, ks=3):
+        super(ConvCompressH, self).__init__()
+        assert ks % 2 == 1
+        self.layers = nn.Sequential(
+            nn.Conv2d(in_c, out_c, kernel_size=ks, stride=(2, 1), padding=ks // 2),
+            nn.BatchNorm2d(out_c),
+            nn.ReLU(inplace=True),
+        )
+
+    def forward(self, x):
+        return self.layers(x)
+
+
+class GlobalHeightConv(nn.Module):
+    def __init__(self, in_c, out_c):
+        super(GlobalHeightConv, self).__init__()
+        self.layer = nn.Sequential(
+            ConvCompressH(in_c, in_c // 2),
+            ConvCompressH(in_c // 2, in_c // 2),
+            ConvCompressH(in_c // 2, in_c // 4),
+            ConvCompressH(in_c // 4, out_c),
+        )
+
+    def forward(self, x, out_w):
+        x = self.layer(x)
+
+        factor = out_w // x.shape[3]
+        x = torch.cat([x[..., -1:], x, x[..., :1]], 3)  # pad the left/right ends with each other (wrap mode) before interpolating
+        d_type = x.dtype
+        x = F.interpolate(x, size=(x.shape[2], out_w + 2 * factor), mode='bilinear', align_corners=False)
+        # if x.dtype != d_type:
+        #     x = x.type(d_type)
+        x = x[..., factor:-factor]
+        return x
+
+
+class GlobalHeightStage(nn.Module):
+    def __init__(self, c1, c2, c3, c4, out_scale=8):
+        ''' Process 4 blocks from the encoder into a single multiscale feature '''
+        super(GlobalHeightStage, self).__init__()
+        self.cs = c1, c2, c3, c4
+        self.out_scale = out_scale
+        self.ghc_lst = nn.ModuleList([
+            GlobalHeightConv(c1, c1 // out_scale),
+            GlobalHeightConv(c2, c2 // out_scale),
+            GlobalHeightConv(c3, c3 // out_scale),
+            GlobalHeightConv(c4, c4 // out_scale),
+        ])
+
+    def forward(self, conv_list, out_w):
+        assert len(conv_list) == 4
+        bs = conv_list[0].shape[0]
+        feature = torch.cat([
+            f(x, out_w).reshape(bs, -1, out_w)
+            for f, x, out_c in zip(self.ghc_lst, conv_list, self.cs)
+        ], dim=1)
+        # conv_list:
+        # 0 [b, 256(d), 128(h), 256(w)] ->(4x{conv3x3 stride(2,1)} : d/8, h/16)-> [b 32(d) 8(h) 256(w)]
+        # 1 [b, 512(d), 64(h), 128(w)] ->(4x{conv3x3 stride(2,1)} : d/8, h/16)-> [b 64(d) 4(h) 128(w)]
+        # 2 [b, 1024(d), 32(h), 64(w)] ->(4x{conv3x3 stride(2,1)} : d/8, h/16)-> [b 128(d) 2(h) 64(w)]
+        # 3 [b, 2048(d), 16(h), 32(w)] ->(4x{conv3x3 stride(2,1)} : d/8, h/16)-> [b 256(d) 1(h) 32(w)]
+        # 0 ->(upsample to w=256)-> [b 32(d) 8(h) 256(w)] ->(reshape to h=1)-> [b 256(d) 1(h) 256(w)]
+        # 1 ->(upsample to w=256)-> [b 64(d) 4(h) 256(w)] ->(reshape to h=1)-> [b 256(d) 1(h) 256(w)]
+        # 2 ->(upsample to w=256)-> [b 128(d) 2(h) 256(w)] ->(reshape to h=1)-> [b 256(d) 1(h) 256(w)]
+        # 3 ->(upsample to w=256)-> [b 256(d) 1(h) 256(w)] ->(reshape to h=1)-> [b 256(d) 1(h) 256(w)]
+        # 0 --\
+        # 1 -- \
+        #       ---- cat [b 1024(d) 1(h) 256(w)]
+        # 2 -- /
+        # 3 --/
+        return feature  # [b 1024(d) 256(w)]
+
+
+class HorizonNetFeatureExtractor(nn.Module):
+    x_mean = torch.FloatTensor(np.array([0.485, 0.456, 0.406])[None, :, None, None])
+    x_std = torch.FloatTensor(np.array([0.229, 0.224, 0.225])[None, :, None, None])
+
+    def __init__(self, backbone='resnet50'):
+        super(HorizonNetFeatureExtractor, self).__init__()
+        self.out_scale = 8
+        self.step_cols = 4
+
+        # Encoder
+        if backbone.startswith('res'):
+            self.feature_extractor = Resnet(backbone, pretrained=True)
+        elif backbone.startswith('dense'):
+            self.feature_extractor = Densenet(backbone, pretrained=True)
+        else:
+            raise NotImplementedError()
+
+        # Infer the number of channels in each block of the encoder
+        with torch.no_grad():
+            dummy = torch.zeros(1, 3, 512, 1024)
+            c1, c2, c3, c4 = [b.shape[1] for b in self.feature_extractor(dummy)]
+            self.c_last = (c1 * 8 + c2 * 4 + c3 * 2 + c4 * 1) // self.out_scale
+
+        # Convert features from the 4 encoder blocks into B x C x 1 x W'
+        self.reduce_height_module = GlobalHeightStage(c1, c2, c3, c4, self.out_scale)
+        self.x_mean.requires_grad = False
+        self.x_std.requires_grad = False
+        wrap_lr_pad(self)
+
+    def _prepare_x(self, x):
+        x = x.clone()
+        if self.x_mean.device != x.device:
+            self.x_mean = self.x_mean.to(x.device)
+            self.x_std = self.x_std.to(x.device)
+        x[:, :3] = (x[:, :3] - self.x_mean) / self.x_std
+
+        return x
+
+    def forward(self, x):
+        # x [b 3 512 1024]
+        x = self._prepare_x(x)  # [b 3 512 1024]
+        conv_list = self.feature_extractor(x)
+        # conv_list:
+        # 0 [b, 256(d), 128(h), 256(w)]
+        # 1 [b, 512(d), 64(h), 128(w)]
+        # 2 [b, 1024(d), 32(h), 64(w)]
+        # 3 [b, 2048(d), 16(h), 32(w)]
+        x = self.reduce_height_module(conv_list, x.shape[3] // self.step_cols)  # [b 1024(d) 1(h) 256(w)]
+        # After reduce_height_module, h becomes 1: the height information is compressed
+        # into d, and d now stacks features from the different resolutions
+        # 0 [b, 256(d), 128(h), 256(w)] -> [b, 256/8(d) * 128/16(h') = 256(d), 1(h) 256(w)]
+        # 1 [b, 512(d), 64(h), 128(w)] -> [b, 512/8(d) * 64/16(h') = 256(d), 1(h) 256(w)]
+        # 2 [b, 1024(d), 32(h), 64(w)] -> [b, 1024/8(d) * 32/16(h') = 256(d), 1(h) 256(w)]
+        # 3 [b, 2048(d), 16(h), 32(w)] -> [b, 2048/8(d) * 16/16(h') = 256(d), 1(h) 256(w)]
+        return x  # [b 1024(d) 1(h) 256(w)]
+
+
+if __name__ == '__main__':
+    from PIL import Image
+    extractor = HorizonNetFeatureExtractor()
+    img = np.array(Image.open("../../src/demo.png")).transpose((2, 0, 1))
+    input = torch.Tensor([img])  # 1 3 512 1024
+    feature = extractor(input)
+    print(feature.shape)  # 1, 1024, 256 | 1024 = (out_c_0*h_0 +...
+ out_c_3*h_3) = 256 * 4 diff --git a/models/modules/patch_feature_extractor.py b/models/modules/patch_feature_extractor.py new file mode 100644 index 0000000000000000000000000000000000000000..8901b123d2845bfaecc1a42f66be13fdf1ddd349 --- /dev/null +++ b/models/modules/patch_feature_extractor.py @@ -0,0 +1,57 @@ +import numpy as np +import torch +import torch.nn as nn +from einops.layers.torch import Rearrange + + +class PatchFeatureExtractor(nn.Module): + x_mean = torch.FloatTensor(np.array([0.485, 0.456, 0.406])[None, :, None, None]) + x_std = torch.FloatTensor(np.array([0.229, 0.224, 0.225])[None, :, None, None]) + + def __init__(self, patch_num=256, input_shape=None): + super(PatchFeatureExtractor, self).__init__() + + if input_shape is None: + input_shape = [3, 512, 1024] + self.patch_dim = 1024 + self.patch_num = patch_num + + img_channel = input_shape[0] + img_h = input_shape[1] + img_w = input_shape[2] + + p_h, p_w = img_h, img_w // self.patch_num + p_dim = p_h * p_w * img_channel + + self.patch_embedding = nn.Sequential( + Rearrange('b c h (p_n p_w) -> b p_n (h p_w c)', p_w=p_w), + nn.Linear(p_dim, self.patch_dim) + ) + + self.x_mean.requires_grad = False + self.x_std.requires_grad = False + + def _prepare_x(self, x): + x = x.clone() + if self.x_mean.device != x.device: + self.x_mean = self.x_mean.to(x.device) + self.x_std = self.x_std.to(x.device) + x[:, :3] = (x[:, :3] - self.x_mean) / self.x_std + + return x + + def forward(self, x): + # x [b 3 512 1024] + x = self._prepare_x(x) # [b 3 512 1024] + x = self.patch_embedding(x) # [b 256(patch_num) 1024(d)] + x = x.permute(0, 2, 1) # [b 1024(d) 256(patch_num)] + return x + + +if __name__ == '__main__': + from PIL import Image + extractor = PatchFeatureExtractor() + img = np.array(Image.open("../../src/demo.png")).transpose((2, 0, 1)) + input = torch.Tensor([img]) # 1 3 512 1024 + feature = extractor(input) + print(feature.shape) # 1, 1024, 256 diff --git a/models/modules/swg_transformer.py b/models/modules/swg_transformer.py new file mode 100644 index 0000000000000000000000000000000000000000..aa368e3616058b30419cc6249862a816f7252fed --- /dev/null +++ b/models/modules/swg_transformer.py @@ -0,0 +1,49 @@ +from models.modules.transformer_modules import * + + +class SWG_Transformer(nn.Module): + def __init__(self, dim, depth, heads, win_size, dim_head, mlp_dim, + dropout=0., patch_num=None, ape=None, rpe=None, rpe_pos=1): + super().__init__() + self.absolute_pos_embed = None if patch_num is None or ape is None else AbsolutePosition(dim, dropout, + patch_num, ape) + self.pos_dropout = nn.Dropout(dropout) + self.layers = nn.ModuleList([]) + for i in range(depth): + if i % 2 == 0: + attention = WinAttention(dim, win_size=win_size, shift=0 if (i % 3 == 0) else win_size // 2, + heads=heads, dim_head=dim_head, dropout=dropout, rpe=rpe, rpe_pos=rpe_pos) + else: + attention = Attention(dim, heads=heads, dim_head=dim_head, dropout=dropout, + patch_num=patch_num, rpe=rpe, rpe_pos=rpe_pos) + + self.layers.append(nn.ModuleList([ + PreNorm(dim, attention), + PreNorm(dim, FeedForward(dim, mlp_dim, dropout=dropout)), + ])) + + def forward(self, x): + if self.absolute_pos_embed is not None: + x = self.absolute_pos_embed(x) + x = self.pos_dropout(x) + for attn, ff in self.layers: + x = attn(x) + x + x = ff(x) + x + return x + + +if __name__ == '__main__': + token_dim = 1024 + toke_len = 256 + + transformer = SWG_Transformer(dim=token_dim, + depth=6, + heads=16, + win_size=8, + dim_head=64, + mlp_dim=2048, + dropout=0.1) + + input = torch.randn(1, 
toke_len, token_dim) + output = transformer(input) + print(output.shape) diff --git a/models/modules/swin_transformer.py b/models/modules/swin_transformer.py new file mode 100644 index 0000000000000000000000000000000000000000..29996bbc08af9302dfad40e64edd9a3d976fb3a2 --- /dev/null +++ b/models/modules/swin_transformer.py @@ -0,0 +1,43 @@ +from models.modules.transformer_modules import * + + +class Swin_Transformer(nn.Module): + def __init__(self, dim, depth, heads, win_size, dim_head, mlp_dim, + dropout=0., patch_num=None, ape=None, rpe=None, rpe_pos=1): + super().__init__() + self.absolute_pos_embed = None if patch_num is None or ape is None else AbsolutePosition(dim, dropout, + patch_num, ape) + self.pos_dropout = nn.Dropout(dropout) + self.layers = nn.ModuleList([]) + for i in range(depth): + self.layers.append(nn.ModuleList([ + PreNorm(dim, WinAttention(dim, win_size=win_size, shift=0 if (i % 2 == 0) else win_size // 2, + heads=heads, dim_head=dim_head, dropout=dropout, rpe=rpe, rpe_pos=rpe_pos)), + PreNorm(dim, FeedForward(dim, mlp_dim, dropout=dropout)), + ])) + + def forward(self, x): + if self.absolute_pos_embed is not None: + x = self.absolute_pos_embed(x) + x = self.pos_dropout(x) + for attn, ff in self.layers: + x = attn(x) + x + x = ff(x) + x + return x + + +if __name__ == '__main__': + token_dim = 1024 + toke_len = 256 + + transformer = Swin_Transformer(dim=token_dim, + depth=6, + heads=16, + win_size=8, + dim_head=64, + mlp_dim=2048, + dropout=0.1) + + input = torch.randn(1, toke_len, token_dim) + output = transformer(input) + print(output.shape) diff --git a/models/modules/transformer.py b/models/modules/transformer.py new file mode 100644 index 0000000000000000000000000000000000000000..f3d2aa093c748bbc1408491cacab153977b4a4cb --- /dev/null +++ b/models/modules/transformer.py @@ -0,0 +1,44 @@ +from models.modules.transformer_modules import * + + +class Transformer(nn.Module): + def __init__(self, dim, depth, heads, win_size, dim_head, mlp_dim, + dropout=0., patch_num=None, ape=None, rpe=None, rpe_pos=1): + super().__init__() + + self.absolute_pos_embed = None if patch_num is None or ape is None else AbsolutePosition(dim, dropout, + patch_num, ape) + self.pos_dropout = nn.Dropout(dropout) + self.layers = nn.ModuleList([]) + for _ in range(depth): + self.layers.append(nn.ModuleList([ + PreNorm(dim, Attention(dim, heads=heads, dim_head=dim_head, dropout=dropout, patch_num=patch_num, + rpe=rpe, rpe_pos=rpe_pos)), + PreNorm(dim, FeedForward(dim, mlp_dim, dropout=dropout)) + ])) + + def forward(self, x): + if self.absolute_pos_embed is not None: + x = self.absolute_pos_embed(x) + x = self.pos_dropout(x) + for attn, ff in self.layers: + x = attn(x) + x + x = ff(x) + x + return x + + +if __name__ == '__main__': + token_dim = 1024 + toke_len = 256 + + transformer = Transformer(dim=token_dim, depth=6, heads=16, + dim_head=64, mlp_dim=2048, dropout=0.1, + patch_num=256, ape='lr_parameter', rpe='lr_parameter_mirror') + + total = sum(p.numel() for p in transformer.parameters()) + trainable = sum(p.numel() for p in transformer.parameters() if p.requires_grad) + print('parameter total:{:,}, trainable:{:,}'.format(total, trainable)) + + input = torch.randn(1, toke_len, token_dim) + output = transformer(input) + print(output.shape) diff --git a/models/modules/transformer_modules.py b/models/modules/transformer_modules.py new file mode 100644 index 0000000000000000000000000000000000000000..475d5047e8b08d51e7a91ead1bf158f004698d08 --- /dev/null +++ b/models/modules/transformer_modules.py @@ 
-0,0 +1,250 @@ +""" +@Date: 2021/09/01 +@description: +""" +import warnings +import math +import torch +import torch.nn.functional as F + +from torch import nn, einsum +from einops import rearrange + + +def trunc_normal_(tensor, mean=0., std=1., a=-2., b=2.): + # Cut & paste from PyTorch official master until it's in a few official releases - RW + # Method based on https://people.sc.fsu.edu/~jburkardt/presentations/truncated_normal.pdf + def norm_cdf(x): + # Computes standard normal cumulative distribution function + return (1. + math.erf(x / math.sqrt(2.))) / 2. + + if (mean < a - 2 * std) or (mean > b + 2 * std): + warnings.warn("mean is more than 2 std from [a, b] in nn.init.trunc_normal_. " + "The distribution of values may be incorrect.", + stacklevel=2) + + with torch.no_grad(): + # Values are generated by using a truncated uniform distribution and + # then using the inverse CDF for the normal distribution. + # Get upper and lower cdf values + l = norm_cdf((a - mean) / std) + u = norm_cdf((b - mean) / std) + + # Uniformly fill tensor with values from [l, u], then translate to + # [2l-1, 2u-1]. + tensor.uniform_(2 * l - 1, 2 * u - 1) + + # Use inverse cdf transform for normal distribution to get truncated + # standard normal + tensor.erfinv_() + + # Transform to proper mean, std + tensor.mul_(std * math.sqrt(2.)) + tensor.add_(mean) + + # Clamp to ensure it's in the proper range + tensor.clamp_(min=a, max=b) + return tensor + + +class PreNorm(nn.Module): + def __init__(self, dim, fn): + super().__init__() + self.norm = nn.LayerNorm(dim) + self.fn = fn + + def forward(self, x, **kwargs): + return self.fn(self.norm(x), **kwargs) + + +# compatibility pytorch < 1.4 +class GELU(nn.Module): + def forward(self, input): + return F.gelu(input) + + +class Attend(nn.Module): + + def __init__(self, dim=None): + super().__init__() + self.dim = dim + + def forward(self, input): + return F.softmax(input, dim=self.dim, dtype=input.dtype) + + +class FeedForward(nn.Module): + def __init__(self, dim, hidden_dim, dropout=0.): + super().__init__() + self.net = nn.Sequential( + nn.Linear(dim, hidden_dim), + GELU(), + nn.Dropout(dropout), + nn.Linear(hidden_dim, dim), + nn.Dropout(dropout) + ) + + def forward(self, x): + return self.net(x) + + +class RelativePosition(nn.Module): + def __init__(self, heads, patch_num=None, rpe=None): + super().__init__() + self.rpe = rpe + self.heads = heads + self.patch_num = patch_num + + if rpe == 'lr_parameter': + # -255 ~ 0 ~ 255 all count : patch * 2 - 1 + count = patch_num * 2 - 1 + self.rpe_table = nn.Parameter(torch.Tensor(count, heads)) + nn.init.xavier_uniform_(self.rpe_table) + elif rpe == 'lr_parameter_mirror': + # 0 ~ 127 128 ~ 1 all count : patch_num // 2 + 1 + count = patch_num // 2 + 1 + self.rpe_table = nn.Parameter(torch.Tensor(count, heads)) + nn.init.xavier_uniform_(self.rpe_table) + elif rpe == 'lr_parameter_half': + # -127 ~ 0 ~ 128 all count : patch + count = patch_num + self.rpe_table = nn.Parameter(torch.Tensor(count, heads)) + nn.init.xavier_uniform_(self.rpe_table) + elif rpe == 'fix_angle': + # 0 ~ 127 128 ~ 1 all count : patch_num // 2 + 1 + count = patch_num // 2 + 1 + # we think that closer proximity should have stronger relationships + rpe_table = (torch.arange(count, 0, -1) / count)[..., None].repeat(1, heads) + self.register_buffer('rpe_table', rpe_table) + + def get_relative_pos_embed(self): + range_vec = torch.arange(self.patch_num) + distance_mat = range_vec[None, :] - range_vec[:, None] + if self.rpe == 'lr_parameter': + # -255 ~ 0 ~ 
255 -> 0 ~ 255 ~ 255 + 255 + distance_mat += self.patch_num - 1 # remove negative + return self.rpe_table[distance_mat].permute(2, 0, 1)[None] + elif self.rpe == 'lr_parameter_mirror' or self.rpe == 'fix_angle': + distance_mat[distance_mat < 0] = -distance_mat[distance_mat < 0] # mirror + distance_mat[distance_mat > self.patch_num // 2] = self.patch_num - distance_mat[ + distance_mat > self.patch_num // 2] # remove repeat + return self.rpe_table[distance_mat].permute(2, 0, 1)[None] + elif self.rpe == 'lr_parameter_half': + distance_mat[distance_mat > self.patch_num // 2] = distance_mat[ + distance_mat > self.patch_num // 2] - self.patch_num # remove repeat > 128 exp: 129 -> -127 + distance_mat[distance_mat < -self.patch_num // 2 + 1] = distance_mat[ + distance_mat < -self.patch_num // 2 + 1] + self.patch_num # remove repeat < -127 exp: -128 -> 128 + # -127 ~ 0 ~ 128 -> 0 ~ 0 ~ 127 + 127 + 128 + distance_mat += self.patch_num//2 - 1 # remove negative + return self.rpe_table[distance_mat].permute(2, 0, 1)[None] + + def forward(self, attn): + return attn + self.get_relative_pos_embed() + + +class Attention(nn.Module): + def __init__(self, dim, heads=8, dim_head=64, dropout=0., patch_num=None, rpe=None, rpe_pos=1): + """ + :param dim: + :param heads: + :param dim_head: + :param dropout: + :param patch_num: + :param rpe: relative position embedding + """ + super().__init__() + + self.relative_pos_embed = None if patch_num is None or rpe is None else RelativePosition(heads, patch_num, rpe) + inner_dim = dim_head * heads + project_out = not (heads == 1 and dim_head == dim) + + self.heads = heads + self.scale = dim_head ** -0.5 + self.rpe_pos = rpe_pos + + self.attend = Attend(dim=-1) + self.to_qkv = nn.Linear(dim, inner_dim * 3, bias=False) + + self.to_out = nn.Sequential( + nn.Linear(inner_dim, dim), + nn.Dropout(dropout) + ) if project_out else nn.Identity() + + def forward(self, x): + b, n, _, h = *x.shape, self.heads + qkv = self.to_qkv(x).chunk(3, dim=-1) + q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> b h n d', h=h), qkv) + + dots = einsum('b h i d, b h j d -> b h i j', q, k) * self.scale + + if self.rpe_pos == 0: + if self.relative_pos_embed is not None: + dots = self.relative_pos_embed(dots) + + attn = self.attend(dots) + + if self.rpe_pos == 1: + if self.relative_pos_embed is not None: + attn = self.relative_pos_embed(attn) + + out = einsum('b h i j, b h j d -> b h i d', attn, v) + out = rearrange(out, 'b h n d -> b n (h d)') + return self.to_out(out) + + +class AbsolutePosition(nn.Module): + def __init__(self, dim, dropout=0., patch_num=None, ape=None): + super().__init__() + self.ape = ape + + if ape == 'lr_parameter': + self.absolute_pos_embed = nn.Parameter(torch.zeros(1, patch_num, dim)) + trunc_normal_(self.absolute_pos_embed, std=.02) + + elif ape == 'fix_angle': + angle = torch.arange(0, patch_num, dtype=torch.float) / patch_num * (math.pi * 2) + self.absolute_pos_embed = torch.sin(angle)[..., None].repeat(1, dim)[None] + + def forward(self, x): + return x + self.absolute_pos_embed + + +class WinAttention(nn.Module): + def __init__(self, dim, win_size=8, shift=0, heads=8, dim_head=64, dropout=0., rpe=None, rpe_pos=1): + super().__init__() + + self.win_size = win_size + self.shift = shift + self.attend = Attention(dim, heads=heads, dim_head=dim_head, + dropout=dropout, patch_num=win_size, rpe=None if rpe is None else 'lr_parameter', + rpe_pos=rpe_pos) + + def forward(self, x): + b = x.shape[0] + if self.shift != 0: + x = torch.roll(x, shifts=self.shift, dims=-2) + x = 
rearrange(x, 'b (m w) d -> (b m) w d', w=self.win_size) # split windows + + out = self.attend(x) + + out = rearrange(out, '(b m) w d -> b (m w) d ', b=b) # recover windows + if self.shift != 0: + out = torch.roll(out, shifts=-self.shift, dims=-2) + + return out + + +class Conv(nn.Module): + def __init__(self, dim, dropout=0.): + super().__init__() + self.dim = dim + self.net = nn.Sequential( + nn.Conv1d(dim, dim, kernel_size=3, stride=1, padding=0), + nn.Dropout(dropout) + ) + + def forward(self, x): + x = x.transpose(1, 2) + x = torch.cat([x[..., -1:], x, x[..., :1]], dim=-1) + x = self.net(x) + return x.transpose(1, 2) diff --git a/models/other/__init__.py b/models/other/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1e9fb83dd3209daf1bc988f961e9cb640e7c0561 --- /dev/null +++ b/models/other/__init__.py @@ -0,0 +1,4 @@ +""" +@Date: 2021/07/18 +@description: +""" diff --git a/models/other/criterion.py b/models/other/criterion.py new file mode 100644 index 0000000000000000000000000000000000000000..04d0db3913b5dc36afb91798d3d1a33fde63dcb1 --- /dev/null +++ b/models/other/criterion.py @@ -0,0 +1,72 @@ +""" +@date: 2021/7/19 +@description: +""" +import torch +import loss + +from utils.misc import tensor2np + + +def build_criterion(config, logger): + criterion = {} + device = config.TRAIN.DEVICE + + for k in config.TRAIN.CRITERION.keys(): + sc = config.TRAIN.CRITERION[k] + if sc.WEIGHT is None or float(sc.WEIGHT) == 0: + continue + criterion[sc.NAME] = { + 'loss': getattr(loss, sc.LOSS)(), + 'weight': float(sc.WEIGHT), + 'sub_weights': sc.WEIGHTS, + 'need_all': sc.NEED_ALL + } + + criterion[sc.NAME]['loss'] = criterion[sc.NAME]['loss'].to(device) + if config.AMP_OPT_LEVEL != "O0" and 'cuda' in device: + criterion[sc.NAME]['loss'] = criterion[sc.NAME]['loss'].type(torch.float16) + + # logger.info(f"Build criterion:{sc.WEIGHT}_{sc.NAME}_{sc.LOSS}_{sc.WEIGHTS}") + return criterion + + +def calc_criterion(criterion, gt, dt, epoch_loss_d): + loss = None + postfix_d = {} + for k in criterion.keys(): + if criterion[k]['need_all']: + single_loss = criterion[k]['loss'](gt, dt) + ws_loss = None + for i, sub_weight in enumerate(criterion[k]['sub_weights']): + if sub_weight == 0: + continue + if ws_loss is None: + ws_loss = single_loss[i] * sub_weight + else: + ws_loss = ws_loss + single_loss[i] * sub_weight + single_loss = ws_loss if ws_loss is not None else single_loss + else: + assert k in gt.keys(), "ground label is None:" + k + assert k in dt.keys(), "detection key is None:" + k + if k == 'ratio' and gt[k].shape[-1] != dt[k].shape[-1]: + gt[k] = gt[k].repeat(1, dt[k].shape[-1]) + single_loss = criterion[k]['loss'](gt[k], dt[k]) + + postfix_d[k] = tensor2np(single_loss) + if k not in epoch_loss_d.keys(): + epoch_loss_d[k] = [] + epoch_loss_d[k].append(postfix_d[k]) + + single_loss = single_loss * criterion[k]['weight'] + if loss is None: + loss = single_loss + else: + loss = loss + single_loss + + k = 'loss' + postfix_d[k] = tensor2np(loss) + if k not in epoch_loss_d.keys(): + epoch_loss_d[k] = [] + epoch_loss_d[k].append(postfix_d[k]) + return loss, postfix_d, epoch_loss_d diff --git a/models/other/init_env.py b/models/other/init_env.py new file mode 100644 index 0000000000000000000000000000000000000000..3654f11d0fe7b3f113bcf9af4a7f43807bf31a79 --- /dev/null +++ b/models/other/init_env.py @@ -0,0 +1,37 @@ +""" +@Date: 2021/08/15 +@description: +""" +import random +import torch +import torch.backends.cudnn as cudnn +import numpy as np +import os +import cv2 + + +def 
init_env(seed, deterministic=False, loader_work_num=0):
+    # Fix seed
+    # Python & NumPy
+    np.random.seed(seed)
+    random.seed(seed)
+    os.environ['PYTHONHASHSEED'] = str(seed)
+
+    # PyTorch
+    torch.manual_seed(seed)  # seed the CPU RNG
+    if torch.cuda.is_available():
+        torch.cuda.manual_seed(seed)  # seed the current GPU
+        torch.cuda.manual_seed_all(seed)  # seed all GPUs
+
+    # cuDNN
+    if deterministic:
+        # reproducible runs
+        torch.backends.cudnn.benchmark = False
+        torch.backends.cudnn.deterministic = True  # always select the same (default) convolution algorithm
+    else:
+        cudnn.benchmark = True  # worthwhile when input sizes and types rarely change
+        torch.backends.cudnn.deterministic = False
+
+    # Using multiple threads in OpenCV can cause deadlocks
+    if loader_work_num != 0:
+        cv2.setNumThreads(0)
diff --git a/models/other/optimizer.py b/models/other/optimizer.py
new file mode 100644
index 0000000000000000000000000000000000000000..8bd1c6959dc52028d061693673222e2cb3ef150d
--- /dev/null
+++ b/models/other/optimizer.py
@@ -0,0 +1,24 @@
+"""
+@Date: 2021/07/18
+@description:
+"""
+from torch import optim as optim
+
+
+def build_optimizer(config, model, logger):
+    name = config.TRAIN.OPTIMIZER.NAME.lower()
+
+    optimizer = None
+    if name == 'sgd':
+        optimizer = optim.SGD(model.parameters(), momentum=config.TRAIN.OPTIMIZER.MOMENTUM, nesterov=True,
+                              lr=config.TRAIN.BASE_LR, weight_decay=config.TRAIN.WEIGHT_DECAY)
+    elif name == 'adamw':
+        optimizer = optim.AdamW(model.parameters(), eps=config.TRAIN.OPTIMIZER.EPS, betas=config.TRAIN.OPTIMIZER.BETAS,
+                                lr=config.TRAIN.BASE_LR, weight_decay=config.TRAIN.WEIGHT_DECAY)
+    elif name == 'adam':
+        optimizer = optim.Adam(model.parameters(), eps=config.TRAIN.OPTIMIZER.EPS, betas=config.TRAIN.OPTIMIZER.BETAS,
+                               lr=config.TRAIN.BASE_LR, weight_decay=config.TRAIN.WEIGHT_DECAY)
+
+    logger.info(f"Build optimizer: {name}, lr:{config.TRAIN.BASE_LR}")
+
+    return optimizer
diff --git a/models/other/scheduler.py b/models/other/scheduler.py
new file mode 100644
index 0000000000000000000000000000000000000000..27d93bc4a6f72059d5e00e6589bc1715f5452aab
--- /dev/null
+++ b/models/other/scheduler.py
@@ -0,0 +1,51 @@
+"""
+@Date: 2021/09/14
+@description:
+"""
+
+
+class WarmupScheduler:
+    def __init__(self, optimizer, lr_pow, init_lr, warmup_lr, warmup_step, max_step, **kwargs):
+        self.lr_pow = lr_pow
+        self.init_lr = init_lr
+        self.running_lr = init_lr
+        self.warmup_lr = warmup_lr
+        self.warmup_step = warmup_step
+        self.max_step = max_step
+        self.optimizer = optimizer
+
+    def step_update(self, cur_step):
+        if cur_step < self.warmup_step:
+            frac = cur_step / self.warmup_step
+            step = self.warmup_lr - self.init_lr
+            self.running_lr = self.init_lr + step * frac
+        else:
+            frac = (float(cur_step) - self.warmup_step) / (self.max_step - self.warmup_step)
+            scale_running_lr = max((1. - frac), 0.) 
** self.lr_pow + self.running_lr = self.warmup_lr * scale_running_lr + + if self.optimizer is not None: + for param_group in self.optimizer.param_groups: + param_group['lr'] = self.running_lr + + +if __name__ == '__main__': + import matplotlib.pyplot as plt + + scheduler = WarmupScheduler(optimizer=None, + lr_pow=4, + init_lr=0.0000003, + warmup_lr=0.00003, + warmup_step=10000, + max_step=100000) + + x = [] + y = [] + for i in range(100000): + if i == 10000-1: + print() + scheduler.step_update(i) + x.append(i) + y.append(scheduler.running_lr) + plt.plot(x, y, linewidth=1) + plt.show() diff --git a/postprocessing/__init__.py b/postprocessing/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a6fb3961ff067e512a90ae61786a9ad1cdc25a30 --- /dev/null +++ b/postprocessing/__init__.py @@ -0,0 +1,4 @@ +""" +@Date: 2021/10/06 +@description: +""" diff --git a/postprocessing/dula/__init__.py b/postprocessing/dula/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a6fb3961ff067e512a90ae61786a9ad1cdc25a30 --- /dev/null +++ b/postprocessing/dula/__init__.py @@ -0,0 +1,4 @@ +""" +@Date: 2021/10/06 +@description: +""" diff --git a/postprocessing/dula/layout.py b/postprocessing/dula/layout.py new file mode 100644 index 0000000000000000000000000000000000000000..9101a65800d866a660a99b2e4ee809517ffeedf1 --- /dev/null +++ b/postprocessing/dula/layout.py @@ -0,0 +1,226 @@ +""" +@Date: 2021/10/06 +@description: Use the approach proposed by DuLa-Net +""" +import cv2 +import numpy as np +import math +import matplotlib.pyplot as plt + +from visualization.floorplan import draw_floorplan + + +def merge_near(lst, diag): + group = [[0, ]] + for i in range(1, len(lst)): + if lst[i][1] == 0 and lst[i][0] - np.mean(group[-1]) < diag * 0.02: + group[-1].append(lst[i][0]) + else: + group.append([lst[i][0], ]) + if len(group) == 1: + group = [lst[0][0], lst[-1][0]] + else: + group = [int(np.mean(x)) for x in group] + return group + + +def fit_layout(floor_xz, need_cube=False, show=False, block_eps=0.2): + show_radius = np.linalg.norm(floor_xz, axis=-1).max() + side_l = 512 + floorplan = draw_floorplan(xz=floor_xz, show_radius=show_radius, show=show, scale=1, side_l=side_l).astype(np.uint8) + center = np.array([side_l / 2, side_l / 2]) + polys = cv2.findContours(floorplan, 1, 2) + if isinstance(polys, tuple): + if len(polys) == 3: + # opencv 3 + polys = list(polys[1]) + else: + polys = list(polys[0]) + polys.sort(key=lambda x: cv2.contourArea(x), reverse=True) + poly = polys[0] + sub_x, sub_y, w, h = cv2.boundingRect(poly) + floorplan_sub = floorplan[sub_y:sub_y + h, sub_x:sub_x + w] + sub_center = center - np.array([sub_x, sub_y]) + polys = cv2.findContours(floorplan_sub, 1, 2) + if isinstance(polys, tuple): + if len(polys) == 3: + polys = polys[1] + else: + polys = polys[0] + poly = polys[0] + epsilon = 0.005 * cv2.arcLength(poly, True) + poly = cv2.approxPolyDP(poly, epsilon, True) + + x_lst = [[0, 0], ] + y_lst = [[0, 0], ] + + ans = np.zeros((floorplan_sub.shape[0], floorplan_sub.shape[1])) + + for i in range(len(poly)): + p1 = poly[i][0] + p2 = poly[(i + 1) % len(poly)][0] + # We added occlusion detection + cp1 = p1 - sub_center + cp2 = p2 - sub_center + p12 = p2 - p1 + l1 = np.linalg.norm(cp1) + l2 = np.linalg.norm(cp2) + l3 = np.linalg.norm(p12) + # We added occlusion detection + is_block1 = abs(np.cross(cp1/l1, cp2/l2)) < block_eps + is_block2 = abs(np.cross(cp2/l2, p12/l3)) < block_eps*2 + is_block = is_block1 and is_block2 + + if (p2[0] - p1[0]) == 0: + 
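+            # vertical edge (p1 and p2 share the same x): fall back to a large
+            # sentinel slope so the branch below files this edge as an x-axis
+            # split (any value greater than 1 would do)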
slope = 10 + else: + slope = abs((p2[1] - p1[1]) / (p2[0] - p1[0])) + + if is_block: + s = p1[1] if l1 < l2 else p2[1] + y_lst.append([s, 1]) + s = p1[0] if l1 < l2 else p2[0] + x_lst.append([s, 1]) + + left = p1[0] if p1[0] < p2[0] else p2[0] + right = p1[0] if p1[0] > p2[0] else p2[0] + top = p1[1] if p1[1] < p2[1] else p2[1] + bottom = p1[1] if p1[1] > p2[1] else p2[1] + sample = floorplan_sub[top:bottom, left:right] + score = 0 if sample.size == 0 else sample.mean() + if score >= 0.3: + ans[top:bottom, left:right] = 1 + + else: + if slope <= 1: + s = int((p1[1] + p2[1]) / 2) + y_lst.append([s, 0]) + elif slope > 1: + s = int((p1[0] + p2[0]) / 2) + x_lst.append([s, 0]) + + debug_show = False + if debug_show: + plt.figure(dpi=300) + plt.axis('off') + a = cv2.drawMarker(floorplan_sub.copy()*0.5, tuple([floorplan_sub.shape[1] // 2, floorplan_sub.shape[0] // 2]), [1], markerType=0, markerSize=10, thickness=2) + plt.imshow(cv2.drawContours(a, [poly], 0, 1, 1)) + plt.savefig('src/1.png', bbox_inches='tight', transparent=True, pad_inches=0) + plt.show() + + plt.figure(dpi=300) + plt.axis('off') + a = cv2.drawMarker(ans.copy()*0.5, tuple([floorplan_sub.shape[1] // 2, floorplan_sub.shape[0] // 2]), [1], markerType=0, markerSize=10, thickness=2) + plt.imshow(cv2.drawContours(a, [poly], 0, 1, 1)) + # plt.show() + plt.savefig('src/2.png', bbox_inches='tight', transparent=True, pad_inches=0) + plt.show() + + x_lst.append([floorplan_sub.shape[1], 0]) + y_lst.append([floorplan_sub.shape[0], 0]) + x_lst.sort(key=lambda x: x[0]) + y_lst.sort(key=lambda x: x[0]) + + diag = math.sqrt(math.pow(floorplan_sub.shape[1], 2) + math.pow(floorplan_sub.shape[0], 2)) + x_lst = merge_near(x_lst, diag) + y_lst = merge_near(y_lst, diag) + if need_cube and len(x_lst) > 2: + x_lst = [x_lst[0], x_lst[-1]] + if need_cube and len(y_lst) > 2: + y_lst = [y_lst[0], y_lst[-1]] + + for i in range(len(x_lst) - 1): + for j in range(len(y_lst) - 1): + sample = floorplan_sub[y_lst[j]:y_lst[j + 1], x_lst[i]:x_lst[i + 1]] + score = 0 if sample.size == 0 else sample.mean() + if score >= 0.3: + ans[y_lst[j]:y_lst[j + 1], x_lst[i]:x_lst[i + 1]] = 1 + + if debug_show: + plt.figure(dpi=300) + plt.axis('off') + a = cv2.drawMarker(ans.copy() * 0.5, tuple([floorplan_sub.shape[1] // 2, floorplan_sub.shape[0] // 2]), [1], + markerType=0, markerSize=10, thickness=2) + plt.imshow(cv2.drawContours(a, [poly], 0, 1, 1)) + # plt.show() + plt.savefig('src/3.png', bbox_inches='tight', transparent=True, pad_inches=0) + plt.show() + + pred = np.uint8(ans) + pred_polys = cv2.findContours(pred, 1, 3) + if isinstance(pred_polys, tuple): + if len(pred_polys) == 3: + pred_polys = pred_polys[1] + else: + pred_polys = pred_polys[0] + + pred_polys.sort(key=lambda x: cv2.contourArea(x), reverse=True) + pred_polys = pred_polys[0] + + if debug_show: + plt.figure(dpi=300) + plt.axis('off') + a = cv2.drawMarker(ans.copy() * 0.5, tuple([floorplan_sub.shape[1] // 2, floorplan_sub.shape[0] // 2]), [1], + markerType=0, markerSize=10, thickness=2) + a = cv2.drawContours(a, [poly], 0, 0.8, 1) + a = cv2.drawContours(a, [pred_polys], 0, 1, 1) + plt.imshow(a) + # plt.show() + plt.savefig('src/4.png', bbox_inches='tight', transparent=True, pad_inches=0) + plt.show() + + polygon = [(p[0][1], p[0][0]) for p in pred_polys[::-1]] + + v = np.array([p[0] + sub_y for p in polygon]) + u = np.array([p[1] + sub_x for p in polygon]) + # side_l + # v<-----------|o + # | | | + # | ----|----z | side_l + # | | | + # | x \|/ + # |------------u + side_l = floorplan.shape[0] + pred_xz = 
np.concatenate((u[:, np.newaxis] - side_l // 2, side_l // 2 - v[:, np.newaxis]), axis=1) + + pred_xz = pred_xz * show_radius / (side_l // 2) + if show: + draw_floorplan(pred_xz, show_radius=show_radius, show=show) + + show_process = False + if show_process: + img = np.zeros((floorplan_sub.shape[0], floorplan_sub.shape[1], 3)) + for x in x_lst: + cv2.line(img, (x, 0), (x, floorplan_sub.shape[0]), (0, 255, 0), 1) + for y in y_lst: + cv2.line(img, (0, y), (floorplan_sub.shape[1], y), (255, 0, 0), 1) + + fig = plt.figure() + plt.axis('off') + ax1 = fig.add_subplot(2, 2, 1) + ax1.imshow(floorplan) + ax3 = fig.add_subplot(2, 2, 2) + ax3.imshow(floorplan_sub) + ax4 = fig.add_subplot(2, 2, 3) + ax4.imshow(img) + ax5 = fig.add_subplot(2, 2, 4) + ax5.imshow(ans) + plt.show() + + return pred_xz + + +if __name__ == '__main__': + from utils.conversion import uv2xyz + + pano_img = np.zeros([512, 1024, 3]) + corners = np.array([[0.1, 0.7], + [0.4, 0.7], + [0.3, 0.6], + [0.6, 0.6], + [0.8, 0.7]]) + xz = uv2xyz(corners)[..., ::2] + draw_floorplan(xz, show=True, marker_color=None, center_color=0.8) + + xz = fit_layout(xz) + draw_floorplan(xz, show=True, marker_color=None, center_color=0.8) diff --git a/postprocessing/dula/layout_old.py b/postprocessing/dula/layout_old.py new file mode 100644 index 0000000000000000000000000000000000000000..4945147000f0ed35ca627366a7c0fb96f1b33a4b --- /dev/null +++ b/postprocessing/dula/layout_old.py @@ -0,0 +1,134 @@ +""" +@Date: 2021/10/06 +@description: Use the approach proposed by DuLa-Net +""" +import cv2 +import numpy as np +import math +import matplotlib.pyplot as plt + +from visualization.floorplan import draw_floorplan + + +def merge_near(lst, diag): + group = [[0, ]] + for i in range(1, len(lst)): + if lst[i] - np.mean(group[-1]) < diag * 0.02: + group[-1].append(lst[i]) + else: + group.append([lst[i], ]) + if len(group) == 1: + group = [lst[0], lst[-1]] + else: + group = [int(np.mean(x)) for x in group] + return group + + +def fit_layout_old(floor_xz, need_cube=False, show=False, block_eps=0.05): + show_radius = np.linalg.norm(floor_xz, axis=-1).max() + side_l = 512 + floorplan = draw_floorplan(xz=floor_xz, show_radius=show_radius, show=show, scale=1, side_l=side_l).astype(np.uint8) + center = np.array([side_l / 2, side_l / 2]) + polys = cv2.findContours(floorplan, 1, 2) + if isinstance(polys, tuple): + if len(polys) == 3: + # opencv 3 + polys = list(polys[1]) + else: + polys = list(polys[0]) + polys.sort(key=lambda x: cv2.contourArea(x), reverse=True) + poly = polys[0] + sub_x, sub_y, w, h = cv2.boundingRect(poly) + floorplan_sub = floorplan[sub_y:sub_y + h, sub_x:sub_x + w] + sub_center = center - np.array([sub_x, sub_y]) + polys = cv2.findContours(floorplan_sub, 1, 2) + if isinstance(polys, tuple): + if len(polys) == 3: + polys = polys[1] + else: + polys = polys[0] + poly = polys[0] + epsilon = 0.005 * cv2.arcLength(poly, True) + poly = cv2.approxPolyDP(poly, epsilon, True) + + x_lst = [0, ] + y_lst = [0, ] + for i in range(len(poly)): + p1 = poly[i][0] + p2 = poly[(i + 1) % len(poly)][0] + + if (p2[0] - p1[0]) == 0: + slope = 10 + else: + slope = abs((p2[1] - p1[1]) / (p2[0] - p1[0])) + + if slope <= 1: + s = int((p1[1] + p2[1]) / 2) + y_lst.append(s) + elif slope > 1: + s = int((p1[0] + p2[0]) / 2) + x_lst.append(s) + + x_lst.append(floorplan_sub.shape[1]) + y_lst.append(floorplan_sub.shape[0]) + x_lst.sort() + y_lst.sort() + + diag = math.sqrt(math.pow(floorplan_sub.shape[1], 2) + math.pow(floorplan_sub.shape[0], 2)) + x_lst = merge_near(x_lst, diag) + y_lst 
= merge_near(y_lst, diag) + if need_cube and len(x_lst) > 2: + x_lst = [x_lst[0], x_lst[-1]] + if need_cube and len(y_lst) > 2: + y_lst = [y_lst[0], y_lst[-1]] + + ans = np.zeros((floorplan_sub.shape[0], floorplan_sub.shape[1])) + for i in range(len(x_lst) - 1): + for j in range(len(y_lst) - 1): + sample = floorplan_sub[y_lst[j]:y_lst[j + 1], x_lst[i]:x_lst[i + 1]] + score = 0 if sample.size == 0 else sample.mean() + if score >= 0.3: + ans[y_lst[j]:y_lst[j + 1], x_lst[i]:x_lst[i + 1]] = 1 + + pred = np.uint8(ans) + pred_polys = cv2.findContours(pred, 1, 3) + if isinstance(pred_polys, tuple): + if len(pred_polys) == 3: + pred_polys = pred_polys[1] + else: + pred_polys = pred_polys[0] + + polygon = [(p[0][1], p[0][0]) for p in pred_polys[0][::-1]] + + v = np.array([p[0] + sub_y for p in polygon]) + u = np.array([p[1] + sub_x for p in polygon]) + # side_l + # v<-----------|o + # | | | + # | ----|----z | side_l + # | | | + # | x \|/ + # |------------u + side_l = floorplan.shape[0] + pred_xz = np.concatenate((u[:, np.newaxis] - side_l // 2, side_l // 2 - v[:, np.newaxis]), axis=1) + + pred_xz = pred_xz * show_radius / (side_l // 2) + if show: + draw_floorplan(pred_xz, show_radius=show_radius, show=show) + return pred_xz + + +if __name__ == '__main__': + from utils.conversion import uv2xyz + + pano_img = np.zeros([512, 1024, 3]) + corners = np.array([[0.1, 0.7], + [0.4, 0.7], + [0.3, 0.6], + [0.6, 0.6], + [0.8, 0.7]]) + xz = uv2xyz(corners)[..., ::2] + draw_floorplan(xz, show=True, marker_color=None, center_color=0.8) + + xz = fit_layout_old(xz) + draw_floorplan(xz, show=True, marker_color=None, center_color=0.8) diff --git a/postprocessing/post_process.py b/postprocessing/post_process.py new file mode 100644 index 0000000000000000000000000000000000000000..c58d894d58d6ed1e90fc1c35d85b55acb24a3125 --- /dev/null +++ b/postprocessing/post_process.py @@ -0,0 +1,34 @@ +""" +@Date: 2021/10/08 +@description: +""" +import numpy as np +import cv2 + +from postprocessing.dula.layout import fit_layout +from postprocessing.dula.layout_old import fit_layout_old +from utils.conversion import depth2xyz, xyz2depth + + +def post_process(b_depth, type_name='manhattan', need_cube=False): + plan_y = 1 + b_xyz = depth2xyz(b_depth, plan_y) + + b_processed_xyz = [] + for xyz in b_xyz: + if type_name == 'manhattan': + processed_xz = fit_layout(floor_xz=xyz[..., ::2], need_cube=need_cube, show=False) + elif type_name == 'manhattan_old': + processed_xz = fit_layout_old(floor_xz=xyz[..., ::2], need_cube=need_cube, show=False) + elif type_name == 'atalanta': + processed_xz = cv2.approxPolyDP(xyz[..., ::2].astype(np.float32), 0.1, False)[:, 0, :] + else: + raise NotImplementedError("Unknown post-processing type") + + if need_cube: + assert len(processed_xz) == 4 + + processed_xyz = np.insert(processed_xz, 1, plan_y, axis=1) + b_processed_xyz.append(processed_xyz) + + return np.array(b_processed_xyz) \ No newline at end of file diff --git a/preprocessing/__init__.py b/preprocessing/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..11899be3b477996772cd1ae754815056f22ca205 --- /dev/null +++ b/preprocessing/__init__.py @@ -0,0 +1,4 @@ +""" +@date: 2021/7/5 +@description: +""" diff --git a/preprocessing/filter.py b/preprocessing/filter.py new file mode 100644 index 0000000000000000000000000000000000000000..6e95ec7c48be2ab545cbd7bab5b33c93ada38022 --- /dev/null +++ b/preprocessing/filter.py @@ -0,0 +1,90 @@ +""" +@date: 2021/7/5 +@description: +""" +import json +import math +import shutil + +import 
numpy as np
+from utils.boundary import *
+import dataset
+import os
+from tqdm import tqdm
+from PIL import Image
+from visualization.boundary import *
+from visualization.floorplan import *
+from shapely.geometry import Polygon, Point
+
+
+def filter_center(ceil_corners):
+    xyz = uv2xyz(ceil_corners, plan_y=1.6)
+    xz = xyz[:, ::2]
+    poly = Polygon(xz).buffer(-0.01)
+    return poly.contains(Point(0, 0))
+
+
+def filter_boundary(corners):
+    if is_ceil_boundary(corners):
+        return True
+    elif is_floor_boundary(corners):
+        return True
+    else:
+        # the boundary crosses the horizon, which is treated as invalid
+        return False
+
+
+def filter_self_intersection(corners):
+    xz = uv2xyz(corners)[:, ::2]
+    poly = Polygon(xz)
+    return poly.is_valid
+
+
+def filter_dataset(dataset, show=False, output_dir=None):
+    if output_dir is None:
+        output_dir = os.path.join(dataset.root_dir, dataset.mode)
+        output_img_dir = os.path.join(output_dir, 'img_align')
+        output_label_dir = os.path.join(output_dir, 'label_cor_align')
+    else:
+        output_dir = os.path.join(output_dir, dataset.mode)
+        output_img_dir = os.path.join(output_dir, 'img')
+        output_label_dir = os.path.join(output_dir, 'label_cor')
+
+    if not os.path.exists(output_img_dir):
+        os.makedirs(output_img_dir)
+
+    if not os.path.exists(output_label_dir):
+        os.makedirs(output_label_dir)
+
+    bar = tqdm(dataset, total=len(dataset))
+    for data in bar:
+        name = data['name']
+        bar.set_description(f"Processing {name}")
+        img = data['img']
+        corners = data['corners']
+
+        if not filter_center(corners[1::2]):
+            if show:
+                draw_boundaries(img, corners_list=[corners[0::2], corners[1::2]], show=True)
+            if not os.path.exists(data['img_path']):
+                print("already removed")
+            else:
+                print(f"move {name}")
+                shutil.move(data['img_path'], os.path.join(output_img_dir, os.path.basename(data['img_path'])))
+                shutil.move(data['label_path'], os.path.join(output_label_dir, os.path.basename(data['label_path'])))
+
+
+def execute_filter_dataset(root_dir, dataset_name="PanoS2D3DDataset", modes=None, output_dir=None):
+    if modes is None:
+        modes = ["train", "test", "valid"]
+
+    for mode in modes:
+        print("mode: {}".format(mode))
+
+        filter_dataset(getattr(dataset, dataset_name)(root_dir, mode), show=False, output_dir=output_dir)
+
+
+if __name__ == '__main__':
+    execute_filter_dataset(root_dir='/root/data/hd/hnet_dataset',
+                           dataset_name="PanoS2D3DDataset", modes=['train', "test", "valid"],
+                           output_dir='/root/data/hd/hnet_dataset_close')
diff --git a/preprocessing/pano_lsd_align.py b/preprocessing/pano_lsd_align.py
new file mode 100644
index 0000000000000000000000000000000000000000..2594029f5cb1a9d1afd580ef59a579af41580376
--- /dev/null
+++ b/preprocessing/pano_lsd_align.py
@@ -0,0 +1,911 @@
+'''
+This script provides helper functions for preprocessing.
+Most of the code is converted from LayoutNet's official MATLAB code.
+Function names, naming rules and data flow follow the official code for
+easier conversion and comparison.
+The code is not yet optimized for Python or NumPy.
+'''
+
+import sys
+import numpy as np
+from scipy.ndimage import map_coordinates
+import cv2
+from pylsd import lsd
+
+
+def computeUVN(n, in_, planeID):
+    '''
+    compute v given u and normal.
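+    Derivation: for planeID 1, a point at (u, v) on the unit sphere is
+    xyz = (cos(v)*sin(u), cos(v)*cos(u), sin(v)) (see uv2xyzN below), so
+    n . xyz = 0 gives tan(v) = -(n[0]*sin(u) + n[1]*cos(u)) / n[2], which is
+    the arctan computed here. E.g. n = (0, 0, 1), a horizontal plane, yields
+    v = 0 for every u, i.e. the horizon line.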
+ ''' + if planeID == 2: + n = np.array([n[1], n[2], n[0]]) + elif planeID == 3: + n = np.array([n[2], n[0], n[1]]) + bc = n[0] * np.sin(in_) + n[1] * np.cos(in_) + bs = n[2] + out = np.arctan(-bc / (bs + 1e-9)) + return out + + +def computeUVN_vec(n, in_, planeID): + ''' + vectorization version of computeUVN + @n N x 3 + @in_ MN x 1 + @planeID N + ''' + n = n.copy() + if (planeID == 2).sum(): + n[planeID == 2] = np.roll(n[planeID == 2], 2, axis=1) + if (planeID == 3).sum(): + n[planeID == 3] = np.roll(n[planeID == 3], 1, axis=1) + n = np.repeat(n, in_.shape[0] // n.shape[0], axis=0) + assert n.shape[0] == in_.shape[0] + bc = n[:, [0]] * np.sin(in_) + n[:, [1]] * np.cos(in_) + bs = n[:, [2]] + out = np.arctan(-bc / (bs + 1e-9)) + return out + + +def xyz2uvN(xyz, planeID=1): + ID1 = (int(planeID) - 1 + 0) % 3 + ID2 = (int(planeID) - 1 + 1) % 3 + ID3 = (int(planeID) - 1 + 2) % 3 + normXY = np.sqrt(xyz[:, [ID1]] ** 2 + xyz[:, [ID2]] ** 2) + normXY[normXY < 0.000001] = 0.000001 + normXYZ = np.sqrt(xyz[:, [ID1]] ** 2 + xyz[:, [ID2]] ** 2 + xyz[:, [ID3]] ** 2) + v = np.arcsin(xyz[:, [ID3]] / normXYZ) + u = np.arcsin(xyz[:, [ID1]] / normXY) + valid = (xyz[:, [ID2]] < 0) & (u >= 0) + u[valid] = np.pi - u[valid] + valid = (xyz[:, [ID2]] < 0) & (u <= 0) + u[valid] = -np.pi - u[valid] + uv = np.hstack([u, v]) + uv[np.isnan(uv[:, 0]), 0] = 0 + return uv + + +def uv2xyzN(uv, planeID=1): + ID1 = (int(planeID) - 1 + 0) % 3 + ID2 = (int(planeID) - 1 + 1) % 3 + ID3 = (int(planeID) - 1 + 2) % 3 + xyz = np.zeros((uv.shape[0], 3)) + xyz[:, ID1] = np.cos(uv[:, 1]) * np.sin(uv[:, 0]) + xyz[:, ID2] = np.cos(uv[:, 1]) * np.cos(uv[:, 0]) + xyz[:, ID3] = np.sin(uv[:, 1]) + return xyz + + +def uv2xyzN_vec(uv, planeID): + ''' + vectorization version of uv2xyzN + @uv N x 2 + @planeID N + ''' + assert (planeID.astype(int) != planeID).sum() == 0 + planeID = planeID.astype(int) + ID1 = (planeID - 1 + 0) % 3 + ID2 = (planeID - 1 + 1) % 3 + ID3 = (planeID - 1 + 2) % 3 + ID = np.arange(len(uv)) + xyz = np.zeros((len(uv), 3)) + xyz[ID, ID1] = np.cos(uv[:, 1]) * np.sin(uv[:, 0]) + xyz[ID, ID2] = np.cos(uv[:, 1]) * np.cos(uv[:, 0]) + xyz[ID, ID3] = np.sin(uv[:, 1]) + return xyz + + +def warpImageFast(im, XXdense, YYdense): + minX = max(1., np.floor(XXdense.min()) - 1) + minY = max(1., np.floor(YYdense.min()) - 1) + + maxX = min(im.shape[1], np.ceil(XXdense.max()) + 1) + maxY = min(im.shape[0], np.ceil(YYdense.max()) + 1) + + im = im[int(round(minY-1)):int(round(maxY)), + int(round(minX-1)):int(round(maxX))] + + assert XXdense.shape == YYdense.shape + out_shape = XXdense.shape + coordinates = [ + (YYdense - minY).reshape(-1), + (XXdense - minX).reshape(-1), + ] + im_warp = np.stack([ + map_coordinates(im[..., c], coordinates, order=1).reshape(out_shape) + for c in range(im.shape[-1])], + axis=-1) + + return im_warp + + +def rotatePanorama(img, vp=None, R=None): + ''' + Rotate panorama + if R is given, vp (vanishing point) will be overlooked + otherwise R is computed from vp + ''' + sphereH, sphereW, C = img.shape + + # new uv coordinates + TX, TY = np.meshgrid(range(1, sphereW + 1), range(1, sphereH + 1)) + TX = TX.reshape(-1, 1, order='F') + TY = TY.reshape(-1, 1, order='F') + ANGx = (TX - sphereW/2 - 0.5) / sphereW * np.pi * 2 + ANGy = -(TY - sphereH/2 - 0.5) / sphereH * np.pi + uvNew = np.hstack([ANGx, ANGy]) + xyzNew = uv2xyzN(uvNew, 1) + + # rotation matrix + if R is None: + R = np.linalg.inv(vp.T) + + xyzOld = np.linalg.solve(R, xyzNew.T).T + uvOld = xyz2uvN(xyzOld, 1) + + Px = (uvOld[:, 0] + np.pi) / (2*np.pi) * sphereW 
+ 0.5 + Py = (-uvOld[:, 1] + np.pi/2) / np.pi * sphereH + 0.5 + + Px = Px.reshape(sphereH, sphereW, order='F') + Py = Py.reshape(sphereH, sphereW, order='F') + + # boundary + imgNew = np.zeros((sphereH+2, sphereW+2, C), np.float64) + imgNew[1:-1, 1:-1, :] = img + imgNew[1:-1, 0, :] = img[:, -1, :] + imgNew[1:-1, -1, :] = img[:, 0, :] + imgNew[0, 1:sphereW//2+1, :] = img[0, sphereW-1:sphereW//2-1:-1, :] + imgNew[0, sphereW//2+1:-1, :] = img[0, sphereW//2-1::-1, :] + imgNew[-1, 1:sphereW//2+1, :] = img[-1, sphereW-1:sphereW//2-1:-1, :] + imgNew[-1, sphereW//2+1:-1, :] = img[0, sphereW//2-1::-1, :] + imgNew[0, 0, :] = img[0, 0, :] + imgNew[-1, -1, :] = img[-1, -1, :] + imgNew[0, -1, :] = img[0, -1, :] + imgNew[-1, 0, :] = img[-1, 0, :] + + rotImg = warpImageFast(imgNew, Px+1, Py+1) + + return rotImg + + +def imgLookAt(im, CENTERx, CENTERy, new_imgH, fov): + sphereH = im.shape[0] + sphereW = im.shape[1] + warped_im = np.zeros((new_imgH, new_imgH, 3)) + TX, TY = np.meshgrid(range(1, new_imgH + 1), range(1, new_imgH + 1)) + TX = TX.reshape(-1, 1, order='F') + TY = TY.reshape(-1, 1, order='F') + TX = TX - 0.5 - new_imgH/2 + TY = TY - 0.5 - new_imgH/2 + r = new_imgH / 2 / np.tan(fov/2) + + # convert to 3D + R = np.sqrt(TY ** 2 + r ** 2) + ANGy = np.arctan(- TY / r) + ANGy = ANGy + CENTERy + + X = np.sin(ANGy) * R + Y = -np.cos(ANGy) * R + Z = TX + + INDn = np.nonzero(np.abs(ANGy) > np.pi/2) + + # project back to sphere + ANGx = np.arctan(Z / -Y) + RZY = np.sqrt(Z ** 2 + Y ** 2) + ANGy = np.arctan(X / RZY) + + ANGx[INDn] = ANGx[INDn] + np.pi + ANGx = ANGx + CENTERx + + INDy = np.nonzero(ANGy < -np.pi/2) + ANGy[INDy] = -np.pi - ANGy[INDy] + ANGx[INDy] = ANGx[INDy] + np.pi + + INDx = np.nonzero(ANGx <= -np.pi); ANGx[INDx] = ANGx[INDx] + 2 * np.pi + INDx = np.nonzero(ANGx > np.pi); ANGx[INDx] = ANGx[INDx] - 2 * np.pi + INDx = np.nonzero(ANGx > np.pi); ANGx[INDx] = ANGx[INDx] - 2 * np.pi + INDx = np.nonzero(ANGx > np.pi); ANGx[INDx] = ANGx[INDx] - 2 * np.pi + + Px = (ANGx + np.pi) / (2*np.pi) * sphereW + 0.5 + Py = ((-ANGy) + np.pi/2) / np.pi * sphereH + 0.5 + + INDxx = np.nonzero(Px < 1) + Px[INDxx] = Px[INDxx] + sphereW + im = np.concatenate([im, im[:, :2]], 1) + + Px = Px.reshape(new_imgH, new_imgH, order='F') + Py = Py.reshape(new_imgH, new_imgH, order='F') + + warped_im = warpImageFast(im, Px, Py) + + return warped_im + + +def separatePano(panoImg, fov, x, y, imgSize=320): + '''cut a panorama image into several separate views''' + assert x.shape == y.shape + if not isinstance(fov, np.ndarray): + fov = fov * np.ones_like(x) + + sepScene = [ + { + 'img': imgLookAt(panoImg.copy(), xi, yi, imgSize, fovi), + 'vx': xi, + 'vy': yi, + 'fov': fovi, + 'sz': imgSize, + } + for xi, yi, fovi in zip(x, y, fov) + ] + + return sepScene + + +def lsdWrap(img): + ''' + Opencv implementation of + Rafael Grompone von Gioi, Jérémie Jakubowicz, Jean-Michel Morel, and Gregory Randall, + LSD: a Line Segment Detector, Image Processing On Line, vol. 2012. 
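+    Note: the segments themselves come from the pylsd package (see the lsd
+    import above); OpenCV is only used to rasterize them into the edge map.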
+ [Rafael12] http://www.ipol.im/pub/art/2012/gjmr-lsd/?utm_source=doi + @img + input image + ''' + if len(img.shape) == 3: + img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY) + + lines = lsd(img, quant=0.7) + if lines is None: + return np.zeros_like(img), np.array([]) + edgeMap = np.zeros_like(img) + for i in range(lines.shape[0]): + pt1 = (int(lines[i, 0]), int(lines[i, 1])) + pt2 = (int(lines[i, 2]), int(lines[i, 3])) + width = lines[i, 4] + cv2.line(edgeMap, pt1, pt2, 255, int(np.ceil(width / 2))) + edgeList = np.concatenate([lines, np.ones_like(lines[:, :2])], 1) + return edgeMap, edgeList + + +def edgeFromImg2Pano(edge): + edgeList = edge['edgeLst'] + if len(edgeList) == 0: + return np.array([]) + + vx = edge['vx'] + vy = edge['vy'] + fov = edge['fov'] + imH, imW = edge['img'].shape + + R = (imW/2) / np.tan(fov/2) + + # im is the tangent plane, contacting with ball at [x0 y0 z0] + x0 = R * np.cos(vy) * np.sin(vx) + y0 = R * np.cos(vy) * np.cos(vx) + z0 = R * np.sin(vy) + vecposX = np.array([np.cos(vx), -np.sin(vx), 0]) + vecposY = np.cross(np.array([x0, y0, z0]), vecposX) + vecposY = vecposY / np.sqrt(vecposY @ vecposY.T) + vecposX = vecposX.reshape(1, -1) + vecposY = vecposY.reshape(1, -1) + Xc = (0 + imW-1) / 2 + Yc = (0 + imH-1) / 2 + + vecx1 = edgeList[:, [0]] - Xc + vecy1 = edgeList[:, [1]] - Yc + vecx2 = edgeList[:, [2]] - Xc + vecy2 = edgeList[:, [3]] - Yc + + vec1 = np.tile(vecx1, [1, 3]) * vecposX + np.tile(vecy1, [1, 3]) * vecposY + vec2 = np.tile(vecx2, [1, 3]) * vecposX + np.tile(vecy2, [1, 3]) * vecposY + coord1 = [[x0, y0, z0]] + vec1 + coord2 = [[x0, y0, z0]] + vec2 + + normal = np.cross(coord1, coord2, axis=1) + normal = normal / np.linalg.norm(normal, axis=1, keepdims=True) + + panoList = np.hstack([normal, coord1, coord2, edgeList[:, [-1]]]) + + return panoList + + +def _intersection(range1, range2): + if range1[1] < range1[0]: + range11 = [range1[0], 1] + range12 = [0, range1[1]] + else: + range11 = range1 + range12 = [0, 0] + + if range2[1] < range2[0]: + range21 = [range2[0], 1] + range22 = [0, range2[1]] + else: + range21 = range2 + range22 = [0, 0] + + b = max(range11[0], range21[0]) < min(range11[1], range21[1]) + if b: + return b + b2 = max(range12[0], range22[0]) < min(range12[1], range22[1]) + b = b or b2 + return b + + +def _insideRange(pt, range): + if range[1] > range[0]: + b = pt >= range[0] and pt <= range[1] + else: + b1 = pt >= range[0] and pt <= 1 + b2 = pt >= 0 and pt <= range[1] + b = b1 or b2 + return b + + +def combineEdgesN(edges): + ''' + Combine some small line segments, should be very conservative + OUTPUT + lines: combined line segments + ori_lines: original line segments + line format [nx ny nz projectPlaneID umin umax LSfov score] + ''' + arcList = [] + for edge in edges: + panoLst = edge['panoLst'] + if len(panoLst) == 0: + continue + arcList.append(panoLst) + arcList = np.vstack(arcList) + + # ori lines + numLine = len(arcList) + ori_lines = np.zeros((numLine, 8)) + areaXY = np.abs(arcList[:, 2]) + areaYZ = np.abs(arcList[:, 0]) + areaZX = np.abs(arcList[:, 1]) + planeIDs = np.argmax(np.stack([areaXY, areaYZ, areaZX], -1), 1) + 1 # XY YZ ZX + + for i in range(numLine): + ori_lines[i, :3] = arcList[i, :3] + ori_lines[i, 3] = planeIDs[i] + coord1 = arcList[i, 3:6] + coord2 = arcList[i, 6:9] + uv = xyz2uvN(np.stack([coord1, coord2]), planeIDs[i]) + umax = uv[:, 0].max() + np.pi + umin = uv[:, 0].min() + np.pi + if umax - umin > np.pi: + ori_lines[i, 4:6] = np.array([umax, umin]) / 2 / np.pi + else: + ori_lines[i, 4:6] = np.array([umin, umax]) / 2 
/ np.pi + ori_lines[i, 6] = np.arccos(( + np.dot(coord1, coord2) / (np.linalg.norm(coord1) * np.linalg.norm(coord2)) + ).clip(-1, 1)) + ori_lines[i, 7] = arcList[i, 9] + + # additive combination + lines = ori_lines.copy() + for _ in range(3): + numLine = len(lines) + valid_line = np.ones(numLine, bool) + for i in range(numLine): + if not valid_line[i]: + continue + dotProd = (lines[:, :3] * lines[[i], :3]).sum(1) + valid_curr = np.logical_and((np.abs(dotProd) > np.cos(np.pi / 180)), valid_line) + valid_curr[i] = False + for j in np.nonzero(valid_curr)[0]: + range1 = lines[i, 4:6] + range2 = lines[j, 4:6] + valid_rag = _intersection(range1, range2) + if not valid_rag: + continue + + # combine + I = np.argmax(np.abs(lines[i, :3])) + if lines[i, I] * lines[j, I] > 0: + nc = lines[i, :3] * lines[i, 6] + lines[j, :3] * lines[j, 6] + else: + nc = lines[i, :3] * lines[i, 6] - lines[j, :3] * lines[j, 6] + nc = nc / np.linalg.norm(nc) + + if _insideRange(range1[0], range2): + nrmin = range2[0] + else: + nrmin = range1[0] + + if _insideRange(range1[1], range2): + nrmax = range2[1] + else: + nrmax = range1[1] + + u = np.array([[nrmin], [nrmax]]) * 2 * np.pi - np.pi + v = computeUVN(nc, u, lines[i, 3]) + xyz = uv2xyzN(np.hstack([u, v]), lines[i, 3]) + l = np.arccos(np.dot(xyz[0, :], xyz[1, :]).clip(-1, 1)) + scr = (lines[i,6]*lines[i,7] + lines[j,6]*lines[j,7]) / (lines[i,6]+lines[j,6]) + + lines[i] = [*nc, lines[i, 3], nrmin, nrmax, l, scr] + valid_line[j] = False + + lines = lines[valid_line] + + return lines, ori_lines + + +def icosahedron2sphere(level): + # this function use a icosahedron to sample uniformly on a sphere + a = 2 / (1 + np.sqrt(5)) + M = np.array([ + 0, a, -1, a, 1, 0, -a, 1, 0, + 0, a, 1, -a, 1, 0, a, 1, 0, + 0, a, 1, 0, -a, 1, -1, 0, a, + 0, a, 1, 1, 0, a, 0, -a, 1, + 0, a, -1, 0, -a, -1, 1, 0, -a, + 0, a, -1, -1, 0, -a, 0, -a, -1, + 0, -a, 1, a, -1, 0, -a, -1, 0, + 0, -a, -1, -a, -1, 0, a, -1, 0, + -a, 1, 0, -1, 0, a, -1, 0, -a, + -a, -1, 0, -1, 0, -a, -1, 0, a, + a, 1, 0, 1, 0, -a, 1, 0, a, + a, -1, 0, 1, 0, a, 1, 0, -a, + 0, a, 1, -1, 0, a, -a, 1, 0, + 0, a, 1, a, 1, 0, 1, 0, a, + 0, a, -1, -a, 1, 0, -1, 0, -a, + 0, a, -1, 1, 0, -a, a, 1, 0, + 0, -a, -1, -1, 0, -a, -a, -1, 0, + 0, -a, -1, a, -1, 0, 1, 0, -a, + 0, -a, 1, -a, -1, 0, -1, 0, a, + 0, -a, 1, 1, 0, a, a, -1, 0]) + + coor = M.T.reshape(3, 60, order='F').T + coor, idx = np.unique(coor, return_inverse=True, axis=0) + tri = idx.reshape(3, 20, order='F').T + + # extrude + coor = list(coor / np.tile(np.linalg.norm(coor, axis=1, keepdims=True), (1, 3))) + + for _ in range(level): + triN = [] + for t in range(len(tri)): + n = len(coor) + coor.append((coor[tri[t, 0]] + coor[tri[t, 1]]) / 2) + coor.append((coor[tri[t, 1]] + coor[tri[t, 2]]) / 2) + coor.append((coor[tri[t, 2]] + coor[tri[t, 0]]) / 2) + + triN.append([n, tri[t, 0], n+2]) + triN.append([n, tri[t, 1], n+1]) + triN.append([n+1, tri[t, 2], n+2]) + triN.append([n, n+1, n+2]) + tri = np.array(triN) + + # uniquefy + coor, idx = np.unique(coor, return_inverse=True, axis=0) + tri = idx[tri] + + # extrude + coor = list(coor / np.tile(np.sqrt(np.sum(coor * coor, 1, keepdims=True)), (1, 3))) + + return np.array(coor), np.array(tri) + + +def curveFitting(inputXYZ, weight): + ''' + @inputXYZ: N x 3 + @weight : N x 1 + ''' + l = np.linalg.norm(inputXYZ, axis=1, keepdims=True) + inputXYZ = inputXYZ / l + weightXYZ = inputXYZ * weight + XX = np.sum(weightXYZ[:, 0] ** 2) + YY = np.sum(weightXYZ[:, 1] ** 2) + ZZ = np.sum(weightXYZ[:, 2] ** 2) + XY = np.sum(weightXYZ[:, 0] * 
weightXYZ[:, 1]) + YZ = np.sum(weightXYZ[:, 1] * weightXYZ[:, 2]) + ZX = np.sum(weightXYZ[:, 2] * weightXYZ[:, 0]) + + A = np.array([ + [XX, XY, ZX], + [XY, YY, YZ], + [ZX, YZ, ZZ]]) + U, S, Vh = np.linalg.svd(A) + outputNM = Vh[-1, :] + outputNM = outputNM / np.linalg.norm(outputNM) + + return outputNM + + +def sphereHoughVote(segNormal, segLength, segScores, binRadius, orthTolerance, candiSet, force_unempty=True): + # initial guess + numLinesg = len(segNormal) + + voteBinPoints = candiSet.copy() + voteBinPoints = voteBinPoints[~(voteBinPoints[:,2] < 0)] + reversValid = (segNormal[:, 2] < 0).reshape(-1) + segNormal[reversValid] = -segNormal[reversValid] + + voteBinUV = xyz2uvN(voteBinPoints) + numVoteBin = len(voteBinPoints) + voteBinValues = np.zeros(numVoteBin) + for i in range(numLinesg): + tempNorm = segNormal[[i]] + tempDots = (voteBinPoints * tempNorm).sum(1) + + valid = np.abs(tempDots) < np.cos((90 - binRadius) * np.pi / 180) + + voteBinValues[valid] = voteBinValues[valid] + segScores[i] * segLength[i] + + checkIDs1 = np.nonzero(voteBinUV[:, [1]] > np.pi / 3)[0] + voteMax = 0 + checkID1Max = 0 + checkID2Max = 0 + checkID3Max = 0 + + for j in range(len(checkIDs1)): + checkID1 = checkIDs1[j] + vote1 = voteBinValues[checkID1] + if voteBinValues[checkID1] == 0 and force_unempty: + continue + checkNormal = voteBinPoints[[checkID1]] + dotProduct = (voteBinPoints * checkNormal).sum(1) + checkIDs2 = np.nonzero(np.abs(dotProduct) < np.cos((90 - orthTolerance) * np.pi / 180))[0] + + for i in range(len(checkIDs2)): + checkID2 = checkIDs2[i] + if voteBinValues[checkID2] == 0 and force_unempty: + continue + vote2 = vote1 + voteBinValues[checkID2] + cpv = np.cross(voteBinPoints[checkID1], voteBinPoints[checkID2]).reshape(1, 3) + cpn = np.linalg.norm(cpv) + dotProduct = (voteBinPoints * cpv).sum(1) / cpn + checkIDs3 = np.nonzero(np.abs(dotProduct) > np.cos(orthTolerance * np.pi / 180))[0] + + for k in range(len(checkIDs3)): + checkID3 = checkIDs3[k] + if voteBinValues[checkID3] == 0 and force_unempty: + continue + vote3 = vote2 + voteBinValues[checkID3] + if vote3 > voteMax: + lastStepCost = vote3 - voteMax + if voteMax != 0: + tmp = (voteBinPoints[[checkID1Max, checkID2Max, checkID3Max]] * \ + voteBinPoints[[checkID1, checkID2, checkID3]]).sum(1) + lastStepAngle = np.arccos(tmp.clip(-1, 1)) + else: + lastStepAngle = np.zeros(3) + + checkID1Max = checkID1 + checkID2Max = checkID2 + checkID3Max = checkID3 + + voteMax = vote3 + + if checkID1Max == 0: + print('[WARN] sphereHoughVote: no orthogonal voting exist', file=sys.stderr) + return None, 0, 0 + initXYZ = voteBinPoints[[checkID1Max, checkID2Max, checkID3Max]] + + # refine + refiXYZ = np.zeros((3, 3)) + dotprod = (segNormal * initXYZ[[0]]).sum(1) + valid = np.abs(dotprod) < np.cos((90 - binRadius) * np.pi / 180) + validNm = segNormal[valid] + validWt = segLength[valid] * segScores[valid] + validWt = validWt / validWt.max() + refiNM = curveFitting(validNm, validWt) + refiXYZ[0] = refiNM.copy() + + dotprod = (segNormal * initXYZ[[1]]).sum(1) + valid = np.abs(dotprod) < np.cos((90 - binRadius) * np.pi / 180) + validNm = segNormal[valid] + validWt = segLength[valid] * segScores[valid] + validWt = validWt / validWt.max() + validNm = np.vstack([validNm, refiXYZ[[0]]]) + validWt = np.vstack([validWt, validWt.sum(0, keepdims=1) * 0.1]) + refiNM = curveFitting(validNm, validWt) + refiXYZ[1] = refiNM.copy() + + refiNM = np.cross(refiXYZ[0], refiXYZ[1]) + refiXYZ[2] = refiNM / np.linalg.norm(refiNM) + + return refiXYZ, lastStepCost, lastStepAngle + + +def 
findMainDirectionEMA(lines): + '''compute vp from set of lines''' + + # initial guess + segNormal = lines[:, :3] + segLength = lines[:, [6]] + segScores = np.ones((len(lines), 1)) + + shortSegValid = (segLength < 5 * np.pi / 180).reshape(-1) + segNormal = segNormal[~shortSegValid, :] + segLength = segLength[~shortSegValid] + segScores = segScores[~shortSegValid] + + numLinesg = len(segNormal) + candiSet, tri = icosahedron2sphere(3) + ang = np.arccos((candiSet[tri[0,0]] * candiSet[tri[0,1]]).sum().clip(-1, 1)) / np.pi * 180 + binRadius = ang / 2 + initXYZ, score, angle = sphereHoughVote(segNormal, segLength, segScores, 2*binRadius, 2, candiSet) + + if initXYZ is None: + print('[WARN] findMainDirectionEMA: initial failed', file=sys.stderr) + return None, score, angle + + # iterative refine + iter_max = 3 + candiSet, tri = icosahedron2sphere(5) + numCandi = len(candiSet) + angD = np.arccos((candiSet[tri[0, 0]] * candiSet[tri[0, 1]]).sum().clip(-1, 1)) / np.pi * 180 + binRadiusD = angD / 2 + curXYZ = initXYZ.copy() + tol = np.linspace(4*binRadius, 4*binRadiusD, iter_max) # shrink down ls and candi + for it in range(iter_max): + dot1 = np.abs((segNormal * curXYZ[[0]]).sum(1)) + dot2 = np.abs((segNormal * curXYZ[[1]]).sum(1)) + dot3 = np.abs((segNormal * curXYZ[[2]]).sum(1)) + valid1 = dot1 < np.cos((90 - tol[it]) * np.pi / 180) + valid2 = dot2 < np.cos((90 - tol[it]) * np.pi / 180) + valid3 = dot3 < np.cos((90 - tol[it]) * np.pi / 180) + valid = valid1 | valid2 | valid3 + + if np.sum(valid) == 0: + print('[WARN] findMainDirectionEMA: zero line segments for voting', file=sys.stderr) + break + + subSegNormal = segNormal[valid] + subSegLength = segLength[valid] + subSegScores = segScores[valid] + + dot1 = np.abs((candiSet * curXYZ[[0]]).sum(1)) + dot2 = np.abs((candiSet * curXYZ[[1]]).sum(1)) + dot3 = np.abs((candiSet * curXYZ[[2]]).sum(1)) + valid1 = dot1 > np.cos(tol[it] * np.pi / 180) + valid2 = dot2 > np.cos(tol[it] * np.pi / 180) + valid3 = dot3 > np.cos(tol[it] * np.pi / 180) + valid = valid1 | valid2 | valid3 + + if np.sum(valid) == 0: + print('[WARN] findMainDirectionEMA: zero line segments for voting', file=sys.stderr) + break + + subCandiSet = candiSet[valid] + + tcurXYZ, _, _ = sphereHoughVote(subSegNormal, subSegLength, subSegScores, 2*binRadiusD, 2, subCandiSet) + + if tcurXYZ is None: + print('[WARN] findMainDirectionEMA: no answer found', file=sys.stderr) + break + curXYZ = tcurXYZ.copy() + + mainDirect = curXYZ.copy() + mainDirect[0] = mainDirect[0] * np.sign(mainDirect[0,2]) + mainDirect[1] = mainDirect[1] * np.sign(mainDirect[1,2]) + mainDirect[2] = mainDirect[2] * np.sign(mainDirect[2,2]) + + uv = xyz2uvN(mainDirect) + I1 = np.argmax(uv[:,1]) + J = np.setdiff1d(np.arange(3), I1) + I2 = np.argmin(np.abs(np.sin(uv[J,0]))) + I2 = J[I2] + I3 = np.setdiff1d(np.arange(3), np.hstack([I1, I2])) + mainDirect = np.vstack([mainDirect[I1], mainDirect[I2], mainDirect[I3]]) + + mainDirect[0] = mainDirect[0] * np.sign(mainDirect[0,2]) + mainDirect[1] = mainDirect[1] * np.sign(mainDirect[1,1]) + mainDirect[2] = mainDirect[2] * np.sign(mainDirect[2,0]) + + mainDirect = np.vstack([mainDirect, -mainDirect]) + + return mainDirect, score, angle + + +def multi_linspace(start, stop, num): + div = (num - 1) + y = np.arange(0, num, dtype=np.float64) + steps = (stop - start) / div + return steps.reshape(-1, 1) * y + start.reshape(-1, 1) + + +def assignVanishingType(lines, vp, tol, area=10): + numLine = len(lines) + numVP = len(vp) + typeCost = np.zeros((numLine, numVP)) + # perpendicular + for vid in 
range(numVP):
+        cosint = (lines[:, :3] * vp[[vid]]).sum(1)
+        typeCost[:, vid] = np.arcsin(np.abs(cosint).clip(-1, 1))
+
+    # infinity
+    u = np.stack([lines[:, 4], lines[:, 5]], -1)
+    u = u.reshape(-1, 1) * 2 * np.pi - np.pi
+    v = computeUVN_vec(lines[:, :3], u, lines[:, 3])
+    xyz = uv2xyzN_vec(np.hstack([u, v]), np.repeat(lines[:, 3], 2))
+    xyz = multi_linspace(xyz[0::2].reshape(-1), xyz[1::2].reshape(-1), 100)
+    xyz = np.vstack([blk.T for blk in np.split(xyz, numLine)])
+    xyz = xyz / np.linalg.norm(xyz, axis=1, keepdims=True)
+    for vid in range(numVP):
+        ang = np.arccos(np.abs((xyz * vp[[vid]]).sum(1)).clip(-1, 1))
+        notok = (ang < area * np.pi / 180).reshape(numLine, 100).sum(1) != 0
+        typeCost[notok, vid] = 100
+
+    I = typeCost.min(1)
+    tp = typeCost.argmin(1)
+    tp[I > tol] = numVP + 1
+
+    return tp, typeCost
+
+
+def refitLineSegmentB(lines, vp, vpweight=0.1):
+    '''
+    Refit the direction of line segments
+    INPUT:
+        lines: original line segments
+        vp: vanishing point
+        vpweight: if set to 0, lines will not change; if set to inf, lines
+                  will be forced to pass through vp
+    '''
+    numSample = 100
+    numLine = len(lines)
+    xyz = np.zeros((numSample+1, 3))
+    wei = np.ones((numSample+1, 1))
+    wei[numSample] = vpweight * numSample
+    lines_ali = lines.copy()
+    for i in range(numLine):
+        n = lines[i, :3]
+        sid = lines[i, 4] * 2 * np.pi
+        eid = lines[i, 5] * 2 * np.pi
+        if eid < sid:
+            x = np.linspace(sid, eid + 2 * np.pi, numSample) % (2 * np.pi)
+        else:
+            x = np.linspace(sid, eid, numSample)
+        u = -np.pi + x.reshape(-1, 1)
+        v = computeUVN(n, u, lines[i, 3])
+        xyz[:numSample] = uv2xyzN(np.hstack([u, v]), lines[i, 3])
+        xyz[numSample] = vp
+        outputNM = curveFitting(xyz, wei)
+        lines_ali[i, :3] = outputNM
+
+    return lines_ali
+
+
+def paintParameterLine(parameterLine, width, height):
+    lines = parameterLine.copy()
+    panoEdgeC = np.zeros((height, width))
+
+    num_sample = max(height, width)
+    for i in range(len(lines)):
+        n = lines[i, :3]
+        sid = lines[i, 4] * 2 * np.pi
+        eid = lines[i, 5] * 2 * np.pi
+        if eid < sid:
+            x = np.linspace(sid, eid + 2 * np.pi, num_sample)
+            x = x % (2 * np.pi)
+        else:
+            x = np.linspace(sid, eid, num_sample)
+        u = -np.pi + x.reshape(-1, 1)
+        v = computeUVN(n, u, lines[i, 3])
+        xyz = uv2xyzN(np.hstack([u, v]), lines[i, 3])
+        uv = xyz2uvN(xyz, 1)
+        m = np.minimum(np.floor((uv[:,0] + np.pi) / (2 * np.pi) * width) + 1,
+                       width).astype(np.int32)
+        n = np.minimum(np.floor(((np.pi / 2) - uv[:, 1]) / np.pi * height) + 1,
+                       height).astype(np.int32)
+        panoEdgeC[n-1, m-1] = i
+
+    return panoEdgeC
+
+
+def panoEdgeDetection(img, viewSize=320, qError=0.7, refineIter=3):
+    '''
+    line detection on panorama
+    INPUT:
+        img: input image for detection, double type, range 0~1
+        viewSize: image size of the cropped views
+        qError: set smaller if more line segments are wanted
+    OUTPUT:
+        oLines: detected line segments
+        vp: vanishing point
+        views: separate views of panorama
+        edges: original detection of line segments in separate views
+        panoEdge: image for visualizing line segments
+    '''
+    cutSize = viewSize
+    fov = np.pi / 3
+    xh = np.arange(-np.pi, np.pi*5/6, np.pi/6)
+    yh = np.zeros(xh.shape[0])
+    xp = np.array([-3/3, -2/3, -1/3, 0/3, 1/3, 2/3, -3/3, -2/3, -1/3, 0/3, 1/3, 2/3]) * np.pi
+    yp = np.array([ 1/4,  1/4,  1/4, 1/4, 1/4, 1/4, -1/4, -1/4, -1/4, -1/4, -1/4, -1/4]) * np.pi
+    x = np.concatenate([xh, xp, [0, 0]])
+    y = np.concatenate([yh, yp, [np.pi/2., -np.pi/2]])
+
+    sepScene = separatePano(img.copy(), fov, x, y, cutSize)
+    edge = []
+    for i, scene in enumerate(sepScene):
+        edgeMap, edgeList = 
lsdWrap(scene['img']) + edge.append({ + 'img': edgeMap, + 'edgeLst': edgeList, + 'vx': scene['vx'], + 'vy': scene['vy'], + 'fov': scene['fov'], + }) + edge[-1]['panoLst'] = edgeFromImg2Pano(edge[-1]) + lines, olines = combineEdgesN(edge) + + clines = lines.copy() + for _ in range(refineIter): + mainDirect, score, angle = findMainDirectionEMA(clines) + + tp, typeCost = assignVanishingType(lines, mainDirect[:3], 0.1, 10) + lines1 = lines[tp==0] + lines2 = lines[tp==1] + lines3 = lines[tp==2] + + lines1rB = refitLineSegmentB(lines1, mainDirect[0], 0) + lines2rB = refitLineSegmentB(lines2, mainDirect[1], 0) + lines3rB = refitLineSegmentB(lines3, mainDirect[2], 0) + + clines = np.vstack([lines1rB, lines2rB, lines3rB]) + + panoEdge1r = paintParameterLine(lines1rB, img.shape[1], img.shape[0]) + panoEdge2r = paintParameterLine(lines2rB, img.shape[1], img.shape[0]) + panoEdge3r = paintParameterLine(lines3rB, img.shape[1], img.shape[0]) + panoEdger = np.stack([panoEdge1r, panoEdge2r, panoEdge3r], -1) + + # output + olines = clines + vp = mainDirect + views = sepScene + edges = edge + panoEdge = panoEdger + + return olines, vp, views, edges, panoEdge, score, angle + + +if __name__ == '__main__': + + # disable OpenCV3's non thread safe OpenCL option + cv2.ocl.setUseOpenCL(False) + + import os + import argparse + import PIL + from PIL import Image + import time + + parser = argparse.ArgumentParser() + parser.add_argument('--i', required=True) + parser.add_argument('--o_prefix', required=True) + parser.add_argument('--qError', default=0.7, type=float) + parser.add_argument('--refineIter', default=3, type=int) + args = parser.parse_args() + + # Read image + img_ori = np.array(Image.open(args.i).resize((1024, 512))) + + # Vanishing point estimation & Line segments detection + s_time = time.time() + olines, vp, views, edges, panoEdge, score, angle = panoEdgeDetection(img_ori, + qError=args.qError, + refineIter=args.refineIter) + print('Elapsed time: %.2f' % (time.time() - s_time)) + panoEdge = (panoEdge > 0) + + print('Vanishing point:') + for v in vp[2::-1]: + print('%.6f %.6f %.6f' % tuple(v)) + + # Visualization + edg = rotatePanorama(panoEdge.astype(np.float64), vp[2::-1]) + img = rotatePanorama(img_ori / 255.0, vp[2::-1]) + one = img.copy() * 0.5 + one[(edg > 0.5).sum(-1) > 0] = 0 + one[edg[..., 0] > 0.5, 0] = 1 + one[edg[..., 1] > 0.5, 1] = 1 + one[edg[..., 2] > 0.5, 2] = 1 + Image.fromarray((edg * 255).astype(np.uint8)).save('%s_edg.png' % args.o_prefix) + Image.fromarray((img * 255).astype(np.uint8)).save('%s_img.png' % args.o_prefix) + Image.fromarray((one * 255).astype(np.uint8)).save('%s_one.png' % args.o_prefix) diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..5229c031f13b9a08189f154c7175bab18349eff0 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,14 @@ +numpy==1.21.0 +torch==1.7.1 +torchvision==0.8.2 +yacs==0.1.8 +einops==0.3.0 +opencv-python==4.5.3.56 +pylsd-nova==1.2.0 +tqdm==4.64.0 +scipy==1.8.1 +termcolor==1.1.0 +shapely==1.8.2 +imageio==2.19.2 +open3d==0.15.2 +gdown==4.4.0 \ No newline at end of file diff --git a/src/config/ablation_study/full.yaml b/src/config/ablation_study/full.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1d0cff48c202744c9e340f69269f91278b9ab7c7 --- /dev/null +++ b/src/config/ablation_study/full.yaml @@ -0,0 +1,43 @@ +COMMENT: 'Ablation Study: Ours (full) on MatterportLayout' +VAL_NAME: 'test' +TAG: 'ablation_study_full' +SEED: 123 +MODEL: + NAME: 'LGT_Net' + ARGS: [ { + 
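+      # decoder hyper-parameters forwarded to LGT_Net: 'SWG_Transformer'
+      # stacks shifted-window (Swin) and global attention blocks, 'win_size'
+      # is the attention window width, and 'rpe' selects the relative
+      # position encoding variant (cf. the w_g_rpe*/wo_pe configs below).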
'decoder_name': 'SWG_Transformer', + 'win_size': 16, + 'rpe': 'lr_parameter_mirror', + 'dropout': 0.0, + 'output_name': 'LGT' + } ] +TRAIN: + DEVICE: 'cuda:0' + SCRATCH: False + DETERMINISTIC: True + CRITERION: + DEPTH: + WEIGHT: 0.9 + RATIO: + WEIGHT: 0.1 + GRAD: + WEIGHT: 0.1 + WEIGHTS: [ 1.0, 1.0 ] + BASE_LR: + 1e-4 + RESUME_LAST: False + OPTIMIZER: + NAME: 'adam' + EPOCHS: 1000 +DATA: + DATASET: 'mp3d' + DIR: 'src/dataset/mp3d' + BATCH_SIZE: 6 + NUM_WORKERS: 6 + FOR_TEST_INDEX: None + AUG: + FLIP: True + STRETCH: True + ROTATE: True + GAMMA: True +AMP_OPT_LEVEL: 'O0' diff --git a/src/config/ablation_study/w_ape.yaml b/src/config/ablation_study/w_ape.yaml new file mode 100644 index 0000000000000000000000000000000000000000..070c9a3d3ca3b72b7576dd38902d8e9cef8265e0 --- /dev/null +++ b/src/config/ablation_study/w_ape.yaml @@ -0,0 +1,44 @@ +COMMENT: 'Ablation Study: w APE on MatterportLayout' +VAL_NAME: 'test' +TAG: 'ablation_study_w_ape' +SEED: 123 +MODEL: + NAME: 'LGT_Net' + ARGS: [ { + 'decoder_name': 'SWG_Transformer', + 'win_size': 16, + 'ape': 'lr_parameter', + 'rpe': None, + 'dropout': 0.0, + 'output_name': 'LGT' + } ] +TRAIN: + DEVICE: 'cuda:0' + SCRATCH: False + DETERMINISTIC: True + CRITERION: + DEPTH: + WEIGHT: 0.9 + RATIO: + WEIGHT: 0.1 + GRAD: + WEIGHT: 0.1 + WEIGHTS: [ 1.0, 1.0 ] + BASE_LR: + 1e-4 + RESUME_LAST: False + OPTIMIZER: + NAME: 'adam' + EPOCHS: 1000 +DATA: + DATASET: 'mp3d' + DIR: 'src/dataset/mp3d' + BATCH_SIZE: 6 + NUM_WORKERS: 6 + FOR_TEST_INDEX: None + AUG: + FLIP: True + STRETCH: True + ROTATE: True + GAMMA: True +AMP_OPT_LEVEL: 'O0' diff --git a/src/config/ablation_study/w_g_rpe1.yaml b/src/config/ablation_study/w_g_rpe1.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d81adfd2460e1f2d18d266b7c6965f2a82da9e9c --- /dev/null +++ b/src/config/ablation_study/w_g_rpe1.yaml @@ -0,0 +1,43 @@ +COMMENT: 'Ablation Study: w G-RPE1 on MatterportLayout' +VAL_NAME: 'test' +TAG: 'ablation_study_w_g_rpe1' +SEED: 123 +MODEL: + NAME: 'LGT_Net' + ARGS: [ { + 'decoder_name': 'SWG_Transformer', + 'win_size': 16, + 'rpe': 'lr_parameter', + 'dropout': 0.0, + 'output_name': 'LGT' + } ] +TRAIN: + DEVICE: 'cuda:0' + SCRATCH: False + DETERMINISTIC: True + CRITERION: + DEPTH: + WEIGHT: 0.9 + RATIO: + WEIGHT: 0.1 + GRAD: + WEIGHT: 0.1 + WEIGHTS: [ 1.0, 1.0 ] + BASE_LR: + 1e-4 + RESUME_LAST: False + OPTIMIZER: + NAME: 'adam' + EPOCHS: 1000 +DATA: + DATASET: 'mp3d' + DIR: 'src/dataset/mp3d' + BATCH_SIZE: 6 + NUM_WORKERS: 6 + FOR_TEST_INDEX: None + AUG: + FLIP: True + STRETCH: True + ROTATE: True + GAMMA: True +AMP_OPT_LEVEL: 'O0' diff --git a/src/config/ablation_study/w_g_rpe2.yaml b/src/config/ablation_study/w_g_rpe2.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2c1fb103b8b094202b74989481f234346aa609a5 --- /dev/null +++ b/src/config/ablation_study/w_g_rpe2.yaml @@ -0,0 +1,43 @@ +COMMENT: 'Ablation Study: w G-RPE2 on MatterportLayout' +VAL_NAME: 'test' +TAG: 'ablation_study_w_g_rpe2' +SEED: 123 +MODEL: + NAME: 'LGT_Net' + ARGS: [ { + 'decoder_name': 'SWG_Transformer', + 'win_size': 16, + 'rpe': 'lr_parameter_half', + 'dropout': 0.0, + 'output_name': 'LGT' + } ] +TRAIN: + DEVICE: 'cuda:0' + SCRATCH: False + DETERMINISTIC: True + CRITERION: + DEPTH: + WEIGHT: 0.9 + RATIO: + WEIGHT: 0.1 + GRAD: + WEIGHT: 0.1 + WEIGHTS: [ 1.0, 1.0 ] + BASE_LR: + 1e-4 + RESUME_LAST: False + OPTIMIZER: + NAME: 'adam' + EPOCHS: 1000 +DATA: + DATASET: 'mp3d' + DIR: 'src/dataset/mp3d' + BATCH_SIZE: 6 + NUM_WORKERS: 6 + FOR_TEST_INDEX: None + AUG: + FLIP: True + 
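+    # STRETCH/ROTATE/GAMMA below are pano-specific: x-z stretching, circular
+    # horizontal rotation, and luminance jitter (the augmentation set used by
+    # HorizonNet-style pipelines).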
STRETCH: True + ROTATE: True + GAMMA: True +AMP_OPT_LEVEL: 'O0' diff --git a/src/config/ablation_study/w_lstm.yaml b/src/config/ablation_study/w_lstm.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5693b7c30022ba3f08b57417e1f92a051abb2544 --- /dev/null +++ b/src/config/ablation_study/w_lstm.yaml @@ -0,0 +1,41 @@ +COMMENT: 'Ablation Study: w/ Bi-LSTM on MatterportLayout' +VAL_NAME: 'test' +TAG: 'ablation_study_w_lstm' +SEED: 123 +MODEL: + NAME: 'LGT_Net' + ARGS: [{ + 'decoder_name': 'LSTM', + 'output_name': 'LGT', + 'dropout': 0.5, + }] +TRAIN: + DEVICE: 'cuda:0' + SCRATCH: False + DETERMINISTIC: True + CRITERION: + DEPTH: + WEIGHT: 0.9 + RATIO: + WEIGHT: 0.1 + GRAD: + WEIGHT: 0.1 + WEIGHTS: [ 1.0, 1.0 ] + BASE_LR: + 1e-04 + RESUME_LAST: False + OPTIMIZER: + NAME: 'adam' + EPOCHS: 1000 +DATA: + DATASET: 'mp3d' + DIR: 'src/dataset/mp3d' + BATCH_SIZE: 6 + NUM_WORKERS: 6 + FOR_TEST_INDEX: None + AUG: + FLIP: True + STRETCH: True + ROTATE: True + GAMMA: True +AMP_OPT_LEVEL: 'O0' diff --git a/src/config/ablation_study/w_vit.yaml b/src/config/ablation_study/w_vit.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b263fc268733dd1be50216af172a9442e92d9286 --- /dev/null +++ b/src/config/ablation_study/w_vit.yaml @@ -0,0 +1,46 @@ +COMMENT: 'Ablation Study: w/ ViT on MatterportLayout' +VAL_NAME: 'test' +TAG: 'ablation_study_w_vit' +SEED: 123 +MODEL: + NAME: 'LGT_Net' + ARGS: [ { + 'decoder_name': 'SWG_Transformer', + 'backbone': 'patch', + 'win_size': 16, + 'rpe': 'lr_parameter_mirror', + 'dropout': 0.0, + 'depth': 16, + 'output_name': 'LGT' + } ] +TRAIN: + DEVICE: 'cuda:0' + SCRATCH: False + DETERMINISTIC: True + CRITERION: + DEPTH: + WEIGHT: 0.9 + RATIO: + WEIGHT: 0.1 + GRAD: + WEIGHT: 0.1 + WEIGHTS: [ 1.0, 1.0 ] + BASE_LR: + 1e-4 + RESUME_LAST: False + OPTIMIZER: + NAME: 'adam' + EPOCHS: 1000 +DATA: + DATASET: 'mp3d' + DIR: 'src/dataset/mp3d' + BATCH_SIZE: 6 + NUM_WORKERS: 6 + FOR_TEST_INDEX: None + AUG: + FLIP: True + STRETCH: True + ROTATE: True + GAMMA: True +AMP_OPT_LEVEL: 'O0' + diff --git a/src/config/ablation_study/w_vit_zind.yaml b/src/config/ablation_study/w_vit_zind.yaml new file mode 100644 index 0000000000000000000000000000000000000000..750a78c1aec344d675fcd1cb003df2ad9acde26f --- /dev/null +++ b/src/config/ablation_study/w_vit_zind.yaml @@ -0,0 +1,46 @@ +COMMENT: 'Ablation Study: w/ ViT on ZInd' +VAL_NAME: 'test' +TAG: 'ablation_study_w_vit_zind' +SEED: 123 +MODEL: + NAME: 'LGT_Net' + ARGS: [ { + 'decoder_name': 'SWG_Transformer', + 'backbone': 'patch', + 'win_size': 16, + 'rpe': 'lr_parameter_mirror', + 'dropout': 0.0, + 'depth': 16, + 'output_name': 'LGT' + } ] +TRAIN: + DEVICE: 'cuda:0' + SCRATCH: False + DETERMINISTIC: True + CRITERION: + DEPTH: + WEIGHT: 0.9 + RATIO: + WEIGHT: 0.1 + GRAD: + WEIGHT: 0.1 + WEIGHTS: [ 1.0, 1.0 ] + BASE_LR: + 1e-4 + RESUME_LAST: False + OPTIMIZER: + NAME: 'adam' + EPOCHS: 200 +DATA: + DATASET: 'zind' + DIR: 'src/dataset/zind' + BATCH_SIZE: 6 + NUM_WORKERS: 6 + FOR_TEST_INDEX: None + AUG: + FLIP: True + STRETCH: True + ROTATE: True + GAMMA: True +AMP_OPT_LEVEL: 'O0' + diff --git a/src/config/ablation_study/wo_global.yaml b/src/config/ablation_study/wo_global.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8e168205322de45cf4f88a152c36dbf92a4d015a --- /dev/null +++ b/src/config/ablation_study/wo_global.yaml @@ -0,0 +1,43 @@ +COMMENT: 'Ablation Study: w/o Global Block on MatterportLayout' +VAL_NAME: 'test' +TAG: 'ablation_study_wo_global' +SEED: 123 +MODEL: + NAME: 'LGT_Net' + 
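+  # w/o Global Block ablation: the decoder below is 'Swin_Transformer'
+  # (window attention only) rather than the full model's 'SWG_Transformer'.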
ARGS: [{ + 'decoder_name': 'Swin_Transformer', + 'win_size': 16, + 'rpe':'lr_parameter_mirror', + 'dropout': 0.0, + 'output_name': 'LGT' + }] +TRAIN: + DEVICE: 'cuda:0' + SCRATCH: False + DETERMINISTIC: True + CRITERION: + DEPTH: + WEIGHT: 0.9 + RATIO: + WEIGHT: 0.1 + GRAD: + WEIGHT: 0.1 + WEIGHTS: [ 1.0, 1.0 ] + BASE_LR: + 1e-4 + RESUME_LAST: False + OPTIMIZER: + NAME: 'adam' + EPOCHS: 1000 +DATA: + DATASET: 'mp3d' + DIR: 'src/dataset/mp3d' + BATCH_SIZE: 6 + NUM_WORKERS: 6 + FOR_TEST_INDEX: None + AUG: + FLIP: True + STRETCH: True + ROTATE: True + GAMMA: True +AMP_OPT_LEVEL: 'O0' diff --git a/src/config/ablation_study/wo_gradient.yaml b/src/config/ablation_study/wo_gradient.yaml new file mode 100644 index 0000000000000000000000000000000000000000..74b19c79af1a9afd1c999b622edb913ffea72d71 --- /dev/null +++ b/src/config/ablation_study/wo_gradient.yaml @@ -0,0 +1,43 @@ +COMMENT: 'Ablation Study: w/o Gradient on MatterportLayout' +VAL_NAME: 'test' +TAG: 'ablation_study_wo_gradient' +SEED: 123 +MODEL: + NAME: 'LGT_Net' + ARGS: [ { + 'decoder_name': 'SWG_Transformer', + 'win_size': 16, + 'rpe': 'lr_parameter_mirror', + 'dropout': 0.0, + 'output_name': 'LGT' + } ] +TRAIN: + DEVICE: 'cuda:0' + SCRATCH: False + DETERMINISTIC: True + CRITERION: + DEPTH: + WEIGHT: 0.9 + RATIO: + WEIGHT: 0.1 + GRAD: + WEIGHT: 0.1 + WEIGHTS: [ 1.0, 0.0 ] # only normal loss + BASE_LR: + 1e-4 + RESUME_LAST: False + OPTIMIZER: + NAME: 'adam' + EPOCHS: 1000 +DATA: + DATASET: 'mp3d' + DIR: 'src/dataset/mp3d' + BATCH_SIZE: 6 + NUM_WORKERS: 6 + FOR_TEST_INDEX: None + AUG: + FLIP: True + STRETCH: True + ROTATE: True + GAMMA: True +AMP_OPT_LEVEL: 'O0' \ No newline at end of file diff --git a/src/config/ablation_study/wo_height.yaml b/src/config/ablation_study/wo_height.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f65dec84fbfc02223d08e8715e1f6f501622539b --- /dev/null +++ b/src/config/ablation_study/wo_height.yaml @@ -0,0 +1,40 @@ +COMMENT: 'Ablation Study: w/o Height on MatterportLayout' +VAL_NAME: 'test' +TAG: 'ablation_study_wo_height' +SEED: 123 +MODEL: + NAME: 'LGT_Net' + ARGS: [ { + 'decoder_name': 'SWG_Transformer', + 'win_size': 16, + 'rpe': 'lr_parameter_mirror', + 'dropout': 0.0, + 'output_name': 'LED' + } ] +TRAIN: + DEVICE: 'cuda:0' + SCRATCH: False + DETERMINISTIC: True + CRITERION: + DEPTH: + WEIGHT: 1.0 + LOSS: 'LEDLoss' + NEED_ALL: True + BASE_LR: + 1e-4 + RESUME_LAST: False + OPTIMIZER: + NAME: 'sgd' + EPOCHS: 1000 +DATA: + DATASET: 'mp3d' + DIR: 'src/dataset/mp3d' + BATCH_SIZE: 6 + NUM_WORKERS: 6 + FOR_TEST_INDEX: None + AUG: + FLIP: True + STRETCH: True + ROTATE: True + GAMMA: True +AMP_OPT_LEVEL: 'O0' \ No newline at end of file diff --git a/src/config/ablation_study/wo_nomal_gradient.yaml b/src/config/ablation_study/wo_nomal_gradient.yaml new file mode 100644 index 0000000000000000000000000000000000000000..71128445b203c1e92d2937baebff3e820e94969a --- /dev/null +++ b/src/config/ablation_study/wo_nomal_gradient.yaml @@ -0,0 +1,40 @@ +COMMENT: 'Ablation Study: w/o Normal+Gradient on MatterportLayout' +VAL_NAME: 'test' +TAG: 'ablation_study_wo_normal_gradient' +SEED: 123 +MODEL: + NAME: 'LGT_Net' + ARGS: [ { + 'decoder_name': 'SWG_Transformer', + 'win_size': 16, + 'rpe': 'lr_parameter_mirror', + 'dropout': 0.0, + 'output_name': 'LGT' + } ] +TRAIN: + DEVICE: 'cuda:0' + SCRATCH: False + DETERMINISTIC: True + CRITERION: + DEPTH: + WEIGHT: 0.9 + RATIO: + WEIGHT: 0.1 + EPOCHS: 1000 + BASE_LR: + 1e-4 + RESUME_LAST: False + OPTIMIZER: + NAME: 'adam' +DATA: + DATASET: 'mp3d' + DIR: 
'src/dataset/mp3d' + BATCH_SIZE: 6 + NUM_WORKERS: 6 + FOR_TEST_INDEX: None + AUG: + FLIP: True + STRETCH: True + ROTATE: True + GAMMA: True +AMP_OPT_LEVEL: 'O0' \ No newline at end of file diff --git a/src/config/ablation_study/wo_pe.yaml b/src/config/ablation_study/wo_pe.yaml new file mode 100644 index 0000000000000000000000000000000000000000..516503900f21021e7d37e75268ea7dc236a6f078 --- /dev/null +++ b/src/config/ablation_study/wo_pe.yaml @@ -0,0 +1,43 @@ +COMMENT: 'Ablation Study: w/o PE on MatterportLayout' +VAL_NAME: 'test' +TAG: 'ablation_study_wo_pe' +SEED: 123 +MODEL: + NAME: 'LGT_Net' + ARGS: [ { + 'decoder_name': 'SWG_Transformer', + 'win_size': 16, + 'rpe': None, + 'dropout': 0.0, + 'output_name': 'LGT' + } ] +TRAIN: + DEVICE: 'cuda:0' + SCRATCH: False + DETERMINISTIC: True + CRITERION: + DEPTH: + WEIGHT: 0.9 + RATIO: + WEIGHT: 0.1 + GRAD: + WEIGHT: 0.1 + WEIGHTS: [ 1.0, 1.0 ] + BASE_LR: + 1e-4 + RESUME_LAST: False + OPTIMIZER: + NAME: 'adam' + EPOCHS: 1000 +DATA: + DATASET: 'mp3d' + DIR: 'src/dataset/mp3d' + BATCH_SIZE: 6 + NUM_WORKERS: 6 + FOR_TEST_INDEX: None + AUG: + FLIP: True + STRETCH: True + ROTATE: True + GAMMA: True +AMP_OPT_LEVEL: 'O0' diff --git a/src/config/ablation_study/wo_window.yaml b/src/config/ablation_study/wo_window.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b0d6657853bd0eaa8db9e334708f36b127ba7739 --- /dev/null +++ b/src/config/ablation_study/wo_window.yaml @@ -0,0 +1,44 @@ +COMMENT: 'Ablation Study: w/o Window Block on MatterportLayout' +VAL_NAME: 'test' +TAG: 'ablation_study_wo_window' +SEED: 123 +MODEL: + NAME: 'LGT_Net' + ARGS: [{ + 'decoder_name': 'Transformer', + 'win_size': 16, + 'rpe':'lr_parameter_mirror', + 'dropout': 0.0, + 'output_name': 'LGT' + }] +TRAIN: + DEVICE: 'cuda:0' + SCRATCH: False + DETERMINISTIC: True + CRITERION: + DEPTH: + WEIGHT: 0.9 + RATIO: + WEIGHT: 0.1 + GRAD: + WEIGHT: 0.1 + WEIGHTS: [ 1.0, 1.0 ] + BASE_LR: + 1e-4 + RESUME_LAST: False + OPTIMIZER: + NAME: 'adam' + EPOCHS: 1000 +DATA: + DATASET: 'mp3d' + DIR: 'src/dataset/mp3d' + BATCH_SIZE: 6 + NUM_WORKERS: 6 + FOR_TEST_INDEX: None + AUG: + FLIP: True + STRETCH: True + ROTATE: True + GAMMA: True +AMP_OPT_LEVEL: 'O0' + diff --git a/src/config/mp3d.yaml b/src/config/mp3d.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cc03283aa34245011449fe70d296649e197fc47b --- /dev/null +++ b/src/config/mp3d.yaml @@ -0,0 +1,43 @@ +COMMENT: 'Training on MatterportLayout' +TAG: 'mp3d' +SEED: 123 +MODEL: + NAME: 'LGT_Net' + ARGS: [ { + 'decoder_name': 'SWG_Transformer', + 'win_size': 16, + 'rpe': 'lr_parameter_mirror', + 'dropout': 0.0, + 'depth': 8, + 'output_name': 'LGT' + } ] +TRAIN: + DEVICE: 'cuda:0' + SCRATCH: False + DETERMINISTIC: True + CRITERION: + DEPTH: + WEIGHT: 0.9 + RATIO: + WEIGHT: 0.1 + GRAD: + WEIGHT: 0.1 + WEIGHTS: [ 1.0, 1.0 ] + BASE_LR: + 1e-4 + RESUME_LAST: False + OPTIMIZER: + NAME: 'adam' + EPOCHS: 1000 +DATA: + DATASET: 'mp3d' + DIR: 'src/dataset/mp3d' + BATCH_SIZE: 6 + NUM_WORKERS: 6 + FOR_TEST_INDEX: None + AUG: + FLIP: True + STRETCH: True + ROTATE: True + GAMMA: True +AMP_OPT_LEVEL: 'O0' diff --git a/src/config/mp3d_scheduler.yaml b/src/config/mp3d_scheduler.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4598fdfd26cda5fec9365a78edd11d01309d77e6 --- /dev/null +++ b/src/config/mp3d_scheduler.yaml @@ -0,0 +1,47 @@ +COMMENT: 'Training on MatterportLayout' +VAL_NAME: 'test' +TAG: 'mp3d_scheduler' +SEED: 123 +MODEL: + NAME: 'LGT_Net' + ARGS: [ { + 'decoder_name': 'SWG_Transformer', + 
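+      # same decoder settings as mp3d.yaml; this config mainly differs in
+      # BASE_LR (3e-4 vs 1e-4) and the StepLR schedule below
+      # (lr is multiplied by 0.9 every 20 epochs).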
'win_size': 16, + 'rpe': 'lr_parameter_mirror', + 'dropout': 0.0, + 'depth': 8, + 'output_name': 'LGT' + } ] +TRAIN: + DEVICE: 'cuda:0' + SCRATCH: False + DETERMINISTIC: True + CRITERION: + DEPTH: + WEIGHT: 0.9 + RATIO: + WEIGHT: 0.1 + GRAD: + WEIGHT: 0.1 + WEIGHTS: [ 1.0, 1.0 ] + BASE_LR: + 3e-4 + RESUME_LAST: False + OPTIMIZER: + NAME: 'adam' + EPOCHS: 1000 + LR_SCHEDULER: + NAME: 'StepLR' + ARGS: [ { 'step_size': 20, 'gamma': 0.9, 'last_epoch': -1} ] +DATA: + DATASET: 'mp3d' + DIR: 'src/dataset/mp3d' + BATCH_SIZE: 6 + NUM_WORKERS: 6 + FOR_TEST_INDEX: None + AUG: + FLIP: True + STRETCH: True + ROTATE: True + GAMMA: True +AMP_OPT_LEVEL: 'O0' diff --git a/src/config/other/horizon_net_mp3d.yaml b/src/config/other/horizon_net_mp3d.yaml new file mode 100644 index 0000000000000000000000000000000000000000..98b612f4cc5c76674cf50214264c330fd0d3f487 --- /dev/null +++ b/src/config/other/horizon_net_mp3d.yaml @@ -0,0 +1,37 @@ +COMMENT: 'Using our framework to implement HorizonNet, Training on MatterportLayout' +TAG: 'mp3d' +SEED: 123 +MODEL: + NAME: 'LGT_Net' + ARGS: [ { + 'decoder_name': 'LSTM', + 'output_name': 'Horizon', + 'dropout': 0.5, + } ] +TRAIN: + DEVICE: 'cuda:0' + SCRATCH: False + DETERMINISTIC: True + CRITERION: + DEPTH: + WEIGHT: 1.0 + LOSS: 'BoundaryLoss' + NEED_ALL: True + BASE_LR: + 3e-4 + EPOCHS: 1000 + RESUME_LAST: True + OPTIMIZER: + NAME: 'adam' +DATA: + DATASET: 'mp3d' + DIR: 'src/dataset/mp3d' + BATCH_SIZE: 6 + NUM_WORKERS: 6 + FOR_TEST_INDEX: None + AUG: + FLIP: True + STRETCH: True + ROTATE: True + GAMMA: True +AMP_OPT_LEVEL: 'O0' \ No newline at end of file diff --git a/src/config/other/horizon_net_zind.yaml b/src/config/other/horizon_net_zind.yaml new file mode 100644 index 0000000000000000000000000000000000000000..88a125ab5c3a620876d38f78d424bf9b95d0d7a5 --- /dev/null +++ b/src/config/other/horizon_net_zind.yaml @@ -0,0 +1,37 @@ +COMMENT: 'Using our framework to implement HorizonNet, Training on ZInd' +TAG: 'zind' +SEED: 123 +MODEL: + NAME: 'LGT_Net' + ARGS: [ { + 'decoder_name': 'LSTM', + 'output_name': 'Horizon', + 'dropout': 0.5, + } ] +TRAIN: + DEVICE: 'cuda:0' + SCRATCH: False + DETERMINISTIC: True + CRITERION: + DEPTH: + WEIGHT: 1.0 + LOSS: 'BoundaryLoss' + NEED_ALL: True + BASE_LR: + 3e-4 + EPOCHS: 200 + RESUME_LAST: True + OPTIMIZER: + NAME: 'adam' +DATA: + DATASET: 'zind' + DIR: 'src/dataset/zind' + BATCH_SIZE: 6 + NUM_WORKERS: 6 + FOR_TEST_INDEX: None + AUG: + FLIP: True + STRETCH: True + ROTATE: True + GAMMA: True +AMP_OPT_LEVEL: 'O0' \ No newline at end of file diff --git a/src/config/other/led_net_mp3d.yaml b/src/config/other/led_net_mp3d.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3412cdf4ac67a3da61d7c62f8e1d553981f2353b --- /dev/null +++ b/src/config/other/led_net_mp3d.yaml @@ -0,0 +1,37 @@ +COMMENT: 'Using our framework to implement LED2-Net, Training on MatterportLayout' +TAG: 'mp3d' +SEED: 123 +MODEL: + NAME: 'LGT_Net' + ARGS: [ { + 'decoder_name': 'LSTM', + 'output_name': 'LED', + 'dropout': 0.5, + } ] +TRAIN: + DEVICE: 'cuda:0' + SCRATCH: False + DETERMINISTIC: True + CRITERION: + DEPTH: + WEIGHT: 1.0 + LOSS: 'LEDLoss' + NEED_ALL: True + BASE_LR: + 3e-4 + EPOCHS: 1000 + RESUME_LAST: False + OPTIMIZER: + NAME: 'adam' +DATA: + DATASET: 'mp3d' + DIR: 'src/dataset/mp3d' + BATCH_SIZE: 6 + NUM_WORKERS: 6 + FOR_TEST_INDEX: None + AUG: + FLIP: True + STRETCH: True + ROTATE: True + GAMMA: True +AMP_OPT_LEVEL: 'O0' \ No newline at end of file diff --git a/src/config/other/led_net_zind.yaml b/src/config/other/led_net_zind.yaml new 
file mode 100644 index 0000000000000000000000000000000000000000..ec3b6c106849cb90d8dce7ae8e281ce8ac7888fe --- /dev/null +++ b/src/config/other/led_net_zind.yaml @@ -0,0 +1,37 @@ +COMMENT: 'Using our framework to implement LED2-Net, Training on ZInd' +TAG: 'zind' +SEED: 123 +MODEL: + NAME: 'LGT_Net' + ARGS: [ { + 'decoder_name': 'LSTM', + 'output_name': 'LED', + 'dropout': 0.5, + } ] +TRAIN: + DEVICE: 'cuda:0' + SCRATCH: False + DETERMINISTIC: True + CRITERION: + DEPTH: + WEIGHT: 1.0 + LOSS: 'LEDLoss' + NEED_ALL: True + BASE_LR: + 3e-4 + EPOCHS: 200 + RESUME_LAST: False + OPTIMIZER: + NAME: 'adam' +DATA: + DATASET: 'zind' + DIR: 'src/dataset/zind' + BATCH_SIZE: 6 + NUM_WORKERS: 6 + FOR_TEST_INDEX: None + AUG: + FLIP: True + STRETCH: True + ROTATE: True + GAMMA: True +AMP_OPT_LEVEL: 'O0' \ No newline at end of file diff --git a/src/config/pano.yaml b/src/config/pano.yaml new file mode 100644 index 0000000000000000000000000000000000000000..369a18ef3a636567bb1861fc1eb9550657d22338 --- /dev/null +++ b/src/config/pano.yaml @@ -0,0 +1,44 @@ +COMMENT: 'Training on PanoContext(train)+Stanford2D-3D(whole)' +TAG: 'pano' +SEED: 123 +MODEL: + NAME: 'LGT_Net' + ARGS: [ { + 'decoder_name': 'SWG_Transformer', + 'win_size': 16, + 'rpe': 'lr_parameter_mirror', + 'dropout': 0.0, + 'depth': 6, + 'output_name': 'LGT' + } ] +TRAIN: + DEVICE: 'cuda:0' + SCRATCH: False + DETERMINISTIC: True + CRITERION: + DEPTH: + WEIGHT: 0.9 + RATIO: + WEIGHT: 0.1 + GRAD: + WEIGHT: 0.1 + WEIGHTS: [ 1.0, 1.0 ] + BASE_LR: + 1e-4 + RESUME_LAST: False + OPTIMIZER: + NAME: 'adam' + EPOCHS: 1000 +DATA: + DATASET: 'pano_s2d3d_mix' + DIR: 'src/dataset/pano_s2d3d' + SUBSET: 'pano' + BATCH_SIZE: 6 + NUM_WORKERS: 6 + FOR_TEST_INDEX: None + AUG: + FLIP: True + STRETCH: True + ROTATE: True + GAMMA: True +AMP_OPT_LEVEL: 'O0' diff --git a/src/config/s2d3d.yaml b/src/config/s2d3d.yaml new file mode 100644 index 0000000000000000000000000000000000000000..570f352c6ae010fa51e8260311389f9307045a65 --- /dev/null +++ b/src/config/s2d3d.yaml @@ -0,0 +1,44 @@ +COMMENT: 'Training on Stanford2D-3D(train)+PanoContext(whole)' +TAG: 's2d3d' +SEED: 123 +MODEL: + NAME: 'LGT_Net' + ARGS: [ { + 'decoder_name': 'SWG_Transformer', + 'win_size': 16, + 'rpe': 'lr_parameter_mirror', + 'dropout': 0.0, + 'depth': 6, + 'output_name': 'LGT' + } ] +TRAIN: + DEVICE: 'cuda:2' + SCRATCH: False + DETERMINISTIC: True + CRITERION: + DEPTH: + WEIGHT: 0.9 + RATIO: + WEIGHT: 0.1 + GRAD: + WEIGHT: 0.1 + WEIGHTS: [ 1.0, 1.0 ] + BASE_LR: + 1e-4 + RESUME_LAST: False + OPTIMIZER: + NAME: 'adam' + EPOCHS: 1000 +DATA: + DATASET: 'pano_s2d3d_mix' + DIR: 'src/dataset/pano_s2d3d' + SUBSET: 's2d3d' + BATCH_SIZE: 6 + NUM_WORKERS: 6 + FOR_TEST_INDEX: None + AUG: + FLIP: True + STRETCH: True + ROTATE: True + GAMMA: True +AMP_OPT_LEVEL: 'O0' diff --git a/src/config/zind.yaml b/src/config/zind.yaml new file mode 100644 index 0000000000000000000000000000000000000000..463034fb66d50d345b0ccaf41c51e171923be664 --- /dev/null +++ b/src/config/zind.yaml @@ -0,0 +1,43 @@ +COMMENT: 'Training on ZInd' +TAG: 'zind' +SEED: 123 +MODEL: + NAME: 'LGT_Net' + ARGS: [ { + 'decoder_name': 'SWG_Transformer', + 'win_size': 16, + 'rpe': 'lr_parameter_mirror', + 'dropout': 0.0, + 'depth': 6, + 'output_name': 'LGT' + } ] +TRAIN: + DEVICE: 'cuda:0' + SCRATCH: False + DETERMINISTIC: True + CRITERION: + DEPTH: + WEIGHT: 0.9 + RATIO: + WEIGHT: 0.1 + GRAD: + WEIGHT: 0.1 + WEIGHTS: [ 1.0, 1.0 ] + BASE_LR: + 1e-4 + RESUME_LAST: False + OPTIMIZER: + NAME: 'adam' + EPOCHS: 1000 +DATA: + DATASET: 'zind' + DIR: 'src/dataset/zind' 
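+  # expects the preprocessed ZInD split under src/dataset/zind; the loader
+  # settings below mirror the mp3d configs (batch size 6, 6 workers).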
+ BATCH_SIZE: 6 + NUM_WORKERS: 6 + FOR_TEST_INDEX: None + AUG: + FLIP: True + STRETCH: True + ROTATE: True + GAMMA: True +AMP_OPT_LEVEL: 'O0' diff --git a/src/demo/default_vp.txt b/src/demo/default_vp.txt new file mode 100644 index 0000000000000000000000000000000000000000..bdae02eb42027f62195cfdbc03c833628eb8fdd3 --- /dev/null +++ b/src/demo/default_vp.txt @@ -0,0 +1,4 @@ +0 0 1 +0 1 0 +1 0 0 + diff --git a/src/demo/demo.json b/src/demo/demo.json new file mode 100644 index 0000000000000000000000000000000000000000..e73c92ad27ffea0eaf594c9d3e454ee50abdc39c --- /dev/null +++ b/src/demo/demo.json @@ -0,0 +1,205 @@ +{ + "cameraHeight": 1.6, + "layoutHeight": 2.9809624004364013, + "layoutObj2ds": { + "num": 0, + "obj2ds": [] + }, + "layoutPoints": { + "num": 6, + "points": [ + { + "coords": [ + 0.7081447345651483, + 0.5 + ], + "id": 0, + "xyz": [ + 3.0078125, + 0.0, + -0.8097623087756155 + ] + }, + { + "coords": [ + 0.8447738331945455, + 0.5 + ], + "id": 0, + "xyz": [ + 3.0078125, + 0.0, + 2.03786496 + ] + }, + { + "coords": [ + 0.009142142599636915, + 0.5 + ], + "id": 0, + "xyz": [ + -0.1171875, + 0.0, + 2.03786496 + ] + }, + { + "coords": [ + 0.02702105153167117, + 0.5 + ], + "id": 0, + "xyz": [ + -0.1171875, + 0.0, + 0.68359375 + ] + }, + { + "coords": [ + 0.20330907731820486, + 0.5 + ], + "id": 0, + "xyz": [ + -2.26292525056, + 0.0, + 0.68359375 + ] + }, + { + "coords": [ + 0.304692157890135, + 0.5 + ], + "id": 0, + "xyz": [ + -2.26292525056, + 0.0, + -0.8097623087756155 + ] + } + ] + }, + "layoutWalls": { + "num": 6, + "walls": [ + { + "id": 0, + "normal": [ + 1.0, + 0.0, + -0.0 + ], + "planeEquation": [ + 1.0, + 0.0, + -0.0, + -3.0078125 + ], + "pointsIdx": [ + 0, + 1 + ], + "width": 2.8476272687756152 + }, + { + "id": 0, + "normal": [ + 0.0, + 0.0, + 1.0 + ], + "planeEquation": [ + 0.0, + 0.0, + 1.0, + -2.03786496 + ], + "pointsIdx": [ + 1, + 2 + ], + "width": 3.125 + }, + { + "id": 0, + "normal": [ + -1.0, + -0.0, + -0.0 + ], + "planeEquation": [ + -1.0, + -0.0, + -0.0, + -0.1171875 + ], + "pointsIdx": [ + 2, + 3 + ], + "width": 1.3542712099999998 + }, + { + "id": 0, + "normal": [ + 0.0, + 0.0, + 1.0 + ], + "planeEquation": [ + 0.0, + 0.0, + 1.0, + -0.68359375 + ], + "pointsIdx": [ + 3, + 4 + ], + "width": 2.14573775056 + }, + { + "id": 0, + "normal": [ + -1.0, + -0.0, + -0.0 + ], + "planeEquation": [ + -1.0, + -0.0, + -0.0, + -2.26292525056 + ], + "pointsIdx": [ + 4, + 5 + ], + "width": 1.4933560587756154 + }, + { + "id": 0, + "normal": [ + 0.0, + 0.0, + -1.0 + ], + "planeEquation": [ + 0.0, + 0.0, + -1.0, + -0.8097623087756155 + ], + "pointsIdx": [ + 5, + 0 + ], + "width": 5.27073775056 + } + ] + }, + "panoId": "nothing" +} \ No newline at end of file diff --git a/src/demo/mp3d_demo1.png b/src/demo/mp3d_demo1.png new file mode 100755 index 0000000000000000000000000000000000000000..592c52949fdaba7862023adf4c0112d84765972d Binary files /dev/null and b/src/demo/mp3d_demo1.png differ diff --git a/src/demo/mp3d_demo2.png b/src/demo/mp3d_demo2.png new file mode 100755 index 0000000000000000000000000000000000000000..f1e384e524d15ef77cf28c4ce77b8c2dd5baa871 Binary files /dev/null and b/src/demo/mp3d_demo2.png differ diff --git a/src/demo/mp3d_demo3.png b/src/demo/mp3d_demo3.png new file mode 100644 index 0000000000000000000000000000000000000000..70ef1ec35028cc0f4ccfa77d780b58636e364e0b Binary files /dev/null and b/src/demo/mp3d_demo3.png differ diff --git a/src/demo/other_demo1.png b/src/demo/other_demo1.png new file mode 100644 index 
0000000000000000000000000000000000000000..038a9090e5f81f86600db915ce783b509ae03c5f Binary files /dev/null and b/src/demo/other_demo1.png differ diff --git a/src/demo/other_demo2.png b/src/demo/other_demo2.png new file mode 100644 index 0000000000000000000000000000000000000000..867c1f39e43bd6bcfb31c0d70ccc731342a910fe Binary files /dev/null and b/src/demo/other_demo2.png differ diff --git a/src/demo/pano_demo1.png b/src/demo/pano_demo1.png new file mode 100644 index 0000000000000000000000000000000000000000..2933bbc3f5694e0b8f9170c0cee1d2a070ba96b0 Binary files /dev/null and b/src/demo/pano_demo1.png differ diff --git a/src/demo/zind_demo1.png b/src/demo/zind_demo1.png new file mode 100755 index 0000000000000000000000000000000000000000..62b29bbed8e57db7738f75dad4cf6d42bf14b891 Binary files /dev/null and b/src/demo/zind_demo1.png differ diff --git a/src/demo/zind_demo2.png b/src/demo/zind_demo2.png new file mode 100755 index 0000000000000000000000000000000000000000..bedd30e4cec953a62562fc1a659574145d60b486 Binary files /dev/null and b/src/demo/zind_demo2.png differ diff --git a/src/demo/zind_demo3.png b/src/demo/zind_demo3.png new file mode 100755 index 0000000000000000000000000000000000000000..366d85ea9a98c6919119b51fdc41cacaf5e3d7e8 Binary files /dev/null and b/src/demo/zind_demo3.png differ diff --git a/utils/__init__.py b/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..02f1ee32e3c69bcf40722de4d5fb831ede759aae --- /dev/null +++ b/utils/__init__.py @@ -0,0 +1,4 @@ +""" +@date: 2021/06/19 +@description: +""" \ No newline at end of file diff --git a/utils/boundary.py b/utils/boundary.py new file mode 100644 index 0000000000000000000000000000000000000000..034691cb8769aff85927ba1ea222b4a690f95e82 --- /dev/null +++ b/utils/boundary.py @@ -0,0 +1,473 @@ +""" +@date: 2021/06/19 +@description: +""" +import math +import functools + +from scipy import stats +from scipy.ndimage.filters import maximum_filter +import numpy as np +from typing import List +from utils.conversion import uv2xyz, xyz2uv, depth2xyz, uv2pixel, depth2uv, pixel2uv, xyz2pixel, uv2lonlat +from utils.visibility_polygon import calc_visible_polygon + + +def connect_corners_uv(uv1: np.ndarray, uv2: np.ndarray, length=256) -> np.ndarray: + """ + :param uv1: [u, v] + :param uv2: [u, v] + :param length: Fix the total length in pixel coordinates + :return: + """ + # why -0.5? Check out the uv2Pixel function + p_u1 = uv1[0] * length - 0.5 + p_u2 = uv2[0] * length - 0.5 + + if abs(p_u1 - p_u2) < length / 2: + start = np.ceil(min(p_u1, p_u2)) + p = max(p_u1, p_u2) + end = np.floor(p) + if end == np.ceil(p): + end = end - 1 + else: + start = np.ceil(max(p_u1, p_u2)) + p = min(p_u1, p_u2) + length + end = np.floor(p) + if end == np.ceil(p): + end = end - 1 + p_us = (np.arange(start, end + 1) % length).astype(np.float64) + if len(p_us) == 0: + return None + us = (p_us + 0.5) / length # why +0.5? 
Check out the uv2Pixel function + + plan_y = boundary_type(np.array([uv1, uv2])) + xyz1 = uv2xyz(np.array(uv1), plan_y) + xyz2 = uv2xyz(np.array(uv2), plan_y) + x1 = xyz1[0] + z1 = xyz1[2] + x2 = xyz2[0] + z2 = xyz2[2] + + d_x = x2 - x1 + d_z = z2 - z1 + + lon_s = (us - 0.5) * 2 * np.pi + k = np.tan(lon_s) + ps = (k * z1 - x1) / (d_x - k * d_z) + cs = np.sqrt((z1 + ps * d_z) ** 2 + (x1 + ps * d_x) ** 2) + + lats = np.arctan2(plan_y, cs) + vs = lats / np.pi + 0.5 + uv = np.stack([us, vs], axis=-1) + + if start == end: + return uv[0:1] + return uv + + +def connect_corners_xyz(uv1: np.ndarray, uv2: np.ndarray, step=0.01) -> np.ndarray: + """ + :param uv1: [u, v] + :param uv2: [u, v] + :param step: Fixed step size in xyz coordinates + :return: + """ + plan_y = boundary_type(np.array([uv1, uv2])) + xyz1 = uv2xyz(np.array(uv1), plan_y) + xyz2 = uv2xyz(np.array(uv2), plan_y) + + vec = xyz2 - xyz1 + norm = np.linalg.norm(vec, ord=2) + direct = vec / norm + xyz = np.array([xyz1 + direct * dis for dis in np.linspace(0, norm, int(norm / step))]) + if len(xyz) == 0: + xyz = np.array([xyz2]) + uv = xyz2uv(xyz) + return uv + + +def connect_corners(uv1: np.ndarray, uv2: np.ndarray, step=0.01, length=None) -> np.ndarray: + """ + :param uv1: [u, v] + :param uv2: [u, v] + :param step: + :param length: + :return: [[u1, v1], [u2, v2]....] if length!=None,length of return result = length + """ + if length is not None: + uv = connect_corners_uv(uv1, uv2, length) + elif step is not None: + uv = connect_corners_xyz(uv1, uv2, step) + else: + uv = np.array([uv1]) + return uv + + +def visibility_corners(corners): + plan_y = boundary_type(corners) + xyz = uv2xyz(corners, plan_y) + xz = xyz[:, ::2] + xz = calc_visible_polygon(center=np.array([0, 0]), polygon=xz, show=False) + xyz = np.insert(xz, 1, plan_y, axis=1) + output = xyz2uv(xyz).astype(np.float32) + return output + + +def corners2boundary(corners: np.ndarray, step=0.01, length=None, visible=True) -> np.ndarray: + """ + When there is occlusion, even if the length is fixed, the final output length may be greater than the given length, + which is more defined as the fixed step size under UV + :param length: + :param step: + :param corners: [[u1, v1], [u2, v2]....] + :param visible: + :return: [[u1, v1], [u2, v2]....] 
if length!=None,length of return result = length + """ + assert step is not None or length is not None, "the step and length parameters cannot be null at the same time" + if len(corners) < 3: + return corners + + if visible: + corners = visibility_corners(corners) + + n_con = len(corners) + boundary = None + for j in range(n_con): + uv = connect_corners(corners[j], corners[(j + 1) % n_con], step, length) + if uv is None: + continue + if boundary is None: + boundary = uv + else: + boundary = np.concatenate((boundary, uv)) + boundary = np.roll(boundary, -boundary.argmin(axis=0)[0], axis=0) + + output_polygon = [] + for i, p in enumerate(boundary): + q = boundary[(i + 1) % len(boundary)] + if int(p[0] * 10000) == int(q[0] * 10000): + continue + output_polygon.append(p) + output_polygon = np.array(output_polygon, dtype=np.float32) + return output_polygon + + +def corners2boundaries(ratio: float, corners_xyz: np.ndarray = None, corners_uv: np.ndarray = None, step=0.01, + length=None, visible=True): + """ + When both step and length are None, corners are also returned + :param ratio: + :param corners_xyz: + :param corners_uv: + :param step: + :param length: + :param visible: + :return: floor_boundary, ceil_boundary + """ + if corners_xyz is None: + plan_y = boundary_type(corners_uv) + xyz = uv2xyz(corners_uv, plan_y) + floor_xyz = xyz.copy() + ceil_xyz = xyz.copy() + if plan_y > 0: + ceil_xyz[:, 1] *= -ratio + else: + floor_xyz[:, 1] /= -ratio + else: + floor_xyz = corners_xyz.copy() + ceil_xyz = corners_xyz.copy() + if corners_xyz[0][1] > 0: + ceil_xyz[:, 1] *= -ratio + else: + floor_xyz[:, 1] /= -ratio + + floor_uv = xyz2uv(floor_xyz) + ceil_uv = xyz2uv(ceil_xyz) + if step is None and length is None: + return floor_uv, ceil_uv + + floor_boundary = corners2boundary(floor_uv, step, length, visible) + ceil_boundary = corners2boundary(ceil_uv, step, length, visible) + return floor_boundary, ceil_boundary + + +def depth2boundary(depth: np.array, step=0.01, length=None,): + xyz = depth2xyz(depth) + uv = xyz2uv(xyz) + return corners2boundary(uv, step, length, visible=False) + + +def depth2boundaries(ratio: float, depth: np.array, step=0.01, length=None,): + """ + + :param ratio: + :param depth: + :param step: + :param length: + :return: floor_boundary, ceil_boundary + """ + xyz = depth2xyz(depth) + return corners2boundaries(ratio, corners_xyz=xyz, step=step, length=length, visible=False) + + +def boundary_type(corners: np.ndarray) -> int: + """ + Returns the boundary type that also represents the projection plane + :param corners: + :return: + """ + if is_ceil_boundary(corners): + plan_y = -1 + elif is_floor_boundary(corners): + plan_y = 1 + else: + # An intersection occurs and an exception is considered + assert False, 'corners error!' 
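The floor/ceiling pairing in `corners2boundaries` reduces to one step: project the corners onto a horizontal plane, then rescale the y component by the height ratio. Below is a minimal, self-contained sketch of that step with the spherical formulas inlined (nothing imported from `utils.conversion`; the corner values and `ratio` are invented for illustration):

```python
import numpy as np

def uv2xyz(uv, plan_y):
    """uv -> lon/lat -> unit sphere -> intersection with plane y = plan_y."""
    lon = (uv[..., 0] - 0.5) * 2 * np.pi
    lat = (uv[..., 1] - 0.5) * np.pi
    xyz = np.stack([np.cos(lat) * np.sin(lon),
                    np.sin(lat),
                    np.cos(lat) * np.cos(lon)], axis=-1)
    return xyz * (plan_y / xyz[..., 1])[..., None]

def xyz2uv(xyz):
    lon = np.arctan2(xyz[..., 0], xyz[..., 2])
    lat = np.arcsin(xyz[..., 1] / np.linalg.norm(xyz, axis=-1))
    return np.stack([lon / (2 * np.pi) + 0.5, lat / np.pi + 0.5], axis=-1)

floor_corners = np.array([[0.2, 0.7], [0.4, 0.7], [0.6, 0.6], [0.8, 0.7]])
ratio = 1.2  # ceiling height / camera height (assumed value)

floor_xyz = uv2xyz(floor_corners, plan_y=1)  # project onto floor plane y = 1
ceil_xyz = floor_xyz.copy()
ceil_xyz[:, 1] *= -ratio                     # mirror above the camera, scaled by ratio
print(xyz2uv(ceil_xyz))                      # ceiling corners, all with v < 0.5
```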
+ return plan_y + + +def is_normal_layout(boundaries: List[np.array]): + if len(boundaries) != 2: + print("boundaries length must be 2!") + return False + + if boundary_type(boundaries[0]) != -1: + print("ceil boundary error!") + return False + + if boundary_type(boundaries[1]) != 1: + print("floor boundary error!") + return False + return True + + +def is_ceil_boundary(corners: np.ndarray) -> bool: + m = corners[..., 1].max() + return m < 0.5 + + +def is_floor_boundary(corners: np.ndarray) -> bool: + m = corners[..., 1].min() + return m > 0.5 + + +@functools.lru_cache() +def get_gauss_map(sigma=1.5, width=5): + x = np.arange(width*2 + 1) - width + y = stats.norm(0, sigma).pdf(x) + y = y / y.max() + return y + + +def get_heat_map(u_s, patch_num=256, sigma=2, window_width=15, show=False): + """ + :param window_width: + :param sigma: + :param u_s: [u1, u2, u3, ...] + :param patch_num + :param show + :return: + """ + pixel_us = uv2pixel(u_s, w=patch_num, axis=0) + gauss_map = get_gauss_map(sigma, window_width) + heat_map_all = [] + for u in pixel_us: + heat_map = np.zeros(patch_num, dtype=np.float) + left = u-window_width + right = u+window_width+1 + + offset = 0 + if left < 0: + offset = left + elif right > patch_num: + offset = right - patch_num + + left = left - offset + right = right - offset + heat_map[left:right] = gauss_map + if offset != 0: + heat_map = np.roll(heat_map, offset) + heat_map_all.append(heat_map) + + heat_map_all = np.array(heat_map_all).max(axis=0) + if show: + import matplotlib.pyplot as plt + plt.imshow(heat_map_all[None].repeat(50, axis=0)) + plt.show() + return heat_map_all + + +def find_peaks(signal, size=15*2+1, min_v=0.05, N=None): + # code from HorizonNet: https://github.com/sunset1995/HorizonNet/blob/master/inference.py + max_v = maximum_filter(signal, size=size, mode='wrap') + pk_loc = np.where(max_v == signal)[0] + pk_loc = pk_loc[signal[pk_loc] > min_v] + if N is not None: + order = np.argsort(-signal[pk_loc]) + pk_loc = pk_loc[order[:N]] + pk_loc = pk_loc[np.argsort(pk_loc)] + return pk_loc, signal[pk_loc] + + +def get_object_cor(depth, size, center_u, patch_num=256): + width_u = size[0, center_u] + height_v = size[1, center_u] + boundary_v = size[2, center_u] + + center_boundary_v = depth2uv(depth[center_u:center_u + 1])[0, 1] + center_bottom_v = center_boundary_v - boundary_v + center_top_v = center_bottom_v - height_v + + base_v = center_boundary_v - 0.5 + assert base_v > 0 + + center_u = pixel2uv(np.array([center_u]), w=patch_num, h=patch_num // 2, axis=0)[0] + + center_boundary_uv = np.array([center_u, center_boundary_v]) + center_bottom_uv = np.array([center_u, center_bottom_v]) + center_top_uv = np.array([center_u, center_top_v]) + + left_u = center_u - width_u / 2 + right_u = center_u + width_u / 2 + + left_u = 1 + left_u if left_u < 0 else left_u + right_u = right_u - 1 if right_u > 1 else right_u + + pixel_u = uv2pixel(np.array([left_u, right_u]), w=patch_num, h=patch_num // 2, axis=0) + left_pixel_u = pixel_u[0] + right_pixel_u = pixel_u[1] + + left_boundary_v = depth2uv(depth[left_pixel_u:left_pixel_u + 1])[0, 1] + right_boundary_v = depth2uv(depth[right_pixel_u:right_pixel_u + 1])[0, 1] + + left_boundary_uv = np.array([left_u, left_boundary_v]) + right_boundary_uv = np.array([right_u, right_boundary_v]) + + xyz = uv2xyz(np.array([left_boundary_uv, right_boundary_uv, center_boundary_uv])) + left_boundary_xyz = xyz[0] + right_boundary_xyz = xyz[1] + + # need align + center_boundary_xyz = xyz[2] + center_bottom_xyz = 
uv2xyz(np.array([center_bottom_uv]))[0] + center_top_xyz = uv2xyz(np.array([center_top_uv]))[0] + center_boundary_norm = np.linalg.norm(center_boundary_xyz[::2]) + center_bottom_norm = np.linalg.norm(center_bottom_xyz[::2]) + center_top_norm = np.linalg.norm(center_top_xyz[::2]) + center_bottom_xyz = center_bottom_xyz * center_boundary_norm / center_bottom_norm + center_top_xyz = center_top_xyz * center_boundary_norm / center_top_norm + + left_bottom_xyz = left_boundary_xyz.copy() + left_bottom_xyz[1] = center_bottom_xyz[1] + right_bottom_xyz = right_boundary_xyz.copy() + right_bottom_xyz[1] = center_bottom_xyz[1] + + left_top_xyz = left_boundary_xyz.copy() + left_top_xyz[1] = center_top_xyz[1] + right_top_xyz = right_boundary_xyz.copy() + right_top_xyz[1] = center_top_xyz[1] + + uv = xyz2uv(np.array([left_bottom_xyz, right_bottom_xyz, left_top_xyz, right_top_xyz])) + left_bottom_uv = uv[0] + right_bottom_uv = uv[1] + left_top_uv = uv[2] + right_top_uv = uv[3] + + return [left_bottom_uv, right_bottom_uv, left_top_uv, right_top_uv], \ + [left_bottom_xyz, right_bottom_xyz, left_top_xyz, right_top_xyz] + + +def layout2depth(boundaries: List[np.array], return_mask=False, show=False, camera_height=1.6): + """ + + :param camera_height: + :param boundaries: [[[u_f1, v_f2], [u_f2, v_f2],...], [[u_c1, v_c2], [u_c2, v_c2]]] + :param return_mask: + :param show: + :return: + """ + # code from HorizonNet: https://github.com/sunset1995/HorizonNet/blob/master/eval_general.py + + w = len(boundaries[0]) + h = w//2 + # Convert corners to per-column boundary first + # Up -pi/2, Down pi/2 + vf = uv2lonlat(boundaries[0]) + vc = uv2lonlat(boundaries[1]) + vc = vc[None, :, 1] # [1, w] + vf = vf[None, :, 1] # [1, w] + assert (vc > 0).sum() == 0 + assert (vf < 0).sum() == 0 + + # Per-pixel v coordinate (vertical angle) + vs = ((np.arange(h) + 0.5) / h - 0.5) * np.pi + vs = np.repeat(vs[:, None], w, axis=1) # [h, w] + + # Floor-plane to depth + floor_h = camera_height + floor_d = np.abs(floor_h / np.sin(vs)) + + # wall to camera distance on horizontal plane at cross camera center + cs = floor_h / np.tan(vf) + + # Ceiling-plane to depth + ceil_h = np.abs(cs * np.tan(vc)) # [1, w] + ceil_d = np.abs(ceil_h / np.sin(vs)) # [h, w] + + # Wall to depth + wall_d = np.abs(cs / np.cos(vs)) # [h, w] + + # Recover layout depth + floor_mask = (vs > vf) + ceil_mask = (vs < vc) + wall_mask = (~floor_mask) & (~ceil_mask) + depth = np.zeros([h, w], np.float32) # [h, w] + depth[floor_mask] = floor_d[floor_mask] + depth[ceil_mask] = ceil_d[ceil_mask] + depth[wall_mask] = wall_d[wall_mask] + + assert (depth == 0).sum() == 0 + if return_mask: + return depth, floor_mask, ceil_mask, wall_mask + if show: + import matplotlib.pyplot as plt + plt.imshow(depth) + plt.show() + return depth + + +def calc_rotation(corners: np.ndarray): + xz = uv2xyz(corners)[..., 0::2] + max_norm = -1 + max_v = None + for i in range(len(xz)): + p_c = xz[i] + p_n = xz[(i + 1) % len(xz)] + v_cn = p_n - p_c + v_norm = np.linalg.norm(v_cn) + if v_norm > max_norm: + max_norm = v_norm + max_v = v_cn + + # v<-----------|o + # | | | + # | ----|----z | + # | | | + # | x \|/ + # |------------u + # It is required that the vector be aligned on the x-axis, z equals y, and x is still x. 
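`layout2depth` recovers a dense depth map from just two boundary curves. A single-column sketch of that geometry, mirroring the formulas above with invented boundary angles:

```python
import numpy as np

# One image column of the layout2depth construction; vf/vc are made up.
camera_h = 1.6
vf = np.deg2rad(30)   # floor boundary below the horizon (positive latitude)
vc = np.deg2rad(-40)  # ceiling boundary above the horizon (negative latitude)

cs = camera_h / np.tan(vf)     # horizontal camera-to-wall distance
ceil_h = abs(cs * np.tan(vc))  # ceiling height implied by vc

h = 16                                         # column height for this sketch
vs = ((np.arange(h) + 0.5) / h - 0.5) * np.pi  # per-pixel latitude

depth = np.abs(cs / np.cos(vs))                           # default: wall
depth[vs > vf] = np.abs(camera_h / np.sin(vs[vs > vf]))   # floor region
depth[vs < vc] = np.abs(ceil_h / np.sin(vs[vs < vc]))     # ceiling region
print(np.round(depth, 2))
```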
+    # In floorplan, x is displayed as the x-coordinate and z as the y-coordinate
+    rotation = np.arctan2(max_v[1], max_v[0])
+    return rotation
+
+
+if __name__ == '__main__':
+    corners = np.array([[0.2, 0.7],
+                        [0.4, 0.7],
+                        [0.3, 0.6],
+                        [0.6, 0.6],
+                        [0.8, 0.7]])
+    get_heat_map(corners[..., 0], show=True, sigma=2, window_width=15)
+
diff --git a/utils/conversion.py b/utils/conversion.py
new file mode 100644
index 0000000000000000000000000000000000000000..906d5f6dcbe635e1d2a67e032fb2de30a9dee5fa
--- /dev/null
+++ b/utils/conversion.py
@@ -0,0 +1,346 @@
+"""
+@date: 2021/06/19
+@description:
+Specification of the 4 coordinate systems:
+
+Pixel coordinates (used in panoramic images): the range depends on the image size, and they are
+generally converted to UV coordinates first. The first component is the horizontal coordinate,
+increasing to the right; the second is the column coordinate, increasing downward.
+
+UV coordinates (used in panoramic images): the range is [0~1] with the origin at the upper-left
+corner. u is the abscissa and increases to the right; v is the column coordinate and increases
+downward.
+
+Longitude and latitude coordinates (spherical): longitude lon ranges over [-pi~pi] and latitude
+lat over [-pi/2~pi/2]. The center of the panorama is the origin; longitude increases to the
+right and latitude increases downward.
+
+XYZ coordinates (used in 3-dimensional space; they can also represent longitude and latitude
+coordinates on the sphere). On the sphere the vector has unit norm; scaling y to the camera
+height recovers the real spatial position of the point.
+
+Correspondence between longitude/latitude and xyz coordinates (see lonlat2xyz below):
+lon = 0 points along +z, lon = +pi/2 along +x, and lat = +pi/2 along +y (downward),
+so the center of the panorama looks down the +z axis.
+"""
+
+import numpy as np
+import torch
+import functools
+
+
+@functools.lru_cache()
+def get_u(w, is_np, b=None):
+    u = pixel2uv(np.array(range(w)) if is_np else torch.arange(0, w), w=w, axis=0)
+    if b is not None:
+        u = u[np.newaxis].repeat(b) if is_np else u.repeat(b, 1)
+    return u
+
+
+@functools.lru_cache()
+def get_lon(w, is_np, b=None):
+    lon = pixel2lonlat(np.array(range(w)) if is_np else torch.arange(0, w), w=w, axis=0)
+    if b is not None:
+        lon = lon[np.newaxis].repeat(b, axis=0) if is_np else lon.repeat(b, 1)
+    return lon
+
+
+def pixel2uv(pixel, w=1024, h=512, axis=None):
+    pixel = pixel.astype(np.float) if isinstance(pixel, np.ndarray) else pixel.float()
+    # +0.5 will make left/right and up/down coordinates symmetric
+    if axis is None:
+        u = (pixel[..., 0:1] + 0.5) / w
+        v = (pixel[..., 1:] + 0.5) / h
+    elif axis == 0:
+        u = (pixel + 0.5) / (w * 1.0)
+        return u
+    elif axis == 1:
+        v = (pixel + 0.5) / (h * 1.0)
+        return v
+    else:
+        assert False, "axis error"
+
+    lst = [u, v]
+    uv = np.concatenate(lst, axis=-1) if isinstance(pixel, np.ndarray) else torch.cat(lst, dim=-1)
+    return uv
+
+
+def pixel2lonlat(pixel, w=1024, h=512, axis=None):
+    uv = pixel2uv(pixel, w, h, axis)
+    lonlat = uv2lonlat(uv, axis)
+    return lonlat
+
+
+def pixel2xyz(pixel, w=1024, h=512):
+    lonlat = pixel2lonlat(pixel, w, h)
+    xyz = lonlat2xyz(lonlat)
+    return xyz
+
+
+def uv2lonlat(uv, axis=None):
+    if axis is None:
+        lon = (uv[..., 0:1] - 0.5) * 2 * np.pi
+        lat = (uv[..., 1:] - 0.5) * np.pi
+    elif axis == 0:
+        lon = (uv - 0.5) * 2 * np.pi
+        return lon
+    elif axis == 1:
+        lat = (uv - 0.5) * np.pi
+        return lat
+    else:
+        assert False, "axis error"
+
+    lst = [lon, lat]
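A round trip through the conversion chain documented above, with the formulas written out inline for a 1024x512 equirectangular image. This is a sanity-check sketch, not repo code:

```python
import numpy as np

# pixel -> uv -> lon/lat -> xyz and back, assuming w=1024, h=512.
w, h = 1024, 512
pixel = np.array([256.0, 128.0])

uv = (pixel + 0.5) / [w, h]              # pixel -> uv; +0.5 centers the sample
lonlat = (uv - 0.5) * [2 * np.pi, np.pi]  # uv -> lon/lat
lon, lat = lonlat
xyz = np.array([np.cos(lat) * np.sin(lon),  # lon/lat -> unit sphere
                np.sin(lat),
                np.cos(lat) * np.cos(lon)])

# And back again:
lon2 = np.arctan2(xyz[0], xyz[2])
lat2 = np.arcsin(xyz[1])
pixel2 = (np.array([lon2 / (2 * np.pi), lat2 / np.pi]) + 0.5) * [w, h] - 0.5
assert np.allclose(pixel, pixel2)
```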
+ lonlat = np.concatenate(lst, axis=-1) if isinstance(uv, np.ndarray) else torch.cat(lst, dim=-1) + return lonlat + + +def uv2xyz(uv, plan_y=None, spherical=False): + lonlat = uv2lonlat(uv) + xyz = lonlat2xyz(lonlat) + if spherical: + # Projection onto the sphere + return xyz + + if plan_y is None: + from utils.boundary import boundary_type + plan_y = boundary_type(uv) + # Projection onto the specified plane + xyz = xyz * (plan_y / xyz[..., 1])[..., None] + + return xyz + + +def lonlat2xyz(lonlat, plan_y=None): + lon = lonlat[..., 0:1] + lat = lonlat[..., 1:] + cos = np.cos if isinstance(lonlat, np.ndarray) else torch.cos + sin = np.sin if isinstance(lonlat, np.ndarray) else torch.sin + x = cos(lat) * sin(lon) + y = sin(lat) + z = cos(lat) * cos(lon) + lst = [x, y, z] + xyz = np.concatenate(lst, axis=-1) if isinstance(lonlat, np.ndarray) else torch.cat(lst, dim=-1) + + if plan_y is not None: + xyz = xyz * (plan_y / xyz[..., 1])[..., None] + + return xyz + + +##################### + + +def xyz2lonlat(xyz): + atan2 = np.arctan2 if isinstance(xyz, np.ndarray) else torch.atan2 + asin = np.arcsin if isinstance(xyz, np.ndarray) else torch.asin + norm = np.linalg.norm(xyz, axis=-1) if isinstance(xyz, np.ndarray) else torch.norm(xyz, p=2, dim=-1) + xyz_norm = xyz / norm[..., None] + x = xyz_norm[..., 0:1] + y = xyz_norm[..., 1:2] + z = xyz_norm[..., 2:] + lon = atan2(x, z) + lat = asin(y) + lst = [lon, lat] + lonlat = np.concatenate(lst, axis=-1) if isinstance(xyz, np.ndarray) else torch.cat(lst, dim=-1) + return lonlat + + +def xyz2uv(xyz): + lonlat = xyz2lonlat(xyz) + uv = lonlat2uv(lonlat) + return uv + + +def xyz2pixel(xyz, w=1024, h=512): + uv = xyz2uv(xyz) + pixel = uv2pixel(uv, w, h) + return pixel + + +def lonlat2uv(lonlat, axis=None): + if axis is None: + u = lonlat[..., 0:1] / (2 * np.pi) + 0.5 + v = lonlat[..., 1:] / np.pi + 0.5 + elif axis == 0: + u = lonlat / (2 * np.pi) + 0.5 + return u + elif axis == 1: + v = lonlat / np.pi + 0.5 + return v + else: + assert False, "axis error" + + lst = [u, v] + uv = np.concatenate(lst, axis=-1) if isinstance(lonlat, np.ndarray) else torch.cat(lst, dim=-1) + return uv + + +def lonlat2pixel(lonlat, w=1024, h=512, axis=None, need_round=True): + uv = lonlat2uv(lonlat, axis) + pixel = uv2pixel(uv, w, h, axis, need_round) + return pixel + + +def uv2pixel(uv, w=1024, h=512, axis=None, need_round=True): + """ + :param uv: [[u1, v1], [u2, v2] ...] 
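The +/-0.5 offsets in `pixel2uv` / `uv2pixel` (the "why -0.5? Check out the uv2Pixel function" comments in `utils/boundary.py`) treat each pixel as a sample at the center of its cell, which makes the two mappings exact inverses. A small check, assuming the same w=1024 convention:

```python
import numpy as np

w = 1024
pu = np.arange(w)
u = (pu + 0.5) / w                             # pixel2uv, axis=0
pu_back = np.round(u * w - 0.5).astype(int)    # uv2pixel, axis=0
assert (pu == pu_back).all()

# The offset also keeps left/right symmetric: columns 0 and w-1 map to
# 0.5/w and 1 - 0.5/w, mirrored about the horizontal image center.
print(u[0], u[-1])
```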
+ :param w: width of panorama image + :param h: height of panorama image + :param axis: sometimes the input data is only u(axis =0) or only v(axis=1) + :param need_round: + :return: + """ + if axis is None: + pu = uv[..., 0:1] * w - 0.5 + pv = uv[..., 1:] * h - 0.5 + elif axis == 0: + pu = uv * w - 0.5 + if need_round: + pu = pu.round().astype(np.int) if isinstance(uv, np.ndarray) else pu.round().int() + return pu + elif axis == 1: + pv = uv * h - 0.5 + if need_round: + pv = pv.round().astype(np.int) if isinstance(uv, np.ndarray) else pv.round().int() + return pv + else: + assert False, "axis error" + + lst = [pu, pv] + if need_round: + pixel = np.concatenate(lst, axis=-1).round().astype(np.int) if isinstance(uv, np.ndarray) else torch.cat(lst, + dim=-1).round().int() + else: + pixel = np.concatenate(lst, axis=-1) if isinstance(uv, np.ndarray) else torch.cat(lst, dim=-1) + pixel[..., 0] = pixel[..., 0] % w + pixel[..., 1] = pixel[..., 1] % h + + return pixel + + +##################### + + +def xyz2depth(xyz, plan_y=1): + """ + :param xyz: + :param plan_y: + :return: + """ + xyz = xyz * (plan_y / xyz[..., 1])[..., None] + xz = xyz[..., ::2] + depth = np.linalg.norm(xz, axis=-1) if isinstance(xz, np.ndarray) else torch.norm(xz, dim=-1) + return depth + + +def uv2depth(uv, plan_y=None): + if plan_y is None: + from utils.boundary import boundary_type + plan_y = boundary_type(uv) + + xyz = uv2xyz(uv, plan_y) + depth = xyz2depth(xyz, plan_y) + return depth + + +def lonlat2depth(lonlat, plan_y=None): + if plan_y is None: + from utils.boundary import boundary_type + plan_y = boundary_type(lonlat2uv(lonlat)) + + xyz = lonlat2xyz(lonlat, plan_y) + depth = xyz2depth(xyz, plan_y) + return depth + + +def depth2xyz(depth, plan_y=1): + """ + :param depth: [patch_num] or [b, patch_num] + :param plan_y: + :return: + """ + is_np = isinstance(depth, np.ndarray) + w = depth.shape[-1] + + lon = get_lon(w, is_np, b=depth.shape[0] if len(depth.shape) == 2 else None) + if not is_np: + lon = lon.to(depth.device) + + cos = np.cos if is_np else torch.cos + sin = np.sin if is_np else torch.sin + # polar covert to cartesian + if len(depth.shape) == 2: + b = depth.shape[0] + xyz = np.zeros((b, w, 3)) if is_np else torch.zeros((b, w, 3)) + else: + xyz = np.zeros((w, 3)) if is_np else torch.zeros((w, 3)) + + if not is_np: + xyz = xyz.to(depth.device) + + xyz[..., 0] = depth * sin(lon) + xyz[..., 1] = plan_y + xyz[..., 2] = depth * cos(lon) + return xyz + + +def depth2uv(depth, plan_y=1): + xyz = depth2xyz(depth, plan_y) + uv = xyz2uv(xyz) + return uv + + +def depth2pixel(depth, w=1024, h=512, need_round=True, plan_y=1): + uv = depth2uv(depth, plan_y) + pixel = uv2pixel(uv, w, h, need_round=need_round) + return pixel + + +if __name__ == '__main__': + a = np.array([[0.5, 1, 0.5]]) + a = xyz2pixel(a) + print(a) + + +if __name__ == '__main__1': + np.set_printoptions(suppress=True) + + a = np.array([[0, 0], [1023, 511]]) + a = pixel2xyz(a) + a = xyz2pixel(a) + print(a) + + ########### + a = torch.tensor([[0, 0], [1023, 511]]) + a = pixel2xyz(a) + a = xyz2pixel(a) + print(a) + + ########### + u = np.array([0, 256, 512, 1023]) + lon = pixel2lonlat(u, axis=0) + u = lonlat2pixel(lon, axis=0) + print(u) + + u = torch.tensor([0, 256, 512, 1023]) + lon = pixel2lonlat(u, axis=0) + u = lonlat2pixel(lon, axis=0) + print(u) + + ########### + v = np.array([0, 256, 511]) + lat = pixel2lonlat(v, axis=1) + v = lonlat2pixel(lat, axis=1) + print(v) + + v = torch.tensor([0, 256, 511]) + lat = pixel2lonlat(v, axis=1) + v = lonlat2pixel(lat, 
axis=1) + print(v) diff --git a/utils/height.py b/utils/height.py new file mode 100644 index 0000000000000000000000000000000000000000..9abbffcef5d4e1baf3614d6fd2902d0bd4337e60 --- /dev/null +++ b/utils/height.py @@ -0,0 +1,131 @@ +""" +@date: 2021/6/30 +@description: +""" +import numpy as np +from typing import List + +from utils.boundary import * +from scipy.optimize import least_squares +from functools import partial + + +def lsq_fit(ceil_norm, floor_norm): + """ + Least Squares + :param ceil_norm: + :param floor_norm: + :return: + """ + + def error_fun(ratio, ceil_norm, floor_norm): + error = np.abs(ratio * ceil_norm - floor_norm) + return error + + init_ratio = np.mean(floor_norm / ceil_norm, axis=-1) + error_func = partial(error_fun, ceil_norm=ceil_norm, floor_norm=floor_norm) + ret = least_squares(error_func, init_ratio, verbose=0) + ratio = ret.x[0] + return ratio + + +def mean_percentile_fit(ceil_norm, floor_norm, p1=25, p2=75): + """ + :param ceil_norm: + :param floor_norm: + :param p1: + :param p2: + :return: + """ + ratio = floor_norm / ceil_norm + r_min = np.percentile(ratio, p1) + r_max = np.percentile(ratio, p2) + return ratio[(r_min <= ratio) & (ratio <= r_max)].mean() + + +def calc_ceil_ratio(boundaries: List[np.array], mode='lsq'): + """ + :param boundaries: [ [[cu1, cv1], [cu2, cv2], ...], [[fu1, fv1], [fu2, fv2], ...] ] + :param mode: 'lsq' or 'mean' + :return: + """ + assert len(boundaries[0].shape) < 4 and len(boundaries[1].shape) < 4, 'error shape' + if not is_normal_layout(boundaries): + return 0 + + ceil_boundary = boundaries[0] + floor_boundary = boundaries[1] + assert ceil_boundary.shape[-2] == floor_boundary.shape[-2], "boundary need same length" + + ceil_xyz = uv2xyz(ceil_boundary, -1) + floor_xyz = uv2xyz(floor_boundary, 1) + + ceil_xz = ceil_xyz[..., ::2] + floor_xz = floor_xyz[..., ::2] + + ceil_norm = np.linalg.norm(ceil_xz, axis=-1) + floor_norm = np.linalg.norm(floor_xz, axis=-1) + + if mode == "lsq": + if len(ceil_norm.shape) == 2: + ratio = np.array([lsq_fit(ceil_norm[i], floor_norm[i]) for i in range(ceil_norm.shape[0])]) + else: + ratio = lsq_fit(ceil_norm, floor_norm) + else: + if len(ceil_norm.shape) == 2: + ratio = np.array([mean_percentile_fit(ceil_norm[i], floor_norm[i]) for i in range(ceil_norm.shape[0])]) + else: + ratio = mean_percentile_fit(ceil_norm, floor_norm) + + return ratio + + +def calc_ceil_height(boundaries: List[np.array], camera_height=1.6, mode='lsq') -> float: + """ + :param boundaries: [ [[cu1, cv1], [cu2, cv2], ...], [[fu1, fv1], [fu2, fv2], ...] ] + :param camera_height: + :param mode: + :return: + """ + ratio = calc_ceil_ratio(boundaries, mode) + ceil_height = camera_height * ratio + return ceil_height + + +def calc_room_height(boundaries: List[np.array], camera_height=1.6, mode='lsq') -> float: + """ + :param boundaries: also can corners,format: [ [[cu1, cv1], [cu2, cv2], ...], [[fu1, fv1], [fu2, fv2], ...] 
], + 0 denotes ceil, 1 denotes floor + :param camera_height: actual camera height determines the scale + :param mode: fitting method lsq or mean + :return: + """ + ceil_height = calc_ceil_height(boundaries, camera_height, mode) + room_height = camera_height + ceil_height + return room_height + + +def height2ratio(height, camera_height=1.6): + ceil_height = height - camera_height + ratio = ceil_height / camera_height + return ratio + + +def ratio2height(ratio, camera_height=1.6): + ceil_height = camera_height * ratio + room_height = camera_height + ceil_height + return room_height + + +if __name__ == '__main__': + from dataset.mp3d_dataset import MP3DDataset + + dataset = MP3DDataset(root_dir="../src/dataset/mp3d", mode="train") + for data in dataset: + ceil_corners = data['corners'][::2] + floor_corners = data['corners'][1::2] + # ceil_boundary = corners2boundary(ceil_corners, length=1024) + # floor_boundary = corners2boundary(floor_corners, length=1024) + room_height1 = calc_room_height([ceil_corners, floor_corners], camera_height=1.6, mode='mean') + room_height2 = calc_room_height([ceil_corners, floor_corners], camera_height=1.6, mode='lsq') + print(room_height1, room_height2, data['cameraCeilingHeight'] + 1.6) diff --git a/utils/logger.py b/utils/logger.py new file mode 100644 index 0000000000000000000000000000000000000000..0f2e4dc66099c7e4784e37ab924e8594ffa03e27 --- /dev/null +++ b/utils/logger.py @@ -0,0 +1,49 @@ +""" +@Date: 2021/07/17 +@description: +""" +import os +import sys +import logging +import functools +from termcolor import colored + + +def build_logger(config): + output_dir = config.LOGGER.DIR + local_rank = config.LOCAL_RANK + name = config.MODEL.NAME + logger = get_logger(output_dir, local_rank, name) + return logger + + +@functools.lru_cache() +def get_logger(output_dir=None, local_rank=None, name="PLTNet"): + if output_dir and not os.path.exists(output_dir): + os.makedirs(output_dir) + + # create logger + logger = logging.getLogger(name) + logger.setLevel(logging.DEBUG) + logger.propagate = False + + # create formatter + fmt = f'[%(asctime)s %(name)s][%(levelname)1.1s](%(filename)s %(lineno)d): %(message)s' + color_fmt = colored(f'[%(asctime)s %(name)s][%(levelname)1.1s][{local_rank}]', 'green') + colored( + f'(%(filename)s %(lineno)d)', + 'yellow') + ': %(message)s' + if local_rank in [0] or local_rank is None: + console_handler = logging.StreamHandler(sys.stdout) + console_handler.setLevel(logging.DEBUG) + console_handler.setFormatter( + logging.Formatter(fmt=color_fmt, datefmt='%Y-%m-%d %H:%M:%S')) + logger.addHandler(console_handler) + + if output_dir is not None: + # create file handlers + file_handler = logging.FileHandler(os.path.join(output_dir, f'log_rank{local_rank}.log'), mode='a') + file_handler.setLevel(logging.DEBUG) + file_handler.setFormatter(logging.Formatter(fmt=fmt, datefmt='%Y-%m-%d %H:%M:%S')) + logger.addHandler(file_handler) + + return logger diff --git a/utils/misc.py b/utils/misc.py new file mode 100644 index 0000000000000000000000000000000000000000..d262d86e2c9a12e22bad7266dba429ce09c9a036 --- /dev/null +++ b/utils/misc.py @@ -0,0 +1,23 @@ +""" +@date: 2021/8/4 +@description: +""" +import numpy as np +import torch + + +def tensor2np(t: torch.Tensor) -> np.array: + if isinstance(t, torch.Tensor): + if t.device == 'cpu': + return t.detach().numpy() + else: + return t.detach().cpu().numpy() + else: + return t + + +def tensor2np_d(d: dict) -> dict: + output = {} + for k in d.keys(): + output[k] = tensor2np(d[k]) + return output diff --git 
a/utils/time_watch.py b/utils/time_watch.py new file mode 100644 index 0000000000000000000000000000000000000000..e710d8376fe1d670ed40e9986298d9713ad4bdb0 --- /dev/null +++ b/utils/time_watch.py @@ -0,0 +1,25 @@ +""" +@Date: 2021/07/18 +@description: +""" +import time + + +class TimeWatch: + def __init__(self, name="", logger=None): + self.name = name + self.start = time.time() + self.logger = logger + + def __del__(self): + end = time.time() + output = f"{self.name} | time use {(end - self.start):.2f}s." + if self.logger: + self.logger.info(output) + else: + print(output) + + +if __name__ == '__main__': + w = TimeWatch("__main__") + time.sleep(2) \ No newline at end of file diff --git a/utils/visibility_polygon.py b/utils/visibility_polygon.py new file mode 100644 index 0000000000000000000000000000000000000000..b07873768dffb7d01ad59fc051bffb3975639b78 --- /dev/null +++ b/utils/visibility_polygon.py @@ -0,0 +1,268 @@ +""" +@date: 2021/7/20 +@description: reference https://www.redblobgames.com/articles/visibility/ +""" +import math +import numpy as np +from functools import cmp_to_key as ctk +from PIL import Image + + +class Point: + def __init__(self, x: float, y: float): + self.x = x + self.y = y + + +class EndPoint(Point): + def __init__(self, x: float, y: float, begins_segment: bool = None, segment=None, angle: float = None): + super().__init__(x, y) + self.begins_segment = begins_segment + self.segment = segment + self.angle = angle + + +class Segment: + def __init__(self, x1: float, y1: float, x2: float, y2: float, d: float = None): + self.p1 = EndPoint(x1, y1) + self.p2 = EndPoint(x2, y2) + self.p1.segment = self + self.p2.segment = self + self.d = d + + +def calculate_end_point_angles(light_source: Point, segment: Segment) -> None: + x = light_source.x + y = light_source.y + dx = 0.5 * (segment.p1.x + segment.p2.x) - x + dy = 0.5 * (segment.p1.y + segment.p2.y) - y + segment.d = (dx * dx) + (dy * dy) + segment.p1.angle = math.atan2(segment.p1.y - y, segment.p1.x - x) + segment.p2.angle = math.atan2(segment.p2.y - y, segment.p2.x - x) + + +def set_segment_beginning(segment: Segment) -> None: + d_angle = segment.p2.angle - segment.p1.angle + if d_angle <= -math.pi: + d_angle += 2 * math.pi + if d_angle > math.pi: + d_angle -= 2 * math.pi + segment.p1.begins_segment = d_angle > 0 + segment.p2.begins_segment = not segment.p1.begins_segment + + +def endpoint_compare(point_a: EndPoint, point_b: EndPoint): + if point_a.angle > point_b.angle: + return 1 + if point_a.angle < point_b.angle: + return -1 + if not point_a.begins_segment and point_b.begins_segment: + return 1 + if point_a.begins_segment and not point_b.begins_segment: + return -1 + return 0 + + +def polygon_to_segments(polygon: np.array) -> np.array: + segments = [] + polygon = np.concatenate((polygon, [polygon[0]])) + for i in range(len(polygon) - 1): + p1 = polygon[i] + p2 = polygon[i + 1] + segments.append([p1, p2]) + segments = np.array(segments) + return segments + + +def segment_in_front_of(segment_a: Segment, segment_b: Segment, relative_point: Point): + def left_of(segment: Segment, point: Point): + cross = (segment.p2.x - segment.p1.x) * (point.y - segment.p1.y) - (segment.p2.y - segment.p1.y) * ( + point.x - segment.p1.x) + return cross < 0 + + def interpolate(point_a: Point, point_b: Point, f: float): + point = Point(x=point_a.x * (1 - f) + point_b.x * f, + y=point_a.y * (1 - f) + point_b.y * f) + return point + + a1 = left_of(segment_a, interpolate(segment_b.p1, segment_b.p2, 0.01)) + a2 = left_of(segment_a, 
interpolate(segment_b.p2, segment_b.p1, 0.01)) + a3 = left_of(segment_a, relative_point) + b1 = left_of(segment_b, interpolate(segment_a.p1, segment_a.p2, 0.01)) + b2 = left_of(segment_b, interpolate(segment_a.p2, segment_a.p1, 0.01)) + b3 = left_of(segment_b, relative_point) + if b1 == b2 and not (b2 == b3): + return True + if a1 == a2 and a2 == a3: + return True + if a1 == a2 and not (a2 == a3): + return False + if b1 == b2 and b2 == b3: + return False + return False + + +def line_intersection(point1: Point, point2: Point, point3: Point, point4: Point): + a = (point4.y - point3.y) * (point2.x - point1.x) - (point4.x - point3.x) * (point2.y - point1.y) + b = (point4.x - point3.x) * (point1.y - point3.y) - (point4.y - point3.y) * (point1.x - point3.x) + assert a != 0 or a == b, "center on polygon, it not support!" + if a == 0: + s = 1 + else: + s = b / a + + return Point( + point1.x + s * (point2.x - point1.x), + point1.y + s * (point2.y - point1.y) + ) + + +def get_triangle_points(origin: Point, angle1: float, angle2: float, segment: Segment): + p1 = origin + p2 = Point(origin.x + math.cos(angle1), origin.y + math.sin(angle1)) + p3 = Point(0, 0) + p4 = Point(0, 0) + + if segment: + p3.x = segment.p1.x + p3.y = segment.p1.y + p4.x = segment.p2.x + p4.y = segment.p2.y + else: + p3.x = origin.x + math.cos(angle1) * 2000 + p3.y = origin.y + math.sin(angle1) * 2000 + p4.x = origin.x + math.cos(angle2) * 2000 + p4.y = origin.y + math.sin(angle2) * 2000 + + # use the endpoint directly when the rays are parallel to segment + if abs(segment.p1.angle - segment.p2.angle) < 1e-6: + return [p4, p3] + + # it's maybe generate error coordinate when the rays are parallel to segment + p_begin = line_intersection(p3, p4, p1, p2) + p2.x = origin.x + math.cos(angle2) + p2.y = origin.y + math.sin(angle2) + p_end = line_intersection(p3, p4, p1, p2) + + return [p_begin, p_end] + + +def calc_visible_polygon(center: np.array, polygon: np.array = None, segments: np.array = None, show: bool = False): + if segments is None and polygon is not None: + segments = polygon_to_segments(polygon) + + origin = Point(x=center[0], y=center[1]) + endpoints = [] + for s in segments: + p1 = s[0] + p2 = s[1] + segment = Segment(x1=p1[0], y1=p1[1], x2=p2[0], y2=p2[1]) + calculate_end_point_angles(origin, segment) + set_segment_beginning(segment) + endpoints.extend([segment.p1, segment.p2]) + + open_segments = [] + output = [] + begin_angle = 0 + endpoints = sorted(endpoints, key=ctk(endpoint_compare)) + + for pas in range(2): + for endpoint in endpoints: + open_segment = open_segments[0] if len(open_segments) else None + if endpoint.begins_segment: + index = 0 + segment = open_segments[index] if index < len(open_segments) else None + while segment and segment_in_front_of(endpoint.segment, segment, origin): + index += 1 + segment = open_segments[index] if index < len(open_segments) else None + + if not segment: + open_segments.append(endpoint.segment) + else: + open_segments.insert(index, endpoint.segment) + else: + if endpoint.segment in open_segments: + open_segments.remove(endpoint.segment) + + if open_segment is not (open_segments[0] if len(open_segments) else None): + if pas == 1 and open_segment: + triangle_points = get_triangle_points(origin, begin_angle, endpoint.angle, open_segment) + output.extend(triangle_points) + begin_angle = endpoint.angle + + output_polygon = [] + # Remove duplicate + for i, p in enumerate(output): + q = output[(i + 1) % len(output)] + if int(p.x * 10000) == int(q.x * 10000) and int(p.y * 10000) == 
int(q.y * 10000): + continue + output_polygon.append([p.x, p.y]) + + output_polygon.reverse() + output_polygon = np.array(output_polygon) + + if show: + visualization(segments, output_polygon, center) + return output_polygon + + +def visualization(segments: np.array, output_polygon: np.array, center: np.array, side_l=1000): + """ + :param segments: original segments + :param output_polygon: result polygon + :param center: visibility center + :param side_l: side length of board + :return: + """ + try: + import cv2 + import matplotlib.pyplot as plt + except ImportError: + print("visualization need cv2 and matplotlib") + return + offset = np.array([side_l / 2, side_l / 2]) - center + segments = segments + offset + output_polygon = output_polygon + offset + origin = np.array([side_l / 2, side_l / 2]) + + # +0.5 as board + scale = side_l / 2.5 / np.abs(segments - origin).max() + board = np.zeros((side_l, side_l)) + for segment in segments: + segment = (segment - origin) * scale + origin + segment = segment.astype(np.int) + cv2.line(board, tuple(segment[0]), tuple(segment[1]), 0.5, thickness=3) + board = cv2.drawMarker(board, tuple(origin.astype(np.int)), 1, thickness=3) + + output_polygon = (output_polygon - origin) * scale + origin + board = cv2.drawContours(board, [output_polygon.astype(np.int)], 0, 1, 3) + board = cv2.drawMarker(board, tuple(origin.astype(np.int)), 1, thickness=3) + plt.axis('off') + plt.imshow(board) + plt.show() + + +if __name__ == '__main__': + import numpy as np + + from dataset.mp3d_dataset import MP3DDataset + from utils.boundary import depth2boundaries + from utils.conversion import uv2xyz, depth2xyz + from visualization.boundary import draw_boundaries + from visualization.floorplan import draw_floorplan, draw_iou_floorplan + + mp3d_dataset = MP3DDataset(root_dir='../src/dataset/mp3d', mode='train', + split_list=[['e9zR4mvMWw7', '2224be23a70a475ea6daa55d4c90a91b']]) + gt = mp3d_dataset.__getitem__(0) + gt['corners'] = gt['corners'][gt['corners'][..., 0] + gt['corners'][..., 1] != 0] # Take effective corners + + img = draw_floorplan(depth2xyz(gt['depth'])[:, ::2], fill_color=[1, 1, 1, 0], + show=True, scale=1, marker_color=[0, 0, 1, 1], side_l=1024) + # img = draw_iou_floorplan(gt_xz=uv2xyz(gt['corners'])[..., ::2], + # dt_xz=calc_visible_polygon(np.array([0, 0]), uv2xyz(gt['corners'])[..., ::2]), + # dt_board_color=[0, 0, 1, 0], + # gt_board_color=[0, 0, 1, 0], + # show=True, side_l=1024) + + result = Image.fromarray((img[250: -100, 100:-20] * 255).astype(np.uint8)) + result.save('../src/fig/sample3.png') diff --git a/utils/writer.py b/utils/writer.py new file mode 100644 index 0000000000000000000000000000000000000000..bfa99ff33a75588b02163201e7a478f5b5223b3a --- /dev/null +++ b/utils/writer.py @@ -0,0 +1,56 @@ +""" +@Date: 2021/11/06 +@description: +""" +import cv2 +import numpy as np + + +def xyz2json(xyz, ratio, camera_height=1.6): + xyz = xyz * camera_height + ceiling_height = camera_height * ratio + layout_height = camera_height + ceiling_height + data = { + 'cameraHeight': camera_height, + 'layoutHeight': layout_height, + 'cameraCeilingHeight': ceiling_height, + 'layoutObj2ds': { + 'num': 0, + 'obj2ds': [] + }, + 'layoutPoints': { + 'num': xyz.shape[0], + 'points': [] + }, + 'layoutWalls': { + 'num': xyz.shape[0], + 'walls': [] + } + } + + xyz = np.concatenate([xyz, xyz[0:1, :]], axis=0) + R_180 = cv2.Rodrigues(np.array([0, -1 * np.pi, 0], np.float32))[0] + for i in range(xyz.shape[0] - 1): + a = np.dot(R_180, xyz[i, :]) + a[0] *= -1 + b = np.dot(R_180, xyz[i + 
1, :]) + b[0] *= -1 + c = a.copy() + c[1] = 0 + normal = np.cross(a - b, a - c) + normal /= np.linalg.norm(normal) + d = -np.sum(normal * a) + plane = np.asarray([normal[0], normal[1], normal[2], d]) + + data['layoutPoints']['points'].append({'xyz': a.tolist(), 'id': i}) + + next_i = 0 if i + 1 >= (xyz.shape[0] - 1) else i + 1 + tmp = { + 'normal': normal.tolist(), + 'planeEquation': plane.tolist(), + 'pointsIdx': [i, next_i] + } + data['layoutWalls']['walls'].append(tmp) + + return data + diff --git a/visualization/__init__.py b/visualization/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..02f1ee32e3c69bcf40722de4d5fb831ede759aae --- /dev/null +++ b/visualization/__init__.py @@ -0,0 +1,4 @@ +""" +@date: 2021/06/19 +@description: +""" \ No newline at end of file diff --git a/visualization/boundary.py b/visualization/boundary.py new file mode 100644 index 0000000000000000000000000000000000000000..8a87a5c5d2edb73ffb79ea08fec1d50c31fd8498 --- /dev/null +++ b/visualization/boundary.py @@ -0,0 +1,161 @@ +""" +@date: 2021/06/19 +@description: +""" + +import matplotlib.pyplot as plt +import cv2 +import numpy as np +from utils.conversion import uv2pixel +from utils.boundary import corners2boundary, corners2boundaries, find_peaks, connect_corners_uv, get_object_cor, \ + visibility_corners + + +def draw_boundary(pano_img, corners: np.ndarray = None, boundary: np.ndarray = None, draw_corners=True, show=False, + step=0.01, length=None, boundary_color=None, marker_color=None, title=None, visible=True): + if marker_color is None: + marker_color = [0, 0, 1] + if boundary_color is None: + boundary_color = [0, 1, 0] + + assert corners is not None or boundary is not None, "corners or boundary error" + + shape = sorted(pano_img.shape) + assert len(shape) > 1, "pano_img shape error" + w = shape[-1] + h = shape[-2] + + pano_img = pano_img.copy() + if (corners is not None and len(corners) > 2) or \ + (boundary is not None and len(boundary) > 2): + if isinstance(boundary_color, list) or isinstance(boundary_color, np.array): + if boundary is None: + boundary = corners2boundary(corners, step, length, visible) + + boundary = uv2pixel(boundary, w, h) + pano_img[boundary[:, 1], boundary[:, 0]] = boundary_color + pano_img[np.clip(boundary[:, 1] + 1, 0, h - 1), boundary[:, 0]] = boundary_color + pano_img[np.clip(boundary[:, 1] - 1, 0, h - 1), boundary[:, 0]] = boundary_color + + if pano_img.shape[1] > 512: + pano_img[np.clip(boundary[:, 1] + 1, 0, h - 1), np.clip(boundary[:, 0] + 1, 0, w - 1)] = boundary_color + pano_img[np.clip(boundary[:, 1] + 1, 0, h - 1), np.clip(boundary[:, 0] - 1, 0, w - 1)] = boundary_color + pano_img[np.clip(boundary[:, 1] - 1, 0, h - 1), np.clip(boundary[:, 0] + 1, 0, w - 1)] = boundary_color + pano_img[np.clip(boundary[:, 1] - 1, 0, h - 1), np.clip(boundary[:, 0] - 1, 0, w - 1)] = boundary_color + + pano_img[boundary[:, 1], np.clip(boundary[:, 0] + 1, 0, w - 1)] = boundary_color + pano_img[boundary[:, 1], np.clip(boundary[:, 0] - 1, 0, w - 1)] = boundary_color + + if corners is not None and draw_corners: + if visible: + corners = visibility_corners(corners) + corners = uv2pixel(corners, w, h) + for corner in corners: + cv2.drawMarker(pano_img, tuple(corner), marker_color, markerType=0, markerSize=10, thickness=2) + + if show: + plt.figure(figsize=(10, 5)) + if title is not None: + plt.title(title) + + plt.axis('off') + plt.imshow(pano_img) + plt.show() + + return pano_img + + +def draw_boundaries(pano_img, corners_list: list = None, boundary_list: list = None, 
draw_corners=True, show=False, + step=0.01, length=None, boundary_color=None, marker_color=None, title=None, ratio=None, visible=True): + """ + + :param visible: + :param pano_img: + :param corners_list: + :param boundary_list: + :param draw_corners: + :param show: + :param step: + :param length: + :param boundary_color: RGB color + :param marker_color: RGB color + :param title: + :param ratio: ceil_height/camera_height + :return: + """ + assert corners_list is not None or boundary_list is not None, "corners_list or boundary_list error" + + if corners_list is not None: + if ratio is not None and len(corners_list) == 1: + corners_list = corners2boundaries(ratio, corners_uv=corners_list[0], step=None, visible=visible) + + for i, corners in enumerate(corners_list): + pano_img = draw_boundary(pano_img, corners=corners, draw_corners=draw_corners, + show=show if i == len(corners_list) - 1 else False, + step=step, length=length, boundary_color=boundary_color, marker_color=marker_color, + title=title, visible=visible) + elif boundary_list is not None: + if ratio is not None and len(boundary_list) == 1: + boundary_list = corners2boundaries(ratio, corners_uv=boundary_list[0], step=None, visible=visible) + + for i, boundary in enumerate(boundary_list): + pano_img = draw_boundary(pano_img, boundary=boundary, draw_corners=draw_corners, + show=show if i == len(boundary_list) - 1 else False, + step=step, length=length, boundary_color=boundary_color, marker_color=marker_color, + title=title, visible=visible) + + return pano_img + + +def draw_object(pano_img, heat_maps, size, depth, window_width=15, show=False): + # window, door, opening + colors = [[1, 0, 0], [1, 1, 0], [0, 0, 1]] + for i, heat_map in enumerate(heat_maps): + pk_u_s, _ = find_peaks(heat_map, size=window_width*2+1) + for pk_u in pk_u_s: + uv, xyz = get_object_cor(depth, size, center_u=pk_u, patch_num=len(heat_map)) + + bottom_poly = connect_corners_uv(uv[0], uv[1], length=pano_img.shape[1]) + top_poly = connect_corners_uv(uv[2], uv[3], length=pano_img.shape[1])[::-1] + + bottom_max_index = bottom_poly[..., 0].argmax() + if bottom_max_index != len(bottom_poly)-1: + top_max_index = top_poly[..., 0].argmax() + poly1 = np.concatenate([bottom_poly[:bottom_max_index+1], top_poly[top_max_index:]]) + poly1 = uv2pixel(poly1, w=pano_img.shape[1], h=pano_img.shape[0]) + poly1 = poly1[:, None, :] + + poly2 = np.concatenate([bottom_poly[bottom_max_index+1:], top_poly[:top_max_index]]) + poly2 = uv2pixel(poly2, w=pano_img.shape[1], h=pano_img.shape[0]) + poly2 = poly2[:, None, :] + + poly = [poly1, poly2] + else: + poly = np.concatenate([bottom_poly, top_poly]) + poly = uv2pixel(poly, w=pano_img.shape[1], h=pano_img.shape[0]) + poly = poly[:, None, :] + poly = [poly] + + cv2.drawContours(pano_img, poly, -1, colors[i], 1) + # + # boundary_center_xyz = uv2xyz(np.array([pk_u, pk_v])) + # + # l_b_xyz = + if show: + plt.imshow(pano_img) + plt.show() + + +if __name__ == '__main__': + from visualization.floorplan import draw_floorplan + from utils.conversion import uv2xyz + + pano_img = np.zeros([512, 1024, 3]) + corners = np.array([[0.2, 0.7], + [0.4, 0.7], + [0.3, 0.6], + [0.6, 0.6], + [0.8, 0.7]]) + # draw_boundary(pano_img, corners, show=True) + draw_boundaries(pano_img, corners_list=[corners], show=True, length=1024, ratio=1.2) + draw_floorplan(uv2xyz(corners)[..., ::2], show=True, marker_color=None, center_color=0.8) \ No newline at end of file diff --git a/visualization/floorplan.py b/visualization/floorplan.py new file mode 100644 index 
0000000000000000000000000000000000000000..2c38e9f8410d4225cc3893fe76e4ff8b96810332 --- /dev/null +++ b/visualization/floorplan.py @@ -0,0 +1,147 @@ +""" +@date: 2021/6/29 +@description: +""" +import cv2 + + +import matplotlib.pyplot as plt + +from PIL import Image +from utils.boundary import * + + +def draw_floorplan(xz, fill_color=None, border_color=None, side_l=512, show_radius=None, show=False, marker_color=None, + center_color=None, scale=1.5): + """ + :param scale: + :param center_color: + :param marker_color: for corners marking + :param fill_color: + :param border_color: boundary color + :param xz: [[x1, z1], [x2, z2], ....] + :param side_l: side length (pixel) of the output result + :param show_radius: The displayed maximum radius m (proportional to the projection plane plan_y of xz), + such as set to 1, means that the pixel value of side_l/2 is expressed as 1m, if not set this value to display all + :param show: + :return: + """ + if fill_color is None: + fill_color = [1] + + board = np.zeros([side_l, side_l, len(fill_color)], dtype=np.float) + + if show_radius is None: + show_radius = np.linalg.norm(xz, axis=-1).max() + + xz = xz * side_l / (2*scale) / show_radius + # v<-----------|o + # | | | + # | ----|----z | + # | | | + # | x \|/ + # |------------u + xz[:, 1] = -xz[:, 1] + xz += side_l // 2 # moving to center + xz = xz.astype(np.int) + cv2.fillPoly(board, [xz], fill_color) + if border_color: + cv2.drawContours(board, [xz], 0, border_color, 2) + + if marker_color is not None: + for p in xz: + cv2.drawMarker(board, tuple(p), marker_color, markerType=0, markerSize=10, thickness=2) + if center_color is not None: + cv2.drawMarker(board, tuple([side_l // 2, side_l // 2]), center_color, markerType=0, markerSize=10, thickness=2) + + if show: + # plt.rcParams['figure.dpi'] = 300 + plt.axis('off') + plt.imshow(board[..., 0] if board.shape[-1] == 1 else board) + plt.show() + + return board + + +def draw_iou_floorplan(dt_xz, gt_xz, show_radius=None, show=False, side_l=512, + iou_2d=None, iou_3d=None, dt_board_color=None, gt_board_color=None): + """ + :param gt_board_color: + :param dt_board_color: + :param dt_xz: [[x1, z1], [x2, z2], ....] + :param gt_xz: [[x1, z1], [x2, z2], ....] 
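Before rasterizing, `draw_floorplan` maps world xz coordinates (meters) to board pixels by uniform scaling, a z flip (image rows grow downward), and a shift to the board center. The mapping in isolation, using the function's default `side_l=512` and `scale=1.5` and an invented 2x2 m square room:

```python
import numpy as np

side_l, scale = 512, 1.5
xz = np.array([[1.0, 1.0], [-1.0, 1.0], [-1.0, -1.0], [1.0, -1.0]])  # 2x2 m room

show_radius = np.linalg.norm(xz, axis=-1).max()  # fit everything on the board
pts = xz * side_l / (2 * scale) / show_radius
pts[:, 1] = -pts[:, 1]                           # flip z so +z points up in the image
pts = (pts + side_l // 2).astype(int)            # move the camera to the board center
print(pts)                                       # corner pixels around (256, 256)
```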
+    :param show:
+    :param side_l: side length (pixel) of the output result
+    :param show_radius: the maximum radius (meters) displayed on the board; e.g. a value of 1
+           means that a length of side_l/2 pixels represents 1 m. If not set, it is chosen so
+           that both floorplans are displayed completely
+    :param iou_2d:
+    :param iou_3d:
+    :return:
+    """
+    if dt_board_color is None:
+        dt_board_color = [0, 1, 0, 1]
+    if gt_board_color is None:
+        gt_board_color = [0, 0, 1, 1]
+    center_color = [1, 0, 0, 1]
+    fill_color = [0.2, 0.2, 0.2, 0.2]
+
+    if show_radius is None:
+        # uniform scale
+        gt_radius = np.linalg.norm(gt_xz, axis=-1).max()
+        dt_radius = np.linalg.norm(dt_xz, axis=-1).max()
+        show_radius = gt_radius if gt_radius > dt_radius else dt_radius
+
+    dt_floorplan = draw_floorplan(dt_xz, show_radius=show_radius, fill_color=fill_color,
+                                  border_color=dt_board_color, side_l=side_l, show=False)
+    gt_floorplan = draw_floorplan(gt_xz, show_radius=show_radius, fill_color=fill_color,
+                                  border_color=gt_board_color, side_l=side_l, show=False,
+                                  center_color=[1, 0, 0, 1])
+
+    dt_floorplan = Image.fromarray((dt_floorplan * 255).astype(np.uint8), mode='RGBA')
+    gt_floorplan = Image.fromarray((gt_floorplan * 255).astype(np.uint8), mode='RGBA')
+    iou_floorplan = Image.alpha_composite(gt_floorplan, dt_floorplan)
+
+    back = np.zeros([side_l, side_l, len(fill_color)], dtype=np.float)
+    back[..., :] = [0.8, 0.8, 0.8, 1]
+    back = Image.fromarray((back * 255).astype(np.uint8), mode='RGBA')
+
+    iou_floorplan = Image.alpha_composite(back, iou_floorplan).convert("RGB")
+    iou_floorplan = np.array(iou_floorplan) / 255.0
+
+    if iou_2d is not None:
+        cv2.putText(iou_floorplan, f'2d:{iou_2d * 100:.2f}', (10, 30), 2, 1, (0, 0, 0), 1)
+    if iou_3d is not None:
+        cv2.putText(iou_floorplan, f'3d:{iou_3d * 100:.2f}', (10, 60), 2, 1, (0, 0, 0), 1)
+
+    if show:
+        plt.axis('off')
+        plt.imshow(iou_floorplan)
+        plt.show()
+    return iou_floorplan
+
+
+if __name__ == '__main__':
+    import numpy as np
+    from dataset.mp3d_dataset import MP3DDataset
+    from utils.boundary import depth2boundaries
+    from utils.conversion import uv2xyz
+    from visualization.boundary import draw_boundaries
+
+    mp3d_dataset = MP3DDataset(root_dir='../src/dataset/mp3d', mode='train')
+    gt = mp3d_dataset.__getitem__(0)
+
+    # boundary_list = depth2boundaries(gt['ratio'], gt['depth'], step=None)
+    # pano_img = draw_boundaries(gt['image'].transpose(1, 2, 0), boundary_list=boundary_list, show=True)
+    # draw_floorplan(uv2xyz(boundary_list[0])[..., ::2], show=True, marker_color=None, center_color=0.8)
+    # draw_floorplan(depth2xyz(gt['depth'])[..., ::2], show=True, marker_color=None, center_color=0.8)
+
+    corners = gt['corners'][gt['corners'][..., 0] + gt['corners'][..., 1] != 0]
+    dt_corners = corners + 0.1
+    # img = draw_floorplan(uv2xyz(corners)[..., ::2], show=True, fill_color=[0.8, 0.8, 0.8, 0.2],
+    #                      marker_color=None, center_color=[1, 0, 0, 1], border_color=[0, 0, 1, 1])
+    # cv2.imwrite('../src/fig/flp.png', (img*255).astype(np.uint8))
+
+    img = draw_iou_floorplan(uv2xyz(dt_corners)[..., ::2], uv2xyz(corners)[..., ::2], side_l=512, show=True)
+    img[..., 0:3] = img[..., 0:3][..., ::-1]
+    # cv2.imwrite('../src/fig/flp.png', (img*255).astype(np.uint8))
+
diff --git a/visualization/grad.py b/visualization/grad.py
new file mode 100644
index 0000000000000000000000000000000000000000..fdc0a259baf55a8e1c4aa4d103ff0edeb4989531
--- /dev/null
+++ b/visualization/grad.py
@@ -0,0 +1,117 @@
+"""
+@Date: 2021/11/06
+@description:
+"""
+import cv2
+import numpy as np
+import torch
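`draw_iou_floorplan` gets its overlap shading for free from RGBA compositing: both boards use the same semi-transparent fill, so the intersection region composites darker than either board alone. A toy demonstration, with two axis-aligned squares standing in for the two floorplans:

```python
import numpy as np
from PIL import Image

side_l = 64
gt = np.zeros([side_l, side_l, 4], np.float32)
dt = np.zeros([side_l, side_l, 4], np.float32)
gt[8:56, 8:56] = [0.2, 0.2, 0.2, 0.2]    # semi-transparent fill, as in fill_color
dt[16:48, 16:48] = [0.2, 0.2, 0.2, 0.2]

gt_img = Image.fromarray((gt * 255).astype(np.uint8), mode='RGBA')
dt_img = Image.fromarray((dt * 255).astype(np.uint8), mode='RGBA')
back = Image.new('RGBA', (side_l, side_l), (204, 204, 204, 255))  # 0.8 grey

overlay = Image.alpha_composite(back, Image.alpha_composite(gt_img, dt_img))
out = np.array(overlay.convert('RGB'))
print(out[4, 4], out[12, 12], out[32, 32])  # background, one board, darker overlap
```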
+import matplotlib.pyplot as plt + +from utils.conversion import depth2xyz + + +def convert_img(value, h, need_nor=True, cmap=None): + value = value.clone().detach().cpu().numpy()[None] + if need_nor: + value -= value.min() + value /= value.max() - value.min() + grad_img = value.repeat(int(h), axis=0) + + if cmap is None: + grad_img = grad_img[..., np.newaxis].repeat(3, axis=-1) + elif cmap == cv2.COLORMAP_PLASMA: + grad_img = cv2.applyColorMap((grad_img * 255).astype(np.uint8), colormap=cmap) + grad_img = grad_img[..., ::-1] + grad_img = grad_img.astype(np.float) / 255.0 + elif cmap == 'HSV': + grad_img = np.round(grad_img * 1000) / 1000.0 + grad_img = grad_img[..., np.newaxis].repeat(3, axis=-1) + grad_img[..., 0] = grad_img[..., 0] * 180 + grad_img[..., 1] = 255 + grad_img[..., 2] = 255 + grad_img = grad_img.astype(np.uint8) + grad_img = cv2.cvtColor(grad_img, cv2.COLOR_HSV2RGB) + grad_img = grad_img.astype(np.float) / 255.0 + return grad_img + + +def show_grad(depth, grad_conv, h=5, show=False): + """ + :param h: + :param depth: [patch_num] + :param grad_conv: + :param show: + :return: + """ + + direction, angle, grad = get_all(depth[None], grad_conv) + + # depth_img = convert_img(depth, h) + # angle_img = convert_img(angle[0], h) + # grad_img = convert_img(grad[0], depth.shape[-1] // 4 - h * 2) + depth_img = convert_img(depth, h, cmap=cv2.COLORMAP_PLASMA) + angle_img = convert_img(angle[0], h, cmap='HSV') + + # vis_grad = grad[0] / grad[0].max() / 2 + 0.5 + grad_img = convert_img(grad[0], h) + img = np.concatenate([depth_img, angle_img, grad_img], axis=0) + if show: + plt.imshow(img) + plt.show() + return img + + +def get_grad(direction): + """ + :param direction: [b patch_num] + :return:[b patch_num] + """ + a = torch.roll(direction, -1, dims=1) # xz[i+1] + b = torch.roll(direction, 1, dims=1) # xz[i-1] + grad = torch.acos(torch.clip(a[..., 0] * b[..., 0] + a[..., 1] * b[..., 1], -1+1e-6, 1-1e-6)) + return grad + + +def get_grad2(angle, grad_conv): + """ + :param angle: [b patch_num] + :param grad_conv: + :return:[b patch_num] + """ + angle = torch.sin(angle) + angle = angle + 1 + + angle = torch.cat([angle[..., -1:], angle, angle[..., :1]], dim=-1) + grad = grad_conv(angle[:, None]) # [b, patch_num] -> [b, 1, patch_num] + # grad = torch.abs(grad) + return grad.reshape(angle.shape[0], -1) + + +def get_edge_angle(direction): + """ + :param direction: [b patch_num 2] + :return: + """ + angle = torch.atan2(direction[..., 1], direction[..., 0]) + return angle + + +def get_edge_direction(depth): + xz = depth2xyz(depth)[..., ::2] + direction = torch.roll(xz, -1, dims=1) - xz # direct[i] = xz[i+1] - xz[i] + direction = direction / direction.norm(p=2, dim=-1)[..., None] + return direction + + +def get_all(depth, grad_conv): + """ + + :param grad_conv: + :param depth: [b patch_num] + :return: + """ + direction = get_edge_direction(depth) + angle = get_edge_angle(direction) + # angle_grad = get_grad(direction) + angle_grad = get_grad2(angle, grad_conv) # signed gradient + return direction, angle, angle_grad diff --git a/visualization/obj3d.py b/visualization/obj3d.py new file mode 100644 index 0000000000000000000000000000000000000000..d7f632300800341be19df4b905eccfd7444e4fc8 --- /dev/null +++ b/visualization/obj3d.py @@ -0,0 +1,62 @@ +""" +@author: Zhigang Jiang +@time: 2022/05/25 +@description: reference: https://github.com/sunset1995/PanoPlane360/blob/main/vis_planes.py +""" +import open3d +import numpy as np +from utils.conversion import pixel2lonlat + + +def create_3d_obj(img, depth, 
save_path=None, mesh=True, mesh_show_back_face=False, show=False):
+    assert img.shape[0] == depth.shape[0], "img and depth must have the same height"
+    h = img.shape[0]
+    w = img.shape[1]
+    # Project to 3d
+    lon = pixel2lonlat(np.array(range(w)), w=w, axis=0)[None].repeat(h, axis=0)
+    lat = pixel2lonlat(np.array(range(h)), h=h, axis=1)[..., None].repeat(w, axis=1)
+
+    z = depth * np.sin(lat)
+    x = depth * np.cos(lat) * np.cos(lon)
+    y = depth * np.cos(lat) * np.sin(lon)
+    pts_xyz = np.stack([x, -z, y], -1).reshape(-1, 3)
+    pts_rgb = img.reshape(-1, 3)
+
+    if mesh:
+        pid = np.arange(len(pts_xyz)).reshape(h, w)
+        faces = np.concatenate([
+            np.stack([
+                pid[:-1, :-1], pid[1:, :-1], np.roll(pid, -1, axis=1)[:-1, :-1],
+            ], -1),
+            np.stack([
+                pid[1:, :-1], np.roll(pid, -1, axis=1)[1:, :-1], np.roll(pid, -1, axis=1)[:-1, :-1],
+            ], -1)
+        ]).reshape(-1, 3).tolist()
+        scene = open3d.geometry.TriangleMesh()
+        scene.vertices = open3d.utility.Vector3dVector(pts_xyz)
+        scene.vertex_colors = open3d.utility.Vector3dVector(pts_rgb)
+        scene.triangles = open3d.utility.Vector3iVector(faces)
+    else:
+        scene = open3d.geometry.PointCloud()
+        scene.points = open3d.utility.Vector3dVector(pts_xyz)
+        scene.colors = open3d.utility.Vector3dVector(pts_rgb)
+    if save_path:
+        # write_triangle_mesh only accepts a TriangleMesh; fall back to
+        # write_point_cloud when mesh=False
+        if mesh:
+            open3d.io.write_triangle_mesh(save_path, scene, write_triangle_uvs=True)
+        else:
+            open3d.io.write_point_cloud(save_path, scene)
+    if show:
+        open3d.visualization.draw_geometries([scene], mesh_show_back_face=mesh_show_back_face)
+
+
+if __name__ == '__main__':
+    from dataset.mp3d_dataset import MP3DDataset
+    from utils.boundary import depth2boundaries, layout2depth
+    from visualization.boundary import draw_boundaries
+
+    mp3d_dataset = MP3DDataset(root_dir='../src/dataset/mp3d', mode='train', for_test_index=10, patch_num=1024)
+    gt = mp3d_dataset.__getitem__(3)
+
+    boundary_list = depth2boundaries(gt['ratio'], gt['depth'], step=None)
+    pano_img = draw_boundaries(gt['image'].transpose(1, 2, 0), boundary_list=boundary_list, show=True)
+    layout_depth = layout2depth(boundary_list, show=False)
+    create_3d_obj(gt['image'].transpose(1, 2, 0), layout_depth, save_path=f"../src/output/{gt['id']}_3d.gltf",
+                  mesh=True)
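The `faces` expression in `create_3d_obj` turns the h x w vertex grid into two triangles per cell. The same construction on a tiny grid, as a standalone check of the index bookkeeping; note that the `[:-1]` column slices drop the wrap-around cell, so the seam between the first and last columns is left open:

```python
import numpy as np

h, w = 4, 6
pid = np.arange(h * w).reshape(h, w)
right = np.roll(pid, -1, axis=1)  # index of the right-hand neighbour, as in the original

faces = np.concatenate([
    np.stack([pid[:-1, :-1], pid[1:, :-1], right[:-1, :-1]], -1),
    np.stack([pid[1:, :-1], right[1:, :-1], right[:-1, :-1]], -1),
]).reshape(-1, 3)

assert len(faces) == 2 * (h - 1) * (w - 1)  # two triangles per grid cell
print(faces[:2])                            # e.g. [[0, 6, 1], [1, 7, 2]]
```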