zhigangjiang committed
Commit: 88b0dcb
Parent(s): 46e6683
Commit message: no message

Note: this view is limited to 50 files because the commit contains too many changes.
- .gitignore +4 -0
- LICENSE +21 -0
- Post-Porcessing.md +35 -0
- app.py +139 -0
- config/__init__.py +4 -0
- config/defaults.py +289 -0
- convert_ckpt.py +61 -0
- dataset/__init__.py +0 -0
- dataset/build.py +115 -0
- dataset/communal/__init__.py +4 -0
- dataset/communal/base_dataset.py +127 -0
- dataset/communal/data_augmentation.py +279 -0
- dataset/communal/read.py +214 -0
- dataset/mp3d_dataset.py +110 -0
- dataset/pano_s2d3d_dataset.py +107 -0
- dataset/pano_s2d3d_mix_dataset.py +91 -0
- dataset/zind_dataset.py +138 -0
- evaluation/__init__.py +4 -0
- evaluation/accuracy.py +249 -0
- evaluation/analyse_layout_type.py +83 -0
- evaluation/eval_visible_iou.py +56 -0
- evaluation/f1_score.py +78 -0
- evaluation/iou.py +148 -0
- inference.py +261 -0
- loss/__init__.py +10 -0
- loss/boundary_loss.py +51 -0
- loss/grad_loss.py +57 -0
- loss/led_loss.py +47 -0
- loss/object_loss.py +42 -0
- main.py +401 -0
- models/__init__.py +1 -0
- models/base_model.py +150 -0
- models/build.py +81 -0
- models/lgt_net.py +213 -0
- models/modules/__init__.py +8 -0
- models/modules/conv_transformer.py +128 -0
- models/modules/horizon_net_feature_extractor.py +267 -0
- models/modules/patch_feature_extractor.py +57 -0
- models/modules/swg_transformer.py +49 -0
- models/modules/swin_transformer.py +43 -0
- models/modules/transformer.py +44 -0
- models/modules/transformer_modules.py +250 -0
- models/other/__init__.py +4 -0
- models/other/criterion.py +72 -0
- models/other/init_env.py +37 -0
- models/other/optimizer.py +24 -0
- models/other/scheduler.py +51 -0
- postprocessing/__init__.py +4 -0
- postprocessing/dula/__init__.py +4 -0
- postprocessing/dula/layout.py +226 -0
.gitignore
ADDED
@@ -0,0 +1,4 @@
+checkpoints
+src/output
+visualization/visualizer
+flagged
LICENSE
ADDED
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2021 ZhiGang Jiang
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
Post-Porcessing.md
ADDED
@@ -0,0 +1,35 @@
+# Post-Processing
+## Steps
+
+1. Simplify the polygon with the [DP algorithm](https://en.wikipedia.org/wiki/Ramer%E2%80%93Douglas%E2%80%93Peucker_algorithm)
+
+![img.png](src/fig/post_processing/img_0.png)
+
+2. Detect occlusion, calculating a box filled with 1
+
+![img.png](src/fig/post_processing/img_1.png)
+
+3. Fill in a reasonable sampling section
+
+![img.png](src/fig/post_processing/img_2.png)
+
+4. Output the processed polygon
+
+![img.png](src/fig/post_processing/img_3.png)
+
+## Performance
+It works; here is a performance comparison on the MatterportLayout dataset:
+
+| Method | 2D IoU(%) | 3D IoU(%) | RMSE | $\mathbf{\delta_{1}}$ |
+|--|--|--|--|--|
+| without post-proc | 83.52 | 81.11 | 0.204 | 0.951 |
+| original post-proc | 83.12 | 80.71 | 0.230 | 0.936 |
+| optimized post-proc | 83.48 | 81.08 | 0.214 | 0.940 |
+
+original:
+
+![img.png](src/fig/post_processing/original.png)
+
+optimized:
+
+![img.png](src/fig/post_processing/optimized.png)
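Step 1 of the file above leans on Ramer-Douglas-Peucker simplification. A minimal self-contained sketch of the same idea using OpenCV's cv2.approxPolyDP (the contour points and the epsilon value are illustrative, not the ones the repo uses):

import numpy as np
import cv2

# A made-up noisy floorplan contour, shaped (N, 1, 2) int32 as OpenCV expects.
contour = np.array([[[0, 0]], [[50, 1]], [[100, 0]],
                    [[100, 100]], [[0, 100]]], dtype=np.int32)

# RDP: drop vertices whose distance to the chord between their
# neighbors is below epsilon (in pixels here).
epsilon = 2.0
simplified = cv2.approxPolyDP(contour, epsilon, closed=True)
print(simplified.reshape(-1, 2))  # the near-collinear vertex [50, 1] should be dropped

A larger epsilon removes more vertices; the repo's optimized post-processing then handles occlusion on the simplified polygon.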
app.py
ADDED
@@ -0,0 +1,139 @@
+'''
+@author: Zhigang Jiang
+@time: 2022/05/23
+@description:
+'''
+
+import gradio as gr
+import numpy as np
+import os
+import torch
+
+from PIL import Image
+
+from utils.logger import get_logger
+from config.defaults import get_config
+from inference import preprocess, run_one_inference
+from models.build import build_model
+from argparse import Namespace
+import gdown
+
+
+def down_ckpt(model_cfg, ckpt_dir):
+    model_ids = [
+        ['src/config/mp3d.yaml', '1o97oAmd-yEP5bQrM0eAWFPLq27FjUDbh'],
+        ['src/config/zind.yaml', '1PzBj-dfDfH_vevgSkRe5kczW0GVl_43I'],
+        ['src/config/pano.yaml', '1JoeqcPbm_XBPOi6O9GjjWi3_rtyPZS8m'],
+        ['src/config/s2d3d.yaml', '1PfJzcxzUsbwwMal7yTkBClIFgn8IdEzI'],
+        ['src/config/ablation_study/full.yaml', '1U16TxUkvZlRwJNaJnq9nAUap-BhCVIha']
+    ]
+
+    for model_id in model_ids:
+        if model_id[0] != model_cfg:
+            continue
+        path = os.path.join(ckpt_dir, 'best.pkl')
+        if not os.path.exists(path):
+            logger.info(f"Downloading {model_id}")
+            os.makedirs(ckpt_dir, exist_ok=True)
+            gdown.download(f"https://drive.google.com/uc?id={model_id[1]}", path, False)
+
+
+def greet(img_path, pre_processing, weight_name, post_processing, visualization, mesh_format, mesh_resolution):
+    args.pre_processing = pre_processing
+    args.post_processing = post_processing
+    if weight_name == 'mp3d':
+        model = mp3d_model
+    elif weight_name == 'zind':
+        model = zind_model
+    else:
+        logger.error("unknown pre-trained weight name")
+        raise NotImplementedError
+
+    img_name = os.path.basename(img_path).split('.')[0]
+    img = np.array(Image.open(img_path).resize((1024, 512), Image.Resampling.BICUBIC))[..., :3]
+
+    vp_cache_path = 'src/demo/default_vp.txt'
+    if args.pre_processing:
+        vp_cache_path = os.path.join('src/output', f'{img_name}_vp.txt')
+        logger.info("pre-processing ...")
+        img, vp = preprocess(img, vp_cache_path=vp_cache_path)
+
+    img = (img / 255.0).astype(np.float32)
+    run_one_inference(img, model, args, img_name,
+                      logger=logger, show=False,
+                      show_depth='depth-normal-gradient' in visualization,
+                      show_floorplan='2d-floorplan' in visualization,
+                      mesh_format=mesh_format, mesh_resolution=int(mesh_resolution))
+
+    return [os.path.join(args.output_dir, f"{img_name}_pred.png"),
+            os.path.join(args.output_dir, f"{img_name}_3d{mesh_format}"),
+            os.path.join(args.output_dir, f"{img_name}_3d{mesh_format}"),
+            vp_cache_path,
+            os.path.join(args.output_dir, f"{img_name}_pred.json")]
+
+
+def get_model(args):
+    config = get_config(args)
+    down_ckpt(args.cfg, config.CKPT.DIR)
+    if ('cuda' in args.device or 'cuda' in config.TRAIN.DEVICE) and not torch.cuda.is_available():
+        logger.info(f'The {args.device} is not available, will use cpu ...')
+        config.defrost()
+        args.device = "cpu"
+        config.TRAIN.DEVICE = "cpu"
+        config.freeze()
+    model, _, _, _ = build_model(config, logger)
+    return model
+
+
+if __name__ == '__main__':
+    logger = get_logger()
+    args = Namespace(device='cuda', output_dir='src/output', visualize_3d=False, output_3d=True)
+    os.makedirs(args.output_dir, exist_ok=True)
+
+    args.cfg = 'src/config/mp3d.yaml'
+    mp3d_model = get_model(args)
+
+    args.cfg = 'src/config/zind.yaml'
+    zind_model = get_model(args)
+
+    description = "This is a demo of the project " \
+                  "<a href='https://github.com/zhigangjiang/LGT-Net' target='_blank'>LGT-Net</a>. " \
+                  "It uses the Geometry-Aware Transformer Network to predict the 3D room layout of an RGB panorama."
+
+    demo = gr.Interface(fn=greet,
+                        inputs=[gr.Image(type='filepath', label='input rgb panorama', value='src/demo/pano_demo1.png'),
+                                gr.Checkbox(label='pre-processing', value=True),
+                                gr.Radio(['mp3d', 'zind'],
+                                         label='pre-trained weight',
+                                         value='mp3d'),
+                                gr.Radio(['manhattan', 'atalanta', 'original'],
+                                         label='post-processing method',
+                                         value='manhattan'),
+                                gr.CheckboxGroup(['depth-normal-gradient', '2d-floorplan'],
+                                                 label='2d-visualization',
+                                                 value=['depth-normal-gradient', '2d-floorplan']),
+                                gr.Radio(['.gltf', '.obj', '.glb'],
+                                         label='output format of 3d mesh',
+                                         value='.gltf'),
+                                gr.Radio(['128', '256', '512', '1024'],
+                                         label='output resolution of 3d mesh',
+                                         value='256'),
+                                ],
+                        outputs=[gr.Image(label='predicted result 2d-visualization', type='filepath'),
+                                 gr.Model3D(label='3d mesh reconstruction', clear_color=[1.0, 1.0, 1.0, 1.0]),
+                                 gr.File(label='3d mesh file'),
+                                 gr.File(label='vanishing point information'),
+                                 gr.File(label='layout json')],
+                        examples=[
+                            ['src/demo/pano_demo1.png', True, 'mp3d', 'manhattan', ['depth-normal-gradient', '2d-floorplan'], '.gltf', '256'],
+                            ['src/demo/mp3d_demo1.png', False, 'mp3d', 'manhattan', ['depth-normal-gradient', '2d-floorplan'], '.gltf', '256'],
+                            ['src/demo/mp3d_demo2.png', False, 'mp3d', 'manhattan', ['depth-normal-gradient', '2d-floorplan'], '.gltf', '256'],
+                            ['src/demo/mp3d_demo3.png', False, 'mp3d', 'manhattan', ['depth-normal-gradient', '2d-floorplan'], '.gltf', '256'],
+                            ['src/demo/zind_demo1.png', True, 'zind', 'manhattan', ['depth-normal-gradient', '2d-floorplan'], '.gltf', '256'],
+                            ['src/demo/zind_demo2.png', False, 'zind', 'atalanta', ['depth-normal-gradient', '2d-floorplan'], '.gltf', '256'],
+                            ['src/demo/zind_demo3.png', True, 'zind', 'manhattan', ['depth-normal-gradient', '2d-floorplan'], '.gltf', '256'],
+                            ['src/demo/other_demo1.png', False, 'mp3d', 'manhattan', ['depth-normal-gradient', '2d-floorplan'], '.gltf', '256'],
+                            ['src/demo/other_demo2.png', True, 'mp3d', 'manhattan', ['depth-normal-gradient', '2d-floorplan'], '.gltf', '256'],
+                        ], title='LGT-Net', allow_flagging="never", cache_examples=False, description=description)
+
+    demo.launch(debug=True, enable_queue=False)
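app.py follows the standard gr.Interface pattern: input components map positionally to the arguments of fn, and fn's return values map to the output components. A minimal self-contained sketch of that pattern (the echo function and labels are placeholders, not part of the repo):

import gradio as gr

def echo(text):
    # Trivial stand-in for `greet`: one input component -> one argument,
    # one return value -> one output component.
    return text

demo = gr.Interface(fn=echo, inputs=gr.Textbox(label='input'), outputs=gr.Textbox(label='output'))
demo.launch()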
config/__init__.py
ADDED
@@ -0,0 +1,4 @@
+"""
+@Date: 2021/07/17
+@description:
+"""
config/defaults.py
ADDED
@@ -0,0 +1,289 @@
+"""
+@Date: 2021/07/17
+@description:
+"""
+import os
+import logging
+from yacs.config import CfgNode as CN
+
+_C = CN()
+_C.DEBUG = False
+_C.MODE = 'train'
+_C.VAL_NAME = 'val'
+_C.TAG = 'default'
+_C.COMMENT = 'add some comments to help you understand'
+_C.SHOW_BAR = True
+_C.SAVE_EVAL = False
+_C.MODEL = CN()
+_C.MODEL.NAME = 'model_name'
+_C.MODEL.SAVE_BEST = True
+_C.MODEL.SAVE_LAST = True
+_C.MODEL.ARGS = []
+_C.MODEL.FINE_TUNE = []
+
+# -----------------------------------------------------------------------------
+# Training settings
+# -----------------------------------------------------------------------------
+_C.TRAIN = CN()
+_C.TRAIN.SCRATCH = False
+_C.TRAIN.START_EPOCH = 0
+_C.TRAIN.EPOCHS = 300
+_C.TRAIN.DETERMINISTIC = False
+_C.TRAIN.SAVE_FREQ = 5
+
+_C.TRAIN.BASE_LR = 5e-4
+
+_C.TRAIN.WARMUP_EPOCHS = 20
+_C.TRAIN.WEIGHT_DECAY = 0
+_C.TRAIN.WARMUP_LR = 5e-7
+_C.TRAIN.MIN_LR = 5e-6
+# Clip gradient norm
+_C.TRAIN.CLIP_GRAD = 5.0
+# Auto resume from latest checkpoint
+_C.TRAIN.RESUME_LAST = True
+# Gradient accumulation steps
+# could be overwritten by command line argument
+_C.TRAIN.ACCUMULATION_STEPS = 0
+# Whether to use gradient checkpointing to save memory
+# could be overwritten by command line argument
+_C.TRAIN.USE_CHECKPOINT = False
+# 'cpu' or 'cuda:0, 1, 2, 3' or 'cuda'
+_C.TRAIN.DEVICE = 'cuda'
+
+# LR scheduler
+_C.TRAIN.LR_SCHEDULER = CN()
+_C.TRAIN.LR_SCHEDULER.NAME = ''
+_C.TRAIN.LR_SCHEDULER.ARGS = []
+
+
+# Optimizer
+_C.TRAIN.OPTIMIZER = CN()
+_C.TRAIN.OPTIMIZER.NAME = 'adam'
+# Optimizer epsilon
+_C.TRAIN.OPTIMIZER.EPS = 1e-8
+# Optimizer betas
+_C.TRAIN.OPTIMIZER.BETAS = (0.9, 0.999)
+# SGD momentum
+_C.TRAIN.OPTIMIZER.MOMENTUM = 0.9
+
+# Criterion
+_C.TRAIN.CRITERION = CN()
+# Boundary loss (Horizon-Net)
+_C.TRAIN.CRITERION.BOUNDARY = CN()
+_C.TRAIN.CRITERION.BOUNDARY.NAME = 'boundary'
+_C.TRAIN.CRITERION.BOUNDARY.LOSS = 'BoundaryLoss'
+_C.TRAIN.CRITERION.BOUNDARY.WEIGHT = 0.0
+_C.TRAIN.CRITERION.BOUNDARY.WEIGHTS = []
+_C.TRAIN.CRITERION.BOUNDARY.NEED_ALL = True
+# Up and down depth loss (LED2-Net)
+_C.TRAIN.CRITERION.LEDDepth = CN()
+_C.TRAIN.CRITERION.LEDDepth.NAME = 'led_depth'
+_C.TRAIN.CRITERION.LEDDepth.LOSS = 'LEDLoss'
+_C.TRAIN.CRITERION.LEDDepth.WEIGHT = 0.0
+_C.TRAIN.CRITERION.LEDDepth.WEIGHTS = []
+_C.TRAIN.CRITERION.LEDDepth.NEED_ALL = True
+# Depth loss
+_C.TRAIN.CRITERION.DEPTH = CN()
+_C.TRAIN.CRITERION.DEPTH.NAME = 'depth'
+_C.TRAIN.CRITERION.DEPTH.LOSS = 'L1Loss'
+_C.TRAIN.CRITERION.DEPTH.WEIGHT = 0.0
+_C.TRAIN.CRITERION.DEPTH.WEIGHTS = []
+_C.TRAIN.CRITERION.DEPTH.NEED_ALL = False
+# Ratio (room height) loss
+_C.TRAIN.CRITERION.RATIO = CN()
+_C.TRAIN.CRITERION.RATIO.NAME = 'ratio'
+_C.TRAIN.CRITERION.RATIO.LOSS = 'L1Loss'
+_C.TRAIN.CRITERION.RATIO.WEIGHT = 0.0
+_C.TRAIN.CRITERION.RATIO.WEIGHTS = []
+_C.TRAIN.CRITERION.RATIO.NEED_ALL = False
+# Grad (normal) loss
+_C.TRAIN.CRITERION.GRAD = CN()
+_C.TRAIN.CRITERION.GRAD.NAME = 'grad'
+_C.TRAIN.CRITERION.GRAD.LOSS = 'GradLoss'
+_C.TRAIN.CRITERION.GRAD.WEIGHT = 0.0
+_C.TRAIN.CRITERION.GRAD.WEIGHTS = [1.0, 1.0]
+_C.TRAIN.CRITERION.GRAD.NEED_ALL = True
+# Object loss
+_C.TRAIN.CRITERION.OBJECT = CN()
+_C.TRAIN.CRITERION.OBJECT.NAME = 'object'
+_C.TRAIN.CRITERION.OBJECT.LOSS = 'ObjectLoss'
+_C.TRAIN.CRITERION.OBJECT.WEIGHT = 0.0
+_C.TRAIN.CRITERION.OBJECT.WEIGHTS = []
+_C.TRAIN.CRITERION.OBJECT.NEED_ALL = True
+# Heatmap loss
+_C.TRAIN.CRITERION.CHM = CN()
+_C.TRAIN.CRITERION.CHM.NAME = 'corner_heat_map'
+_C.TRAIN.CRITERION.CHM.LOSS = 'HeatmapLoss'
+_C.TRAIN.CRITERION.CHM.WEIGHT = 0.0
+_C.TRAIN.CRITERION.CHM.WEIGHTS = []
+_C.TRAIN.CRITERION.CHM.NEED_ALL = False
+
+_C.TRAIN.VIS_MERGE = True
+_C.TRAIN.VIS_WEIGHT = 1024
+# -----------------------------------------------------------------------------
+# Output settings
+# -----------------------------------------------------------------------------
+_C.CKPT = CN()
+_C.CKPT.PYTORCH = './'
+_C.CKPT.ROOT = "./checkpoints"
+_C.CKPT.DIR = os.path.join(_C.CKPT.ROOT, _C.MODEL.NAME, _C.TAG)
+_C.CKPT.RESULT_DIR = os.path.join(_C.CKPT.DIR, 'results', _C.MODE)
+
+_C.LOGGER = CN()
+_C.LOGGER.DIR = os.path.join(_C.CKPT.DIR, "logs")
+_C.LOGGER.LEVEL = logging.DEBUG
+
+# -----------------------------------------------------------------------------
+# Misc
+# -----------------------------------------------------------------------------
+# Mixed precision opt level; if O0, no amp is used ('O0', 'O1', 'O2'). Please confirm your device supports FP16 (half precision).
+# overwritten by command line argument
+_C.AMP_OPT_LEVEL = 'O1'
+# Path to output folder, overwritten by command line argument
+_C.OUTPUT = ''
+# Tag of experiment, overwritten by command line argument
+_C.TAG = 'default'
+# Frequency to save checkpoint
+_C.SAVE_FREQ = 1
+# Frequency to log info
+_C.PRINT_FREQ = 10
+# Fixed random seed
+_C.SEED = 0
+# Perform evaluation only, overwritten by command line argument
+_C.EVAL_MODE = False
+# Test throughput only, overwritten by command line argument
+_C.THROUGHPUT_MODE = False
+
+# -----------------------------------------------------------------------------
+# FIX
+# -----------------------------------------------------------------------------
+_C.LOCAL_RANK = 0
+_C.WORLD_SIZE = 0
+
+# -----------------------------------------------------------------------------
+# Data settings
+# -----------------------------------------------------------------------------
+_C.DATA = CN()
+# Sub dataset of pano_s2d3d
+_C.DATA.SUBSET = None
+# Dataset name
+_C.DATA.DATASET = 'mp3d'
+# Path to dataset, could be overwritten by command line argument
+_C.DATA.DIR = ''
+# Max wall number
+_C.DATA.WALL_NUM = 0  # all
+# Panorama image size
+_C.DATA.SHAPE = [512, 1024]
+# Real camera height
+_C.DATA.CAMERA_HEIGHT = 1.6
+# Pin CPU memory in DataLoader for more efficient (sometimes) transfer to GPU.
+_C.DATA.PIN_MEMORY = True
+# Debug use, fast test of model performance
+_C.DATA.FOR_TEST_INDEX = None
+
+# Batch size for a single GPU, could be overwritten by command line argument
+_C.DATA.BATCH_SIZE = 8
+# Number of data loading threads
+_C.DATA.NUM_WORKERS = 8
+
+# Training augmentation
+_C.DATA.AUG = CN()
+# Flip the panorama horizontally
+_C.DATA.AUG.FLIP = True
+# Pano Stretch Data Augmentation by HorizonNet
+_C.DATA.AUG.STRETCH = True
+# Rotate the panorama horizontally
+_C.DATA.AUG.ROTATE = True
+# Gamma adjusting
+_C.DATA.AUG.GAMMA = True
+
+_C.DATA.KEYS = []
+
+
+_C.EVAL = CN()
+_C.EVAL.POST_PROCESSING = None
+_C.EVAL.NEED_CPE = False
+_C.EVAL.NEED_F1 = False
+_C.EVAL.NEED_RMSE = False
+_C.EVAL.FORCE_CUBE = False
+
+
+def merge_from_file(cfg_path):
+    config = _C.clone()
+    config.merge_from_file(cfg_path)
+    return config
+
+
+def get_config(args=None):
+    config = _C.clone()
+    if args:
+        if 'cfg' in args and args.cfg:
+            config.merge_from_file(args.cfg)
+
+        if 'mode' in args and args.mode:
+            config.MODE = args.mode
+
+        if 'debug' in args and args.debug:
+            config.DEBUG = args.debug
+
+        if 'hidden_bar' in args and args.hidden_bar:
+            config.SHOW_BAR = False
+
+        if 'bs' in args and args.bs:
+            config.DATA.BATCH_SIZE = args.bs
+
+        if 'save_eval' in args and args.save_eval:
+            config.SAVE_EVAL = True
+
+        if 'val_name' in args and args.val_name:
+            config.VAL_NAME = args.val_name
+
+        if 'post_processing' in args and args.post_processing:
+            config.EVAL.POST_PROCESSING = args.post_processing
+
+        if 'need_cpe' in args and args.need_cpe:
+            config.EVAL.NEED_CPE = args.need_cpe
+
+        if 'need_f1' in args and args.need_f1:
+            config.EVAL.NEED_F1 = args.need_f1
+
+        if 'need_rmse' in args and args.need_rmse:
+            config.EVAL.NEED_RMSE = args.need_rmse
+
+        if 'force_cube' in args and args.force_cube:
+            config.EVAL.FORCE_CUBE = args.force_cube
+
+        if 'wall_num' in args and args.wall_num:
+            config.DATA.WALL_NUM = args.wall_num
+
+    args = config.MODEL.ARGS[0]
+    config.CKPT.DIR = os.path.join(config.CKPT.ROOT, f"{args['decoder_name']}_{args['output_name']}_Net",
+                                   config.TAG, 'debug' if config.DEBUG else '')
+    config.CKPT.RESULT_DIR = os.path.join(config.CKPT.DIR, 'results', config.MODE)
+    config.LOGGER.DIR = os.path.join(config.CKPT.DIR, "logs")
+
+    core_number = os.popen("grep 'physical id' /proc/cpuinfo | sort | uniq | wc -l").read()
+
+    try:
+        config.DATA.NUM_WORKERS = int(core_number) * 2
+        print(f"System core number: {config.DATA.NUM_WORKERS}")
+    except ValueError:
+        print(f"Can't get system core number, will use config: {config.DATA.NUM_WORKERS}")
+    config.freeze()
+    return config
+
+
+def get_rank_config(cfg, local_rank, world_size):
+    local_rank = 0 if local_rank is None else local_rank
+    config = cfg.clone()
+    config.defrost()
+    if world_size > 1:
+        ids = config.TRAIN.DEVICE.split(':')[-1].split(',') if ':' in config.TRAIN.DEVICE else range(world_size)
+        config.TRAIN.DEVICE = f'cuda:{ids[local_rank]}'
+
+    config.LOCAL_RANK = local_rank
+    config.WORLD_SIZE = world_size
+    config.SEED = config.SEED + local_rank
+
+    config.freeze()
+    return config
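The module above is built on yacs' clone/merge/freeze pattern: a frozen tree of defaults that each experiment clones and overrides from a YAML file. A minimal self-contained sketch of that pattern (the file name 'my_experiment.yaml' is hypothetical):

from yacs.config import CfgNode as CN

_C = CN()
_C.TRAIN = CN()
_C.TRAIN.BASE_LR = 5e-4  # default value

def load(cfg_path=None):
    config = _C.clone()                   # never mutate the shared defaults
    if cfg_path:
        config.merge_from_file(cfg_path)  # YAML keys override the defaults
    config.freeze()                       # further mutation requires defrost()
    return config

config = load()  # or load('my_experiment.yaml')
print(config.TRAIN.BASE_LR)

Freezing is why get_rank_config above must call defrost() before assigning per-rank fields.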
convert_ckpt.py
ADDED
@@ -0,0 +1,61 @@
+"""
+@date: 2021/11/22
+@description: Convert a training ckpt into an inference ckpt
+"""
+import argparse
+import os
+
+import torch
+
+from config.defaults import merge_from_file
+
+
+def parse_option():
+    parser = argparse.ArgumentParser(description='Convert a training ckpt into an inference ckpt')
+    parser.add_argument('--cfg',
+                        type=str,
+                        required=True,
+                        metavar='FILE',
+                        help='path of config file')
+
+    parser.add_argument('--output_path',
+                        type=str,
+                        help='path of output ckpt')
+
+    args = parser.parse_args()
+
+    print("arguments:")
+    for arg in vars(args):
+        print(arg, ":", getattr(args, arg))
+    print("-" * 50)
+    return args
+
+
+def convert_ckpt():
+    args = parse_option()
+    config = merge_from_file(args.cfg)
+    ck_dir = os.path.join("checkpoints", f"{config.MODEL.ARGS[0]['decoder_name']}_{config.MODEL.ARGS[0]['output_name']}_Net",
+                          config.TAG)
+    print(f"Processing {ck_dir}")
+    model_paths = [name for name in os.listdir(ck_dir) if '_best_' in name]
+    if len(model_paths) == 0:
+        print("Could not find best ckpt")
+        return
+    model_path = os.path.join(ck_dir, model_paths[0])
+    print(f"Loading {model_path}")
+    checkpoint = torch.load(model_path, map_location=torch.device('cuda:0'))
+    net = checkpoint['net']
+    if args.output_path is None:
+        output_path = os.path.join(ck_dir, 'best.pkl')
+    else:
+        output_path = args.output_path
+    print(f"Save on: {output_path}")
+    os.makedirs(os.path.dirname(output_path), exist_ok=True)
+    torch.save(net, output_path)
+
+
+if __name__ == '__main__':
+    convert_ckpt()
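A minimal sketch of consuming the converted checkpoint; the path below is hypothetical (it just follows the "{decoder_name}_{output_name}_Net/{tag}" layout computed above), and best.pkl contains whatever checkpoint['net'] held, without the optimizer state:

import torch

ckpt_path = 'checkpoints/my_decoder_my_output_Net/default/best.pkl'  # hypothetical path
net = torch.load(ckpt_path, map_location='cpu')  # inference ckpt: just the net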
dataset/__init__.py
ADDED
File without changes
dataset/build.py
ADDED
@@ -0,0 +1,115 @@
+"""
+@Date: 2021/07/18
+@description:
+"""
+import numpy as np
+import torch.utils.data
+from dataset.mp3d_dataset import MP3DDataset
+from dataset.pano_s2d3d_dataset import PanoS2D3DDataset
+from dataset.pano_s2d3d_mix_dataset import PanoS2D3DMixDataset
+from dataset.zind_dataset import ZindDataset
+
+
+def build_loader(config, logger):
+    name = config.DATA.DATASET
+    ddp = config.WORLD_SIZE > 1
+    train_dataset = None
+    train_data_loader = None
+    if config.MODE == 'train':
+        train_dataset = build_dataset(mode='train', config=config, logger=logger)
+
+    val_dataset = build_dataset(mode=config.VAL_NAME if config.MODE != 'test' else 'test', config=config, logger=logger)
+
+    train_sampler = None
+    val_sampler = None
+    if ddp:
+        if train_dataset:
+            train_sampler = torch.utils.data.DistributedSampler(train_dataset, shuffle=True)
+        val_sampler = torch.utils.data.DistributedSampler(val_dataset, shuffle=False)
+
+    batch_size = config.DATA.BATCH_SIZE
+    num_workers = 0 if config.DEBUG else config.DATA.NUM_WORKERS
+    pin_memory = config.DATA.PIN_MEMORY
+    if train_dataset:
+        logger.info(f'Train data loader batch size: {batch_size}')
+        train_data_loader = torch.utils.data.DataLoader(
+            train_dataset, sampler=train_sampler,
+            batch_size=batch_size,
+            shuffle=train_sampler is None,  # sampler and shuffle are mutually exclusive in DataLoader
+            num_workers=num_workers,
+            pin_memory=pin_memory,
+            drop_last=True,
+        )
+    # Use the largest divisor of len(val_dataset) that is <= batch_size, so every val sample fits evenly
+    batch_size = batch_size - (len(val_dataset) % np.arange(batch_size, 0, -1)).tolist().index(0)
+    logger.info(f'Val data loader batch size: {batch_size}')
+    val_data_loader = torch.utils.data.DataLoader(
+        val_dataset, sampler=val_sampler,
+        batch_size=batch_size,
+        shuffle=False,
+        num_workers=num_workers,
+        pin_memory=pin_memory,
+        drop_last=False
+    )
+    logger.info(f'Build data loader: num_workers:{num_workers} pin_memory:{pin_memory}')
+    return train_data_loader, val_data_loader
+
+
+def build_dataset(mode, config, logger):
+    name = config.DATA.DATASET
+    if name == 'mp3d':
+        dataset = MP3DDataset(
+            root_dir=config.DATA.DIR,
+            mode=mode,
+            shape=config.DATA.SHAPE,
+            max_wall_num=config.DATA.WALL_NUM,
+            aug=config.DATA.AUG if mode == 'train' else None,
+            camera_height=config.DATA.CAMERA_HEIGHT,
+            logger=logger,
+            for_test_index=config.DATA.FOR_TEST_INDEX,
+            keys=config.DATA.KEYS
+        )
+    elif name == 'pano_s2d3d':
+        dataset = PanoS2D3DDataset(
+            root_dir=config.DATA.DIR,
+            mode=mode,
+            shape=config.DATA.SHAPE,
+            max_wall_num=config.DATA.WALL_NUM,
+            aug=config.DATA.AUG if mode == 'train' else None,
+            camera_height=config.DATA.CAMERA_HEIGHT,
+            logger=logger,
+            for_test_index=config.DATA.FOR_TEST_INDEX,
+            subset=config.DATA.SUBSET,
+            keys=config.DATA.KEYS
+        )
+    elif name == 'pano_s2d3d_mix':
+        dataset = PanoS2D3DMixDataset(
+            root_dir=config.DATA.DIR,
+            mode=mode,
+            shape=config.DATA.SHAPE,
+            max_wall_num=config.DATA.WALL_NUM,
+            aug=config.DATA.AUG if mode == 'train' else None,
+            camera_height=config.DATA.CAMERA_HEIGHT,
+            logger=logger,
+            for_test_index=config.DATA.FOR_TEST_INDEX,
+            subset=config.DATA.SUBSET,
+            keys=config.DATA.KEYS
+        )
+    elif name == 'zind':
+        dataset = ZindDataset(
+            root_dir=config.DATA.DIR,
+            mode=mode,
+            shape=config.DATA.SHAPE,
+            max_wall_num=config.DATA.WALL_NUM,
+            aug=config.DATA.AUG if mode == 'train' else None,
+            camera_height=config.DATA.CAMERA_HEIGHT,
+            logger=logger,
+            for_test_index=config.DATA.FOR_TEST_INDEX,
+            is_simple=True,
+            is_ceiling_flat=False,
+            keys=config.DATA.KEYS,
+            vp_align=config.EVAL.POST_PROCESSING is not None and 'manhattan' in config.EVAL.POST_PROCESSING
+        )
+    else:
+        raise NotImplementedError(f"Unknown dataset: {name}")
+
+    return dataset
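The one-line val batch-size adjustment above picks the largest divisor of len(val_dataset) that is at most the configured batch size, so validation never drops a sample. A quick standalone illustration (values are made up):

import numpy as np

batch_size, n_val = 8, 36
# remainders of n_val modulo batch_size, batch_size - 1, ..., 1
remainders = (n_val % np.arange(batch_size, 0, -1)).tolist()
adjusted = batch_size - remainders.index(0)
print(adjusted)  # 6: the largest b <= 8 with 36 % b == 0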
dataset/communal/__init__.py
ADDED
@@ -0,0 +1,4 @@
+"""
+@Date: 2021/09/22
+@description:
+"""
dataset/communal/base_dataset.py
ADDED
@@ -0,0 +1,127 @@
+"""
+@Date: 2021/07/26
+@description:
+"""
+import numpy as np
+import torch
+
+from utils.boundary import corners2boundary, visibility_corners, get_heat_map
+from utils.conversion import xyz2depth, uv2xyz, uv2pixel
+from dataset.communal.data_augmentation import PanoDataAugmentation
+
+
+class BaseDataset(torch.utils.data.Dataset):
+    def __init__(self, mode, shape=None, max_wall_num=999, aug=None, camera_height=1.6, patch_num=256, keys=None):
+        if keys is None or len(keys) == 0:
+            keys = ['image', 'depth', 'ratio', 'id', 'corners']
+        if shape is None:
+            shape = [512, 1024]
+
+        assert mode == 'train' or mode == 'val' or mode == 'test' or mode is None, 'unknown mode!'
+        self.mode = mode
+        self.keys = keys
+        self.shape = shape
+        self.pano_aug = None if aug is None or mode == 'val' else PanoDataAugmentation(aug)
+        self.camera_height = camera_height
+        self.max_wall_num = max_wall_num
+        self.patch_num = patch_num
+        self.data = None
+
+    def __len__(self):
+        return len(self.data)
+
+    @staticmethod
+    def get_depth(corners, plan_y=1, length=256, visible=True):
+        visible_floor_boundary = corners2boundary(corners, length=length, visible=visible)
+        # The horizon-depth relative to plan_y
+        visible_depth = xyz2depth(uv2xyz(visible_floor_boundary, plan_y), plan_y)
+        return visible_depth
+
+    def process_data(self, label, image, patch_num):
+        """
+        :param label: dict with 'corners', 'ratio', 'id' (and optionally 'objects')
+        :param image: panorama image, shape [h, w, c]
+        :param patch_num: number of horizontal samples for depth/heat-map outputs
+        :return: dict with the requested keys
+        """
+        corners = label['corners']
+        if self.pano_aug is not None:
+            corners, image = self.pano_aug.execute_aug(corners, image if 'image' in self.keys else None)
+        eps = 1e-3
+        corners[:, 1] = np.clip(corners[:, 1], 0.5 + eps, 1 - eps)
+
+        output = {}
+        if 'image' in self.keys:
+            image = image.transpose(2, 0, 1)
+            output['image'] = image
+
+        visible_corners = None
+        if 'corner_class' in self.keys or 'depth' in self.keys:
+            visible_corners = visibility_corners(corners)
+
+        if 'depth' in self.keys:
+            depth = self.get_depth(visible_corners, length=patch_num, visible=False)
+            assert len(depth) == patch_num, f"{label['id']}, {len(depth)}, {self.pano_aug.parameters}, {corners}"
+            output['depth'] = depth
+
+        if 'ratio' in self.keys:
+            # Why use ratio? Because when floor_height = y_plan = 1, we only need to predict ceil_height (ratio).
+            output['ratio'] = label['ratio']
+
+        if 'id' in self.keys:
+            output['id'] = label['id']
+
+        if 'corners' in self.keys:
+            # all corners for evaluating Full_IoU
+            assert len(label['corners']) <= 32, f"len(label['corners']): {len(label['corners'])}"
+            output['corners'] = np.zeros((32, 2), dtype=np.float32)
+            output['corners'][:len(label['corners'])] = label['corners']
+
+        if 'corner_heat_map' in self.keys:
+            output['corner_heat_map'] = get_heat_map(visible_corners[..., 0])
+
+        if 'object' in self.keys and 'objects' in label:
+            output['object_heat_map'] = np.zeros((3, patch_num), dtype=np.float32)
+            output['object_size'] = np.zeros((3, patch_num), dtype=np.float32)  # width, height, bottom_height
+            for i, type in enumerate(label['objects']):
+                if len(label['objects'][type]) == 0:
+                    continue
+
+                u_s = []
+                for obj in label['objects'][type]:
+                    center_u = obj['center_u']
+                    u_s.append(center_u)
+                    center_pixel_u = uv2pixel(np.array([center_u]), w=patch_num, axis=0)[0]
+                    output['object_size'][0, center_pixel_u] = obj['width_u']
+                    output['object_size'][1, center_pixel_u] = obj['height_v']
+                    output['object_size'][2, center_pixel_u] = obj['boundary_v']
+                output['object_heat_map'][i] = get_heat_map(np.array(u_s))
+
+        return output
+
+
+if __name__ == '__main__':
+    from dataset.communal.read import read_image, read_label
+    from visualization.boundary import draw_boundaries
+    from utils.boundary import depth2boundaries
+    from tqdm import trange
+
+    # np.random.seed(0)
+    dataset = BaseDataset(mode=None)  # mode is a required argument; None skips the train/val/test logic
+    dataset.pano_aug = PanoDataAugmentation(aug={
+        'STRETCH': True,
+        'ROTATE': True,
+        'FLIP': True,
+    })
+    # pano_img = read_image("../src/demo.png")
+    # label = read_label("../src/demo.json")
+    pano_img_path = "../../src/dataset/mp3d/image/yqstnuAEVhm_6589ad7a5a0444b59adbf501c0f0fe53.png"
+    label_path = "../../src/dataset/mp3d/label/yqstnuAEVhm_6589ad7a5a0444b59adbf501c0f0fe53.json"
+    pano_img = read_image(pano_img_path)
+    label = read_label(label_path)
+
+    # batch test
+    for i in trange(1):
+        output = dataset.process_data(label, pano_img, 256)
+        boundary_list = depth2boundaries(output['ratio'], output['depth'], step=None)
+        draw_boundaries(output['image'].transpose(1, 2, 0), boundary_list=boundary_list, show=True)
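The fixed (32, 2) corner buffer above exists so rooms with different wall counts can be collated into one batch tensor. A quick standalone illustration (the uv values are made up):

import numpy as np

corners = np.array([[0.10, 0.70], [0.45, 0.65], [0.80, 0.72]], dtype=np.float32)  # 3 made-up uv corners
padded = np.zeros((32, 2), dtype=np.float32)
padded[:len(corners)] = corners  # rows beyond the real corners stay (0, 0)

batch = np.stack([padded, padded])  # fixed shape makes stacking trivial
print(batch.shape)  # (2, 32, 2)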
dataset/communal/data_augmentation.py
ADDED
@@ -0,0 +1,279 @@
+"""
+@Date: 2021/07/27
+@description:
+"""
+import numpy as np
+import cv2
+import functools
+
+from utils.conversion import pixel2lonlat, lonlat2pixel, uv2lonlat, lonlat2uv, pixel2uv
+
+
+@functools.lru_cache()
+def prepare_stretch(w, h):
+    lon = pixel2lonlat(np.array(range(w)), w=w, axis=0)
+    lat = pixel2lonlat(np.array(range(h)), h=h, axis=1)
+    sin_lon = np.sin(lon)
+    cos_lon = np.cos(lon)
+    tan_lat = np.tan(lat)
+    return sin_lon, cos_lon, tan_lat
+
+
+def pano_stretch_image(pano_img, kx, ky, kz):
+    """
+    Note that this is the inverse mapping, which refers to Equation 3 in the HorizonNet paper (the coordinate
+    system in the paper is different from here; xz needs to be swapped)
+    :param pano_img: a panorama image, shape must be [h,w,c]
+    :param kx: stretching along the left-right direction
+    :param ky: stretching along the up-down direction
+    :param kz: stretching along the front-back direction
+    :return:
+    """
+    w = pano_img.shape[1]
+    h = pano_img.shape[0]
+
+    sin_lon, cos_lon, tan_lat = prepare_stretch(w, h)
+
+    n_lon = np.arctan2(sin_lon * kz / kx, cos_lon)
+    n_lat = np.arctan(tan_lat[..., None] * np.sin(n_lon) / sin_lon * kx / ky)
+    n_pu = lonlat2pixel(n_lon, w=w, axis=0, need_round=False)
+    n_pv = lonlat2pixel(n_lat, h=h, axis=1, need_round=False)
+
+    pixel_map = np.empty((h, w, 2), dtype=np.float32)
+    pixel_map[..., 0] = n_pu
+    pixel_map[..., 1] = n_pv
+    map1 = pixel_map[..., 0]
+    map2 = pixel_map[..., 1]
+    # using wrap mode because the panorama is continuous across its left and right edges
+    new_img = cv2.remap(pano_img, map1, map2, interpolation=cv2.INTER_LINEAR, borderMode=cv2.BORDER_WRAP)
+    return new_img
+
+
+def pano_stretch_conner(corners, kx, ky, kz):
+    """
+    :param corners:
+    :param kx: stretching along the left-right direction
+    :param ky: stretching along the up-down direction
+    :param kz: stretching along the front-back direction
+    :return:
+    """
+
+    lonlat = uv2lonlat(corners)
+    sin_lon = np.sin(lonlat[..., 0:1])
+    cos_lon = np.cos(lonlat[..., 0:1])
+    tan_lat = np.tan(lonlat[..., 1:2])
+
+    n_lon = np.arctan2(sin_lon * kx / kz, cos_lon)
+
+    a = np.bitwise_or(corners[..., 0] == 0.5, corners[..., 0] == 1)
+    b = np.bitwise_not(a)
+    w = np.zeros_like(n_lon)
+    w[b] = np.sin(n_lon[b]) / sin_lon[b]
+    w[a] = kx / kz
+
+    n_lat = np.arctan(tan_lat * w / kx * ky)
+
+    lst = [n_lon, n_lat]
+    lonlat = np.concatenate(lst, axis=-1)
+    new_corners = lonlat2uv(lonlat)
+    return new_corners
+
+
+def pano_stretch(pano_img, corners, kx, ky, kz):
+    """
+    :param pano_img: a panorama image, shape must be [h,w,c]
+    :param corners:
+    :param kx: stretching along the left-right direction
+    :param ky: stretching along the up-down direction
+    :param kz: stretching along the front-back direction
+    :return:
+    """
+    new_img = pano_stretch_image(pano_img, kx, ky, kz)
+    new_corners = pano_stretch_conner(corners, kx, ky, kz)
+    return new_img, new_corners
+
+
+class PanoDataAugmentation:
+    def __init__(self, aug):
+        self.aug = aug
+        self.parameters = {}
+
+    def need_aug(self, name):
+        return name in self.aug and self.aug[name]
+
+    def execute_space_aug(self, corners, image):
+        if image is None:
+            return corners, image  # nothing to augment spatially without an image
+
+        if self.aug is None:
+            return corners, image
+        w = image.shape[1]
+        h = image.shape[0]
+
+        if self.need_aug('STRETCH'):
+            kx = np.random.uniform(1, 2)
+            kx = 1 / kx if np.random.randint(2) == 0 else kx
+            # we found that the ky transform may cause IoU to drop (HorizonNet also only applies x and z transforms)
+            # ky = np.random.uniform(1, 2)
+            # ky = 1 / ky if np.random.randint(2) == 0 else ky
+            ky = 1
+            kz = np.random.uniform(1, 2)
+            kz = 1 / kz if np.random.randint(2) == 0 else kz
+            image, corners = pano_stretch(image, corners, kx, ky, kz)
+            self.parameters['STRETCH'] = {'kx': kx, 'ky': ky, 'kz': kz}
+        else:
+            self.parameters['STRETCH'] = None
+
+        if self.need_aug('ROTATE'):
+            d_pu = np.random.randint(w)
+            image = np.roll(image, d_pu, axis=1)
+            corners[..., 0] = (corners[..., 0] + pixel2uv(np.array([d_pu]), w, h)) % pixel2uv(np.array([w]), w, h)
+            self.parameters['ROTATE'] = d_pu
+        else:
+            self.parameters['ROTATE'] = None
+
+        if self.need_aug('FLIP') and np.random.randint(2) == 0:
+            image = np.flip(image, axis=1).copy()
+            corners[..., 0] = pixel2uv(np.array([w]), w, h) - corners[..., 0]
+            corners = corners[::-1]
+            self.parameters['FLIP'] = True
+        else:
+            self.parameters['FLIP'] = None
+
+        return corners, image
+
+    def execute_visual_aug(self, image):
+        if self.need_aug('GAMMA'):
+            p = np.random.uniform(1, 2)
+            if np.random.randint(2) == 0:
+                p = 1 / p
+            image = image ** p
+            self.parameters['GAMMA'] = p
+        else:
+            self.parameters['GAMMA'] = None
+
+        # The following visual augmentation methods are only implemented but not tested
+        if self.need_aug('HUE') or self.need_aug('SATURATION'):
+            image = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)
+
+            if self.need_aug('HUE') and np.random.randint(2) == 0:
+                p = np.random.uniform(-0.1, 0.1)
+                image[..., 0] = np.mod(image[..., 0] + p * 180, 180)
+                self.parameters['HUE'] = p
+            else:
+                self.parameters['HUE'] = None
+
+            if self.need_aug('SATURATION') and np.random.randint(2) == 0:
+                p = np.random.uniform(0.5, 1.5)
+                image[..., 1] = np.clip(image[..., 1] * p, 0, 1)
+                self.parameters['SATURATION'] = p
+            else:
+                self.parameters['SATURATION'] = None
+
+            image = cv2.cvtColor(image, cv2.COLOR_HSV2RGB)
+
+        if self.need_aug('CONTRAST') and np.random.randint(2) == 0:
+            p = np.random.uniform(0.9, 1.1)
+            mean = image.mean(axis=0).mean(axis=0)
+            image = (image - mean) * p + mean
+            image = np.clip(image, 0, 1)
+            self.parameters['CONTRAST'] = p
+        else:
+            self.parameters['CONTRAST'] = None
+
+        return image
+
+    def execute_aug(self, corners, image):
+        corners, image = self.execute_space_aug(corners, image)
+        if image is not None:
+            image = self.execute_visual_aug(image)
+        return corners, image
+
+
+if __name__ == '__main__1':
+    from tqdm import trange
+    from visualization.floorplan import draw_floorplan
+    from dataset.communal.read import read_image, read_label
+    from utils.time_watch import TimeWatch
+    from utils.conversion import uv2xyz
+    from utils.boundary import corners2boundary
+
+    np.random.seed(123)
+    pano_img_path = "../../src/dataset/mp3d/image/TbHJrupSAjP_f320ae084f3a447da3e8ab11dd5f9320.png"
+    label_path = "../../src/dataset/mp3d/label/TbHJrupSAjP_f320ae084f3a447da3e8ab11dd5f9320.json"
+    pano_img = read_image(pano_img_path)
+    label = read_label(label_path)
+
+    corners = label['corners']
+    ratio = label['ratio']
+
+    pano_aug = PanoDataAugmentation(aug={
+        'STRETCH': True,
+        'ROTATE': True,
+        'FLIP': True,
+        'GAMMA': True,
+        # 'HUE': True,
+        # 'SATURATION': True,
+        # 'CONTRAST': True
+    })
+
+    # draw_floorplan(corners, show=True, marker_color=0.5, center_color=0.8, plan_y=1.6, show_radius=8)
+    # draw_boundaries(pano_img, corners_list=[corners], show=True, length=1024, ratio=ratio)
+
+    w = TimeWatch("test")
+    for i in trange(50000):
+        new_corners, new_pano_img = pano_aug.execute_aug(corners.copy(), pano_img.copy())
+        # draw_floorplan(uv2xyz(new_corners, plan_y=1.6)[..., ::2], show=True, marker_color=0.5, center_color=0.8,
+        #                show_radius=8)
+        # draw_boundaries(new_pano_img, corners_list=[new_corners], show=True, length=1024, ratio=ratio)
+
+
+if __name__ == '__main__':
+    from utils.boundary import corners2boundary
+    from visualization.floorplan import draw_floorplan
+    from utils.boundary import visibility_corners
+
+    corners = np.array([[0.7664539, 0.7416811],
+                        [0.06641078, 0.6521386],
+                        [0.30997428, 0.57855356],
+                        [0.383300784, 0.58726823],
+                        [0.383300775, 0.8005296],
+                        [0.5062902, 0.74822706]])
+    corners = visibility_corners(corners)
+    print(corners)
+    # draw_floorplan(uv2xyz(corners, plan_y=1.6)[..., ::2], show=True, marker_color=0.5, center_color=0.8,
+    #                show_radius=8)
+    visible_floor_boundary = corners2boundary(corners, length=256, visible=True)
+    # visible_depth = xyz2depth(uv2xyz(visible_floor_boundary, 1), 1)
+    print(len(visible_floor_boundary))
+
+
+if __name__ == '__main__0':
+    from visualization.floorplan import draw_floorplan
+
+    from dataset.communal.read import read_image, read_label
+    from utils.time_watch import TimeWatch
+    from utils.conversion import uv2xyz
+
+    # np.random.seed(1234)
+    pano_img_path = "../../src/dataset/mp3d/image/VVfe2KiqLaN_35b41dcbfcf84f96878f6ca28c70e5af.png"
+    label_path = "../../src/dataset/mp3d/label/VVfe2KiqLaN_35b41dcbfcf84f96878f6ca28c70e5af.json"
+    pano_img = read_image(pano_img_path)
+    label = read_label(label_path)
+
+    corners = label['corners']
+    ratio = label['ratio']
+
+    # draw_floorplan(corners, show=True, marker_color=0.5, center_color=0.8, plan_y=1.6, show_radius=8)
+
+    w = TimeWatch()
+    for i in range(5):
+        kx = np.random.uniform(1, 2)
+        kx = 1 / kx if np.random.randint(2) == 0 else kx
+        ky = np.random.uniform(1, 2)
+        ky = 1 / ky if np.random.randint(2) == 0 else ky
+        kz = np.random.uniform(1, 2)
+        kz = 1 / kz if np.random.randint(2) == 0 else kz
+        new_corners = pano_stretch_conner(corners.copy(), kx, ky, kz)
+        draw_floorplan(uv2xyz(new_corners, plan_y=1.6)[..., ::2], show=True, marker_color=0.5, center_color=0.8,
+                       show_radius=8)
ADDED
@@ -0,0 +1,214 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
@Date: 2021/07/28
|
3 |
+
@description:
|
4 |
+
"""
|
5 |
+
import os
|
6 |
+
import numpy as np
|
7 |
+
import cv2
|
8 |
+
import json
|
9 |
+
from PIL import Image
|
10 |
+
from utils.conversion import xyz2uv, pixel2uv
|
11 |
+
from utils.height import calc_ceil_ratio
|
12 |
+
|
13 |
+
|
14 |
+
def read_image(image_path, shape=None):
|
15 |
+
if shape is None:
|
16 |
+
shape = [512, 1024]
|
17 |
+
img = np.array(Image.open(image_path)).astype(np.float32) / 255
|
18 |
+
if img.shape[0] != shape[0] or img.shape[1] != shape[1]:
|
19 |
+
img = cv2.resize(img, dsize=tuple(shape[::-1]), interpolation=cv2.INTER_AREA)
|
20 |
+
|
21 |
+
return np.array(img)
|
22 |
+
|
23 |
+
|
24 |
+
def read_label(label_path, data_type='MP3D'):
|
25 |
+
|
26 |
+
if data_type == 'MP3D':
|
27 |
+
with open(label_path, 'r') as f:
|
28 |
+
label = json.load(f)
|
29 |
+
point_idx = [one['pointsIdx'][0] for one in label['layoutWalls']['walls']]
|
30 |
+
camera_height = label['cameraHeight']
|
31 |
+
room_height = label['layoutHeight']
|
32 |
+
camera_ceiling_height = room_height - camera_height
|
33 |
+
ratio = camera_ceiling_height / camera_height
|
34 |
+
|
35 |
+
xyz = [one['xyz'] for one in label['layoutPoints']['points']]
|
36 |
+
assert len(xyz) == len(point_idx), "len(xyz) != len(point_idx)"
|
37 |
+
xyz = [xyz[i] for i in point_idx]
|
38 |
+
xyz = np.asarray(xyz, dtype=np.float32)
|
39 |
+
xyz[:, 2] *= -1
|
40 |
+
xyz[:, 1] = camera_height
|
41 |
+
corners = xyz2uv(xyz)
|
42 |
+
elif data_type == 'Pano_S2D3D':
|
43 |
+
with open(label_path, 'r') as f:
|
44 |
+
lines = [line for line in f.readlines() if
|
45 |
+
len([c for c in line.split(' ') if c[0].isnumeric()]) > 1]
|
46 |
+
|
47 |
+
corners_list = np.array([line.strip().split() for line in lines], np.float32)
|
48 |
+
uv_list = pixel2uv(corners_list)
|
49 |
+
ceil_uv = uv_list[::2]
|
50 |
+
floor_uv = uv_list[1::2]
|
51 |
+
ratio = calc_ceil_ratio([ceil_uv, floor_uv], mode='mean')
|
52 |
+
corners = floor_uv
|
53 |
+
else:
|
54 |
+
return None
|
55 |
+
|
56 |
+
output = {
|
57 |
+
'ratio': np.array([ratio], dtype=np.float32),
|
58 |
+
'corners': corners,
|
59 |
+
        'id': os.path.basename(label_path).split('.')[0]
    }
    return output


def move_not_simple_image(data_dir, simple_panos):
    import shutil
    for house_index in os.listdir(data_dir):
        house_path = os.path.join(data_dir, house_index)
        if not os.path.isdir(house_path) or house_index == 'visualization':
            continue

        floor_plan_path = os.path.join(house_path, 'floor_plans')
        if os.path.exists(floor_plan_path):
            print(f'move:{floor_plan_path}')
            dst_floor_plan_path = floor_plan_path.replace('zind', 'zind2')
            os.makedirs(dst_floor_plan_path, exist_ok=True)
            shutil.move(floor_plan_path, dst_floor_plan_path)

        panos_path = os.path.join(house_path, 'panos')
        for pano in os.listdir(panos_path):
            pano_path = os.path.join(panos_path, pano)
            pano_index = '_'.join(pano.split('.')[0].split('_')[-2:])
            if f'{house_index}_{pano_index}' not in simple_panos and os.path.exists(pano_path):
                print(f'move:{pano_path}')
                dst_pano_path = pano_path.replace('zind', 'zind2')
                os.makedirs(os.path.dirname(dst_pano_path), exist_ok=True)
                shutil.move(pano_path, dst_pano_path)


def read_zind(partition_path, simplicity_path, data_dir, mode, is_simple=True,
              layout_type='layout_raw', is_ceiling_flat=False, plan_y=1):
    with open(simplicity_path, 'r') as f:
        simple_tag = json.load(f)

    simple_panos = {}
    for k in simple_tag.keys():
        if not simple_tag[k]:
            continue
        split = k.split('_')
        house_index = split[0]
        pano_index = '_'.join(split[-2:])
        simple_panos[f'{house_index}_{pano_index}'] = True

    # move_not_simple_image(data_dir, simple_panos)

    pano_list = []
    with open(partition_path, 'r') as f1:
        house_list = json.load(f1)[mode]

    for house_index in house_list:
        with open(os.path.join(data_dir, house_index, "zind_data.json"), 'r') as f2:
            data = json.load(f2)

        panos = []
        merger = data['merger']
        for floor in merger.values():
            for complete_room in floor.values():
                for partial_room in complete_room.values():
                    for pano_index in partial_room:
                        pano = partial_room[pano_index]
                        pano['index'] = pano_index
                        panos.append(pano)

        for pano in panos:
            if layout_type not in pano:
                continue
            pano_index = pano['index']

            if is_simple and f'{house_index}_{pano_index}' not in simple_panos.keys():
                continue

            if is_ceiling_flat and not pano['is_ceiling_flat']:
                continue

            layout = pano[layout_type]
            # corners
            corner_xz = np.array(layout['vertices'])
            corner_xz[..., 0] = -corner_xz[..., 0]
            corner_xyz = np.insert(corner_xz, 1, pano['camera_height'], axis=1)
            corners = xyz2uv(corner_xyz).astype(np.float32)

            # ratio
            ratio = np.array([(pano['ceiling_height'] - pano['camera_height']) / pano['camera_height']], dtype=np.float32)

            # Our future work: detecting windows, doors and openings
            objects = {
                'windows': [],
                'doors': [],
                'openings': [],
            }
            for label_index, wdo_type in enumerate(["windows", "doors", "openings"]):
                if wdo_type not in layout:
                    continue

                wdo_vertices = np.array(layout[wdo_type])
                if len(wdo_vertices) == 0:
                    continue

                assert len(wdo_vertices) % 3 == 0

                for i in range(0, len(wdo_vertices), 3):
                    # In the ZInD dataset the camera height is 1, and the default camera height in our
                    # code is also 1, so the xyz coordinates here can be used directly.
                    # Since we take the opposite z-axis, the order of left and right is swapped.

                    left_bottom_xyz = np.array(
                        [-wdo_vertices[i + 1][0], -wdo_vertices[i + 2][0], wdo_vertices[i + 1][1]])
                    right_bottom_xyz = np.array(
                        [-wdo_vertices[i][0], -wdo_vertices[i + 2][0], wdo_vertices[i][1]])
                    center_bottom_xyz = (left_bottom_xyz + right_bottom_xyz) / 2

                    center_top_xyz = center_bottom_xyz.copy()
                    center_top_xyz[1] = -wdo_vertices[i + 2][1]

                    center_boundary_xyz = center_bottom_xyz.copy()
                    center_boundary_xyz[1] = plan_y

                    uv = xyz2uv(np.array([left_bottom_xyz, right_bottom_xyz,
                                          center_bottom_xyz, center_top_xyz,
                                          center_boundary_xyz]))

                    left_bottom_uv = uv[0]
                    right_bottom_uv = uv[1]
                    width_u = abs(right_bottom_uv[0] - left_bottom_uv[0])
                    width_u = 1 - width_u if width_u > 0.5 else width_u
                    assert width_u > 0, width_u

                    center_bottom_uv = uv[2]
                    center_top_uv = uv[3]
                    height_v = center_bottom_uv[1] - center_top_uv[1]

                    if height_v < 0:
                        continue

                    center_boundary_uv = uv[4]
                    boundary_v = center_boundary_uv[1] - center_bottom_uv[1] if wdo_type == 'windows' else 0
                    boundary_v = 0 if boundary_v < 0 else boundary_v

                    center_u = center_bottom_uv[0]

                    objects[wdo_type].append({
                        'width_u': width_u,
                        'height_v': height_v,
                        'boundary_v': boundary_v,
                        'center_u': center_u
                    })

            pano_list.append({
                'img_path': os.path.join(data_dir, house_index, pano['image_path']),
                'corners': corners,
                'objects': objects,
                'ratio': ratio,
                'id': f'{house_index}_{pano_index}',
                'is_inside': pano['is_inside']
            })
    return pano_list
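For reference, a minimal sketch of how read_zind above can be driven (illustrative only; the dataset root below is a placeholder and assumes the stock ZInD layout with zind_partition.json and room_shape_simplicity_labels.json at the root):

    data_dir = 'src/dataset/zind'  # placeholder path, not part of the repository
    pano_list = read_zind(partition_path=f'{data_dir}/zind_partition.json',
                          simplicity_path=f'{data_dir}/room_shape_simplicity_labels.json',
                          data_dir=data_dir, mode='train')
    for pano in pano_list[:3]:
        # each entry carries uv corners, the ceiling/camera height ratio and window/door/opening boxes
        print(pano['id'], pano['corners'].shape, pano['ratio'], len(pano['objects']['windows']))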
dataset/mp3d_dataset.py
ADDED
@@ -0,0 +1,110 @@
"""
@date: 2021/6/25
@description:
"""
import os
import json

from dataset.communal.read import read_image, read_label
from dataset.communal.base_dataset import BaseDataset
from utils.logger import get_logger


class MP3DDataset(BaseDataset):
    def __init__(self, root_dir, mode, shape=None, max_wall_num=0, aug=None, camera_height=1.6, logger=None,
                 split_list=None, patch_num=256, keys=None, for_test_index=None):
        super().__init__(mode, shape, max_wall_num, aug, camera_height, patch_num, keys)

        if logger is None:
            logger = get_logger()
        self.root_dir = root_dir

        split_dir = os.path.join(root_dir, 'split')
        label_dir = os.path.join(root_dir, 'label')
        img_dir = os.path.join(root_dir, 'image')

        if split_list is None:
            with open(os.path.join(split_dir, f"{mode}.txt"), 'r') as f:
                split_list = [x.rstrip().split() for x in f]

        split_list.sort()
        if for_test_index is not None:
            split_list = split_list[:for_test_index]

        self.data = []
        invalid_num = 0
        for name in split_list:
            name = "_".join(name)
            img_path = os.path.join(img_dir, f"{name}.png")
            label_path = os.path.join(label_dir, f"{name}.json")

            if not os.path.exists(img_path):
                logger.warning(f"{img_path} not exists")
                invalid_num += 1
                continue
            if not os.path.exists(label_path):
                logger.warning(f"{label_path} not exists")
                invalid_num += 1
                continue

            with open(label_path, 'r') as f:
                label = json.load(f)

            if self.max_wall_num >= 10:
                if label['layoutWalls']['num'] < self.max_wall_num:
                    invalid_num += 1
                    continue
            elif self.max_wall_num != 0 and label['layoutWalls']['num'] != self.max_wall_num:
                invalid_num += 1
                continue

            # print(label['layoutWalls']['num'])
            self.data.append([img_path, label_path])

        logger.info(
            f"Build dataset mode: {self.mode} max_wall_num: {self.max_wall_num} valid: {len(self.data)} invalid: {invalid_num}")

    def __getitem__(self, idx):
        rgb_path, label_path = self.data[idx]
        label = read_label(label_path, data_type='MP3D')
        image = read_image(rgb_path, self.shape)
        output = self.process_data(label, image, self.patch_num)
        return output


if __name__ == "__main__":
    import numpy as np
    from PIL import Image

    from tqdm import tqdm
    from visualization.boundary import draw_boundaries
    from visualization.floorplan import draw_floorplan
    from utils.boundary import depth2boundaries
    from utils.conversion import uv2xyz

    modes = ['test', 'val']
    for i in range(1):
        for mode in modes:
            print(mode)
            mp3d_dataset = MP3DDataset(root_dir='../src/dataset/mp3d', mode=mode, aug={
                'STRETCH': True,
                'ROTATE': True,
                'FLIP': True,
                'GAMMA': True
            })
            save_dir = f'../src/dataset/mp3d/visualization/{mode}'
            if not os.path.isdir(save_dir):
                os.makedirs(save_dir)

            bar = tqdm(mp3d_dataset, ncols=100)
            for data in bar:
                bar.set_description(f"Processing {data['id']}")
                boundary_list = depth2boundaries(data['ratio'], data['depth'], step=None)
                pano_img = draw_boundaries(data['image'].transpose(1, 2, 0), boundary_list=boundary_list, show=True)
                Image.fromarray((pano_img * 255).astype(np.uint8)).save(
                    os.path.join(save_dir, f"{data['id']}_boundary.png"))

                floorplan = draw_floorplan(uv2xyz(boundary_list[0])[..., ::2], show=True,
                                           marker_color=None, center_color=0.8, show_radius=None)
                Image.fromarray((floorplan.squeeze() * 255).astype(np.uint8)).save(
                    os.path.join(save_dir, f"{data['id']}_floorplan.png"))
dataset/pano_s2d3d_dataset.py
ADDED
@@ -0,0 +1,107 @@
"""
@date: 2021/6/16
@description:
"""
import math
import os
import numpy as np

from dataset.communal.read import read_image, read_label
from dataset.communal.base_dataset import BaseDataset
from utils.logger import get_logger


class PanoS2D3DDataset(BaseDataset):
    def __init__(self, root_dir, mode, shape=None, max_wall_num=0, aug=None, camera_height=1.6, logger=None,
                 split_list=None, patch_num=256, keys=None, for_test_index=None, subset=None):
        super().__init__(mode, shape, max_wall_num, aug, camera_height, patch_num, keys)

        if logger is None:
            logger = get_logger()
        self.root_dir = root_dir

        if mode is None:
            return
        label_dir = os.path.join(root_dir, 'valid' if mode == 'val' else mode, 'label_cor')
        img_dir = os.path.join(root_dir, 'valid' if mode == 'val' else mode, 'img')

        if split_list is None:
            split_list = [name.split('.')[0] for name in os.listdir(label_dir) if
                          not name.startswith('.') and name.endswith('txt')]

        split_list.sort()

        assert subset == 'pano' or subset == 's2d3d' or subset is None, 'error subset'
        if subset == 'pano':
            split_list = [name for name in split_list if 'pano_' in name]
            logger.info("Use PanoContext Dataset")
        elif subset == 's2d3d':
            split_list = [name for name in split_list if 'camera_' in name]
            logger.info("Use Stanford2D3D Dataset")

        if for_test_index is not None:
            split_list = split_list[:for_test_index]

        self.data = []
        invalid_num = 0
        for name in split_list:
            img_path = os.path.join(img_dir, f"{name}.png")
            label_path = os.path.join(label_dir, f"{name}.txt")

            if not os.path.exists(img_path):
                logger.warning(f"{img_path} not exists")
                invalid_num += 1
                continue
            if not os.path.exists(label_path):
                logger.warning(f"{label_path} not exists")
                invalid_num += 1
                continue

            with open(label_path, 'r') as f:
                lines = [line for line in f.readlines() if
                         len([c for c in line.split(' ') if c[0].isnumeric()]) > 1]
                if len(lines) % 2 != 0:
                    invalid_num += 1
                    continue
            self.data.append([img_path, label_path])

        logger.info(
            f"Build dataset mode: {self.mode} valid: {len(self.data)} invalid: {invalid_num}")

    def __getitem__(self, idx):
        rgb_path, label_path = self.data[idx]
        label = read_label(label_path, data_type='Pano_S2D3D')
        image = read_image(rgb_path, self.shape)
        output = self.process_data(label, image, self.patch_num)
        return output


if __name__ == '__main__':
    # imports needed by the visualization block below (mirrors mp3d_dataset.py)
    from PIL import Image
    from tqdm import tqdm
    from visualization.boundary import draw_boundaries
    from visualization.floorplan import draw_floorplan
    from utils.boundary import depth2boundaries
    from utils.conversion import uv2xyz

    modes = ['test', 'val', 'train']
    for i in range(1):
        for mode in modes:
            print(mode)
            mp3d_dataset = PanoS2D3DDataset(root_dir='../src/dataset/pano_s2d3d', mode=mode, aug={
                # 'STRETCH': True,
                # 'ROTATE': True,
                # 'FLIP': True,
                # 'GAMMA': True
            })
            continue
            save_dir = f'../src/dataset/pano_s2d3d/visualization/{mode}'
            if not os.path.isdir(save_dir):
                os.makedirs(save_dir)

            bar = tqdm(mp3d_dataset, ncols=100)
            for data in bar:
                bar.set_description(f"Processing {data['id']}")
                boundary_list = depth2boundaries(data['ratio'], data['depth'], step=None)
                pano_img = draw_boundaries(data['image'].transpose(1, 2, 0), boundary_list=boundary_list, show=False)
                Image.fromarray((pano_img * 255).astype(np.uint8)).save(
                    os.path.join(save_dir, f"{data['id']}_boundary.png"))

                floorplan = draw_floorplan(uv2xyz(boundary_list[0])[..., ::2], show=False,
                                           marker_color=None, center_color=0.8, show_radius=None)
                Image.fromarray((floorplan.squeeze() * 255).astype(np.uint8)).save(
                    os.path.join(save_dir, f"{data['id']}_floorplan.png"))
dataset/pano_s2d3d_mix_dataset.py
ADDED
@@ -0,0 +1,91 @@
"""
@date: 2021/6/16
@description:
"""

import os

from dataset.pano_s2d3d_dataset import PanoS2D3DDataset
from utils.logger import get_logger


class PanoS2D3DMixDataset(PanoS2D3DDataset):
    def __init__(self, root_dir, mode, shape=None, max_wall_num=0, aug=None, camera_height=1.6, logger=None,
                 split_list=None, patch_num=256, keys=None, for_test_index=None, subset=None):
        assert subset == 's2d3d' or subset == 'pano', 'error subset'
        super().__init__(root_dir, None, shape, max_wall_num, aug, camera_height, logger,
                         split_list, patch_num, keys, None, subset)
        if logger is None:
            logger = get_logger()
        self.mode = mode
        if mode == 'train':
            if subset == 'pano':
                s2d3d_train_data = PanoS2D3DDataset(root_dir, 'train', shape, max_wall_num, aug, camera_height, logger,
                                                    split_list, patch_num, keys, None, 's2d3d').data
                s2d3d_val_data = PanoS2D3DDataset(root_dir, 'val', shape, max_wall_num, aug, camera_height, logger,
                                                  split_list, patch_num, keys, None, 's2d3d').data
                s2d3d_test_data = PanoS2D3DDataset(root_dir, 'test', shape, max_wall_num, aug, camera_height, logger,
                                                   split_list, patch_num, keys, None, 's2d3d').data
                s2d3d_all_data = s2d3d_train_data + s2d3d_val_data + s2d3d_test_data

                pano_train_data = PanoS2D3DDataset(root_dir, 'train', shape, max_wall_num, aug, camera_height, logger,
                                                   split_list, patch_num, keys, None, 'pano').data
                self.data = s2d3d_all_data + pano_train_data
            elif subset == 's2d3d':
                pano_train_data = PanoS2D3DDataset(root_dir, 'train', shape, max_wall_num, aug, camera_height, logger,
                                                   split_list, patch_num, keys, None, 'pano').data
                pano_val_data = PanoS2D3DDataset(root_dir, 'val', shape, max_wall_num, aug, camera_height, logger,
                                                 split_list, patch_num, keys, None, 'pano').data
                pano_test_data = PanoS2D3DDataset(root_dir, 'test', shape, max_wall_num, aug, camera_height, logger,
                                                  split_list, patch_num, keys, None, 'pano').data
                pano_all_data = pano_train_data + pano_val_data + pano_test_data

                s2d3d_train_data = PanoS2D3DDataset(root_dir, 'train', shape, max_wall_num, aug, camera_height, logger,
                                                    split_list, patch_num, keys, None, 's2d3d').data
                self.data = pano_all_data + s2d3d_train_data
        else:
            self.data = PanoS2D3DDataset(root_dir, mode, shape, max_wall_num, aug, camera_height, logger,
                                         split_list, patch_num, keys, None, subset).data

        if for_test_index is not None:
            self.data = self.data[:for_test_index]
        logger.info(f"Build dataset mode: {self.mode} valid: {len(self.data)}")


if __name__ == '__main__':
    import numpy as np
    from PIL import Image

    from tqdm import tqdm
    from visualization.boundary import draw_boundaries
    from visualization.floorplan import draw_floorplan
    from utils.boundary import depth2boundaries
    from utils.conversion import uv2xyz

    modes = ['test', 'val', 'train']
    for i in range(1):
        for mode in modes:
            print(mode)
            mp3d_dataset = PanoS2D3DMixDataset(root_dir='../src/dataset/pano_s2d3d', mode=mode, aug={
                # 'STRETCH': True,
                # 'ROTATE': True,
                # 'FLIP': True,
                # 'GAMMA': True
            }, subset='pano')
            continue
            save_dir = f'../src/dataset/pano_s2d3d/visualization1/{mode}'
            if not os.path.isdir(save_dir):
                os.makedirs(save_dir)

            bar = tqdm(mp3d_dataset, ncols=100)
            for data in bar:
                bar.set_description(f"Processing {data['id']}")
                boundary_list = depth2boundaries(data['ratio'], data['depth'], step=None)
                pano_img = draw_boundaries(data['image'].transpose(1, 2, 0), boundary_list=boundary_list, show=False)
                Image.fromarray((pano_img * 255).astype(np.uint8)).save(
                    os.path.join(save_dir, f"{data['id']}_boundary.png"))

                floorplan = draw_floorplan(uv2xyz(boundary_list[0])[..., ::2], show=False,
                                           marker_color=None, center_color=0.8, show_radius=None)
                Image.fromarray((floorplan.squeeze() * 255).astype(np.uint8)).save(
                    os.path.join(save_dir, f"{data['id']}_floorplan.png"))
dataset/zind_dataset.py
ADDED
@@ -0,0 +1,138 @@
"""
@Date: 2021/09/22
@description:
"""
import os
import json
import math
import numpy as np

from dataset.communal.read import read_image, read_label, read_zind
from dataset.communal.base_dataset import BaseDataset
from utils.logger import get_logger
from preprocessing.filter import filter_center, filter_boundary, filter_self_intersection
from utils.boundary import calc_rotation


class ZindDataset(BaseDataset):
    def __init__(self, root_dir, mode, shape=None, max_wall_num=0, aug=None, camera_height=1.6, logger=None,
                 split_list=None, patch_num=256, keys=None, for_test_index=None,
                 is_simple=True, is_ceiling_flat=False, vp_align=False):
        # if keys is None:
        #     keys = ['image', 'depth', 'ratio', 'id', 'corners', 'corner_heat_map', 'object']
        super().__init__(mode, shape, max_wall_num, aug, camera_height, patch_num, keys)
        if logger is None:
            logger = get_logger()
        self.root_dir = root_dir
        self.vp_align = vp_align

        data_dir = os.path.join(root_dir)
        img_dir = os.path.join(root_dir, 'image')

        pano_list = read_zind(partition_path=os.path.join(data_dir, "zind_partition.json"),
                              simplicity_path=os.path.join(data_dir, "room_shape_simplicity_labels.json"),
                              data_dir=data_dir, mode=mode, is_simple=is_simple, is_ceiling_flat=is_ceiling_flat)

        if for_test_index is not None:
            pano_list = pano_list[:for_test_index]
        if split_list:
            pano_list = [pano for pano in pano_list if pano['id'] in split_list]
        self.data = []
        invalid_num = 0
        for pano in pano_list:
            if not os.path.exists(pano['img_path']):
                logger.warning(f"{pano['img_path']} not exists")
                invalid_num += 1
                continue

            if not filter_center(pano['corners']):
                # logger.warning(f"{pano['id']} camera center not in layout")
                # invalid_num += 1
                continue

            if self.max_wall_num >= 10:
                if len(pano['corners']) < self.max_wall_num:
                    invalid_num += 1
                    continue
            elif self.max_wall_num != 0 and len(pano['corners']) != self.max_wall_num:
                invalid_num += 1
                continue

            if not filter_boundary(pano['corners']):
                logger.warning(f"{pano['id']} boundary cross")
                invalid_num += 1
                continue

            if not filter_self_intersection(pano['corners']):
                logger.warning(f"{pano['id']} self_intersection")
                invalid_num += 1
                continue

            self.data.append(pano)

        logger.info(
            f"Build dataset mode: {self.mode} max_wall_num: {self.max_wall_num} valid: {len(self.data)} invalid: {invalid_num}")

    def __getitem__(self, idx):
        pano = self.data[idx]
        rgb_path = pano['img_path']
        label = pano
        image = read_image(rgb_path, self.shape)

        if self.vp_align:
            # Equivalent to the vanishing-point alignment step
            rotation = calc_rotation(corners=label['corners'])
            shift = math.modf(rotation / (2 * np.pi) + 1)[0]
            image = np.roll(image, round(shift * self.shape[1]), axis=1)
            label['corners'][:, 0] = np.modf(label['corners'][:, 0] + shift)[0]

        output = self.process_data(label, image, self.patch_num)
        return output


if __name__ == "__main__":
    import numpy as np
    from PIL import Image

    from tqdm import tqdm
    from visualization.boundary import draw_boundaries, draw_object
    from visualization.floorplan import draw_floorplan
    from utils.boundary import depth2boundaries, calc_rotation
    from utils.conversion import uv2xyz
    from models.other.init_env import init_env

    init_env(123)

    modes = ['val']
    for i in range(1):
        for mode in modes:
            print(mode)
            mp3d_dataset = ZindDataset(root_dir='../src/dataset/zind', mode=mode, aug={
                'STRETCH': False,
                'ROTATE': False,
                'FLIP': False,
                'GAMMA': False
            })
            # continue
            # save_dir = f'../src/dataset/zind/visualization/{mode}'
            # if not os.path.isdir(save_dir):
            #     os.makedirs(save_dir)

            bar = tqdm(mp3d_dataset, ncols=100)
            for data in bar:
                # if data['id'] != '1079_pano_18':
                #     continue
                bar.set_description(f"Processing {data['id']}")
                boundary_list = depth2boundaries(data['ratio'], data['depth'], step=None)

                pano_img = draw_boundaries(data['image'].transpose(1, 2, 0), boundary_list=boundary_list, show=True)
                # Image.fromarray((pano_img * 255).astype(np.uint8)).save(
                #     os.path.join(save_dir, f"{data['id']}_boundary.png"))
                # draw_object(pano_img, heat_maps=data['object_heat_map'], depth=data['depth'],
                #             size=data['object_size'], show=True)
                # pass
                #
                floorplan = draw_floorplan(uv2xyz(boundary_list[0])[..., ::2], show=True,
                                           marker_color=None, center_color=0.2)
                # Image.fromarray((floorplan.squeeze() * 255).astype(np.uint8)).save(
                #     os.path.join(save_dir, f"{data['id']}_floorplan.png"))
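The vp_align branch of ZindDataset.__getitem__ reduces to a horizontal roll of the panorama plus the same wrap-around shift on the corner u coordinates. A self-contained sketch of that arithmetic (illustrative only; the rotation value is made up):

    import math
    import numpy as np

    rotation = 0.5                                    # radians, as returned by calc_rotation
    shift = math.modf(rotation / (2 * np.pi) + 1)[0]  # fraction of a full turn, in [0, 1)
    width = 1024                                      # panorama width in pixels
    roll_px = round(shift * width)                    # columns to roll the image by
    u = np.array([0.02, 0.98])                        # corner u coordinates, in [0, 1)
    u_aligned = np.modf(u + shift)[0]                 # corners shifted with wrap-around
    print(roll_px, u_aligned)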
evaluation/__init__.py
ADDED
@@ -0,0 +1,4 @@
"""
@date: 2021/6/29
@description:
"""
evaluation/accuracy.py
ADDED
@@ -0,0 +1,249 @@
"""
@date: 2021/8/4
@description:
"""
import numpy as np
import cv2
import scipy

from evaluation.f1_score import f1_score_2d
from loss import GradLoss
from utils.boundary import corners2boundaries, layout2depth
from utils.conversion import depth2xyz, uv2xyz, get_u, depth2uv, xyz2uv, uv2pixel
from utils.height import calc_ceil_ratio
from evaluation.iou import calc_IoU, calc_Iou_height
from visualization.boundary import draw_boundaries
from visualization.floorplan import draw_iou_floorplan
from visualization.grad import show_grad


def calc_accuracy(dt, gt, visualization=False, h=512):
    visb_iou_2ds = []
    visb_iou_3ds = []
    full_iou_2ds = []
    full_iou_3ds = []
    iou_heights = []

    visb_iou_floodplans = []
    full_iou_floodplans = []
    pano_bds = []

    if 'depth' not in dt.keys():
        dt['depth'] = gt['depth']

    for i in range(len(gt['depth'])):
        # print(i)
        dt_xyz = dt['processed_xyz'][i] if 'processed_xyz' in dt else depth2xyz(np.abs(dt['depth'][i]))
        visb_gt_xyz = depth2xyz(np.abs(gt['depth'][i]))
        corners = gt['corners'][i]
        full_gt_corners = corners[corners[..., 0] + corners[..., 1] != 0]  # take effective corners
        full_gt_xyz = uv2xyz(full_gt_corners)

        dt_xz = dt_xyz[..., ::2]
        visb_gt_xz = visb_gt_xyz[..., ::2]
        full_gt_xz = full_gt_xyz[..., ::2]

        gt_ratio = gt['ratio'][i][0]

        if 'ratio' not in dt.keys():
            if 'boundary' in dt.keys():
                w = len(dt['boundary'][i])
                boundary = np.clip(dt['boundary'][i], 0.0001, 0.4999)
                depth = np.clip(dt['depth'][i], 0.001, 9999)
                dt_ceil_boundary = np.concatenate([get_u(w, is_np=True)[..., None], boundary], axis=-1)
                dt_floor_boundary = depth2uv(depth)
                dt_ratio = calc_ceil_ratio(boundaries=[dt_ceil_boundary, dt_floor_boundary])
            else:
                dt_ratio = gt_ratio
        else:
            dt_ratio = dt['ratio'][i][0]

        visb_iou_2d, visb_iou_3d = calc_IoU(dt_xz, visb_gt_xz, dt_height=1 + dt_ratio, gt_height=1 + gt_ratio)
        full_iou_2d, full_iou_3d = calc_IoU(dt_xz, full_gt_xz, dt_height=1 + dt_ratio, gt_height=1 + gt_ratio)
        iou_height = calc_Iou_height(dt_height=1 + dt_ratio, gt_height=1 + gt_ratio)

        visb_iou_2ds.append(visb_iou_2d)
        visb_iou_3ds.append(visb_iou_3d)
        full_iou_2ds.append(full_iou_2d)
        full_iou_3ds.append(full_iou_3d)
        iou_heights.append(iou_height)

        if visualization:
            pano_img = cv2.resize(gt['image'][i].transpose(1, 2, 0), (h * 2, h))
            # visb_iou_floodplans.append(draw_iou_floorplan(dt_xz, visb_gt_xz, iou_2d=visb_iou_2d, iou_3d=visb_iou_3d, side_l=h))
            # full_iou_floodplans.append(draw_iou_floorplan(dt_xz, full_gt_xz, iou_2d=full_iou_2d, iou_3d=full_iou_3d, side_l=h))
            visb_iou_floodplans.append(draw_iou_floorplan(dt_xz, visb_gt_xz, side_l=h))
            full_iou_floodplans.append(draw_iou_floorplan(dt_xz, full_gt_xz, side_l=h))
            gt_boundaries = corners2boundaries(gt_ratio, corners_xyz=full_gt_xyz, step=None, length=1024, visible=False)
            dt_boundaries = corners2boundaries(dt_ratio, corners_xyz=dt_xyz, step=None, visible=False,
                                               length=1024)  # visb_gt_xyz.shape[0] if dt_xyz.shape[0] != visb_gt_xyz.shape[0] else None

            pano_bd = draw_boundaries(pano_img, boundary_list=gt_boundaries, boundary_color=[0, 0, 1])
            pano_bd = draw_boundaries(pano_bd, boundary_list=dt_boundaries, boundary_color=[0, 1, 0])
            pano_bds.append(pano_bd)

    visb_iou_2d = np.array(visb_iou_2ds).mean()
    visb_iou_3d = np.array(visb_iou_3ds).mean()
    full_iou_2d = np.array(full_iou_2ds).mean()
    full_iou_3d = np.array(full_iou_3ds).mean()
    iou_height = np.array(iou_heights).mean()

    if visualization:
        visb_iou_floodplans = np.array(visb_iou_floodplans).transpose(0, 3, 1, 2)  # NCHW
        full_iou_floodplans = np.array(full_iou_floodplans).transpose(0, 3, 1, 2)  # NCHW
        pano_bds = np.array(pano_bds).transpose(0, 3, 1, 2)
    return [visb_iou_2d, visb_iou_3d, visb_iou_floodplans], \
           [full_iou_2d, full_iou_3d, full_iou_floodplans], iou_height, pano_bds, full_iou_2ds


def calc_ce(dt, gt):
    w = 1024
    h = 512
    ce_s = []
    for i in range(len(gt['corners'])):
        floor_gt_corners = gt['corners'][i]
        # take effective corners
        floor_gt_corners = floor_gt_corners[floor_gt_corners[..., 0] + floor_gt_corners[..., 1] != 0]
        floor_gt_corners = np.roll(floor_gt_corners, -np.argmin(floor_gt_corners[..., 0]), 0)
        gt_ratio = gt['ratio'][i][0]
        ceil_gt_corners = corners2boundaries(gt_ratio, corners_uv=floor_gt_corners, step=None)[1]
        gt_corners = np.concatenate((floor_gt_corners, ceil_gt_corners))
        gt_corners = uv2pixel(gt_corners, w, h)

        floor_dt_corners = xyz2uv(dt['processed_xyz'][i])
        floor_dt_corners = np.roll(floor_dt_corners, -np.argmin(floor_dt_corners[..., 0]), 0)
        dt_ratio = dt['ratio'][i][0]
        ceil_dt_corners = corners2boundaries(dt_ratio, corners_uv=floor_dt_corners, step=None)[1]
        dt_corners = np.concatenate((floor_dt_corners, ceil_dt_corners))
        dt_corners = uv2pixel(dt_corners, w, h)

        mse = np.sqrt(((gt_corners - dt_corners) ** 2).sum(1)).mean()
        ce = 100 * mse / np.sqrt(w ** 2 + h ** 2)
        ce_s.append(ce)

    return np.array(ce_s).mean()


def calc_pe(dt, gt):
    w = 1024
    h = 512
    pe_s = []
    for i in range(len(gt['corners'])):
        floor_gt_corners = gt['corners'][i]
        # take effective corners
        floor_gt_corners = floor_gt_corners[floor_gt_corners[..., 0] + floor_gt_corners[..., 1] != 0]
        floor_gt_corners = np.roll(floor_gt_corners, -np.argmin(floor_gt_corners[..., 0]), 0)
        gt_ratio = gt['ratio'][i][0]
        gt_floor_boundary, gt_ceil_boundary = corners2boundaries(gt_ratio, corners_uv=floor_gt_corners, length=w)
        gt_floor_boundary = uv2pixel(gt_floor_boundary, w, h)
        gt_ceil_boundary = uv2pixel(gt_ceil_boundary, w, h)

        floor_dt_corners = xyz2uv(dt['processed_xyz'][i])
        floor_dt_corners = np.roll(floor_dt_corners, -np.argmin(floor_dt_corners[..., 0]), 0)
        dt_ratio = dt['ratio'][i][0]
        dt_floor_boundary, dt_ceil_boundary = corners2boundaries(dt_ratio, corners_uv=floor_dt_corners, length=w)
        dt_floor_boundary = uv2pixel(dt_floor_boundary, w, h)
        dt_ceil_boundary = uv2pixel(dt_ceil_boundary, w, h)

        gt_surface = np.zeros((h, w), dtype=np.int32)
        gt_surface[gt_ceil_boundary[..., 1], np.arange(w)] = 1
        gt_surface[gt_floor_boundary[..., 1], np.arange(w)] = 1
        gt_surface = np.cumsum(gt_surface, axis=0)

        dt_surface = np.zeros((h, w), dtype=np.int32)
        dt_surface[dt_ceil_boundary[..., 1], np.arange(w)] = 1
        dt_surface[dt_floor_boundary[..., 1], np.arange(w)] = 1
        dt_surface = np.cumsum(dt_surface, axis=0)

        pe = 100 * (dt_surface != gt_surface).sum() / (h * w)
        pe_s.append(pe)
    return np.array(pe_s).mean()


def calc_rmse_delta_1(dt, gt):
    rmse_s = []
    delta_1_s = []
    for i in range(len(gt['depth'])):
        gt_boundaries = corners2boundaries(gt['ratio'][i], corners_xyz=depth2xyz(gt['depth'][i]), step=None,
                                           visible=False)
        dt_xyz = dt['processed_xyz'][i] if 'processed_xyz' in dt else depth2xyz(np.abs(dt['depth'][i]))

        dt_boundaries = corners2boundaries(dt['ratio'][i], corners_xyz=dt_xyz, step=None,
                                           length=256 if 'processed_xyz' in dt else None,
                                           visible=True if 'processed_xyz' in dt else False)
        gt_layout_depth = layout2depth(gt_boundaries, show=False)
        dt_layout_depth = layout2depth(dt_boundaries, show=False)

        rmse = ((gt_layout_depth - dt_layout_depth) ** 2).mean() ** 0.5
        threshold = np.maximum(gt_layout_depth / dt_layout_depth, dt_layout_depth / gt_layout_depth)
        delta_1 = (threshold < 1.25).mean()
        rmse_s.append(rmse)
        delta_1_s.append(delta_1)
    return np.array(rmse_s).mean(), np.array(delta_1_s).mean()


def calc_f1_score(dt, gt, threshold=10):
    w = 1024
    h = 512
    f1_s = []
    precision_s = []
    recall_s = []
    for i in range(len(gt['corners'])):
        floor_gt_corners = gt['corners'][i]
        # take effective corners
        floor_gt_corners = floor_gt_corners[floor_gt_corners[..., 0] + floor_gt_corners[..., 1] != 0]
        floor_gt_corners = np.roll(floor_gt_corners, -np.argmin(floor_gt_corners[..., 0]), 0)
        gt_ratio = gt['ratio'][i][0]
        ceil_gt_corners = corners2boundaries(gt_ratio, corners_uv=floor_gt_corners, step=None)[1]
        gt_corners = np.concatenate((floor_gt_corners, ceil_gt_corners))
        gt_corners = uv2pixel(gt_corners, w, h)

        floor_dt_corners = xyz2uv(dt['processed_xyz'][i])
        floor_dt_corners = np.roll(floor_dt_corners, -np.argmin(floor_dt_corners[..., 0]), 0)
        dt_ratio = dt['ratio'][i][0]
        ceil_dt_corners = corners2boundaries(dt_ratio, corners_uv=floor_dt_corners, step=None)[1]
        dt_corners = np.concatenate((floor_dt_corners, ceil_dt_corners))
        dt_corners = uv2pixel(dt_corners, w, h)

        Fs, Ps, Rs = f1_score_2d(gt_corners, dt_corners, [threshold])
        f1_s.append(Fs[0])
        precision_s.append(Ps[0])
        recall_s.append(Rs[0])

    return np.array(f1_s).mean(), np.array(precision_s).mean(), np.array(recall_s).mean()


def show_heat_map(dt, gt, vis_w=1024):
    dt_heat_map = dt['corner_heat_map'].detach().cpu().numpy()
    gt_heat_map = gt['corner_heat_map'].detach().cpu().numpy()
    dt_heat_map_imgs = []
    gt_heat_map_imgs = []
    for i in range(len(gt['depth'])):
        dt_heat_map_img = dt_heat_map[..., np.newaxis].repeat(3, axis=-1).repeat(20, axis=0)
        gt_heat_map_img = gt_heat_map[..., np.newaxis].repeat(3, axis=-1).repeat(20, axis=0)
        dt_heat_map_imgs.append(cv2.resize(dt_heat_map_img, (vis_w, dt_heat_map_img.shape[0])).transpose(2, 0, 1))
        gt_heat_map_imgs.append(cv2.resize(gt_heat_map_img, (vis_w, dt_heat_map_img.shape[0])).transpose(2, 0, 1))
    return dt_heat_map_imgs, gt_heat_map_imgs


def show_depth_normal_grad(dt, gt, device, vis_w=1024):
    grad_conv = GradLoss().to(device).grad_conv
    gt_grad_imgs = []
    dt_grad_imgs = []

    if 'depth' not in dt.keys():
        dt['depth'] = gt['depth']

    if vis_w == 1024:
        h = 5
    else:
        h = int(vis_w / (12 * 10))

    for i in range(len(gt['depth'])):
        gt_grad_img = show_grad(gt['depth'][i], grad_conv, h)
        dt_grad_img = show_grad(dt['depth'][i], grad_conv, h)
        vis_h = dt_grad_img.shape[0] * (vis_w // dt_grad_img.shape[1])
        gt_grad_imgs.append(cv2.resize(gt_grad_img, (vis_w, vis_h), interpolation=cv2.INTER_NEAREST).transpose(2, 0, 1))
        dt_grad_imgs.append(cv2.resize(dt_grad_img, (vis_w, vis_h), interpolation=cv2.INTER_NEAREST).transpose(2, 0, 1))

    return gt_grad_imgs, dt_grad_imgs
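In LaTeX form, the metrics implemented above (with w = 1024, h = 512, N concatenated floor and ceiling corner pairs, S the cumulative ceiling/floor surface map, and d the layout depth map) are:

    \begin{align}
    \mathrm{CE} &= \frac{100}{\sqrt{w^2 + h^2}} \cdot \frac{1}{N}\sum_{i=1}^{N} \lVert c_i^{gt} - c_i^{dt} \rVert_2 \\
    \mathrm{PE} &= \frac{100}{h w} \sum_{p} \mathbb{1}\!\left[ S^{gt}(p) \neq S^{dt}(p) \right] \\
    \mathrm{RMSE} &= \sqrt{\frac{1}{h w} \sum_{p} \left( d^{gt}(p) - d^{dt}(p) \right)^2} \\
    \delta_1 &= \frac{1}{h w} \sum_{p} \mathbb{1}\!\left[ \max\!\left( \frac{d^{gt}(p)}{d^{dt}(p)}, \frac{d^{dt}(p)}{d^{gt}(p)} \right) < 1.25 \right]
    \end{align}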
evaluation/analyse_layout_type.py
ADDED
@@ -0,0 +1,83 @@
"""
@Date: 2022/01/31
@description:
    ZInd:
    {'test': {'mw': 2789, 'aw': 381}, 'train': {'mw': 21228, 'aw': 3654}, 'val': {'mw': 2647, 'aw': 433}}

"""
import numpy as np
import matplotlib.pyplot as plt
import json

from tqdm import tqdm
from evaluation.iou import calc_IoU_2D
from visualization.floorplan import draw_floorplan
from visualization.boundary import draw_boundaries
from utils.conversion import depth2xyz, uv2xyz


def analyse_layout_type(dataset, show=False):
    bar = tqdm(dataset, total=len(dataset), ncols=100)
    manhattan = 0
    atlanta = 0
    corner_type = {}
    for data in bar:
        bar.set_description(f"Processing {data['id']}")
        corners = data['corners']
        corners = corners[corners[..., 0] + corners[..., 1] != 0]  # take effective corners
        corners_count = str(len(corners)) if len(corners) < 10 else "10"
        if corners_count not in corner_type:
            corner_type[corners_count] = 0
        corner_type[corners_count] += 1

        all_xz = uv2xyz(corners)[..., ::2]

        c = len(all_xz)
        flag = False
        for i in range(c - 1):
            l1 = all_xz[i + 1] - all_xz[i]
            l2 = all_xz[(i + 2) % c] - all_xz[i + 1]
            a = np.linalg.norm(l1) * np.linalg.norm(l2)
            if a == 0:
                continue
            dot = np.dot(l1, l2) / a
            if 0.9 > abs(dot) > 0.1:
                # cos^-1(0.1) = 84.26 deg > angle > cos^-1(0.9) = 25.84 deg, or
                # cos^-1(-0.9) = 154.16 deg > angle > cos^-1(-0.1) = 95.74 deg
                flag = True
                break
        if flag:
            atlanta += 1
        else:
            manhattan += 1

        if flag and show:
            draw_floorplan(all_xz, show=True)
            draw_boundaries(data['image'].transpose(1, 2, 0), [corners], ratio=data['ratio'], show=True)

    corner_type = dict(sorted(corner_type.items(), key=lambda item: int(item[0])))
    return {'manhattan': manhattan, "atlanta": atlanta, "corner_type": corner_type}


def execute_analyse_layout_type(root_dir, dataset, modes=None):
    if modes is None:
        modes = ["train", "val", "test"]

    iou2d_d = {}
    for mode in modes:
        print("mode: {}".format(mode))
        types = analyse_layout_type(dataset(root_dir, mode), show=False)
        iou2d_d[mode] = types
        print(json.dumps(types, indent=4))
    return iou2d_d


if __name__ == '__main__':
    from dataset.zind_dataset import ZindDataset
    from dataset.mp3d_dataset import MP3DDataset

    iou2d_d = execute_analyse_layout_type(root_dir='../src/dataset/mp3d',
                                          dataset=MP3DDataset)
    # iou2d_d = execute_analyse_layout_type(root_dir='../src/dataset/zind',
    #                                       dataset=ZindDataset)
    print(json.dumps(iou2d_d, indent=4))
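A quick numeric check of the normalized dot product test above (illustrative only; the corner coordinates are made up): a right angle gives |dot| = 0 and stays Manhattan-consistent, while a 45-degree wall pair falls inside the (0.1, 0.9) band and flags the layout as Atlanta:

    import numpy as np

    def wall_dot(p0, p1, p2):
        # normalized dot product of two consecutive wall vectors, as in analyse_layout_type
        l1, l2 = p1 - p0, p2 - p1
        return np.dot(l1, l2) / (np.linalg.norm(l1) * np.linalg.norm(l2))

    sq = [np.array(v, dtype=float) for v in [[0, 0], [2, 0], [2, 2]]]
    print(wall_dot(*sq))   # 0.0 -> right angle, Manhattan-consistent

    sk = [np.array(v, dtype=float) for v in [[0, 0], [2, 0], [3, 1]]]
    print(wall_dot(*sk))   # ~0.707 (45 deg), inside (0.1, 0.9) -> counted as Atlanta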
evaluation/eval_visible_iou.py
ADDED
@@ -0,0 +1,56 @@
"""
@Date: 2021/08/02
@description:
    The 2D IoU between the visible boundary and the full boundary. On the MP3D dataset it is
    {'train': 0.9775843958583535, 'test': 0.9828616219607289, 'val': 0.9883810438132491},
    indicating that the best performance achievable with our approach is bounded below a 98.29% 2D IoU on the test split.
"""
import numpy as np
import matplotlib.pyplot as plt

from tqdm import tqdm
from evaluation.iou import calc_IoU_2D
from visualization.floorplan import draw_iou_floorplan
from utils.conversion import depth2xyz, uv2xyz


def eval_dataset_visible_IoU(dataset, show=False):
    bar = tqdm(dataset, total=len(dataset), ncols=100)
    iou2ds = []
    for data in bar:
        bar.set_description(f"Processing {data['id']}")
        corners = data['corners']
        corners = corners[corners[..., 0] + corners[..., 1] != 0]  # take effective corners
        all_xz = uv2xyz(corners)[..., ::2]
        visible_xz = depth2xyz(data['depth'])[..., ::2]
        iou2d = calc_IoU_2D(all_xz, visible_xz)
        iou2ds.append(iou2d)
        if show:
            layout_floorplan = draw_iou_floorplan(all_xz, visible_xz, iou_2d=iou2d)
            plt.imshow(layout_floorplan)
            plt.show()

    mean_iou2d = np.array(iou2ds).mean()
    return mean_iou2d


def execute_eval_dataset_visible_IoU(root_dir, dataset, modes=None):
    if modes is None:
        modes = ["train", "test", "val"]

    iou2d_d = {}
    for mode in modes:
        print("mode: {}".format(mode))
        iou2d = eval_dataset_visible_IoU(dataset(root_dir, mode, patch_num=1024,
                                                 keys=['depth', 'visible_corners', 'corners', 'id']), show=False)
        iou2d_d[mode] = iou2d
    return iou2d_d


if __name__ == '__main__':
    from dataset.mp3d_dataset import MP3DDataset

    iou2d_d = execute_eval_dataset_visible_IoU(root_dir='../src/dataset/mp3d',
                                               dataset=MP3DDataset,
                                               modes=['train', 'test', 'val'])
    print(iou2d_d)
evaluation/f1_score.py
ADDED
@@ -0,0 +1,78 @@
"""
@author: Zhigang Jiang
@time: 2022/01/28
@description:
Holistic 3D Vision Challenge on General Room Layout Estimation Track Evaluation Package
Reference: https://github.com/bertjiazheng/indoor-layout-evaluation
"""

from scipy.optimize import linear_sum_assignment
import numpy as np
import scipy

HEIGHT, WIDTH = 512, 1024
MAX_DISTANCE = np.sqrt(HEIGHT**2 + WIDTH**2)


def f1_score_2d(gt_corners, dt_corners, thresholds):
    distances = scipy.spatial.distance.cdist(gt_corners, dt_corners)
    return eval_junctions(distances, thresholds=thresholds)


def eval_junctions(distances, thresholds=5):
    thresholds = thresholds if isinstance(thresholds, tuple) or isinstance(
        thresholds, list) else list([thresholds])

    num_gts, num_preds = distances.shape

    # filter the matches between ceiling-wall and floor-wall junctions
    mask = np.zeros_like(distances, dtype=bool)
    mask[:num_gts//2, :num_preds//2] = True
    mask[num_gts//2:, num_preds//2:] = True
    distances[~mask] = np.inf

    # F-measure under different thresholds
    Fs = []
    Ps = []
    Rs = []
    for threshold in thresholds:
        distances_temp = distances.copy()

        # filter the mis-matched pairs
        distances_temp[distances_temp > threshold] = np.inf

        # keep only the rows and columns that contain non-inf elements
        distances_temp = distances_temp[:, np.any(np.isfinite(distances_temp), axis=0)]

        if np.prod(distances_temp.shape) == 0:
            Fs.append(0)
            Ps.append(0)
            Rs.append(0)
            continue

        distances_temp = distances_temp[np.any(np.isfinite(distances_temp), axis=1), :]

        # solve the bipartite graph matching problem
        row_ind, col_ind = linear_sum_assignment_with_inf(distances_temp)
        true_positive = np.sum(np.isfinite(distances_temp[row_ind, col_ind]))

        # compute precision and recall
        precision = true_positive / num_preds
        recall = true_positive / num_gts

        # compute F measure
        Fs.append(2 * precision * recall / (precision + recall))
        Ps.append(precision)
        Rs.append(recall)

    return Fs, Ps, Rs


def linear_sum_assignment_with_inf(cost_matrix):
    """
    Deal with linear_sum_assignment with inf according to
    https://github.com/scipy/scipy/issues/6900#issuecomment-451735634
    """
    cost_matrix = np.copy(cost_matrix)
    cost_matrix[np.isinf(cost_matrix)] = MAX_DISTANCE
    return linear_sum_assignment(cost_matrix)
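A toy run of f1_score_2d (illustrative only; the corner coordinates are made up). Following the convention of calc_f1_score in evaluation/accuracy.py, the first half of each array holds floor junctions and the second half ceiling junctions, since eval_junctions only matches within the same half:

    import numpy as np

    gt = np.array([[100, 300], [500, 310], [100, 200], [500, 190]])  # 2 floor + 2 ceiling junctions
    dt = np.array([[104, 303], [560, 310], [104, 196], [560, 190]])  # only the first of each pair is within 10 px
    Fs, Ps, Rs = f1_score_2d(gt, dt, thresholds=[10])
    print(Fs[0], Ps[0], Rs[0])  # 2 of 4 junctions matched -> F1 = P = R = 0.5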
evaluation/iou.py
ADDED
@@ -0,0 +1,148 @@
"""
@date: 2021/6/29
@description:
    The methods with the "_floorplan" suffix are only for comparison; they perform the calculation used in LED2-Net.
    However, the rasterized floorplan is affected by show_radius: setting it too large decreases accuracy,
    while setting it too small makes points beyond the range fall outside the calculation.
"""
import numpy as np
from shapely.geometry import Polygon


def calc_inter_area(dt_xz, gt_xz):
    """
    :param dt_xz: prediction boundaries (or corners), format: [[x1, z1], [x2, z2], ...]
    :param gt_xz: ground-truth boundaries (or corners), format: [[x1, z1], [x2, z2], ...]
    :return:
    """
    dt_polygon = Polygon(dt_xz)
    gt_polygon = Polygon(gt_xz)

    dt_area = dt_polygon.area
    gt_area = gt_polygon.area
    inter_area = dt_polygon.intersection(gt_polygon).area
    return dt_area, gt_area, inter_area


def calc_IoU_2D(dt_xz, gt_xz):
    """
    :param dt_xz: prediction boundaries (or corners), format: [[x1, z1], [x2, z2], ...]
    :param gt_xz: ground-truth boundaries (or corners), format: [[x1, z1], [x2, z2], ...]
    :return:
    """
    dt_area, gt_area, inter_area = calc_inter_area(dt_xz, gt_xz)
    iou_2d = inter_area / (gt_area + dt_area - inter_area)
    return iou_2d


def calc_IoU_3D(dt_xz, gt_xz, dt_height, gt_height):
    """
    :param dt_xz: prediction boundaries (or corners), format: [[x1, z1], [x2, z2], ...]
    :param gt_xz: ground-truth boundaries (or corners), format: [[x1, z1], [x2, z2], ...]
    :param dt_height:
    :param gt_height:
    :return:
    """
    dt_area, gt_area, inter_area = calc_inter_area(dt_xz, gt_xz)
    dt_volume = dt_area * dt_height
    gt_volume = gt_area * gt_height
    inter_volume = inter_area * min(dt_height, gt_height)
    iou_3d = inter_volume / (dt_volume + gt_volume - inter_volume)
    return iou_3d


def calc_IoU(dt_xz, gt_xz, dt_height, gt_height):
    """
    :param dt_xz: prediction boundaries (or corners), format: [[x1, z1], [x2, z2], ...]
    :param gt_xz: ground-truth boundaries (or corners), format: [[x1, z1], [x2, z2], ...]
    :param dt_height:
    :param gt_height:
    :return:
    """
    dt_area, gt_area, inter_area = calc_inter_area(dt_xz, gt_xz)
    iou_2d = inter_area / (gt_area + dt_area - inter_area)

    dt_volume = dt_area * dt_height
    gt_volume = gt_area * gt_height
    inter_volume = inter_area * min(dt_height, gt_height)
    iou_3d = inter_volume / (dt_volume + gt_volume - inter_volume)

    return iou_2d, iou_3d


def calc_Iou_height(dt_height, gt_height):
    return min(dt_height, gt_height) / max(dt_height, gt_height)


# the following is for testing only
def calc_inter_area_floorplan(dt_floorplan, gt_floorplan):
    intersect = np.sum(np.logical_and(dt_floorplan, gt_floorplan))
    dt_area = np.sum(dt_floorplan)
    gt_area = np.sum(gt_floorplan)
    return dt_area, gt_area, intersect


def calc_IoU_2D_floorplan(dt_floorplan, gt_floorplan):
    dt_area, gt_area, inter_area = calc_inter_area_floorplan(dt_floorplan, gt_floorplan)
    iou_2d = inter_area / (gt_area + dt_area - inter_area)
    return iou_2d


def calc_IoU_3D_floorplan(dt_floorplan, gt_floorplan, dt_height, gt_height):
    dt_area, gt_area, inter_area = calc_inter_area_floorplan(dt_floorplan, gt_floorplan)
    dt_volume = dt_area * dt_height
    gt_volume = gt_area * gt_height
    inter_volume = inter_area * min(dt_height, gt_height)
    iou_3d = inter_volume / (dt_volume + gt_volume - inter_volume)
    return iou_3d


def calc_IoU_floorplan(dt_floorplan, gt_floorplan, dt_height, gt_height):
    dt_area, gt_area, inter_area = calc_inter_area_floorplan(dt_floorplan, gt_floorplan)
    iou_2d = inter_area / (gt_area + dt_area - inter_area)

    dt_volume = dt_area * dt_height
    gt_volume = gt_area * gt_height
    inter_volume = inter_area * min(dt_height, gt_height)
    iou_3d = inter_volume / (dt_volume + gt_volume - inter_volume)
    return iou_2d, iou_3d


if __name__ == '__main__':
    from visualization.floorplan import draw_floorplan, draw_iou_floorplan
    from visualization.boundary import draw_boundaries, corners2boundaries
    from utils.conversion import uv2xyz
    from utils.height import height2ratio

    # dummy data
    dt_floor_corners = np.array([[0.2, 0.7],
                                 [0.4, 0.7],
                                 [0.6, 0.7],
                                 [0.8, 0.7]])
    dt_height = 2.8

    gt_floor_corners = np.array([[0.3, 0.7],
                                 [0.5, 0.7],
                                 [0.7, 0.7],
                                 [0.9, 0.7]])
    gt_height = 3.2

    dt_xz = uv2xyz(dt_floor_corners)[..., ::2]
    gt_xz = uv2xyz(gt_floor_corners)[..., ::2]

    dt_floorplan = draw_floorplan(dt_xz, show=False, show_radius=1)
    gt_floorplan = draw_floorplan(gt_xz, show=False, show_radius=1)
    # dt_floorplan = draw_floorplan(dt_xz, show=False, show_radius=2)
    # gt_floorplan = draw_floorplan(gt_xz, show=False, show_radius=2)

    iou_2d, iou_3d = calc_IoU_floorplan(dt_floorplan, gt_floorplan, dt_height, gt_height)
    print('use floor plan image:', iou_2d, iou_3d)

    iou_2d, iou_3d = calc_IoU(dt_xz, gt_xz, dt_height, gt_height)
    print('use floor plan polygon:', iou_2d, iou_3d)

    draw_iou_floorplan(dt_xz, gt_xz, show=True, iou_2d=iou_2d, iou_3d=iou_3d)
    pano_bd = draw_boundaries(np.zeros([512, 1024, 3]), corners_list=[dt_floor_corners],
                              boundary_color=[0, 0, 1], ratio=height2ratio(dt_height), draw_corners=False)
    pano_bd = draw_boundaries(pano_bd, corners_list=[gt_floor_corners],
                              boundary_color=[0, 1, 0], ratio=height2ratio(gt_height), show=True, draw_corners=False)
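Written out, the polygon-based IoU computed above, with A the floor-plan areas, A_inter their intersection, and h = 1 + ratio the layout heights:

    \begin{align}
    \mathrm{IoU}_{2D} &= \frac{A_{inter}}{A_{dt} + A_{gt} - A_{inter}} \\
    \mathrm{IoU}_{3D} &= \frac{A_{inter} \cdot \min(h_{dt}, h_{gt})}
                             {A_{dt}\,h_{dt} + A_{gt}\,h_{gt} - A_{inter} \cdot \min(h_{dt}, h_{gt})} \\
    \mathrm{IoU}_{height} &= \frac{\min(h_{dt}, h_{gt})}{\max(h_{dt}, h_{gt})}
    \end{align}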
inference.py
ADDED
@@ -0,0 +1,261 @@
1 |
+
"""
|
2 |
+
@Date: 2021/09/19
|
3 |
+
@description:
|
4 |
+
"""
|
5 |
+
import json
|
6 |
+
import os
|
7 |
+
import argparse
|
8 |
+
import cv2
|
9 |
+
import numpy as np
|
10 |
+
import torch
|
11 |
+
import matplotlib.pyplot as plt
|
12 |
+
import glob
|
13 |
+
|
14 |
+
from tqdm import tqdm
|
15 |
+
from PIL import Image
|
16 |
+
from config.defaults import merge_from_file, get_config
|
17 |
+
from dataset.mp3d_dataset import MP3DDataset
|
18 |
+
from dataset.zind_dataset import ZindDataset
|
19 |
+
from models.build import build_model
|
20 |
+
from loss import GradLoss
|
21 |
+
from postprocessing.post_process import post_process
|
22 |
+
from preprocessing.pano_lsd_align import panoEdgeDetection, rotatePanorama
|
23 |
+
from utils.boundary import corners2boundaries, layout2depth
|
24 |
+
from utils.conversion import depth2xyz
|
25 |
+
from utils.logger import get_logger
|
26 |
+
from utils.misc import tensor2np_d, tensor2np
|
27 |
+
from evaluation.accuracy import show_grad
|
28 |
+
from models.lgt_net import LGT_Net
|
29 |
+
from utils.writer import xyz2json
|
30 |
+
from visualization.boundary import draw_boundaries
|
31 |
+
from visualization.floorplan import draw_floorplan, draw_iou_floorplan
|
32 |
+
from visualization.obj3d import create_3d_obj
|
33 |
+
|
34 |
+
|
35 |
+
def parse_option():
|
36 |
+
parser = argparse.ArgumentParser(description='Panorama Layout Transformer training and evaluation script')
|
37 |
+
parser.add_argument('--img_glob',
|
38 |
+
type=str,
|
39 |
+
required=True,
|
40 |
+
help='image glob path')
|
41 |
+
|
42 |
+
parser.add_argument('--cfg',
|
43 |
+
type=str,
|
44 |
+
required=True,
|
45 |
+
metavar='FILE',
|
46 |
+
help='path of config file')
|
47 |
+
|
48 |
+
parser.add_argument('--post_processing',
|
49 |
+
type=str,
|
50 |
+
default='manhattan',
|
51 |
+
choices=['manhattan', 'atalanta', 'original'],
|
52 |
+
help='post-processing type')
|
53 |
+
|
54 |
+
parser.add_argument('--output_dir',
|
55 |
+
type=str,
|
56 |
+
default='src/output',
|
57 |
+
help='path of output')
|
58 |
+
|
59 |
+
parser.add_argument('--visualize_3d', action='store_true',
|
60 |
+
help='visualize_3d')
|
61 |
+
|
62 |
+
parser.add_argument('--output_3d', action='store_true',
|
63 |
+
help='output_3d')
|
64 |
+
|
65 |
+
parser.add_argument('--device',
|
66 |
+
type=str,
|
67 |
+
default='cuda',
|
68 |
+
help='device')
|
69 |
+
|
70 |
+
args = parser.parse_args()
|
71 |
+
args.mode = 'test'
|
72 |
+
|
73 |
+
print("arguments:")
|
74 |
+
for arg in vars(args):
|
75 |
+
print(arg, ":", getattr(args, arg))
|
76 |
+
print("-" * 50)
|
77 |
+
return args
|
78 |
+
|
79 |
+
|
80 |
+
def visualize_2d(img, dt, show_depth=True, show_floorplan=True, show=False, save_path=None):
|
81 |
+
dt_np = tensor2np_d(dt)
|
82 |
+
dt_depth = dt_np['depth'][0]
|
83 |
+
dt_xyz = depth2xyz(np.abs(dt_depth))
|
84 |
+
dt_ratio = dt_np['ratio'][0][0]
|
85 |
+
dt_boundaries = corners2boundaries(dt_ratio, corners_xyz=dt_xyz, step=None, visible=False, length=img.shape[1])
|
86 |
+
vis_img = draw_boundaries(img, boundary_list=dt_boundaries, boundary_color=[0, 1, 0])
|
87 |
+
|
88 |
+
if 'processed_xyz' in dt:
|
89 |
+
dt_boundaries = corners2boundaries(dt_ratio, corners_xyz=dt['processed_xyz'][0], step=None, visible=False,
|
90 |
+
length=img.shape[1])
|
91 |
+
vis_img = draw_boundaries(vis_img, boundary_list=dt_boundaries, boundary_color=[1, 0, 0])
|
92 |
+
|
93 |
+
if show_depth:
|
94 |
+
dt_grad_img = show_depth_normal_grad(dt)
|
95 |
+
grad_h = dt_grad_img.shape[0]
|
96 |
+
vis_merge = [
|
97 |
+
vis_img[0:-grad_h, :, :],
|
98 |
+
dt_grad_img,
|
99 |
+
]
|
100 |
+
vis_img = np.concatenate(vis_merge, axis=0)
|
101 |
+
# vis_img = dt_grad_img.transpose(1, 2, 0)[100:]
|
102 |
+
|
103 |
+
if show_floorplan:
|
104 |
+
if 'processed_xyz' in dt:
|
105 |
+
floorplan = draw_iou_floorplan(dt['processed_xyz'][0][..., ::2], dt_xyz[..., ::2],
|
106 |
+
dt_board_color=[1, 0, 0, 1], gt_board_color=[0, 1, 0, 1])
|
107 |
+
else:
|
108 |
+
floorplan = show_alpha_floorplan(dt_xyz, border_color=[0, 1, 0, 1])
|
109 |
+
|
110 |
+
vis_img = np.concatenate([vis_img, floorplan[:, 60:-60, :]], axis=1)
|
111 |
+
if show:
|
112 |
+
plt.imshow(vis_img)
|
113 |
+
plt.show()
|
114 |
+
if save_path:
|
115 |
+
result = Image.fromarray((vis_img * 255).astype(np.uint8))
|
116 |
+
result.save(save_path)
|
117 |
+
return vis_img
|
118 |
+
|
119 |
+
|
120 |
+
def preprocess(img_ori, q_error=0.7, refine_iter=3, vp_cache_path=None):
|
121 |
+
# Align images with VP
|
122 |
+
if os.path.exists(vp_cache_path):
|
123 |
+
with open(vp_cache_path) as f:
|
124 |
+
vp = [[float(v) for v in line.rstrip().split(' ')] for line in f.readlines()]
|
125 |
+
vp = np.array(vp)
|
126 |
+
else:
|
127 |
+
# VP detection and line segment extraction
|
128 |
+
_, vp, _, _, _, _, _ = panoEdgeDetection(img_ori,
|
129 |
+
qError=q_error,
|
130 |
+
refineIter=refine_iter)
|
131 |
+
i_img = rotatePanorama(img_ori, vp[2::-1])
|
132 |
+
|
133 |
+
if vp_cache_path is not None:
|
134 |
+
with open(vp_cache_path, 'w') as f:
|
135 |
+
for i in range(3):
|
136 |
+
f.write('%.6f %.6f %.6f\n' % (vp[i, 0], vp[i, 1], vp[i, 2]))
|
137 |
+
|
138 |
+
return i_img, vp
|
139 |
+
|
140 |
+
|
141 |
+
def show_depth_normal_grad(dt):
|
142 |
+
grad_conv = GradLoss().to(dt['depth'].device).grad_conv
|
143 |
+
dt_grad_img = show_grad(dt['depth'][0], grad_conv, 50)
|
144 |
+
dt_grad_img = cv2.resize(dt_grad_img, (1024, 60), interpolation=cv2.INTER_NEAREST)
|
145 |
+
return dt_grad_img
|
146 |
+
|
147 |
+
|
148 |
+
def show_alpha_floorplan(dt_xyz, side_l=512, border_color=None):
|
149 |
+
if border_color is None:
|
150 |
+
border_color = [1, 0, 0, 1]
|
151 |
+
fill_color = [0.2, 0.2, 0.2, 0.2]
|
152 |
+
dt_floorplan = draw_floorplan(xz=dt_xyz[..., ::2], fill_color=fill_color,
|
153 |
+
border_color=border_color, side_l=side_l, show=False, center_color=[1, 0, 0, 1])
|
154 |
+
dt_floorplan = Image.fromarray((dt_floorplan * 255).astype(np.uint8), mode='RGBA')
|
155 |
+
back = np.zeros([side_l, side_l, len(fill_color)], dtype=np.float)
|
156 |
+
back[..., :] = [0.8, 0.8, 0.8, 1]
|
157 |
+
back = Image.fromarray((back * 255).astype(np.uint8), mode='RGBA')
|
158 |
+
iou_floorplan = Image.alpha_composite(back, dt_floorplan).convert("RGB")
|
159 |
+
dt_floorplan = np.array(iou_floorplan) / 255.0
|
160 |
+
return dt_floorplan
|
161 |
+
|
162 |
+
|
163 |
+
def save_pred_json(xyz, ration, save_path):
|
164 |
+
# xyz[..., -1] = -xyz[..., -1]
|
165 |
+
json_data = xyz2json(xyz, ration)
|
166 |
+
with open(save_path, 'w') as f:
|
167 |
+
f.write(json.dumps(json_data, indent=4) + '\n')
|
168 |
+
return json_data
|
169 |
+
|
170 |
+
|
171 |
+
def inference():
|
172 |
+
if len(img_paths) == 0:
|
173 |
+
logger.error('No images found')
|
174 |
+
return
|
175 |
+
|
176 |
+
bar = tqdm(img_paths, ncols=100)
|
177 |
+
for img_path in bar:
|
178 |
+
if not os.path.isfile(img_path):
|
179 |
+
logger.error(f'The {img_path} not is file')
|
180 |
+
continue
|
181 |
+
name = os.path.basename(img_path).split('.')[0]
|
182 |
+
bar.set_description(name)
|
183 |
+
img = np.array(Image.open(img_path).resize((1024, 512), Image.Resampling.BICUBIC))[..., :3]
|
184 |
+
if args.post_processing is not None and 'manhattan' in args.post_processing:
|
185 |
+
bar.set_description("Preprocessing")
|
186 |
+
img, vp = preprocess(img, vp_cache_path=os.path.join(args.output_dir, f"{name}_vp.txt"))
|
187 |
+
|
188 |
+
img = (img / 255.0).astype(np.float32)
|
189 |
+
run_one_inference(img, model, args, name)
|
190 |
+
|
191 |
+
|
192 |
+
def inference_dataset(dataset):
|
193 |
+
bar = tqdm(dataset, ncols=100)
|
194 |
+
for data in bar:
|
195 |
+
bar.set_description(data['id'])
|
196 |
+
run_one_inference(data['image'].transpose(1, 2, 0), model, args, name=data['id'], logger=logger)
|
197 |
+
|
198 |
+
|
199 |
+
@torch.no_grad()
|
200 |
+
def run_one_inference(img, model, args, name, logger, show=True, show_depth=True,
|
201 |
+
show_floorplan=True, mesh_format='.gltf', mesh_resolution=512):
|
202 |
+
model.eval()
|
203 |
+
logger.info("model inference...")
|
204 |
+
dt = model(torch.from_numpy(img.transpose(2, 0, 1)[None]).to(args.device))
|
205 |
+
if args.post_processing != 'original':
|
206 |
+
logger.info(f"post-processing, type:{args.post_processing}...")
|
207 |
+
dt['processed_xyz'] = post_process(tensor2np(dt['depth']), type_name=args.post_processing)
|
208 |
+
|
209 |
+
visualize_2d(img, dt,
|
210 |
+
show_depth=show_depth,
|
211 |
+
show_floorplan=show_floorplan,
|
212 |
+
show=show,
|
213 |
+
save_path=os.path.join(args.output_dir, f"{name}_pred.png"))
|
214 |
+
output_xyz = dt['processed_xyz'][0] if 'processed_xyz' in dt else depth2xyz(tensor2np(dt['depth'][0]))
|
215 |
+
|
216 |
+
logger.info(f"saving predicted layout json...")
|
217 |
+
json_data = save_pred_json(output_xyz, tensor2np(dt['ratio'][0])[0],
|
218 |
+
save_path=os.path.join(args.output_dir, f"{name}_pred.json"))
|
219 |
+
# if args.visualize_3d:
|
220 |
+
# from visualization.visualizer.visualizer import visualize_3d
|
221 |
+
# visualize_3d(json_data, (img * 255).astype(np.uint8))
|
222 |
+
|
223 |
+
if args.visualize_3d or args.output_3d:
|
224 |
+
dt_boundaries = corners2boundaries(tensor2np(dt['ratio'][0])[0], corners_xyz=output_xyz, step=None,
|
225 |
+
length=mesh_resolution if 'processed_xyz' in dt else None,
|
226 |
+
visible=True if 'processed_xyz' in dt else False)
|
227 |
+
dt_layout_depth = layout2depth(dt_boundaries, show=False)
|
228 |
+
|
229 |
+
logger.info(f"creating 3d mesh ...")
|
230 |
+
create_3d_obj(cv2.resize(img, dt_layout_depth.shape[::-1]), dt_layout_depth,
|
231 |
+
save_path=os.path.join(args.output_dir, f"{name}_3d{mesh_format}") if args.output_3d else None,
|
232 |
+
mesh=True, show=args.visualize_3d)
|
233 |
+
|
234 |
+
|
235 |
+
if __name__ == '__main__':
|
236 |
+
logger = get_logger()
|
237 |
+
args = parse_option()
|
238 |
+
config = get_config(args)
|
239 |
+
|
240 |
+
if ('cuda' in args.device or 'cuda' in config.TRAIN.DEVICE) and not torch.cuda.is_available():
|
241 |
+
logger.info(f'The {args.device} is not available, will use cpu ...')
|
242 |
+
config.defrost()
|
243 |
+
args.device = "cpu"
|
244 |
+
config.TRAIN.DEVICE = "cpu"
|
245 |
+
config.freeze()
|
246 |
+
|
247 |
+
model, _, _, _ = build_model(config, logger)
|
248 |
+
os.makedirs(args.output_dir, exist_ok=True)
|
249 |
+
img_paths = sorted(glob.glob(args.img_glob))
|
250 |
+
|
251 |
+
inference()
|
252 |
+
|
253 |
+
# dataset = MP3DDataset(root_dir='./src/dataset/mp3d', mode='test', split_list=[
|
254 |
+
# ['7y3sRwLe3Va', '155fac2d50764bf09feb6c8f33e8fb76'],
|
255 |
+
# ['e9zR4mvMWw7', 'c904c55a5d0e420bbd6e4e030b9fe5b4'],
|
256 |
+
# ])
|
257 |
+
# dataset = ZindDataset(root_dir='./src/dataset/zind', mode='test', split_list=[
|
258 |
+
# '1169_pano_21',
|
259 |
+
# '0583_pano_59',
|
260 |
+
# ], vp_align=True)
|
261 |
+
# inference_dataset(dataset)
|
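As a usage sketch for the script above (the config path and image glob are assumptions, not part of this commit), inference over a folder of panoramas looks like:

    python inference.py --cfg src/config/mp3d.yaml --img_glob "src/demo/*.png" --post_processing manhattan --output_dir src/output

Adding --visualize_3d or --output_3d additionally builds a textured layout mesh via create_3d_obj.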
loss/__init__.py
ADDED
@@ -0,0 +1,10 @@
+"""
+@date: 2021/7/19
+@description:
+"""
+
+from torch.nn import L1Loss
+from loss.led_loss import LEDLoss
+from loss.grad_loss import GradLoss
+from loss.boundary_loss import BoundaryLoss
+from loss.object_loss import ObjectLoss, HeatmapLoss
loss/boundary_loss.py
ADDED
@@ -0,0 +1,51 @@
+"""
+@Date: 2021/08/12
+@description: For HorizonNet, using latitudes to calculate loss.
+"""
+import torch
+import torch.nn as nn
+from utils.conversion import depth2xyz, xyz2lonlat
+
+
+class BoundaryLoss(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.loss = nn.L1Loss()
+
+    def forward(self, gt, dt):
+        gt_floor_xyz = depth2xyz(gt['depth'])
+        gt_ceil_xyz = gt_floor_xyz.clone()
+        gt_ceil_xyz[..., 1] = -gt['ratio']
+
+        gt_floor_boundary = xyz2lonlat(gt_floor_xyz)[..., -1:]
+        gt_ceil_boundary = xyz2lonlat(gt_ceil_xyz)[..., -1:]
+
+        gt_boundary = torch.cat([gt_floor_boundary, gt_ceil_boundary], dim=-1).permute(0, 2, 1)
+        dt_boundary = dt['boundary']
+
+        loss = self.loss(gt_boundary, dt_boundary)
+        return loss
+
+
+if __name__ == '__main__':
+    import numpy as np
+    from dataset.mp3d_dataset import MP3DDataset
+
+    mp3d_dataset = MP3DDataset(root_dir='../src/dataset/mp3d', mode='train')
+    gt = mp3d_dataset.__getitem__(0)
+
+    gt['depth'] = torch.from_numpy(gt['depth'][np.newaxis])  # batch size is 1
+    gt['ratio'] = torch.from_numpy(gt['ratio'][np.newaxis])  # batch size is 1
+
+    dummy_dt = {
+        'depth': gt['depth'].clone(),
+        'boundary': torch.cat([
+            xyz2lonlat(depth2xyz(gt['depth']))[..., -1:],
+            xyz2lonlat(depth2xyz(gt['depth'], plan_y=-gt['ratio']))[..., -1:]
+        ], dim=-1).permute(0, 2, 1)
+    }
+    # dummy_dt['boundary'][:, :, :20] /= 1.2  # perturb part of the prediction to get a non-zero loss
+
+    boundary_loss = BoundaryLoss()
+    loss = boundary_loss(gt, dummy_dt)
+    print(loss)
loss/grad_loss.py
ADDED
@@ -0,0 +1,57 @@
+"""
+@Date: 2021/08/12
+@description:
+"""
+
+import torch
+import torch.nn as nn
+import numpy as np
+
+from visualization.grad import get_all
+
+
+class GradLoss(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.loss = nn.L1Loss()
+        self.cos = nn.CosineSimilarity(dim=-1, eps=0)
+
+        self.grad_conv = nn.Conv1d(1, 1, kernel_size=3, stride=1, padding=0, bias=False, padding_mode='circular')
+        self.grad_conv.weight = nn.Parameter(torch.tensor([[[1, 0, -1]]]).float())
+        self.grad_conv.weight.requires_grad = False
+
+    def forward(self, gt, dt):
+        gt_direction, _, gt_angle_grad = get_all(gt['depth'], self.grad_conv)
+        dt_direction, _, dt_angle_grad = get_all(dt['depth'], self.grad_conv)
+
+        normal_loss = (1 - self.cos(gt_direction, dt_direction)).mean()
+        grad_loss = self.loss(gt_angle_grad, dt_angle_grad)
+        return [normal_loss, grad_loss]
+
+
+if __name__ == '__main__':
+    from dataset.mp3d_dataset import MP3DDataset
+    from utils.boundary import depth2boundaries
+    from utils.conversion import uv2xyz
+    from visualization.boundary import draw_boundaries
+    from visualization.floorplan import draw_floorplan
+
+    def show_boundary(image, depth, ratio):
+        boundary_list = depth2boundaries(ratio, depth, step=None)
+        draw_boundaries(image.transpose(1, 2, 0), boundary_list=boundary_list, show=True)
+        draw_floorplan(uv2xyz(boundary_list[0])[..., ::2], show=True, center_color=0.8)
+
+    mp3d_dataset = MP3DDataset(root_dir='../src/dataset/mp3d', mode='train', patch_num=256)
+    gt = mp3d_dataset.__getitem__(1)
+    gt['depth'] = torch.from_numpy(gt['depth'][np.newaxis])  # batch size is 1
+    dummy_dt = {
+        'depth': gt['depth'].clone(),
+    }
+    # dummy_dt['depth'][..., 20] *= 3  # perturb one column to get a non-zero loss
+
+    # show_boundary(gt['image'], gt['depth'][0].numpy(), gt['ratio'])
+    # show_boundary(gt['image'], dummy_dt['depth'][0].numpy(), gt['ratio'])
+
+    grad_loss = GradLoss()
+    loss = grad_loss(gt, dummy_dt)
+    print(loss)
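A dataset-free sanity check for GradLoss, as a sketch (it assumes only that visualization.grad.get_all accepts depths shaped [b, 256], the same shape the __main__ demo above passes; values are synthetic):

    import torch
    from loss.grad_loss import GradLoss

    grad_loss = GradLoss()
    gt = {'depth': torch.rand(1, 256) + 1}   # synthetic positive depths
    dt = {'depth': gt['depth'].clone()}      # identical prediction
    normal_loss, angle_grad_loss = grad_loss(gt, dt)
    print(normal_loss, angle_grad_loss)      # both ~0 for identical depths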
loss/led_loss.py
ADDED
@@ -0,0 +1,47 @@
+"""
+@Date: 2021/08/12
+@description:
+"""
+import torch
+import torch.nn as nn
+
+
+class LEDLoss(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.loss = nn.L1Loss()
+
+    def forward(self, gt, dt):
+        camera_height = 1.6  # fixed camera height (metres) used to rescale normalized depths
+        gt_depth = gt['depth'] * camera_height
+
+        dt_ceil_depth = dt['ceil_depth'] * camera_height * gt['ratio']
+        dt_floor_depth = dt['depth'] * camera_height
+
+        ceil_loss = self.loss(gt_depth, dt_ceil_depth)
+        floor_loss = self.loss(gt_depth, dt_floor_depth)
+
+        loss = floor_loss + ceil_loss
+
+        return loss
+
+
+if __name__ == '__main__':
+    import numpy as np
+    from dataset.mp3d_dataset import MP3DDataset
+
+    mp3d_dataset = MP3DDataset(root_dir='../src/dataset/mp3d', mode='train')
+    gt = mp3d_dataset.__getitem__(0)
+
+    gt['depth'] = torch.from_numpy(gt['depth'][np.newaxis])  # batch size is 1
+    gt['ratio'] = torch.from_numpy(gt['ratio'][np.newaxis])  # batch size is 1
+
+    dummy_dt = {
+        'depth': gt['depth'].clone(),
+        'ceil_depth': gt['depth'] / gt['ratio']
+    }
+    # dummy_dt['depth'][..., :20] *= 3  # perturb part of the prediction to get a non-zero loss
+
+    led_loss = LEDLoss()
+    loss = led_loss(gt, dummy_dt)
+    print(loss)
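A dataset-free sanity check for LEDLoss (a sketch with synthetic tensors; shapes [b, 256] for depth and [b, 1] for ratio match the model outputs in this commit):

    import torch
    from loss.led_loss import LEDLoss

    led_loss = LEDLoss()
    gt = {'depth': torch.ones(1, 256), 'ratio': torch.tensor([[0.5]])}
    dt = {'depth': gt['depth'].clone(),
          'ceil_depth': gt['depth'] / gt['ratio']}   # consistent ceiling depth
    print(led_loss(gt, dt))   # tensor(0.), since both terms match exactly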
loss/object_loss.py
ADDED
@@ -0,0 +1,42 @@
+"""
+@Date: 2021/08/12
+@description:
+"""
+import torch
+import torch.nn as nn
+from loss.grad_loss import GradLoss
+
+
+class ObjectLoss(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.heat_map_loss = HeatmapLoss(reduction='mean')  # FocalLoss(reduction='mean')
+        self.l1_loss = nn.SmoothL1Loss()
+
+    def forward(self, gt, dt):
+        # TODO:: object loss is not implemented yet
+        return 0
+
+
+class HeatmapLoss(nn.Module):
+    def __init__(self, weight=None, alpha=2, beta=4, reduction='mean'):
+        super(HeatmapLoss, self).__init__()
+        self.alpha = alpha
+        self.beta = beta
+        self.reduction = reduction
+
+    def forward(self, targets, inputs):
+        center_id = (targets == 1.0).float()
+        other_id = (targets != 1.0).float()
+        center_loss = -center_id * (1.0 - inputs) ** self.alpha * torch.log(inputs + 1e-14)
+        other_loss = -other_id * (1 - targets) ** self.beta * inputs ** self.alpha * torch.log(1.0 - inputs + 1e-14)
+        loss = center_loss + other_loss
+
+        batch_size = loss.size(0)
+        if self.reduction == 'mean':
+            loss = torch.sum(loss) / batch_size
+
+        if self.reduction == 'sum':
+            loss = torch.sum(loss)
+
+        return loss
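HeatmapLoss appears to be a CornerNet-style penalty-reduced focal loss for corner heat maps. A minimal sanity check, as a sketch (the 1-D heat map over 256 columns is an assumption consistent with corner_heat_map in lgt_net.py below):

    import torch
    from loss.object_loss import HeatmapLoss

    heatmap_loss = HeatmapLoss(reduction='mean')
    targets = torch.zeros(2, 256)
    targets[:, 100] = 1.0                 # one ground-truth corner per sample
    inputs = torch.full((2, 256), 0.1)
    inputs[:, 100] = 0.9                  # confident at the corner, low elsewhere
    print(heatmap_loss(targets, inputs))  # small positive scalar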
main.py
ADDED
@@ -0,0 +1,401 @@
+"""
+@Date: 2021/07/17
+@description:
+"""
+import sys
+import os
+import shutil
+import argparse
+import numpy as np
+import json
+import torch
+import torch.nn.parallel
+import torch.optim
+import torch.multiprocessing as mp
+import torch.utils.data
+import torch.utils.data.distributed
+import torch.cuda
+
+from PIL import Image
+from tqdm import tqdm
+from torch.utils.tensorboard import SummaryWriter
+from config.defaults import get_config, get_rank_config
+from models.other.criterion import calc_criterion
+from models.build import build_model
+from models.other.init_env import init_env
+from utils.logger import build_logger
+from utils.misc import tensor2np_d, tensor2np
+from dataset.build import build_loader
+from evaluation.accuracy import calc_accuracy, show_heat_map, calc_ce, calc_pe, calc_rmse_delta_1, \
+    show_depth_normal_grad, calc_f1_score
+from postprocessing.post_process import post_process
+
+try:
+    from apex import amp
+except ImportError:
+    amp = None
+
+
+def parse_option():
+    debug = True if sys.gettrace() else False
+    parser = argparse.ArgumentParser(description='Panorama Layout Transformer training and evaluation script')
+    parser.add_argument('--cfg',
+                        type=str,
+                        metavar='FILE',
+                        help='path to config file')
+
+    parser.add_argument('--mode',
+                        type=str,
+                        default='train',
+                        choices=['train', 'val', 'test'],
+                        help='train/val/test mode')
+
+    parser.add_argument('--val_name',
+                        type=str,
+                        choices=['val', 'test'],
+                        help='val name')
+
+    parser.add_argument('--bs', type=int,
+                        help='batch size')
+
+    parser.add_argument('--save_eval', action='store_true',
+                        help='save eval result')
+
+    parser.add_argument('--post_processing', type=str,
+                        choices=['manhattan', 'atalanta', 'manhattan_old'],
+                        help='type of post-processing')
+
+    parser.add_argument('--need_cpe', action='store_true',
+                        help='need to evaluate corner error and pixel error')
+
+    parser.add_argument('--need_f1', action='store_true',
+                        help='need to evaluate f1-score of corners')
+
+    parser.add_argument('--need_rmse', action='store_true',
+                        help='need to evaluate root mean squared error and delta error')
+
+    parser.add_argument('--force_cube', action='store_true',
+                        help='force cube shape when eval')
+
+    parser.add_argument('--wall_num', type=int,
+                        help='wall number')
+
+    args = parser.parse_args()
+    args.debug = debug
+    print("arguments:")
+    for arg in vars(args):
+        print(arg, ":", getattr(args, arg))
+    print("-" * 50)
+    return args
+
+
+def main():
+    args = parse_option()
+    config = get_config(args)
+
+    if config.TRAIN.SCRATCH and os.path.exists(config.CKPT.DIR) and config.MODE == 'train':
+        print(f"Train from scratch, delete checkpoint dir: {config.CKPT.DIR}")
+        f = [int(f.split('_')[-1].split('.')[0]) for f in os.listdir(config.CKPT.DIR) if 'pkl' in f]
+        if len(f) > 0:
+            last_epoch = np.array(f).max()
+            if last_epoch > 10:
+                c = input(f"delete it (last_epoch: {last_epoch})?(Y/N)\n")
+                if c != 'y' and c != 'Y':
+                    exit(0)
+
+        shutil.rmtree(config.CKPT.DIR, ignore_errors=True)
+
+    os.makedirs(config.CKPT.DIR, exist_ok=True)
+    os.makedirs(config.CKPT.RESULT_DIR, exist_ok=True)
+    os.makedirs(config.LOGGER.DIR, exist_ok=True)
+
+    nprocs = 1  # default to a single process for plain 'cuda' or 'cpu' device strings
+    if ':' in config.TRAIN.DEVICE:
+        nprocs = len(config.TRAIN.DEVICE.split(':')[-1].split(','))
+    if 'cuda' in config.TRAIN.DEVICE:
+        if not torch.cuda.is_available():
+            print(f"Cuda is not available(config is: {config.TRAIN.DEVICE}), will use cpu ...")
+            config.defrost()
+            config.TRAIN.DEVICE = "cpu"
+            config.freeze()
+            nprocs = 1
+
+    if config.MODE == 'train':
+        with open(os.path.join(config.CKPT.DIR, "config.yaml"), "w") as f:
+            f.write(config.dump(allow_unicode=True))
+
+    if config.TRAIN.DEVICE == 'cpu' or nprocs < 2:
+        print(f"Use single process, device:{config.TRAIN.DEVICE}")
+        main_worker(0, config, 1)
+    else:
+        print(f"Use {nprocs} processes ...")
+        mp.spawn(main_worker, nprocs=nprocs, args=(config, nprocs), join=True)
+
+
+def main_worker(local_rank, cfg, world_size):
+    config = get_rank_config(cfg, local_rank, world_size)
+    logger = build_logger(config)
+    writer = SummaryWriter(config.CKPT.DIR)
+    logger.info(f"Comment: {config.COMMENT}")
+    cur_pid = os.getpid()
+    logger.info(f"Current process id: {cur_pid}")
+    torch.hub._hub_dir = config.CKPT.PYTORCH
+    logger.info(f"Pytorch hub dir: {torch.hub._hub_dir}")
+    init_env(config.SEED, config.TRAIN.DETERMINISTIC, config.DATA.NUM_WORKERS)
+
+    model, optimizer, criterion, scheduler = build_model(config, logger)
+    train_data_loader, val_data_loader = build_loader(config, logger)
+
+    if 'cuda' in config.TRAIN.DEVICE:
+        torch.cuda.set_device(config.TRAIN.DEVICE)
+
+    if config.MODE == 'train':
+        train(model, train_data_loader, val_data_loader, optimizer, criterion, config, logger, writer, scheduler)
+    else:
+        iou_results, other_results = val_an_epoch(model, val_data_loader,
+                                                  criterion, config, logger, writer=None,
+                                                  epoch=config.TRAIN.START_EPOCH)
+        results = dict(iou_results, **other_results)
+        if config.SAVE_EVAL:
+            save_path = os.path.join(config.CKPT.RESULT_DIR, "result.json")
+            with open(save_path, 'w+') as f:
+                json.dump(results, f, indent=4)
+
+
+def save(model, optimizer, epoch, iou_d, logger, writer, config):
+    model.save(optimizer, epoch, accuracy=iou_d['full_3d'], logger=logger, acc_d=iou_d, config=config)
+    for k in model.acc_d:
+        writer.add_scalar(f"BestACC/{k}", model.acc_d[k]['acc'], epoch)
+
+
+def train(model, train_data_loader, val_data_loader, optimizer, criterion, config, logger, writer, scheduler):
+    for epoch in range(config.TRAIN.START_EPOCH, config.TRAIN.EPOCHS):
+        logger.info("=" * 200)
+        train_an_epoch(model, train_data_loader, optimizer, criterion, config, logger, writer, epoch)
+        epoch_iou_d, _ = val_an_epoch(model, val_data_loader, criterion, config, logger, writer, epoch)
+
+        if config.LOCAL_RANK == 0:
+            ddp = config.WORLD_SIZE > 1
+            save(model.module if ddp else model, optimizer, epoch, epoch_iou_d, logger, writer, config)
+
+        if scheduler is not None:
+            if scheduler.min_lr is not None and optimizer.param_groups[0]['lr'] <= scheduler.min_lr:
+                continue
+            scheduler.step()
+    writer.close()
+
+
+def train_an_epoch(model, train_data_loader, optimizer, criterion, config, logger, writer, epoch=0):
+    logger.info(f'Start Train Epoch {epoch}/{config.TRAIN.EPOCHS - 1}')
+    model.train()
+
+    if len(config.MODEL.FINE_TUNE) > 0:
+        model.feature_extractor.eval()
+
+    optimizer.zero_grad()
+
+    data_len = len(train_data_loader)
+    start_i = data_len * epoch * config.WORLD_SIZE
+    bar = enumerate(train_data_loader)
+    if config.LOCAL_RANK == 0 and config.SHOW_BAR:
+        bar = tqdm(bar, total=data_len, ncols=200)
+
+    device = config.TRAIN.DEVICE
+    epoch_loss_d = {}
+    for i, gt in bar:
+        imgs = gt['image'].to(device, non_blocking=True)
+        gt['depth'] = gt['depth'].to(device, non_blocking=True)
+        gt['ratio'] = gt['ratio'].to(device, non_blocking=True)
+        if 'corner_heat_map' in gt:
+            gt['corner_heat_map'] = gt['corner_heat_map'].to(device, non_blocking=True)
+        if config.AMP_OPT_LEVEL != "O0" and 'cuda' in device:
+            imgs = imgs.type(torch.float16)
+            gt['depth'] = gt['depth'].type(torch.float16)
+            gt['ratio'] = gt['ratio'].type(torch.float16)
+        dt = model(imgs)
+        loss, batch_loss_d, epoch_loss_d = calc_criterion(criterion, gt, dt, epoch_loss_d)
+        if config.LOCAL_RANK == 0 and config.SHOW_BAR:
+            bar.set_postfix(batch_loss_d)
+
+        optimizer.zero_grad()
+        if config.AMP_OPT_LEVEL != "O0" and 'cuda' in device:
+            with amp.scale_loss(loss, optimizer) as scaled_loss:
+                scaled_loss.backward()
+        else:
+            loss.backward()
+        optimizer.step()
+
+        global_step = start_i + i * config.WORLD_SIZE + config.LOCAL_RANK
+        for key, val in batch_loss_d.items():
+            writer.add_scalar(f'TrainBatchLoss/{key}', val, global_step)
+
+    if config.LOCAL_RANK != 0:
+        return
+
+    epoch_loss_d = dict(zip(epoch_loss_d.keys(), [np.array(epoch_loss_d[k]).mean() for k in epoch_loss_d.keys()]))
+    s = 'TrainEpochLoss: '
+    for key, val in epoch_loss_d.items():
+        writer.add_scalar(f'TrainEpochLoss/{key}', val, epoch)
+        s += f" {key}={val}"
+    logger.info(s)
+    writer.add_scalar('LearningRate', optimizer.param_groups[0]['lr'], epoch)
+    logger.info(f"LearningRate: {optimizer.param_groups[0]['lr']}")
+
+
+@torch.no_grad()
+def val_an_epoch(model, val_data_loader, criterion, config, logger, writer, epoch=0):
+    model.eval()
+    logger.info(f'Start Validate Epoch {epoch}/{config.TRAIN.EPOCHS - 1}')
+    data_len = len(val_data_loader)
+    start_i = data_len * epoch * config.WORLD_SIZE
+    bar = enumerate(val_data_loader)
+    if config.LOCAL_RANK == 0 and config.SHOW_BAR:
+        bar = tqdm(bar, total=data_len, ncols=200)
+    device = config.TRAIN.DEVICE
+    epoch_loss_d = {}
+    epoch_iou_d = {
+        'visible_2d': [],
+        'visible_3d': [],
+        'full_2d': [],
+        'full_3d': [],
+        'height': []
+    }
+
+    epoch_other_d = {
+        'ce': [],
+        'pe': [],
+        'f1': [],
+        'precision': [],
+        'recall': [],
+        'rmse': [],
+        'delta_1': []
+    }
+
+    show_index = np.random.randint(0, data_len)
+    for i, gt in bar:
+        imgs = gt['image'].to(device, non_blocking=True)
+        gt['depth'] = gt['depth'].to(device, non_blocking=True)
+        gt['ratio'] = gt['ratio'].to(device, non_blocking=True)
+        if 'corner_heat_map' in gt:
+            gt['corner_heat_map'] = gt['corner_heat_map'].to(device, non_blocking=True)
+        dt = model(imgs)
+
+        vis_w = config.TRAIN.VIS_WEIGHT
+        visualization = False  # (config.LOCAL_RANK == 0 and i == show_index) or config.SAVE_EVAL
+
+        loss, batch_loss_d, epoch_loss_d = calc_criterion(criterion, gt, dt, epoch_loss_d)
+
+        if config.EVAL.POST_PROCESSING is not None:
+            depth = tensor2np(dt['depth'])
+            dt['processed_xyz'] = post_process(depth, type_name=config.EVAL.POST_PROCESSING,
+                                               need_cube=config.EVAL.FORCE_CUBE)
+
+        if config.EVAL.FORCE_CUBE and config.EVAL.NEED_CPE:
+            ce = calc_ce(tensor2np_d(dt), tensor2np_d(gt))
+            pe = calc_pe(tensor2np_d(dt), tensor2np_d(gt))
+
+            epoch_other_d['ce'].append(ce)
+            epoch_other_d['pe'].append(pe)
+
+        if config.EVAL.NEED_F1:
+            f1, precision, recall = calc_f1_score(tensor2np_d(dt), tensor2np_d(gt))
+            epoch_other_d['f1'].append(f1)
+            epoch_other_d['precision'].append(precision)
+            epoch_other_d['recall'].append(recall)
+
+        if config.EVAL.NEED_RMSE:
+            rmse, delta_1 = calc_rmse_delta_1(tensor2np_d(dt), tensor2np_d(gt))
+            epoch_other_d['rmse'].append(rmse)
+            epoch_other_d['delta_1'].append(delta_1)
+
+        visb_iou, full_iou, iou_height, pano_bds, full_iou_2ds = calc_accuracy(tensor2np_d(dt), tensor2np_d(gt),
+                                                                               visualization, h=vis_w // 2)
+        epoch_iou_d['visible_2d'].append(visb_iou[0])
+        epoch_iou_d['visible_3d'].append(visb_iou[1])
+        epoch_iou_d['full_2d'].append(full_iou[0])
+        epoch_iou_d['full_3d'].append(full_iou[1])
+        epoch_iou_d['height'].append(iou_height)
+
+        if config.LOCAL_RANK == 0 and config.SHOW_BAR:
+            bar.set_postfix(batch_loss_d)
+
+        global_step = start_i + i * config.WORLD_SIZE + config.LOCAL_RANK
+
+        if writer:
+            for key, val in batch_loss_d.items():
+                writer.add_scalar(f'ValBatchLoss/{key}', val, global_step)
+
+        if not visualization:
+            continue
+
+        gt_grad_imgs, dt_grad_imgs = show_depth_normal_grad(dt, gt, device, vis_w)
+
+        dt_heat_map_imgs = None
+        gt_heat_map_imgs = None
+        if 'corner_heat_map' in gt:
+            dt_heat_map_imgs, gt_heat_map_imgs = show_heat_map(dt, gt, vis_w)
+
+        if config.TRAIN.VIS_MERGE or config.SAVE_EVAL:
+            imgs = []
+            for j in range(len(pano_bds)):
+                # floorplan = np.concatenate([visb_iou[2][j], full_iou[2][j]], axis=-1)
+                floorplan = full_iou[2][j]
+                margin_w = int(floorplan.shape[-1] * (60 / 512))
+                floorplan = floorplan[:, :, margin_w:-margin_w]
+
+                grad_h = dt_grad_imgs[0].shape[1]
+                vis_merge = [
+                    gt_grad_imgs[j],
+                    pano_bds[j][:, grad_h:-grad_h],
+                    dt_grad_imgs[j]
+                ]
+                if 'corner_heat_map' in gt:
+                    vis_merge = [dt_heat_map_imgs[j], gt_heat_map_imgs[j]] + vis_merge
+                img = np.concatenate(vis_merge, axis=-2)
+
+                img = np.concatenate([img, ], axis=-1)
+                # img = gt_grad_imgs[j]
+                imgs.append(img)
+            if writer:
+                writer.add_images('VIS/Merge', np.array(imgs), global_step)
+
+            if config.SAVE_EVAL:
+                for k in range(len(imgs)):
+                    img = imgs[k] * 255.0
+                    save_path = os.path.join(config.CKPT.RESULT_DIR, f"{gt['id'][k]}_{full_iou_2ds[k]:.5f}.png")
+                    Image.fromarray(img.transpose(1, 2, 0).astype(np.uint8)).save(save_path)
+
+        elif writer:
+            writer.add_images('IoU/Visible_Floorplan', visb_iou[2], global_step)
+            writer.add_images('IoU/Full_Floorplan', full_iou[2], global_step)
+            writer.add_images('IoU/Boundary', pano_bds, global_step)
+            writer.add_images('Grad/gt', gt_grad_imgs, global_step)
+            writer.add_images('Grad/dt', dt_grad_imgs, global_step)
+
+    if config.LOCAL_RANK != 0:
+        return
+
+    epoch_loss_d = dict(zip(epoch_loss_d.keys(), [np.array(epoch_loss_d[k]).mean() for k in epoch_loss_d.keys()]))
+    s = 'ValEpochLoss: '
+    for key, val in epoch_loss_d.items():
+        if writer:
+            writer.add_scalar(f'ValEpochLoss/{key}', val, epoch)
+        s += f" {key}={val}"
+    logger.info(s)
+
+    epoch_iou_d = dict(zip(epoch_iou_d.keys(), [np.array(epoch_iou_d[k]).mean() for k in epoch_iou_d.keys()]))
+    s = 'ValEpochIoU: '
+    for key, val in epoch_iou_d.items():
+        if writer:
+            writer.add_scalar(f'ValEpochIoU/{key}', val, epoch)
+        s += f" {key}={val}"
+    logger.info(s)
+    epoch_other_d = dict(zip(epoch_other_d.keys(),
+                             [np.array(epoch_other_d[k]).mean() if len(epoch_other_d[k]) > 0 else 0 for k in
+                              epoch_other_d.keys()]))
+
+    logger.info(f'other acc: {epoch_other_d}')
+    return epoch_iou_d, epoch_other_d
+
+
+if __name__ == '__main__':
+    main()
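Usage sketch for main.py (the config path is an assumption, not part of this commit): training runs as

    python main.py --cfg src/config/mp3d.yaml --mode train

and evaluation as

    python main.py --cfg src/config/mp3d.yaml --mode test --need_f1 --save_eval

With a multi-GPU device string such as cuda:0,1 in the config, main() spawns one main_worker per GPU via torch.multiprocessing.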
models/__init__.py
ADDED
@@ -0,0 +1 @@
+from models.lgt_net import LGT_Net
models/base_model.py
ADDED
@@ -0,0 +1,150 @@
+"""
+@Date: 2021/07/17
+@description:
+"""
+import os
+import torch
+import torch.nn as nn
+import datetime
+
+
+class BaseModule(nn.Module):
+    def __init__(self, ckpt_dir=None):
+        super().__init__()
+
+        self.ckpt_dir = ckpt_dir
+        self.model_lst = []
+
+        if ckpt_dir:
+            if not os.path.exists(ckpt_dir):
+                os.makedirs(ckpt_dir)
+            else:
+                self.model_lst = [x for x in sorted(os.listdir(self.ckpt_dir)) if x.endswith('.pkl')]
+
+        self.last_model_path = None
+        self.best_model_path = None
+        self.best_accuracy = -float('inf')
+        self.acc_d = {}
+
+    def show_parameter_number(self, logger):
+        total = sum(p.numel() for p in self.parameters())
+        trainable = sum(p.numel() for p in self.parameters() if p.requires_grad)
+        logger.info('{} parameter total:{:,}, trainable:{:,}'.format(self._get_name(), total, trainable))
+
+    def load(self, device, logger, optimizer=None, best=False):
+        if len(self.model_lst) == 0:
+            logger.info('*' * 50)
+            logger.info("Empty model folder! Using initial weights")
+            logger.info('*' * 50)
+            return 0
+
+        last_model_lst = list(filter(lambda n: '_last_' in n, self.model_lst))
+        best_model_lst = list(filter(lambda n: '_best_' in n, self.model_lst))
+
+        if len(last_model_lst) == 0 and len(best_model_lst) == 0:
+            logger.info('*' * 50)
+            ckpt_path = os.path.join(self.ckpt_dir, self.model_lst[0])
+            logger.info(f"Load: {ckpt_path}")
+            checkpoint = torch.load(ckpt_path, map_location=torch.device(device))
+            self.load_state_dict(checkpoint, strict=False)
+            logger.info('*' * 50)
+            return 0
+
+        checkpoint = None
+        if len(last_model_lst) > 0:
+            self.last_model_path = os.path.join(self.ckpt_dir, last_model_lst[-1])
+            checkpoint = torch.load(self.last_model_path, map_location=torch.device(device))
+            self.best_accuracy = checkpoint['accuracy']
+            self.acc_d = checkpoint['acc_d']
+
+        if len(best_model_lst) > 0:
+            self.best_model_path = os.path.join(self.ckpt_dir, best_model_lst[-1])
+            best_checkpoint = torch.load(self.best_model_path, map_location=torch.device(device))
+            self.best_accuracy = best_checkpoint['accuracy']
+            self.acc_d = best_checkpoint['acc_d']
+            if best:
+                checkpoint = best_checkpoint
+
+        if checkpoint is None:
+            logger.error("Invalid checkpoint")
+            return
+
+        for k in self.acc_d:
+            if isinstance(self.acc_d[k], float):
+                self.acc_d[k] = {
+                    'acc': self.acc_d[k],
+                    'epoch': checkpoint['epoch']
+                }
+
+        self.load_state_dict(checkpoint['net'], strict=False)
+        if optimizer and not best:  # when resuming from the best checkpoint, a new optimizer may be used (e.g. switching from Adam to SGD)
+            logger.info('Load optimizer')
+            optimizer.load_state_dict(checkpoint['optimizer'])
+            for state in optimizer.state.values():
+                for k, v in state.items():
+                    if torch.is_tensor(v):
+                        state[k] = v.to(device)
+
+        logger.info('*' * 50)
+        if best:
+            logger.info(f"Load best: {self.best_model_path}")
+        else:
+            logger.info(f"Load last: {self.last_model_path}")
+
+        logger.info(f"Best accuracy: {self.best_accuracy}")
+        logger.info(f"Last epoch: {checkpoint['epoch'] + 1}")
+        logger.info('*' * 50)
+        return checkpoint['epoch'] + 1
+
+    def update_acc(self, acc_d, epoch, logger):
+        logger.info("-" * 100)
+        for k in acc_d:
+            if k not in self.acc_d.keys() or acc_d[k] > self.acc_d[k]['acc']:
+                self.acc_d[k] = {
+                    'acc': acc_d[k],
+                    'epoch': epoch
+                }
+            logger.info(f"Update ACC: {k} {self.acc_d[k]['acc']:.4f}({self.acc_d[k]['epoch']}-{epoch})")
+        logger.info("-" * 100)
+
+    def save(self, optim, epoch, accuracy, logger, replace=True, acc_d=None, config=None):
+        """
+
+        :param config:
+        :param optim:
+        :param epoch:
+        :param accuracy:
+        :param logger:
+        :param replace:
+        :param acc_d: other evaluation metrics, e.g. visible_2/3d, full_2/3d, rmse...
+        :return:
+        """
+        if acc_d:
+            self.update_acc(acc_d, epoch, logger)
+        name = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S_last_{:.4f}_{}'.format(accuracy, epoch))
+        name = f"model_{name}.pkl"
+        checkpoint = {
+            'net': self.state_dict(),
+            'optimizer': optim.state_dict(),
+            'epoch': epoch,
+            'accuracy': accuracy,
+            'acc_d': acc_d
+        }
+        # FIXME:: delete always true
+        if (True or config.MODEL.SAVE_LAST) and epoch % config.TRAIN.SAVE_FREQ == 0:
+            if replace and self.last_model_path and os.path.exists(self.last_model_path):
+                os.remove(self.last_model_path)
+            self.last_model_path = os.path.join(self.ckpt_dir, name)
+            torch.save(checkpoint, self.last_model_path)
+            logger.info(f"Saved last model: {self.last_model_path}")
+
+        if accuracy > self.best_accuracy:
+            self.best_accuracy = accuracy
+            # FIXME:: delete always true
+            if True or config.MODEL.SAVE_BEST:
+                if replace and self.best_model_path and os.path.exists(self.best_model_path):
+                    os.remove(self.best_model_path)
+                self.best_model_path = os.path.join(self.ckpt_dir, name.replace('last', 'best'))
+                torch.save(checkpoint, self.best_model_path)
+                logger.info("#" * 100)
+                logger.info(f"Saved best model: {self.best_model_path}")
+                logger.info("#" * 100)
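A minimal sketch of how BaseModule tracks per-metric bests (no checkpoint directory needed; metric names and values are illustrative):

    import logging
    from models.base_model import BaseModule

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

    m = BaseModule()                                          # ckpt_dir=None: nothing touches disk
    m.update_acc({'full_3d': 0.81}, epoch=3, logger=logger)
    m.update_acc({'full_3d': 0.79}, epoch=4, logger=logger)   # lower score: stored best is kept
    print(m.acc_d)   # {'full_3d': {'acc': 0.81, 'epoch': 3}}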
models/build.py
ADDED
@@ -0,0 +1,81 @@
+"""
+@Date: 2021/07/18
+@description:
+"""
+import os
+import models
+import torch.distributed as dist
+import torch
+
+from torch.nn import init
+from torch.optim import lr_scheduler
+from utils.time_watch import TimeWatch
+from models.other.optimizer import build_optimizer
+from models.other.criterion import build_criterion
+
+
+def build_model(config, logger):
+    name = config.MODEL.NAME
+    w = TimeWatch(f"Build model: {name}", logger)
+
+    ddp = config.WORLD_SIZE > 1
+    if ddp:
+        logger.info("use ddp")
+        dist.init_process_group("nccl", init_method='tcp://127.0.0.1:23456', rank=config.LOCAL_RANK,
+                                world_size=config.WORLD_SIZE)
+
+    device = config.TRAIN.DEVICE
+    logger.info(f"Creating model: {name} to device:{device}, args:{config.MODEL.ARGS[0]}")
+
+    net = getattr(models, name)
+    ckpt_dir = os.path.abspath(os.path.join(config.CKPT.DIR, os.pardir)) if config.DEBUG else config.CKPT.DIR
+    if len(config.MODEL.ARGS) != 0:
+        model = net(ckpt_dir=ckpt_dir, **config.MODEL.ARGS[0])
+    else:
+        model = net(ckpt_dir=ckpt_dir)
+    logger.info(f'model dropout: {model.dropout_d}')
+    model = model.to(device)
+    optimizer = None
+    scheduler = None
+
+    if config.MODE == 'train':
+        optimizer = build_optimizer(config, model, logger)
+
+    config.defrost()
+    config.TRAIN.START_EPOCH = model.load(device, logger, optimizer, best=config.MODE != 'train' or not config.TRAIN.RESUME_LAST)
+    config.freeze()
+
+    if config.MODE == 'train' and len(config.MODEL.FINE_TUNE) > 0:
+        for param in model.parameters():
+            param.requires_grad = False
+        for layer in config.MODEL.FINE_TUNE:
+            logger.info(f'Fine-tune: {layer}')
+            getattr(model, layer).requires_grad_(requires_grad=True)
+            getattr(model, layer).reset_parameters()
+
+    model.show_parameter_number(logger)
+
+    if config.MODE == 'train':
+        if len(config.TRAIN.LR_SCHEDULER.NAME) > 0:
+            if 'last_epoch' not in config.TRAIN.LR_SCHEDULER.ARGS[0].keys():
+                config.TRAIN.LR_SCHEDULER.ARGS[0]['last_epoch'] = config.TRAIN.START_EPOCH - 1
+
+            scheduler = getattr(lr_scheduler, config.TRAIN.LR_SCHEDULER.NAME)(optimizer=optimizer,
+                                                                              **config.TRAIN.LR_SCHEDULER.ARGS[0])
+            logger.info(f"Use scheduler: name:{config.TRAIN.LR_SCHEDULER.NAME} args: {config.TRAIN.LR_SCHEDULER.ARGS[0]}")
+            logger.info(f"Current scheduler last lr: {scheduler.get_last_lr()}")
+        else:
+            scheduler = None
+
+    if config.AMP_OPT_LEVEL != "O0" and 'cuda' in device:
+        import apex
+        logger.info(f"use amp:{config.AMP_OPT_LEVEL}")
+        model, optimizer = apex.amp.initialize(model, optimizer, opt_level=config.AMP_OPT_LEVEL, verbosity=0)
+    if ddp:
+        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[config.TRAIN.DEVICE],
+                                                          broadcast_buffers=True)  # use rank:0 bn
+
+    criterion = build_criterion(config, logger)
+    if optimizer is not None:
+        logger.info(f"Final lr: {optimizer.param_groups[0]['lr']}")
+    return model, optimizer, criterion, scheduler
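build_model resolves the architecture by name: getattr(models, config.MODEL.NAME) looks the class up among the exports of models/__init__.py, so with this commit only MODEL.NAME = 'LGT_Net' can resolve, and MODEL.ARGS[0] is unpacked into its constructor. An illustrative config fragment (values are assumptions, not from this commit) would be ARGS: [{'backbone': 'resnet50', 'decoder_name': 'SWG_Transformer', 'output_name': 'LGT'}], matching the keyword arguments LGT_Net.__init__ accepts below.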
models/lgt_net.py
ADDED
@@ -0,0 +1,213 @@
+import torch
+import torch.nn as nn
+import models.modules as modules
+import numpy as np
+
+from models.base_model import BaseModule
+from models.modules.horizon_net_feature_extractor import HorizonNetFeatureExtractor
+from models.modules.patch_feature_extractor import PatchFeatureExtractor
+from utils.conversion import uv2depth, get_u, lonlat2depth, get_lon, lonlat2uv
+from utils.height import calc_ceil_ratio
+from utils.misc import tensor2np
+
+
+class LGT_Net(BaseModule):
+    def __init__(self, ckpt_dir=None, backbone='resnet50', dropout=0.0, output_name='LGT',
+                 decoder_name='Transformer', win_size=8, depth=6,
+                 ape=None, rpe=None, corner_heat_map=False, rpe_pos=1):
+        super().__init__(ckpt_dir)
+
+        self.patch_num = 256
+        self.patch_dim = 1024
+        self.decoder_name = decoder_name
+        self.output_name = output_name
+        self.corner_heat_map = corner_heat_map
+        self.dropout_d = dropout
+
+        if backbone == 'patch':
+            self.feature_extractor = PatchFeatureExtractor(patch_num=self.patch_num, input_shape=[3, 512, 1024])
+        else:
+            # feature extractor
+            self.feature_extractor = HorizonNetFeatureExtractor(backbone)
+
+        if 'Transformer' in self.decoder_name:
+            # transformer encoder
+            transformer_dim = self.patch_dim
+            transformer_layers = depth
+            transformer_heads = 8
+            transformer_head_dim = transformer_dim // transformer_heads
+            transformer_ff_dim = 2048
+            rpe = None if rpe == 'None' else rpe
+            self.transformer = getattr(modules, decoder_name)(dim=transformer_dim, depth=transformer_layers,
+                                                              heads=transformer_heads, dim_head=transformer_head_dim,
+                                                              mlp_dim=transformer_ff_dim, win_size=win_size,
+                                                              dropout=self.dropout_d, patch_num=self.patch_num,
+                                                              ape=ape, rpe=rpe, rpe_pos=rpe_pos)
+        elif self.decoder_name == 'LSTM':
+            self.bi_rnn = nn.LSTM(input_size=self.feature_extractor.c_last,
+                                  hidden_size=self.patch_dim // 2,
+                                  num_layers=2,
+                                  dropout=self.dropout_d,
+                                  batch_first=False,
+                                  bidirectional=True)
+            self.drop_out = nn.Dropout(self.dropout_d)
+        else:
+            raise NotImplementedError("Only support *Transformer and LSTM")
+
+        if self.output_name == 'LGT':
+            # omnidirectional-geometry aware output
+            self.linear_depth_output = nn.Linear(in_features=self.patch_dim, out_features=1)
+            self.linear_ratio = nn.Linear(in_features=self.patch_dim, out_features=1)
+            self.linear_ratio_output = nn.Linear(in_features=self.patch_num, out_features=1)
+        elif self.output_name == 'LED' or self.output_name == 'Horizon':
+            # horizon-depth or latitude output
+            self.linear = nn.Linear(in_features=self.patch_dim, out_features=2)
+        else:
+            raise NotImplementedError("Unknown output")
+
+        if self.corner_heat_map:
+            # corners heat map output
+            self.linear_corner_heat_map_output = nn.Linear(in_features=self.patch_dim, out_features=1)
+
+        self.name = f"{self.decoder_name}_{self.output_name}_Net"
+
+    def lgt_output(self, x):
+        """
+        :param x: [ b, 256(patch_num), 1024(d)]
+        :return: {
+            'depth': [b, 256(patch_num & d)]
+            'ratio': [b, 1(d)]
+        }
+        """
+        depth = self.linear_depth_output(x)  # [b, 256(patch_num), 1(d)]
+        depth = depth.view(-1, self.patch_num)  # [b, 256(patch_num & d)]
+
+        # ratio represents room height
+        ratio = self.linear_ratio(x)  # [b, 256(patch_num), 1(d)]
+        ratio = ratio.view(-1, self.patch_num)  # [b, 256(patch_num & d)]
+        ratio = self.linear_ratio_output(ratio)  # [b, 1(d)]
+        output = {
+            'depth': depth,
+            'ratio': ratio
+        }
+        return output
+
+    def led_output(self, x):
+        """
+        :param x: [ b, 256(patch_num), 1024(d)]
+        :return: {
+            'depth': [b, 256(patch_num)]
+            'ceil_depth': [b, 256(patch_num)]
+            'ratio': [b, 1(d)]
+        }
+        """
+        bon = self.linear(x)  # [b, 256(patch_num), 2(d)]
+        bon = bon.permute(0, 2, 1)  # [b, 2(d), 256(patch_num)]
+        bon = torch.sigmoid(bon)
+
+        ceil_v = bon[:, 0, :] * -0.5 + 0.5  # [b, 256(patch_num)]
+        floor_v = bon[:, 1, :] * 0.5 + 0.5  # [b, 256(patch_num)]
+        u = get_u(w=self.patch_num, is_np=False, b=ceil_v.shape[0]).to(ceil_v.device)
+        ceil_boundary = torch.stack((u, ceil_v), dim=-1)  # [b, 256(patch_num), 2]
+        floor_boundary = torch.stack((u, floor_v), dim=-1)  # [b, 256(patch_num), 2]
+        output = {
+            'depth': uv2depth(floor_boundary),  # [b, 256(patch_num)]
+            'ceil_depth': uv2depth(ceil_boundary),  # [b, 256(patch_num)]
+        }
+        # print(output['depth'].mean())
+        if not self.training:
+            # [b, 1(d)]
+            output['ratio'] = calc_ceil_ratio([tensor2np(ceil_boundary), tensor2np(floor_boundary)], mode='lsq').reshape(-1, 1)
+        return output
+
+    def horizon_output(self, x):
+        """
+        :param x: [ b, 256(patch_num), 1024(d)]
+        :return: {
+            'floor_boundary': [b, 256(patch_num)]
+            'ceil_boundary': [b, 256(patch_num)]
+        }
+        """
+        bon = self.linear(x)  # [b, 256(patch_num), 2(d)]
+        bon = bon.permute(0, 2, 1)  # [b, 2(d), 256(patch_num)]
+
+        output = {
+            'boundary': bon
+        }
+        if not self.training:
+            lon = get_lon(w=self.patch_num, is_np=False, b=bon.shape[0]).to(bon.device)
+            floor_lat = torch.clip(bon[:, 0, :], 1e-4, np.pi / 2)
+            ceil_lat = torch.clip(bon[:, 1, :], -np.pi / 2, -1e-4)
+            floor_lonlat = torch.stack((lon, floor_lat), dim=-1)  # [b, 256(patch_num), 2]
+            ceil_lonlat = torch.stack((lon, ceil_lat), dim=-1)  # [b, 256(patch_num), 2]
+            output['depth'] = lonlat2depth(floor_lonlat)
+            output['ratio'] = calc_ceil_ratio([tensor2np(lonlat2uv(ceil_lonlat)),
+                                               tensor2np(lonlat2uv(floor_lonlat))], mode='mean').reshape(-1, 1)
+        return output
+
+    def forward(self, x):
+        """
+        :param x: [b, 3(d), 512(h), 1024(w)]
+        :return: {
+            'depth': [b, 256(patch_num & d)]
+            'ratio': [b, 1(d)]
+        }
+        """
+
+        # feature extractor
+        x = self.feature_extractor(x)  # [b 1024(d) 256(w)]
+
+        if 'Transformer' in self.decoder_name:
+            # transformer decoder
+            x = x.permute(0, 2, 1)  # [b 256(patch_num) 1024(d)]
+            x = self.transformer(x)  # [b 256(patch_num) 1024(d)]
+        elif self.decoder_name == 'LSTM':
+            # lstm decoder
+            x = x.permute(2, 0, 1)  # [256(patch_num), b, 1024(d)]
+            self.bi_rnn.flatten_parameters()
+            x, _ = self.bi_rnn(x)  # [256(patch_num & seq_len), b, 1024(d)]
+            x = x.permute(1, 0, 2)  # [b, 256(patch_num), 1024(d)]
+            x = self.drop_out(x)
+
+        output = None
+        if self.output_name == 'LGT':
+            # lgt output
+            output = self.lgt_output(x)
+
+        elif self.output_name == 'LED':
+            # led output
+            output = self.led_output(x)
+
+        elif self.output_name == 'Horizon':
+            # horizon output
+            output = self.horizon_output(x)
+
+        if self.corner_heat_map:
+            corner_heat_map = self.linear_corner_heat_map_output(x)  # [b, 256(patch_num), 1]
+            corner_heat_map = corner_heat_map.view(-1, self.patch_num)
+            corner_heat_map = torch.sigmoid(corner_heat_map)
+            output['corner_heat_map'] = corner_heat_map
+
+        return output
+
+
+if __name__ == '__main__':
+    from PIL import Image
+    import numpy as np
+    from models.other.init_env import init_env
+
+    init_env(0, deterministic=True)
+
+    net = LGT_Net()
+
+    total = sum(p.numel() for p in net.parameters())
+    trainable = sum(p.numel() for p in net.parameters() if p.requires_grad)
+    print('parameter total:{:,}, trainable:{:,}'.format(total, trainable))
+
+    img = np.array(Image.open("../src/demo.png")).transpose((2, 0, 1))
+    input = torch.Tensor([img])  # 1 3 512 1024
+    output = net(input)
+
+    print(output['depth'].shape)  # 1 256
+    print(output['ratio'].shape)  # 1 1
models/modules/__init__.py
ADDED
@@ -0,0 +1,8 @@
+"""
+@Date: 2021/09/01
+@description:
+"""
+
+from models.modules.swin_transformer import Swin_Transformer
+from models.modules.swg_transformer import SWG_Transformer
+from models.modules.transformer import Transformer
models/modules/conv_transformer.py
ADDED
@@ -0,0 +1,128 @@
import torch
import torch.nn.functional as F

from torch import nn, einsum
from einops import rearrange


class PreNorm(nn.Module):
    def __init__(self, dim, fn):
        super().__init__()
        self.norm = nn.LayerNorm(dim)
        self.fn = fn

    def forward(self, x, **kwargs):
        return self.fn(self.norm(x), **kwargs)


class GELU(nn.Module):
    def forward(self, input):
        return F.gelu(input)


class Attend(nn.Module):

    def __init__(self, dim=None):
        super().__init__()
        self.dim = dim

    def forward(self, input):
        return F.softmax(input, dim=self.dim, dtype=input.dtype)


class FeedForward(nn.Module):
    def __init__(self, dim, hidden_dim, dropout=0.):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(dim, hidden_dim),
            GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, dim),
            nn.Dropout(dropout)
        )

    def forward(self, x):
        return self.net(x)


class Attention(nn.Module):
    def __init__(self, dim, heads=8, dim_head=64, dropout=0.):
        super().__init__()
        inner_dim = dim_head * heads
        project_out = not (heads == 1 and dim_head == dim)

        self.heads = heads
        self.scale = dim_head ** -0.5

        self.attend = Attend(dim=-1)
        self.to_qkv = nn.Linear(dim, inner_dim * 3, bias=False)

        self.to_out = nn.Sequential(
            nn.Linear(inner_dim, dim),
            nn.Dropout(dropout)
        ) if project_out else nn.Identity()

    def forward(self, x):
        b, n, _, h = *x.shape, self.heads
        qkv = self.to_qkv(x).chunk(3, dim=-1)
        q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> b h n d', h=h), qkv)
        dots = einsum('b h i d, b h j d -> b h i j', q, k) * self.scale
        attn = self.attend(dots)
        out = einsum('b h i j, b h j d -> b h i d', attn, v)
        out = rearrange(out, 'b h n d -> b n (h d)')
        return self.to_out(out)


class Conv(nn.Module):
    def __init__(self, dim, dropout=0.):
        super().__init__()
        self.dim = dim
        self.net = nn.Sequential(
            nn.Conv1d(dim, dim, kernel_size=3, stride=1, padding=0),
            nn.Dropout(dropout)
        )

    def forward(self, x):
        x = x.transpose(1, 2)
        x = torch.cat([x[..., -1:], x, x[..., :1]], dim=-1)  # circular padding: wrap first/last token
        x = self.net(x)
        return x.transpose(1, 2)


class ConvTransformer(nn.Module):
    def __init__(self, dim, depth, heads, dim_head, mlp_dim, dropout=0.):
        super().__init__()
        self.layers = nn.ModuleList([])
        for _ in range(depth):
            self.layers.append(nn.ModuleList([
                PreNorm(dim, Attention(dim, heads=heads, dim_head=dim_head, dropout=dropout)),
                PreNorm(dim, FeedForward(dim, mlp_dim, dropout=dropout)),
                PreNorm(dim, Conv(dim, dropout=dropout))
            ]))

    def forward(self, x):
        for attn, ff, conv in self.layers:
            x = attn(x) + x
            x = ff(x) + x
            x = conv(x) + x
        return x


if __name__ == '__main__':
    token_dim = 1024
    token_len = 256

    transformer = ConvTransformer(dim=token_dim,
                                  depth=6,
                                  heads=16,
                                  dim_head=64,
                                  mlp_dim=2048,
                                  dropout=0.1)

    total = sum(p.numel() for p in transformer.parameters())
    trainable = sum(p.numel() for p in transformer.parameters() if p.requires_grad)
    print('parameter total:{:,}, trainable:{:,}'.format(total, trainable))

    input = torch.randn(1, token_len, token_dim)
    output = transformer(input)
    print(output.shape)

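Note: the Conv block above pads the first and last tokens onto opposite ends before a kernel-3 valid convolution, so the token sequence is treated as circular (matching the horizontal wrap-around of a panorama) and its length is preserved. A minimal sketch (not part of the diff) checking the shape:

import torch
from models.modules.conv_transformer import Conv

conv = Conv(dim=16)
tokens = torch.randn(2, 256, 16)   # [batch, token_len, dim]
out = conv(tokens)
print(out.shape)                   # torch.Size([2, 256, 16]) -- length unchanged
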
models/modules/horizon_net_feature_extractor.py
ADDED
@@ -0,0 +1,267 @@
"""
@author:
@Date: 2021/07/17
@description: Use the feature extractor proposed by HorizonNet
"""

import numpy as np
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
import functools
from models.base_model import BaseModule

ENCODER_RESNET = [
    'resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152',
    'resnext50_32x4d', 'resnext101_32x8d'
]
ENCODER_DENSENET = [
    'densenet121', 'densenet169', 'densenet161', 'densenet201'
]


def lr_pad(x, padding=1):
    ''' Pad left/right-most to each other instead of zero padding '''
    return torch.cat([x[..., -padding:], x, x[..., :padding]], dim=3)


class LR_PAD(nn.Module):
    ''' Pad left/right-most to each other instead of zero padding '''

    def __init__(self, padding=1):
        super(LR_PAD, self).__init__()
        self.padding = padding

    def forward(self, x):
        return lr_pad(x, self.padding)


def wrap_lr_pad(net):
    for name, m in net.named_modules():
        if not isinstance(m, nn.Conv2d):
            continue
        if m.padding[1] == 0:
            continue
        w_pad = int(m.padding[1])
        m.padding = (m.padding[0], 0)  # set horizontal padding to 0; LR_PAD followed by valid padding keeps the width unchanged
        names = name.split('.')

        root = functools.reduce(lambda o, i: getattr(o, i), [net] + names[:-1])
        setattr(
            root, names[-1],
            nn.Sequential(LR_PAD(w_pad), m)
        )


'''
Encoder
'''


class Resnet(nn.Module):
    def __init__(self, backbone='resnet50', pretrained=True):
        super(Resnet, self).__init__()
        assert backbone in ENCODER_RESNET
        self.encoder = getattr(models, backbone)(pretrained=pretrained)
        del self.encoder.fc, self.encoder.avgpool

    def forward(self, x):
        features = []
        x = self.encoder.conv1(x)
        x = self.encoder.bn1(x)
        x = self.encoder.relu(x)
        x = self.encoder.maxpool(x)

        x = self.encoder.layer1(x)
        features.append(x)  # 1/4
        x = self.encoder.layer2(x)
        features.append(x)  # 1/8
        x = self.encoder.layer3(x)
        features.append(x)  # 1/16
        x = self.encoder.layer4(x)
        features.append(x)  # 1/32
        return features

    def list_blocks(self):
        lst = [m for m in self.encoder.children()]
        block0 = lst[:4]
        block1 = lst[4:5]
        block2 = lst[5:6]
        block3 = lst[6:7]
        block4 = lst[7:8]
        return block0, block1, block2, block3, block4


class Densenet(nn.Module):
    def __init__(self, backbone='densenet169', pretrained=True):
        super(Densenet, self).__init__()
        assert backbone in ENCODER_DENSENET
        self.encoder = getattr(models, backbone)(pretrained=pretrained)
        self.final_relu = nn.ReLU(inplace=True)
        del self.encoder.classifier

    def forward(self, x):
        lst = []
        for m in self.encoder.features.children():
            x = m(x)
            lst.append(x)
        features = [lst[4], lst[6], lst[8], self.final_relu(lst[11])]
        return features

    def list_blocks(self):
        lst = [m for m in self.encoder.features.children()]
        block0 = lst[:4]
        block1 = lst[4:6]
        block2 = lst[6:8]
        block3 = lst[8:10]
        block4 = lst[10:]
        return block0, block1, block2, block3, block4


'''
Decoder
'''


class ConvCompressH(nn.Module):
    ''' Reduce feature height by factor of two '''

    def __init__(self, in_c, out_c, ks=3):
        super(ConvCompressH, self).__init__()
        assert ks % 2 == 1
        self.layers = nn.Sequential(
            nn.Conv2d(in_c, out_c, kernel_size=ks, stride=(2, 1), padding=ks // 2),
            nn.BatchNorm2d(out_c),
            nn.ReLU(inplace=True),
        )

    def forward(self, x):
        return self.layers(x)


class GlobalHeightConv(nn.Module):
    def __init__(self, in_c, out_c):
        super(GlobalHeightConv, self).__init__()
        self.layer = nn.Sequential(
            ConvCompressH(in_c, in_c // 2),
            ConvCompressH(in_c // 2, in_c // 2),
            ConvCompressH(in_c // 2, in_c // 4),
            ConvCompressH(in_c // 4, out_c),
        )

    def forward(self, x, out_w):
        x = self.layer(x)

        factor = out_w // x.shape[3]
        x = torch.cat([x[..., -1:], x, x[..., :1]], 3)  # pad the left/right ends with each other first (wrap mode), then interpolate
        d_type = x.dtype
        x = F.interpolate(x, size=(x.shape[2], out_w + 2 * factor), mode='bilinear', align_corners=False)
        # if x.dtype != d_type:
        #     x = x.type(d_type)
        x = x[..., factor:-factor]
        return x


class GlobalHeightStage(nn.Module):
    def __init__(self, c1, c2, c3, c4, out_scale=8):
        ''' Process 4 blocks from encoder to single multiscale features '''
        super(GlobalHeightStage, self).__init__()
        self.cs = c1, c2, c3, c4
        self.out_scale = out_scale
        self.ghc_lst = nn.ModuleList([
            GlobalHeightConv(c1, c1 // out_scale),
            GlobalHeightConv(c2, c2 // out_scale),
            GlobalHeightConv(c3, c3 // out_scale),
            GlobalHeightConv(c4, c4 // out_scale),
        ])

    def forward(self, conv_list, out_w):
        assert len(conv_list) == 4
        bs = conv_list[0].shape[0]
        feature = torch.cat([
            f(x, out_w).reshape(bs, -1, out_w)
            for f, x, out_c in zip(self.ghc_lst, conv_list, self.cs)
        ], dim=1)
        # conv_list:
        # 0 [b, 256(d), 128(h), 256(w)] ->(4*{conv3*3, stride (2,1)} : d/8, h/16)-> [b 32(d) 8(h) 256(w)]
        # 1 [b, 512(d), 64(h), 128(w)]  ->(4*{conv3*3, stride (2,1)} : d/8, h/16)-> [b 64(d) 4(h) 128(w)]
        # 2 [b, 1024(d), 32(h), 64(w)]  ->(4*{conv3*3, stride (2,1)} : d/8, h/16)-> [b 128(d) 2(h) 64(w)]
        # 3 [b, 2048(d), 16(h), 32(w)]  ->(4*{conv3*3, stride (2,1)} : d/8, h/16)-> [b 256(d) 1(h) 32(w)]
        # 0 ->(upsample to w=256)-> [b 32(d) 8(h) 256(w)]  ->(reshape to h=1)-> [b 256(d) 1(h) 256(w)]
        # 1 ->(upsample to w=256)-> [b 64(d) 4(h) 256(w)]  ->(reshape to h=1)-> [b 256(d) 1(h) 256(w)]
        # 2 ->(upsample to w=256)-> [b 128(d) 2(h) 256(w)] ->(reshape to h=1)-> [b 256(d) 1(h) 256(w)]
        # 3 ->(upsample to w=256)-> [b 256(d) 1(h) 256(w)] ->(reshape to h=1)-> [b 256(d) 1(h) 256(w)]
        # 0 --\
        # 1 -- \
        #       ---- cat [b 1024(d) 1(h) 256(w)]
        # 2 -- /
        # 3 --/
        return feature  # [b 1024(d) 256(w)]


class HorizonNetFeatureExtractor(nn.Module):
    x_mean = torch.FloatTensor(np.array([0.485, 0.456, 0.406])[None, :, None, None])
    x_std = torch.FloatTensor(np.array([0.229, 0.224, 0.225])[None, :, None, None])

    def __init__(self, backbone='resnet50'):
        super(HorizonNetFeatureExtractor, self).__init__()
        self.out_scale = 8
        self.step_cols = 4

        # Encoder
        if backbone.startswith('res'):
            self.feature_extractor = Resnet(backbone, pretrained=True)
        elif backbone.startswith('dense'):
            self.feature_extractor = Densenet(backbone, pretrained=True)
        else:
            raise NotImplementedError()

        # Infer the number of channels from each block of the encoder
        with torch.no_grad():
            dummy = torch.zeros(1, 3, 512, 1024)
            c1, c2, c3, c4 = [b.shape[1] for b in self.feature_extractor(dummy)]
            self.c_last = (c1 * 8 + c2 * 4 + c3 * 2 + c4 * 1) // self.out_scale

        # Convert features from 4 blocks of the encoder into B x C x 1 x W'
        self.reduce_height_module = GlobalHeightStage(c1, c2, c3, c4, self.out_scale)
        self.x_mean.requires_grad = False
        self.x_std.requires_grad = False
        wrap_lr_pad(self)

    def _prepare_x(self, x):
        x = x.clone()
        if self.x_mean.device != x.device:
            self.x_mean = self.x_mean.to(x.device)
            self.x_std = self.x_std.to(x.device)
        x[:, :3] = (x[:, :3] - self.x_mean) / self.x_std

        return x

    def forward(self, x):
        # x [b 3 512 1024]
        x = self._prepare_x(x)  # [b 3 512 1024]
        conv_list = self.feature_extractor(x)
        # conv_list:
        # 0 [b, 256(d), 128(h), 256(w)]
        # 1 [b, 512(d), 64(h), 128(w)]
        # 2 [b, 1024(d), 32(h), 64(w)]
        # 3 [b, 2048(d), 16(h), 32(w)]
        x = self.reduce_height_module(conv_list, x.shape[3] // self.step_cols)  # [b 1024(d) 1(h) 256(w)]
        # After reduce_height_module, h becomes 1: the height information is compressed into d,
        # and w merges the different resolutions
        # 0 [b, 256(d), 128(h), 256(w)] -> [b, 256/8(d) * 128/16(h') = 256(d), 1(h) 256(w)]
        # 1 [b, 512(d), 64(h), 128(w)]  -> [b, 512/8(d) * 64/16(h') = 256(d), 1(h) 256(w)]
        # 2 [b, 1024(d), 32(h), 64(w)]  -> [b, 1024/8(d) * 32/16(h') = 256(d), 1(h) 256(w)]
        # 3 [b, 2048(d), 16(h), 32(w)]  -> [b, 2048/8(d) * 16/16(h') = 256(d), 1(h) 256(w)]
        return x  # [b 1024(d) 1(h) 256(w)]


if __name__ == '__main__':
    from PIL import Image
    extractor = HorizonNetFeatureExtractor()
    img = np.array(Image.open("../../src/demo.png")).transpose((2, 0, 1))
    input = torch.Tensor([img])  # 1 3 512 1024
    feature = extractor(input)
    print(feature.shape)  # 1, 1024, 256 | 1024 = (out_c_0*h_0 + ... + out_c_3*h_3) = 256 * 4

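A worked check of the channel arithmetic above (values taken from the resnet50 shape comments): each encoder block contributes 256 channels after height compression, giving the 1024-d column tokens.

c1, c2, c3, c4, out_scale = 256, 512, 1024, 2048, 8
c_last = (c1 * 8 + c2 * 4 + c3 * 2 + c4 * 1) // out_scale
print(c_last)  # 1024 -- matches the [b 1024(d) 256(w)] feature shape
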
models/modules/patch_feature_extractor.py
ADDED
@@ -0,0 +1,57 @@
import numpy as np
import torch
import torch.nn as nn
from einops.layers.torch import Rearrange


class PatchFeatureExtractor(nn.Module):
    x_mean = torch.FloatTensor(np.array([0.485, 0.456, 0.406])[None, :, None, None])
    x_std = torch.FloatTensor(np.array([0.229, 0.224, 0.225])[None, :, None, None])

    def __init__(self, patch_num=256, input_shape=None):
        super(PatchFeatureExtractor, self).__init__()

        if input_shape is None:
            input_shape = [3, 512, 1024]
        self.patch_dim = 1024
        self.patch_num = patch_num

        img_channel = input_shape[0]
        img_h = input_shape[1]
        img_w = input_shape[2]

        p_h, p_w = img_h, img_w // self.patch_num
        p_dim = p_h * p_w * img_channel

        self.patch_embedding = nn.Sequential(
            Rearrange('b c h (p_n p_w) -> b p_n (h p_w c)', p_w=p_w),
            nn.Linear(p_dim, self.patch_dim)
        )

        self.x_mean.requires_grad = False
        self.x_std.requires_grad = False

    def _prepare_x(self, x):
        x = x.clone()
        if self.x_mean.device != x.device:
            self.x_mean = self.x_mean.to(x.device)
            self.x_std = self.x_std.to(x.device)
        x[:, :3] = (x[:, :3] - self.x_mean) / self.x_std

        return x

    def forward(self, x):
        # x [b 3 512 1024]
        x = self._prepare_x(x)  # [b 3 512 1024]
        x = self.patch_embedding(x)  # [b 256(patch_num) 1024(d)]
        x = x.permute(0, 2, 1)  # [b 1024(d) 256(patch_num)]
        return x


if __name__ == '__main__':
    from PIL import Image
    extractor = PatchFeatureExtractor()
    img = np.array(Image.open("../../src/demo.png")).transpose((2, 0, 1))
    input = torch.Tensor([img])  # 1 3 512 1024
    feature = extractor(input)
    print(feature.shape)  # 1, 1024, 256

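The patch geometry implied by the defaults above, written out: each of the 256 patches is a full-height, 4-pixel-wide column, flattened and projected to 1024 dims.

img_c, img_h, img_w, patch_num = 3, 512, 1024, 256
p_w = img_w // patch_num        # 4-pixel-wide column
p_dim = img_h * p_w * img_c     # 512 * 4 * 3 = 6144 inputs to the Linear
print(p_w, p_dim)               # 4 6144 -> nn.Linear(6144, 1024)
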
models/modules/swg_transformer.py
ADDED
@@ -0,0 +1,49 @@
from models.modules.transformer_modules import *


class SWG_Transformer(nn.Module):
    def __init__(self, dim, depth, heads, win_size, dim_head, mlp_dim,
                 dropout=0., patch_num=None, ape=None, rpe=None, rpe_pos=1):
        super().__init__()
        self.absolute_pos_embed = None if patch_num is None or ape is None else AbsolutePosition(dim, dropout,
                                                                                                 patch_num, ape)
        self.pos_dropout = nn.Dropout(dropout)
        self.layers = nn.ModuleList([])
        for i in range(depth):
            if i % 2 == 0:
                attention = WinAttention(dim, win_size=win_size, shift=0 if (i % 3 == 0) else win_size // 2,
                                         heads=heads, dim_head=dim_head, dropout=dropout, rpe=rpe, rpe_pos=rpe_pos)
            else:
                attention = Attention(dim, heads=heads, dim_head=dim_head, dropout=dropout,
                                      patch_num=patch_num, rpe=rpe, rpe_pos=rpe_pos)

            self.layers.append(nn.ModuleList([
                PreNorm(dim, attention),
                PreNorm(dim, FeedForward(dim, mlp_dim, dropout=dropout)),
            ]))

    def forward(self, x):
        if self.absolute_pos_embed is not None:
            x = self.absolute_pos_embed(x)
        x = self.pos_dropout(x)
        for attn, ff in self.layers:
            x = attn(x) + x
            x = ff(x) + x
        return x


if __name__ == '__main__':
    token_dim = 1024
    token_len = 256

    transformer = SWG_Transformer(dim=token_dim,
                                  depth=6,
                                  heads=16,
                                  win_size=8,
                                  dim_head=64,
                                  mlp_dim=2048,
                                  dropout=0.1)

    input = torch.randn(1, token_len, token_dim)
    output = transformer(input)
    print(output.shape)

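The loop above alternates (shifted-)window and global attention; a small sketch of the resulting layer pattern for the depth=6, win_size=8 demo:

depth, win_size = 6, 8
for i in range(depth):
    if i % 2 == 0:
        print(i, 'window attention, shift =', 0 if i % 3 == 0 else win_size // 2)
    else:
        print(i, 'global attention')
# 0 window(shift 0), 1 global, 2 window(shift 4), 3 global, 4 window(shift 4), 5 global
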
models/modules/swin_transformer.py
ADDED
@@ -0,0 +1,43 @@
from models.modules.transformer_modules import *


class Swin_Transformer(nn.Module):
    def __init__(self, dim, depth, heads, win_size, dim_head, mlp_dim,
                 dropout=0., patch_num=None, ape=None, rpe=None, rpe_pos=1):
        super().__init__()
        self.absolute_pos_embed = None if patch_num is None or ape is None else AbsolutePosition(dim, dropout,
                                                                                                 patch_num, ape)
        self.pos_dropout = nn.Dropout(dropout)
        self.layers = nn.ModuleList([])
        for i in range(depth):
            self.layers.append(nn.ModuleList([
                PreNorm(dim, WinAttention(dim, win_size=win_size, shift=0 if (i % 2 == 0) else win_size // 2,
                                          heads=heads, dim_head=dim_head, dropout=dropout, rpe=rpe, rpe_pos=rpe_pos)),
                PreNorm(dim, FeedForward(dim, mlp_dim, dropout=dropout)),
            ]))

    def forward(self, x):
        if self.absolute_pos_embed is not None:
            x = self.absolute_pos_embed(x)
        x = self.pos_dropout(x)
        for attn, ff in self.layers:
            x = attn(x) + x
            x = ff(x) + x
        return x


if __name__ == '__main__':
    token_dim = 1024
    token_len = 256

    transformer = Swin_Transformer(dim=token_dim,
                                   depth=6,
                                   heads=16,
                                   win_size=8,
                                   dim_head=64,
                                   mlp_dim=2048,
                                   dropout=0.1)

    input = torch.randn(1, token_len, token_dim)
    output = transformer(input)
    print(output.shape)

models/modules/transformer.py
ADDED
@@ -0,0 +1,44 @@
from models.modules.transformer_modules import *


class Transformer(nn.Module):
    def __init__(self, dim, depth, heads, win_size, dim_head, mlp_dim,
                 dropout=0., patch_num=None, ape=None, rpe=None, rpe_pos=1):
        super().__init__()

        self.absolute_pos_embed = None if patch_num is None or ape is None else AbsolutePosition(dim, dropout,
                                                                                                 patch_num, ape)
        self.pos_dropout = nn.Dropout(dropout)
        self.layers = nn.ModuleList([])
        for _ in range(depth):
            self.layers.append(nn.ModuleList([
                PreNorm(dim, Attention(dim, heads=heads, dim_head=dim_head, dropout=dropout, patch_num=patch_num,
                                       rpe=rpe, rpe_pos=rpe_pos)),
                PreNorm(dim, FeedForward(dim, mlp_dim, dropout=dropout))
            ]))

    def forward(self, x):
        if self.absolute_pos_embed is not None:
            x = self.absolute_pos_embed(x)
        x = self.pos_dropout(x)
        for attn, ff in self.layers:
            x = attn(x) + x
            x = ff(x) + x
        return x


if __name__ == '__main__':
    token_dim = 1024
    token_len = 256

    # win_size is not used by the plain Transformer; it is only part of the shared signature
    transformer = Transformer(dim=token_dim, depth=6, heads=16, win_size=8,
                              dim_head=64, mlp_dim=2048, dropout=0.1,
                              patch_num=256, ape='lr_parameter', rpe='lr_parameter_mirror')

    total = sum(p.numel() for p in transformer.parameters())
    trainable = sum(p.numel() for p in transformer.parameters() if p.requires_grad)
    print('parameter total:{:,}, trainable:{:,}'.format(total, trainable))

    input = torch.randn(1, token_len, token_dim)
    output = transformer(input)
    print(output.shape)

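For reference, the position-embedding options consumed here are defined in models/modules/transformer_modules.py (next file): ape is one of 'lr_parameter' or 'fix_angle', and rpe is one of 'lr_parameter', 'lr_parameter_mirror', 'lr_parameter_half', or 'fix_angle' (see AbsolutePosition and RelativePosition below).
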
models/modules/transformer_modules.py
ADDED
@@ -0,0 +1,250 @@
"""
@Date: 2021/09/01
@description:
"""
import warnings
import math
import torch
import torch.nn.functional as F

from torch import nn, einsum
from einops import rearrange


def trunc_normal_(tensor, mean=0., std=1., a=-2., b=2.):
    # Cut & paste from PyTorch official master until it's in a few official releases - RW
    # Method based on https://people.sc.fsu.edu/~jburkardt/presentations/truncated_normal.pdf
    def norm_cdf(x):
        # Computes standard normal cumulative distribution function
        return (1. + math.erf(x / math.sqrt(2.))) / 2.

    if (mean < a - 2 * std) or (mean > b + 2 * std):
        warnings.warn("mean is more than 2 std from [a, b] in nn.init.trunc_normal_. "
                      "The distribution of values may be incorrect.",
                      stacklevel=2)

    with torch.no_grad():
        # Values are generated by using a truncated uniform distribution and
        # then using the inverse CDF for the normal distribution.
        # Get upper and lower cdf values
        l = norm_cdf((a - mean) / std)
        u = norm_cdf((b - mean) / std)

        # Uniformly fill tensor with values from [l, u], then translate to
        # [2l-1, 2u-1].
        tensor.uniform_(2 * l - 1, 2 * u - 1)

        # Use inverse cdf transform for normal distribution to get truncated
        # standard normal
        tensor.erfinv_()

        # Transform to proper mean, std
        tensor.mul_(std * math.sqrt(2.))
        tensor.add_(mean)

        # Clamp to ensure it's in the proper range
        tensor.clamp_(min=a, max=b)
        return tensor


class PreNorm(nn.Module):
    def __init__(self, dim, fn):
        super().__init__()
        self.norm = nn.LayerNorm(dim)
        self.fn = fn

    def forward(self, x, **kwargs):
        return self.fn(self.norm(x), **kwargs)


# compatibility pytorch < 1.4
class GELU(nn.Module):
    def forward(self, input):
        return F.gelu(input)


class Attend(nn.Module):

    def __init__(self, dim=None):
        super().__init__()
        self.dim = dim

    def forward(self, input):
        return F.softmax(input, dim=self.dim, dtype=input.dtype)


class FeedForward(nn.Module):
    def __init__(self, dim, hidden_dim, dropout=0.):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(dim, hidden_dim),
            GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, dim),
            nn.Dropout(dropout)
        )

    def forward(self, x):
        return self.net(x)


class RelativePosition(nn.Module):
    def __init__(self, heads, patch_num=None, rpe=None):
        super().__init__()
        self.rpe = rpe
        self.heads = heads
        self.patch_num = patch_num

        if rpe == 'lr_parameter':
            # -255 ~ 0 ~ 255, count: patch_num * 2 - 1
            count = patch_num * 2 - 1
            self.rpe_table = nn.Parameter(torch.Tensor(count, heads))
            nn.init.xavier_uniform_(self.rpe_table)
        elif rpe == 'lr_parameter_mirror':
            # 0 ~ 127, 128 ~ 1, count: patch_num // 2 + 1
            count = patch_num // 2 + 1
            self.rpe_table = nn.Parameter(torch.Tensor(count, heads))
            nn.init.xavier_uniform_(self.rpe_table)
        elif rpe == 'lr_parameter_half':
            # -127 ~ 0 ~ 128, count: patch_num
            count = patch_num
            self.rpe_table = nn.Parameter(torch.Tensor(count, heads))
            nn.init.xavier_uniform_(self.rpe_table)
        elif rpe == 'fix_angle':
            # 0 ~ 127, 128 ~ 1, count: patch_num // 2 + 1
            count = patch_num // 2 + 1
            # we think that closer proximity should have stronger relationships
            rpe_table = (torch.arange(count, 0, -1) / count)[..., None].repeat(1, heads)
            self.register_buffer('rpe_table', rpe_table)

    def get_relative_pos_embed(self):
        range_vec = torch.arange(self.patch_num)
        distance_mat = range_vec[None, :] - range_vec[:, None]
        if self.rpe == 'lr_parameter':
            # -255 ~ 0 ~ 255 -> 0 ~ 255 ~ 510
            distance_mat += self.patch_num - 1  # shift to remove negatives
            return self.rpe_table[distance_mat].permute(2, 0, 1)[None]
        elif self.rpe == 'lr_parameter_mirror' or self.rpe == 'fix_angle':
            distance_mat[distance_mat < 0] = -distance_mat[distance_mat < 0]  # mirror
            distance_mat[distance_mat > self.patch_num // 2] = self.patch_num - distance_mat[
                distance_mat > self.patch_num // 2]  # fold repeats
            return self.rpe_table[distance_mat].permute(2, 0, 1)[None]
        elif self.rpe == 'lr_parameter_half':
            distance_mat[distance_mat > self.patch_num // 2] = distance_mat[
                distance_mat > self.patch_num // 2] - self.patch_num  # fold repeats > 128, e.g. 129 -> -127
            distance_mat[distance_mat < -self.patch_num // 2 + 1] = distance_mat[
                distance_mat < -self.patch_num // 2 + 1] + self.patch_num  # fold repeats < -127, e.g. -128 -> 128
            # -127 ~ 0 ~ 128 -> 0 ~ 127 ~ 255
            distance_mat += self.patch_num // 2 - 1  # shift to remove negatives
            return self.rpe_table[distance_mat].permute(2, 0, 1)[None]

    def forward(self, attn):
        return attn + self.get_relative_pos_embed()


class Attention(nn.Module):
    def __init__(self, dim, heads=8, dim_head=64, dropout=0., patch_num=None, rpe=None, rpe_pos=1):
        """
        :param dim:
        :param heads:
        :param dim_head:
        :param dropout:
        :param patch_num:
        :param rpe: relative position embedding
        """
        super().__init__()

        self.relative_pos_embed = None if patch_num is None or rpe is None else RelativePosition(heads, patch_num, rpe)
        inner_dim = dim_head * heads
        project_out = not (heads == 1 and dim_head == dim)

        self.heads = heads
        self.scale = dim_head ** -0.5
        self.rpe_pos = rpe_pos

        self.attend = Attend(dim=-1)
        self.to_qkv = nn.Linear(dim, inner_dim * 3, bias=False)

        self.to_out = nn.Sequential(
            nn.Linear(inner_dim, dim),
            nn.Dropout(dropout)
        ) if project_out else nn.Identity()

    def forward(self, x):
        b, n, _, h = *x.shape, self.heads
        qkv = self.to_qkv(x).chunk(3, dim=-1)
        q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> b h n d', h=h), qkv)

        dots = einsum('b h i d, b h j d -> b h i j', q, k) * self.scale

        if self.rpe_pos == 0:
            if self.relative_pos_embed is not None:
                dots = self.relative_pos_embed(dots)

        attn = self.attend(dots)

        if self.rpe_pos == 1:
            if self.relative_pos_embed is not None:
                attn = self.relative_pos_embed(attn)

        out = einsum('b h i j, b h j d -> b h i d', attn, v)
        out = rearrange(out, 'b h n d -> b n (h d)')
        return self.to_out(out)


class AbsolutePosition(nn.Module):
    def __init__(self, dim, dropout=0., patch_num=None, ape=None):
        super().__init__()
        self.ape = ape

        if ape == 'lr_parameter':
            self.absolute_pos_embed = nn.Parameter(torch.zeros(1, patch_num, dim))
            trunc_normal_(self.absolute_pos_embed, std=.02)

        elif ape == 'fix_angle':
            angle = torch.arange(0, patch_num, dtype=torch.float) / patch_num * (math.pi * 2)
            self.absolute_pos_embed = torch.sin(angle)[..., None].repeat(1, dim)[None]

    def forward(self, x):
        return x + self.absolute_pos_embed


class WinAttention(nn.Module):
    def __init__(self, dim, win_size=8, shift=0, heads=8, dim_head=64, dropout=0., rpe=None, rpe_pos=1):
        super().__init__()

        self.win_size = win_size
        self.shift = shift
        self.attend = Attention(dim, heads=heads, dim_head=dim_head,
                                dropout=dropout, patch_num=win_size, rpe=None if rpe is None else 'lr_parameter',
                                rpe_pos=rpe_pos)

    def forward(self, x):
        b = x.shape[0]
        if self.shift != 0:
            x = torch.roll(x, shifts=self.shift, dims=-2)
        x = rearrange(x, 'b (m w) d -> (b m) w d', w=self.win_size)  # split windows

        out = self.attend(x)

        out = rearrange(out, '(b m) w d -> b (m w) d', b=b)  # recover windows
        if self.shift != 0:
            out = torch.roll(out, shifts=-self.shift, dims=-2)

        return out


class Conv(nn.Module):
    def __init__(self, dim, dropout=0.):
        super().__init__()
        self.dim = dim
        self.net = nn.Sequential(
            nn.Conv1d(dim, dim, kernel_size=3, stride=1, padding=0),
            nn.Dropout(dropout)
        )

    def forward(self, x):
        x = x.transpose(1, 2)
        x = torch.cat([x[..., -1:], x, x[..., :1]], dim=-1)
        x = self.net(x)
        return x.transpose(1, 2)

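A small sketch of the circular ("mirror") distance folding used by the 'lr_parameter_mirror' and 'fix_angle' tables above, with patch_num=8 standing in for the 256 used in practice:

import torch
patch_num = 8
range_vec = torch.arange(patch_num)
distance_mat = range_vec[None, :] - range_vec[:, None]
distance_mat[distance_mat < 0] = -distance_mat[distance_mat < 0]          # mirror
distance_mat[distance_mat > patch_num // 2] = patch_num - distance_mat[
    distance_mat > patch_num // 2]                                        # fold the far half back
print(distance_mat[0])  # tensor([0, 1, 2, 3, 4, 3, 2, 1]) -- distance on a circle
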
models/other/__init__.py
ADDED
@@ -0,0 +1,4 @@
"""
@Date: 2021/07/18
@description:
"""

models/other/criterion.py
ADDED
@@ -0,0 +1,72 @@
"""
@date: 2021/7/19
@description:
"""
import torch
import loss

from utils.misc import tensor2np


def build_criterion(config, logger):
    criterion = {}
    device = config.TRAIN.DEVICE

    for k in config.TRAIN.CRITERION.keys():
        sc = config.TRAIN.CRITERION[k]
        if sc.WEIGHT is None or float(sc.WEIGHT) == 0:
            continue
        criterion[sc.NAME] = {
            'loss': getattr(loss, sc.LOSS)(),
            'weight': float(sc.WEIGHT),
            'sub_weights': sc.WEIGHTS,
            'need_all': sc.NEED_ALL
        }

        criterion[sc.NAME]['loss'] = criterion[sc.NAME]['loss'].to(device)
        if config.AMP_OPT_LEVEL != "O0" and 'cuda' in device:
            criterion[sc.NAME]['loss'] = criterion[sc.NAME]['loss'].type(torch.float16)

        # logger.info(f"Build criterion:{sc.WEIGHT}_{sc.NAME}_{sc.LOSS}_{sc.WEIGHTS}")
    return criterion


def calc_criterion(criterion, gt, dt, epoch_loss_d):
    loss = None
    postfix_d = {}
    for k in criterion.keys():
        if criterion[k]['need_all']:
            single_loss = criterion[k]['loss'](gt, dt)
            ws_loss = None
            for i, sub_weight in enumerate(criterion[k]['sub_weights']):
                if sub_weight == 0:
                    continue
                if ws_loss is None:
                    ws_loss = single_loss[i] * sub_weight
                else:
                    ws_loss = ws_loss + single_loss[i] * sub_weight
            single_loss = ws_loss if ws_loss is not None else single_loss
        else:
            assert k in gt.keys(), "ground truth label is None: " + k
            assert k in dt.keys(), "detection key is None: " + k
            if k == 'ratio' and gt[k].shape[-1] != dt[k].shape[-1]:
                gt[k] = gt[k].repeat(1, dt[k].shape[-1])
            single_loss = criterion[k]['loss'](gt[k], dt[k])

        postfix_d[k] = tensor2np(single_loss)
        if k not in epoch_loss_d.keys():
            epoch_loss_d[k] = []
        epoch_loss_d[k].append(postfix_d[k])

        single_loss = single_loss * criterion[k]['weight']
        if loss is None:
            loss = single_loss
        else:
            loss = loss + single_loss

    k = 'loss'
    postfix_d[k] = tensor2np(loss)
    if k not in epoch_loss_d.keys():
        epoch_loss_d[k] = []
    epoch_loss_d[k].append(postfix_d[k])
    return loss, postfix_d, epoch_loss_d

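A sketch of the per-criterion config fields that build_criterion reads (field meanings inferred from the code above; the concrete values live in config/defaults.py):

# config.TRAIN.CRITERION.<key>:
#   NAME      key into the criterion dict, and into gt/dt when NEED_ALL is False
#   LOSS      class name looked up in the `loss` package via getattr(loss, sc.LOSS)
#   WEIGHT    overall term weight; None or 0 disables the term
#   WEIGHTS   per-component weights, applied only when NEED_ALL is True
#   NEED_ALL  if True, the loss receives the full gt/dt dicts instead of gt[NAME]/dt[NAME]
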
models/other/init_env.py
ADDED
@@ -0,0 +1,37 @@
"""
@Date: 2021/08/15
@description:
"""
import random
import torch
import torch.backends.cudnn as cudnn
import numpy as np
import os
import cv2


def init_env(seed, deterministic=False, loader_work_num=0):
    # Fix seed
    # Python & NumPy
    np.random.seed(seed)
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)

    # PyTorch
    torch.manual_seed(seed)  # set the random seed for the CPU
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)  # set the random seed for the current GPU
        torch.cuda.manual_seed_all(seed)  # set the random seed for all GPUs

    # cuDNN
    if deterministic:
        # reproducibility
        torch.backends.cudnn.benchmark = False
        torch.backends.cudnn.deterministic = True  # with this flag set, the selected convolution algorithm is deterministic (the default algorithm)
    else:
        cudnn.benchmark = True  # set to True when input sizes/types vary little between iterations
        torch.backends.cudnn.deterministic = False

    # Using multiple threads in OpenCV can cause deadlocks
    if loader_work_num != 0:
        cv2.setNumThreads(0)

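Typical call (a sketch): fix all seeds before building datasets and models; deterministic=True trades cuDNN speed for exact run-to-run repeatability.

from models.other.init_env import init_env

init_env(seed=123, deterministic=True, loader_work_num=4)
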
models/other/optimizer.py
ADDED
@@ -0,0 +1,24 @@
"""
@Date: 2021/07/18
@description:
"""
from torch import optim as optim


def build_optimizer(config, model, logger):
    name = config.TRAIN.OPTIMIZER.NAME.lower()

    optimizer = None
    if name == 'sgd':
        optimizer = optim.SGD(model.parameters(), momentum=config.TRAIN.OPTIMIZER.MOMENTUM, nesterov=True,
                              lr=config.TRAIN.BASE_LR, weight_decay=config.TRAIN.WEIGHT_DECAY)
    elif name == 'adamw':
        optimizer = optim.AdamW(model.parameters(), eps=config.TRAIN.OPTIMIZER.EPS, betas=config.TRAIN.OPTIMIZER.BETAS,
                                lr=config.TRAIN.BASE_LR, weight_decay=config.TRAIN.WEIGHT_DECAY)
    elif name == 'adam':
        optimizer = optim.Adam(model.parameters(), eps=config.TRAIN.OPTIMIZER.EPS, betas=config.TRAIN.OPTIMIZER.BETAS,
                               lr=config.TRAIN.BASE_LR, weight_decay=config.TRAIN.WEIGHT_DECAY)

    logger.info(f"Build optimizer: {name}, lr:{config.TRAIN.BASE_LR}")

    return optimizer

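Note that build_optimizer returns None for any NAME other than 'sgd', 'adamw', or 'adam'; the config fields it reads are (names taken from the code above):

# config.TRAIN.BASE_LR            base learning rate for all three optimizers
# config.TRAIN.WEIGHT_DECAY       weight decay
# config.TRAIN.OPTIMIZER.NAME     'sgd' | 'adamw' | 'adam' (case-insensitive)
# config.TRAIN.OPTIMIZER.MOMENTUM used by sgd (with nesterov=True)
# config.TRAIN.OPTIMIZER.EPS      used by adamw/adam
# config.TRAIN.OPTIMIZER.BETAS    used by adamw/adam
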
models/other/scheduler.py
ADDED
@@ -0,0 +1,51 @@
"""
@Date: 2021/09/14
@description:
"""


class WarmupScheduler:
    def __init__(self, optimizer, lr_pow, init_lr, warmup_lr, warmup_step, max_step, **kwargs):
        self.lr_pow = lr_pow
        self.init_lr = init_lr
        self.running_lr = init_lr
        self.warmup_lr = warmup_lr
        self.warmup_step = warmup_step
        self.max_step = max_step
        self.optimizer = optimizer

    def step_update(self, cur_step):
        if cur_step < self.warmup_step:
            frac = cur_step / self.warmup_step
            step = self.warmup_lr - self.init_lr
            self.running_lr = self.init_lr + step * frac
        else:
            frac = (float(cur_step) - self.warmup_step) / (self.max_step - self.warmup_step)
            scale_running_lr = max((1. - frac), 0.) ** self.lr_pow
            self.running_lr = self.warmup_lr * scale_running_lr

        if self.optimizer is not None:
            for param_group in self.optimizer.param_groups:
                param_group['lr'] = self.running_lr


if __name__ == '__main__':
    import matplotlib.pyplot as plt

    scheduler = WarmupScheduler(optimizer=None,
                                lr_pow=4,
                                init_lr=0.0000003,
                                warmup_lr=0.00003,
                                warmup_step=10000,
                                max_step=100000)

    x = []
    y = []
    for i in range(100000):
        scheduler.step_update(i)
        x.append(i)
        y.append(scheduler.running_lr)
    plt.plot(x, y, linewidth=1)
    plt.show()

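The schedule above, restated: linear warmup from init_lr to warmup_lr over warmup_step steps, then polynomial decay of power lr_pow down to 0 at max_step.

# cur_step <  warmup_step: lr = init_lr + (warmup_lr - init_lr) * cur_step / warmup_step
# cur_step >= warmup_step: lr = warmup_lr * (1 - (cur_step - warmup_step) / (max_step - warmup_step)) ** lr_pow
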
postprocessing/__init__.py
ADDED
@@ -0,0 +1,4 @@
"""
@Date: 2021/10/06
@description:
"""

postprocessing/dula/__init__.py
ADDED
@@ -0,0 +1,4 @@
"""
@Date: 2021/10/06
@description:
"""

postprocessing/dula/layout.py
ADDED
@@ -0,0 +1,226 @@
"""
@Date: 2021/10/06
@description: Use the approach proposed by DuLa-Net
"""
import cv2
import numpy as np
import math
import matplotlib.pyplot as plt

from visualization.floorplan import draw_floorplan


def merge_near(lst, diag):
    group = [[0, ]]
    for i in range(1, len(lst)):
        if lst[i][1] == 0 and lst[i][0] - np.mean(group[-1]) < diag * 0.02:
            group[-1].append(lst[i][0])
        else:
            group.append([lst[i][0], ])
    if len(group) == 1:
        group = [lst[0][0], lst[-1][0]]
    else:
        group = [int(np.mean(x)) for x in group]
    return group


def fit_layout(floor_xz, need_cube=False, show=False, block_eps=0.2):
    show_radius = np.linalg.norm(floor_xz, axis=-1).max()
    side_l = 512
    floorplan = draw_floorplan(xz=floor_xz, show_radius=show_radius, show=show, scale=1, side_l=side_l).astype(np.uint8)
    center = np.array([side_l / 2, side_l / 2])
    polys = cv2.findContours(floorplan, 1, 2)
    if isinstance(polys, tuple):
        if len(polys) == 3:
            # opencv 3
            polys = list(polys[1])
        else:
            polys = list(polys[0])
    polys.sort(key=lambda x: cv2.contourArea(x), reverse=True)
    poly = polys[0]
    sub_x, sub_y, w, h = cv2.boundingRect(poly)
    floorplan_sub = floorplan[sub_y:sub_y + h, sub_x:sub_x + w]
    sub_center = center - np.array([sub_x, sub_y])
    polys = cv2.findContours(floorplan_sub, 1, 2)
    if isinstance(polys, tuple):
        if len(polys) == 3:
            polys = polys[1]
        else:
            polys = polys[0]
    poly = polys[0]
    epsilon = 0.005 * cv2.arcLength(poly, True)
    poly = cv2.approxPolyDP(poly, epsilon, True)

    x_lst = [[0, 0], ]
    y_lst = [[0, 0], ]

    ans = np.zeros((floorplan_sub.shape[0], floorplan_sub.shape[1]))

    for i in range(len(poly)):
        p1 = poly[i][0]
        p2 = poly[(i + 1) % len(poly)][0]
        cp1 = p1 - sub_center
        cp2 = p2 - sub_center
        p12 = p2 - p1
        l1 = np.linalg.norm(cp1)
        l2 = np.linalg.norm(cp2)
        l3 = np.linalg.norm(p12)
        # We added occlusion detection
        is_block1 = abs(np.cross(cp1 / l1, cp2 / l2)) < block_eps
        is_block2 = abs(np.cross(cp2 / l2, p12 / l3)) < block_eps * 2
        is_block = is_block1 and is_block2

        if (p2[0] - p1[0]) == 0:
            slope = 10
        else:
            slope = abs((p2[1] - p1[1]) / (p2[0] - p1[0]))

        if is_block:
            s = p1[1] if l1 < l2 else p2[1]
            y_lst.append([s, 1])
            s = p1[0] if l1 < l2 else p2[0]
            x_lst.append([s, 1])

            left = p1[0] if p1[0] < p2[0] else p2[0]
            right = p1[0] if p1[0] > p2[0] else p2[0]
            top = p1[1] if p1[1] < p2[1] else p2[1]
            bottom = p1[1] if p1[1] > p2[1] else p2[1]
            sample = floorplan_sub[top:bottom, left:right]
            score = 0 if sample.size == 0 else sample.mean()
            if score >= 0.3:
                ans[top:bottom, left:right] = 1

        else:
            if slope <= 1:
                s = int((p1[1] + p2[1]) / 2)
                y_lst.append([s, 0])
            elif slope > 1:
                s = int((p1[0] + p2[0]) / 2)
                x_lst.append([s, 0])

    debug_show = False
    if debug_show:
        plt.figure(dpi=300)
        plt.axis('off')
        a = cv2.drawMarker(floorplan_sub.copy() * 0.5, tuple([floorplan_sub.shape[1] // 2, floorplan_sub.shape[0] // 2]), [1], markerType=0, markerSize=10, thickness=2)
        plt.imshow(cv2.drawContours(a, [poly], 0, 1, 1))
        plt.savefig('src/1.png', bbox_inches='tight', transparent=True, pad_inches=0)
        plt.show()

        plt.figure(dpi=300)
        plt.axis('off')
        a = cv2.drawMarker(ans.copy() * 0.5, tuple([floorplan_sub.shape[1] // 2, floorplan_sub.shape[0] // 2]), [1], markerType=0, markerSize=10, thickness=2)
        plt.imshow(cv2.drawContours(a, [poly], 0, 1, 1))
        # plt.show()
        plt.savefig('src/2.png', bbox_inches='tight', transparent=True, pad_inches=0)
        plt.show()

    x_lst.append([floorplan_sub.shape[1], 0])
    y_lst.append([floorplan_sub.shape[0], 0])
    x_lst.sort(key=lambda x: x[0])
    y_lst.sort(key=lambda x: x[0])

    diag = math.sqrt(math.pow(floorplan_sub.shape[1], 2) + math.pow(floorplan_sub.shape[0], 2))
    x_lst = merge_near(x_lst, diag)
    y_lst = merge_near(y_lst, diag)
    if need_cube and len(x_lst) > 2:
        x_lst = [x_lst[0], x_lst[-1]]
    if need_cube and len(y_lst) > 2:
        y_lst = [y_lst[0], y_lst[-1]]

    for i in range(len(x_lst) - 1):
        for j in range(len(y_lst) - 1):
            sample = floorplan_sub[y_lst[j]:y_lst[j + 1], x_lst[i]:x_lst[i + 1]]
            score = 0 if sample.size == 0 else sample.mean()
            if score >= 0.3:
                ans[y_lst[j]:y_lst[j + 1], x_lst[i]:x_lst[i + 1]] = 1

    if debug_show:
        plt.figure(dpi=300)
        plt.axis('off')
        a = cv2.drawMarker(ans.copy() * 0.5, tuple([floorplan_sub.shape[1] // 2, floorplan_sub.shape[0] // 2]), [1],
                           markerType=0, markerSize=10, thickness=2)
        plt.imshow(cv2.drawContours(a, [poly], 0, 1, 1))
        # plt.show()
        plt.savefig('src/3.png', bbox_inches='tight', transparent=True, pad_inches=0)
        plt.show()

    pred = np.uint8(ans)
    pred_polys = cv2.findContours(pred, 1, 3)
    if isinstance(pred_polys, tuple):
        if len(pred_polys) == 3:
            pred_polys = list(pred_polys[1])
        else:
            pred_polys = list(pred_polys[0])

    pred_polys.sort(key=lambda x: cv2.contourArea(x), reverse=True)
    pred_polys = pred_polys[0]

    if debug_show:
        plt.figure(dpi=300)
        plt.axis('off')
        a = cv2.drawMarker(ans.copy() * 0.5, tuple([floorplan_sub.shape[1] // 2, floorplan_sub.shape[0] // 2]), [1],
                           markerType=0, markerSize=10, thickness=2)
        a = cv2.drawContours(a, [poly], 0, 0.8, 1)
        a = cv2.drawContours(a, [pred_polys], 0, 1, 1)
        plt.imshow(a)
        # plt.show()
        plt.savefig('src/4.png', bbox_inches='tight', transparent=True, pad_inches=0)
        plt.show()

    polygon = [(p[0][1], p[0][0]) for p in pred_polys[::-1]]

    v = np.array([p[0] + sub_y for p in polygon])
    u = np.array([p[1] + sub_x for p in polygon])
    #       side_l
    # v<-----------|o
    # |      |     |
    # |  ----|----z| side_l
    # |      |     |
    # |      x    \|/
    # |------------u
    side_l = floorplan.shape[0]
    pred_xz = np.concatenate((u[:, np.newaxis] - side_l // 2, side_l // 2 - v[:, np.newaxis]), axis=1)

    pred_xz = pred_xz * show_radius / (side_l // 2)
    if show:
        draw_floorplan(pred_xz, show_radius=show_radius, show=show)

    show_process = False
    if show_process:
        img = np.zeros((floorplan_sub.shape[0], floorplan_sub.shape[1], 3))
        for x in x_lst:
            cv2.line(img, (x, 0), (x, floorplan_sub.shape[0]), (0, 255, 0), 1)
        for y in y_lst:
            cv2.line(img, (0, y), (floorplan_sub.shape[1], y), (255, 0, 0), 1)

        fig = plt.figure()
        plt.axis('off')
        ax1 = fig.add_subplot(2, 2, 1)
        ax1.imshow(floorplan)
        ax3 = fig.add_subplot(2, 2, 2)
        ax3.imshow(floorplan_sub)
        ax4 = fig.add_subplot(2, 2, 3)
        ax4.imshow(img)
        ax5 = fig.add_subplot(2, 2, 4)
        ax5.imshow(ans)
        plt.show()

    return pred_xz


if __name__ == '__main__':
    from utils.conversion import uv2xyz

    pano_img = np.zeros([512, 1024, 3])
    corners = np.array([[0.1, 0.7],
                        [0.4, 0.7],
                        [0.3, 0.6],
                        [0.6, 0.6],
                        [0.8, 0.7]])
    xz = uv2xyz(corners)[..., ::2]
    draw_floorplan(xz, show=True, marker_color=None, center_color=0.8)

    xz = fit_layout(xz)
    draw_floorplan(xz, show=True, marker_color=None, center_color=0.8)

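merge_near in isolation (a small worked example): non-occlusion candidates ([s, 0] entries) within 2% of the floorplan diagonal of the running group mean are merged into a single axis line.

lst = [[0, 0], [100, 0], [103, 0], [300, 0], [512, 0]]
print(merge_near(lst, diag=724))  # [0, 101, 300, 512] -- 100 and 103 collapse to their mean
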