mohamed-boudjoghra committed on
Commit 2b657e9
1 Parent(s): 72bcb98

Upload 1031 files

This view is limited to 50 files because it contains too many changes. See the raw diff for the full change set.
Files changed (50)
  1. README.md +108 -0
  2. environment.yml +216 -0
  3. models/Mask3D/LICENSE +22 -0
  4. models/Mask3D/MANIFEST.in +1 -0
  5. models/Mask3D/README.md +289 -0
  6. models/Mask3D/__init__.py +0 -0
  7. models/Mask3D/build/lib/mask3d/__init__.py +216 -0
  8. models/Mask3D/build/lib/mask3d/benchmark/__init__.py +0 -0
  9. models/Mask3D/build/lib/mask3d/benchmark/evaluate_semantic_instance.py +1141 -0
  10. models/Mask3D/build/lib/mask3d/benchmark/util.py +128 -0
  11. models/Mask3D/build/lib/mask3d/benchmark/util_3d.py +177 -0
  12. models/Mask3D/build/lib/mask3d/conf/__init__.py +0 -0
  13. models/Mask3D/build/lib/mask3d/conf/augmentation/albumentations_aug.yaml +30 -0
  14. models/Mask3D/build/lib/mask3d/conf/augmentation/volumentations_aug.yaml +53 -0
  15. models/Mask3D/build/lib/mask3d/conf/callbacks/callbacks_instance_segmentation.yaml +11 -0
  16. models/Mask3D/build/lib/mask3d/conf/config_base_instance_segmentation.yaml +75 -0
  17. models/Mask3D/build/lib/mask3d/conf/data/collation_functions/voxelize_collate.yaml +42 -0
  18. models/Mask3D/build/lib/mask3d/conf/data/collation_functions/voxelize_collate_merge.yaml +36 -0
  19. models/Mask3D/build/lib/mask3d/conf/data/data_loaders/simple_loader.yaml +22 -0
  20. models/Mask3D/build/lib/mask3d/conf/data/data_loaders/simple_loader_save_memory.yaml +22 -0
  21. models/Mask3D/build/lib/mask3d/conf/data/datasets/matterport.yaml +48 -0
  22. models/Mask3D/build/lib/mask3d/conf/data/datasets/matterport_scannet.yaml +50 -0
  23. models/Mask3D/build/lib/mask3d/conf/data/datasets/rio.yaml +48 -0
  24. models/Mask3D/build/lib/mask3d/conf/data/datasets/s3dis.yaml +87 -0
  25. models/Mask3D/build/lib/mask3d/conf/data/datasets/scannet.yaml +79 -0
  26. models/Mask3D/build/lib/mask3d/conf/data/datasets/scannet200.yaml +79 -0
  27. models/Mask3D/build/lib/mask3d/conf/data/datasets/semantic_kitti.yaml +42 -0
  28. models/Mask3D/build/lib/mask3d/conf/data/datasets/stpls3d.yaml +95 -0
  29. models/Mask3D/build/lib/mask3d/conf/data/indoor.yaml +43 -0
  30. models/Mask3D/build/lib/mask3d/conf/data/outdoor.yaml +26 -0
  31. models/Mask3D/build/lib/mask3d/conf/logging/base.yaml +10 -0
  32. models/Mask3D/build/lib/mask3d/conf/logging/full.yaml +8 -0
  33. models/Mask3D/build/lib/mask3d/conf/logging/minimal.yaml +5 -0
  34. models/Mask3D/build/lib/mask3d/conf/logging/offline.yaml +10 -0
  35. models/Mask3D/build/lib/mask3d/conf/loss/cross_entropy.yaml +3 -0
  36. models/Mask3D/build/lib/mask3d/conf/loss/set_criterion.yaml +11 -0
  37. models/Mask3D/build/lib/mask3d/conf/loss/set_criterion_custom_weights_1.yaml +11 -0
  38. models/Mask3D/build/lib/mask3d/conf/matcher/hungarian_matcher.yaml +6 -0
  39. models/Mask3D/build/lib/mask3d/conf/metrics/miou.yaml +4 -0
  40. models/Mask3D/build/lib/mask3d/conf/model/mask3d.yaml +47 -0
  41. models/Mask3D/build/lib/mask3d/conf/optimizer/adamw.yaml +3 -0
  42. models/Mask3D/build/lib/mask3d/conf/optimizer/adamw_lower.yaml +3 -0
  43. models/Mask3D/build/lib/mask3d/conf/scheduler/exponentiallr.yaml +11 -0
  44. models/Mask3D/build/lib/mask3d/conf/scheduler/lambdalr.yaml +8 -0
  45. models/Mask3D/build/lib/mask3d/conf/scheduler/onecyclelr.yaml +11 -0
  46. models/Mask3D/build/lib/mask3d/conf/trainer/trainer.yaml +7 -0
  47. models/Mask3D/build/lib/mask3d/conf/trainer/trainer600.yaml +7 -0
  48. models/Mask3D/build/lib/mask3d/datasets/__init__.py +0 -0
  49. models/Mask3D/build/lib/mask3d/datasets/outdoor_semseg.py +206 -0
  50. models/Mask3D/build/lib/mask3d/datasets/preprocessing/__init__.py +0 -0
README.md CHANGED
@@ -0,0 +1,108 @@
+ <div align="center">
+
+ ## Open-YOLO 3D: Towards Fast and Accurate Open-Vocabulary 3D Instance Segmentation
+ <div align="center">
+ <img src="./docs/pipeline.png" width="100%">
+ </div>
+
+ </div>
+
+ <div align="center">
+ <a href="">Mohamed El Amine Boudjoghra</a><sup>1</sup>, <a href="">Angela Dai</a><sup>2</sup>, <a href="">Jean Lahoud</a><sup>1</sup>, <a href="">Hisham Cholakkal</a><sup>1</sup>, <a href="">Rao Muhammad Anwer</a><sup>1,3</sup>, <a href="">Salman Khan</a><sup>1,4</sup>, <a href="">Fahad Khan</a><sup>1,5</sup>
+
+ <sup>1</sup>Mohamed Bin Zayed University of Artificial Intelligence (MBZUAI) <sup>2</sup>Technical University of Munich (TUM) <sup>3</sup>Aalto University <sup>4</sup>Australian National University <sup>5</sup>Linköping University
+ </div>
+
+
+ <div align="center">
+
+ <a href='https://arxiv.org/abs/2406.02548' target="_blank">![paper](https://img.shields.io/badge/arXiv-Paper-<COLOR>.svg)</a>
+
+ </div>
+
+
+ ### News
+
+ * **30 May 2024**: [Open-YOLO 3D](https://arxiv.org/abs/2406.02548) released on arXiv. 📝
+ * **30 May 2024**: Code released. 💻
+
+ ### Abstract
+
+ Recent works on open-vocabulary 3D instance segmentation show strong promise, but at the cost of slow inference speed and high computation requirements. This high computation cost is typically due to their heavy reliance on 3D CLIP features, which require computationally expensive 2D foundation models like Segment Anything (SAM) and CLIP for multi-view aggregation into 3D. As a consequence, their applicability is limited in many real-world settings that require both fast and accurate predictions. To this end, we propose a fast yet accurate open-vocabulary 3D instance segmentation approach, named Open-YOLO 3D, that effectively leverages only 2D object detection from multi-view RGB images for open-vocabulary 3D instance segmentation.
+ We address this task by generating class-agnostic 3D masks for objects in the scene and associating them with text prompts.
+ We observe that the projection of class-agnostic 3D point cloud instances already holds instance information; thus, using SAM might only introduce redundancy that unnecessarily increases the inference time.
+ We empirically find that better matching of text prompts to 3D masks can be achieved more quickly with a 2D object detector. We validate our Open-YOLO 3D on two benchmarks, ScanNet200 and Replica,
+ under two scenarios: (i) with ground-truth masks, where labels are required for given object proposals, and (ii) with class-agnostic 3D proposals generated from a 3D proposal network. Our Open-YOLO 3D achieves state-of-the-art performance on both datasets while obtaining up to a 16x speedup compared to the best existing method in the literature. On the ScanNet200 validation set, our Open-YOLO 3D achieves a mean average precision (mAP) of 24.7% while operating at 22 seconds per scene.
+
+ ### Qualitative results
+ <br>
+
+ <div align="center">
+ <img src="./docs/qualitatives.png" width="100%">
+ </div>
+
+
+ ## Installation guide
+
+ Please see the [Installation guide](./docs/Installation.md) for how to set up the Conda environment and download the checkpoints, the pre-computed class-agnostic masks, and the ground-truth masks.
+
+ ## Data Preparation
+
+ Please see the [Data Preparation guide](./docs/Data_prep.md) for how to prepare the ScanNet200 and Replica datasets.
+
+ ## Results reproducibility
+
+ Please use the pre-computed class-agnostic masks we shared to reproduce the exact numbers reported in the paper.
+
+ **Reproduce the results of ScanNet200 with pre-computed masks (using Mask3D)**
+ ```
+ python run_evaluation.py --dataset_name scannet200 --path_to_3d_masks "./output/scannet200/scannet200_masks"
+ ```
+ **Reproduce the results of ScanNet200 with oracle 3D masks (ground-truth 3D masks)**
+ ```
+ python run_evaluation.py --dataset_name scannet200 --path_to_3d_masks "./output/scannet200/scannet200_ground_truth_masks" --is_gt
+ ```
+ **Reproduce the results of Replica with pre-computed masks (using Mask3D)**
+ ```
+ python run_evaluation.py --dataset_name replica --path_to_3d_masks "./output/replica/replica_masks"
+ ```
+ **Reproduce the results of Replica with oracle 3D masks (ground-truth 3D masks)**
+ ```
+ python run_evaluation.py --dataset_name replica --path_to_3d_masks "./output/replica/replica_ground_truth_masks" --is_gt
+ ```
+
+ You can evaluate without our 3D class-agnostic masks, but this may lead to variability in results due to sources of randomness in Mask3D's predictions, such as furthest point sampling. For results consistent with those reported in the paper, we recommend using our pre-computed masks.
+
+ **Reproduce the results of Replica or ScanNet200 without using our pre-computed masks**
+ ```
+ python run_evaluation.py --dataset_name $DATASET_NAME
+ ```
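+
+ The four reproduction runs above can also be scripted. Below is a minimal sketch (not part of the released code) that simply replays the `run_evaluation.py` invocations documented in this section, with the same paths and flags:
+
+ ```python
+ import subprocess
+
+ # (dataset_name, path_to_3d_masks, is_gt) for each documented configuration
+ RUNS = [
+     ("scannet200", "./output/scannet200/scannet200_masks", False),
+     ("scannet200", "./output/scannet200/scannet200_ground_truth_masks", True),
+     ("replica", "./output/replica/replica_masks", False),
+     ("replica", "./output/replica/replica_ground_truth_masks", True),
+ ]
+
+ for dataset, masks, is_gt in RUNS:
+     cmd = ["python", "run_evaluation.py",
+            "--dataset_name", dataset,
+            "--path_to_3d_masks", masks]
+     if is_gt:
+         cmd.append("--is_gt")
+     subprocess.run(cmd, check=True)  # stop early if any evaluation run fails
+ ```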
+
+ ## Single scene inference
+
+ ```python
+ from utils import OpenYolo3D
+
+ openyolo3d = OpenYolo3D("$(pwd)/pretrained/config.yaml")  # Initialize the model; the text prompts are defined in the config.
+ prediction = openyolo3d.predict("$(pwd)/data/replica/office0", 6553.5)  # Predict the instance masks and labels (takes around 20 seconds in total).
+ openyolo3d.save_output_as_ply("$(pwd)/sample/output.ply", True)  # Save a .ply file for visualization; you can open the output scene in MeshLab.
+ ```
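+
+ The same interface extends naturally to multiple scenes. A minimal sketch, assuming the `OpenYolo3D` API and the Replica layout from the example above (the depth scale `6553.5` is taken from that example; treat it and the paths as placeholders for your own data):
+
+ ```python
+ from pathlib import Path
+
+ from utils import OpenYolo3D
+
+ openyolo3d = OpenYolo3D("pretrained/config.yaml")  # one model instance, reused across scenes
+
+ for scene_dir in sorted(Path("data/replica").iterdir()):
+     if not scene_dir.is_dir():
+         continue
+     openyolo3d.predict(str(scene_dir), 6553.5)  # instance masks and labels for this scene
+     openyolo3d.save_output_as_ply(f"sample/{scene_dir.name}.ply", True)
+ ```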
+
+ ## Acknowledgments
+ We would like to thank the authors of <a href="https://github.com/cvg/Mask3D">Mask3D</a> and <a href="https://github.com/AILab-CVC/YOLO-World">YOLO-World</a> for their work, on which our model builds.
+
+ ## BibTeX :pray:
+ ```
+ @misc{boudjoghra2024openyolo,
+       title={Open-YOLO 3D: Towards Fast and Accurate Open-Vocabulary 3D Instance Segmentation},
+       author={Mohamed El Amine Boudjoghra and Angela Dai and Jean Lahoud and Hisham Cholakkal and Rao Muhammad Anwer and Salman Khan and Fahad Shahbaz Khan},
+       year={2024},
+       eprint={2406.02548},
+       archivePrefix={arXiv},
+       primaryClass={cs.CV}
+ }
+ ```
+
environment.yml ADDED
@@ -0,0 +1,216 @@
+ name: openyolo3d
+ channels:
+   - anaconda
+   - defaults
+ dependencies:
+   - _libgcc_mutex=0.1=main
+   - _openmp_mutex=5.1=1_gnu
+   - blas=1.0=openblas
+   - boltons=23.0.0=py310h06a4308_0
+   - brotlipy=0.7.0=py310h7f8727e_1002
+   - bzip2=1.0.8=h7b6447c_0
+   - ca-certificates=2023.01.10=h06a4308_0
+   - certifi=2022.12.7=py310h06a4308_0
+   - cffi=1.15.1=py310h5eee18b_3
+   - charset-normalizer=2.0.4=pyhd3eb1b0_0
+   - conda=23.3.1=py310h06a4308_0
+   - conda-content-trust=0.1.3=py310h06a4308_0
+   - conda-package-handling=2.0.2=py310h06a4308_0
+   - conda-package-streaming=0.7.0=py310h06a4308_0
+   - cryptography=39.0.1=py310h9ce1e76_0
+   - idna=3.4=py310h06a4308_0
+   - jsonpatch=1.32=pyhd3eb1b0_0
+   - jsonpointer=2.1=pyhd3eb1b0_0
+   - ld_impl_linux-64=2.38=h1181459_1
+   - libffi=3.4.2=h6a678d5_6
+   - libgcc-ng=11.2.0=h1234567_1
+   - libgfortran-ng=11.2.0=h00389a5_1
+   - libgfortran5=11.2.0=h1234567_1
+   - libgomp=11.2.0=h1234567_1
+   - libopenblas=0.3.21=h043d6bf_0
+   - libstdcxx-ng=11.2.0=h1234567_1
+   - libuuid=1.41.5=h5eee18b_0
+   - ncurses=6.4=h6a678d5_0
+   - nomkl=3.0=0
+   - openblas-devel=0.3.21=h06a4308_0
+   - openssl=1.1.1s=h7f8727e_0
+   - packaging=23.0=py310h06a4308_0
+   - pluggy=1.0.0=py310h06a4308_1
+   - pycosat=0.6.4=py310h5eee18b_0
+   - pycparser=2.21=pyhd3eb1b0_0
+   - pyopenssl=23.0.0=py310h06a4308_0
+   - pysocks=1.7.1=py310h06a4308_0
+   - python=3.10.9=h7a1cb2a_0
+   - readline=8.2=h5eee18b_0
+   - requests=2.28.1=py310h06a4308_1
+   - ruamel.yaml=0.17.21=py310h5eee18b_0
+   - ruamel.yaml.clib=0.2.6=py310h5eee18b_1
+   - setuptools=65.6.3=py310h06a4308_0
+   - six=1.16.0=pyhd3eb1b0_1
+   - sqlite=3.41.2=h5eee18b_0
+   - tk=8.6.12=h1ccaba5_0
+   - toolz=0.12.0=py310h06a4308_0
+   - tqdm=4.65.0=py310h2f386ee_0
+   - urllib3=1.26.15=py310h06a4308_0
+   - wheel=0.37.1=pyhd3eb1b0_0
+   - xz=5.2.10=h5eee18b_1
+   - zlib=1.2.13=h5eee18b_0
+   - zstandard=0.19.0=py310h5eee18b_0
+   - pip
+   - pip:
+       - absl-py==1.4.0
+       - addict==2.4.0
+       - aiohttp==3.8.4
+       - aiosignal==1.3.1
+       # - albumentations==1.2.1 #manual
+       - antlr4-python3-runtime==4.8
+       - anyio==3.6.2
+       - appdirs==1.4.4
+       - asttokens==2.2.1
+       - async-timeout==4.0.2
+       - attrs==23.1.0
+       - backcall==0.2.0
+       - black==21.4b2
+       - cachetools==5.3.0
+       - click==8.1.3
+       - cloudpickle==2.1.0
+       - comm==0.1.3
+       - configargparse==1.5.3
+       - contourpy==1.0.7
+       - cycler==0.11.0
+       - dash==2.9.3
+       - dash-core-components==2.0.0
+       - dash-html-components==2.0.0
+       - dash-table==5.0.0
+       - debugpy==1.6.7
+       - decorator==5.1.1
+       # - detectron2==0.6
+       - docker-pycreds==0.4.0
+       - executing==1.2.0
+       - fastapi==0.95.1
+       - fastjsonschema==2.16.3
+       - fire==0.4.0
+       - flake8==6.0.0
+       - flask==2.2.3
+       - fonttools==4.39.3
+       - frozenlist==1.3.3
+       - fsspec==2023.4.0
+       # - fvcore==0.1.5.post20220512 #manual
+       - gitdb==4.0.10
+       - gitpython==3.1.31
+       - google-auth==2.17.3
+       - google-auth-oauthlib==1.0.0
+       - grpcio==1.54.0
+       - h11==0.14.0
+       - hydra-core==1.0.5
+       - imageio==2.21.1
+       - importlib-metadata==3.10.1
+       - iopath==0.1.10
+       - ipykernel==6.22.0
+       - ipython==8.12.0
+       - ipywidgets==8.0.6
+       - itsdangerous==2.1.2
+       - jedi==0.18.2
+       - jinja2==3.1.2
+       - joblib==1.2.0
+       - jsonschema==4.17.3
+       - jupyter-client==8.2.0
+       - jupyter-core==5.3.0
+       - jupyterlab-widgets==3.0.7
+       - kiwisolver==1.4.4
+       - lazy-loader==0.2
+       - loguru==0.6.0
+       - markdown==3.4.3
+       - markupsafe==2.1.2
+       - matplotlib==3.7.1
+       - matplotlib-inline==0.1.6
+       # - minkowskiengine==0.5.4
+       - multidict==6.0.4
+       - mypy-extensions==1.0.0
+       - natsort==8.3.1
+       - nbformat==5.7.0
+       - nest-asyncio==1.5.6
+       - networkx==3.1
+       - ninja==1.10.2.3
+       - numpy==1.24.2
+       - oauthlib==3.2.2
+       # - omegaconf==2.0.6 #manual
+       # - open3d==0.17.0 #manual
+       - opencv-python-headless==4.7.0.72
+       - pandas==2.0.0
+       - parso==0.8.3
+       - pathspec==0.11.1
+       - pathtools==0.1.2
+       - pexpect==4.8.0
+       - pickleshare==0.7.5
+       - pillow==9.5.0
+       - pip==23.1
+       - platformdirs==3.2.0
+       - plotly==5.14.1
+       - plyfile==0.7.4
+       # - pointnet2==0.0.0
+       - portalocker==2.7.0
+       - prompt-toolkit==3.0.38
+       - protobuf==4.22.3
+       - psutil==5.9.5
+       - ptyprocess==0.7.0
+       - pure-eval==0.2.2
+       - pyasn1==0.5.0
+       - pyasn1-modules==0.3.0
+       - pycocotools==2.0.4
+       - pydantic==1.10.7
+       - pydeprecate==0.3.2
+       - pygments==2.15.1
+       - pyparsing==3.0.9
+       - pyquaternion==0.9.9
+       - pyrsistent==0.19.3
+       - python-dateutil==2.8.2
+       - python-dotenv==0.20.0
+       - python-multipart==0.0.6
+       # - pytorch-lightning==1.7.2
+       - pytz==2023.3
+       - pyviz3d==0.2.28
+       - pywavelets==1.4.1
+       - pyyaml==5.3.1
+       - pyzmq==25.0.2
+       - qudida==0.0.4
+       - regex==2023.3.23
+       - requests-oauthlib==1.3.1
+       - rsa==4.9
+       - scikit-image==0.20.0
+       - scikit-learn==1.1.2
+       - scipy==1.9.0
+       - sentry-sdk==1.20.0
+       - setproctitle==1.3.2
+       - smmap==5.0.0
+       - sniffio==1.3.0
+       - stack-data==0.6.2
+       - starlette==0.26.1
+       - tabulate==0.9.0
+       - tenacity==8.2.2
+       - tensorboard==2.12.2
+       - tensorboard-data-server==0.7.0
+       - tensorboard-plugin-wit==1.8.1
+       - termcolor==2.2.0
+       - threadpoolctl==3.1.0
+       - tifffile==2023.4.12
+       - toml==0.10.2
+       # - torch==1.12.1+cu113
+       # - torch-scatter==2.1.1
+       # - torchmetrics==0.11.4
+       # - torchvision==0.13.1+cu113
+       - tornado==6.3
+       - traitlets==5.9.0
+       - trimesh==3.14.0
+       - typing-extensions==4.5.0
+       - tzdata==2023.3
+       - uvicorn==0.21.1
+       - volumentations==0.1.8
+       - wandb==0.15.0
+       - wcwidth==0.2.6
+       - werkzeug==2.2.3
+       - widgetsnbextension==4.0.7
+       - yacs==0.1.8
+       - yarl==1.8.2
+       - zipp==3.15.0
+ prefix: /opt/conda
models/Mask3D/LICENSE ADDED
@@ -0,0 +1,22 @@
+ MIT License
+
+ Copyright (c) 2022
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
+
models/Mask3D/MANIFEST.in ADDED
@@ -0,0 +1 @@
+ recursive-include mask3d/conf *.yaml
models/Mask3D/README.md ADDED
@@ -0,0 +1,289 @@
+ # Packaged version of Mask3D to be used in LabelMaker
+
+ ## Installation
+
+ ```bash
+ # Some users experienced issues on Ubuntu with an AMD CPU
+ # Install libopenblas-dev (issue #115, thanks WindWing)
+ # sudo apt-get install libopenblas-dev
+
+ export TORCH_CUDA_ARCH_LIST="6.0 6.1 6.2 7.0 7.2 7.5 8.0 8.6"
+
+ conda env create -f environment.yml
+
+ conda activate mask3d_cuda113
+
+ pip3 install torch==1.12.1+cu113 torchvision==0.13.1+cu113 --extra-index-url https://download.pytorch.org/whl/cu113
+ pip3 install torch-scatter -f https://data.pyg.org/whl/torch-1.12.1+cu113.html
+ pip3 install 'git+https://github.com/facebookresearch/detectron2.git@710e7795d0eeadf9def0e7ef957eea13532e34cf' --no-deps
+
+ mkdir third_party
+ cd third_party
+
+ git clone --recursive "https://github.com/NVIDIA/MinkowskiEngine"
+ cd MinkowskiEngine
+ git checkout 02fc608bea4c0549b0a7b00ca1bf15dee4a0b228
+ python setup.py install --force_cuda --blas=openblas
+
+ cd ..
+ git clone https://github.com/ScanNet/ScanNet.git
+ cd ScanNet/Segmentator
+ git checkout 3e5726500896748521a6ceb81271b0f5b2c0e7d2
+ make
+
+ cd ../../pointnet2
+ python setup.py install
+
+ cd ../../
+ pip3 install pytorch-lightning==1.7.2
+
+ pip install .
+ ```
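+
+ After installation, a quick sanity check is to confirm that the heavy dependencies import and can see the GPU. This is only a minimal sketch (it assumes the packages installed above expose the usual `__version__` attributes):
+
+ ```python
+ import torch
+ import MinkowskiEngine as ME
+ import detectron2  # installed from the pinned commit above
+
+ print("torch", torch.__version__, "| CUDA available:", torch.cuda.is_available())
+ print("MinkowskiEngine", ME.__version__)
+ print("detectron2", detectron2.__version__)
+ ```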
+
+ To use the model in your code, you need to download a checkpoint from the list below.
+ Afterwards, the basic model can be used like:
+
+ ```python
+ from mask3d import get_model
+
+ model = get_model(checkpoint_path='checkpoints/scannet200/scannet200_benchmark.ckpt')
+ ```
+
+ Here is a minimal example, assuming you have a point cloud in the folder `data`:
+
+ ```python
+ import torch
+
+ from mask3d import get_model, load_mesh, prepare_data, map_output_to_pointcloud, save_colorized_mesh
+
+ model = get_model('checkpoints/scannet200/scannet200_benchmark.ckpt')
+ model.eval()
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ model.to(device)
+
+ # load input data
+ pointcloud_file = 'data/pcl.ply'
+ mesh = load_mesh(pointcloud_file)
+
+ # prepare data
+ data, points, colors, features, unique_map, inverse_map = prepare_data(mesh, device)
+
+ # run model
+ with torch.no_grad():
+     outputs = model(data, raw_coordinates=features)
+
+ # map output to point cloud
+ labels = map_output_to_pointcloud(mesh, outputs, inverse_map)
+
+ # save colorized mesh
+ save_colorized_mesh(mesh, labels, 'data/pcl_labelled.ply', colormap='scannet200')
+ ```
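+
+ The returned `labels` are a tuple of per-instance binary masks and confidence scores (see `map_output_to_pointcloud` in `mask3d/__init__.py`). A minimal sketch for keeping only confident instances; the `0.8` threshold is an arbitrary example value:
+
+ ```python
+ masks, confidences = labels  # masks: (num_instances, num_points) bool, confidences: (num_instances,)
+
+ keep = confidences > 0.8  # arbitrary example threshold
+ confident_masks = masks[keep]
+ print(f"kept {confident_masks.shape[0]} of {masks.shape[0]} instances")
+ ```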
+
+ So far, only ScanNet200 checkpoints are supported. We are working on the ScanNet checkpoints.
+
+ # Original Information
+
+ ## Mask3D: Mask Transformer for 3D Instance Segmentation
+ <div align="center">
+ <a href="https://jonasschult.github.io/">Jonas Schult</a><sup>1</sup>, <a href="https://francisengelmann.github.io/">Francis Engelmann</a><sup>2,3</sup>, <a href="https://www.vision.rwth-aachen.de/person/10/">Alexander Hermans</a><sup>1</sup>, <a href="https://orlitany.github.io/">Or Litany</a><sup>4</sup>, <a href="https://inf.ethz.ch/people/person-detail.MjYyNzgw.TGlzdC8zMDQsLTg3NDc3NjI0MQ==.html">Siyu Tang</a><sup>3</sup>, <a href="https://www.vision.rwth-aachen.de/person/1/">Bastian Leibe</a><sup>1</sup>
+
+ <sup>1</sup>RWTH Aachen University <sup>2</sup>ETH AI Center <sup>3</sup>ETH Zurich <sup>4</sup>NVIDIA
+
+ Mask3D predicts accurate 3D semantic instances, achieving state-of-the-art results on ScanNet, ScanNet200, S3DIS, and STPLS3D.
+
+ [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/mask3d-for-3d-semantic-instance-segmentation/3d-instance-segmentation-on-scannetv2)](https://paperswithcode.com/sota/3d-instance-segmentation-on-scannetv2?p=mask3d-for-3d-semantic-instance-segmentation)
+ [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/mask3d-for-3d-semantic-instance-segmentation/3d-instance-segmentation-on-scannet200)](https://paperswithcode.com/sota/3d-instance-segmentation-on-scannet200?p=mask3d-for-3d-semantic-instance-segmentation)
+ [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/mask3d-for-3d-semantic-instance-segmentation/3d-instance-segmentation-on-s3dis)](https://paperswithcode.com/sota/3d-instance-segmentation-on-s3dis?p=mask3d-for-3d-semantic-instance-segmentation)
+ [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/mask3d-for-3d-semantic-instance-segmentation/3d-instance-segmentation-on-stpls3d)](https://paperswithcode.com/sota/3d-instance-segmentation-on-stpls3d?p=mask3d-for-3d-semantic-instance-segmentation)
+
+ <a href="https://pytorch.org/get-started/locally/"><img alt="PyTorch" src="https://img.shields.io/badge/PyTorch-ee4c2c?logo=pytorch&logoColor=white"></a>
+ <a href="https://pytorchlightning.ai/"><img alt="Lightning" src="https://img.shields.io/badge/-Lightning-792ee5?logo=pytorchlightning&logoColor=white"></a>
+ <a href="https://hydra.cc/"><img alt="Config: Hydra" src="https://img.shields.io/badge/Config-Hydra-89b8cd"></a>
+
+ ![teaser](./docs/teaser.jpg)
+
+ </div>
+ <br><br>
+
+ [[Project Webpage](https://jonasschult.github.io/Mask3D/)]
+ [[Paper](https://arxiv.org/abs/2210.03105)]
+ [[Demo](https://francisengelmann.github.io/mask3d/)]
+
+
+ ## News
+
+ * **17. January 2023**: Mask3D is accepted at ICRA 2023. :fire:
+ * **14. October 2022**: STPLS3D support added.
+ * **10. October 2022**: Mask3D ranks 2nd on the [STPLS3D Challenge](https://codalab.lisn.upsaclay.fr/competitions/4646#results) hosted by the [Urban3D Workshop](https://urban3dchallenge.github.io/) at ECCV 2022.
+ * **6. October 2022**: [Mask3D preprint](https://arxiv.org/abs/2210.03105) released on arXiv.
+ * **25. September 2022**: Code released.
+
+ ## Code structure
+ We adapt the codebase of [Mix3D](https://github.com/kumuji/mix3d), which provides a highly modularized framework for 3D semantic segmentation based on the MinkowskiEngine.
+
+ ```
+ ├── mix3d
+ │   ├── main_instance_segmentation.py <- the main file
+ │   ├── conf                          <- hydra configuration files
+ │   ├── datasets
+ │   │   ├── preprocessing             <- folder with preprocessing scripts
+ │   │   ├── semseg.py                 <- indoor dataset
+ │   │   └── utils.py
+ │   ├── models                        <- Mask3D modules
+ │   ├── trainer
+ │   │   ├── __init__.py
+ │   │   └── trainer.py                <- train loop
+ │   └── utils
+ ├── data
+ │   ├── processed                     <- folder for preprocessed datasets
+ │   └── raw                           <- folder for raw datasets
+ ├── scripts                           <- train scripts
+ ├── docs
+ ├── README.md
+ └── saved                             <- folder that stores models and logs
+ ```
+
+ ### Dependencies :memo:
+ The main dependencies of the project are the following:
+ ```yaml
+ python: 3.10.9
+ cuda: 11.3
+ ```
+ You can set up a conda environment as follows:
+ ```bash
+ # Some users experienced issues on Ubuntu with an AMD CPU
+ # Install libopenblas-dev (issue #115, thanks WindWing)
+ # sudo apt-get install libopenblas-dev
+
+ export TORCH_CUDA_ARCH_LIST="6.0 6.1 6.2 7.0 7.2 7.5 8.0 8.6"
+
+ conda env create -f environment.yml
+
+ conda activate mask3d_cuda113
+
+ pip3 install torch==1.12.1+cu113 torchvision==0.13.1+cu113 --extra-index-url https://download.pytorch.org/whl/cu113
+ pip3 install torch-scatter -f https://data.pyg.org/whl/torch-1.12.1+cu113.html
+ pip3 install 'git+https://github.com/facebookresearch/detectron2.git@710e7795d0eeadf9def0e7ef957eea13532e34cf' --no-deps
+
+ mkdir third_party
+ cd third_party
+
+ git clone --recursive "https://github.com/NVIDIA/MinkowskiEngine"
+ cd MinkowskiEngine
+ git checkout 02fc608bea4c0549b0a7b00ca1bf15dee4a0b228
+ python setup.py install --force_cuda --blas=openblas
+
+ cd ..
+ git clone https://github.com/ScanNet/ScanNet.git
+ cd ScanNet/Segmentator
+ git checkout 3e5726500896748521a6ceb81271b0f5b2c0e7d2
+ make
+
+ cd ../../pointnet2
+ python setup.py install
+
+ cd ../../
+ pip3 install pytorch-lightning==1.7.2
+ ```
+
+ ### Data preprocessing :hammer:
+ After installing the dependencies, we preprocess the datasets.
+
+ #### ScanNet / ScanNet200
+ First, we apply Felzenszwalb and Huttenlocher's graph-based image segmentation algorithm to the test scenes using the default parameters.
+ Please refer to the [original repository](https://github.com/ScanNet/ScanNet/tree/master/Segmentator) for details.
+ Put the resulting segmentations in `./data/raw/scannet_test_segments`.
+ ```
+ python -m datasets.preprocessing.scannet_preprocessing preprocess \
+     --data_dir="PATH_TO_RAW_SCANNET_DATASET" \
+     --save_dir="data/processed/scannet" \
+     --git_repo="PATH_TO_SCANNET_GIT_REPO" \
+     --scannet200=false/true
+ ```
+
+ #### S3DIS
+ The S3DIS dataset contains some small bugs, which we initially fixed manually. We will soon release a preprocessing script that directly preprocesses the original dataset. For the time being, please follow the instructions [here](https://github.com/JonasSchult/Mask3D/issues/8#issuecomment-1279535948) to fix the dataset manually. Afterwards, call the preprocessing script as follows:
+
+ ```
+ python -m datasets.preprocessing.s3dis_preprocessing preprocess \
+     --data_dir="PATH_TO_Stanford3dDataset_v1.2" \
+     --save_dir="data/processed/s3dis"
+ ```
+
+ #### STPLS3D
+ ```
+ python -m datasets.preprocessing.stpls3d_preprocessing preprocess \
+     --data_dir="PATH_TO_STPLS3D" \
+     --save_dir="data/processed/stpls3d"
+ ```
+
+ ### Training and testing :train2:
+ Train Mask3D on the ScanNet dataset:
+ ```bash
+ python main_instance_segmentation.py
+ ```
+ Please refer to the [config scripts](https://github.com/JonasSchult/Mask3D/tree/main/scripts) (for example [here](https://github.com/JonasSchult/Mask3D/blob/main/scripts/scannet/scannet_val.sh#L15)) for detailed instructions on how to reproduce our results.
+ In the simplest case, the inference command looks as follows:
+ ```bash
+ python main_instance_segmentation.py \
+     general.checkpoint='PATH_TO_CHECKPOINT.ckpt' \
+     general.train_mode=false
+ ```
+
+ ## Trained checkpoints :floppy_disk:
+ We provide detailed scores and network configurations with trained checkpoints.
+
+ ### [S3DIS](http://buildingparser.stanford.edu/dataset.html) (pretrained on ScanNet train+val)
+ Following PointGroup, HAIS and SoftGroup, we fine-tune a model pretrained on ScanNet ([config](./scripts/scannet/scannet_pretrain_for_s3dis.sh) and [checkpoint](https://omnomnom.vision.rwth-aachen.de/data/mask3d/checkpoints/s3dis/scannet_pretrained/scannet_pretrained.ckpt)).
+ | Dataset | AP | AP_50 | AP_25 | Config | Checkpoint :floppy_disk: | Scores :chart_with_upwards_trend: | Visualizations :telescope:
+ |:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|
+ | Area 1 | 69.3 | 81.9 | 87.7 | [config](scripts/s3dis/s3dis_pretrained.sh) | [checkpoint](https://omnomnom.vision.rwth-aachen.de/data/mask3d/checkpoints/s3dis/scannet_pretrained/area1_scannet_pretrained.ckpt) | [scores](./docs/detailed_scores/s3dis/scannet_pretrained/s3dis_area1_scannet_pretrained.txt) | [visualizations](https://omnomnom.vision.rwth-aachen.de/data/mask3d/visualizations/s3dis/scannet_pretrained/area_1/)
+ | Area 2 | 44.0 | 59.5 | 66.5 | [config](scripts/s3dis/s3dis_pretrained.sh) | [checkpoint](https://omnomnom.vision.rwth-aachen.de/data/mask3d/checkpoints/s3dis/scannet_pretrained/area2_scannet_pretrained.ckpt) | [scores](./docs/detailed_scores/s3dis/scannet_pretrained/s3dis_area2_scannet_pretrained.txt) | [visualizations](https://omnomnom.vision.rwth-aachen.de/data/mask3d/visualizations/s3dis/scannet_pretrained/area_2/)
+ | Area 3 | 73.4 | 83.2 | 88.2 | [config](scripts/s3dis/s3dis_pretrained.sh) | [checkpoint](https://omnomnom.vision.rwth-aachen.de/data/mask3d/checkpoints/s3dis/scannet_pretrained/area3_scannet_pretrained.ckpt) | [scores](./docs/detailed_scores/s3dis/scannet_pretrained/s3dis_area3_scannet_pretrained.txt) | [visualizations](https://omnomnom.vision.rwth-aachen.de/data/mask3d/visualizations/s3dis/scannet_pretrained/area_3/)
+ | Area 4 | 58.0 | 69.5 | 74.9 | [config](scripts/s3dis/s3dis_pretrained.sh) | [checkpoint](https://omnomnom.vision.rwth-aachen.de/data/mask3d/checkpoints/s3dis/scannet_pretrained/area4_scannet_pretrained.ckpt) | [scores](./docs/detailed_scores/s3dis/scannet_pretrained/s3dis_area4_scannet_pretrained.txt) | [visualizations](https://omnomnom.vision.rwth-aachen.de/data/mask3d/visualizations/s3dis/scannet_pretrained/area_4/)
+ | Area 5 | 57.8 | 71.9 | 77.2 | [config](scripts/s3dis/s3dis_pretrained.sh) | [checkpoint](https://omnomnom.vision.rwth-aachen.de/data/mask3d/checkpoints/s3dis/scannet_pretrained/area5_scannet_pretrained.ckpt) | [scores](./docs/detailed_scores/s3dis/scannet_pretrained/s3dis_area5_scannet_pretrained.txt) | [visualizations](https://omnomnom.vision.rwth-aachen.de/data/mask3d/visualizations/s3dis/scannet_pretrained/area_5/)
+ | Area 6 | 68.4 | 79.9 | 85.2 | [config](scripts/s3dis/s3dis_pretrained.sh) | [checkpoint](https://omnomnom.vision.rwth-aachen.de/data/mask3d/checkpoints/s3dis/scannet_pretrained/area6_scannet_pretrained.ckpt) | [scores](./docs/detailed_scores/s3dis/scannet_pretrained/s3dis_area6_scannet_pretrained.txt) | [visualizations](https://omnomnom.vision.rwth-aachen.de/data/mask3d/visualizations/s3dis/scannet_pretrained/area_6/)
+
+ ### [S3DIS](http://buildingparser.stanford.edu/dataset.html) (from scratch)
+
+ | Dataset | AP | AP_50 | AP_25 | Config | Checkpoint :floppy_disk: | Scores :chart_with_upwards_trend: | Visualizations :telescope:
+ |:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|
+ | Area 1 | 74.1 | 85.1 | 89.6 | [config](scripts/s3dis/s3dis_from_scratch.sh) | [checkpoint](https://omnomnom.vision.rwth-aachen.de/data/mask3d/checkpoints/s3dis/from_scratch/area1_from_scratch.ckpt) | [scores](./docs/detailed_scores/s3dis/from_scratch/s3dis_area1_from_scratch.txt) | [visualizations](https://omnomnom.vision.rwth-aachen.de/data/mask3d/visualizations/s3dis/from_scratch/area_1/)
+ | Area 2 | 44.9 | 57.1 | 67.9 | [config](scripts/s3dis/s3dis_from_scratch.sh) | [checkpoint](https://omnomnom.vision.rwth-aachen.de/data/mask3d/checkpoints/s3dis/from_scratch/area2_from_scratch.ckpt) | [scores](./docs/detailed_scores/s3dis/from_scratch/s3dis_area2_from_scratch.txt) | [visualizations](https://omnomnom.vision.rwth-aachen.de/data/mask3d/visualizations/s3dis/from_scratch/area_2/)
+ | Area 3 | 74.4 | 84.4 | 88.1 | [config](scripts/s3dis/s3dis_from_scratch.sh) | [checkpoint](https://omnomnom.vision.rwth-aachen.de/data/mask3d/checkpoints/s3dis/from_scratch/area3_from_scratch.ckpt) | [scores](./docs/detailed_scores/s3dis/from_scratch/s3dis_area3_from_scratch.txt) | [visualizations](https://omnomnom.vision.rwth-aachen.de/data/mask3d/visualizations/s3dis/from_scratch/area_3/)
+ | Area 4 | 63.8 | 74.7 | 81.1 | [config](scripts/s3dis/s3dis_from_scratch.sh) | [checkpoint](https://omnomnom.vision.rwth-aachen.de/data/mask3d/checkpoints/s3dis/from_scratch/area4_from_scratch.ckpt) | [scores](./docs/detailed_scores/s3dis/from_scratch/s3dis_area4_from_scratch.txt) | [visualizations](https://omnomnom.vision.rwth-aachen.de/data/mask3d/visualizations/s3dis/from_scratch/area_4/)
+ | Area 5 | 56.6 | 68.4 | 75.2 | [config](scripts/s3dis/s3dis_from_scratch.sh) | [checkpoint](https://omnomnom.vision.rwth-aachen.de/data/mask3d/checkpoints/s3dis/from_scratch/area5_from_scratch.ckpt) | [scores](./docs/detailed_scores/s3dis/from_scratch/s3dis_area5_from_scratch.txt) | [visualizations](https://omnomnom.vision.rwth-aachen.de/data/mask3d/visualizations/s3dis/from_scratch/area_5/)
+ | Area 6 | 73.3 | 83.4 | 87.8 | [config](scripts/s3dis/s3dis_from_scratch.sh) | [checkpoint](https://omnomnom.vision.rwth-aachen.de/data/mask3d/checkpoints/s3dis/from_scratch/area6_from_scratch.ckpt) | [scores](./docs/detailed_scores/s3dis/from_scratch/s3dis_area6_from_scratch.txt) | [visualizations](https://omnomnom.vision.rwth-aachen.de/data/mask3d/visualizations/s3dis/from_scratch/area_6/)
+
+ ### [ScanNet v2](https://kaldir.vc.in.tum.de/scannet_benchmark/semantic_instance_3d?metric=ap)
+
+ | Dataset | AP | AP_50 | AP_25 | Config | Checkpoint :floppy_disk: | Scores :chart_with_upwards_trend: | Visualizations :telescope:
+ |:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|
+ | ScanNet val | 55.2 | 73.7 | 83.5 | [config](scripts/scannet/scannet_val.sh) | [checkpoint](https://omnomnom.vision.rwth-aachen.de/data/mask3d/checkpoints/scannet/scannet_val.ckpt) | [scores](./docs/detailed_scores/scannet_val.txt) | [visualizations](https://omnomnom.vision.rwth-aachen.de/data/mask3d/visualizations/scannet/val/)
+ | ScanNet test | 56.6 | 78.0 | 87.0 | [config](scripts/scannet/scannet_benchmark.sh) | [checkpoint](https://omnomnom.vision.rwth-aachen.de/data/mask3d/checkpoints/scannet/scannet_benchmark.ckpt) | [scores](http://kaldir.vc.in.tum.de/scannet_benchmark/result_details?id=1081) | [visualizations](https://omnomnom.vision.rwth-aachen.de/data/mask3d/visualizations/scannet/test/)
+
+ ### [ScanNet 200](https://kaldir.vc.in.tum.de/scannet_benchmark/scannet200_semantic_instance_3d)
+
+ | Dataset | AP | AP_50 | AP_25 | Config | Checkpoint :floppy_disk: | Scores :chart_with_upwards_trend: | Visualizations :telescope:
+ |:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|
+ | ScanNet200 val | 27.4 | 37.0 | 42.3 | [config](scripts/scannet200/scannet200_val.sh) | [checkpoint](https://omnomnom.vision.rwth-aachen.de/data/mask3d/checkpoints/scannet200/scannet200_val.ckpt) | [scores](./docs/detailed_scores/scannet200_val.txt) | [visualizations](https://omnomnom.vision.rwth-aachen.de/data/mask3d/visualizations/scannet200/val/)
+ | ScanNet200 test | 27.8 | 38.8 | 44.5 | [config](scripts/scannet200/scannet200_benchmark.sh) | [checkpoint](https://omnomnom.vision.rwth-aachen.de/data/mask3d/checkpoints/scannet200/scannet200_benchmark.ckpt) | [scores](https://kaldir.vc.in.tum.de/scannet_benchmark/result_details?id=1242) | [visualizations](https://omnomnom.vision.rwth-aachen.de/data/mask3d/visualizations/scannet200/test/)
+
+ ### [STPLS3D](https://www.stpls3d.com/)
+
+ | Dataset | AP | AP_50 | AP_25 | Config | Checkpoint :floppy_disk: | Scores :chart_with_upwards_trend: | Visualizations :telescope:
+ |:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|
+ | STPLS3D val | 57.3 | 74.3 | 81.6 | [config](scripts/stpls3d/stpls3d_val.sh) | [checkpoint](https://omnomnom.vision.rwth-aachen.de/data/mask3d/checkpoints/stpls3d/stpls3d_val.ckpt) | [scores](./docs/detailed_scores/stpls3d.txt) | [visualizations](https://omnomnom.vision.rwth-aachen.de/data/mask3d/visualizations/stpls3d/)
+ | STPLS3D test | 63.4 | 79.2 | 85.6 | [config](scripts/stpls3d/stpls3d_benchmark.sh) | [checkpoint](https://omnomnom.vision.rwth-aachen.de/data/mask3d/checkpoints/stpls3d/stpls3d_benchmark.zip) | [scores](https://codalab.lisn.upsaclay.fr/competitions/4646#results) | visualizations
+
+ ## BibTeX :pray:
+ ```
+ @article{Schult23ICRA,
+   title     = {{Mask3D: Mask Transformer for 3D Semantic Instance Segmentation}},
+   author    = {Schult, Jonas and Engelmann, Francis and Hermans, Alexander and Litany, Or and Tang, Siyu and Leibe, Bastian},
+   booktitle = {{International Conference on Robotics and Automation (ICRA)}},
+   year      = {2023}
+ }
+ ```
models/Mask3D/__init__.py ADDED
File without changes
models/Mask3D/build/lib/mask3d/__init__.py ADDED
@@ -0,0 +1,216 @@
+ import hydra
+ import torch
+
+ from mask3d.models.mask3d import Mask3D
+ from mask3d.utils.utils import (
+     load_checkpoint_with_missing_or_exsessive_keys,
+     load_backbone_checkpoint_with_missing_or_exsessive_keys,
+ )
+
+
+ class InstanceSegmentation(torch.nn.Module):
+     def __init__(self, cfg):
+         super().__init__()
+         self.model = hydra.utils.instantiate(cfg.model)
+
+     def forward(self, x, raw_coordinates=None, point2segment=None):
+         return self.model(x, raw_coordinates=raw_coordinates, point2segment=point2segment)
+
+
+ from omegaconf import OmegaConf, DictConfig
+ import hydra
+ from hydra.core.global_hydra import GlobalHydra
+ from hydra.experimental import initialize, compose
+
+ # imports for input loading
+ import albumentations as A
+ import MinkowskiEngine as ME
+ import numpy as np
+ import open3d as o3d
+
+ # imports for output
+ from mask3d.datasets.scannet200.scannet200_constants import (VALID_CLASS_IDS_20, VALID_CLASS_IDS_200, SCANNET_COLOR_MAP_20, SCANNET_COLOR_MAP_200)
+
+
+ def get_model(checkpoint_path=None, dataset_name="scannet200"):
+     # Initialize the directory with config files
+     with initialize(config_path="conf"):
+         # Compose a configuration
+         cfg = compose(config_name="config_base_instance_segmentation.yaml")
+
+     cfg.general.checkpoint = checkpoint_path
+
+     # would be nice to avoid this hardcoding below
+     # dataset_name = checkpoint_path.split('/')[-1].split('_')[0]
+     if dataset_name == 'scannet200':
+         cfg.general.num_targets = 201
+         cfg.general.train_mode = False
+         cfg.general.eval_on_segments = True
+         cfg.general.topk_per_image = 300
+         cfg.general.use_dbscan = True
+         cfg.general.dbscan_eps = 0.95
+         cfg.general.export_threshold = 0.001
+
+         # data
+         cfg.data.num_labels = 200
+         cfg.data.test_mode = "validation"
+
+         # model
+         cfg.model.num_queries = 150
+
+     if dataset_name == 'scannet':
+         cfg.general.num_targets = 19
+         cfg.general.train_mode = False
+         cfg.general.eval_on_segments = True
+         cfg.general.topk_per_image = 300
+         cfg.general.use_dbscan = True
+         cfg.general.dbscan_eps = 0.95
+         cfg.general.export_threshold = 0.001
+
+         # data
+         cfg.data.num_labels = 20
+         cfg.data.test_mode = "test"
+
+         # model
+         cfg.model.num_queries = 150
+
+     # TODO: this has to be fixed and discussed with Jonas
+     # cfg.model.scene_min = -3.
+     # cfg.model.scene_max = 3.
+
+     # # Initialize the Hydra context
+     # hydra.core.global_hydra.GlobalHydra.instance().clear()
+     # hydra.initialize(config_path="conf")
+
+     # Load the configuration
+     # cfg = hydra.compose(config_name="config_base_instance_segmentation.yaml")
+     model = InstanceSegmentation(cfg)
+
+     if cfg.general.backbone_checkpoint is not None:
+         cfg, model = load_backbone_checkpoint_with_missing_or_exsessive_keys(
+             cfg, model
+         )
+     if cfg.general.checkpoint is not None:
+         cfg, model = load_checkpoint_with_missing_or_exsessive_keys(cfg, model)
+
+     return model
+
+
+ def load_mesh(pcl_file):
+     # load point cloud / mesh
+     input_mesh_path = pcl_file
+     mesh = o3d.io.read_triangle_mesh(input_mesh_path)
+     return mesh
+
+
+ def prepare_data(mesh, device):
+     # normalization for point cloud features
+     color_mean = (0.47793125906962, 0.4303257521323044, 0.3749598901421883)
+     color_std = (0.2834475483823543, 0.27566157565723015, 0.27018971370874995)
+     normalize_color = A.Normalize(mean=color_mean, std=color_std)
+
+     points = np.asarray(mesh.vertices)
+     colors = np.asarray(mesh.vertex_colors)
+     colors = colors * 255.
+
+     pseudo_image = colors.astype(np.uint8)[np.newaxis, :, :]
+     colors = np.squeeze(normalize_color(image=pseudo_image)["image"])
+
+     # voxelize at a 2 cm resolution; unique_map/inverse_map convert between
+     # the original points and the voxelized points
+     coords = np.floor(points / 0.02)
+     _, _, unique_map, inverse_map = ME.utils.sparse_quantize(
+         coordinates=coords,
+         features=colors,
+         return_index=True,
+         return_inverse=True,
+     )
+
+     sample_coordinates = coords[unique_map]
+     coordinates = [torch.from_numpy(sample_coordinates).int()]
+     sample_features = colors[unique_map]
+     features = [torch.from_numpy(sample_features).float()]
+
+     coordinates, _ = ME.utils.sparse_collate(coords=coordinates, feats=features)
+     features = torch.cat(features, dim=0)
+     data = ME.SparseTensor(
+         coordinates=coordinates,
+         features=features,
+         device=device,
+     )
+
+     return data, points, colors, features, unique_map, inverse_map
+
+
+ def map_output_to_pointcloud(mesh,
+                              outputs,
+                              inverse_map):
+     # parse predictions
+     logits = outputs["pred_logits"]
+     masks = outputs["pred_masks"]
+
+     # reformat predictions (drop the batch dimension)
+     logits = logits[0]
+     masks = masks[0]
+
+     labels = []
+     confidences = []
+     masks_binary = []
+
+     for i in range(len(logits)):
+         p_labels = torch.softmax(logits[i], dim=-1)
+         p_masks = torch.sigmoid(masks[:, i])
+         l = torch.argmax(p_labels, dim=-1)
+         c_label = torch.max(p_labels)
+         m = p_masks > 0.5
+         c_m = p_masks[m].sum() / (m.sum() + 1e-8)
+         c = c_label * c_m
+         labels.append(l.item())
+         confidences.append(c.item())
+         masks_binary.append(m[inverse_map])  # mapping the mask back to the original point cloud
+     return (torch.stack(masks_binary), torch.tensor(confidences))
+
+
+ def save_colorized_mesh(mesh, labels_mapped, output_file, colormap='scannet'):
+     # colorize mesh
+     colors = np.zeros((len(mesh.vertices), 3))
+     for li in np.unique(labels_mapped):
+         if colormap == 'scannet':
+             raise ValueError('Not implemented yet')
+         elif colormap == 'scannet200':
+             v_li = VALID_CLASS_IDS_200[int(li)]
+             colors[(labels_mapped == li)[:, 0], :] = SCANNET_COLOR_MAP_200[v_li]
+         else:
+             raise ValueError('Unknown colormap - not supported')
+
+     colors = colors / 255.
+     mesh.vertex_colors = o3d.utility.Vector3dVector(colors)
+     o3d.io.write_triangle_mesh(output_file, mesh)
+
+
+ if __name__ == '__main__':
+     model = get_model('checkpoints/scannet200/scannet200_benchmark.ckpt')
+     model.eval()
+     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+     model.to(device)
+
+     # load input data
+     pointcloud_file = 'data/pcl.ply'
+     mesh = load_mesh(pointcloud_file)
+
+     # prepare data
+     data, points, colors, features, unique_map, inverse_map = prepare_data(mesh, device)
+
+     # run model
+     with torch.no_grad():
+         outputs = model(data, raw_coordinates=features)
+
+     # map output to point cloud
+     labels = map_output_to_pointcloud(mesh, outputs, inverse_map)
+
+     # save colorized mesh
+     save_colorized_mesh(mesh, labels, 'data/pcl_labelled.ply', colormap='scannet200')
+
models/Mask3D/build/lib/mask3d/benchmark/__init__.py ADDED
File without changes
models/Mask3D/build/lib/mask3d/benchmark/evaluate_semantic_instance.py ADDED
@@ -0,0 +1,1141 @@
1
+ # Evaluates semantic instance task
2
+ # Adapted from the CityScapes evaluation: https://github.com/mcordts/cityscapesScripts/tree/master/cityscapesscripts/evaluation
3
+ # Input:
4
+ # - path to .txt prediction files
5
+ # - path to .txt ground truth files
6
+ # - output file to write results to
7
+ # Each .txt prediction file look like:
8
+ # [(pred0) rel. path to pred. mask over verts as .txt] [(pred0) label id] [(pred0) confidence]
9
+ # [(pred1) rel. path to pred. mask over verts as .txt] [(pred1) label id] [(pred1) confidence]
10
+ # [(pred2) rel. path to pred. mask over verts as .txt] [(pred2) label id] [(pred2) confidence]
11
+ # ...
12
+ #
13
+ # NOTE: The prediction files must live in the root of the given prediction path.
14
+ # Predicted mask .txt files must live in a subfolder.
15
+ # Additionally, filenames must not contain spaces.
16
+ # The relative paths to predicted masks must contain one integer per line,
17
+ # where each line corresponds to vertices in the *_vh_clean_2.ply (in that order).
18
+ # Non-zero integers indicate part of the predicted instance.
19
+ # The label ids specify the class of the corresponding mask.
20
+ # Confidence is a float confidence score of the mask.
21
+ #
22
+ # Note that only the valid classes are used for evaluation,
23
+ # i.e., any ground truth label not in the valid label set
24
+ # is ignored in the evaluation.
25
+ #
26
+ # example usage: evaluate_semantic_instance.py --scan_path [path to scan data] --output_file [output file]
27
+
28
+ # python imports
29
+ import math
30
+ import os, sys, argparse
31
+ import inspect
32
+ from copy import deepcopy
33
+ from uuid import uuid4
34
+
35
+ import torch
36
+
37
+ try:
38
+ import numpy as np
39
+ except:
40
+ print("Failed to import numpy package.")
41
+ sys.exit(-1)
42
+
43
+ from scipy import stats
44
+
45
+ # currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
46
+ # parentdir = os.path.dirname(currentdir)
47
+ # sys.path.insert(0,parentdir)
48
+ import benchmark.util as util
49
+ import benchmark.util_3d as util_3d
50
+
51
+ # parser = argparse.ArgumentParser()
52
+ # parser.add_argument('--gt_path', default='', help='path to directory of gt .txt files')
53
+ # parser.add_argument('--output_file', default='', help='output file [default: ./semantic_instance_evaluation.txt]')
54
+ # opt = parser.parse_args()
55
+
56
+ # if opt.output_file == '':
57
+ # opt.output_file = os.path.join(os.getcwd(), 'semantic_instance_evaluation.txt')
58
+
59
+
60
+ # ---------- Label info ---------- #
61
+ CLASS_LABELS = [
62
+ "cabinet",
63
+ "bed",
64
+ "chair",
65
+ "sofa",
66
+ "table",
67
+ "door",
68
+ "window",
69
+ "bookshelf",
70
+ "picture",
71
+ "counter",
72
+ "desk",
73
+ "curtain",
74
+ "refrigerator",
75
+ "shower curtain",
76
+ "toilet",
77
+ "sink",
78
+ "bathtub",
79
+ "otherfurniture",
80
+ ]
81
+ VALID_CLASS_IDS = np.array(
82
+ [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34, 36, 39]
83
+ )
84
+ ID_TO_LABEL = {}
85
+ LABEL_TO_ID = {}
86
+ for i in range(len(VALID_CLASS_IDS)):
87
+ LABEL_TO_ID[CLASS_LABELS[i]] = VALID_CLASS_IDS[i]
88
+ ID_TO_LABEL[VALID_CLASS_IDS[i]] = CLASS_LABELS[i]
89
+ # ---------- Evaluation params ---------- #
90
+ # overlaps for evaluation
91
+ opt = {}
92
+ opt["overlaps"] = np.append(np.arange(0.5, 0.95, 0.05), 0.25)
93
+ # minimum region size for evaluation [verts]
94
+ opt["min_region_sizes"] = np.array([100]) # 100 for s3dis, scannet
95
+ # distance thresholds [m]
96
+ opt["distance_threshes"] = np.array([float("inf")])
97
+ # distance confidences
98
+ opt["distance_confs"] = np.array([-float("inf")])
99
+
100
+
101
+ def evaluate_matches(matches):
102
+ overlaps = opt["overlaps"]
103
+ min_region_sizes = [opt["min_region_sizes"][0]]
104
+ dist_threshes = [opt["distance_threshes"][0]]
105
+ dist_confs = [opt["distance_confs"][0]]
106
+
107
+ # results: class x overlap
108
+ ap = np.zeros(
109
+ (len(dist_threshes), len(CLASS_LABELS), len(overlaps)), float
110
+ )
111
+ for di, (min_region_size, distance_thresh, distance_conf) in enumerate(
112
+ zip(min_region_sizes, dist_threshes, dist_confs)
113
+ ):
114
+ for oi, overlap_th in enumerate(overlaps):
115
+ pred_visited = {}
116
+ for m in matches:
117
+ for p in matches[m]["pred"]:
118
+ for label_name in CLASS_LABELS:
119
+ for p in matches[m]["pred"][label_name]:
120
+ if "uuid" in p:
121
+ pred_visited[p["uuid"]] = False
122
+ for li, label_name in enumerate(CLASS_LABELS):
123
+ y_true = np.empty(0)
124
+ y_score = np.empty(0)
125
+ hard_false_negatives = 0
126
+ has_gt = False
127
                has_pred = False
                for m in matches:
                    pred_instances = matches[m]["pred"][label_name]
                    gt_instances = matches[m]["gt"][label_name]
                    # filter groups in ground truth
                    gt_instances = [
                        gt
                        for gt in gt_instances
                        if gt["instance_id"] >= 1000
                        and gt["vert_count"] >= min_region_size
                        and gt["med_dist"] <= distance_thresh
                        and gt["dist_conf"] >= distance_conf
                    ]
                    if gt_instances:
                        has_gt = True
                    if pred_instances:
                        has_pred = True

                    cur_true = np.ones(len(gt_instances))
                    cur_score = np.ones(len(gt_instances)) * (-float("inf"))
                    cur_match = np.zeros(len(gt_instances), dtype=bool)
                    # collect matches
                    for (gti, gt) in enumerate(gt_instances):
                        found_match = False
                        num_pred = len(gt["matched_pred"])
                        for pred in gt["matched_pred"]:
                            # greedy assignments
                            if pred_visited[pred["uuid"]]:
                                continue
                            overlap = float(pred["intersection"]) / (
                                gt["vert_count"]
                                + pred["vert_count"]
                                - pred["intersection"]
                            )
                            if overlap > overlap_th:
                                confidence = pred["confidence"]
                                # if we already have a prediction for this gt,
                                # the prediction with the lower score is automatically a false positive
                                if cur_match[gti]:
                                    max_score = max(cur_score[gti], confidence)
                                    min_score = min(cur_score[gti], confidence)
                                    cur_score[gti] = max_score
                                    # append false positive
                                    cur_true = np.append(cur_true, 0)
                                    cur_score = np.append(cur_score, min_score)
                                    cur_match = np.append(cur_match, True)
                                # otherwise set score
                                else:
                                    found_match = True
                                    cur_match[gti] = True
                                    cur_score[gti] = confidence
                                    pred_visited[pred["uuid"]] = True
                        if not found_match:
                            hard_false_negatives += 1
                    # remove non-matched ground truth instances
                    cur_true = cur_true[cur_match]
                    cur_score = cur_score[cur_match]

                    # collect non-matched predictions as false positives
                    for pred in pred_instances:
                        found_gt = False
                        for gt in pred["matched_gt"]:
                            overlap = float(gt["intersection"]) / (
                                gt["vert_count"]
                                + pred["vert_count"]
                                - gt["intersection"]
                            )
                            if overlap > overlap_th:
                                found_gt = True
                                break
                        if not found_gt:
                            num_ignore = pred["void_intersection"]
                            for gt in pred["matched_gt"]:
                                # group?
                                if gt["instance_id"] < 1000:
                                    num_ignore += gt["intersection"]
                                # small ground truth instances
                                if (
                                    gt["vert_count"] < min_region_size
                                    or gt["med_dist"] > distance_thresh
                                    or gt["dist_conf"] < distance_conf
                                ):
                                    num_ignore += gt["intersection"]
                            proportion_ignore = (
                                float(num_ignore) / pred["vert_count"]
                            )
                            # if not ignored, append false positive
                            if proportion_ignore <= overlap_th:
                                cur_true = np.append(cur_true, 0)
                                confidence = pred["confidence"]
                                cur_score = np.append(cur_score, confidence)

                    # append to overall results
                    y_true = np.append(y_true, cur_true)
                    y_score = np.append(y_score, cur_score)

                # compute average precision
                if has_gt and has_pred:
                    # compute precision-recall curve first

                    # sorting and cumsum
                    score_arg_sort = np.argsort(y_score)
                    y_score_sorted = y_score[score_arg_sort]
                    y_true_sorted = y_true[score_arg_sort]
                    y_true_sorted_cumsum = np.cumsum(y_true_sorted)

                    # unique thresholds
                    (thresholds, unique_indices) = np.unique(
                        y_score_sorted, return_index=True
                    )
                    num_prec_recall = len(unique_indices) + 1

                    # prepare precision recall
                    num_examples = len(y_score_sorted)
                    # https://github.com/ScanNet/ScanNet/pull/26
                    # all predictions are non-matched but also all of them are ignored and not counted as FP
                    # y_true_sorted_cumsum is empty
                    # num_true_examples = y_true_sorted_cumsum[-1]
                    num_true_examples = (
                        y_true_sorted_cumsum[-1]
                        if len(y_true_sorted_cumsum) > 0
                        else 0
                    )
                    precision = np.zeros(num_prec_recall)
                    recall = np.zeros(num_prec_recall)

                    # deal with the first point
                    y_true_sorted_cumsum = np.append(y_true_sorted_cumsum, 0)
                    # deal with remaining
                    for idx_res, idx_scores in enumerate(unique_indices):
                        cumsum = y_true_sorted_cumsum[idx_scores - 1]
                        tp = num_true_examples - cumsum
                        fp = num_examples - idx_scores - tp
                        fn = cumsum + hard_false_negatives
                        p = float(tp) / (tp + fp)
                        r = float(tp) / (tp + fn)
                        precision[idx_res] = p
                        recall[idx_res] = r

                    # first point in curve is artificial
                    precision[-1] = 1.0
                    recall[-1] = 0.0

                    # compute average of precision-recall curve
                    recall_for_conv = np.copy(recall)
                    recall_for_conv = np.append(
                        recall_for_conv[0], recall_for_conv
                    )
                    recall_for_conv = np.append(recall_for_conv, 0.0)

                    stepWidths = np.convolve(
                        recall_for_conv, [-0.5, 0, 0.5], "valid"
                    )
                    # integration is now simply a dot product
                    ap_current = np.dot(precision, stepWidths)

                elif has_gt:
                    ap_current = 0.0
                else:
                    ap_current = float("nan")
                ap[di, li, oi] = ap_current
    return ap
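
# Editor's note (hedged sketch): with the centered kernel [-0.5, 0, 0.5],
# "valid" convolution gives stepWidths[i] = (recall_for_conv[i] -
# recall_for_conv[i + 2]) / 2, i.e. the width of each recall step:
#
#   >>> import numpy as np
#   >>> recall = np.array([1.0, 0.5, 0.0])
#   >>> r = np.append(np.append(recall[0], recall), 0.0)
#   >>> np.convolve(r, [-0.5, 0, 0.5], "valid")
#   array([0.25, 0.5 , 0.25])
#
# so np.dot(precision, stepWidths) integrates precision over recall.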


def compute_averages(aps):
    d_inf = 0
    o50 = np.where(np.isclose(opt["overlaps"], 0.5))
    o25 = np.where(np.isclose(opt["overlaps"], 0.25))
    oAllBut25 = np.where(np.logical_not(np.isclose(opt["overlaps"], 0.25)))
    avg_dict = {}
    # avg_dict['all_ap'] = np.nanmean(aps[ d_inf,:,: ])
    avg_dict["all_ap"] = np.nanmean(aps[d_inf, :, oAllBut25])
    avg_dict["all_ap_50%"] = np.nanmean(aps[d_inf, :, o50])
    avg_dict["all_ap_25%"] = np.nanmean(aps[d_inf, :, o25])
    avg_dict["classes"] = {}
    for (li, label_name) in enumerate(CLASS_LABELS):
        avg_dict["classes"][label_name] = {}
        # avg_dict["classes"][label_name]["ap"] = np.average(aps[ d_inf,li, :])
        avg_dict["classes"][label_name]["ap"] = np.average(
            aps[d_inf, li, oAllBut25]
        )
        avg_dict["classes"][label_name]["ap50%"] = np.average(
            aps[d_inf, li, o50]
        )
        avg_dict["classes"][label_name]["ap25%"] = np.average(
            aps[d_inf, li, o25]
        )
    return avg_dict
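
# Editor's note (hedged): `aps` is indexed as aps[dist_idx, class_idx,
# overlap_idx]; assuming the standard ScanNet overlap set defined earlier
# in this file, 0.25 is the trailing entry, so o25 selects the last column:
#
#   >>> import numpy as np
#   >>> overlaps = np.append(np.arange(0.5, 0.95, 0.05), 0.25)  # assumed opt["overlaps"]
#   >>> np.where(np.isclose(overlaps, 0.25))[0]
#   array([9])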


def make_pred_info(pred: dict):
    # pred = {'pred_scores': (num_queries,), 'pred_classes': (num_queries,),
    #         'pred_masks': (N, num_queries)}
    pred_info = {}
    assert (
        pred["pred_classes"].shape[0]
        == pred["pred_scores"].shape[0]
        == pred["pred_masks"].shape[1]
    )
    for i in range(len(pred["pred_classes"])):
        info = {}
        info["label_id"] = pred["pred_classes"][i]
        info["conf"] = pred["pred_scores"][i]
        info["mask"] = pred["pred_masks"][:, i]
        pred_info[uuid4()] = info  # we later need to identify these objects
    return pred_info
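
# Editor's note: a minimal sketch of the `pred` dict consumed above, with
# hypothetical shapes (N=6 points, Q=2 predicted instances):
#
#   >>> import numpy as np
#   >>> pred = {
#   ...     "pred_classes": np.array([3, 5]),     # (Q,) label ids
#   ...     "pred_scores": np.array([0.9, 0.4]),  # (Q,) confidences
#   ...     "pred_masks": np.zeros((6, 2)),       # (N, Q) per-point masks
#   ... }
#   >>> len(make_pred_info(pred))
#   2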


def assign_instances_for_scan(pred: dict, gt_file: str):
    pred_info = make_pred_info(pred)
    try:
        gt_ids = util_3d.load_ids(gt_file)
    except Exception as e:
        util.print_error("unable to load " + gt_file + ": " + str(e))

    # get gt instances
    gt_instances = util_3d.get_instances(
        gt_ids, VALID_CLASS_IDS, CLASS_LABELS, ID_TO_LABEL
    )
    # associate
    gt2pred = deepcopy(gt_instances)
    for label in gt2pred:
        for gt in gt2pred[label]:
            gt["matched_pred"] = []
    pred2gt = {}
    for label in CLASS_LABELS:
        pred2gt[label] = []
    num_pred_instances = 0
    # mask of void labels in the ground truth
    bool_void = np.logical_not(np.in1d(gt_ids // 1000, VALID_CLASS_IDS))
    # go through all prediction masks
    for uuid in pred_info:
        label_id = int(pred_info[uuid]["label_id"])
        conf = pred_info[uuid]["conf"]
        if label_id not in ID_TO_LABEL:
            continue
        label_name = ID_TO_LABEL[label_id]
        # read the mask
        pred_mask = pred_info[uuid]["mask"]
        assert len(pred_mask) == len(gt_ids)
        # convert to binary
        pred_mask = np.not_equal(pred_mask, 0)
        num = np.count_nonzero(pred_mask)
        if num < opt["min_region_sizes"][0]:
            continue  # skip if too small

        pred_instance = {}
        pred_instance["uuid"] = uuid
        pred_instance["pred_id"] = num_pred_instances
        pred_instance["label_id"] = label_id
        pred_instance["vert_count"] = num
        pred_instance["confidence"] = conf
        pred_instance["void_intersection"] = np.count_nonzero(
            np.logical_and(bool_void, pred_mask)
        )

        # matched gt instances
        matched_gt = []
        # go through all gt instances with matching label
        for (gt_num, gt_inst) in enumerate(gt2pred[label_name]):
            intersection = np.count_nonzero(
                np.logical_and(gt_ids == gt_inst["instance_id"], pred_mask)
            )
            if intersection > 0:
                gt_copy = gt_inst.copy()
                pred_copy = pred_instance.copy()
                gt_copy["intersection"] = intersection
                pred_copy["intersection"] = intersection
                matched_gt.append(gt_copy)
                gt2pred[label_name][gt_num]["matched_pred"].append(pred_copy)
        pred_instance["matched_gt"] = matched_gt
        num_pred_instances += 1
        pred2gt[label_name].append(pred_instance)

    return gt2pred, pred2gt
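
# Editor's note: ground-truth ids follow the ScanNet encoding
# id = semantic_label * 1000 + instance_index, which is why `gt_ids // 1000`
# recovers the semantic label for the void mask above:
#
#   >>> import numpy as np
#   >>> np.array([5003, 5003, 39001, 0]) // 1000
#   array([ 5,  5, 39,  0])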


def print_results(avgs):
    sep = ""
    col1 = ":"
    lineLen = 64

    print("")
    print("#" * lineLen)
    line = ""
    line += "{:<15}".format("what") + sep + col1
    line += "{:>15}".format("AP") + sep
    line += "{:>15}".format("AP_50%") + sep
    line += "{:>15}".format("AP_25%") + sep
    print(line)
    print("#" * lineLen)

    for (li, label_name) in enumerate(CLASS_LABELS):
        ap_avg = avgs["classes"][label_name]["ap"]
        ap_50o = avgs["classes"][label_name]["ap50%"]
        ap_25o = avgs["classes"][label_name]["ap25%"]
        line = "{:<15}".format(label_name) + sep + col1
        line += sep + "{:>15.3f}".format(ap_avg) + sep
        line += sep + "{:>15.3f}".format(ap_50o) + sep
        line += sep + "{:>15.3f}".format(ap_25o) + sep
        print(line)

    all_ap_avg = avgs["all_ap"]
    all_ap_50o = avgs["all_ap_50%"]
    all_ap_25o = avgs["all_ap_25%"]

    print("-" * lineLen)
    line = "{:<15}".format("average") + sep + col1
    line += "{:>15.3f}".format(all_ap_avg) + sep
    line += "{:>15.3f}".format(all_ap_50o) + sep
    line += "{:>15.3f}".format(all_ap_25o) + sep
    print(line)
    print("")


def write_result_file(avgs, filename):
    _SPLITTER = ","
    with open(filename, "w") as f:
        f.write(
            _SPLITTER.join(["class", "class id", "ap", "ap50", "ap25"]) + "\n"
        )
        for i in range(len(VALID_CLASS_IDS)):
            class_name = CLASS_LABELS[i]
            class_id = VALID_CLASS_IDS[i]
            ap = avgs["classes"][class_name]["ap"]
            ap50 = avgs["classes"][class_name]["ap50%"]
            ap25 = avgs["classes"][class_name]["ap25%"]
            f.write(
                _SPLITTER.join(
                    [str(x) for x in [class_name, class_id, ap, ap50, ap25]]
                )
                + "\n"
            )


def evaluate(
    preds: dict, gt_path: str, output_file: str, dataset: str = "scannet"
):
    global CLASS_LABELS
    global VALID_CLASS_IDS
    global ID_TO_LABEL
    global LABEL_TO_ID
    global opt

    if dataset == "stpls3d":
        opt["min_region_sizes"] = np.array([10])

        CLASS_LABELS = [
            "Build", "LowVeg", "MediumVeg", "HighVeg", "Vehicle", "Truck",
            "Aircraft", "MilitaryVeh", "Bike", "Motorcycle", "LightPole",
            "StreetSign", "Clutter", "Fence",
        ]
        VALID_CLASS_IDS = np.array(
            [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
        )

        ID_TO_LABEL = {}
        LABEL_TO_ID = {}
        for i in range(len(VALID_CLASS_IDS)):
            LABEL_TO_ID[CLASS_LABELS[i]] = VALID_CLASS_IDS[i]
            ID_TO_LABEL[VALID_CLASS_IDS[i]] = CLASS_LABELS[i]

    if dataset == "s3dis":
        CLASS_LABELS = [
            "ceiling", "floor", "wall", "beam", "column", "window", "door",
            "table", "chair", "sofa", "bookcase", "board", "clutter",
        ]
        VALID_CLASS_IDS = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13])
        ID_TO_LABEL = {}
        LABEL_TO_ID = {}
        for i in range(len(VALID_CLASS_IDS)):
            LABEL_TO_ID[CLASS_LABELS[i]] = VALID_CLASS_IDS[i]
            ID_TO_LABEL[VALID_CLASS_IDS[i]] = CLASS_LABELS[i]

    if dataset == "scannet200":
        CLASS_LABELS = (
            "chair", "table", "door", "couch", "cabinet", "shelf", "desk",
            "office chair", "bed", "pillow", "sink", "picture", "window",
            "toilet", "bookshelf", "monitor", "curtain", "book", "armchair",
            "coffee table", "box", "refrigerator", "lamp", "kitchen cabinet",
            "towel", "clothes", "tv", "nightstand", "counter", "dresser",
            "stool", "cushion", "plant", "ceiling", "bathtub", "end table",
            "dining table", "keyboard", "bag", "backpack", "toilet paper",
            "printer", "tv stand", "whiteboard", "blanket", "shower curtain",
            "trash can", "closet", "stairs", "microwave", "stove", "shoe",
            "computer tower", "bottle", "bin", "ottoman", "bench", "board",
            "washing machine", "mirror", "copier", "basket", "sofa chair",
            "file cabinet", "fan", "laptop", "shower", "paper", "person",
            "paper towel dispenser", "oven", "blinds", "rack", "plate",
            "blackboard", "piano", "suitcase", "rail", "radiator",
            "recycling bin", "container", "wardrobe", "soap dispenser",
            "telephone", "bucket", "clock", "stand", "light",
            "laundry basket", "pipe", "clothes dryer", "guitar",
            "toilet paper holder", "seat", "speaker", "column", "bicycle",
            "ladder", "bathroom stall", "shower wall", "cup", "jacket",
            "storage bin", "coffee maker", "dishwasher", "paper towel roll",
            "machine", "mat", "windowsill", "bar", "toaster",
            "bulletin board", "ironing board", "fireplace", "soap dish",
            "kitchen counter", "doorframe", "toilet paper dispenser",
            "mini fridge", "fire extinguisher", "ball", "hat",
            "shower curtain rod", "water cooler", "paper cutter", "tray",
            "shower door", "pillar", "ledge", "toaster oven", "mouse",
            "toilet seat cover dispenser", "furniture", "cart",
            "storage container", "scale", "tissue box", "light switch",
            "crate", "power outlet", "decoration", "sign", "projector",
            "closet door", "vacuum cleaner", "candle", "plunger",
            "stuffed animal", "headphones", "dish rack", "broom",
            "guitar case", "range hood", "dustpan", "hair dryer",
            "water bottle", "handicap bar", "purse", "vent", "shower floor",
            "water pitcher", "mailbox", "bowl", "paper bag", "alarm clock",
            "music stand", "projector screen", "divider",
            "laundry detergent", "bathroom counter", "object",
            "bathroom vanity", "closet wall", "laundry hamper",
            "bathroom stall door", "ceiling light", "trash bin", "dumbbell",
            "stair rail", "tube", "bathroom cabinet", "cd case",
            "closet rod", "coffee kettle", "structure", "shower head",
            "keyboard piano", "case of water bottles", "coat rack",
            "storage organizer", "folded chair", "fire alarm",
            "power strip", "calendar", "poster", "potted plant", "luggage",
            "mattress",
        )

        VALID_CLASS_IDS = np.array(
            (
                2, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 21,
                22, 23, 24, 26, 27, 28, 29, 31, 32, 33, 34, 35, 36, 38, 39,
                40, 41, 42, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56,
                57, 58, 59, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73,
                74, 75, 76, 77, 78, 79, 80, 82, 84, 86, 87, 88, 89, 90, 93,
                95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107,
                110, 112, 115, 116, 118, 120, 121, 122, 125, 128, 130, 131,
                132, 134, 136, 138, 139, 140, 141, 145, 148, 154, 155, 156,
                157, 159, 161, 163, 165, 166, 168, 169, 170, 177, 180, 185,
                188, 191, 193, 195, 202, 208, 213, 214, 221, 229, 230, 232,
                233, 242, 250, 261, 264, 276, 283, 286, 300, 304, 312, 323,
                325, 331, 342, 356, 370, 392, 395, 399, 408, 417, 488, 540,
                562, 570, 572, 581, 609, 748, 776, 1156, 1163, 1164, 1165,
                1166, 1167, 1168, 1169, 1170, 1171, 1172, 1173, 1174, 1175,
                1176, 1178, 1179, 1180, 1181, 1182, 1183, 1184, 1185, 1186,
                1187, 1188, 1189, 1190, 1191,
            )
        )

        ID_TO_LABEL = {}
        LABEL_TO_ID = {}
        for i in range(len(VALID_CLASS_IDS)):
            LABEL_TO_ID[CLASS_LABELS[i]] = VALID_CLASS_IDS[i]
            ID_TO_LABEL[VALID_CLASS_IDS[i]] = CLASS_LABELS[i]

    total_true = 0
    total_seen = 0
    NUM_CLASSES = len(VALID_CLASS_IDS)

    true_positive_classes = np.zeros(NUM_CLASSES)
    positive_classes = np.zeros(NUM_CLASSES)
    gt_classes = np.zeros(NUM_CLASSES)

    # precision & recall
    total_gt_ins = np.zeros(NUM_CLASSES)
    at = 0.5
    tpsins = [[] for _ in range(NUM_CLASSES)]
    fpsins = [[] for _ in range(NUM_CLASSES)]
    # mucov and mwcov
    all_mean_cov = [[] for _ in range(NUM_CLASSES)]
    all_mean_weighted_cov = [[] for _ in range(NUM_CLASSES)]

    print("evaluating", len(preds), "scans...")
    matches = {}
    for i, (k, v) in enumerate(preds.items()):
        gt_file = os.path.join(gt_path, k + ".txt")
        if not os.path.isfile(gt_file):
            util.print_error(
                "Scan {} does not match any gt file".format(k), user_fault=True
            )

        if dataset == "s3dis":
            gt_ids = util_3d.load_ids(gt_file)
            gt_sem = (gt_ids // 1000) - 1
            gt_ins = gt_ids - (gt_ids // 1000) * 1000

            # pred_sem = v['pred_classes'] - 1
            pred_sem = np.zeros(v["pred_masks"].shape[0], dtype=int)
            # TODO: continue here
            pred_ins = np.zeros(v["pred_masks"].shape[0], dtype=int)

            for inst_id in reversed(range(v["pred_masks"].shape[1])):
                point_ids = np.argwhere(v["pred_masks"][:, inst_id] == 1.0)[
                    :, 0
                ]
                pred_ins[point_ids] = inst_id + 1
                pred_sem[point_ids] = v["pred_classes"][inst_id] - 1

            # semantic acc
            total_true += np.sum(pred_sem == gt_sem)
            total_seen += pred_sem.shape[0]

            # TODO: parallelize this
            # pn semantic mIoU
            """
            for j in range(gt_sem.shape[0]):
                gt_l = int(gt_sem[j])
                pred_l = int(pred_sem[j])
                gt_classes[gt_l] += 1
                positive_classes[pred_l] += 1
                true_positive_classes[gt_l] += int(gt_l == pred_l)
            """

            uniq, counts = np.unique(pred_sem, return_counts=True)
            positive_classes[uniq] += counts

            uniq, counts = np.unique(gt_sem, return_counts=True)
            gt_classes[uniq] += counts

            uniq, counts = np.unique(
                gt_sem[pred_sem == gt_sem], return_counts=True
            )
            true_positive_classes[uniq] += counts

            # instance
            un = np.unique(pred_ins)
            pts_in_pred = [[] for _ in range(NUM_CLASSES)]
            for ig, g in enumerate(un):  # each object in prediction
                if g == -1:
                    continue
                tmp = pred_ins == g
                sem_seg_i = int(stats.mode(pred_sem[tmp])[0])
                pts_in_pred[sem_seg_i] += [tmp]

            un = np.unique(gt_ins)
            pts_in_gt = [[] for _ in range(NUM_CLASSES)]
            for ig, g in enumerate(un):
                tmp = gt_ins == g
                sem_seg_i = int(stats.mode(gt_sem[tmp])[0])
                pts_in_gt[sem_seg_i] += [tmp]

            # instance mucov & mwcov
            for i_sem in range(NUM_CLASSES):
                sum_cov = 0
                mean_cov = 0
                mean_weighted_cov = 0
                num_gt_point = 0
                for ig, ins_gt in enumerate(pts_in_gt[i_sem]):
                    ovmax = 0.0
                    num_ins_gt_point = np.sum(ins_gt)
                    num_gt_point += num_ins_gt_point
                    for ip, ins_pred in enumerate(pts_in_pred[i_sem]):
                        union = ins_pred | ins_gt
                        intersect = ins_pred & ins_gt
                        iou = float(np.sum(intersect)) / np.sum(union)

                        if iou > ovmax:
                            ovmax = iou
                            ipmax = ip

                    sum_cov += ovmax
                    mean_weighted_cov += ovmax * num_ins_gt_point

                if len(pts_in_gt[i_sem]) != 0:
                    mean_cov = sum_cov / len(pts_in_gt[i_sem])
                    all_mean_cov[i_sem].append(mean_cov)

                    mean_weighted_cov /= num_gt_point
                    all_mean_weighted_cov[i_sem].append(mean_weighted_cov)

        if dataset == "s3dis":
            # instance precision & recall
            for i_sem in range(NUM_CLASSES):
                tp = [0.0] * len(pts_in_pred[i_sem])
                fp = [0.0] * len(pts_in_pred[i_sem])
                gtflag = np.zeros(len(pts_in_gt[i_sem]))
                total_gt_ins[i_sem] += len(pts_in_gt[i_sem])

                for ip, ins_pred in enumerate(pts_in_pred[i_sem]):
                    ovmax = -1.0

                    for ig, ins_gt in enumerate(pts_in_gt[i_sem]):
                        union = ins_pred | ins_gt
                        intersect = ins_pred & ins_gt
                        iou = float(np.sum(intersect)) / np.sum(union)

                        if iou > ovmax:
                            ovmax = iou
                            igmax = ig

                    if ovmax >= at:
                        tp[ip] = 1  # true positive
                    else:
                        fp[ip] = 1  # false positive

                tpsins[i_sem] += tp
                fpsins[i_sem] += fp

        matches_key = os.path.abspath(gt_file)
        # assign gt to predictions
        gt2pred, pred2gt = assign_instances_for_scan(v, gt_file)
        matches[matches_key] = {}
        matches[matches_key]["gt"] = gt2pred
        matches[matches_key]["pred"] = pred2gt
        sys.stdout.write("\rscans processed: {}".format(i + 1))
        sys.stdout.flush()
    print("")
    ap_scores = evaluate_matches(matches)
    avgs = compute_averages(ap_scores)

    # print
    print_results(avgs)
    write_result_file(avgs, output_file)

    if dataset == "s3dis":
        MUCov = np.zeros(NUM_CLASSES)
        MWCov = np.zeros(NUM_CLASSES)
        for i_sem in range(NUM_CLASSES):
            MUCov[i_sem] = np.mean(all_mean_cov[i_sem])
            MWCov[i_sem] = np.mean(all_mean_weighted_cov[i_sem])

        precision = np.zeros(NUM_CLASSES)
        recall = np.zeros(NUM_CLASSES)
        for i_sem in range(NUM_CLASSES):
            tp = np.asarray(tpsins[i_sem]).astype(float)
            fp = np.asarray(fpsins[i_sem]).astype(float)
            tp = np.sum(tp)
            fp = np.sum(fp)
            rec = tp / total_gt_ins[i_sem]
            prec = tp / (tp + fp)

            precision[i_sem] = prec
            recall[i_sem] = rec

        """
        LOG_FOUT = open(os.path.join('results_a5.txt'), 'w')

        def log_string(out_str):
            LOG_FOUT.write(out_str + '\n')
            LOG_FOUT.flush()
            print(out_str)
        """

        return np.mean(precision), np.mean(recall)


# TODO: remove this
# import pandas as pd
# def main():
#     print("!!! CLI is only for debugging purposes. use `evaluate()` instead.")
#     evaluate(pd.read_pickle("/globalwork/schult/saved_predictions.pkl"), opt.gt_path, opt.output_file)

# if __name__ == '__main__':
#     main()
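
A hedged usage sketch of `evaluate` (the scan id and paths below are hypothetical; each key in `preds` must match a `<gt_path>/<key>.txt` ground-truth file):

import numpy as np
from mask3d.benchmark.evaluate_semantic_instance import evaluate  # assumed import path

preds = {
    "scene0000_00": {  # hypothetical scan id
        "pred_classes": np.array([3]),    # one predicted instance, label id 3
        "pred_scores": np.array([0.9]),
        "pred_masks": np.ones((100, 1)),  # (num_points, num_instances)
    }
}
evaluate(preds, gt_path="data/gt", output_file="results.csv", dataset="scannet")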
models/Mask3D/build/lib/mask3d/benchmark/util.py ADDED
@@ -0,0 +1,128 @@
import os, sys
import csv

try:
    import numpy as np
except ImportError:
    print("Failed to import numpy package.")
    sys.exit(-1)
try:
    import imageio
except ImportError:
    print("Please install the module 'imageio' for image processing, e.g.")
    print("pip install imageio")
    sys.exit(-1)


# print an error message and quit
def print_error(message, user_fault=False):
    sys.stderr.write("ERROR: " + str(message) + "\n")
    if user_fault:
        sys.exit(2)
    sys.exit(-1)


# whether string s represents an int
def represents_int(s):
    try:
        int(s)
        return True
    except ValueError:
        return False


def read_label_mapping(
    filename, label_from="raw_category", label_to="nyu40id"
):
    assert os.path.isfile(filename)
    mapping = dict()
    with open(filename) as csvfile:
        reader = csv.DictReader(csvfile, delimiter="\t")
        for row in reader:
            mapping[row[label_from]] = int(row[label_to])
    # if the keys are ints, convert
    if represents_int(list(mapping.keys())[0]):
        mapping = {int(k): v for k, v in mapping.items()}
    return mapping


# input: scene_types.txt or scene_types_all.txt
def read_scene_types_mapping(filename, remove_spaces=True):
    assert os.path.isfile(filename)
    mapping = dict()
    lines = open(filename).read().splitlines()
    lines = [line.split("\t") for line in lines]
    if remove_spaces:
        mapping = {x[1].strip(): int(x[0]) for x in lines}
    else:
        mapping = {x[1]: int(x[0]) for x in lines}
    return mapping


# color by label
def visualize_label_image(filename, image):
    height = image.shape[0]
    width = image.shape[1]
    vis_image = np.zeros([height, width, 3], dtype=np.uint8)
    color_palette = create_color_palette()
    for idx, color in enumerate(color_palette):
        vis_image[image == idx] = color
    imageio.imwrite(filename, vis_image)


# color by different instances (mod length of color palette)
def visualize_instance_image(filename, image):
    height = image.shape[0]
    width = image.shape[1]
    vis_image = np.zeros([height, width, 3], dtype=np.uint8)
    color_palette = create_color_palette()
    instances = np.unique(image)
    for idx, inst in enumerate(instances):
        vis_image[image == inst] = color_palette[inst % len(color_palette)]
    imageio.imwrite(filename, vis_image)


# color palette for nyu40 labels
def create_color_palette():
    return [
        (0, 0, 0),
        (174, 199, 232),  # wall
        (152, 223, 138),  # floor
        (31, 119, 180),  # cabinet
        (255, 187, 120),  # bed
        (188, 189, 34),  # chair
        (140, 86, 75),  # sofa
        (255, 152, 150),  # table
        (214, 39, 40),  # door
        (197, 176, 213),  # window
        (148, 103, 189),  # bookshelf
        (196, 156, 148),  # picture
        (23, 190, 207),  # counter
        (178, 76, 76),
        (247, 182, 210),  # desk
        (66, 188, 102),
        (219, 219, 141),  # curtain
        (140, 57, 197),
        (202, 185, 52),
        (51, 176, 203),
        (200, 54, 131),
        (92, 193, 61),
        (78, 71, 183),
        (172, 114, 82),
        (255, 127, 14),  # refrigerator
        (91, 163, 138),
        (153, 98, 156),
        (140, 153, 101),
        (158, 218, 229),  # shower curtain
        (100, 125, 154),
        (178, 127, 135),
        (120, 185, 128),
        (146, 111, 194),
        (44, 160, 44),  # toilet
        (112, 128, 144),  # sink
        (96, 207, 209),
        (227, 119, 194),  # bathtub
        (213, 92, 176),
        (94, 106, 211),
        (82, 84, 163),  # otherfurn
        (100, 85, 144),
    ]
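
`read_label_mapping` expects a tab-separated file whose header contains the `label_from`/`label_to` columns (e.g. ScanNet's `scannetv2-labels.combined.tsv`); a minimal sketch of the expected layout:

import csv

# write a tiny TSV with the two columns read_label_mapping looks up
with open("labels.tsv", "w", newline="") as f:
    writer = csv.writer(f, delimiter="\t")
    writer.writerow(["raw_category", "nyu40id"])
    writer.writerow(["chair", "5"])
# read_label_mapping("labels.tsv") would then return {"chair": 5}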
models/Mask3D/build/lib/mask3d/benchmark/util_3d.py ADDED
@@ -0,0 +1,177 @@
import os, sys
import json

try:
    import numpy as np
except ImportError:
    print("Failed to import numpy package.")
    sys.exit(-1)

try:
    from plyfile import PlyData, PlyElement
except ImportError:
    print("Please install the module 'plyfile' for PLY i/o, e.g.")
    print("pip install plyfile")
    sys.exit(-1)

import benchmark.util as util


# matrix: 4x4 np array
# points: Nx3 np array
def transform_points(matrix, points):
    assert len(points.shape) == 2 and points.shape[1] == 3
    num_points = points.shape[0]
    p = np.concatenate([points, np.ones((num_points, 1))], axis=1)
    p = np.matmul(matrix, np.transpose(p))
    p = np.transpose(p)
    p[:, :3] /= p[:, 3, None]
    return p[:, :3]


def export_ids(filename, ids):
    with open(filename, "w") as f:
        for id in ids:
            f.write("%d\n" % id)


def load_ids(filename):
    ids = open(filename).read().splitlines()
    ids = np.array(ids, dtype=np.int64)
    return ids


def read_mesh_vertices(filename):
    assert os.path.isfile(filename)
    with open(filename, "rb") as f:
        plydata = PlyData.read(f)
        num_verts = plydata["vertex"].count
        vertices = np.zeros(shape=[num_verts, 3], dtype=np.float32)
        vertices[:, 0] = plydata["vertex"].data["x"]
        vertices[:, 1] = plydata["vertex"].data["y"]
        vertices[:, 2] = plydata["vertex"].data["z"]
    return vertices


# export 3d instance labels for instance evaluation
def export_instance_ids_for_eval(filename, label_ids, instance_ids):
    assert label_ids.shape[0] == instance_ids.shape[0]
    output_mask_path_relative = "pred_mask"
    name = os.path.splitext(os.path.basename(filename))[0]
    output_mask_path = os.path.join(
        os.path.dirname(filename), output_mask_path_relative
    )
    if not os.path.isdir(output_mask_path):
        os.mkdir(output_mask_path)
    insts = np.unique(instance_ids)
    zero_mask = np.zeros(shape=(instance_ids.shape[0]), dtype=np.int32)
    with open(filename, "w") as f:
        for idx, inst_id in enumerate(insts):
            if inst_id == 0:  # 0 -> no instance for this vertex
                continue
            output_mask_file = os.path.join(
                output_mask_path_relative, name + "_" + str(idx) + ".txt"
            )
            loc = np.where(instance_ids == inst_id)
            label_id = label_ids[loc[0][0]]
            f.write("%s %d %f\n" % (output_mask_file, label_id, 1.0))
            # write mask
            mask = np.copy(zero_mask)
            mask[loc[0]] = 1
            export_ids(output_mask_file, mask)


# ------------ Instance Utils ------------ #


class Instance(object):
    instance_id = 0
    label_id = 0
    vert_count = 0
    med_dist = -1
    dist_conf = 0.0

    def __init__(self, mesh_vert_instances, instance_id):
        if instance_id == -1:
            return
        self.instance_id = int(instance_id)
        self.label_id = int(self.get_label_id(instance_id))
        self.vert_count = int(
            self.get_instance_verts(mesh_vert_instances, instance_id)
        )

    def get_label_id(self, instance_id):
        return int(instance_id // 1000)

    def get_instance_verts(self, mesh_vert_instances, instance_id):
        return (mesh_vert_instances == instance_id).sum()

    def to_json(self):
        return json.dumps(
            self, default=lambda o: o.__dict__, sort_keys=True, indent=4
        )

    def to_dict(self):
        inst_dict = {}
        inst_dict["instance_id"] = self.instance_id
        inst_dict["label_id"] = self.label_id
        inst_dict["vert_count"] = self.vert_count
        inst_dict["med_dist"] = self.med_dist
        inst_dict["dist_conf"] = self.dist_conf
        return inst_dict

    def from_json(self, data):
        self.instance_id = int(data["instance_id"])
        self.label_id = int(data["label_id"])
        self.vert_count = int(data["vert_count"])
        if "med_dist" in data:
            self.med_dist = float(data["med_dist"])
            self.dist_conf = float(data["dist_conf"])

    def __str__(self):
        return "(" + str(self.instance_id) + ")"


def read_instance_prediction_file(filename, pred_path):
    lines = open(filename).read().splitlines()
    instance_info = {}
    abs_pred_path = os.path.abspath(pred_path)
    for line in lines:
        parts = line.split(" ")
        if len(parts) != 3:
            util.print_error(
                "invalid instance prediction file. Expected (per line): [rel path prediction] [label id prediction] [confidence prediction]"
            )
        if os.path.isabs(parts[0]):
            util.print_error(
                "invalid instance prediction file. First entry in line must be a relative path"
            )
        mask_file = os.path.join(os.path.dirname(filename), parts[0])
        mask_file = os.path.abspath(mask_file)
        # check that mask_file lives inside prediction path
        if os.path.commonprefix([mask_file, abs_pred_path]) != abs_pred_path:
            util.print_error(
                "predicted mask {} in prediction text file {} points outside of prediction path.".format(
                    mask_file, filename
                )
            )

        info = {}
        info["label_id"] = int(float(parts[1]))
        info["conf"] = float(parts[2])
        instance_info[mask_file] = info
    return instance_info


def get_instances(ids, class_ids, class_labels, id2label):
    instances = {}
    for label in class_labels:
        instances[label] = []
    instance_ids = np.unique(ids)
    for id in instance_ids:
        if id == 0:
            continue
        inst = Instance(ids, id)
        if inst.label_id in class_ids:
            instances[id2label[inst.label_id]].append(inst.to_dict())
    return instances
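
A hedged sketch of `get_instances` on ids that use the `label * 1000 + instance` encoding assumed by `Instance`:

import numpy as np

ids = np.array([5001, 5001, 5002, 0])  # two instances of label 5, one unlabeled vertex
# get_instances(ids, class_ids=[5], class_labels=["chair"], id2label={5: "chair"})
# -> {"chair": [{"instance_id": 5001, "label_id": 5, "vert_count": 2, ...},
#               {"instance_id": 5002, "label_id": 5, "vert_count": 1, ...}]}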
models/Mask3D/build/lib/mask3d/conf/__init__.py ADDED
File without changes
models/Mask3D/build/lib/mask3d/conf/augmentation/albumentations_aug.yaml ADDED
@@ -0,0 +1,30 @@
__version__: 0.4.5
transform:
  __class_fullname__: albumentations.core.composition.Compose
  additional_targets: {}
  bbox_params: null
  keypoint_params: null
  p: 1.0
  transforms:
  - __class_fullname__: albumentations.augmentations.transforms.RandomBrightnessContrast
    always_apply: true
    brightness_by_max: true
    brightness_limit:
    - -0.2
    - 0.2
    contrast_limit:
    - -0.2
    - 0.2
    p: 0.5
  - __class_fullname__: albumentations.augmentations.transforms.RGBShift
    always_apply: true
    b_shift_limit:
    - -20
    - 20
    g_shift_limit:
    - -20
    - 20
    p: 0.5
    r_shift_limit:
    - -20
    - 20
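
This file is in albumentations' serialization format (note the `__version__`/`transform` keys), so it can presumably be restored with `albumentations.load`; a sketch with an assumed path:

import albumentations as A

transform = A.load("conf/augmentation/albumentations_aug.yaml", data_format="yaml")
# augmented = transform(image=rgb_image)["image"]  # rgb_image: (H, W, 3) uint8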
models/Mask3D/build/lib/mask3d/conf/augmentation/volumentations_aug.yaml ADDED
@@ -0,0 +1,53 @@
# pi = 3.14159265358979
# pi/2 = 1.57079632679489
# pi/3 = 1.04719755119659
# pi/6 = 0.52359877559829
# pi/12 = 0.26179938779914
# pi/24 = 0.13089969389957
#
__version__: 0.1.6
transform:
  __class_fullname__: volumentations.core.composition.Compose
  additional_targets: {}
  p: 1.0
  transforms:
  - __class_fullname__: volumentations.augmentations.transforms.Scale3d
    always_apply: true
    p: 0.5
    scale_limit:
    - - -0.1
      - 0.1
    - - -0.1
      - 0.1
    - - -0.1
      - 0.1
  - __class_fullname__: volumentations.augmentations.transforms.RotateAroundAxis3d
    always_apply: true
    axis:
    - 0
    - 0
    - 1
    p: 0.5
    rotation_limit:
    - -3.141592653589793
    - 3.141592653589793
  - __class_fullname__: volumentations.augmentations.transforms.RotateAroundAxis3d
    always_apply: true
    axis:
    - 0
    - 1
    - 0
    p: 0.5
    rotation_limit:
    - -0.13089969389957
    - 0.13089969389957
  - __class_fullname__: volumentations.augmentations.transforms.RotateAroundAxis3d
    always_apply: true
    axis:
    - 1
    - 0
    - 0
    p: 0.5
    rotation_limit:
    - -0.13089969389957
    - 0.13089969389957
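
volumentations mirrors the albumentations serialization API, so this config is presumably restored the same way; a sketch with an assumed path:

import volumentations as V

aug = V.load("conf/augmentation/volumentations_aug.yaml", data_format="yaml")
# rotated = aug(points=point_cloud)["points"]  # randomly scales/rotates an (N, 3) cloud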
models/Mask3D/build/lib/mask3d/conf/callbacks/callbacks_instance_segmentation.yaml ADDED
@@ -0,0 +1,11 @@
# @package _group_
- _target_: pytorch_lightning.callbacks.ModelCheckpoint
  monitor: val_mean_ap_50
  save_last: true
  save_top_k: 1
  mode: max
  dirpath: ${general.save_dir}
  filename: "{epoch}-{val_mean_ap_50:.3f}"
  every_n_epochs: 1

- _target_: pytorch_lightning.callbacks.LearningRateMonitor
models/Mask3D/build/lib/mask3d/conf/config_base_instance_segmentation.yaml ADDED
@@ -0,0 +1,75 @@
general:
  train_mode: true
  task: "instance_segmentation"
  seed: null
  checkpoint: null
  backbone_checkpoint: null
  freeze_backbone: false # train only last layer
  linear_probing_backbone: false
  train_on_segments: false
  eval_on_segments: false
  filter_out_instances: false
  save_visualizations: false
  visualization_point_size: 20
  decoder_id: -1
  export: false
  use_dbscan: false
  ignore_class_threshold: 100
  project_name: scannet
  workspace: jonasschult
  experiment_name: DEBUG_ABLATION
  num_targets: 19
  add_instance: true
  dbscan_eps: 0.95
  dbscan_min_points: 1


  export_threshold: 0.0001

  reps_per_epoch: 1

  on_crops: false

  scores_threshold: 0.0
  iou_threshold: 1.0

  area: 5

  eval_inner_core: -1 # disabled

  topk_per_image: 100

  ignore_mask_idx: []

  max_batch_size: 99999999

  save_dir: saved/${general.experiment_name}
  # time/commit/md5(config)_uuid
  # time/experiment_id/version_uuid
  # experiment_id: 1 # commit[:8], or unique from logger
  # version: 1 # md5[:8] of config

  gpus: 1

defaults:
  - data: indoor
  - data/data_loaders: simple_loader
  - data/datasets: scannet
  - data/collation_functions: voxelize_collate
  - logging: full
  - model: mask3d
  - metrics: miou
  - optimizer: adamw
  - scheduler: onecyclelr
  - trainer: trainer600
  - callbacks: callbacks_instance_segmentation
  - matcher: hungarian_matcher
  - loss: set_criterion

hydra:
  run:
    dir: saved/hydra_logs/${now:%Y-%m-%d}/${now:%H-%M-%S}
  sweep:
    dir: saved/hydra_logs/${now:%Y-%m-%d}/${now:%H-%M-%S}
    # dir: ${general.save_dir}
    subdir: ${hydra.job.num}_${hydra.job.id}
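
A hedged sketch of composing this config with Hydra's compose API and overriding one of the defaults groups (paths and overrides are illustrative):

from hydra import compose, initialize

with initialize(config_path="conf", version_base=None):
    cfg = compose(
        config_name="config_base_instance_segmentation",
        overrides=["data/datasets=s3dis", "general.experiment_name=my_run"],
    )
print(cfg.general.save_dir)  # -> saved/my_run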
models/Mask3D/build/lib/mask3d/conf/data/collation_functions/voxelize_collate.yaml ADDED
@@ -0,0 +1,42 @@
# @package data

train_collation:
  _target_: mask3d.datasets.utils.VoxelizeCollate
  ignore_label: ${data.ignore_label}
  voxel_size: ${data.voxel_size}
  mode: ${data.train_mode}
  small_crops: false
  very_small_crops: false
  batch_instance: false
  probing: ${general.linear_probing_backbone}
  task: ${general.task}
  ignore_class_threshold: ${general.ignore_class_threshold}
  filter_out_classes: ${data.train_dataset.filter_out_classes}
  label_offset: ${data.train_dataset.label_offset}
  num_queries: ${model.num_queries}

validation_collation:
  _target_: mask3d.datasets.utils.VoxelizeCollate
  ignore_label: ${data.ignore_label}
  voxel_size: ${data.voxel_size}
  mode: ${data.validation_mode}
  batch_instance: false
  probing: ${general.linear_probing_backbone}
  task: ${general.task}
  ignore_class_threshold: ${general.ignore_class_threshold}
  filter_out_classes: ${data.validation_dataset.filter_out_classes}
  label_offset: ${data.validation_dataset.label_offset}
  num_queries: ${model.num_queries}

test_collation:
  _target_: mask3d.datasets.utils.VoxelizeCollate
  ignore_label: ${data.ignore_label}
  voxel_size: ${data.voxel_size}
  mode: ${data.test_mode}
  batch_instance: false
  probing: ${general.linear_probing_backbone}
  task: ${general.task}
  ignore_class_threshold: ${general.ignore_class_threshold}
  filter_out_classes: ${data.test_dataset.filter_out_classes}
  label_offset: ${data.test_dataset.label_offset}
  num_queries: ${model.num_queries}
models/Mask3D/build/lib/mask3d/conf/data/collation_functions/voxelize_collate_merge.yaml ADDED
@@ -0,0 +1,36 @@
# @package data

train_collation:
  _target_: mask3d.datasets.utils.VoxelizeCollateMerge
  ignore_label: ${data.ignore_label}
  voxel_size: ${data.voxel_size}
  mode: ${data.train_mode}
  small_crops: false
  very_small_crops: false
  scenes: 2
  batch_instance: false
  make_one_pc_noise: false
  place_nearby: false
  place_far: false
  proba: 1
  probing: ${general.linear_probing_backbone}
  include_ignore: ${general.include_ignore}
  task: ${general.task}

validation_collation:
  _target_: mask3d.datasets.utils.VoxelizeCollate
  ignore_label: ${data.ignore_label}
  voxel_size: ${data.voxel_size}
  mode: ${data.validation_mode}
  probing: ${general.linear_probing_backbone}
  include_ignore: ${general.include_ignore}
  task: ${general.task}

test_collation:
  _target_: mask3d.datasets.utils.VoxelizeCollate
  ignore_label: ${data.ignore_label}
  voxel_size: ${data.voxel_size}
  mode: ${data.test_mode}
  probing: ${general.linear_probing_backbone}
  include_ignore: ${general.include_ignore}
  task: ${general.task}
models/Mask3D/build/lib/mask3d/conf/data/data_loaders/simple_loader.yaml ADDED
@@ -0,0 +1,22 @@
# @package data

train_dataloader:
  _target_: torch.utils.data.DataLoader
  shuffle: true
  pin_memory: ${data.pin_memory}
  num_workers: ${data.num_workers}
  batch_size: ${data.batch_size}

validation_dataloader:
  _target_: torch.utils.data.DataLoader
  shuffle: false
  pin_memory: ${data.pin_memory}
  num_workers: ${data.num_workers}
  batch_size: ${data.test_batch_size}

test_dataloader:
  _target_: torch.utils.data.DataLoader
  shuffle: false
  pin_memory: ${data.pin_memory}
  num_workers: ${data.num_workers}
  batch_size: ${data.test_batch_size}
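
These nodes only parameterize `torch.utils.data.DataLoader`; a hedged sketch of how they would be instantiated with Hydra, passing the dataset as an extra kwarg (`cfg` and `train_dataset` are assumed to exist):

from hydra.utils import instantiate

train_loader = instantiate(cfg.data.train_dataloader, dataset=train_dataset)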
models/Mask3D/build/lib/mask3d/conf/data/data_loaders/simple_loader_save_memory.yaml ADDED
@@ -0,0 +1,22 @@
# @package data

train_dataloader:
  _target_: torch.utils.data.DataLoader
  shuffle: true
  pin_memory: ${data.pin_memory}
  num_workers: ${data.num_workers}
  batch_size: ${data.batch_size}

validation_dataloader:
  _target_: torch.utils.data.DataLoader
  shuffle: false
  pin_memory: ${data.pin_memory}
  num_workers: 1
  batch_size: ${data.test_batch_size}

test_dataloader:
  _target_: torch.utils.data.DataLoader
  shuffle: false
  pin_memory: ${data.pin_memory}
  num_workers: 1
  batch_size: ${data.test_batch_size}
models/Mask3D/build/lib/mask3d/conf/data/datasets/matterport.yaml ADDED
@@ -0,0 +1,48 @@
# @package data
train_dataset:
  _target_: mix3d.datasets.semseg.SemanticSegmentationDataset
  data_dir: data/processed/matterport
  image_augmentations_path: mix3d/conf/augmentation/albumentations_aug.yaml
  volume_augmentations_path: mix3d/conf/augmentation/volumentations_aug.yaml
  label_db_filepath: data/processed/matterport/label_database.yaml
  color_mean_std: data/processed/scannet/color_mean_std.yaml
  data_percent: 1.0
  mode: ${data.train_mode}
  ignore_label: ${data.ignore_label}
  num_labels: ${data.num_labels}
  add_raw_coordinates: ${data.add_raw_coordinates}
  add_colors: ${data.add_colors}
  add_normals: ${data.add_normals}
  add_instance: ${data.add_instance}

validation_dataset:
  _target_: mix3d.datasets.semseg.SemanticSegmentationDataset
  data_dir: data/processed/scannet
  image_augmentations_path: null
  volume_augmentations_path: null
  label_db_filepath: data/processed/matterport/label_database.yaml
  color_mean_std: data/processed/scannet/color_mean_std.yaml
  data_percent: 1.0
  mode: ${data.validation_mode}
  ignore_label: ${data.ignore_label}
  num_labels: ${data.num_labels}
  add_raw_coordinates: ${data.add_raw_coordinates}
  add_colors: ${data.add_colors}
  add_normals: ${data.add_normals}
  add_instance: ${data.add_instance}

test_dataset:
  _target_: mix3d.datasets.semseg.SemanticSegmentationDataset
  data_dir: data/processed/matterport
  image_augmentations_path: null
  volume_augmentations_path: null
  label_db_filepath: data/processed/matterport/label_database.yaml
  color_mean_std: data/processed/scannet/color_mean_std.yaml
  data_percent: 1.0
  mode: ${data.test_mode}
  ignore_label: ${data.ignore_label}
  num_labels: ${data.num_labels}
  add_raw_coordinates: ${data.add_raw_coordinates}
  add_colors: ${data.add_colors}
  add_normals: ${data.add_normals}
  add_instance: ${data.add_instance}
models/Mask3D/build/lib/mask3d/conf/data/datasets/matterport_scannet.yaml ADDED
@@ -0,0 +1,50 @@
# @package data
train_dataset:
  _target_: mix3d.datasets.semseg.SemanticSegmentationDataset
  data_dir:
  - data/processed/scannet
  - data/processed/matterport
  image_augmentations_path: mix3d/conf/augmentation/albumentations_aug.yaml
  volume_augmentations_path: mix3d/conf/augmentation/volumentations_aug.yaml
  label_db_filepath: data/processed/scannet/label_database.yaml
  color_mean_std: data/processed/scannet/color_mean_std.yaml
  data_percent: 1.0
  mode: ${data.train_mode}
  ignore_label: ${data.ignore_label}
  num_labels: ${data.num_labels}
  add_raw_coordinates: ${data.add_raw_coordinates}
  add_colors: ${data.add_colors}
  add_normals: ${data.add_normals}
  add_instance: ${data.add_instance}

validation_dataset:
  _target_: mix3d.datasets.semseg.SemanticSegmentationDataset
  data_dir: data/processed/scannet
  image_augmentations_path: null
  volume_augmentations_path: null
  label_db_filepath: data/processed/scannet/label_database.yaml
  color_mean_std: data/processed/scannet/color_mean_std.yaml
  data_percent: 1.0
  mode: ${data.validation_mode}
  ignore_label: ${data.ignore_label}
  num_labels: ${data.num_labels}
  add_raw_coordinates: ${data.add_raw_coordinates}
  add_colors: ${data.add_colors}
  add_normals: ${data.add_normals}
  add_instance: ${data.add_instance}

test_dataset:
  _target_: mix3d.datasets.semseg.SemanticSegmentationDataset
  data_dir: data/processed/scannet
  image_augmentations_path: null
  volume_augmentations_path: null
  label_db_filepath: data/processed/scannet/label_database.yaml
  color_mean_std: data/processed/scannet/color_mean_std.yaml
  data_percent: 1.0
  mode: ${data.test_mode}
  ignore_label: ${data.ignore_label}
  num_labels: ${data.num_labels}
  add_raw_coordinates: ${data.add_raw_coordinates}
  add_colors: ${data.add_colors}
  add_normals: ${data.add_normals}
  add_instance: ${data.add_instance}
models/Mask3D/build/lib/mask3d/conf/data/datasets/rio.yaml ADDED
@@ -0,0 +1,48 @@
# @package data
train_dataset:
  _target_: mix3d.datasets.semseg.SemanticSegmentationDataset
  data_dir: data/processed/rio
  image_augmentations_path: mix3d/conf/augmentation/albumentations_aug.yaml
  volume_augmentations_path: mix3d/conf/augmentation/volumentations_aug.yaml
  label_db_filepath: data/processed/scannet/label_database.yaml
  color_mean_std: data/processed/scannet/color_mean_std.yaml
  data_percent: 1.0
  mode: ${data.train_mode}
  ignore_label: ${data.ignore_label}
  num_labels: ${data.num_labels}
  add_raw_coordinates: ${data.add_raw_coordinates}
  add_colors: ${data.add_colors}
  add_normals: ${data.add_normals}
  add_instance: ${data.add_instance}

validation_dataset:
  _target_: mix3d.datasets.semseg.SemanticSegmentationDataset
  data_dir: data/processed/rio
  image_augmentations_path: null
  volume_augmentations_path: null
  label_db_filepath: data/processed/scannet/label_database.yaml
  color_mean_std: data/processed/scannet/color_mean_std.yaml
  data_percent: 1.0
  mode: ${data.validation_mode}
  ignore_label: ${data.ignore_label}
  num_labels: ${data.num_labels}
  add_raw_coordinates: ${data.add_raw_coordinates}
  add_colors: ${data.add_colors}
  add_normals: ${data.add_normals}
  add_instance: ${data.add_instance}

test_dataset:
  _target_: mix3d.datasets.semseg.SemanticSegmentationDataset
  data_dir: data/processed/rio
  image_augmentations_path: null
  volume_augmentations_path: null
  label_db_filepath: data/processed/scannet/label_database.yaml
  color_mean_std: data/processed/scannet/color_mean_std.yaml
  data_percent: 1.0
  mode: ${data.test_mode}
  ignore_label: ${data.ignore_label}
  num_labels: ${data.num_labels}
  add_raw_coordinates: ${data.add_raw_coordinates}
  add_colors: ${data.add_colors}
  add_normals: ${data.add_normals}
  add_instance: ${data.add_instance}
models/Mask3D/build/lib/mask3d/conf/data/datasets/s3dis.yaml ADDED
@@ -0,0 +1,87 @@
# @package data
train_dataset:
  _target_: mask3d.datasets.semseg.SemanticSegmentationDataset
  dataset_name: "s3dis"
  data_dir: data/processed/s3dis
  image_augmentations_path: conf/augmentation/albumentations_aug.yaml
  volume_augmentations_path: conf/augmentation/volumentations_aug.yaml
  label_db_filepath: data/processed/s3dis/label_database.yaml
  color_mean_std: data/processed/s3dis/color_mean_std.yaml
  data_percent: 1.0
  mode: ${data.train_mode}
  ignore_label: ${data.ignore_label}
  num_labels: ${data.num_labels}
  add_raw_coordinates: ${data.add_raw_coordinates}
  add_colors: ${data.add_colors}
  add_normals: ${data.add_normals}
  add_instance: ${data.add_instance}
  cache_data: ${data.cache_data}
  # different augs experiments
  instance_oversampling: 0.0
  place_around_existing: false
  point_per_cut: 0
  max_cut_region: 0
  flip_in_center: false
  noise_rate: 0
  resample_points: 0
  cropping: ${data.cropping}
  cropping_args: ${data.cropping_args}
  is_tta: false
  crop_min_size: ${data.crop_min_size}
  crop_length: ${data.crop_length}
  cropping_v1: ${data.cropping_v1}
  area: ${general.area}
  filter_out_classes: []
  label_offset: 0

validation_dataset:
  _target_: mask3d.datasets.semseg.SemanticSegmentationDataset
  dataset_name: "s3dis"
  data_dir: data/processed/s3dis
  image_augmentations_path: null
  volume_augmentations_path: null
  label_db_filepath: data/processed/s3dis/label_database.yaml
  color_mean_std: data/processed/s3dis/color_mean_std.yaml
  data_percent: 1.0
  mode: ${data.validation_mode}
  ignore_label: ${data.ignore_label}
  num_labels: ${data.num_labels}
  add_raw_coordinates: ${data.add_raw_coordinates}
  add_colors: ${data.add_colors}
  add_normals: ${data.add_normals}
  add_instance: ${data.add_instance}
  cache_data: ${data.cache_data}
  cropping: false
  is_tta: false
  crop_min_size: ${data.crop_min_size}
  crop_length: ${data.crop_length}
  cropping_v1: ${data.cropping_v1}
  area: ${general.area}
  filter_out_classes: []
  label_offset: 0

test_dataset:
  _target_: mask3d.datasets.semseg.SemanticSegmentationDataset
  dataset_name: "s3dis"
  data_dir: data/processed/s3dis
  image_augmentations_path: null
  volume_augmentations_path: null
  label_db_filepath: data/processed/s3dis/label_database.yaml
  color_mean_std: data/processed/s3dis/color_mean_std.yaml
  data_percent: 1.0
  mode: ${data.test_mode}
  ignore_label: ${data.ignore_label}
  num_labels: ${data.num_labels}
  add_raw_coordinates: ${data.add_raw_coordinates}
  add_colors: ${data.add_colors}
  add_normals: ${data.add_normals}
  add_instance: ${data.add_instance}
  cache_data: ${data.cache_data}
  cropping: false
  is_tta: false
  crop_min_size: ${data.crop_min_size}
  crop_length: ${data.crop_length}
  cropping_v1: ${data.cropping_v1}
  area: ${general.area}
  filter_out_classes: []
  label_offset: 0
models/Mask3D/build/lib/mask3d/conf/data/datasets/scannet.yaml ADDED
@@ -0,0 +1,79 @@
# @package data
train_dataset:
  _target_: mask3d.datasets.semseg.SemanticSegmentationDataset
  dataset_name: "scannet"
  data_dir: data/processed/scannet
  image_augmentations_path: conf/augmentation/albumentations_aug.yaml
  volume_augmentations_path: conf/augmentation/volumentations_aug.yaml
  label_db_filepath: data/processed/scannet/label_database.yaml
  color_mean_std: data/processed/scannet/color_mean_std.yaml
  data_percent: 1.0
  mode: ${data.train_mode}
  ignore_label: ${data.ignore_label}
  num_labels: ${data.num_labels}
  add_raw_coordinates: ${data.add_raw_coordinates}
  add_colors: ${data.add_colors}
  add_normals: ${data.add_normals}
  add_instance: ${data.add_instance}
  # different augs experiments
  instance_oversampling: 0.0
  place_around_existing: false
  point_per_cut: 0
  max_cut_region: 0
  flip_in_center: false
  noise_rate: 0
  resample_points: 0
  add_unlabeled_pc: false
  cropping: ${data.cropping}
  cropping_args: ${data.cropping_args}
  is_tta: false
  crop_min_size: ${data.crop_min_size}
  crop_length: ${data.crop_length}
  filter_out_classes: [0, 1]
  label_offset: 2

validation_dataset:
  _target_: mask3d.datasets.semseg.SemanticSegmentationDataset
  dataset_name: "scannet"
  data_dir: data/processed/scannet
  image_augmentations_path: null
  volume_augmentations_path: null
  label_db_filepath: data/processed/scannet/label_database.yaml
  color_mean_std: data/processed/scannet/color_mean_std.yaml
  data_percent: 1.0
  mode: ${data.validation_mode}
  ignore_label: ${data.ignore_label}
  num_labels: ${data.num_labels}
  add_raw_coordinates: ${data.add_raw_coordinates}
  add_colors: ${data.add_colors}
  add_normals: ${data.add_normals}
  add_instance: ${data.add_instance}
  cropping: false
  is_tta: false
  crop_min_size: ${data.crop_min_size}
  crop_length: ${data.crop_length}
  filter_out_classes: [0, 1]
  label_offset: 2

test_dataset:
  _target_: mask3d.datasets.semseg.SemanticSegmentationDataset
  dataset_name: "scannet"
  data_dir: data/processed/scannet
  image_augmentations_path: null
  volume_augmentations_path: null
  label_db_filepath: data/processed/scannet/label_database.yaml
  color_mean_std: data/processed/scannet/color_mean_std.yaml
  data_percent: 1.0
  mode: ${data.test_mode}
  ignore_label: ${data.ignore_label}
  num_labels: ${data.num_labels}
  add_raw_coordinates: ${data.add_raw_coordinates}
  add_colors: ${data.add_colors}
  add_normals: ${data.add_normals}
  add_instance: ${data.add_instance}
  cropping: false
  is_tta: false
  crop_min_size: ${data.crop_min_size}
  crop_length: ${data.crop_length}
  filter_out_classes: [0, 1]
  label_offset: 2
models/Mask3D/build/lib/mask3d/conf/data/datasets/scannet200.yaml ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
+ # @package data
+ train_dataset:
+   _target_: mask3d.datasets.semseg.SemanticSegmentationDataset
+   dataset_name: "scannet200"
+   data_dir: /home/weders/scratch/scratch/scannetter/arkit/raw/
+   image_augmentations_path: conf/augmentation/albumentations_aug.yaml
+   volume_augmentations_path: conf/augmentation/volumentations_aug.yaml
+   # label_db_filepath: data/processed/scannet200/label_database.yaml
+   # color_mean_std: data/processed/scannet200/color_mean_std.yaml
+   data_percent: 1.0
+   mode: ${data.train_mode}
+   ignore_label: ${data.ignore_label}
+   num_labels: ${data.num_labels}
+   add_raw_coordinates: ${data.add_raw_coordinates}
+   add_colors: ${data.add_colors}
+   add_normals: ${data.add_normals}
+   add_instance: ${data.add_instance}
+   # different augs experiments
+   instance_oversampling: 0.0
+   place_around_existing: false
+   point_per_cut: 0
+   max_cut_region: 0
+   flip_in_center: false
+   noise_rate: 0
+   resample_points: 0
+   add_unlabeled_pc: false
+   cropping: ${data.cropping}
+   cropping_args: ${data.cropping_args}
+   is_tta: false
+   crop_min_size: ${data.crop_min_size}
+   crop_length: ${data.crop_length}
+   filter_out_classes: [0, 2]
+   label_offset: 2
+
+ validation_dataset:
+   _target_: mask3d.datasets.semseg.SemanticSegmentationDataset
+   dataset_name: "scannet200"
+   data_dir: /home/weders/scratch/scratch/scannetter/arkit/raw/
+   image_augmentations_path: null
+   volume_augmentations_path: null
+   # label_db_filepath: data/processed/scannet200/label_database.yaml
+   # color_mean_std: data/processed/scannet200/color_mean_std.yaml
+   data_percent: 1.0
+   mode: ${data.validation_mode}
+   ignore_label: ${data.ignore_label}
+   num_labels: ${data.num_labels}
+   add_raw_coordinates: ${data.add_raw_coordinates}
+   add_colors: ${data.add_colors}
+   add_normals: ${data.add_normals}
+   add_instance: ${data.add_instance}
+   cropping: false
+   is_tta: false
+   crop_min_size: ${data.crop_min_size}
+   crop_length: ${data.crop_length}
+   filter_out_classes: [0, 2]
+   label_offset: 2
+
+ test_dataset:
+   _target_: mask3d.datasets.semseg.SemanticSegmentationDataset
+   dataset_name: "scannet200"
+   data_dir: /home/weders/scratch/scratch/scannetter/arkit/raw/
+   image_augmentations_path: null
+   volume_augmentations_path: null
+   # label_db_filepath: data/processed/scannet200/label_database.yaml
+   # color_mean_std: data/processed/scannet200/color_mean_std.yaml
+   data_percent: 1.0
+   mode: ${data.test_mode}
+   ignore_label: ${data.ignore_label}
+   num_labels: ${data.num_labels}
+   add_raw_coordinates: ${data.add_raw_coordinates}
+   add_colors: ${data.add_colors}
+   add_normals: ${data.add_normals}
+   add_instance: ${data.add_instance}
+   cropping: false
+   is_tta: false
+   crop_min_size: ${data.crop_min_size}
+   crop_length: ${data.crop_length}
+   filter_out_classes: [0, 2]
+   label_offset: 2
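The `filter_out_classes` / `label_offset` pair drops unwanted semantic IDs and shifts the survivors so training targets start at zero. A rough, hypothetical sketch of that remapping (the dataset class's actual logic may differ in details):

```python
import numpy as np

def remap_labels(labels: np.ndarray,
                 filter_out_classes=(0, 2),
                 label_offset: int = 2,
                 ignore_label: int = 255) -> np.ndarray:
    """Illustration only: mask out filtered classes, then shift the
    remaining IDs down by `label_offset` so they start at 0."""
    labels = labels.copy()
    labels[np.isin(labels, filter_out_classes)] = ignore_label
    valid = labels != ignore_label
    labels[valid] -= label_offset
    return labels

print(remap_labels(np.array([0, 2, 3, 4, 255])))  # -> [255 255   1   2 255]
```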
models/Mask3D/build/lib/mask3d/conf/data/datasets/semantic_kitti.yaml ADDED
@@ -0,0 +1,42 @@
+ # @package data
+ train_dataset:
+   _target_: mask3d.datasets.outdoor_semseg.LidarDataset
+   data_dir: data/processed/semantic_kitti
+   label_db_filepath: data/processed/semantic_kitti/label_database.yaml
+   mode: ${data.train_mode}
+   add_reflection: ${data.add_reflection}
+   add_distance: ${data.add_distance}
+   add_instance: ${data.add_instance}
+   num_labels: ${data.num_labels}
+   sweep: ${data.sweep}
+   data_percent: 1.0
+   ignore_label: ${data.ignore_label}
+   volume_augmentations_path: conf/augmentation/volumentations_aug.yaml
+
+ validation_dataset:
+   _target_: mask3d.datasets.outdoor_semseg.LidarDataset
+   data_dir: data/processed/semantic_kitti
+   label_db_filepath: data/processed/semantic_kitti/label_database.yaml
+   mode: ${data.validation_mode}
+   add_reflection: ${data.add_reflection}
+   add_distance: ${data.add_distance}
+   add_instance: ${data.add_instance}
+   num_labels: ${data.num_labels}
+   sweep: ${data.sweep}
+   data_percent: 1.0
+   ignore_label: ${data.ignore_label}
+   volume_augmentations_path: null
+
+ test_dataset:
+   _target_: mask3d.datasets.outdoor_semseg.LidarDataset
+   data_dir: data/processed/semantic_kitti
+   label_db_filepath: data/processed/semantic_kitti/label_database.yaml
+   mode: ${data.test_mode}
+   add_reflection: ${data.add_reflection}
+   add_distance: ${data.add_distance}
+   add_instance: ${data.add_instance}
+   num_labels: ${data.num_labels}
+   sweep: ${data.sweep}
+   data_percent: 1.0
+   ignore_label: ${data.ignore_label}
+   volume_augmentations_path: null
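For quick experiments outside Hydra, the `LidarDataset` that this config targets (added later in this commit under `mask3d/datasets/outdoor_semseg.py`) can also be constructed directly. A sketch, assuming the preprocessed SemanticKITTI databases have been generated:

```python
# Sketch only: direct construction mirroring the YAML above. Assumes
# data/processed/semantic_kitti/{train_database.yaml,label_database.yaml}
# exist and the package's dependencies are installed.
from mask3d.datasets.outdoor_semseg import LidarDataset

ds = LidarDataset(
    data_dir="data/processed/semantic_kitti",
    label_db_filepath="data/processed/semantic_kitti/label_database.yaml",
    mode="train",
    add_reflection=True,
    add_distance=True,
    add_instance=False,
    num_labels=19,
    sweep=1,
    ignore_label=255,
    volume_augmentations_path=None,
)
coordinates, features, labels = ds[0]
print(coordinates.shape, features.shape, labels.shape)
```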
models/Mask3D/build/lib/mask3d/conf/data/datasets/stpls3d.yaml ADDED
@@ -0,0 +1,95 @@
+ # @package data
+ train_dataset:
+   _target_: mask3d.datasets.semseg.SemanticSegmentationDataset
+   dataset_name: "stpls3d"
+   data_dir: data/processed/stpls3d
+   image_augmentations_path: conf/augmentation/albumentations_aug.yaml
+   volume_augmentations_path: conf/augmentation/volumentations_aug.yaml
+   label_db_filepath: data/processed/stpls3d/label_database.yaml
+   color_mean_std: data/processed/stpls3d/color_mean_std.yaml
+   data_percent: 1.0
+   mode: ${data.train_mode}
+   ignore_label: ${data.ignore_label}
+   num_labels: ${data.num_labels}
+   add_raw_coordinates: ${data.add_raw_coordinates}
+   add_colors: ${data.add_colors}
+   add_normals: ${data.add_normals}
+   add_instance: ${data.add_instance}
+   cache_data: ${data.cache_data}
+   # different augs experiments
+   instance_oversampling: 0.0
+   place_around_existing: false
+   point_per_cut: 0
+   max_cut_region: 0
+   flip_in_center: false
+   noise_rate: 0
+   resample_points: 0
+   cropping: ${data.cropping}
+   cropping_args: ${data.cropping_args}
+   is_tta: false
+   crop_min_size: ${data.crop_min_size}
+   crop_length: ${data.crop_length}
+   cropping_v1: ${data.cropping_v1}
+   area: ${general.area}
+   reps_per_epoch: ${general.reps_per_epoch}
+   eval_inner_core: ${general.eval_inner_core}
+   filter_out_classes: [0]
+   label_offset: 1
+   is_elastic_distortion: true
+   color_drop: 0.0
+
+ validation_dataset:
+   _target_: mask3d.datasets.semseg.SemanticSegmentationDataset
+   dataset_name: "stpls3d"
+   data_dir: data/processed/stpls3d
+   image_augmentations_path: null
+   volume_augmentations_path: null
+   label_db_filepath: data/processed/stpls3d/label_database.yaml
+   color_mean_std: data/processed/stpls3d/color_mean_std.yaml
+   data_percent: 1.0
+   mode: ${data.validation_mode}
+   ignore_label: ${data.ignore_label}
+   num_labels: ${data.num_labels}
+   add_raw_coordinates: ${data.add_raw_coordinates}
+   add_colors: ${data.add_colors}
+   add_normals: ${data.add_normals}
+   add_instance: ${data.add_instance}
+   cache_data: ${data.cache_data}
+   cropping: false
+   is_tta: false
+   crop_min_size: ${data.crop_min_size}
+   crop_length: ${data.crop_length}
+   cropping_v1: ${data.cropping_v1}
+   area: ${general.area}
+   on_crops: ${general.on_crops}
+   eval_inner_core: ${general.eval_inner_core}
+   filter_out_classes: [0]
+   label_offset: 1
+
+ test_dataset:
+   _target_: mask3d.datasets.semseg.SemanticSegmentationDataset
+   dataset_name: "stpls3d"
+   data_dir: data/processed/stpls3d
+   image_augmentations_path: null
+   volume_augmentations_path: null
+   label_db_filepath: data/processed/stpls3d/label_database.yaml
+   color_mean_std: data/processed/stpls3d/color_mean_std.yaml
+   data_percent: 1.0
+   mode: ${data.test_mode}
+   ignore_label: ${data.ignore_label}
+   num_labels: ${data.num_labels}
+   add_raw_coordinates: ${data.add_raw_coordinates}
+   add_colors: ${data.add_colors}
+   add_normals: ${data.add_normals}
+   add_instance: ${data.add_instance}
+   cache_data: ${data.cache_data}
+   cropping: false
+   is_tta: false
+   crop_min_size: ${data.crop_min_size}
+   crop_length: ${data.crop_length}
+   cropping_v1: ${data.cropping_v1}
+   area: ${general.area}
+   on_crops: ${general.on_crops}
+   eval_inner_core: ${general.eval_inner_core}
+   filter_out_classes: [0]
+   label_offset: 1
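Dataset groups like this one are usually selected and overridden at compose time rather than edited in place. A minimal sketch using Hydra's Compose API, assuming hydra-core >= 1.2, the `conf/` layout of this package, and the base config name seen in this tree (the override values are illustrative):

```python
# Sketch: select the stpls3d dataset group and override a few leaves.
from hydra import compose, initialize

with initialize(version_base=None, config_path="conf"):
    cfg = compose(
        config_name="config_base_instance_segmentation",
        overrides=[
            "data/datasets=stpls3d",
            "data.batch_size=4",       # example values, not recommendations
            "data.voxel_size=0.333",
        ],
    )
print(cfg.data.train_dataset.dataset_name)  # stpls3d
```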
models/Mask3D/build/lib/mask3d/conf/data/indoor.yaml ADDED
@@ -0,0 +1,43 @@
+ # @package _group_
+
+ # these parameters are inherited by datasets, data_loaders and collators
+ # but they might be overwritten
+
+ # splits
+ train_mode: train
+ validation_mode: validation
+ test_mode: validation # or: test
+
+ # dataset
+ ignore_label: 255
+ add_raw_coordinates: true # 3dim
+ add_colors: true # 3dim
+ add_normals: false # 3dim
+ in_channels: 3 # in_channels = 3 * (add_normals + add_colors + add_raw_coordinates)
+ num_labels: 20
+ # num_labels: 41
+ add_instance: ${general.add_instance}
+ task: ${general.task}
+
+ # data loader
+ pin_memory: false
+ num_workers: 4
+ batch_size: 5
+ test_batch_size: 1
+ cache_data: false
+
+ # collation
+ voxel_size: 0.02
+
+ reps_per_epoch: ${general.reps_per_epoch}
+
+ cropping: false
+ cropping_args:
+   min_points: 30000
+   aspect: 0.8
+   min_crop: 0.5
+   max_crop: 1.0
+
+ crop_min_size: 20000
+ crop_length: 6.0
+ cropping_v1: true
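The `in_channels` comment encodes a small arithmetic rule: each enabled feature block (raw coordinates, colors, normals) contributes three input dimensions. A tiny sketch of that bookkeeping, illustrative only; the value actually fed to the backbone is whatever `in_channels` is set to above:

```python
def indoor_in_channels(add_raw_coordinates: bool,
                       add_colors: bool,
                       add_normals: bool) -> int:
    """Per the comment above: 3 dims per enabled feature block."""
    return 3 * (int(add_raw_coordinates) + int(add_colors) + int(add_normals))

# e.g. colors only -> 3; colors + raw coordinates -> 6
print(indoor_in_channels(False, True, False))  # 3
print(indoor_in_channels(True, True, False))   # 6
```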
models/Mask3D/build/lib/mask3d/conf/data/outdoor.yaml ADDED
@@ -0,0 +1,26 @@
+ # @package _group_
+
+ # these parameters are inherited by datasets, data_loaders and collators
+ # but they might be overwritten
+
+ # splits
+ train_mode: train
+ validation_mode: validation
+ test_mode: validation
+
+ # dataset
+ ignore_label: 255
+ add_distance: true # 1dim
+ add_reflection: true # 1dim
+ in_channels: 2 # in_channels = add_distance + add_reflection
+ num_labels: 19
+ add_instance: false
+
+ # data loader
+ pin_memory: true
+ num_workers: 4
+ batch_size: 18
+ sweep: 1
+
+ # collation
+ voxel_size: 0.15
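Leaves like `${data.ignore_label}` in the dataset configs are OmegaConf interpolations, resolved against the composed root config rather than the file they appear in. A minimal sketch of how that resolution behaves:

```python
# Sketch: ${data.*} references resolve once nodes are nested under a
# root config (omegaconf assumed installed).
from omegaconf import OmegaConf

cfg = OmegaConf.create({
    "data": {
        "ignore_label": 255,
        "train_dataset": {"ignore_label": "${data.ignore_label}"},
    }
})
print(cfg.data.train_dataset.ignore_label)  # 255, resolved lazily on access
```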
models/Mask3D/build/lib/mask3d/conf/logging/base.yaml ADDED
@@ -0,0 +1,10 @@
+ # @package _group_
+ - _target_: pytorch_lightning.loggers.NeptuneLogger
+   project_name: ${general.workspace}/${general.project_name}
+   experiment_name: ${general.experiment_name}
+   offline_mode: false
+
+ - _target_: pytorch_lightning.loggers.CSVLogger
+   save_dir: ${general.save_dir}
+   name: ${general.experiment_id}
+   version: ${general.version}
models/Mask3D/build/lib/mask3d/conf/logging/full.yaml ADDED
@@ -0,0 +1,8 @@
+ # @package _group_
+ - _target_: pytorch_lightning.loggers.WandbLogger
+   project: ${general.project_name}
+   name: ${general.experiment_name}
+   save_dir: ${general.save_dir}
+   entity: "schult"
+   resume: "allow"
+   id: ${general.experiment_name}
models/Mask3D/build/lib/mask3d/conf/logging/minimal.yaml ADDED
@@ -0,0 +1,5 @@
+ # @package _group_
+ - _target_: pytorch_lightning.loggers.CSVLogger
+   save_dir: ${general.save_dir}
+   name: ${general.experiment_id}
+   version: ${general.version}
models/Mask3D/build/lib/mask3d/conf/logging/offline.yaml ADDED
@@ -0,0 +1,10 @@
+ # @package _group_
+ - _target_: pytorch_lightning.loggers.TensorBoardLogger
+   name: ${general.experiment_id}
+   version: ${general.version}
+   save_dir: ${general.save_dir}
+
+ - _target_: pytorch_lightning.loggers.CSVLogger
+   name: ${general.experiment_id}
+   version: ${general.version}
+   save_dir: ${general.save_dir}
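Each logging variant above is a YAML list of logger nodes. A sketch of how such a list is typically instantiated into live PyTorch Lightning loggers, assuming the `${general.*}` values are provided by the root config (stubbed here for brevity):

```python
# Sketch: turn a logging group (a list of logger nodes) into loggers.
from hydra.utils import instantiate
from omegaconf import OmegaConf

root = OmegaConf.create({
    "general": {"save_dir": "saved", "experiment_id": "debug", "version": 0},
    "logging": OmegaConf.load("conf/logging/minimal.yaml"),
})
loggers = [instantiate(node) for node in root.logging]
# loggers can then be passed to pytorch_lightning.Trainer(logger=loggers)
```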
models/Mask3D/build/lib/mask3d/conf/loss/cross_entropy.yaml ADDED
@@ -0,0 +1,3 @@
+ # @package _group_
+ _target_: torch.nn.CrossEntropyLoss
+ ignore_index: ${data.ignore_label}
models/Mask3D/build/lib/mask3d/conf/loss/set_criterion.yaml ADDED
@@ -0,0 +1,11 @@
+ # @package _group_
+ _target_: mask3d.models.criterion.SetCriterion
+ num_classes: ${general.num_targets}
+ eos_coef: 0.1
+ losses:
+   - "labels"
+   - "masks"
+ num_points: ${matcher.num_points}
+ oversample_ratio: 3.0
+ importance_sample_ratio: 0.75
+ class_weights: -1
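`eos_coef` down-weights the "no object" class in the classification term, in the spirit of DETR-style set criteria. A hedged sketch of that weighting (illustrative, not the project's exact criterion):

```python
import torch

num_classes, eos_coef = 14, 0.1
# One weight per class plus a trailing "no object" slot, down-weighted.
empty_weight = torch.ones(num_classes + 1)
empty_weight[-1] = eos_coef

logits = torch.randn(8, num_classes + 1)              # 8 queries
targets = torch.randint(0, num_classes + 1, (8,))     # matched labels
loss = torch.nn.functional.cross_entropy(logits, targets, weight=empty_weight)
print(loss.item())
```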
models/Mask3D/build/lib/mask3d/conf/loss/set_criterion_custom_weights_1.yaml ADDED
@@ -0,0 +1,11 @@
+ # @package _group_
+ _target_: mask3d.models.criterion.SetCriterion
+ num_classes: ${general.num_targets}
+ eos_coef: 0.1
+ losses:
+   - "labels"
+   - "masks"
+ num_points: ${matcher.num_points}
+ oversample_ratio: 3.0
+ importance_sample_ratio: 0.75
+ class_weights: [1.0, 1.5, 10.0, 1.0, 1.0, 1.0, 1.0, 1.0, 10.0, 10.0, 1.0, 10.0, 1.0, 1.0]
models/Mask3D/build/lib/mask3d/conf/matcher/hungarian_matcher.yaml ADDED
@@ -0,0 +1,6 @@
+ # @package _group_
+ _target_: mask3d.models.matcher.HungarianMatcher
+ cost_class: 2.
+ cost_mask: 5.
+ cost_dice: 2.
+ num_points: -1
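The matcher combines three cost terms with the weights above before solving a one-to-one assignment. A compact sketch of that pattern with SciPy; the real `HungarianMatcher` computes mask and dice costs on sampled points, which this toy version replaces with random matrices:

```python
import numpy as np
from scipy.optimize import linear_sum_assignment

rng = np.random.default_rng(0)
num_queries, num_targets = 5, 3
cost_class = rng.random((num_queries, num_targets))
cost_mask = rng.random((num_queries, num_targets))
cost_dice = rng.random((num_queries, num_targets))

# Weighted total cost, mirroring cost_class / cost_mask / cost_dice above.
C = 2.0 * cost_class + 5.0 * cost_mask + 2.0 * cost_dice
query_ids, target_ids = linear_sum_assignment(C)
print(list(zip(query_ids, target_ids)))  # optimal one-to-one matching
```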
models/Mask3D/build/lib/mask3d/conf/metrics/miou.yaml ADDED
@@ -0,0 +1,4 @@
+ # @package _group_
+ _target_: mask3d.models.metrics.ConfusionMatrix
+ num_classes: ${data.num_labels}
+ ignore_label: ${data.ignore_label}
models/Mask3D/build/lib/mask3d/conf/model/mask3d.yaml ADDED
@@ -0,0 +1,47 @@
+ # @package _group_
+ _target_: mask3d.models.Mask3D
+
+ # transformer parameters
+ hidden_dim: 128
+ dim_feedforward: 1024
+ num_queries: 100
+ num_heads: 8
+ num_decoders: 3
+ dropout: 0.0
+ pre_norm: false
+ use_level_embed: false
+ normalize_pos_enc: true
+ positional_encoding_type: "fourier"
+ gauss_scale: 1.0
+ hlevels: [0, 1, 2, 3]
+
+ # queries
+ non_parametric_queries: true
+ random_query_both: false
+ random_normal: false
+ random_queries: false
+ use_np_features: false
+
+ # sampling
+ sample_sizes: [200, 800, 3200, 12800, 51200]
+ max_sample_size: false # false means sampling is active
+
+ shared_decoder: true
+ num_classes: ${general.num_targets}
+ train_on_segments: ${general.train_on_segments}
+ scatter_type: "mean"
+
+ voxel_size: ${data.voxel_size}
+
+ config:
+   backbone:
+     _target_: mask3d.models.Res16UNet34C
+     config:
+       dilations: [1, 1, 1, 1]
+       conv1_kernel_size: 5
+       bn_momentum: 0.02
+       # depends on normals, color, raw_coordinates
+       # varies from 3 to 9
+       in_channels: ${data.in_channels}
+       out_channels: ${data.num_labels}
+       out_fpn: true
models/Mask3D/build/lib/mask3d/conf/optimizer/adamw.yaml ADDED
@@ -0,0 +1,3 @@
+ # @package _group_
+ _target_: torch.optim.AdamW
+ lr: 0.0001
models/Mask3D/build/lib/mask3d/conf/optimizer/adamw_lower.yaml ADDED
@@ -0,0 +1,3 @@
+ # @package _group_
+ _target_: torch.optim.AdamW
+ lr: 0.005
models/Mask3D/build/lib/mask3d/conf/scheduler/exponentiallr.yaml ADDED
@@ -0,0 +1,11 @@
+ # @package _group_
+
+ scheduler:
+   _target_: torch.optim.lr_scheduler.ExponentialLR
+   gamma: 0.99999
+   last_epoch: -1 # ${trainer.max_epochs}
+   # need to set to number because of tensorboard logger
+   # steps_per_epoch: -1
+
+ pytorch_lightning_params:
+   interval: step
models/Mask3D/build/lib/mask3d/conf/scheduler/lambdalr.yaml ADDED
@@ -0,0 +1,8 @@
+ # @package _group_
+
+ scheduler:
+   _target_: torch.optim.lr_scheduler.StepLR
+   step_size: 99999
+
+ pytorch_lightning_params:
+   interval: epoch
models/Mask3D/build/lib/mask3d/conf/scheduler/onecyclelr.yaml ADDED
@@ -0,0 +1,11 @@
+ # @package _group_
+
+ scheduler:
+   _target_: torch.optim.lr_scheduler.OneCycleLR
+   max_lr: ${optimizer.lr}
+   epochs: ${trainer.max_epochs}
+   # need to set to number because of tensorboard logger
+   steps_per_epoch: -1
+
+ pytorch_lightning_params:
+   interval: step
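`steps_per_epoch: -1` is a placeholder (see the comment above); the training code is expected to patch it with the real dataloader length before the scheduler is built. A hedged sketch of that step, where `cfg`, `train_loader`, and `optimizer` are assumed to exist in the surrounding code:

```python
# Sketch: replace the -1 placeholder with the actual number of optimizer
# steps per epoch before instantiating OneCycleLR.
from hydra.utils import instantiate

if cfg.scheduler.scheduler.steps_per_epoch == -1:
    cfg.scheduler.scheduler.steps_per_epoch = len(train_loader)
lr_scheduler = instantiate(cfg.scheduler.scheduler, optimizer=optimizer)
```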
models/Mask3D/build/lib/mask3d/conf/trainer/trainer.yaml ADDED
@@ -0,0 +1,7 @@
+ # @package _group_
+ deterministic: false
+ max_epochs: 1000
+ min_epochs: 1
+ resume_from_checkpoint: null
+ check_val_every_n_epoch: 50
+ num_sanity_val_steps: -1
models/Mask3D/build/lib/mask3d/conf/trainer/trainer600.yaml ADDED
@@ -0,0 +1,7 @@
+ # @package _group_
+ deterministic: false
+ max_epochs: 601
+ min_epochs: 1
+ resume_from_checkpoint: null
+ check_val_every_n_epoch: 50
+ num_sanity_val_steps: 2
models/Mask3D/build/lib/mask3d/datasets/__init__.py ADDED
File without changes
models/Mask3D/build/lib/mask3d/datasets/outdoor_semseg.py ADDED
@@ -0,0 +1,206 @@
+ import logging
+ from pathlib import Path
+ from typing import List, Optional, Union, Tuple
+ from random import random
+
+ import numpy as np
+ import volumentations as V
+ import yaml
+ from torch.utils.data import Dataset
+
+ logger = logging.getLogger(__name__)
+
+
+ class LidarDataset(Dataset):
+     def __init__(
+         self,
+         data_dir: Optional[
+             Union[str, Tuple[str]]
+         ] = "data/processed/semantic_kitti",
+         label_db_filepath: Optional[
+             str
+         ] = "./data/processed/semantic_kitti/label_database.yaml",
+         mode: Optional[str] = "train",
+         add_reflection: Optional[bool] = True,
+         add_distance: Optional[bool] = False,
+         add_instance: Optional[bool] = True,
+         num_labels: Optional[int] = -1,
+         data_percent: Optional[float] = 1.0,
+         ignore_label: Optional[Union[int, List[int]]] = 255,
+         volume_augmentations_path: Optional[str] = None,
+         sweep: Optional[int] = 1,
+     ):
+         self.mode = mode
+         self.data_dir = data_dir
+         if isinstance(data_dir, str):
+             self.data_dir = [self.data_dir]
+         self.ignore_label = ignore_label
+         self.add_instance = add_instance
+         self.add_distance = add_distance
+         self.add_reflection = add_reflection
+
+         # loading database files
+         self._data = []
+         for database_path in self.data_dir:
+             database_path = Path(database_path)
+             if not (database_path / f"{mode}_database.yaml").exists():
+                 print(f"generate {database_path}/{mode}_database.yaml first")
+                 exit()
+             self._data.extend(
+                 self._load_yaml(database_path / f"{mode}_database.yaml")
+             )
+
+         labels = self._load_yaml(Path(label_db_filepath))
+         self._labels = self._select_correct_labels(labels, num_labels)
+
+         # augmentations
+         self.volume_augmentations = V.NoOp()
+         if volume_augmentations_path is not None:
+             self.volume_augmentations = V.load(
+                 volume_augmentations_path, data_format="yaml"
+             )
+
+         # group consecutive scans of the same scene into sweeps
+         data = [[]]
+         last_scene = self._data[0]["scene"]
+         for x in self._data:
+             if x["scene"] == last_scene:
+                 data[-1].append(x)
+             else:
+                 last_scene = x["scene"]
+                 data.append([x])
+         for i in range(len(data)):
+             data[i] = list(self.chunks(data[i], sweep))
+         self._data = [val for sublist in data for val in sublist]
+
+         if data_percent < 1.0:
+             self._data = self._data[: int(len(self._data) * data_percent)]
+
+     @staticmethod
+     def chunks(lst, n):
+         """Yield successive n-sized chunks from lst."""
+         for i in range(0, len(lst), n):
+             yield lst[i : i + n]
+
+     def __len__(self):
+         return len(self.data)
+
+     def __getitem__(self, idx: int):
+         points = []
+         for sweep in self.data[idx]:
+             points.append(np.load(sweep["filepath"]))
+             # rotate into the sweep's pose frame
+             points[-1][:, :3] = (
+                 points[-1][:, :3] @ np.array(sweep["pose"])[:3, :3]
+             )
+             # translate
+             points[-1][:, :3] += np.array(sweep["pose"])[:3, 3]
+         points = np.vstack(points)
+
+         coordinates, features, labels = (
+             points[:, :3],
+             points[:, 3:-2],
+             points[:, -2:],
+         )
+
+         if not self.add_reflection:
+             features = np.ones((len(coordinates), 1))
+
+         if self.add_distance:
+             center_coordinate = coordinates.mean(0)
+             features = np.hstack(
+                 (
+                     features,
+                     np.linalg.norm(coordinates - center_coordinate, axis=1)[
+                         :, np.newaxis
+                     ],
+                 )
+             )
+
+         # volume and image augmentations for train
+         if "train" in self.mode:
+             coordinates -= coordinates.mean(0)
+             if random() < 0.5:
+                 coordinates += (
+                     np.random.uniform(coordinates.min(0), coordinates.max(0))
+                     / 2
+                 )
+             aug = self.volume_augmentations(
+                 points=coordinates,
+                 features=features,
+                 labels=labels,
+             )
+             coordinates, features, labels = (
+                 aug["points"],
+                 aug["features"],
+                 aug["labels"],
+             )
+
+         # prepare labels and remap them to a range starting at 0
+         labels = labels.astype(np.int32)
+         if labels.size > 0:
+             labels[:, 0] = self._remap_from_zero(labels[:, 0])
+             if not self.add_instance:
+                 # take only the first column: the semantic label, not the instance id
+                 labels = labels[:, 0].flatten()
+
+         return coordinates, features, labels
+
+     @property
+     def data(self):
+         """database file containing information about the preprocessed dataset"""
+         return self._data
+
+     @property
+     def label_info(self):
+         """database file containing information about the labels used by the dataset"""
+         return self._labels
+
+     @staticmethod
+     def _load_yaml(filepath):
+         with open(filepath) as f:
+             file = yaml.safe_load(f)
+         return file
+
+     def _select_correct_labels(self, labels, num_labels):
+         number_of_validation_labels = 0
+         number_of_all_labels = 0
+         for k, v in labels.items():
+             number_of_all_labels += 1
+             if v["validation"]:
+                 number_of_validation_labels += 1
+
+         if num_labels == number_of_all_labels:
+             return labels
+         elif num_labels == number_of_validation_labels:
+             valid_labels = dict()
+             for k, v in labels.items():
+                 if v["validation"]:
+                     valid_labels.update({k: v})
+             return valid_labels
+         else:
+             msg = (
+                 f"invalid number of labels ({num_labels}); choose either "
+                 f"{number_of_validation_labels} or {number_of_all_labels}"
+             )
+             raise ValueError(msg)
+
+     def _remap_from_zero(self, labels):
+         # mark labels that are not in the label database as ignored
+         labels[
+             ~np.isin(labels, list(self.label_info.keys()))
+         ] = self.ignore_label
+         # remap the remaining labels to the range starting from 0
+         for i, k in enumerate(self.label_info.keys()):
+             labels[labels == k] = i
+         return labels
+
+     def _remap_model_output(self, output):
+         output = np.array(output)
+         output_remapped = output.copy()
+         for i, k in enumerate(self.label_info.keys()):
+             output_remapped[output == i] = k
+         return output_remapped
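The sweep grouping in `__init__` packs consecutive scans of a scene into fixed-size chunks via the `chunks` static method. A quick illustration of that helper, assuming the package's dependencies are installed so the module imports:

```python
# Quick illustration of LidarDataset.chunks: successive n-sized chunks.
from mask3d.datasets.outdoor_semseg import LidarDataset

scans = ["scan0", "scan1", "scan2", "scan3", "scan4"]
print(list(LidarDataset.chunks(scans, 2)))
# [['scan0', 'scan1'], ['scan2', 'scan3'], ['scan4']]
```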
models/Mask3D/build/lib/mask3d/datasets/preprocessing/__init__.py ADDED
File without changes