Spaces:
Running
on
Zero
Running
on
Zero
initial commit
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- Pointcept/.gitignore +16 -0
- Pointcept/LICENSE +21 -0
- Pointcept/README.md +896 -0
- Pointcept/configs/_base_/dataset/scannetpp.py +104 -0
- Pointcept/configs/_base_/default_runtime.py +39 -0
- Pointcept/configs/matterport3d/semseg-pt-v3m1-0-base.py +313 -0
- Pointcept/configs/matterport3d/semseg-spunet-v1m1-0-base.py +282 -0
- Pointcept/configs/modelnet40/cls-ptv3-v1m1-0-base.py +232 -0
- Pointcept/configs/modelnet40/cls-spunet-v1m1-0-base.py +176 -0
- Pointcept/configs/nuscenes/semseg-ppt-v1m1-0-nu-sk-wa-spunet.py +342 -0
- Pointcept/configs/nuscenes/semseg-ppt-v1m2-0-nu-sk-wa-spunet.py +316 -0
- Pointcept/configs/nuscenes/semseg-ppt-v1m2-1-nu-sk-wa-spunet-submit.py +292 -0
- Pointcept/configs/nuscenes/semseg-pt-v2m2-0-base.py +174 -0
- Pointcept/configs/nuscenes/semseg-pt-v2m2-1-benchmark-submit.py +157 -0
- Pointcept/configs/nuscenes/semseg-pt-v3m1-0-base.py +215 -0
- Pointcept/configs/nuscenes/semseg-spunet-v1m1-0-base.py +183 -0
- Pointcept/configs/s3dis/insseg-pointgroup-v1m1-0-spunet-base-vs0p02-sc-aug.py +180 -0
- Pointcept/configs/s3dis/insseg-pointgroup-v1m1-0-spunet-base-vs0p02.py +180 -0
- Pointcept/configs/s3dis/insseg-pointgroup-v1m1-0-spunet-base.py +181 -0
- Pointcept/configs/s3dis/insseg-ppt-v1m1-0-pointgroup-spunet-ft-vs0p05.py +273 -0
- Pointcept/configs/s3dis/insseg-ppt-v1m1-0-pointgroup-spunet-ft.py +273 -0
- Pointcept/configs/s3dis/semseg-minkunet34c-0-base.py +174 -0
- Pointcept/configs/s3dis/semseg-ppt-v1m1-0-s3-sc-st-spunet.py +496 -0
- Pointcept/configs/s3dis/semseg-pt-v1-0-base.py +170 -0
- Pointcept/configs/s3dis/semseg-pt-v2m1-0-base.py +189 -0
- Pointcept/configs/s3dis/semseg-pt-v2m2-0-base.py +189 -0
- Pointcept/configs/s3dis/semseg-pt-v2m2-0-lovasz.py +192 -0
- Pointcept/configs/s3dis/semseg-pt-v2m2-1-one-cycle.py +196 -0
- Pointcept/configs/s3dis/semseg-pt-v3m1-0-base.py +225 -0
- Pointcept/configs/s3dis/semseg-pt-v3m1-1-rpe.py +225 -0
- Pointcept/configs/s3dis/semseg-pt-v3m1-2-ppt-extreme.py +487 -0
- Pointcept/configs/s3dis/semseg-spunet-v1m1-0-base.py +168 -0
- Pointcept/configs/s3dis/semseg-spunet-v1m1-0-cn-base.py +181 -0
- Pointcept/configs/s3dis/semseg-spunet-v1m2-0-base.py +184 -0
- Pointcept/configs/s3dis/semseg-swin3d-v1m1-0-small.py +184 -0
- Pointcept/configs/s3dis/semseg-swin3d-v1m1-1-large.py +191 -0
- Pointcept/configs/scannet/insseg-pointgroup-v1m1-0-spunet-base.py +187 -0
- Pointcept/configs/scannet/insseg-ppt-v1m1-0-pointgroup-spunet-ft.py +279 -0
- Pointcept/configs/scannet/objdet-cagroup3d-v1m1-0-base.py +183 -0
- Pointcept/configs/scannet/pretrain-msc-v1m1-0-spunet-base.py +155 -0
- Pointcept/configs/scannet/pretrain-msc-v1m1-1-spunet-pointcontrast.py +162 -0
- Pointcept/configs/scannet/pretrain-msc-v1m2-0-spunet-csc.py +165 -0
- Pointcept/configs/scannet/semseg-cac-v1m1-0-spunet-base.py +292 -0
- Pointcept/configs/scannet/semseg-cac-v1m1-1-spunet-lovasz.py +292 -0
- Pointcept/configs/scannet/semseg-cac-v1m1-2-ptv2-lovasz.py +309 -0
- Pointcept/configs/scannet/semseg-minkunet34c-0-base.py +193 -0
- Pointcept/configs/scannet/semseg-oacnns-v1m1-0-base.py +290 -0
- Pointcept/configs/scannet/semseg-octformer-v1m1-0-base.py +296 -0
- Pointcept/configs/scannet/semseg-ppt-v1m1-0-sc-st-spunet.py +391 -0
- Pointcept/configs/scannet/semseg-ppt-v1m1-1-sc-st-spunet-submit.py +366 -0
Pointcept/.gitignore
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
image/
|
2 |
+
__pycache__
|
3 |
+
**/build/
|
4 |
+
**/*.egg-info/
|
5 |
+
**/dist/
|
6 |
+
*.so
|
7 |
+
exp
|
8 |
+
weights
|
9 |
+
data
|
10 |
+
log
|
11 |
+
outputs/
|
12 |
+
.vscode
|
13 |
+
.idea
|
14 |
+
*/.DS_Store
|
15 |
+
**/*.out
|
16 |
+
Dockerfile
|
Pointcept/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
MIT License
|
2 |
+
|
3 |
+
Copyright (c) 2023 Pointcept
|
4 |
+
|
5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6 |
+
of this software and associated documentation files (the "Software"), to deal
|
7 |
+
in the Software without restriction, including without limitation the rights
|
8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9 |
+
copies of the Software, and to permit persons to whom the Software is
|
10 |
+
furnished to do so, subject to the following conditions:
|
11 |
+
|
12 |
+
The above copyright notice and this permission notice shall be included in all
|
13 |
+
copies or substantial portions of the Software.
|
14 |
+
|
15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21 |
+
SOFTWARE.
|
Pointcept/README.md
ADDED
@@ -0,0 +1,896 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<p align="center">
|
2 |
+
<!-- pypi-strip -->
|
3 |
+
<picture>
|
4 |
+
<source media="(prefers-color-scheme: dark)" srcset="https://raw.githubusercontent.com/Pointcept/Pointcept/main/docs/logo_dark.png">
|
5 |
+
<source media="(prefers-color-scheme: light)" srcset="https://raw.githubusercontent.com/Pointcept/Pointcept/main/docs/logo.png">
|
6 |
+
<!-- /pypi-strip -->
|
7 |
+
<img alt="pointcept" src="https://raw.githubusercontent.com/Pointcept/Pointcept/main/docs/logo.png" width="400">
|
8 |
+
<!-- pypi-strip -->
|
9 |
+
</picture><br>
|
10 |
+
<!-- /pypi-strip -->
|
11 |
+
</p>
|
12 |
+
|
13 |
+
[![Formatter](https://github.com/pointcept/pointcept/actions/workflows/formatter.yml/badge.svg)](https://github.com/pointcept/pointcept/actions/workflows/formatter.yml)
|
14 |
+
|
15 |
+
**Pointcept** is a powerful and flexible codebase for point cloud perception research. It is also an official implementation of the following papers:
|
16 |
+
- **Point Transformer V3: Simpler, Faster, Stronger**
|
17 |
+
*Xiaoyang Wu, Li Jiang, Peng-Shuai Wang, Zhijian Liu, Xihui Liu, Yu Qiao, Wanli Ouyang, Tong He, Hengshuang Zhao*
|
18 |
+
IEEE Conference on Computer Vision and Pattern Recognition (**CVPR**) 2024 - Oral
|
19 |
+
[ Backbone ] [PTv3] - [ [arXiv](https://arxiv.org/abs/2312.10035) ] [ [Bib](https://xywu.me/research/ptv3/bib.txt) ] [ [Project](https://github.com/Pointcept/PointTransformerV3) ] → [here](https://github.com/Pointcept/PointTransformerV3)
|
20 |
+
|
21 |
+
- **OA-CNNs: Omni-Adaptive Sparse CNNs for 3D Semantic Segmentation**
|
22 |
+
*Bohao Peng, Xiaoyang Wu, Li Jiang, Yukang Chen, Hengshuang Zhao, Zhuotao Tian, Jiaya Jia*
|
23 |
+
IEEE Conference on Computer Vision and Pattern Recognition (**CVPR**) 2024
|
24 |
+
[ Backbone ] [ OA-CNNs ] - [ [arXiv](https://arxiv.org/abs/2403.14418) ] [ [Bib](https://xywu.me/research/oacnns/bib.txt) ] → [here](#oa-cnns)
|
25 |
+
|
26 |
+
- **PonderV2: Pave the Way for 3D Foundation Model with A Universal Pre-training Paradigm**
|
27 |
+
*Haoyi Zhu\*, Honghui Yang\*, Xiaoyang Wu\*, Di Huang\*, Sha Zhang, Xianglong He, Tong He, Hengshuang Zhao, Chunhua Shen, Yu Qiao, Wanli Ouyang*
|
28 |
+
arXiv Preprint 2023
|
29 |
+
[ Pretrain ] [PonderV2] - [ [arXiv](https://arxiv.org/abs/2310.08586) ] [ [Bib](https://xywu.me/research/ponderv2/bib.txt) ] [ [Project](https://github.com/OpenGVLab/PonderV2) ] → [here](https://github.com/OpenGVLab/PonderV2)
|
30 |
+
|
31 |
+
|
32 |
+
- **Towards Large-scale 3D Representation Learning with Multi-dataset Point Prompt Training**
|
33 |
+
*Xiaoyang Wu, Zhuotao Tian, Xin Wen, Bohao Peng, Xihui Liu, Kaicheng Yu, Hengshuang Zhao*
|
34 |
+
IEEE Conference on Computer Vision and Pattern Recognition (**CVPR**) 2024
|
35 |
+
[ Pretrain ] [PPT] - [ [arXiv](https://arxiv.org/abs/2308.09718) ] [ [Bib](https://xywu.me/research/ppt/bib.txt) ] → [here](#point-prompt-training-ppt)
|
36 |
+
|
37 |
+
- **Masked Scene Contrast: A Scalable Framework for Unsupervised 3D Representation Learning**
|
38 |
+
*Xiaoyang Wu, Xin Wen, Xihui Liu, Hengshuang Zhao*
|
39 |
+
IEEE Conference on Computer Vision and Pattern Recognition (**CVPR**) 2023
|
40 |
+
[ Pretrain ] [ MSC ] - [ [arXiv](https://arxiv.org/abs/2303.14191) ] [ [Bib](https://xywu.me/research/msc/bib.txt) ] → [here](#masked-scene-contrast-msc)
|
41 |
+
|
42 |
+
|
43 |
+
- **Learning Context-aware Classifier for Semantic Segmentation** (3D Part)
|
44 |
+
*Zhuotao Tian, Jiequan Cui, Li Jiang, Xiaojuan Qi, Xin Lai, Yixin Chen, Shu Liu, Jiaya Jia*
|
45 |
+
AAAI Conference on Artificial Intelligence (**AAAI**) 2023 - Oral
|
46 |
+
[ SemSeg ] [ CAC ] - [ [arXiv](https://arxiv.org/abs/2303.11633) ] [ [Bib](https://xywu.me/research/cac/bib.txt) ] [ [2D Part](https://github.com/tianzhuotao/CAC) ] → [here](#context-aware-classifier)
|
47 |
+
|
48 |
+
|
49 |
+
- **Point Transformer V2: Grouped Vector Attention and Partition-based Pooling**
|
50 |
+
*Xiaoyang Wu, Yixing Lao, Li Jiang, Xihui Liu, Hengshuang Zhao*
|
51 |
+
Conference on Neural Information Processing Systems (**NeurIPS**) 2022
|
52 |
+
[ Backbone ] [ PTv2 ] - [ [arXiv](https://arxiv.org/abs/2210.05666) ] [ [Bib](https://xywu.me/research/ptv2/bib.txt) ] → [here](#point-transformers)
|
53 |
+
|
54 |
+
|
55 |
+
- **Point Transformer**
|
56 |
+
*Hengshuang Zhao, Li Jiang, Jiaya Jia, Philip Torr, Vladlen Koltun*
|
57 |
+
IEEE International Conference on Computer Vision (**ICCV**) 2021 - Oral
|
58 |
+
[ Backbone ] [ PTv1 ] - [ [arXiv](https://arxiv.org/abs/2012.09164) ] [ [Bib](https://hszhao.github.io/papers/iccv21_pointtransformer_bib.txt) ] → [here](#point-transformers)
|
59 |
+
|
60 |
+
Additionally, **Pointcept** integrates the following excellent work (including the above):
|
61 |
+
Backbone:
|
62 |
+
[MinkUNet](https://github.com/NVIDIA/MinkowskiEngine) ([here](#sparseunet)),
|
63 |
+
[SpUNet](https://github.com/traveller59/spconv) ([here](#sparseunet)),
|
64 |
+
[SPVCNN](https://github.com/mit-han-lab/spvnas) ([here](#spvcnn)),
|
65 |
+
[OACNNs](https://arxiv.org/abs/2403.14418) ([here](#oa-cnns)),
|
66 |
+
[PTv1](https://arxiv.org/abs/2012.09164) ([here](#point-transformers)),
|
67 |
+
[PTv2](https://arxiv.org/abs/2210.05666) ([here](#point-transformers)),
|
68 |
+
[PTv3](https://arxiv.org/abs/2312.10035) ([here](#point-transformers)),
|
69 |
+
[StratifiedFormer](https://github.com/dvlab-research/Stratified-Transformer) ([here](#stratified-transformer)),
|
70 |
+
[OctFormer](https://github.com/octree-nn/octformer) ([here](#octformer)),
|
71 |
+
[Swin3D](https://github.com/microsoft/Swin3D) ([here](#swin3d));
|
72 |
+
Semantic Segmentation:
|
73 |
+
[Mix3d](https://github.com/kumuji/mix3d) ([here](https://github.com/Pointcept/Pointcept/blob/main/configs/scannet/semseg-spunet-v1m1-0-base.py#L5)),
|
74 |
+
[CAC](https://arxiv.org/abs/2303.11633) ([here](#context-aware-classifier));
|
75 |
+
Instance Segmentation:
|
76 |
+
[PointGroup](https://github.com/dvlab-research/PointGroup) ([here](#pointgroup));
|
77 |
+
Pre-training:
|
78 |
+
[PointContrast](https://github.com/facebookresearch/PointContrast) ([here](#pointcontrast)),
|
79 |
+
[Contrastive Scene Contexts](https://github.com/facebookresearch/ContrastiveSceneContexts) ([here](#contrastive-scene-contexts)),
|
80 |
+
[Masked Scene Contrast](https://arxiv.org/abs/2303.14191) ([here](#masked-scene-contrast-msc)),
|
81 |
+
[Point Prompt Training](https://arxiv.org/abs/2308.09718) ([here](#point-prompt-training-ppt));
|
82 |
+
Datasets:
|
83 |
+
[ScanNet](http://www.scan-net.org/) ([here](#scannet-v2)),
|
84 |
+
[ScanNet200](http://www.scan-net.org/) ([here](#scannet-v2)),
|
85 |
+
[ScanNet++](https://kaldir.vc.in.tum.de/scannetpp/) ([here](#scannet)),
|
86 |
+
[S3DIS](https://docs.google.com/forms/d/e/1FAIpQLScDimvNMCGhy_rmBA2gHfDu3naktRm6A8BPwAWWDv-Uhm6Shw/viewform?c=0&w=1) ([here](#s3dis)),
|
87 |
+
[Matterport3D](https://niessner.github.io/Matterport/) ([here](#matterport3d)),
|
88 |
+
[ARKitScenes](https://github.com/apple/ARKitScenes),
|
89 |
+
[Structured3D](https://structured3d-dataset.org/) ([here](#structured3d)),
|
90 |
+
[SemanticKITTI](http://www.semantic-kitti.org/) ([here](#semantickitti)),
|
91 |
+
[nuScenes](https://www.nuscenes.org/nuscenes) ([here](#nuscenes)),
|
92 |
+
[ModelNet40](https://modelnet.cs.princeton.edu/) ([here](#modelnet)),
|
93 |
+
[Waymo](https://waymo.com/open/) ([here](#waymo)).
|
94 |
+
|
95 |
+
|
96 |
+
## Highlights
|
97 |
+
- *May, 2024*: In v1.5.2, we redesigned the default structure for each dataset for better performance. Please **re-preprocess** datasets or **download** our preprocessed datasets from **[here](https://huggingface.co/Pointcept)**.
|
98 |
+
- *Apr, 2024*: **PTv3** is selected as one of the 90 **Oral** papers (3.3% accepted papers, 0.78% submissions) by CVPR'24!
|
99 |
+
- *Mar, 2024*: We release code for **OA-CNNs**, accepted by CVPR'24. Issues related to **OA-CNNs** can be directed to @Pbihao.
|
100 |
+
- *Feb, 2024*: **PTv3** and **PPT** are accepted by CVPR'24, another **two** papers by our Pointcept team have also been accepted by CVPR'24 🎉🎉🎉. We will make them publicly available soon!
|
101 |
+
- *Dec, 2023*: **PTv3** is released on arXiv, and the code is available in Pointcept. PTv3 is an efficient backbone model that achieves SOTA performances across indoor and outdoor scenarios.
|
102 |
+
- *Aug, 2023*: **PPT** is released on arXiv. PPT presents a multi-dataset pre-training framework that achieves SOTA performance in both **indoor** and **outdoor** scenarios. It is compatible with various existing pre-training frameworks and backbones. A **pre-release** version of the code is accessible; for those interested, please feel free to contact me directly for access.
|
103 |
+
- *Mar, 2023*: We released our codebase, **Pointcept**, a highly potent tool for point cloud representation learning and perception. We welcome new work to join the _Pointcept_ family and highly recommend reading [Quick Start](#quick-start) before starting your trial.
|
104 |
+
- *Feb, 2023*: **MSC** and **CeCo** accepted by CVPR 2023. _MSC_ is a highly efficient and effective pretraining framework that facilitates cross-dataset large-scale pretraining, while _CeCo_ is a segmentation method specifically designed for long-tail datasets. Both approaches are compatible with all existing backbone models in our codebase, and we will soon make the code available for public use.
|
105 |
+
- *Jan, 2023*: **CAC**, oral work of AAAI 2023, has expanded its 3D result with the incorporation of Pointcept. This addition will allow CAC to serve as a pluggable segmentor within our codebase.
|
106 |
+
- *Sep, 2022*: **PTv2** accepted by NeurIPS 2022. It is a continuation of the Point Transformer. The proposed GVA theory can apply to most existing attention mechanisms, while Grid Pooling is also a practical addition to existing pooling methods.
|
107 |
+
|
108 |
+
## Citation
|
109 |
+
If you find _Pointcept_ useful to your research, please cite our work as encouragement. (੭ˊ꒳ˋ)੭✧
|
110 |
+
```
|
111 |
+
@misc{pointcept2023,
|
112 |
+
title={Pointcept: A Codebase for Point Cloud Perception Research},
|
113 |
+
author={Pointcept Contributors},
|
114 |
+
howpublished = {\url{https://github.com/Pointcept/Pointcept}},
|
115 |
+
year={2023}
|
116 |
+
}
|
117 |
+
```
|
118 |
+
|
119 |
+
## Overview
|
120 |
+
|
121 |
+
- [Installation](#installation)
|
122 |
+
- [Data Preparation](#data-preparation)
|
123 |
+
- [Quick Start](#quick-start)
|
124 |
+
- [Model Zoo](#model-zoo)
|
125 |
+
- [Citation](#citation)
|
126 |
+
- [Acknowledgement](#acknowledgement)
|
127 |
+
|
128 |
+
## Installation
|
129 |
+
|
130 |
+
### Requirements
|
131 |
+
- Ubuntu: 18.04 and above.
|
132 |
+
- CUDA: 11.3 and above.
|
133 |
+
- PyTorch: 1.10.0 and above.
|
134 |
+
|
135 |
+
### Conda Environment
|
136 |
+
|
137 |
+
```bash
|
138 |
+
conda create -n pointcept python=3.8 -y
|
139 |
+
conda activate pointcept
|
140 |
+
conda install ninja -y
|
141 |
+
# Choose version you want here: https://pytorch.org/get-started/previous-versions/
|
142 |
+
conda install pytorch==1.12.1 torchvision==0.13.1 torchaudio==0.12.1 cudatoolkit=11.3 -c pytorch -y
|
143 |
+
conda install h5py pyyaml -c anaconda -y
|
144 |
+
conda install sharedarray tensorboard tensorboardx yapf addict einops scipy plyfile termcolor timm -c conda-forge -y
|
145 |
+
conda install pytorch-cluster pytorch-scatter pytorch-sparse -c pyg -y
|
146 |
+
pip install torch-geometric
|
147 |
+
|
148 |
+
# spconv (SparseUNet)
|
149 |
+
# refer to https://github.com/traveller59/spconv
|
150 |
+
pip install spconv-cu113
|
151 |
+
|
152 |
+
# PPT (clip)
|
153 |
+
pip install ftfy regex tqdm
|
154 |
+
pip install git+https://github.com/openai/CLIP.git
|
155 |
+
|
156 |
+
# PTv1 & PTv2 or precise eval
|
157 |
+
cd libs/pointops
|
158 |
+
# usual
|
159 |
+
python setup.py install
|
160 |
+
# docker & multi GPU arch
|
161 |
+
TORCH_CUDA_ARCH_LIST="ARCH LIST" python setup.py install
|
162 |
+
# e.g. 7.5: RTX 2000 series; 8.0: A100; 8.6: RTX 3000 series. More available in: https://developer.nvidia.com/cuda-gpus
|
163 |
+
TORCH_CUDA_ARCH_LIST="7.5 8.0" python setup.py install
|
164 |
+
cd ../..
|
165 |
+
|
166 |
+
# Open3D (visualization, optional)
|
167 |
+
pip install open3d
|
168 |
+
```
|
169 |
+
|
170 |
+
## Data Preparation
|
171 |
+
|
172 |
+
### ScanNet v2
|
173 |
+
|
174 |
+
The preprocessing supports semantic and instance segmentation for `ScanNet20`, `ScanNet200`, and `ScanNet Data Efficient`.
|
175 |
+
- Download the [ScanNet](http://www.scan-net.org/) v2 dataset.
|
176 |
+
- Run preprocessing code for raw ScanNet as follows:
|
177 |
+
|
178 |
+
```bash
|
179 |
+
# RAW_SCANNET_DIR: the directory of downloaded ScanNet v2 raw dataset.
|
180 |
+
# PROCESSED_SCANNET_DIR: the directory of the processed ScanNet dataset (output dir).
|
181 |
+
python pointcept/datasets/preprocessing/scannet/preprocess_scannet.py --dataset_root ${RAW_SCANNET_DIR} --output_root ${PROCESSED_SCANNET_DIR}
|
182 |
+
```
|
183 |
+
- (Optional) Download ScanNet Data Efficient files:
|
184 |
+
```bash
|
185 |
+
# download-scannet.py is the official download script
|
186 |
+
# or follow instructions here: https://kaldir.vc.in.tum.de/scannet_benchmark/data_efficient/documentation#download
|
187 |
+
python download-scannet.py --data_efficient -o ${RAW_SCANNET_DIR}
|
188 |
+
# unzip downloads
|
189 |
+
cd ${RAW_SCANNET_DIR}/tasks
|
190 |
+
unzip limited-annotation-points.zip
|
191 |
+
unzip limited-reconstruction-scenes.zip
|
192 |
+
# copy files to processed dataset folder
|
193 |
+
mkdir ${PROCESSED_SCANNET_DIR}/tasks
|
194 |
+
cp -r ${RAW_SCANNET_DIR}/tasks/points ${PROCESSED_SCANNET_DIR}/tasks
|
195 |
+
cp -r ${RAW_SCANNET_DIR}/tasks/scenes ${PROCESSED_SCANNET_DIR}/tasks
|
196 |
+
```
|
197 |
+
- (Alternative) Our preprocessed data can be directly downloaded [[here](https://huggingface.co/datasets/Pointcept/scannet-compressed)]; please agree to the official license before downloading it.
|
198 |
+
|
199 |
+
- Link processed dataset to codebase:
|
200 |
+
```bash
|
201 |
+
# PROCESSED_SCANNET_DIR: the directory of the processed ScanNet dataset.
|
202 |
+
mkdir data
|
203 |
+
ln -s ${PROCESSED_SCANNET_DIR} ${CODEBASE_DIR}/data/scannet
|
204 |
+
```
|
205 |
+
|
206 |
+
### ScanNet++
|
207 |
+
- Download the [ScanNet++](https://kaldir.vc.in.tum.de/scannetpp/) dataset.
|
208 |
+
- Run preprocessing code for raw ScanNet++ as follows:
|
209 |
+
```bash
|
210 |
+
# RAW_SCANNETPP_DIR: the directory of downloaded ScanNet++ raw dataset.
|
211 |
+
# PROCESSED_SCANNETPP_DIR: the directory of the processed ScanNet++ dataset (output dir).
|
212 |
+
# NUM_WORKERS: the number of workers for parallel preprocessing.
|
213 |
+
python pointcept/datasets/preprocessing/scannetpp/preprocess_scannetpp.py --dataset_root ${RAW_SCANNETPP_DIR} --output_root ${PROCESSED_SCANNETPP_DIR} --num_workers ${NUM_WORKERS}
|
214 |
+
```
|
215 |
+
- Sampling and chunking large point cloud data in train/val split as follows (only used for training):
|
216 |
+
```bash
|
217 |
+
# PROCESSED_SCANNETPP_DIR: the directory of the processed ScanNet++ dataset (output dir).
|
218 |
+
# NUM_WORKERS: the number of workers for parallel preprocessing.
|
219 |
+
python pointcept/datasets/preprocessing/sampling_chunking_data.py --dataset_root ${PROCESSED_SCANNETPP_DIR} --grid_size 0.01 --chunk_range 6 6 --chunk_stride 3 3 --split train --num_workers ${NUM_WORKERS}
|
220 |
+
python pointcept/datasets/preprocessing/sampling_chunking_data.py --dataset_root ${PROCESSED_SCANNETPP_DIR} --grid_size 0.01 --chunk_range 6 6 --chunk_stride 3 3 --split val --num_workers ${NUM_WORKERS}
|
221 |
+
```
|
222 |
+
- (Alternative) Our preprocessed data can be directly downloaded [[here](https://huggingface.co/datasets/Pointcept/scannetpp-compressed)]; please agree to the official license before downloading it.
|
223 |
+
- Link processed dataset to codebase:
|
224 |
+
```bash
|
225 |
+
# PROCESSED_SCANNETPP_DIR: the directory of the processed ScanNet++ dataset.
|
226 |
+
mkdir data
|
227 |
+
ln -s ${PROCESSED_SCANNETPP_DIR} ${CODEBASE_DIR}/data/scannetpp
|
228 |
+
```
|
229 |
+
|
230 |
+
### S3DIS
|
231 |
+
|
232 |
+
- Download S3DIS data by filling this [Google form](https://docs.google.com/forms/d/e/1FAIpQLScDimvNMCGhy_rmBA2gHfDu3naktRm6A8BPwAWWDv-Uhm6Shw/viewform?c=0&w=1). Download the `Stanford3dDataset_v1.2.zip` file and unzip it.
|
233 |
+
- Fix error in `Area_5/office_19/Annotations/ceiling` Line 323474: the value contains a stray invalid character and must be corrected by hand (103.0�0000 => 103.000000).
|
234 |
+
- (Optional) Download Full 2D-3D S3DIS dataset (no XYZ) from [here](https://github.com/alexsax/2D-3D-Semantics) for parsing normal.
|
235 |
+
- Run preprocessing code for S3DIS as follows:
|
236 |
+
|
237 |
+
```bash
|
238 |
+
# S3DIS_DIR: the directory of downloaded Stanford3dDataset_v1.2 dataset.
|
239 |
+
# RAW_S3DIS_DIR: the directory of Stanford2d3dDataset_noXYZ dataset. (optional, for parsing normal)
|
240 |
+
# PROCESSED_S3DIS_DIR: the directory of processed S3DIS dataset (output dir).
|
241 |
+
|
242 |
+
# S3DIS without aligned angle
|
243 |
+
python pointcept/datasets/preprocessing/s3dis/preprocess_s3dis.py --dataset_root ${S3DIS_DIR} --output_root ${PROCESSED_S3DIS_DIR}
|
244 |
+
# S3DIS with aligned angle
|
245 |
+
python pointcept/datasets/preprocessing/s3dis/preprocess_s3dis.py --dataset_root ${S3DIS_DIR} --output_root ${PROCESSED_S3DIS_DIR} --align_angle
|
246 |
+
# S3DIS with normal vector (recommended, normal is helpful)
|
247 |
+
python pointcept/datasets/preprocessing/s3dis/preprocess_s3dis.py --dataset_root ${S3DIS_DIR} --output_root ${PROCESSED_S3DIS_DIR} --raw_root ${RAW_S3DIS_DIR} --parse_normal
|
248 |
+
python pointcept/datasets/preprocessing/s3dis/preprocess_s3dis.py --dataset_root ${S3DIS_DIR} --output_root ${PROCESSED_S3DIS_DIR} --raw_root ${RAW_S3DIS_DIR} --align_angle --parse_normal
|
249 |
+
```
|
250 |
+
|
251 |
+
- (Alternative) Our preprocessed data can also be downloaded [[here](https://huggingface.co/datasets/Pointcept/s3dis-compressed
|
252 |
+
)] (with normal vector and aligned angle), please agree with the official license before downloading it.
|
253 |
+
|
254 |
+
- Link processed dataset to codebase.
|
255 |
+
```bash
|
256 |
+
# PROCESSED_S3DIS_DIR: the directory of processed S3DIS dataset.
|
257 |
+
mkdir data
|
258 |
+
ln -s ${PROCESSED_S3DIS_DIR} ${CODEBASE_DIR}/data/s3dis
|
259 |
+
```
|
260 |
+
### Structured3D
|
261 |
+
|
262 |
+
- Download Structured3D panorama related and perspective (full) related zip files by filling this [Google form](https://docs.google.com/forms/d/e/1FAIpQLSc0qtvh4vHSoZaW6UvlXYy79MbcGdZfICjh4_t4bYofQIVIdw/viewform?pli=1) (no need to unzip them).
|
263 |
+
- Organize all downloaded zip files in one folder (`${STRUCT3D_DIR}`).
|
264 |
+
- Run preprocessing code for Structured3D as follows:
|
265 |
+
```bash
|
266 |
+
# STRUCT3D_DIR: the directory of downloaded Structured3D dataset.
|
267 |
+
# PROCESSED_STRUCT3D_DIR: the directory of processed Structured3D dataset (output dir).
|
268 |
+
# NUM_WORKERS: Number of workers for preprocessing, default same as cpu count (might OOM).
|
269 |
+
export PYTHONPATH=./
|
270 |
+
python pointcept/datasets/preprocessing/structured3d/preprocess_structured3d.py --dataset_root ${STRUCT3D_DIR} --output_root ${PROCESSED_STRUCT3D_DIR} --num_workers ${NUM_WORKERS} --grid_size 0.01 --fuse_prsp --fuse_pano
|
271 |
+
```
|
272 |
+
Following the instruction of [Swin3D](https://arxiv.org/abs/2304.06906), we keep 25 categories with frequencies of more than 0.001, out of the original 40 categories.
|
273 |
+
|
274 |
+
[//]: # (- (Alternative) Our preprocess data can also be downloaded [[here]()], please agree the official license before download it.)
|
275 |
+
|
276 |
+
- (Alternative) Our preprocess data can also be downloaded [[here](https://huggingface.co/datasets/Pointcept/structured3d-compressed
|
277 |
+
)] (with perspective views and panorama view, 471.7G after unzipping), please agree to the official license before downloading it.
|
278 |
+
|
279 |
+
- Link processed dataset to codebase.
|
280 |
+
```bash
|
281 |
+
# PROCESSED_STRUCT3D_DIR: the directory of processed Structured3D dataset (output dir).
|
282 |
+
mkdir data
|
283 |
+
ln -s ${PROCESSED_STRUCT3D_DIR} ${CODEBASE_DIR}/data/structured3d
|
284 |
+
```
|
285 |
+
### Matterport3D
|
286 |
+
- Follow [this page](https://niessner.github.io/Matterport/#download) to request access to the dataset.
|
287 |
+
- Download the "region_segmentations" type, which represents the division of a scene into individual rooms.
|
288 |
+
```bash
|
289 |
+
# download-mp.py is the official download script
|
290 |
+
# MATTERPORT3D_DIR: the directory of downloaded Matterport3D dataset.
|
291 |
+
python download-mp.py -o ${MATTERPORT3D_DIR} --type region_segmentations
|
292 |
+
```
|
293 |
+
- Unzip the region_segmentations data
|
294 |
+
```bash
|
295 |
+
# MATTERPORT3D_DIR: the directory of downloaded Matterport3D dataset.
|
296 |
+
python pointcept/datasets/preprocessing/matterport3d/unzip_matterport3d_region_segmentation.py --dataset_root ${MATTERPORT3D_DIR}
|
297 |
+
```
|
298 |
+
- Run preprocessing code for Matterport3D as follows:
|
299 |
+
```bash
|
300 |
+
# MATTERPORT3D_DIR: the directory of downloaded Matterport3D dataset.
|
301 |
+
# PROCESSED_MATTERPORT3D_DIR: the directory of processed Matterport3D dataset (output dir).
|
302 |
+
# NUM_WORKERS: the number of workers for this preprocessing.
|
303 |
+
python pointcept/datasets/preprocessing/matterport3d/preprocess_matterport3d_mesh.py --dataset_root ${MATTERPORT3D_DIR} --output_root ${PROCESSED_MATTERPORT3D_DIR} --num_workers ${NUM_WORKERS}
|
304 |
+
```
|
305 |
+
- Link processed dataset to codebase.
|
306 |
+
```bash
|
307 |
+
# PROCESSED_MATTERPORT3D_DIR: the directory of processed Matterport3D dataset (output dir).
|
308 |
+
mkdir data
|
309 |
+
ln -s ${PROCESSED_MATTERPORT3D_DIR} ${CODEBASE_DIR}/data/matterport3d
|
310 |
+
```
|
311 |
+
|
312 |
+
Following the instruction of [OpenRooms](https://github.com/ViLab-UCSD/OpenRooms), we remapped Matterport3D's categories to ScanNet 20 semantic categories with the addition of a ceiling category.
|
313 |
+
* (Alternative) Our preprocessed data can also be downloaded [here](https://huggingface.co/datasets/Pointcept/matterport3d-compressed), please agree to the official license before downloading it.
|
314 |
+
|
315 |
+
### SemanticKITTI
|
316 |
+
- Download [SemanticKITTI](http://www.semantic-kitti.org/dataset.html#download) dataset.
|
317 |
+
- Link dataset to codebase.
|
318 |
+
```bash
|
319 |
+
# SEMANTIC_KITTI_DIR: the directory of SemanticKITTI dataset.
|
320 |
+
# |- SEMANTIC_KITTI_DIR
|
321 |
+
# |- dataset
|
322 |
+
# |- sequences
|
323 |
+
# |- 00
|
324 |
+
# |- 01
|
325 |
+
# |- ...
|
326 |
+
|
327 |
+
mkdir -p data
|
328 |
+
ln -s ${SEMANTIC_KITTI_DIR} ${CODEBASE_DIR}/data/semantic_kitti
|
329 |
+
```
|
330 |
+
|
331 |
+
### nuScenes
|
332 |
+
- Download the official [nuScenes](https://www.nuscenes.org/nuscenes#download) dataset (with Lidar Segmentation) and organize the downloaded files as follows:
|
333 |
+
```bash
|
334 |
+
NUSCENES_DIR
|
335 |
+
│── samples
|
336 |
+
│── sweeps
|
337 |
+
│── lidarseg
|
338 |
+
...
|
339 |
+
│── v1.0-trainval
|
340 |
+
│── v1.0-test
|
341 |
+
```
|
342 |
+
- Run information preprocessing code (modified from OpenPCDet) for nuScenes as follows:
|
343 |
+
```bash
|
344 |
+
# NUSCENES_DIR: the directory of downloaded nuScenes dataset.
|
345 |
+
# PROCESSED_NUSCENES_DIR: the directory of processed nuScenes dataset (output dir).
|
346 |
+
# MAX_SWEEPS: Max number of sweeps. Default: 10.
|
347 |
+
pip install nuscenes-devkit pyquaternion
|
348 |
+
python pointcept/datasets/preprocessing/nuscenes/preprocess_nuscenes_info.py --dataset_root ${NUSCENES_DIR} --output_root ${PROCESSED_NUSCENES_DIR} --max_sweeps ${MAX_SWEEPS} --with_camera
|
349 |
+
```
|
350 |
+
- (Alternative) Our preprocess nuScenes information data can also be downloaded [[here](
|
351 |
+
https://huggingface.co/datasets/Pointcept/nuscenes-compressed)] (only processed information, still need to download raw dataset and link to the folder), please agree to the official license before downloading it.
|
352 |
+
|
353 |
+
- Link raw dataset to processed nuScenes dataset folder:
|
354 |
+
```bash
|
355 |
+
# NUSCENES_DIR: the directory of downloaded nuScenes dataset.
|
356 |
+
# PROCESSED_NUSCENES_DIR: the directory of processed nuScenes dataset (output dir).
|
357 |
+
ln -s ${NUSCENES_DIR} ${PROCESSED_NUSCENES_DIR}/raw
|
358 |
+
```
|
359 |
+
then the processed nuscenes folder is organized as follows:
|
360 |
+
```bash
|
361 |
+
nuscene
|
362 |
+
|── raw
|
363 |
+
│── samples
|
364 |
+
│── sweeps
|
365 |
+
│── lidarseg
|
366 |
+
...
|
367 |
+
│── v1.0-trainval
|
368 |
+
│── v1.0-test
|
369 |
+
|── info
|
370 |
+
```
|
371 |
+
|
372 |
+
- Link processed dataset to codebase.
|
373 |
+
```bash
|
374 |
+
# PROCESSED_NUSCENES_DIR: the directory of processed nuScenes dataset (output dir).
|
375 |
+
mkdir data
|
376 |
+
ln -s ${PROCESSED_NUSCENES_DIR} ${CODEBASE_DIR}/data/nuscenes
|
377 |
+
```
|
378 |
+
|
379 |
+
### Waymo
|
380 |
+
- Download the official [Waymo](https://waymo.com/open/download/) dataset (v1.4.3) and organize the downloaded files as follows:
|
381 |
+
```bash
|
382 |
+
WAYMO_RAW_DIR
|
383 |
+
│── training
|
384 |
+
│── validation
|
385 |
+
│── testing
|
386 |
+
```
|
387 |
+
- Install the following dependency:
|
388 |
+
```bash
|
389 |
+
# If it shows "No matching distribution found", download the whl directly from PyPI and install the package.
|
390 |
+
conda create -n waymo python=3.10 -y
|
391 |
+
conda activate waymo
|
392 |
+
pip install waymo-open-dataset-tf-2-12-0
|
393 |
+
```
|
394 |
+
- Run the preprocessing code as follows:
|
395 |
+
```bash
|
396 |
+
# WAYMO_DIR: the directory of the downloaded Waymo dataset.
|
397 |
+
# PROCESSED_WAYMO_DIR: the directory of the processed Waymo dataset (output dir).
|
398 |
+
# NUM_WORKERS: num workers for preprocessing
|
399 |
+
python pointcept/datasets/preprocessing/waymo/preprocess_waymo.py --dataset_root ${WAYMO_DIR} --output_root ${PROCESSED_WAYMO_DIR} --splits training validation --num_workers ${NUM_WORKERS}
|
400 |
+
```
|
401 |
+
|
402 |
+
- Link processed dataset to the codebase.
|
403 |
+
```bash
|
404 |
+
# PROCESSED_WAYMO_DIR: the directory of the processed Waymo dataset (output dir).
|
405 |
+
mkdir data
|
406 |
+
ln -s ${PROCESSED_WAYMO_DIR} ${CODEBASE_DIR}/data/waymo
|
407 |
+
```
|
408 |
+
|
409 |
+
### ModelNet
|
410 |
+
- Download [modelnet40_normal_resampled.zip](https://shapenet.cs.stanford.edu/media/modelnet40_normal_resampled.zip) and unzip
|
411 |
+
- Link dataset to the codebase.
|
412 |
+
```bash
|
413 |
+
mkdir -p data
|
414 |
+
ln -s ${MODELNET_DIR} ${CODEBASE_DIR}/data/modelnet40_normal_resampled
|
415 |
+
```
|
416 |
+
|
417 |
+
## Quick Start
|
418 |
+
|
419 |
+
### Training
|
420 |
+
**Train from scratch.** The training process is based on configs in the `configs` folder.
|
421 |
+
The training script will generate an experiment folder in `exp` folder and backup essential code in the experiment folder.
|
422 |
+
Training config, log, tensorboard, and checkpoints will also be saved into the experiment folder during the training process.
|
423 |
+
```bash
|
424 |
+
export CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES}
|
425 |
+
# Script (Recommended)
|
426 |
+
sh scripts/train.sh -p ${INTERPRETER_PATH} -g ${NUM_GPU} -d ${DATASET_NAME} -c ${CONFIG_NAME} -n ${EXP_NAME}
|
427 |
+
# Direct
|
428 |
+
export PYTHONPATH=./
|
429 |
+
python tools/train.py --config-file ${CONFIG_PATH} --num-gpus ${NUM_GPU} --options save_path=${SAVE_PATH}
|
430 |
+
```
|
431 |
+
|
432 |
+
For example:
|
433 |
+
```bash
|
434 |
+
# By script (Recommended)
|
435 |
+
# -p is default set as python and can be ignored
|
436 |
+
sh scripts/train.sh -p python -d scannet -c semseg-pt-v2m2-0-base -n semseg-pt-v2m2-0-base
|
437 |
+
# Direct
|
438 |
+
export PYTHONPATH=./
|
439 |
+
python tools/train.py --config-file configs/scannet/semseg-pt-v2m2-0-base.py --options save_path=exp/scannet/semseg-pt-v2m2-0-base
|
440 |
+
```
|
441 |
+
**Resume training from checkpoint.** If the training process is interrupted by accident, the following script can resume training from a given checkpoint.
|
442 |
+
```bash
|
443 |
+
export CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES}
|
444 |
+
# Script (Recommended)
|
445 |
+
# simply add "-r true"
|
446 |
+
sh scripts/train.sh -p ${INTERPRETER_PATH} -g ${NUM_GPU} -d ${DATASET_NAME} -c ${CONFIG_NAME} -n ${EXP_NAME} -r true
|
447 |
+
# Direct
|
448 |
+
export PYTHONPATH=./
|
449 |
+
python tools/train.py --config-file ${CONFIG_PATH} --num-gpus ${NUM_GPU} --options save_path=${SAVE_PATH} resume=True weight=${CHECKPOINT_PATH}
|
450 |
+
```
|
451 |
+
|
452 |
+
### Testing
|
453 |
+
During training, model evaluation is performed on point clouds after grid sampling (voxelization), providing an initial assessment of model performance. However, to obtain precise evaluation results, testing is **essential**. The testing process involves subsampling a dense point cloud into a sequence of voxelized point clouds, ensuring comprehensive coverage of all points. These sub-results are then predicted and collected to form a complete prediction of the entire point cloud. This approach yields higher evaluation results compared to simply mapping/interpolating the prediction. In addition, our testing code supports TTA (test time augmentation) testing, which further enhances the stability of evaluation performance.
|
454 |
+
|
455 |
+
```bash
|
456 |
+
# By script (Based on experiment folder created by training script)
|
457 |
+
sh scripts/test.sh -p ${INTERPRETER_PATH} -g ${NUM_GPU} -d ${DATASET_NAME} -n ${EXP_NAME} -w ${CHECKPOINT_NAME}
|
458 |
+
# Direct
|
459 |
+
export PYTHONPATH=./
|
460 |
+
python tools/test.py --config-file ${CONFIG_PATH} --num-gpus ${NUM_GPU} --options save_path=${SAVE_PATH} weight=${CHECKPOINT_PATH}
|
461 |
+
```
|
462 |
+
For example:
|
463 |
+
```bash
|
464 |
+
# By script (Based on experiment folder created by training script)
|
465 |
+
# -p is default set as python and can be ignored
|
466 |
+
# -w is default set as model_best and can be ignored
|
467 |
+
sh scripts/test.sh -p python -d scannet -n semseg-pt-v2m2-0-base -w model_best
|
468 |
+
# Direct
|
469 |
+
export PYTHONPATH=./
|
470 |
+
python tools/test.py --config-file configs/scannet/semseg-pt-v2m2-0-base.py --options save_path=exp/scannet/semseg-pt-v2m2-0-base weight=exp/scannet/semseg-pt-v2m2-0-base/model/model_best.pth
|
471 |
+
```
|
472 |
+
|
473 |
+
TTA can be disabled by replacing `data.test.test_cfg.aug_transform = [...]` with:
|
474 |
+
|
475 |
+
```python
|
476 |
+
data = dict(
|
477 |
+
train = dict(...),
|
478 |
+
val = dict(...),
|
479 |
+
test = dict(
|
480 |
+
...,
|
481 |
+
test_cfg = dict(
|
482 |
+
...,
|
483 |
+
aug_transform = [
|
484 |
+
[dict(type="RandomRotateTargetAngle", angle=[0], axis="z", center=[0, 0, 0], p=1)]
|
485 |
+
]
|
486 |
+
)
|
487 |
+
)
|
488 |
+
)
|
489 |
+
```
|
490 |
+
|
491 |
+
### Offset
|
492 |
+
`Offset` is the separator of point clouds in batch data, and it is similar to the concept of `Batch` in PyG.
|
493 |
+
A visual illustration of batch and offset is as follows:
|
494 |
+
<p align="center">
|
495 |
+
<!-- pypi-strip -->
|
496 |
+
<picture>
|
497 |
+
<source media="(prefers-color-scheme: dark)" srcset="https://raw.githubusercontent.com/Pointcept/Pointcept/main/docs/offset_dark.png">
|
498 |
+
<source media="(prefers-color-scheme: light)" srcset="https://raw.githubusercontent.com/Pointcept/Pointcept/main/docs/offset.png">
|
499 |
+
<!-- /pypi-strip -->
|
500 |
+
<img alt="pointcept" src="https://raw.githubusercontent.com/Pointcept/Pointcept/main/docs/offset.png" width="480">
|
501 |
+
<!-- pypi-strip -->
|
502 |
+
</picture><br>
|
503 |
+
<!-- /pypi-strip -->
|
504 |
+
</p>
|
505 |
+
|
506 |
+
## Model Zoo
|
507 |
+
### 1. Backbones and Semantic Segmentation
|
508 |
+
#### SparseUNet
|
509 |
+
|
510 |
+
_Pointcept_ provides `SparseUNet` implemented by `SpConv` and `MinkowskiEngine`. The SpConv version is recommended since SpConv is easy to install and faster than MinkowskiEngine. Meanwhile, SpConv is also widely applied in outdoor perception.
|
511 |
+
|
512 |
+
- **SpConv (recommend)**
|
513 |
+
|
514 |
+
The SpConv version of `SparseUNet` in the codebase was fully rewritten from the `MinkowskiEngine` version; an example running script is as follows:
|
515 |
+
|
516 |
+
```bash
|
517 |
+
# ScanNet val
|
518 |
+
sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-0-base -n semseg-spunet-v1m1-0-base
|
519 |
+
# ScanNet200
|
520 |
+
sh scripts/train.sh -g 4 -d scannet200 -c semseg-spunet-v1m1-0-base -n semseg-spunet-v1m1-0-base
|
521 |
+
# S3DIS
|
522 |
+
sh scripts/train.sh -g 4 -d s3dis -c semseg-spunet-v1m1-0-base -n semseg-spunet-v1m1-0-base
|
523 |
+
# S3DIS (with normal)
|
524 |
+
sh scripts/train.sh -g 4 -d s3dis -c semseg-spunet-v1m1-0-cn-base -n semseg-spunet-v1m1-0-cn-base
|
525 |
+
# SemanticKITTI
|
526 |
+
sh scripts/train.sh -g 4 -d semantic_kitti -c semseg-spunet-v1m1-0-base -n semseg-spunet-v1m1-0-base
|
527 |
+
# nuScenes
|
528 |
+
sh scripts/train.sh -g 4 -d nuscenes -c semseg-spunet-v1m1-0-base -n semseg-spunet-v1m1-0-base
|
529 |
+
# ModelNet40
|
530 |
+
sh scripts/train.sh -g 2 -d modelnet40 -c cls-spunet-v1m1-0-base -n cls-spunet-v1m1-0-base
|
531 |
+
|
532 |
+
# ScanNet Data Efficient
|
533 |
+
sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-2-efficient-la20 -n semseg-spunet-v1m1-2-efficient-la20
|
534 |
+
sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-2-efficient-la50 -n semseg-spunet-v1m1-2-efficient-la50
|
535 |
+
sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-2-efficient-la100 -n semseg-spunet-v1m1-2-efficient-la100
|
536 |
+
sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-2-efficient-la200 -n semseg-spunet-v1m1-2-efficient-la200
|
537 |
+
sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-2-efficient-lr1 -n semseg-spunet-v1m1-2-efficient-lr1
|
538 |
+
sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-2-efficient-lr5 -n semseg-spunet-v1m1-2-efficient-lr5
|
539 |
+
sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-2-efficient-lr10 -n semseg-spunet-v1m1-2-efficient-lr10
|
540 |
+
sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-2-efficient-lr20 -n semseg-spunet-v1m1-2-efficient-lr20
|
541 |
+
|
542 |
+
# Profile model run time
|
543 |
+
sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-0-enable-profiler -n semseg-spunet-v1m1-0-enable-profiler
|
544 |
+
```
|
545 |
+
|
546 |
+
- **MinkowskiEngine**
|
547 |
+
|
548 |
+
The MinkowskiEngine version `SparseUNet` in the codebase was modified from the original MinkowskiEngine repo, and example running scripts are as follows:
|
549 |
+
1. Install MinkowskiEngine, refer to https://github.com/NVIDIA/MinkowskiEngine
|
550 |
+
2. Training with the following example scripts:
|
551 |
+
```bash
|
552 |
+
# Uncomment "# from .sparse_unet import *" in "pointcept/models/__init__.py"
|
553 |
+
# Uncomment "# from .mink_unet import *" in "pointcept/models/sparse_unet/__init__.py"
|
554 |
+
# ScanNet
|
555 |
+
sh scripts/train.sh -g 4 -d scannet -c semseg-minkunet34c-0-base -n semseg-minkunet34c-0-base
|
556 |
+
# ScanNet200
|
557 |
+
sh scripts/train.sh -g 4 -d scannet200 -c semseg-minkunet34c-0-base -n semseg-minkunet34c-0-base
|
558 |
+
# S3DIS
|
559 |
+
sh scripts/train.sh -g 4 -d s3dis -c semseg-minkunet34c-0-base -n semseg-minkunet34c-0-base
|
560 |
+
# SemanticKITTI
|
561 |
+
sh scripts/train.sh -g 2 -d semantic_kitti -c semseg-minkunet34c-0-base -n semseg-minkunet34c-0-base
|
562 |
+
```
|
563 |
+
|
564 |
+
#### OA-CNNs
|
565 |
+
Introducing Omni-Adaptive 3D CNNs (**OA-CNNs**), a family of networks that integrates a lightweight module to greatly enhance the adaptivity of sparse CNNs at minimal computational cost. Without any self-attention modules, **OA-CNNs** favorably surpass point transformers in terms of accuracy in both indoor and outdoor scenes, with much less latency and memory cost. Issues related to **OA-CNNs** can be directed to @Pbihao.
|
566 |
+
```bash
|
567 |
+
# ScanNet
|
568 |
+
sh scripts/train.sh -g 4 -d scannet -c semseg-oacnns-v1m1-0-base -n semseg-oacnns-v1m1-0-base
|
569 |
+
```
|
570 |
+
|
571 |
+
#### Point Transformers
|
572 |
+
- **PTv3**
|
573 |
+
|
574 |
+
[PTv3](https://arxiv.org/abs/2312.10035) is an efficient backbone model that achieves SOTA performances across indoor and outdoor scenarios. The full PTv3 relies on FlashAttention, while FlashAttention relies on CUDA 11.6 and above; make sure your local Pointcept environment satisfies the requirements.
|
575 |
+
|
576 |
+
If you can not upgrade your local environment to satisfy the requirements (CUDA >= 11.6), then you can disable FlashAttention by setting the model parameter `enable_flash` to `false` and reducing the `enc_patch_size` and `dec_patch_size` to a level (e.g. 128).
|
577 |
+
|
578 |
+
FlashAttention forcibly disables RPE and reduces the precision to fp16. If you require these features, please disable `enable_flash` and adjust `enable_rpe`, `upcast_attention` and `upcast_softmax`.
|
579 |
+
|
580 |
+
Detailed instructions and experiment records (containing weights) are available on the [project repository](https://github.com/Pointcept/PointTransformerV3). Example running scripts are as follows:
|
581 |
+
```bash
|
582 |
+
# Scratched ScanNet
|
583 |
+
sh scripts/train.sh -g 4 -d scannet -c semseg-pt-v3m1-0-base -n semseg-pt-v3m1-0-base
|
584 |
+
# PPT joint training (ScanNet + Structured3D) and evaluate in ScanNet
|
585 |
+
sh scripts/train.sh -g 8 -d scannet -c semseg-pt-v3m1-1-ppt-extreme -n semseg-pt-v3m1-1-ppt-extreme
|
586 |
+
|
587 |
+
# Scratched ScanNet200
|
588 |
+
sh scripts/train.sh -g 4 -d scannet200 -c semseg-pt-v3m1-0-base -n semseg-pt-v3m1-0-base
|
589 |
+
# Fine-tuning from PPT joint training (ScanNet + Structured3D) with ScanNet200
|
590 |
+
# PTV3_PPT_WEIGHT_PATH: Path to model weight trained by PPT multi-dataset joint training
|
591 |
+
# e.g. exp/scannet/semseg-pt-v3m1-1-ppt-extreme/model/model_best.pth
|
592 |
+
sh scripts/train.sh -g 4 -d scannet200 -c semseg-pt-v3m1-1-ppt-ft -n semseg-pt-v3m1-1-ppt-ft -w ${PTV3_PPT_WEIGHT_PATH}
|
593 |
+
|
594 |
+
# Scratched ScanNet++
|
595 |
+
sh scripts/train.sh -g 4 -d scannetpp -c semseg-pt-v3m1-0-base -n semseg-pt-v3m1-0-base
|
596 |
+
# Scratched ScanNet++ test
|
597 |
+
sh scripts/train.sh -g 4 -d scannetpp -c semseg-pt-v3m1-1-submit -n semseg-pt-v3m1-1-submit
|
598 |
+
|
599 |
+
|
600 |
+
# Scratched S3DIS
|
601 |
+
sh scripts/train.sh -g 4 -d s3dis -c semseg-pt-v3m1-0-base -n semseg-pt-v3m1-0-base
|
602 |
+
# an example of disabling flash_attention and enabling rpe.
|
603 |
+
sh scripts/train.sh -g 4 -d s3dis -c semseg-pt-v3m1-1-rpe -n semseg-pt-v3m1-0-rpe
|
604 |
+
# PPT joint training (ScanNet + S3DIS + Structured3D) and evaluate in S3DIS
|
605 |
+
sh scripts/train.sh -g 8 -d s3dis -c semseg-pt-v3m1-1-ppt-extreme -n semseg-pt-v3m1-1-ppt-extreme
|
606 |
+
# S3DIS 6-fold cross validation
|
607 |
+
# 1. The default configs are evaluated on Area_5, modify the "data.train.split", "data.val.split", and "data.test.split" to make the config evaluated on Area_1 ~ Area_6 respectively.
|
608 |
+
# 2. Train and evaluate the model on each split of areas and gather result files located in "exp/s3dis/EXP_NAME/result/Area_x.pth" in one single folder, noted as RECORD_FOLDER.
|
609 |
+
# 3. Run the following script to get S3DIS 6-fold cross validation performance:
|
610 |
+
export PYTHONPATH=./
|
611 |
+
python tools/test_s3dis_6fold.py --record_root ${RECORD_FOLDER}
|
612 |
+
|
613 |
+
# Scratched nuScenes
|
614 |
+
sh scripts/train.sh -g 4 -d nuscenes -c semseg-pt-v3m1-0-base -n semseg-pt-v3m1-0-base
|
615 |
+
# Scratched Waymo
|
616 |
+
sh scripts/train.sh -g 4 -d waymo -c semseg-pt-v3m1-0-base -n semseg-pt-v3m1-0-base
|
617 |
+
|
618 |
+
# More configs and exp records for PTv3 will be available soon.
|
619 |
+
```
|
620 |
+
|
621 |
+
Indoor semantic segmentation
|
622 |
+
| Model | Benchmark | Additional Data | Num GPUs | Val mIoU | Config | Tensorboard | Exp Record |
|
623 |
+
| :---: | :---: |:---------------:| :---: | :---: | :---: | :---: | :---: |
|
624 |
+
| PTv3 | ScanNet | ✗ | 4 | 77.6% | [link](https://github.com/Pointcept/Pointcept/blob/main/configs/scannet/semseg-pt-v3m1-0-base.py) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tensorboard) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tree/main/scannet-semseg-pt-v3m1-0-base) |
|
625 |
+
| PTv3 + PPT | ScanNet | ✓ | 8 | 78.5% | [link](https://github.com/Pointcept/Pointcept/blob/main/configs/scannet/semseg-pt-v3m1-1-ppt-extreme.py) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tensorboard) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tree/main/scannet-semseg-pt-v3m1-1-ppt-extreme) |
|
626 |
+
| PTv3 | ScanNet200 | ✗ | 4 | 35.3% | [link](https://github.com/Pointcept/Pointcept/blob/main/configs/scannet200/semseg-pt-v3m1-0-base.py) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tensorboard) |[link](https://huggingface.co/Pointcept/PointTransformerV3/tree/main/scannet200-semseg-pt-v3m1-0-base)|
|
627 |
+
| PTv3 + PPT | ScanNet200 | ✓ (f.t.) | 4 | | | | |
|
628 |
+
| PTv3 | S3DIS (Area5) | ✗ | 4 | 73.6% | [link](https://github.com/Pointcept/Pointcept/blob/main/configs/s3dis/semseg-pt-v3m1-0-rpe.py) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tensorboard) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tree/main/s3dis-semseg-pt-v3m1-0-rpe) |
|
629 |
+
| PTv3 + PPT | S3DIS (Area5) | ✓ | 8 | 75.4% | [link](https://github.com/Pointcept/Pointcept/blob/main/configs/s3dis/semseg-pt-v3m1-1-ppt-extreme.py) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tensorboard) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tree/main/s3dis-semseg-pt-v3m1-1-ppt-extreme) |
|
630 |
+
|
631 |
+
Outdoor semantic segmentation
|
632 |
+
| Model | Benchmark | Additional Data | Num GPUs | Val mIoU | Config | Tensorboard | Exp Record |
|
633 |
+
| :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: |
|
634 |
+
| PTv3 | nuScenes | ✗ | 4 | 80.3 | [link](https://github.com/Pointcept/Pointcept/blob/main/configs/nuscenes/semseg-pt-v3m1-0-base.py) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tensorboard)|[link](https://huggingface.co/Pointcept/PointTransformerV3/tree/main/nuscenes-semseg-pt-v3m1-0-base) |
|
635 |
+
| PTv3 + PPT | nuScenes | ✓ | 8 | | | | |
|
636 |
+
| PTv3 | SemanticKITTI | ✗ | 4 | | | | |
|
637 |
+
| PTv3 + PPT | SemanticKITTI | ✓ | 8 | | | | |
|
638 |
+
| PTv3 | Waymo | ✗ | 4 | 71.2 | [link](https://github.com/Pointcept/Pointcept/blob/main/configs/waymo/semseg-pt-v3m1-0-base.py) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tensorboard) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tree/main/waymo-semseg-pt-v3m1-0-base) (log only) |
|
639 |
+
| PTv3 + PPT | Waymo | ✓ | 8 | | | | |
|
640 |
+
|
641 |
+
_**\*Released model weights are trained for v1.5.1; weights for v1.5.2 and later are still in progress.**_
|
642 |
+
|
643 |
+
- **PTv2 mode2**
|
644 |
+
|
645 |
+
The original PTv2 was trained on 4 * RTX a6000 (48G memory). Even enabling AMP, the memory cost of the original PTv2 is slightly larger than 24G. Considering GPUs with 24G memory are much more accessible, I tuned the PTv2 on the latest Pointcept and made it runnable on 4 * RTX 3090 machines.
|
646 |
+
|
647 |
+
`PTv2 Mode2` enables AMP and disables _Position Encoding Multiplier_ & _Grouped Linear_. During our further research, we found that precise coordinates are not necessary for point cloud understanding (Replacing precise coordinates with grid coordinates doesn't influence the performance. Also, SparseUNet is an example). As for Grouped Linear, my implementation of Grouped Linear seems to cost more memory than the Linear layer provided by PyTorch. Benefiting from the codebase and better parameter tuning, we also relieve the overfitting problem. The reproducing performance is even better than the results reported in our paper.
|
648 |
+
|
649 |
+
Example running scripts are as follows:
|
650 |
+
|
651 |
+
```bash
|
652 |
+
# ptv2m2: PTv2 mode2, disable PEM & Grouped Linear, GPU memory cost < 24G (recommend)
|
653 |
+
# ScanNet
|
654 |
+
sh scripts/train.sh -g 4 -d scannet -c semseg-pt-v2m2-0-base -n semseg-pt-v2m2-0-base
|
655 |
+
sh scripts/train.sh -g 4 -d scannet -c semseg-pt-v2m2-3-lovasz -n semseg-pt-v2m2-3-lovasz
|
656 |
+
|
657 |
+
# ScanNet test
|
658 |
+
sh scripts/train.sh -g 4 -d scannet -c semseg-pt-v2m2-1-submit -n semseg-pt-v2m2-1-submit
|
659 |
+
# ScanNet200
|
660 |
+
sh scripts/train.sh -g 4 -d scannet200 -c semseg-pt-v2m2-0-base -n semseg-pt-v2m2-0-base
|
661 |
+
# ScanNet++
|
662 |
+
sh scripts/train.sh -g 4 -d scannetpp -c semseg-pt-v2m2-0-base -n semseg-pt-v2m2-0-base
|
663 |
+
# ScanNet++ test
|
664 |
+
sh scripts/train.sh -g 4 -d scannetpp -c semseg-pt-v2m2-1-submit -n semseg-pt-v2m2-1-submit
|
665 |
+
# S3DIS
|
666 |
+
sh scripts/train.sh -g 4 -d s3dis -c semseg-pt-v2m2-0-base -n semseg-pt-v2m2-0-base
|
667 |
+
# SemanticKITTI
|
668 |
+
sh scripts/train.sh -g 4 -d semantic_kitti -c semseg-pt-v2m2-0-base -n semseg-pt-v2m2-0-base
|
669 |
+
# nuScenes
|
670 |
+
sh scripts/train.sh -g 4 -d nuscenes -c semseg-pt-v2m2-0-base -n semseg-pt-v2m2-0-base
|
671 |
+
```
|
672 |
+
|
673 |
+
- **PTv2 mode1**
|
674 |
+
|
675 |
+
`PTv2 mode1` is the original PTv2 we reported in our paper, example running scripts are as follows:
|
676 |
+
|
677 |
+
```bash
|
678 |
+
# ptv2m1: PTv2 mode1, Original PTv2, GPU memory cost > 24G
|
679 |
+
# ScanNet
|
680 |
+
sh scripts/train.sh -g 4 -d scannet -c semseg-pt-v2m1-0-base -n semseg-pt-v2m1-0-base
|
681 |
+
# ScanNet200
|
682 |
+
sh scripts/train.sh -g 4 -d scannet200 -c semseg-pt-v2m1-0-base -n semseg-pt-v2m1-0-base
|
683 |
+
# S3DIS
|
684 |
+
sh scripts/train.sh -g 4 -d s3dis -c semseg-pt-v2m1-0-base -n semseg-pt-v2m1-0-base
|
685 |
+
```
|
686 |
+
|
687 |
+
- **PTv1**
|
688 |
+
|
689 |
+
The original PTv1 is also available in our Pointcept codebase. I haven't run PTv1 for a long time, but I have ensured that the example running script works well.
|
690 |
+
|
691 |
+
```bash
|
692 |
+
# ScanNet
|
693 |
+
sh scripts/train.sh -g 4 -d scannet -c semseg-pt-v1-0-base -n semseg-pt-v1-0-base
|
694 |
+
# ScanNet200
|
695 |
+
sh scripts/train.sh -g 4 -d scannet200 -c semseg-pt-v1-0-base -n semseg-pt-v1-0-base
|
696 |
+
# S3DIS
|
697 |
+
sh scripts/train.sh -g 4 -d s3dis -c semseg-pt-v1-0-base -n semseg-pt-v1-0-base
|
698 |
+
```
|
699 |
+
|
700 |
+
|
701 |
+
#### Stratified Transformer
|
702 |
+
1. Additional requirements:
|
703 |
+
```bash
|
704 |
+
pip install torch-points3d
|
705 |
+
# Fix dependency conflict caused by installing torch-points3d
|
706 |
+
pip uninstall SharedArray
|
707 |
+
pip install SharedArray==3.2.1
|
708 |
+
|
709 |
+
cd libs/pointops2
|
710 |
+
python setup.py install
|
711 |
+
cd ../..
|
712 |
+
```
|
713 |
+
2. Uncomment `# from .stratified_transformer import *` in `pointcept/models/__init__.py`.
|
714 |
+
3. Refer to [Optional Installation](installation) to install dependencies.
|
715 |
+
4. Training with the following example scripts:
|
716 |
+
```bash
|
717 |
+
# stv1m1: Stratified Transformer mode1, Modified from the original Stratified Transformer code.
|
718 |
+
# stv1m2: Stratified Transformer mode2, My rewrite version (recommend).
|
719 |
+
|
720 |
+
# ScanNet
|
721 |
+
sh scripts/train.sh -g 4 -d scannet -c semseg-st-v1m2-0-refined -n semseg-st-v1m2-0-refined
|
722 |
+
sh scripts/train.sh -g 4 -d scannet -c semseg-st-v1m1-0-origin -n semseg-st-v1m1-0-origin
|
723 |
+
# ScanNet200
|
724 |
+
sh scripts/train.sh -g 4 -d scannet200 -c semseg-st-v1m2-0-refined -n semseg-st-v1m2-0-refined
|
725 |
+
# S3DIS
|
726 |
+
sh scripts/train.sh -g 4 -d s3dis -c semseg-st-v1m2-0-refined -n semseg-st-v1m2-0-refined
|
727 |
+
```
|
728 |
+
|
729 |
+
#### SPVCNN
|
730 |
+
`SPVCNN` is a baseline model of [SPVNAS](https://github.com/mit-han-lab/spvnas), it is also a practical baseline for outdoor datasets.
|
731 |
+
1. Install torchsparse:
|
732 |
+
```bash
|
733 |
+
# refer https://github.com/mit-han-lab/torchsparse
|
734 |
+
# install method without sudo apt install
|
735 |
+
conda install google-sparsehash -c bioconda
|
736 |
+
export C_INCLUDE_PATH=${CONDA_PREFIX}/include:$C_INCLUDE_PATH
|
737 |
+
export CPLUS_INCLUDE_PATH=${CONDA_PREFIX}/include:$CPLUS_INCLUDE_PATH
|
738 |
+
pip install --upgrade git+https://github.com/mit-han-lab/torchsparse.git
|
739 |
+
```
|
740 |
+
2. Training with the following example scripts:
|
741 |
+
```bash
|
742 |
+
# SemanticKITTI
|
743 |
+
sh scripts/train.sh -g 2 -d semantic_kitti -c semseg-spvcnn-v1m1-0-base -n semseg-spvcnn-v1m1-0-base
|
744 |
+
```
|
745 |
+
|
746 |
+
#### OctFormer
|
747 |
+
OctFormer from _OctFormer: Octree-based Transformers for 3D Point Clouds_.
|
748 |
+
1. Additional requirements:
|
749 |
+
```bash
|
750 |
+
cd libs
|
751 |
+
git clone https://github.com/octree-nn/dwconv.git
|
752 |
+
pip install ./dwconv
|
753 |
+
pip install ocnn
|
754 |
+
```
|
755 |
+
2. Uncomment `# from .octformer import *` in `pointcept/models/__init__.py`.
|
756 |
+
3. Training with the following example scripts:
|
757 |
+
```bash
|
758 |
+
# ScanNet
|
759 |
+
sh scripts/train.sh -g 4 -d scannet -c semseg-octformer-v1m1-0-base -n semseg-octformer-v1m1-0-base
|
760 |
+
```
|
761 |
+
|
762 |
+
#### Swin3D
|
763 |
+
Swin3D from _Swin3D: A Pretrained Transformer Backbone for 3D Indoor Scene Understanding_.
|
764 |
+
1. Additional requirements:
|
765 |
+
```bash
|
766 |
+
# 1. Install MinkEngine v0.5.4, follow readme in https://github.com/NVIDIA/MinkowskiEngine;
|
767 |
+
# 2. Install Swin3D, mainly for cuda operation:
|
768 |
+
cd libs
|
769 |
+
git clone https://github.com/microsoft/Swin3D.git
|
770 |
+
cd Swin3D
|
771 |
+
pip install ./
|
772 |
+
```
|
773 |
+
2. Uncomment `# from .swin3d import *` in `pointcept/models/__init__.py`.
|
774 |
+
3. Pre-Training with the following example scripts (Structured3D preprocessing refer [here](#structured3d)):
|
775 |
+
```bash
|
776 |
+
# Structured3D + Swin-S
|
777 |
+
sh scripts/train.sh -g 4 -d structured3d -c semseg-swin3d-v1m1-0-small -n semseg-swin3d-v1m1-0-small
|
778 |
+
# Structured3D + Swin-L
|
779 |
+
sh scripts/train.sh -g 4 -d structured3d -c semseg-swin3d-v1m1-1-large -n semseg-swin3d-v1m1-1-large
|
780 |
+
|
781 |
+
# Addition
|
782 |
+
# Structured3D + SpUNet
|
783 |
+
sh scripts/train.sh -g 4 -d structured3d -c semseg-spunet-v1m1-0-base -n semseg-spunet-v1m1-0-base
|
784 |
+
# Structured3D + PTv2
|
785 |
+
sh scripts/train.sh -g 4 -d structured3d -c semseg-pt-v2m2-0-base -n semseg-pt-v2m2-0-base
|
786 |
+
```
|
787 |
+
4. Fine-tuning with the following example scripts:
|
788 |
+
```bash
|
789 |
+
# ScanNet + Swin-S
|
790 |
+
sh scripts/train.sh -g 4 -d scannet -w exp/structured3d/semseg-swin3d-v1m1-0-small/model/model_last.pth -c semseg-swin3d-v1m1-0-small -n semseg-swin3d-v1m1-0-small
|
791 |
+
# ScanNet + Swin-L
|
792 |
+
sh scripts/train.sh -g 4 -d scannet -w exp/structured3d/semseg-swin3d-v1m1-1-large/model/model_last.pth -c semseg-swin3d-v1m1-1-large -n semseg-swin3d-v1m1-1-large
|
793 |
+
|
794 |
+
# S3DIS + Swin-S (here we provide config support S3DIS normal vector)
|
795 |
+
sh scripts/train.sh -g 4 -d s3dis -w exp/structured3d/semseg-swin3d-v1m1-0-small/model/model_last.pth -c semseg-swin3d-v1m1-0-small -n semseg-swin3d-v1m1-0-small
|
796 |
+
# S3DIS + Swin-L (here we provide config support S3DIS normal vector)
|
797 |
+
sh scripts/train.sh -g 4 -d s3dis -w exp/structured3d/semseg-swin3d-v1m1-1-large/model/model_last.pth -c semseg-swin3d-v1m1-1-large -n semseg-swin3d-v1m1-1-large
|
798 |
+
```
|
799 |
+
|
800 |
+
#### Context-Aware Classifier
|
801 |
+
`Context-Aware Classifier` is a segmentor that can further boost the performance of each backbone, as a replacement for `Default Segmentor`. Training with the following example scripts:
|
802 |
+
```bash
|
803 |
+
# ScanNet
|
804 |
+
sh scripts/train.sh -g 4 -d scannet -c semseg-cac-v1m1-0-spunet-base -n semseg-cac-v1m1-0-spunet-base
|
805 |
+
sh scripts/train.sh -g 4 -d scannet -c semseg-cac-v1m1-1-spunet-lovasz -n semseg-cac-v1m1-1-spunet-lovasz
|
806 |
+
sh scripts/train.sh -g 4 -d scannet -c semseg-cac-v1m1-2-ptv2-lovasz -n semseg-cac-v1m1-2-ptv2-lovasz
|
807 |
+
|
808 |
+
# ScanNet200
|
809 |
+
sh scripts/train.sh -g 4 -d scannet200 -c semseg-cac-v1m1-0-spunet-base -n semseg-cac-v1m1-0-spunet-base
|
810 |
+
sh scripts/train.sh -g 4 -d scannet200 -c semseg-cac-v1m1-1-spunet-lovasz -n semseg-cac-v1m1-1-spunet-lovasz
|
811 |
+
sh scripts/train.sh -g 4 -d scannet200 -c semseg-cac-v1m1-2-ptv2-lovasz -n semseg-cac-v1m1-2-ptv2-lovasz
|
812 |
+
```
|
813 |
+
|
814 |
+
|
815 |
+
### 2. Instance Segmentation
|
816 |
+
#### PointGroup
|
817 |
+
[PointGroup](https://github.com/dvlab-research/PointGroup) is a baseline framework for point cloud instance segmentation.
|
818 |
+
1. Additional requirements:
|
819 |
+
```bash
|
820 |
+
conda install -c bioconda google-sparsehash
|
821 |
+
cd libs/pointgroup_ops
|
822 |
+
python setup.py install --include_dirs=${CONDA_PREFIX}/include
|
823 |
+
cd ../..
|
824 |
+
```
|
825 |
+
2. Uncomment `# from .point_group import *` in `pointcept/models/__init__.py`.
|
826 |
+
3. Training with the following example scripts:
|
827 |
+
```bash
|
828 |
+
# ScanNet
|
829 |
+
sh scripts/train.sh -g 4 -d scannet -c insseg-pointgroup-v1m1-0-spunet-base -n insseg-pointgroup-v1m1-0-spunet-base
|
830 |
+
# S3DIS
|
831 |
+
sh scripts/train.sh -g 4 -d s3dis -c insseg-pointgroup-v1m1-0-spunet-base -n insseg-pointgroup-v1m1-0-spunet-base
|
832 |
+
```
|
833 |
+
|
834 |
+
### 3. Pre-training
|
835 |
+
#### Masked Scene Contrast (MSC)
|
836 |
+
1. Pre-training with the following example scripts:
|
837 |
+
```bash
|
838 |
+
# ScanNet
|
839 |
+
sh scripts/train.sh -g 8 -d scannet -c pretrain-msc-v1m1-0-spunet-base -n pretrain-msc-v1m1-0-spunet-base
|
840 |
+
```
|
841 |
+
|
842 |
+
2. Fine-tuning with the following example scripts:
|
843 |
+
Enable PointGroup ([here](#pointgroup)) before fine-tuning on the instance segmentation task.
|
844 |
+
```bash
|
845 |
+
# ScanNet20 Semantic Segmentation
|
846 |
+
sh scripts/train.sh -g 8 -d scannet -w exp/scannet/pretrain-msc-v1m1-0-spunet-base/model/model_last.pth -c semseg-spunet-v1m1-4-ft -n semseg-msc-v1m1-0f-spunet-base
|
847 |
+
# ScanNet20 Instance Segmentation (enable PointGroup before running the script)
|
848 |
+
sh scripts/train.sh -g 4 -d scannet -w exp/scannet/pretrain-msc-v1m1-0-spunet-base/model/model_last.pth -c insseg-pointgroup-v1m1-0-spunet-base -n insseg-msc-v1m1-0f-pointgroup-spunet-base
|
849 |
+
```
|
850 |
+
3. Example log and weight: [[Pretrain](https://connecthkuhk-my.sharepoint.com/:u:/g/personal/wuxy_connect_hku_hk/EYvNV4XUJ_5Mlk-g15RelN4BW_P8lVBfC_zhjC_BlBDARg?e=UoGFWH)] [[Semseg](https://connecthkuhk-my.sharepoint.com/:u:/g/personal/wuxy_connect_hku_hk/EQkDiv5xkOFKgCpGiGtAlLwBon7i8W6my3TIbGVxuiTttQ?e=tQFnbr)]
|
851 |
+
|
852 |
+
#### Point Prompt Training (PPT)
|
853 |
+
PPT presents a multi-dataset pre-training framework, and it is compatible with various existing pre-training frameworks and backbones.
|
854 |
+
1. PPT supervised joint training with the following example scripts:
|
855 |
+
```bash
|
856 |
+
# ScanNet + Structured3d, validate on ScanNet (S3DIS might cause long data time, w/o S3DIS for a quick validation) >= 3090 * 8
|
857 |
+
sh scripts/train.sh -g 8 -d scannet -c semseg-ppt-v1m1-0-sc-st-spunet -n semseg-ppt-v1m1-0-sc-st-spunet
|
858 |
+
sh scripts/train.sh -g 8 -d scannet -c semseg-ppt-v1m1-1-sc-st-spunet-submit -n semseg-ppt-v1m1-1-sc-st-spunet-submit
|
859 |
+
# ScanNet + S3DIS + Structured3d, validate on S3DIS (>= a100 * 8)
|
860 |
+
sh scripts/train.sh -g 8 -d s3dis -c semseg-ppt-v1m1-0-s3-sc-st-spunet -n semseg-ppt-v1m1-0-s3-sc-st-spunet
|
861 |
+
# SemanticKITTI + nuScenes + Waymo, validate on SemanticKITTI (bs12 >= 3090 * 4; bs24 >= 3090 * 8, v1m1-0 is still on tuning)
|
862 |
+
sh scripts/train.sh -g 4 -d semantic_kitti -c semseg-ppt-v1m1-0-nu-sk-wa-spunet -n semseg-ppt-v1m1-0-nu-sk-wa-spunet
|
863 |
+
sh scripts/train.sh -g 4 -d semantic_kitti -c semseg-ppt-v1m2-0-sk-nu-wa-spunet -n semseg-ppt-v1m2-0-sk-nu-wa-spunet
|
864 |
+
sh scripts/train.sh -g 4 -d semantic_kitti -c semseg-ppt-v1m2-1-sk-nu-wa-spunet-submit -n semseg-ppt-v1m2-1-sk-nu-wa-spunet-submit
|
865 |
+
# SemanticKITTI + nuScenes + Waymo, validate on nuScenes (bs12 >= 3090 * 4; bs24 >= 3090 * 8, v1m1-0 is still on tuning)
|
866 |
+
sh scripts/train.sh -g 4 -d nuscenes -c semseg-ppt-v1m1-0-nu-sk-wa-spunet -n semseg-ppt-v1m1-0-nu-sk-wa-spunet
|
867 |
+
sh scripts/train.sh -g 4 -d nuscenes -c semseg-ppt-v1m2-0-nu-sk-wa-spunet -n semseg-ppt-v1m2-0-nu-sk-wa-spunet
|
868 |
+
sh scripts/train.sh -g 4 -d nuscenes -c semseg-ppt-v1m2-1-nu-sk-wa-spunet-submit -n semseg-ppt-v1m2-1-nu-sk-wa-spunet-submit
|
869 |
+
```
|
870 |
+
|
871 |
+
#### PointContrast
|
872 |
+
1. Preprocess and link ScanNet-Pair dataset (pair-wise matching with ScanNet raw RGB-D frame, ~1.5T):
|
873 |
+
```bash
|
874 |
+
# RAW_SCANNET_DIR: the directory of downloaded ScanNet v2 raw dataset.
|
875 |
+
# PROCESSED_SCANNET_PAIR_DIR: the directory of processed ScanNet pair dataset (output dir).
|
876 |
+
python pointcept/datasets/preprocessing/scannet/scannet_pair/preprocess.py --dataset_root ${RAW_SCANNET_DIR} --output_root ${PROCESSED_SCANNET_PAIR_DIR}
|
877 |
+
ln -s ${PROCESSED_SCANNET_PAIR_DIR} ${CODEBASE_DIR}/data/scannet
|
878 |
+
```
|
879 |
+
2. Pre-training with the following example scripts:
|
880 |
+
```bash
|
881 |
+
# ScanNet
|
882 |
+
sh scripts/train.sh -g 8 -d scannet -c pretrain-msc-v1m1-1-spunet-pointcontrast -n pretrain-msc-v1m1-1-spunet-pointcontrast
|
883 |
+
```
|
884 |
+
3. Fine-tuning refer [MSC](#masked-scene-contrast-msc).
|
885 |
+
|
886 |
+
#### Contrastive Scene Contexts
|
887 |
+
1. Preprocess and link ScanNet-Pair dataset (refer [PointContrast](#pointcontrast)):
|
888 |
+
2. Pre-training with the following example scripts:
|
889 |
+
```bash
|
890 |
+
# ScanNet
|
891 |
+
sh scripts/train.sh -g 8 -d scannet -c pretrain-msc-v1m2-0-spunet-csc -n pretrain-msc-v1m2-0-spunet-csc
|
892 |
+
```
|
893 |
+
3. Fine-tuning refer [MSC](#masked-scene-contrast-msc).
|
894 |
+
|
895 |
+
## Acknowledgement
|
896 |
+
_Pointcept_ is designed by [Xiaoyang](https://xywu.me/), named by [Yixing](https://github.com/yxlao) and the logo is created by [Yuechen](https://julianjuaner.github.io/). It is derived from [Hengshuang](https://hszhao.github.io/)'s [Semseg](https://github.com/hszhao/semseg) and inspired by several repos, e.g., [MinkowskiEngine](https://github.com/NVIDIA/MinkowskiEngine), [pointnet2](https://github.com/charlesq34/pointnet2), [mmcv](https://github.com/open-mmlab/mmcv/tree/master/mmcv), and [Detectron2](https://github.com/facebookresearch/detectron2).
|
Pointcept/configs/_base_/dataset/scannetpp.py
ADDED
@@ -0,0 +1,104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
data = dict(
|
2 |
+
names=[
|
3 |
+
"wall",
|
4 |
+
"ceiling",
|
5 |
+
"floor",
|
6 |
+
"table",
|
7 |
+
"door",
|
8 |
+
"ceiling lamp",
|
9 |
+
"cabinet",
|
10 |
+
"blinds",
|
11 |
+
"curtain",
|
12 |
+
"chair",
|
13 |
+
"storage cabinet",
|
14 |
+
"office chair",
|
15 |
+
"bookshelf",
|
16 |
+
"whiteboard",
|
17 |
+
"window",
|
18 |
+
"box",
|
19 |
+
"window frame",
|
20 |
+
"monitor",
|
21 |
+
"shelf",
|
22 |
+
"doorframe",
|
23 |
+
"pipe",
|
24 |
+
"heater",
|
25 |
+
"kitchen cabinet",
|
26 |
+
"sofa",
|
27 |
+
"windowsill",
|
28 |
+
"bed",
|
29 |
+
"shower wall",
|
30 |
+
"trash can",
|
31 |
+
"book",
|
32 |
+
"plant",
|
33 |
+
"blanket",
|
34 |
+
"tv",
|
35 |
+
"computer tower",
|
36 |
+
"kitchen counter",
|
37 |
+
"refrigerator",
|
38 |
+
"jacket",
|
39 |
+
"electrical duct",
|
40 |
+
"sink",
|
41 |
+
"bag",
|
42 |
+
"picture",
|
43 |
+
"pillow",
|
44 |
+
"towel",
|
45 |
+
"suitcase",
|
46 |
+
"backpack",
|
47 |
+
"crate",
|
48 |
+
"keyboard",
|
49 |
+
"rack",
|
50 |
+
"toilet",
|
51 |
+
"paper",
|
52 |
+
"printer",
|
53 |
+
"poster",
|
54 |
+
"painting",
|
55 |
+
"microwave",
|
56 |
+
"board",
|
57 |
+
"shoes",
|
58 |
+
"socket",
|
59 |
+
"bottle",
|
60 |
+
"bucket",
|
61 |
+
"cushion",
|
62 |
+
"basket",
|
63 |
+
"shoe rack",
|
64 |
+
"telephone",
|
65 |
+
"file folder",
|
66 |
+
"cloth",
|
67 |
+
"blind rail",
|
68 |
+
"laptop",
|
69 |
+
"plant pot",
|
70 |
+
"exhaust fan",
|
71 |
+
"cup",
|
72 |
+
"coat hanger",
|
73 |
+
"light switch",
|
74 |
+
"speaker",
|
75 |
+
"table lamp",
|
76 |
+
"air vent",
|
77 |
+
"clothes hanger",
|
78 |
+
"kettle",
|
79 |
+
"smoke detector",
|
80 |
+
"container",
|
81 |
+
"power strip",
|
82 |
+
"slippers",
|
83 |
+
"paper bag",
|
84 |
+
"mouse",
|
85 |
+
"cutting board",
|
86 |
+
"toilet paper",
|
87 |
+
"paper towel",
|
88 |
+
"pot",
|
89 |
+
"clock",
|
90 |
+
"pan",
|
91 |
+
"tap",
|
92 |
+
"jar",
|
93 |
+
"soap dispenser",
|
94 |
+
"binder",
|
95 |
+
"bowl",
|
96 |
+
"tissue box",
|
97 |
+
"whiteboard eraser",
|
98 |
+
"toilet brush",
|
99 |
+
"spray bottle",
|
100 |
+
"headphones",
|
101 |
+
"stapler",
|
102 |
+
"marker",
|
103 |
+
]
|
104 |
+
)
|
Pointcept/configs/_base_/default_runtime.py
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
weight = None # path to model weight
|
2 |
+
resume = False # whether to resume training process
|
3 |
+
evaluate = True # evaluate after each epoch training process
|
4 |
+
test_only = False # test process
|
5 |
+
|
6 |
+
seed = None # train process will init a random seed and record
|
7 |
+
save_path = "exp/default"
|
8 |
+
num_worker = 16 # total worker in all gpu
|
9 |
+
batch_size = 16 # total batch size in all gpu
|
10 |
+
batch_size_val = None # auto adapt to bs 1 for each gpu
|
11 |
+
batch_size_test = None # auto adapt to bs 1 for each gpu
|
12 |
+
epoch = 100 # total epoch, data loop = epoch // eval_epoch
|
13 |
+
eval_epoch = 100 # sche total eval & checkpoint epoch
|
14 |
+
clip_grad = None # disable with None, enable with a float
|
15 |
+
|
16 |
+
sync_bn = False
|
17 |
+
enable_amp = False
|
18 |
+
empty_cache = False
|
19 |
+
empty_cache_per_epoch = False
|
20 |
+
find_unused_parameters = False
|
21 |
+
|
22 |
+
mix_prob = 0
|
23 |
+
param_dicts = None # example: param_dicts = [dict(keyword="block", lr_scale=0.1)]
|
24 |
+
|
25 |
+
# hook
|
26 |
+
hooks = [
|
27 |
+
dict(type="CheckpointLoader"),
|
28 |
+
dict(type="IterationTimer", warmup_iter=2),
|
29 |
+
dict(type="InformationWriter"),
|
30 |
+
dict(type="SemSegEvaluator"),
|
31 |
+
dict(type="CheckpointSaver", save_freq=None),
|
32 |
+
dict(type="PreciseEvaluator", test_last=False),
|
33 |
+
]
|
34 |
+
|
35 |
+
# Trainer
|
36 |
+
train = dict(type="DefaultTrainer")
|
37 |
+
|
38 |
+
# Tester
|
39 |
+
test = dict(type="SemSegTester", verbose=True)
|
Pointcept/configs/matterport3d/semseg-pt-v3m1-0-base.py
ADDED
@@ -0,0 +1,313 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
_base_ = ["../_base_/default_runtime.py"]
|
2 |
+
|
3 |
+
# misc custom setting
|
4 |
+
batch_size = 12 # bs: total bs in all gpus
|
5 |
+
num_worker = 24
|
6 |
+
mix_prob = 0.8
|
7 |
+
empty_cache = False
|
8 |
+
enable_amp = True
|
9 |
+
|
10 |
+
# model settings
|
11 |
+
model = dict(
|
12 |
+
type="DefaultSegmentorV2",
|
13 |
+
num_classes=21,
|
14 |
+
backbone_out_channels=64,
|
15 |
+
backbone=dict(
|
16 |
+
type="PT-v3m1",
|
17 |
+
in_channels=6,
|
18 |
+
order=("z", "z-trans", "hilbert", "hilbert-trans"),
|
19 |
+
stride=(2, 2, 2, 2),
|
20 |
+
enc_depths=(2, 2, 2, 6, 2),
|
21 |
+
enc_channels=(32, 64, 128, 256, 512),
|
22 |
+
enc_num_head=(2, 4, 8, 16, 32),
|
23 |
+
enc_patch_size=(1024, 1024, 1024, 1024, 1024),
|
24 |
+
dec_depths=(2, 2, 2, 2),
|
25 |
+
dec_channels=(64, 64, 128, 256),
|
26 |
+
dec_num_head=(4, 4, 8, 16),
|
27 |
+
dec_patch_size=(1024, 1024, 1024, 1024),
|
28 |
+
mlp_ratio=4,
|
29 |
+
qkv_bias=True,
|
30 |
+
qk_scale=None,
|
31 |
+
attn_drop=0.0,
|
32 |
+
proj_drop=0.0,
|
33 |
+
drop_path=0.3,
|
34 |
+
shuffle_orders=True,
|
35 |
+
pre_norm=True,
|
36 |
+
enable_rpe=False,
|
37 |
+
enable_flash=True,
|
38 |
+
upcast_attention=False,
|
39 |
+
upcast_softmax=False,
|
40 |
+
cls_mode=False,
|
41 |
+
pdnorm_bn=False,
|
42 |
+
pdnorm_ln=False,
|
43 |
+
pdnorm_decouple=True,
|
44 |
+
pdnorm_adaptive=False,
|
45 |
+
pdnorm_affine=True,
|
46 |
+
pdnorm_conditions=("ScanNet", "S3DIS", "Structured3D"),
|
47 |
+
),
|
48 |
+
criteria=[
|
49 |
+
dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
|
50 |
+
dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
|
51 |
+
],
|
52 |
+
)
|
53 |
+
|
54 |
+
# scheduler settings
|
55 |
+
epoch = 800
|
56 |
+
optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05)
|
57 |
+
scheduler = dict(
|
58 |
+
type="OneCycleLR",
|
59 |
+
max_lr=[0.006, 0.0006],
|
60 |
+
pct_start=0.05,
|
61 |
+
anneal_strategy="cos",
|
62 |
+
div_factor=10.0,
|
63 |
+
final_div_factor=1000.0,
|
64 |
+
)
|
65 |
+
param_dicts = [dict(keyword="block", lr=0.0006)]
|
66 |
+
|
67 |
+
# dataset settings
|
68 |
+
dataset_type = "DefaultDataset"
|
69 |
+
data_root = "data/matterport3d"
|
70 |
+
|
71 |
+
data = dict(
|
72 |
+
num_classes=21,
|
73 |
+
ignore_index=-1,
|
74 |
+
names=(
|
75 |
+
"wall",
|
76 |
+
"floor",
|
77 |
+
"cabinet",
|
78 |
+
"bed",
|
79 |
+
"chair",
|
80 |
+
"sofa",
|
81 |
+
"table",
|
82 |
+
"door",
|
83 |
+
"window",
|
84 |
+
"bookshelf",
|
85 |
+
"picture",
|
86 |
+
"counter",
|
87 |
+
"desk",
|
88 |
+
"curtain",
|
89 |
+
"refrigerator",
|
90 |
+
"shower curtain",
|
91 |
+
"toilet",
|
92 |
+
"sink",
|
93 |
+
"bathtub",
|
94 |
+
"other",
|
95 |
+
"ceiling",
|
96 |
+
),
|
97 |
+
train=dict(
|
98 |
+
type=dataset_type,
|
99 |
+
split="train",
|
100 |
+
data_root=data_root,
|
101 |
+
transform=[
|
102 |
+
dict(type="CenterShift", apply_z=True),
|
103 |
+
dict(
|
104 |
+
type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
|
105 |
+
),
|
106 |
+
# dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
|
107 |
+
dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
|
108 |
+
dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
|
109 |
+
dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
|
110 |
+
dict(type="RandomScale", scale=[0.9, 1.1]),
|
111 |
+
# dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
|
112 |
+
dict(type="RandomFlip", p=0.5),
|
113 |
+
dict(type="RandomJitter", sigma=0.005, clip=0.02),
|
114 |
+
dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
|
115 |
+
dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
|
116 |
+
dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
|
117 |
+
dict(type="ChromaticJitter", p=0.95, std=0.05),
|
118 |
+
# dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
|
119 |
+
# dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
|
120 |
+
dict(
|
121 |
+
type="GridSample",
|
122 |
+
grid_size=0.02,
|
123 |
+
hash_type="fnv",
|
124 |
+
mode="train",
|
125 |
+
return_grid_coord=True,
|
126 |
+
),
|
127 |
+
dict(type="SphereCrop", point_max=102400, mode="random"),
|
128 |
+
dict(type="CenterShift", apply_z=False),
|
129 |
+
dict(type="NormalizeColor"),
|
130 |
+
# dict(type="ShufflePoint"),
|
131 |
+
dict(type="ToTensor"),
|
132 |
+
dict(
|
133 |
+
type="Collect",
|
134 |
+
keys=("coord", "grid_coord", "segment"),
|
135 |
+
feat_keys=("color", "normal"),
|
136 |
+
),
|
137 |
+
],
|
138 |
+
test_mode=False,
|
139 |
+
),
|
140 |
+
val=dict(
|
141 |
+
type=dataset_type,
|
142 |
+
split="val",
|
143 |
+
data_root=data_root,
|
144 |
+
transform=[
|
145 |
+
dict(type="CenterShift", apply_z=True),
|
146 |
+
dict(
|
147 |
+
type="GridSample",
|
148 |
+
grid_size=0.02,
|
149 |
+
hash_type="fnv",
|
150 |
+
mode="train",
|
151 |
+
return_grid_coord=True,
|
152 |
+
),
|
153 |
+
dict(type="CenterShift", apply_z=False),
|
154 |
+
dict(type="NormalizeColor"),
|
155 |
+
dict(type="ToTensor"),
|
156 |
+
dict(
|
157 |
+
type="Collect",
|
158 |
+
keys=("coord", "grid_coord", "segment"),
|
159 |
+
feat_keys=("color", "normal"),
|
160 |
+
),
|
161 |
+
],
|
162 |
+
test_mode=False,
|
163 |
+
),
|
164 |
+
test=dict(
|
165 |
+
type=dataset_type,
|
166 |
+
split="val",
|
167 |
+
data_root=data_root,
|
168 |
+
transform=[
|
169 |
+
dict(type="CenterShift", apply_z=True),
|
170 |
+
dict(type="NormalizeColor"),
|
171 |
+
],
|
172 |
+
test_mode=True,
|
173 |
+
test_cfg=dict(
|
174 |
+
voxelize=dict(
|
175 |
+
type="GridSample",
|
176 |
+
grid_size=0.02,
|
177 |
+
hash_type="fnv",
|
178 |
+
mode="test",
|
179 |
+
keys=("coord", "color", "normal"),
|
180 |
+
return_grid_coord=True,
|
181 |
+
),
|
182 |
+
crop=None,
|
183 |
+
post_transform=[
|
184 |
+
dict(type="CenterShift", apply_z=False),
|
185 |
+
dict(type="ToTensor"),
|
186 |
+
dict(
|
187 |
+
type="Collect",
|
188 |
+
keys=("coord", "grid_coord", "index"),
|
189 |
+
feat_keys=("color", "normal"),
|
190 |
+
),
|
191 |
+
],
|
192 |
+
aug_transform=[
|
193 |
+
[
|
194 |
+
dict(
|
195 |
+
type="RandomRotateTargetAngle",
|
196 |
+
angle=[0],
|
197 |
+
axis="z",
|
198 |
+
center=[0, 0, 0],
|
199 |
+
p=1,
|
200 |
+
)
|
201 |
+
],
|
202 |
+
[
|
203 |
+
dict(
|
204 |
+
type="RandomRotateTargetAngle",
|
205 |
+
angle=[1 / 2],
|
206 |
+
axis="z",
|
207 |
+
center=[0, 0, 0],
|
208 |
+
p=1,
|
209 |
+
)
|
210 |
+
],
|
211 |
+
[
|
212 |
+
dict(
|
213 |
+
type="RandomRotateTargetAngle",
|
214 |
+
angle=[1],
|
215 |
+
axis="z",
|
216 |
+
center=[0, 0, 0],
|
217 |
+
p=1,
|
218 |
+
)
|
219 |
+
],
|
220 |
+
[
|
221 |
+
dict(
|
222 |
+
type="RandomRotateTargetAngle",
|
223 |
+
angle=[3 / 2],
|
224 |
+
axis="z",
|
225 |
+
center=[0, 0, 0],
|
226 |
+
p=1,
|
227 |
+
)
|
228 |
+
],
|
229 |
+
[
|
230 |
+
dict(
|
231 |
+
type="RandomRotateTargetAngle",
|
232 |
+
angle=[0],
|
233 |
+
axis="z",
|
234 |
+
center=[0, 0, 0],
|
235 |
+
p=1,
|
236 |
+
),
|
237 |
+
dict(type="RandomScale", scale=[0.95, 0.95]),
|
238 |
+
],
|
239 |
+
[
|
240 |
+
dict(
|
241 |
+
type="RandomRotateTargetAngle",
|
242 |
+
angle=[1 / 2],
|
243 |
+
axis="z",
|
244 |
+
center=[0, 0, 0],
|
245 |
+
p=1,
|
246 |
+
),
|
247 |
+
dict(type="RandomScale", scale=[0.95, 0.95]),
|
248 |
+
],
|
249 |
+
[
|
250 |
+
dict(
|
251 |
+
type="RandomRotateTargetAngle",
|
252 |
+
angle=[1],
|
253 |
+
axis="z",
|
254 |
+
center=[0, 0, 0],
|
255 |
+
p=1,
|
256 |
+
),
|
257 |
+
dict(type="RandomScale", scale=[0.95, 0.95]),
|
258 |
+
],
|
259 |
+
[
|
260 |
+
dict(
|
261 |
+
type="RandomRotateTargetAngle",
|
262 |
+
angle=[3 / 2],
|
263 |
+
axis="z",
|
264 |
+
center=[0, 0, 0],
|
265 |
+
p=1,
|
266 |
+
),
|
267 |
+
dict(type="RandomScale", scale=[0.95, 0.95]),
|
268 |
+
],
|
269 |
+
[
|
270 |
+
dict(
|
271 |
+
type="RandomRotateTargetAngle",
|
272 |
+
angle=[0],
|
273 |
+
axis="z",
|
274 |
+
center=[0, 0, 0],
|
275 |
+
p=1,
|
276 |
+
),
|
277 |
+
dict(type="RandomScale", scale=[1.05, 1.05]),
|
278 |
+
],
|
279 |
+
[
|
280 |
+
dict(
|
281 |
+
type="RandomRotateTargetAngle",
|
282 |
+
angle=[1 / 2],
|
283 |
+
axis="z",
|
284 |
+
center=[0, 0, 0],
|
285 |
+
p=1,
|
286 |
+
),
|
287 |
+
dict(type="RandomScale", scale=[1.05, 1.05]),
|
288 |
+
],
|
289 |
+
[
|
290 |
+
dict(
|
291 |
+
type="RandomRotateTargetAngle",
|
292 |
+
angle=[1],
|
293 |
+
axis="z",
|
294 |
+
center=[0, 0, 0],
|
295 |
+
p=1,
|
296 |
+
),
|
297 |
+
dict(type="RandomScale", scale=[1.05, 1.05]),
|
298 |
+
],
|
299 |
+
[
|
300 |
+
dict(
|
301 |
+
type="RandomRotateTargetAngle",
|
302 |
+
angle=[3 / 2],
|
303 |
+
axis="z",
|
304 |
+
center=[0, 0, 0],
|
305 |
+
p=1,
|
306 |
+
),
|
307 |
+
dict(type="RandomScale", scale=[1.05, 1.05]),
|
308 |
+
],
|
309 |
+
[dict(type="RandomFlip", p=1)],
|
310 |
+
],
|
311 |
+
),
|
312 |
+
),
|
313 |
+
)
|
Pointcept/configs/matterport3d/semseg-spunet-v1m1-0-base.py
ADDED
@@ -0,0 +1,282 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
_base_ = ["../_base_/default_runtime.py"]
|
2 |
+
|
3 |
+
# misc custom setting
|
4 |
+
batch_size = 12 # bs: total bs in all gpus
|
5 |
+
mix_prob = 0.8
|
6 |
+
empty_cache = False
|
7 |
+
enable_amp = True
|
8 |
+
|
9 |
+
# model settings
|
10 |
+
model = dict(
|
11 |
+
type="DefaultSegmentor",
|
12 |
+
backbone=dict(
|
13 |
+
type="SpUNet-v1m1",
|
14 |
+
in_channels=6,
|
15 |
+
num_classes=21,
|
16 |
+
channels=(32, 64, 128, 256, 256, 128, 96, 96),
|
17 |
+
layers=(2, 3, 4, 6, 2, 2, 2, 2),
|
18 |
+
),
|
19 |
+
criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
|
20 |
+
)
|
21 |
+
|
22 |
+
|
23 |
+
# scheduler settings
|
24 |
+
epoch = 800
|
25 |
+
optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True)
|
26 |
+
scheduler = dict(
|
27 |
+
type="OneCycleLR",
|
28 |
+
max_lr=optimizer["lr"],
|
29 |
+
pct_start=0.05,
|
30 |
+
anneal_strategy="cos",
|
31 |
+
div_factor=10.0,
|
32 |
+
final_div_factor=10000.0,
|
33 |
+
)
|
34 |
+
|
35 |
+
# dataset settings
|
36 |
+
dataset_type = "DefaultDataset"
|
37 |
+
data_root = "data/matterport3d"
|
38 |
+
|
39 |
+
data = dict(
|
40 |
+
num_classes=21,
|
41 |
+
ignore_index=-1,
|
42 |
+
names=(
|
43 |
+
"wall",
|
44 |
+
"floor",
|
45 |
+
"cabinet",
|
46 |
+
"bed",
|
47 |
+
"chair",
|
48 |
+
"sofa",
|
49 |
+
"table",
|
50 |
+
"door",
|
51 |
+
"window",
|
52 |
+
"bookshelf",
|
53 |
+
"picture",
|
54 |
+
"counter",
|
55 |
+
"desk",
|
56 |
+
"curtain",
|
57 |
+
"refrigerator",
|
58 |
+
"shower curtain",
|
59 |
+
"toilet",
|
60 |
+
"sink",
|
61 |
+
"bathtub",
|
62 |
+
"other",
|
63 |
+
"ceiling",
|
64 |
+
),
|
65 |
+
train=dict(
|
66 |
+
type=dataset_type,
|
67 |
+
split="train",
|
68 |
+
data_root=data_root,
|
69 |
+
transform=[
|
70 |
+
dict(type="CenterShift", apply_z=True),
|
71 |
+
dict(
|
72 |
+
type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
|
73 |
+
),
|
74 |
+
# dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
|
75 |
+
dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
|
76 |
+
dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
|
77 |
+
dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
|
78 |
+
dict(type="RandomScale", scale=[0.9, 1.1]),
|
79 |
+
# dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
|
80 |
+
dict(type="RandomFlip", p=0.5),
|
81 |
+
dict(type="RandomJitter", sigma=0.005, clip=0.02),
|
82 |
+
dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
|
83 |
+
dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
|
84 |
+
dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
|
85 |
+
dict(type="ChromaticJitter", p=0.95, std=0.05),
|
86 |
+
# dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
|
87 |
+
# dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
|
88 |
+
dict(
|
89 |
+
type="GridSample",
|
90 |
+
grid_size=0.02,
|
91 |
+
hash_type="fnv",
|
92 |
+
mode="train",
|
93 |
+
return_grid_coord=True,
|
94 |
+
),
|
95 |
+
dict(type="SphereCrop", point_max=100000, mode="random"),
|
96 |
+
dict(type="CenterShift", apply_z=False),
|
97 |
+
dict(type="NormalizeColor"),
|
98 |
+
dict(type="ShufflePoint"),
|
99 |
+
dict(type="ToTensor"),
|
100 |
+
dict(
|
101 |
+
type="Collect",
|
102 |
+
keys=("coord", "grid_coord", "segment"),
|
103 |
+
feat_keys=("color", "normal"),
|
104 |
+
),
|
105 |
+
],
|
106 |
+
test_mode=False,
|
107 |
+
),
|
108 |
+
val=dict(
|
109 |
+
type=dataset_type,
|
110 |
+
split="val",
|
111 |
+
data_root=data_root,
|
112 |
+
transform=[
|
113 |
+
dict(type="CenterShift", apply_z=True),
|
114 |
+
dict(
|
115 |
+
type="GridSample",
|
116 |
+
grid_size=0.02,
|
117 |
+
hash_type="fnv",
|
118 |
+
mode="train",
|
119 |
+
return_grid_coord=True,
|
120 |
+
),
|
121 |
+
# dict(type="SphereCrop", point_max=1000000, mode="center"),
|
122 |
+
dict(type="CenterShift", apply_z=False),
|
123 |
+
dict(type="NormalizeColor"),
|
124 |
+
dict(type="ToTensor"),
|
125 |
+
dict(
|
126 |
+
type="Collect",
|
127 |
+
keys=("coord", "grid_coord", "segment"),
|
128 |
+
feat_keys=("color", "normal"),
|
129 |
+
),
|
130 |
+
],
|
131 |
+
test_mode=False,
|
132 |
+
),
|
133 |
+
test=dict(
|
134 |
+
type=dataset_type,
|
135 |
+
split="val",
|
136 |
+
data_root=data_root,
|
137 |
+
transform=[
|
138 |
+
dict(type="CenterShift", apply_z=True),
|
139 |
+
dict(type="NormalizeColor"),
|
140 |
+
],
|
141 |
+
test_mode=True,
|
142 |
+
test_cfg=dict(
|
143 |
+
voxelize=dict(
|
144 |
+
type="GridSample",
|
145 |
+
grid_size=0.02,
|
146 |
+
hash_type="fnv",
|
147 |
+
mode="test",
|
148 |
+
return_grid_coord=True,
|
149 |
+
keys=("coord", "color", "normal"),
|
150 |
+
),
|
151 |
+
crop=None,
|
152 |
+
post_transform=[
|
153 |
+
dict(type="CenterShift", apply_z=False),
|
154 |
+
dict(type="ToTensor"),
|
155 |
+
dict(
|
156 |
+
type="Collect",
|
157 |
+
keys=("coord", "grid_coord", "index"),
|
158 |
+
feat_keys=("color", "normal"),
|
159 |
+
),
|
160 |
+
],
|
161 |
+
aug_transform=[
|
162 |
+
[
|
163 |
+
dict(
|
164 |
+
type="RandomRotateTargetAngle",
|
165 |
+
angle=[0],
|
166 |
+
axis="z",
|
167 |
+
center=[0, 0, 0],
|
168 |
+
p=1,
|
169 |
+
)
|
170 |
+
],
|
171 |
+
[
|
172 |
+
dict(
|
173 |
+
type="RandomRotateTargetAngle",
|
174 |
+
angle=[1 / 2],
|
175 |
+
axis="z",
|
176 |
+
center=[0, 0, 0],
|
177 |
+
p=1,
|
178 |
+
)
|
179 |
+
],
|
180 |
+
[
|
181 |
+
dict(
|
182 |
+
type="RandomRotateTargetAngle",
|
183 |
+
angle=[1],
|
184 |
+
axis="z",
|
185 |
+
center=[0, 0, 0],
|
186 |
+
p=1,
|
187 |
+
)
|
188 |
+
],
|
189 |
+
[
|
190 |
+
dict(
|
191 |
+
type="RandomRotateTargetAngle",
|
192 |
+
angle=[3 / 2],
|
193 |
+
axis="z",
|
194 |
+
center=[0, 0, 0],
|
195 |
+
p=1,
|
196 |
+
)
|
197 |
+
],
|
198 |
+
[
|
199 |
+
dict(
|
200 |
+
type="RandomRotateTargetAngle",
|
201 |
+
angle=[0],
|
202 |
+
axis="z",
|
203 |
+
center=[0, 0, 0],
|
204 |
+
p=1,
|
205 |
+
),
|
206 |
+
dict(type="RandomScale", scale=[0.95, 0.95]),
|
207 |
+
],
|
208 |
+
[
|
209 |
+
dict(
|
210 |
+
type="RandomRotateTargetAngle",
|
211 |
+
angle=[1 / 2],
|
212 |
+
axis="z",
|
213 |
+
center=[0, 0, 0],
|
214 |
+
p=1,
|
215 |
+
),
|
216 |
+
dict(type="RandomScale", scale=[0.95, 0.95]),
|
217 |
+
],
|
218 |
+
[
|
219 |
+
dict(
|
220 |
+
type="RandomRotateTargetAngle",
|
221 |
+
angle=[1],
|
222 |
+
axis="z",
|
223 |
+
center=[0, 0, 0],
|
224 |
+
p=1,
|
225 |
+
),
|
226 |
+
dict(type="RandomScale", scale=[0.95, 0.95]),
|
227 |
+
],
|
228 |
+
[
|
229 |
+
dict(
|
230 |
+
type="RandomRotateTargetAngle",
|
231 |
+
angle=[3 / 2],
|
232 |
+
axis="z",
|
233 |
+
center=[0, 0, 0],
|
234 |
+
p=1,
|
235 |
+
),
|
236 |
+
dict(type="RandomScale", scale=[0.95, 0.95]),
|
237 |
+
],
|
238 |
+
[
|
239 |
+
dict(
|
240 |
+
type="RandomRotateTargetAngle",
|
241 |
+
angle=[0],
|
242 |
+
axis="z",
|
243 |
+
center=[0, 0, 0],
|
244 |
+
p=1,
|
245 |
+
),
|
246 |
+
dict(type="RandomScale", scale=[1.05, 1.05]),
|
247 |
+
],
|
248 |
+
[
|
249 |
+
dict(
|
250 |
+
type="RandomRotateTargetAngle",
|
251 |
+
angle=[1 / 2],
|
252 |
+
axis="z",
|
253 |
+
center=[0, 0, 0],
|
254 |
+
p=1,
|
255 |
+
),
|
256 |
+
dict(type="RandomScale", scale=[1.05, 1.05]),
|
257 |
+
],
|
258 |
+
[
|
259 |
+
dict(
|
260 |
+
type="RandomRotateTargetAngle",
|
261 |
+
angle=[1],
|
262 |
+
axis="z",
|
263 |
+
center=[0, 0, 0],
|
264 |
+
p=1,
|
265 |
+
),
|
266 |
+
dict(type="RandomScale", scale=[1.05, 1.05]),
|
267 |
+
],
|
268 |
+
[
|
269 |
+
dict(
|
270 |
+
type="RandomRotateTargetAngle",
|
271 |
+
angle=[3 / 2],
|
272 |
+
axis="z",
|
273 |
+
center=[0, 0, 0],
|
274 |
+
p=1,
|
275 |
+
),
|
276 |
+
dict(type="RandomScale", scale=[1.05, 1.05]),
|
277 |
+
],
|
278 |
+
[dict(type="RandomFlip", p=1)],
|
279 |
+
],
|
280 |
+
),
|
281 |
+
),
|
282 |
+
)
|
Pointcept/configs/modelnet40/cls-ptv3-v1m1-0-base.py
ADDED
@@ -0,0 +1,232 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
_base_ = ["../_base_/default_runtime.py"]
|
2 |
+
# misc custom setting
|
3 |
+
batch_size = 32 # total batch size summed across all GPUs
|
4 |
+
num_worker = 16
|
5 |
+
batch_size_val = 8
|
6 |
+
empty_cache = False
|
7 |
+
enable_amp = False
|
8 |
+
|
9 |
+
# model settings
|
10 |
+
model = dict(
|
11 |
+
type="DefaultClassifier",
|
12 |
+
num_classes=40,
|
13 |
+
backbone_embed_dim=512,
|
14 |
+
backbone=dict(
|
15 |
+
type="PT-v3m1",
|
16 |
+
in_channels=6,
|
17 |
+
order=("z", "z-trans", "hilbert", "hilbert-trans"),
|
18 |
+
stride=(2, 2, 2, 2),
|
19 |
+
enc_depths=(2, 2, 2, 6, 2),
|
20 |
+
enc_channels=(32, 64, 128, 256, 512),
|
21 |
+
enc_num_head=(2, 4, 8, 16, 32),
|
22 |
+
enc_patch_size=(1024, 1024, 1024, 1024, 1024),
|
23 |
+
dec_depths=(2, 2, 2, 2),
|
24 |
+
dec_channels=(64, 64, 128, 256),
|
25 |
+
dec_num_head=(4, 4, 8, 16),
|
26 |
+
dec_patch_size=(1024, 1024, 1024, 1024),
|
27 |
+
mlp_ratio=4,
|
28 |
+
qkv_bias=True,
|
29 |
+
qk_scale=None,
|
30 |
+
attn_drop=0.0,
|
31 |
+
proj_drop=0.0,
|
32 |
+
drop_path=0.3,
|
33 |
+
shuffle_orders=True,
|
34 |
+
pre_norm=True,
|
35 |
+
enable_rpe=False,
|
36 |
+
enable_flash=True,
|
37 |
+
upcast_attention=False,
|
38 |
+
upcast_softmax=False,
|
39 |
+
cls_mode=True,
|
40 |
+
pdnorm_bn=False,
|
41 |
+
pdnorm_ln=False,
|
42 |
+
pdnorm_decouple=True,
|
43 |
+
pdnorm_adaptive=False,
|
44 |
+
pdnorm_affine=True,
|
45 |
+
pdnorm_conditions=("ScanNet", "S3DIS", "Structured3D"),
|
46 |
+
),
|
47 |
+
criteria=[
|
48 |
+
dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
|
49 |
+
dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
|
50 |
+
],
|
51 |
+
)
|
52 |
+
|
53 |
+
# scheduler settings
|
54 |
+
epoch = 300
|
55 |
+
# optimizer = dict(type="SGD", lr=0.1, momentum=0.9, weight_decay=0.0001, nesterov=True)
|
56 |
+
# scheduler = dict(type="MultiStepLR", milestones=[0.6, 0.8], gamma=0.1)
|
57 |
+
optimizer = dict(type="AdamW", lr=0.001, weight_decay=0.01)
|
58 |
+
scheduler = dict(
|
59 |
+
type="OneCycleLR",
|
60 |
+
max_lr=[0.001, 0.0001],
|
61 |
+
pct_start=0.05,
|
62 |
+
anneal_strategy="cos",
|
63 |
+
div_factor=10.0,
|
64 |
+
final_div_factor=1000.0,
|
65 |
+
)
|
66 |
+
param_dicts = [dict(keyword="block", lr=0.0001)]
|
67 |
+
|
68 |
+
# dataset settings
|
69 |
+
dataset_type = "ModelNetDataset"
|
70 |
+
data_root = "data/modelnet40_normal_resampled"
|
71 |
+
cache_data = False
|
72 |
+
class_names = [
|
73 |
+
"airplane",
|
74 |
+
"bathtub",
|
75 |
+
"bed",
|
76 |
+
"bench",
|
77 |
+
"bookshelf",
|
78 |
+
"bottle",
|
79 |
+
"bowl",
|
80 |
+
"car",
|
81 |
+
"chair",
|
82 |
+
"cone",
|
83 |
+
"cup",
|
84 |
+
"curtain",
|
85 |
+
"desk",
|
86 |
+
"door",
|
87 |
+
"dresser",
|
88 |
+
"flower_pot",
|
89 |
+
"glass_box",
|
90 |
+
"guitar",
|
91 |
+
"keyboard",
|
92 |
+
"lamp",
|
93 |
+
"laptop",
|
94 |
+
"mantel",
|
95 |
+
"monitor",
|
96 |
+
"night_stand",
|
97 |
+
"person",
|
98 |
+
"piano",
|
99 |
+
"plant",
|
100 |
+
"radio",
|
101 |
+
"range_hood",
|
102 |
+
"sink",
|
103 |
+
"sofa",
|
104 |
+
"stairs",
|
105 |
+
"stool",
|
106 |
+
"table",
|
107 |
+
"tent",
|
108 |
+
"toilet",
|
109 |
+
"tv_stand",
|
110 |
+
"vase",
|
111 |
+
"wardrobe",
|
112 |
+
"xbox",
|
113 |
+
]
|
114 |
+
|
115 |
+
data = dict(
|
116 |
+
num_classes=40,
|
117 |
+
ignore_index=-1,
|
118 |
+
names=class_names,
|
119 |
+
train=dict(
|
120 |
+
type=dataset_type,
|
121 |
+
split="train",
|
122 |
+
data_root=data_root,
|
123 |
+
class_names=class_names,
|
124 |
+
transform=[
|
125 |
+
dict(type="NormalizeCoord"),
|
126 |
+
# dict(type="CenterShift", apply_z=True),
|
127 |
+
# dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
|
128 |
+
# dict(type="RandomRotate", angle=[-1/24, 1/24], axis="x", p=0.5),
|
129 |
+
# dict(type="RandomRotate", angle=[-1/24, 1/24], axis="y", p=0.5),
|
130 |
+
dict(type="RandomScale", scale=[0.7, 1.5], anisotropic=True),
|
131 |
+
dict(type="RandomShift", shift=((-0.2, 0.2), (-0.2, 0.2), (-0.2, 0.2))),
|
132 |
+
# dict(type="RandomFlip", p=0.5),
|
133 |
+
# dict(type="RandomJitter", sigma=0.005, clip=0.02),
|
134 |
+
# dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
|
135 |
+
dict(
|
136 |
+
type="GridSample",
|
137 |
+
grid_size=0.01,
|
138 |
+
hash_type="fnv",
|
139 |
+
mode="train",
|
140 |
+
keys=("coord", "normal"),
|
141 |
+
return_grid_coord=True,
|
142 |
+
),
|
143 |
+
# dict(type="SphereCrop", point_max=10000, mode="random"),
|
144 |
+
# dict(type="CenterShift", apply_z=True),
|
145 |
+
dict(type="ShufflePoint"),
|
146 |
+
dict(type="ToTensor"),
|
147 |
+
dict(
|
148 |
+
type="Collect",
|
149 |
+
keys=("coord", "grid_coord", "category"),
|
150 |
+
feat_keys=["coord", "normal"],
|
151 |
+
),
|
152 |
+
],
|
153 |
+
test_mode=False,
|
154 |
+
),
|
155 |
+
val=dict(
|
156 |
+
type=dataset_type,
|
157 |
+
split="test",
|
158 |
+
data_root=data_root,
|
159 |
+
class_names=class_names,
|
160 |
+
transform=[
|
161 |
+
dict(type="NormalizeCoord"),
|
162 |
+
dict(
|
163 |
+
type="GridSample",
|
164 |
+
grid_size=0.01,
|
165 |
+
hash_type="fnv",
|
166 |
+
mode="train",
|
167 |
+
keys=("coord", "normal"),
|
168 |
+
return_grid_coord=True,
|
169 |
+
),
|
170 |
+
dict(type="ToTensor"),
|
171 |
+
dict(
|
172 |
+
type="Collect",
|
173 |
+
keys=("coord", "grid_coord", "category"),
|
174 |
+
feat_keys=["coord", "normal"],
|
175 |
+
),
|
176 |
+
],
|
177 |
+
test_mode=False,
|
178 |
+
),
|
179 |
+
test=dict(
|
180 |
+
type=dataset_type,
|
181 |
+
split="test",
|
182 |
+
data_root=data_root,
|
183 |
+
class_names=class_names,
|
184 |
+
transform=[
|
185 |
+
dict(type="NormalizeCoord"),
|
186 |
+
],
|
187 |
+
test_mode=True,
|
188 |
+
test_cfg=dict(
|
189 |
+
post_transform=[
|
190 |
+
dict(
|
191 |
+
type="GridSample",
|
192 |
+
grid_size=0.01,
|
193 |
+
hash_type="fnv",
|
194 |
+
mode="train",
|
195 |
+
keys=("coord", "normal"),
|
196 |
+
return_grid_coord=True,
|
197 |
+
),
|
198 |
+
dict(type="ToTensor"),
|
199 |
+
dict(
|
200 |
+
type="Collect",
|
201 |
+
keys=("coord", "grid_coord"),
|
202 |
+
feat_keys=["coord", "normal"],
|
203 |
+
),
|
204 |
+
],
|
205 |
+
aug_transform=[
|
206 |
+
[dict(type="RandomScale", scale=[1, 1], anisotropic=True)], # 1
|
207 |
+
[dict(type="RandomScale", scale=[0.8, 1.2], anisotropic=True)], # 2
|
208 |
+
[dict(type="RandomScale", scale=[0.8, 1.2], anisotropic=True)], # 3
|
209 |
+
[dict(type="RandomScale", scale=[0.8, 1.2], anisotropic=True)], # 4
|
210 |
+
[dict(type="RandomScale", scale=[0.8, 1.2], anisotropic=True)], # 5
|
211 |
+
[dict(type="RandomScale", scale=[0.8, 1.2], anisotropic=True)], # 6
|
212 |
+
[dict(type="RandomScale", scale=[0.8, 1.2], anisotropic=True)], # 7
|
213 |
+
[dict(type="RandomScale", scale=[0.8, 1.2], anisotropic=True)], # 8
|
214 |
+
[dict(type="RandomScale", scale=[0.8, 1.2], anisotropic=True)], # 9
|
215 |
+
[dict(type="RandomScale", scale=[0.8, 1.2], anisotropic=True)], # 10
|
216 |
+
],
|
217 |
+
),
|
218 |
+
),
|
219 |
+
)
|
220 |
+
|
221 |
+
# hooks
|
222 |
+
hooks = [
|
223 |
+
dict(type="CheckpointLoader"),
|
224 |
+
dict(type="IterationTimer", warmup_iter=2),
|
225 |
+
dict(type="InformationWriter"),
|
226 |
+
dict(type="ClsEvaluator"),
|
227 |
+
dict(type="CheckpointSaver", save_freq=None),
|
228 |
+
dict(type="PreciseEvaluator", test_last=False),
|
229 |
+
]
|
230 |
+
|
231 |
+
# tester
|
232 |
+
test = dict(type="ClsVotingTester", num_repeat=100)
|
Pointcept/configs/modelnet40/cls-spunet-v1m1-0-base.py
ADDED
@@ -0,0 +1,176 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
_base_ = ["../_base_/default_runtime.py"]
|
2 |
+
# misc custom setting
|
3 |
+
batch_size = 16 # total batch size summed across all GPUs
|
4 |
+
# batch_size_val = 8
|
5 |
+
empty_cache = False
|
6 |
+
enable_amp = False
|
7 |
+
|
8 |
+
# model settings
|
9 |
+
model = dict(
|
10 |
+
type="DefaultClassifier",
|
11 |
+
num_classes=40,
|
12 |
+
backbone_embed_dim=256,
|
13 |
+
backbone=dict(
|
14 |
+
type="SpUNet-v1m1",
|
15 |
+
in_channels=6,
|
16 |
+
num_classes=0,
|
17 |
+
channels=(32, 64, 128, 256, 256, 128, 96, 96),
|
18 |
+
layers=(2, 3, 4, 6, 2, 2, 2, 2),
|
19 |
+
cls_mode=True,
|
20 |
+
),
|
21 |
+
criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
|
22 |
+
)
|
23 |
+
|
24 |
+
# scheduler settings
|
25 |
+
epoch = 200
|
26 |
+
optimizer = dict(type="SGD", lr=0.1, momentum=0.9, weight_decay=0.0001, nesterov=True)
|
27 |
+
scheduler = dict(type="MultiStepLR", milestones=[0.6, 0.8], gamma=0.1)
|
28 |
+
|
29 |
+
# dataset settings
|
30 |
+
dataset_type = "ModelNetDataset"
|
31 |
+
data_root = "data/modelnet40_normal_resampled"
|
32 |
+
cache_data = False
|
33 |
+
class_names = [
|
34 |
+
"airplane",
|
35 |
+
"bathtub",
|
36 |
+
"bed",
|
37 |
+
"bench",
|
38 |
+
"bookshelf",
|
39 |
+
"bottle",
|
40 |
+
"bowl",
|
41 |
+
"car",
|
42 |
+
"chair",
|
43 |
+
"cone",
|
44 |
+
"cup",
|
45 |
+
"curtain",
|
46 |
+
"desk",
|
47 |
+
"door",
|
48 |
+
"dresser",
|
49 |
+
"flower_pot",
|
50 |
+
"glass_box",
|
51 |
+
"guitar",
|
52 |
+
"keyboard",
|
53 |
+
"lamp",
|
54 |
+
"laptop",
|
55 |
+
"mantel",
|
56 |
+
"monitor",
|
57 |
+
"night_stand",
|
58 |
+
"person",
|
59 |
+
"piano",
|
60 |
+
"plant",
|
61 |
+
"radio",
|
62 |
+
"range_hood",
|
63 |
+
"sink",
|
64 |
+
"sofa",
|
65 |
+
"stairs",
|
66 |
+
"stool",
|
67 |
+
"table",
|
68 |
+
"tent",
|
69 |
+
"toilet",
|
70 |
+
"tv_stand",
|
71 |
+
"vase",
|
72 |
+
"wardrobe",
|
73 |
+
"xbox",
|
74 |
+
]
|
75 |
+
|
76 |
+
data = dict(
|
77 |
+
num_classes=40,
|
78 |
+
ignore_index=-1,
|
79 |
+
names=class_names,
|
80 |
+
train=dict(
|
81 |
+
type=dataset_type,
|
82 |
+
split="train",
|
83 |
+
data_root=data_root,
|
84 |
+
class_names=class_names,
|
85 |
+
transform=[
|
86 |
+
dict(type="NormalizeCoord"),
|
87 |
+
# dict(type="CenterShift", apply_z=True),
|
88 |
+
# dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
|
89 |
+
# dict(type="RandomRotate", angle=[-1/24, 1/24], axis="x", p=0.5),
|
90 |
+
# dict(type="RandomRotate", angle=[-1/24, 1/24], axis="y", p=0.5),
|
91 |
+
dict(type="RandomScale", scale=[0.9, 1.1]),
|
92 |
+
dict(type="RandomShift", shift=((-0.2, 0.2), (-0.2, 0.2), (-0.2, 0.2))),
|
93 |
+
# dict(type="RandomFlip", p=0.5),
|
94 |
+
# dict(type="RandomJitter", sigma=0.005, clip=0.02),
|
95 |
+
# dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
|
96 |
+
dict(
|
97 |
+
type="GridSample",
|
98 |
+
grid_size=0.01,
|
99 |
+
hash_type="fnv",
|
100 |
+
mode="train",
|
101 |
+
keys=("coord", "normal"),
|
102 |
+
return_grid_coord=True,
|
103 |
+
),
|
104 |
+
# dict(type="SphereCrop", point_max=10000, mode="random"),
|
105 |
+
# dict(type="CenterShift", apply_z=True),
|
106 |
+
dict(type="ShufflePoint"),
|
107 |
+
dict(type="ToTensor"),
|
108 |
+
dict(
|
109 |
+
type="Collect",
|
110 |
+
keys=("coord", "grid_coord", "category"),
|
111 |
+
feat_keys=["coord", "normal"],
|
112 |
+
),
|
113 |
+
],
|
114 |
+
test_mode=False,
|
115 |
+
),
|
116 |
+
val=dict(
|
117 |
+
type=dataset_type,
|
118 |
+
split="test",
|
119 |
+
data_root=data_root,
|
120 |
+
class_names=class_names,
|
121 |
+
transform=[
|
122 |
+
dict(type="NormalizeCoord"),
|
123 |
+
dict(
|
124 |
+
type="GridSample",
|
125 |
+
grid_size=0.01,
|
126 |
+
hash_type="fnv",
|
127 |
+
mode="train",
|
128 |
+
keys=("coord", "normal"),
|
129 |
+
return_grid_coord=True,
|
130 |
+
),
|
131 |
+
dict(type="ToTensor"),
|
132 |
+
dict(
|
133 |
+
type="Collect",
|
134 |
+
keys=("coord", "grid_coord", "category"),
|
135 |
+
feat_keys=["coord", "normal"],
|
136 |
+
),
|
137 |
+
],
|
138 |
+
test_mode=False,
|
139 |
+
),
|
140 |
+
test=dict(
|
141 |
+
type=dataset_type,
|
142 |
+
split="test",
|
143 |
+
data_root=data_root,
|
144 |
+
class_names=class_names,
|
145 |
+
transform=[
|
146 |
+
dict(type="NormalizeCoord"),
|
147 |
+
dict(
|
148 |
+
type="GridSample",
|
149 |
+
grid_size=0.01,
|
150 |
+
hash_type="fnv",
|
151 |
+
mode="train",
|
152 |
+
keys=("coord", "normal"),
|
153 |
+
return_grid_coord=True,
|
154 |
+
),
|
155 |
+
dict(type="ToTensor"),
|
156 |
+
dict(
|
157 |
+
type="Collect",
|
158 |
+
keys=("coord", "grid_coord", "category"),
|
159 |
+
feat_keys=["coord", "normal"],
|
160 |
+
),
|
161 |
+
],
|
162 |
+
test_mode=True,
|
163 |
+
),
|
164 |
+
)
|
165 |
+
|
166 |
+
# hooks
|
167 |
+
hooks = [
|
168 |
+
dict(type="CheckpointLoader"),
|
169 |
+
dict(type="IterationTimer", warmup_iter=2),
|
170 |
+
dict(type="InformationWriter"),
|
171 |
+
dict(type="ClsEvaluator"),
|
172 |
+
dict(type="CheckpointSaver", save_freq=None),
|
173 |
+
]
|
174 |
+
|
175 |
+
# tester
|
176 |
+
test = dict(type="ClsTester")
|
Pointcept/configs/nuscenes/semseg-ppt-v1m1-0-nu-sk-wa-spunet.py
ADDED
@@ -0,0 +1,342 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
_base_ = ["../_base_/default_runtime.py"]
|
2 |
+
|
3 |
+
# misc custom setting
|
4 |
+
batch_size = 12 # total batch size summed across all GPUs
|
5 |
+
num_worker = 24
|
6 |
+
mix_prob = 0.8
|
7 |
+
empty_cache = False
|
8 |
+
enable_amp = True
|
9 |
+
find_unused_parameters = True
|
10 |
+
|
11 |
+
# trainer
|
12 |
+
train = dict(
|
13 |
+
type="MultiDatasetTrainer",
|
14 |
+
)
|
15 |
+
|
16 |
+
# model settings
|
17 |
+
model = dict(
|
18 |
+
type="PPT-v1m1",
|
19 |
+
backbone=dict(
|
20 |
+
type="SpUNet-v1m3",
|
21 |
+
in_channels=4,
|
22 |
+
num_classes=0,
|
23 |
+
base_channels=32,
|
24 |
+
context_channels=256,
|
25 |
+
channels=(32, 64, 128, 256, 256, 128, 96, 96),
|
26 |
+
layers=(2, 3, 4, 6, 2, 2, 2, 2),
|
27 |
+
cls_mode=False,
|
28 |
+
conditions=("SemanticKITTI", "nuScenes", "Waymo"),
|
29 |
+
zero_init=False,
|
30 |
+
norm_decouple=True,
|
31 |
+
norm_adaptive=False,
|
32 |
+
norm_affine=True,
|
33 |
+
),
|
34 |
+
criteria=[
|
35 |
+
dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
|
36 |
+
dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
|
37 |
+
],
|
38 |
+
backbone_out_channels=96,
|
39 |
+
context_channels=256,
|
40 |
+
conditions=("SemanticKITTI", "nuScenes", "Waymo"),
|
41 |
+
template="[x]",
|
42 |
+
clip_model="ViT-B/16",
|
43 |
+
# fmt: off
|
44 |
+
class_name=(
|
45 |
+
# SemanticKITTI
|
46 |
+
"car", "bicycle", "motorcycle", "truck", "other vehicle",
|
47 |
+
"person", "person who rides a bicycle", "person who rides a motorcycle", "road", "parking",
|
48 |
+
"path for pedestrians at the side of a road", "other ground", "building", "fence", "vegetation",
|
49 |
+
"trunk", "terrain", "pole", "traffic sign",
|
50 |
+
# nuScenes
|
51 |
+
"barrier", "bicycle", "bus", "car", "construction vehicle",
|
52 |
+
"motorcycle", "pedestrian", "traffic cone", "trailer", "truck",
|
53 |
+
"path suitable or safe for driving", "other flat", "sidewalk", "terrain", "man made", "vegetation",
|
54 |
+
# Waymo
|
55 |
+
"car", "truck", "bus", "other vehicle", "person who rides a motorcycle",
|
56 |
+
"person who rides a bicycle", "pedestrian", "sign", "traffic light", "pole",
|
57 |
+
"construction cone", "bicycle", "motorcycle", "building", "vegetation",
|
58 |
+
"tree trunk", "curb", "road", "lane marker", "other ground", "horizontal surface that can not drive",
|
59 |
+
"surface when pedestrians most likely to walk on",
|
60 |
+
),
|
61 |
+
valid_index=(
|
62 |
+
[i for i in range(19)],
|
63 |
+
[i for i in range(19, 19 + 16)],
|
64 |
+
[i for i in range(19 + 16, 19 + 16 + 22)],
|
65 |
+
),
|
66 |
+
# fmt: on
|
67 |
+
backbone_mode=False,
|
68 |
+
)
|
69 |
+
|
70 |
+
# scheduler settings
|
71 |
+
epoch = 50
|
72 |
+
eval_epoch = 50
|
73 |
+
optimizer = dict(type="AdamW", lr=0.002, weight_decay=0.005)
|
74 |
+
scheduler = dict(
|
75 |
+
type="OneCycleLR",
|
76 |
+
max_lr=optimizer["lr"],
|
77 |
+
pct_start=0.04,
|
78 |
+
anneal_strategy="cos",
|
79 |
+
div_factor=10.0,
|
80 |
+
final_div_factor=100.0,
|
81 |
+
)
|
82 |
+
# param_dicts = [dict(keyword="modulation", lr=0.0002)]
|
83 |
+
|
84 |
+
# dataset settings
|
85 |
+
data = dict(
|
86 |
+
num_classes=16,
|
87 |
+
ignore_index=-1,
|
88 |
+
names=[
|
89 |
+
"barrier",
|
90 |
+
"bicycle",
|
91 |
+
"bus",
|
92 |
+
"car",
|
93 |
+
"construction_vehicle",
|
94 |
+
"motorcycle",
|
95 |
+
"pedestrian",
|
96 |
+
"traffic_cone",
|
97 |
+
"trailer",
|
98 |
+
"truck",
|
99 |
+
"driveable_surface",
|
100 |
+
"other_flat",
|
101 |
+
"sidewalk",
|
102 |
+
"terrain",
|
103 |
+
"manmade",
|
104 |
+
"vegetation",
|
105 |
+
],
|
106 |
+
train=dict(
|
107 |
+
type="ConcatDataset",
|
108 |
+
datasets=[
|
109 |
+
# nuScenes
|
110 |
+
dict(
|
111 |
+
type="NuScenesDataset",
|
112 |
+
split="train",
|
113 |
+
data_root="data/nuscenes",
|
114 |
+
transform=[
|
115 |
+
# dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
|
116 |
+
# dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis='z', p=0.75),
|
117 |
+
dict(
|
118 |
+
type="RandomRotate",
|
119 |
+
angle=[-1, 1],
|
120 |
+
axis="z",
|
121 |
+
center=[0, 0, 0],
|
122 |
+
p=0.5,
|
123 |
+
),
|
124 |
+
# dict(type="RandomRotate", angle=[-1/6, 1/6], axis='x', p=0.5),
|
125 |
+
# dict(type="RandomRotate", angle=[-1/6, 1/6], axis='y', p=0.5),
|
126 |
+
dict(
|
127 |
+
type="PointClip",
|
128 |
+
point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2),
|
129 |
+
),
|
130 |
+
dict(type="RandomScale", scale=[0.9, 1.1]),
|
131 |
+
# dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
|
132 |
+
dict(type="RandomFlip", p=0.5),
|
133 |
+
dict(type="RandomJitter", sigma=0.005, clip=0.02),
|
134 |
+
# dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
|
135 |
+
dict(
|
136 |
+
type="GridSample",
|
137 |
+
grid_size=0.05,
|
138 |
+
hash_type="fnv",
|
139 |
+
mode="train",
|
140 |
+
keys=("coord", "strength", "segment"),
|
141 |
+
return_grid_coord=True,
|
142 |
+
),
|
143 |
+
# dict(type="SphereCrop", point_max=1000000, mode="random"),
|
144 |
+
# dict(type="CenterShift", apply_z=False),
|
145 |
+
dict(type="Add", keys_dict={"condition": "nuScenes"}),
|
146 |
+
dict(type="ToTensor"),
|
147 |
+
dict(
|
148 |
+
type="Collect",
|
149 |
+
keys=("coord", "grid_coord", "segment", "condition"),
|
150 |
+
feat_keys=("coord", "strength"),
|
151 |
+
),
|
152 |
+
],
|
153 |
+
test_mode=False,
|
154 |
+
ignore_index=-1,
|
155 |
+
loop=1,
|
156 |
+
),
|
157 |
+
# SemanticKITTI
|
158 |
+
dict(
|
159 |
+
type="SemanticKITTIDataset",
|
160 |
+
split="train",
|
161 |
+
data_root="data/semantic_kitti",
|
162 |
+
transform=[
|
163 |
+
# dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
|
164 |
+
# dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
|
165 |
+
dict(
|
166 |
+
type="RandomRotate",
|
167 |
+
angle=[-1, 1],
|
168 |
+
axis="z",
|
169 |
+
center=[0, 0, 0],
|
170 |
+
p=0.5,
|
171 |
+
),
|
172 |
+
# dict(type="RandomRotate", angle=[-1/6, 1/6], axis="x", p=0.5),
|
173 |
+
# dict(type="RandomRotate", angle=[-1/6, 1/6], axis="y", p=0.5),
|
174 |
+
dict(
|
175 |
+
type="PointClip",
|
176 |
+
point_cloud_range=(-75.2, -75.2, -4, 75.2, 75.2, 2),
|
177 |
+
),
|
178 |
+
dict(type="RandomScale", scale=[0.9, 1.1]),
|
179 |
+
# dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
|
180 |
+
dict(type="RandomFlip", p=0.5),
|
181 |
+
dict(type="RandomJitter", sigma=0.005, clip=0.02),
|
182 |
+
# dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
|
183 |
+
dict(
|
184 |
+
type="GridSample",
|
185 |
+
grid_size=0.05,
|
186 |
+
hash_type="fnv",
|
187 |
+
mode="train",
|
188 |
+
keys=("coord", "strength", "segment"),
|
189 |
+
return_grid_coord=True,
|
190 |
+
),
|
191 |
+
# dict(type="SphereCrop", point_max=1000000, mode="random"),
|
192 |
+
# dict(type="CenterShift", apply_z=False),
|
193 |
+
dict(type="Add", keys_dict={"condition": "SemanticKITTI"}),
|
194 |
+
dict(type="ToTensor"),
|
195 |
+
dict(
|
196 |
+
type="Collect",
|
197 |
+
keys=("coord", "grid_coord", "segment", "condition"),
|
198 |
+
feat_keys=("coord", "strength"),
|
199 |
+
),
|
200 |
+
],
|
201 |
+
test_mode=False,
|
202 |
+
ignore_index=-1,
|
203 |
+
loop=1,
|
204 |
+
),
|
205 |
+
# Waymo
|
206 |
+
dict(
|
207 |
+
type="WaymoDataset",
|
208 |
+
split="training",
|
209 |
+
data_root="data/waymo",
|
210 |
+
transform=[
|
211 |
+
# dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
|
212 |
+
# dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
|
213 |
+
dict(
|
214 |
+
type="RandomRotate",
|
215 |
+
angle=[-1, 1],
|
216 |
+
axis="z",
|
217 |
+
center=[0, 0, 0],
|
218 |
+
p=0.5,
|
219 |
+
),
|
220 |
+
# dict(type="RandomRotate", angle=[-1/6, 1/6], axis="x", p=0.5),
|
221 |
+
# dict(type="RandomRotate", angle=[-1/6, 1/6], axis="y", p=0.5),
|
222 |
+
dict(
|
223 |
+
type="PointClip",
|
224 |
+
point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2),
|
225 |
+
),
|
226 |
+
dict(type="RandomScale", scale=[0.9, 1.1]),
|
227 |
+
# dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
|
228 |
+
dict(type="RandomFlip", p=0.5),
|
229 |
+
dict(type="RandomJitter", sigma=0.005, clip=0.02),
|
230 |
+
# dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
|
231 |
+
dict(
|
232 |
+
type="GridSample",
|
233 |
+
grid_size=0.05,
|
234 |
+
hash_type="fnv",
|
235 |
+
mode="train",
|
236 |
+
keys=("coord", "strength", "segment"),
|
237 |
+
return_grid_coord=True,
|
238 |
+
),
|
239 |
+
# dict(type="SphereCrop", point_max=1000000, mode="random"),
|
240 |
+
# dict(type="CenterShift", apply_z=False),
|
241 |
+
dict(type="Add", keys_dict={"condition": "Waymo"}),
|
242 |
+
dict(type="ToTensor"),
|
243 |
+
dict(
|
244 |
+
type="Collect",
|
245 |
+
keys=("coord", "grid_coord", "segment", "condition"),
|
246 |
+
feat_keys=("coord", "strength"),
|
247 |
+
),
|
248 |
+
],
|
249 |
+
test_mode=False,
|
250 |
+
ignore_index=-1,
|
251 |
+
loop=1,
|
252 |
+
),
|
253 |
+
],
|
254 |
+
),
|
255 |
+
val=dict(
|
256 |
+
type="NuScenesDataset",
|
257 |
+
split="val",
|
258 |
+
data_root="data/nuscenes",
|
259 |
+
transform=[
|
260 |
+
dict(type="PointClip", point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2)),
|
261 |
+
dict(
|
262 |
+
type="GridSample",
|
263 |
+
grid_size=0.05,
|
264 |
+
hash_type="fnv",
|
265 |
+
mode="train",
|
266 |
+
keys=("coord", "strength", "segment"),
|
267 |
+
return_grid_coord=True,
|
268 |
+
),
|
269 |
+
dict(type="Add", keys_dict={"condition": "nuScenes"}),
|
270 |
+
dict(type="ToTensor"),
|
271 |
+
dict(
|
272 |
+
type="Collect",
|
273 |
+
keys=("coord", "grid_coord", "segment", "condition"),
|
274 |
+
feat_keys=("coord", "strength"),
|
275 |
+
),
|
276 |
+
],
|
277 |
+
test_mode=False,
|
278 |
+
ignore_index=-1,
|
279 |
+
),
|
280 |
+
test=dict(
|
281 |
+
type="NuScenesDataset",
|
282 |
+
split="val",
|
283 |
+
data_root="data/nuscenes",
|
284 |
+
transform=[
|
285 |
+
dict(type="Copy", keys_dict={"segment": "origin_segment"}),
|
286 |
+
dict(
|
287 |
+
type="GridSample",
|
288 |
+
grid_size=0.025,
|
289 |
+
hash_type="fnv",
|
290 |
+
mode="train",
|
291 |
+
keys=("coord", "strength", "segment"),
|
292 |
+
return_inverse=True,
|
293 |
+
),
|
294 |
+
],
|
295 |
+
test_mode=True,
|
296 |
+
test_cfg=dict(
|
297 |
+
voxelize=dict(
|
298 |
+
type="GridSample",
|
299 |
+
grid_size=0.05,
|
300 |
+
hash_type="fnv",
|
301 |
+
mode="test",
|
302 |
+
return_grid_coord=True,
|
303 |
+
keys=("coord", "strength"),
|
304 |
+
),
|
305 |
+
crop=None,
|
306 |
+
post_transform=[
|
307 |
+
dict(type="Add", keys_dict={"condition": "nuScenes"}),
|
308 |
+
dict(type="ToTensor"),
|
309 |
+
dict(
|
310 |
+
type="Collect",
|
311 |
+
keys=("coord", "grid_coord", "index", "condition"),
|
312 |
+
feat_keys=("coord", "strength"),
|
313 |
+
),
|
314 |
+
],
|
315 |
+
aug_transform=[
|
316 |
+
[dict(type="RandomScale", scale=[0.9, 0.9])],
|
317 |
+
[dict(type="RandomScale", scale=[0.95, 0.95])],
|
318 |
+
[dict(type="RandomScale", scale=[1, 1])],
|
319 |
+
[dict(type="RandomScale", scale=[1.05, 1.05])],
|
320 |
+
[dict(type="RandomScale", scale=[1.1, 1.1])],
|
321 |
+
[
|
322 |
+
dict(type="RandomScale", scale=[0.9, 0.9]),
|
323 |
+
dict(type="RandomFlip", p=1),
|
324 |
+
],
|
325 |
+
[
|
326 |
+
dict(type="RandomScale", scale=[0.95, 0.95]),
|
327 |
+
dict(type="RandomFlip", p=1),
|
328 |
+
],
|
329 |
+
[dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)],
|
330 |
+
[
|
331 |
+
dict(type="RandomScale", scale=[1.05, 1.05]),
|
332 |
+
dict(type="RandomFlip", p=1),
|
333 |
+
],
|
334 |
+
[
|
335 |
+
dict(type="RandomScale", scale=[1.1, 1.1]),
|
336 |
+
dict(type="RandomFlip", p=1),
|
337 |
+
],
|
338 |
+
],
|
339 |
+
),
|
340 |
+
ignore_index=-1,
|
341 |
+
),
|
342 |
+
)
|
Pointcept/configs/nuscenes/semseg-ppt-v1m2-0-nu-sk-wa-spunet.py
ADDED
@@ -0,0 +1,316 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
_base_ = ["../_base_/default_runtime.py"]
|
2 |
+
|
3 |
+
# misc custom setting
|
4 |
+
batch_size = 12 # total batch size summed across all GPUs
|
5 |
+
num_worker = 24
|
6 |
+
mix_prob = 0.8
|
7 |
+
empty_cache = False
|
8 |
+
enable_amp = True
|
9 |
+
find_unused_parameters = True
|
10 |
+
|
11 |
+
# trainer
|
12 |
+
train = dict(
|
13 |
+
type="MultiDatasetTrainer",
|
14 |
+
)
|
15 |
+
|
16 |
+
# model settings
|
17 |
+
model = dict(
|
18 |
+
type="PPT-v1m2",
|
19 |
+
backbone=dict(
|
20 |
+
type="SpUNet-v1m3",
|
21 |
+
in_channels=4,
|
22 |
+
num_classes=0,
|
23 |
+
base_channels=32,
|
24 |
+
context_channels=256,
|
25 |
+
channels=(32, 64, 128, 256, 256, 128, 96, 96),
|
26 |
+
layers=(2, 3, 4, 6, 2, 2, 2, 2),
|
27 |
+
cls_mode=False,
|
28 |
+
conditions=("SemanticKITTI", "nuScenes", "Waymo"),
|
29 |
+
zero_init=False,
|
30 |
+
norm_decouple=True,
|
31 |
+
norm_adaptive=False,
|
32 |
+
norm_affine=True,
|
33 |
+
),
|
34 |
+
criteria=[
|
35 |
+
dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
|
36 |
+
dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
|
37 |
+
],
|
38 |
+
backbone_out_channels=96,
|
39 |
+
context_channels=256,
|
40 |
+
conditions=("SemanticKITTI", "nuScenes", "Waymo"),
|
41 |
+
num_classes=(19, 16, 22),
|
42 |
+
)
|
43 |
+
|
44 |
+
# scheduler settings
|
45 |
+
epoch = 50
|
46 |
+
eval_epoch = 50
|
47 |
+
optimizer = dict(type="AdamW", lr=0.002, weight_decay=0.005)
|
48 |
+
scheduler = dict(
|
49 |
+
type="OneCycleLR",
|
50 |
+
max_lr=optimizer["lr"],
|
51 |
+
pct_start=0.04,
|
52 |
+
anneal_strategy="cos",
|
53 |
+
div_factor=10.0,
|
54 |
+
final_div_factor=100.0,
|
55 |
+
)
|
56 |
+
# param_dicts = [dict(keyword="modulation", lr=0.0002)]
|
57 |
+
|
58 |
+
# dataset settings
|
59 |
+
data = dict(
|
60 |
+
num_classes=16,
|
61 |
+
ignore_index=-1,
|
62 |
+
names=[
|
63 |
+
"barrier",
|
64 |
+
"bicycle",
|
65 |
+
"bus",
|
66 |
+
"car",
|
67 |
+
"construction_vehicle",
|
68 |
+
"motorcycle",
|
69 |
+
"pedestrian",
|
70 |
+
"traffic_cone",
|
71 |
+
"trailer",
|
72 |
+
"truck",
|
73 |
+
"driveable_surface",
|
74 |
+
"other_flat",
|
75 |
+
"sidewalk",
|
76 |
+
"terrain",
|
77 |
+
"manmade",
|
78 |
+
"vegetation",
|
79 |
+
],
|
80 |
+
train=dict(
|
81 |
+
type="ConcatDataset",
|
82 |
+
datasets=[
|
83 |
+
# nuScenes
|
84 |
+
dict(
|
85 |
+
type="NuScenesDataset",
|
86 |
+
split="train",
|
87 |
+
data_root="data/nuscenes",
|
88 |
+
transform=[
|
89 |
+
# dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
|
90 |
+
# dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis='z', p=0.75),
|
91 |
+
dict(
|
92 |
+
type="RandomRotate",
|
93 |
+
angle=[-1, 1],
|
94 |
+
axis="z",
|
95 |
+
center=[0, 0, 0],
|
96 |
+
p=0.5,
|
97 |
+
),
|
98 |
+
# dict(type="RandomRotate", angle=[-1/6, 1/6], axis='x', p=0.5),
|
99 |
+
# dict(type="RandomRotate", angle=[-1/6, 1/6], axis='y', p=0.5),
|
100 |
+
dict(
|
101 |
+
type="PointClip",
|
102 |
+
point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2),
|
103 |
+
),
|
104 |
+
dict(type="RandomScale", scale=[0.9, 1.1]),
|
105 |
+
# dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
|
106 |
+
dict(type="RandomFlip", p=0.5),
|
107 |
+
dict(type="RandomJitter", sigma=0.005, clip=0.02),
|
108 |
+
# dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
|
109 |
+
dict(
|
110 |
+
type="GridSample",
|
111 |
+
grid_size=0.05,
|
112 |
+
hash_type="fnv",
|
113 |
+
mode="train",
|
114 |
+
keys=("coord", "strength", "segment"),
|
115 |
+
return_grid_coord=True,
|
116 |
+
),
|
117 |
+
# dict(type="SphereCrop", point_max=1000000, mode="random"),
|
118 |
+
# dict(type="CenterShift", apply_z=False),
|
119 |
+
dict(type="Add", keys_dict={"condition": "nuScenes"}),
|
120 |
+
dict(type="ToTensor"),
|
121 |
+
dict(
|
122 |
+
type="Collect",
|
123 |
+
keys=("coord", "grid_coord", "segment", "condition"),
|
124 |
+
feat_keys=("coord", "strength"),
|
125 |
+
),
|
126 |
+
],
|
127 |
+
test_mode=False,
|
128 |
+
ignore_index=-1,
|
129 |
+
loop=1,
|
130 |
+
),
|
131 |
+
# SemanticKITTI
|
132 |
+
dict(
|
133 |
+
type="SemanticKITTIDataset",
|
134 |
+
split="train",
|
135 |
+
data_root="data/semantic_kitti",
|
136 |
+
transform=[
|
137 |
+
# dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
|
138 |
+
# dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
|
139 |
+
dict(
|
140 |
+
type="RandomRotate",
|
141 |
+
angle=[-1, 1],
|
142 |
+
axis="z",
|
143 |
+
center=[0, 0, 0],
|
144 |
+
p=0.5,
|
145 |
+
),
|
146 |
+
# dict(type="RandomRotate", angle=[-1/6, 1/6], axis="x", p=0.5),
|
147 |
+
# dict(type="RandomRotate", angle=[-1/6, 1/6], axis="y", p=0.5),
|
148 |
+
dict(
|
149 |
+
type="PointClip",
|
150 |
+
point_cloud_range=(-75.2, -75.2, -4, 75.2, 75.2, 2),
|
151 |
+
),
|
152 |
+
dict(type="RandomScale", scale=[0.9, 1.1]),
|
153 |
+
# dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
|
154 |
+
dict(type="RandomFlip", p=0.5),
|
155 |
+
dict(type="RandomJitter", sigma=0.005, clip=0.02),
|
156 |
+
# dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
|
157 |
+
dict(
|
158 |
+
type="GridSample",
|
159 |
+
grid_size=0.05,
|
160 |
+
hash_type="fnv",
|
161 |
+
mode="train",
|
162 |
+
keys=("coord", "strength", "segment"),
|
163 |
+
return_grid_coord=True,
|
164 |
+
),
|
165 |
+
# dict(type="SphereCrop", point_max=1000000, mode="random"),
|
166 |
+
# dict(type="CenterShift", apply_z=False),
|
167 |
+
dict(type="Add", keys_dict={"condition": "SemanticKITTI"}),
|
168 |
+
dict(type="ToTensor"),
|
169 |
+
dict(
|
170 |
+
type="Collect",
|
171 |
+
keys=("coord", "grid_coord", "segment", "condition"),
|
172 |
+
feat_keys=("coord", "strength"),
|
173 |
+
),
|
174 |
+
],
|
175 |
+
test_mode=False,
|
176 |
+
ignore_index=-1,
|
177 |
+
loop=1,
|
178 |
+
),
|
179 |
+
# Waymo
|
180 |
+
dict(
|
181 |
+
type="WaymoDataset",
|
182 |
+
split="training",
|
183 |
+
data_root="data/waymo",
|
184 |
+
transform=[
|
185 |
+
# dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
|
186 |
+
# dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
|
187 |
+
dict(
|
188 |
+
type="RandomRotate",
|
189 |
+
angle=[-1, 1],
|
190 |
+
axis="z",
|
191 |
+
center=[0, 0, 0],
|
192 |
+
p=0.5,
|
193 |
+
),
|
194 |
+
# dict(type="RandomRotate", angle=[-1/6, 1/6], axis="x", p=0.5),
|
195 |
+
# dict(type="RandomRotate", angle=[-1/6, 1/6], axis="y", p=0.5),
|
196 |
+
dict(
|
197 |
+
type="PointClip",
|
198 |
+
point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2),
|
199 |
+
),
|
200 |
+
dict(type="RandomScale", scale=[0.9, 1.1]),
|
201 |
+
# dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
|
202 |
+
dict(type="RandomFlip", p=0.5),
|
203 |
+
dict(type="RandomJitter", sigma=0.005, clip=0.02),
|
204 |
+
# dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
|
205 |
+
dict(
|
206 |
+
type="GridSample",
|
207 |
+
grid_size=0.05,
|
208 |
+
hash_type="fnv",
|
209 |
+
mode="train",
|
210 |
+
keys=("coord", "strength", "segment"),
|
211 |
+
return_grid_coord=True,
|
212 |
+
),
|
213 |
+
# dict(type="SphereCrop", point_max=1000000, mode="random"),
|
214 |
+
# dict(type="CenterShift", apply_z=False),
|
215 |
+
dict(type="Add", keys_dict={"condition": "Waymo"}),
|
216 |
+
dict(type="ToTensor"),
|
217 |
+
dict(
|
218 |
+
type="Collect",
|
219 |
+
keys=("coord", "grid_coord", "segment", "condition"),
|
220 |
+
feat_keys=("coord", "strength"),
|
221 |
+
),
|
222 |
+
],
|
223 |
+
test_mode=False,
|
224 |
+
ignore_index=-1,
|
225 |
+
loop=1,
|
226 |
+
),
|
227 |
+
],
|
228 |
+
),
|
229 |
+
val=dict(
|
230 |
+
type="NuScenesDataset",
|
231 |
+
split="val",
|
232 |
+
data_root="data/nuscenes",
|
233 |
+
transform=[
|
234 |
+
dict(type="PointClip", point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2)),
|
235 |
+
dict(
|
236 |
+
type="GridSample",
|
237 |
+
grid_size=0.05,
|
238 |
+
hash_type="fnv",
|
239 |
+
mode="train",
|
240 |
+
keys=("coord", "strength", "segment"),
|
241 |
+
return_grid_coord=True,
|
242 |
+
),
|
243 |
+
dict(type="Add", keys_dict={"condition": "nuScenes"}),
|
244 |
+
dict(type="ToTensor"),
|
245 |
+
dict(
|
246 |
+
type="Collect",
|
247 |
+
keys=("coord", "grid_coord", "segment", "condition"),
|
248 |
+
feat_keys=("coord", "strength"),
|
249 |
+
),
|
250 |
+
],
|
251 |
+
test_mode=False,
|
252 |
+
ignore_index=-1,
|
253 |
+
),
|
254 |
+
test=dict(
|
255 |
+
type="NuScenesDataset",
|
256 |
+
split="val",
|
257 |
+
data_root="data/nuscenes",
|
258 |
+
transform=[
|
259 |
+
dict(type="Copy", keys_dict={"segment": "origin_segment"}),
|
260 |
+
dict(
|
261 |
+
type="GridSample",
|
262 |
+
grid_size=0.025,
|
263 |
+
hash_type="fnv",
|
264 |
+
mode="train",
|
265 |
+
keys=("coord", "strength", "segment"),
|
266 |
+
return_inverse=True,
|
267 |
+
),
|
268 |
+
],
|
269 |
+
test_mode=True,
|
270 |
+
test_cfg=dict(
|
271 |
+
voxelize=dict(
|
272 |
+
type="GridSample",
|
273 |
+
grid_size=0.05,
|
274 |
+
hash_type="fnv",
|
275 |
+
mode="test",
|
276 |
+
return_grid_coord=True,
|
277 |
+
keys=("coord", "strength"),
|
278 |
+
),
|
279 |
+
crop=None,
|
280 |
+
post_transform=[
|
281 |
+
dict(type="Add", keys_dict={"condition": "nuScenes"}),
|
282 |
+
dict(type="ToTensor"),
|
283 |
+
dict(
|
284 |
+
type="Collect",
|
285 |
+
keys=("coord", "grid_coord", "index", "condition"),
|
286 |
+
feat_keys=("coord", "strength"),
|
287 |
+
),
|
288 |
+
],
|
289 |
+
aug_transform=[
|
290 |
+
[dict(type="RandomScale", scale=[0.9, 0.9])],
|
291 |
+
[dict(type="RandomScale", scale=[0.95, 0.95])],
|
292 |
+
[dict(type="RandomScale", scale=[1, 1])],
|
293 |
+
[dict(type="RandomScale", scale=[1.05, 1.05])],
|
294 |
+
[dict(type="RandomScale", scale=[1.1, 1.1])],
|
295 |
+
[
|
296 |
+
dict(type="RandomScale", scale=[0.9, 0.9]),
|
297 |
+
dict(type="RandomFlip", p=1),
|
298 |
+
],
|
299 |
+
[
|
300 |
+
dict(type="RandomScale", scale=[0.95, 0.95]),
|
301 |
+
dict(type="RandomFlip", p=1),
|
302 |
+
],
|
303 |
+
[dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)],
|
304 |
+
[
|
305 |
+
dict(type="RandomScale", scale=[1.05, 1.05]),
|
306 |
+
dict(type="RandomFlip", p=1),
|
307 |
+
],
|
308 |
+
[
|
309 |
+
dict(type="RandomScale", scale=[1.1, 1.1]),
|
310 |
+
dict(type="RandomFlip", p=1),
|
311 |
+
],
|
312 |
+
],
|
313 |
+
),
|
314 |
+
ignore_index=-1,
|
315 |
+
),
|
316 |
+
)
|
Pointcept/configs/nuscenes/semseg-ppt-v1m2-1-nu-sk-wa-spunet-submit.py
ADDED
@@ -0,0 +1,292 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
_base_ = ["../_base_/default_runtime.py"]
|
2 |
+
|
3 |
+
# misc custom setting
|
4 |
+
batch_size = 12 # bs: total bs in all gpus
|
5 |
+
num_worker = 24
|
6 |
+
mix_prob = 0.8
|
7 |
+
empty_cache = False
|
8 |
+
enable_amp = True
|
9 |
+
find_unused_parameters = True
|
10 |
+
evaluate = False
|
11 |
+
|
12 |
+
# trainer
|
13 |
+
train = dict(
|
14 |
+
type="MultiDatasetTrainer",
|
15 |
+
)
|
16 |
+
|
17 |
+
# model settings
|
18 |
+
model = dict(
|
19 |
+
type="PPT-v1m2",
|
20 |
+
backbone=dict(
|
21 |
+
type="SpUNet-v1m3",
|
22 |
+
in_channels=4,
|
23 |
+
num_classes=0,
|
24 |
+
base_channels=32,
|
25 |
+
context_channels=256,
|
26 |
+
channels=(32, 64, 128, 256, 256, 128, 96, 96),
|
27 |
+
layers=(2, 3, 4, 6, 2, 2, 2, 2),
|
28 |
+
cls_mode=False,
|
29 |
+
conditions=("SemanticKITTI", "nuScenes", "Waymo"),
|
30 |
+
zero_init=False,
|
31 |
+
norm_decouple=True,
|
32 |
+
norm_adaptive=False,
|
33 |
+
norm_affine=True,
|
34 |
+
),
|
35 |
+
criteria=[
|
36 |
+
dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
|
37 |
+
dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
|
38 |
+
],
|
39 |
+
backbone_out_channels=96,
|
40 |
+
context_channels=256,
|
41 |
+
conditions=("SemanticKITTI", "nuScenes", "Waymo"),
|
42 |
+
num_classes=(19, 16, 22),
|
43 |
+
)
|
44 |
+
|
45 |
+
# scheduler settings
|
46 |
+
epoch = 50
|
47 |
+
eval_epoch = 50
|
48 |
+
optimizer = dict(type="AdamW", lr=0.002, weight_decay=0.005)
|
49 |
+
scheduler = dict(
|
50 |
+
type="OneCycleLR",
|
51 |
+
max_lr=optimizer["lr"],
|
52 |
+
pct_start=0.04,
|
53 |
+
anneal_strategy="cos",
|
54 |
+
div_factor=10.0,
|
55 |
+
final_div_factor=100.0,
|
56 |
+
)
|
57 |
+
# param_dicts = [dict(keyword="modulation", lr=0.0002)]
|
58 |
+
|
59 |
+
# dataset settings
|
60 |
+
data = dict(
|
61 |
+
num_classes=16,
|
62 |
+
ignore_index=-1,
|
63 |
+
names=[
|
64 |
+
"barrier",
|
65 |
+
"bicycle",
|
66 |
+
"bus",
|
67 |
+
"car",
|
68 |
+
"construction_vehicle",
|
69 |
+
"motorcycle",
|
70 |
+
"pedestrian",
|
71 |
+
"traffic_cone",
|
72 |
+
"trailer",
|
73 |
+
"truck",
|
74 |
+
"driveable_surface",
|
75 |
+
"other_flat",
|
76 |
+
"sidewalk",
|
77 |
+
"terrain",
|
78 |
+
"manmade",
|
79 |
+
"vegetation",
|
80 |
+
],
|
81 |
+
train=dict(
|
82 |
+
type="ConcatDataset",
|
83 |
+
datasets=[
|
84 |
+
# nuScenes
|
85 |
+
dict(
|
86 |
+
type="NuScenesDataset",
|
87 |
+
split=["train", "val"],
|
88 |
+
data_root="data/nuscenes",
|
89 |
+
transform=[
|
90 |
+
# dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
|
91 |
+
# dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis='z', p=0.75),
|
92 |
+
dict(
|
93 |
+
type="RandomRotate",
|
94 |
+
angle=[-1, 1],
|
95 |
+
axis="z",
|
96 |
+
center=[0, 0, 0],
|
97 |
+
p=0.5,
|
98 |
+
),
|
99 |
+
# dict(type="RandomRotate", angle=[-1/6, 1/6], axis='x', p=0.5),
|
100 |
+
# dict(type="RandomRotate", angle=[-1/6, 1/6], axis='y', p=0.5),
|
101 |
+
dict(
|
102 |
+
type="PointClip",
|
103 |
+
point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2),
|
104 |
+
),
|
105 |
+
dict(type="RandomScale", scale=[0.9, 1.1]),
|
106 |
+
# dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
|
107 |
+
dict(type="RandomFlip", p=0.5),
|
108 |
+
dict(type="RandomJitter", sigma=0.005, clip=0.02),
|
109 |
+
# dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
|
110 |
+
dict(
|
111 |
+
type="GridSample",
|
112 |
+
grid_size=0.05,
|
113 |
+
hash_type="fnv",
|
114 |
+
mode="train",
|
115 |
+
keys=("coord", "strength", "segment"),
|
116 |
+
return_grid_coord=True,
|
117 |
+
),
|
118 |
+
# dict(type="SphereCrop", point_max=1000000, mode="random"),
|
119 |
+
# dict(type="CenterShift", apply_z=False),
|
120 |
+
dict(type="Add", keys_dict={"condition": "nuScenes"}),
|
121 |
+
dict(type="ToTensor"),
|
122 |
+
dict(
|
123 |
+
type="Collect",
|
124 |
+
keys=("coord", "grid_coord", "segment", "condition"),
|
125 |
+
feat_keys=("coord", "strength"),
|
126 |
+
),
|
127 |
+
],
|
128 |
+
test_mode=False,
|
129 |
+
ignore_index=-1,
|
130 |
+
loop=1,
|
131 |
+
),
|
132 |
+
# SemanticKITTI
|
133 |
+
dict(
|
134 |
+
type="SemanticKITTIDataset",
|
135 |
+
split=["train", "val"],
|
136 |
+
data_root="data/semantic_kitti",
|
137 |
+
transform=[
|
138 |
+
# dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
|
139 |
+
# dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
|
140 |
+
dict(
|
141 |
+
type="RandomRotate",
|
142 |
+
angle=[-1, 1],
|
143 |
+
axis="z",
|
144 |
+
center=[0, 0, 0],
|
145 |
+
p=0.5,
|
146 |
+
),
|
147 |
+
# dict(type="RandomRotate", angle=[-1/6, 1/6], axis="x", p=0.5),
|
148 |
+
# dict(type="RandomRotate", angle=[-1/6, 1/6], axis="y", p=0.5),
|
149 |
+
dict(
|
150 |
+
type="PointClip",
|
151 |
+
point_cloud_range=(-75.2, -75.2, -4, 75.2, 75.2, 2),
|
152 |
+
),
|
153 |
+
dict(type="RandomScale", scale=[0.9, 1.1]),
|
154 |
+
# dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
|
155 |
+
dict(type="RandomFlip", p=0.5),
|
156 |
+
dict(type="RandomJitter", sigma=0.005, clip=0.02),
|
157 |
+
# dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
|
158 |
+
dict(
|
159 |
+
type="GridSample",
|
160 |
+
grid_size=0.05,
|
161 |
+
hash_type="fnv",
|
162 |
+
mode="train",
|
163 |
+
keys=("coord", "strength", "segment"),
|
164 |
+
return_grid_coord=True,
|
165 |
+
),
|
166 |
+
# dict(type="SphereCrop", point_max=1000000, mode="random"),
|
167 |
+
# dict(type="CenterShift", apply_z=False),
|
168 |
+
dict(type="Add", keys_dict={"condition": "SemanticKITTI"}),
|
169 |
+
dict(type="ToTensor"),
|
170 |
+
dict(
|
171 |
+
type="Collect",
|
172 |
+
keys=("coord", "grid_coord", "segment", "condition"),
|
173 |
+
feat_keys=("coord", "strength"),
|
174 |
+
),
|
175 |
+
],
|
176 |
+
test_mode=False,
|
177 |
+
ignore_index=-1,
|
178 |
+
loop=1,
|
179 |
+
),
|
180 |
+
# Waymo
|
181 |
+
dict(
|
182 |
+
type="WaymoDataset",
|
183 |
+
split=["training", "validation"],
|
184 |
+
data_root="data/waymo",
|
185 |
+
transform=[
|
186 |
+
# dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
|
187 |
+
# dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
|
188 |
+
dict(
|
189 |
+
type="RandomRotate",
|
190 |
+
angle=[-1, 1],
|
191 |
+
axis="z",
|
192 |
+
center=[0, 0, 0],
|
193 |
+
p=0.5,
|
194 |
+
),
|
195 |
+
# dict(type="RandomRotate", angle=[-1/6, 1/6], axis="x", p=0.5),
|
196 |
+
# dict(type="RandomRotate", angle=[-1/6, 1/6], axis="y", p=0.5),
|
197 |
+
dict(
|
198 |
+
type="PointClip",
|
199 |
+
point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2),
|
200 |
+
),
|
201 |
+
dict(type="RandomScale", scale=[0.9, 1.1]),
|
202 |
+
# dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
|
203 |
+
dict(type="RandomFlip", p=0.5),
|
204 |
+
dict(type="RandomJitter", sigma=0.005, clip=0.02),
|
205 |
+
# dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
|
206 |
+
dict(
|
207 |
+
type="GridSample",
|
208 |
+
grid_size=0.05,
|
209 |
+
hash_type="fnv",
|
210 |
+
mode="train",
|
211 |
+
keys=("coord", "strength", "segment"),
|
212 |
+
return_grid_coord=True,
|
213 |
+
),
|
214 |
+
# dict(type="SphereCrop", point_max=1000000, mode="random"),
|
215 |
+
# dict(type="CenterShift", apply_z=False),
|
216 |
+
dict(type="Add", keys_dict={"condition": "Waymo"}),
|
217 |
+
dict(type="ToTensor"),
|
218 |
+
dict(
|
219 |
+
type="Collect",
|
220 |
+
keys=("coord", "grid_coord", "segment", "condition"),
|
221 |
+
feat_keys=("coord", "strength"),
|
222 |
+
),
|
223 |
+
],
|
224 |
+
test_mode=False,
|
225 |
+
ignore_index=-1,
|
226 |
+
loop=1,
|
227 |
+
),
|
228 |
+
],
|
229 |
+
),
|
230 |
+
test=dict(
|
231 |
+
type="NuScenesDataset",
|
232 |
+
split="test",
|
233 |
+
data_root="data/nuscenes",
|
234 |
+
transform=[
|
235 |
+
dict(type="Copy", keys_dict={"segment": "origin_segment"}),
|
236 |
+
dict(
|
237 |
+
type="GridSample",
|
238 |
+
grid_size=0.025,
|
239 |
+
hash_type="fnv",
|
240 |
+
mode="train",
|
241 |
+
keys=("coord", "strength", "segment"),
|
242 |
+
return_inverse=True,
|
243 |
+
),
|
244 |
+
],
|
245 |
+
test_mode=True,
|
246 |
+
test_cfg=dict(
|
247 |
+
voxelize=dict(
|
248 |
+
type="GridSample",
|
249 |
+
grid_size=0.05,
|
250 |
+
hash_type="fnv",
|
251 |
+
mode="test",
|
252 |
+
return_grid_coord=True,
|
253 |
+
keys=("coord", "strength"),
|
254 |
+
),
|
255 |
+
crop=None,
|
256 |
+
post_transform=[
|
257 |
+
dict(type="Add", keys_dict={"condition": "nuScenes"}),
|
258 |
+
dict(type="ToTensor"),
|
259 |
+
dict(
|
260 |
+
type="Collect",
|
261 |
+
keys=("coord", "grid_coord", "index", "condition"),
|
262 |
+
feat_keys=("coord", "strength"),
|
263 |
+
),
|
264 |
+
],
|
265 |
+
aug_transform=[
|
266 |
+
[dict(type="RandomScale", scale=[0.9, 0.9])],
|
267 |
+
[dict(type="RandomScale", scale=[0.95, 0.95])],
|
268 |
+
[dict(type="RandomScale", scale=[1, 1])],
|
269 |
+
[dict(type="RandomScale", scale=[1.05, 1.05])],
|
270 |
+
[dict(type="RandomScale", scale=[1.1, 1.1])],
|
271 |
+
[
|
272 |
+
dict(type="RandomScale", scale=[0.9, 0.9]),
|
273 |
+
dict(type="RandomFlip", p=1),
|
274 |
+
],
|
275 |
+
[
|
276 |
+
dict(type="RandomScale", scale=[0.95, 0.95]),
|
277 |
+
dict(type="RandomFlip", p=1),
|
278 |
+
],
|
279 |
+
[dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)],
|
280 |
+
[
|
281 |
+
dict(type="RandomScale", scale=[1.05, 1.05]),
|
282 |
+
dict(type="RandomFlip", p=1),
|
283 |
+
],
|
284 |
+
[
|
285 |
+
dict(type="RandomScale", scale=[1.1, 1.1]),
|
286 |
+
dict(type="RandomFlip", p=1),
|
287 |
+
],
|
288 |
+
],
|
289 |
+
),
|
290 |
+
ignore_index=-1,
|
291 |
+
),
|
292 |
+
)
|
Pointcept/configs/nuscenes/semseg-pt-v2m2-0-base.py
ADDED
@@ -0,0 +1,174 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
_base_ = ["../_base_/default_runtime.py"]
|
2 |
+
|
3 |
+
# misc custom setting
|
4 |
+
batch_size = 12 # bs: total bs in all gpus
|
5 |
+
mix_prob = 0.8
|
6 |
+
empty_cache = False
|
7 |
+
enable_amp = True
|
8 |
+
|
9 |
+
# model settings
|
10 |
+
model = dict(
|
11 |
+
type="DefaultSegmentor",
|
12 |
+
backbone=dict(
|
13 |
+
type="PT-v2m2",
|
14 |
+
in_channels=4,
|
15 |
+
num_classes=16,
|
16 |
+
patch_embed_depth=1,
|
17 |
+
patch_embed_channels=48,
|
18 |
+
patch_embed_groups=6,
|
19 |
+
patch_embed_neighbours=8,
|
20 |
+
enc_depths=(2, 2, 6, 2),
|
21 |
+
enc_channels=(96, 192, 384, 512),
|
22 |
+
enc_groups=(12, 24, 48, 64),
|
23 |
+
enc_neighbours=(16, 16, 16, 16),
|
24 |
+
dec_depths=(1, 1, 1, 1),
|
25 |
+
dec_channels=(48, 96, 192, 384),
|
26 |
+
dec_groups=(6, 12, 24, 48),
|
27 |
+
dec_neighbours=(16, 16, 16, 16),
|
28 |
+
grid_sizes=(0.15, 0.375, 0.9375, 2.34375), # x3, x2.5, x2.5, x2.5
|
29 |
+
attn_qkv_bias=True,
|
30 |
+
pe_multiplier=False,
|
31 |
+
pe_bias=True,
|
32 |
+
attn_drop_rate=0.0,
|
33 |
+
drop_path_rate=0.3,
|
34 |
+
enable_checkpoint=False,
|
35 |
+
unpool_backend="map", # map / interp
|
36 |
+
),
|
37 |
+
criteria=[
|
38 |
+
dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
|
39 |
+
dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
|
40 |
+
],
|
41 |
+
)
|
42 |
+
|
43 |
+
# scheduler settings
|
44 |
+
epoch = 50
|
45 |
+
eval_epoch = 50
|
46 |
+
optimizer = dict(type="AdamW", lr=0.002, weight_decay=0.005)
|
47 |
+
scheduler = dict(
|
48 |
+
type="OneCycleLR",
|
49 |
+
max_lr=optimizer["lr"],
|
50 |
+
pct_start=0.04,
|
51 |
+
anneal_strategy="cos",
|
52 |
+
div_factor=10.0,
|
53 |
+
final_div_factor=100.0,
|
54 |
+
)
|
55 |
+
|
56 |
+
# dataset settings
|
57 |
+
dataset_type = "NuScenesDataset"
|
58 |
+
data_root = "data/nuscenes"
|
59 |
+
ignore_index = -1
|
60 |
+
names = [
|
61 |
+
"barrier",
|
62 |
+
"bicycle",
|
63 |
+
"bus",
|
64 |
+
"car",
|
65 |
+
"construction_vehicle",
|
66 |
+
"motorcycle",
|
67 |
+
"pedestrian",
|
68 |
+
"traffic_cone",
|
69 |
+
"trailer",
|
70 |
+
"truck",
|
71 |
+
"driveable_surface",
|
72 |
+
"other_flat",
|
73 |
+
"sidewalk",
|
74 |
+
"terrain",
|
75 |
+
"manmade",
|
76 |
+
"vegetation",
|
77 |
+
]
|
78 |
+
|
79 |
+
data = dict(
|
80 |
+
num_classes=16,
|
81 |
+
ignore_index=ignore_index,
|
82 |
+
names=names,
|
83 |
+
train=dict(
|
84 |
+
type=dataset_type,
|
85 |
+
split="train",
|
86 |
+
data_root=data_root,
|
87 |
+
transform=[
|
88 |
+
# dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
|
89 |
+
# dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis='z', p=0.75),
|
90 |
+
dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
|
91 |
+
# dict(type="RandomRotate", angle=[-1/6, 1/6], axis='x', p=0.5),
|
92 |
+
# dict(type="RandomRotate", angle=[-1/6, 1/6], axis='y', p=0.5),
|
93 |
+
dict(type="RandomScale", scale=[0.9, 1.1]),
|
94 |
+
# dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
|
95 |
+
dict(type="RandomFlip", p=0.5),
|
96 |
+
dict(type="RandomJitter", sigma=0.005, clip=0.02),
|
97 |
+
# dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
|
98 |
+
# dict(type="GridSample", grid_size=0.05, hash_type="fnv", mode="train",
|
99 |
+
# keys=("coord", "strength", "segment"), return_grid_coord=True),
|
100 |
+
# dict(type="SphereCrop", point_max=1000000, mode="random"),
|
101 |
+
# dict(type="CenterShift", apply_z=False),
|
102 |
+
dict(type="ToTensor"),
|
103 |
+
dict(
|
104 |
+
type="Collect",
|
105 |
+
keys=("coord", "segment"),
|
106 |
+
feat_keys=("coord", "strength"),
|
107 |
+
),
|
108 |
+
],
|
109 |
+
test_mode=False,
|
110 |
+
ignore_index=ignore_index,
|
111 |
+
),
|
112 |
+
val=dict(
|
113 |
+
type=dataset_type,
|
114 |
+
split="val",
|
115 |
+
data_root=data_root,
|
116 |
+
transform=[
|
117 |
+
# dict(type="PointClip", point_cloud_range=(-51.2, -51.2, -4, 51.2, 51.2, 2.4)),
|
118 |
+
# dict(type="GridSample", grid_size=0.05, hash_type="fnv", mode="train",
|
119 |
+
# keys=("coord", "strength", "segment"), return_grid_coord=True),
|
120 |
+
dict(type="ToTensor"),
|
121 |
+
dict(
|
122 |
+
type="Collect",
|
123 |
+
keys=("coord", "segment"),
|
124 |
+
feat_keys=("coord", "strength"),
|
125 |
+
),
|
126 |
+
],
|
127 |
+
test_mode=False,
|
128 |
+
ignore_index=ignore_index,
|
129 |
+
),
|
130 |
+
test=dict(
|
131 |
+
type=dataset_type,
|
132 |
+
split="val",
|
133 |
+
data_root=data_root,
|
134 |
+
transform=[],
|
135 |
+
test_mode=True,
|
136 |
+
test_cfg=dict(
|
137 |
+
voxelize=None,
|
138 |
+
crop=None,
|
139 |
+
post_transform=[
|
140 |
+
dict(type="ToTensor"),
|
141 |
+
dict(
|
142 |
+
type="Collect",
|
143 |
+
keys=("coord", "index"),
|
144 |
+
feat_keys=("coord", "strength"),
|
145 |
+
),
|
146 |
+
],
|
147 |
+
aug_transform=[
|
148 |
+
[dict(type="RandomScale", scale=[0.9, 0.9])],
|
149 |
+
[dict(type="RandomScale", scale=[0.95, 0.95])],
|
150 |
+
[dict(type="RandomScale", scale=[1, 1])],
|
151 |
+
[dict(type="RandomScale", scale=[1.05, 1.05])],
|
152 |
+
[dict(type="RandomScale", scale=[1.1, 1.1])],
|
153 |
+
[
|
154 |
+
dict(type="RandomScale", scale=[0.9, 0.9]),
|
155 |
+
dict(type="RandomFlip", p=1),
|
156 |
+
],
|
157 |
+
[
|
158 |
+
dict(type="RandomScale", scale=[0.95, 0.95]),
|
159 |
+
dict(type="RandomFlip", p=1),
|
160 |
+
],
|
161 |
+
[dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)],
|
162 |
+
[
|
163 |
+
dict(type="RandomScale", scale=[1.05, 1.05]),
|
164 |
+
dict(type="RandomFlip", p=1),
|
165 |
+
],
|
166 |
+
[
|
167 |
+
dict(type="RandomScale", scale=[1.1, 1.1]),
|
168 |
+
dict(type="RandomFlip", p=1),
|
169 |
+
],
|
170 |
+
],
|
171 |
+
),
|
172 |
+
ignore_index=ignore_index,
|
173 |
+
),
|
174 |
+
)
|
Pointcept/configs/nuscenes/semseg-pt-v2m2-1-benchmark-submit.py
ADDED
@@ -0,0 +1,157 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
_base_ = ["../_base_/default_runtime.py"]
|
2 |
+
|
3 |
+
# misc custom setting
|
4 |
+
batch_size = 12 # bs: total bs in all gpus
|
5 |
+
mix_prob = 0.8
|
6 |
+
empty_cache = False
|
7 |
+
enable_amp = True
|
8 |
+
evaluate = False
|
9 |
+
|
10 |
+
# model settings
|
11 |
+
model = dict(
|
12 |
+
type="DefaultSegmentor",
|
13 |
+
backbone=dict(
|
14 |
+
type="PT-v2m2",
|
15 |
+
in_channels=4,
|
16 |
+
num_classes=16,
|
17 |
+
patch_embed_depth=1,
|
18 |
+
patch_embed_channels=48,
|
19 |
+
patch_embed_groups=6,
|
20 |
+
patch_embed_neighbours=8,
|
21 |
+
enc_depths=(2, 2, 6, 2),
|
22 |
+
enc_channels=(96, 192, 384, 512),
|
23 |
+
enc_groups=(12, 24, 48, 64),
|
24 |
+
enc_neighbours=(16, 16, 16, 16),
|
25 |
+
dec_depths=(1, 1, 1, 1),
|
26 |
+
dec_channels=(48, 96, 192, 384),
|
27 |
+
dec_groups=(6, 12, 24, 48),
|
28 |
+
dec_neighbours=(16, 16, 16, 16),
|
29 |
+
grid_sizes=(0.15, 0.375, 0.9375, 2.34375), # x3, x2.5, x2.5, x2.5
|
30 |
+
attn_qkv_bias=True,
|
31 |
+
pe_multiplier=False,
|
32 |
+
pe_bias=True,
|
33 |
+
attn_drop_rate=0.0,
|
34 |
+
drop_path_rate=0.3,
|
35 |
+
enable_checkpoint=False,
|
36 |
+
unpool_backend="map", # map / interp
|
37 |
+
),
|
38 |
+
criteria=[
|
39 |
+
dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
|
40 |
+
dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
|
41 |
+
],
|
42 |
+
)
|
43 |
+
|
44 |
+
# scheduler settings
|
45 |
+
epoch = 50
|
46 |
+
eval_epoch = 50
|
47 |
+
optimizer = dict(type="AdamW", lr=0.002, weight_decay=0.005)
|
48 |
+
scheduler = dict(
|
49 |
+
type="OneCycleLR",
|
50 |
+
max_lr=optimizer["lr"],
|
51 |
+
pct_start=0.04,
|
52 |
+
anneal_strategy="cos",
|
53 |
+
div_factor=10.0,
|
54 |
+
final_div_factor=100.0,
|
55 |
+
)
|
56 |
+
|
57 |
+
# dataset settings
|
58 |
+
dataset_type = "NuScenesDataset"
|
59 |
+
data_root = "data/nuscenes"
|
60 |
+
ignore_index = -1
|
61 |
+
names = [
|
62 |
+
"barrier",
|
63 |
+
"bicycle",
|
64 |
+
"bus",
|
65 |
+
"car",
|
66 |
+
"construction_vehicle",
|
67 |
+
"motorcycle",
|
68 |
+
"pedestrian",
|
69 |
+
"traffic_cone",
|
70 |
+
"trailer",
|
71 |
+
"truck",
|
72 |
+
"driveable_surface",
|
73 |
+
"other_flat",
|
74 |
+
"sidewalk",
|
75 |
+
"terrain",
|
76 |
+
"manmade",
|
77 |
+
"vegetation",
|
78 |
+
]
|
79 |
+
|
80 |
+
data = dict(
|
81 |
+
num_classes=16,
|
82 |
+
ignore_index=ignore_index,
|
83 |
+
names=names,
|
84 |
+
train=dict(
|
85 |
+
type=dataset_type,
|
86 |
+
split=["train", "val"],
|
87 |
+
data_root=data_root,
|
88 |
+
transform=[
|
89 |
+
# dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
|
90 |
+
# dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis='z', p=0.75),
|
91 |
+
dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
|
92 |
+
# dict(type="RandomRotate", angle=[-1/6, 1/6], axis='x', p=0.5),
|
93 |
+
# dict(type="RandomRotate", angle=[-1/6, 1/6], axis='y', p=0.5),
|
94 |
+
dict(type="RandomScale", scale=[0.9, 1.1]),
|
95 |
+
# dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
|
96 |
+
dict(type="RandomFlip", p=0.5),
|
97 |
+
dict(type="RandomJitter", sigma=0.005, clip=0.02),
|
98 |
+
# dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
|
99 |
+
# dict(type="GridSample", grid_size=0.05, hash_type="fnv", mode="train",
|
100 |
+
# keys=("coord", "strength", "segment"), return_grid_coord=True),
|
101 |
+
# dict(type="SphereCrop", point_max=1000000, mode="random"),
|
102 |
+
# dict(type="CenterShift", apply_z=False),
|
103 |
+
dict(type="ToTensor"),
|
104 |
+
dict(
|
105 |
+
type="Collect",
|
106 |
+
keys=("coord", "segment"),
|
107 |
+
feat_keys=("coord", "strength"),
|
108 |
+
),
|
109 |
+
],
|
110 |
+
test_mode=False,
|
111 |
+
ignore_index=ignore_index,
|
112 |
+
),
|
113 |
+
test=dict(
|
114 |
+
type=dataset_type,
|
115 |
+
split="test",
|
116 |
+
data_root=data_root,
|
117 |
+
transform=[],
|
118 |
+
test_mode=True,
|
119 |
+
test_cfg=dict(
|
120 |
+
voxelize=None,
|
121 |
+
crop=None,
|
122 |
+
post_transform=[
|
123 |
+
dict(type="ToTensor"),
|
124 |
+
dict(
|
125 |
+
type="Collect",
|
126 |
+
keys=("coord", "index"),
|
127 |
+
feat_keys=("coord", "strength"),
|
128 |
+
),
|
129 |
+
],
|
130 |
+
aug_transform=[
|
131 |
+
[dict(type="RandomScale", scale=[0.9, 0.9])],
|
132 |
+
[dict(type="RandomScale", scale=[0.95, 0.95])],
|
133 |
+
[dict(type="RandomScale", scale=[1, 1])],
|
134 |
+
[dict(type="RandomScale", scale=[1.05, 1.05])],
|
135 |
+
[dict(type="RandomScale", scale=[1.1, 1.1])],
|
136 |
+
[
|
137 |
+
dict(type="RandomScale", scale=[0.9, 0.9]),
|
138 |
+
dict(type="RandomFlip", p=1),
|
139 |
+
],
|
140 |
+
[
|
141 |
+
dict(type="RandomScale", scale=[0.95, 0.95]),
|
142 |
+
dict(type="RandomFlip", p=1),
|
143 |
+
],
|
144 |
+
[dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)],
|
145 |
+
[
|
146 |
+
dict(type="RandomScale", scale=[1.05, 1.05]),
|
147 |
+
dict(type="RandomFlip", p=1),
|
148 |
+
],
|
149 |
+
[
|
150 |
+
dict(type="RandomScale", scale=[1.1, 1.1]),
|
151 |
+
dict(type="RandomFlip", p=1),
|
152 |
+
],
|
153 |
+
],
|
154 |
+
),
|
155 |
+
ignore_index=ignore_index,
|
156 |
+
),
|
157 |
+
)
|
Pointcept/configs/nuscenes/semseg-pt-v3m1-0-base.py
ADDED
@@ -0,0 +1,215 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# nuScenes semantic segmentation config: DefaultSegmentorV2 with a PT-v3m1
# backbone, 16 classes, AdamW + OneCycleLR for 50 epochs.
# Runtime defaults are inherited from the base config listed in `_base_`.
_base_ = ["../_base_/default_runtime.py"]

# misc custom setting
batch_size = 12  # bs: total bs in all gpus
mix_prob = 0.8
empty_cache = False
enable_amp = True

# model settings
model = dict(
    type="DefaultSegmentorV2",
    num_classes=16,
    backbone_out_channels=64,
    backbone=dict(
        type="PT-v3m1",
        # presumably xyz coord (3) + strength (1), matching feat_keys in the
        # Collect transforms below -- TODO confirm
        in_channels=4,
        order=["z", "z-trans", "hilbert", "hilbert-trans"],
        stride=(2, 2, 2, 2),
        enc_depths=(2, 2, 2, 6, 2),
        enc_channels=(32, 64, 128, 256, 512),
        enc_num_head=(2, 4, 8, 16, 32),
        enc_patch_size=(1024, 1024, 1024, 1024, 1024),
        dec_depths=(2, 2, 2, 2),
        dec_channels=(64, 64, 128, 256),
        dec_num_head=(4, 4, 8, 16),
        dec_patch_size=(1024, 1024, 1024, 1024),
        mlp_ratio=4,
        qkv_bias=True,
        qk_scale=None,
        attn_drop=0.0,
        proj_drop=0.0,
        drop_path=0.3,
        shuffle_orders=True,
        pre_norm=True,
        enable_rpe=False,
        enable_flash=True,
        upcast_attention=False,
        upcast_softmax=False,
        cls_mode=False,
        pdnorm_bn=False,
        pdnorm_ln=False,
        pdnorm_decouple=True,
        pdnorm_adaptive=False,
        pdnorm_affine=True,
        pdnorm_conditions=("nuScenes", "SemanticKITTI", "Waymo"),
    ),
    # Both losses use ignore_index=-1, the same value as `ignore_index` below.
    criteria=[
        dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
        dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
    ],
)

# scheduler settings
epoch = 50
eval_epoch = 50
optimizer = dict(type="AdamW", lr=0.002, weight_decay=0.005)
scheduler = dict(
    type="OneCycleLR",
    # Two values: presumably one per optimizer param group (the default group
    # and the "block" group declared in param_dicts) -- TODO confirm.
    max_lr=[0.002, 0.0002],
    pct_start=0.04,
    anneal_strategy="cos",
    div_factor=10.0,
    final_div_factor=100.0,
)
param_dicts = [dict(keyword="block", lr=0.0002)]

# dataset settings
dataset_type = "NuScenesDataset"
data_root = "data/nuscenes"
ignore_index = -1
names = [
    "barrier",
    "bicycle",
    "bus",
    "car",
    "construction_vehicle",
    "motorcycle",
    "pedestrian",
    "traffic_cone",
    "trailer",
    "truck",
    "driveable_surface",
    "other_flat",
    "sidewalk",
    "terrain",
    "manmade",
    "vegetation",
]

data = dict(
    num_classes=16,
    ignore_index=ignore_index,
    names=names,
    train=dict(
        type=dataset_type,
        split="train",
        data_root=data_root,
        transform=[
            # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
            # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
            dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
            # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="x", p=0.5),
            # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="y", p=0.5),
            dict(type="RandomScale", scale=[0.9, 1.1]),
            # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
            dict(type="RandomFlip", p=0.5),
            dict(type="RandomJitter", sigma=0.005, clip=0.02),
            # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
            dict(
                type="GridSample",
                grid_size=0.05,
                hash_type="fnv",
                mode="train",
                keys=("coord", "strength", "segment"),
                return_grid_coord=True,
            ),
            # dict(type="SphereCrop", point_max=1000000, mode="random"),
            # dict(type="CenterShift", apply_z=False),
            dict(type="ToTensor"),
            dict(
                type="Collect",
                keys=("coord", "grid_coord", "segment"),
                feat_keys=("coord", "strength"),
            ),
        ],
        test_mode=False,
        ignore_index=ignore_index,
    ),
    val=dict(
        type=dataset_type,
        split="val",
        data_root=data_root,
        transform=[
            # dict(type="PointClip", point_cloud_range=(-51.2, -51.2, -4, 51.2, 51.2, 2.4)),
            dict(
                type="GridSample",
                grid_size=0.05,
                hash_type="fnv",
                mode="train",
                keys=("coord", "strength", "segment"),
                return_grid_coord=True,
            ),
            # dict(type="SphereCrop", point_max=1000000, mode='center'),
            dict(type="ToTensor"),
            dict(
                type="Collect",
                keys=("coord", "grid_coord", "segment"),
                feat_keys=("coord", "strength"),
            ),
        ],
        test_mode=False,
        ignore_index=ignore_index,
    ),
    # NOTE: the test entry reads the "val" split, not a separate test split.
    test=dict(
        type=dataset_type,
        split="val",
        data_root=data_root,
        transform=[
            dict(type="Copy", keys_dict={"segment": "origin_segment"}),
            dict(
                type="GridSample",
                grid_size=0.025,
                hash_type="fnv",
                mode="train",
                keys=("coord", "strength", "segment"),
                return_inverse=True,
            ),
        ],
        test_mode=True,
        test_cfg=dict(
            voxelize=dict(
                type="GridSample",
                grid_size=0.05,
                hash_type="fnv",
                mode="test",
                return_grid_coord=True,
                keys=("coord", "strength"),
            ),
            crop=None,
            post_transform=[
                dict(type="ToTensor"),
                dict(
                    type="Collect",
                    keys=("coord", "grid_coord", "index"),
                    feat_keys=("coord", "strength"),
                ),
            ],
            # Ten fixed augmentation variants: five scales, each without and
            # with a forced flip (p=1).
            aug_transform=[
                [dict(type="RandomScale", scale=[0.9, 0.9])],
                [dict(type="RandomScale", scale=[0.95, 0.95])],
                [dict(type="RandomScale", scale=[1, 1])],
                [dict(type="RandomScale", scale=[1.05, 1.05])],
                [dict(type="RandomScale", scale=[1.1, 1.1])],
                [
                    dict(type="RandomScale", scale=[0.9, 0.9]),
                    dict(type="RandomFlip", p=1),
                ],
                [
                    dict(type="RandomScale", scale=[0.95, 0.95]),
                    dict(type="RandomFlip", p=1),
                ],
                [dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)],
                [
                    dict(type="RandomScale", scale=[1.05, 1.05]),
                    dict(type="RandomFlip", p=1),
                ],
                [
                    dict(type="RandomScale", scale=[1.1, 1.1]),
                    dict(type="RandomFlip", p=1),
                ],
            ],
        ),
        ignore_index=ignore_index,
    ),
)
|
Pointcept/configs/nuscenes/semseg-spunet-v1m1-0-base.py
ADDED
@@ -0,0 +1,183 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# nuScenes semantic segmentation config: DefaultSegmentor with a SpUNet-v1m1
# backbone, 16 classes, AdamW + OneCycleLR for 50 epochs.
# Runtime defaults are inherited from the base config listed in `_base_`.
_base_ = ["../_base_/default_runtime.py"]

# misc custom setting
batch_size = 12  # bs: total bs in all gpus
mix_prob = 0
empty_cache = False
enable_amp = True

# model settings
model = dict(
    type="DefaultSegmentor",
    backbone=dict(
        type="SpUNet-v1m1",
        # presumably xyz coord (3) + strength (1), matching feat_keys in the
        # Collect transforms below -- TODO confirm
        in_channels=4,
        num_classes=16,
        channels=(32, 64, 128, 256, 256, 128, 96, 96),
        layers=(2, 3, 4, 6, 2, 2, 2, 2),
    ),
    criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
)

# scheduler settings
epoch = 50
eval_epoch = 50
optimizer = dict(type="AdamW", lr=0.002, weight_decay=0.005)
scheduler = dict(
    type="OneCycleLR",
    # Peak learning rate is tied to the optimizer's base lr.
    max_lr=optimizer["lr"],
    pct_start=0.04,
    anneal_strategy="cos",
    div_factor=10.0,
    final_div_factor=100.0,
)

# dataset settings
dataset_type = "NuScenesDataset"
data_root = "data/nuscenes"
ignore_index = -1
names = [
    "barrier",
    "bicycle",
    "bus",
    "car",
    "construction_vehicle",
    "motorcycle",
    "pedestrian",
    "traffic_cone",
    "trailer",
    "truck",
    "driveable_surface",
    "other_flat",
    "sidewalk",
    "terrain",
    "manmade",
    "vegetation",
]

data = dict(
    num_classes=16,
    ignore_index=ignore_index,
    names=names,
    train=dict(
        type=dataset_type,
        split="train",
        data_root=data_root,
        transform=[
            # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
            # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis='z', p=0.75),
            dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
            # dict(type="RandomRotate", angle=[-1/6, 1/6], axis='x', p=0.5),
            # dict(type="RandomRotate", angle=[-1/6, 1/6], axis='y', p=0.5),
            dict(type="RandomScale", scale=[0.9, 1.1]),
            # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
            dict(type="RandomFlip", p=0.5),
            dict(type="RandomJitter", sigma=0.005, clip=0.02),
            # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
            dict(
                type="GridSample",
                grid_size=0.05,
                hash_type="fnv",
                mode="train",
                keys=("coord", "strength", "segment"),
                return_grid_coord=True,
            ),
            # dict(type="SphereCrop", point_max=1000000, mode="random"),
            # dict(type="CenterShift", apply_z=False),
            dict(type="ToTensor"),
            dict(
                type="Collect",
                keys=("coord", "grid_coord", "segment"),
                feat_keys=("coord", "strength"),
            ),
        ],
        test_mode=False,
        ignore_index=ignore_index,
    ),
    val=dict(
        type=dataset_type,
        split="val",
        data_root=data_root,
        transform=[
            # dict(type="PointClip", point_cloud_range=(-51.2, -51.2, -4, 51.2, 51.2, 2.4)),
            dict(
                type="GridSample",
                grid_size=0.05,
                hash_type="fnv",
                mode="train",
                keys=("coord", "strength", "segment"),
                return_grid_coord=True,
            ),
            # dict(type="SphereCrop", point_max=1000000, mode='center'),
            dict(type="ToTensor"),
            dict(
                type="Collect",
                keys=("coord", "grid_coord", "segment"),
                feat_keys=("coord", "strength"),
            ),
        ],
        test_mode=False,
        ignore_index=ignore_index,
    ),
    # NOTE: the test entry reads the "val" split, not a separate test split.
    test=dict(
        type=dataset_type,
        split="val",
        data_root=data_root,
        transform=[
            dict(type="Copy", keys_dict={"segment": "origin_segment"}),
            dict(
                type="GridSample",
                grid_size=0.025,
                hash_type="fnv",
                mode="train",
                keys=("coord", "strength", "segment"),
                return_inverse=True,
            ),
        ],
        test_mode=True,
        test_cfg=dict(
            voxelize=dict(
                type="GridSample",
                grid_size=0.05,
                hash_type="fnv",
                mode="test",
                return_grid_coord=True,
                keys=("coord", "strength"),
            ),
            crop=None,
            post_transform=[
                dict(type="ToTensor"),
                dict(
                    type="Collect",
                    keys=("coord", "grid_coord", "index"),
                    feat_keys=("coord", "strength"),
                ),
            ],
            # Ten fixed augmentation variants: five scales, each without and
            # with a forced flip (p=1).
            aug_transform=[
                [dict(type="RandomScale", scale=[0.9, 0.9])],
                [dict(type="RandomScale", scale=[0.95, 0.95])],
                [dict(type="RandomScale", scale=[1, 1])],
                [dict(type="RandomScale", scale=[1.05, 1.05])],
                [dict(type="RandomScale", scale=[1.1, 1.1])],
                [
                    dict(type="RandomScale", scale=[0.9, 0.9]),
                    dict(type="RandomFlip", p=1),
                ],
                [
                    dict(type="RandomScale", scale=[0.95, 0.95]),
                    dict(type="RandomFlip", p=1),
                ],
                [dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)],
                [
                    dict(type="RandomScale", scale=[1.05, 1.05]),
                    dict(type="RandomFlip", p=1),
                ],
                [
                    dict(type="RandomScale", scale=[1.1, 1.1]),
                    dict(type="RandomFlip", p=1),
                ],
            ],
        ),
        ignore_index=ignore_index,
    ),
)
|
Pointcept/configs/s3dis/insseg-pointgroup-v1m1-0-spunet-base-vs0p02-sc-aug.py
ADDED
@@ -0,0 +1,180 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# S3DIS instance segmentation config: PG-v1m1 model with a SpUNet-v1m1
# backbone, 13 classes, grid size 0.02, SGD + PolyLR for 3000 epochs.
# Differs from the sibling "vs0p02" config in this diff by its chromatic
# augmentation strengths (ratio=0.1, std=0.05).
# Runtime defaults are inherited from the base config listed in `_base_`.
_base_ = ["../_base_/default_runtime.py"]

# misc custom setting
batch_size = 12  # bs: total bs in all gpus
num_worker = 12
mix_prob = 0.0
empty_cache = False
enable_amp = True
evaluate = True

class_names = [
    "ceiling",
    "floor",
    "wall",
    "beam",
    "column",
    "window",
    "door",
    "table",
    "chair",
    "sofa",
    "bookcase",
    "board",
    "clutter",
]
num_classes = 13
segment_ignore_index = (-1,)

# model settings
model = dict(
    type="PG-v1m1",
    backbone=dict(
        type="SpUNet-v1m1",
        # presumably color (3) + normal (3), matching feat_keys in the
        # Collect transforms below -- TODO confirm
        in_channels=6,
        num_classes=0,
        channels=(32, 64, 128, 256, 256, 128, 96, 96),
        layers=(2, 3, 4, 6, 2, 2, 2, 2),
    ),
    backbone_out_channels=96,
    semantic_num_classes=num_classes,
    semantic_ignore_index=-1,
    segment_ignore_index=segment_ignore_index,
    instance_ignore_index=-1,
    cluster_thresh=1.5,
    cluster_closed_points=300,
    cluster_propose_points=100,
    cluster_min_points=50,
)

# scheduler settings
epoch = 3000
optimizer = dict(type="SGD", lr=0.1, momentum=0.9, weight_decay=0.0001, nesterov=True)
scheduler = dict(type="PolyLR")

# dataset settings
dataset_type = "S3DISDataset"
data_root = "data/s3dis"

data = dict(
    num_classes=num_classes,
    ignore_index=-1,
    names=class_names,
    train=dict(
        type=dataset_type,
        split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"),
        data_root=data_root,
        transform=[
            dict(type="CenterShift", apply_z=True),
            dict(
                type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.5
            ),
            # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis='z', p=0.75),
            dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
            dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
            dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
            dict(type="RandomScale", scale=[0.9, 1.1]),
            # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
            dict(type="RandomFlip", p=0.5),
            # dict(type="RandomJitter", sigma=0.005, clip=0.02),
            # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
            dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
            dict(type="ChromaticTranslation", p=0.95, ratio=0.1),
            dict(type="ChromaticJitter", p=0.95, std=0.05),
            # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
            # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
            dict(
                type="GridSample",
                grid_size=0.02,
                hash_type="fnv",
                mode="train",
                return_grid_coord=True,
                keys=("coord", "color", "normal", "segment", "instance"),
            ),
            dict(type="SphereCrop", sample_rate=0.8, mode="random"),
            dict(type="NormalizeColor"),
            dict(
                type="InstanceParser",
                segment_ignore_index=segment_ignore_index,
                instance_ignore_index=-1,
            ),
            dict(type="ToTensor"),
            dict(
                type="Collect",
                keys=(
                    "coord",
                    "grid_coord",
                    "segment",
                    "instance",
                    "instance_centroid",
                    "bbox",
                ),
                feat_keys=("color", "normal"),
            ),
        ],
        test_mode=False,
    ),
    val=dict(
        type=dataset_type,
        split="Area_5",
        data_root=data_root,
        transform=[
            dict(type="CenterShift", apply_z=True),
            # Copy runs before GridSample, so the origin_* keys hold the
            # values as they are before grid sampling.
            dict(
                type="Copy",
                keys_dict={
                    "coord": "origin_coord",
                    "segment": "origin_segment",
                    "instance": "origin_instance",
                },
            ),
            dict(
                type="GridSample",
                grid_size=0.02,
                hash_type="fnv",
                mode="train",
                return_grid_coord=True,
                keys=("coord", "color", "normal", "segment", "instance"),
            ),
            # dict(type="SphereCrop", point_max=1000000, mode='center'),
            dict(type="CenterShift", apply_z=False),
            dict(type="NormalizeColor"),
            dict(
                type="InstanceParser",
                segment_ignore_index=segment_ignore_index,
                instance_ignore_index=-1,
            ),
            dict(type="ToTensor"),
            dict(
                type="Collect",
                keys=(
                    "coord",
                    "grid_coord",
                    "segment",
                    "instance",
                    "origin_coord",
                    "origin_segment",
                    "origin_instance",
                    "instance_centroid",
                    "bbox",
                ),
                feat_keys=("color", "normal"),
                offset_keys_dict=dict(offset="coord", origin_offset="origin_coord"),
            ),
        ],
        test_mode=False,
    ),
    test=dict(),  # currently not available
)

hooks = [
    # NOTE(review): keywords and replacement are the same string here, so a
    # plain substitution would change nothing -- confirm this is intended.
    dict(type="CheckpointLoader", keywords="module.", replacement="module."),
    dict(type="IterationTimer", warmup_iter=2),
    dict(type="InformationWriter"),
    dict(
        type="InsSegEvaluator",
        segment_ignore_index=segment_ignore_index,
        instance_ignore_index=-1,
    ),
    dict(type="CheckpointSaver", save_freq=None),
]
|
Pointcept/configs/s3dis/insseg-pointgroup-v1m1-0-spunet-base-vs0p02.py
ADDED
@@ -0,0 +1,180 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# S3DIS instance segmentation config: PG-v1m1 model with a SpUNet-v1m1
# backbone, 13 classes, grid size 0.02, SGD + PolyLR for 3000 epochs.
# Runtime defaults are inherited from the base config listed in `_base_`.
_base_ = ["../_base_/default_runtime.py"]

# misc custom setting
batch_size = 12  # bs: total bs in all gpus
num_worker = 12
mix_prob = 0.0
empty_cache = False
enable_amp = True
evaluate = True

class_names = [
    "ceiling",
    "floor",
    "wall",
    "beam",
    "column",
    "window",
    "door",
    "table",
    "chair",
    "sofa",
    "bookcase",
    "board",
    "clutter",
]
num_classes = 13
segment_ignore_index = (-1,)

# model settings
model = dict(
    type="PG-v1m1",
    backbone=dict(
        type="SpUNet-v1m1",
        # presumably color (3) + normal (3), matching feat_keys in the
        # Collect transforms below -- TODO confirm
        in_channels=6,
        num_classes=0,
        channels=(32, 64, 128, 256, 256, 128, 96, 96),
        layers=(2, 3, 4, 6, 2, 2, 2, 2),
    ),
    backbone_out_channels=96,
    semantic_num_classes=num_classes,
    semantic_ignore_index=-1,
    segment_ignore_index=segment_ignore_index,
    instance_ignore_index=-1,
    cluster_thresh=1.5,
    cluster_closed_points=300,
    cluster_propose_points=100,
    cluster_min_points=50,
)

# scheduler settings
epoch = 3000
optimizer = dict(type="SGD", lr=0.1, momentum=0.9, weight_decay=0.0001, nesterov=True)
scheduler = dict(type="PolyLR")

# dataset settings
dataset_type = "S3DISDataset"
data_root = "data/s3dis"

data = dict(
    num_classes=num_classes,
    ignore_index=-1,
    names=class_names,
    train=dict(
        type=dataset_type,
        split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"),
        data_root=data_root,
        transform=[
            dict(type="CenterShift", apply_z=True),
            dict(
                type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.5
            ),
            # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis='z', p=0.75),
            dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
            dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
            dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
            dict(type="RandomScale", scale=[0.9, 1.1]),
            # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
            dict(type="RandomFlip", p=0.5),
            # dict(type="RandomJitter", sigma=0.005, clip=0.02),
            # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
            dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
            dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
            dict(type="ChromaticJitter", p=0.95, std=0.005),
            # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
            # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
            dict(
                type="GridSample",
                grid_size=0.02,
                hash_type="fnv",
                mode="train",
                return_grid_coord=True,
                keys=("coord", "color", "normal", "segment", "instance"),
            ),
            dict(type="SphereCrop", sample_rate=0.8, mode="random"),
            dict(type="NormalizeColor"),
            dict(
                type="InstanceParser",
                segment_ignore_index=segment_ignore_index,
                instance_ignore_index=-1,
            ),
            dict(type="ToTensor"),
            dict(
                type="Collect",
                keys=(
                    "coord",
                    "grid_coord",
                    "segment",
                    "instance",
                    "instance_centroid",
                    "bbox",
                ),
                feat_keys=("color", "normal"),
            ),
        ],
        test_mode=False,
    ),
    val=dict(
        type=dataset_type,
        split="Area_5",
        data_root=data_root,
        transform=[
            dict(type="CenterShift", apply_z=True),
            # Copy runs before GridSample, so the origin_* keys hold the
            # values as they are before grid sampling.
            dict(
                type="Copy",
                keys_dict={
                    "coord": "origin_coord",
                    "segment": "origin_segment",
                    "instance": "origin_instance",
                },
            ),
            dict(
                type="GridSample",
                grid_size=0.02,
                hash_type="fnv",
                mode="train",
                return_grid_coord=True,
                keys=("coord", "color", "normal", "segment", "instance"),
            ),
            # dict(type="SphereCrop", point_max=1000000, mode='center'),
            dict(type="CenterShift", apply_z=False),
            dict(type="NormalizeColor"),
            dict(
                type="InstanceParser",
                segment_ignore_index=segment_ignore_index,
                instance_ignore_index=-1,
            ),
            dict(type="ToTensor"),
            dict(
                type="Collect",
                keys=(
                    "coord",
                    "grid_coord",
                    "segment",
                    "instance",
                    "origin_coord",
                    "origin_segment",
                    "origin_instance",
                    "instance_centroid",
                    "bbox",
                ),
                feat_keys=("color", "normal"),
                offset_keys_dict=dict(offset="coord", origin_offset="origin_coord"),
            ),
        ],
        test_mode=False,
    ),
    test=dict(),  # currently not available
)

hooks = [
    # NOTE(review): keywords and replacement are the same string here, so a
    # plain substitution would change nothing -- confirm this is intended.
    dict(type="CheckpointLoader", keywords="module.", replacement="module."),
    dict(type="IterationTimer", warmup_iter=2),
    dict(type="InformationWriter"),
    dict(
        type="InsSegEvaluator",
        segment_ignore_index=segment_ignore_index,
        instance_ignore_index=-1,
    ),
    dict(type="CheckpointSaver", save_freq=None),
]
|
Pointcept/configs/s3dis/insseg-pointgroup-v1m1-0-spunet-base.py
ADDED
@@ -0,0 +1,181 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# S3DIS instance segmentation config: PG-v1m1 model with a SpUNet-v1m1
# backbone, 13 classes, grid size 0.05 (model voxel_size=0.05), SGD + PolyLR
# for 3000 epochs.
# Runtime defaults are inherited from the base config listed in `_base_`.
_base_ = ["../_base_/default_runtime.py"]

# misc custom setting
batch_size = 12  # bs: total bs in all gpus
num_worker = 12
mix_prob = 0.0
empty_cache = False
enable_amp = True
evaluate = True

class_names = [
    "ceiling",
    "floor",
    "wall",
    "beam",
    "column",
    "window",
    "door",
    "table",
    "chair",
    "sofa",
    "bookcase",
    "board",
    "clutter",
]
num_classes = 13
segment_ignore_index = (-1,)

# model settings
model = dict(
    type="PG-v1m1",
    backbone=dict(
        type="SpUNet-v1m1",
        # presumably color (3) + normal (3), matching feat_keys in the
        # Collect transforms below -- TODO confirm
        in_channels=6,
        num_classes=0,
        channels=(32, 64, 128, 256, 256, 128, 96, 96),
        layers=(2, 3, 4, 6, 2, 2, 2, 2),
    ),
    backbone_out_channels=96,
    semantic_num_classes=num_classes,
    semantic_ignore_index=-1,
    segment_ignore_index=segment_ignore_index,
    instance_ignore_index=-1,
    cluster_thresh=1.5,
    cluster_closed_points=300,
    cluster_propose_points=100,
    cluster_min_points=50,
    # Same value as the GridSample grid_size used in the train/val pipelines.
    voxel_size=0.05,
)

# scheduler settings
epoch = 3000
optimizer = dict(type="SGD", lr=0.1, momentum=0.9, weight_decay=0.0001, nesterov=True)
scheduler = dict(type="PolyLR")

# dataset settings
dataset_type = "S3DISDataset"
data_root = "data/s3dis"

data = dict(
    num_classes=num_classes,
    ignore_index=-1,
    names=class_names,
    train=dict(
        type=dataset_type,
        split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"),
        data_root=data_root,
        transform=[
            dict(type="CenterShift", apply_z=True),
            dict(
                type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.5
            ),
            # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis='z', p=0.75),
            dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
            dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
            dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
            dict(type="RandomScale", scale=[0.9, 1.1]),
            # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
            dict(type="RandomFlip", p=0.5),
            # dict(type="RandomJitter", sigma=0.005, clip=0.02),
            # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
            dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
            dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
            dict(type="ChromaticJitter", p=0.95, std=0.005),
            # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
            # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
            dict(
                type="GridSample",
                grid_size=0.05,
                hash_type="fnv",
                mode="train",
                return_grid_coord=True,
                keys=("coord", "color", "normal", "segment", "instance"),
            ),
            dict(type="SphereCrop", sample_rate=0.8, mode="random"),
            dict(type="NormalizeColor"),
            dict(
                type="InstanceParser",
                segment_ignore_index=segment_ignore_index,
                instance_ignore_index=-1,
            ),
            dict(type="ToTensor"),
            dict(
                type="Collect",
                keys=(
                    "coord",
                    "grid_coord",
                    "segment",
                    "instance",
                    "instance_centroid",
                    "bbox",
                ),
                feat_keys=("color", "normal"),
            ),
        ],
        test_mode=False,
    ),
    val=dict(
        type=dataset_type,
        split="Area_5",
        data_root=data_root,
        transform=[
            dict(type="CenterShift", apply_z=True),
            # Copy runs before GridSample, so the origin_* keys hold the
            # values as they are before grid sampling.
            dict(
                type="Copy",
                keys_dict={
                    "coord": "origin_coord",
                    "segment": "origin_segment",
                    "instance": "origin_instance",
                },
            ),
            dict(
                type="GridSample",
                grid_size=0.05,
                hash_type="fnv",
                mode="train",
                return_grid_coord=True,
                keys=("coord", "color", "normal", "segment", "instance"),
            ),
            # dict(type="SphereCrop", point_max=1000000, mode='center'),
            dict(type="CenterShift", apply_z=False),
            dict(type="NormalizeColor"),
            dict(
                type="InstanceParser",
                segment_ignore_index=segment_ignore_index,
                instance_ignore_index=-1,
            ),
            dict(type="ToTensor"),
            dict(
                type="Collect",
                keys=(
                    "coord",
                    "grid_coord",
                    "segment",
                    "instance",
                    "origin_coord",
                    "origin_segment",
                    "origin_instance",
                    "instance_centroid",
                    "bbox",
                ),
                feat_keys=("color", "normal"),
                offset_keys_dict=dict(offset="coord", origin_offset="origin_coord"),
            ),
        ],
        test_mode=False,
    ),
    test=dict(),  # currently not available
)

hooks = [
    # NOTE(review): keywords and replacement are the same string here, so a
    # plain substitution would change nothing -- confirm this is intended.
    dict(type="CheckpointLoader", keywords="module.", replacement="module."),
    dict(type="IterationTimer", warmup_iter=2),
    dict(type="InformationWriter"),
    dict(
        type="InsSegEvaluator",
        segment_ignore_index=segment_ignore_index,
        instance_ignore_index=-1,
    ),
    dict(type="CheckpointSaver", save_freq=None),
]
|
Pointcept/configs/s3dis/insseg-ppt-v1m1-0-pointgroup-spunet-ft-vs0p05.py
ADDED
@@ -0,0 +1,273 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
_base_ = ["../_base_/default_runtime.py"]
|
2 |
+
|
3 |
+
# miscellaneous custom settings
|
4 |
+
batch_size = 12  # total batch size summed over all GPUs (not per-GPU)
|
5 |
+
num_worker = 24
|
6 |
+
mix_prob = 0
|
7 |
+
empty_cache = False
|
8 |
+
enable_amp = True
|
9 |
+
evaluate = True
|
10 |
+
find_unused_parameters = True
|
11 |
+
|
12 |
+
class_names = [
|
13 |
+
"ceiling",
|
14 |
+
"floor",
|
15 |
+
"wall",
|
16 |
+
"beam",
|
17 |
+
"column",
|
18 |
+
"window",
|
19 |
+
"door",
|
20 |
+
"table",
|
21 |
+
"chair",
|
22 |
+
"sofa",
|
23 |
+
"bookcase",
|
24 |
+
"board",
|
25 |
+
"clutter",
|
26 |
+
]
|
27 |
+
num_classes = 13
|
28 |
+
segment_ignore_index = (-1,)
|
29 |
+
|
30 |
+
# model settings
|
31 |
+
model = dict(
|
32 |
+
type="PG-v1m1",
|
33 |
+
backbone=dict(
|
34 |
+
type="PPT-v1m1",
|
35 |
+
backbone=dict(
|
36 |
+
type="SpUNet-v1m3",
|
37 |
+
in_channels=6,
|
38 |
+
num_classes=0,
|
39 |
+
base_channels=32,
|
40 |
+
context_channels=256,
|
41 |
+
channels=(32, 64, 128, 256, 256, 128, 96, 96),
|
42 |
+
layers=(2, 3, 4, 6, 2, 2, 2, 2),
|
43 |
+
cls_mode=False,
|
44 |
+
conditions=("ScanNet", "S3DIS", "Structured3D"),
|
45 |
+
zero_init=False,
|
46 |
+
norm_decouple=True,
|
47 |
+
norm_adaptive=True,
|
48 |
+
norm_affine=True,
|
49 |
+
),
|
50 |
+
criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
|
51 |
+
backbone_out_channels=96,
|
52 |
+
context_channels=256,
|
53 |
+
conditions=("Structured3D", "ScanNet", "S3DIS"),
|
54 |
+
template="[x]",
|
55 |
+
clip_model="ViT-B/16",
|
56 |
+
class_name=(
|
57 |
+
"wall",
|
58 |
+
"floor",
|
59 |
+
"cabinet",
|
60 |
+
"bed",
|
61 |
+
"chair",
|
62 |
+
"sofa",
|
63 |
+
"table",
|
64 |
+
"door",
|
65 |
+
"window",
|
66 |
+
"bookshelf",
|
67 |
+
"bookcase",
|
68 |
+
"picture",
|
69 |
+
"counter",
|
70 |
+
"desk",
|
71 |
+
"shelves",
|
72 |
+
"curtain",
|
73 |
+
"dresser",
|
74 |
+
"pillow",
|
75 |
+
"mirror",
|
76 |
+
"ceiling",
|
77 |
+
"refrigerator",
|
78 |
+
"television",
|
79 |
+
"shower curtain",
|
80 |
+
"nightstand",
|
81 |
+
"toilet",
|
82 |
+
"sink",
|
83 |
+
"lamp",
|
84 |
+
"bathtub",
|
85 |
+
"garbagebin",
|
86 |
+
"board",
|
87 |
+
"beam",
|
88 |
+
"column",
|
89 |
+
"clutter",
|
90 |
+
"otherstructure",
|
91 |
+
"otherfurniture",
|
92 |
+
"otherprop",
|
93 |
+
),
|
94 |
+
valid_index=(
|
95 |
+
(
|
96 |
+
0,
|
97 |
+
1,
|
98 |
+
2,
|
99 |
+
3,
|
100 |
+
4,
|
101 |
+
5,
|
102 |
+
6,
|
103 |
+
7,
|
104 |
+
8,
|
105 |
+
11,
|
106 |
+
13,
|
107 |
+
14,
|
108 |
+
15,
|
109 |
+
16,
|
110 |
+
17,
|
111 |
+
18,
|
112 |
+
19,
|
113 |
+
20,
|
114 |
+
21,
|
115 |
+
23,
|
116 |
+
25,
|
117 |
+
26,
|
118 |
+
33,
|
119 |
+
34,
|
120 |
+
35,
|
121 |
+
),
|
122 |
+
(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 15, 20, 22, 24, 25, 27, 34),
|
123 |
+
(0, 1, 4, 5, 6, 7, 8, 10, 19, 29, 30, 31, 32),
|
124 |
+
),
|
125 |
+
backbone_mode=True,
|
126 |
+
),
|
127 |
+
backbone_out_channels=96,
|
128 |
+
semantic_num_classes=num_classes,
|
129 |
+
semantic_ignore_index=-1,
|
130 |
+
segment_ignore_index=segment_ignore_index,
|
131 |
+
instance_ignore_index=-1,
|
132 |
+
cluster_thresh=1.5,
|
133 |
+
cluster_closed_points=300,
|
134 |
+
cluster_propose_points=100,
|
135 |
+
cluster_min_points=50,
|
136 |
+
voxel_size=0.05,
|
137 |
+
)
|
138 |
+
|
139 |
+
# scheduler settings
|
140 |
+
epoch = 3000
|
141 |
+
optimizer = dict(type="SGD", lr=0.1, momentum=0.9, weight_decay=0.0001, nesterov=True)
|
142 |
+
scheduler = dict(type="PolyLR")
|
143 |
+
|
144 |
+
# dataset settings
|
145 |
+
dataset_type = "S3DISDataset"
|
146 |
+
data_root = "data/s3dis"
|
147 |
+
|
148 |
+
data = dict(
|
149 |
+
num_classes=num_classes,
|
150 |
+
ignore_index=-1,
|
151 |
+
names=class_names,
|
152 |
+
train=dict(
|
153 |
+
type=dataset_type,
|
154 |
+
split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"),
|
155 |
+
data_root=data_root,
|
156 |
+
transform=[
|
157 |
+
dict(type="CenterShift", apply_z=True),
|
158 |
+
dict(
|
159 |
+
type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.5
|
160 |
+
),
|
161 |
+
# dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis='z', p=0.75),
|
162 |
+
dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
|
163 |
+
dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
|
164 |
+
dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
|
165 |
+
dict(type="RandomScale", scale=[0.9, 1.1]),
|
166 |
+
# dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
|
167 |
+
dict(type="RandomFlip", p=0.5),
|
168 |
+
# dict(type="RandomJitter", sigma=0.005, clip=0.02),
|
169 |
+
# dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
|
170 |
+
dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
|
171 |
+
dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
|
172 |
+
dict(type="ChromaticJitter", p=0.95, std=0.005),
|
173 |
+
# dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
|
174 |
+
# dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
|
175 |
+
dict(
|
176 |
+
type="GridSample",
|
177 |
+
grid_size=0.05,
|
178 |
+
hash_type="fnv",
|
179 |
+
mode="train",
|
180 |
+
return_grid_coord=True,
|
181 |
+
keys=("coord", "color", "normal", "segment", "instance"),
|
182 |
+
),
|
183 |
+
dict(type="SphereCrop", sample_rate=0.8, mode="random"),
|
184 |
+
dict(type="NormalizeColor"),
|
185 |
+
dict(
|
186 |
+
type="InstanceParser",
|
187 |
+
segment_ignore_index=segment_ignore_index,
|
188 |
+
instance_ignore_index=-1,
|
189 |
+
),
|
190 |
+
dict(type="Add", keys_dict={"condition": "S3DIS"}),
|
191 |
+
dict(type="ToTensor"),
|
192 |
+
dict(
|
193 |
+
type="Collect",
|
194 |
+
keys=(
|
195 |
+
"coord",
|
196 |
+
"grid_coord",
|
197 |
+
"segment",
|
198 |
+
"instance",
|
199 |
+
"instance_centroid",
|
200 |
+
"bbox",
|
201 |
+
"condition",
|
202 |
+
),
|
203 |
+
feat_keys=("color", "normal"),
|
204 |
+
),
|
205 |
+
],
|
206 |
+
test_mode=False,
|
207 |
+
),
|
208 |
+
val=dict(
|
209 |
+
type=dataset_type,
|
210 |
+
split="Area_5",
|
211 |
+
data_root=data_root,
|
212 |
+
transform=[
|
213 |
+
dict(type="CenterShift", apply_z=True),
|
214 |
+
dict(
|
215 |
+
type="Copy",
|
216 |
+
keys_dict={
|
217 |
+
"coord": "origin_coord",
|
218 |
+
"segment": "origin_segment",
|
219 |
+
"instance": "origin_instance",
|
220 |
+
},
|
221 |
+
),
|
222 |
+
dict(
|
223 |
+
type="GridSample",
|
224 |
+
grid_size=0.05,
|
225 |
+
hash_type="fnv",
|
226 |
+
mode="train",
|
227 |
+
return_grid_coord=True,
|
228 |
+
keys=("coord", "color", "normal", "segment", "instance"),
|
229 |
+
),
|
230 |
+
# dict(type="SphereCrop", point_max=1000000, mode='center'),
|
231 |
+
dict(type="CenterShift", apply_z=False),
|
232 |
+
dict(type="NormalizeColor"),
|
233 |
+
dict(
|
234 |
+
type="InstanceParser",
|
235 |
+
segment_ignore_index=segment_ignore_index,
|
236 |
+
instance_ignore_index=-1,
|
237 |
+
),
|
238 |
+
dict(type="Add", keys_dict={"condition": "S3DIS"}),
|
239 |
+
dict(type="ToTensor"),
|
240 |
+
dict(
|
241 |
+
type="Collect",
|
242 |
+
keys=(
|
243 |
+
"coord",
|
244 |
+
"grid_coord",
|
245 |
+
"segment",
|
246 |
+
"instance",
|
247 |
+
"origin_coord",
|
248 |
+
"origin_segment",
|
249 |
+
"origin_instance",
|
250 |
+
"instance_centroid",
|
251 |
+
"bbox",
|
252 |
+
"condition",
|
253 |
+
),
|
254 |
+
feat_keys=("color", "normal"),
|
255 |
+
offset_keys_dict=dict(offset="coord", origin_offset="origin_coord"),
|
256 |
+
),
|
257 |
+
],
|
258 |
+
test_mode=False,
|
259 |
+
),
|
260 |
+
test=dict(),  # test dataset config is left empty: testing is currently not available
|
261 |
+
)
|
262 |
+
|
263 |
+
hooks = [
|
264 |
+
dict(type="CheckpointLoader", keywords="module.", replacement="module.backbone."),
|
265 |
+
dict(type="IterationTimer", warmup_iter=2),
|
266 |
+
dict(type="InformationWriter"),
|
267 |
+
dict(
|
268 |
+
type="InsSegEvaluator",
|
269 |
+
segment_ignore_index=segment_ignore_index,
|
270 |
+
instance_ignore_index=-1,
|
271 |
+
),
|
272 |
+
dict(type="CheckpointSaver", save_freq=None),
|
273 |
+
]
|
Pointcept/configs/s3dis/insseg-ppt-v1m1-0-pointgroup-spunet-ft.py
ADDED
@@ -0,0 +1,273 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
_base_ = ["../_base_/default_runtime.py"]
|
2 |
+
|
3 |
+
# miscellaneous custom settings
|
4 |
+
batch_size = 12  # total batch size summed over all GPUs (not per-GPU)
|
5 |
+
num_worker = 24
|
6 |
+
mix_prob = 0
|
7 |
+
empty_cache = False
|
8 |
+
enable_amp = True
|
9 |
+
evaluate = True
|
10 |
+
find_unused_parameters = True
|
11 |
+
|
12 |
+
class_names = [
|
13 |
+
"ceiling",
|
14 |
+
"floor",
|
15 |
+
"wall",
|
16 |
+
"beam",
|
17 |
+
"column",
|
18 |
+
"window",
|
19 |
+
"door",
|
20 |
+
"table",
|
21 |
+
"chair",
|
22 |
+
"sofa",
|
23 |
+
"bookcase",
|
24 |
+
"board",
|
25 |
+
"clutter",
|
26 |
+
]
|
27 |
+
num_classes = 13
|
28 |
+
segment_ignore_index = (-1,)
|
29 |
+
|
30 |
+
# model settings
|
31 |
+
model = dict(
|
32 |
+
type="PG-v1m1",
|
33 |
+
backbone=dict(
|
34 |
+
type="PPT-v1m1",
|
35 |
+
backbone=dict(
|
36 |
+
type="SpUNet-v1m3",
|
37 |
+
in_channels=6,
|
38 |
+
num_classes=0,
|
39 |
+
base_channels=32,
|
40 |
+
context_channels=256,
|
41 |
+
channels=(32, 64, 128, 256, 256, 128, 96, 96),
|
42 |
+
layers=(2, 3, 4, 6, 2, 2, 2, 2),
|
43 |
+
cls_mode=False,
|
44 |
+
conditions=("ScanNet", "S3DIS", "Structured3D"),
|
45 |
+
zero_init=False,
|
46 |
+
norm_decouple=True,
|
47 |
+
norm_adaptive=True,
|
48 |
+
norm_affine=True,
|
49 |
+
),
|
50 |
+
criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
|
51 |
+
backbone_out_channels=96,
|
52 |
+
context_channels=256,
|
53 |
+
conditions=("Structured3D", "ScanNet", "S3DIS"),
|
54 |
+
template="[x]",
|
55 |
+
clip_model="ViT-B/16",
|
56 |
+
class_name=(
|
57 |
+
"wall",
|
58 |
+
"floor",
|
59 |
+
"cabinet",
|
60 |
+
"bed",
|
61 |
+
"chair",
|
62 |
+
"sofa",
|
63 |
+
"table",
|
64 |
+
"door",
|
65 |
+
"window",
|
66 |
+
"bookshelf",
|
67 |
+
"bookcase",
|
68 |
+
"picture",
|
69 |
+
"counter",
|
70 |
+
"desk",
|
71 |
+
"shelves",
|
72 |
+
"curtain",
|
73 |
+
"dresser",
|
74 |
+
"pillow",
|
75 |
+
"mirror",
|
76 |
+
"ceiling",
|
77 |
+
"refrigerator",
|
78 |
+
"television",
|
79 |
+
"shower curtain",
|
80 |
+
"nightstand",
|
81 |
+
"toilet",
|
82 |
+
"sink",
|
83 |
+
"lamp",
|
84 |
+
"bathtub",
|
85 |
+
"garbagebin",
|
86 |
+
"board",
|
87 |
+
"beam",
|
88 |
+
"column",
|
89 |
+
"clutter",
|
90 |
+
"otherstructure",
|
91 |
+
"otherfurniture",
|
92 |
+
"otherprop",
|
93 |
+
),
|
94 |
+
valid_index=(
|
95 |
+
(
|
96 |
+
0,
|
97 |
+
1,
|
98 |
+
2,
|
99 |
+
3,
|
100 |
+
4,
|
101 |
+
5,
|
102 |
+
6,
|
103 |
+
7,
|
104 |
+
8,
|
105 |
+
11,
|
106 |
+
13,
|
107 |
+
14,
|
108 |
+
15,
|
109 |
+
16,
|
110 |
+
17,
|
111 |
+
18,
|
112 |
+
19,
|
113 |
+
20,
|
114 |
+
21,
|
115 |
+
23,
|
116 |
+
25,
|
117 |
+
26,
|
118 |
+
33,
|
119 |
+
34,
|
120 |
+
35,
|
121 |
+
),
|
122 |
+
(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 15, 20, 22, 24, 25, 27, 34),
|
123 |
+
(0, 1, 4, 5, 6, 7, 8, 10, 19, 29, 30, 31, 32),
|
124 |
+
),
|
125 |
+
backbone_mode=True,
|
126 |
+
),
|
127 |
+
backbone_out_channels=96,
|
128 |
+
semantic_num_classes=num_classes,
|
129 |
+
semantic_ignore_index=-1,
|
130 |
+
segment_ignore_index=segment_ignore_index,
|
131 |
+
instance_ignore_index=-1,
|
132 |
+
cluster_thresh=1.5,
|
133 |
+
cluster_closed_points=300,
|
134 |
+
cluster_propose_points=100,
|
135 |
+
cluster_min_points=50,
|
136 |
+
voxel_size=0.02,
|
137 |
+
)
|
138 |
+
|
139 |
+
# scheduler settings
|
140 |
+
epoch = 3000
|
141 |
+
optimizer = dict(type="SGD", lr=0.1, momentum=0.9, weight_decay=0.0001, nesterov=True)
|
142 |
+
scheduler = dict(type="PolyLR")
|
143 |
+
|
144 |
+
# dataset settings
|
145 |
+
dataset_type = "S3DISDataset"
|
146 |
+
data_root = "data/s3dis"
|
147 |
+
|
148 |
+
data = dict(
|
149 |
+
num_classes=num_classes,
|
150 |
+
ignore_index=-1,
|
151 |
+
names=class_names,
|
152 |
+
train=dict(
|
153 |
+
type=dataset_type,
|
154 |
+
split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"),
|
155 |
+
data_root=data_root,
|
156 |
+
transform=[
|
157 |
+
dict(type="CenterShift", apply_z=True),
|
158 |
+
dict(
|
159 |
+
type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.5
|
160 |
+
),
|
161 |
+
# dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis='z', p=0.75),
|
162 |
+
dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
|
163 |
+
dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
|
164 |
+
dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
|
165 |
+
dict(type="RandomScale", scale=[0.9, 1.1]),
|
166 |
+
# dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
|
167 |
+
dict(type="RandomFlip", p=0.5),
|
168 |
+
# dict(type="RandomJitter", sigma=0.005, clip=0.02),
|
169 |
+
# dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
|
170 |
+
dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
|
171 |
+
dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
|
172 |
+
dict(type="ChromaticJitter", p=0.95, std=0.005),
|
173 |
+
# dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
|
174 |
+
# dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
|
175 |
+
dict(
|
176 |
+
type="GridSample",
|
177 |
+
grid_size=0.02,
|
178 |
+
hash_type="fnv",
|
179 |
+
mode="train",
|
180 |
+
return_grid_coord=True,
|
181 |
+
keys=("coord", "color", "normal", "segment", "instance"),
|
182 |
+
),
|
183 |
+
dict(type="SphereCrop", sample_rate=0.8, mode="random"),
|
184 |
+
dict(type="NormalizeColor"),
|
185 |
+
dict(
|
186 |
+
type="InstanceParser",
|
187 |
+
segment_ignore_index=segment_ignore_index,
|
188 |
+
instance_ignore_index=-1,
|
189 |
+
),
|
190 |
+
dict(type="Add", keys_dict={"condition": "S3DIS"}),
|
191 |
+
dict(type="ToTensor"),
|
192 |
+
dict(
|
193 |
+
type="Collect",
|
194 |
+
keys=(
|
195 |
+
"coord",
|
196 |
+
"grid_coord",
|
197 |
+
"segment",
|
198 |
+
"instance",
|
199 |
+
"instance_centroid",
|
200 |
+
"bbox",
|
201 |
+
"condition",
|
202 |
+
),
|
203 |
+
feat_keys=("color", "normal"),
|
204 |
+
),
|
205 |
+
],
|
206 |
+
test_mode=False,
|
207 |
+
),
|
208 |
+
val=dict(
|
209 |
+
type=dataset_type,
|
210 |
+
split="Area_5",
|
211 |
+
data_root=data_root,
|
212 |
+
transform=[
|
213 |
+
dict(type="CenterShift", apply_z=True),
|
214 |
+
dict(
|
215 |
+
type="Copy",
|
216 |
+
keys_dict={
|
217 |
+
"coord": "origin_coord",
|
218 |
+
"segment": "origin_segment",
|
219 |
+
"instance": "origin_instance",
|
220 |
+
},
|
221 |
+
),
|
222 |
+
dict(
|
223 |
+
type="GridSample",
|
224 |
+
grid_size=0.02,
|
225 |
+
hash_type="fnv",
|
226 |
+
mode="train",
|
227 |
+
return_grid_coord=True,
|
228 |
+
keys=("coord", "color", "normal", "segment", "instance"),
|
229 |
+
),
|
230 |
+
# dict(type="SphereCrop", point_max=1000000, mode='center'),
|
231 |
+
dict(type="CenterShift", apply_z=False),
|
232 |
+
dict(type="NormalizeColor"),
|
233 |
+
dict(
|
234 |
+
type="InstanceParser",
|
235 |
+
segment_ignore_index=segment_ignore_index,
|
236 |
+
instance_ignore_index=-1,
|
237 |
+
),
|
238 |
+
dict(type="Add", keys_dict={"condition": "S3DIS"}),
|
239 |
+
dict(type="ToTensor"),
|
240 |
+
dict(
|
241 |
+
type="Collect",
|
242 |
+
keys=(
|
243 |
+
"coord",
|
244 |
+
"grid_coord",
|
245 |
+
"segment",
|
246 |
+
"instance",
|
247 |
+
"origin_coord",
|
248 |
+
"origin_segment",
|
249 |
+
"origin_instance",
|
250 |
+
"instance_centroid",
|
251 |
+
"bbox",
|
252 |
+
"condition",
|
253 |
+
),
|
254 |
+
feat_keys=("color", "normal"),
|
255 |
+
offset_keys_dict=dict(offset="coord", origin_offset="origin_coord"),
|
256 |
+
),
|
257 |
+
],
|
258 |
+
test_mode=False,
|
259 |
+
),
|
260 |
+
test=dict(),  # test dataset config is left empty: testing is currently not available
|
261 |
+
)
|
262 |
+
|
263 |
+
hooks = [
|
264 |
+
dict(type="CheckpointLoader", keywords="module.", replacement="module.backbone."),
|
265 |
+
dict(type="IterationTimer", warmup_iter=2),
|
266 |
+
dict(type="InformationWriter"),
|
267 |
+
dict(
|
268 |
+
type="InsSegEvaluator",
|
269 |
+
segment_ignore_index=segment_ignore_index,
|
270 |
+
instance_ignore_index=-1,
|
271 |
+
),
|
272 |
+
dict(type="CheckpointSaver", save_freq=None),
|
273 |
+
]
|
Pointcept/configs/s3dis/semseg-minkunet34c-0-base.py
ADDED
@@ -0,0 +1,174 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
_base_ = ["../_base_/default_runtime.py"]
|
2 |
+
# miscellaneous custom settings
|
3 |
+
batch_size = 12  # total batch size summed over all GPUs (not per-GPU)
|
4 |
+
mix_prob = 0.8
|
5 |
+
empty_cache = False
|
6 |
+
enable_amp = True
|
7 |
+
|
8 |
+
# model settings
|
9 |
+
model = dict(
|
10 |
+
type="DefaultSegmentor",
|
11 |
+
backbone=dict(type="MinkUNet34C", in_channels=6, out_channels=13),
|
12 |
+
criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
|
13 |
+
)
|
14 |
+
|
15 |
+
# scheduler settings
|
16 |
+
epoch = 3000
|
17 |
+
optimizer = dict(type="SGD", lr=0.1, momentum=0.9, weight_decay=0.0001, nesterov=True)
|
18 |
+
scheduler = dict(type="PolyLR")
|
19 |
+
|
20 |
+
|
21 |
+
# dataset settings
|
22 |
+
dataset_type = "S3DISDataset"
|
23 |
+
data_root = "data/s3dis"
|
24 |
+
|
25 |
+
data = dict(
|
26 |
+
num_classes=13,
|
27 |
+
ignore_index=-1,
|
28 |
+
names=[
|
29 |
+
"ceiling",
|
30 |
+
"floor",
|
31 |
+
"wall",
|
32 |
+
"beam",
|
33 |
+
"column",
|
34 |
+
"window",
|
35 |
+
"door",
|
36 |
+
"table",
|
37 |
+
"chair",
|
38 |
+
"sofa",
|
39 |
+
"bookcase",
|
40 |
+
"board",
|
41 |
+
"clutter",
|
42 |
+
],
|
43 |
+
train=dict(
|
44 |
+
type=dataset_type,
|
45 |
+
split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"),
|
46 |
+
data_root=data_root,
|
47 |
+
transform=[
|
48 |
+
dict(type="CenterShift", apply_z=True),
|
49 |
+
dict(
|
50 |
+
type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
|
51 |
+
),
|
52 |
+
# dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
|
53 |
+
dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
|
54 |
+
dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
|
55 |
+
dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
|
56 |
+
dict(type="RandomScale", scale=[0.9, 1.1]),
|
57 |
+
# dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
|
58 |
+
dict(type="RandomFlip", p=0.5),
|
59 |
+
dict(type="RandomJitter", sigma=0.005, clip=0.02),
|
60 |
+
dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
|
61 |
+
dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
|
62 |
+
dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
|
63 |
+
dict(type="ChromaticJitter", p=0.95, std=0.05),
|
64 |
+
# dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
|
65 |
+
# dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
|
66 |
+
dict(
|
67 |
+
type="GridSample",
|
68 |
+
grid_size=0.05,
|
69 |
+
hash_type="fnv",
|
70 |
+
mode="train",
|
71 |
+
keys=("coord", "color", "segment"),
|
72 |
+
return_grid_coord=True,
|
73 |
+
),
|
74 |
+
dict(type="SphereCrop", point_max=100000, mode="random"),
|
75 |
+
dict(type="CenterShift", apply_z=False),
|
76 |
+
dict(type="NormalizeColor"),
|
77 |
+
dict(type="ShufflePoint"),
|
78 |
+
dict(type="ToTensor"),
|
79 |
+
dict(
|
80 |
+
type="Collect",
|
81 |
+
keys=("coord", "grid_coord", "segment"),
|
82 |
+
feat_keys=["coord", "color"],
|
83 |
+
),
|
84 |
+
],
|
85 |
+
test_mode=False,
|
86 |
+
),
|
87 |
+
val=dict(
|
88 |
+
type=dataset_type,
|
89 |
+
split="Area_5",
|
90 |
+
data_root=data_root,
|
91 |
+
transform=[
|
92 |
+
dict(type="CenterShift", apply_z=True),
|
93 |
+
dict(
|
94 |
+
type="Copy",
|
95 |
+
keys_dict={"coord": "origin_coord", "segment": "origin_segment"},
|
96 |
+
),
|
97 |
+
dict(
|
98 |
+
type="GridSample",
|
99 |
+
grid_size=0.05,
|
100 |
+
hash_type="fnv",
|
101 |
+
mode="train",
|
102 |
+
keys=("coord", "color", "segment"),
|
103 |
+
return_grid_coord=True,
|
104 |
+
),
|
105 |
+
dict(type="CenterShift", apply_z=False),
|
106 |
+
dict(type="NormalizeColor"),
|
107 |
+
dict(type="ToTensor"),
|
108 |
+
dict(
|
109 |
+
type="Collect",
|
110 |
+
keys=(
|
111 |
+
"coord",
|
112 |
+
"grid_coord",
|
113 |
+
"origin_coord",
|
114 |
+
"segment",
|
115 |
+
"origin_segment",
|
116 |
+
),
|
117 |
+
offset_keys_dict=dict(offset="coord", origin_offset="origin_coord"),
|
118 |
+
feat_keys=["coord", "color"],
|
119 |
+
),
|
120 |
+
],
|
121 |
+
test_mode=False,
|
122 |
+
),
|
123 |
+
test=dict(
|
124 |
+
type=dataset_type,
|
125 |
+
split="Area_5",
|
126 |
+
data_root=data_root,
|
127 |
+
transform=[dict(type="CenterShift", apply_z=True), dict(type="NormalizeColor")],
|
128 |
+
test_mode=True,
|
129 |
+
test_cfg=dict(
|
130 |
+
voxelize=dict(
|
131 |
+
type="GridSample",
|
132 |
+
grid_size=0.05,
|
133 |
+
hash_type="fnv",
|
134 |
+
mode="test",
|
135 |
+
keys=("coord", "color"),
|
136 |
+
return_grid_coord=True,
|
137 |
+
),
|
138 |
+
crop=None,
|
139 |
+
post_transform=[
|
140 |
+
dict(type="CenterShift", apply_z=False),
|
141 |
+
dict(type="ToTensor"),
|
142 |
+
dict(
|
143 |
+
type="Collect",
|
144 |
+
keys=("coord", "grid_coord", "index"),
|
145 |
+
feat_keys=("coord", "color"),
|
146 |
+
),
|
147 |
+
],
|
148 |
+
aug_transform=[
|
149 |
+
[dict(type="RandomScale", scale=[0.9, 0.9])],
|
150 |
+
[dict(type="RandomScale", scale=[0.95, 0.95])],
|
151 |
+
[dict(type="RandomScale", scale=[1, 1])],
|
152 |
+
[dict(type="RandomScale", scale=[1.05, 1.05])],
|
153 |
+
[dict(type="RandomScale", scale=[1.1, 1.1])],
|
154 |
+
[
|
155 |
+
dict(type="RandomScale", scale=[0.9, 0.9]),
|
156 |
+
dict(type="RandomFlip", p=1),
|
157 |
+
],
|
158 |
+
[
|
159 |
+
dict(type="RandomScale", scale=[0.95, 0.95]),
|
160 |
+
dict(type="RandomFlip", p=1),
|
161 |
+
],
|
162 |
+
[dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)],
|
163 |
+
[
|
164 |
+
dict(type="RandomScale", scale=[1.05, 1.05]),
|
165 |
+
dict(type="RandomFlip", p=1),
|
166 |
+
],
|
167 |
+
[
|
168 |
+
dict(type="RandomScale", scale=[1.1, 1.1]),
|
169 |
+
dict(type="RandomFlip", p=1),
|
170 |
+
],
|
171 |
+
],
|
172 |
+
),
|
173 |
+
),
|
174 |
+
)
|
Pointcept/configs/s3dis/semseg-ppt-v1m1-0-s3-sc-st-spunet.py
ADDED
@@ -0,0 +1,496 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
_base_ = ["../_base_/default_runtime.py"]
|
2 |
+
|
3 |
+
# miscellaneous custom settings
|
4 |
+
batch_size = 24  # total batch size summed over all GPUs (not per-GPU)
|
5 |
+
num_worker = 48
|
6 |
+
mix_prob = 0.8
|
7 |
+
empty_cache = False
|
8 |
+
enable_amp = True
|
9 |
+
find_unused_parameters = True
|
10 |
+
|
11 |
+
# trainer
|
12 |
+
train = dict(
|
13 |
+
type="MultiDatasetTrainer",
|
14 |
+
)
|
15 |
+
|
16 |
+
# model settings
|
17 |
+
model = dict(
|
18 |
+
type="PPT-v1m1",
|
19 |
+
backbone=dict(
|
20 |
+
type="SpUNet-v1m3",
|
21 |
+
in_channels=6,
|
22 |
+
num_classes=0,
|
23 |
+
base_channels=32,
|
24 |
+
context_channels=256,
|
25 |
+
channels=(32, 64, 128, 256, 256, 128, 96, 96),
|
26 |
+
layers=(2, 3, 4, 6, 2, 2, 2, 2),
|
27 |
+
cls_mode=False,
|
28 |
+
conditions=("ScanNet", "S3DIS", "Structured3D"),
|
29 |
+
zero_init=False,
|
30 |
+
norm_decouple=True,
|
31 |
+
norm_adaptive=True,
|
32 |
+
norm_affine=True,
|
33 |
+
),
|
34 |
+
criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
|
35 |
+
backbone_out_channels=96,
|
36 |
+
context_channels=256,
|
37 |
+
conditions=("Structured3D", "ScanNet", "S3DIS"),
|
38 |
+
template="[x]",
|
39 |
+
clip_model="ViT-B/16",
|
40 |
+
class_name=(
|
41 |
+
"wall",
|
42 |
+
"floor",
|
43 |
+
"cabinet",
|
44 |
+
"bed",
|
45 |
+
"chair",
|
46 |
+
"sofa",
|
47 |
+
"table",
|
48 |
+
"door",
|
49 |
+
"window",
|
50 |
+
"bookshelf",
|
51 |
+
"bookcase",
|
52 |
+
"picture",
|
53 |
+
"counter",
|
54 |
+
"desk",
|
55 |
+
"shelves",
|
56 |
+
"curtain",
|
57 |
+
"dresser",
|
58 |
+
"pillow",
|
59 |
+
"mirror",
|
60 |
+
"ceiling",
|
61 |
+
"refrigerator",
|
62 |
+
"television",
|
63 |
+
"shower curtain",
|
64 |
+
"nightstand",
|
65 |
+
"toilet",
|
66 |
+
"sink",
|
67 |
+
"lamp",
|
68 |
+
"bathtub",
|
69 |
+
"garbagebin",
|
70 |
+
"board",
|
71 |
+
"beam",
|
72 |
+
"column",
|
73 |
+
"clutter",
|
74 |
+
"otherstructure",
|
75 |
+
"otherfurniture",
|
76 |
+
"otherprop",
|
77 |
+
),
|
78 |
+
valid_index=(
|
79 |
+
(
|
80 |
+
0,
|
81 |
+
1,
|
82 |
+
2,
|
83 |
+
3,
|
84 |
+
4,
|
85 |
+
5,
|
86 |
+
6,
|
87 |
+
7,
|
88 |
+
8,
|
89 |
+
11,
|
90 |
+
13,
|
91 |
+
14,
|
92 |
+
15,
|
93 |
+
16,
|
94 |
+
17,
|
95 |
+
18,
|
96 |
+
19,
|
97 |
+
20,
|
98 |
+
21,
|
99 |
+
23,
|
100 |
+
25,
|
101 |
+
26,
|
102 |
+
33,
|
103 |
+
34,
|
104 |
+
35,
|
105 |
+
),
|
106 |
+
(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 15, 20, 22, 24, 25, 27, 34),
|
107 |
+
(0, 1, 4, 5, 6, 7, 8, 10, 19, 29, 30, 31, 32),
|
108 |
+
),
|
109 |
+
backbone_mode=False,
|
110 |
+
)
|
111 |
+
|
112 |
+
# scheduler settings
|
113 |
+
epoch = 100
|
114 |
+
optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True)
|
115 |
+
scheduler = dict(
|
116 |
+
type="OneCycleLR",
|
117 |
+
max_lr=optimizer["lr"],
|
118 |
+
pct_start=0.05,
|
119 |
+
anneal_strategy="cos",
|
120 |
+
div_factor=10.0,
|
121 |
+
final_div_factor=10000.0,
|
122 |
+
)
|
123 |
+
# param_dicts = [dict(keyword="modulation", lr=0.005)]
|
124 |
+
|
125 |
+
# dataset settings
|
126 |
+
data = dict(
|
127 |
+
num_classes=13,
|
128 |
+
ignore_index=-1,
|
129 |
+
names=[
|
130 |
+
"ceiling",
|
131 |
+
"floor",
|
132 |
+
"wall",
|
133 |
+
"beam",
|
134 |
+
"column",
|
135 |
+
"window",
|
136 |
+
"door",
|
137 |
+
"table",
|
138 |
+
"chair",
|
139 |
+
"sofa",
|
140 |
+
"bookcase",
|
141 |
+
"board",
|
142 |
+
"clutter",
|
143 |
+
],
|
144 |
+
train=dict(
|
145 |
+
type="ConcatDataset",
|
146 |
+
datasets=[
|
147 |
+
# Structured3D
|
148 |
+
dict(
|
149 |
+
type="Structured3DDataset",
|
150 |
+
split="train",
|
151 |
+
data_root="data/structured3d",
|
152 |
+
transform=[
|
153 |
+
dict(type="CenterShift", apply_z=True),
|
154 |
+
dict(
|
155 |
+
type="RandomDropout",
|
156 |
+
dropout_ratio=0.2,
|
157 |
+
dropout_application_ratio=0.2,
|
158 |
+
),
|
159 |
+
# dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
|
160 |
+
dict(
|
161 |
+
type="RandomRotate",
|
162 |
+
angle=[-1, 1],
|
163 |
+
axis="z",
|
164 |
+
center=[0, 0, 0],
|
165 |
+
p=0.5,
|
166 |
+
),
|
167 |
+
dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
|
168 |
+
dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
|
169 |
+
dict(type="RandomScale", scale=[0.9, 1.1]),
|
170 |
+
# dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
|
171 |
+
dict(type="RandomFlip", p=0.5),
|
172 |
+
dict(type="RandomJitter", sigma=0.005, clip=0.02),
|
173 |
+
dict(
|
174 |
+
type="ElasticDistortion",
|
175 |
+
distortion_params=[[0.2, 0.4], [0.8, 1.6]],
|
176 |
+
),
|
177 |
+
dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
|
178 |
+
dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
|
179 |
+
dict(type="ChromaticJitter", p=0.95, std=0.05),
|
180 |
+
# dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
|
181 |
+
# dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
|
182 |
+
dict(
|
183 |
+
type="GridSample",
|
184 |
+
grid_size=0.02,
|
185 |
+
hash_type="fnv",
|
186 |
+
mode="train",
|
187 |
+
return_grid_coord=True,
|
188 |
+
),
|
189 |
+
dict(type="SphereCrop", sample_rate=0.8, mode="random"),
|
190 |
+
dict(type="CenterShift", apply_z=False),
|
191 |
+
dict(type="NormalizeColor"),
|
192 |
+
dict(type="ShufflePoint"),
|
193 |
+
dict(type="Add", keys_dict={"condition": "Structured3D"}),
|
194 |
+
dict(type="ToTensor"),
|
195 |
+
dict(
|
196 |
+
type="Collect",
|
197 |
+
keys=("coord", "grid_coord", "segment", "condition"),
|
198 |
+
feat_keys=("color", "normal"),
|
199 |
+
),
|
200 |
+
],
|
201 |
+
test_mode=False,
|
202 |
+
loop=4, # sampling weight
|
203 |
+
),
|
204 |
+
# ScanNet
|
205 |
+
dict(
|
206 |
+
type="ScanNetDataset",
|
207 |
+
split="train",
|
208 |
+
data_root="data/scannet",
|
209 |
+
transform=[
|
210 |
+
dict(type="CenterShift", apply_z=True),
|
211 |
+
dict(
|
212 |
+
type="RandomDropout",
|
213 |
+
dropout_ratio=0.2,
|
214 |
+
dropout_application_ratio=0.2,
|
215 |
+
),
|
216 |
+
# dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
|
217 |
+
dict(
|
218 |
+
type="RandomRotate",
|
219 |
+
angle=[-1, 1],
|
220 |
+
axis="z",
|
221 |
+
center=[0, 0, 0],
|
222 |
+
p=0.5,
|
223 |
+
),
|
224 |
+
dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
|
225 |
+
dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
|
226 |
+
dict(type="RandomScale", scale=[0.9, 1.1]),
|
227 |
+
# dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
|
228 |
+
dict(type="RandomFlip", p=0.5),
|
229 |
+
dict(type="RandomJitter", sigma=0.005, clip=0.02),
|
230 |
+
dict(
|
231 |
+
type="ElasticDistortion",
|
232 |
+
distortion_params=[[0.2, 0.4], [0.8, 1.6]],
|
233 |
+
),
|
234 |
+
dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
|
235 |
+
dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
|
236 |
+
dict(type="ChromaticJitter", p=0.95, std=0.05),
|
237 |
+
# dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
|
238 |
+
# dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
|
239 |
+
dict(
|
240 |
+
type="GridSample",
|
241 |
+
grid_size=0.02,
|
242 |
+
hash_type="fnv",
|
243 |
+
mode="train",
|
244 |
+
return_grid_coord=True,
|
245 |
+
),
|
246 |
+
dict(type="SphereCrop", point_max=100000, mode="random"),
|
247 |
+
dict(type="CenterShift", apply_z=False),
|
248 |
+
dict(type="NormalizeColor"),
|
249 |
+
dict(type="ShufflePoint"),
|
250 |
+
dict(type="Add", keys_dict={"condition": "ScanNet"}),
|
251 |
+
dict(type="ToTensor"),
|
252 |
+
dict(
|
253 |
+
type="Collect",
|
254 |
+
keys=("coord", "grid_coord", "segment", "condition"),
|
255 |
+
feat_keys=("color", "normal"),
|
256 |
+
),
|
257 |
+
],
|
258 |
+
test_mode=False,
|
259 |
+
loop=2, # sampling weight
|
260 |
+
),
|
261 |
+
# S3DIS
|
262 |
+
dict(
|
263 |
+
type="S3DISDataset",
|
264 |
+
split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"),
|
265 |
+
data_root="data/s3dis",
|
266 |
+
transform=[
|
267 |
+
dict(type="CenterShift", apply_z=True),
|
268 |
+
dict(
|
269 |
+
type="RandomDropout",
|
270 |
+
dropout_ratio=0.2,
|
271 |
+
dropout_application_ratio=0.2,
|
272 |
+
),
|
273 |
+
# dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
|
274 |
+
dict(
|
275 |
+
type="RandomRotate",
|
276 |
+
angle=[-1, 1],
|
277 |
+
axis="z",
|
278 |
+
center=[0, 0, 0],
|
279 |
+
p=0.5,
|
280 |
+
),
|
281 |
+
dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
|
282 |
+
dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
|
283 |
+
dict(type="RandomScale", scale=[0.9, 1.1]),
|
284 |
+
# dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
|
285 |
+
dict(type="RandomFlip", p=0.5),
|
286 |
+
dict(type="RandomJitter", sigma=0.005, clip=0.02),
|
287 |
+
dict(
|
288 |
+
type="ElasticDistortion",
|
289 |
+
distortion_params=[[0.2, 0.4], [0.8, 1.6]],
|
290 |
+
),
|
291 |
+
dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
|
292 |
+
dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
|
293 |
+
dict(type="ChromaticJitter", p=0.95, std=0.05),
|
294 |
+
# dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
|
295 |
+
# dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
|
296 |
+
dict(
|
297 |
+
type="GridSample",
|
298 |
+
grid_size=0.02,
|
299 |
+
hash_type="fnv",
|
300 |
+
mode="train",
|
301 |
+
return_grid_coord=True,
|
302 |
+
),
|
303 |
+
dict(type="SphereCrop", sample_rate=0.6, mode="random"),
|
304 |
+
dict(type="CenterShift", apply_z=False),
|
305 |
+
dict(type="NormalizeColor"),
|
306 |
+
dict(type="ShufflePoint"),
|
307 |
+
dict(type="Add", keys_dict={"condition": "S3DIS"}),
|
308 |
+
dict(type="ToTensor"),
|
309 |
+
dict(
|
310 |
+
type="Collect",
|
311 |
+
keys=("coord", "grid_coord", "segment", "condition"),
|
312 |
+
feat_keys=("color", "normal"),
|
313 |
+
),
|
314 |
+
],
|
315 |
+
test_mode=False,
|
316 |
+
loop=1, # sampling weight
|
317 |
+
),
|
318 |
+
],
|
319 |
+
),
|
320 |
+
val=dict(
|
321 |
+
type="S3DISDataset",
|
322 |
+
split="Area_5",
|
323 |
+
data_root="data/s3dis",
|
324 |
+
transform=[
|
325 |
+
dict(type="CenterShift", apply_z=True),
|
326 |
+
dict(
|
327 |
+
type="GridSample",
|
328 |
+
grid_size=0.02,
|
329 |
+
hash_type="fnv",
|
330 |
+
mode="train",
|
331 |
+
return_grid_coord=True,
|
332 |
+
),
|
333 |
+
# dict(type="SphereCrop", point_max=1000000, mode="center"),
|
334 |
+
dict(type="CenterShift", apply_z=False),
|
335 |
+
dict(type="NormalizeColor"),
|
336 |
+
dict(type="ToTensor"),
|
337 |
+
dict(type="Add", keys_dict={"condition": "S3DIS"}),
|
338 |
+
dict(
|
339 |
+
type="Collect",
|
340 |
+
keys=("coord", "grid_coord", "segment", "condition"),
|
341 |
+
feat_keys=("color", "normal"),
|
342 |
+
),
|
343 |
+
],
|
344 |
+
test_mode=False,
|
345 |
+
),
|
346 |
+
test=dict(
|
347 |
+
type="S3DISDataset",
|
348 |
+
split="Area_5",
|
349 |
+
data_root="data/s3dis",
|
350 |
+
transform=[
|
351 |
+
dict(type="CenterShift", apply_z=True),
|
352 |
+
dict(type="NormalizeColor"),
|
353 |
+
],
|
354 |
+
test_mode=True,
|
355 |
+
test_cfg=dict(
|
356 |
+
voxelize=dict(
|
357 |
+
type="GridSample",
|
358 |
+
grid_size=0.02,
|
359 |
+
hash_type="fnv",
|
360 |
+
mode="test",
|
361 |
+
return_grid_coord=True,
|
362 |
+
keys=("coord", "color", "normal"),
|
363 |
+
),
|
364 |
+
crop=None,
|
365 |
+
post_transform=[
|
366 |
+
dict(type="CenterShift", apply_z=False),
|
367 |
+
dict(type="Add", keys_dict={"condition": "S3DIS"}),
|
368 |
+
dict(type="ToTensor"),
|
369 |
+
dict(
|
370 |
+
type="Collect",
|
371 |
+
keys=("coord", "grid_coord", "index", "condition"),
|
372 |
+
feat_keys=("color", "normal"),
|
373 |
+
),
|
374 |
+
],
|
375 |
+
aug_transform=[
|
376 |
+
[
|
377 |
+
dict(
|
378 |
+
type="RandomRotateTargetAngle",
|
379 |
+
angle=[0],
|
380 |
+
axis="z",
|
381 |
+
center=[0, 0, 0],
|
382 |
+
p=1,
|
383 |
+
)
|
384 |
+
],
|
385 |
+
[
|
386 |
+
dict(
|
387 |
+
type="RandomRotateTargetAngle",
|
388 |
+
angle=[1 / 2],
|
389 |
+
axis="z",
|
390 |
+
center=[0, 0, 0],
|
391 |
+
p=1,
|
392 |
+
)
|
393 |
+
],
|
394 |
+
[
|
395 |
+
dict(
|
396 |
+
type="RandomRotateTargetAngle",
|
397 |
+
angle=[1],
|
398 |
+
axis="z",
|
399 |
+
center=[0, 0, 0],
|
400 |
+
p=1,
|
401 |
+
)
|
402 |
+
],
|
403 |
+
[
|
404 |
+
dict(
|
405 |
+
type="RandomRotateTargetAngle",
|
406 |
+
angle=[3 / 2],
|
407 |
+
axis="z",
|
408 |
+
center=[0, 0, 0],
|
409 |
+
p=1,
|
410 |
+
)
|
411 |
+
],
|
412 |
+
[
|
413 |
+
dict(
|
414 |
+
type="RandomRotateTargetAngle",
|
415 |
+
angle=[0],
|
416 |
+
axis="z",
|
417 |
+
center=[0, 0, 0],
|
418 |
+
p=1,
|
419 |
+
),
|
420 |
+
dict(type="RandomScale", scale=[0.95, 0.95]),
|
421 |
+
],
|
422 |
+
[
|
423 |
+
dict(
|
424 |
+
type="RandomRotateTargetAngle",
|
425 |
+
angle=[1 / 2],
|
426 |
+
axis="z",
|
427 |
+
center=[0, 0, 0],
|
428 |
+
p=1,
|
429 |
+
),
|
430 |
+
dict(type="RandomScale", scale=[0.95, 0.95]),
|
431 |
+
],
|
432 |
+
[
|
433 |
+
dict(
|
434 |
+
type="RandomRotateTargetAngle",
|
435 |
+
angle=[1],
|
436 |
+
axis="z",
|
437 |
+
center=[0, 0, 0],
|
438 |
+
p=1,
|
439 |
+
),
|
440 |
+
dict(type="RandomScale", scale=[0.95, 0.95]),
|
441 |
+
],
|
442 |
+
[
|
443 |
+
dict(
|
444 |
+
type="RandomRotateTargetAngle",
|
445 |
+
angle=[3 / 2],
|
446 |
+
axis="z",
|
447 |
+
center=[0, 0, 0],
|
448 |
+
p=1,
|
449 |
+
),
|
450 |
+
dict(type="RandomScale", scale=[0.95, 0.95]),
|
451 |
+
],
|
452 |
+
[
|
453 |
+
dict(
|
454 |
+
type="RandomRotateTargetAngle",
|
455 |
+
angle=[0],
|
456 |
+
axis="z",
|
457 |
+
center=[0, 0, 0],
|
458 |
+
p=1,
|
459 |
+
),
|
460 |
+
dict(type="RandomScale", scale=[1.05, 1.05]),
|
461 |
+
],
|
462 |
+
[
|
463 |
+
dict(
|
464 |
+
type="RandomRotateTargetAngle",
|
465 |
+
angle=[1 / 2],
|
466 |
+
axis="z",
|
467 |
+
center=[0, 0, 0],
|
468 |
+
p=1,
|
469 |
+
),
|
470 |
+
dict(type="RandomScale", scale=[1.05, 1.05]),
|
471 |
+
],
|
472 |
+
[
|
473 |
+
dict(
|
474 |
+
type="RandomRotateTargetAngle",
|
475 |
+
angle=[1],
|
476 |
+
axis="z",
|
477 |
+
center=[0, 0, 0],
|
478 |
+
p=1,
|
479 |
+
),
|
480 |
+
dict(type="RandomScale", scale=[1.05, 1.05]),
|
481 |
+
],
|
482 |
+
[
|
483 |
+
dict(
|
484 |
+
type="RandomRotateTargetAngle",
|
485 |
+
angle=[3 / 2],
|
486 |
+
axis="z",
|
487 |
+
center=[0, 0, 0],
|
488 |
+
p=1,
|
489 |
+
),
|
490 |
+
dict(type="RandomScale", scale=[1.05, 1.05]),
|
491 |
+
],
|
492 |
+
[dict(type="RandomFlip", p=1)],
|
493 |
+
],
|
494 |
+
),
|
495 |
+
),
|
496 |
+
)
|
Pointcept/configs/s3dis/semseg-pt-v1-0-base.py
ADDED
@@ -0,0 +1,170 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
_base_ = ["../_base_/default_runtime.py"]
|
2 |
+
# misc custom setting
|
3 |
+
batch_size = 12 # bs: total bs in all gpus
|
4 |
+
mix_prob = 0.8
|
5 |
+
empty_cache = False
|
6 |
+
enable_amp = True
|
7 |
+
|
8 |
+
# model settings
|
9 |
+
model = dict(
|
10 |
+
type="DefaultSegmentor",
|
11 |
+
backbone=dict(
|
12 |
+
type="PointTransformer-Seg50",
|
13 |
+
in_channels=6,
|
14 |
+
num_classes=13,
|
15 |
+
),
|
16 |
+
criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
|
17 |
+
)
|
18 |
+
|
19 |
+
|
20 |
+
# scheduler settings
|
21 |
+
epoch = 3000
|
22 |
+
optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05)
|
23 |
+
scheduler = dict(type="MultiStepLR", milestones=[0.6, 0.8], gamma=0.1)
|
24 |
+
|
25 |
+
# dataset settings
|
26 |
+
dataset_type = "S3DISDataset"
|
27 |
+
data_root = "data/s3dis"
|
28 |
+
|
29 |
+
data = dict(
|
30 |
+
num_classes=13,
|
31 |
+
ignore_index=-1,
|
32 |
+
names=[
|
33 |
+
"ceiling",
|
34 |
+
"floor",
|
35 |
+
"wall",
|
36 |
+
"beam",
|
37 |
+
"column",
|
38 |
+
"window",
|
39 |
+
"door",
|
40 |
+
"table",
|
41 |
+
"chair",
|
42 |
+
"sofa",
|
43 |
+
"bookcase",
|
44 |
+
"board",
|
45 |
+
"clutter",
|
46 |
+
],
|
47 |
+
train=dict(
|
48 |
+
type=dataset_type,
|
49 |
+
split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"),
|
50 |
+
data_root=data_root,
|
51 |
+
transform=[
|
52 |
+
dict(type="CenterShift", apply_z=True),
|
53 |
+
# dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
|
54 |
+
# dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
|
55 |
+
# dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
|
56 |
+
# dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
|
57 |
+
# dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
|
58 |
+
dict(type="RandomScale", scale=[0.9, 1.1]),
|
59 |
+
# dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
|
60 |
+
dict(type="RandomFlip", p=0.5),
|
61 |
+
dict(type="RandomJitter", sigma=0.005, clip=0.02),
|
62 |
+
# dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
|
63 |
+
dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
|
64 |
+
dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
|
65 |
+
dict(type="ChromaticJitter", p=0.95, std=0.05),
|
66 |
+
# dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
|
67 |
+
# dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
|
68 |
+
dict(
|
69 |
+
type="GridSample",
|
70 |
+
grid_size=0.04,
|
71 |
+
hash_type="fnv",
|
72 |
+
mode="train",
|
73 |
+
keys=("coord", "color", "segment"),
|
74 |
+
return_grid_coord=True,
|
75 |
+
),
|
76 |
+
dict(type="SphereCrop", point_max=100000, mode="random"),
|
77 |
+
dict(type="CenterShift", apply_z=False),
|
78 |
+
dict(type="NormalizeColor"),
|
79 |
+
# dict(type="ShufflePoint"),
|
80 |
+
dict(type="ToTensor"),
|
81 |
+
dict(
|
82 |
+
type="Collect",
|
83 |
+
keys=("coord", "grid_coord", "segment"),
|
84 |
+
feat_keys=["coord", "color"],
|
85 |
+
),
|
86 |
+
],
|
87 |
+
test_mode=False,
|
88 |
+
),
|
89 |
+
val=dict(
|
90 |
+
type=dataset_type,
|
91 |
+
split="Area_5",
|
92 |
+
data_root=data_root,
|
93 |
+
transform=[
|
94 |
+
dict(type="CenterShift", apply_z=True),
|
95 |
+
dict(
|
96 |
+
type="Copy",
|
97 |
+
keys_dict={"coord": "origin_coord", "segment": "origin_segment"},
|
98 |
+
),
|
99 |
+
dict(
|
100 |
+
type="GridSample",
|
101 |
+
grid_size=0.04,
|
102 |
+
hash_type="fnv",
|
103 |
+
mode="train",
|
104 |
+
keys=("coord", "color", "segment"),
|
105 |
+
return_grid_coord=True,
|
106 |
+
),
|
107 |
+
dict(type="CenterShift", apply_z=False),
|
108 |
+
dict(type="NormalizeColor"),
|
109 |
+
dict(type="ToTensor"),
|
110 |
+
dict(
|
111 |
+
type="Collect",
|
112 |
+
keys=("coord", "grid_coord", "segment"),
|
113 |
+
offset_keys_dict=dict(offset="coord"),
|
114 |
+
feat_keys=["coord", "color"],
|
115 |
+
),
|
116 |
+
],
|
117 |
+
test_mode=False,
|
118 |
+
),
|
119 |
+
test=dict(
|
120 |
+
type=dataset_type,
|
121 |
+
split="Area_5",
|
122 |
+
data_root=data_root,
|
123 |
+
transform=[dict(type="CenterShift", apply_z=True), dict(type="NormalizeColor")],
|
124 |
+
test_mode=True,
|
125 |
+
test_cfg=dict(
|
126 |
+
voxelize=dict(
|
127 |
+
type="GridSample",
|
128 |
+
grid_size=0.04,
|
129 |
+
hash_type="fnv",
|
130 |
+
mode="test",
|
131 |
+
keys=("coord", "color"),
|
132 |
+
return_grid_coord=True,
|
133 |
+
),
|
134 |
+
crop=None,
|
135 |
+
post_transform=[
|
136 |
+
dict(type="CenterShift", apply_z=False),
|
137 |
+
dict(type="ToTensor"),
|
138 |
+
dict(
|
139 |
+
type="Collect",
|
140 |
+
keys=("coord", "grid_coord", "index"),
|
141 |
+
feat_keys=("coord", "color"),
|
142 |
+
),
|
143 |
+
],
|
144 |
+
aug_transform=[
|
145 |
+
[dict(type="RandomScale", scale=[0.9, 0.9])],
|
146 |
+
[dict(type="RandomScale", scale=[0.95, 0.95])],
|
147 |
+
[dict(type="RandomScale", scale=[1, 1])],
|
148 |
+
[dict(type="RandomScale", scale=[1.05, 1.05])],
|
149 |
+
[dict(type="RandomScale", scale=[1.1, 1.1])],
|
150 |
+
[
|
151 |
+
dict(type="RandomScale", scale=[0.9, 0.9]),
|
152 |
+
dict(type="RandomFlip", p=1),
|
153 |
+
],
|
154 |
+
[
|
155 |
+
dict(type="RandomScale", scale=[0.95, 0.95]),
|
156 |
+
dict(type="RandomFlip", p=1),
|
157 |
+
],
|
158 |
+
[dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)],
|
159 |
+
[
|
160 |
+
dict(type="RandomScale", scale=[1.05, 1.05]),
|
161 |
+
dict(type="RandomFlip", p=1),
|
162 |
+
],
|
163 |
+
[
|
164 |
+
dict(type="RandomScale", scale=[1.1, 1.1]),
|
165 |
+
dict(type="RandomFlip", p=1),
|
166 |
+
],
|
167 |
+
],
|
168 |
+
),
|
169 |
+
),
|
170 |
+
)
|
Pointcept/configs/s3dis/semseg-pt-v2m1-0-base.py
ADDED
@@ -0,0 +1,189 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
_base_ = ["../_base_/default_runtime.py"]
|
2 |
+
# misc custom setting
|
3 |
+
batch_size = 12 # bs: total bs in all gpus
|
4 |
+
mix_prob = 0.8
|
5 |
+
empty_cache = False
|
6 |
+
enable_amp = False
|
7 |
+
|
8 |
+
# model settings
|
9 |
+
model = dict(
|
10 |
+
type="DefaultSegmentor",
|
11 |
+
backbone=dict(
|
12 |
+
type="PT-v2m1",
|
13 |
+
in_channels=6,
|
14 |
+
num_classes=13,
|
15 |
+
patch_embed_depth=2,
|
16 |
+
patch_embed_channels=48,
|
17 |
+
patch_embed_groups=6,
|
18 |
+
patch_embed_neighbours=16,
|
19 |
+
enc_depths=(2, 6, 2),
|
20 |
+
enc_channels=(96, 192, 384),
|
21 |
+
enc_groups=(12, 24, 48),
|
22 |
+
enc_neighbours=(16, 16, 16),
|
23 |
+
dec_depths=(1, 1, 1),
|
24 |
+
dec_channels=(48, 96, 192),
|
25 |
+
dec_groups=(6, 12, 24),
|
26 |
+
dec_neighbours=(16, 16, 16),
|
27 |
+
grid_sizes=(0.1, 0.2, 0.4),
|
28 |
+
attn_qkv_bias=True,
|
29 |
+
pe_multiplier=True,
|
30 |
+
pe_bias=True,
|
31 |
+
attn_drop_rate=0.0,
|
32 |
+
drop_path_rate=0.3,
|
33 |
+
enable_checkpoint=False,
|
34 |
+
unpool_backend="interp", # map / interp
|
35 |
+
),
|
36 |
+
criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
|
37 |
+
)
|
38 |
+
|
39 |
+
# scheduler settings
|
40 |
+
epoch = 3000
|
41 |
+
optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05)
|
42 |
+
scheduler = dict(type="MultiStepLR", milestones=[0.6, 0.8], gamma=0.1)
|
43 |
+
|
44 |
+
# dataset settings
|
45 |
+
dataset_type = "S3DISDataset"
|
46 |
+
data_root = "data/s3dis"
|
47 |
+
|
48 |
+
data = dict(
|
49 |
+
num_classes=13,
|
50 |
+
ignore_index=-1,
|
51 |
+
names=[
|
52 |
+
"ceiling",
|
53 |
+
"floor",
|
54 |
+
"wall",
|
55 |
+
"beam",
|
56 |
+
"column",
|
57 |
+
"window",
|
58 |
+
"door",
|
59 |
+
"table",
|
60 |
+
"chair",
|
61 |
+
"sofa",
|
62 |
+
"bookcase",
|
63 |
+
"board",
|
64 |
+
"clutter",
|
65 |
+
],
|
66 |
+
train=dict(
|
67 |
+
type=dataset_type,
|
68 |
+
split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"),
|
69 |
+
data_root=data_root,
|
70 |
+
transform=[
|
71 |
+
dict(type="CenterShift", apply_z=True),
|
72 |
+
# dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
|
73 |
+
# dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
|
74 |
+
# dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
|
75 |
+
# dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
|
76 |
+
# dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
|
77 |
+
dict(type="RandomScale", scale=[0.9, 1.1]),
|
78 |
+
# dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
|
79 |
+
dict(type="RandomFlip", p=0.5),
|
80 |
+
dict(type="RandomJitter", sigma=0.005, clip=0.02),
|
81 |
+
# dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
|
82 |
+
dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
|
83 |
+
dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
|
84 |
+
dict(type="ChromaticJitter", p=0.95, std=0.05),
|
85 |
+
# dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
|
86 |
+
# dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
|
87 |
+
dict(
|
88 |
+
type="GridSample",
|
89 |
+
grid_size=0.04,
|
90 |
+
hash_type="fnv",
|
91 |
+
mode="train",
|
92 |
+
keys=("coord", "color", "segment"),
|
93 |
+
return_grid_coord=True,
|
94 |
+
),
|
95 |
+
dict(type="SphereCrop", point_max=80000, mode="random"),
|
96 |
+
dict(type="CenterShift", apply_z=False),
|
97 |
+
dict(type="NormalizeColor"),
|
98 |
+
# dict(type="ShufflePoint"),
|
99 |
+
dict(type="ToTensor"),
|
100 |
+
dict(
|
101 |
+
type="Collect",
|
102 |
+
keys=("coord", "grid_coord", "segment"),
|
103 |
+
feat_keys=["coord", "color"],
|
104 |
+
),
|
105 |
+
],
|
106 |
+
test_mode=False,
|
107 |
+
),
|
108 |
+
val=dict(
|
109 |
+
type=dataset_type,
|
110 |
+
split="Area_5",
|
111 |
+
data_root=data_root,
|
112 |
+
transform=[
|
113 |
+
dict(type="CenterShift", apply_z=True),
|
114 |
+
dict(
|
115 |
+
type="Copy",
|
116 |
+
keys_dict={"coord": "origin_coord", "segment": "origin_segment"},
|
117 |
+
),
|
118 |
+
dict(
|
119 |
+
type="GridSample",
|
120 |
+
grid_size=0.04,
|
121 |
+
hash_type="fnv",
|
122 |
+
mode="train",
|
123 |
+
keys=("coord", "color", "segment"),
|
124 |
+
return_grid_coord=True,
|
125 |
+
),
|
126 |
+
dict(type="CenterShift", apply_z=False),
|
127 |
+
dict(type="NormalizeColor"),
|
128 |
+
dict(type="ToTensor"),
|
129 |
+
dict(
|
130 |
+
type="Collect",
|
131 |
+
keys=("coord", "grid_coord", "segment"),
|
132 |
+
offset_keys_dict=dict(offset="coord"),
|
133 |
+
feat_keys=["coord", "color"],
|
134 |
+
),
|
135 |
+
],
|
136 |
+
test_mode=False,
|
137 |
+
),
|
138 |
+
test=dict(
|
139 |
+
type=dataset_type,
|
140 |
+
split="Area_5",
|
141 |
+
data_root=data_root,
|
142 |
+
transform=[dict(type="CenterShift", apply_z=True), dict(type="NormalizeColor")],
|
143 |
+
test_mode=True,
|
144 |
+
test_cfg=dict(
|
145 |
+
voxelize=dict(
|
146 |
+
type="GridSample",
|
147 |
+
grid_size=0.04,
|
148 |
+
hash_type="fnv",
|
149 |
+
mode="test",
|
150 |
+
keys=("coord", "color"),
|
151 |
+
return_grid_coord=True,
|
152 |
+
),
|
153 |
+
crop=None,
|
154 |
+
post_transform=[
|
155 |
+
dict(type="CenterShift", apply_z=False),
|
156 |
+
dict(type="ToTensor"),
|
157 |
+
dict(
|
158 |
+
type="Collect",
|
159 |
+
keys=("coord", "grid_coord", "index"),
|
160 |
+
feat_keys=("coord", "color"),
|
161 |
+
),
|
162 |
+
],
|
163 |
+
aug_transform=[
|
164 |
+
[dict(type="RandomScale", scale=[0.9, 0.9])],
|
165 |
+
[dict(type="RandomScale", scale=[0.95, 0.95])],
|
166 |
+
[dict(type="RandomScale", scale=[1, 1])],
|
167 |
+
[dict(type="RandomScale", scale=[1.05, 1.05])],
|
168 |
+
[dict(type="RandomScale", scale=[1.1, 1.1])],
|
169 |
+
[
|
170 |
+
dict(type="RandomScale", scale=[0.9, 0.9]),
|
171 |
+
dict(type="RandomFlip", p=1),
|
172 |
+
],
|
173 |
+
[
|
174 |
+
dict(type="RandomScale", scale=[0.95, 0.95]),
|
175 |
+
dict(type="RandomFlip", p=1),
|
176 |
+
],
|
177 |
+
[dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)],
|
178 |
+
[
|
179 |
+
dict(type="RandomScale", scale=[1.05, 1.05]),
|
180 |
+
dict(type="RandomFlip", p=1),
|
181 |
+
],
|
182 |
+
[
|
183 |
+
dict(type="RandomScale", scale=[1.1, 1.1]),
|
184 |
+
dict(type="RandomFlip", p=1),
|
185 |
+
],
|
186 |
+
],
|
187 |
+
),
|
188 |
+
),
|
189 |
+
)
|
Pointcept/configs/s3dis/semseg-pt-v2m2-0-base.py
ADDED
@@ -0,0 +1,189 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
_base_ = ["../_base_/default_runtime.py"]
|
2 |
+
# misc custom setting
|
3 |
+
batch_size = 12 # bs: total bs in all gpus
|
4 |
+
mix_prob = 0.8
|
5 |
+
empty_cache = False
|
6 |
+
enable_amp = True
|
7 |
+
|
8 |
+
# model settings
|
9 |
+
model = dict(
|
10 |
+
type="DefaultSegmentor",
|
11 |
+
backbone=dict(
|
12 |
+
type="PT-v2m2",
|
13 |
+
in_channels=6,
|
14 |
+
num_classes=13,
|
15 |
+
patch_embed_depth=2,
|
16 |
+
patch_embed_channels=48,
|
17 |
+
patch_embed_groups=6,
|
18 |
+
patch_embed_neighbours=16,
|
19 |
+
enc_depths=(2, 6, 2),
|
20 |
+
enc_channels=(96, 192, 384),
|
21 |
+
enc_groups=(12, 24, 48),
|
22 |
+
enc_neighbours=(16, 16, 16),
|
23 |
+
dec_depths=(1, 1, 1),
|
24 |
+
dec_channels=(48, 96, 192),
|
25 |
+
dec_groups=(6, 12, 24),
|
26 |
+
dec_neighbours=(16, 16, 16),
|
27 |
+
grid_sizes=(0.1, 0.2, 0.4),
|
28 |
+
attn_qkv_bias=True,
|
29 |
+
pe_multiplier=False,
|
30 |
+
pe_bias=True,
|
31 |
+
attn_drop_rate=0.0,
|
32 |
+
drop_path_rate=0.3,
|
33 |
+
enable_checkpoint=False,
|
34 |
+
unpool_backend="interp", # map / interp
|
35 |
+
),
|
36 |
+
criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
|
37 |
+
)
|
38 |
+
|
39 |
+
# scheduler settings
|
40 |
+
epoch = 3000
|
41 |
+
optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05)
|
42 |
+
scheduler = dict(type="MultiStepLR", milestones=[0.6, 0.8], gamma=0.1)
|
43 |
+
|
44 |
+
# dataset settings
|
45 |
+
dataset_type = "S3DISDataset"
|
46 |
+
data_root = "data/s3dis"
|
47 |
+
|
48 |
+
data = dict(
|
49 |
+
num_classes=13,
|
50 |
+
ignore_index=-1,
|
51 |
+
names=[
|
52 |
+
"ceiling",
|
53 |
+
"floor",
|
54 |
+
"wall",
|
55 |
+
"beam",
|
56 |
+
"column",
|
57 |
+
"window",
|
58 |
+
"door",
|
59 |
+
"table",
|
60 |
+
"chair",
|
61 |
+
"sofa",
|
62 |
+
"bookcase",
|
63 |
+
"board",
|
64 |
+
"clutter",
|
65 |
+
],
|
66 |
+
train=dict(
|
67 |
+
type=dataset_type,
|
68 |
+
split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"),
|
69 |
+
data_root=data_root,
|
70 |
+
transform=[
|
71 |
+
dict(type="CenterShift", apply_z=True),
|
72 |
+
# dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
|
73 |
+
# dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
|
74 |
+
# dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
|
75 |
+
# dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
|
76 |
+
# dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
|
77 |
+
dict(type="RandomScale", scale=[0.9, 1.1]),
|
78 |
+
# dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
|
79 |
+
dict(type="RandomFlip", p=0.5),
|
80 |
+
dict(type="RandomJitter", sigma=0.005, clip=0.02),
|
81 |
+
# dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
|
82 |
+
dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
|
83 |
+
dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
|
84 |
+
dict(type="ChromaticJitter", p=0.95, std=0.05),
|
85 |
+
# dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
|
86 |
+
# dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
|
87 |
+
dict(
|
88 |
+
type="GridSample",
|
89 |
+
grid_size=0.04,
|
90 |
+
hash_type="fnv",
|
91 |
+
mode="train",
|
92 |
+
keys=("coord", "color", "segment"),
|
93 |
+
return_grid_coord=True,
|
94 |
+
),
|
95 |
+
dict(type="SphereCrop", point_max=80000, mode="random"),
|
96 |
+
dict(type="CenterShift", apply_z=False),
|
97 |
+
dict(type="NormalizeColor"),
|
98 |
+
# dict(type="ShufflePoint"),
|
99 |
+
dict(type="ToTensor"),
|
100 |
+
dict(
|
101 |
+
type="Collect",
|
102 |
+
keys=("coord", "grid_coord", "segment"),
|
103 |
+
feat_keys=["coord", "color"],
|
104 |
+
),
|
105 |
+
],
|
106 |
+
test_mode=False,
|
107 |
+
),
|
108 |
+
val=dict(
|
109 |
+
type=dataset_type,
|
110 |
+
split="Area_5",
|
111 |
+
data_root=data_root,
|
112 |
+
transform=[
|
113 |
+
dict(type="CenterShift", apply_z=True),
|
114 |
+
dict(
|
115 |
+
type="Copy",
|
116 |
+
keys_dict={"coord": "origin_coord", "segment": "origin_segment"},
|
117 |
+
),
|
118 |
+
dict(
|
119 |
+
type="GridSample",
|
120 |
+
grid_size=0.04,
|
121 |
+
hash_type="fnv",
|
122 |
+
mode="train",
|
123 |
+
keys=("coord", "color", "segment"),
|
124 |
+
return_grid_coord=True,
|
125 |
+
),
|
126 |
+
dict(type="CenterShift", apply_z=False),
|
127 |
+
dict(type="NormalizeColor"),
|
128 |
+
dict(type="ToTensor"),
|
129 |
+
dict(
|
130 |
+
type="Collect",
|
131 |
+
keys=("coord", "grid_coord", "segment"),
|
132 |
+
offset_keys_dict=dict(offset="coord"),
|
133 |
+
feat_keys=["coord", "color"],
|
134 |
+
),
|
135 |
+
],
|
136 |
+
test_mode=False,
|
137 |
+
),
|
138 |
+
test=dict(
|
139 |
+
type=dataset_type,
|
140 |
+
split="Area_5",
|
141 |
+
data_root=data_root,
|
142 |
+
transform=[dict(type="CenterShift", apply_z=True), dict(type="NormalizeColor")],
|
143 |
+
test_mode=True,
|
144 |
+
test_cfg=dict(
|
145 |
+
voxelize=dict(
|
146 |
+
type="GridSample",
|
147 |
+
grid_size=0.04,
|
148 |
+
hash_type="fnv",
|
149 |
+
mode="test",
|
150 |
+
keys=("coord", "color"),
|
151 |
+
return_grid_coord=True,
|
152 |
+
),
|
153 |
+
crop=None,
|
154 |
+
post_transform=[
|
155 |
+
dict(type="CenterShift", apply_z=False),
|
156 |
+
dict(type="ToTensor"),
|
157 |
+
dict(
|
158 |
+
type="Collect",
|
159 |
+
keys=("coord", "grid_coord", "index"),
|
160 |
+
feat_keys=("coord", "color"),
|
161 |
+
),
|
162 |
+
],
|
163 |
+
aug_transform=[
|
164 |
+
[dict(type="RandomScale", scale=[0.9, 0.9])],
|
165 |
+
[dict(type="RandomScale", scale=[0.95, 0.95])],
|
166 |
+
[dict(type="RandomScale", scale=[1, 1])],
|
167 |
+
[dict(type="RandomScale", scale=[1.05, 1.05])],
|
168 |
+
[dict(type="RandomScale", scale=[1.1, 1.1])],
|
169 |
+
[
|
170 |
+
dict(type="RandomScale", scale=[0.9, 0.9]),
|
171 |
+
dict(type="RandomFlip", p=1),
|
172 |
+
],
|
173 |
+
[
|
174 |
+
dict(type="RandomScale", scale=[0.95, 0.95]),
|
175 |
+
dict(type="RandomFlip", p=1),
|
176 |
+
],
|
177 |
+
[dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)],
|
178 |
+
[
|
179 |
+
dict(type="RandomScale", scale=[1.05, 1.05]),
|
180 |
+
dict(type="RandomFlip", p=1),
|
181 |
+
],
|
182 |
+
[
|
183 |
+
dict(type="RandomScale", scale=[1.1, 1.1]),
|
184 |
+
dict(type="RandomFlip", p=1),
|
185 |
+
],
|
186 |
+
],
|
187 |
+
),
|
188 |
+
),
|
189 |
+
)
|
Pointcept/configs/s3dis/semseg-pt-v2m2-0-lovasz.py
ADDED
@@ -0,0 +1,192 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
_base_ = ["../_base_/default_runtime.py"]
|
2 |
+
# misc custom setting
|
3 |
+
batch_size = 12 # bs: total bs in all gpus
|
4 |
+
mix_prob = 0.8
|
5 |
+
empty_cache = False
|
6 |
+
enable_amp = True
|
7 |
+
|
8 |
+
# model settings
|
9 |
+
model = dict(
|
10 |
+
type="DefaultSegmentor",
|
11 |
+
backbone=dict(
|
12 |
+
type="PT-v2m2",
|
13 |
+
in_channels=6,
|
14 |
+
num_classes=13,
|
15 |
+
patch_embed_depth=2,
|
16 |
+
patch_embed_channels=48,
|
17 |
+
patch_embed_groups=6,
|
18 |
+
patch_embed_neighbours=16,
|
19 |
+
enc_depths=(2, 6, 2),
|
20 |
+
enc_channels=(96, 192, 384),
|
21 |
+
enc_groups=(12, 24, 48),
|
22 |
+
enc_neighbours=(16, 16, 16),
|
23 |
+
dec_depths=(1, 1, 1),
|
24 |
+
dec_channels=(48, 96, 192),
|
25 |
+
dec_groups=(6, 12, 24),
|
26 |
+
dec_neighbours=(16, 16, 16),
|
27 |
+
grid_sizes=(0.1, 0.2, 0.4),
|
28 |
+
attn_qkv_bias=True,
|
29 |
+
pe_multiplier=False,
|
30 |
+
pe_bias=True,
|
31 |
+
attn_drop_rate=0.0,
|
32 |
+
drop_path_rate=0.3,
|
33 |
+
enable_checkpoint=False,
|
34 |
+
unpool_backend="interp", # map / interp
|
35 |
+
),
|
36 |
+
criteria=[
|
37 |
+
dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
|
38 |
+
dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
|
39 |
+
],
|
40 |
+
)
|
41 |
+
|
42 |
+
# scheduler settings
|
43 |
+
epoch = 3000
|
44 |
+
optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05)
|
45 |
+
scheduler = dict(type="MultiStepLR", milestones=[0.6, 0.8], gamma=0.1)
|
46 |
+
|
47 |
+
# dataset settings
|
48 |
+
dataset_type = "S3DISDataset"
|
49 |
+
data_root = "data/s3dis"
|
50 |
+
|
51 |
+
data = dict(
|
52 |
+
num_classes=13,
|
53 |
+
ignore_index=-1,
|
54 |
+
names=[
|
55 |
+
"ceiling",
|
56 |
+
"floor",
|
57 |
+
"wall",
|
58 |
+
"beam",
|
59 |
+
"column",
|
60 |
+
"window",
|
61 |
+
"door",
|
62 |
+
"table",
|
63 |
+
"chair",
|
64 |
+
"sofa",
|
65 |
+
"bookcase",
|
66 |
+
"board",
|
67 |
+
"clutter",
|
68 |
+
],
|
69 |
+
train=dict(
|
70 |
+
type=dataset_type,
|
71 |
+
split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"),
|
72 |
+
data_root=data_root,
|
73 |
+
transform=[
|
74 |
+
dict(type="CenterShift", apply_z=True),
|
75 |
+
# dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
|
76 |
+
# dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
|
77 |
+
# dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
|
78 |
+
# dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
|
79 |
+
# dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
|
80 |
+
dict(type="RandomScale", scale=[0.9, 1.1]),
|
81 |
+
# dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
|
82 |
+
dict(type="RandomFlip", p=0.5),
|
83 |
+
dict(type="RandomJitter", sigma=0.005, clip=0.02),
|
84 |
+
# dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
|
85 |
+
dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
|
86 |
+
dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
|
87 |
+
dict(type="ChromaticJitter", p=0.95, std=0.05),
|
88 |
+
# dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
|
89 |
+
# dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
|
90 |
+
dict(
|
91 |
+
type="GridSample",
|
92 |
+
grid_size=0.04,
|
93 |
+
hash_type="fnv",
|
94 |
+
mode="train",
|
95 |
+
keys=("coord", "color", "segment"),
|
96 |
+
return_grid_coord=True,
|
97 |
+
),
|
98 |
+
dict(type="SphereCrop", point_max=80000, mode="random"),
|
99 |
+
dict(type="CenterShift", apply_z=False),
|
100 |
+
dict(type="NormalizeColor"),
|
101 |
+
# dict(type="ShufflePoint"),
|
102 |
+
dict(type="ToTensor"),
|
103 |
+
dict(
|
104 |
+
type="Collect",
|
105 |
+
keys=("coord", "grid_coord", "segment"),
|
106 |
+
feat_keys=["coord", "color"],
|
107 |
+
),
|
108 |
+
],
|
109 |
+
test_mode=False,
|
110 |
+
),
|
111 |
+
val=dict(
|
112 |
+
type=dataset_type,
|
113 |
+
split="Area_5",
|
114 |
+
data_root=data_root,
|
115 |
+
transform=[
|
116 |
+
dict(type="CenterShift", apply_z=True),
|
117 |
+
dict(
|
118 |
+
type="Copy",
|
119 |
+
keys_dict={"coord": "origin_coord", "segment": "origin_segment"},
|
120 |
+
),
|
121 |
+
dict(
|
122 |
+
type="GridSample",
|
123 |
+
grid_size=0.04,
|
124 |
+
hash_type="fnv",
|
125 |
+
mode="train",
|
126 |
+
keys=("coord", "color", "segment"),
|
127 |
+
return_grid_coord=True,
|
128 |
+
),
|
129 |
+
dict(type="CenterShift", apply_z=False),
|
130 |
+
dict(type="NormalizeColor"),
|
131 |
+
dict(type="ToTensor"),
|
132 |
+
dict(
|
133 |
+
type="Collect",
|
134 |
+
keys=("coord", "grid_coord", "segment"),
|
135 |
+
offset_keys_dict=dict(offset="coord"),
|
136 |
+
feat_keys=["coord", "color"],
|
137 |
+
),
|
138 |
+
],
|
139 |
+
test_mode=False,
|
140 |
+
),
|
141 |
+
test=dict(
|
142 |
+
type=dataset_type,
|
143 |
+
split="Area_5",
|
144 |
+
data_root=data_root,
|
145 |
+
transform=[dict(type="CenterShift", apply_z=True), dict(type="NormalizeColor")],
|
146 |
+
test_mode=True,
|
147 |
+
test_cfg=dict(
|
148 |
+
voxelize=dict(
|
149 |
+
type="GridSample",
|
150 |
+
grid_size=0.04,
|
151 |
+
hash_type="fnv",
|
152 |
+
mode="test",
|
153 |
+
keys=("coord", "color"),
|
154 |
+
return_grid_coord=True,
|
155 |
+
),
|
156 |
+
crop=None,
|
157 |
+
post_transform=[
|
158 |
+
dict(type="CenterShift", apply_z=False),
|
159 |
+
dict(type="ToTensor"),
|
160 |
+
dict(
|
161 |
+
type="Collect",
|
162 |
+
keys=("coord", "grid_coord", "index"),
|
163 |
+
feat_keys=("coord", "color"),
|
164 |
+
),
|
165 |
+
],
|
166 |
+
aug_transform=[
|
167 |
+
[dict(type="RandomScale", scale=[0.9, 0.9])],
|
168 |
+
[dict(type="RandomScale", scale=[0.95, 0.95])],
|
169 |
+
[dict(type="RandomScale", scale=[1, 1])],
|
170 |
+
[dict(type="RandomScale", scale=[1.05, 1.05])],
|
171 |
+
[dict(type="RandomScale", scale=[1.1, 1.1])],
|
172 |
+
[
|
173 |
+
dict(type="RandomScale", scale=[0.9, 0.9]),
|
174 |
+
dict(type="RandomFlip", p=1),
|
175 |
+
],
|
176 |
+
[
|
177 |
+
dict(type="RandomScale", scale=[0.95, 0.95]),
|
178 |
+
dict(type="RandomFlip", p=1),
|
179 |
+
],
|
180 |
+
[dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)],
|
181 |
+
[
|
182 |
+
dict(type="RandomScale", scale=[1.05, 1.05]),
|
183 |
+
dict(type="RandomFlip", p=1),
|
184 |
+
],
|
185 |
+
[
|
186 |
+
dict(type="RandomScale", scale=[1.1, 1.1]),
|
187 |
+
dict(type="RandomFlip", p=1),
|
188 |
+
],
|
189 |
+
],
|
190 |
+
),
|
191 |
+
),
|
192 |
+
)
|
Pointcept/configs/s3dis/semseg-pt-v2m2-1-one-cycle.py
ADDED
@@ -0,0 +1,196 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
_base_ = ["../_base_/default_runtime.py"]
|
2 |
+
# misc custom setting
|
3 |
+
batch_size = 12 # bs: total bs in all gpus
|
4 |
+
mix_prob = 0.8
|
5 |
+
empty_cache = False
|
6 |
+
enable_amp = True
|
7 |
+
|
8 |
+
# model settings
|
9 |
+
model = dict(
|
10 |
+
type="DefaultSegmentor",
|
11 |
+
backbone=dict(
|
12 |
+
type="PT-v2m2",
|
13 |
+
in_channels=6,
|
14 |
+
num_classes=13,
|
15 |
+
patch_embed_depth=2,
|
16 |
+
patch_embed_channels=48,
|
17 |
+
patch_embed_groups=6,
|
18 |
+
patch_embed_neighbours=16,
|
19 |
+
enc_depths=(2, 6, 2),
|
20 |
+
enc_channels=(96, 192, 384),
|
21 |
+
enc_groups=(12, 24, 48),
|
22 |
+
enc_neighbours=(16, 16, 16),
|
23 |
+
dec_depths=(1, 1, 1),
|
24 |
+
dec_channels=(48, 96, 192),
|
25 |
+
dec_groups=(6, 12, 24),
|
26 |
+
dec_neighbours=(16, 16, 16),
|
27 |
+
grid_sizes=(0.1, 0.2, 0.4),
|
28 |
+
attn_qkv_bias=True,
|
29 |
+
pe_multiplier=False,
|
30 |
+
pe_bias=True,
|
31 |
+
attn_drop_rate=0.0,
|
32 |
+
drop_path_rate=0.3,
|
33 |
+
enable_checkpoint=False,
|
34 |
+
unpool_backend="interp", # map / interp
|
35 |
+
),
|
36 |
+
criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
|
37 |
+
)
|
38 |
+
|
39 |
+
# scheduler settings
|
40 |
+
epoch = 3000
|
41 |
+
optimizer = dict(type="AdamW", lr=0.005, weight_decay=0.05)
|
42 |
+
scheduler = dict(
|
43 |
+
type="OneCycleLR",
|
44 |
+
max_lr=optimizer["lr"],
|
45 |
+
pct_start=0.05,
|
46 |
+
anneal_strategy="cos",
|
47 |
+
div_factor=10.0,
|
48 |
+
final_div_factor=1000.0,
|
49 |
+
)
|
50 |
+
|
51 |
+
# dataset settings
|
52 |
+
dataset_type = "S3DISDataset"
|
53 |
+
data_root = "data/s3dis"
|
54 |
+
|
55 |
+
data = dict(
|
56 |
+
num_classes=13,
|
57 |
+
ignore_index=-1,
|
58 |
+
names=[
|
59 |
+
"ceiling",
|
60 |
+
"floor",
|
61 |
+
"wall",
|
62 |
+
"beam",
|
63 |
+
"column",
|
64 |
+
"window",
|
65 |
+
"door",
|
66 |
+
"table",
|
67 |
+
"chair",
|
68 |
+
"sofa",
|
69 |
+
"bookcase",
|
70 |
+
"board",
|
71 |
+
"clutter",
|
72 |
+
],
|
73 |
+
train=dict(
|
74 |
+
type=dataset_type,
|
75 |
+
split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"),
|
76 |
+
data_root=data_root,
|
77 |
+
transform=[
|
78 |
+
dict(type="CenterShift", apply_z=True),
|
79 |
+
# dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
|
80 |
+
# dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
|
81 |
+
# dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
|
82 |
+
# dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
|
83 |
+
# dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
|
84 |
+
dict(type="RandomScale", scale=[0.9, 1.1]),
|
85 |
+
# dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
|
86 |
+
dict(type="RandomFlip", p=0.5),
|
87 |
+
dict(type="RandomJitter", sigma=0.005, clip=0.02),
|
88 |
+
# dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
|
89 |
+
dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
|
90 |
+
dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
|
91 |
+
dict(type="ChromaticJitter", p=0.95, std=0.05),
|
92 |
+
# dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
|
93 |
+
# dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
|
94 |
+
dict(
|
95 |
+
type="GridSample",
|
96 |
+
grid_size=0.04,
|
97 |
+
hash_type="fnv",
|
98 |
+
mode="train",
|
99 |
+
keys=("coord", "color", "segment"),
|
100 |
+
return_grid_coord=True,
|
101 |
+
),
|
102 |
+
dict(type="SphereCrop", point_max=80000, mode="random"),
|
103 |
+
dict(type="CenterShift", apply_z=False),
|
104 |
+
dict(type="NormalizeColor"),
|
105 |
+
# dict(type="ShufflePoint"),
|
106 |
+
dict(type="ToTensor"),
|
107 |
+
dict(
|
108 |
+
type="Collect",
|
109 |
+
keys=("coord", "grid_coord", "segment"),
|
110 |
+
feat_keys=["coord", "color"],
|
111 |
+
),
|
112 |
+
],
|
113 |
+
test_mode=False,
|
114 |
+
),
|
115 |
+
val=dict(
|
116 |
+
type=dataset_type,
|
117 |
+
split="Area_5",
|
118 |
+
data_root=data_root,
|
119 |
+
transform=[
|
120 |
+
dict(type="CenterShift", apply_z=True),
|
121 |
+
dict(
|
122 |
+
type="Copy",
|
123 |
+
keys_dict={"coord": "origin_coord", "segment": "origin_segment"},
|
124 |
+
),
|
125 |
+
dict(
|
126 |
+
type="GridSample",
|
127 |
+
grid_size=0.04,
|
128 |
+
hash_type="fnv",
|
129 |
+
mode="train",
|
130 |
+
keys=("coord", "color", "segment"),
|
131 |
+
return_grid_coord=True,
|
132 |
+
),
|
133 |
+
dict(type="CenterShift", apply_z=False),
|
134 |
+
dict(type="NormalizeColor"),
|
135 |
+
dict(type="ToTensor"),
|
136 |
+
dict(
|
137 |
+
type="Collect",
|
138 |
+
keys=("coord", "grid_coord", "segment"),
|
139 |
+
offset_keys_dict=dict(offset="coord"),
|
140 |
+
feat_keys=["coord", "color"],
|
141 |
+
),
|
142 |
+
],
|
143 |
+
test_mode=False,
|
144 |
+
),
|
145 |
+
test=dict(
|
146 |
+
type=dataset_type,
|
147 |
+
split="Area_5",
|
148 |
+
data_root=data_root,
|
149 |
+
transform=[dict(type="CenterShift", apply_z=True), dict(type="NormalizeColor")],
|
150 |
+
test_mode=True,
|
151 |
+
test_cfg=dict(
|
152 |
+
voxelize=dict(
|
153 |
+
type="GridSample",
|
154 |
+
grid_size=0.04,
|
155 |
+
hash_type="fnv",
|
156 |
+
mode="test",
|
157 |
+
keys=("coord", "color"),
|
158 |
+
return_grid_coord=True,
|
159 |
+
),
|
160 |
+
crop=None,
|
161 |
+
post_transform=[
|
162 |
+
dict(type="CenterShift", apply_z=False),
|
163 |
+
dict(type="ToTensor"),
|
164 |
+
dict(
|
165 |
+
type="Collect",
|
166 |
+
keys=("coord", "grid_coord", "index"),
|
167 |
+
feat_keys=("coord", "color"),
|
168 |
+
),
|
169 |
+
],
|
170 |
+
aug_transform=[
|
171 |
+
[dict(type="RandomScale", scale=[0.9, 0.9])],
|
172 |
+
[dict(type="RandomScale", scale=[0.95, 0.95])],
|
173 |
+
[dict(type="RandomScale", scale=[1, 1])],
|
174 |
+
[dict(type="RandomScale", scale=[1.05, 1.05])],
|
175 |
+
[dict(type="RandomScale", scale=[1.1, 1.1])],
|
176 |
+
[
|
177 |
+
dict(type="RandomScale", scale=[0.9, 0.9]),
|
178 |
+
dict(type="RandomFlip", p=1),
|
179 |
+
],
|
180 |
+
[
|
181 |
+
dict(type="RandomScale", scale=[0.95, 0.95]),
|
182 |
+
dict(type="RandomFlip", p=1),
|
183 |
+
],
|
184 |
+
[dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)],
|
185 |
+
[
|
186 |
+
dict(type="RandomScale", scale=[1.05, 1.05]),
|
187 |
+
dict(type="RandomFlip", p=1),
|
188 |
+
],
|
189 |
+
[
|
190 |
+
dict(type="RandomScale", scale=[1.1, 1.1]),
|
191 |
+
dict(type="RandomFlip", p=1),
|
192 |
+
],
|
193 |
+
],
|
194 |
+
),
|
195 |
+
),
|
196 |
+
)
|
Pointcept/configs/s3dis/semseg-pt-v3m1-0-base.py
ADDED
@@ -0,0 +1,225 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
_base_ = ["../_base_/default_runtime.py"]
|
2 |
+
|
3 |
+
# misc custom setting
|
4 |
+
batch_size = 12 # bs: total bs in all gpus
|
5 |
+
num_worker = 24
|
6 |
+
mix_prob = 0.8
|
7 |
+
empty_cache = False
|
8 |
+
enable_amp = True
|
9 |
+
|
10 |
+
# model settings
|
11 |
+
model = dict(
|
12 |
+
type="DefaultSegmentorV2",
|
13 |
+
num_classes=13,
|
14 |
+
backbone_out_channels=64,
|
15 |
+
backbone=dict(
|
16 |
+
type="PT-v3m1",
|
17 |
+
in_channels=6,
|
18 |
+
order=("z", "z-trans", "hilbert", "hilbert-trans"),
|
19 |
+
stride=(2, 2, 2, 2),
|
20 |
+
enc_depths=(2, 2, 2, 6, 2),
|
21 |
+
enc_channels=(32, 64, 128, 256, 512),
|
22 |
+
enc_num_head=(2, 4, 8, 16, 32),
|
23 |
+
enc_patch_size=(1024, 1024, 1024, 1024, 1024),
|
24 |
+
dec_depths=(2, 2, 2, 2),
|
25 |
+
dec_channels=(64, 64, 128, 256),
|
26 |
+
dec_num_head=(4, 4, 8, 16),
|
27 |
+
dec_patch_size=(1024, 1024, 1024, 1024),
|
28 |
+
mlp_ratio=4,
|
29 |
+
qkv_bias=True,
|
30 |
+
qk_scale=None,
|
31 |
+
attn_drop=0.0,
|
32 |
+
proj_drop=0.0,
|
33 |
+
drop_path=0.3,
|
34 |
+
shuffle_orders=True,
|
35 |
+
pre_norm=True,
|
36 |
+
enable_rpe=False,
|
37 |
+
enable_flash=True,
|
38 |
+
upcast_attention=False,
|
39 |
+
upcast_softmax=False,
|
40 |
+
cls_mode=False,
|
41 |
+
pdnorm_bn=False,
|
42 |
+
pdnorm_ln=False,
|
43 |
+
pdnorm_decouple=True,
|
44 |
+
pdnorm_adaptive=False,
|
45 |
+
pdnorm_affine=True,
|
46 |
+
pdnorm_conditions=("ScanNet", "S3DIS", "Structured3D"),
|
47 |
+
),
|
48 |
+
criteria=[
|
49 |
+
dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
|
50 |
+
dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
|
51 |
+
],
|
52 |
+
)
|
53 |
+
|
54 |
+
# scheduler settings
|
55 |
+
epoch = 3000
|
56 |
+
optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05)
|
57 |
+
scheduler = dict(
|
58 |
+
type="OneCycleLR",
|
59 |
+
max_lr=[0.006, 0.0006],
|
60 |
+
pct_start=0.05,
|
61 |
+
anneal_strategy="cos",
|
62 |
+
div_factor=10.0,
|
63 |
+
final_div_factor=1000.0,
|
64 |
+
)
|
65 |
+
param_dicts = [dict(keyword="block", lr=0.0006)]
|
66 |
+
|
67 |
+
# dataset settings
|
68 |
+
dataset_type = "S3DISDataset"
|
69 |
+
data_root = "data/s3dis"
|
70 |
+
|
71 |
+
data = dict(
|
72 |
+
num_classes=13,
|
73 |
+
ignore_index=-1,
|
74 |
+
names=[
|
75 |
+
"ceiling",
|
76 |
+
"floor",
|
77 |
+
"wall",
|
78 |
+
"beam",
|
79 |
+
"column",
|
80 |
+
"window",
|
81 |
+
"door",
|
82 |
+
"table",
|
83 |
+
"chair",
|
84 |
+
"sofa",
|
85 |
+
"bookcase",
|
86 |
+
"board",
|
87 |
+
"clutter",
|
88 |
+
],
|
89 |
+
train=dict(
|
90 |
+
type=dataset_type,
|
91 |
+
split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"),
|
92 |
+
data_root=data_root,
|
93 |
+
transform=[
|
94 |
+
dict(type="CenterShift", apply_z=True),
|
95 |
+
dict(
|
96 |
+
type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
|
97 |
+
),
|
98 |
+
# dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
|
99 |
+
dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
|
100 |
+
dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
|
101 |
+
dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
|
102 |
+
dict(type="RandomScale", scale=[0.9, 1.1]),
|
103 |
+
# dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
|
104 |
+
dict(type="RandomFlip", p=0.5),
|
105 |
+
dict(type="RandomJitter", sigma=0.005, clip=0.02),
|
106 |
+
# dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
|
107 |
+
dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
|
108 |
+
dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
|
109 |
+
dict(type="ChromaticJitter", p=0.95, std=0.05),
|
110 |
+
# dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
|
111 |
+
# dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
|
112 |
+
dict(
|
113 |
+
type="GridSample",
|
114 |
+
grid_size=0.02,
|
115 |
+
hash_type="fnv",
|
116 |
+
mode="train",
|
117 |
+
return_grid_coord=True,
|
118 |
+
),
|
119 |
+
dict(type="SphereCrop", sample_rate=0.6, mode="random"),
|
120 |
+
dict(type="SphereCrop", point_max=204800, mode="random"),
|
121 |
+
dict(type="CenterShift", apply_z=False),
|
122 |
+
dict(type="NormalizeColor"),
|
123 |
+
# dict(type="ShufflePoint"),
|
124 |
+
dict(type="ToTensor"),
|
125 |
+
dict(
|
126 |
+
type="Collect",
|
127 |
+
keys=("coord", "grid_coord", "segment"),
|
128 |
+
feat_keys=("color", "normal"),
|
129 |
+
),
|
130 |
+
],
|
131 |
+
test_mode=False,
|
132 |
+
),
|
133 |
+
val=dict(
|
134 |
+
type=dataset_type,
|
135 |
+
split="Area_5",
|
136 |
+
data_root=data_root,
|
137 |
+
transform=[
|
138 |
+
dict(type="CenterShift", apply_z=True),
|
139 |
+
dict(
|
140 |
+
type="Copy",
|
141 |
+
keys_dict={"coord": "origin_coord", "segment": "origin_segment"},
|
142 |
+
),
|
143 |
+
dict(
|
144 |
+
type="GridSample",
|
145 |
+
grid_size=0.02,
|
146 |
+
hash_type="fnv",
|
147 |
+
mode="train",
|
148 |
+
return_grid_coord=True,
|
149 |
+
),
|
150 |
+
dict(type="CenterShift", apply_z=False),
|
151 |
+
dict(type="NormalizeColor"),
|
152 |
+
dict(type="ToTensor"),
|
153 |
+
dict(
|
154 |
+
type="Collect",
|
155 |
+
keys=(
|
156 |
+
"coord",
|
157 |
+
"grid_coord",
|
158 |
+
"origin_coord",
|
159 |
+
"segment",
|
160 |
+
"origin_segment",
|
161 |
+
),
|
162 |
+
offset_keys_dict=dict(offset="coord", origin_offset="origin_coord"),
|
163 |
+
feat_keys=("color", "normal"),
|
164 |
+
),
|
165 |
+
],
|
166 |
+
test_mode=False,
|
167 |
+
),
|
168 |
+
test=dict(
|
169 |
+
type=dataset_type,
|
170 |
+
split="Area_5",
|
171 |
+
data_root=data_root,
|
172 |
+
transform=[
|
173 |
+
dict(type="CenterShift", apply_z=True),
|
174 |
+
dict(type="NormalizeColor"),
|
175 |
+
],
|
176 |
+
test_mode=True,
|
177 |
+
test_cfg=dict(
|
178 |
+
voxelize=dict(
|
179 |
+
type="GridSample",
|
180 |
+
grid_size=0.02,
|
181 |
+
hash_type="fnv",
|
182 |
+
mode="test",
|
183 |
+
keys=("coord", "color", "normal"),
|
184 |
+
return_grid_coord=True,
|
185 |
+
),
|
186 |
+
crop=None,
|
187 |
+
post_transform=[
|
188 |
+
dict(type="CenterShift", apply_z=False),
|
189 |
+
dict(type="ToTensor"),
|
190 |
+
dict(
|
191 |
+
type="Collect",
|
192 |
+
keys=("coord", "grid_coord", "index"),
|
193 |
+
feat_keys=("color", "normal"),
|
194 |
+
),
|
195 |
+
],
|
196 |
+
aug_transform=[
|
197 |
+
[dict(type="RandomScale", scale=[0.9, 0.9])],
|
198 |
+
[dict(type="RandomScale", scale=[0.95, 0.95])],
|
199 |
+
[dict(type="RandomScale", scale=[1, 1])],
|
200 |
+
[dict(type="RandomScale", scale=[1.05, 1.05])],
|
201 |
+
[dict(type="RandomScale", scale=[1.1, 1.1])],
|
202 |
+
[
|
203 |
+
dict(type="RandomScale", scale=[0.9, 0.9]),
|
204 |
+
dict(type="RandomFlip", p=1),
|
205 |
+
],
|
206 |
+
[
|
207 |
+
dict(type="RandomScale", scale=[0.95, 0.95]),
|
208 |
+
dict(type="RandomFlip", p=1),
|
209 |
+
],
|
210 |
+
[
|
211 |
+
dict(type="RandomScale", scale=[1, 1]),
|
212 |
+
dict(type="RandomFlip", p=1),
|
213 |
+
],
|
214 |
+
[
|
215 |
+
dict(type="RandomScale", scale=[1.05, 1.05]),
|
216 |
+
dict(type="RandomFlip", p=1),
|
217 |
+
],
|
218 |
+
[
|
219 |
+
dict(type="RandomScale", scale=[1.1, 1.1]),
|
220 |
+
dict(type="RandomFlip", p=1),
|
221 |
+
],
|
222 |
+
],
|
223 |
+
),
|
224 |
+
),
|
225 |
+
)
|
Pointcept/configs/s3dis/semseg-pt-v3m1-1-rpe.py
ADDED
@@ -0,0 +1,225 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
_base_ = ["../_base_/default_runtime.py"]
|
2 |
+
|
3 |
+
# misc custom setting
|
4 |
+
batch_size = 12 # bs: total bs in all gpus
|
5 |
+
num_worker = 24
|
6 |
+
mix_prob = 0.8
|
7 |
+
empty_cache = False
|
8 |
+
enable_amp = True
|
9 |
+
|
10 |
+
# model settings
|
11 |
+
model = dict(
|
12 |
+
type="DefaultSegmentorV2",
|
13 |
+
num_classes=13,
|
14 |
+
backbone_out_channels=64,
|
15 |
+
backbone=dict(
|
16 |
+
type="PT-v3m1",
|
17 |
+
in_channels=6,
|
18 |
+
order=["z", "z-trans", "hilbert", "hilbert-trans"],
|
19 |
+
stride=(2, 2, 2, 2),
|
20 |
+
enc_depths=(2, 2, 2, 6, 2),
|
21 |
+
enc_channels=(32, 64, 128, 256, 512),
|
22 |
+
enc_num_head=(2, 4, 8, 16, 32),
|
23 |
+
enc_patch_size=(128, 128, 128, 128, 128),
|
24 |
+
dec_depths=(2, 2, 2, 2),
|
25 |
+
dec_channels=(64, 64, 128, 256),
|
26 |
+
dec_num_head=(4, 4, 8, 16),
|
27 |
+
dec_patch_size=(128, 128, 128, 128),
|
28 |
+
mlp_ratio=4,
|
29 |
+
qkv_bias=True,
|
30 |
+
qk_scale=None,
|
31 |
+
attn_drop=0.0,
|
32 |
+
proj_drop=0.0,
|
33 |
+
drop_path=0.3,
|
34 |
+
shuffle_orders=True,
|
35 |
+
pre_norm=True,
|
36 |
+
enable_rpe=True,
|
37 |
+
enable_flash=False,
|
38 |
+
upcast_attention=True,
|
39 |
+
upcast_softmax=True,
|
40 |
+
cls_mode=False,
|
41 |
+
pdnorm_bn=False,
|
42 |
+
pdnorm_ln=False,
|
43 |
+
pdnorm_decouple=True,
|
44 |
+
pdnorm_adaptive=False,
|
45 |
+
pdnorm_affine=True,
|
46 |
+
pdnorm_conditions=("ScanNet", "S3DIS", "Structured3D"),
|
47 |
+
),
|
48 |
+
criteria=[
|
49 |
+
dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
|
50 |
+
dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
|
51 |
+
],
|
52 |
+
)
|
53 |
+
|
54 |
+
# scheduler settings
|
55 |
+
epoch = 3000
|
56 |
+
optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05)
|
57 |
+
scheduler = dict(
|
58 |
+
type="OneCycleLR",
|
59 |
+
max_lr=[0.006, 0.0006],
|
60 |
+
pct_start=0.05,
|
61 |
+
anneal_strategy="cos",
|
62 |
+
div_factor=10.0,
|
63 |
+
final_div_factor=1000.0,
|
64 |
+
)
|
65 |
+
param_dicts = [dict(keyword="block", lr=0.0006)]
|
66 |
+
|
67 |
+
# dataset settings
|
68 |
+
dataset_type = "S3DISDataset"
|
69 |
+
data_root = "data/s3dis"
|
70 |
+
|
71 |
+
data = dict(
|
72 |
+
num_classes=13,
|
73 |
+
ignore_index=-1,
|
74 |
+
names=[
|
75 |
+
"ceiling",
|
76 |
+
"floor",
|
77 |
+
"wall",
|
78 |
+
"beam",
|
79 |
+
"column",
|
80 |
+
"window",
|
81 |
+
"door",
|
82 |
+
"table",
|
83 |
+
"chair",
|
84 |
+
"sofa",
|
85 |
+
"bookcase",
|
86 |
+
"board",
|
87 |
+
"clutter",
|
88 |
+
],
|
89 |
+
train=dict(
|
90 |
+
type=dataset_type,
|
91 |
+
split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"),
|
92 |
+
data_root=data_root,
|
93 |
+
transform=[
|
94 |
+
dict(type="CenterShift", apply_z=True),
|
95 |
+
dict(
|
96 |
+
type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
|
97 |
+
),
|
98 |
+
# dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
|
99 |
+
dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
|
100 |
+
dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
|
101 |
+
dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
|
102 |
+
dict(type="RandomScale", scale=[0.9, 1.1]),
|
103 |
+
# dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
|
104 |
+
dict(type="RandomFlip", p=0.5),
|
105 |
+
dict(type="RandomJitter", sigma=0.005, clip=0.02),
|
106 |
+
# dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
|
107 |
+
dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
|
108 |
+
dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
|
109 |
+
dict(type="ChromaticJitter", p=0.95, std=0.05),
|
110 |
+
# dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
|
111 |
+
# dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
|
112 |
+
dict(
|
113 |
+
type="GridSample",
|
114 |
+
grid_size=0.02,
|
115 |
+
hash_type="fnv",
|
116 |
+
mode="train",
|
117 |
+
return_grid_coord=True,
|
118 |
+
),
|
119 |
+
dict(type="SphereCrop", sample_rate=0.6, mode="random"),
|
120 |
+
dict(type="SphereCrop", point_max=204800, mode="random"),
|
121 |
+
dict(type="CenterShift", apply_z=False),
|
122 |
+
dict(type="NormalizeColor"),
|
123 |
+
# dict(type="ShufflePoint"),
|
124 |
+
dict(type="ToTensor"),
|
125 |
+
dict(
|
126 |
+
type="Collect",
|
127 |
+
keys=("coord", "grid_coord", "segment"),
|
128 |
+
feat_keys=("color", "normal"),
|
129 |
+
),
|
130 |
+
],
|
131 |
+
test_mode=False,
|
132 |
+
),
|
133 |
+
val=dict(
|
134 |
+
type=dataset_type,
|
135 |
+
split="Area_5",
|
136 |
+
data_root=data_root,
|
137 |
+
transform=[
|
138 |
+
dict(type="CenterShift", apply_z=True),
|
139 |
+
dict(
|
140 |
+
type="Copy",
|
141 |
+
keys_dict={"coord": "origin_coord", "segment": "origin_segment"},
|
142 |
+
),
|
143 |
+
dict(
|
144 |
+
type="GridSample",
|
145 |
+
grid_size=0.02,
|
146 |
+
hash_type="fnv",
|
147 |
+
mode="train",
|
148 |
+
return_grid_coord=True,
|
149 |
+
),
|
150 |
+
dict(type="CenterShift", apply_z=False),
|
151 |
+
dict(type="NormalizeColor"),
|
152 |
+
dict(type="ToTensor"),
|
153 |
+
dict(
|
154 |
+
type="Collect",
|
155 |
+
keys=(
|
156 |
+
"coord",
|
157 |
+
"grid_coord",
|
158 |
+
"origin_coord",
|
159 |
+
"segment",
|
160 |
+
"origin_segment",
|
161 |
+
),
|
162 |
+
offset_keys_dict=dict(offset="coord", origin_offset="origin_coord"),
|
163 |
+
feat_keys=("color", "normal"),
|
164 |
+
),
|
165 |
+
],
|
166 |
+
test_mode=False,
|
167 |
+
),
|
168 |
+
test=dict(
|
169 |
+
type=dataset_type,
|
170 |
+
split="Area_5",
|
171 |
+
data_root=data_root,
|
172 |
+
transform=[
|
173 |
+
dict(type="CenterShift", apply_z=True),
|
174 |
+
dict(type="NormalizeColor"),
|
175 |
+
],
|
176 |
+
test_mode=True,
|
177 |
+
test_cfg=dict(
|
178 |
+
voxelize=dict(
|
179 |
+
type="GridSample",
|
180 |
+
grid_size=0.02,
|
181 |
+
hash_type="fnv",
|
182 |
+
mode="test",
|
183 |
+
keys=("coord", "color", "normal"),
|
184 |
+
return_grid_coord=True,
|
185 |
+
),
|
186 |
+
crop=None,
|
187 |
+
post_transform=[
|
188 |
+
dict(type="CenterShift", apply_z=False),
|
189 |
+
dict(type="ToTensor"),
|
190 |
+
dict(
|
191 |
+
type="Collect",
|
192 |
+
keys=("coord", "grid_coord", "index"),
|
193 |
+
feat_keys=("color", "normal"),
|
194 |
+
),
|
195 |
+
],
|
196 |
+
aug_transform=[
|
197 |
+
[dict(type="RandomScale", scale=[0.9, 0.9])],
|
198 |
+
[dict(type="RandomScale", scale=[0.95, 0.95])],
|
199 |
+
[dict(type="RandomScale", scale=[1, 1])],
|
200 |
+
[dict(type="RandomScale", scale=[1.05, 1.05])],
|
201 |
+
[dict(type="RandomScale", scale=[1.1, 1.1])],
|
202 |
+
[
|
203 |
+
dict(type="RandomScale", scale=[0.9, 0.9]),
|
204 |
+
dict(type="RandomFlip", p=1),
|
205 |
+
],
|
206 |
+
[
|
207 |
+
dict(type="RandomScale", scale=[0.95, 0.95]),
|
208 |
+
dict(type="RandomFlip", p=1),
|
209 |
+
],
|
210 |
+
[
|
211 |
+
dict(type="RandomScale", scale=[1, 1]),
|
212 |
+
dict(type="RandomFlip", p=1),
|
213 |
+
],
|
214 |
+
[
|
215 |
+
dict(type="RandomScale", scale=[1.05, 1.05]),
|
216 |
+
dict(type="RandomFlip", p=1),
|
217 |
+
],
|
218 |
+
[
|
219 |
+
dict(type="RandomScale", scale=[1.1, 1.1]),
|
220 |
+
dict(type="RandomFlip", p=1),
|
221 |
+
],
|
222 |
+
],
|
223 |
+
),
|
224 |
+
),
|
225 |
+
)
|
Pointcept/configs/s3dis/semseg-pt-v3m1-2-ppt-extreme.py
ADDED
@@ -0,0 +1,487 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
PTv3 + PPT
|
3 |
+
Pre-trained on ScanNet + Structured3D
|
4 |
+
(S3DIS is commented out by default because of its long data-loading time; see https://github.com/Pointcept/Pointcept/issues/103)
|
5 |
+
In the original PPT paper, the three datasets are trained and validated jointly with
|
6 |
+
one shared-weight model. In PTv3, we trained on multiple datasets but validated on only a single dataset to
|
7 |
+
achieve extreme performance on one single dataset.
|
8 |
+
|
9 |
+
To enable joint training on all three datasets, uncomment the config for the S3DIS dataset and change the "loop" of
|
10 |
+
Structured3D and ScanNet to 4 and 2 respectively.
|
11 |
+
"""
|
12 |
+
|
13 |
+
_base_ = ["../_base_/default_runtime.py"]
|
14 |
+
|
15 |
+
# misc custom setting
|
16 |
+
batch_size = 24 # bs: total bs in all gpus
|
17 |
+
num_worker = 48
|
18 |
+
mix_prob = 0.8
|
19 |
+
empty_cache = False
|
20 |
+
enable_amp = True
|
21 |
+
find_unused_parameters = True
|
22 |
+
|
23 |
+
# trainer
|
24 |
+
train = dict(
|
25 |
+
type="MultiDatasetTrainer",
|
26 |
+
)
|
27 |
+
|
28 |
+
# model settings
|
29 |
+
model = dict(
|
30 |
+
type="PPT-v1m1",
|
31 |
+
backbone=dict(
|
32 |
+
type="PT-v3m1",
|
33 |
+
in_channels=6,
|
34 |
+
order=("z", "z-trans", "hilbert", "hilbert-trans"),
|
35 |
+
stride=(2, 2, 2, 2),
|
36 |
+
enc_depths=(2, 2, 2, 6, 2),
|
37 |
+
enc_channels=(32, 64, 128, 256, 512),
|
38 |
+
enc_num_head=(2, 4, 8, 16, 32),
|
39 |
+
enc_patch_size=(1024, 1024, 1024, 1024, 1024),
|
40 |
+
dec_depths=(2, 2, 2, 2),
|
41 |
+
dec_channels=(64, 64, 128, 256),
|
42 |
+
dec_num_head=(4, 4, 8, 16),
|
43 |
+
dec_patch_size=(1024, 1024, 1024, 1024),
|
44 |
+
mlp_ratio=4,
|
45 |
+
qkv_bias=True,
|
46 |
+
qk_scale=None,
|
47 |
+
attn_drop=0.0,
|
48 |
+
proj_drop=0.0,
|
49 |
+
drop_path=0.3,
|
50 |
+
shuffle_orders=True,
|
51 |
+
pre_norm=True,
|
52 |
+
enable_rpe=False,
|
53 |
+
enable_flash=True,
|
54 |
+
upcast_attention=False,
|
55 |
+
upcast_softmax=False,
|
56 |
+
cls_mode=False,
|
57 |
+
pdnorm_bn=True,
|
58 |
+
pdnorm_ln=True,
|
59 |
+
pdnorm_decouple=True,
|
60 |
+
pdnorm_adaptive=False,
|
61 |
+
pdnorm_affine=True,
|
62 |
+
pdnorm_conditions=("ScanNet", "S3DIS", "Structured3D"),
|
63 |
+
),
|
64 |
+
criteria=[
|
65 |
+
dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
|
66 |
+
dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
|
67 |
+
],
|
68 |
+
backbone_out_channels=64,
|
69 |
+
context_channels=256,
|
70 |
+
conditions=("Structured3D", "ScanNet", "S3DIS"),
|
71 |
+
template="[x]",
|
72 |
+
clip_model="ViT-B/16",
|
73 |
+
# fmt: off
|
74 |
+
class_name=(
|
75 |
+
"wall", "floor", "cabinet", "bed", "chair", "sofa", "table", "door",
|
76 |
+
"window", "bookshelf", "bookcase", "picture", "counter", "desk", "shelves", "curtain",
|
77 |
+
"dresser", "pillow", "mirror", "ceiling", "refrigerator", "television", "shower curtain", "nightstand",
|
78 |
+
"toilet", "sink", "lamp", "bathtub", "garbagebin", "board", "beam", "column",
|
79 |
+
"clutter", "otherstructure", "otherfurniture", "otherprop",
|
80 |
+
),
|
81 |
+
valid_index=(
|
82 |
+
(0, 1, 2, 3, 4, 5, 6, 7, 8, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 23, 25, 26, 33, 34, 35),
|
83 |
+
(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 15, 20, 22, 24, 25, 27, 34),
|
84 |
+
(0, 1, 4, 5, 6, 7, 8, 10, 19, 29, 30, 31, 32),
|
85 |
+
),
|
86 |
+
# fmt: on
|
87 |
+
backbone_mode=False,
|
88 |
+
)
|
89 |
+
|
90 |
+
# scheduler settings
|
91 |
+
epoch = 100
|
92 |
+
optimizer = dict(type="AdamW", lr=0.005, weight_decay=0.05)
|
93 |
+
scheduler = dict(
|
94 |
+
type="OneCycleLR",
|
95 |
+
max_lr=[0.005, 0.0005],
|
96 |
+
pct_start=0.05,
|
97 |
+
anneal_strategy="cos",
|
98 |
+
div_factor=10.0,
|
99 |
+
final_div_factor=1000.0,
|
100 |
+
)
|
101 |
+
param_dicts = [dict(keyword="block", lr=0.0005)]
|
102 |
+
|
103 |
+
# dataset settings
|
104 |
+
data = dict(
|
105 |
+
num_classes=13,
|
106 |
+
ignore_index=-1,
|
107 |
+
names=[
|
108 |
+
"ceiling",
|
109 |
+
"floor",
|
110 |
+
"wall",
|
111 |
+
"beam",
|
112 |
+
"column",
|
113 |
+
"window",
|
114 |
+
"door",
|
115 |
+
"table",
|
116 |
+
"chair",
|
117 |
+
"sofa",
|
118 |
+
"bookcase",
|
119 |
+
"board",
|
120 |
+
"clutter",
|
121 |
+
],
|
122 |
+
train=dict(
|
123 |
+
type="ConcatDataset",
|
124 |
+
datasets=[
|
125 |
+
# Structured3D
|
126 |
+
dict(
|
127 |
+
type="Structured3DDataset",
|
128 |
+
split=["train", "val", "test"],
|
129 |
+
data_root="data/structured3d",
|
130 |
+
transform=[
|
131 |
+
dict(type="CenterShift", apply_z=True),
|
132 |
+
dict(
|
133 |
+
type="RandomDropout",
|
134 |
+
dropout_ratio=0.2,
|
135 |
+
dropout_application_ratio=0.2,
|
136 |
+
),
|
137 |
+
# dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
|
138 |
+
dict(
|
139 |
+
type="RandomRotate",
|
140 |
+
angle=[-1, 1],
|
141 |
+
axis="z",
|
142 |
+
center=[0, 0, 0],
|
143 |
+
p=0.5,
|
144 |
+
),
|
145 |
+
dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
|
146 |
+
dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
|
147 |
+
dict(type="RandomScale", scale=[0.9, 1.1]),
|
148 |
+
# dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
|
149 |
+
dict(type="RandomFlip", p=0.5),
|
150 |
+
dict(type="RandomJitter", sigma=0.005, clip=0.02),
|
151 |
+
# dict(
|
152 |
+
# type="ElasticDistortion",
|
153 |
+
# distortion_params=[[0.2, 0.4], [0.8, 1.6]],
|
154 |
+
# ),
|
155 |
+
dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
|
156 |
+
dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
|
157 |
+
dict(type="ChromaticJitter", p=0.95, std=0.05),
|
158 |
+
# dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
|
159 |
+
# dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
|
160 |
+
dict(
|
161 |
+
type="GridSample",
|
162 |
+
grid_size=0.02,
|
163 |
+
hash_type="fnv",
|
164 |
+
mode="train",
|
165 |
+
return_grid_coord=True,
|
166 |
+
),
|
167 |
+
dict(type="SphereCrop", sample_rate=0.8, mode="random"),
|
168 |
+
dict(type="SphereCrop", point_max=204800, mode="random"),
|
169 |
+
dict(type="CenterShift", apply_z=False),
|
170 |
+
dict(type="NormalizeColor"),
|
171 |
+
# dict(type="ShufflePoint"),
|
172 |
+
dict(type="Add", keys_dict={"condition": "Structured3D"}),
|
173 |
+
dict(type="ToTensor"),
|
174 |
+
dict(
|
175 |
+
type="Collect",
|
176 |
+
keys=("coord", "grid_coord", "segment", "condition"),
|
177 |
+
feat_keys=("color", "normal"),
|
178 |
+
),
|
179 |
+
],
|
180 |
+
test_mode=False,
|
181 |
+
loop=4, # sampling weight
|
182 |
+
),
|
183 |
+
# ScanNet
|
184 |
+
dict(
|
185 |
+
type="ScanNetDataset",
|
186 |
+
split="train",
|
187 |
+
data_root="data/scannet",
|
188 |
+
transform=[
|
189 |
+
dict(type="CenterShift", apply_z=True),
|
190 |
+
dict(
|
191 |
+
type="RandomDropout",
|
192 |
+
dropout_ratio=0.2,
|
193 |
+
dropout_application_ratio=0.2,
|
194 |
+
),
|
195 |
+
# dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
|
196 |
+
dict(
|
197 |
+
type="RandomRotate",
|
198 |
+
angle=[-1, 1],
|
199 |
+
axis="z",
|
200 |
+
center=[0, 0, 0],
|
201 |
+
p=0.5,
|
202 |
+
),
|
203 |
+
dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
|
204 |
+
dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
|
205 |
+
dict(type="RandomScale", scale=[0.9, 1.1]),
|
206 |
+
# dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
|
207 |
+
dict(type="RandomFlip", p=0.5),
|
208 |
+
dict(type="RandomJitter", sigma=0.005, clip=0.02),
|
209 |
+
# dict(
|
210 |
+
# type="ElasticDistortion",
|
211 |
+
# distortion_params=[[0.2, 0.4], [0.8, 1.6]],
|
212 |
+
# ),
|
213 |
+
dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
|
214 |
+
dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
|
215 |
+
dict(type="ChromaticJitter", p=0.95, std=0.05),
|
216 |
+
# dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
|
217 |
+
# dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
|
218 |
+
dict(
|
219 |
+
type="GridSample",
|
220 |
+
grid_size=0.02,
|
221 |
+
hash_type="fnv",
|
222 |
+
mode="train",
|
223 |
+
return_grid_coord=True,
|
224 |
+
),
|
225 |
+
dict(type="SphereCrop", point_max=102400, mode="random"),
|
226 |
+
dict(type="CenterShift", apply_z=False),
|
227 |
+
dict(type="NormalizeColor"),
|
228 |
+
# dict(type="ShufflePoint"),
|
229 |
+
dict(type="Add", keys_dict={"condition": "ScanNet"}),
|
230 |
+
dict(type="ToTensor"),
|
231 |
+
dict(
|
232 |
+
type="Collect",
|
233 |
+
keys=("coord", "grid_coord", "segment", "condition"),
|
234 |
+
feat_keys=("color", "normal"),
|
235 |
+
),
|
236 |
+
],
|
237 |
+
test_mode=False,
|
238 |
+
loop=2, # sampling weight
|
239 |
+
),
|
240 |
+
# S3DIS
|
241 |
+
dict(
|
242 |
+
type="S3DISDataset",
|
243 |
+
split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"),
|
244 |
+
data_root="data/s3dis",
|
245 |
+
transform=[
|
246 |
+
dict(type="CenterShift", apply_z=True),
|
247 |
+
dict(
|
248 |
+
type="RandomDropout",
|
249 |
+
dropout_ratio=0.2,
|
250 |
+
dropout_application_ratio=0.2,
|
251 |
+
),
|
252 |
+
# dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
|
253 |
+
dict(
|
254 |
+
type="RandomRotate",
|
255 |
+
angle=[-1, 1],
|
256 |
+
axis="z",
|
257 |
+
center=[0, 0, 0],
|
258 |
+
p=0.5,
|
259 |
+
),
|
260 |
+
dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
|
261 |
+
dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
|
262 |
+
dict(type="RandomScale", scale=[0.9, 1.1]),
|
263 |
+
# dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
|
264 |
+
dict(type="RandomFlip", p=0.5),
|
265 |
+
dict(type="RandomJitter", sigma=0.005, clip=0.02),
|
266 |
+
# dict(
|
267 |
+
# type="ElasticDistortion",
|
268 |
+
# distortion_params=[[0.2, 0.4], [0.8, 1.6]],
|
269 |
+
# ),
|
270 |
+
dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
|
271 |
+
dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
|
272 |
+
dict(type="ChromaticJitter", p=0.95, std=0.05),
|
273 |
+
# dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
|
274 |
+
# dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
|
275 |
+
dict(
|
276 |
+
type="GridSample",
|
277 |
+
grid_size=0.02,
|
278 |
+
hash_type="fnv",
|
279 |
+
mode="train",
|
280 |
+
return_grid_coord=True,
|
281 |
+
),
|
282 |
+
dict(type="SphereCrop", sample_rate=0.6, mode="random"),
|
283 |
+
dict(type="SphereCrop", point_max=204800, mode="random"),
|
284 |
+
dict(type="CenterShift", apply_z=False),
|
285 |
+
dict(type="NormalizeColor"),
|
286 |
+
# dict(type="ShufflePoint"),
|
287 |
+
dict(type="Add", keys_dict={"condition": "S3DIS"}),
|
288 |
+
dict(type="ToTensor"),
|
289 |
+
dict(
|
290 |
+
type="Collect",
|
291 |
+
keys=("coord", "grid_coord", "segment", "condition"),
|
292 |
+
feat_keys=("color", "normal"),
|
293 |
+
),
|
294 |
+
],
|
295 |
+
test_mode=False,
|
296 |
+
loop=1, # sampling weight
|
297 |
+
),
|
298 |
+
],
|
299 |
+
),
|
300 |
+
val=dict(
|
301 |
+
type="S3DISDataset",
|
302 |
+
split="Area_5",
|
303 |
+
data_root="data/s3dis",
|
304 |
+
transform=[
|
305 |
+
dict(type="CenterShift", apply_z=True),
|
306 |
+
dict(
|
307 |
+
type="Copy",
|
308 |
+
keys_dict={"coord": "origin_coord", "segment": "origin_segment"},
|
309 |
+
),
|
310 |
+
dict(
|
311 |
+
type="GridSample",
|
312 |
+
grid_size=0.02,
|
313 |
+
hash_type="fnv",
|
314 |
+
mode="train",
|
315 |
+
return_grid_coord=True,
|
316 |
+
),
|
317 |
+
dict(type="CenterShift", apply_z=False),
|
318 |
+
dict(type="NormalizeColor"),
|
319 |
+
dict(type="ToTensor"),
|
320 |
+
dict(type="Add", keys_dict={"condition": "S3DIS"}),
|
321 |
+
dict(
|
322 |
+
type="Collect",
|
323 |
+
keys=(
|
324 |
+
"coord",
|
325 |
+
"grid_coord",
|
326 |
+
"origin_coord",
|
327 |
+
"segment",
|
328 |
+
"origin_segment",
|
329 |
+
"condition",
|
330 |
+
),
|
331 |
+
offset_keys_dict=dict(offset="coord", origin_offset="origin_coord"),
|
332 |
+
feat_keys=("color", "normal"),
|
333 |
+
),
|
334 |
+
],
|
335 |
+
test_mode=False,
|
336 |
+
),
|
337 |
+
test=dict(
|
338 |
+
type="S3DISDataset",
|
339 |
+
split="Area_5",
|
340 |
+
data_root="data/s3dis",
|
341 |
+
transform=[
|
342 |
+
dict(type="CenterShift", apply_z=True),
|
343 |
+
dict(type="NormalizeColor"),
|
344 |
+
],
|
345 |
+
test_mode=True,
|
346 |
+
test_cfg=dict(
|
347 |
+
voxelize=dict(
|
348 |
+
type="GridSample",
|
349 |
+
grid_size=0.02,
|
350 |
+
hash_type="fnv",
|
351 |
+
mode="test",
|
352 |
+
keys=("coord", "color", "normal"),
|
353 |
+
return_grid_coord=True,
|
354 |
+
),
|
355 |
+
crop=None,
|
356 |
+
post_transform=[
|
357 |
+
dict(type="CenterShift", apply_z=False),
|
358 |
+
dict(type="Add", keys_dict={"condition": "S3DIS"}),
|
359 |
+
dict(type="ToTensor"),
|
360 |
+
dict(
|
361 |
+
type="Collect",
|
362 |
+
keys=("coord", "grid_coord", "index", "condition"),
|
363 |
+
feat_keys=("color", "normal"),
|
364 |
+
),
|
365 |
+
],
|
366 |
+
aug_transform=[
|
367 |
+
[
|
368 |
+
dict(
|
369 |
+
type="RandomRotateTargetAngle",
|
370 |
+
angle=[0],
|
371 |
+
axis="z",
|
372 |
+
center=[0, 0, 0],
|
373 |
+
p=1,
|
374 |
+
)
|
375 |
+
],
|
376 |
+
[
|
377 |
+
dict(
|
378 |
+
type="RandomRotateTargetAngle",
|
379 |
+
angle=[1 / 2],
|
380 |
+
axis="z",
|
381 |
+
center=[0, 0, 0],
|
382 |
+
p=1,
|
383 |
+
)
|
384 |
+
],
|
385 |
+
[
|
386 |
+
dict(
|
387 |
+
type="RandomRotateTargetAngle",
|
388 |
+
angle=[1],
|
389 |
+
axis="z",
|
390 |
+
center=[0, 0, 0],
|
391 |
+
p=1,
|
392 |
+
)
|
393 |
+
],
|
394 |
+
[
|
395 |
+
dict(
|
396 |
+
type="RandomRotateTargetAngle",
|
397 |
+
angle=[3 / 2],
|
398 |
+
axis="z",
|
399 |
+
center=[0, 0, 0],
|
400 |
+
p=1,
|
401 |
+
)
|
402 |
+
],
|
403 |
+
[
|
404 |
+
dict(
|
405 |
+
type="RandomRotateTargetAngle",
|
406 |
+
angle=[0],
|
407 |
+
axis="z",
|
408 |
+
center=[0, 0, 0],
|
409 |
+
p=1,
|
410 |
+
),
|
411 |
+
dict(type="RandomScale", scale=[0.95, 0.95]),
|
412 |
+
],
|
413 |
+
[
|
414 |
+
dict(
|
415 |
+
type="RandomRotateTargetAngle",
|
416 |
+
angle=[1 / 2],
|
417 |
+
axis="z",
|
418 |
+
center=[0, 0, 0],
|
419 |
+
p=1,
|
420 |
+
),
|
421 |
+
dict(type="RandomScale", scale=[0.95, 0.95]),
|
422 |
+
],
|
423 |
+
[
|
424 |
+
dict(
|
425 |
+
type="RandomRotateTargetAngle",
|
426 |
+
angle=[1],
|
427 |
+
axis="z",
|
428 |
+
center=[0, 0, 0],
|
429 |
+
p=1,
|
430 |
+
),
|
431 |
+
dict(type="RandomScale", scale=[0.95, 0.95]),
|
432 |
+
],
|
433 |
+
[
|
434 |
+
dict(
|
435 |
+
type="RandomRotateTargetAngle",
|
436 |
+
angle=[3 / 2],
|
437 |
+
axis="z",
|
438 |
+
center=[0, 0, 0],
|
439 |
+
p=1,
|
440 |
+
),
|
441 |
+
dict(type="RandomScale", scale=[0.95, 0.95]),
|
442 |
+
],
|
443 |
+
[
|
444 |
+
dict(
|
445 |
+
type="RandomRotateTargetAngle",
|
446 |
+
angle=[0],
|
447 |
+
axis="z",
|
448 |
+
center=[0, 0, 0],
|
449 |
+
p=1,
|
450 |
+
),
|
451 |
+
dict(type="RandomScale", scale=[1.05, 1.05]),
|
452 |
+
],
|
453 |
+
[
|
454 |
+
dict(
|
455 |
+
type="RandomRotateTargetAngle",
|
456 |
+
angle=[1 / 2],
|
457 |
+
axis="z",
|
458 |
+
center=[0, 0, 0],
|
459 |
+
p=1,
|
460 |
+
),
|
461 |
+
dict(type="RandomScale", scale=[1.05, 1.05]),
|
462 |
+
],
|
463 |
+
[
|
464 |
+
dict(
|
465 |
+
type="RandomRotateTargetAngle",
|
466 |
+
angle=[1],
|
467 |
+
axis="z",
|
468 |
+
center=[0, 0, 0],
|
469 |
+
p=1,
|
470 |
+
),
|
471 |
+
dict(type="RandomScale", scale=[1.05, 1.05]),
|
472 |
+
],
|
473 |
+
[
|
474 |
+
dict(
|
475 |
+
type="RandomRotateTargetAngle",
|
476 |
+
angle=[3 / 2],
|
477 |
+
axis="z",
|
478 |
+
center=[0, 0, 0],
|
479 |
+
p=1,
|
480 |
+
),
|
481 |
+
dict(type="RandomScale", scale=[1.05, 1.05]),
|
482 |
+
],
|
483 |
+
[dict(type="RandomFlip", p=1)],
|
484 |
+
],
|
485 |
+
),
|
486 |
+
),
|
487 |
+
)
|
Pointcept/configs/s3dis/semseg-spunet-v1m1-0-base.py
ADDED
@@ -0,0 +1,168 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
_base_ = ["../_base_/default_runtime.py"]
|
2 |
+
# misc custom setting
|
3 |
+
batch_size = 12 # bs: total bs in all gpus
|
4 |
+
mix_prob = 0.8
|
5 |
+
empty_cache = False
|
6 |
+
enable_amp = True
|
7 |
+
|
8 |
+
# model settings
|
9 |
+
model = dict(
|
10 |
+
type="DefaultSegmentor",
|
11 |
+
backbone=dict(
|
12 |
+
type="SpUNet-v1m1",
|
13 |
+
in_channels=6,
|
14 |
+
num_classes=13,
|
15 |
+
channels=(32, 64, 128, 256, 256, 128, 96, 96),
|
16 |
+
layers=(2, 3, 4, 6, 2, 2, 2, 2),
|
17 |
+
),
|
18 |
+
criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
|
19 |
+
)
|
20 |
+
|
21 |
+
# scheduler settings
|
22 |
+
epoch = 3000
|
23 |
+
optimizer = dict(type="SGD", lr=0.1, momentum=0.9, weight_decay=0.0001, nesterov=True)
|
24 |
+
scheduler = dict(type="PolyLR")
|
25 |
+
|
26 |
+
# dataset settings
|
27 |
+
dataset_type = "S3DISDataset"
|
28 |
+
data_root = "data/s3dis"
|
29 |
+
|
30 |
+
data = dict(
|
31 |
+
num_classes=13,
|
32 |
+
ignore_index=-1,
|
33 |
+
names=[
|
34 |
+
"ceiling",
|
35 |
+
"floor",
|
36 |
+
"wall",
|
37 |
+
"beam",
|
38 |
+
"column",
|
39 |
+
"window",
|
40 |
+
"door",
|
41 |
+
"table",
|
42 |
+
"chair",
|
43 |
+
"sofa",
|
44 |
+
"bookcase",
|
45 |
+
"board",
|
46 |
+
"clutter",
|
47 |
+
],
|
48 |
+
train=dict(
|
49 |
+
type=dataset_type,
|
50 |
+
split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"),
|
51 |
+
data_root=data_root,
|
52 |
+
transform=[
|
53 |
+
dict(type="CenterShift", apply_z=True),
|
54 |
+
dict(
|
55 |
+
type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
|
56 |
+
),
|
57 |
+
# dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
|
58 |
+
dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
|
59 |
+
dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
|
60 |
+
dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
|
61 |
+
dict(type="RandomScale", scale=[0.9, 1.1]),
|
62 |
+
# dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
|
63 |
+
dict(type="RandomFlip", p=0.5),
|
64 |
+
dict(type="RandomJitter", sigma=0.005, clip=0.02),
|
65 |
+
dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
|
66 |
+
dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
|
67 |
+
dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
|
68 |
+
dict(type="ChromaticJitter", p=0.95, std=0.05),
|
69 |
+
# dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
|
70 |
+
# dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
|
71 |
+
dict(
|
72 |
+
type="GridSample",
|
73 |
+
grid_size=0.05,
|
74 |
+
hash_type="fnv",
|
75 |
+
mode="train",
|
76 |
+
keys=("coord", "color", "segment"),
|
77 |
+
return_grid_coord=True,
|
78 |
+
),
|
79 |
+
dict(type="SphereCrop", point_max=100000, mode="random"),
|
80 |
+
dict(type="CenterShift", apply_z=False),
|
81 |
+
dict(type="NormalizeColor"),
|
82 |
+
dict(type="ShufflePoint"),
|
83 |
+
dict(type="ToTensor"),
|
84 |
+
dict(
|
85 |
+
type="Collect",
|
86 |
+
keys=("coord", "grid_coord", "segment"),
|
87 |
+
feat_keys=["coord", "color"],
|
88 |
+
),
|
89 |
+
],
|
90 |
+
test_mode=False,
|
91 |
+
),
|
92 |
+
val=dict(
|
93 |
+
type=dataset_type,
|
94 |
+
split="Area_5",
|
95 |
+
data_root=data_root,
|
96 |
+
transform=[
|
97 |
+
dict(type="CenterShift", apply_z=True),
|
98 |
+
dict(
|
99 |
+
type="GridSample",
|
100 |
+
grid_size=0.05,
|
101 |
+
hash_type="fnv",
|
102 |
+
mode="train",
|
103 |
+
keys=("coord", "color", "segment"),
|
104 |
+
return_grid_coord=True,
|
105 |
+
),
|
106 |
+
dict(type="CenterShift", apply_z=False),
|
107 |
+
dict(type="NormalizeColor"),
|
108 |
+
dict(type="ToTensor"),
|
109 |
+
dict(
|
110 |
+
type="Collect",
|
111 |
+
keys=("coord", "grid_coord", "segment"),
|
112 |
+
feat_keys=["coord", "color"],
|
113 |
+
),
|
114 |
+
],
|
115 |
+
test_mode=False,
|
116 |
+
),
|
117 |
+
test=dict(
|
118 |
+
type=dataset_type,
|
119 |
+
split="Area_5",
|
120 |
+
data_root=data_root,
|
121 |
+
transform=[dict(type="CenterShift", apply_z=True), dict(type="NormalizeColor")],
|
122 |
+
test_mode=True,
|
123 |
+
test_cfg=dict(
|
124 |
+
voxelize=dict(
|
125 |
+
type="GridSample",
|
126 |
+
grid_size=0.05,
|
127 |
+
hash_type="fnv",
|
128 |
+
mode="test",
|
129 |
+
keys=("coord", "color"),
|
130 |
+
return_grid_coord=True,
|
131 |
+
),
|
132 |
+
crop=None,
|
133 |
+
post_transform=[
|
134 |
+
dict(type="CenterShift", apply_z=False),
|
135 |
+
dict(type="ToTensor"),
|
136 |
+
dict(
|
137 |
+
type="Collect",
|
138 |
+
keys=("coord", "grid_coord", "index"),
|
139 |
+
feat_keys=("coord", "color"),
|
140 |
+
),
|
141 |
+
],
|
142 |
+
aug_transform=[
|
143 |
+
[dict(type="RandomScale", scale=[0.9, 0.9])],
|
144 |
+
[dict(type="RandomScale", scale=[0.95, 0.95])],
|
145 |
+
[dict(type="RandomScale", scale=[1, 1])],
|
146 |
+
[dict(type="RandomScale", scale=[1.05, 1.05])],
|
147 |
+
[dict(type="RandomScale", scale=[1.1, 1.1])],
|
148 |
+
[
|
149 |
+
dict(type="RandomScale", scale=[0.9, 0.9]),
|
150 |
+
dict(type="RandomFlip", p=1),
|
151 |
+
],
|
152 |
+
[
|
153 |
+
dict(type="RandomScale", scale=[0.95, 0.95]),
|
154 |
+
dict(type="RandomFlip", p=1),
|
155 |
+
],
|
156 |
+
[dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)],
|
157 |
+
[
|
158 |
+
dict(type="RandomScale", scale=[1.05, 1.05]),
|
159 |
+
dict(type="RandomFlip", p=1),
|
160 |
+
],
|
161 |
+
[
|
162 |
+
dict(type="RandomScale", scale=[1.1, 1.1]),
|
163 |
+
dict(type="RandomFlip", p=1),
|
164 |
+
],
|
165 |
+
],
|
166 |
+
),
|
167 |
+
),
|
168 |
+
)
|
Pointcept/configs/s3dis/semseg-spunet-v1m1-0-cn-base.py
ADDED
@@ -0,0 +1,181 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# spconv is fast enough that data loading becomes the bottleneck; caching the data is a better choice.
|
2 |
+
|
3 |
+
|
4 |
+
_base_ = ["../_base_/default_runtime.py"]
|
5 |
+
# misc custom setting
|
6 |
+
batch_size = 12 # bs: total bs in all gpus
|
7 |
+
mix_prob = 0.8
|
8 |
+
empty_cache = False
|
9 |
+
enable_amp = True
|
10 |
+
|
11 |
+
# model settings
|
12 |
+
model = dict(
|
13 |
+
type="DefaultSegmentor",
|
14 |
+
backbone=dict(
|
15 |
+
type="SpUNet-v1m1",
|
16 |
+
in_channels=6,
|
17 |
+
num_classes=13,
|
18 |
+
channels=(32, 64, 128, 256, 256, 128, 96, 96),
|
19 |
+
layers=(2, 3, 4, 6, 2, 2, 2, 2),
|
20 |
+
),
|
21 |
+
criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
|
22 |
+
)
|
23 |
+
|
24 |
+
# scheduler settings
|
25 |
+
epoch = 3000
|
26 |
+
optimizer = dict(type="SGD", lr=0.1, momentum=0.9, weight_decay=0.0001, nesterov=True)
|
27 |
+
scheduler = dict(type="PolyLR")
|
28 |
+
|
29 |
+
|
30 |
+
# dataset settings
|
31 |
+
dataset_type = "S3DISDataset"
|
32 |
+
data_root = "data/s3dis"
|
33 |
+
|
34 |
+
data = dict(
|
35 |
+
num_classes=13,
|
36 |
+
ignore_index=-1,
|
37 |
+
names=[
|
38 |
+
"ceiling",
|
39 |
+
"floor",
|
40 |
+
"wall",
|
41 |
+
"beam",
|
42 |
+
"column",
|
43 |
+
"window",
|
44 |
+
"door",
|
45 |
+
"table",
|
46 |
+
"chair",
|
47 |
+
"sofa",
|
48 |
+
"bookcase",
|
49 |
+
"board",
|
50 |
+
"clutter",
|
51 |
+
],
|
52 |
+
train=dict(
|
53 |
+
type=dataset_type,
|
54 |
+
split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"),
|
55 |
+
data_root=data_root,
|
56 |
+
transform=[
|
57 |
+
dict(type="CenterShift", apply_z=True),
|
58 |
+
dict(
|
59 |
+
type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
|
60 |
+
),
|
61 |
+
# dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
|
62 |
+
dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
|
63 |
+
dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
|
64 |
+
dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
|
65 |
+
dict(type="RandomScale", scale=[0.9, 1.1]),
|
66 |
+
# dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
|
67 |
+
dict(type="RandomFlip", p=0.5),
|
68 |
+
dict(type="RandomJitter", sigma=0.005, clip=0.02),
|
69 |
+
dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
|
70 |
+
dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
|
71 |
+
dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
|
72 |
+
dict(type="ChromaticJitter", p=0.95, std=0.05),
|
73 |
+
# dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
|
74 |
+
# dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
|
75 |
+
dict(
|
76 |
+
type="GridSample",
|
77 |
+
grid_size=0.05,
|
78 |
+
hash_type="fnv",
|
79 |
+
mode="train",
|
80 |
+
return_grid_coord=True,
|
81 |
+
),
|
82 |
+
dict(type="SphereCrop", point_max=100000, mode="random"),
|
83 |
+
dict(type="CenterShift", apply_z=False),
|
84 |
+
dict(type="NormalizeColor"),
|
85 |
+
dict(type="ShufflePoint"),
|
86 |
+
dict(type="ToTensor"),
|
87 |
+
dict(
|
88 |
+
type="Collect",
|
89 |
+
keys=("coord", "grid_coord", "segment"),
|
90 |
+
feat_keys=["color", "normal"],
|
91 |
+
),
|
92 |
+
],
|
93 |
+
test_mode=False,
|
94 |
+
),
|
95 |
+
val=dict(
|
96 |
+
type=dataset_type,
|
97 |
+
split="Area_5",
|
98 |
+
data_root=data_root,
|
99 |
+
transform=[
|
100 |
+
dict(type="CenterShift", apply_z=True),
|
101 |
+
dict(
|
102 |
+
type="Copy",
|
103 |
+
keys_dict={"coord": "origin_coord", "segment": "origin_segment"},
|
104 |
+
),
|
105 |
+
dict(
|
106 |
+
type="GridSample",
|
107 |
+
grid_size=0.05,
|
108 |
+
hash_type="fnv",
|
109 |
+
mode="train",
|
110 |
+
return_grid_coord=True,
|
111 |
+
),
|
112 |
+
dict(type="CenterShift", apply_z=False),
|
113 |
+
dict(type="NormalizeColor"),
|
114 |
+
dict(type="ToTensor"),
|
115 |
+
dict(
|
116 |
+
type="Collect",
|
117 |
+
keys=(
|
118 |
+
"coord",
|
119 |
+
"grid_coord",
|
120 |
+
"origin_coord",
|
121 |
+
"segment",
|
122 |
+
"origin_segment",
|
123 |
+
),
|
124 |
+
offset_keys_dict=dict(offset="coord", origin_offset="origin_coord"),
|
125 |
+
feat_keys=["color", "normal"],
|
126 |
+
),
|
127 |
+
],
|
128 |
+
test_mode=False,
|
129 |
+
),
|
130 |
+
test=dict(
|
131 |
+
type=dataset_type,
|
132 |
+
split="Area_5",
|
133 |
+
data_root=data_root,
|
134 |
+
transform=[dict(type="CenterShift", apply_z=True), dict(type="NormalizeColor")],
|
135 |
+
test_mode=True,
|
136 |
+
test_cfg=dict(
|
137 |
+
voxelize=dict(
|
138 |
+
type="GridSample",
|
139 |
+
grid_size=0.05,
|
140 |
+
hash_type="fnv",
|
141 |
+
mode="test",
|
142 |
+
keys=("coord", "color", "normal"),
|
143 |
+
return_grid_coord=True,
|
144 |
+
),
|
145 |
+
crop=None,
|
146 |
+
post_transform=[
|
147 |
+
dict(type="CenterShift", apply_z=False),
|
148 |
+
dict(type="ToTensor"),
|
149 |
+
dict(
|
150 |
+
type="Collect",
|
151 |
+
keys=("coord", "grid_coord", "index"),
|
152 |
+
feat_keys=("color", "normal"),
|
153 |
+
),
|
154 |
+
],
|
155 |
+
aug_transform=[
|
156 |
+
[dict(type="RandomScale", scale=[0.9, 0.9])],
|
157 |
+
[dict(type="RandomScale", scale=[0.95, 0.95])],
|
158 |
+
[dict(type="RandomScale", scale=[1, 1])],
|
159 |
+
[dict(type="RandomScale", scale=[1.05, 1.05])],
|
160 |
+
[dict(type="RandomScale", scale=[1.1, 1.1])],
|
161 |
+
[
|
162 |
+
dict(type="RandomScale", scale=[0.9, 0.9]),
|
163 |
+
dict(type="RandomFlip", p=1),
|
164 |
+
],
|
165 |
+
[
|
166 |
+
dict(type="RandomScale", scale=[0.95, 0.95]),
|
167 |
+
dict(type="RandomFlip", p=1),
|
168 |
+
],
|
169 |
+
[dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)],
|
170 |
+
[
|
171 |
+
dict(type="RandomScale", scale=[1.05, 1.05]),
|
172 |
+
dict(type="RandomFlip", p=1),
|
173 |
+
],
|
174 |
+
[
|
175 |
+
dict(type="RandomScale", scale=[1.1, 1.1]),
|
176 |
+
dict(type="RandomFlip", p=1),
|
177 |
+
],
|
178 |
+
],
|
179 |
+
),
|
180 |
+
),
|
181 |
+
)
|
Pointcept/configs/s3dis/semseg-spunet-v1m2-0-base.py
ADDED
@@ -0,0 +1,184 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# spconv is fast enough that data loading becomes the bottleneck; caching the dataset is the better choice.
|
2 |
+
|
3 |
+
|
4 |
+
_base_ = ["../_base_/default_runtime.py"]
|
5 |
+
# misc custom setting
|
6 |
+
batch_size = 48 # bs: total bs in all gpus
|
7 |
+
mix_prob = 0.8
|
8 |
+
empty_cache = False
|
9 |
+
enable_amp = True
|
10 |
+
|
11 |
+
# model settings
|
12 |
+
model = dict(
|
13 |
+
type="DefaultSegmentor",
|
14 |
+
backbone=dict(
|
15 |
+
type="SpUNet-v1m2",
|
16 |
+
in_channels=3,
|
17 |
+
num_classes=13,
|
18 |
+
channels=(32, 64, 128, 256, 256, 128, 96, 96),
|
19 |
+
layers=(2, 3, 4, 6, 2, 2, 2, 2),
|
20 |
+
bn_momentum=0.1,
|
21 |
+
),
|
22 |
+
criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
|
23 |
+
)
|
24 |
+
|
25 |
+
# scheduler settings
|
26 |
+
epoch = 3000
|
27 |
+
optimizer = dict(type="SGD", lr=0.1, momentum=0.9, weight_decay=0.0001, nesterov=True)
|
28 |
+
scheduler = dict(type="PolyLR")
|
29 |
+
|
30 |
+
|
31 |
+
# dataset settings
|
32 |
+
dataset_type = "S3DISDataset"
|
33 |
+
data_root = "data/s3dis"
|
34 |
+
|
35 |
+
data = dict(
|
36 |
+
num_classes=13,
|
37 |
+
ignore_index=-1,
|
38 |
+
names=[
|
39 |
+
"ceiling",
|
40 |
+
"floor",
|
41 |
+
"wall",
|
42 |
+
"beam",
|
43 |
+
"column",
|
44 |
+
"window",
|
45 |
+
"door",
|
46 |
+
"table",
|
47 |
+
"chair",
|
48 |
+
"sofa",
|
49 |
+
"bookcase",
|
50 |
+
"board",
|
51 |
+
"clutter",
|
52 |
+
],
|
53 |
+
train=dict(
|
54 |
+
type=dataset_type,
|
55 |
+
split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"),
|
56 |
+
data_root=data_root,
|
57 |
+
transform=[
|
58 |
+
dict(type="CenterShift", apply_z=True),
|
59 |
+
dict(
|
60 |
+
type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
|
61 |
+
),
|
62 |
+
# dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
|
63 |
+
dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
|
64 |
+
dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
|
65 |
+
dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
|
66 |
+
dict(type="RandomScale", scale=[0.9, 1.1]),
|
67 |
+
# dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
|
68 |
+
dict(type="RandomFlip", p=0.5),
|
69 |
+
dict(type="RandomJitter", sigma=0.005, clip=0.02),
|
70 |
+
dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
|
71 |
+
dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
|
72 |
+
dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
|
73 |
+
dict(type="ChromaticJitter", p=0.95, std=0.05),
|
74 |
+
# dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
|
75 |
+
# dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
|
76 |
+
dict(
|
77 |
+
type="GridSample",
|
78 |
+
grid_size=0.05,
|
79 |
+
hash_type="fnv",
|
80 |
+
mode="train",
|
81 |
+
keys=("coord", "color", "segment"),
|
82 |
+
return_grid_coord=True,
|
83 |
+
),
|
84 |
+
dict(type="SphereCrop", point_max=100000, mode="random"),
|
85 |
+
dict(type="CenterShift", apply_z=False),
|
86 |
+
dict(type="NormalizeColor"),
|
87 |
+
dict(type="ShufflePoint"),
|
88 |
+
dict(type="ToTensor"),
|
89 |
+
dict(
|
90 |
+
type="Collect",
|
91 |
+
keys=("coord", "grid_coord", "segment"),
|
92 |
+
feat_keys=["color"],
|
93 |
+
),
|
94 |
+
],
|
95 |
+
test_mode=False,
|
96 |
+
),
|
97 |
+
val=dict(
|
98 |
+
type=dataset_type,
|
99 |
+
split="Area_5",
|
100 |
+
data_root=data_root,
|
101 |
+
transform=[
|
102 |
+
dict(type="CenterShift", apply_z=True),
|
103 |
+
dict(
|
104 |
+
type="Copy",
|
105 |
+
keys_dict={"coord": "origin_coord", "segment": "origin_segment"},
|
106 |
+
),
|
107 |
+
dict(
|
108 |
+
type="GridSample",
|
109 |
+
grid_size=0.05,
|
110 |
+
hash_type="fnv",
|
111 |
+
mode="train",
|
112 |
+
keys=("coord", "color", "segment"),
|
113 |
+
return_grid_coord=True,
|
114 |
+
),
|
115 |
+
dict(type="CenterShift", apply_z=False),
|
116 |
+
dict(type="NormalizeColor"),
|
117 |
+
dict(type="ToTensor"),
|
118 |
+
dict(
|
119 |
+
type="Collect",
|
120 |
+
keys=(
|
121 |
+
"coord",
|
122 |
+
"grid_coord",
|
123 |
+
"origin_coord",
|
124 |
+
"segment",
|
125 |
+
"origin_segment",
|
126 |
+
),
|
127 |
+
offset_keys_dict=dict(offset="coord", origin_offset="origin_coord"),
|
128 |
+
feat_keys=["color"],
|
129 |
+
),
|
130 |
+
],
|
131 |
+
test_mode=False,
|
132 |
+
),
|
133 |
+
test=dict(
|
134 |
+
type=dataset_type,
|
135 |
+
split="Area_5",
|
136 |
+
data_root=data_root,
|
137 |
+
transform=[dict(type="CenterShift", apply_z=True), dict(type="NormalizeColor")],
|
138 |
+
test_mode=True,
|
139 |
+
test_cfg=dict(
|
140 |
+
voxelize=dict(
|
141 |
+
type="GridSample",
|
142 |
+
grid_size=0.05,
|
143 |
+
hash_type="fnv",
|
144 |
+
mode="test",
|
145 |
+
keys=("coord", "color"),
|
146 |
+
return_grid_coord=True,
|
147 |
+
),
|
148 |
+
crop=None,
|
149 |
+
post_transform=[
|
150 |
+
dict(type="CenterShift", apply_z=False),
|
151 |
+
dict(type="ToTensor"),
|
152 |
+
dict(
|
153 |
+
type="Collect",
|
154 |
+
keys=("coord", "grid_coord", "index"),
|
155 |
+
feat_keys=("coord", "color"),
|
156 |
+
),
|
157 |
+
],
|
158 |
+
aug_transform=[
|
159 |
+
[dict(type="RandomScale", scale=[0.9, 0.9])],
|
160 |
+
[dict(type="RandomScale", scale=[0.95, 0.95])],
|
161 |
+
[dict(type="RandomScale", scale=[1, 1])],
|
162 |
+
[dict(type="RandomScale", scale=[1.05, 1.05])],
|
163 |
+
[dict(type="RandomScale", scale=[1.1, 1.1])],
|
164 |
+
[
|
165 |
+
dict(type="RandomScale", scale=[0.9, 0.9]),
|
166 |
+
dict(type="RandomFlip", p=1),
|
167 |
+
],
|
168 |
+
[
|
169 |
+
dict(type="RandomScale", scale=[0.95, 0.95]),
|
170 |
+
dict(type="RandomFlip", p=1),
|
171 |
+
],
|
172 |
+
[dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)],
|
173 |
+
[
|
174 |
+
dict(type="RandomScale", scale=[1.05, 1.05]),
|
175 |
+
dict(type="RandomFlip", p=1),
|
176 |
+
],
|
177 |
+
[
|
178 |
+
dict(type="RandomScale", scale=[1.1, 1.1]),
|
179 |
+
dict(type="RandomFlip", p=1),
|
180 |
+
],
|
181 |
+
],
|
182 |
+
),
|
183 |
+
),
|
184 |
+
)
|
Pointcept/configs/s3dis/semseg-swin3d-v1m1-0-small.py
ADDED
@@ -0,0 +1,184 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
_base_ = ["../_base_/default_runtime.py"]
|
2 |
+
# misc custom setting
|
3 |
+
batch_size = 12 # bs: total bs in all gpus
|
4 |
+
mix_prob = 0.8
|
5 |
+
empty_cache = False
|
6 |
+
enable_amp = True
|
7 |
+
|
8 |
+
# model settings
|
9 |
+
model = dict(
|
10 |
+
type="DefaultSegmentor",
|
11 |
+
backbone=dict(
|
12 |
+
type="Swin3D-v1m1",
|
13 |
+
in_channels=9,
|
14 |
+
num_classes=13,
|
15 |
+
base_grid_size=0.02,
|
16 |
+
depths=[2, 4, 9, 4, 4],
|
17 |
+
channels=[48, 96, 192, 384, 384],
|
18 |
+
num_heads=[6, 6, 12, 24, 24],
|
19 |
+
window_sizes=[5, 7, 7, 7, 7],
|
20 |
+
quant_size=4,
|
21 |
+
drop_path_rate=0.3,
|
22 |
+
up_k=3,
|
23 |
+
num_layers=5,
|
24 |
+
stem_transformer=True,
|
25 |
+
down_stride=3,
|
26 |
+
upsample="linear_attn",
|
27 |
+
knn_down=True,
|
28 |
+
cRSE="XYZ_RGB_NORM",
|
29 |
+
fp16_mode=1,
|
30 |
+
),
|
31 |
+
criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
|
32 |
+
)
|
33 |
+
|
34 |
+
# scheduler settings
|
35 |
+
epoch = 3000
|
36 |
+
optimizer = dict(type="AdamW", lr=0.001, weight_decay=0.05)
|
37 |
+
scheduler = dict(type="MultiStepLR", milestones=[0.6, 0.8], gamma=0.1)
|
38 |
+
param_dicts = [dict(keyword="blocks", lr=0.0001)]
|
39 |
+
|
40 |
+
# dataset settings
|
41 |
+
dataset_type = "S3DISDataset"
|
42 |
+
data_root = "data/s3dis"
|
43 |
+
|
44 |
+
data = dict(
|
45 |
+
num_classes=13,
|
46 |
+
ignore_index=-1,
|
47 |
+
names=[
|
48 |
+
"ceiling",
|
49 |
+
"floor",
|
50 |
+
"wall",
|
51 |
+
"beam",
|
52 |
+
"column",
|
53 |
+
"window",
|
54 |
+
"door",
|
55 |
+
"table",
|
56 |
+
"chair",
|
57 |
+
"sofa",
|
58 |
+
"bookcase",
|
59 |
+
"board",
|
60 |
+
"clutter",
|
61 |
+
],
|
62 |
+
train=dict(
|
63 |
+
type=dataset_type,
|
64 |
+
split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"),
|
65 |
+
data_root=data_root,
|
66 |
+
transform=[
|
67 |
+
dict(type="CenterShift", apply_z=True),
|
68 |
+
# dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
|
69 |
+
# dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
|
70 |
+
dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
|
71 |
+
dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
|
72 |
+
dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
|
73 |
+
dict(type="RandomScale", scale=[0.8, 1.2]),
|
74 |
+
# dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
|
75 |
+
dict(type="RandomFlip", p=0.5),
|
76 |
+
dict(type="RandomJitter", sigma=0.005, clip=0.02),
|
77 |
+
# dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
|
78 |
+
dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
|
79 |
+
dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
|
80 |
+
dict(type="ChromaticJitter", p=0.95, std=0.05),
|
81 |
+
# dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
|
82 |
+
dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
|
83 |
+
dict(
|
84 |
+
type="GridSample",
|
85 |
+
grid_size=0.04,
|
86 |
+
hash_type="fnv",
|
87 |
+
mode="train",
|
88 |
+
return_grid_coord=True,
|
89 |
+
return_displacement=True,
|
90 |
+
),
|
91 |
+
dict(type="SphereCrop", point_max=80000, mode="random"),
|
92 |
+
dict(type="CenterShift", apply_z=False),
|
93 |
+
dict(type="NormalizeColor"),
|
94 |
+
# dict(type="ShufflePoint"),
|
95 |
+
dict(type="ToTensor"),
|
96 |
+
dict(
|
97 |
+
type="Collect",
|
98 |
+
keys=("coord", "grid_coord", "segment"),
|
99 |
+
feat_keys=("color", "normal", "displacement"),
|
100 |
+
coord_feat_keys=("color", "normal"),
|
101 |
+
),
|
102 |
+
],
|
103 |
+
test_mode=False,
|
104 |
+
),
|
105 |
+
val=dict(
|
106 |
+
type=dataset_type,
|
107 |
+
split="Area_5",
|
108 |
+
data_root=data_root,
|
109 |
+
transform=[
|
110 |
+
dict(type="CenterShift", apply_z=True),
|
111 |
+
dict(
|
112 |
+
type="GridSample",
|
113 |
+
grid_size=0.04,
|
114 |
+
hash_type="fnv",
|
115 |
+
mode="train",
|
116 |
+
return_grid_coord=True,
|
117 |
+
return_displacement=True,
|
118 |
+
),
|
119 |
+
dict(type="CenterShift", apply_z=False),
|
120 |
+
dict(type="NormalizeColor"),
|
121 |
+
dict(type="ToTensor"),
|
122 |
+
dict(
|
123 |
+
type="Collect",
|
124 |
+
keys=("coord", "grid_coord", "segment"),
|
125 |
+
feat_keys=("color", "normal", "displacement"),
|
126 |
+
coord_feat_keys=("color", "normal"),
|
127 |
+
),
|
128 |
+
],
|
129 |
+
test_mode=False,
|
130 |
+
),
|
131 |
+
test=dict(
|
132 |
+
type=dataset_type,
|
133 |
+
split="Area_5",
|
134 |
+
data_root=data_root,
|
135 |
+
transform=[dict(type="CenterShift", apply_z=True), dict(type="NormalizeColor")],
|
136 |
+
test_mode=True,
|
137 |
+
test_cfg=dict(
|
138 |
+
voxelize=dict(
|
139 |
+
type="GridSample",
|
140 |
+
grid_size=0.04,
|
141 |
+
hash_type="fnv",
|
142 |
+
mode="test",
|
143 |
+
keys=("coord", "color", "normal"),
|
144 |
+
return_grid_coord=True,
|
145 |
+
return_displacement=True,
|
146 |
+
),
|
147 |
+
crop=None,
|
148 |
+
post_transform=[
|
149 |
+
dict(type="CenterShift", apply_z=False),
|
150 |
+
dict(type="ToTensor"),
|
151 |
+
dict(
|
152 |
+
type="Collect",
|
153 |
+
keys=("coord", "grid_coord", "index"),
|
154 |
+
feat_keys=("color", "normal", "displacement"),
|
155 |
+
coord_feat_keys=("color", "normal"),
|
156 |
+
),
|
157 |
+
],
|
158 |
+
aug_transform=[
|
159 |
+
[dict(type="RandomScale", scale=[0.9, 0.9])],
|
160 |
+
[dict(type="RandomScale", scale=[0.95, 0.95])],
|
161 |
+
[dict(type="RandomScale", scale=[1, 1])],
|
162 |
+
[dict(type="RandomScale", scale=[1.05, 1.05])],
|
163 |
+
[dict(type="RandomScale", scale=[1.1, 1.1])],
|
164 |
+
[
|
165 |
+
dict(type="RandomScale", scale=[0.9, 0.9]),
|
166 |
+
dict(type="RandomFlip", p=1),
|
167 |
+
],
|
168 |
+
[
|
169 |
+
dict(type="RandomScale", scale=[0.95, 0.95]),
|
170 |
+
dict(type="RandomFlip", p=1),
|
171 |
+
],
|
172 |
+
[dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)],
|
173 |
+
[
|
174 |
+
dict(type="RandomScale", scale=[1.05, 1.05]),
|
175 |
+
dict(type="RandomFlip", p=1),
|
176 |
+
],
|
177 |
+
[
|
178 |
+
dict(type="RandomScale", scale=[1.1, 1.1]),
|
179 |
+
dict(type="RandomFlip", p=1),
|
180 |
+
],
|
181 |
+
],
|
182 |
+
),
|
183 |
+
),
|
184 |
+
)
|
Pointcept/configs/s3dis/semseg-swin3d-v1m1-1-large.py
ADDED
@@ -0,0 +1,191 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
_base_ = ["../_base_/default_runtime.py"]
|
2 |
+
# misc custom setting
|
3 |
+
batch_size = 12 # bs: total bs in all gpus
|
4 |
+
mix_prob = 0.8
|
5 |
+
empty_cache = False
|
6 |
+
enable_amp = True
|
7 |
+
|
8 |
+
# model settings
|
9 |
+
model = dict(
|
10 |
+
type="DefaultSegmentor",
|
11 |
+
backbone=dict(
|
12 |
+
type="Swin3D-v1m1",
|
13 |
+
in_channels=9,
|
14 |
+
num_classes=13,
|
15 |
+
base_grid_size=0.02,
|
16 |
+
depths=[2, 4, 9, 4, 4],
|
17 |
+
channels=[80, 160, 320, 640, 640],
|
18 |
+
num_heads=[10, 10, 20, 40, 40],
|
19 |
+
window_sizes=[5, 7, 7, 7, 7],
|
20 |
+
quant_size=4,
|
21 |
+
drop_path_rate=0.3,
|
22 |
+
up_k=3,
|
23 |
+
num_layers=5,
|
24 |
+
stem_transformer=True,
|
25 |
+
down_stride=3,
|
26 |
+
upsample="linear_attn",
|
27 |
+
knn_down=True,
|
28 |
+
cRSE="XYZ_RGB_NORM",
|
29 |
+
fp16_mode=1,
|
30 |
+
),
|
31 |
+
criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
|
32 |
+
)
|
33 |
+
|
34 |
+
# scheduler settings
|
35 |
+
epoch = 3000
|
36 |
+
optimizer = dict(type="AdamW", lr=0.001, weight_decay=0.05)
|
37 |
+
scheduler = dict(
|
38 |
+
type="OneCycleLR",
|
39 |
+
max_lr=[0.001, 0.0001],
|
40 |
+
pct_start=0.05,
|
41 |
+
anneal_strategy="cos",
|
42 |
+
div_factor=10.0,
|
43 |
+
final_div_factor=1000.0,
|
44 |
+
)
|
45 |
+
param_dicts = [dict(keyword="blocks", lr=0.0001)]
|
46 |
+
|
47 |
+
# dataset settings
|
48 |
+
dataset_type = "S3DISDataset"
|
49 |
+
data_root = "data/s3dis"
|
50 |
+
|
51 |
+
data = dict(
|
52 |
+
num_classes=13,
|
53 |
+
ignore_index=-1,
|
54 |
+
names=[
|
55 |
+
"ceiling",
|
56 |
+
"floor",
|
57 |
+
"wall",
|
58 |
+
"beam",
|
59 |
+
"column",
|
60 |
+
"window",
|
61 |
+
"door",
|
62 |
+
"table",
|
63 |
+
"chair",
|
64 |
+
"sofa",
|
65 |
+
"bookcase",
|
66 |
+
"board",
|
67 |
+
"clutter",
|
68 |
+
],
|
69 |
+
train=dict(
|
70 |
+
type=dataset_type,
|
71 |
+
split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"),
|
72 |
+
data_root=data_root,
|
73 |
+
transform=[
|
74 |
+
dict(type="CenterShift", apply_z=True),
|
75 |
+
# dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
|
76 |
+
# dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
|
77 |
+
dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
|
78 |
+
dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
|
79 |
+
dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
|
80 |
+
dict(type="RandomScale", scale=[0.8, 1.2]),
|
81 |
+
# dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
|
82 |
+
dict(type="RandomFlip", p=0.5),
|
83 |
+
dict(type="RandomJitter", sigma=0.005, clip=0.02),
|
84 |
+
# dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
|
85 |
+
dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
|
86 |
+
dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
|
87 |
+
dict(type="ChromaticJitter", p=0.95, std=0.05),
|
88 |
+
# dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
|
89 |
+
dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
|
90 |
+
dict(
|
91 |
+
type="GridSample",
|
92 |
+
grid_size=0.04,
|
93 |
+
hash_type="fnv",
|
94 |
+
mode="train",
|
95 |
+
return_grid_coord=True,
|
96 |
+
return_displacement=True,
|
97 |
+
),
|
98 |
+
dict(type="SphereCrop", point_max=80000, mode="random"),
|
99 |
+
dict(type="CenterShift", apply_z=False),
|
100 |
+
dict(type="NormalizeColor"),
|
101 |
+
# dict(type="ShufflePoint"),
|
102 |
+
dict(type="ToTensor"),
|
103 |
+
dict(
|
104 |
+
type="Collect",
|
105 |
+
keys=("coord", "grid_coord", "segment"),
|
106 |
+
feat_keys=("color", "normal", "displacement"),
|
107 |
+
coord_feat_keys=("color", "normal"),
|
108 |
+
),
|
109 |
+
],
|
110 |
+
test_mode=False,
|
111 |
+
),
|
112 |
+
val=dict(
|
113 |
+
type=dataset_type,
|
114 |
+
split="Area_5",
|
115 |
+
data_root=data_root,
|
116 |
+
transform=[
|
117 |
+
dict(type="CenterShift", apply_z=True),
|
118 |
+
dict(
|
119 |
+
type="GridSample",
|
120 |
+
grid_size=0.04,
|
121 |
+
hash_type="fnv",
|
122 |
+
mode="train",
|
123 |
+
return_grid_coord=True,
|
124 |
+
return_displacement=True,
|
125 |
+
),
|
126 |
+
dict(type="CenterShift", apply_z=False),
|
127 |
+
dict(type="NormalizeColor"),
|
128 |
+
dict(type="ToTensor"),
|
129 |
+
dict(
|
130 |
+
type="Collect",
|
131 |
+
keys=("coord", "grid_coord", "segment"),
|
132 |
+
feat_keys=("color", "normal", "displacement"),
|
133 |
+
coord_feat_keys=("color", "normal"),
|
134 |
+
),
|
135 |
+
],
|
136 |
+
test_mode=False,
|
137 |
+
),
|
138 |
+
test=dict(
|
139 |
+
type=dataset_type,
|
140 |
+
split="Area_5",
|
141 |
+
data_root=data_root,
|
142 |
+
transform=[dict(type="CenterShift", apply_z=True), dict(type="NormalizeColor")],
|
143 |
+
test_mode=True,
|
144 |
+
test_cfg=dict(
|
145 |
+
voxelize=dict(
|
146 |
+
type="GridSample",
|
147 |
+
grid_size=0.04,
|
148 |
+
hash_type="fnv",
|
149 |
+
mode="test",
|
150 |
+
keys=("coord", "color", "normal"),
|
151 |
+
return_grid_coord=True,
|
152 |
+
return_displacement=True,
|
153 |
+
),
|
154 |
+
crop=None,
|
155 |
+
post_transform=[
|
156 |
+
dict(type="CenterShift", apply_z=False),
|
157 |
+
dict(type="ToTensor"),
|
158 |
+
dict(
|
159 |
+
type="Collect",
|
160 |
+
keys=("coord", "grid_coord", "index"),
|
161 |
+
feat_keys=("color", "normal", "displacement"),
|
162 |
+
coord_feat_keys=("color", "normal"),
|
163 |
+
),
|
164 |
+
],
|
165 |
+
aug_transform=[
|
166 |
+
[dict(type="RandomScale", scale=[0.9, 0.9])],
|
167 |
+
[dict(type="RandomScale", scale=[0.95, 0.95])],
|
168 |
+
[dict(type="RandomScale", scale=[1, 1])],
|
169 |
+
[dict(type="RandomScale", scale=[1.05, 1.05])],
|
170 |
+
[dict(type="RandomScale", scale=[1.1, 1.1])],
|
171 |
+
[
|
172 |
+
dict(type="RandomScale", scale=[0.9, 0.9]),
|
173 |
+
dict(type="RandomFlip", p=1),
|
174 |
+
],
|
175 |
+
[
|
176 |
+
dict(type="RandomScale", scale=[0.95, 0.95]),
|
177 |
+
dict(type="RandomFlip", p=1),
|
178 |
+
],
|
179 |
+
[dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)],
|
180 |
+
[
|
181 |
+
dict(type="RandomScale", scale=[1.05, 1.05]),
|
182 |
+
dict(type="RandomFlip", p=1),
|
183 |
+
],
|
184 |
+
[
|
185 |
+
dict(type="RandomScale", scale=[1.1, 1.1]),
|
186 |
+
dict(type="RandomFlip", p=1),
|
187 |
+
],
|
188 |
+
],
|
189 |
+
),
|
190 |
+
),
|
191 |
+
)
|
Pointcept/configs/scannet/insseg-pointgroup-v1m1-0-spunet-base.py
ADDED
@@ -0,0 +1,187 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
_base_ = ["../_base_/default_runtime.py"]
|
2 |
+
|
3 |
+
# misc custom setting
|
4 |
+
batch_size = 12 # bs: total bs in all gpus
|
5 |
+
num_worker = 12
|
6 |
+
mix_prob = 0
|
7 |
+
empty_cache = False
|
8 |
+
enable_amp = True
|
9 |
+
evaluate = True
|
10 |
+
|
11 |
+
class_names = [
|
12 |
+
"wall",
|
13 |
+
"floor",
|
14 |
+
"cabinet",
|
15 |
+
"bed",
|
16 |
+
"chair",
|
17 |
+
"sofa",
|
18 |
+
"table",
|
19 |
+
"door",
|
20 |
+
"window",
|
21 |
+
"bookshelf",
|
22 |
+
"picture",
|
23 |
+
"counter",
|
24 |
+
"desk",
|
25 |
+
"curtain",
|
26 |
+
"refridgerator",
|
27 |
+
"shower curtain",
|
28 |
+
"toilet",
|
29 |
+
"sink",
|
30 |
+
"bathtub",
|
31 |
+
"otherfurniture",
|
32 |
+
]
|
33 |
+
num_classes = 20
|
34 |
+
segment_ignore_index = (-1, 0, 1)
|
35 |
+
|
36 |
+
# model settings
|
37 |
+
model = dict(
|
38 |
+
type="PG-v1m1",
|
39 |
+
backbone=dict(
|
40 |
+
type="SpUNet-v1m1",
|
41 |
+
in_channels=6,
|
42 |
+
num_classes=0,
|
43 |
+
channels=(32, 64, 128, 256, 256, 128, 96, 96),
|
44 |
+
layers=(2, 3, 4, 6, 2, 2, 2, 2),
|
45 |
+
),
|
46 |
+
backbone_out_channels=96,
|
47 |
+
semantic_num_classes=num_classes,
|
48 |
+
semantic_ignore_index=-1,
|
49 |
+
segment_ignore_index=segment_ignore_index,
|
50 |
+
instance_ignore_index=-1,
|
51 |
+
cluster_thresh=1.5,
|
52 |
+
cluster_closed_points=300,
|
53 |
+
cluster_propose_points=100,
|
54 |
+
cluster_min_points=50,
|
55 |
+
)
|
56 |
+
|
57 |
+
# scheduler settings
|
58 |
+
epoch = 800
|
59 |
+
optimizer = dict(type="SGD", lr=0.1, momentum=0.9, weight_decay=0.0001, nesterov=True)
|
60 |
+
scheduler = dict(type="PolyLR")
|
61 |
+
|
62 |
+
# dataset settings
|
63 |
+
dataset_type = "ScanNetDataset"
|
64 |
+
data_root = "data/scannet"
|
65 |
+
|
66 |
+
data = dict(
|
67 |
+
num_classes=num_classes,
|
68 |
+
ignore_index=-1,
|
69 |
+
names=class_names,
|
70 |
+
train=dict(
|
71 |
+
type=dataset_type,
|
72 |
+
split="train",
|
73 |
+
data_root=data_root,
|
74 |
+
transform=[
|
75 |
+
dict(type="CenterShift", apply_z=True),
|
76 |
+
dict(
|
77 |
+
type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.5
|
78 |
+
),
|
79 |
+
# dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis='z', p=0.75),
|
80 |
+
dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
|
81 |
+
dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
|
82 |
+
dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
|
83 |
+
dict(type="RandomScale", scale=[0.9, 1.1]),
|
84 |
+
# dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
|
85 |
+
dict(type="RandomFlip", p=0.5),
|
86 |
+
dict(type="RandomJitter", sigma=0.005, clip=0.02),
|
87 |
+
dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
|
88 |
+
dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
|
89 |
+
dict(type="ChromaticTranslation", p=0.95, ratio=0.1),
|
90 |
+
dict(type="ChromaticJitter", p=0.95, std=0.05),
|
91 |
+
# dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
|
92 |
+
# dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
|
93 |
+
dict(
|
94 |
+
type="GridSample",
|
95 |
+
grid_size=0.02,
|
96 |
+
hash_type="fnv",
|
97 |
+
mode="train",
|
98 |
+
return_grid_coord=True,
|
99 |
+
keys=("coord", "color", "normal", "segment", "instance"),
|
100 |
+
),
|
101 |
+
dict(type="SphereCrop", sample_rate=0.8, mode="random"),
|
102 |
+
dict(type="NormalizeColor"),
|
103 |
+
dict(
|
104 |
+
type="InstanceParser",
|
105 |
+
segment_ignore_index=segment_ignore_index,
|
106 |
+
instance_ignore_index=-1,
|
107 |
+
),
|
108 |
+
dict(type="ToTensor"),
|
109 |
+
dict(
|
110 |
+
type="Collect",
|
111 |
+
keys=(
|
112 |
+
"coord",
|
113 |
+
"grid_coord",
|
114 |
+
"segment",
|
115 |
+
"instance",
|
116 |
+
"instance_centroid",
|
117 |
+
"bbox",
|
118 |
+
),
|
119 |
+
feat_keys=("color", "normal"),
|
120 |
+
),
|
121 |
+
],
|
122 |
+
test_mode=False,
|
123 |
+
),
|
124 |
+
val=dict(
|
125 |
+
type=dataset_type,
|
126 |
+
split="val",
|
127 |
+
data_root=data_root,
|
128 |
+
transform=[
|
129 |
+
dict(type="CenterShift", apply_z=True),
|
130 |
+
dict(
|
131 |
+
type="Copy",
|
132 |
+
keys_dict={
|
133 |
+
"coord": "origin_coord",
|
134 |
+
"segment": "origin_segment",
|
135 |
+
"instance": "origin_instance",
|
136 |
+
},
|
137 |
+
),
|
138 |
+
dict(
|
139 |
+
type="GridSample",
|
140 |
+
grid_size=0.02,
|
141 |
+
hash_type="fnv",
|
142 |
+
mode="train",
|
143 |
+
return_grid_coord=True,
|
144 |
+
keys=("coord", "color", "normal", "segment", "instance"),
|
145 |
+
),
|
146 |
+
# dict(type="SphereCrop", point_max=1000000, mode='center'),
|
147 |
+
dict(type="CenterShift", apply_z=False),
|
148 |
+
dict(type="NormalizeColor"),
|
149 |
+
dict(
|
150 |
+
type="InstanceParser",
|
151 |
+
segment_ignore_index=segment_ignore_index,
|
152 |
+
instance_ignore_index=-1,
|
153 |
+
),
|
154 |
+
dict(type="ToTensor"),
|
155 |
+
dict(
|
156 |
+
type="Collect",
|
157 |
+
keys=(
|
158 |
+
"coord",
|
159 |
+
"grid_coord",
|
160 |
+
"segment",
|
161 |
+
"instance",
|
162 |
+
"origin_coord",
|
163 |
+
"origin_segment",
|
164 |
+
"origin_instance",
|
165 |
+
"instance_centroid",
|
166 |
+
"bbox",
|
167 |
+
),
|
168 |
+
feat_keys=("color", "normal"),
|
169 |
+
offset_keys_dict=dict(offset="coord", origin_offset="origin_coord"),
|
170 |
+
),
|
171 |
+
],
|
172 |
+
test_mode=False,
|
173 |
+
),
|
174 |
+
test=dict(), # currently not available
|
175 |
+
)
|
176 |
+
|
177 |
+
hooks = [
|
178 |
+
dict(type="CheckpointLoader", keywords="module.", replacement="module."),
|
179 |
+
dict(type="IterationTimer", warmup_iter=2),
|
180 |
+
dict(type="InformationWriter"),
|
181 |
+
dict(
|
182 |
+
type="InsSegEvaluator",
|
183 |
+
segment_ignore_index=segment_ignore_index,
|
184 |
+
instance_ignore_index=-1,
|
185 |
+
),
|
186 |
+
dict(type="CheckpointSaver", save_freq=None),
|
187 |
+
]
|
Pointcept/configs/scannet/insseg-ppt-v1m1-0-pointgroup-spunet-ft.py
ADDED
@@ -0,0 +1,279 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
_base_ = ["../_base_/default_runtime.py"]
|
2 |
+
|
3 |
+
# misc custom setting
|
4 |
+
batch_size = 12 # bs: total bs in all gpus
|
5 |
+
num_worker = 24
|
6 |
+
mix_prob = 0
|
7 |
+
empty_cache = False
|
8 |
+
enable_amp = True
|
9 |
+
evaluate = True
|
10 |
+
find_unused_parameters = True
|
11 |
+
|
12 |
+
class_names = [
|
13 |
+
"wall",
|
14 |
+
"floor",
|
15 |
+
"cabinet",
|
16 |
+
"bed",
|
17 |
+
"chair",
|
18 |
+
"sofa",
|
19 |
+
"table",
|
20 |
+
"door",
|
21 |
+
"window",
|
22 |
+
"bookshelf",
|
23 |
+
"picture",
|
24 |
+
"counter",
|
25 |
+
"desk",
|
26 |
+
"curtain",
|
27 |
+
"refridgerator",
|
28 |
+
"shower curtain",
|
29 |
+
"toilet",
|
30 |
+
"sink",
|
31 |
+
"bathtub",
|
32 |
+
"otherfurniture",
|
33 |
+
]
|
34 |
+
num_classes = 20
|
35 |
+
segment_ignore_index = (-1, 0, 1)
|
36 |
+
|
37 |
+
# model settings
|
38 |
+
model = dict(
|
39 |
+
type="PG-v1m1",
|
40 |
+
backbone=dict(
|
41 |
+
type="PPT-v1m1",
|
42 |
+
backbone=dict(
|
43 |
+
type="SpUNet-v1m3",
|
44 |
+
in_channels=6,
|
45 |
+
num_classes=0,
|
46 |
+
base_channels=32,
|
47 |
+
context_channels=256,
|
48 |
+
channels=(32, 64, 128, 256, 256, 128, 96, 96),
|
49 |
+
layers=(2, 3, 4, 6, 2, 2, 2, 2),
|
50 |
+
cls_mode=False,
|
51 |
+
conditions=("ScanNet", "S3DIS", "Structured3D"),
|
52 |
+
zero_init=False,
|
53 |
+
norm_decouple=True,
|
54 |
+
norm_adaptive=True,
|
55 |
+
norm_affine=True,
|
56 |
+
),
|
57 |
+
criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
|
58 |
+
backbone_out_channels=96,
|
59 |
+
context_channels=256,
|
60 |
+
conditions=("Structured3D", "ScanNet", "S3DIS"),
|
61 |
+
template="[x]",
|
62 |
+
clip_model="ViT-B/16",
|
63 |
+
class_name=(
|
64 |
+
"wall",
|
65 |
+
"floor",
|
66 |
+
"cabinet",
|
67 |
+
"bed",
|
68 |
+
"chair",
|
69 |
+
"sofa",
|
70 |
+
"table",
|
71 |
+
"door",
|
72 |
+
"window",
|
73 |
+
"bookshelf",
|
74 |
+
"bookcase",
|
75 |
+
"picture",
|
76 |
+
"counter",
|
77 |
+
"desk",
|
78 |
+
"shelves",
|
79 |
+
"curtain",
|
80 |
+
"dresser",
|
81 |
+
"pillow",
|
82 |
+
"mirror",
|
83 |
+
"ceiling",
|
84 |
+
"refrigerator",
|
85 |
+
"television",
|
86 |
+
"shower curtain",
|
87 |
+
"nightstand",
|
88 |
+
"toilet",
|
89 |
+
"sink",
|
90 |
+
"lamp",
|
91 |
+
"bathtub",
|
92 |
+
"garbagebin",
|
93 |
+
"board",
|
94 |
+
"beam",
|
95 |
+
"column",
|
96 |
+
"clutter",
|
97 |
+
"otherstructure",
|
98 |
+
"otherfurniture",
|
99 |
+
"otherprop",
|
100 |
+
),
|
101 |
+
valid_index=(
|
102 |
+
(
|
103 |
+
0,
|
104 |
+
1,
|
105 |
+
2,
|
106 |
+
3,
|
107 |
+
4,
|
108 |
+
5,
|
109 |
+
6,
|
110 |
+
7,
|
111 |
+
8,
|
112 |
+
11,
|
113 |
+
13,
|
114 |
+
14,
|
115 |
+
15,
|
116 |
+
16,
|
117 |
+
17,
|
118 |
+
18,
|
119 |
+
19,
|
120 |
+
20,
|
121 |
+
21,
|
122 |
+
23,
|
123 |
+
25,
|
124 |
+
26,
|
125 |
+
33,
|
126 |
+
34,
|
127 |
+
35,
|
128 |
+
),
|
129 |
+
(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 15, 20, 22, 24, 25, 27, 34),
|
130 |
+
(0, 1, 4, 5, 6, 7, 8, 10, 19, 29, 30, 31, 32),
|
131 |
+
),
|
132 |
+
backbone_mode=True,
|
133 |
+
),
|
134 |
+
backbone_out_channels=96,
|
135 |
+
semantic_num_classes=num_classes,
|
136 |
+
semantic_ignore_index=-1,
|
137 |
+
segment_ignore_index=segment_ignore_index,
|
138 |
+
instance_ignore_index=-1,
|
139 |
+
cluster_thresh=1.5,
|
140 |
+
cluster_closed_points=300,
|
141 |
+
cluster_propose_points=100,
|
142 |
+
cluster_min_points=50,
|
143 |
+
)
|
144 |
+
|
145 |
+
# scheduler settings
|
146 |
+
epoch = 800
|
147 |
+
optimizer = dict(type="SGD", lr=0.1, momentum=0.9, weight_decay=0.0001, nesterov=True)
|
148 |
+
scheduler = dict(type="PolyLR")
|
149 |
+
|
150 |
+
# dataset settings
|
151 |
+
dataset_type = "ScanNetDataset"
|
152 |
+
data_root = "data/scannet"
|
153 |
+
|
154 |
+
data = dict(
|
155 |
+
num_classes=num_classes,
|
156 |
+
ignore_index=-1,
|
157 |
+
names=class_names,
|
158 |
+
train=dict(
|
159 |
+
type=dataset_type,
|
160 |
+
split="train",
|
161 |
+
data_root=data_root,
|
162 |
+
transform=[
|
163 |
+
dict(type="CenterShift", apply_z=True),
|
164 |
+
dict(
|
165 |
+
type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.5
|
166 |
+
),
|
167 |
+
# dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis='z', p=0.75),
|
168 |
+
dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
|
169 |
+
dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
|
170 |
+
dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
|
171 |
+
dict(type="RandomScale", scale=[0.9, 1.1]),
|
172 |
+
# dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
|
173 |
+
dict(type="RandomFlip", p=0.5),
|
174 |
+
dict(type="RandomJitter", sigma=0.005, clip=0.02),
|
175 |
+
dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
|
176 |
+
dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
|
177 |
+
dict(type="ChromaticTranslation", p=0.95, ratio=0.1),
|
178 |
+
dict(type="ChromaticJitter", p=0.95, std=0.05),
|
179 |
+
# dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
|
180 |
+
# dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
|
181 |
+
dict(
|
182 |
+
type="GridSample",
|
183 |
+
grid_size=0.02,
|
184 |
+
hash_type="fnv",
|
185 |
+
mode="train",
|
186 |
+
return_grid_coord=True,
|
187 |
+
keys=("coord", "color", "normal", "segment", "instance"),
|
188 |
+
),
|
189 |
+
dict(type="SphereCrop", sample_rate=0.8, mode="random"),
|
190 |
+
dict(type="NormalizeColor"),
|
191 |
+
dict(
|
192 |
+
type="InstanceParser",
|
193 |
+
segment_ignore_index=segment_ignore_index,
|
194 |
+
instance_ignore_index=-1,
|
195 |
+
),
|
196 |
+
dict(type="Add", keys_dict={"condition": "ScanNet"}),
|
197 |
+
dict(type="ToTensor"),
|
198 |
+
dict(
|
199 |
+
type="Collect",
|
200 |
+
keys=(
|
201 |
+
"coord",
|
202 |
+
"grid_coord",
|
203 |
+
"segment",
|
204 |
+
"instance",
|
205 |
+
"instance_centroid",
|
206 |
+
"bbox",
|
207 |
+
"condition",
|
208 |
+
),
|
209 |
+
feat_keys=("color", "normal"),
|
210 |
+
),
|
211 |
+
],
|
212 |
+
test_mode=False,
|
213 |
+
),
|
214 |
+
val=dict(
|
215 |
+
type=dataset_type,
|
216 |
+
split="val",
|
217 |
+
data_root=data_root,
|
218 |
+
transform=[
|
219 |
+
dict(type="CenterShift", apply_z=True),
|
220 |
+
dict(
|
221 |
+
type="Copy",
|
222 |
+
keys_dict={
|
223 |
+
"coord": "origin_coord",
|
224 |
+
"segment": "origin_segment",
|
225 |
+
"instance": "origin_instance",
|
226 |
+
},
|
227 |
+
),
|
228 |
+
dict(
|
229 |
+
type="GridSample",
|
230 |
+
grid_size=0.02,
|
231 |
+
hash_type="fnv",
|
232 |
+
mode="train",
|
233 |
+
return_grid_coord=True,
|
234 |
+
keys=("coord", "color", "normal", "segment", "instance"),
|
235 |
+
),
|
236 |
+
# dict(type="SphereCrop", point_max=1000000, mode='center'),
|
237 |
+
dict(type="CenterShift", apply_z=False),
|
238 |
+
dict(type="NormalizeColor"),
|
239 |
+
dict(
|
240 |
+
type="InstanceParser",
|
241 |
+
segment_ignore_index=segment_ignore_index,
|
242 |
+
instance_ignore_index=-1,
|
243 |
+
),
|
244 |
+
dict(type="Add", keys_dict={"condition": "ScanNet"}),
|
245 |
+
dict(type="ToTensor"),
|
246 |
+
dict(
|
247 |
+
type="Collect",
|
248 |
+
keys=(
|
249 |
+
"coord",
|
250 |
+
"grid_coord",
|
251 |
+
"segment",
|
252 |
+
"instance",
|
253 |
+
"origin_coord",
|
254 |
+
"origin_segment",
|
255 |
+
"origin_instance",
|
256 |
+
"instance_centroid",
|
257 |
+
"bbox",
|
258 |
+
"condition",
|
259 |
+
),
|
260 |
+
feat_keys=("color", "normal"),
|
261 |
+
offset_keys_dict=dict(offset="coord", origin_offset="origin_coord"),
|
262 |
+
),
|
263 |
+
],
|
264 |
+
test_mode=False,
|
265 |
+
),
|
266 |
+
test=dict(), # currently not available
|
267 |
+
)
|
268 |
+
|
269 |
+
hooks = [
|
270 |
+
dict(type="CheckpointLoader", keywords="module.", replacement="module.backbone."),
|
271 |
+
dict(type="IterationTimer", warmup_iter=2),
|
272 |
+
dict(type="InformationWriter"),
|
273 |
+
dict(
|
274 |
+
type="InsSegEvaluator",
|
275 |
+
segment_ignore_index=segment_ignore_index,
|
276 |
+
instance_ignore_index=-1,
|
277 |
+
),
|
278 |
+
dict(type="CheckpointSaver", save_freq=None),
|
279 |
+
]
|
Pointcept/configs/scannet/objdet-cagroup3d-v1m1-0-base.py
ADDED
@@ -0,0 +1,183 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
_base_ = ["../_base_/default_runtime.py"]
|
2 |
+
|
3 |
+
# misc custom setting
|
4 |
+
batch_size = 16 # bs: total bs in all gpus
|
5 |
+
num_worker = 32
|
6 |
+
mix_prob = 0
|
7 |
+
empty_cache = False
|
8 |
+
enable_amp = False
|
9 |
+
evaluate = True
|
10 |
+
|
11 |
+
class_names = [
|
12 |
+
"wall",
|
13 |
+
"floor",
|
14 |
+
"cabinet",
|
15 |
+
"bed",
|
16 |
+
"chair",
|
17 |
+
"sofa",
|
18 |
+
"table",
|
19 |
+
"door",
|
20 |
+
"window",
|
21 |
+
"bookshelf",
|
22 |
+
"picture",
|
23 |
+
"counter",
|
24 |
+
"desk",
|
25 |
+
"curtain",
|
26 |
+
"refridgerator",
|
27 |
+
"shower curtain",
|
28 |
+
"toilet",
|
29 |
+
"sink",
|
30 |
+
"bathtub",
|
31 |
+
"otherfurniture",
|
32 |
+
]
|
33 |
+
num_classes = 20
|
34 |
+
segment_ignore_index = (-1, 0, 1)
|
35 |
+
|
36 |
+
# model settings
|
37 |
+
model = dict(
|
38 |
+
type="PG-v1m1",
|
39 |
+
backbone=dict(
|
40 |
+
type="SpUNet-v1m1",
|
41 |
+
in_channels=6,
|
42 |
+
num_classes=0,
|
43 |
+
channels=(32, 64, 128, 256, 256, 128, 96, 96),
|
44 |
+
layers=(2, 3, 4, 6, 2, 2, 2, 2),
|
45 |
+
),
|
46 |
+
backbone_out_channels=96,
|
47 |
+
semantic_num_classes=num_classes,
|
48 |
+
semantic_ignore_index=-1,
|
49 |
+
segment_ignore_index=segment_ignore_index,
|
50 |
+
instance_ignore_index=-1,
|
51 |
+
cluster_thresh=1.5,
|
52 |
+
cluster_closed_points=300,
|
53 |
+
cluster_propose_points=100,
|
54 |
+
cluster_min_points=50,
|
55 |
+
)
|
56 |
+
|
57 |
+
# scheduler settings
|
58 |
+
epoch = 800
|
59 |
+
optimizer = dict(type="SGD", lr=0.1, momentum=0.9, weight_decay=0.0001, nesterov=True)
|
60 |
+
scheduler = dict(type="PolyLR")
|
61 |
+
|
62 |
+
# dataset settings
|
63 |
+
dataset_type = "ScanNetDataset"
|
64 |
+
data_root = "data/scannet"
|
65 |
+
|
66 |
+
data = dict(
|
67 |
+
num_classes=num_classes,
|
68 |
+
ignore_index=-1,
|
69 |
+
names=class_names,
|
70 |
+
train=dict(
|
71 |
+
type=dataset_type,
|
72 |
+
split="train",
|
73 |
+
data_root=data_root,
|
74 |
+
transform=[
|
75 |
+
# dict(type="CenterShift", apply_z=True),
|
76 |
+
# dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.5),
|
77 |
+
# # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis='z', p=0.75),
|
78 |
+
# dict(type="RandomRotate", angle=[-1, 1], axis='z', center=[0, 0, 0], p=0.5),
|
79 |
+
# dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis='x', p=0.5),
|
80 |
+
# dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis='y', p=0.5),
|
81 |
+
# dict(type="RandomScale", scale=[0.9, 1.1]),
|
82 |
+
# # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
|
83 |
+
# dict(type="RandomFlip", p=0.5),
|
84 |
+
# dict(type="RandomJitter", sigma=0.005, clip=0.02),
|
85 |
+
# dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
|
86 |
+
# dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
|
87 |
+
# dict(type="ChromaticTranslation", p=0.95, ratio=0.1),
|
88 |
+
# dict(type="ChromaticJitter", p=0.95, std=0.05),
|
89 |
+
# # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
|
90 |
+
# # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
|
91 |
+
# dict(type="GridSample",
|
92 |
+
# grid_size=0.02,
|
93 |
+
# hash_type='fnv',
|
94 |
+
# mode='train',
|
95 |
+
# return_grid_coord=True,
|
96 |
+
# keys=("coord", "color", "normal", "segment", "instance")),
|
97 |
+
# dict(type="SphereCrop", sample_rate=0.8, mode='random'),
|
98 |
+
# dict(type="NormalizeColor"),
|
99 |
+
dict(
|
100 |
+
type="InstanceParser",
|
101 |
+
segment_ignore_index=segment_ignore_index,
|
102 |
+
instance_ignore_index=-1,
|
103 |
+
),
|
104 |
+
dict(type="ToTensor"),
|
105 |
+
dict(
|
106 |
+
type="Collect",
|
107 |
+
keys=(
|
108 |
+
"coord",
|
109 |
+
"grid_coord",
|
110 |
+
"segment",
|
111 |
+
"instance",
|
112 |
+
"instance_centroid",
|
113 |
+
"bbox",
|
114 |
+
),
|
115 |
+
feat_keys=("color", "normal"),
|
116 |
+
),
|
117 |
+
],
|
118 |
+
test_mode=False,
|
119 |
+
),
|
120 |
+
val=dict(
|
121 |
+
type=dataset_type,
|
122 |
+
split="val",
|
123 |
+
data_root=data_root,
|
124 |
+
transform=[
|
125 |
+
dict(type="CenterShift", apply_z=True),
|
126 |
+
dict(
|
127 |
+
type="Copy",
|
128 |
+
keys_dict={
|
129 |
+
"coord": "origin_coord",
|
130 |
+
"segment": "origin_segment",
|
131 |
+
"instance": "origin_instance",
|
132 |
+
},
|
133 |
+
),
|
134 |
+
dict(
|
135 |
+
type="GridSample",
|
136 |
+
grid_size=0.02,
|
137 |
+
hash_type="fnv",
|
138 |
+
mode="train",
|
139 |
+
return_grid_coord=True,
|
140 |
+
keys=("coord", "color", "normal", "segment", "instance"),
|
141 |
+
),
|
142 |
+
# dict(type="SphereCrop", point_max=1000000, mode='center'),
|
143 |
+
dict(type="CenterShift", apply_z=False),
|
144 |
+
dict(type="NormalizeColor"),
|
145 |
+
dict(
|
146 |
+
type="InstanceParser",
|
147 |
+
segment_ignore_index=segment_ignore_index,
|
148 |
+
instance_ignore_index=-1,
|
149 |
+
),
|
150 |
+
dict(type="ToTensor"),
|
151 |
+
dict(
|
152 |
+
type="Collect",
|
153 |
+
keys=(
|
154 |
+
"coord",
|
155 |
+
"grid_coord",
|
156 |
+
"segment",
|
157 |
+
"instance",
|
158 |
+
"origin_coord",
|
159 |
+
"origin_segment",
|
160 |
+
"origin_instance",
|
161 |
+
"instance_centroid",
|
162 |
+
"bbox",
|
163 |
+
),
|
164 |
+
feat_keys=("color", "normal"),
|
165 |
+
offset_keys_dict=dict(offset="coord", origin_offset="origin_coord"),
|
166 |
+
),
|
167 |
+
],
|
168 |
+
test_mode=False,
|
169 |
+
),
|
170 |
+
test=dict(), # currently not available
|
171 |
+
)
|
172 |
+
|
173 |
+
hooks = [
|
174 |
+
dict(type="CheckpointLoader", keywords="module.", replacement="module."),
|
175 |
+
dict(type="IterationTimer", warmup_iter=2),
|
176 |
+
dict(type="InformationWriter"),
|
177 |
+
dict(
|
178 |
+
type="InsSegEvaluator",
|
179 |
+
segment_ignore_index=segment_ignore_index,
|
180 |
+
instance_ignore_index=-1,
|
181 |
+
),
|
182 |
+
dict(type="CheckpointSaver", save_freq=None),
|
183 |
+
]
|
Pointcept/configs/scannet/pretrain-msc-v1m1-0-spunet-base.py
ADDED
@@ -0,0 +1,155 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
_base_ = ["../_base_/default_runtime.py"]
|
2 |
+
|
3 |
+
# misc custom setting
|
4 |
+
batch_size = 32 # bs: total bs in all gpus
|
5 |
+
num_worker = 32
|
6 |
+
mix_prob = 0
|
7 |
+
empty_cache = False
|
8 |
+
enable_amp = False
|
9 |
+
evaluate = False
|
10 |
+
find_unused_parameters = False
|
11 |
+
|
12 |
+
# model settings
|
13 |
+
model = dict(
|
14 |
+
type="MSC-v1m1",
|
15 |
+
backbone=dict(
|
16 |
+
type="SpUNet-v1m1",
|
17 |
+
in_channels=6,
|
18 |
+
num_classes=0,
|
19 |
+
channels=(32, 64, 128, 256, 256, 128, 96, 96),
|
20 |
+
layers=(2, 3, 4, 6, 2, 2, 2, 2),
|
21 |
+
),
|
22 |
+
backbone_in_channels=6,
|
23 |
+
backbone_out_channels=96,
|
24 |
+
mask_grid_size=0.1,
|
25 |
+
mask_rate=0.4,
|
26 |
+
view1_mix_prob=0.8,
|
27 |
+
view2_mix_prob=0,
|
28 |
+
matching_max_k=8,
|
29 |
+
matching_max_radius=0.03,
|
30 |
+
matching_max_pair=8192,
|
31 |
+
nce_t=0.4,
|
32 |
+
contrast_weight=1,
|
33 |
+
reconstruct_weight=1,
|
34 |
+
reconstruct_color=True,
|
35 |
+
reconstruct_normal=False,
|
36 |
+
)
|
37 |
+
|
38 |
+
# scheduler settings
|
39 |
+
epoch = 600
|
40 |
+
optimizer = dict(type="SGD", lr=0.1, momentum=0.8, weight_decay=0.0001, nesterov=True)
|
41 |
+
scheduler = dict(
|
42 |
+
type="OneCycleLR",
|
43 |
+
max_lr=optimizer["lr"],
|
44 |
+
pct_start=0.01,
|
45 |
+
anneal_strategy="cos",
|
46 |
+
div_factor=10.0,
|
47 |
+
final_div_factor=10000.0,
|
48 |
+
)
|
49 |
+
|
50 |
+
# dataset settings
|
51 |
+
dataset_type = "ScanNetDataset"
|
52 |
+
data_root = "data/scannet"
|
53 |
+
|
54 |
+
data = dict(
|
55 |
+
num_classes=20,
|
56 |
+
ignore_index=-1,
|
57 |
+
names=[
|
58 |
+
"wall",
|
59 |
+
"floor",
|
60 |
+
"cabinet",
|
61 |
+
"bed",
|
62 |
+
"chair",
|
63 |
+
"sofa",
|
64 |
+
"table",
|
65 |
+
"door",
|
66 |
+
"window",
|
67 |
+
"bookshelf",
|
68 |
+
"picture",
|
69 |
+
"counter",
|
70 |
+
"desk",
|
71 |
+
"curtain",
|
72 |
+
"refridgerator",
|
73 |
+
"shower curtain",
|
74 |
+
"toilet",
|
75 |
+
"sink",
|
76 |
+
"bathtub",
|
77 |
+
"otherfurniture",
|
78 |
+
],
|
79 |
+
train=dict(
|
80 |
+
type=dataset_type,
|
81 |
+
split=["train", "val", "test"],
|
82 |
+
data_root=data_root,
|
83 |
+
transform=[
|
84 |
+
dict(type="CenterShift", apply_z=True),
|
85 |
+
dict(type="RandomScale", scale=[0.9, 1.1]),
|
86 |
+
dict(type="Copy", keys_dict={"coord": "origin_coord"}),
|
87 |
+
dict(
|
88 |
+
type="ContrastiveViewsGenerator",
|
89 |
+
view_keys=("coord", "color", "normal", "origin_coord"),
|
90 |
+
view_trans_cfg=[
|
91 |
+
dict(
|
92 |
+
type="RandomRotate",
|
93 |
+
angle=[-1, 1],
|
94 |
+
axis="z",
|
95 |
+
center=[0, 0, 0],
|
96 |
+
p=1,
|
97 |
+
),
|
98 |
+
dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=1),
|
99 |
+
dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=1),
|
100 |
+
dict(type="RandomFlip", p=0.5),
|
101 |
+
dict(type="RandomJitter", sigma=0.005, clip=0.02),
|
102 |
+
dict(
|
103 |
+
type="RandomColorJitter",
|
104 |
+
brightness=0.4,
|
105 |
+
contrast=0.4,
|
106 |
+
saturation=0.2,
|
107 |
+
hue=0.02,
|
108 |
+
p=0.8,
|
109 |
+
),
|
110 |
+
dict(type="ChromaticJitter", p=0.95, std=0.05),
|
111 |
+
dict(
|
112 |
+
type="GridSample",
|
113 |
+
grid_size=0.02,
|
114 |
+
hash_type="fnv",
|
115 |
+
mode="train",
|
116 |
+
keys=("origin_coord", "coord", "color", "normal"),
|
117 |
+
return_grid_coord=True,
|
118 |
+
),
|
119 |
+
dict(type="SphereCrop", sample_rate=0.6, mode="random"),
|
120 |
+
dict(type="CenterShift", apply_z=False),
|
121 |
+
dict(type="NormalizeColor"),
|
122 |
+
],
|
123 |
+
),
|
124 |
+
dict(type="ToTensor"),
|
125 |
+
dict(
|
126 |
+
type="Collect",
|
127 |
+
keys=(
|
128 |
+
"view1_origin_coord",
|
129 |
+
"view1_grid_coord",
|
130 |
+
"view1_coord",
|
131 |
+
"view1_color",
|
132 |
+
"view1_normal",
|
133 |
+
"view2_origin_coord",
|
134 |
+
"view2_grid_coord",
|
135 |
+
"view2_coord",
|
136 |
+
"view2_color",
|
137 |
+
"view2_normal",
|
138 |
+
),
|
139 |
+
offset_keys_dict=dict(
|
140 |
+
view1_offset="view1_coord", view2_offset="view2_coord"
|
141 |
+
),
|
142 |
+
view1_feat_keys=("view1_color", "view1_normal"),
|
143 |
+
view2_feat_keys=("view2_color", "view2_normal"),
|
144 |
+
),
|
145 |
+
],
|
146 |
+
test_mode=False,
|
147 |
+
),
|
148 |
+
)
|
149 |
+
|
150 |
+
hooks = [
|
151 |
+
dict(type="CheckpointLoader"),
|
152 |
+
dict(type="IterationTimer", warmup_iter=2),
|
153 |
+
dict(type="InformationWriter"),
|
154 |
+
dict(type="CheckpointSaver", save_freq=None),
|
155 |
+
]
|
Pointcept/configs/scannet/pretrain-msc-v1m1-1-spunet-pointcontrast.py
ADDED
@@ -0,0 +1,162 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
_base_ = ["../_base_/default_runtime.py"]
|
2 |
+
|
3 |
+
# misc custom setting
|
4 |
+
batch_size = 32 # bs: total bs in all gpus
|
5 |
+
num_worker = 32
|
6 |
+
mix_prob = 0
|
7 |
+
empty_cache = False
|
8 |
+
enable_amp = False
|
9 |
+
evaluate = False
|
10 |
+
find_unused_parameters = False
|
11 |
+
|
12 |
+
# model settings
|
13 |
+
model = dict(
|
14 |
+
type="MSC-v1m1",
|
15 |
+
backbone=dict(
|
16 |
+
type="SpUNet-v1m1",
|
17 |
+
in_channels=3,
|
18 |
+
num_classes=0,
|
19 |
+
channels=(32, 64, 128, 256, 256, 128, 96, 96),
|
20 |
+
layers=(2, 3, 4, 6, 2, 2, 2, 2),
|
21 |
+
),
|
22 |
+
backbone_in_channels=3,
|
23 |
+
backbone_out_channels=96,
|
24 |
+
mask_grid_size=0.1,
|
25 |
+
mask_rate=0,
|
26 |
+
view1_mix_prob=0,
|
27 |
+
view2_mix_prob=0,
|
28 |
+
matching_max_k=8,
|
29 |
+
matching_max_radius=0.03,
|
30 |
+
matching_max_pair=4096,
|
31 |
+
nce_t=0.07,
|
32 |
+
contrast_weight=1,
|
33 |
+
reconstruct_weight=1,
|
34 |
+
reconstruct_color=False,
|
35 |
+
reconstruct_normal=False,
|
36 |
+
)
|
37 |
+
|
38 |
+
# scheduler settings
|
39 |
+
epoch = 10
|
40 |
+
eval_epoch = 10
|
41 |
+
optimizer = dict(type="SGD", lr=0.1, momentum=0.8, weight_decay=0.0001, nesterov=True)
|
42 |
+
scheduler = dict(
|
43 |
+
type="OneCycleLR",
|
44 |
+
max_lr=optimizer["lr"],
|
45 |
+
pct_start=0.01,
|
46 |
+
anneal_strategy="cos",
|
47 |
+
div_factor=10.0,
|
48 |
+
final_div_factor=10000.0,
|
49 |
+
)
|
50 |
+
|
51 |
+
# dataset settings
|
52 |
+
dataset_type = "ScanNetPairDataset"
|
53 |
+
data_root = "data/scannet_pair"
|
54 |
+
|
55 |
+
data = dict(
|
56 |
+
num_classes=20,
|
57 |
+
ignore_index=-1,
|
58 |
+
names=[
|
59 |
+
"wall",
|
60 |
+
"floor",
|
61 |
+
"cabinet",
|
62 |
+
"bed",
|
63 |
+
"chair",
|
64 |
+
"sofa",
|
65 |
+
"table",
|
66 |
+
"door",
|
67 |
+
"window",
|
68 |
+
"bookshelf",
|
69 |
+
"picture",
|
70 |
+
"counter",
|
71 |
+
"desk",
|
72 |
+
"curtain",
|
73 |
+
"refridgerator",
|
74 |
+
"shower curtain",
|
75 |
+
"toilet",
|
76 |
+
"sink",
|
77 |
+
"bathtub",
|
78 |
+
"otherfurniture",
|
79 |
+
],
|
80 |
+
train=dict(
|
81 |
+
type=dataset_type,
|
82 |
+
data_root=data_root,
|
83 |
+
view1_transform=[
|
84 |
+
dict(type="CenterShift", apply_z=True),
|
85 |
+
dict(type="Copy", keys_dict={"coord": "origin_coord"}),
|
86 |
+
# dict(type="RandomScale", scale=[0.9, 1.1]),
|
87 |
+
dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=1),
|
88 |
+
dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=1),
|
89 |
+
dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=1),
|
90 |
+
dict(type="RandomFlip", p=0.5),
|
91 |
+
dict(type="RandomJitter", sigma=0.005, clip=0.02),
|
92 |
+
dict(
|
93 |
+
type="RandomColorJitter",
|
94 |
+
brightness=0.4,
|
95 |
+
contrast=0.4,
|
96 |
+
saturation=0.2,
|
97 |
+
hue=0.02,
|
98 |
+
p=0.8,
|
99 |
+
),
|
100 |
+
dict(type="ChromaticJitter", p=0.95, std=0.05),
|
101 |
+
dict(
|
102 |
+
type="GridSample",
|
103 |
+
grid_size=0.025,
|
104 |
+
hash_type="fnv",
|
105 |
+
mode="train",
|
106 |
+
keys=("origin_coord", "coord", "color"),
|
107 |
+
return_grid_coord=True,
|
108 |
+
),
|
109 |
+
dict(type="NormalizeColor"),
|
110 |
+
dict(type="ToTensor"),
|
111 |
+
dict(
|
112 |
+
type="Collect",
|
113 |
+
keys=("origin_coord", "grid_coord", "coord", "color"),
|
114 |
+
offset_keys_dict=dict(offset="coord"),
|
115 |
+
feat_keys=["color"],
|
116 |
+
),
|
117 |
+
],
|
118 |
+
view2_transform=[
|
119 |
+
dict(type="CenterShift", apply_z=True),
|
120 |
+
dict(type="Copy", keys_dict={"coord": "origin_coord"}),
|
121 |
+
# dict(type="RandomScale", scale=[0.9, 1.1]),
|
122 |
+
dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=1),
|
123 |
+
dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=1),
|
124 |
+
dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=1),
|
125 |
+
dict(type="RandomFlip", p=0.5),
|
126 |
+
dict(type="RandomJitter", sigma=0.005, clip=0.02),
|
127 |
+
dict(
|
128 |
+
type="RandomColorJitter",
|
129 |
+
brightness=0.4,
|
130 |
+
contrast=0.4,
|
131 |
+
saturation=0.2,
|
132 |
+
hue=0.02,
|
133 |
+
p=0.8,
|
134 |
+
),
|
135 |
+
dict(type="ChromaticJitter", p=0.95, std=0.05),
|
136 |
+
dict(
|
137 |
+
type="GridSample",
|
138 |
+
grid_size=0.025,
|
139 |
+
hash_type="fnv",
|
140 |
+
mode="train",
|
141 |
+
keys=("origin_coord", "coord", "color"),
|
142 |
+
return_grid_coord=True,
|
143 |
+
),
|
144 |
+
dict(type="NormalizeColor"),
|
145 |
+
dict(type="ToTensor"),
|
146 |
+
dict(
|
147 |
+
type="Collect",
|
148 |
+
keys=("origin_coord", "grid_coord", "coord", "color"),
|
149 |
+
offset_keys_dict=dict(offset="coord"),
|
150 |
+
feat_keys=["color"],
|
151 |
+
),
|
152 |
+
],
|
153 |
+
test_mode=False,
|
154 |
+
),
|
155 |
+
)
|
156 |
+
|
157 |
+
hooks = [
|
158 |
+
dict(type="CheckpointLoader"),
|
159 |
+
dict(type="IterationTimer", warmup_iter=2),
|
160 |
+
dict(type="InformationWriter"),
|
161 |
+
dict(type="CheckpointSaver", save_freq=None),
|
162 |
+
]
|
Pointcept/configs/scannet/pretrain-msc-v1m2-0-spunet-csc.py
ADDED
@@ -0,0 +1,165 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
_base_ = ["../_base_/default_runtime.py"]
|
2 |
+
|
3 |
+
# misc custom setting
|
4 |
+
batch_size = 32 # bs: total bs in all gpus
|
5 |
+
num_worker = 32
|
6 |
+
mix_prob = 0
|
7 |
+
empty_cache = False
|
8 |
+
enable_amp = False
|
9 |
+
evaluate = False
|
10 |
+
find_unused_parameters = False
|
11 |
+
|
12 |
+
# model settings
|
13 |
+
model = dict(
|
14 |
+
type="MSC-v1m2",
|
15 |
+
backbone=dict(
|
16 |
+
type="SpUNet-v1m1",
|
17 |
+
in_channels=3,
|
18 |
+
num_classes=0,
|
19 |
+
channels=(32, 64, 128, 256, 256, 128, 96, 96),
|
20 |
+
layers=(2, 3, 4, 6, 2, 2, 2, 2),
|
21 |
+
),
|
22 |
+
backbone_in_channels=3,
|
23 |
+
backbone_out_channels=96,
|
24 |
+
mask_grid_size=0.1,
|
25 |
+
mask_rate=0,
|
26 |
+
view1_mix_prob=0,
|
27 |
+
view2_mix_prob=0,
|
28 |
+
matching_max_k=8,
|
29 |
+
matching_max_radius=0.03,
|
30 |
+
matching_max_pair=8192,
|
31 |
+
nce_t=0.4,
|
32 |
+
contrast_weight=1,
|
33 |
+
reconstruct_weight=1,
|
34 |
+
reconstruct_color=False,
|
35 |
+
reconstruct_normal=False,
|
36 |
+
partitions=4,
|
37 |
+
r1=2,
|
38 |
+
r2=20,
|
39 |
+
)
|
40 |
+
|
41 |
+
# scheduler settings
|
42 |
+
epoch = 10
|
43 |
+
eval_epoch = 10
|
44 |
+
optimizer = dict(type="SGD", lr=0.1, momentum=0.8, weight_decay=0.0001, nesterov=True)
|
45 |
+
scheduler = dict(
|
46 |
+
type="OneCycleLR",
|
47 |
+
max_lr=optimizer["lr"],
|
48 |
+
pct_start=0.01,
|
49 |
+
anneal_strategy="cos",
|
50 |
+
div_factor=10.0,
|
51 |
+
final_div_factor=10000.0,
|
52 |
+
)
|
53 |
+
|
54 |
+
# dataset settings
|
55 |
+
dataset_type = "ScanNetPairDataset"
|
56 |
+
data_root = "data/scannet_pair"
|
57 |
+
|
58 |
+
data = dict(
|
59 |
+
num_classes=20,
|
60 |
+
ignore_index=-1,
|
61 |
+
names=[
|
62 |
+
"wall",
|
63 |
+
"floor",
|
64 |
+
"cabinet",
|
65 |
+
"bed",
|
66 |
+
"chair",
|
67 |
+
"sofa",
|
68 |
+
"table",
|
69 |
+
"door",
|
70 |
+
"window",
|
71 |
+
"bookshelf",
|
72 |
+
"picture",
|
73 |
+
"counter",
|
74 |
+
"desk",
|
75 |
+
"curtain",
|
76 |
+
"refridgerator",
|
77 |
+
"shower curtain",
|
78 |
+
"toilet",
|
79 |
+
"sink",
|
80 |
+
"bathtub",
|
81 |
+
"otherfurniture",
|
82 |
+
],
|
83 |
+
train=dict(
|
84 |
+
type=dataset_type,
|
85 |
+
data_root=data_root,
|
86 |
+
view1_transform=[
|
87 |
+
dict(type="CenterShift", apply_z=True),
|
88 |
+
dict(type="Copy", keys_dict={"coord": "origin_coord"}),
|
89 |
+
# dict(type="RandomScale", scale=[0.9, 1.1]),
|
90 |
+
dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=1),
|
91 |
+
dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=1),
|
92 |
+
dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=1),
|
93 |
+
dict(type="RandomFlip", p=0.5),
|
94 |
+
dict(type="RandomJitter", sigma=0.005, clip=0.02),
|
95 |
+
dict(
|
96 |
+
type="RandomColorJitter",
|
97 |
+
brightness=0.4,
|
98 |
+
contrast=0.4,
|
99 |
+
saturation=0.2,
|
100 |
+
hue=0.02,
|
101 |
+
p=0.8,
|
102 |
+
),
|
103 |
+
dict(type="ChromaticJitter", p=0.95, std=0.05),
|
104 |
+
dict(
|
105 |
+
type="GridSample",
|
106 |
+
grid_size=0.025,
|
107 |
+
hash_type="fnv",
|
108 |
+
mode="train",
|
109 |
+
keys=("origin_coord", "coord", "color"),
|
110 |
+
return_grid_coord=True,
|
111 |
+
),
|
112 |
+
dict(type="NormalizeColor"),
|
113 |
+
dict(type="ToTensor"),
|
114 |
+
dict(
|
115 |
+
type="Collect",
|
116 |
+
keys=("origin_coord", "grid_coord", "coord", "color"),
|
117 |
+
offset_keys_dict=dict(offset="coord"),
|
118 |
+
feat_keys=["color"],
|
119 |
+
),
|
120 |
+
],
|
121 |
+
view2_transform=[
|
122 |
+
dict(type="CenterShift", apply_z=True),
|
123 |
+
dict(type="Copy", keys_dict={"coord": "origin_coord"}),
|
124 |
+
# dict(type="RandomScale", scale=[0.9, 1.1]),
|
125 |
+
dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=1),
|
126 |
+
dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=1),
|
127 |
+
dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=1),
|
128 |
+
dict(type="RandomFlip", p=0.5),
|
129 |
+
dict(type="RandomJitter", sigma=0.005, clip=0.02),
|
130 |
+
dict(
|
131 |
+
type="RandomColorJitter",
|
132 |
+
brightness=0.4,
|
133 |
+
contrast=0.4,
|
134 |
+
saturation=0.2,
|
135 |
+
hue=0.02,
|
136 |
+
p=0.8,
|
137 |
+
),
|
138 |
+
dict(type="ChromaticJitter", p=0.95, std=0.05),
|
139 |
+
dict(
|
140 |
+
type="GridSample",
|
141 |
+
grid_size=0.025,
|
142 |
+
hash_type="fnv",
|
143 |
+
mode="train",
|
144 |
+
keys=("origin_coord", "coord", "color"),
|
145 |
+
return_grid_coord=True,
|
146 |
+
),
|
147 |
+
dict(type="NormalizeColor"),
|
148 |
+
dict(type="ToTensor"),
|
149 |
+
dict(
|
150 |
+
type="Collect",
|
151 |
+
keys=("origin_coord", "grid_coord", "coord", "color"),
|
152 |
+
offset_keys_dict=dict(offset="coord"),
|
153 |
+
feat_keys=["color"],
|
154 |
+
),
|
155 |
+
],
|
156 |
+
test_mode=False,
|
157 |
+
),
|
158 |
+
)
|
159 |
+
|
160 |
+
hooks = [
|
161 |
+
dict(type="CheckpointLoader"),
|
162 |
+
dict(type="IterationTimer", warmup_iter=2),
|
163 |
+
dict(type="InformationWriter"),
|
164 |
+
dict(type="CheckpointSaver", save_freq=None),
|
165 |
+
]
|
Pointcept/configs/scannet/semseg-cac-v1m1-0-spunet-base.py
ADDED
@@ -0,0 +1,292 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Parent config(s) this file builds on.
_base_ = ["../_base_/default_runtime.py"]

# misc custom setting
batch_size = 12  # total batch size summed over all GPUs (not per-GPU)
mix_prob = 0.8
empty_cache = False
enable_amp = True
|
8 |
+
|
9 |
+
# model settings
# NOTE(review): despite the "-0-spunet-base" file name, the criteria below
# already include LovaszLoss and this config matches the sibling
# "-1-spunet-lovasz" config - confirm which variant is intended.
model = dict(
    type="CAC-v1m1",
    backbone=dict(
        type="SpUNet-v1m1",
        # 6 input channels; presumably color (3) + normal (3) as collected by
        # this config's data pipeline - confirm.
        in_channels=6,
        num_classes=0,
        channels=(32, 64, 128, 256, 256, 128, 96, 96),
        layers=(2, 3, 4, 6, 2, 2, 2, 2),
    ),
    criteria=[
        dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
        dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
    ],
    num_classes=20,
    # Equals the last entry of backbone ``channels`` (96).
    backbone_out_channels=96,
    cos_temp=15,
    main_weight=1,
    pre_weight=1,
    pre_self_weight=1,
    kl_weight=1,
    conf_thresh=0.75,
    detach_pre_logits=True,
)
|
33 |
+
|
34 |
+
# scheduler settings
epoch = 800
optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True)
scheduler = dict(
    type="OneCycleLR",
    # Peak learning rate is read from the optimizer so the two cannot drift.
    max_lr=optimizer["lr"],
    pct_start=0.05,
    anneal_strategy="cos",
    # If this maps to torch's OneCycleLR: initial lr = max_lr / div_factor and
    # final lr = initial lr / final_div_factor - confirm against the builder.
    div_factor=10.0,
    final_div_factor=10000.0,
)
|
45 |
+
|
46 |
+
# dataset settings
dataset_type = "ScanNetDataset"
data_root = "data/scannet"

data = dict(
    num_classes=20,
    ignore_index=-1,
    # One label per class; the list has num_classes (20) entries.
    # Spellings are runtime strings and are kept exactly as given.
    names=[
        "wall",
        "floor",
        "cabinet",
        "bed",
        "chair",
        "sofa",
        "table",
        "door",
        "window",
        "bookshelf",
        "picture",
        "counter",
        "desk",
        "curtain",
        "refridgerator",
        "shower curtain",
        "toilet",
        "sink",
        "bathtub",
        "otherfurniture",
    ],
    # Training split: geometric and chromatic augmentation, GridSample at
    # grid_size=0.02, then SphereCrop with point_max=100000.
    train=dict(
        type=dataset_type,
        split="train",
        data_root=data_root,
        transform=[
            dict(type="CenterShift", apply_z=True),
            dict(
                type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
            ),
            # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
            dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
            dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
            dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
            dict(type="RandomScale", scale=[0.9, 1.1]),
            # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
            dict(type="RandomFlip", p=0.5),
            dict(type="RandomJitter", sigma=0.005, clip=0.02),
            dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
            dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
            dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
            dict(type="ChromaticJitter", p=0.95, std=0.05),
            # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
            # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
            dict(
                type="GridSample",
                grid_size=0.02,
                hash_type="fnv",
                mode="train",
                return_grid_coord=True,
            ),
            dict(type="SphereCrop", point_max=100000, mode="random"),
            dict(type="CenterShift", apply_z=False),
            dict(type="NormalizeColor"),
            dict(type="ShufflePoint"),
            dict(type="ToTensor"),
            dict(
                type="Collect",
                keys=("coord", "grid_coord", "segment"),
                feat_keys=("color", "normal"),
            ),
        ],
        test_mode=False,
    ),
    # Validation split: no random augmentation. GridSample is still declared
    # with mode="train" here.
    val=dict(
        type=dataset_type,
        split="val",
        data_root=data_root,
        transform=[
            dict(type="CenterShift", apply_z=True),
            dict(
                type="GridSample",
                grid_size=0.02,
                hash_type="fnv",
                mode="train",
                return_grid_coord=True,
            ),
            # dict(type="SphereCrop", point_max=1000000, mode="center"),
            dict(type="CenterShift", apply_z=False),
            dict(type="NormalizeColor"),
            dict(type="ToTensor"),
            dict(
                type="Collect",
                keys=("coord", "grid_coord", "segment"),
                feat_keys=("color", "normal"),
            ),
        ],
        test_mode=False,
    ),
    # Test config: note that it reads the "val" split, with test_mode=True.
    test=dict(
        type=dataset_type,
        split="val",
        data_root=data_root,
        transform=[
            dict(type="CenterShift", apply_z=True),
            dict(type="NormalizeColor"),
        ],
        test_mode=True,
        test_cfg=dict(
            voxelize=dict(
                type="GridSample",
                grid_size=0.02,
                hash_type="fnv",
                mode="test",
                return_grid_coord=True,
                keys=("coord", "color", "normal"),
            ),
            crop=None,
            post_transform=[
                dict(type="CenterShift", apply_z=False),
                dict(type="ToTensor"),
                dict(
                    type="Collect",
                    keys=("coord", "grid_coord", "index"),
                    feat_keys=("color", "normal"),
                ),
            ],
            # 13 augmentation variants: four z-rotations (angle 0, 1/2, 1, 3/2;
            # presumably multiples of pi - confirm) alone, then with
            # RandomScale 0.95 and with RandomScale 1.05, plus one flip.
            # RandomScale bounds are equal, so the scale is presumably fixed.
            aug_transform=[
                [
                    dict(
                        type="RandomRotateTargetAngle",
                        angle=[0],
                        axis="z",
                        center=[0, 0, 0],
                        p=1,
                    )
                ],
                [
                    dict(
                        type="RandomRotateTargetAngle",
                        angle=[1 / 2],
                        axis="z",
                        center=[0, 0, 0],
                        p=1,
                    )
                ],
                [
                    dict(
                        type="RandomRotateTargetAngle",
                        angle=[1],
                        axis="z",
                        center=[0, 0, 0],
                        p=1,
                    )
                ],
                [
                    dict(
                        type="RandomRotateTargetAngle",
                        angle=[3 / 2],
                        axis="z",
                        center=[0, 0, 0],
                        p=1,
                    )
                ],
                [
                    dict(
                        type="RandomRotateTargetAngle",
                        angle=[0],
                        axis="z",
                        center=[0, 0, 0],
                        p=1,
                    ),
                    dict(type="RandomScale", scale=[0.95, 0.95]),
                ],
                [
                    dict(
                        type="RandomRotateTargetAngle",
                        angle=[1 / 2],
                        axis="z",
                        center=[0, 0, 0],
                        p=1,
                    ),
                    dict(type="RandomScale", scale=[0.95, 0.95]),
                ],
                [
                    dict(
                        type="RandomRotateTargetAngle",
                        angle=[1],
                        axis="z",
                        center=[0, 0, 0],
                        p=1,
                    ),
                    dict(type="RandomScale", scale=[0.95, 0.95]),
                ],
                [
                    dict(
                        type="RandomRotateTargetAngle",
                        angle=[3 / 2],
                        axis="z",
                        center=[0, 0, 0],
                        p=1,
                    ),
                    dict(type="RandomScale", scale=[0.95, 0.95]),
                ],
                [
                    dict(
                        type="RandomRotateTargetAngle",
                        angle=[0],
                        axis="z",
                        center=[0, 0, 0],
                        p=1,
                    ),
                    dict(type="RandomScale", scale=[1.05, 1.05]),
                ],
                [
                    dict(
                        type="RandomRotateTargetAngle",
                        angle=[1 / 2],
                        axis="z",
                        center=[0, 0, 0],
                        p=1,
                    ),
                    dict(type="RandomScale", scale=[1.05, 1.05]),
                ],
                [
                    dict(
                        type="RandomRotateTargetAngle",
                        angle=[1],
                        axis="z",
                        center=[0, 0, 0],
                        p=1,
                    ),
                    dict(type="RandomScale", scale=[1.05, 1.05]),
                ],
                [
                    dict(
                        type="RandomRotateTargetAngle",
                        angle=[3 / 2],
                        axis="z",
                        center=[0, 0, 0],
                        p=1,
                    ),
                    dict(type="RandomScale", scale=[1.05, 1.05]),
                ],
                [dict(type="RandomFlip", p=1)],
            ],
        ),
    ),
)
|
Pointcept/configs/scannet/semseg-cac-v1m1-1-spunet-lovasz.py
ADDED
@@ -0,0 +1,292 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Parent config(s) this file builds on.
_base_ = ["../_base_/default_runtime.py"]

# misc custom setting
batch_size = 12  # total batch size summed over all GPUs (not per-GPU)
mix_prob = 0.8
empty_cache = False
enable_amp = True
|
8 |
+
|
9 |
+
# model settings
model = dict(
    type="CAC-v1m1",
    backbone=dict(
        type="SpUNet-v1m1",
        # 6 input channels; presumably color (3) + normal (3) as collected by
        # this config's data pipeline - confirm.
        in_channels=6,
        num_classes=0,
        channels=(32, 64, 128, 256, 256, 128, 96, 96),
        layers=(2, 3, 4, 6, 2, 2, 2, 2),
    ),
    # Two loss terms with equal loss_weight; both skip label -1.
    criteria=[
        dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
        dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
    ],
    num_classes=20,
    # Equals the last entry of backbone ``channels`` (96).
    backbone_out_channels=96,
    cos_temp=15,
    main_weight=1,
    pre_weight=1,
    pre_self_weight=1,
    kl_weight=1,
    conf_thresh=0.75,
    detach_pre_logits=True,
)
|
33 |
+
|
34 |
+
# scheduler settings
epoch = 800
optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True)
scheduler = dict(
    type="OneCycleLR",
    # Peak learning rate is read from the optimizer so the two cannot drift.
    max_lr=optimizer["lr"],
    pct_start=0.05,
    anneal_strategy="cos",
    # If this maps to torch's OneCycleLR: initial lr = max_lr / div_factor and
    # final lr = initial lr / final_div_factor - confirm against the builder.
    div_factor=10.0,
    final_div_factor=10000.0,
)
|
45 |
+
|
46 |
+
# dataset settings
dataset_type = "ScanNetDataset"
data_root = "data/scannet"

data = dict(
    num_classes=20,
    ignore_index=-1,
    # One label per class; the list has num_classes (20) entries.
    # Spellings are runtime strings and are kept exactly as given.
    names=[
        "wall",
        "floor",
        "cabinet",
        "bed",
        "chair",
        "sofa",
        "table",
        "door",
        "window",
        "bookshelf",
        "picture",
        "counter",
        "desk",
        "curtain",
        "refridgerator",
        "shower curtain",
        "toilet",
        "sink",
        "bathtub",
        "otherfurniture",
    ],
    # Training split: geometric and chromatic augmentation, GridSample at
    # grid_size=0.02, then SphereCrop with point_max=100000.
    train=dict(
        type=dataset_type,
        split="train",
        data_root=data_root,
        transform=[
            dict(type="CenterShift", apply_z=True),
            dict(
                type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
            ),
            # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
            dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
            dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
            dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
            dict(type="RandomScale", scale=[0.9, 1.1]),
            # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
            dict(type="RandomFlip", p=0.5),
            dict(type="RandomJitter", sigma=0.005, clip=0.02),
            dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
            dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
            dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
            dict(type="ChromaticJitter", p=0.95, std=0.05),
            # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
            # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
            dict(
                type="GridSample",
                grid_size=0.02,
                hash_type="fnv",
                mode="train",
                return_grid_coord=True,
            ),
            dict(type="SphereCrop", point_max=100000, mode="random"),
            dict(type="CenterShift", apply_z=False),
            dict(type="NormalizeColor"),
            dict(type="ShufflePoint"),
            dict(type="ToTensor"),
            dict(
                type="Collect",
                keys=("coord", "grid_coord", "segment"),
                feat_keys=("color", "normal"),
            ),
        ],
        test_mode=False,
    ),
    # Validation split: no random augmentation. GridSample is still declared
    # with mode="train" here.
    val=dict(
        type=dataset_type,
        split="val",
        data_root=data_root,
        transform=[
            dict(type="CenterShift", apply_z=True),
            dict(
                type="GridSample",
                grid_size=0.02,
                hash_type="fnv",
                mode="train",
                return_grid_coord=True,
            ),
            # dict(type="SphereCrop", point_max=1000000, mode="center"),
            dict(type="CenterShift", apply_z=False),
            dict(type="NormalizeColor"),
            dict(type="ToTensor"),
            dict(
                type="Collect",
                keys=("coord", "grid_coord", "segment"),
                feat_keys=("color", "normal"),
            ),
        ],
        test_mode=False,
    ),
    # Test config: note that it reads the "val" split, with test_mode=True.
    test=dict(
        type=dataset_type,
        split="val",
        data_root=data_root,
        transform=[
            dict(type="CenterShift", apply_z=True),
            dict(type="NormalizeColor"),
        ],
        test_mode=True,
        test_cfg=dict(
            voxelize=dict(
                type="GridSample",
                grid_size=0.02,
                hash_type="fnv",
                mode="test",
                return_grid_coord=True,
                keys=("coord", "color", "normal"),
            ),
            crop=None,
            post_transform=[
                dict(type="CenterShift", apply_z=False),
                dict(type="ToTensor"),
                dict(
                    type="Collect",
                    keys=("coord", "grid_coord", "index"),
                    feat_keys=("color", "normal"),
                ),
            ],
            # 13 augmentation variants: four z-rotations (angle 0, 1/2, 1, 3/2;
            # presumably multiples of pi - confirm) alone, then with
            # RandomScale 0.95 and with RandomScale 1.05, plus one flip.
            # RandomScale bounds are equal, so the scale is presumably fixed.
            aug_transform=[
                [
                    dict(
                        type="RandomRotateTargetAngle",
                        angle=[0],
                        axis="z",
                        center=[0, 0, 0],
                        p=1,
                    )
                ],
                [
                    dict(
                        type="RandomRotateTargetAngle",
                        angle=[1 / 2],
                        axis="z",
                        center=[0, 0, 0],
                        p=1,
                    )
                ],
                [
                    dict(
                        type="RandomRotateTargetAngle",
                        angle=[1],
                        axis="z",
                        center=[0, 0, 0],
                        p=1,
                    )
                ],
                [
                    dict(
                        type="RandomRotateTargetAngle",
                        angle=[3 / 2],
                        axis="z",
                        center=[0, 0, 0],
                        p=1,
                    )
                ],
                [
                    dict(
                        type="RandomRotateTargetAngle",
                        angle=[0],
                        axis="z",
                        center=[0, 0, 0],
                        p=1,
                    ),
                    dict(type="RandomScale", scale=[0.95, 0.95]),
                ],
                [
                    dict(
                        type="RandomRotateTargetAngle",
                        angle=[1 / 2],
                        axis="z",
                        center=[0, 0, 0],
                        p=1,
                    ),
                    dict(type="RandomScale", scale=[0.95, 0.95]),
                ],
                [
                    dict(
                        type="RandomRotateTargetAngle",
                        angle=[1],
                        axis="z",
                        center=[0, 0, 0],
                        p=1,
                    ),
                    dict(type="RandomScale", scale=[0.95, 0.95]),
                ],
                [
                    dict(
                        type="RandomRotateTargetAngle",
                        angle=[3 / 2],
                        axis="z",
                        center=[0, 0, 0],
                        p=1,
                    ),
                    dict(type="RandomScale", scale=[0.95, 0.95]),
                ],
                [
                    dict(
                        type="RandomRotateTargetAngle",
                        angle=[0],
                        axis="z",
                        center=[0, 0, 0],
                        p=1,
                    ),
                    dict(type="RandomScale", scale=[1.05, 1.05]),
                ],
                [
                    dict(
                        type="RandomRotateTargetAngle",
                        angle=[1 / 2],
                        axis="z",
                        center=[0, 0, 0],
                        p=1,
                    ),
                    dict(type="RandomScale", scale=[1.05, 1.05]),
                ],
                [
                    dict(
                        type="RandomRotateTargetAngle",
                        angle=[1],
                        axis="z",
                        center=[0, 0, 0],
                        p=1,
                    ),
                    dict(type="RandomScale", scale=[1.05, 1.05]),
                ],
                [
                    dict(
                        type="RandomRotateTargetAngle",
                        angle=[3 / 2],
                        axis="z",
                        center=[0, 0, 0],
                        p=1,
                    ),
                    dict(type="RandomScale", scale=[1.05, 1.05]),
                ],
                [dict(type="RandomFlip", p=1)],
            ],
        ),
    ),
)
|
Pointcept/configs/scannet/semseg-cac-v1m1-2-ptv2-lovasz.py
ADDED
@@ -0,0 +1,309 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Parent config(s) this file builds on.
_base_ = ["../_base_/default_runtime.py"]

# misc custom setting
batch_size = 12  # total batch size summed over all GPUs (not per-GPU)
mix_prob = 0.8
empty_cache = False
enable_amp = True
|
8 |
+
|
9 |
+
# model settings
model = dict(
    type="CAC-v1m1",
    backbone=dict(
        type="PT-v2m2",
        # 9 input channels; presumably coord (3) + color (3) + normal (3) as
        # collected by this config's data pipeline - confirm.
        in_channels=9,
        num_classes=0,
        patch_embed_depth=1,
        patch_embed_channels=48,
        patch_embed_groups=6,
        patch_embed_neighbours=8,
        enc_depths=(2, 2, 6, 2),
        enc_channels=(96, 192, 384, 512),
        enc_groups=(12, 24, 48, 64),
        enc_neighbours=(16, 16, 16, 16),
        dec_depths=(1, 1, 1, 1),
        dec_channels=(48, 96, 192, 384),
        dec_groups=(6, 12, 24, 48),
        dec_neighbours=(16, 16, 16, 16),
        # Successive ratios x3, x2.5, x2.5, x2.5, starting from the 0.02
        # GridSample grid_size used by this config's data pipeline.
        grid_sizes=(0.06, 0.15, 0.375, 0.9375),
        attn_qkv_bias=True,
        pe_multiplier=False,
        pe_bias=True,
        attn_drop_rate=0.0,
        drop_path_rate=0.3,
        enable_checkpoint=False,
        unpool_backend="map",  # map / interp
    ),
    criteria=[
        dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
        dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
    ],
    num_classes=20,
    # Equals the first entry of backbone ``dec_channels`` (48).
    backbone_out_channels=48,
    cos_temp=15,
    main_weight=1,
    pre_weight=1,
    pre_self_weight=1,
    kl_weight=1,
    conf_thresh=0.75,
    detach_pre_logits=True,
)
|
51 |
+
|
52 |
+
# scheduler settings
epoch = 900
optimizer = dict(type="AdamW", lr=0.005, weight_decay=0.02)
scheduler = dict(
    type="OneCycleLR",
    # Peak learning rate is read from the optimizer so the two cannot drift.
    max_lr=optimizer["lr"],
    pct_start=0.05,
    anneal_strategy="cos",
    # If this maps to torch's OneCycleLR: initial lr = max_lr / div_factor and
    # final lr = initial lr / final_div_factor - confirm against the builder.
    div_factor=10.0,
    final_div_factor=1000.0,
)
|
63 |
+
|
64 |
+
# dataset settings
dataset_type = "ScanNetDataset"
data_root = "data/scannet"

data = dict(
    num_classes=20,
    ignore_index=-1,
    # One label per class; the list has num_classes (20) entries.
    # Spellings are runtime strings and are kept exactly as given.
    names=[
        "wall",
        "floor",
        "cabinet",
        "bed",
        "chair",
        "sofa",
        "table",
        "door",
        "window",
        "bookshelf",
        "picture",
        "counter",
        "desk",
        "curtain",
        "refridgerator",
        "shower curtain",
        "toilet",
        "sink",
        "bathtub",
        "otherfurniture",
    ],
    # Training split: geometric and chromatic augmentation, GridSample at
    # grid_size=0.02 (with return_min_coord rather than return_grid_coord),
    # then SphereCrop with point_max=100000.
    train=dict(
        type=dataset_type,
        split="train",
        data_root=data_root,
        transform=[
            dict(type="CenterShift", apply_z=True),
            dict(
                type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
            ),
            # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
            dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
            dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
            dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
            dict(type="RandomScale", scale=[0.9, 1.1]),
            # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
            dict(type="RandomFlip", p=0.5),
            dict(type="RandomJitter", sigma=0.005, clip=0.02),
            dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
            dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
            dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
            dict(type="ChromaticJitter", p=0.95, std=0.05),
            # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
            # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
            dict(
                type="GridSample",
                grid_size=0.02,
                hash_type="fnv",
                mode="train",
                return_min_coord=True,
            ),
            dict(type="SphereCrop", point_max=100000, mode="random"),
            dict(type="CenterShift", apply_z=False),
            dict(type="NormalizeColor"),
            dict(type="ShufflePoint"),
            dict(type="ToTensor"),
            # "coord" is both a collected key and part of the feature keys.
            dict(
                type="Collect",
                keys=("coord", "segment"),
                feat_keys=("coord", "color", "normal"),
            ),
        ],
        test_mode=False,
    ),
    # Validation split: no random augmentation. GridSample is still declared
    # with mode="train" here.
    val=dict(
        type=dataset_type,
        split="val",
        data_root=data_root,
        transform=[
            dict(type="CenterShift", apply_z=True),
            dict(
                type="GridSample",
                grid_size=0.02,
                hash_type="fnv",
                mode="train",
                return_min_coord=True,
            ),
            # dict(type="SphereCrop", point_max=1000000, mode="center"),
            dict(type="CenterShift", apply_z=False),
            dict(type="NormalizeColor"),
            dict(type="ToTensor"),
            dict(
                type="Collect",
                keys=("coord", "segment"),
                feat_keys=("coord", "color", "normal"),
            ),
        ],
        test_mode=False,
    ),
    # Test config: note that it reads the "val" split, with test_mode=True.
    test=dict(
        type=dataset_type,
        split="val",
        data_root=data_root,
        transform=[
            dict(type="CenterShift", apply_z=True),
            dict(type="NormalizeColor"),
        ],
        test_mode=True,
        test_cfg=dict(
            voxelize=dict(
                type="GridSample",
                grid_size=0.02,
                hash_type="fnv",
                mode="test",
                keys=("coord", "color", "normal"),
            ),
            crop=None,
            post_transform=[
                dict(type="CenterShift", apply_z=False),
                dict(type="ToTensor"),
                dict(
                    type="Collect",
                    keys=("coord", "index"),
                    feat_keys=("coord", "color", "normal"),
                ),
            ],
            # 13 augmentation variants: four z-rotations (angle 0, 1/2, 1, 3/2;
            # presumably multiples of pi - confirm) alone, then with
            # RandomScale 0.95 and with RandomScale 1.05, plus one flip.
            # RandomScale bounds are equal, so the scale is presumably fixed.
            aug_transform=[
                [
                    dict(
                        type="RandomRotateTargetAngle",
                        angle=[0],
                        axis="z",
                        center=[0, 0, 0],
                        p=1,
                    )
                ],
                [
                    dict(
                        type="RandomRotateTargetAngle",
                        angle=[1 / 2],
                        axis="z",
                        center=[0, 0, 0],
                        p=1,
                    )
                ],
                [
                    dict(
                        type="RandomRotateTargetAngle",
                        angle=[1],
                        axis="z",
                        center=[0, 0, 0],
                        p=1,
                    )
                ],
                [
                    dict(
                        type="RandomRotateTargetAngle",
                        angle=[3 / 2],
                        axis="z",
                        center=[0, 0, 0],
                        p=1,
                    )
                ],
                [
                    dict(
                        type="RandomRotateTargetAngle",
                        angle=[0],
                        axis="z",
                        center=[0, 0, 0],
                        p=1,
                    ),
                    dict(type="RandomScale", scale=[0.95, 0.95]),
                ],
                [
                    dict(
                        type="RandomRotateTargetAngle",
                        angle=[1 / 2],
                        axis="z",
                        center=[0, 0, 0],
                        p=1,
                    ),
                    dict(type="RandomScale", scale=[0.95, 0.95]),
                ],
                [
                    dict(
                        type="RandomRotateTargetAngle",
                        angle=[1],
                        axis="z",
                        center=[0, 0, 0],
                        p=1,
                    ),
                    dict(type="RandomScale", scale=[0.95, 0.95]),
                ],
                [
                    dict(
                        type="RandomRotateTargetAngle",
                        angle=[3 / 2],
                        axis="z",
                        center=[0, 0, 0],
                        p=1,
                    ),
                    dict(type="RandomScale", scale=[0.95, 0.95]),
                ],
                [
                    dict(
                        type="RandomRotateTargetAngle",
                        angle=[0],
                        axis="z",
                        center=[0, 0, 0],
                        p=1,
                    ),
                    dict(type="RandomScale", scale=[1.05, 1.05]),
                ],
                [
                    dict(
                        type="RandomRotateTargetAngle",
                        angle=[1 / 2],
                        axis="z",
                        center=[0, 0, 0],
                        p=1,
                    ),
                    dict(type="RandomScale", scale=[1.05, 1.05]),
                ],
                [
                    dict(
                        type="RandomRotateTargetAngle",
                        angle=[1],
                        axis="z",
                        center=[0, 0, 0],
                        p=1,
                    ),
                    dict(type="RandomScale", scale=[1.05, 1.05]),
                ],
                [
                    dict(
                        type="RandomRotateTargetAngle",
                        angle=[3 / 2],
                        axis="z",
                        center=[0, 0, 0],
                        p=1,
                    ),
                    dict(type="RandomScale", scale=[1.05, 1.05]),
                ],
                [dict(type="RandomFlip", p=1)],
            ],
        ),
    ),
)
|
Pointcept/configs/scannet/semseg-minkunet34c-0-base.py
ADDED
@@ -0,0 +1,193 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Parent config(s) this file builds on.
_base_ = ["../_base_/default_runtime.py"]

# misc custom setting
batch_size = 12  # total batch size summed over all GPUs (not per-GPU)
mix_prob = 0.8
empty_cache = False
enable_amp = True
|
8 |
+
|
9 |
+
# model settings
model = dict(
    type="DefaultSegmentor",
    # out_channels=20 matches data num_classes in this config; in_channels=9
    # is presumably coord (3) + color (3) + normal (3) - confirm.
    backbone=dict(type="MinkUNet34C", in_channels=9, out_channels=20),
    criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
)
|
15 |
+
|
16 |
+
# scheduler settings
epoch = 600
optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True)
scheduler = dict(
    type="OneCycleLR",
    # Peak learning rate is read from the optimizer so the two cannot drift.
    max_lr=optimizer["lr"],
    pct_start=0.05,
    anneal_strategy="cos",
    # If this maps to torch's OneCycleLR: initial lr = max_lr / div_factor and
    # final lr = initial lr / final_div_factor - confirm against the builder.
    div_factor=10.0,
    final_div_factor=10000.0,
)
|
27 |
+
|
28 |
+
# dataset settings
dataset_type = "ScanNetDataset"
data_root = "data/scannet"

data = dict(
    num_classes=20,
    ignore_index=-1,
    # One label per class; the list has num_classes (20) entries.
    # Spellings are runtime strings and are kept exactly as given.
    names=[
        "wall",
        "floor",
        "cabinet",
        "bed",
        "chair",
        "sofa",
        "table",
        "door",
        "window",
        "bookshelf",
        "picture",
        "counter",
        "desk",
        "curtain",
        "refridgerator",
        "shower curtain",
        "toilet",
        "sink",
        "bathtub",
        "otherfurniture",
    ],
    # Training split: geometric and chromatic augmentation followed by
    # GridSample at grid_size=0.02. SphereCrop is disabled in this config.
    train=dict(
        type=dataset_type,
        split="train",
        data_root=data_root,
        transform=[
            dict(type="CenterShift", apply_z=True),
            dict(
                type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
            ),
            # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
            dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
            dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
            dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
            dict(type="RandomScale", scale=[0.9, 1.1]),
            # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
            dict(type="RandomFlip", p=0.5),
            dict(type="RandomJitter", sigma=0.005, clip=0.02),
            dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
            dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
            dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
            dict(type="ChromaticJitter", p=0.95, std=0.05),
            # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
            # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
            dict(
                type="GridSample",
                grid_size=0.02,
                hash_type="fnv",
                mode="train",
                return_grid_coord=True,
            ),
            # dict(type="SphereCrop", point_max=100000, mode="random"),
            dict(type="CenterShift", apply_z=False),
            dict(type="NormalizeColor"),
            dict(type="ShufflePoint"),
            dict(type="ToTensor"),
            dict(
                type="Collect",
                keys=("coord", "grid_coord", "segment"),
                feat_keys=("coord", "color", "normal"),
            ),
        ],
        test_mode=False,
    ),
    # Validation split: no random augmentation. GridSample is still declared
    # with mode="train" here.
    val=dict(
        type=dataset_type,
        split="val",
        data_root=data_root,
        transform=[
            dict(type="CenterShift", apply_z=True),
            dict(
                type="GridSample",
                grid_size=0.02,
                hash_type="fnv",
                mode="train",
                return_grid_coord=True,
            ),
            # dict(type="SphereCrop", point_max=1000000, mode="center"),
            dict(type="CenterShift", apply_z=False),
            dict(type="NormalizeColor"),
            dict(type="ToTensor"),
            dict(
                type="Collect",
                keys=("coord", "grid_coord", "segment"),
                feat_keys=("coord", "color", "normal"),
            ),
        ],
        test_mode=False,
    ),
    # Test config: note that it reads the "val" split, with test_mode=True.
    test=dict(
        type=dataset_type,
        split="val",
        data_root=data_root,
        transform=[
            dict(type="CenterShift", apply_z=True),
            dict(type="NormalizeColor"),
        ],
        test_mode=True,
        test_cfg=dict(
            voxelize=dict(
                type="GridSample",
                grid_size=0.02,
                hash_type="fnv",
                mode="test",
                return_grid_coord=True,
                keys=("coord", "color", "normal"),
            ),
            crop=None,
            post_transform=[
                dict(type="CenterShift", apply_z=False),
                dict(type="ToTensor"),
                dict(
                    type="Collect",
                    keys=("coord", "grid_coord", "index"),
                    feat_keys=("coord", "color", "normal"),
                ),
            ],
            # Four augmentation variants: z-rotations with angle 0, 1/2, 1 and
            # 3/2 (presumably multiples of pi - confirm); no scale or flip.
            aug_transform=[
                [
                    dict(
                        type="RandomRotateTargetAngle",
                        angle=[0],
                        axis="z",
                        center=[0, 0, 0],
                        p=1,
                    )
                ],
                [
                    dict(
                        type="RandomRotateTargetAngle",
                        angle=[1 / 2],
                        axis="z",
                        center=[0, 0, 0],
                        p=1,
                    )
                ],
                [
                    dict(
                        type="RandomRotateTargetAngle",
                        angle=[1],
                        axis="z",
                        center=[0, 0, 0],
                        p=1,
                    )
                ],
                [
                    dict(
                        type="RandomRotateTargetAngle",
                        angle=[3 / 2],
                        axis="z",
                        center=[0, 0, 0],
                        p=1,
                    )
                ],
            ],
        ),
    ),
)
|
Pointcept/configs/scannet/semseg-oacnns-v1m1-0-base.py
ADDED
@@ -0,0 +1,290 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
_base_ = ["../_base_/default_runtime.py"]
|
2 |
+
|
3 |
+
# misc custom setting
|
4 |
+
batch_size = 12 # bs: total bs in all gpus
|
5 |
+
mix_prob = 0.8
|
6 |
+
empty_cache = False
|
7 |
+
enable_amp = True
|
8 |
+
sync_bn = True
|
9 |
+
|
10 |
+
# model settings
|
11 |
+
model = dict(
|
12 |
+
type="DefaultSegmentor",
|
13 |
+
backbone=dict(
|
14 |
+
type="OACNNs",
|
15 |
+
in_channels=9,
|
16 |
+
num_classes=20,
|
17 |
+
embed_channels=64,
|
18 |
+
enc_channels=[64, 64, 128, 256],
|
19 |
+
groups=[4, 4, 8, 16],
|
20 |
+
enc_depth=[3, 3, 9, 8],
|
21 |
+
dec_channels=[256, 256, 256, 256],
|
22 |
+
point_grid_size=[[8, 12, 16, 16], [6, 9, 12, 12], [4, 6, 8, 8], [3, 4, 6, 6]],
|
23 |
+
dec_depth=[2, 2, 2, 2],
|
24 |
+
enc_num_ref=[16, 16, 16, 16],
|
25 |
+
),
|
26 |
+
criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
|
27 |
+
)
|
28 |
+
|
29 |
+
|
30 |
+
epoch = 900
|
31 |
+
optimizer = dict(type="AdamW", lr=0.001, weight_decay=0.02)
|
32 |
+
scheduler = dict(
|
33 |
+
type="OneCycleLR",
|
34 |
+
max_lr=optimizer["lr"],
|
35 |
+
pct_start=0.05,
|
36 |
+
anneal_strategy="cos",
|
37 |
+
div_factor=10.0,
|
38 |
+
final_div_factor=1000.0,
|
39 |
+
)
|
40 |
+
|
41 |
+
# dataset settings
|
42 |
+
dataset_type = "ScanNetDataset"
|
43 |
+
data_root = "data/scannet"
|
44 |
+
|
45 |
+
data = dict(
|
46 |
+
num_classes=20,
|
47 |
+
ignore_index=-1,
|
48 |
+
names=[
|
49 |
+
"wall",
|
50 |
+
"floor",
|
51 |
+
"cabinet",
|
52 |
+
"bed",
|
53 |
+
"chair",
|
54 |
+
"sofa",
|
55 |
+
"table",
|
56 |
+
"door",
|
57 |
+
"window",
|
58 |
+
"bookshelf",
|
59 |
+
"picture",
|
60 |
+
"counter",
|
61 |
+
"desk",
|
62 |
+
"curtain",
|
63 |
+
"refridgerator",
|
64 |
+
"shower curtain",
|
65 |
+
"toilet",
|
66 |
+
"sink",
|
67 |
+
"bathtub",
|
68 |
+
"otherfurniture",
|
69 |
+
],
|
70 |
+
train=dict(
|
71 |
+
type=dataset_type,
|
72 |
+
split="train",
|
73 |
+
data_root=data_root,
|
74 |
+
transform=[
|
75 |
+
dict(type="CenterShift", apply_z=True),
|
76 |
+
dict(
|
77 |
+
type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
|
78 |
+
),
|
79 |
+
# dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis='z', p=0.75),
|
80 |
+
dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
|
81 |
+
dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
|
82 |
+
dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
|
83 |
+
dict(type="RandomScale", scale=[0.9, 1.1]),
|
84 |
+
# dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
|
85 |
+
dict(type="RandomFlip", p=0.5),
|
86 |
+
dict(type="RandomJitter", sigma=0.005, clip=0.02),
|
87 |
+
dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
|
88 |
+
dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
|
89 |
+
dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
|
90 |
+
dict(type="ChromaticJitter", p=0.95, std=0.05),
|
91 |
+
# dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
|
92 |
+
# dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
|
93 |
+
dict(
|
94 |
+
type="GridSample",
|
95 |
+
grid_size=0.02,
|
96 |
+
hash_type="fnv",
|
97 |
+
mode="train",
|
98 |
+
return_grid_coord=True,
|
99 |
+
return_min_coord=True,
|
100 |
+
),
|
101 |
+
dict(type="SphereCrop", sample_rate=0.8, mode="random"),
|
102 |
+
dict(type="SphereCrop", point_max=100000, mode="random"),
|
103 |
+
dict(type="CenterShift", apply_z=False),
|
104 |
+
dict(type="NormalizeColor"),
|
105 |
+
dict(type="ShufflePoint"),
|
106 |
+
dict(type="ToTensor"),
|
107 |
+
dict(
|
108 |
+
type="Collect",
|
109 |
+
keys=("coord", "grid_coord", "segment"),
|
110 |
+
feat_keys=("coord", "normal", "color"),
|
111 |
+
),
|
112 |
+
],
|
113 |
+
test_mode=False,
|
114 |
+
),
|
115 |
+
val=dict(
|
116 |
+
type=dataset_type,
|
117 |
+
split="val",
|
118 |
+
data_root=data_root,
|
119 |
+
transform=[
|
120 |
+
dict(type="CenterShift", apply_z=True),
|
121 |
+
dict(
|
122 |
+
type="GridSample",
|
123 |
+
grid_size=0.02,
|
124 |
+
hash_type="fnv",
|
125 |
+
mode="train",
|
126 |
+
return_grid_coord=True,
|
127 |
+
return_min_coord=True,
|
128 |
+
),
|
129 |
+
# dict(type="SphereCrop", point_max=1000000, mode='center'),
|
130 |
+
dict(type="CenterShift", apply_z=False),
|
131 |
+
dict(type="NormalizeColor"),
|
132 |
+
dict(type="ToTensor"),
|
133 |
+
dict(
|
134 |
+
type="Collect",
|
135 |
+
keys=("coord", "grid_coord", "segment"),
|
136 |
+
feat_keys=("coord", "normal", "color"),
|
137 |
+
),
|
138 |
+
],
|
139 |
+
test_mode=False,
|
140 |
+
),
|
141 |
+
test=dict(
|
142 |
+
type=dataset_type,
|
143 |
+
split="val",
|
144 |
+
data_root=data_root,
|
145 |
+
transform=[
|
146 |
+
dict(type="CenterShift", apply_z=True),
|
147 |
+
dict(type="NormalizeColor"),
|
148 |
+
],
|
149 |
+
test_mode=True,
|
150 |
+
test_cfg=dict(
|
151 |
+
voxelize=dict(
|
152 |
+
type="GridSample",
|
153 |
+
grid_size=0.02,
|
154 |
+
hash_type="fnv",
|
155 |
+
mode="test",
|
156 |
+
return_grid_coord=True,
|
157 |
+
keys=("coord", "normal", "color"),
|
158 |
+
),
|
159 |
+
crop=None,
|
160 |
+
post_transform=[
|
161 |
+
dict(type="CenterShift", apply_z=False),
|
162 |
+
dict(type="ToTensor"),
|
163 |
+
dict(
|
164 |
+
type="Collect",
|
165 |
+
keys=("coord", "grid_coord", "index"),
|
166 |
+
feat_keys=("coord", "normal", "color"),
|
167 |
+
),
|
168 |
+
],
|
169 |
+
aug_transform=[
|
170 |
+
[
|
171 |
+
dict(
|
172 |
+
type="RandomRotateTargetAngle",
|
173 |
+
angle=[0],
|
174 |
+
axis="z",
|
175 |
+
center=[0, 0, 0],
|
176 |
+
p=1,
|
177 |
+
)
|
178 |
+
],
|
179 |
+
[
|
180 |
+
dict(
|
181 |
+
type="RandomRotateTargetAngle",
|
182 |
+
angle=[1 / 2],
|
183 |
+
axis="z",
|
184 |
+
center=[0, 0, 0],
|
185 |
+
p=1,
|
186 |
+
)
|
187 |
+
],
|
188 |
+
[
|
189 |
+
dict(
|
190 |
+
type="RandomRotateTargetAngle",
|
191 |
+
angle=[1],
|
192 |
+
axis="z",
|
193 |
+
center=[0, 0, 0],
|
194 |
+
p=1,
|
195 |
+
)
|
196 |
+
],
|
197 |
+
[
|
198 |
+
dict(
|
199 |
+
type="RandomRotateTargetAngle",
|
200 |
+
angle=[3 / 2],
|
201 |
+
axis="z",
|
202 |
+
center=[0, 0, 0],
|
203 |
+
p=1,
|
204 |
+
)
|
205 |
+
],
|
206 |
+
[
|
207 |
+
dict(
|
208 |
+
type="RandomRotateTargetAngle",
|
209 |
+
angle=[0],
|
210 |
+
axis="z",
|
211 |
+
center=[0, 0, 0],
|
212 |
+
p=1,
|
213 |
+
),
|
214 |
+
dict(type="RandomScale", scale=[0.95, 0.95]),
|
215 |
+
],
|
216 |
+
[
|
217 |
+
dict(
|
218 |
+
type="RandomRotateTargetAngle",
|
219 |
+
angle=[1 / 2],
|
220 |
+
axis="z",
|
221 |
+
center=[0, 0, 0],
|
222 |
+
p=1,
|
223 |
+
),
|
224 |
+
dict(type="RandomScale", scale=[0.95, 0.95]),
|
225 |
+
],
|
226 |
+
[
|
227 |
+
dict(
|
228 |
+
type="RandomRotateTargetAngle",
|
229 |
+
angle=[1],
|
230 |
+
axis="z",
|
231 |
+
center=[0, 0, 0],
|
232 |
+
p=1,
|
233 |
+
),
|
234 |
+
dict(type="RandomScale", scale=[0.95, 0.95]),
|
235 |
+
],
|
236 |
+
[
|
237 |
+
dict(
|
238 |
+
type="RandomRotateTargetAngle",
|
239 |
+
angle=[3 / 2],
|
240 |
+
axis="z",
|
241 |
+
center=[0, 0, 0],
|
242 |
+
p=1,
|
243 |
+
),
|
244 |
+
dict(type="RandomScale", scale=[0.95, 0.95]),
|
245 |
+
],
|
246 |
+
[
|
247 |
+
dict(
|
248 |
+
type="RandomRotateTargetAngle",
|
249 |
+
angle=[0],
|
250 |
+
axis="z",
|
251 |
+
center=[0, 0, 0],
|
252 |
+
p=1,
|
253 |
+
),
|
254 |
+
dict(type="RandomScale", scale=[1.05, 1.05]),
|
255 |
+
],
|
256 |
+
[
|
257 |
+
dict(
|
258 |
+
type="RandomRotateTargetAngle",
|
259 |
+
angle=[1 / 2],
|
260 |
+
axis="z",
|
261 |
+
center=[0, 0, 0],
|
262 |
+
p=1,
|
263 |
+
),
|
264 |
+
dict(type="RandomScale", scale=[1.05, 1.05]),
|
265 |
+
],
|
266 |
+
[
|
267 |
+
dict(
|
268 |
+
type="RandomRotateTargetAngle",
|
269 |
+
angle=[1],
|
270 |
+
axis="z",
|
271 |
+
center=[0, 0, 0],
|
272 |
+
p=1,
|
273 |
+
),
|
274 |
+
dict(type="RandomScale", scale=[1.05, 1.05]),
|
275 |
+
],
|
276 |
+
[
|
277 |
+
dict(
|
278 |
+
type="RandomRotateTargetAngle",
|
279 |
+
angle=[3 / 2],
|
280 |
+
axis="z",
|
281 |
+
center=[0, 0, 0],
|
282 |
+
p=1,
|
283 |
+
),
|
284 |
+
dict(type="RandomScale", scale=[1.05, 1.05]),
|
285 |
+
],
|
286 |
+
[dict(type="RandomFlip", p=1)],
|
287 |
+
],
|
288 |
+
),
|
289 |
+
),
|
290 |
+
)
|
Pointcept/configs/scannet/semseg-octformer-v1m1-0-base.py
ADDED
@@ -0,0 +1,296 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
_base_ = ["../_base_/default_runtime.py"]
|
2 |
+
|
3 |
+
# misc custom setting
|
4 |
+
batch_size = 12 # bs: total bs in all gpus
|
5 |
+
mix_prob = 0.8
|
6 |
+
empty_cache = False
|
7 |
+
enable_amp = False
|
8 |
+
|
9 |
+
# model settings
|
10 |
+
model = dict(
|
11 |
+
type="DefaultSegmentor",
|
12 |
+
backbone=dict(
|
13 |
+
type="OctFormer-v1m1",
|
14 |
+
in_channels=10,
|
15 |
+
num_classes=20,
|
16 |
+
fpn_channels=168,
|
17 |
+
channels=(96, 192, 384, 384),
|
18 |
+
num_blocks=(2, 2, 18, 2),
|
19 |
+
num_heads=(6, 12, 24, 24),
|
20 |
+
patch_size=26,
|
21 |
+
stem_down=2,
|
22 |
+
head_up=2,
|
23 |
+
dilation=4,
|
24 |
+
drop_path=0.5,
|
25 |
+
nempty=True,
|
26 |
+
octree_depth=11,
|
27 |
+
octree_full_depth=2,
|
28 |
+
),
|
29 |
+
criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
|
30 |
+
)
|
31 |
+
|
32 |
+
# scheduler settings
|
33 |
+
epoch = 600
|
34 |
+
optimizer = dict(type="AdamW", lr=0.0015, weight_decay=0.05)
|
35 |
+
scheduler = dict(
|
36 |
+
type="MultiStepWithWarmupLR",
|
37 |
+
milestones=[0.6, 0.9],
|
38 |
+
gamma=0.1,
|
39 |
+
warmup_rate=0.05,
|
40 |
+
warmup_scale=1e-5,
|
41 |
+
)
|
42 |
+
param_dicts = [dict(keyword="blocks", lr=0.00015)]
|
43 |
+
|
44 |
+
# dataset settings
|
45 |
+
dataset_type = "ScanNetDataset"
|
46 |
+
data_root = "data/scannet"
|
47 |
+
|
48 |
+
data = dict(
|
49 |
+
num_classes=20,
|
50 |
+
ignore_index=-1,
|
51 |
+
names=[
|
52 |
+
"wall",
|
53 |
+
"floor",
|
54 |
+
"cabinet",
|
55 |
+
"bed",
|
56 |
+
"chair",
|
57 |
+
"sofa",
|
58 |
+
"table",
|
59 |
+
"door",
|
60 |
+
"window",
|
61 |
+
"bookshelf",
|
62 |
+
"picture",
|
63 |
+
"counter",
|
64 |
+
"desk",
|
65 |
+
"curtain",
|
66 |
+
"refridgerator",
|
67 |
+
"shower curtain",
|
68 |
+
"toilet",
|
69 |
+
"sink",
|
70 |
+
"bathtub",
|
71 |
+
"otherfurniture",
|
72 |
+
],
|
73 |
+
train=dict(
|
74 |
+
type=dataset_type,
|
75 |
+
split="train",
|
76 |
+
data_root=data_root,
|
77 |
+
transform=[
|
78 |
+
dict(type="CenterShift", apply_z=True),
|
79 |
+
dict(
|
80 |
+
type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
|
81 |
+
),
|
82 |
+
# dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
|
83 |
+
dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
|
84 |
+
dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
|
85 |
+
dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
|
86 |
+
dict(type="RandomScale", scale=[0.9, 1.1]),
|
87 |
+
# dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
|
88 |
+
dict(type="RandomFlip", p=0.5),
|
89 |
+
dict(type="RandomJitter", sigma=0.005, clip=0.02),
|
90 |
+
dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
|
91 |
+
dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
|
92 |
+
dict(type="ChromaticTranslation", p=0.95, ratio=0.1),
|
93 |
+
dict(type="ChromaticJitter", p=0.95, std=0.05),
|
94 |
+
# dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
|
95 |
+
# dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
|
96 |
+
dict(
|
97 |
+
type="GridSample",
|
98 |
+
grid_size=0.01,
|
99 |
+
hash_type="fnv",
|
100 |
+
mode="train",
|
101 |
+
return_min_coord=True,
|
102 |
+
return_displacement=True,
|
103 |
+
project_displacement=True,
|
104 |
+
),
|
105 |
+
dict(type="SphereCrop", sample_rate=0.8, mode="random"),
|
106 |
+
dict(type="SphereCrop", point_max=120000, mode="random"),
|
107 |
+
dict(type="CenterShift", apply_z=False),
|
108 |
+
dict(type="NormalizeColor"),
|
109 |
+
dict(type="ShufflePoint"),
|
110 |
+
dict(type="ToTensor"),
|
111 |
+
dict(
|
112 |
+
type="Collect",
|
113 |
+
keys=("coord", "normal", "segment"),
|
114 |
+
feat_keys=("coord", "color", "normal", "displacement"),
|
115 |
+
),
|
116 |
+
],
|
117 |
+
test_mode=False,
|
118 |
+
),
|
119 |
+
val=dict(
|
120 |
+
type=dataset_type,
|
121 |
+
split="val",
|
122 |
+
data_root=data_root,
|
123 |
+
transform=[
|
124 |
+
dict(type="CenterShift", apply_z=True),
|
125 |
+
dict(
|
126 |
+
type="GridSample",
|
127 |
+
grid_size=0.01,
|
128 |
+
hash_type="fnv",
|
129 |
+
mode="train",
|
130 |
+
return_min_coord=True,
|
131 |
+
return_displacement=True,
|
132 |
+
project_displacement=True,
|
133 |
+
),
|
134 |
+
# dict(type="SphereCrop", point_max=1000000, mode="center"),
|
135 |
+
dict(type="CenterShift", apply_z=False),
|
136 |
+
dict(type="NormalizeColor"),
|
137 |
+
dict(type="ToTensor"),
|
138 |
+
dict(
|
139 |
+
type="Collect",
|
140 |
+
keys=("coord", "normal", "segment"),
|
141 |
+
feat_keys=("coord", "color", "normal", "displacement"),
|
142 |
+
),
|
143 |
+
],
|
144 |
+
test_mode=False,
|
145 |
+
),
|
146 |
+
test=dict(
|
147 |
+
type=dataset_type,
|
148 |
+
split="val",
|
149 |
+
data_root=data_root,
|
150 |
+
transform=[
|
151 |
+
dict(type="CenterShift", apply_z=True),
|
152 |
+
dict(type="NormalizeColor"),
|
153 |
+
],
|
154 |
+
test_mode=True,
|
155 |
+
test_cfg=dict(
|
156 |
+
voxelize=dict(
|
157 |
+
type="GridSample",
|
158 |
+
grid_size=0.01,
|
159 |
+
hash_type="fnv",
|
160 |
+
mode="test",
|
161 |
+
keys=("coord", "color", "normal"),
|
162 |
+
return_displacement=True,
|
163 |
+
project_displacement=True,
|
164 |
+
),
|
165 |
+
crop=None,
|
166 |
+
post_transform=[
|
167 |
+
dict(type="CenterShift", apply_z=False),
|
168 |
+
dict(type="ToTensor"),
|
169 |
+
dict(
|
170 |
+
type="Collect",
|
171 |
+
keys=("coord", "normal", "index"),
|
172 |
+
feat_keys=("coord", "color", "normal", "displacement"),
|
173 |
+
),
|
174 |
+
],
|
175 |
+
aug_transform=[
|
176 |
+
[
|
177 |
+
dict(
|
178 |
+
type="RandomRotateTargetAngle",
|
179 |
+
angle=[0],
|
180 |
+
axis="z",
|
181 |
+
center=[0, 0, 0],
|
182 |
+
p=1,
|
183 |
+
)
|
184 |
+
],
|
185 |
+
[
|
186 |
+
dict(
|
187 |
+
type="RandomRotateTargetAngle",
|
188 |
+
angle=[1 / 2],
|
189 |
+
axis="z",
|
190 |
+
center=[0, 0, 0],
|
191 |
+
p=1,
|
192 |
+
)
|
193 |
+
],
|
194 |
+
[
|
195 |
+
dict(
|
196 |
+
type="RandomRotateTargetAngle",
|
197 |
+
angle=[1],
|
198 |
+
axis="z",
|
199 |
+
center=[0, 0, 0],
|
200 |
+
p=1,
|
201 |
+
)
|
202 |
+
],
|
203 |
+
[
|
204 |
+
dict(
|
205 |
+
type="RandomRotateTargetAngle",
|
206 |
+
angle=[3 / 2],
|
207 |
+
axis="z",
|
208 |
+
center=[0, 0, 0],
|
209 |
+
p=1,
|
210 |
+
)
|
211 |
+
],
|
212 |
+
[
|
213 |
+
dict(
|
214 |
+
type="RandomRotateTargetAngle",
|
215 |
+
angle=[0],
|
216 |
+
axis="z",
|
217 |
+
center=[0, 0, 0],
|
218 |
+
p=1,
|
219 |
+
),
|
220 |
+
dict(type="RandomScale", scale=[0.95, 0.95]),
|
221 |
+
],
|
222 |
+
[
|
223 |
+
dict(
|
224 |
+
type="RandomRotateTargetAngle",
|
225 |
+
angle=[1 / 2],
|
226 |
+
axis="z",
|
227 |
+
center=[0, 0, 0],
|
228 |
+
p=1,
|
229 |
+
),
|
230 |
+
dict(type="RandomScale", scale=[0.95, 0.95]),
|
231 |
+
],
|
232 |
+
[
|
233 |
+
dict(
|
234 |
+
type="RandomRotateTargetAngle",
|
235 |
+
angle=[1],
|
236 |
+
axis="z",
|
237 |
+
center=[0, 0, 0],
|
238 |
+
p=1,
|
239 |
+
),
|
240 |
+
dict(type="RandomScale", scale=[0.95, 0.95]),
|
241 |
+
],
|
242 |
+
[
|
243 |
+
dict(
|
244 |
+
type="RandomRotateTargetAngle",
|
245 |
+
angle=[3 / 2],
|
246 |
+
axis="z",
|
247 |
+
center=[0, 0, 0],
|
248 |
+
p=1,
|
249 |
+
),
|
250 |
+
dict(type="RandomScale", scale=[0.95, 0.95]),
|
251 |
+
],
|
252 |
+
[
|
253 |
+
dict(
|
254 |
+
type="RandomRotateTargetAngle",
|
255 |
+
angle=[0],
|
256 |
+
axis="z",
|
257 |
+
center=[0, 0, 0],
|
258 |
+
p=1,
|
259 |
+
),
|
260 |
+
dict(type="RandomScale", scale=[1.05, 1.05]),
|
261 |
+
],
|
262 |
+
[
|
263 |
+
dict(
|
264 |
+
type="RandomRotateTargetAngle",
|
265 |
+
angle=[1 / 2],
|
266 |
+
axis="z",
|
267 |
+
center=[0, 0, 0],
|
268 |
+
p=1,
|
269 |
+
),
|
270 |
+
dict(type="RandomScale", scale=[1.05, 1.05]),
|
271 |
+
],
|
272 |
+
[
|
273 |
+
dict(
|
274 |
+
type="RandomRotateTargetAngle",
|
275 |
+
angle=[1],
|
276 |
+
axis="z",
|
277 |
+
center=[0, 0, 0],
|
278 |
+
p=1,
|
279 |
+
),
|
280 |
+
dict(type="RandomScale", scale=[1.05, 1.05]),
|
281 |
+
],
|
282 |
+
[
|
283 |
+
dict(
|
284 |
+
type="RandomRotateTargetAngle",
|
285 |
+
angle=[3 / 2],
|
286 |
+
axis="z",
|
287 |
+
center=[0, 0, 0],
|
288 |
+
p=1,
|
289 |
+
),
|
290 |
+
dict(type="RandomScale", scale=[1.05, 1.05]),
|
291 |
+
],
|
292 |
+
[dict(type="RandomFlip", p=1)],
|
293 |
+
],
|
294 |
+
),
|
295 |
+
),
|
296 |
+
)
|
Pointcept/configs/scannet/semseg-ppt-v1m1-0-sc-st-spunet.py
ADDED
@@ -0,0 +1,391 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
_base_ = ["../_base_/default_runtime.py"]
|
2 |
+
|
3 |
+
# misc custom setting
|
4 |
+
batch_size = 24 # bs: total bs in all gpus
|
5 |
+
num_worker = 48
|
6 |
+
mix_prob = 0.8
|
7 |
+
empty_cache = False
|
8 |
+
enable_amp = True
|
9 |
+
find_unused_parameters = True
|
10 |
+
|
11 |
+
# trainer
|
12 |
+
train = dict(
|
13 |
+
type="MultiDatasetTrainer",
|
14 |
+
)
|
15 |
+
|
16 |
+
# model settings
|
17 |
+
model = dict(
|
18 |
+
type="PPT-v1m1",
|
19 |
+
backbone=dict(
|
20 |
+
type="SpUNet-v1m3",
|
21 |
+
in_channels=6,
|
22 |
+
num_classes=0,
|
23 |
+
base_channels=32,
|
24 |
+
context_channels=256,
|
25 |
+
channels=(32, 64, 128, 256, 256, 128, 96, 96),
|
26 |
+
layers=(2, 3, 4, 6, 2, 2, 2, 2),
|
27 |
+
cls_mode=False,
|
28 |
+
conditions=("ScanNet", "S3DIS", "Structured3D"),
|
29 |
+
zero_init=False,
|
30 |
+
norm_decouple=True,
|
31 |
+
norm_adaptive=True,
|
32 |
+
norm_affine=True,
|
33 |
+
),
|
34 |
+
criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
|
35 |
+
backbone_out_channels=96,
|
36 |
+
context_channels=256,
|
37 |
+
conditions=("Structured3D", "ScanNet", "S3DIS"),
|
38 |
+
template="[x]",
|
39 |
+
clip_model="ViT-B/16",
|
40 |
+
# fmt: off
|
41 |
+
class_name=(
|
42 |
+
"wall", "floor", "cabinet", "bed", "chair", "sofa", "table", "door",
|
43 |
+
"window", "bookshelf", "bookcase", "picture", "counter", "desk", "shelves", "curtain",
|
44 |
+
"dresser", "pillow", "mirror", "ceiling", "refrigerator", "television", "shower curtain", "nightstand",
|
45 |
+
"toilet", "sink", "lamp", "bathtub", "garbagebin", "board", "beam", "column",
|
46 |
+
"clutter", "otherstructure", "otherfurniture", "otherprop",
|
47 |
+
),
|
48 |
+
valid_index=(
|
49 |
+
(0, 1, 2, 3, 4, 5, 6, 7, 8, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 23, 25, 26, 33, 34, 35),
|
50 |
+
(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 15, 20, 22, 24, 25, 27, 34),
|
51 |
+
(0, 1, 4, 5, 6, 7, 8, 10, 19, 29, 30, 31, 32),
|
52 |
+
),
|
53 |
+
# fmt: on
|
54 |
+
backbone_mode=False,
|
55 |
+
)
|
56 |
+
|
57 |
+
# scheduler settings
|
58 |
+
epoch = 100
|
59 |
+
optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True)
|
60 |
+
scheduler = dict(
|
61 |
+
type="OneCycleLR",
|
62 |
+
max_lr=optimizer["lr"],
|
63 |
+
pct_start=0.05,
|
64 |
+
anneal_strategy="cos",
|
65 |
+
div_factor=10.0,
|
66 |
+
final_div_factor=10000.0,
|
67 |
+
)
|
68 |
+
# param_dicts = [dict(keyword="modulation", lr=0.005)]
|
69 |
+
|
70 |
+
# dataset settings
|
71 |
+
data = dict(
|
72 |
+
num_classes=20,
|
73 |
+
ignore_index=-1,
|
74 |
+
names=[
|
75 |
+
"wall",
|
76 |
+
"floor",
|
77 |
+
"cabinet",
|
78 |
+
"bed",
|
79 |
+
"chair",
|
80 |
+
"sofa",
|
81 |
+
"table",
|
82 |
+
"door",
|
83 |
+
"window",
|
84 |
+
"bookshelf",
|
85 |
+
"picture",
|
86 |
+
"counter",
|
87 |
+
"desk",
|
88 |
+
"curtain",
|
89 |
+
"refridgerator",
|
90 |
+
"shower curtain",
|
91 |
+
"toilet",
|
92 |
+
"sink",
|
93 |
+
"bathtub",
|
94 |
+
"otherfurniture",
|
95 |
+
],
|
96 |
+
train=dict(
|
97 |
+
type="ConcatDataset",
|
98 |
+
datasets=[
|
99 |
+
# Structured3D
|
100 |
+
dict(
|
101 |
+
type="Structured3DDataset",
|
102 |
+
split="train",
|
103 |
+
data_root="data/structured3d",
|
104 |
+
transform=[
|
105 |
+
dict(type="CenterShift", apply_z=True),
|
106 |
+
dict(
|
107 |
+
type="RandomDropout",
|
108 |
+
dropout_ratio=0.2,
|
109 |
+
dropout_application_ratio=0.2,
|
110 |
+
),
|
111 |
+
# dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
|
112 |
+
dict(
|
113 |
+
type="RandomRotate",
|
114 |
+
angle=[-1, 1],
|
115 |
+
axis="z",
|
116 |
+
center=[0, 0, 0],
|
117 |
+
p=0.5,
|
118 |
+
),
|
119 |
+
dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
|
120 |
+
dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
|
121 |
+
dict(type="RandomScale", scale=[0.9, 1.1]),
|
122 |
+
# dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
|
123 |
+
dict(type="RandomFlip", p=0.5),
|
124 |
+
dict(type="RandomJitter", sigma=0.005, clip=0.02),
|
125 |
+
dict(
|
126 |
+
type="ElasticDistortion",
|
127 |
+
distortion_params=[[0.2, 0.4], [0.8, 1.6]],
|
128 |
+
),
|
129 |
+
dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
|
130 |
+
dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
|
131 |
+
dict(type="ChromaticJitter", p=0.95, std=0.05),
|
132 |
+
# dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
|
133 |
+
# dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
|
134 |
+
dict(
|
135 |
+
type="GridSample",
|
136 |
+
grid_size=0.02,
|
137 |
+
hash_type="fnv",
|
138 |
+
mode="train",
|
139 |
+
return_grid_coord=True,
|
140 |
+
),
|
141 |
+
dict(type="SphereCrop", sample_rate=0.8, mode="random"),
|
142 |
+
dict(type="CenterShift", apply_z=False),
|
143 |
+
dict(type="NormalizeColor"),
|
144 |
+
dict(type="ShufflePoint"),
|
145 |
+
dict(type="Add", keys_dict={"condition": "Structured3D"}),
|
146 |
+
dict(type="ToTensor"),
|
147 |
+
dict(
|
148 |
+
type="Collect",
|
149 |
+
keys=("coord", "grid_coord", "segment", "condition"),
|
150 |
+
feat_keys=("color", "normal"),
|
151 |
+
),
|
152 |
+
],
|
153 |
+
test_mode=False,
|
154 |
+
loop=2, # sampling weight
|
155 |
+
),
|
156 |
+
# ScanNet
|
157 |
+
dict(
|
158 |
+
type="ScanNetDataset",
|
159 |
+
split="train",
|
160 |
+
data_root="data/scannet",
|
161 |
+
transform=[
|
162 |
+
dict(type="CenterShift", apply_z=True),
|
163 |
+
dict(
|
164 |
+
type="RandomDropout",
|
165 |
+
dropout_ratio=0.2,
|
166 |
+
dropout_application_ratio=0.2,
|
167 |
+
),
|
168 |
+
# dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
|
169 |
+
dict(
|
170 |
+
type="RandomRotate",
|
171 |
+
angle=[-1, 1],
|
172 |
+
axis="z",
|
173 |
+
center=[0, 0, 0],
|
174 |
+
p=0.5,
|
175 |
+
),
|
176 |
+
dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
|
177 |
+
dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
|
178 |
+
dict(type="RandomScale", scale=[0.9, 1.1]),
|
179 |
+
# dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
|
180 |
+
dict(type="RandomFlip", p=0.5),
|
181 |
+
dict(type="RandomJitter", sigma=0.005, clip=0.02),
|
182 |
+
dict(
|
183 |
+
type="ElasticDistortion",
|
184 |
+
distortion_params=[[0.2, 0.4], [0.8, 1.6]],
|
185 |
+
),
|
186 |
+
dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
|
187 |
+
dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
|
188 |
+
dict(type="ChromaticJitter", p=0.95, std=0.05),
|
189 |
+
# dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
|
190 |
+
# dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
|
191 |
+
dict(
|
192 |
+
type="GridSample",
|
193 |
+
grid_size=0.02,
|
194 |
+
hash_type="fnv",
|
195 |
+
mode="train",
|
196 |
+
return_grid_coord=True,
|
197 |
+
),
|
198 |
+
dict(type="SphereCrop", point_max=100000, mode="random"),
|
199 |
+
dict(type="CenterShift", apply_z=False),
|
200 |
+
dict(type="NormalizeColor"),
|
201 |
+
dict(type="ShufflePoint"),
|
202 |
+
dict(type="Add", keys_dict={"condition": "ScanNet"}),
|
203 |
+
dict(type="ToTensor"),
|
204 |
+
dict(
|
205 |
+
type="Collect",
|
206 |
+
keys=("coord", "grid_coord", "segment", "condition"),
|
207 |
+
feat_keys=("color", "normal"),
|
208 |
+
),
|
209 |
+
],
|
210 |
+
test_mode=False,
|
211 |
+
loop=1, # sampling weight
|
212 |
+
),
|
213 |
+
],
|
214 |
+
),
|
215 |
+
val=dict(
|
216 |
+
type="ScanNetDataset",
|
217 |
+
split="val",
|
218 |
+
data_root="data/scannet",
|
219 |
+
transform=[
|
220 |
+
dict(type="CenterShift", apply_z=True),
|
221 |
+
dict(
|
222 |
+
type="GridSample",
|
223 |
+
grid_size=0.02,
|
224 |
+
hash_type="fnv",
|
225 |
+
mode="train",
|
226 |
+
return_grid_coord=True,
|
227 |
+
),
|
228 |
+
# dict(type="SphereCrop", point_max=1000000, mode="center"),
|
229 |
+
dict(type="CenterShift", apply_z=False),
|
230 |
+
dict(type="NormalizeColor"),
|
231 |
+
dict(type="ToTensor"),
|
232 |
+
dict(type="Add", keys_dict={"condition": "ScanNet"}),
|
233 |
+
dict(
|
234 |
+
type="Collect",
|
235 |
+
keys=("coord", "grid_coord", "segment", "condition"),
|
236 |
+
feat_keys=("color", "normal"),
|
237 |
+
),
|
238 |
+
],
|
239 |
+
test_mode=False,
|
240 |
+
),
|
241 |
+
test=dict(
|
242 |
+
type="ScanNetDataset",
|
243 |
+
split="val",
|
244 |
+
data_root="data/scannet",
|
245 |
+
transform=[
|
246 |
+
dict(type="CenterShift", apply_z=True),
|
247 |
+
dict(type="NormalizeColor"),
|
248 |
+
],
|
249 |
+
test_mode=True,
|
250 |
+
test_cfg=dict(
|
251 |
+
voxelize=dict(
|
252 |
+
type="GridSample",
|
253 |
+
grid_size=0.02,
|
254 |
+
hash_type="fnv",
|
255 |
+
mode="test",
|
256 |
+
return_grid_coord=True,
|
257 |
+
keys=("coord", "color", "normal"),
|
258 |
+
),
|
259 |
+
crop=None,
|
260 |
+
post_transform=[
|
261 |
+
dict(type="CenterShift", apply_z=False),
|
262 |
+
dict(type="Add", keys_dict={"condition": "ScanNet"}),
|
263 |
+
dict(type="ToTensor"),
|
264 |
+
dict(
|
265 |
+
type="Collect",
|
266 |
+
keys=("coord", "grid_coord", "index", "condition"),
|
267 |
+
feat_keys=("color", "normal"),
|
268 |
+
),
|
269 |
+
],
|
270 |
+
aug_transform=[
|
271 |
+
[
|
272 |
+
dict(
|
273 |
+
type="RandomRotateTargetAngle",
|
274 |
+
angle=[0],
|
275 |
+
axis="z",
|
276 |
+
center=[0, 0, 0],
|
277 |
+
p=1,
|
278 |
+
)
|
279 |
+
],
|
280 |
+
[
|
281 |
+
dict(
|
282 |
+
type="RandomRotateTargetAngle",
|
283 |
+
angle=[1 / 2],
|
284 |
+
axis="z",
|
285 |
+
center=[0, 0, 0],
|
286 |
+
p=1,
|
287 |
+
)
|
288 |
+
],
|
289 |
+
[
|
290 |
+
dict(
|
291 |
+
type="RandomRotateTargetAngle",
|
292 |
+
angle=[1],
|
293 |
+
axis="z",
|
294 |
+
center=[0, 0, 0],
|
295 |
+
p=1,
|
296 |
+
)
|
297 |
+
],
|
298 |
+
[
|
299 |
+
dict(
|
300 |
+
type="RandomRotateTargetAngle",
|
301 |
+
angle=[3 / 2],
|
302 |
+
axis="z",
|
303 |
+
center=[0, 0, 0],
|
304 |
+
p=1,
|
305 |
+
)
|
306 |
+
],
|
307 |
+
[
|
308 |
+
dict(
|
309 |
+
type="RandomRotateTargetAngle",
|
310 |
+
angle=[0],
|
311 |
+
axis="z",
|
312 |
+
center=[0, 0, 0],
|
313 |
+
p=1,
|
314 |
+
),
|
315 |
+
dict(type="RandomScale", scale=[0.95, 0.95]),
|
316 |
+
],
|
317 |
+
[
|
318 |
+
dict(
|
319 |
+
type="RandomRotateTargetAngle",
|
320 |
+
angle=[1 / 2],
|
321 |
+
axis="z",
|
322 |
+
center=[0, 0, 0],
|
323 |
+
p=1,
|
324 |
+
),
|
325 |
+
dict(type="RandomScale", scale=[0.95, 0.95]),
|
326 |
+
],
|
327 |
+
[
|
328 |
+
dict(
|
329 |
+
type="RandomRotateTargetAngle",
|
330 |
+
angle=[1],
|
331 |
+
axis="z",
|
332 |
+
center=[0, 0, 0],
|
333 |
+
p=1,
|
334 |
+
),
|
335 |
+
dict(type="RandomScale", scale=[0.95, 0.95]),
|
336 |
+
],
|
337 |
+
[
|
338 |
+
dict(
|
339 |
+
type="RandomRotateTargetAngle",
|
340 |
+
angle=[3 / 2],
|
341 |
+
axis="z",
|
342 |
+
center=[0, 0, 0],
|
343 |
+
p=1,
|
344 |
+
),
|
345 |
+
dict(type="RandomScale", scale=[0.95, 0.95]),
|
346 |
+
],
|
347 |
+
[
|
348 |
+
dict(
|
349 |
+
type="RandomRotateTargetAngle",
|
350 |
+
angle=[0],
|
351 |
+
axis="z",
|
352 |
+
center=[0, 0, 0],
|
353 |
+
p=1,
|
354 |
+
),
|
355 |
+
dict(type="RandomScale", scale=[1.05, 1.05]),
|
356 |
+
],
|
357 |
+
[
|
358 |
+
dict(
|
359 |
+
type="RandomRotateTargetAngle",
|
360 |
+
angle=[1 / 2],
|
361 |
+
axis="z",
|
362 |
+
center=[0, 0, 0],
|
363 |
+
p=1,
|
364 |
+
),
|
365 |
+
dict(type="RandomScale", scale=[1.05, 1.05]),
|
366 |
+
],
|
367 |
+
[
|
368 |
+
dict(
|
369 |
+
type="RandomRotateTargetAngle",
|
370 |
+
angle=[1],
|
371 |
+
axis="z",
|
372 |
+
center=[0, 0, 0],
|
373 |
+
p=1,
|
374 |
+
),
|
375 |
+
dict(type="RandomScale", scale=[1.05, 1.05]),
|
376 |
+
],
|
377 |
+
[
|
378 |
+
dict(
|
379 |
+
type="RandomRotateTargetAngle",
|
380 |
+
angle=[3 / 2],
|
381 |
+
axis="z",
|
382 |
+
center=[0, 0, 0],
|
383 |
+
p=1,
|
384 |
+
),
|
385 |
+
dict(type="RandomScale", scale=[1.05, 1.05]),
|
386 |
+
],
|
387 |
+
[dict(type="RandomFlip", p=1)],
|
388 |
+
],
|
389 |
+
),
|
390 |
+
),
|
391 |
+
)
|
Pointcept/configs/scannet/semseg-ppt-v1m1-1-sc-st-spunet-submit.py
ADDED
@@ -0,0 +1,366 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Base config(s) this file builds on.
_base_ = ["../_base_/default_runtime.py"]

# misc custom setting
batch_size = 24  # bs: total bs in all gpus
num_worker = 48
mix_prob = 0.8
empty_cache = False
enable_amp = True
find_unused_parameters = True
# NOTE(review): evaluation is switched off here; the training datasets in
# `data.train` list both "train" and "val" splits, which presumably is why
# (submission run) -- confirm.
evaluate = False
|
11 |
+
|
12 |
+
# trainer
# Selects the trainer implementation by its registered type name.
train = dict(
    type="MultiDatasetTrainer",
)
|
16 |
+
|
17 |
+
# model settings
model = dict(
    type="PPT-v1m1",
    backbone=dict(
        type="SpUNet-v1m3",
        in_channels=6,
        num_classes=0,
        base_channels=32,
        context_channels=256,
        channels=(32, 64, 128, 256, 256, 128, 96, 96),
        layers=(2, 3, 4, 6, 2, 2, 2, 2),
        cls_mode=False,
        # NOTE(review): same three names as the model-level `conditions`
        # below, but in a different order -- confirm this is intentional.
        conditions=("ScanNet", "S3DIS", "Structured3D"),
        zero_init=False,
        norm_decouple=True,
        norm_adaptive=True,
        norm_affine=True,
    ),
    criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
    backbone_out_channels=96,
    context_channels=256,
    conditions=("Structured3D", "ScanNet", "S3DIS"),
    template="[x]",
    clip_model="ViT-B/16",
    # fmt: off
    # Shared label vocabulary (36 names); `valid_index` refers to positions in it.
    class_name=(
        "wall", "floor", "cabinet", "bed", "chair", "sofa", "table", "door",
        "window", "bookshelf", "bookcase", "picture", "counter", "desk", "shelves", "curtain",
        "dresser", "pillow", "mirror", "ceiling", "refrigerator", "television", "shower curtain", "nightstand",
        "toilet", "sink", "lamp", "bathtub", "garbagebin", "board", "beam", "column",
        "clutter", "otherstructure", "otherfurniture", "otherprop",
    ),
    # One index tuple per entry of `conditions` (25, 20 and 13 indices).
    # Presumably tuple i selects the labels of conditions[i]; the second
    # tuple picks the same 20 labels as `data.names` -- confirm against
    # the model implementation.
    valid_index=(
        (0, 1, 2, 3, 4, 5, 6, 7, 8, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 23, 25, 26, 33, 34, 35),
        (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 15, 20, 22, 24, 25, 27, 34),
        (0, 1, 4, 5, 6, 7, 8, 10, 19, 29, 30, 31, 32),
    ),
    # fmt: on
    backbone_mode=False,
)
|
57 |
+
|
58 |
+
# scheduler settings
epoch = 100
optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True)
scheduler = dict(
    type="OneCycleLR",
    # Peak learning rate is tied to the optimizer's lr so the two stay in sync.
    max_lr=optimizer["lr"],
    pct_start=0.05,
    anneal_strategy="cos",
    div_factor=10.0,
    final_div_factor=10000.0,
)
# param_dicts = [dict(keyword="modulation", lr=0.005)]
|
70 |
+
|
71 |
+
# dataset settings
data = dict(
    num_classes=20,
    ignore_index=-1,
    # Label names, one per class index (20 entries, matching num_classes).
    names=[
        "wall",
        "floor",
        "cabinet",
        "bed",
        "chair",
        "sofa",
        "table",
        "door",
        "window",
        "bookshelf",
        "picture",
        "counter",
        "desk",
        "curtain",
        "refridgerator",
        "shower curtain",
        "toilet",
        "sink",
        "bathtub",
        "otherfurniture",
    ],
    # Training set: concatenation of two datasets, each tagged with a
    # "condition" name via the `Add` transform.
    train=dict(
        type="ConcatDataset",
        datasets=[
            # Structured3D
            dict(
                type="Structured3DDataset",
                split=["train", "val"],
                data_root="data/structured3d",
                transform=[
                    dict(type="CenterShift", apply_z=True),
                    dict(
                        type="RandomDropout",
                        dropout_ratio=0.2,
                        dropout_application_ratio=0.2,
                    ),
                    # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
                    dict(
                        type="RandomRotate",
                        angle=[-1, 1],
                        axis="z",
                        center=[0, 0, 0],
                        p=0.5,
                    ),
                    dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
                    dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
                    dict(type="RandomScale", scale=[0.9, 1.1]),
                    # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
                    dict(type="RandomFlip", p=0.5),
                    dict(type="RandomJitter", sigma=0.005, clip=0.02),
                    dict(
                        type="ElasticDistortion",
                        distortion_params=[[0.2, 0.4], [0.8, 1.6]],
                    ),
                    dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
                    dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
                    dict(type="ChromaticJitter", p=0.95, std=0.05),
                    # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
                    # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
                    dict(
                        type="GridSample",
                        grid_size=0.02,
                        hash_type="fnv",
                        mode="train",
                        return_grid_coord=True,
                    ),
                    # Crop is ratio-based here; the ScanNet pipeline below
                    # uses a fixed point_max instead.
                    dict(type="SphereCrop", sample_rate=0.8, mode="random"),
                    dict(type="CenterShift", apply_z=False),
                    dict(type="NormalizeColor"),
                    dict(type="ShufflePoint"),
                    dict(type="Add", keys_dict={"condition": "Structured3D"}),
                    dict(type="ToTensor"),
                    dict(
                        type="Collect",
                        keys=("coord", "grid_coord", "segment", "condition"),
                        feat_keys=("color", "normal"),
                    ),
                ],
                test_mode=False,
                loop=2,  # sampling weight
            ),
            # ScanNet
            dict(
                type="ScanNetDataset",
                split=["train", "val"],
                data_root="data/scannet",
                transform=[
                    dict(type="CenterShift", apply_z=True),
                    dict(
                        type="RandomDropout",
                        dropout_ratio=0.2,
                        dropout_application_ratio=0.2,
                    ),
                    # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
                    dict(
                        type="RandomRotate",
                        angle=[-1, 1],
                        axis="z",
                        center=[0, 0, 0],
                        p=0.5,
                    ),
                    dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
                    dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
                    dict(type="RandomScale", scale=[0.9, 1.1]),
                    # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
                    dict(type="RandomFlip", p=0.5),
                    dict(type="RandomJitter", sigma=0.005, clip=0.02),
                    dict(
                        type="ElasticDistortion",
                        distortion_params=[[0.2, 0.4], [0.8, 1.6]],
                    ),
                    dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
                    dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
                    dict(type="ChromaticJitter", p=0.95, std=0.05),
                    # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
                    # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
                    dict(
                        type="GridSample",
                        grid_size=0.02,
                        hash_type="fnv",
                        mode="train",
                        return_grid_coord=True,
                    ),
                    dict(type="SphereCrop", point_max=100000, mode="random"),
                    dict(type="CenterShift", apply_z=False),
                    dict(type="NormalizeColor"),
                    dict(type="ShufflePoint"),
                    dict(type="Add", keys_dict={"condition": "ScanNet"}),
                    dict(type="ToTensor"),
                    dict(
                        type="Collect",
                        keys=("coord", "grid_coord", "segment", "condition"),
                        feat_keys=("color", "normal"),
                    ),
                ],
                test_mode=False,
                loop=1,  # sampling weight
            ),
        ],
    ),
    # Test set: ScanNet "test" split, tagged with the "ScanNet" condition.
    test=dict(
        type="ScanNetDataset",
        split="test",
        data_root="data/scannet",
        transform=[
            dict(type="CenterShift", apply_z=True),
            dict(type="NormalizeColor"),
        ],
        test_mode=True,
        test_cfg=dict(
            voxelize=dict(
                type="GridSample",
                grid_size=0.02,
                hash_type="fnv",
                mode="test",
                return_grid_coord=True,
                keys=("coord", "color", "normal"),
            ),
            crop=None,
            post_transform=[
                dict(type="CenterShift", apply_z=False),
                dict(type="Add", keys_dict={"condition": "ScanNet"}),
                dict(type="ToTensor"),
                dict(
                    type="Collect",
                    keys=("coord", "grid_coord", "index", "condition"),
                    feat_keys=("color", "normal"),
                ),
            ],
            # 13 augmentation variants: four z-rotation angles
            # (0, 1/2, 1, 3/2) at unchanged scale, the same four at scale
            # 0.95, the same four at scale 1.05, and one flip.
            aug_transform=[
                [
                    dict(
                        type="RandomRotateTargetAngle",
                        angle=[0],
                        axis="z",
                        center=[0, 0, 0],
                        p=1,
                    ),
                ],
                [
                    dict(
                        type="RandomRotateTargetAngle",
                        angle=[1 / 2],
                        axis="z",
                        center=[0, 0, 0],
                        p=1,
                    ),
                ],
                [
                    dict(
                        type="RandomRotateTargetAngle",
                        angle=[1],
                        axis="z",
                        center=[0, 0, 0],
                        p=1,
                    ),
                ],
                [
                    dict(
                        type="RandomRotateTargetAngle",
                        angle=[3 / 2],
                        axis="z",
                        center=[0, 0, 0],
                        p=1,
                    ),
                ],
                [
                    dict(
                        type="RandomRotateTargetAngle",
                        angle=[0],
                        axis="z",
                        center=[0, 0, 0],
                        p=1,
                    ),
                    dict(type="RandomScale", scale=[0.95, 0.95]),
                ],
                [
                    dict(
                        type="RandomRotateTargetAngle",
                        angle=[1 / 2],
                        axis="z",
                        center=[0, 0, 0],
                        p=1,
                    ),
                    dict(type="RandomScale", scale=[0.95, 0.95]),
                ],
                [
                    dict(
                        type="RandomRotateTargetAngle",
                        angle=[1],
                        axis="z",
                        center=[0, 0, 0],
                        p=1,
                    ),
                    dict(type="RandomScale", scale=[0.95, 0.95]),
                ],
                [
                    dict(
                        type="RandomRotateTargetAngle",
                        angle=[3 / 2],
                        axis="z",
                        center=[0, 0, 0],
                        p=1,
                    ),
                    dict(type="RandomScale", scale=[0.95, 0.95]),
                ],
                [
                    dict(
                        type="RandomRotateTargetAngle",
                        angle=[0],
                        axis="z",
                        center=[0, 0, 0],
                        p=1,
                    ),
                    dict(type="RandomScale", scale=[1.05, 1.05]),
                ],
                [
                    dict(
                        type="RandomRotateTargetAngle",
                        angle=[1 / 2],
                        axis="z",
                        center=[0, 0, 0],
                        p=1,
                    ),
                    dict(type="RandomScale", scale=[1.05, 1.05]),
                ],
                [
                    dict(
                        type="RandomRotateTargetAngle",
                        angle=[1],
                        axis="z",
                        center=[0, 0, 0],
                        p=1,
                    ),
                    dict(type="RandomScale", scale=[1.05, 1.05]),
                ],
                [
                    dict(
                        type="RandomRotateTargetAngle",
                        angle=[3 / 2],
                        axis="z",
                        center=[0, 0, 0],
                        p=1,
                    ),
                    dict(type="RandomScale", scale=[1.05, 1.05]),
                ],
                [dict(type="RandomFlip", p=1)],
            ],
        ),
    ),
)
|