9fa78f677a597025dfa37d38280fdfe720edec1faf9bc90b948f6f02ad471528
- extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/model_zoo/deprecated.json +6 -0
- extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/model_zoo/mmcls.json +31 -0
- extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/model_zoo/open_mmlab.json +50 -0
- extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/__init__.py +81 -0
- extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/assign_score_withk.py +123 -0
- extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/ball_query.py +55 -0
- extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/bbox.py +72 -0
- extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/border_align.py +109 -0
- extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/box_iou_rotated.py +45 -0
- extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/carafe.py +287 -0
- extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/cc_attention.py +83 -0
- extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/contour_expand.py +49 -0
- extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/corner_pool.py +161 -0
- extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/correlation.py +196 -0
- extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/deform_conv.py +405 -0
- extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/deform_roi_pool.py +204 -0
- extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/deprecated_wrappers.py +43 -0
- extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/focal_loss.py +212 -0
- extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/furthest_point_sample.py +83 -0
- extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/fused_bias_leakyrelu.py +268 -0
- extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/gather_points.py +57 -0
- extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/group_points.py +224 -0
- extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/info.py +36 -0
- extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/iou3d.py +85 -0
- extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/knn.py +77 -0
- extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/masked_conv.py +111 -0
- extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/merge_cells.py +149 -0
- extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/modulated_deform_conv.py +282 -0
- extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/multi_scale_deform_attn.py +358 -0
- extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/nms.py +417 -0
- extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/pixel_group.py +75 -0
- extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/point_sample.py +336 -0
- extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/points_in_boxes.py +133 -0
- extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/points_sampler.py +177 -0
- extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/psa_mask.py +92 -0
- extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/roi_align.py +223 -0
- extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/roi_align_rotated.py +177 -0
- extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/roi_pool.py +86 -0
- extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/roiaware_pool3d.py +114 -0
- extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/roipoint_pool3d.py +77 -0
- extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/saconv.py +145 -0
- extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/scatter_points.py +135 -0
- extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/sync_bn.py +279 -0
- extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/three_interpolate.py +68 -0
- extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/three_nn.py +51 -0
- extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/tin_shift.py +68 -0
- extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/upfirdn2d.py +330 -0
- extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/voxelize.py +132 -0
- extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/parallel/__init__.py +13 -0
- extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/parallel/_functions.py +79 -0
extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/model_zoo/deprecated.json
ADDED
@@ -0,0 +1,6 @@
+{
+    "resnet50_caffe": "detectron/resnet50_caffe",
+    "resnet50_caffe_bgr": "detectron2/resnet50_caffe_bgr",
+    "resnet101_caffe": "detectron/resnet101_caffe",
+    "resnet101_caffe_bgr": "detectron2/resnet101_caffe_bgr"
+}
extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/model_zoo/mmcls.json
ADDED
@@ -0,0 +1,31 @@
+{
+    "vgg11": "https://download.openmmlab.com/mmclassification/v0/vgg/vgg11_batch256_imagenet_20210208-4271cd6c.pth",
+    "vgg13": "https://download.openmmlab.com/mmclassification/v0/vgg/vgg13_batch256_imagenet_20210208-4d1d6080.pth",
+    "vgg16": "https://download.openmmlab.com/mmclassification/v0/vgg/vgg16_batch256_imagenet_20210208-db26f1a5.pth",
+    "vgg19": "https://download.openmmlab.com/mmclassification/v0/vgg/vgg19_batch256_imagenet_20210208-e6920e4a.pth",
+    "vgg11_bn": "https://download.openmmlab.com/mmclassification/v0/vgg/vgg11_bn_batch256_imagenet_20210207-f244902c.pth",
+    "vgg13_bn": "https://download.openmmlab.com/mmclassification/v0/vgg/vgg13_bn_batch256_imagenet_20210207-1a8b7864.pth",
+    "vgg16_bn": "https://download.openmmlab.com/mmclassification/v0/vgg/vgg16_bn_batch256_imagenet_20210208-7e55cd29.pth",
+    "vgg19_bn": "https://download.openmmlab.com/mmclassification/v0/vgg/vgg19_bn_batch256_imagenet_20210208-da620c4f.pth",
+    "resnet18": "https://download.openmmlab.com/mmclassification/v0/resnet/resnet18_batch256_imagenet_20200708-34ab8f90.pth",
+    "resnet34": "https://download.openmmlab.com/mmclassification/v0/resnet/resnet34_batch256_imagenet_20200708-32ffb4f7.pth",
+    "resnet50": "https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_batch256_imagenet_20200708-cfb998bf.pth",
+    "resnet101": "https://download.openmmlab.com/mmclassification/v0/resnet/resnet101_batch256_imagenet_20200708-753f3608.pth",
+    "resnet152": "https://download.openmmlab.com/mmclassification/v0/resnet/resnet152_batch256_imagenet_20200708-ec25b1f9.pth",
+    "resnet50_v1d": "https://download.openmmlab.com/mmclassification/v0/resnet/resnetv1d50_batch256_imagenet_20200708-1ad0ce94.pth",
+    "resnet101_v1d": "https://download.openmmlab.com/mmclassification/v0/resnet/resnetv1d101_batch256_imagenet_20200708-9cb302ef.pth",
+    "resnet152_v1d": "https://download.openmmlab.com/mmclassification/v0/resnet/resnetv1d152_batch256_imagenet_20200708-e79cb6a2.pth",
+    "resnext50_32x4d": "https://download.openmmlab.com/mmclassification/v0/resnext/resnext50_32x4d_b32x8_imagenet_20210429-56066e27.pth",
+    "resnext101_32x4d": "https://download.openmmlab.com/mmclassification/v0/resnext/resnext101_32x4d_b32x8_imagenet_20210506-e0fa3dd5.pth",
+    "resnext101_32x8d": "https://download.openmmlab.com/mmclassification/v0/resnext/resnext101_32x8d_b32x8_imagenet_20210506-23a247d5.pth",
+    "resnext152_32x4d": "https://download.openmmlab.com/mmclassification/v0/resnext/resnext152_32x4d_b32x8_imagenet_20210524-927787be.pth",
+    "se-resnet50": "https://download.openmmlab.com/mmclassification/v0/se-resnet/se-resnet50_batch256_imagenet_20200804-ae206104.pth",
+    "se-resnet101": "https://download.openmmlab.com/mmclassification/v0/se-resnet/se-resnet101_batch256_imagenet_20200804-ba5b51d4.pth",
+    "resnest50": "https://download.openmmlab.com/mmclassification/v0/resnest/resnest50_imagenet_converted-1ebf0afe.pth",
+    "resnest101": "https://download.openmmlab.com/mmclassification/v0/resnest/resnest101_imagenet_converted-032caa52.pth",
+    "resnest200": "https://download.openmmlab.com/mmclassification/v0/resnest/resnest200_imagenet_converted-581a60f2.pth",
+    "resnest269": "https://download.openmmlab.com/mmclassification/v0/resnest/resnest269_imagenet_converted-59930960.pth",
+    "shufflenet_v1": "https://download.openmmlab.com/mmclassification/v0/shufflenet_v1/shufflenet_v1_batch1024_imagenet_20200804-5d6cec73.pth",
+    "shufflenet_v2": "https://download.openmmlab.com/mmclassification/v0/shufflenet_v2/shufflenet_v2_batch1024_imagenet_20200812-5bf4721e.pth",
+    "mobilenet_v2": "https://download.openmmlab.com/mmclassification/v0/mobilenet_v2/mobilenet_v2_batch256_imagenet_20200708-3b2dc3af.pth"
+}
extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/model_zoo/open_mmlab.json
ADDED
@@ -0,0 +1,50 @@
+{
+    "vgg16_caffe": "https://download.openmmlab.com/pretrain/third_party/vgg16_caffe-292e1171.pth",
+    "detectron/resnet50_caffe": "https://download.openmmlab.com/pretrain/third_party/resnet50_caffe-788b5fa3.pth",
+    "detectron2/resnet50_caffe": "https://download.openmmlab.com/pretrain/third_party/resnet50_msra-5891d200.pth",
+    "detectron/resnet101_caffe": "https://download.openmmlab.com/pretrain/third_party/resnet101_caffe-3ad79236.pth",
+    "detectron2/resnet101_caffe": "https://download.openmmlab.com/pretrain/third_party/resnet101_msra-6cc46731.pth",
+    "detectron2/resnext101_32x8d": "https://download.openmmlab.com/pretrain/third_party/resnext101_32x8d-1516f1aa.pth",
+    "resnext50_32x4d": "https://download.openmmlab.com/pretrain/third_party/resnext50-32x4d-0ab1a123.pth",
+    "resnext101_32x4d": "https://download.openmmlab.com/pretrain/third_party/resnext101_32x4d-a5af3160.pth",
+    "resnext101_64x4d": "https://download.openmmlab.com/pretrain/third_party/resnext101_64x4d-ee2c6f71.pth",
+    "contrib/resnet50_gn": "https://download.openmmlab.com/pretrain/third_party/resnet50_gn_thangvubk-ad1730dd.pth",
+    "detectron/resnet50_gn": "https://download.openmmlab.com/pretrain/third_party/resnet50_gn-9186a21c.pth",
+    "detectron/resnet101_gn": "https://download.openmmlab.com/pretrain/third_party/resnet101_gn-cac0ab98.pth",
+    "jhu/resnet50_gn_ws": "https://download.openmmlab.com/pretrain/third_party/resnet50_gn_ws-15beedd8.pth",
+    "jhu/resnet101_gn_ws": "https://download.openmmlab.com/pretrain/third_party/resnet101_gn_ws-3e3c308c.pth",
+    "jhu/resnext50_32x4d_gn_ws": "https://download.openmmlab.com/pretrain/third_party/resnext50_32x4d_gn_ws-0d87ac85.pth",
+    "jhu/resnext101_32x4d_gn_ws": "https://download.openmmlab.com/pretrain/third_party/resnext101_32x4d_gn_ws-34ac1a9e.pth",
+    "jhu/resnext50_32x4d_gn": "https://download.openmmlab.com/pretrain/third_party/resnext50_32x4d_gn-c7e8b754.pth",
+    "jhu/resnext101_32x4d_gn": "https://download.openmmlab.com/pretrain/third_party/resnext101_32x4d_gn-ac3bb84e.pth",
+    "msra/hrnetv2_w18_small": "https://download.openmmlab.com/pretrain/third_party/hrnetv2_w18_small-b5a04e21.pth",
+    "msra/hrnetv2_w18": "https://download.openmmlab.com/pretrain/third_party/hrnetv2_w18-00eb2006.pth",
+    "msra/hrnetv2_w32": "https://download.openmmlab.com/pretrain/third_party/hrnetv2_w32-dc9eeb4f.pth",
+    "msra/hrnetv2_w40": "https://download.openmmlab.com/pretrain/third_party/hrnetv2_w40-ed0b031c.pth",
+    "msra/hrnetv2_w48": "https://download.openmmlab.com/pretrain/third_party/hrnetv2_w48-d2186c55.pth",
+    "bninception_caffe": "https://download.openmmlab.com/pretrain/third_party/bn_inception_caffe-ed2e8665.pth",
+    "kin400/i3d_r50_f32s2_k400": "https://download.openmmlab.com/pretrain/third_party/i3d_r50_f32s2_k400-2c57e077.pth",
+    "kin400/nl3d_r50_f32s2_k400": "https://download.openmmlab.com/pretrain/third_party/nl3d_r50_f32s2_k400-fa7e7caa.pth",
+    "res2net101_v1d_26w_4s": "https://download.openmmlab.com/pretrain/third_party/res2net101_v1d_26w_4s_mmdetv2-f0a600f9.pth",
+    "regnetx_400mf": "https://download.openmmlab.com/pretrain/third_party/regnetx_400mf-a5b10d96.pth",
+    "regnetx_800mf": "https://download.openmmlab.com/pretrain/third_party/regnetx_800mf-1f4be4c7.pth",
+    "regnetx_1.6gf": "https://download.openmmlab.com/pretrain/third_party/regnetx_1.6gf-5791c176.pth",
+    "regnetx_3.2gf": "https://download.openmmlab.com/pretrain/third_party/regnetx_3.2gf-c2599b0f.pth",
+    "regnetx_4.0gf": "https://download.openmmlab.com/pretrain/third_party/regnetx_4.0gf-a88f671e.pth",
+    "regnetx_6.4gf": "https://download.openmmlab.com/pretrain/third_party/regnetx_6.4gf-006af45d.pth",
+    "regnetx_8.0gf": "https://download.openmmlab.com/pretrain/third_party/regnetx_8.0gf-3c68abe7.pth",
+    "regnetx_12gf": "https://download.openmmlab.com/pretrain/third_party/regnetx_12gf-4c2a3350.pth",
+    "resnet18_v1c": "https://download.openmmlab.com/pretrain/third_party/resnet18_v1c-b5776b93.pth",
+    "resnet50_v1c": "https://download.openmmlab.com/pretrain/third_party/resnet50_v1c-2cccc1ad.pth",
+    "resnet101_v1c": "https://download.openmmlab.com/pretrain/third_party/resnet101_v1c-e67eebb6.pth",
+    "mmedit/vgg16": "https://download.openmmlab.com/mmediting/third_party/vgg_state_dict.pth",
+    "mmedit/res34_en_nomixup": "https://download.openmmlab.com/mmediting/third_party/model_best_resnet34_En_nomixup.pth",
+    "mmedit/mobilenet_v2": "https://download.openmmlab.com/mmediting/third_party/mobilenet_v2.pth",
+    "contrib/mobilenet_v3_large": "https://download.openmmlab.com/pretrain/third_party/mobilenet_v3_large-bc2c3fd3.pth",
+    "contrib/mobilenet_v3_small": "https://download.openmmlab.com/pretrain/third_party/mobilenet_v3_small-47085aa1.pth",
+    "resnest50": "https://download.openmmlab.com/pretrain/third_party/resnest50_d2-7497a55b.pth",
+    "resnest101": "https://download.openmmlab.com/pretrain/third_party/resnest101_d2-f3b931b2.pth",
+    "resnest200": "https://download.openmmlab.com/pretrain/third_party/resnest200_d2-ca88e41f.pth",
+    "darknet53": "https://download.openmmlab.com/pretrain/third_party/darknet53-a628ea1b.pth",
+    "mmdet/mobilenet_v2": "https://download.openmmlab.com/mmdetection/v2.0/third_party/mobilenet_v2_batch256_imagenet-ff34753d.pth"
+}
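
For reference, the three JSON files above form a small lookup chain: deprecated.json redirects legacy aliases to current keys, and mmcls.json / open_mmlab.json map keys to checkpoint URLs. A rough sketch of the resolution these tables support (illustrative only, not part of this commit; the actual lookup lives in mmcv's checkpoint loader):

import json

# Resolve a (possibly deprecated) model alias to its checkpoint URL.
with open('deprecated.json') as f:
    deprecated = json.load(f)
with open('open_mmlab.json') as f:
    urls = json.load(f)

name = 'resnet50_caffe'
name = deprecated.get(name, name)  # -> 'detectron/resnet50_caffe'
print(urls[name])  # -> '.../third_party/resnet50_caffe-788b5fa3.pth'
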
extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/__init__.py
ADDED
@@ -0,0 +1,81 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .assign_score_withk import assign_score_withk
+from .ball_query import ball_query
+from .bbox import bbox_overlaps
+from .border_align import BorderAlign, border_align
+from .box_iou_rotated import box_iou_rotated
+from .carafe import CARAFE, CARAFENaive, CARAFEPack, carafe, carafe_naive
+from .cc_attention import CrissCrossAttention
+from .contour_expand import contour_expand
+from .corner_pool import CornerPool
+from .correlation import Correlation
+from .deform_conv import DeformConv2d, DeformConv2dPack, deform_conv2d
+from .deform_roi_pool import (DeformRoIPool, DeformRoIPoolPack,
+                              ModulatedDeformRoIPoolPack, deform_roi_pool)
+from .deprecated_wrappers import Conv2d_deprecated as Conv2d
+from .deprecated_wrappers import ConvTranspose2d_deprecated as ConvTranspose2d
+from .deprecated_wrappers import Linear_deprecated as Linear
+from .deprecated_wrappers import MaxPool2d_deprecated as MaxPool2d
+from .focal_loss import (SigmoidFocalLoss, SoftmaxFocalLoss,
+                         sigmoid_focal_loss, softmax_focal_loss)
+from .furthest_point_sample import (furthest_point_sample,
+                                    furthest_point_sample_with_dist)
+from .fused_bias_leakyrelu import FusedBiasLeakyReLU, fused_bias_leakyrelu
+from .gather_points import gather_points
+from .group_points import GroupAll, QueryAndGroup, grouping_operation
+from .info import (get_compiler_version, get_compiling_cuda_version,
+                   get_onnxruntime_op_path)
+from .iou3d import boxes_iou_bev, nms_bev, nms_normal_bev
+from .knn import knn
+from .masked_conv import MaskedConv2d, masked_conv2d
+from .modulated_deform_conv import (ModulatedDeformConv2d,
+                                    ModulatedDeformConv2dPack,
+                                    modulated_deform_conv2d)
+from .multi_scale_deform_attn import MultiScaleDeformableAttention
+from .nms import batched_nms, nms, nms_match, nms_rotated, soft_nms
+from .pixel_group import pixel_group
+from .point_sample import (SimpleRoIAlign, point_sample,
+                           rel_roi_point_to_rel_img_point)
+from .points_in_boxes import (points_in_boxes_all, points_in_boxes_cpu,
+                              points_in_boxes_part)
+from .points_sampler import PointsSampler
+from .psa_mask import PSAMask
+from .roi_align import RoIAlign, roi_align
+from .roi_align_rotated import RoIAlignRotated, roi_align_rotated
+from .roi_pool import RoIPool, roi_pool
+from .roiaware_pool3d import RoIAwarePool3d
+from .roipoint_pool3d import RoIPointPool3d
+from .saconv import SAConv2d
+from .scatter_points import DynamicScatter, dynamic_scatter
+from .sync_bn import SyncBatchNorm
+from .three_interpolate import three_interpolate
+from .three_nn import three_nn
+from .tin_shift import TINShift, tin_shift
+from .upfirdn2d import upfirdn2d
+from .voxelize import Voxelization, voxelization
+
+__all__ = [
+    'bbox_overlaps', 'CARAFE', 'CARAFENaive', 'CARAFEPack', 'carafe',
+    'carafe_naive', 'CornerPool', 'DeformConv2d', 'DeformConv2dPack',
+    'deform_conv2d', 'DeformRoIPool', 'DeformRoIPoolPack',
+    'ModulatedDeformRoIPoolPack', 'deform_roi_pool', 'SigmoidFocalLoss',
+    'SoftmaxFocalLoss', 'sigmoid_focal_loss', 'softmax_focal_loss',
+    'get_compiler_version', 'get_compiling_cuda_version',
+    'get_onnxruntime_op_path', 'MaskedConv2d', 'masked_conv2d',
+    'ModulatedDeformConv2d', 'ModulatedDeformConv2dPack',
+    'modulated_deform_conv2d', 'batched_nms', 'nms', 'soft_nms', 'nms_match',
+    'RoIAlign', 'roi_align', 'RoIPool', 'roi_pool', 'SyncBatchNorm', 'Conv2d',
+    'ConvTranspose2d', 'Linear', 'MaxPool2d', 'CrissCrossAttention', 'PSAMask',
+    'point_sample', 'rel_roi_point_to_rel_img_point', 'SimpleRoIAlign',
+    'SAConv2d', 'TINShift', 'tin_shift', 'assign_score_withk',
+    'box_iou_rotated', 'RoIPointPool3d', 'nms_rotated', 'knn', 'ball_query',
+    'upfirdn2d', 'FusedBiasLeakyReLU', 'fused_bias_leakyrelu',
+    'RoIAlignRotated', 'roi_align_rotated', 'pixel_group', 'QueryAndGroup',
+    'GroupAll', 'grouping_operation', 'contour_expand', 'three_nn',
+    'three_interpolate', 'MultiScaleDeformableAttention', 'BorderAlign',
+    'border_align', 'gather_points', 'furthest_point_sample',
+    'furthest_point_sample_with_dist', 'PointsSampler', 'Correlation',
+    'boxes_iou_bev', 'nms_bev', 'nms_normal_bev', 'Voxelization',
+    'voxelization', 'dynamic_scatter', 'DynamicScatter', 'RoIAwarePool3d',
+    'points_in_boxes_part', 'points_in_boxes_cpu', 'points_in_boxes_all'
+]
extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/assign_score_withk.py
ADDED
@@ -0,0 +1,123 @@
+from torch.autograd import Function
+
+from ..utils import ext_loader
+
+ext_module = ext_loader.load_ext(
+    '_ext', ['assign_score_withk_forward', 'assign_score_withk_backward'])
+
+
+class AssignScoreWithK(Function):
+    r"""Perform weighted sum to generate output features according to scores.
+    Modified from `PAConv <https://github.com/CVMI-Lab/PAConv/tree/main/
+    scene_seg/lib/paconv_lib/src/gpu>`_.
+
+    This is a memory-efficient CUDA implementation of assign_scores operation,
+    which first transform all point features with weight bank, then assemble
+    neighbor features with ``knn_idx`` and perform weighted sum of ``scores``.
+
+    See the `paper <https://arxiv.org/pdf/2103.14635.pdf>`_ appendix Sec. D for
+    more detailed descriptions.
+
+    Note:
+        This implementation assumes using ``neighbor`` kernel input, which is
+        (point_features - center_features, point_features).
+        See https://github.com/CVMI-Lab/PAConv/blob/main/scene_seg/model/
+        pointnet2/paconv.py#L128 for more details.
+    """
+
+    @staticmethod
+    def forward(ctx,
+                scores,
+                point_features,
+                center_features,
+                knn_idx,
+                aggregate='sum'):
+        """
+        Args:
+            scores (torch.Tensor): (B, npoint, K, M), predicted scores to
+                aggregate weight matrices in the weight bank.
+                ``npoint`` is the number of sampled centers.
+                ``K`` is the number of queried neighbors.
+                ``M`` is the number of weight matrices in the weight bank.
+            point_features (torch.Tensor): (B, N, M, out_dim)
+                Pre-computed point features to be aggregated.
+            center_features (torch.Tensor): (B, N, M, out_dim)
+                Pre-computed center features to be aggregated.
+            knn_idx (torch.Tensor): (B, npoint, K), index of sampled kNN.
+                We assume the first idx in each row is the idx of the center.
+            aggregate (str, optional): Aggregation method.
+                Can be 'sum', 'avg' or 'max'. Defaults: 'sum'.
+
+        Returns:
+            torch.Tensor: (B, out_dim, npoint, K), the aggregated features.
+        """
+        agg = {'sum': 0, 'avg': 1, 'max': 2}
+
+        B, N, M, out_dim = point_features.size()
+        _, npoint, K, _ = scores.size()
+
+        output = point_features.new_zeros((B, out_dim, npoint, K))
+        ext_module.assign_score_withk_forward(
+            point_features.contiguous(),
+            center_features.contiguous(),
+            scores.contiguous(),
+            knn_idx.contiguous(),
+            output,
+            B=B,
+            N0=N,
+            N1=npoint,
+            M=M,
+            K=K,
+            O=out_dim,
+            aggregate=agg[aggregate])
+
+        ctx.save_for_backward(output, point_features, center_features, scores,
+                              knn_idx)
+        ctx.agg = agg[aggregate]
+
+        return output
+
+    @staticmethod
+    def backward(ctx, grad_out):
+        """
+        Args:
+            grad_out (torch.Tensor): (B, out_dim, npoint, K)
+
+        Returns:
+            grad_scores (torch.Tensor): (B, npoint, K, M)
+            grad_point_features (torch.Tensor): (B, N, M, out_dim)
+            grad_center_features (torch.Tensor): (B, N, M, out_dim)
+        """
+        _, point_features, center_features, scores, knn_idx = ctx.saved_tensors
+
+        agg = ctx.agg
+
+        B, N, M, out_dim = point_features.size()
+        _, npoint, K, _ = scores.size()
+
+        grad_point_features = point_features.new_zeros(point_features.shape)
+        grad_center_features = center_features.new_zeros(center_features.shape)
+        grad_scores = scores.new_zeros(scores.shape)
+
+        ext_module.assign_score_withk_backward(
+            grad_out.contiguous(),
+            point_features.contiguous(),
+            center_features.contiguous(),
+            scores.contiguous(),
+            knn_idx.contiguous(),
+            grad_point_features,
+            grad_center_features,
+            grad_scores,
+            B=B,
+            N0=N,
+            N1=npoint,
+            M=M,
+            K=K,
+            O=out_dim,
+            aggregate=agg)
+
+        return grad_scores, grad_point_features, \
+            grad_center_features, None, None
+
+
+assign_score_withk = AssignScoreWithK.apply
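
A minimal usage sketch for assign_score_withk, following the shapes documented above (illustrative only, not part of this commit; it requires a CUDA build of the bundled _ext module):

import torch
from annotator.mmpkg.mmcv.ops import assign_score_withk

B, N, npoint, K, M, out_dim = 2, 64, 16, 8, 4, 32
scores = torch.rand(B, npoint, K, M, device='cuda')        # per-neighbor weight-bank scores
point_feats = torch.rand(B, N, M, out_dim, device='cuda')  # transformed point features
center_feats = torch.rand(B, N, M, out_dim, device='cuda')  # transformed center features
knn_idx = torch.randint(0, N, (B, npoint, K), device='cuda')

out = assign_score_withk(scores, point_feats, center_feats, knn_idx, 'sum')
print(out.shape)  # torch.Size([2, 32, 16, 8]) == (B, out_dim, npoint, K)
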
extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/ball_query.py
ADDED
@@ -0,0 +1,55 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch
+from torch.autograd import Function
+
+from ..utils import ext_loader
+
+ext_module = ext_loader.load_ext('_ext', ['ball_query_forward'])
+
+
+class BallQuery(Function):
+    """Find nearby points in spherical space."""
+
+    @staticmethod
+    def forward(ctx, min_radius: float, max_radius: float, sample_num: int,
+                xyz: torch.Tensor, center_xyz: torch.Tensor) -> torch.Tensor:
+        """
+        Args:
+            min_radius (float): minimum radius of the balls.
+            max_radius (float): maximum radius of the balls.
+            sample_num (int): maximum number of features in the balls.
+            xyz (Tensor): (B, N, 3) xyz coordinates of the features.
+            center_xyz (Tensor): (B, npoint, 3) centers of the ball query.
+
+        Returns:
+            Tensor: (B, npoint, nsample) tensor with the indices of
+                the features that form the query balls.
+        """
+        assert center_xyz.is_contiguous()
+        assert xyz.is_contiguous()
+        assert min_radius < max_radius
+
+        B, N, _ = xyz.size()
+        npoint = center_xyz.size(1)
+        idx = xyz.new_zeros(B, npoint, sample_num, dtype=torch.int)
+
+        ext_module.ball_query_forward(
+            center_xyz,
+            xyz,
+            idx,
+            b=B,
+            n=N,
+            m=npoint,
+            min_radius=min_radius,
+            max_radius=max_radius,
+            nsample=sample_num)
+        if torch.__version__ != 'parrots':
+            ctx.mark_non_differentiable(idx)
+        return idx
+
+    @staticmethod
+    def backward(ctx, a=None):
+        return None, None, None, None
+
+
+ball_query = BallQuery.apply
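
A minimal usage sketch for ball_query (illustrative only, not part of this commit; requires a CUDA build of the bundled _ext module):

import torch
from annotator.mmpkg.mmcv.ops import ball_query

xyz = torch.rand(2, 256, 3, device='cuda')    # (B, N, 3) input points
centers = xyz[:, :32, :].contiguous()         # (B, npoint, 3) query centers
# Argument order: min_radius, max_radius, sample_num, then the two point sets.
idx = ball_query(0., 0.4, 16, xyz, centers)
print(idx.shape)  # torch.Size([2, 32, 16]), int indices into xyz
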
extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/bbox.py
ADDED
@@ -0,0 +1,72 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from ..utils import ext_loader
+
+ext_module = ext_loader.load_ext('_ext', ['bbox_overlaps'])
+
+
+def bbox_overlaps(bboxes1, bboxes2, mode='iou', aligned=False, offset=0):
+    """Calculate overlap between two set of bboxes.
+
+    If ``aligned`` is ``False``, then calculate the ious between each bbox
+    of bboxes1 and bboxes2, otherwise the ious between each aligned pair of
+    bboxes1 and bboxes2.
+
+    Args:
+        bboxes1 (Tensor): shape (m, 4) in <x1, y1, x2, y2> format or empty.
+        bboxes2 (Tensor): shape (n, 4) in <x1, y1, x2, y2> format or empty.
+            If aligned is ``True``, then m and n must be equal.
+        mode (str): "iou" (intersection over union) or iof (intersection over
+            foreground).
+
+    Returns:
+        ious(Tensor): shape (m, n) if aligned == False else shape (m, 1)
+
+    Example:
+        >>> bboxes1 = torch.FloatTensor([
+        >>>     [0, 0, 10, 10],
+        >>>     [10, 10, 20, 20],
+        >>>     [32, 32, 38, 42],
+        >>> ])
+        >>> bboxes2 = torch.FloatTensor([
+        >>>     [0, 0, 10, 20],
+        >>>     [0, 10, 10, 19],
+        >>>     [10, 10, 20, 20],
+        >>> ])
+        >>> bbox_overlaps(bboxes1, bboxes2)
+        tensor([[0.5000, 0.0000, 0.0000],
+                [0.0000, 0.0000, 1.0000],
+                [0.0000, 0.0000, 0.0000]])
+
+    Example:
+        >>> empty = torch.FloatTensor([])
+        >>> nonempty = torch.FloatTensor([
+        >>>     [0, 0, 10, 9],
+        >>> ])
+        >>> assert tuple(bbox_overlaps(empty, nonempty).shape) == (0, 1)
+        >>> assert tuple(bbox_overlaps(nonempty, empty).shape) == (1, 0)
+        >>> assert tuple(bbox_overlaps(empty, empty).shape) == (0, 0)
+    """
+
+    mode_dict = {'iou': 0, 'iof': 1}
+    assert mode in mode_dict.keys()
+    mode_flag = mode_dict[mode]
+    # Either the boxes are empty or the length of boxes' last dimension is 4
+    assert (bboxes1.size(-1) == 4 or bboxes1.size(0) == 0)
+    assert (bboxes2.size(-1) == 4 or bboxes2.size(0) == 0)
+    assert offset == 1 or offset == 0
+
+    rows = bboxes1.size(0)
+    cols = bboxes2.size(0)
+    if aligned:
+        assert rows == cols
+
+    if rows * cols == 0:
+        return bboxes1.new(rows, 1) if aligned else bboxes1.new(rows, cols)
+
+    if aligned:
+        ious = bboxes1.new_zeros(rows)
+    else:
+        ious = bboxes1.new_zeros((rows, cols))
+    ext_module.bbox_overlaps(
+        bboxes1, bboxes2, ious, mode=mode_flag, aligned=aligned, offset=offset)
+    return ious
extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/border_align.py
ADDED
@@ -0,0 +1,109 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+# modified from
+# https://github.com/Megvii-BaseDetection/cvpods/blob/master/cvpods/layers/border_align.py
+
+import torch
+import torch.nn as nn
+from torch.autograd import Function
+from torch.autograd.function import once_differentiable
+
+from ..utils import ext_loader
+
+ext_module = ext_loader.load_ext(
+    '_ext', ['border_align_forward', 'border_align_backward'])
+
+
+class BorderAlignFunction(Function):
+
+    @staticmethod
+    def symbolic(g, input, boxes, pool_size):
+        return g.op(
+            'mmcv::MMCVBorderAlign', input, boxes, pool_size_i=pool_size)
+
+    @staticmethod
+    def forward(ctx, input, boxes, pool_size):
+        ctx.pool_size = pool_size
+        ctx.input_shape = input.size()
+
+        assert boxes.ndim == 3, 'boxes must be with shape [B, H*W, 4]'
+        assert boxes.size(2) == 4, \
+            'the last dimension of boxes must be (x1, y1, x2, y2)'
+        assert input.size(1) % 4 == 0, \
+            'the channel for input feature must be divisible by factor 4'
+
+        # [B, C//4, H*W, 4]
+        output_shape = (input.size(0), input.size(1) // 4, boxes.size(1), 4)
+        output = input.new_zeros(output_shape)
+        # `argmax_idx` only used for backward
+        argmax_idx = input.new_zeros(output_shape).to(torch.int)
+
+        ext_module.border_align_forward(
+            input, boxes, output, argmax_idx, pool_size=ctx.pool_size)
+
+        ctx.save_for_backward(boxes, argmax_idx)
+        return output
+
+    @staticmethod
+    @once_differentiable
+    def backward(ctx, grad_output):
+        boxes, argmax_idx = ctx.saved_tensors
+        grad_input = grad_output.new_zeros(ctx.input_shape)
+        # complex head architecture may cause grad_output uncontiguous
+        grad_output = grad_output.contiguous()
+        ext_module.border_align_backward(
+            grad_output,
+            boxes,
+            argmax_idx,
+            grad_input,
+            pool_size=ctx.pool_size)
+        return grad_input, None, None
+
+
+border_align = BorderAlignFunction.apply
+
+
+class BorderAlign(nn.Module):
+    r"""Border align pooling layer.
+
+    Applies border_align over the input feature based on predicted bboxes.
+    The details were described in the paper
+    `BorderDet: Border Feature for Dense Object Detection
+    <https://arxiv.org/abs/2007.11056>`_.
+
+    For each border line (e.g. top, left, bottom or right) of each box,
+    border_align does the following:
+        1. uniformly samples `pool_size`+1 positions on this line, involving \
+            the start and end points.
+        2. the corresponding features on these points are computed by \
+            bilinear interpolation.
+        3. max pooling over all the `pool_size`+1 positions are used for \
+            computing pooled feature.
+
+    Args:
+        pool_size (int): number of positions sampled over the boxes' borders
+            (e.g. top, bottom, left, right).
+
+    """
+
+    def __init__(self, pool_size):
+        super(BorderAlign, self).__init__()
+        self.pool_size = pool_size
+
+    def forward(self, input, boxes):
+        """
+        Args:
+            input: Features with shape [N,4C,H,W]. Channels ranged in [0,C),
+                [C,2C), [2C,3C), [3C,4C) represent the top, left, bottom,
+                right features respectively.
+            boxes: Boxes with shape [N,H*W,4]. Coordinate format (x1,y1,x2,y2).
+
+        Returns:
+            Tensor: Pooled features with shape [N,C,H*W,4]. The order is
+                (top,left,bottom,right) for the last dimension.
+        """
+        return border_align(input, boxes, self.pool_size)
+
+    def __repr__(self):
+        s = self.__class__.__name__
+        s += f'(pool_size={self.pool_size})'
+        return s
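
A minimal usage sketch for BorderAlign (illustrative only, not part of this commit; requires a CUDA build of the bundled _ext module). The channel layout follows the docstring above: with 4C input channels, [0,C) holds top, [C,2C) left, [2C,3C) bottom, [3C,4C) right features.

import torch
from annotator.mmpkg.mmcv.ops import BorderAlign

C, H, W = 16, 10, 10
feat = torch.rand(1, 4 * C, H, W, device='cuda')   # (N, 4C, H, W)
x1y1 = torch.rand(1, H * W, 2, device='cuda') * 5
boxes = torch.cat([x1y1, x1y1 + 4], dim=-1)        # valid (x1, y1, x2, y2), one box per location
pooled = BorderAlign(pool_size=10)(feat, boxes)
print(pooled.shape)  # torch.Size([1, 16, 100, 4])
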
extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/box_iou_rotated.py
ADDED
@@ -0,0 +1,45 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from ..utils import ext_loader
+
+ext_module = ext_loader.load_ext('_ext', ['box_iou_rotated'])
+
+
+def box_iou_rotated(bboxes1, bboxes2, mode='iou', aligned=False):
+    """Return intersection-over-union (Jaccard index) of boxes.
+
+    Both sets of boxes are expected to be in
+    (x_center, y_center, width, height, angle) format.
+
+    If ``aligned`` is ``False``, then calculate the ious between each bbox
+    of bboxes1 and bboxes2, otherwise the ious between each aligned pair of
+    bboxes1 and bboxes2.
+
+    Arguments:
+        boxes1 (Tensor): rotated bboxes 1. \
+            It has shape (N, 5), indicating (x, y, w, h, theta) for each row.
+            Note that theta is in radian.
+        boxes2 (Tensor): rotated bboxes 2. \
+            It has shape (M, 5), indicating (x, y, w, h, theta) for each row.
+            Note that theta is in radian.
+        mode (str): "iou" (intersection over union) or iof (intersection over
+            foreground).
+
+    Returns:
+        ious(Tensor): shape (N, M) if aligned == False else shape (N,)
+    """
+    assert mode in ['iou', 'iof']
+    mode_dict = {'iou': 0, 'iof': 1}
+    mode_flag = mode_dict[mode]
+    rows = bboxes1.size(0)
+    cols = bboxes2.size(0)
+    if aligned:
+        ious = bboxes1.new_zeros(rows)
+    else:
+        ious = bboxes1.new_zeros((rows * cols))
+    bboxes1 = bboxes1.contiguous()
+    bboxes2 = bboxes2.contiguous()
+    ext_module.box_iou_rotated(
+        bboxes1, bboxes2, ious, mode_flag=mode_flag, aligned=aligned)
+    if not aligned:
+        ious = ious.view(rows, cols)
+    return ious
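
A minimal usage sketch for box_iou_rotated (illustrative only, not part of this commit; requires the compiled _ext module):

import torch
from annotator.mmpkg.mmcv.ops import box_iou_rotated

# Boxes are (x_center, y_center, w, h, theta), theta in radians.
boxes1 = torch.tensor([[10., 10., 4., 2., 0.],
                       [20., 20., 4., 4., 0.5]])
boxes2 = torch.tensor([[10., 10., 4., 2., 0.5]])
ious = box_iou_rotated(boxes1, boxes2)                  # pairwise, shape (2, 1)
self_ious = box_iou_rotated(boxes1, boxes1, aligned=True)  # per-pair, shape (2,)
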
extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/carafe.py
ADDED
@@ -0,0 +1,287 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch.autograd import Function
+from torch.nn.modules.module import Module
+
+from ..cnn import UPSAMPLE_LAYERS, normal_init, xavier_init
+from ..utils import ext_loader
+
+ext_module = ext_loader.load_ext('_ext', [
+    'carafe_naive_forward', 'carafe_naive_backward', 'carafe_forward',
+    'carafe_backward'
+])
+
+
+class CARAFENaiveFunction(Function):
+
+    @staticmethod
+    def symbolic(g, features, masks, kernel_size, group_size, scale_factor):
+        return g.op(
+            'mmcv::MMCVCARAFENaive',
+            features,
+            masks,
+            kernel_size_i=kernel_size,
+            group_size_i=group_size,
+            scale_factor_f=scale_factor)
+
+    @staticmethod
+    def forward(ctx, features, masks, kernel_size, group_size, scale_factor):
+        assert scale_factor >= 1
+        assert masks.size(1) == kernel_size * kernel_size * group_size
+        assert masks.size(-1) == features.size(-1) * scale_factor
+        assert masks.size(-2) == features.size(-2) * scale_factor
+        assert features.size(1) % group_size == 0
+        assert (kernel_size - 1) % 2 == 0 and kernel_size >= 1
+        ctx.kernel_size = kernel_size
+        ctx.group_size = group_size
+        ctx.scale_factor = scale_factor
+        ctx.feature_size = features.size()
+        ctx.mask_size = masks.size()
+
+        n, c, h, w = features.size()
+        output = features.new_zeros((n, c, h * scale_factor, w * scale_factor))
+        ext_module.carafe_naive_forward(
+            features,
+            masks,
+            output,
+            kernel_size=kernel_size,
+            group_size=group_size,
+            scale_factor=scale_factor)
+
+        if features.requires_grad or masks.requires_grad:
+            ctx.save_for_backward(features, masks)
+        return output
+
+    @staticmethod
+    def backward(ctx, grad_output):
+        assert grad_output.is_cuda
+
+        features, masks = ctx.saved_tensors
+        kernel_size = ctx.kernel_size
+        group_size = ctx.group_size
+        scale_factor = ctx.scale_factor
+
+        grad_input = torch.zeros_like(features)
+        grad_masks = torch.zeros_like(masks)
+        ext_module.carafe_naive_backward(
+            grad_output.contiguous(),
+            features,
+            masks,
+            grad_input,
+            grad_masks,
+            kernel_size=kernel_size,
+            group_size=group_size,
+            scale_factor=scale_factor)
+
+        return grad_input, grad_masks, None, None, None
+
+
+carafe_naive = CARAFENaiveFunction.apply
+
+
+class CARAFENaive(Module):
+
+    def __init__(self, kernel_size, group_size, scale_factor):
+        super(CARAFENaive, self).__init__()
+
+        assert isinstance(kernel_size, int) and isinstance(
+            group_size, int) and isinstance(scale_factor, int)
+        self.kernel_size = kernel_size
+        self.group_size = group_size
+        self.scale_factor = scale_factor
+
+    def forward(self, features, masks):
+        return carafe_naive(features, masks, self.kernel_size, self.group_size,
+                            self.scale_factor)
+
+
+class CARAFEFunction(Function):
+
+    @staticmethod
+    def symbolic(g, features, masks, kernel_size, group_size, scale_factor):
+        return g.op(
+            'mmcv::MMCVCARAFE',
+            features,
+            masks,
+            kernel_size_i=kernel_size,
+            group_size_i=group_size,
+            scale_factor_f=scale_factor)
+
+    @staticmethod
+    def forward(ctx, features, masks, kernel_size, group_size, scale_factor):
+        assert scale_factor >= 1
+        assert masks.size(1) == kernel_size * kernel_size * group_size
+        assert masks.size(-1) == features.size(-1) * scale_factor
+        assert masks.size(-2) == features.size(-2) * scale_factor
+        assert features.size(1) % group_size == 0
+        assert (kernel_size - 1) % 2 == 0 and kernel_size >= 1
+        ctx.kernel_size = kernel_size
+        ctx.group_size = group_size
+        ctx.scale_factor = scale_factor
+        ctx.feature_size = features.size()
+        ctx.mask_size = masks.size()
+
+        n, c, h, w = features.size()
+        output = features.new_zeros((n, c, h * scale_factor, w * scale_factor))
+        routput = features.new_zeros(output.size(), requires_grad=False)
+        rfeatures = features.new_zeros(features.size(), requires_grad=False)
+        rmasks = masks.new_zeros(masks.size(), requires_grad=False)
+        ext_module.carafe_forward(
+            features,
+            masks,
+            rfeatures,
+            routput,
+            rmasks,
+            output,
+            kernel_size=kernel_size,
+            group_size=group_size,
+            scale_factor=scale_factor)
+
+        if features.requires_grad or masks.requires_grad:
+            ctx.save_for_backward(features, masks, rfeatures)
+        return output
+
+    @staticmethod
+    def backward(ctx, grad_output):
+        assert grad_output.is_cuda
+
+        features, masks, rfeatures = ctx.saved_tensors
+        kernel_size = ctx.kernel_size
+        group_size = ctx.group_size
+        scale_factor = ctx.scale_factor
+
+        rgrad_output = torch.zeros_like(grad_output, requires_grad=False)
+        rgrad_input_hs = torch.zeros_like(grad_output, requires_grad=False)
+        rgrad_input = torch.zeros_like(features, requires_grad=False)
+        rgrad_masks = torch.zeros_like(masks, requires_grad=False)
+        grad_input = torch.zeros_like(features, requires_grad=False)
+        grad_masks = torch.zeros_like(masks, requires_grad=False)
+        ext_module.carafe_backward(
+            grad_output.contiguous(),
+            rfeatures,
+            masks,
+            rgrad_output,
+            rgrad_input_hs,
+            rgrad_input,
+            rgrad_masks,
+            grad_input,
+            grad_masks,
+            kernel_size=kernel_size,
+            group_size=group_size,
+            scale_factor=scale_factor)
+        return grad_input, grad_masks, None, None, None
+
+
+carafe = CARAFEFunction.apply
+
+
+class CARAFE(Module):
+    """ CARAFE: Content-Aware ReAssembly of FEatures
+
+    Please refer to https://arxiv.org/abs/1905.02188 for more details.
+
+    Args:
+        kernel_size (int): reassemble kernel size
+        group_size (int): reassemble group size
+        scale_factor (int): upsample ratio
+
+    Returns:
+        upsampled feature map
+    """
+
+    def __init__(self, kernel_size, group_size, scale_factor):
+        super(CARAFE, self).__init__()
+
+        assert isinstance(kernel_size, int) and isinstance(
+            group_size, int) and isinstance(scale_factor, int)
+        self.kernel_size = kernel_size
+        self.group_size = group_size
+        self.scale_factor = scale_factor
+
+    def forward(self, features, masks):
+        return carafe(features, masks, self.kernel_size, self.group_size,
+                      self.scale_factor)
+
+
+@UPSAMPLE_LAYERS.register_module(name='carafe')
+class CARAFEPack(nn.Module):
+    """A unified package of CARAFE upsampler that contains: 1) channel
+    compressor 2) content encoder 3) CARAFE op.
+
+    Official implementation of ICCV 2019 paper
+    CARAFE: Content-Aware ReAssembly of FEatures
+    Please refer to https://arxiv.org/abs/1905.02188 for more details.
+
+    Args:
+        channels (int): input feature channels
+        scale_factor (int): upsample ratio
+        up_kernel (int): kernel size of CARAFE op
+        up_group (int): group size of CARAFE op
+        encoder_kernel (int): kernel size of content encoder
+        encoder_dilation (int): dilation of content encoder
+        compressed_channels (int): output channels of channels compressor
+
+    Returns:
+        upsampled feature map
+    """
+
+    def __init__(self,
+                 channels,
+                 scale_factor,
+                 up_kernel=5,
+                 up_group=1,
+                 encoder_kernel=3,
+                 encoder_dilation=1,
+                 compressed_channels=64):
+        super(CARAFEPack, self).__init__()
+        self.channels = channels
+        self.scale_factor = scale_factor
+        self.up_kernel = up_kernel
+        self.up_group = up_group
+        self.encoder_kernel = encoder_kernel
+        self.encoder_dilation = encoder_dilation
+        self.compressed_channels = compressed_channels
+        self.channel_compressor = nn.Conv2d(channels, self.compressed_channels,
+                                            1)
+        self.content_encoder = nn.Conv2d(
+            self.compressed_channels,
+            self.up_kernel * self.up_kernel * self.up_group *
+            self.scale_factor * self.scale_factor,
+            self.encoder_kernel,
+            padding=int((self.encoder_kernel - 1) * self.encoder_dilation / 2),
+            dilation=self.encoder_dilation,
+            groups=1)
+        self.init_weights()
+
+    def init_weights(self):
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                xavier_init(m, distribution='uniform')
+        normal_init(self.content_encoder, std=0.001)
+
+    def kernel_normalizer(self, mask):
+        mask = F.pixel_shuffle(mask, self.scale_factor)
+        n, mask_c, h, w = mask.size()
+        # use float division explicitly,
+        # to void inconsistency while exporting to onnx
+        mask_channel = int(mask_c / float(self.up_kernel**2))
+        mask = mask.view(n, mask_channel, -1, h, w)
+
+        mask = F.softmax(mask, dim=2, dtype=mask.dtype)
+        mask = mask.view(n, mask_c, h, w).contiguous()
+
+        return mask
+
+    def feature_reassemble(self, x, mask):
+        x = carafe(x, mask, self.up_kernel, self.up_group, self.scale_factor)
+        return x
+
+    def forward(self, x):
+        compressed_x = self.channel_compressor(x)
+        mask = self.content_encoder(compressed_x)
+        mask = self.kernel_normalizer(mask)
+
+        x = self.feature_reassemble(x, mask)
+        return x
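
A minimal usage sketch for the CARAFEPack upsampler (illustrative only, not part of this commit; requires a CUDA build of the bundled _ext module). It compresses channels, predicts per-location reassembly kernels, then applies the CARAFE op to upsample by scale_factor:

import torch
from annotator.mmpkg.mmcv.ops import CARAFEPack

x = torch.rand(2, 64, 24, 24, device='cuda')
upsampler = CARAFEPack(channels=64, scale_factor=2).cuda()
out = upsampler(x)
print(out.shape)  # torch.Size([2, 64, 48, 48])
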
extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/cc_attention.py
ADDED
@@ -0,0 +1,83 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+from annotator.mmpkg.mmcv.cnn import PLUGIN_LAYERS, Scale
+
+
+def NEG_INF_DIAG(n, device):
+    """Returns a diagonal matrix of size [n, n].
+
+    The diagonal are all "-inf". This is for avoiding calculating the
+    overlapped element in the Criss-Cross twice.
+    """
+    return torch.diag(torch.tensor(float('-inf')).to(device).repeat(n), 0)
+
+
+@PLUGIN_LAYERS.register_module()
+class CrissCrossAttention(nn.Module):
+    """Criss-Cross Attention Module.
+
+    .. note::
+        Before v1.3.13, we use a CUDA op. Since v1.3.13, we switch
+        to a pure PyTorch and equivalent implementation. For more
+        details, please refer to https://github.com/open-mmlab/mmcv/pull/1201.
+
+        Speed comparison for one forward pass
+
+        - Input size: [2,512,97,97]
+        - Device: 1 NVIDIA GeForce RTX 2080 Ti
+
+        +-----------------------+---------------+------------+---------------+
+        |                       |PyTorch version|CUDA version|Relative speed |
+        +=======================+===============+============+===============+
+        |with torch.no_grad()   |0.00554402 s   |0.0299619 s |5.4x           |
+        +-----------------------+---------------+------------+---------------+
+        |no with torch.no_grad()|0.00562803 s   |0.0301349 s |5.4x           |
+        +-----------------------+---------------+------------+---------------+
+
+    Args:
+        in_channels (int): Channels of the input feature map.
+    """
+
+    def __init__(self, in_channels):
+        super().__init__()
+        self.query_conv = nn.Conv2d(in_channels, in_channels // 8, 1)
+        self.key_conv = nn.Conv2d(in_channels, in_channels // 8, 1)
+        self.value_conv = nn.Conv2d(in_channels, in_channels, 1)
+        self.gamma = Scale(0.)
+        self.in_channels = in_channels
+
+    def forward(self, x):
+        """forward function of Criss-Cross Attention.
+
+        Args:
+            x (Tensor): Input feature. \
+                shape (batch_size, in_channels, height, width)
+        Returns:
+            Tensor: Output of the layer, with shape of \
+                (batch_size, in_channels, height, width)
+        """
+        B, C, H, W = x.size()
+        query = self.query_conv(x)
+        key = self.key_conv(x)
+        value = self.value_conv(x)
+        energy_H = torch.einsum('bchw,bciw->bwhi', query, key) + NEG_INF_DIAG(
+            H, query.device)
+        energy_H = energy_H.transpose(1, 2)
+        energy_W = torch.einsum('bchw,bchj->bhwj', query, key)
+        attn = F.softmax(
+            torch.cat([energy_H, energy_W], dim=-1), dim=-1)  # [B,H,W,(H+W)]
+        out = torch.einsum('bciw,bhwi->bchw', value, attn[..., :H])
+        out += torch.einsum('bchj,bhwj->bchw', value, attn[..., H:])
+
+        out = self.gamma(out) + x
+        out = out.contiguous()
+
+        return out
+
+    def __repr__(self):
+        s = self.__class__.__name__
+        s += f'(in_channels={self.in_channels})'
+        return s
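
A minimal usage sketch for CrissCrossAttention (illustrative only, not part of this commit). Since this is the pure-PyTorch implementation, it also runs on CPU; CCNet applies the module recurrently, typically twice, so each position attends to its full row and column:

import torch
from annotator.mmpkg.mmcv.ops import CrissCrossAttention

x = torch.rand(2, 32, 45, 45)
cca = CrissCrossAttention(in_channels=32)
out = cca(cca(x))  # two recurrent passes, as in CCNet
print(out.shape)   # torch.Size([2, 32, 45, 45])
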
extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/contour_expand.py
ADDED
@@ -0,0 +1,49 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import numpy as np
+import torch
+
+from ..utils import ext_loader
+
+ext_module = ext_loader.load_ext('_ext', ['contour_expand'])
+
+
+def contour_expand(kernel_mask, internal_kernel_label, min_kernel_area,
+                   kernel_num):
+    """Expand kernel contours so that foreground pixels are assigned into
+    instances.
+
+    Arguments:
+        kernel_mask (np.array or Tensor): The instance kernel mask with
+            size hxw.
+        internal_kernel_label (np.array or Tensor): The instance internal
+            kernel label with size hxw.
+        min_kernel_area (int): The minimum kernel area.
+        kernel_num (int): The instance kernel number.
+
+    Returns:
+        label (list): The instance index map with size hxw.
+    """
+    assert isinstance(kernel_mask, (torch.Tensor, np.ndarray))
+    assert isinstance(internal_kernel_label, (torch.Tensor, np.ndarray))
+    assert isinstance(min_kernel_area, int)
+    assert isinstance(kernel_num, int)
+
+    if isinstance(kernel_mask, np.ndarray):
+        kernel_mask = torch.from_numpy(kernel_mask)
+    if isinstance(internal_kernel_label, np.ndarray):
+        internal_kernel_label = torch.from_numpy(internal_kernel_label)
+
+    if torch.__version__ == 'parrots':
+        if kernel_mask.shape[0] == 0 or internal_kernel_label.shape[0] == 0:
+            label = []
+        else:
+            label = ext_module.contour_expand(
+                kernel_mask,
+                internal_kernel_label,
+                min_kernel_area=min_kernel_area,
+                kernel_num=kernel_num)
+            label = label.tolist()
+    else:
+        label = ext_module.contour_expand(kernel_mask, internal_kernel_label,
+                                          min_kernel_area, kernel_num)
+    return label
extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/corner_pool.py
ADDED
@@ -0,0 +1,161 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Copyright (c) OpenMMLab. All rights reserved.
import torch
from torch import nn
from torch.autograd import Function

from ..utils import ext_loader

ext_module = ext_loader.load_ext('_ext', [
    'top_pool_forward', 'top_pool_backward', 'bottom_pool_forward',
    'bottom_pool_backward', 'left_pool_forward', 'left_pool_backward',
    'right_pool_forward', 'right_pool_backward'
])

_mode_dict = {'top': 0, 'bottom': 1, 'left': 2, 'right': 3}


class TopPoolFunction(Function):

    @staticmethod
    def symbolic(g, input):
        output = g.op(
            'mmcv::MMCVCornerPool', input, mode_i=int(_mode_dict['top']))
        return output

    @staticmethod
    def forward(ctx, input):
        output = ext_module.top_pool_forward(input)
        ctx.save_for_backward(input)
        return output

    @staticmethod
    def backward(ctx, grad_output):
        input, = ctx.saved_tensors
        output = ext_module.top_pool_backward(input, grad_output)
        return output


class BottomPoolFunction(Function):

    @staticmethod
    def symbolic(g, input):
        output = g.op(
            'mmcv::MMCVCornerPool', input, mode_i=int(_mode_dict['bottom']))
        return output

    @staticmethod
    def forward(ctx, input):
        output = ext_module.bottom_pool_forward(input)
        ctx.save_for_backward(input)
        return output

    @staticmethod
    def backward(ctx, grad_output):
        input, = ctx.saved_tensors
        output = ext_module.bottom_pool_backward(input, grad_output)
        return output


class LeftPoolFunction(Function):

    @staticmethod
    def symbolic(g, input):
        output = g.op(
            'mmcv::MMCVCornerPool', input, mode_i=int(_mode_dict['left']))
        return output

    @staticmethod
    def forward(ctx, input):
        output = ext_module.left_pool_forward(input)
        ctx.save_for_backward(input)
        return output

    @staticmethod
    def backward(ctx, grad_output):
        input, = ctx.saved_tensors
        output = ext_module.left_pool_backward(input, grad_output)
        return output


class RightPoolFunction(Function):

    @staticmethod
    def symbolic(g, input):
        output = g.op(
            'mmcv::MMCVCornerPool', input, mode_i=int(_mode_dict['right']))
        return output

    @staticmethod
    def forward(ctx, input):
        output = ext_module.right_pool_forward(input)
        ctx.save_for_backward(input)
        return output

    @staticmethod
    def backward(ctx, grad_output):
        input, = ctx.saved_tensors
        output = ext_module.right_pool_backward(input, grad_output)
        return output


class CornerPool(nn.Module):
    """Corner Pooling.

    Corner Pooling is a new type of pooling layer that helps a
    convolutional network better localize corners of bounding boxes.

    Please refer to https://arxiv.org/abs/1808.01244 for more details.
    Code is modified from https://github.com/princeton-vl/CornerNet-Lite.

    Args:
        mode (str): Pooling orientation for the pooling layer

            - 'bottom': Bottom Pooling
            - 'left': Left Pooling
            - 'right': Right Pooling
            - 'top': Top Pooling

    Returns:
        Feature map after pooling.
    """

    pool_functions = {
        'bottom': BottomPoolFunction,
        'left': LeftPoolFunction,
        'right': RightPoolFunction,
        'top': TopPoolFunction,
    }

    cummax_dim_flip = {
        'bottom': (2, False),
        'left': (3, True),
        'right': (3, False),
        'top': (2, True),
    }

    def __init__(self, mode):
        super(CornerPool, self).__init__()
        assert mode in self.pool_functions
        self.mode = mode
        self.corner_pool = self.pool_functions[mode]

    def forward(self, x):
        if torch.__version__ != 'parrots' and torch.__version__ >= '1.5.0':
            if torch.onnx.is_in_onnx_export():
                assert torch.__version__ >= '1.7.0', \
                    'When `cummax` serves as an intermediate component whose '\
                    'outputs are used as inputs to other modules, the '\
                    'pytorch version must be >= 1.7.0; otherwise an error '\
                    'appears like: `RuntimeError: tuple appears in op that '\
                    'does not forward tuples, unsupported kind: '\
                    'prim::PythonOp`.'

            dim, flip = self.cummax_dim_flip[self.mode]
            if flip:
                x = x.flip(dim)
            pool_tensor, _ = torch.cummax(x, dim=dim)
            if flip:
                pool_tensor = pool_tensor.flip(dim)
            return pool_tensor
        else:
            return self.corner_pool.apply(x)
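
A minimal usage sketch for CornerPool, assuming the vendored `_ext` extension imports cleanly and torch >= 1.5, where the pure-PyTorch cummax path shown above is taken; shapes are illustrative:

import torch
from annotator.mmpkg.mmcv.ops.corner_pool import CornerPool

pool = CornerPool('top')
x = torch.rand(2, 8, 16, 16)
y = pool(x)  # same shape; each position holds the max over its row and below
# On torch >= 1.5.0 the forward is exactly this cummax fallback:
ref = torch.cummax(x.flip(2), dim=2)[0].flip(2)
assert torch.allclose(y, ref)
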
extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/correlation.py
ADDED
@@ -0,0 +1,196 @@
# Copyright (c) OpenMMLab. All rights reserved.
import torch
from torch import Tensor, nn
from torch.autograd import Function
from torch.autograd.function import once_differentiable
from torch.nn.modules.utils import _pair

from ..utils import ext_loader

ext_module = ext_loader.load_ext(
    '_ext', ['correlation_forward', 'correlation_backward'])


class CorrelationFunction(Function):

    @staticmethod
    def forward(ctx,
                input1,
                input2,
                kernel_size=1,
                max_displacement=1,
                stride=1,
                padding=1,
                dilation=1,
                dilation_patch=1):

        ctx.save_for_backward(input1, input2)

        kH, kW = ctx.kernel_size = _pair(kernel_size)
        patch_size = max_displacement * 2 + 1
        ctx.patch_size = patch_size
        dH, dW = ctx.stride = _pair(stride)
        padH, padW = ctx.padding = _pair(padding)
        dilationH, dilationW = ctx.dilation = _pair(dilation)
        dilation_patchH, dilation_patchW = ctx.dilation_patch = _pair(
            dilation_patch)

        output_size = CorrelationFunction._output_size(ctx, input1)

        output = input1.new_zeros(output_size)

        ext_module.correlation_forward(
            input1,
            input2,
            output,
            kH=kH,
            kW=kW,
            patchH=patch_size,
            patchW=patch_size,
            padH=padH,
            padW=padW,
            dilationH=dilationH,
            dilationW=dilationW,
            dilation_patchH=dilation_patchH,
            dilation_patchW=dilation_patchW,
            dH=dH,
            dW=dW)

        return output

    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        input1, input2 = ctx.saved_tensors

        kH, kW = ctx.kernel_size
        patch_size = ctx.patch_size
        padH, padW = ctx.padding
        dilationH, dilationW = ctx.dilation
        dilation_patchH, dilation_patchW = ctx.dilation_patch
        dH, dW = ctx.stride
        grad_input1 = torch.zeros_like(input1)
        grad_input2 = torch.zeros_like(input2)

        ext_module.correlation_backward(
            grad_output,
            input1,
            input2,
            grad_input1,
            grad_input2,
            kH=kH,
            kW=kW,
            patchH=patch_size,
            patchW=patch_size,
            padH=padH,
            padW=padW,
            dilationH=dilationH,
            dilationW=dilationW,
            dilation_patchH=dilation_patchH,
            dilation_patchW=dilation_patchW,
            dH=dH,
            dW=dW)
        return grad_input1, grad_input2, None, None, None, None, None, None

    @staticmethod
    def _output_size(ctx, input1):
        iH, iW = input1.size(2), input1.size(3)
        batch_size = input1.size(0)
        kH, kW = ctx.kernel_size
        patch_size = ctx.patch_size
        dH, dW = ctx.stride
        padH, padW = ctx.padding
        dilationH, dilationW = ctx.dilation
        dilatedKH = (kH - 1) * dilationH + 1
        dilatedKW = (kW - 1) * dilationW + 1

        oH = int((iH + 2 * padH - dilatedKH) / dH + 1)
        oW = int((iW + 2 * padW - dilatedKW) / dW + 1)

        output_size = (batch_size, patch_size, patch_size, oH, oW)
        return output_size


class Correlation(nn.Module):
    r"""Correlation operator

    This correlation operator works for optical flow correlation computation.

    There are two batched tensors with shape :math:`(N, C, H, W)`,
    and the correlation output's shape is :math:`(N, max\_displacement \times
    2 + 1, max\_displacement \times 2 + 1, H_{out}, W_{out})`

    where

    .. math::
        H_{out} = \left\lfloor\frac{H_{in} + 2 \times padding -
            dilation \times (kernel\_size - 1) - 1}
            {stride} + 1\right\rfloor

    .. math::
        W_{out} = \left\lfloor\frac{W_{in} + 2 \times padding - dilation
            \times (kernel\_size - 1) - 1}
            {stride} + 1\right\rfloor

    the correlation item :math:`(N_i, dy, dx)` is formed by taking the sliding
    window convolution between input1 and shifted input2,

    .. math::
        Corr(N_i, dx, dy) =
        \sum_{c=0}^{C-1}
        input1(N_i, c) \star
        \mathcal{S}(input2(N_i, c), dy, dx)

    where :math:`\star` is the valid 2d sliding window convolution operator,
    and :math:`\mathcal{S}` means shifting the input features (auto-complete
    zero marginal), and :math:`dx, dy` are shifting distance, :math:`dx, dy \in
    [-max\_displacement \times dilation\_patch, max\_displacement \times
    dilation\_patch]`.

    Args:
        kernel_size (int): The size of the sliding window, i.e. the local
            neighborhood representing the center points and involved in the
            correlation computation. Defaults to 1.
        max_displacement (int): The radius for computing the correlation
            volume; the actual working space can be dilated by
            dilation_patch. Defaults to 1.
        stride (int): The stride of the sliding blocks in the input spatial
            dimensions. Defaults to 1.
        padding (int): Zero padding added to all four sides of input1.
            Defaults to 0.
        dilation (int): The spacing of the local neighborhood that will be
            involved in the correlation. Defaults to 1.
        dilation_patch (int): The spacing between positions at which the
            correlation is computed. Defaults to 1.
    """

    def __init__(self,
                 kernel_size: int = 1,
                 max_displacement: int = 1,
                 stride: int = 1,
                 padding: int = 0,
                 dilation: int = 1,
                 dilation_patch: int = 1) -> None:
        super().__init__()
        self.kernel_size = kernel_size
        self.max_displacement = max_displacement
        self.stride = stride
        self.padding = padding
        self.dilation = dilation
        self.dilation_patch = dilation_patch

    def forward(self, input1: Tensor, input2: Tensor) -> Tensor:
        return CorrelationFunction.apply(input1, input2, self.kernel_size,
                                         self.max_displacement, self.stride,
                                         self.padding, self.dilation,
                                         self.dilation_patch)

    def __repr__(self) -> str:
        s = self.__class__.__name__
        s += f'(kernel_size={self.kernel_size}, '
        s += f'max_displacement={self.max_displacement}, '
        s += f'stride={self.stride}, '
        s += f'padding={self.padding}, '
        s += f'dilation={self.dilation}, '
        s += f'dilation_patch={self.dilation_patch})'
        return s
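
A hedged shape check for Correlation; this assumes the compiled `_ext` extension is built and a CUDA device is present, and the numbers are illustrative:

import torch
from annotator.mmpkg.mmcv.ops.correlation import Correlation

corr = Correlation(max_displacement=3)
x1 = torch.randn(2, 8, 32, 32, device='cuda')
x2 = torch.randn(2, 8, 32, 32, device='cuda')
out = corr(x1, x2)
# patch size = 2 * max_displacement + 1 = 7, and with kernel_size=1,
# stride=1, padding=0 the spatial size is preserved:
assert out.shape == (2, 7, 7, 32, 32)
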
extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/deform_conv.py
ADDED
@@ -0,0 +1,405 @@
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Tuple, Union

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import Tensor
from torch.autograd import Function
from torch.autograd.function import once_differentiable
from torch.nn.modules.utils import _pair, _single

from annotator.mmpkg.mmcv.utils import deprecated_api_warning
from ..cnn import CONV_LAYERS
from ..utils import ext_loader, print_log

ext_module = ext_loader.load_ext('_ext', [
    'deform_conv_forward', 'deform_conv_backward_input',
    'deform_conv_backward_parameters'
])


class DeformConv2dFunction(Function):

    @staticmethod
    def symbolic(g,
                 input,
                 offset,
                 weight,
                 stride,
                 padding,
                 dilation,
                 groups,
                 deform_groups,
                 bias=False,
                 im2col_step=32):
        return g.op(
            'mmcv::MMCVDeformConv2d',
            input,
            offset,
            weight,
            stride_i=stride,
            padding_i=padding,
            dilation_i=dilation,
            groups_i=groups,
            deform_groups_i=deform_groups,
            bias_i=bias,
            im2col_step_i=im2col_step)

    @staticmethod
    def forward(ctx,
                input,
                offset,
                weight,
                stride=1,
                padding=0,
                dilation=1,
                groups=1,
                deform_groups=1,
                bias=False,
                im2col_step=32):
        if input is not None and input.dim() != 4:
            raise ValueError(
                f'Expected 4D tensor as input, got {input.dim()}D tensor '
                'instead.')
        assert bias is False, 'Only support bias is False.'
        ctx.stride = _pair(stride)
        ctx.padding = _pair(padding)
        ctx.dilation = _pair(dilation)
        ctx.groups = groups
        ctx.deform_groups = deform_groups
        ctx.im2col_step = im2col_step

        # When pytorch version >= 1.6.0, amp is adopted for fp16 mode;
        # amp won't cast the type of the model (float32), but "offset" is
        # cast to float16 by nn.Conv2d automatically, leading to a type
        # mismatch with input (when it is float32) or weight.
        # The flag for whether to use fp16 or amp is the type of "offset",
        # so we cast weight and input to temporarily support fp16 and amp
        # whatever the pytorch version is.
        input = input.type_as(offset)
        weight = weight.type_as(input)
        ctx.save_for_backward(input, offset, weight)

        output = input.new_empty(
            DeformConv2dFunction._output_size(ctx, input, weight))

        ctx.bufs_ = [input.new_empty(0), input.new_empty(0)]  # columns, ones

        cur_im2col_step = min(ctx.im2col_step, input.size(0))
        assert (input.size(0) %
                cur_im2col_step) == 0, 'im2col step must divide batchsize'
        ext_module.deform_conv_forward(
            input,
            weight,
            offset,
            output,
            ctx.bufs_[0],
            ctx.bufs_[1],
            kW=weight.size(3),
            kH=weight.size(2),
            dW=ctx.stride[1],
            dH=ctx.stride[0],
            padW=ctx.padding[1],
            padH=ctx.padding[0],
            dilationW=ctx.dilation[1],
            dilationH=ctx.dilation[0],
            group=ctx.groups,
            deformable_group=ctx.deform_groups,
            im2col_step=cur_im2col_step)
        return output

    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        input, offset, weight = ctx.saved_tensors

        grad_input = grad_offset = grad_weight = None

        cur_im2col_step = min(ctx.im2col_step, input.size(0))
        assert (input.size(0) % cur_im2col_step
                ) == 0, 'batch size must be divisible by im2col_step'

        grad_output = grad_output.contiguous()
        if ctx.needs_input_grad[0] or ctx.needs_input_grad[1]:
            grad_input = torch.zeros_like(input)
            grad_offset = torch.zeros_like(offset)
            ext_module.deform_conv_backward_input(
                input,
                offset,
                grad_output,
                grad_input,
                grad_offset,
                weight,
                ctx.bufs_[0],
                kW=weight.size(3),
                kH=weight.size(2),
                dW=ctx.stride[1],
                dH=ctx.stride[0],
                padW=ctx.padding[1],
                padH=ctx.padding[0],
                dilationW=ctx.dilation[1],
                dilationH=ctx.dilation[0],
                group=ctx.groups,
                deformable_group=ctx.deform_groups,
                im2col_step=cur_im2col_step)

        if ctx.needs_input_grad[2]:
            grad_weight = torch.zeros_like(weight)
            ext_module.deform_conv_backward_parameters(
                input,
                offset,
                grad_output,
                grad_weight,
                ctx.bufs_[0],
                ctx.bufs_[1],
                kW=weight.size(3),
                kH=weight.size(2),
                dW=ctx.stride[1],
                dH=ctx.stride[0],
                padW=ctx.padding[1],
                padH=ctx.padding[0],
                dilationW=ctx.dilation[1],
                dilationH=ctx.dilation[0],
                group=ctx.groups,
                deformable_group=ctx.deform_groups,
                scale=1,
                im2col_step=cur_im2col_step)

        return grad_input, grad_offset, grad_weight, \
            None, None, None, None, None, None, None

    @staticmethod
    def _output_size(ctx, input, weight):
        channels = weight.size(0)
        output_size = (input.size(0), channels)
        for d in range(input.dim() - 2):
            in_size = input.size(d + 2)
            pad = ctx.padding[d]
            kernel = ctx.dilation[d] * (weight.size(d + 2) - 1) + 1
            stride_ = ctx.stride[d]
            output_size += ((in_size + (2 * pad) - kernel) // stride_ + 1, )
        if not all(map(lambda s: s > 0, output_size)):
            raise ValueError(
                'convolution input is too small (output would be ' +
                'x'.join(map(str, output_size)) + ')')
        return output_size


deform_conv2d = DeformConv2dFunction.apply


class DeformConv2d(nn.Module):
    r"""Deformable 2D convolution.

    Applies a deformable 2D convolution over an input signal composed of
    several input planes. DeformConv2d was described in the paper
    `Deformable Convolutional Networks
    <https://arxiv.org/pdf/1703.06211.pdf>`_

    Note:
        The argument ``im2col_step`` was added in version 1.3.17; it is the
        number of samples processed by the ``im2col_cuda_kernel`` per call.
        It enables users to define ``batch_size`` and ``im2col_step`` more
        flexibly and solves `issue mmcv#1440
        <https://github.com/open-mmlab/mmcv/issues/1440>`_.

    Args:
        in_channels (int): Number of channels in the input image.
        out_channels (int): Number of channels produced by the convolution.
        kernel_size (int, tuple): Size of the convolving kernel.
        stride (int, tuple): Stride of the convolution. Default: 1.
        padding (int or tuple): Zero-padding added to both sides of the input.
            Default: 0.
        dilation (int or tuple): Spacing between kernel elements. Default: 1.
        groups (int): Number of blocked connections from input channels to
            output channels. Default: 1.
        deform_groups (int): Number of deformable group partitions.
        bias (bool): If True, adds a learnable bias to the output.
            Default: False.
        im2col_step (int): Number of samples processed by im2col_cuda_kernel
            per call. It will work when ``batch_size`` > ``im2col_step``, but
            ``batch_size`` must be divisible by ``im2col_step``. Default: 32.
            `New in version 1.3.17.`
    """

    @deprecated_api_warning({'deformable_groups': 'deform_groups'},
                            cls_name='DeformConv2d')
    def __init__(self,
                 in_channels: int,
                 out_channels: int,
                 kernel_size: Union[int, Tuple[int, ...]],
                 stride: Union[int, Tuple[int, ...]] = 1,
                 padding: Union[int, Tuple[int, ...]] = 0,
                 dilation: Union[int, Tuple[int, ...]] = 1,
                 groups: int = 1,
                 deform_groups: int = 1,
                 bias: bool = False,
                 im2col_step: int = 32) -> None:
        super(DeformConv2d, self).__init__()

        assert not bias, \
            f'bias={bias} is not supported in DeformConv2d.'
        assert in_channels % groups == 0, \
            f'in_channels {in_channels} is not divisible by groups {groups}'
        assert out_channels % groups == 0, \
            f'out_channels {out_channels} is not divisible by groups ' \
            f'{groups}'

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = _pair(kernel_size)
        self.stride = _pair(stride)
        self.padding = _pair(padding)
        self.dilation = _pair(dilation)
        self.groups = groups
        self.deform_groups = deform_groups
        self.im2col_step = im2col_step
        # enable compatibility with nn.Conv2d
        self.transposed = False
        self.output_padding = _single(0)

        # only weight, no bias
        self.weight = nn.Parameter(
            torch.Tensor(out_channels, in_channels // self.groups,
                         *self.kernel_size))

        self.reset_parameters()

    def reset_parameters(self):
        # switch the initialization of `self.weight` to the standard kaiming
        # method described in `Delving deep into rectifiers: Surpassing
        # human-level performance on ImageNet classification` - He, K. et al.
        # (2015), using a uniform distribution
        nn.init.kaiming_uniform_(self.weight, nonlinearity='relu')

    def forward(self, x: Tensor, offset: Tensor) -> Tensor:
        """Deformable Convolutional forward function.

        Args:
            x (Tensor): Input feature, shape (B, C_in, H_in, W_in)
            offset (Tensor): Offset for deformable convolution, shape
                (B, deform_groups*kernel_size[0]*kernel_size[1]*2,
                H_out, W_out), H_out, W_out are equal to the output's.

                An offset is like `[y0, x0, y1, x1, y2, x2, ..., y8, x8]`.
                The spatial arrangement is like:

                .. code:: text

                    (x0, y0) (x1, y1) (x2, y2)
                    (x3, y3) (x4, y4) (x5, y5)
                    (x6, y6) (x7, y7) (x8, y8)

        Returns:
            Tensor: Output of the layer.
        """
        # To fix an assert error in deform_conv_cuda.cpp:128
        # input image is smaller than kernel
        input_pad = (x.size(2) < self.kernel_size[0]) or (x.size(3) <
                                                          self.kernel_size[1])
        if input_pad:
            pad_h = max(self.kernel_size[0] - x.size(2), 0)
            pad_w = max(self.kernel_size[1] - x.size(3), 0)
            x = F.pad(x, (0, pad_w, 0, pad_h), 'constant', 0).contiguous()
            offset = F.pad(offset, (0, pad_w, 0, pad_h), 'constant', 0)
            offset = offset.contiguous()
        out = deform_conv2d(x, offset, self.weight, self.stride, self.padding,
                            self.dilation, self.groups, self.deform_groups,
                            False, self.im2col_step)
        if input_pad:
            out = out[:, :, :out.size(2) - pad_h, :out.size(3) -
                      pad_w].contiguous()
        return out

    def __repr__(self):
        s = self.__class__.__name__
        s += f'(in_channels={self.in_channels},\n'
        s += f'out_channels={self.out_channels},\n'
        s += f'kernel_size={self.kernel_size},\n'
        s += f'stride={self.stride},\n'
        s += f'padding={self.padding},\n'
        s += f'dilation={self.dilation},\n'
        s += f'groups={self.groups},\n'
        s += f'deform_groups={self.deform_groups},\n'
        # bias is not supported in DeformConv2d.
        s += 'bias=False)'
        return s


@CONV_LAYERS.register_module('DCN')
class DeformConv2dPack(DeformConv2d):
    """A Deformable Conv Encapsulation that acts as normal Conv layers.

    The offset tensor is like `[y0, x0, y1, x1, y2, x2, ..., y8, x8]`.
    The spatial arrangement is like:

    .. code:: text

        (x0, y0) (x1, y1) (x2, y2)
        (x3, y3) (x4, y4) (x5, y5)
        (x6, y6) (x7, y7) (x8, y8)

    Args:
        in_channels (int): Same as nn.Conv2d.
        out_channels (int): Same as nn.Conv2d.
        kernel_size (int or tuple[int]): Same as nn.Conv2d.
        stride (int or tuple[int]): Same as nn.Conv2d.
        padding (int or tuple[int]): Same as nn.Conv2d.
        dilation (int or tuple[int]): Same as nn.Conv2d.
        groups (int): Same as nn.Conv2d.
        bias (bool or str): If specified as `auto`, it will be decided by the
            norm_cfg. Bias will be set as True if norm_cfg is None, otherwise
            False.
    """

    _version = 2

    def __init__(self, *args, **kwargs):
        super(DeformConv2dPack, self).__init__(*args, **kwargs)
        self.conv_offset = nn.Conv2d(
            self.in_channels,
            self.deform_groups * 2 * self.kernel_size[0] * self.kernel_size[1],
            kernel_size=self.kernel_size,
            stride=_pair(self.stride),
            padding=_pair(self.padding),
            dilation=_pair(self.dilation),
            bias=True)
        self.init_offset()

    def init_offset(self):
        self.conv_offset.weight.data.zero_()
        self.conv_offset.bias.data.zero_()

    def forward(self, x):
        offset = self.conv_offset(x)
        return deform_conv2d(x, offset, self.weight, self.stride, self.padding,
                             self.dilation, self.groups, self.deform_groups,
                             False, self.im2col_step)

    def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict,
                              missing_keys, unexpected_keys, error_msgs):
        version = local_metadata.get('version', None)

        if version is None or version < 2:
            # the key is different in early versions
            # In version < 2, DeformConvPack loads previous benchmark models.
            if (prefix + 'conv_offset.weight' not in state_dict
                    and prefix[:-1] + '_offset.weight' in state_dict):
                state_dict[prefix + 'conv_offset.weight'] = state_dict.pop(
                    prefix[:-1] + '_offset.weight')
            if (prefix + 'conv_offset.bias' not in state_dict
                    and prefix[:-1] + '_offset.bias' in state_dict):
                state_dict[prefix +
                           'conv_offset.bias'] = state_dict.pop(prefix[:-1] +
                                                                '_offset.bias')

        if version is not None and version > 1:
            print_log(
                f'DeformConv2dPack {prefix.rstrip(".")} is upgraded to '
                'version 2.',
                logger='root')

        super()._load_from_state_dict(state_dict, prefix, local_metadata,
                                      strict, missing_keys, unexpected_keys,
                                      error_msgs)
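
A minimal usage sketch for the two entry points above (compiled `_ext` extension and a CUDA device assumed; shapes illustrative). DeformConv2dPack predicts its own offsets, so it drops in for nn.Conv2d, while DeformConv2d expects the offset tensor explicitly:

import torch
from annotator.mmpkg.mmcv.ops.deform_conv import DeformConv2d, DeformConv2dPack

x = torch.randn(4, 16, 28, 28, device='cuda')

# Packed variant: offsets come from an internal, zero-initialized conv.
dcn = DeformConv2dPack(16, 32, kernel_size=3, padding=1).cuda()
out = dcn(x)  # (4, 32, 28, 28)

# Explicit variant: offset has 2 * deform_groups * kH * kW channels.
conv = DeformConv2d(16, 32, kernel_size=3, padding=1).cuda()
offset = torch.zeros(4, 2 * 3 * 3, 28, 28, device='cuda')
out = conv(x, offset)  # all-zero offsets reduce to a regular 3x3 conv
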
extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/deform_roi_pool.py
ADDED
@@ -0,0 +1,204 @@
# Copyright (c) OpenMMLab. All rights reserved.
from torch import nn
from torch.autograd import Function
from torch.autograd.function import once_differentiable
from torch.nn.modules.utils import _pair

from ..utils import ext_loader

ext_module = ext_loader.load_ext(
    '_ext', ['deform_roi_pool_forward', 'deform_roi_pool_backward'])


class DeformRoIPoolFunction(Function):

    @staticmethod
    def symbolic(g, input, rois, offset, output_size, spatial_scale,
                 sampling_ratio, gamma):
        return g.op(
            'mmcv::MMCVDeformRoIPool',
            input,
            rois,
            offset,
            pooled_height_i=output_size[0],
            pooled_width_i=output_size[1],
            spatial_scale_f=spatial_scale,
            sampling_ratio_f=sampling_ratio,
            gamma_f=gamma)

    @staticmethod
    def forward(ctx,
                input,
                rois,
                offset,
                output_size,
                spatial_scale=1.0,
                sampling_ratio=0,
                gamma=0.1):
        if offset is None:
            offset = input.new_zeros(0)
        ctx.output_size = _pair(output_size)
        ctx.spatial_scale = float(spatial_scale)
        ctx.sampling_ratio = int(sampling_ratio)
        ctx.gamma = float(gamma)

        assert rois.size(1) == 5, 'RoI must be (idx, x1, y1, x2, y2)!'

        output_shape = (rois.size(0), input.size(1), ctx.output_size[0],
                        ctx.output_size[1])
        output = input.new_zeros(output_shape)

        ext_module.deform_roi_pool_forward(
            input,
            rois,
            offset,
            output,
            pooled_height=ctx.output_size[0],
            pooled_width=ctx.output_size[1],
            spatial_scale=ctx.spatial_scale,
            sampling_ratio=ctx.sampling_ratio,
            gamma=ctx.gamma)

        ctx.save_for_backward(input, rois, offset)
        return output

    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        input, rois, offset = ctx.saved_tensors
        grad_input = grad_output.new_zeros(input.shape)
        grad_offset = grad_output.new_zeros(offset.shape)

        ext_module.deform_roi_pool_backward(
            grad_output,
            input,
            rois,
            offset,
            grad_input,
            grad_offset,
            pooled_height=ctx.output_size[0],
            pooled_width=ctx.output_size[1],
            spatial_scale=ctx.spatial_scale,
            sampling_ratio=ctx.sampling_ratio,
            gamma=ctx.gamma)
        if grad_offset.numel() == 0:
            grad_offset = None
        return grad_input, None, grad_offset, None, None, None, None


deform_roi_pool = DeformRoIPoolFunction.apply


class DeformRoIPool(nn.Module):

    def __init__(self,
                 output_size,
                 spatial_scale=1.0,
                 sampling_ratio=0,
                 gamma=0.1):
        super(DeformRoIPool, self).__init__()
        self.output_size = _pair(output_size)
        self.spatial_scale = float(spatial_scale)
        self.sampling_ratio = int(sampling_ratio)
        self.gamma = float(gamma)

    def forward(self, input, rois, offset=None):
        return deform_roi_pool(input, rois, offset, self.output_size,
                               self.spatial_scale, self.sampling_ratio,
                               self.gamma)


class DeformRoIPoolPack(DeformRoIPool):

    def __init__(self,
                 output_size,
                 output_channels,
                 deform_fc_channels=1024,
                 spatial_scale=1.0,
                 sampling_ratio=0,
                 gamma=0.1):
        super(DeformRoIPoolPack, self).__init__(output_size, spatial_scale,
                                                sampling_ratio, gamma)

        self.output_channels = output_channels
        self.deform_fc_channels = deform_fc_channels

        self.offset_fc = nn.Sequential(
            nn.Linear(
                self.output_size[0] * self.output_size[1] *
                self.output_channels, self.deform_fc_channels),
            nn.ReLU(inplace=True),
            nn.Linear(self.deform_fc_channels, self.deform_fc_channels),
            nn.ReLU(inplace=True),
            nn.Linear(self.deform_fc_channels,
                      self.output_size[0] * self.output_size[1] * 2))
        self.offset_fc[-1].weight.data.zero_()
        self.offset_fc[-1].bias.data.zero_()

    def forward(self, input, rois):
        assert input.size(1) == self.output_channels
        x = deform_roi_pool(input, rois, None, self.output_size,
                            self.spatial_scale, self.sampling_ratio,
                            self.gamma)
        rois_num = rois.size(0)
        offset = self.offset_fc(x.view(rois_num, -1))
        offset = offset.view(rois_num, 2, self.output_size[0],
                             self.output_size[1])
        return deform_roi_pool(input, rois, offset, self.output_size,
                               self.spatial_scale, self.sampling_ratio,
                               self.gamma)


class ModulatedDeformRoIPoolPack(DeformRoIPool):

    def __init__(self,
                 output_size,
                 output_channels,
                 deform_fc_channels=1024,
                 spatial_scale=1.0,
                 sampling_ratio=0,
                 gamma=0.1):
        super(ModulatedDeformRoIPoolPack,
              self).__init__(output_size, spatial_scale, sampling_ratio, gamma)

        self.output_channels = output_channels
        self.deform_fc_channels = deform_fc_channels

        self.offset_fc = nn.Sequential(
            nn.Linear(
                self.output_size[0] * self.output_size[1] *
                self.output_channels, self.deform_fc_channels),
            nn.ReLU(inplace=True),
            nn.Linear(self.deform_fc_channels, self.deform_fc_channels),
            nn.ReLU(inplace=True),
            nn.Linear(self.deform_fc_channels,
                      self.output_size[0] * self.output_size[1] * 2))
        self.offset_fc[-1].weight.data.zero_()
        self.offset_fc[-1].bias.data.zero_()

        self.mask_fc = nn.Sequential(
            nn.Linear(
                self.output_size[0] * self.output_size[1] *
                self.output_channels, self.deform_fc_channels),
            nn.ReLU(inplace=True),
            nn.Linear(self.deform_fc_channels,
                      self.output_size[0] * self.output_size[1] * 1),
            nn.Sigmoid())
        self.mask_fc[2].weight.data.zero_()
        self.mask_fc[2].bias.data.zero_()

    def forward(self, input, rois):
        assert input.size(1) == self.output_channels
        x = deform_roi_pool(input, rois, None, self.output_size,
                            self.spatial_scale, self.sampling_ratio,
                            self.gamma)
        rois_num = rois.size(0)
        offset = self.offset_fc(x.view(rois_num, -1))
        offset = offset.view(rois_num, 2, self.output_size[0],
                             self.output_size[1])
        mask = self.mask_fc(x.view(rois_num, -1))
        mask = mask.view(rois_num, 1, self.output_size[0], self.output_size[1])
        d = deform_roi_pool(input, rois, offset, self.output_size,
                            self.spatial_scale, self.sampling_ratio,
                            self.gamma)
        return d * mask
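
A usage sketch for DeformRoIPoolPack (CUDA and the `_ext` extension assumed); rois follow the (batch_idx, x1, y1, x2, y2) layout checked by the assert above:

import torch
from annotator.mmpkg.mmcv.ops.deform_roi_pool import DeformRoIPoolPack

pool = DeformRoIPoolPack(output_size=7, output_channels=16).cuda()
feat = torch.randn(1, 16, 50, 50, device='cuda')
rois = torch.tensor([[0., 4., 4., 36., 36.]], device='cuda')
out = pool(feat, rois)  # (1, 16, 7, 7)
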
extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/deprecated_wrappers.py
ADDED
@@ -0,0 +1,43 @@
# Copyright (c) OpenMMLab. All rights reserved.
# This file is for backward compatibility.
# Module wrappers for empty tensor have been moved to mmcv.cnn.bricks.
import warnings

from ..cnn.bricks.wrappers import Conv2d, ConvTranspose2d, Linear, MaxPool2d


class Conv2d_deprecated(Conv2d):

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        warnings.warn(
            'Importing Conv2d wrapper from "mmcv.ops" will be deprecated in'
            ' the future. Please import it from "mmcv.cnn" instead')


class ConvTranspose2d_deprecated(ConvTranspose2d):

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        warnings.warn(
            'Importing ConvTranspose2d wrapper from "mmcv.ops" will be '
            'deprecated in the future. Please import it from "mmcv.cnn" '
            'instead')


class MaxPool2d_deprecated(MaxPool2d):

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        warnings.warn(
            'Importing MaxPool2d wrapper from "mmcv.ops" will be deprecated in'
            ' the future. Please import it from "mmcv.cnn" instead')


class Linear_deprecated(Linear):

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        warnings.warn(
            'Importing Linear wrapper from "mmcv.ops" will be deprecated in'
            ' the future. Please import it from "mmcv.cnn" instead')
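
Since these aliases exist only for backward compatibility, new code should take the wrappers from the cnn package; the import below mirrors the path this file itself uses and avoids the warning:

from annotator.mmpkg.mmcv.cnn.bricks.wrappers import Conv2d  # preferred path
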
extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/focal_loss.py
ADDED
@@ -0,0 +1,212 @@
# Copyright (c) OpenMMLab. All rights reserved.
import torch
import torch.nn as nn
from torch.autograd import Function
from torch.autograd.function import once_differentiable

from ..utils import ext_loader

ext_module = ext_loader.load_ext('_ext', [
    'sigmoid_focal_loss_forward', 'sigmoid_focal_loss_backward',
    'softmax_focal_loss_forward', 'softmax_focal_loss_backward'
])


class SigmoidFocalLossFunction(Function):

    @staticmethod
    def symbolic(g, input, target, gamma, alpha, weight, reduction):
        return g.op(
            'mmcv::MMCVSigmoidFocalLoss',
            input,
            target,
            gamma_f=gamma,
            alpha_f=alpha,
            weight_f=weight,
            reduction_s=reduction)

    @staticmethod
    def forward(ctx,
                input,
                target,
                gamma=2.0,
                alpha=0.25,
                weight=None,
                reduction='mean'):

        assert isinstance(target, (torch.LongTensor, torch.cuda.LongTensor))
        assert input.dim() == 2
        assert target.dim() == 1
        assert input.size(0) == target.size(0)
        if weight is None:
            weight = input.new_empty(0)
        else:
            assert weight.dim() == 1
            assert input.size(1) == weight.size(0)
        ctx.reduction_dict = {'none': 0, 'mean': 1, 'sum': 2}
        assert reduction in ctx.reduction_dict.keys()

        ctx.gamma = float(gamma)
        ctx.alpha = float(alpha)
        ctx.reduction = ctx.reduction_dict[reduction]

        output = input.new_zeros(input.size())

        ext_module.sigmoid_focal_loss_forward(
            input, target, weight, output, gamma=ctx.gamma, alpha=ctx.alpha)
        if ctx.reduction == ctx.reduction_dict['mean']:
            output = output.sum() / input.size(0)
        elif ctx.reduction == ctx.reduction_dict['sum']:
            output = output.sum()
        ctx.save_for_backward(input, target, weight)
        return output

    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        input, target, weight = ctx.saved_tensors

        grad_input = input.new_zeros(input.size())

        ext_module.sigmoid_focal_loss_backward(
            input,
            target,
            weight,
            grad_input,
            gamma=ctx.gamma,
            alpha=ctx.alpha)

        grad_input *= grad_output
        if ctx.reduction == ctx.reduction_dict['mean']:
            grad_input /= input.size(0)
        return grad_input, None, None, None, None, None


sigmoid_focal_loss = SigmoidFocalLossFunction.apply


class SigmoidFocalLoss(nn.Module):

    def __init__(self, gamma, alpha, weight=None, reduction='mean'):
        super(SigmoidFocalLoss, self).__init__()
        self.gamma = gamma
        self.alpha = alpha
        self.register_buffer('weight', weight)
        self.reduction = reduction

    def forward(self, input, target):
        return sigmoid_focal_loss(input, target, self.gamma, self.alpha,
                                  self.weight, self.reduction)

    def __repr__(self):
        s = self.__class__.__name__
        s += f'(gamma={self.gamma}, '
        s += f'alpha={self.alpha}, '
        s += f'reduction={self.reduction})'
        return s


class SoftmaxFocalLossFunction(Function):

    @staticmethod
    def symbolic(g, input, target, gamma, alpha, weight, reduction):
        return g.op(
            'mmcv::MMCVSoftmaxFocalLoss',
            input,
            target,
            gamma_f=gamma,
            alpha_f=alpha,
            weight_f=weight,
            reduction_s=reduction)

    @staticmethod
    def forward(ctx,
                input,
                target,
                gamma=2.0,
                alpha=0.25,
                weight=None,
                reduction='mean'):

        assert isinstance(target, (torch.LongTensor, torch.cuda.LongTensor))
        assert input.dim() == 2
        assert target.dim() == 1
        assert input.size(0) == target.size(0)
        if weight is None:
            weight = input.new_empty(0)
        else:
            assert weight.dim() == 1
            assert input.size(1) == weight.size(0)
        ctx.reduction_dict = {'none': 0, 'mean': 1, 'sum': 2}
        assert reduction in ctx.reduction_dict.keys()

        ctx.gamma = float(gamma)
        ctx.alpha = float(alpha)
        ctx.reduction = ctx.reduction_dict[reduction]

        channel_stats, _ = torch.max(input, dim=1)
        input_softmax = input - channel_stats.unsqueeze(1).expand_as(input)
        input_softmax.exp_()

        channel_stats = input_softmax.sum(dim=1)
        input_softmax /= channel_stats.unsqueeze(1).expand_as(input)

        output = input.new_zeros(input.size(0))
        ext_module.softmax_focal_loss_forward(
            input_softmax,
            target,
            weight,
            output,
            gamma=ctx.gamma,
            alpha=ctx.alpha)

        if ctx.reduction == ctx.reduction_dict['mean']:
            output = output.sum() / input.size(0)
        elif ctx.reduction == ctx.reduction_dict['sum']:
            output = output.sum()
        ctx.save_for_backward(input_softmax, target, weight)
        return output

    @staticmethod
    def backward(ctx, grad_output):
        input_softmax, target, weight = ctx.saved_tensors
        buff = input_softmax.new_zeros(input_softmax.size(0))
        grad_input = input_softmax.new_zeros(input_softmax.size())

        ext_module.softmax_focal_loss_backward(
            input_softmax,
            target,
            weight,
            buff,
            grad_input,
            gamma=ctx.gamma,
            alpha=ctx.alpha)

        grad_input *= grad_output
        if ctx.reduction == ctx.reduction_dict['mean']:
            grad_input /= input_softmax.size(0)
        return grad_input, None, None, None, None, None


softmax_focal_loss = SoftmaxFocalLossFunction.apply


class SoftmaxFocalLoss(nn.Module):

    def __init__(self, gamma, alpha, weight=None, reduction='mean'):
        super(SoftmaxFocalLoss, self).__init__()
        self.gamma = gamma
        self.alpha = alpha
        self.register_buffer('weight', weight)
        self.reduction = reduction

    def forward(self, input, target):
        return softmax_focal_loss(input, target, self.gamma, self.alpha,
                                  self.weight, self.reduction)

    def __repr__(self):
        s = self.__class__.__name__
        s += f'(gamma={self.gamma}, '
        s += f'alpha={self.alpha}, '
        s += f'reduction={self.reduction})'
        return s
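
A minimal usage sketch for SigmoidFocalLoss (CUDA and the `_ext` extension assumed); per the asserts above, the target must be a 1D LongTensor of class indices:

import torch
from annotator.mmpkg.mmcv.ops.focal_loss import SigmoidFocalLoss

loss_fn = SigmoidFocalLoss(gamma=2.0, alpha=0.25)
logits = torch.randn(8, 4, device='cuda', requires_grad=True)  # (N, classes)
labels = torch.randint(0, 4, (8, ), device='cuda')             # (N, ), int64
loss = loss_fn(logits, labels)  # scalar, 'mean' reduction by default
loss.backward()
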
extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/furthest_point_sample.py
ADDED
@@ -0,0 +1,83 @@
import torch
from torch.autograd import Function

from ..utils import ext_loader

ext_module = ext_loader.load_ext('_ext', [
    'furthest_point_sampling_forward',
    'furthest_point_sampling_with_dist_forward'
])


class FurthestPointSampling(Function):
    """Uses iterative furthest point sampling to select a set of features whose
    corresponding points have the furthest distance."""

    @staticmethod
    def forward(ctx, points_xyz: torch.Tensor,
                num_points: int) -> torch.Tensor:
        """
        Args:
            points_xyz (Tensor): (B, N, 3) where N > num_points.
            num_points (int): Number of points in the sampled set.

        Returns:
            Tensor: (B, num_points) indices of the sampled points.
        """
        assert points_xyz.is_contiguous()

        B, N = points_xyz.size()[:2]
        output = torch.cuda.IntTensor(B, num_points)
        temp = torch.cuda.FloatTensor(B, N).fill_(1e10)

        ext_module.furthest_point_sampling_forward(
            points_xyz,
            temp,
            output,
            b=B,
            n=N,
            m=num_points,
        )
        if torch.__version__ != 'parrots':
            ctx.mark_non_differentiable(output)
        return output

    @staticmethod
    def backward(xyz, a=None):
        return None, None


class FurthestPointSamplingWithDist(Function):
    """Uses iterative furthest point sampling to select a set of features whose
    corresponding points have the furthest distance."""

    @staticmethod
    def forward(ctx, points_dist: torch.Tensor,
                num_points: int) -> torch.Tensor:
        """
        Args:
            points_dist (Tensor): (B, N, N) Distance between each point pair.
            num_points (int): Number of points in the sampled set.

        Returns:
            Tensor: (B, num_points) indices of the sampled points.
        """
        assert points_dist.is_contiguous()

        B, N, _ = points_dist.size()
        output = points_dist.new_zeros([B, num_points], dtype=torch.int32)
        temp = points_dist.new_zeros([B, N]).fill_(1e10)

        ext_module.furthest_point_sampling_with_dist_forward(
            points_dist, temp, output, b=B, n=N, m=num_points)
        if torch.__version__ != 'parrots':
            ctx.mark_non_differentiable(output)
        return output

    @staticmethod
    def backward(xyz, a=None):
        return None, None


furthest_point_sample = FurthestPointSampling.apply
furthest_point_sample_with_dist = FurthestPointSamplingWithDist.apply
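
A usage sketch for furthest_point_sample; a CUDA device is required by the op, as the torch.cuda.* allocations above imply, and the shapes are illustrative:

import torch
from annotator.mmpkg.mmcv.ops.furthest_point_sample import furthest_point_sample

points = torch.rand(2, 1024, 3, device='cuda').contiguous()  # (B, N, 3)
idx = furthest_point_sample(points, 128)                     # (2, 128) int32
# Gather the sampled coordinates back out of the point cloud:
sampled = torch.gather(points, 1, idx.long().unsqueeze(-1).expand(-1, -1, 3))
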
extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/fused_bias_leakyrelu.py
ADDED
@@ -0,0 +1,268 @@
# modified from https://github.com/rosinality/stylegan2-pytorch/blob/master/op/fused_act.py # noqa:E501

# Copyright (c) 2021, NVIDIA Corporation. All rights reserved.
# NVIDIA Source Code License for StyleGAN2 with Adaptive Discriminator
# Augmentation (ADA)
# =======================================================================

# 1. Definitions

# "Licensor" means any person or entity that distributes its Work.

# "Software" means the original work of authorship made available under
# this License.

# "Work" means the Software and any additions to or derivative works of
# the Software that are made available under this License.

# The terms "reproduce," "reproduction," "derivative works," and
# "distribution" have the meaning as provided under U.S. copyright law;
# provided, however, that for the purposes of this License, derivative
# works shall not include works that remain separable from, or merely
# link (or bind by name) to the interfaces of, the Work.

# Works, including the Software, are "made available" under this License
# by including in or with the Work either (a) a copyright notice
# referencing the applicability of this License to the Work, or (b) a
# copy of this License.

# 2. License Grants

#     2.1 Copyright Grant. Subject to the terms and conditions of this
#     License, each Licensor grants to you a perpetual, worldwide,
#     non-exclusive, royalty-free, copyright license to reproduce,
#     prepare derivative works of, publicly display, publicly perform,
#     sublicense and distribute its Work and any resulting derivative
#     works in any form.

# 3. Limitations

#     3.1 Redistribution. You may reproduce or distribute the Work only
#     if (a) you do so under this License, (b) you include a complete
#     copy of this License with your distribution, and (c) you retain
#     without modification any copyright, patent, trademark, or
#     attribution notices that are present in the Work.

#     3.2 Derivative Works. You may specify that additional or different
#     terms apply to the use, reproduction, and distribution of your
#     derivative works of the Work ("Your Terms") only if (a) Your Terms
#     provide that the use limitation in Section 3.3 applies to your
#     derivative works, and (b) you identify the specific derivative
#     works that are subject to Your Terms. Notwithstanding Your Terms,
#     this License (including the redistribution requirements in Section
#     3.1) will continue to apply to the Work itself.

#     3.3 Use Limitation. The Work and any derivative works thereof only
#     may be used or intended for use non-commercially. Notwithstanding
#     the foregoing, NVIDIA and its affiliates may use the Work and any
#     derivative works commercially. As used herein, "non-commercially"
#     means for research or evaluation purposes only.

#     3.4 Patent Claims. If you bring or threaten to bring a patent claim
#     against any Licensor (including any claim, cross-claim or
#     counterclaim in a lawsuit) to enforce any patents that you allege
#     are infringed by any Work, then your rights under this License from
#     such Licensor (including the grant in Section 2.1) will terminate
#     immediately.

#     3.5 Trademarks. This License does not grant any rights to use any
#     Licensor’s or its affiliates’ names, logos, or trademarks, except
#     as necessary to reproduce the notices described in this License.

#     3.6 Termination. If you violate any term of this License, then your
#     rights under this License (including the grant in Section 2.1) will
#     terminate immediately.

# 4. Disclaimer of Warranty.

# THE WORK IS PROVIDED "AS IS" WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR
# NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER
# THIS LICENSE.

# 5. Limitation of Liability.

# EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL
# THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE
# SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT,
# INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF
# OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK
# (INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION,
# LOST PROFITS OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER
# COMMERCIAL DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF
# THE POSSIBILITY OF SUCH DAMAGES.

# =======================================================================

import torch
import torch.nn.functional as F
from torch import nn
from torch.autograd import Function

from ..utils import ext_loader

ext_module = ext_loader.load_ext('_ext', ['fused_bias_leakyrelu'])


class FusedBiasLeakyReLUFunctionBackward(Function):
    """Calculate the second order derivative.

    This function computes the second order derivative for the fused leaky
    relu operation.
    """

    @staticmethod
    def forward(ctx, grad_output, out, negative_slope, scale):
        ctx.save_for_backward(out)
        ctx.negative_slope = negative_slope
        ctx.scale = scale

        empty = grad_output.new_empty(0)

        grad_input = ext_module.fused_bias_leakyrelu(
            grad_output,
            empty,
            out,
            act=3,
            grad=1,
            alpha=negative_slope,
            scale=scale)

        dim = [0]

        if grad_input.ndim > 2:
            dim += list(range(2, grad_input.ndim))

        grad_bias = grad_input.sum(dim).detach()

        return grad_input, grad_bias

    @staticmethod
    def backward(ctx, gradgrad_input, gradgrad_bias):
        out, = ctx.saved_tensors

        # The second order derivative, in fact, contains two parts, but the
        # first part is zero. Thus, we directly consider the second part,
        # which is similar to the first order derivative in implementation.
        gradgrad_out = ext_module.fused_bias_leakyrelu(
            gradgrad_input,
            gradgrad_bias.to(out.dtype),
            out,
            act=3,
            grad=1,
            alpha=ctx.negative_slope,
            scale=ctx.scale)

        return gradgrad_out, None, None, None


class FusedBiasLeakyReLUFunction(Function):

    @staticmethod
    def forward(ctx, input, bias, negative_slope, scale):
        empty = input.new_empty(0)

        out = ext_module.fused_bias_leakyrelu(
            input,
            bias,
            empty,
            act=3,
            grad=0,
            alpha=negative_slope,
            scale=scale)
        ctx.save_for_backward(out)
        ctx.negative_slope = negative_slope
        ctx.scale = scale

        return out

    @staticmethod
    def backward(ctx, grad_output):
        out, = ctx.saved_tensors

        grad_input, grad_bias = FusedBiasLeakyReLUFunctionBackward.apply(
            grad_output, out, ctx.negative_slope, ctx.scale)

        return grad_input, grad_bias, None, None


class FusedBiasLeakyReLU(nn.Module):
    """Fused bias leaky ReLU.

    This function is introduced in the StyleGAN2:
    http://arxiv.org/abs/1912.04958

    The bias term comes from the convolution operation. In addition, to keep
    the variance of the feature map or gradients unchanged, they also adopt a
    scale similar to Kaiming initialization. However, since the
    :math:`1+{alpha}^2` is too small, we can just ignore it. Therefore, the
|
200 |
+
final scale is just :math:`\sqrt{2}`:. Of course, you may change it with # noqa: W605, E501
|
201 |
+
your own scale.
|
202 |
+
|
203 |
+
TODO: Implement the CPU version.
|
204 |
+
|
205 |
+
Args:
|
206 |
+
channel (int): The channel number of the feature map.
|
207 |
+
negative_slope (float, optional): Same as nn.LeakyRelu.
|
208 |
+
Defaults to 0.2.
|
209 |
+
scale (float, optional): A scalar to adjust the variance of the feature
|
210 |
+
map. Defaults to 2**0.5.
|
211 |
+
"""
|
212 |
+
|
213 |
+
def __init__(self, num_channels, negative_slope=0.2, scale=2**0.5):
|
214 |
+
super(FusedBiasLeakyReLU, self).__init__()
|
215 |
+
|
216 |
+
self.bias = nn.Parameter(torch.zeros(num_channels))
|
217 |
+
self.negative_slope = negative_slope
|
218 |
+
self.scale = scale
|
219 |
+
|
220 |
+
def forward(self, input):
|
221 |
+
return fused_bias_leakyrelu(input, self.bias, self.negative_slope,
|
222 |
+
self.scale)
|
223 |
+
|
224 |
+
|
225 |
+
def fused_bias_leakyrelu(input, bias, negative_slope=0.2, scale=2**0.5):
|
226 |
+
"""Fused bias leaky ReLU function.
|
227 |
+
|
228 |
+
This function is introduced in the StyleGAN2:
|
229 |
+
http://arxiv.org/abs/1912.04958
|
230 |
+
|
231 |
+
The bias term comes from the convolution operation. In addition, to keep
|
232 |
+
the variance of the feature map or gradients unchanged, they also adopt a
|
233 |
+
scale similarly with Kaiming initialization. However, since the
|
234 |
+
:math:`1+{alpha}^2` : is too small, we can just ignore it. Therefore, the
|
235 |
+
final scale is just :math:`\sqrt{2}`:. Of course, you may change it with # noqa: W605, E501
|
236 |
+
your own scale.
|
237 |
+
|
238 |
+
Args:
|
239 |
+
input (torch.Tensor): Input feature map.
|
240 |
+
bias (nn.Parameter): The bias from convolution operation.
|
241 |
+
negative_slope (float, optional): Same as nn.LeakyRelu.
|
242 |
+
Defaults to 0.2.
|
243 |
+
scale (float, optional): A scalar to adjust the variance of the feature
|
244 |
+
map. Defaults to 2**0.5.
|
245 |
+
|
246 |
+
Returns:
|
247 |
+
torch.Tensor: Feature map after non-linear activation.
|
248 |
+
"""
|
249 |
+
|
250 |
+
if not input.is_cuda:
|
251 |
+
return bias_leakyrelu_ref(input, bias, negative_slope, scale)
|
252 |
+
|
253 |
+
return FusedBiasLeakyReLUFunction.apply(input, bias.to(input.dtype),
|
254 |
+
negative_slope, scale)
|
255 |
+
|
256 |
+
|
257 |
+
def bias_leakyrelu_ref(x, bias, negative_slope=0.2, scale=2**0.5):
|
258 |
+
|
259 |
+
if bias is not None:
|
260 |
+
assert bias.ndim == 1
|
261 |
+
assert bias.shape[0] == x.shape[1]
|
262 |
+
x = x + bias.reshape([-1 if i == 1 else 1 for i in range(x.ndim)])
|
263 |
+
|
264 |
+
x = F.leaky_relu(x, negative_slope)
|
265 |
+
if scale != 1:
|
266 |
+
x = x * scale
|
267 |
+
|
268 |
+
return x
|
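Usage sketch (editor's addition, not part of the committed file; the shapes below are illustrative assumptions). On a CPU tensor the reference path `bias_leakyrelu_ref` is taken, so this runs without the compiled `_ext` extension:

import torch

feat = torch.randn(2, 64, 32, 32)          # (B, C, H, W) feature map
act = FusedBiasLeakyReLU(num_channels=64)  # learns a per-channel bias
out = act(feat)                            # CPU input falls back to bias_leakyrelu_ref
assert out.shape == feat.shape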
extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/gather_points.py
ADDED
@@ -0,0 +1,57 @@
import torch
from torch.autograd import Function

from ..utils import ext_loader

ext_module = ext_loader.load_ext(
    '_ext', ['gather_points_forward', 'gather_points_backward'])


class GatherPoints(Function):
    """Gather points with given index."""

    @staticmethod
    def forward(ctx, features: torch.Tensor,
                indices: torch.Tensor) -> torch.Tensor:
        """
        Args:
            features (Tensor): (B, C, N) features to gather.
            indices (Tensor): (B, M) where M is the number of points.

        Returns:
            Tensor: (B, C, M) where M is the number of points.
        """
        assert features.is_contiguous()
        assert indices.is_contiguous()

        B, npoint = indices.size()
        _, C, N = features.size()
        output = torch.cuda.FloatTensor(B, C, npoint)

        ext_module.gather_points_forward(
            features, indices, output, b=B, c=C, n=N, npoints=npoint)

        ctx.for_backwards = (indices, C, N)
        if torch.__version__ != 'parrots':
            ctx.mark_non_differentiable(indices)
        return output

    @staticmethod
    def backward(ctx, grad_out):
        idx, C, N = ctx.for_backwards
        B, npoint = idx.size()

        grad_features = torch.cuda.FloatTensor(B, C, N).zero_()
        grad_out_data = grad_out.data.contiguous()
        ext_module.gather_points_backward(
            grad_out_data,
            idx,
            grad_features.data,
            b=B,
            c=C,
            n=N,
            npoints=npoint)
        return grad_features, None


gather_points = GatherPoints.apply
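Usage sketch (editor's addition; assumes a CUDA build of the `_ext` extension, and int32 indices as produced by ops such as furthest point sampling):

import torch

feats = torch.randn(2, 16, 1024, device='cuda')              # (B, C, N)
idx = torch.randint(0, 1024, (2, 128), device='cuda').int()  # (B, M)
sampled = gather_points(feats, idx)                          # (B, C, M) == (2, 16, 128)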
extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/group_points.py
ADDED
@@ -0,0 +1,224 @@
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Tuple

import torch
from torch import nn as nn
from torch.autograd import Function

from ..utils import ext_loader
from .ball_query import ball_query
from .knn import knn

ext_module = ext_loader.load_ext(
    '_ext', ['group_points_forward', 'group_points_backward'])


class QueryAndGroup(nn.Module):
    """Groups points with a ball query of radius.

    Args:
        max_radius (float): The maximum radius of the balls.
            If None is given, we will use kNN sampling instead of ball query.
        sample_num (int): Maximum number of features to gather in the ball.
        min_radius (float, optional): The minimum radius of the balls.
            Default: 0.
        use_xyz (bool, optional): Whether to use xyz.
            Default: True.
        return_grouped_xyz (bool, optional): Whether to return grouped xyz.
            Default: False.
        normalize_xyz (bool, optional): Whether to normalize xyz.
            Default: False.
        uniform_sample (bool, optional): Whether to sample uniformly.
            Default: False.
        return_unique_cnt (bool, optional): Whether to return the count of
            unique samples. Default: False.
        return_grouped_idx (bool, optional): Whether to return grouped idx.
            Default: False.
    """

    def __init__(self,
                 max_radius,
                 sample_num,
                 min_radius=0,
                 use_xyz=True,
                 return_grouped_xyz=False,
                 normalize_xyz=False,
                 uniform_sample=False,
                 return_unique_cnt=False,
                 return_grouped_idx=False):
        super().__init__()
        self.max_radius = max_radius
        self.min_radius = min_radius
        self.sample_num = sample_num
        self.use_xyz = use_xyz
        self.return_grouped_xyz = return_grouped_xyz
        self.normalize_xyz = normalize_xyz
        self.uniform_sample = uniform_sample
        self.return_unique_cnt = return_unique_cnt
        self.return_grouped_idx = return_grouped_idx
        if self.return_unique_cnt:
            assert self.uniform_sample, \
                'uniform_sample should be True when ' \
                'returning the count of unique samples'
        if self.max_radius is None:
            assert not self.normalize_xyz, \
                'can not normalize grouped xyz when max_radius is None'

    def forward(self, points_xyz, center_xyz, features=None):
        """
        Args:
            points_xyz (Tensor): (B, N, 3) xyz coordinates of the features.
            center_xyz (Tensor): (B, npoint, 3) coordinates of the centroids.
            features (Tensor): (B, C, N) descriptors of the features.

        Returns:
            Tensor: (B, 3 + C, npoint, sample_num) Grouped features.
        """
        # if self.max_radius is None, we will perform kNN instead of
        # ball query; idx is of shape [B, npoint, sample_num]
        if self.max_radius is None:
            idx = knn(self.sample_num, points_xyz, center_xyz, False)
            idx = idx.transpose(1, 2).contiguous()
        else:
            idx = ball_query(self.min_radius, self.max_radius,
                             self.sample_num, points_xyz, center_xyz)

        if self.uniform_sample:
            unique_cnt = torch.zeros((idx.shape[0], idx.shape[1]))
            for i_batch in range(idx.shape[0]):
                for i_region in range(idx.shape[1]):
                    unique_ind = torch.unique(idx[i_batch, i_region, :])
                    num_unique = unique_ind.shape[0]
                    unique_cnt[i_batch, i_region] = num_unique
                    sample_ind = torch.randint(
                        0,
                        num_unique, (self.sample_num - num_unique, ),
                        dtype=torch.long)
                    all_ind = torch.cat((unique_ind, unique_ind[sample_ind]))
                    idx[i_batch, i_region, :] = all_ind

        xyz_trans = points_xyz.transpose(1, 2).contiguous()
        # (B, 3, npoint, sample_num)
        grouped_xyz = grouping_operation(xyz_trans, idx)
        grouped_xyz_diff = grouped_xyz - \
            center_xyz.transpose(1, 2).unsqueeze(-1)  # relative offsets
        if self.normalize_xyz:
            grouped_xyz_diff /= self.max_radius

        if features is not None:
            grouped_features = grouping_operation(features, idx)
            if self.use_xyz:
                # (B, C + 3, npoint, sample_num)
                new_features = torch.cat([grouped_xyz_diff, grouped_features],
                                         dim=1)
            else:
                new_features = grouped_features
        else:
            assert self.use_xyz, \
                'Cannot have features=None and use_xyz=False at the same time!'
            new_features = grouped_xyz_diff

        ret = [new_features]
        if self.return_grouped_xyz:
            ret.append(grouped_xyz)
        if self.return_unique_cnt:
            ret.append(unique_cnt)
        if self.return_grouped_idx:
            ret.append(idx)
        if len(ret) == 1:
            return ret[0]
        else:
            return tuple(ret)


class GroupAll(nn.Module):
    """Group xyz with feature.

    Args:
        use_xyz (bool): Whether to use xyz.
    """

    def __init__(self, use_xyz: bool = True):
        super().__init__()
        self.use_xyz = use_xyz

    def forward(self,
                xyz: torch.Tensor,
                new_xyz: torch.Tensor,
                features: torch.Tensor = None):
        """
        Args:
            xyz (Tensor): (B, N, 3) xyz coordinates of the features.
            new_xyz (Tensor): new xyz coordinates of the features.
            features (Tensor): (B, C, N) features to group.

        Returns:
            Tensor: (B, C + 3, 1, N) Grouped feature.
        """
        grouped_xyz = xyz.transpose(1, 2).unsqueeze(2)
        if features is not None:
            grouped_features = features.unsqueeze(2)
            if self.use_xyz:
                # (B, 3 + C, 1, N)
                new_features = torch.cat([grouped_xyz, grouped_features],
                                         dim=1)
            else:
                new_features = grouped_features
        else:
            new_features = grouped_xyz

        return new_features


class GroupingOperation(Function):
    """Group feature with given index."""

    @staticmethod
    def forward(ctx, features: torch.Tensor,
                indices: torch.Tensor) -> torch.Tensor:
        """
        Args:
            features (Tensor): (B, C, N) tensor of features to group.
            indices (Tensor): (B, npoint, nsample) the indices of
                features to group with.

        Returns:
            Tensor: (B, C, npoint, nsample) Grouped features.
        """
        features = features.contiguous()
        indices = indices.contiguous()

        B, nfeatures, nsample = indices.size()
        _, C, N = features.size()
        output = torch.cuda.FloatTensor(B, C, nfeatures, nsample)

        ext_module.group_points_forward(B, C, N, nfeatures, nsample, features,
                                        indices, output)

        ctx.for_backwards = (indices, N)
        return output

    @staticmethod
    def backward(ctx,
                 grad_out: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Args:
            grad_out (Tensor): (B, C, npoint, nsample) tensor of the
                gradients of the output from forward.

        Returns:
            Tensor: (B, C, N) gradient of the features.
        """
        idx, N = ctx.for_backwards

        B, C, npoint, nsample = grad_out.size()
        grad_features = torch.cuda.FloatTensor(B, C, N).zero_()

        grad_out_data = grad_out.data.contiguous()
        ext_module.group_points_backward(B, C, N, npoint, nsample,
                                         grad_out_data, idx,
                                         grad_features.data)
        return grad_features, None


grouping_operation = GroupingOperation.apply
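Usage sketch (editor's addition; CUDA only, and all names and sizes below are illustrative assumptions):

import torch

xyz = torch.randn(2, 1024, 3, device='cuda')      # all points (B, N, 3)
centers = xyz[:, :128, :].contiguous()            # (B, npoint, 3) query centroids
feats = torch.randn(2, 32, 1024, device='cuda')   # per-point descriptors (B, C, N)
grouper = QueryAndGroup(max_radius=0.2, sample_num=16, use_xyz=True)
grouped = grouper(xyz, centers, feats)            # (B, 3 + 32, 128, 16)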
extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/info.py
ADDED
@@ -0,0 +1,36 @@
# Copyright (c) OpenMMLab. All rights reserved.
import glob
import os

import torch

if torch.__version__ == 'parrots':
    import parrots

    def get_compiler_version():
        return 'GCC ' + parrots.version.compiler

    def get_compiling_cuda_version():
        return parrots.version.cuda
else:
    from ..utils import ext_loader
    ext_module = ext_loader.load_ext(
        '_ext', ['get_compiler_version', 'get_compiling_cuda_version'])

    def get_compiler_version():
        return ext_module.get_compiler_version()

    def get_compiling_cuda_version():
        return ext_module.get_compiling_cuda_version()


def get_onnxruntime_op_path():
    wildcard = os.path.join(
        os.path.abspath(os.path.dirname(os.path.dirname(__file__))),
        '_ext_ort.*.so')

    paths = glob.glob(wildcard)
    if len(paths) > 0:
        return paths[0]
    else:
        return ''
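Usage sketch (editor's addition; the printed values are illustrative, they depend on how the extension was built):

print(get_compiler_version())        # e.g. a GCC version string
print(get_compiling_cuda_version())  # e.g. '10.2'
print(get_onnxruntime_op_path())     # '' when no onnxruntime custom-op library is found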
extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/iou3d.py
ADDED
@@ -0,0 +1,85 @@
# Copyright (c) OpenMMLab. All rights reserved.
import torch

from ..utils import ext_loader

ext_module = ext_loader.load_ext('_ext', [
    'iou3d_boxes_iou_bev_forward', 'iou3d_nms_forward',
    'iou3d_nms_normal_forward'
])


def boxes_iou_bev(boxes_a, boxes_b):
    """Calculate boxes IoU in the Bird's Eye View.

    Args:
        boxes_a (torch.Tensor): Input boxes a with shape (M, 5).
        boxes_b (torch.Tensor): Input boxes b with shape (N, 5).

    Returns:
        ans_iou (torch.Tensor): IoU result with shape (M, N).
    """
    ans_iou = boxes_a.new_zeros(
        torch.Size((boxes_a.shape[0], boxes_b.shape[0])))

    ext_module.iou3d_boxes_iou_bev_forward(boxes_a.contiguous(),
                                           boxes_b.contiguous(), ans_iou)

    return ans_iou


def nms_bev(boxes, scores, thresh, pre_max_size=None, post_max_size=None):
    """NMS function GPU implementation (for BEV boxes). The overlap of two
    boxes for IoU calculation is defined as the exact overlapping area of the
    two boxes. In this function, one can also set ``pre_max_size`` and
    ``post_max_size``.

    Args:
        boxes (torch.Tensor): Input boxes with the shape of [N, 5]
            ([x1, y1, x2, y2, ry]).
        scores (torch.Tensor): Scores of boxes with the shape of [N].
        thresh (float): Overlap threshold of NMS.
        pre_max_size (int, optional): Max size of boxes before NMS.
            Default: None.
        post_max_size (int, optional): Max size of boxes after NMS.
            Default: None.

    Returns:
        torch.Tensor: Indexes after NMS.
    """
    assert boxes.size(1) == 5, 'Input boxes shape should be [N, 5]'
    order = scores.sort(0, descending=True)[1]

    if pre_max_size is not None:
        order = order[:pre_max_size]
    boxes = boxes[order].contiguous()

    keep = torch.zeros(boxes.size(0), dtype=torch.long)
    num_out = ext_module.iou3d_nms_forward(boxes, keep, thresh)
    keep = order[keep[:num_out].cuda(boxes.device)].contiguous()
    if post_max_size is not None:
        keep = keep[:post_max_size]
    return keep


def nms_normal_bev(boxes, scores, thresh):
    """Normal NMS function GPU implementation (for BEV boxes). The overlap of
    two boxes for IoU calculation is defined as the exact overlapping area of
    the two boxes WITH their yaw angle set to 0.

    Args:
        boxes (torch.Tensor): Input boxes with shape (N, 5).
        scores (torch.Tensor): Scores of predicted boxes with shape (N).
        thresh (float): Overlap threshold of NMS.

    Returns:
        torch.Tensor: Remaining indices with scores in descending order.
    """
    assert boxes.shape[1] == 5, 'Input boxes shape should be [N, 5]'
    order = scores.sort(0, descending=True)[1]

    boxes = boxes[order].contiguous()

    keep = torch.zeros(boxes.size(0), dtype=torch.long)
    num_out = ext_module.iou3d_nms_normal_forward(boxes, keep, thresh)
    return order[keep[:num_out].cuda(boxes.device)].contiguous()
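Usage sketch (editor's addition; CUDA only, with randomly generated boxes purely for illustration):

import torch

boxes = torch.rand(100, 5, device='cuda')   # (x1, y1, x2, y2, ry) in BEV
scores = torch.rand(100, device='cuda')
iou = boxes_iou_bev(boxes, boxes)           # (100, 100) pairwise IoU matrix
keep = nms_bev(boxes, scores, thresh=0.5, pre_max_size=50, post_max_size=20)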
extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/knn.py
ADDED
@@ -0,0 +1,77 @@
import torch
from torch.autograd import Function

from ..utils import ext_loader

ext_module = ext_loader.load_ext('_ext', ['knn_forward'])


class KNN(Function):
    r"""KNN (CUDA) based on heap data structure.

    Modified from `PAConv <https://github.com/CVMI-Lab/PAConv/tree/main/
    scene_seg/lib/pointops/src/knnquery_heap>`_.

    Find k-nearest points.
    """

    @staticmethod
    def forward(ctx,
                k: int,
                xyz: torch.Tensor,
                center_xyz: torch.Tensor = None,
                transposed: bool = False) -> torch.Tensor:
        """
        Args:
            k (int): number of nearest neighbors.
            xyz (Tensor): (B, N, 3) if transposed == False, else (B, 3, N).
                xyz coordinates of the features.
            center_xyz (Tensor, optional): (B, npoint, 3) if transposed ==
                False, else (B, 3, npoint). Centers of the knn query.
                Default: None.
            transposed (bool, optional): Whether the input tensors are
                transposed. Do not explicitly use this keyword when
                calling knn (=KNN.apply); just add the fourth param.
                Default: False.

        Returns:
            Tensor: (B, k, npoint) tensor with the indices of the
                features that form the k-nearest neighbours.
        """
        assert (k > 0) & (k < 100), 'k should be in range(0, 100)'

        if center_xyz is None:
            center_xyz = xyz

        if transposed:
            xyz = xyz.transpose(2, 1).contiguous()
            center_xyz = center_xyz.transpose(2, 1).contiguous()

        assert xyz.is_contiguous()  # [B, N, 3]
        assert center_xyz.is_contiguous()  # [B, npoint, 3]

        center_xyz_device = center_xyz.get_device()
        assert center_xyz_device == xyz.get_device(), \
            'center_xyz and xyz should be put on the same device'
        if torch.cuda.current_device() != center_xyz_device:
            torch.cuda.set_device(center_xyz_device)

        B, npoint, _ = center_xyz.shape
        N = xyz.shape[1]

        idx = center_xyz.new_zeros((B, npoint, k)).int()
        dist2 = center_xyz.new_zeros((B, npoint, k)).float()

        ext_module.knn_forward(
            xyz, center_xyz, idx, dist2, b=B, n=N, m=npoint, nsample=k)
        # idx shape to [B, k, npoint]
        idx = idx.transpose(2, 1).contiguous()
        if torch.__version__ != 'parrots':
            ctx.mark_non_differentiable(idx)
        return idx

    @staticmethod
    def backward(ctx, a=None):
        return None, None, None


knn = KNN.apply
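Usage sketch (editor's addition; CUDA only): finding the 8 nearest neighbours of each query center.

import torch

xyz = torch.randn(2, 1024, 3, device='cuda')     # (B, N, 3) reference points
centers = torch.randn(2, 256, 3, device='cuda')  # (B, npoint, 3) query points
idx = knn(8, xyz, centers, False)                # (B, 8, 256) int indices into xyz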
extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/masked_conv.py
ADDED
@@ -0,0 +1,111 @@
# Copyright (c) OpenMMLab. All rights reserved.
import math

import torch
import torch.nn as nn
from torch.autograd import Function
from torch.autograd.function import once_differentiable
from torch.nn.modules.utils import _pair

from ..utils import ext_loader

ext_module = ext_loader.load_ext(
    '_ext', ['masked_im2col_forward', 'masked_col2im_forward'])


class MaskedConv2dFunction(Function):

    @staticmethod
    def symbolic(g, features, mask, weight, bias, padding, stride):
        return g.op(
            'mmcv::MMCVMaskedConv2d',
            features,
            mask,
            weight,
            bias,
            padding_i=padding,
            stride_i=stride)

    @staticmethod
    def forward(ctx, features, mask, weight, bias, padding=0, stride=1):
        assert mask.dim() == 3 and mask.size(0) == 1
        assert features.dim() == 4 and features.size(0) == 1
        assert features.size()[2:] == mask.size()[1:]
        pad_h, pad_w = _pair(padding)
        stride_h, stride_w = _pair(stride)
        if stride_h != 1 or stride_w != 1:
            raise ValueError(
                'Stride must be 1 for masked_conv2d currently.')
        out_channel, in_channel, kernel_h, kernel_w = weight.size()

        batch_size = features.size(0)
        out_h = int(
            math.floor((features.size(2) + 2 * pad_h -
                        (kernel_h - 1) - 1) / stride_h + 1))
        # Note: the width must use kernel_w; the original used kernel_h
        # here too, which is wrong for non-square kernels.
        out_w = int(
            math.floor((features.size(3) + 2 * pad_w -
                        (kernel_w - 1) - 1) / stride_w + 1))
        mask_inds = torch.nonzero(mask[0] > 0, as_tuple=False)
        output = features.new_zeros(batch_size, out_channel, out_h, out_w)
        if mask_inds.numel() > 0:
            mask_h_idx = mask_inds[:, 0].contiguous()
            mask_w_idx = mask_inds[:, 1].contiguous()
            data_col = features.new_zeros(in_channel * kernel_h * kernel_w,
                                          mask_inds.size(0))
            ext_module.masked_im2col_forward(
                features,
                mask_h_idx,
                mask_w_idx,
                data_col,
                kernel_h=kernel_h,
                kernel_w=kernel_w,
                pad_h=pad_h,
                pad_w=pad_w)

            # addmm with explicit positional beta/alpha arguments is
            # deprecated; the defaults (beta=1, alpha=1) give the same
            # result.
            masked_output = torch.addmm(bias[:, None],
                                        weight.view(out_channel, -1),
                                        data_col)
            ext_module.masked_col2im_forward(
                masked_output,
                mask_h_idx,
                mask_w_idx,
                output,
                height=out_h,
                width=out_w,
                channels=out_channel)
        return output

    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        return (None, ) * 5


masked_conv2d = MaskedConv2dFunction.apply


class MaskedConv2d(nn.Conv2d):
    """A MaskedConv2d which inherits the official Conv2d.

    The masked forward doesn't implement the backward function and only
    supports a stride of 1 currently.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 bias=True):
        super(MaskedConv2d,
              self).__init__(in_channels, out_channels, kernel_size, stride,
                             padding, dilation, groups, bias)

    def forward(self, input, mask=None):
        if mask is None:  # fallback to the normal Conv2d
            return super(MaskedConv2d, self).forward(input)
        else:
            return masked_conv2d(input, mask, self.weight, self.bias,
                                 self.padding)
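Usage sketch (editor's addition; the masked path requires batch size 1 and a CUDA build, and the mask values here are illustrative):

import torch

conv = MaskedConv2d(16, 32, kernel_size=3, padding=1).cuda()
x = torch.randn(1, 16, 64, 64, device='cuda')
mask = (torch.rand(1, 64, 64, device='cuda') > 0.5).float()
y = conv(x, mask)   # convolution evaluated only where mask > 0
y_full = conv(x)    # mask=None falls back to an ordinary Conv2d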
extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/merge_cells.py
ADDED
@@ -0,0 +1,149 @@
# Copyright (c) OpenMMLab. All rights reserved.
from abc import abstractmethod

import torch
import torch.nn as nn
import torch.nn.functional as F

from ..cnn import ConvModule


class BaseMergeCell(nn.Module):
    """The basic class for cells used in NAS-FPN and NAS-FCOS.

    BaseMergeCell takes 2 inputs. After applying convolution on them,
    they are resized to the target size. Then they go through
    ``_binary_op``, which depends on the type of cell. If
    ``with_out_conv`` is True, the result goes through another
    convolution layer.

    Args:
        fused_channels (int): number of input channels of the out_conv layer.
        out_channels (int): number of output channels of the out_conv layer.
        with_out_conv (bool): Whether to use the out_conv layer.
        out_conv_cfg (dict): Config dict for the convolution layer, which
            should contain "groups", "kernel_size", "padding", "bias" to
            build the out_conv layer.
        out_norm_cfg (dict): Config dict for the normalization layer in
            out_conv.
        out_conv_order (tuple): The order of conv/norm/activation layers in
            out_conv.
        with_input1_conv (bool): Whether to use convolution on input1.
        with_input2_conv (bool): Whether to use convolution on input2.
        input_conv_cfg (dict): Config dict for building the input1_conv and
            input2_conv layers, which is expected to contain the type of
            convolution. Default: None, which means using conv2d.
        input_norm_cfg (dict): Config dict for the normalization layer in
            input1_conv and input2_conv. Default: None.
        upsample_mode (str): Interpolation method used to resize the output
            of input1_conv and input2_conv to the target size. Currently,
            we support ['nearest', 'bilinear']. Default: 'nearest'.
    """

    def __init__(self,
                 fused_channels=256,
                 out_channels=256,
                 with_out_conv=True,
                 out_conv_cfg=dict(
                     groups=1, kernel_size=3, padding=1, bias=True),
                 out_norm_cfg=None,
                 out_conv_order=('act', 'conv', 'norm'),
                 with_input1_conv=False,
                 with_input2_conv=False,
                 input_conv_cfg=None,
                 input_norm_cfg=None,
                 upsample_mode='nearest'):
        super(BaseMergeCell, self).__init__()
        assert upsample_mode in ['nearest', 'bilinear']
        self.with_out_conv = with_out_conv
        self.with_input1_conv = with_input1_conv
        self.with_input2_conv = with_input2_conv
        self.upsample_mode = upsample_mode

        if self.with_out_conv:
            self.out_conv = ConvModule(
                fused_channels,
                out_channels,
                **out_conv_cfg,
                norm_cfg=out_norm_cfg,
                order=out_conv_order)

        self.input1_conv = self._build_input_conv(
            out_channels, input_conv_cfg,
            input_norm_cfg) if with_input1_conv else nn.Sequential()
        self.input2_conv = self._build_input_conv(
            out_channels, input_conv_cfg,
            input_norm_cfg) if with_input2_conv else nn.Sequential()

    def _build_input_conv(self, channel, conv_cfg, norm_cfg):
        return ConvModule(
            channel,
            channel,
            3,
            padding=1,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            bias=True)

    @abstractmethod
    def _binary_op(self, x1, x2):
        pass

    def _resize(self, x, size):
        if x.shape[-2:] == size:
            return x
        elif x.shape[-2:] < size:
            return F.interpolate(x, size=size, mode=self.upsample_mode)
        else:
            assert x.shape[-2] % size[-2] == 0 and x.shape[-1] % size[-1] == 0
            kernel_size = x.shape[-1] // size[-1]
            x = F.max_pool2d(x, kernel_size=kernel_size, stride=kernel_size)
            return x

    def forward(self, x1, x2, out_size=None):
        assert x1.shape[:2] == x2.shape[:2]
        assert out_size is None or len(out_size) == 2
        if out_size is None:  # resize to the larger one
            out_size = max(x1.size()[2:], x2.size()[2:])

        x1 = self.input1_conv(x1)
        x2 = self.input2_conv(x2)

        x1 = self._resize(x1, out_size)
        x2 = self._resize(x2, out_size)

        x = self._binary_op(x1, x2)
        if self.with_out_conv:
            x = self.out_conv(x)
        return x


class SumCell(BaseMergeCell):

    def __init__(self, in_channels, out_channels, **kwargs):
        super(SumCell, self).__init__(in_channels, out_channels, **kwargs)

    def _binary_op(self, x1, x2):
        return x1 + x2


class ConcatCell(BaseMergeCell):

    def __init__(self, in_channels, out_channels, **kwargs):
        super(ConcatCell, self).__init__(in_channels * 2, out_channels,
                                         **kwargs)

    def _binary_op(self, x1, x2):
        ret = torch.cat([x1, x2], dim=1)
        return ret


class GlobalPoolingCell(BaseMergeCell):

    def __init__(self, in_channels=None, out_channels=None, **kwargs):
        super().__init__(in_channels, out_channels, **kwargs)
        self.global_pool = nn.AdaptiveAvgPool2d((1, 1))

    def _binary_op(self, x1, x2):
        x2_att = self.global_pool(x2).sigmoid()
        return x2 + x2_att * x1
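Usage sketch (editor's addition; runs on CPU since merge cells use only standard layers, and the shapes are illustrative): fusing two feature maps of different resolutions.

import torch

cell = SumCell(in_channels=256, out_channels=256)
x1 = torch.randn(2, 256, 32, 32)
x2 = torch.randn(2, 256, 64, 64)
out = cell(x1, x2)  # x1 is upsampled to 64x64, summed with x2, then out_conv runs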
extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/modulated_deform_conv.py
ADDED
@@ -0,0 +1,282 @@
# Copyright (c) OpenMMLab. All rights reserved.
import math

import torch
import torch.nn as nn
from torch.autograd import Function
from torch.autograd.function import once_differentiable
from torch.nn.modules.utils import _pair, _single

from annotator.mmpkg.mmcv.utils import deprecated_api_warning
from ..cnn import CONV_LAYERS
from ..utils import ext_loader, print_log

ext_module = ext_loader.load_ext(
    '_ext',
    ['modulated_deform_conv_forward', 'modulated_deform_conv_backward'])


class ModulatedDeformConv2dFunction(Function):

    @staticmethod
    def symbolic(g, input, offset, mask, weight, bias, stride, padding,
                 dilation, groups, deform_groups):
        input_tensors = [input, offset, mask, weight]
        if bias is not None:
            input_tensors.append(bias)
        return g.op(
            'mmcv::MMCVModulatedDeformConv2d',
            *input_tensors,
            stride_i=stride,
            padding_i=padding,
            dilation_i=dilation,
            groups_i=groups,
            deform_groups_i=deform_groups)

    @staticmethod
    def forward(ctx,
                input,
                offset,
                mask,
                weight,
                bias=None,
                stride=1,
                padding=0,
                dilation=1,
                groups=1,
                deform_groups=1):
        if input is not None and input.dim() != 4:
            raise ValueError(
                f'Expected 4D tensor as input, got {input.dim()}D tensor '
                'instead.')
        ctx.stride = _pair(stride)
        ctx.padding = _pair(padding)
        ctx.dilation = _pair(dilation)
        ctx.groups = groups
        ctx.deform_groups = deform_groups
        ctx.with_bias = bias is not None
        if not ctx.with_bias:
            bias = input.new_empty(0)  # fake tensor
        # When the pytorch version >= 1.6.0, amp is adopted for fp16 mode;
        # amp won't cast the type of the model (float32), but "offset" is
        # cast to float16 by nn.Conv2d automatically, leading to a type
        # mismatch with the input (when it is float32) or the weight.
        # The flag for whether to use fp16 or amp is the type of "offset",
        # so we cast the weight and input to temporarily support fp16 and
        # amp whatever the pytorch version is.
        input = input.type_as(offset)
        weight = weight.type_as(input)
        ctx.save_for_backward(input, offset, mask, weight, bias)
        output = input.new_empty(
            ModulatedDeformConv2dFunction._output_size(ctx, input, weight))
        ctx._bufs = [input.new_empty(0), input.new_empty(0)]
        ext_module.modulated_deform_conv_forward(
            input,
            weight,
            bias,
            ctx._bufs[0],
            offset,
            mask,
            output,
            ctx._bufs[1],
            kernel_h=weight.size(2),
            kernel_w=weight.size(3),
            stride_h=ctx.stride[0],
            stride_w=ctx.stride[1],
            pad_h=ctx.padding[0],
            pad_w=ctx.padding[1],
            dilation_h=ctx.dilation[0],
            dilation_w=ctx.dilation[1],
            group=ctx.groups,
            deformable_group=ctx.deform_groups,
            with_bias=ctx.with_bias)
        return output

    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        input, offset, mask, weight, bias = ctx.saved_tensors
        grad_input = torch.zeros_like(input)
        grad_offset = torch.zeros_like(offset)
        grad_mask = torch.zeros_like(mask)
        grad_weight = torch.zeros_like(weight)
        grad_bias = torch.zeros_like(bias)
        grad_output = grad_output.contiguous()
        ext_module.modulated_deform_conv_backward(
            input,
            weight,
            bias,
            ctx._bufs[0],
            offset,
            mask,
            ctx._bufs[1],
            grad_input,
            grad_weight,
            grad_bias,
            grad_offset,
            grad_mask,
            grad_output,
            kernel_h=weight.size(2),
            kernel_w=weight.size(3),
            stride_h=ctx.stride[0],
            stride_w=ctx.stride[1],
            pad_h=ctx.padding[0],
            pad_w=ctx.padding[1],
            dilation_h=ctx.dilation[0],
            dilation_w=ctx.dilation[1],
            group=ctx.groups,
            deformable_group=ctx.deform_groups,
            with_bias=ctx.with_bias)
        if not ctx.with_bias:
            grad_bias = None

        return (grad_input, grad_offset, grad_mask, grad_weight, grad_bias,
                None, None, None, None, None)

    @staticmethod
    def _output_size(ctx, input, weight):
        channels = weight.size(0)
        output_size = (input.size(0), channels)
        for d in range(input.dim() - 2):
            in_size = input.size(d + 2)
            pad = ctx.padding[d]
            kernel = ctx.dilation[d] * (weight.size(d + 2) - 1) + 1
            stride_ = ctx.stride[d]
            output_size += ((in_size + (2 * pad) - kernel) // stride_ + 1, )
        if not all(map(lambda s: s > 0, output_size)):
            raise ValueError(
                'convolution input is too small (output would be ' +
                'x'.join(map(str, output_size)) + ')')
        return output_size


modulated_deform_conv2d = ModulatedDeformConv2dFunction.apply


class ModulatedDeformConv2d(nn.Module):

    @deprecated_api_warning({'deformable_groups': 'deform_groups'},
                            cls_name='ModulatedDeformConv2d')
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 deform_groups=1,
                 bias=True):
        super(ModulatedDeformConv2d, self).__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = _pair(kernel_size)
        self.stride = _pair(stride)
        self.padding = _pair(padding)
        self.dilation = _pair(dilation)
        self.groups = groups
        self.deform_groups = deform_groups
        # enable compatibility with nn.Conv2d
        self.transposed = False
        self.output_padding = _single(0)

        self.weight = nn.Parameter(
            torch.Tensor(out_channels, in_channels // groups,
                         *self.kernel_size))
        if bias:
            self.bias = nn.Parameter(torch.Tensor(out_channels))
        else:
            self.register_parameter('bias', None)
        self.init_weights()

    def init_weights(self):
        n = self.in_channels
        for k in self.kernel_size:
            n *= k
        stdv = 1. / math.sqrt(n)
        self.weight.data.uniform_(-stdv, stdv)
        if self.bias is not None:
            self.bias.data.zero_()

    def forward(self, x, offset, mask):
        return modulated_deform_conv2d(x, offset, mask, self.weight,
                                       self.bias, self.stride, self.padding,
                                       self.dilation, self.groups,
                                       self.deform_groups)


@CONV_LAYERS.register_module('DCNv2')
class ModulatedDeformConv2dPack(ModulatedDeformConv2d):
    """A ModulatedDeformable Conv Encapsulation that acts as normal Conv
    layers.

    Args:
        in_channels (int): Same as nn.Conv2d.
        out_channels (int): Same as nn.Conv2d.
        kernel_size (int or tuple[int]): Same as nn.Conv2d.
        stride (int): Same as nn.Conv2d, while tuple is not supported.
        padding (int): Same as nn.Conv2d, while tuple is not supported.
        dilation (int): Same as nn.Conv2d, while tuple is not supported.
        groups (int): Same as nn.Conv2d.
        bias (bool or str): If specified as `auto`, it will be decided by the
            norm_cfg. Bias will be set as True if norm_cfg is None, otherwise
            False.
    """

    _version = 2

    def __init__(self, *args, **kwargs):
        super(ModulatedDeformConv2dPack, self).__init__(*args, **kwargs)
        self.conv_offset = nn.Conv2d(
            self.in_channels,
            self.deform_groups * 3 * self.kernel_size[0] *
            self.kernel_size[1],
            kernel_size=self.kernel_size,
            stride=self.stride,
            padding=self.padding,
            dilation=self.dilation,
            bias=True)
        self.init_weights()

    def init_weights(self):
        super(ModulatedDeformConv2dPack, self).init_weights()
        if hasattr(self, 'conv_offset'):
            self.conv_offset.weight.data.zero_()
            self.conv_offset.bias.data.zero_()

    def forward(self, x):
        out = self.conv_offset(x)
        o1, o2, mask = torch.chunk(out, 3, dim=1)
        offset = torch.cat((o1, o2), dim=1)
        mask = torch.sigmoid(mask)
        return modulated_deform_conv2d(x, offset, mask, self.weight,
                                       self.bias, self.stride, self.padding,
                                       self.dilation, self.groups,
                                       self.deform_groups)

    def _load_from_state_dict(self, state_dict, prefix, local_metadata,
                              strict, missing_keys, unexpected_keys,
                              error_msgs):
        version = local_metadata.get('version', None)

        if version is None or version < 2:
            # the key is different in early versions
            # In version < 2, ModulatedDeformConvPack
            # loads previous benchmark models.
            if (prefix + 'conv_offset.weight' not in state_dict
                    and prefix[:-1] + '_offset.weight' in state_dict):
                state_dict[prefix + 'conv_offset.weight'] = state_dict.pop(
                    prefix[:-1] + '_offset.weight')
            if (prefix + 'conv_offset.bias' not in state_dict
                    and prefix[:-1] + '_offset.bias' in state_dict):
                state_dict[prefix +
                           'conv_offset.bias'] = state_dict.pop(prefix[:-1] +
                                                                '_offset.bias')

        if version is not None and version > 1:
            print_log(
                f'ModulatedDeformConvPack {prefix.rstrip(".")} is upgraded to '
                'version 2.',
                logger='root')

        super()._load_from_state_dict(state_dict, prefix, local_metadata,
                                      strict, missing_keys, unexpected_keys,
                                      error_msgs)
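Usage sketch (editor's addition; DCNv2 with a CUDA build assumed). The packed variant predicts the offset and mask internally, so it drops in where an nn.Conv2d would go:

import torch

dcn = ModulatedDeformConv2dPack(16, 32, kernel_size=3, padding=1).cuda()
x = torch.randn(2, 16, 28, 28, device='cuda')
y = dcn(x)  # (2, 32, 28, 28)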
extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/multi_scale_deform_attn.py
ADDED
@@ -0,0 +1,358 @@
# Copyright (c) OpenMMLab. All rights reserved.
import math
import warnings

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd.function import Function, once_differentiable

from annotator.mmpkg.mmcv import deprecated_api_warning
from annotator.mmpkg.mmcv.cnn import constant_init, xavier_init
from annotator.mmpkg.mmcv.cnn.bricks.registry import ATTENTION
from annotator.mmpkg.mmcv.runner import BaseModule
from ..utils import ext_loader

ext_module = ext_loader.load_ext(
    '_ext', ['ms_deform_attn_backward', 'ms_deform_attn_forward'])


class MultiScaleDeformableAttnFunction(Function):

    @staticmethod
    def forward(ctx, value, value_spatial_shapes, value_level_start_index,
                sampling_locations, attention_weights, im2col_step):
        """GPU version of multi-scale deformable attention.

        Args:
            value (Tensor): The value has shape
                (bs, num_keys, num_heads, embed_dims//num_heads)
            value_spatial_shapes (Tensor): Spatial shape of
                each feature map, has shape (num_levels, 2);
                the last dimension 2 represents (h, w).
            sampling_locations (Tensor): The location of sampling points,
                has shape
                (bs, num_queries, num_heads, num_levels, num_points, 2);
                the last dimension 2 represents (x, y).
            attention_weights (Tensor): The weight of sampling points used
                when calculating the attention, has shape
                (bs, num_queries, num_heads, num_levels, num_points).
            im2col_step (Tensor): The step used in image to column.

        Returns:
            Tensor: has shape (bs, num_queries, embed_dims)
        """

        ctx.im2col_step = im2col_step
        output = ext_module.ms_deform_attn_forward(
            value,
            value_spatial_shapes,
            value_level_start_index,
            sampling_locations,
            attention_weights,
            im2col_step=ctx.im2col_step)
        ctx.save_for_backward(value, value_spatial_shapes,
                              value_level_start_index, sampling_locations,
                              attention_weights)
        return output

    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        """GPU version of the backward function.

        Args:
            grad_output (Tensor): Gradient of the output tensor of forward.

        Returns:
            Tuple[Tensor]: Gradients of the input tensors in forward.
        """
        value, value_spatial_shapes, value_level_start_index,\
            sampling_locations, attention_weights = ctx.saved_tensors
        grad_value = torch.zeros_like(value)
        grad_sampling_loc = torch.zeros_like(sampling_locations)
        grad_attn_weight = torch.zeros_like(attention_weights)

        ext_module.ms_deform_attn_backward(
            value,
            value_spatial_shapes,
            value_level_start_index,
            sampling_locations,
            attention_weights,
            grad_output.contiguous(),
            grad_value,
            grad_sampling_loc,
            grad_attn_weight,
            im2col_step=ctx.im2col_step)

        return grad_value, None, None, \
            grad_sampling_loc, grad_attn_weight, None


def multi_scale_deformable_attn_pytorch(value, value_spatial_shapes,
                                        sampling_locations,
                                        attention_weights):
    """CPU version of multi-scale deformable attention.

    Args:
        value (Tensor): The value has shape
            (bs, num_keys, num_heads, embed_dims//num_heads)
        value_spatial_shapes (Tensor): Spatial shape of
            each feature map, has shape (num_levels, 2);
            the last dimension 2 represents (h, w).
        sampling_locations (Tensor): The location of sampling points,
            has shape
            (bs, num_queries, num_heads, num_levels, num_points, 2);
            the last dimension 2 represents (x, y).
        attention_weights (Tensor): The weight of sampling points used
            when calculating the attention, has shape
            (bs, num_queries, num_heads, num_levels, num_points).

    Returns:
        Tensor: has shape (bs, num_queries, embed_dims)
    """

    bs, _, num_heads, embed_dims = value.shape
    _, num_queries, num_heads, num_levels, num_points, _ =\
        sampling_locations.shape
    value_list = value.split([H_ * W_ for H_, W_ in value_spatial_shapes],
                             dim=1)
    sampling_grids = 2 * sampling_locations - 1
    sampling_value_list = []
    for level, (H_, W_) in enumerate(value_spatial_shapes):
        # bs, H_*W_, num_heads, embed_dims ->
        # bs, H_*W_, num_heads*embed_dims ->
        # bs, num_heads*embed_dims, H_*W_ ->
        # bs*num_heads, embed_dims, H_, W_
        value_l_ = value_list[level].flatten(2).transpose(1, 2).reshape(
            bs * num_heads, embed_dims, H_, W_)
        # bs, num_queries, num_heads, num_points, 2 ->
        # bs, num_heads, num_queries, num_points, 2 ->
        # bs*num_heads, num_queries, num_points, 2
        sampling_grid_l_ = sampling_grids[:, :, :,
                                          level].transpose(1, 2).flatten(0, 1)
        # bs*num_heads, embed_dims, num_queries, num_points
        sampling_value_l_ = F.grid_sample(
            value_l_,
            sampling_grid_l_,
            mode='bilinear',
            padding_mode='zeros',
            align_corners=False)
        sampling_value_list.append(sampling_value_l_)
    # (bs, num_queries, num_heads, num_levels, num_points) ->
    # (bs, num_heads, num_queries, num_levels, num_points) ->
    # (bs, num_heads, 1, num_queries, num_levels*num_points)
    attention_weights = attention_weights.transpose(1, 2).reshape(
        bs * num_heads, 1, num_queries, num_levels * num_points)
    output = (torch.stack(sampling_value_list, dim=-2).flatten(-2) *
              attention_weights).sum(-1).view(bs, num_heads * embed_dims,
                                              num_queries)
    return output.transpose(1, 2).contiguous()

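Usage sketch for the pure-PyTorch fallback above (editor's addition; all shapes below are illustrative assumptions):

import torch

bs, num_heads, dims_per_head = 2, 8, 32
spatial_shapes = torch.tensor([[32, 32], [16, 16]])  # (num_levels, 2)
num_keys = int((spatial_shapes[:, 0] * spatial_shapes[:, 1]).sum())
num_queries, num_levels, num_points = 100, 2, 4

value = torch.randn(bs, num_keys, num_heads, dims_per_head)
locs = torch.rand(bs, num_queries, num_heads, num_levels, num_points, 2)
weights = torch.softmax(
    torch.randn(bs, num_queries, num_heads, num_levels * num_points), -1)
weights = weights.view(bs, num_queries, num_heads, num_levels, num_points)
out = multi_scale_deformable_attn_pytorch(value, spatial_shapes, locs, weights)
# out: (bs, num_queries, num_heads * dims_per_head)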

@ATTENTION.register_module()
class MultiScaleDeformableAttention(BaseModule):
    """An attention module used in Deformable-Detr.

    `Deformable DETR: Deformable Transformers for End-to-End Object Detection.
    <https://arxiv.org/pdf/2010.04159.pdf>`_.

    Args:
        embed_dims (int): The embedding dimension of Attention.
            Default: 256.
        num_heads (int): Parallel attention heads. Default: 8.
        num_levels (int): The number of feature maps used in
            Attention. Default: 4.
        num_points (int): The number of sampling points for
            each query in each head. Default: 4.
        im2col_step (int): The step used in image_to_column.
            Default: 64.
        dropout (float): A Dropout layer on `inp_identity`.
            Default: 0.1.
        batch_first (bool): Key, Query and Value are shape of
            (batch, n, embed_dim)
            or (n, batch, embed_dim). Default to False.
        norm_cfg (dict): Config dict for normalization layer.
            Default: None.
        init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization.
            Default: None.
    """

    def __init__(self,
                 embed_dims=256,
                 num_heads=8,
                 num_levels=4,
                 num_points=4,
                 im2col_step=64,
                 dropout=0.1,
                 batch_first=False,
                 norm_cfg=None,
                 init_cfg=None):
        super().__init__(init_cfg)
        if embed_dims % num_heads != 0:
            raise ValueError(f'embed_dims must be divisible by num_heads, '
                             f'but got {embed_dims} and {num_heads}')
        dim_per_head = embed_dims // num_heads
        self.norm_cfg = norm_cfg
        self.dropout = nn.Dropout(dropout)
        self.batch_first = batch_first

        # you'd better set dim_per_head to a power of 2
        # which is more efficient in the CUDA implementation
        def _is_power_of_2(n):
            if (not isinstance(n, int)) or (n < 0):
                raise ValueError(
                    'invalid input for _is_power_of_2: {} (type: {})'.format(
                        n, type(n)))
            return (n & (n - 1) == 0) and n != 0

        if not _is_power_of_2(dim_per_head):
            warnings.warn(
                "You'd better set embed_dims in "
                'MultiScaleDeformAttention to make '
                'the dimension of each attention head a power of 2 '
                'which is more efficient in our CUDA implementation.')

        self.im2col_step = im2col_step
        self.embed_dims = embed_dims
        self.num_levels = num_levels
        self.num_heads = num_heads
        self.num_points = num_points
        self.sampling_offsets = nn.Linear(
            embed_dims, num_heads * num_levels * num_points * 2)
        self.attention_weights = nn.Linear(embed_dims,
                                           num_heads * num_levels * num_points)
        self.value_proj = nn.Linear(embed_dims, embed_dims)
        self.output_proj = nn.Linear(embed_dims, embed_dims)
        self.init_weights()

    def init_weights(self):
        """Default initialization for Parameters of Module."""
        constant_init(self.sampling_offsets, 0.)
        thetas = torch.arange(
            self.num_heads,
            dtype=torch.float32) * (2.0 * math.pi / self.num_heads)
        grid_init = torch.stack([thetas.cos(), thetas.sin()], -1)
        grid_init = (grid_init /
                     grid_init.abs().max(-1, keepdim=True)[0]).view(
                         self.num_heads, 1, 1,
                         2).repeat(1, self.num_levels, self.num_points, 1)
        for i in range(self.num_points):
            grid_init[:, :, i, :] *= i + 1

        self.sampling_offsets.bias.data = grid_init.view(-1)
        constant_init(self.attention_weights, val=0., bias=0.)
        xavier_init(self.value_proj, distribution='uniform', bias=0.)
        xavier_init(self.output_proj, distribution='uniform', bias=0.)
        self._is_init = True

    @deprecated_api_warning({'residual': 'identity'},
                            cls_name='MultiScaleDeformableAttention')
    def forward(self,
                query,
                key=None,
                value=None,
                identity=None,
                query_pos=None,
                key_padding_mask=None,
                reference_points=None,
                spatial_shapes=None,
                level_start_index=None,
                **kwargs):
        """Forward Function of MultiScaleDeformAttention.

        Args:
            query (Tensor): Query of Transformer with shape
                (num_query, bs, embed_dims).
            key (Tensor): The key tensor with shape
                `(num_key, bs, embed_dims)`.
            value (Tensor): The value tensor with shape
                `(num_key, bs, embed_dims)`.
            identity (Tensor): The tensor used for addition, with the
                same shape as `query`. Default None. If None,
                `query` will be used.
            query_pos (Tensor): The positional encoding for `query`.
                Default: None.
            key_pos (Tensor): The positional encoding for `key`. Default
                None.
            reference_points (Tensor): The normalized reference
                points with shape (bs, num_query, num_levels, 2),
                all elements are in range [0, 1], top-left (0, 0),
                bottom-right (1, 1), including padding area;
                or (N, Length_{query}, num_levels, 4), with the
                additional two dimensions (w, h) to
                form reference boxes.
            key_padding_mask (Tensor): ByteTensor for `query`, with
                shape [bs, num_key].
            spatial_shapes (Tensor): Spatial shape of features in
                different levels. With shape (num_levels, 2),
                last dimension represents (h, w).
            level_start_index (Tensor): The start index of each level.
                A tensor has shape ``(num_levels, )`` and can be represented
                as [0, h_0*w_0, h_0*w_0+h_1*w_1, ...].

        Returns:
            Tensor: forwarded results with shape [num_query, bs, embed_dims].
        """

        if value is None:
            value = query

        if identity is None:
            identity = query
        if query_pos is not None:
            query = query + query_pos
        if not self.batch_first:
            # change to (bs, num_query, embed_dims)
            query = query.permute(1, 0, 2)
            value = value.permute(1, 0, 2)

        bs, num_query, _ = query.shape
        bs, num_value, _ = value.shape
        assert (spatial_shapes[:, 0] * spatial_shapes[:, 1]).sum() == num_value

        value = self.value_proj(value)
        if key_padding_mask is not None:
            value = value.masked_fill(key_padding_mask[..., None], 0.0)
        value = value.view(bs, num_value, self.num_heads, -1)
        sampling_offsets = self.sampling_offsets(query).view(
            bs, num_query, self.num_heads, self.num_levels, self.num_points, 2)
        attention_weights = self.attention_weights(query).view(
            bs, num_query, self.num_heads, self.num_levels * self.num_points)
        attention_weights = attention_weights.softmax(-1)

        attention_weights = attention_weights.view(bs, num_query,
                                                   self.num_heads,
                                                   self.num_levels,
                                                   self.num_points)
        if reference_points.shape[-1] == 2:
            offset_normalizer = torch.stack(
                [spatial_shapes[..., 1], spatial_shapes[..., 0]], -1)
            sampling_locations = reference_points[:, :, None, :, None, :] \
                + sampling_offsets \
                / offset_normalizer[None, None, None, :, None, :]
        elif reference_points.shape[-1] == 4:
            sampling_locations = reference_points[:, :, None, :, None, :2] \
                + sampling_offsets / self.num_points \
                * reference_points[:, :, None, :, None, 2:] \
                * 0.5
        else:
            raise ValueError(
                f'Last dim of reference_points must be'
                f' 2 or 4, but get {reference_points.shape[-1]} instead.')
        if torch.cuda.is_available() and value.is_cuda:
            output = MultiScaleDeformableAttnFunction.apply(
                value, spatial_shapes, level_start_index, sampling_locations,
                attention_weights, self.im2col_step)
        else:
            output = multi_scale_deformable_attn_pytorch(
                value, spatial_shapes, sampling_locations, attention_weights)

        output = self.output_proj(output)

        if not self.batch_first:
            # (num_query, bs, embed_dims)
            output = output.permute(1, 0, 2)

        return self.dropout(output) + identity
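
A minimal usage sketch of the module above (not from the original commit). It assumes the compiled CUDA extension or the pure-PyTorch fallback is importable under this repository's `annotator.mmpkg.mmcv` layout, and feeds random tensors in the default (num_query, bs, embed_dims) layout:

import torch
from annotator.mmpkg.mmcv.ops.multi_scale_deform_attn import \
    MultiScaleDeformableAttention

attn = MultiScaleDeformableAttention(embed_dims=256, num_heads=8,
                                     num_levels=2, num_points=4)
spatial_shapes = torch.tensor([[16, 16], [8, 8]])
level_start_index = torch.tensor([0, 16 * 16])
num_query, bs = 10, 2
num_value = int((spatial_shapes[:, 0] * spatial_shapes[:, 1]).sum())
query = torch.rand(num_query, bs, 256)
value = torch.rand(num_value, bs, 256)
reference_points = torch.rand(bs, num_query, 2, 2)  # normalized (x, y)
out = attn(query, value=value, reference_points=reference_points,
           spatial_shapes=spatial_shapes,
           level_start_index=level_start_index)
assert out.shape == (num_query, bs, 256)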
extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/nms.py
ADDED
@@ -0,0 +1,417 @@
import os

import numpy as np
import torch

from annotator.mmpkg.mmcv.utils import deprecated_api_warning
from ..utils import ext_loader

ext_module = ext_loader.load_ext(
    '_ext', ['nms', 'softnms', 'nms_match', 'nms_rotated'])


# This function is modified from: https://github.com/pytorch/vision/
class NMSop(torch.autograd.Function):

    @staticmethod
    def forward(ctx, bboxes, scores, iou_threshold, offset, score_threshold,
                max_num):
        is_filtering_by_score = score_threshold > 0
        if is_filtering_by_score:
            valid_mask = scores > score_threshold
            bboxes, scores = bboxes[valid_mask], scores[valid_mask]
            valid_inds = torch.nonzero(
                valid_mask, as_tuple=False).squeeze(dim=1)

        inds = ext_module.nms(
            bboxes, scores, iou_threshold=float(iou_threshold), offset=offset)

        if max_num > 0:
            inds = inds[:max_num]
        if is_filtering_by_score:
            inds = valid_inds[inds]
        return inds

    @staticmethod
    def symbolic(g, bboxes, scores, iou_threshold, offset, score_threshold,
                 max_num):
        from ..onnx import is_custom_op_loaded
        has_custom_op = is_custom_op_loaded()
        # TensorRT nms plugin is aligned with original nms in ONNXRuntime
        is_trt_backend = os.environ.get('ONNX_BACKEND') == 'MMCVTensorRT'
        if has_custom_op and (not is_trt_backend):
            return g.op(
                'mmcv::NonMaxSuppression',
                bboxes,
                scores,
                iou_threshold_f=float(iou_threshold),
                offset_i=int(offset))
        else:
            from torch.onnx.symbolic_opset9 import select, squeeze, unsqueeze
            from ..onnx.onnx_utils.symbolic_helper import _size_helper

            boxes = unsqueeze(g, bboxes, 0)
            scores = unsqueeze(g, unsqueeze(g, scores, 0), 0)

            if max_num > 0:
                max_num = g.op(
                    'Constant',
                    value_t=torch.tensor(max_num, dtype=torch.long))
            else:
                dim = g.op('Constant', value_t=torch.tensor(0))
                max_num = _size_helper(g, bboxes, dim)
            max_output_per_class = max_num
            iou_threshold = g.op(
                'Constant',
                value_t=torch.tensor([iou_threshold], dtype=torch.float))
            score_threshold = g.op(
                'Constant',
                value_t=torch.tensor([score_threshold], dtype=torch.float))
            nms_out = g.op('NonMaxSuppression', boxes, scores,
                           max_output_per_class, iou_threshold,
                           score_threshold)
            return squeeze(
                g,
                select(
                    g, nms_out, 1,
                    g.op(
                        'Constant',
                        value_t=torch.tensor([2], dtype=torch.long))), 1)


class SoftNMSop(torch.autograd.Function):

    @staticmethod
    def forward(ctx, boxes, scores, iou_threshold, sigma, min_score, method,
                offset):
        dets = boxes.new_empty((boxes.size(0), 5), device='cpu')
        inds = ext_module.softnms(
            boxes.cpu(),
            scores.cpu(),
            dets.cpu(),
            iou_threshold=float(iou_threshold),
            sigma=float(sigma),
            min_score=float(min_score),
            method=int(method),
            offset=int(offset))
        return dets, inds

    @staticmethod
    def symbolic(g, boxes, scores, iou_threshold, sigma, min_score, method,
                 offset):
        from packaging import version
        assert version.parse(torch.__version__) >= version.parse('1.7.0')
        nms_out = g.op(
            'mmcv::SoftNonMaxSuppression',
            boxes,
            scores,
            iou_threshold_f=float(iou_threshold),
            sigma_f=float(sigma),
            min_score_f=float(min_score),
            method_i=int(method),
            offset_i=int(offset),
            outputs=2)
        return nms_out


@deprecated_api_warning({'iou_thr': 'iou_threshold'})
def nms(boxes, scores, iou_threshold, offset=0, score_threshold=0, max_num=-1):
    """Dispatch to either CPU or GPU NMS implementations.

    The input can be either a torch tensor or a numpy array. GPU NMS will be
    used if the input is a GPU tensor, otherwise CPU NMS will be used.
    The returned type will always be the same as the inputs.

    Arguments:
        boxes (torch.Tensor or np.ndarray): boxes in shape (N, 4).
        scores (torch.Tensor or np.ndarray): scores in shape (N, ).
        iou_threshold (float): IoU threshold for NMS.
        offset (int, 0 or 1): boxes' width or height is (x2 - x1 + offset).
        score_threshold (float): score threshold for NMS.
        max_num (int): maximum number of boxes after NMS.

    Returns:
        tuple: kept dets (boxes and scores) and indices, which are always the
            same data type as the input.

    Example:
        >>> boxes = np.array([[49.1, 32.4, 51.0, 35.9],
        >>>                   [49.3, 32.9, 51.0, 35.3],
        >>>                   [49.2, 31.8, 51.0, 35.4],
        >>>                   [35.1, 11.5, 39.1, 15.7],
        >>>                   [35.6, 11.8, 39.3, 14.2],
        >>>                   [35.3, 11.5, 39.9, 14.5],
        >>>                   [35.2, 11.7, 39.7, 15.7]], dtype=np.float32)
        >>> scores = np.array([0.9, 0.9, 0.5, 0.5, 0.5, 0.4, 0.3],
        >>>                   dtype=np.float32)
        >>> iou_threshold = 0.6
        >>> dets, inds = nms(boxes, scores, iou_threshold)
        >>> assert len(inds) == len(dets) == 3
    """
    assert isinstance(boxes, (torch.Tensor, np.ndarray))
    assert isinstance(scores, (torch.Tensor, np.ndarray))
    is_numpy = False
    if isinstance(boxes, np.ndarray):
        is_numpy = True
        boxes = torch.from_numpy(boxes)
    if isinstance(scores, np.ndarray):
        scores = torch.from_numpy(scores)
    assert boxes.size(1) == 4
    assert boxes.size(0) == scores.size(0)
    assert offset in (0, 1)

    if torch.__version__ == 'parrots':
        indata_list = [boxes, scores]
        indata_dict = {
            'iou_threshold': float(iou_threshold),
            'offset': int(offset)
        }
        inds = ext_module.nms(*indata_list, **indata_dict)
    else:
        inds = NMSop.apply(boxes, scores, iou_threshold, offset,
                           score_threshold, max_num)
    dets = torch.cat((boxes[inds], scores[inds].reshape(-1, 1)), dim=1)
    if is_numpy:
        dets = dets.cpu().numpy()
        inds = inds.cpu().numpy()
    return dets, inds


@deprecated_api_warning({'iou_thr': 'iou_threshold'})
def soft_nms(boxes,
             scores,
             iou_threshold=0.3,
             sigma=0.5,
             min_score=1e-3,
             method='linear',
             offset=0):
    """Dispatch to only CPU Soft NMS implementations.

    The input can be either a torch tensor or a numpy array.
    The returned type will always be the same as the inputs.

    Arguments:
        boxes (torch.Tensor or np.ndarray): boxes in shape (N, 4).
        scores (torch.Tensor or np.ndarray): scores in shape (N, ).
        iou_threshold (float): IoU threshold for NMS.
        sigma (float): hyperparameter for the gaussian method
        min_score (float): score filter threshold
        method (str): either 'linear' or 'gaussian'
        offset (int, 0 or 1): boxes' width or height is (x2 - x1 + offset).

    Returns:
        tuple: kept dets (boxes and scores) and indices, which are always the
            same data type as the input.

    Example:
        >>> boxes = np.array([[4., 3., 5., 3.],
        >>>                   [4., 3., 5., 4.],
        >>>                   [3., 1., 3., 1.],
        >>>                   [3., 1., 3., 1.],
        >>>                   [3., 1., 3., 1.],
        >>>                   [3., 1., 3., 1.]], dtype=np.float32)
        >>> scores = np.array([0.9, 0.9, 0.5, 0.5, 0.4, 0.0], dtype=np.float32)
        >>> iou_threshold = 0.6
        >>> dets, inds = soft_nms(boxes, scores, iou_threshold, sigma=0.5)
        >>> assert len(inds) == len(dets) == 5
    """

    assert isinstance(boxes, (torch.Tensor, np.ndarray))
    assert isinstance(scores, (torch.Tensor, np.ndarray))
    is_numpy = False
    if isinstance(boxes, np.ndarray):
        is_numpy = True
        boxes = torch.from_numpy(boxes)
    if isinstance(scores, np.ndarray):
        scores = torch.from_numpy(scores)
    assert boxes.size(1) == 4
    assert boxes.size(0) == scores.size(0)
    assert offset in (0, 1)
    method_dict = {'naive': 0, 'linear': 1, 'gaussian': 2}
    assert method in method_dict.keys()

    if torch.__version__ == 'parrots':
        dets = boxes.new_empty((boxes.size(0), 5), device='cpu')
        indata_list = [boxes.cpu(), scores.cpu(), dets.cpu()]
        indata_dict = {
            'iou_threshold': float(iou_threshold),
            'sigma': float(sigma),
            'min_score': min_score,
            'method': method_dict[method],
            'offset': int(offset)
        }
        inds = ext_module.softnms(*indata_list, **indata_dict)
    else:
        dets, inds = SoftNMSop.apply(boxes.cpu(), scores.cpu(),
                                     float(iou_threshold), float(sigma),
                                     float(min_score), method_dict[method],
                                     int(offset))

    dets = dets[:inds.size(0)]

    if is_numpy:
        dets = dets.cpu().numpy()
        inds = inds.cpu().numpy()
        return dets, inds
    else:
        return dets.to(device=boxes.device), inds.to(device=boxes.device)


def batched_nms(boxes, scores, idxs, nms_cfg, class_agnostic=False):
    """Performs non-maximum suppression in a batched fashion.

    Modified from https://github.com/pytorch/vision/blob
    /505cd6957711af790211896d32b40291bea1bc21/torchvision/ops/boxes.py#L39.
    In order to perform NMS independently per class, we add an offset to all
    the boxes. The offset is dependent only on the class idx, and is large
    enough so that boxes from different classes do not overlap.

    Arguments:
        boxes (torch.Tensor): boxes in shape (N, 4).
        scores (torch.Tensor): scores in shape (N, ).
        idxs (torch.Tensor): each index value corresponds to a bbox cluster,
            and NMS will not be applied between elements of different idxs,
            shape (N, ).
        nms_cfg (dict): specify nms type and other parameters like iou_thr.
            Possible keys include the following.

            - iou_thr (float): IoU threshold used for NMS.
            - split_thr (float): threshold number of boxes. In some cases the
              number of boxes is large (e.g., 200k). To avoid OOM during
              training, the users could set `split_thr` to a small value.
              If the number of boxes is greater than the threshold, it will
              perform NMS on each group of boxes separately and sequentially.
              Defaults to 10000.
        class_agnostic (bool): if true, nms is class agnostic,
            i.e. IoU thresholding happens over all boxes,
            regardless of the predicted class.

    Returns:
        tuple: kept dets and indices.
    """
    nms_cfg_ = nms_cfg.copy()
    class_agnostic = nms_cfg_.pop('class_agnostic', class_agnostic)
    if class_agnostic:
        boxes_for_nms = boxes
    else:
        max_coordinate = boxes.max()
        offsets = idxs.to(boxes) * (max_coordinate + torch.tensor(1).to(boxes))
        boxes_for_nms = boxes + offsets[:, None]

    nms_type = nms_cfg_.pop('type', 'nms')
    nms_op = eval(nms_type)

    split_thr = nms_cfg_.pop('split_thr', 10000)
    # Won't split to multiple nms nodes when exporting to onnx
    if boxes_for_nms.shape[0] < split_thr or torch.onnx.is_in_onnx_export():
        dets, keep = nms_op(boxes_for_nms, scores, **nms_cfg_)
        boxes = boxes[keep]
        # -1 indexing works abnormally in TensorRT
        # This assumes `dets` has 5 dimensions where
        # the last dimension is score.
        # TODO: more elegant way to handle the dimension issue.
        # Some types of nms would reweight the score, such as SoftNMS
        scores = dets[:, 4]
    else:
        max_num = nms_cfg_.pop('max_num', -1)
        total_mask = scores.new_zeros(scores.size(), dtype=torch.bool)
        # Some types of nms would reweight the score, such as SoftNMS
        scores_after_nms = scores.new_zeros(scores.size())
        for id in torch.unique(idxs):
            mask = (idxs == id).nonzero(as_tuple=False).view(-1)
            dets, keep = nms_op(boxes_for_nms[mask], scores[mask], **nms_cfg_)
            total_mask[mask[keep]] = True
            scores_after_nms[mask[keep]] = dets[:, -1]
        keep = total_mask.nonzero(as_tuple=False).view(-1)

        scores, inds = scores_after_nms[keep].sort(descending=True)
        keep = keep[inds]
        boxes = boxes[keep]

        if max_num > 0:
            keep = keep[:max_num]
            boxes = boxes[:max_num]
            scores = scores[:max_num]

    return torch.cat([boxes, scores[:, None]], -1), keep

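# Editor's sketch of `batched_nms` (not part of the original file): boxes
# carrying different `idxs` never suppress each other, so both overlapping
# boxes below are kept. Left as comments since it needs the compiled `_ext`:
# boxes = torch.tensor([[0., 0., 10., 10.], [1., 1., 11., 11.]])
# scores = torch.tensor([0.9, 0.8])
# idxs = torch.tensor([0, 1])  # two different classes
# dets, keep = batched_nms(boxes, scores, idxs,
#                          dict(type='nms', iou_threshold=0.5))
# assert keep.numel() == 2
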

def nms_match(dets, iou_threshold):
    """Matched dets into different groups by NMS.

    NMS match is similar to NMS but when a bbox is suppressed, nms match will
    record the indices of suppressed bboxes and form a group with the index of
    the kept bbox. In each group, indices are sorted in score order.

    Arguments:
        dets (torch.Tensor | np.ndarray): Det boxes with scores, shape (N, 5).
        iou_threshold (float): IoU threshold for NMS.

    Returns:
        List[torch.Tensor | np.ndarray]: The outer list corresponds to
            different matched groups, the inner Tensor corresponds to the
            indices for a group in score order.
    """
    if dets.shape[0] == 0:
        matched = []
    else:
        assert dets.shape[-1] == 5, 'inputs dets.shape should be (N, 5), ' \
                                    f'but get {dets.shape}'
        if isinstance(dets, torch.Tensor):
            dets_t = dets.detach().cpu()
        else:
            dets_t = torch.from_numpy(dets)
        indata_list = [dets_t]
        indata_dict = {'iou_threshold': float(iou_threshold)}
        matched = ext_module.nms_match(*indata_list, **indata_dict)
        if torch.__version__ == 'parrots':
            matched = matched.tolist()

    if isinstance(dets, torch.Tensor):
        return [dets.new_tensor(m, dtype=torch.long) for m in matched]
    else:
        # np.int was removed in NumPy 1.24; use a concrete integer dtype
        return [np.array(m, dtype=np.int64) for m in matched]


def nms_rotated(dets, scores, iou_threshold, labels=None):
    """Performs non-maximum suppression (NMS) on the rotated boxes according
    to their intersection-over-union (IoU).

    Rotated NMS iteratively removes lower scoring rotated boxes which have an
    IoU greater than iou_threshold with another (higher scoring) rotated box.

    Args:
        dets (Tensor): Rotated boxes in shape (N, 5). They are expected to
            be in (x_ctr, y_ctr, width, height, angle_radian) format.
        scores (Tensor): scores in shape (N, ).
        iou_threshold (float): IoU threshold for NMS.
        labels (Tensor): boxes' label in shape (N,).

    Returns:
        tuple: kept dets (boxes and scores) and indices, which are always the
            same data type as the input.
    """
    if dets.shape[0] == 0:
        return dets, None
    multi_label = labels is not None
    if multi_label:
        dets_wl = torch.cat((dets, labels.unsqueeze(1)), 1)
    else:
        dets_wl = dets
    _, order = scores.sort(0, descending=True)
    dets_sorted = dets_wl.index_select(0, order)

    if torch.__version__ == 'parrots':
        keep_inds = ext_module.nms_rotated(
            dets_wl,
            scores,
            order,
            dets_sorted,
            iou_threshold=iou_threshold,
            multi_label=multi_label)
    else:
        keep_inds = ext_module.nms_rotated(dets_wl, scores, order, dets_sorted,
                                           iou_threshold, multi_label)
    dets = torch.cat((dets[keep_inds], scores[keep_inds].reshape(-1, 1)),
                     dim=1)
    return dets, keep_inds
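
A usage sketch for `nms_rotated` (not from the original commit), assuming the compiled `_ext` extension is available and the import path matches this repository layout:

import torch
from annotator.mmpkg.mmcv.ops.nms import nms_rotated

# two heavily overlapping rotated boxes in (x_ctr, y_ctr, w, h, angle) format
dets = torch.tensor([[5.0, 5.0, 4.0, 4.0, 0.0],
                     [5.0, 5.0, 4.0, 4.0, 0.1]])
scores = torch.tensor([0.9, 0.8])
kept, inds = nms_rotated(dets, scores, iou_threshold=0.5)
# only the higher-scoring box should survive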
extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/pixel_group.py
ADDED
@@ -0,0 +1,75 @@
# Copyright (c) OpenMMLab. All rights reserved.
import numpy as np
import torch

from ..utils import ext_loader

ext_module = ext_loader.load_ext('_ext', ['pixel_group'])


def pixel_group(score, mask, embedding, kernel_label, kernel_contour,
                kernel_region_num, distance_threshold):
    """Group pixels into text instances, which is widely used in text
    detection methods.

    Arguments:
        score (np.array or Tensor): The foreground score with size hxw.
        mask (np.array or Tensor): The foreground mask with size hxw.
        embedding (np.array or Tensor): The embedding with size hxwxc to
            distinguish instances.
        kernel_label (np.array or Tensor): The instance kernel index with
            size hxw.
        kernel_contour (np.array or Tensor): The kernel contour with size hxw.
        kernel_region_num (int): The instance kernel region number.
        distance_threshold (float): The embedding distance threshold between
            kernel and pixel in one instance.

    Returns:
        pixel_assignment (List[List[float]]): The instance coordinate list.
            Each element consists of averaged confidence, pixel number, and
            coordinates (x_i, y_i for all pixels) in order.
    """
    assert isinstance(score, (torch.Tensor, np.ndarray))
    assert isinstance(mask, (torch.Tensor, np.ndarray))
    assert isinstance(embedding, (torch.Tensor, np.ndarray))
    assert isinstance(kernel_label, (torch.Tensor, np.ndarray))
    assert isinstance(kernel_contour, (torch.Tensor, np.ndarray))
    assert isinstance(kernel_region_num, int)
    assert isinstance(distance_threshold, float)

    if isinstance(score, np.ndarray):
        score = torch.from_numpy(score)
    if isinstance(mask, np.ndarray):
        mask = torch.from_numpy(mask)
    if isinstance(embedding, np.ndarray):
        embedding = torch.from_numpy(embedding)
    if isinstance(kernel_label, np.ndarray):
        kernel_label = torch.from_numpy(kernel_label)
    if isinstance(kernel_contour, np.ndarray):
        kernel_contour = torch.from_numpy(kernel_contour)

    if torch.__version__ == 'parrots':
        label = ext_module.pixel_group(
            score,
            mask,
            embedding,
            kernel_label,
            kernel_contour,
            kernel_region_num=kernel_region_num,
            distance_threshold=distance_threshold)
        label = label.tolist()
        label = label[0]
        list_index = kernel_region_num
        pixel_assignment = []
        for x in range(kernel_region_num):
            pixel_assignment.append(
                np.array(
                    label[list_index:list_index + int(label[x])],
                    dtype=np.float64))  # np.float was removed in NumPy 1.24
            list_index = list_index + int(label[x])
    else:
        pixel_assignment = ext_module.pixel_group(score, mask, embedding,
                                                  kernel_label, kernel_contour,
                                                  kernel_region_num,
                                                  distance_threshold)
    return pixel_assignment
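
A usage sketch for `pixel_group` (not from the original commit). The dtypes below are plausible guesses, and the call requires the compiled `_ext` extension:

import numpy as np
from annotator.mmpkg.mmcv.ops.pixel_group import pixel_group

h, w, c = 32, 32, 4
score = np.random.rand(h, w).astype(np.float32)
mask = score > 0.5
embedding = np.random.rand(h, w, c).astype(np.float32)
kernel_label = np.zeros((h, w), dtype=np.int32)
kernel_contour = np.zeros((h, w), dtype=np.uint8)
pixel_assignment = pixel_group(score, mask, embedding, kernel_label,
                               kernel_contour, kernel_region_num=1,
                               distance_threshold=0.8)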
extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/point_sample.py
ADDED
@@ -0,0 +1,336 @@
# Modified from https://github.com/facebookresearch/detectron2/tree/master/projects/PointRend  # noqa

from os import path as osp

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.modules.utils import _pair
from torch.onnx.operators import shape_as_tensor


def bilinear_grid_sample(im, grid, align_corners=False):
    """Given an input and a flow-field grid, computes the output using input
    values and pixel locations from grid. Only the bilinear interpolation
    method is supported to sample the input pixels.

    Args:
        im (torch.Tensor): Input feature map, shape (N, C, H, W)
        grid (torch.Tensor): Point coordinates, shape (N, Hg, Wg, 2)
        align_corners (bool): If set to True, the extrema (-1 and 1) are
            considered as referring to the center points of the input's
            corner pixels. If set to False, they are instead considered as
            referring to the corner points of the input's corner pixels,
            making the sampling more resolution agnostic.

    Returns:
        torch.Tensor: A tensor with sampled points, shape (N, C, Hg, Wg)
    """
    n, c, h, w = im.shape
    gn, gh, gw, _ = grid.shape
    assert n == gn

    x = grid[:, :, :, 0]
    y = grid[:, :, :, 1]

    if align_corners:
        x = ((x + 1) / 2) * (w - 1)
        y = ((y + 1) / 2) * (h - 1)
    else:
        x = ((x + 1) * w - 1) / 2
        y = ((y + 1) * h - 1) / 2

    x = x.view(n, -1)
    y = y.view(n, -1)

    x0 = torch.floor(x).long()
    y0 = torch.floor(y).long()
    x1 = x0 + 1
    y1 = y0 + 1

    wa = ((x1 - x) * (y1 - y)).unsqueeze(1)
    wb = ((x1 - x) * (y - y0)).unsqueeze(1)
    wc = ((x - x0) * (y1 - y)).unsqueeze(1)
    wd = ((x - x0) * (y - y0)).unsqueeze(1)

    # Apply default for grid_sample function zero padding
    im_padded = F.pad(im, pad=[1, 1, 1, 1], mode='constant', value=0)
    padded_h = h + 2
    padded_w = w + 2
    # save points positions after padding
    x0, x1, y0, y1 = x0 + 1, x1 + 1, y0 + 1, y1 + 1

    # Clip coordinates to padded image size
    x0 = torch.where(x0 < 0, torch.tensor(0), x0)
    x0 = torch.where(x0 > padded_w - 1, torch.tensor(padded_w - 1), x0)
    x1 = torch.where(x1 < 0, torch.tensor(0), x1)
    x1 = torch.where(x1 > padded_w - 1, torch.tensor(padded_w - 1), x1)
    y0 = torch.where(y0 < 0, torch.tensor(0), y0)
    y0 = torch.where(y0 > padded_h - 1, torch.tensor(padded_h - 1), y0)
    y1 = torch.where(y1 < 0, torch.tensor(0), y1)
    y1 = torch.where(y1 > padded_h - 1, torch.tensor(padded_h - 1), y1)

    im_padded = im_padded.view(n, c, -1)

    x0_y0 = (x0 + y0 * padded_w).unsqueeze(1).expand(-1, c, -1)
    x0_y1 = (x0 + y1 * padded_w).unsqueeze(1).expand(-1, c, -1)
    x1_y0 = (x1 + y0 * padded_w).unsqueeze(1).expand(-1, c, -1)
    x1_y1 = (x1 + y1 * padded_w).unsqueeze(1).expand(-1, c, -1)

    Ia = torch.gather(im_padded, 2, x0_y0)
    Ib = torch.gather(im_padded, 2, x0_y1)
    Ic = torch.gather(im_padded, 2, x1_y0)
    Id = torch.gather(im_padded, 2, x1_y1)

    return (Ia * wa + Ib * wb + Ic * wc + Id * wd).reshape(n, c, gh, gw)

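# Editor's sketch (not in the original file): `bilinear_grid_sample` is meant
# to match `F.grid_sample(..., mode='bilinear', padding_mode='zeros')`, which
# can be checked on random CPU inputs:
# im = torch.rand(1, 3, 8, 8)
# grid = torch.rand(1, 5, 5, 2) * 2 - 1  # coordinates in [-1, 1]
# ref = F.grid_sample(im, grid, mode='bilinear', padding_mode='zeros',
#                     align_corners=False)
# out = bilinear_grid_sample(im, grid, align_corners=False)
# assert torch.allclose(ref, out, atol=1e-5)
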
def is_in_onnx_export_without_custom_ops():
    from annotator.mmpkg.mmcv.ops import get_onnxruntime_op_path
    ort_custom_op_path = get_onnxruntime_op_path()
    return torch.onnx.is_in_onnx_export(
    ) and not osp.exists(ort_custom_op_path)


def normalize(grid):
    """Normalize input grid from [-1, 1] to [0, 1]

    Args:
        grid (Tensor): The grid to be normalized, range [-1, 1].

    Returns:
        Tensor: Normalized grid, range [0, 1].
    """

    return (grid + 1.0) / 2.0


def denormalize(grid):
    """Denormalize input grid from range [0, 1] to [-1, 1]

    Args:
        grid (Tensor): The grid to be denormalized, range [0, 1].

    Returns:
        Tensor: Denormalized grid, range [-1, 1].
    """

    return grid * 2.0 - 1.0


def generate_grid(num_grid, size, device):
    """Generate regular square grid of points in [0, 1] x [0, 1] coordinate
    space.

    Args:
        num_grid (int): The number of grids to sample, one for each region.
        size (tuple(int, int)): The side size of the regular grid.
        device (torch.device): Desired device of returned tensor.

    Returns:
        (torch.Tensor): A tensor of shape (num_grid, size[0]*size[1], 2) that
            contains coordinates for the regular grids.
    """

    affine_trans = torch.tensor([[[1., 0., 0.], [0., 1., 0.]]], device=device)
    grid = F.affine_grid(
        affine_trans, torch.Size((1, 1, *size)), align_corners=False)
    grid = normalize(grid)
    return grid.view(1, -1, 2).expand(num_grid, -1, -1)


def rel_roi_point_to_abs_img_point(rois, rel_roi_points):
    """Convert roi based relative point coordinates to image based absolute
    point coordinates.

    Args:
        rois (Tensor): RoIs or BBoxes, shape (N, 4) or (N, 5)
        rel_roi_points (Tensor): Point coordinates inside RoI, relative to
            RoI, location, range (0, 1), shape (N, P, 2)

    Returns:
        Tensor: Image based absolute point coordinates, shape (N, P, 2)
    """

    with torch.no_grad():
        assert rel_roi_points.size(0) == rois.size(0)
        assert rois.dim() == 2
        assert rel_roi_points.dim() == 3
        assert rel_roi_points.size(2) == 2
        # remove batch idx
        if rois.size(1) == 5:
            rois = rois[:, 1:]
        abs_img_points = rel_roi_points.clone()
        # To avoid an error during exporting to onnx, use independent
        # variables instead of inplace computation
        xs = abs_img_points[:, :, 0] * (rois[:, None, 2] - rois[:, None, 0])
        ys = abs_img_points[:, :, 1] * (rois[:, None, 3] - rois[:, None, 1])
        xs += rois[:, None, 0]
        ys += rois[:, None, 1]
        abs_img_points = torch.stack([xs, ys], dim=2)
    return abs_img_points


def get_shape_from_feature_map(x):
    """Get spatial resolution of input feature map considering exporting to
    onnx mode.

    Args:
        x (torch.Tensor): Input tensor, shape (N, C, H, W)

    Returns:
        torch.Tensor: Spatial resolution (width, height), shape (1, 1, 2)
    """
    if torch.onnx.is_in_onnx_export():
        img_shape = shape_as_tensor(x)[2:].flip(0).view(1, 1, 2).to(
            x.device).float()
    else:
        img_shape = torch.tensor(x.shape[2:]).flip(0).view(1, 1, 2).to(
            x.device).float()
    return img_shape


def abs_img_point_to_rel_img_point(abs_img_points, img, spatial_scale=1.):
    """Convert image based absolute point coordinates to image based relative
    coordinates for sampling.

    Args:
        abs_img_points (Tensor): Image based absolute point coordinates,
            shape (N, P, 2)
        img (tuple/Tensor): (height, width) of image or feature map.
        spatial_scale (float): Scale points by this factor. Default: 1.

    Returns:
        Tensor: Image based relative point coordinates for sampling,
            shape (N, P, 2)
    """

    assert (isinstance(img, tuple) and len(img) == 2) or \
           (isinstance(img, torch.Tensor) and len(img.shape) == 4)

    if isinstance(img, tuple):
        h, w = img
        scale = torch.tensor([w, h],
                             dtype=torch.float,
                             device=abs_img_points.device)
        scale = scale.view(1, 1, 2)
    else:
        scale = get_shape_from_feature_map(img)

    return abs_img_points / scale * spatial_scale


def rel_roi_point_to_rel_img_point(rois,
                                   rel_roi_points,
                                   img,
                                   spatial_scale=1.):
    """Convert roi based relative point coordinates to image based relative
    point coordinates.

    Args:
        rois (Tensor): RoIs or BBoxes, shape (N, 4) or (N, 5)
        rel_roi_points (Tensor): Point coordinates inside RoI, relative to
            RoI, location, range (0, 1), shape (N, P, 2)
        img (tuple/Tensor): (height, width) of image or feature map.
        spatial_scale (float): Scale points by this factor. Default: 1.

    Returns:
        Tensor: Image based relative point coordinates for sampling,
            shape (N, P, 2)
    """

    abs_img_point = rel_roi_point_to_abs_img_point(rois, rel_roi_points)
    rel_img_point = abs_img_point_to_rel_img_point(abs_img_point, img,
                                                   spatial_scale)

    return rel_img_point


def point_sample(input, points, align_corners=False, **kwargs):
    """A wrapper around :func:`grid_sample` to support 3D point_coords tensors
    Unlike :func:`torch.nn.functional.grid_sample` it assumes point_coords to
    lie inside ``[0, 1] x [0, 1]`` square.

    Args:
        input (Tensor): Feature map, shape (N, C, H, W).
        points (Tensor): Image based absolute point coordinates (normalized),
            range [0, 1] x [0, 1], shape (N, P, 2) or (N, Hgrid, Wgrid, 2).
        align_corners (bool): Whether align_corners. Default: False

    Returns:
        Tensor: Features of `point` on `input`, shape (N, C, P) or
            (N, C, Hgrid, Wgrid).
    """

    add_dim = False
    if points.dim() == 3:
        add_dim = True
        points = points.unsqueeze(2)
    if is_in_onnx_export_without_custom_ops():
        # If custom ops for onnx runtime are not compiled, use the python
        # implementation of grid_sample to keep the onnx graph restricted
        # to supported nodes
        output = bilinear_grid_sample(
            input, denormalize(points), align_corners=align_corners)
    else:
        output = F.grid_sample(
            input, denormalize(points), align_corners=align_corners, **kwargs)
    if add_dim:
        output = output.squeeze(3)
    return output

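# Editor's sketch (not in the original file): sampling P points, given in
# normalized [0, 1] x [0, 1] coordinates, from a feature map:
# feats = torch.rand(2, 16, 32, 32)   # (N, C, H, W)
# points = torch.rand(2, 100, 2)      # (N, P, 2) in [0, 1]
# sampled = point_sample(feats, points)
# assert sampled.shape == (2, 16, 100)
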
class SimpleRoIAlign(nn.Module):

    def __init__(self, output_size, spatial_scale, aligned=True):
        """Simple RoI align in PointRend, faster than standard RoIAlign.

        Args:
            output_size (tuple[int]): h, w
            spatial_scale (float): scale the input boxes by this number
            aligned (bool): if False, use the legacy implementation in
                MMDetection, and align_corners=True will be used in
                F.grid_sample. If True, align the results more perfectly.
        """

        super(SimpleRoIAlign, self).__init__()
        self.output_size = _pair(output_size)
        self.spatial_scale = float(spatial_scale)
        # to be consistent with other RoI ops
        self.use_torchvision = False
        self.aligned = aligned

    def forward(self, features, rois):
        num_imgs = features.size(0)
        num_rois = rois.size(0)
        rel_roi_points = generate_grid(
            num_rois, self.output_size, device=rois.device)

        if torch.onnx.is_in_onnx_export():
            rel_img_points = rel_roi_point_to_rel_img_point(
                rois, rel_roi_points, features, self.spatial_scale)
            rel_img_points = rel_img_points.reshape(num_imgs, -1,
                                                    *rel_img_points.shape[1:])
            point_feats = point_sample(
                features, rel_img_points, align_corners=not self.aligned)
            point_feats = point_feats.transpose(1, 2)
        else:
            point_feats = []
            for batch_ind in range(num_imgs):
                # unravel batch dim
                feat = features[batch_ind].unsqueeze(0)
                inds = (rois[:, 0].long() == batch_ind)
                if inds.any():
                    rel_img_points = rel_roi_point_to_rel_img_point(
                        rois[inds], rel_roi_points[inds], feat,
                        self.spatial_scale).unsqueeze(0)
                    point_feat = point_sample(
                        feat, rel_img_points, align_corners=not self.aligned)
                    point_feat = point_feat.squeeze(0).transpose(0, 1)
                    point_feats.append(point_feat)

            point_feats = torch.cat(point_feats, dim=0)

        channels = features.size(1)
        roi_feats = point_feats.reshape(num_rois, channels, *self.output_size)

        return roi_feats

    def __repr__(self):
        format_str = self.__class__.__name__
        format_str += '(output_size={}, spatial_scale={})'.format(
            self.output_size, self.spatial_scale)
        return format_str
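
A usage sketch for `SimpleRoIAlign` (not from the original commit); it runs on CPU with no compiled extension, and the import path below assumes this repository layout:

import torch
from annotator.mmpkg.mmcv.ops.point_sample import SimpleRoIAlign

roi_align = SimpleRoIAlign(output_size=7, spatial_scale=1.0)
feats = torch.rand(1, 16, 32, 32)
# rois are (batch_idx, x1, y1, x2, y2)
rois = torch.tensor([[0., 4., 4., 20., 20.]])
roi_feats = roi_align(feats, rois)
assert roi_feats.shape == (1, 16, 7, 7)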
extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/points_in_boxes.py
ADDED
@@ -0,0 +1,133 @@
import torch

from ..utils import ext_loader

ext_module = ext_loader.load_ext('_ext', [
    'points_in_boxes_part_forward', 'points_in_boxes_cpu_forward',
    'points_in_boxes_all_forward'
])


def points_in_boxes_part(points, boxes):
    """Find the box in which each point is (CUDA).

    Args:
        points (torch.Tensor): [B, M, 3], [x, y, z] in LiDAR/DEPTH coordinate
        boxes (torch.Tensor): [B, T, 7],
            num_valid_boxes <= T, [x, y, z, x_size, y_size, z_size, rz] in
            LiDAR/DEPTH coordinate, (x, y, z) is the bottom center

    Returns:
        box_idxs_of_pts (torch.Tensor): (B, M), default background = -1
    """
    assert points.shape[0] == boxes.shape[0], \
        'Points and boxes should have the same batch size, ' \
        f'but got {points.shape[0]} and {boxes.shape[0]}'
    assert boxes.shape[2] == 7, \
        'boxes dimension should be 7, ' \
        f'but got unexpected shape {boxes.shape[2]}'
    assert points.shape[2] == 3, \
        'points dimension should be 3, ' \
        f'but got unexpected shape {points.shape[2]}'
    batch_size, num_points, _ = points.shape

    box_idxs_of_pts = points.new_zeros((batch_size, num_points),
                                       dtype=torch.int).fill_(-1)

    # If the tensor 'points' or 'boxes' is manually put on a device
    # which is not the current device, some temporary variables
    # will be created on the current device in the cuda op,
    # and the output will be incorrect.
    # Therefore, we force the current device to be the same
    # as the device of the tensors if it was not.
    # Please refer to https://github.com/open-mmlab/mmdetection3d/issues/305
    # for the incorrect output before the fix.
    points_device = points.get_device()
    assert points_device == boxes.get_device(), \
        'Points and boxes should be put on the same device'
    if torch.cuda.current_device() != points_device:
        torch.cuda.set_device(points_device)

    ext_module.points_in_boxes_part_forward(boxes.contiguous(),
                                            points.contiguous(),
                                            box_idxs_of_pts)

    return box_idxs_of_pts


def points_in_boxes_cpu(points, boxes):
    """Find all boxes in which each point is (CPU). The CPU version of
    :meth:`points_in_boxes_all`.

    Args:
        points (torch.Tensor): [B, M, 3], [x, y, z] in
            LiDAR/DEPTH coordinate
        boxes (torch.Tensor): [B, T, 7],
            num_valid_boxes <= T, [x, y, z, x_size, y_size, z_size, rz],
            (x, y, z) is the bottom center.

    Returns:
        box_idxs_of_pts (torch.Tensor): (B, M, T), default background = 0.
    """
    assert points.shape[0] == boxes.shape[0], \
        'Points and boxes should have the same batch size, ' \
        f'but got {points.shape[0]} and {boxes.shape[0]}'
    assert boxes.shape[2] == 7, \
        'boxes dimension should be 7, ' \
        f'but got unexpected shape {boxes.shape[2]}'
    assert points.shape[2] == 3, \
        'points dimension should be 3, ' \
        f'but got unexpected shape {points.shape[2]}'
    batch_size, num_points, _ = points.shape
    num_boxes = boxes.shape[1]

    point_indices = points.new_zeros((batch_size, num_boxes, num_points),
                                     dtype=torch.int)
    for b in range(batch_size):
        ext_module.points_in_boxes_cpu_forward(boxes[b].float().contiguous(),
                                               points[b].float().contiguous(),
                                               point_indices[b])
    point_indices = point_indices.transpose(1, 2)

    return point_indices


def points_in_boxes_all(points, boxes):
    """Find all boxes in which each point is (CUDA).

    Args:
        points (torch.Tensor): [B, M, 3], [x, y, z] in LiDAR/DEPTH coordinate
        boxes (torch.Tensor): [B, T, 7],
            num_valid_boxes <= T, [x, y, z, x_size, y_size, z_size, rz],
            (x, y, z) is the bottom center.

    Returns:
        box_idxs_of_pts (torch.Tensor): (B, M, T), default background = 0.
    """
    assert boxes.shape[0] == points.shape[0], \
        'Points and boxes should have the same batch size, ' \
        f'but got {points.shape[0]} and {boxes.shape[0]}'
    assert boxes.shape[2] == 7, \
        'boxes dimension should be 7, ' \
        f'but got unexpected shape {boxes.shape[2]}'
    assert points.shape[2] == 3, \
        'points dimension should be 3, ' \
        f'but got unexpected shape {points.shape[2]}'
    batch_size, num_points, _ = points.shape
    num_boxes = boxes.shape[1]

    box_idxs_of_pts = points.new_zeros((batch_size, num_points, num_boxes),
                                       dtype=torch.int).fill_(0)

    # Same device-handling reason as in points_in_boxes_part above
    points_device = points.get_device()
    assert points_device == boxes.get_device(), \
        'Points and boxes should be put on the same device'
    if torch.cuda.current_device() != points_device:
        torch.cuda.set_device(points_device)

    ext_module.points_in_boxes_all_forward(boxes.contiguous(),
                                           points.contiguous(),
                                           box_idxs_of_pts)

    return box_idxs_of_pts
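
A usage sketch for `points_in_boxes_cpu` (not from the original commit), assuming the compiled `_ext` extension is available:

import torch
from annotator.mmpkg.mmcv.ops.points_in_boxes import points_in_boxes_cpu

points = torch.tensor([[[0.0, 0.0, 0.0], [5.0, 5.0, 5.0]]])  # (B=1, M=2, 3)
# one axis-aligned box: (x, y, z, x_size, y_size, z_size, rz),
# with (x, y, z) at the bottom center
boxes = torch.tensor([[[0.0, 0.0, -1.0, 2.0, 2.0, 2.0, 0.0]]])  # (1, T=1, 7)
point_indices = points_in_boxes_cpu(points, boxes)  # -> (1, 2, 1)
# point 0 falls inside the box, point 1 does not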
extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/points_sampler.py
ADDED
@@ -0,0 +1,177 @@
from typing import List

import torch
from torch import nn as nn

from annotator.mmpkg.mmcv.runner import force_fp32
from .furthest_point_sample import (furthest_point_sample,
                                    furthest_point_sample_with_dist)


def calc_square_dist(point_feat_a, point_feat_b, norm=True):
    """Calculating square distance between a and b.

    Args:
        point_feat_a (Tensor): (B, N, C) Feature vector of each point.
        point_feat_b (Tensor): (B, M, C) Feature vector of each point.
        norm (Bool, optional): Whether to normalize the distance.
            Default: True.

    Returns:
        Tensor: (B, N, M) Distance between each pair of points.
    """
    num_channel = point_feat_a.shape[-1]
    # [bs, n, 1]
    a_square = torch.sum(point_feat_a.unsqueeze(dim=2).pow(2), dim=-1)
    # [bs, 1, m]
    b_square = torch.sum(point_feat_b.unsqueeze(dim=1).pow(2), dim=-1)

    corr_matrix = torch.matmul(point_feat_a, point_feat_b.transpose(1, 2))

    dist = a_square + b_square - 2 * corr_matrix
    if norm:
        dist = torch.sqrt(dist) / num_channel
    return dist

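# Editor's worked example (not in the original file): with norm=False the
# result is the plain squared Euclidean distance ||a - b||^2, e.g.
# 0 + 3 - 2*0 = 3 for the points below:
# a = torch.zeros(1, 1, 3)
# b = torch.ones(1, 1, 3)
# calc_square_dist(a, b, norm=False)  # tensor([[[3.]]])
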
37 |
+
def get_sampler_cls(sampler_type):
|
38 |
+
"""Get the type and mode of points sampler.
|
39 |
+
|
40 |
+
Args:
|
41 |
+
sampler_type (str): The type of points sampler.
|
42 |
+
The valid value are "D-FPS", "F-FPS", or "FS".
|
43 |
+
|
44 |
+
    Returns:
        class: Points sampler type.
    """
    sampler_mappings = {
        'D-FPS': DFPSSampler,
        'F-FPS': FFPSSampler,
        'FS': FSSampler,
    }
    try:
        return sampler_mappings[sampler_type]
    except KeyError:
        raise KeyError(
            f'Supported `sampler_type` are {sampler_mappings.keys()}, but got \
                {sampler_type}')


class PointsSampler(nn.Module):
    """Points sampling.

    Args:
        num_point (list[int]): Number of sample points.
        fps_mod_list (list[str], optional): Type of FPS method, valid mod
            ['F-FPS', 'D-FPS', 'FS'], Default: ['D-FPS'].
            F-FPS: using feature distances for FPS.
            D-FPS: using Euclidean distances of points for FPS.
            FS: using F-FPS and D-FPS simultaneously.
        fps_sample_range_list (list[int], optional):
            Range of points to apply FPS. Default: [-1].
    """

    def __init__(self,
                 num_point: List[int],
                 fps_mod_list: List[str] = ['D-FPS'],
                 fps_sample_range_list: List[int] = [-1]):
        super().__init__()
        # FPS would be applied to different fps_mod in the list,
        # so the length of the num_point should be equal to
        # fps_mod_list and fps_sample_range_list.
        assert len(num_point) == len(fps_mod_list) == len(
            fps_sample_range_list)
        self.num_point = num_point
        self.fps_sample_range_list = fps_sample_range_list
        self.samplers = nn.ModuleList()
        for fps_mod in fps_mod_list:
            self.samplers.append(get_sampler_cls(fps_mod)())
        self.fp16_enabled = False

    @force_fp32()
    def forward(self, points_xyz, features):
        """
        Args:
            points_xyz (Tensor): (B, N, 3) xyz coordinates of the features.
            features (Tensor): (B, C, N) Descriptors of the features.

        Returns:
            Tensor: (B, npoint, sample_num) Indices of sampled points.
        """
        indices = []
        last_fps_end_index = 0

        for fps_sample_range, sampler, npoint in zip(
                self.fps_sample_range_list, self.samplers, self.num_point):
            assert fps_sample_range < points_xyz.shape[1]

            if fps_sample_range == -1:
                sample_points_xyz = points_xyz[:, last_fps_end_index:]
                if features is not None:
                    sample_features = features[:, :, last_fps_end_index:]
                else:
                    sample_features = None
            else:
                sample_points_xyz = \
                    points_xyz[:, last_fps_end_index:fps_sample_range]
                if features is not None:
                    sample_features = features[:, :, last_fps_end_index:
                                               fps_sample_range]
                else:
                    sample_features = None

            fps_idx = sampler(sample_points_xyz.contiguous(), sample_features,
                              npoint)

            indices.append(fps_idx + last_fps_end_index)
            last_fps_end_index += fps_sample_range
        indices = torch.cat(indices, dim=1)

        return indices


class DFPSSampler(nn.Module):
    """Using Euclidean distances of points for FPS."""

    def __init__(self):
        super().__init__()

    def forward(self, points, features, npoint):
        """Sampling points with D-FPS."""
        fps_idx = furthest_point_sample(points.contiguous(), npoint)
        return fps_idx


class FFPSSampler(nn.Module):
    """Using feature distances for FPS."""

    def __init__(self):
        super().__init__()

    def forward(self, points, features, npoint):
        """Sampling points with F-FPS."""
        assert features is not None, \
            'feature input to FFPS_Sampler should not be None'
        features_for_fps = torch.cat([points, features.transpose(1, 2)], dim=2)
        features_dist = calc_square_dist(
            features_for_fps, features_for_fps, norm=False)
        fps_idx = furthest_point_sample_with_dist(features_dist, npoint)
        return fps_idx


class FSSampler(nn.Module):
    """Using F-FPS and D-FPS simultaneously."""

    def __init__(self):
        super().__init__()

    def forward(self, points, features, npoint):
        """Sampling points with FS_Sampling."""
        assert features is not None, \
            'feature input to FS_Sampler should not be None'
        ffps_sampler = FFPSSampler()
        dfps_sampler = DFPSSampler()
        fps_idx_ffps = ffps_sampler(points, features, npoint)
        fps_idx_dfps = dfps_sampler(points, features, npoint)
        fps_idx = torch.cat([fps_idx_ffps, fps_idx_dfps], dim=1)
        return fps_idx
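A minimal usage sketch for PointsSampler (not part of the diff; the shapes and the CUDA requirement are assumptions based on the docstrings above, since furthest_point_sample is a compiled GPU op):

import torch

# hypothetical shapes; D-FPS only needs coordinates, features may be None
sampler = PointsSampler(num_point=[128],
                        fps_mod_list=['D-FPS'],
                        fps_sample_range_list=[-1]).cuda()
points_xyz = torch.rand(2, 1024, 3).cuda()  # (B, N, 3)
features = torch.rand(2, 16, 1024).cuda()   # (B, C, N)
indices = sampler(points_xyz, features)     # (B, 128) indices into N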
extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/psa_mask.py
ADDED
@@ -0,0 +1,92 @@
# Modified from https://github.com/hszhao/semseg/blob/master/lib/psa
from torch import nn
from torch.autograd import Function
from torch.nn.modules.utils import _pair

from ..utils import ext_loader

ext_module = ext_loader.load_ext('_ext',
                                 ['psamask_forward', 'psamask_backward'])


class PSAMaskFunction(Function):

    @staticmethod
    def symbolic(g, input, psa_type, mask_size):
        return g.op(
            'mmcv::MMCVPSAMask',
            input,
            psa_type_i=psa_type,
            mask_size_i=mask_size)

    @staticmethod
    def forward(ctx, input, psa_type, mask_size):
        ctx.psa_type = psa_type
        ctx.mask_size = _pair(mask_size)
        ctx.save_for_backward(input)

        h_mask, w_mask = ctx.mask_size
        batch_size, channels, h_feature, w_feature = input.size()
        assert channels == h_mask * w_mask
        output = input.new_zeros(
            (batch_size, h_feature * w_feature, h_feature, w_feature))

        ext_module.psamask_forward(
            input,
            output,
            psa_type=psa_type,
            num_=batch_size,
            h_feature=h_feature,
            w_feature=w_feature,
            h_mask=h_mask,
            w_mask=w_mask,
            half_h_mask=(h_mask - 1) // 2,
            half_w_mask=(w_mask - 1) // 2)
        return output

    @staticmethod
    def backward(ctx, grad_output):
        input = ctx.saved_tensors[0]
        psa_type = ctx.psa_type
        h_mask, w_mask = ctx.mask_size
        batch_size, channels, h_feature, w_feature = input.size()
        grad_input = grad_output.new_zeros(
            (batch_size, channels, h_feature, w_feature))
        ext_module.psamask_backward(
            grad_output,
            grad_input,
            psa_type=psa_type,
            num_=batch_size,
            h_feature=h_feature,
            w_feature=w_feature,
            h_mask=h_mask,
            w_mask=w_mask,
            half_h_mask=(h_mask - 1) // 2,
            half_w_mask=(w_mask - 1) // 2)
        return grad_input, None, None, None


psa_mask = PSAMaskFunction.apply


class PSAMask(nn.Module):

    def __init__(self, psa_type, mask_size=None):
        super(PSAMask, self).__init__()
        assert psa_type in ['collect', 'distribute']
        if psa_type == 'collect':
            psa_type_enum = 0
        else:
            psa_type_enum = 1
        self.psa_type_enum = psa_type_enum
        self.mask_size = mask_size
        self.psa_type = psa_type

    def forward(self, input):
        return psa_mask(input, self.psa_type_enum, self.mask_size)

    def __repr__(self):
        s = self.__class__.__name__
        s += f'(psa_type={self.psa_type}, '
        s += f'mask_size={self.mask_size})'
        return s
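A minimal usage sketch for PSAMask (not part of the diff; assumes the compiled `_ext` module is available). The channel count of the input must equal h_mask * w_mask:

import torch

psa = PSAMask('collect', mask_size=(7, 7))
x = torch.rand(1, 49, 16, 16)  # channels = 7 * 7 = 49
out = psa(x)                   # (1, 16 * 16, 16, 16) attention masks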
extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/roi_align.py
ADDED
@@ -0,0 +1,223 @@
# Copyright (c) OpenMMLab. All rights reserved.
import torch
import torch.nn as nn
from torch.autograd import Function
from torch.autograd.function import once_differentiable
from torch.nn.modules.utils import _pair

from ..utils import deprecated_api_warning, ext_loader

ext_module = ext_loader.load_ext('_ext',
                                 ['roi_align_forward', 'roi_align_backward'])


class RoIAlignFunction(Function):

    @staticmethod
    def symbolic(g, input, rois, output_size, spatial_scale, sampling_ratio,
                 pool_mode, aligned):
        from ..onnx import is_custom_op_loaded
        has_custom_op = is_custom_op_loaded()
        if has_custom_op:
            return g.op(
                'mmcv::MMCVRoiAlign',
                input,
                rois,
                output_height_i=output_size[0],
                output_width_i=output_size[1],
                spatial_scale_f=spatial_scale,
                sampling_ratio_i=sampling_ratio,
                mode_s=pool_mode,
                aligned_i=aligned)
        else:
            from torch.onnx.symbolic_opset9 import sub, squeeze
            from torch.onnx.symbolic_helper import _slice_helper
            from torch.onnx import TensorProtoDataType
            # batch_indices = rois[:, 0].long()
            batch_indices = _slice_helper(
                g, rois, axes=[1], starts=[0], ends=[1])
            batch_indices = squeeze(g, batch_indices, 1)
            batch_indices = g.op(
                'Cast', batch_indices, to_i=TensorProtoDataType.INT64)
            # rois = rois[:, 1:]
            rois = _slice_helper(g, rois, axes=[1], starts=[1], ends=[5])
            if aligned:
                # rois -= 0.5/spatial_scale
                aligned_offset = g.op(
                    'Constant',
                    value_t=torch.tensor([0.5 / spatial_scale],
                                         dtype=torch.float32))
                rois = sub(g, rois, aligned_offset)
            # roi align
            return g.op(
                'RoiAlign',
                input,
                rois,
                batch_indices,
                output_height_i=output_size[0],
                output_width_i=output_size[1],
                spatial_scale_f=spatial_scale,
                sampling_ratio_i=max(0, sampling_ratio),
                mode_s=pool_mode)

    @staticmethod
    def forward(ctx,
                input,
                rois,
                output_size,
                spatial_scale=1.0,
                sampling_ratio=0,
                pool_mode='avg',
                aligned=True):
        ctx.output_size = _pair(output_size)
        ctx.spatial_scale = spatial_scale
        ctx.sampling_ratio = sampling_ratio
        assert pool_mode in ('max', 'avg')
        ctx.pool_mode = 0 if pool_mode == 'max' else 1
        ctx.aligned = aligned
        ctx.input_shape = input.size()

        assert rois.size(1) == 5, 'RoI must be (idx, x1, y1, x2, y2)!'

        output_shape = (rois.size(0), input.size(1), ctx.output_size[0],
                        ctx.output_size[1])
        output = input.new_zeros(output_shape)
        if ctx.pool_mode == 0:
            argmax_y = input.new_zeros(output_shape)
            argmax_x = input.new_zeros(output_shape)
        else:
            argmax_y = input.new_zeros(0)
            argmax_x = input.new_zeros(0)

        ext_module.roi_align_forward(
            input,
            rois,
            output,
            argmax_y,
            argmax_x,
            aligned_height=ctx.output_size[0],
            aligned_width=ctx.output_size[1],
            spatial_scale=ctx.spatial_scale,
            sampling_ratio=ctx.sampling_ratio,
            pool_mode=ctx.pool_mode,
            aligned=ctx.aligned)

        ctx.save_for_backward(rois, argmax_y, argmax_x)
        return output

    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        rois, argmax_y, argmax_x = ctx.saved_tensors
        grad_input = grad_output.new_zeros(ctx.input_shape)
        # complex head architecture may cause grad_output uncontiguous.
        grad_output = grad_output.contiguous()
        ext_module.roi_align_backward(
            grad_output,
            rois,
            argmax_y,
            argmax_x,
            grad_input,
            aligned_height=ctx.output_size[0],
            aligned_width=ctx.output_size[1],
            spatial_scale=ctx.spatial_scale,
            sampling_ratio=ctx.sampling_ratio,
            pool_mode=ctx.pool_mode,
            aligned=ctx.aligned)
        return grad_input, None, None, None, None, None, None


roi_align = RoIAlignFunction.apply


class RoIAlign(nn.Module):
    """RoI align pooling layer.

    Args:
        output_size (tuple): h, w
        spatial_scale (float): scale the input boxes by this number
        sampling_ratio (int): number of input samples to take for each
            output sample. 0 to take samples densely for current models.
        pool_mode (str, 'avg' or 'max'): pooling mode in each bin.
        aligned (bool): if False, use the legacy implementation in
            MMDetection. If True, align the results more perfectly.
        use_torchvision (bool): whether to use roi_align from torchvision.

    Note:
        The implementation of RoIAlign when aligned=True is modified from
        https://github.com/facebookresearch/detectron2/

        The meaning of aligned=True:

        Given a continuous coordinate c, its two neighboring pixel
        indices (in our pixel model) are computed by floor(c - 0.5) and
        ceil(c - 0.5). For example, c=1.3 has pixel neighbors with discrete
        indices [0] and [1] (which are sampled from the underlying signal
        at continuous coordinates 0.5 and 1.5). But the original roi_align
        (aligned=False) does not subtract the 0.5 when computing
        neighboring pixel indices and therefore it uses pixels with a
        slightly incorrect alignment (relative to our pixel model) when
        performing bilinear interpolation.

        With `aligned=True`,
        we first appropriately scale the ROI and then shift it by -0.5
        prior to calling roi_align. This produces the correct neighbors.

        The difference does not make a difference to the model's
        performance if ROIAlign is used together with conv layers.
    """

    @deprecated_api_warning(
        {
            'out_size': 'output_size',
            'sample_num': 'sampling_ratio'
        },
        cls_name='RoIAlign')
    def __init__(self,
                 output_size,
                 spatial_scale=1.0,
                 sampling_ratio=0,
                 pool_mode='avg',
                 aligned=True,
                 use_torchvision=False):
        super(RoIAlign, self).__init__()

        self.output_size = _pair(output_size)
        self.spatial_scale = float(spatial_scale)
        self.sampling_ratio = int(sampling_ratio)
        self.pool_mode = pool_mode
        self.aligned = aligned
        self.use_torchvision = use_torchvision

    def forward(self, input, rois):
        """
        Args:
            input: NCHW images
            rois: Bx5 boxes. First column is the index into N.
                The other 4 columns are xyxy.
        """
        if self.use_torchvision:
            from torchvision.ops import roi_align as tv_roi_align
            if 'aligned' in tv_roi_align.__code__.co_varnames:
                return tv_roi_align(input, rois, self.output_size,
                                    self.spatial_scale, self.sampling_ratio,
                                    self.aligned)
            else:
                if self.aligned:
                    rois -= rois.new_tensor([0.] +
                                            [0.5 / self.spatial_scale] * 4)
                return tv_roi_align(input, rois, self.output_size,
                                    self.spatial_scale, self.sampling_ratio)
        else:
            return roi_align(input, rois, self.output_size, self.spatial_scale,
                             self.sampling_ratio, self.pool_mode, self.aligned)

    def __repr__(self):
        s = self.__class__.__name__
        s += f'(output_size={self.output_size}, '
        s += f'spatial_scale={self.spatial_scale}, '
        s += f'sampling_ratio={self.sampling_ratio}, '
        s += f'pool_mode={self.pool_mode}, '
        s += f'aligned={self.aligned}, '
        s += f'use_torchvision={self.use_torchvision})'
        return s
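A minimal usage sketch for RoIAlign (not part of the diff; box values are hypothetical). RoIs are given in input-image coordinates and mapped onto the feature map by spatial_scale:

import torch

roi_layer = RoIAlign(output_size=7, spatial_scale=1.0 / 16,
                     sampling_ratio=0, pool_mode='avg', aligned=True)
feats = torch.rand(1, 256, 32, 32)
# one RoI on image 0: (batch_idx, x1, y1, x2, y2) in input-image coordinates
rois = torch.tensor([[0., 64., 64., 192., 192.]])
pooled = roi_layer(feats, rois)  # (1, 256, 7, 7)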
extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/roi_align_rotated.py
ADDED
@@ -0,0 +1,177 @@
# Copyright (c) OpenMMLab. All rights reserved.
import torch.nn as nn
from torch.autograd import Function

from ..utils import ext_loader

ext_module = ext_loader.load_ext(
    '_ext', ['roi_align_rotated_forward', 'roi_align_rotated_backward'])


class RoIAlignRotatedFunction(Function):

    @staticmethod
    def symbolic(g, features, rois, out_size, spatial_scale, sample_num,
                 aligned, clockwise):
        if isinstance(out_size, int):
            out_h = out_size
            out_w = out_size
        elif isinstance(out_size, tuple):
            assert len(out_size) == 2
            assert isinstance(out_size[0], int)
            assert isinstance(out_size[1], int)
            out_h, out_w = out_size
        else:
            raise TypeError(
                '"out_size" must be an integer or tuple of integers')
        return g.op(
            'mmcv::MMCVRoIAlignRotated',
            features,
            rois,
            output_height_i=out_h,
            output_width_i=out_w,
            spatial_scale_f=spatial_scale,
            sampling_ratio_i=sample_num,
            aligned_i=aligned,
            clockwise_i=clockwise)

    @staticmethod
    def forward(ctx,
                features,
                rois,
                out_size,
                spatial_scale,
                sample_num=0,
                aligned=True,
                clockwise=False):
        if isinstance(out_size, int):
            out_h = out_size
            out_w = out_size
        elif isinstance(out_size, tuple):
            assert len(out_size) == 2
            assert isinstance(out_size[0], int)
            assert isinstance(out_size[1], int)
            out_h, out_w = out_size
        else:
            raise TypeError(
                '"out_size" must be an integer or tuple of integers')
        ctx.spatial_scale = spatial_scale
        ctx.sample_num = sample_num
        ctx.aligned = aligned
        ctx.clockwise = clockwise
        ctx.save_for_backward(rois)
        ctx.feature_size = features.size()

        batch_size, num_channels, data_height, data_width = features.size()
        num_rois = rois.size(0)

        output = features.new_zeros(num_rois, num_channels, out_h, out_w)
        ext_module.roi_align_rotated_forward(
            features,
            rois,
            output,
            pooled_height=out_h,
            pooled_width=out_w,
            spatial_scale=spatial_scale,
            sample_num=sample_num,
            aligned=aligned,
            clockwise=clockwise)
        return output

    @staticmethod
    def backward(ctx, grad_output):
        feature_size = ctx.feature_size
        spatial_scale = ctx.spatial_scale
        aligned = ctx.aligned
        clockwise = ctx.clockwise
        sample_num = ctx.sample_num
        rois = ctx.saved_tensors[0]
        assert feature_size is not None
        batch_size, num_channels, data_height, data_width = feature_size

        out_w = grad_output.size(3)
        out_h = grad_output.size(2)

        grad_input = grad_rois = None

        if ctx.needs_input_grad[0]:
            grad_input = rois.new_zeros(batch_size, num_channels, data_height,
                                        data_width)
            ext_module.roi_align_rotated_backward(
                grad_output.contiguous(),
                rois,
                grad_input,
                pooled_height=out_h,
                pooled_width=out_w,
                spatial_scale=spatial_scale,
                sample_num=sample_num,
                aligned=aligned,
                clockwise=clockwise)
        return grad_input, grad_rois, None, None, None, None, None


roi_align_rotated = RoIAlignRotatedFunction.apply


class RoIAlignRotated(nn.Module):
    """RoI align pooling layer for rotated proposals.

    It accepts a feature map of shape (N, C, H, W) and rois with shape
    (n, 6) with each roi decoded as (batch_index, center_x, center_y,
    w, h, angle). The angle is in radian.

    Args:
        out_size (tuple): h, w
        spatial_scale (float): scale the input boxes by this number
        sample_num (int): number of input samples to take for each
            output sample. 0 to take samples densely for current models.
        aligned (bool): if False, use the legacy implementation in
            MMDetection. If True, align the results more perfectly.
            Default: True.
        clockwise (bool): If True, the angle in each proposal follows a
            clockwise fashion in image space, otherwise, the angle is
            counterclockwise. Default: False.

    Note:
        The implementation of RoIAlign when aligned=True is modified from
        https://github.com/facebookresearch/detectron2/

        The meaning of aligned=True:

        Given a continuous coordinate c, its two neighboring pixel
        indices (in our pixel model) are computed by floor(c - 0.5) and
        ceil(c - 0.5). For example, c=1.3 has pixel neighbors with discrete
        indices [0] and [1] (which are sampled from the underlying signal
        at continuous coordinates 0.5 and 1.5). But the original roi_align
        (aligned=False) does not subtract the 0.5 when computing
        neighboring pixel indices and therefore it uses pixels with a
        slightly incorrect alignment (relative to our pixel model) when
        performing bilinear interpolation.

        With `aligned=True`,
        we first appropriately scale the ROI and then shift it by -0.5
        prior to calling roi_align. This produces the correct neighbors.

        The difference does not make a difference to the model's
        performance if ROIAlign is used together with conv layers.
    """

    def __init__(self,
                 out_size,
                 spatial_scale,
                 sample_num=0,
                 aligned=True,
                 clockwise=False):
        super(RoIAlignRotated, self).__init__()

        self.out_size = out_size
        self.spatial_scale = float(spatial_scale)
        self.sample_num = int(sample_num)
        self.aligned = aligned
        self.clockwise = clockwise

    def forward(self, features, rois):
        return RoIAlignRotatedFunction.apply(features, rois, self.out_size,
                                             self.spatial_scale,
                                             self.sample_num, self.aligned,
                                             self.clockwise)
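A minimal usage sketch for RoIAlignRotated (not part of the diff; the box values are hypothetical). Each RoI carries a center, a size, and an angle in radians:

import torch

roi_layer = RoIAlignRotated(out_size=7, spatial_scale=1.0 / 16)
feats = torch.rand(1, 256, 32, 32)
# (batch_index, center_x, center_y, w, h, angle in radians)
rois = torch.tensor([[0., 128., 128., 96., 64., 0.3]])
pooled = roi_layer(feats, rois)  # (1, 256, 7, 7)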
extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/roi_pool.py
ADDED
@@ -0,0 +1,86 @@
# Copyright (c) OpenMMLab. All rights reserved.
import torch
import torch.nn as nn
from torch.autograd import Function
from torch.autograd.function import once_differentiable
from torch.nn.modules.utils import _pair

from ..utils import ext_loader

ext_module = ext_loader.load_ext('_ext',
                                 ['roi_pool_forward', 'roi_pool_backward'])


class RoIPoolFunction(Function):

    @staticmethod
    def symbolic(g, input, rois, output_size, spatial_scale):
        return g.op(
            'MaxRoiPool',
            input,
            rois,
            pooled_shape_i=output_size,
            spatial_scale_f=spatial_scale)

    @staticmethod
    def forward(ctx, input, rois, output_size, spatial_scale=1.0):
        ctx.output_size = _pair(output_size)
        ctx.spatial_scale = spatial_scale
        ctx.input_shape = input.size()

        assert rois.size(1) == 5, 'RoI must be (idx, x1, y1, x2, y2)!'

        output_shape = (rois.size(0), input.size(1), ctx.output_size[0],
                        ctx.output_size[1])
        output = input.new_zeros(output_shape)
        argmax = input.new_zeros(output_shape, dtype=torch.int)

        ext_module.roi_pool_forward(
            input,
            rois,
            output,
            argmax,
            pooled_height=ctx.output_size[0],
            pooled_width=ctx.output_size[1],
            spatial_scale=ctx.spatial_scale)

        ctx.save_for_backward(rois, argmax)
        return output

    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        rois, argmax = ctx.saved_tensors
        grad_input = grad_output.new_zeros(ctx.input_shape)

        ext_module.roi_pool_backward(
            grad_output,
            rois,
            argmax,
            grad_input,
            pooled_height=ctx.output_size[0],
            pooled_width=ctx.output_size[1],
            spatial_scale=ctx.spatial_scale)

        return grad_input, None, None, None


roi_pool = RoIPoolFunction.apply


class RoIPool(nn.Module):

    def __init__(self, output_size, spatial_scale=1.0):
        super(RoIPool, self).__init__()

        self.output_size = _pair(output_size)
        self.spatial_scale = float(spatial_scale)

    def forward(self, input, rois):
        return roi_pool(input, rois, self.output_size, self.spatial_scale)

    def __repr__(self):
        s = self.__class__.__name__
        s += f'(output_size={self.output_size}, '
        s += f'spatial_scale={self.spatial_scale})'
        return s
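A minimal usage sketch for RoIPool (not part of the diff); unlike RoIAlign, it max-pools each bin without bilinear sampling:

import torch

pool = RoIPool(output_size=7, spatial_scale=1.0 / 16)
feats = torch.rand(1, 256, 32, 32)
rois = torch.tensor([[0., 0., 0., 160., 160.]])  # (idx, x1, y1, x2, y2)
pooled = pool(feats, rois)  # (1, 256, 7, 7)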
extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/roiaware_pool3d.py
ADDED
@@ -0,0 +1,114 @@
# Copyright (c) OpenMMLab. All rights reserved.
import torch
from torch import nn as nn
from torch.autograd import Function

import annotator.mmpkg.mmcv as mmcv
from ..utils import ext_loader

ext_module = ext_loader.load_ext(
    '_ext', ['roiaware_pool3d_forward', 'roiaware_pool3d_backward'])


class RoIAwarePool3d(nn.Module):
    """Encode the geometry-specific features of each 3D proposal.

    Please refer to `PartA2 <https://arxiv.org/pdf/1907.03670.pdf>`_ for more
    details.

    Args:
        out_size (int or tuple): The size of output features. n or
            [n1, n2, n3].
        max_pts_per_voxel (int, optional): The maximum number of points per
            voxel. Default: 128.
        mode (str, optional): Pooling method of RoIAware, 'max' or 'avg'.
            Default: 'max'.
    """

    def __init__(self, out_size, max_pts_per_voxel=128, mode='max'):
        super().__init__()

        self.out_size = out_size
        self.max_pts_per_voxel = max_pts_per_voxel
        assert mode in ['max', 'avg']
        pool_mapping = {'max': 0, 'avg': 1}
        self.mode = pool_mapping[mode]

    def forward(self, rois, pts, pts_feature):
        """
        Args:
            rois (torch.Tensor): [N, 7], in LiDAR coordinate,
                (x, y, z) is the bottom center of rois.
            pts (torch.Tensor): [npoints, 3], coordinates of input points.
            pts_feature (torch.Tensor): [npoints, C], features of input points.

        Returns:
            pooled_features (torch.Tensor): [N, out_x, out_y, out_z, C]
        """

        return RoIAwarePool3dFunction.apply(rois, pts, pts_feature,
                                            self.out_size,
                                            self.max_pts_per_voxel, self.mode)


class RoIAwarePool3dFunction(Function):

    @staticmethod
    def forward(ctx, rois, pts, pts_feature, out_size, max_pts_per_voxel,
                mode):
        """
        Args:
            rois (torch.Tensor): [N, 7], in LiDAR coordinate,
                (x, y, z) is the bottom center of rois.
            pts (torch.Tensor): [npoints, 3], coordinates of input points.
            pts_feature (torch.Tensor): [npoints, C], features of input points.
            out_size (int or tuple): The size of output features. n or
                [n1, n2, n3].
            max_pts_per_voxel (int): The maximum number of points per voxel.
                Default: 128.
            mode (int): Pooling method of RoIAware, 0 (max pool) or 1 (average
                pool).

        Returns:
            pooled_features (torch.Tensor): [N, out_x, out_y, out_z, C], output
                pooled features.
        """

        if isinstance(out_size, int):
            out_x = out_y = out_z = out_size
        else:
            assert len(out_size) == 3
            assert mmcv.is_tuple_of(out_size, int)
            out_x, out_y, out_z = out_size

        num_rois = rois.shape[0]
        num_channels = pts_feature.shape[-1]
        num_pts = pts.shape[0]

        pooled_features = pts_feature.new_zeros(
            (num_rois, out_x, out_y, out_z, num_channels))
        argmax = pts_feature.new_zeros(
            (num_rois, out_x, out_y, out_z, num_channels), dtype=torch.int)
        pts_idx_of_voxels = pts_feature.new_zeros(
            (num_rois, out_x, out_y, out_z, max_pts_per_voxel),
            dtype=torch.int)

        ext_module.roiaware_pool3d_forward(rois, pts, pts_feature, argmax,
                                           pts_idx_of_voxels, pooled_features,
                                           mode)

        ctx.roiaware_pool3d_for_backward = (pts_idx_of_voxels, argmax, mode,
                                            num_pts, num_channels)
        return pooled_features

    @staticmethod
    def backward(ctx, grad_out):
        ret = ctx.roiaware_pool3d_for_backward
        pts_idx_of_voxels, argmax, mode, num_pts, num_channels = ret

        grad_in = grad_out.new_zeros((num_pts, num_channels))
        ext_module.roiaware_pool3d_backward(pts_idx_of_voxels, argmax,
                                            grad_out.contiguous(), grad_in,
                                            mode)

        return None, None, grad_in, None, None, None
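A minimal usage sketch for RoIAwarePool3d (not part of the diff). The docstring only pins down that (x, y, z) is the bottom center of each 7-value box, so the remaining box fields and the CUDA requirement are assumptions here:

import torch

pool3d = RoIAwarePool3d(out_size=4, max_pts_per_voxel=64, mode='max')
rois = torch.rand(2, 7).cuda()             # [N, 7] boxes in LiDAR coordinates
pts = torch.rand(1000, 3).cuda()           # [npoints, 3]
pts_feature = torch.rand(1000, 16).cuda()  # [npoints, C]
pooled = pool3d(rois, pts, pts_feature)    # (2, 4, 4, 4, 16)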
extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/roipoint_pool3d.py
ADDED
@@ -0,0 +1,77 @@
from torch import nn as nn
from torch.autograd import Function

from ..utils import ext_loader

ext_module = ext_loader.load_ext('_ext', ['roipoint_pool3d_forward'])


class RoIPointPool3d(nn.Module):
    """Encode the geometry-specific features of each 3D proposal.

    Please refer to `Paper of PartA2 <https://arxiv.org/pdf/1907.03670.pdf>`_
    for more details.

    Args:
        num_sampled_points (int, optional): Number of samples in each roi.
            Default: 512.
    """

    def __init__(self, num_sampled_points=512):
        super().__init__()
        self.num_sampled_points = num_sampled_points

    def forward(self, points, point_features, boxes3d):
        """
        Args:
            points (torch.Tensor): Input points whose shape is (B, N, 3).
            point_features (torch.Tensor): Features of input points whose
                shape is (B, N, C).
            boxes3d (torch.Tensor): Input bounding boxes whose shape is
                (B, M, 7).

        Returns:
            pooled_features (torch.Tensor): The output pooled features whose
                shape is (B, M, 512, 3 + C).
            pooled_empty_flag (torch.Tensor): Empty flag whose shape is (B, M).
        """
        return RoIPointPool3dFunction.apply(points, point_features, boxes3d,
                                            self.num_sampled_points)


class RoIPointPool3dFunction(Function):

    @staticmethod
    def forward(ctx, points, point_features, boxes3d, num_sampled_points=512):
        """
        Args:
            points (torch.Tensor): Input points whose shape is (B, N, 3).
            point_features (torch.Tensor): Features of input points whose
                shape is (B, N, C).
            boxes3d (torch.Tensor): Input bounding boxes whose shape is
                (B, M, 7).
            num_sampled_points (int, optional): The num of sampled points.
                Default: 512.

        Returns:
            pooled_features (torch.Tensor): The output pooled features whose
                shape is (B, M, 512, 3 + C).
            pooled_empty_flag (torch.Tensor): Empty flag whose shape is (B, M).
        """
        assert len(points.shape) == 3 and points.shape[2] == 3
        batch_size, boxes_num, feature_len = points.shape[0], boxes3d.shape[
            1], point_features.shape[2]
        pooled_boxes3d = boxes3d.view(batch_size, -1, 7)
        pooled_features = point_features.new_zeros(
            (batch_size, boxes_num, num_sampled_points, 3 + feature_len))
        pooled_empty_flag = point_features.new_zeros(
            (batch_size, boxes_num)).int()

        ext_module.roipoint_pool3d_forward(points.contiguous(),
                                           pooled_boxes3d.contiguous(),
                                           point_features.contiguous(),
                                           pooled_features, pooled_empty_flag)

        return pooled_features, pooled_empty_flag

    @staticmethod
    def backward(ctx, grad_out):
        raise NotImplementedError
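A minimal usage sketch for RoIPointPool3d (not part of the diff; shapes are hypothetical and a CUDA build of the ops is assumed). Note the forward-only autograd Function: backward raises NotImplementedError:

import torch

pool = RoIPointPool3d(num_sampled_points=512)
points = torch.rand(1, 2048, 3).cuda()           # (B, N, 3)
point_features = torch.rand(1, 2048, 16).cuda()  # (B, N, C)
boxes3d = torch.rand(1, 4, 7).cuda()             # (B, M, 7)
pooled, empty_flag = pool(points, point_features, boxes3d)
# pooled: (1, 4, 512, 3 + 16); empty_flag: (1, 4)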
extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/saconv.py
ADDED
@@ -0,0 +1,145 @@
# Copyright (c) OpenMMLab. All rights reserved.
import torch
import torch.nn as nn
import torch.nn.functional as F

from annotator.mmpkg.mmcv.cnn import CONV_LAYERS, ConvAWS2d, constant_init
from annotator.mmpkg.mmcv.ops.deform_conv import deform_conv2d
from annotator.mmpkg.mmcv.utils import TORCH_VERSION, digit_version


@CONV_LAYERS.register_module(name='SAC')
class SAConv2d(ConvAWS2d):
    """SAC (Switchable Atrous Convolution)

    This is an implementation of SAC in DetectoRS
    (https://arxiv.org/pdf/2006.02334.pdf).

    Args:
        in_channels (int): Number of channels in the input image
        out_channels (int): Number of channels produced by the convolution
        kernel_size (int or tuple): Size of the convolving kernel
        stride (int or tuple, optional): Stride of the convolution. Default: 1
        padding (int or tuple, optional): Zero-padding added to both sides of
            the input. Default: 0
        padding_mode (string, optional): ``'zeros'``, ``'reflect'``,
            ``'replicate'`` or ``'circular'``. Default: ``'zeros'``
        dilation (int or tuple, optional): Spacing between kernel elements.
            Default: 1
        groups (int, optional): Number of blocked connections from input
            channels to output channels. Default: 1
        bias (bool, optional): If ``True``, adds a learnable bias to the
            output. Default: ``True``
        use_deform: If ``True``, replace convolution with deformable
            convolution. Default: ``False``.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 bias=True,
                 use_deform=False):
        super().__init__(
            in_channels,
            out_channels,
            kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=groups,
            bias=bias)
        self.use_deform = use_deform
        self.switch = nn.Conv2d(
            self.in_channels, 1, kernel_size=1, stride=stride, bias=True)
        self.weight_diff = nn.Parameter(torch.Tensor(self.weight.size()))
        self.pre_context = nn.Conv2d(
            self.in_channels, self.in_channels, kernel_size=1, bias=True)
        self.post_context = nn.Conv2d(
            self.out_channels, self.out_channels, kernel_size=1, bias=True)
        if self.use_deform:
            self.offset_s = nn.Conv2d(
                self.in_channels,
                18,
                kernel_size=3,
                padding=1,
                stride=stride,
                bias=True)
            self.offset_l = nn.Conv2d(
                self.in_channels,
                18,
                kernel_size=3,
                padding=1,
                stride=stride,
                bias=True)
        self.init_weights()

    def init_weights(self):
        constant_init(self.switch, 0, bias=1)
        self.weight_diff.data.zero_()
        constant_init(self.pre_context, 0)
        constant_init(self.post_context, 0)
        if self.use_deform:
            constant_init(self.offset_s, 0)
            constant_init(self.offset_l, 0)

    def forward(self, x):
        # pre-context
        avg_x = F.adaptive_avg_pool2d(x, output_size=1)
        avg_x = self.pre_context(avg_x)
        avg_x = avg_x.expand_as(x)
        x = x + avg_x
        # switch
        avg_x = F.pad(x, pad=(2, 2, 2, 2), mode='reflect')
        avg_x = F.avg_pool2d(avg_x, kernel_size=5, stride=1, padding=0)
        switch = self.switch(avg_x)
        # sac
        weight = self._get_weight(self.weight)
        zero_bias = torch.zeros(
            self.out_channels, device=weight.device, dtype=weight.dtype)

        if self.use_deform:
            offset = self.offset_s(avg_x)
            out_s = deform_conv2d(x, offset, weight, self.stride, self.padding,
                                  self.dilation, self.groups, 1)
        else:
            if (TORCH_VERSION == 'parrots'
                    or digit_version(TORCH_VERSION) < digit_version('1.5.0')):
                out_s = super().conv2d_forward(x, weight)
            elif digit_version(TORCH_VERSION) >= digit_version('1.8.0'):
                # bias is a required argument of _conv_forward in torch 1.8.0
                out_s = super()._conv_forward(x, weight, zero_bias)
            else:
                out_s = super()._conv_forward(x, weight)
        ori_p = self.padding
        ori_d = self.dilation
        self.padding = tuple(3 * p for p in self.padding)
        self.dilation = tuple(3 * d for d in self.dilation)
        weight = weight + self.weight_diff
        if self.use_deform:
            offset = self.offset_l(avg_x)
            out_l = deform_conv2d(x, offset, weight, self.stride, self.padding,
                                  self.dilation, self.groups, 1)
        else:
            if (TORCH_VERSION == 'parrots'
                    or digit_version(TORCH_VERSION) < digit_version('1.5.0')):
                out_l = super().conv2d_forward(x, weight)
            elif digit_version(TORCH_VERSION) >= digit_version('1.8.0'):
                # bias is a required argument of _conv_forward in torch 1.8.0
                out_l = super()._conv_forward(x, weight, zero_bias)
            else:
                out_l = super()._conv_forward(x, weight)

        out = switch * out_s + (1 - switch) * out_l
        self.padding = ori_p
        self.dilation = ori_d
        # post-context
        avg_x = F.adaptive_avg_pool2d(out, output_size=1)
        avg_x = self.post_context(avg_x)
        avg_x = avg_x.expand_as(out)
        out = out + avg_x
        return out
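A minimal usage sketch for SAConv2d (not part of the diff). With use_deform=False it behaves as a drop-in Conv2d replacement; since it is registered in CONV_LAYERS it can also be selected through a conv_cfg=dict(type='SAC') entry:

import torch

conv = SAConv2d(64, 64, kernel_size=3, padding=1)
x = torch.rand(1, 64, 32, 32)
# `switch` blends the dilation-1 and dilation-3 branches per location
out = conv(x)  # (1, 64, 32, 32)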
extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/scatter_points.py
ADDED
@@ -0,0 +1,135 @@
# Copyright (c) OpenMMLab. All rights reserved.
import torch
from torch import nn
from torch.autograd import Function

from ..utils import ext_loader

ext_module = ext_loader.load_ext(
    '_ext',
    ['dynamic_point_to_voxel_forward', 'dynamic_point_to_voxel_backward'])


class _DynamicScatter(Function):

    @staticmethod
    def forward(ctx, feats, coors, reduce_type='max'):
        """convert kitti points(N, >=3) to voxels.

        Args:
            feats (torch.Tensor): [N, C]. Points features to be reduced
                into voxels.
            coors (torch.Tensor): [N, ndim]. Corresponding voxel coordinates
                (specifically multi-dim voxel index) of each point.
            reduce_type (str, optional): Reduce op. support 'max', 'sum' and
                'mean'. Default: 'max'.

        Returns:
            voxel_feats (torch.Tensor): [M, C]. Reduced features, input
                features that share the same voxel coordinates are reduced to
                one row.
            voxel_coors (torch.Tensor): [M, ndim]. Voxel coordinates.
        """
        results = ext_module.dynamic_point_to_voxel_forward(
            feats, coors, reduce_type)
        (voxel_feats, voxel_coors, point2voxel_map,
         voxel_points_count) = results
        ctx.reduce_type = reduce_type
        ctx.save_for_backward(feats, voxel_feats, point2voxel_map,
                              voxel_points_count)
        ctx.mark_non_differentiable(voxel_coors)
        return voxel_feats, voxel_coors

    @staticmethod
    def backward(ctx, grad_voxel_feats, grad_voxel_coors=None):
        (feats, voxel_feats, point2voxel_map,
         voxel_points_count) = ctx.saved_tensors
        grad_feats = torch.zeros_like(feats)
        # TODO: whether to use index put or use cuda_backward
        # To use index put, need point to voxel index
        ext_module.dynamic_point_to_voxel_backward(
            grad_feats, grad_voxel_feats.contiguous(), feats, voxel_feats,
            point2voxel_map, voxel_points_count, ctx.reduce_type)
        return grad_feats, None, None


dynamic_scatter = _DynamicScatter.apply


class DynamicScatter(nn.Module):
    """Scatters points into voxels, used in the voxel encoder with dynamic
    voxelization.

    Note:
        The CPU and GPU implementations get the same output, but have a small
        numerical difference after summation and division (e.g., 5e-7).

    Args:
        voxel_size (list): list [x, y, z] size of three dimension.
        point_cloud_range (list): The coordinate range of points, [x_min,
            y_min, z_min, x_max, y_max, z_max].
        average_points (bool): whether to use avg pooling to scatter points
            into voxel.
    """

    def __init__(self, voxel_size, point_cloud_range, average_points: bool):
        super().__init__()

        self.voxel_size = voxel_size
        self.point_cloud_range = point_cloud_range
        self.average_points = average_points

    def forward_single(self, points, coors):
        """Scatters points into voxels.

        Args:
            points (torch.Tensor): Points to be reduced into voxels.
            coors (torch.Tensor): Corresponding voxel coordinates
                (specifically multi-dim voxel index) of each point.

        Returns:
            voxel_feats (torch.Tensor): Reduced features, input features that
                share the same voxel coordinates are reduced to one row.
            voxel_coors (torch.Tensor): Voxel coordinates.
        """
        reduce = 'mean' if self.average_points else 'max'
        return dynamic_scatter(points.contiguous(), coors.contiguous(), reduce)

    def forward(self, points, coors):
        """Scatters points/features into voxels.

        Args:
            points (torch.Tensor): Points to be reduced into voxels.
            coors (torch.Tensor): Corresponding voxel coordinates
                (specifically multi-dim voxel index) of each point.

        Returns:
            voxel_feats (torch.Tensor): Reduced features, input features that
                share the same voxel coordinates are reduced to one row.
            voxel_coors (torch.Tensor): Voxel coordinates.
        """
        if coors.size(-1) == 3:
            return self.forward_single(points, coors)
        else:
            batch_size = coors[-1, 0] + 1
            voxels, voxel_coors = [], []
            for i in range(batch_size):
                inds = torch.where(coors[:, 0] == i)
                voxel, voxel_coor = self.forward_single(
                    points[inds], coors[inds][:, 1:])
                coor_pad = nn.functional.pad(
                    voxel_coor, (1, 0), mode='constant', value=i)
                voxel_coors.append(coor_pad)
                voxels.append(voxel)
            features = torch.cat(voxels, dim=0)
            feature_coors = torch.cat(voxel_coors, dim=0)

            return features, feature_coors

    def __repr__(self):
        s = self.__class__.__name__ + '('
        s += 'voxel_size=' + str(self.voxel_size)
        s += ', point_cloud_range=' + str(self.point_cloud_range)
        s += ', average_points=' + str(self.average_points)
        s += ')'
        return s
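A minimal usage sketch for DynamicScatter (not part of the diff; the voxel indices below are random placeholders, and a CUDA build of the ops is assumed). With 3-column coors the single-sample path is taken; a 4-column layout treats the first column as the batch index:

import torch

scatter = DynamicScatter(voxel_size=[0.32, 0.32, 6],
                         point_cloud_range=[0, -40, -3, 70.4, 40, 3],
                         average_points=True)
feats = torch.rand(100, 4).cuda()                                 # (N, C)
coors = torch.randint(0, 10, (100, 3), dtype=torch.int32).cuda()  # (N, 3)
voxel_feats, voxel_coors = scatter(feats, coors)  # (M, C), (M, 3)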
extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/sync_bn.py
ADDED
@@ -0,0 +1,279 @@
1 |
+
# Copyright (c) OpenMMLab. All rights reserved.
|
2 |
+
import torch
|
3 |
+
import torch.distributed as dist
|
4 |
+
import torch.nn.functional as F
|
5 |
+
from torch.autograd import Function
|
6 |
+
from torch.autograd.function import once_differentiable
|
7 |
+
from torch.nn.modules.module import Module
|
8 |
+
from torch.nn.parameter import Parameter
|
9 |
+
|
10 |
+
from annotator.mmpkg.mmcv.cnn import NORM_LAYERS
|
11 |
+
from ..utils import ext_loader
|
12 |
+
|
13 |
+
ext_module = ext_loader.load_ext('_ext', [
|
14 |
+
'sync_bn_forward_mean', 'sync_bn_forward_var', 'sync_bn_forward_output',
|
15 |
+
'sync_bn_backward_param', 'sync_bn_backward_data'
|
16 |
+
])
|
17 |
+
|
18 |
+
|
19 |
+
class SyncBatchNormFunction(Function):
|
20 |
+
|
21 |
+
@staticmethod
|
22 |
+
def symbolic(g, input, running_mean, running_var, weight, bias, momentum,
|
23 |
+
eps, group, group_size, stats_mode):
|
24 |
+
return g.op(
|
25 |
+
'mmcv::MMCVSyncBatchNorm',
|
26 |
+
input,
|
27 |
+
running_mean,
|
28 |
+
running_var,
|
29 |
+
weight,
|
30 |
+
bias,
|
31 |
+
momentum_f=momentum,
|
32 |
+
eps_f=eps,
|
33 |
+
group_i=group,
|
34 |
+
group_size_i=group_size,
|
35 |
+
stats_mode=stats_mode)
|
36 |
+
|
37 |
+
@staticmethod
|
38 |
+
def forward(self, input, running_mean, running_var, weight, bias, momentum,
|
39 |
+
eps, group, group_size, stats_mode):
|
40 |
+
self.momentum = momentum
|
41 |
+
self.eps = eps
|
42 |
+
self.group = group
|
43 |
+
self.group_size = group_size
|
44 |
+
self.stats_mode = stats_mode
|
45 |
+
|
46 |
+
assert isinstance(
|
47 |
+
input, (torch.HalfTensor, torch.FloatTensor,
|
48 |
+
torch.cuda.HalfTensor, torch.cuda.FloatTensor)), \
|
49 |
+
f'only support Half or Float Tensor, but {input.type()}'
|
50 |
+
output = torch.zeros_like(input)
|
51 |
+
input3d = input.flatten(start_dim=2)
|
52 |
+
output3d = output.view_as(input3d)
|
53 |
+
num_channels = input3d.size(1)
|
54 |
+
|
55 |
+
# ensure mean/var/norm/std are initialized as zeros
|
56 |
+
# ``torch.empty()`` does not guarantee that
|
57 |
+
mean = torch.zeros(
|
58 |
+
num_channels, dtype=torch.float, device=input3d.device)
|
59 |
+
var = torch.zeros(
|
60 |
+
num_channels, dtype=torch.float, device=input3d.device)
|
61 |
+
norm = torch.zeros_like(
|
62 |
+
input3d, dtype=torch.float, device=input3d.device)
|
63 |
+
std = torch.zeros(
|
64 |
+
num_channels, dtype=torch.float, device=input3d.device)
|
65 |
+
|
66 |
+
batch_size = input3d.size(0)
|
67 |
+
if batch_size > 0:
|
68 |
+
ext_module.sync_bn_forward_mean(input3d, mean)
|
69 |
+
batch_flag = torch.ones([1], device=mean.device, dtype=mean.dtype)
|
70 |
+
else:
|
71 |
+
# skip updating mean and leave it as zeros when the input is empty
|
72 |
+
batch_flag = torch.zeros([1], device=mean.device, dtype=mean.dtype)
|
73 |
+
|
74 |
+
# synchronize mean and the batch flag
|
75 |
+
vec = torch.cat([mean, batch_flag])
|
76 |
+
if self.stats_mode == 'N':
|
77 |
+
vec *= batch_size
|
78 |
+
if self.group_size > 1:
|
79 |
+
dist.all_reduce(vec, group=self.group)
|
80 |
+
total_batch = vec[-1].detach()
|
81 |
+
mean = vec[:num_channels]
|
82 |
+
|
83 |
+
if self.stats_mode == 'default':
|
84 |
+
mean = mean / self.group_size
|
85 |
+
elif self.stats_mode == 'N':
|
86 |
+
mean = mean / total_batch.clamp(min=1)
|
87 |
+
else:
|
88 |
+
raise NotImplementedError
|
89 |
+
|
90 |
+
# leave var as zeros when the input is empty
|
91 |
+
if batch_size > 0:
|
92 |
+
ext_module.sync_bn_forward_var(input3d, mean, var)
|
93 |
+
|
94 |
+
if self.stats_mode == 'N':
|
95 |
+
var *= batch_size
|
96 |
+
if self.group_size > 1:
|
97 |
+
dist.all_reduce(var, group=self.group)
|
98 |
+
|
99 |
+
if self.stats_mode == 'default':
|
100 |
+
var /= self.group_size
|
101 |
+
elif self.stats_mode == 'N':
|
102 |
+
var /= total_batch.clamp(min=1)
|
103 |
+
else:
|
104 |
+
raise NotImplementedError
|
105 |
+
|
106 |
+
# if the total batch size over all the ranks is zero,
|
107 |
+
# we should not update the statistics in the current batch
|
108 |
+
update_flag = total_batch.clamp(max=1)
|
109 |
+
momentum = update_flag * self.momentum
|
110 |
+
ext_module.sync_bn_forward_output(
|
111 |
+
input3d,
|
112 |
+
mean,
|
113 |
+
var,
|
114 |
+
weight,
|
115 |
+
bias,
|
116 |
+
running_mean,
|
117 |
+
running_var,
|
118 |
+
norm,
|
119 |
+
std,
|
120 |
+
output3d,
|
121 |
+
eps=self.eps,
|
122 |
+
momentum=momentum,
|
123 |
+
group_size=self.group_size)
|
124 |
+
self.save_for_backward(norm, std, weight)
|
125 |
+
return output
|
126 |
+
|
127 |
+
@staticmethod
|
128 |
+
@once_differentiable
|
129 |
+
def backward(self, grad_output):
|
130 |
+
norm, std, weight = self.saved_tensors
|
131 |
+
grad_weight = torch.zeros_like(weight)
|
132 |
+
grad_bias = torch.zeros_like(weight)
|
133 |
+
grad_input = torch.zeros_like(grad_output)
|
134 |
+
grad_output3d = grad_output.flatten(start_dim=2)
|
135 |
+
grad_input3d = grad_input.view_as(grad_output3d)
|
136 |
+
|
137 |
+
batch_size = grad_input3d.size(0)
|
138 |
+
if batch_size > 0:
|
139 |
+
ext_module.sync_bn_backward_param(grad_output3d, norm, grad_weight,
|
140 |
+
grad_bias)
|
141 |
+
|
142 |
+
# all reduce
|
143 |
+
if self.group_size > 1:
|
144 |
+
dist.all_reduce(grad_weight, group=self.group)
|
145 |
+
dist.all_reduce(grad_bias, group=self.group)
|
146 |
+
grad_weight /= self.group_size
|
147 |
+
grad_bias /= self.group_size
|
148 |
+
|
149 |
+
if batch_size > 0:
|
150 |
+
ext_module.sync_bn_backward_data(grad_output3d, weight,
|
151 |
+
grad_weight, grad_bias, norm, std,
|
152 |
+
grad_input3d)
|
153 |
+
|
154 |
+
return grad_input, None, None, grad_weight, grad_bias, \
|
155 |
+
None, None, None, None, None
|
156 |
+
|
157 |
+
|
158 |
+
@NORM_LAYERS.register_module(name='MMSyncBN')
|
159 |
+
class SyncBatchNorm(Module):
|
160 |
+
"""Synchronized Batch Normalization.
|
161 |
+
|
162 |
+
Args:
|
163 |
+
num_features (int): number of features/chennels in input tensor
|
164 |
+
eps (float, optional): a value added to the denominator for numerical
|
165 |
+
stability. Defaults to 1e-5.
|
166 |
+
momentum (float, optional): the value used for the running_mean and
|
167 |
+
running_var computation. Defaults to 0.1.
|
168 |
+
affine (bool, optional): whether to use learnable affine parameters.
|
169 |
+
Defaults to True.
|
170 |
+
track_running_stats (bool, optional): whether to track the running
|
171 |
+
mean and variance during training. When set to False, this
|
172 |
+
module does not track such statistics, and initializes statistics
|
173 |
+
buffers ``running_mean`` and ``running_var`` as ``None``. When
|
174 |
+
these buffers are ``None``, this module always uses batch
|
175 |
+
statistics in both training and eval modes. Defaults to True.
|
176 |
+
        group (int, optional): synchronization of stats happens within
            each process group individually. By default it is synchronization
            across the whole world. Defaults to None.
        stats_mode (str, optional): The statistical mode. Available options
            include ``'default'`` and ``'N'``. Defaults to 'default'.
            When ``stats_mode=='default'``, it computes the overall statistics
            using those from each worker with equal weight, i.e., the
            statistics are synchronized and simply divided by ``group``. This
            mode will produce inaccurate statistics when empty tensors occur.
            When ``stats_mode=='N'``, it computes the overall statistics using
            the total number of batches in each worker, ignoring the number of
            groups, i.e., the statistics are synchronized and then divided by
            the total batch ``N``. This mode is beneficial when empty tensors
            occur during training, as it averages the total mean by the real
            number of batches.
    """

    def __init__(self,
                 num_features,
                 eps=1e-5,
                 momentum=0.1,
                 affine=True,
                 track_running_stats=True,
                 group=None,
                 stats_mode='default'):
        super(SyncBatchNorm, self).__init__()
        self.num_features = num_features
        self.eps = eps
        self.momentum = momentum
        self.affine = affine
        self.track_running_stats = track_running_stats
        group = dist.group.WORLD if group is None else group
        self.group = group
        self.group_size = dist.get_world_size(group)
        assert stats_mode in ['default', 'N'], \
            f'"stats_mode" only accepts "default" and "N", got "{stats_mode}"'
        self.stats_mode = stats_mode
        if self.affine:
            self.weight = Parameter(torch.Tensor(num_features))
            self.bias = Parameter(torch.Tensor(num_features))
        else:
            self.register_parameter('weight', None)
            self.register_parameter('bias', None)
        if self.track_running_stats:
            self.register_buffer('running_mean', torch.zeros(num_features))
            self.register_buffer('running_var', torch.ones(num_features))
            self.register_buffer('num_batches_tracked',
                                 torch.tensor(0, dtype=torch.long))
        else:
            self.register_buffer('running_mean', None)
            self.register_buffer('running_var', None)
            self.register_buffer('num_batches_tracked', None)
        self.reset_parameters()

    def reset_running_stats(self):
        if self.track_running_stats:
            self.running_mean.zero_()
            self.running_var.fill_(1)
            self.num_batches_tracked.zero_()

    def reset_parameters(self):
        self.reset_running_stats()
        if self.affine:
            self.weight.data.uniform_()  # pytorch uses ones_()
            self.bias.data.zero_()

    def forward(self, input):
        if input.dim() < 2:
            raise ValueError(
                f'expected at least 2D input, got {input.dim()}D input')
        if self.momentum is None:
            exponential_average_factor = 0.0
        else:
            exponential_average_factor = self.momentum

        if self.training and self.track_running_stats:
            if self.num_batches_tracked is not None:
                self.num_batches_tracked += 1
                if self.momentum is None:  # use cumulative moving average
                    exponential_average_factor = 1.0 / float(
                        self.num_batches_tracked)
                else:  # use exponential moving average
                    exponential_average_factor = self.momentum

        if self.training or not self.track_running_stats:
            return SyncBatchNormFunction.apply(
                input, self.running_mean, self.running_var, self.weight,
                self.bias, exponential_average_factor, self.eps, self.group,
                self.group_size, self.stats_mode)
        else:
            return F.batch_norm(input, self.running_mean, self.running_var,
                                self.weight, self.bias, False,
                                exponential_average_factor, self.eps)

    def __repr__(self):
        s = self.__class__.__name__
        s += f'({self.num_features}, '
        s += f'eps={self.eps}, '
        s += f'momentum={self.momentum}, '
        s += f'affine={self.affine}, '
        s += f'track_running_stats={self.track_running_stats}, '
        s += f'group_size={self.group_size}, '
        s += f'stats_mode={self.stats_mode})'
        return s
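A minimal usage sketch for this layer, assuming the mmcv CUDA extension is compiled and the launcher has already initialized torch.distributed (the constructor calls dist.get_world_size); the shapes and the stats_mode choice here are illustrative only.

import torch
import torch.distributed as dist

# dist.init_process_group(backend='nccl')  # normally done by the launcher
sync_bn = SyncBatchNorm(num_features=64, stats_mode='N')
x = torch.randn(8, 64, 32, 32).cuda()
y = sync_bn(x)  # in training mode, stats are all-reduced across the group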
extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/three_interpolate.py
ADDED
@@ -0,0 +1,68 @@
from typing import Tuple

import torch
from torch.autograd import Function

from ..utils import ext_loader

ext_module = ext_loader.load_ext(
    '_ext', ['three_interpolate_forward', 'three_interpolate_backward'])


class ThreeInterpolate(Function):
    """Performs weighted linear interpolation on 3 features.

    Please refer to `Paper of PointNet++ <https://arxiv.org/abs/1706.02413>`_
    for more details.
    """

    @staticmethod
    def forward(ctx, features: torch.Tensor, indices: torch.Tensor,
                weight: torch.Tensor) -> torch.Tensor:
        """
        Args:
            features (Tensor): (B, C, M) feature descriptors to be
                interpolated.
            indices (Tensor): (B, N, 3) indices of the three nearest
                neighbors of the target features in ``features``.
            weight (Tensor): (B, N, 3) weights of interpolation.

        Returns:
            Tensor: (B, C, N) tensor of the interpolated features.
        """
        assert features.is_contiguous()
        assert indices.is_contiguous()
        assert weight.is_contiguous()

        B, c, m = features.size()
        n = indices.size(1)
        ctx.three_interpolate_for_backward = (indices, weight, m)
        output = torch.cuda.FloatTensor(B, c, n)

        ext_module.three_interpolate_forward(
            features, indices, weight, output, b=B, c=c, m=m, n=n)
        return output

    @staticmethod
    def backward(
        ctx, grad_out: torch.Tensor
    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
        """
        Args:
            grad_out (Tensor): (B, C, N) tensor with gradients of outputs.

        Returns:
            Tensor: (B, C, M) tensor with gradients of features.
        """
        idx, weight, m = ctx.three_interpolate_for_backward
        B, c, n = grad_out.size()

        grad_features = torch.cuda.FloatTensor(B, c, m).zero_()
        grad_out_data = grad_out.data.contiguous()

        ext_module.three_interpolate_backward(
            grad_out_data, idx, weight, grad_features.data, b=B, c=c, n=n, m=m)
        return grad_features, None, None


three_interpolate = ThreeInterpolate.apply
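Illustrative shapes for three_interpolate, assuming CUDA tensors and the compiled _ext module; in practice the indices and weights would come from three_nn rather than being random as they are here.

import torch

B, C, M, N = 2, 16, 100, 400
features = torch.randn(B, C, M).cuda()
indices = torch.randint(0, M, (B, N, 3)).int().cuda()
weight = torch.rand(B, N, 3).cuda()
weight = weight / weight.sum(dim=2, keepdim=True)  # normalize the 3 weights
out = three_interpolate(features, indices, weight)  # (B, C, N)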
extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/three_nn.py
ADDED
@@ -0,0 +1,51 @@
from typing import Tuple

import torch
from torch.autograd import Function

from ..utils import ext_loader

ext_module = ext_loader.load_ext('_ext', ['three_nn_forward'])


class ThreeNN(Function):
    """Find the top-3 nearest neighbors of the target set from the source set.

    Please refer to `Paper of PointNet++ <https://arxiv.org/abs/1706.02413>`_
    for more details.
    """

    @staticmethod
    def forward(ctx, target: torch.Tensor,
                source: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Args:
            target (Tensor): shape (B, N, 3), points set that needs to
                find the nearest neighbors.
            source (Tensor): shape (B, M, 3), points set that is used
                to find the nearest neighbors of points in the target set.

        Returns:
            Tuple[Tensor, Tensor]: a (B, N, 3) tensor with the L2 distance of
                each point in the target set to its three nearest neighbors,
                and a (B, N, 3) tensor with their indices in the source set.
        """
        target = target.contiguous()
        source = source.contiguous()

        B, N, _ = target.size()
        m = source.size(1)
        dist2 = torch.cuda.FloatTensor(B, N, 3)
        idx = torch.cuda.IntTensor(B, N, 3)

        ext_module.three_nn_forward(target, source, dist2, idx, b=B, n=N, m=m)
        if torch.__version__ != 'parrots':
            ctx.mark_non_differentiable(idx)

        return torch.sqrt(dist2), idx

    @staticmethod
    def backward(ctx, a=None, b=None):
        return None, None


three_nn = ThreeNN.apply
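A small sketch of three_nn, again assuming CUDA tensors; the returned distances and indices are exactly what three_interpolate above expects, and the inverse-distance weighting shown is the usual PointNet++ recipe, not something this file enforces.

import torch

target = torch.randn(2, 400, 3).cuda()   # points to interpolate onto
source = torch.randn(2, 100, 3).cuda()   # points carrying the features
dist, idx = three_nn(target, source)     # both (2, 400, 3)
weight = 1.0 / (dist + 1e-8)
weight = weight / weight.sum(dim=2, keepdim=True)  # inverse-distance weights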
extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/tin_shift.py
ADDED
@@ -0,0 +1,68 @@
# Copyright (c) OpenMMLab. All rights reserved.
# Code reference from "Temporal Interlacing Network"
# https://github.com/deepcs233/TIN/blob/master/cuda_shift/rtc_wrap.py
# Hao Shao, Shengju Qian, Yu Liu
# shaoh19@mails.tsinghua.edu.cn, sjqian@cse.cuhk.edu.hk, yuliu@ee.cuhk.edu.hk

import torch
import torch.nn as nn
from torch.autograd import Function

from ..utils import ext_loader

ext_module = ext_loader.load_ext('_ext',
                                 ['tin_shift_forward', 'tin_shift_backward'])


class TINShiftFunction(Function):

    @staticmethod
    def forward(ctx, input, shift):
        C = input.size(2)
        num_segments = shift.size(1)
        if C // num_segments <= 0 or C % num_segments != 0:
            raise ValueError('C should be a multiple of num_segments, '
                             f'but got C={C} and num_segments={num_segments}.')

        ctx.save_for_backward(shift)

        out = torch.zeros_like(input)
        ext_module.tin_shift_forward(input, shift, out)

        return out

    @staticmethod
    def backward(ctx, grad_output):

        shift = ctx.saved_tensors[0]
        data_grad_input = grad_output.new(*grad_output.size()).zero_()
        shift_grad_input = shift.new(*shift.size()).zero_()
        ext_module.tin_shift_backward(grad_output, shift, data_grad_input)

        return data_grad_input, shift_grad_input


tin_shift = TINShiftFunction.apply


class TINShift(nn.Module):
    """Temporal Interlace Shift.

    Temporal Interlace shift is a differentiable temporal-wise frame shifting
    which is proposed in "Temporal Interlacing Network".

    Please refer to https://arxiv.org/abs/2001.06499 for more details.
    Code is modified from https://github.com/mit-han-lab/temporal-shift-module
    """

    def forward(self, input, shift):
        """Perform temporal interlace shift.

        Args:
            input (Tensor): Feature map with shape [N, num_segments, C, H * W].
            shift (Tensor): Shift tensor with shape [N, num_segments].

        Returns:
            Feature map after temporal interlace shift.
        """
        return tin_shift(input, shift)
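A hypothetical call to TINShift; the shapes follow the docstring above, C must be a multiple of num_segments (otherwise TINShiftFunction raises ValueError), and the int dtype for the shift tensor is an assumption about what the compiled op expects.

import torch

N, num_segments, C, HW = 4, 8, 64, 56 * 56
feat = torch.randn(N, num_segments, C, HW).cuda()
shift = torch.randint(-2, 3, (N, num_segments)).int().cuda()
shifted = TINShift()(feat, shift)  # same shape as feat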
extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/upfirdn2d.py
ADDED
@@ -0,0 +1,330 @@
# modified from https://github.com/rosinality/stylegan2-pytorch/blob/master/op/upfirdn2d.py  # noqa:E501

# Copyright (c) 2021, NVIDIA Corporation. All rights reserved.
# NVIDIA Source Code License for StyleGAN2 with Adaptive Discriminator
# Augmentation (ADA)
# =======================================================================

# 1. Definitions

# "Licensor" means any person or entity that distributes its Work.

# "Software" means the original work of authorship made available under
# this License.

# "Work" means the Software and any additions to or derivative works of
# the Software that are made available under this License.

# The terms "reproduce," "reproduction," "derivative works," and
# "distribution" have the meaning as provided under U.S. copyright law;
# provided, however, that for the purposes of this License, derivative
# works shall not include works that remain separable from, or merely
# link (or bind by name) to the interfaces of, the Work.

# Works, including the Software, are "made available" under this License
# by including in or with the Work either (a) a copyright notice
# referencing the applicability of this License to the Work, or (b) a
# copy of this License.

# 2. License Grants

# 2.1 Copyright Grant. Subject to the terms and conditions of this
# License, each Licensor grants to you a perpetual, worldwide,
# non-exclusive, royalty-free, copyright license to reproduce,
# prepare derivative works of, publicly display, publicly perform,
# sublicense and distribute its Work and any resulting derivative
# works in any form.

# 3. Limitations

# 3.1 Redistribution. You may reproduce or distribute the Work only
# if (a) you do so under this License, (b) you include a complete
# copy of this License with your distribution, and (c) you retain
# without modification any copyright, patent, trademark, or
# attribution notices that are present in the Work.

# 3.2 Derivative Works. You may specify that additional or different
# terms apply to the use, reproduction, and distribution of your
# derivative works of the Work ("Your Terms") only if (a) Your Terms
# provide that the use limitation in Section 3.3 applies to your
# derivative works, and (b) you identify the specific derivative
# works that are subject to Your Terms. Notwithstanding Your Terms,
# this License (including the redistribution requirements in Section
# 3.1) will continue to apply to the Work itself.

# 3.3 Use Limitation. The Work and any derivative works thereof only
# may be used or intended for use non-commercially. Notwithstanding
# the foregoing, NVIDIA and its affiliates may use the Work and any
# derivative works commercially. As used herein, "non-commercially"
# means for research or evaluation purposes only.

# 3.4 Patent Claims. If you bring or threaten to bring a patent claim
# against any Licensor (including any claim, cross-claim or
# counterclaim in a lawsuit) to enforce any patents that you allege
# are infringed by any Work, then your rights under this License from
# such Licensor (including the grant in Section 2.1) will terminate
# immediately.

# 3.5 Trademarks. This License does not grant any rights to use any
# Licensor’s or its affiliates’ names, logos, or trademarks, except
# as necessary to reproduce the notices described in this License.

# 3.6 Termination. If you violate any term of this License, then your
# rights under this License (including the grant in Section 2.1) will
# terminate immediately.

# 4. Disclaimer of Warranty.

# THE WORK IS PROVIDED "AS IS" WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR
# NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER
# THIS LICENSE.

# 5. Limitation of Liability.

# EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL
# THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE
# SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT,
# INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF
# OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK
# (INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION,
# LOST PROFITS OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER
# COMMERCIAL DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF
# THE POSSIBILITY OF SUCH DAMAGES.

# =======================================================================

import torch
from torch.autograd import Function
from torch.nn import functional as F

from annotator.mmpkg.mmcv.utils import to_2tuple
from ..utils import ext_loader

upfirdn2d_ext = ext_loader.load_ext('_ext', ['upfirdn2d'])


class UpFirDn2dBackward(Function):

    @staticmethod
    def forward(ctx, grad_output, kernel, grad_kernel, up, down, pad, g_pad,
                in_size, out_size):

        up_x, up_y = up
        down_x, down_y = down
        g_pad_x0, g_pad_x1, g_pad_y0, g_pad_y1 = g_pad

        grad_output = grad_output.reshape(-1, out_size[0], out_size[1], 1)

        grad_input = upfirdn2d_ext.upfirdn2d(
            grad_output,
            grad_kernel,
            up_x=down_x,
            up_y=down_y,
            down_x=up_x,
            down_y=up_y,
            pad_x0=g_pad_x0,
            pad_x1=g_pad_x1,
            pad_y0=g_pad_y0,
            pad_y1=g_pad_y1)
        grad_input = grad_input.view(in_size[0], in_size[1], in_size[2],
                                     in_size[3])

        ctx.save_for_backward(kernel)

        pad_x0, pad_x1, pad_y0, pad_y1 = pad

        ctx.up_x = up_x
        ctx.up_y = up_y
        ctx.down_x = down_x
        ctx.down_y = down_y
        ctx.pad_x0 = pad_x0
        ctx.pad_x1 = pad_x1
        ctx.pad_y0 = pad_y0
        ctx.pad_y1 = pad_y1
        ctx.in_size = in_size
        ctx.out_size = out_size

        return grad_input

    @staticmethod
    def backward(ctx, gradgrad_input):
        kernel, = ctx.saved_tensors

        gradgrad_input = gradgrad_input.reshape(-1, ctx.in_size[2],
                                                ctx.in_size[3], 1)

        gradgrad_out = upfirdn2d_ext.upfirdn2d(
            gradgrad_input,
            kernel,
            up_x=ctx.up_x,
            up_y=ctx.up_y,
            down_x=ctx.down_x,
            down_y=ctx.down_y,
            pad_x0=ctx.pad_x0,
            pad_x1=ctx.pad_x1,
            pad_y0=ctx.pad_y0,
            pad_y1=ctx.pad_y1)
        # gradgrad_out = gradgrad_out.view(ctx.in_size[0], ctx.out_size[0],
        #                                  ctx.out_size[1], ctx.in_size[3])
        gradgrad_out = gradgrad_out.view(ctx.in_size[0], ctx.in_size[1],
                                         ctx.out_size[0], ctx.out_size[1])

        return gradgrad_out, None, None, None, None, None, None, None, None


class UpFirDn2d(Function):

    @staticmethod
    def forward(ctx, input, kernel, up, down, pad):
        up_x, up_y = up
        down_x, down_y = down
        pad_x0, pad_x1, pad_y0, pad_y1 = pad

        kernel_h, kernel_w = kernel.shape
        batch, channel, in_h, in_w = input.shape
        ctx.in_size = input.shape

        input = input.reshape(-1, in_h, in_w, 1)

        ctx.save_for_backward(kernel, torch.flip(kernel, [0, 1]))

        out_h = (in_h * up_y + pad_y0 + pad_y1 - kernel_h) // down_y + 1
        out_w = (in_w * up_x + pad_x0 + pad_x1 - kernel_w) // down_x + 1
        ctx.out_size = (out_h, out_w)

        ctx.up = (up_x, up_y)
        ctx.down = (down_x, down_y)
        ctx.pad = (pad_x0, pad_x1, pad_y0, pad_y1)

        g_pad_x0 = kernel_w - pad_x0 - 1
        g_pad_y0 = kernel_h - pad_y0 - 1
        g_pad_x1 = in_w * up_x - out_w * down_x + pad_x0 - up_x + 1
        g_pad_y1 = in_h * up_y - out_h * down_y + pad_y0 - up_y + 1

        ctx.g_pad = (g_pad_x0, g_pad_x1, g_pad_y0, g_pad_y1)

        out = upfirdn2d_ext.upfirdn2d(
            input,
            kernel,
            up_x=up_x,
            up_y=up_y,
            down_x=down_x,
            down_y=down_y,
            pad_x0=pad_x0,
            pad_x1=pad_x1,
            pad_y0=pad_y0,
            pad_y1=pad_y1)
        # out = out.view(major, out_h, out_w, minor)
        out = out.view(-1, channel, out_h, out_w)

        return out

    @staticmethod
    def backward(ctx, grad_output):
        kernel, grad_kernel = ctx.saved_tensors

        grad_input = UpFirDn2dBackward.apply(
            grad_output,
            kernel,
            grad_kernel,
            ctx.up,
            ctx.down,
            ctx.pad,
            ctx.g_pad,
            ctx.in_size,
            ctx.out_size,
        )

        return grad_input, None, None, None, None


def upfirdn2d(input, kernel, up=1, down=1, pad=(0, 0)):
    """UpFIRDn for 2d features.

    UpFIRDn is short for upsample, apply FIR filter and downsample. More
    details can be found in:
    https://www.mathworks.com/help/signal/ref/upfirdn.html

    Args:
        input (Tensor): Tensor with shape of (n, c, h, w).
        kernel (Tensor): Filter kernel.
        up (int | tuple[int], optional): Upsampling factor. If given a number,
            we will use this factor for both the height and width sides.
            Defaults to 1.
        down (int | tuple[int], optional): Downsampling factor. If given a
            number, we will use this factor for both the height and width
            sides. Defaults to 1.
        pad (tuple[int], optional): Padding for tensors, (x_pad, y_pad) or
            (x_pad_0, x_pad_1, y_pad_0, y_pad_1). Defaults to (0, 0).

    Returns:
        Tensor: Tensor after UpFIRDn.
    """
    if input.device.type == 'cpu':
        if len(pad) == 2:
            pad = (pad[0], pad[1], pad[0], pad[1])

        up = to_2tuple(up)
        down = to_2tuple(down)

        out = upfirdn2d_native(input, kernel, up[0], up[1], down[0], down[1],
                               pad[0], pad[1], pad[2], pad[3])
    else:
        _up = to_2tuple(up)
        _down = to_2tuple(down)

        if len(pad) == 4:
            _pad = pad
        elif len(pad) == 2:
            _pad = (pad[0], pad[1], pad[0], pad[1])

        out = UpFirDn2d.apply(input, kernel, _up, _down, _pad)

    return out


def upfirdn2d_native(input, kernel, up_x, up_y, down_x, down_y, pad_x0, pad_x1,
                     pad_y0, pad_y1):
    _, channel, in_h, in_w = input.shape
    input = input.reshape(-1, in_h, in_w, 1)

    _, in_h, in_w, minor = input.shape
    kernel_h, kernel_w = kernel.shape

    out = input.view(-1, in_h, 1, in_w, 1, minor)
    out = F.pad(out, [0, 0, 0, up_x - 1, 0, 0, 0, up_y - 1])
    out = out.view(-1, in_h * up_y, in_w * up_x, minor)

    out = F.pad(
        out,
        [0, 0,
         max(pad_x0, 0),
         max(pad_x1, 0),
         max(pad_y0, 0),
         max(pad_y1, 0)])
    out = out[:,
              max(-pad_y0, 0):out.shape[1] - max(-pad_y1, 0),
              max(-pad_x0, 0):out.shape[2] - max(-pad_x1, 0), :]

    out = out.permute(0, 3, 1, 2)
    out = out.reshape(
        [-1, 1, in_h * up_y + pad_y0 + pad_y1, in_w * up_x + pad_x0 + pad_x1])
    w = torch.flip(kernel, [0, 1]).view(1, 1, kernel_h, kernel_w)
    out = F.conv2d(out, w)
    out = out.reshape(
        -1,
        minor,
        in_h * up_y + pad_y0 + pad_y1 - kernel_h + 1,
        in_w * up_x + pad_x0 + pad_x1 - kernel_w + 1,
    )
    out = out.permute(0, 2, 3, 1)
    out = out[:, ::down_y, ::down_x, :]

    out_h = (in_h * up_y + pad_y0 + pad_y1 - kernel_h) // down_y + 1
    out_w = (in_w * up_x + pad_x0 + pad_x1 - kernel_w) // down_x + 1

    return out.view(-1, channel, out_h, out_w)
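A sketch of a typical upfirdn2d call as used for StyleGAN2-style blurred upsampling; the separable [1, 3, 3, 1] kernel and the (2, 1) padding are illustrative choices, not anything mandated by this file. On CPU inputs the pure-PyTorch upfirdn2d_native path runs, so no extension is needed for this example.

import torch

k = torch.tensor([1., 3., 3., 1.])
kernel = torch.outer(k, k)
kernel = kernel / kernel.sum()
x = torch.randn(1, 3, 64, 64)  # CPU tensor -> native fallback
y = upfirdn2d(x, kernel, up=2, down=1, pad=(2, 1))  # (1, 3, 128, 128)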
extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/ops/voxelize.py
ADDED
@@ -0,0 +1,132 @@
# Copyright (c) OpenMMLab. All rights reserved.
import torch
from torch import nn
from torch.autograd import Function
from torch.nn.modules.utils import _pair

from ..utils import ext_loader

ext_module = ext_loader.load_ext(
    '_ext', ['dynamic_voxelize_forward', 'hard_voxelize_forward'])


class _Voxelization(Function):

    @staticmethod
    def forward(ctx,
                points,
                voxel_size,
                coors_range,
                max_points=35,
                max_voxels=20000):
        """Convert KITTI points (N, >=3) to voxels.

        Args:
            points (torch.Tensor): [N, ndim]. points[:, :3] contain xyz
                points and points[:, 3:] contain other information like
                reflectivity.
            voxel_size (tuple or float): The size of voxel with the shape of
                [3].
            coors_range (tuple or float): The coordinate range of voxel with
                the shape of [6].
            max_points (int, optional): Maximum points contained in a voxel.
                If max_points=-1, it means using dynamic_voxelize.
                Default: 35.
            max_voxels (int, optional): Maximum voxels this function creates.
                For SECOND, 20000 is a good choice. Users should shuffle
                points before calling this function because max_voxels may
                drop points. Default: 20000.

        Returns:
            voxels_out (torch.Tensor): Output voxels with the shape of [M,
                max_points, ndim]. Only contains points and is only returned
                when max_points != -1.
            coors_out (torch.Tensor): Output coordinates with the shape of
                [M, 3].
            num_points_per_voxel_out (torch.Tensor): Num points per voxel
                with the shape of [M]. Only returned when max_points != -1.
        """
        if max_points == -1 or max_voxels == -1:
            coors = points.new_zeros(size=(points.size(0), 3), dtype=torch.int)
            ext_module.dynamic_voxelize_forward(points, coors, voxel_size,
                                                coors_range, 3)
            return coors
        else:
            voxels = points.new_zeros(
                size=(max_voxels, max_points, points.size(1)))
            coors = points.new_zeros(size=(max_voxels, 3), dtype=torch.int)
            num_points_per_voxel = points.new_zeros(
                size=(max_voxels, ), dtype=torch.int)
            voxel_num = ext_module.hard_voxelize_forward(
                points, voxels, coors, num_points_per_voxel, voxel_size,
                coors_range, max_points, max_voxels, 3)
            # select the valid voxels
            voxels_out = voxels[:voxel_num]
            coors_out = coors[:voxel_num]
            num_points_per_voxel_out = num_points_per_voxel[:voxel_num]
            return voxels_out, coors_out, num_points_per_voxel_out


voxelization = _Voxelization.apply


class Voxelization(nn.Module):
    """Convert KITTI points (N, >=3) to voxels.

    Please refer to `PVCNN <https://arxiv.org/abs/1907.03739>`_ for more
    details.

    Args:
        voxel_size (tuple or float): The size of voxel with the shape of [3].
        point_cloud_range (tuple or float): The coordinate range of voxel
            with the shape of [6].
        max_num_points (int): Maximum points contained in a voxel. If
            max_points=-1, it means using dynamic_voxelize.
        max_voxels (int, optional): Maximum voxels this function creates.
            For SECOND, 20000 is a good choice. Users should shuffle points
            before calling this function because max_voxels may drop points.
            Default: 20000.
    """

    def __init__(self,
                 voxel_size,
                 point_cloud_range,
                 max_num_points,
                 max_voxels=20000):
        super().__init__()

        self.voxel_size = voxel_size
        self.point_cloud_range = point_cloud_range
        self.max_num_points = max_num_points
        if isinstance(max_voxels, tuple):
            self.max_voxels = max_voxels
        else:
            self.max_voxels = _pair(max_voxels)

        point_cloud_range = torch.tensor(
            point_cloud_range, dtype=torch.float32)
        voxel_size = torch.tensor(voxel_size, dtype=torch.float32)
        grid_size = (point_cloud_range[3:] -
                     point_cloud_range[:3]) / voxel_size
        grid_size = torch.round(grid_size).long()
        input_feat_shape = grid_size[:2]
        self.grid_size = grid_size
        # the origin shape is as [x-len, y-len, z-len]
        # [w, h, d] -> [d, h, w]
        self.pcd_shape = [*input_feat_shape, 1][::-1]

    def forward(self, input):
        if self.training:
            max_voxels = self.max_voxels[0]
        else:
            max_voxels = self.max_voxels[1]

        return voxelization(input, self.voxel_size, self.point_cloud_range,
                            self.max_num_points, max_voxels)

    def __repr__(self):
        s = self.__class__.__name__ + '('
        s += 'voxel_size=' + str(self.voxel_size)
        s += ', point_cloud_range=' + str(self.point_cloud_range)
        s += ', max_num_points=' + str(self.max_num_points)
        s += ', max_voxels=' + str(self.max_voxels)
        s += ')'
        return s
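An illustrative hard-voxelization setup, assuming a CUDA build of the extension; the voxel size and range below mimic common KITTI-style configs but are only example values.

import torch

voxel_layer = Voxelization(
    voxel_size=[0.05, 0.05, 0.1],
    point_cloud_range=[0, -40, -3, 70.4, 40, 1],
    max_num_points=5,
    max_voxels=20000)
points = torch.rand(10000, 4).cuda()  # xyz + reflectivity
voxels, coors, num_points = voxel_layer(points)  # valid voxels only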
extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/parallel/__init__.py
ADDED
@@ -0,0 +1,13 @@
# Copyright (c) OpenMMLab. All rights reserved.
from .collate import collate
from .data_container import DataContainer
from .data_parallel import MMDataParallel
from .distributed import MMDistributedDataParallel
from .registry import MODULE_WRAPPERS
from .scatter_gather import scatter, scatter_kwargs
from .utils import is_module_wrapper

__all__ = [
    'collate', 'DataContainer', 'MMDataParallel', 'MMDistributedDataParallel',
    'scatter', 'scatter_kwargs', 'is_module_wrapper', 'MODULE_WRAPPERS'
]
extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/parallel/_functions.py
ADDED
@@ -0,0 +1,79 @@
# Copyright (c) OpenMMLab. All rights reserved.
import torch
from torch.nn.parallel._functions import _get_stream


def scatter(input, devices, streams=None):
    """Scatters tensor across multiple GPUs."""
    if streams is None:
        streams = [None] * len(devices)

    if isinstance(input, list):
        chunk_size = (len(input) - 1) // len(devices) + 1
        outputs = [
            scatter(input[i], [devices[i // chunk_size]],
                    [streams[i // chunk_size]]) for i in range(len(input))
        ]
        return outputs
    elif isinstance(input, torch.Tensor):
        output = input.contiguous()
        # TODO: copy to a pinned buffer first (if copying from CPU)
        stream = streams[0] if output.numel() > 0 else None
        if devices != [-1]:
            with torch.cuda.device(devices[0]), torch.cuda.stream(stream):
                output = output.cuda(devices[0], non_blocking=True)
        else:
            # unsqueeze the first dimension so that the tensor's shape is
            # the same as those scattered to GPU.
            output = output.unsqueeze(0)
        return output
    else:
        raise Exception(f'Unknown type {type(input)}.')


def synchronize_stream(output, devices, streams):
    if isinstance(output, list):
        chunk_size = len(output) // len(devices)
        for i in range(len(devices)):
            for j in range(chunk_size):
                synchronize_stream(output[i * chunk_size + j], [devices[i]],
                                   [streams[i]])
    elif isinstance(output, torch.Tensor):
        if output.numel() != 0:
            with torch.cuda.device(devices[0]):
                main_stream = torch.cuda.current_stream()
                main_stream.wait_stream(streams[0])
                output.record_stream(main_stream)
    else:
        raise Exception(f'Unknown type {type(output)}.')


def get_input_device(input):
    if isinstance(input, list):
        for item in input:
            input_device = get_input_device(item)
            if input_device != -1:
                return input_device
        return -1
    elif isinstance(input, torch.Tensor):
        return input.get_device() if input.is_cuda else -1
    else:
        raise Exception(f'Unknown type {type(input)}.')


class Scatter:

    @staticmethod
    def forward(target_gpus, input):
        input_device = get_input_device(input)
        streams = None
        if input_device == -1 and target_gpus != [-1]:
            # Perform CPU to GPU copies in a background stream
            streams = [_get_stream(device) for device in target_gpus]

        outputs = scatter(input, target_gpus, streams)
        # Synchronize with the copy stream
        if streams is not None:
            synchronize_stream(outputs, target_gpus, streams)

        return tuple(outputs)
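A minimal sketch of Scatter.forward with a list input, assuming at least one CUDA device; CPU inputs are copied to the target GPU on a background stream and then synchronized. Note that the private _get_stream helper has changed signature across torch versions, so this is illustrative rather than version-guaranteed.

import torch

data = [torch.randn(4, 3), torch.randn(4, 3)]  # CPU tensors
outputs = Scatter.forward(target_gpus=[0], input=data)
# outputs is a tuple of two tensors, both now on cuda:0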