07eae2c8a060ad18170f8523a98c20ce4f75351729e2e5d459e54c2763a900fe
- extensions/microsoftexcel-controlnet/annotator/mmpkg/mmseg/models/necks/multilevel_neck.py +70 -0
- extensions/microsoftexcel-controlnet/annotator/mmpkg/mmseg/models/segmentors/__init__.py +5 -0
- extensions/microsoftexcel-controlnet/annotator/mmpkg/mmseg/models/segmentors/base.py +273 -0
- extensions/microsoftexcel-controlnet/annotator/mmpkg/mmseg/models/segmentors/cascade_encoder_decoder.py +98 -0
- extensions/microsoftexcel-controlnet/annotator/mmpkg/mmseg/models/segmentors/encoder_decoder.py +298 -0
- extensions/microsoftexcel-controlnet/annotator/mmpkg/mmseg/models/utils/__init__.py +13 -0
- extensions/microsoftexcel-controlnet/annotator/mmpkg/mmseg/models/utils/drop.py +31 -0
- extensions/microsoftexcel-controlnet/annotator/mmpkg/mmseg/models/utils/inverted_residual.py +208 -0
- extensions/microsoftexcel-controlnet/annotator/mmpkg/mmseg/models/utils/make_divisible.py +27 -0
- extensions/microsoftexcel-controlnet/annotator/mmpkg/mmseg/models/utils/res_layer.py +94 -0
- extensions/microsoftexcel-controlnet/annotator/mmpkg/mmseg/models/utils/se_layer.py +57 -0
- extensions/microsoftexcel-controlnet/annotator/mmpkg/mmseg/models/utils/self_attention_block.py +159 -0
- extensions/microsoftexcel-controlnet/annotator/mmpkg/mmseg/models/utils/up_conv_block.py +101 -0
- extensions/microsoftexcel-controlnet/annotator/mmpkg/mmseg/models/utils/weight_init.py +62 -0
- extensions/microsoftexcel-controlnet/annotator/mmpkg/mmseg/ops/__init__.py +4 -0
- extensions/microsoftexcel-controlnet/annotator/mmpkg/mmseg/ops/encoding.py +74 -0
- extensions/microsoftexcel-controlnet/annotator/mmpkg/mmseg/ops/wrappers.py +50 -0
- extensions/microsoftexcel-controlnet/annotator/mmpkg/mmseg/utils/__init__.py +4 -0
- extensions/microsoftexcel-controlnet/annotator/mmpkg/mmseg/utils/collect_env.py +17 -0
- extensions/microsoftexcel-controlnet/annotator/mmpkg/mmseg/utils/logger.py +27 -0
- extensions/microsoftexcel-controlnet/annotator/normalbae/LICENSE +21 -0
- extensions/microsoftexcel-controlnet/annotator/normalbae/__init__.py +81 -0
- extensions/microsoftexcel-controlnet/annotator/normalbae/models/NNET.py +22 -0
- extensions/microsoftexcel-controlnet/annotator/normalbae/models/baseline.py +85 -0
- extensions/microsoftexcel-controlnet/annotator/normalbae/models/submodules/decoder.py +202 -0
- extensions/microsoftexcel-controlnet/annotator/normalbae/models/submodules/efficientnet_repo/.gitignore +109 -0
- extensions/microsoftexcel-controlnet/annotator/normalbae/models/submodules/efficientnet_repo/BENCHMARK.md +555 -0
- extensions/microsoftexcel-controlnet/annotator/normalbae/models/submodules/efficientnet_repo/LICENSE +201 -0
- extensions/microsoftexcel-controlnet/annotator/normalbae/models/submodules/efficientnet_repo/README.md +323 -0
- extensions/microsoftexcel-controlnet/annotator/normalbae/models/submodules/efficientnet_repo/caffe2_benchmark.py +65 -0
- extensions/microsoftexcel-controlnet/annotator/normalbae/models/submodules/efficientnet_repo/caffe2_validate.py +138 -0
- extensions/microsoftexcel-controlnet/annotator/normalbae/models/submodules/efficientnet_repo/geffnet/__init__.py +5 -0
- extensions/microsoftexcel-controlnet/annotator/normalbae/models/submodules/efficientnet_repo/geffnet/activations/__init__.py +137 -0
- extensions/microsoftexcel-controlnet/annotator/normalbae/models/submodules/efficientnet_repo/geffnet/activations/activations.py +102 -0
- extensions/microsoftexcel-controlnet/annotator/normalbae/models/submodules/efficientnet_repo/geffnet/activations/activations_jit.py +79 -0
- extensions/microsoftexcel-controlnet/annotator/normalbae/models/submodules/efficientnet_repo/geffnet/activations/activations_me.py +174 -0
- extensions/microsoftexcel-controlnet/annotator/normalbae/models/submodules/efficientnet_repo/geffnet/config.py +123 -0
- extensions/microsoftexcel-controlnet/annotator/normalbae/models/submodules/efficientnet_repo/geffnet/conv2d_layers.py +304 -0
- extensions/microsoftexcel-controlnet/annotator/normalbae/models/submodules/efficientnet_repo/geffnet/efficientnet_builder.py +683 -0
- extensions/microsoftexcel-controlnet/annotator/normalbae/models/submodules/efficientnet_repo/geffnet/gen_efficientnet.py +1450 -0
- extensions/microsoftexcel-controlnet/annotator/normalbae/models/submodules/efficientnet_repo/geffnet/helpers.py +71 -0
- extensions/microsoftexcel-controlnet/annotator/normalbae/models/submodules/efficientnet_repo/geffnet/mobilenetv3.py +364 -0
- extensions/microsoftexcel-controlnet/annotator/normalbae/models/submodules/efficientnet_repo/geffnet/model_factory.py +27 -0
- extensions/microsoftexcel-controlnet/annotator/normalbae/models/submodules/efficientnet_repo/geffnet/version.py +1 -0
- extensions/microsoftexcel-controlnet/annotator/normalbae/models/submodules/efficientnet_repo/hubconf.py +84 -0
- extensions/microsoftexcel-controlnet/annotator/normalbae/models/submodules/efficientnet_repo/onnx_export.py +120 -0
- extensions/microsoftexcel-controlnet/annotator/normalbae/models/submodules/efficientnet_repo/onnx_optimize.py +84 -0
- extensions/microsoftexcel-controlnet/annotator/normalbae/models/submodules/efficientnet_repo/onnx_to_caffe.py +27 -0
- extensions/microsoftexcel-controlnet/annotator/normalbae/models/submodules/efficientnet_repo/onnx_validate.py +112 -0
- extensions/microsoftexcel-controlnet/annotator/normalbae/models/submodules/efficientnet_repo/requirements.txt +2 -0
extensions/microsoftexcel-controlnet/annotator/mmpkg/mmseg/models/necks/multilevel_neck.py
ADDED
@@ -0,0 +1,70 @@
import torch.nn as nn
import torch.nn.functional as F
from annotator.mmpkg.mmcv.cnn import ConvModule

from ..builder import NECKS


@NECKS.register_module()
class MultiLevelNeck(nn.Module):
    """MultiLevelNeck.

    A neck structure that connects a ViT backbone and decoder heads.

    Args:
        in_channels (List[int]): Number of input channels per scale.
        out_channels (int): Number of output channels (used at each scale).
        scales (List[int]): Scale factors for each input feature map.
        norm_cfg (dict): Config dict for normalization layer. Default: None.
        act_cfg (dict): Config dict for activation layer in ConvModule.
            Default: None.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 scales=[0.5, 1, 2, 4],
                 norm_cfg=None,
                 act_cfg=None):
        super(MultiLevelNeck, self).__init__()
        assert isinstance(in_channels, list)
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.scales = scales
        self.num_outs = len(scales)
        self.lateral_convs = nn.ModuleList()
        self.convs = nn.ModuleList()
        for in_channel in in_channels:
            self.lateral_convs.append(
                ConvModule(
                    in_channel,
                    out_channels,
                    kernel_size=1,
                    norm_cfg=norm_cfg,
                    act_cfg=act_cfg))
        for _ in range(self.num_outs):
            self.convs.append(
                ConvModule(
                    out_channels,
                    out_channels,
                    kernel_size=3,
                    padding=1,
                    stride=1,
                    norm_cfg=norm_cfg,
                    act_cfg=act_cfg))

    def forward(self, inputs):
        assert len(inputs) == len(self.in_channels)
        inputs = [
            lateral_conv(inputs[i])
            for i, lateral_conv in enumerate(self.lateral_convs)
        ]
        # for len(inputs) not equal to self.num_outs
        if len(inputs) == 1:
            inputs = [inputs[0] for _ in range(self.num_outs)]
        outs = []
        for i in range(self.num_outs):
            x_resize = F.interpolate(
                inputs[i], scale_factor=self.scales[i], mode='bilinear')
            outs.append(self.convs[i](x_resize))
        return tuple(outs)
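The forward pass above resizes each lateral-conv output by its scale factor before the final 3x3 convs. A minimal, self-contained sketch of that resizing step (plain torch, no mmcv dependency; the 256-channel tensor is an illustrative stand-in for a lateral-conv output):

import torch
import torch.nn.functional as F

x = torch.randn(1, 256, 32, 32)  # stand-in for a lateral-conv output with out_channels=256
for scale in [0.5, 1, 2, 4]:     # the default `scales`
    y = F.interpolate(x, scale_factor=scale, mode='bilinear')
    print(scale, tuple(y.shape))
# 0.5 -> (1, 256, 16, 16), 1 -> (1, 256, 32, 32),
# 2   -> (1, 256, 64, 64), 4 -> (1, 256, 128, 128)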
extensions/microsoftexcel-controlnet/annotator/mmpkg/mmseg/models/segmentors/__init__.py
ADDED
@@ -0,0 +1,5 @@
from .base import BaseSegmentor
from .cascade_encoder_decoder import CascadeEncoderDecoder
from .encoder_decoder import EncoderDecoder

__all__ = ['BaseSegmentor', 'EncoderDecoder', 'CascadeEncoderDecoder']
extensions/microsoftexcel-controlnet/annotator/mmpkg/mmseg/models/segmentors/base.py
ADDED
@@ -0,0 +1,273 @@
import logging
import warnings
from abc import ABCMeta, abstractmethod
from collections import OrderedDict

import annotator.mmpkg.mmcv as mmcv
import numpy as np
import torch
import torch.distributed as dist
import torch.nn as nn
from annotator.mmpkg.mmcv.runner import auto_fp16


class BaseSegmentor(nn.Module):
    """Base class for segmentors."""

    __metaclass__ = ABCMeta

    def __init__(self):
        super(BaseSegmentor, self).__init__()
        self.fp16_enabled = False

    @property
    def with_neck(self):
        """bool: whether the segmentor has neck"""
        return hasattr(self, 'neck') and self.neck is not None

    @property
    def with_auxiliary_head(self):
        """bool: whether the segmentor has auxiliary head"""
        return hasattr(self,
                       'auxiliary_head') and self.auxiliary_head is not None

    @property
    def with_decode_head(self):
        """bool: whether the segmentor has decode head"""
        return hasattr(self, 'decode_head') and self.decode_head is not None

    @abstractmethod
    def extract_feat(self, imgs):
        """Placeholder for extracting features from images."""
        pass

    @abstractmethod
    def encode_decode(self, img, img_metas):
        """Placeholder for encoding images with backbone and decoding into a
        semantic segmentation map of the same size as input."""
        pass

    @abstractmethod
    def forward_train(self, imgs, img_metas, **kwargs):
        """Placeholder for forward function for training."""
        pass

    @abstractmethod
    def simple_test(self, img, img_meta, **kwargs):
        """Placeholder for single image test."""
        pass

    @abstractmethod
    def aug_test(self, imgs, img_metas, **kwargs):
        """Placeholder for augmentation test."""
        pass

    def init_weights(self, pretrained=None):
        """Initialize the weights in segmentor.

        Args:
            pretrained (str, optional): Path to pre-trained weights.
                Defaults to None.
        """
        if pretrained is not None:
            logger = logging.getLogger()
            logger.info(f'load model from: {pretrained}')

    def forward_test(self, imgs, img_metas, **kwargs):
        """
        Args:
            imgs (List[Tensor]): the outer list indicates test-time
                augmentations and inner Tensor should have a shape NxCxHxW,
                which contains all images in the batch.
            img_metas (List[List[dict]]): the outer list indicates test-time
                augs (multiscale, flip, etc.) and the inner list indicates
                images in a batch.
        """
        for var, name in [(imgs, 'imgs'), (img_metas, 'img_metas')]:
            if not isinstance(var, list):
                raise TypeError(f'{name} must be a list, but got '
                                f'{type(var)}')

        num_augs = len(imgs)
        if num_augs != len(img_metas):
            raise ValueError(f'num of augmentations ({len(imgs)}) != '
                             f'num of image meta ({len(img_metas)})')
        # all images in the same aug batch must share the same ori_shape and
        # pad shape
        for img_meta in img_metas:
            ori_shapes = [_['ori_shape'] for _ in img_meta]
            assert all(shape == ori_shapes[0] for shape in ori_shapes)
            img_shapes = [_['img_shape'] for _ in img_meta]
            assert all(shape == img_shapes[0] for shape in img_shapes)
            pad_shapes = [_['pad_shape'] for _ in img_meta]
            assert all(shape == pad_shapes[0] for shape in pad_shapes)

        if num_augs == 1:
            return self.simple_test(imgs[0], img_metas[0], **kwargs)
        else:
            return self.aug_test(imgs, img_metas, **kwargs)

    @auto_fp16(apply_to=('img', ))
    def forward(self, img, img_metas, return_loss=True, **kwargs):
        """Calls either :func:`forward_train` or :func:`forward_test` depending
        on whether ``return_loss`` is ``True``.

        Note this setting will change the expected inputs. When
        ``return_loss=True``, img and img_meta are single-nested (i.e. Tensor
        and List[dict]), and when ``return_loss=False``, img and img_meta
        should be double nested (i.e. List[Tensor], List[List[dict]]), with
        the outer list indicating test time augmentations.
        """
        if return_loss:
            return self.forward_train(img, img_metas, **kwargs)
        else:
            return self.forward_test(img, img_metas, **kwargs)

    def train_step(self, data_batch, optimizer, **kwargs):
        """The iteration step during training.

        This method defines an iteration step during training, except for the
        back propagation and optimizer updating, which are done in an optimizer
        hook. Note that in some complicated cases or models, the whole process
        including back propagation and optimizer updating is also defined in
        this method, such as GAN.

        Args:
            data_batch (dict): The output of dataloader.
            optimizer (:obj:`torch.optim.Optimizer` | dict): The optimizer of
                runner is passed to ``train_step()``. This argument is unused
                and reserved.

        Returns:
            dict: It should contain at least 3 keys: ``loss``, ``log_vars``,
                ``num_samples``.
                ``loss`` is a tensor for back propagation, which can be a
                weighted sum of multiple losses.
                ``log_vars`` contains all the variables to be sent to the
                logger.
                ``num_samples`` indicates the batch size (when the model is
                DDP, it means the batch size on each GPU), which is used for
                averaging the logs.
        """
        losses = self(**data_batch)
        loss, log_vars = self._parse_losses(losses)

        outputs = dict(
            loss=loss,
            log_vars=log_vars,
            num_samples=len(data_batch['img_metas']))

        return outputs

    def val_step(self, data_batch, **kwargs):
        """The iteration step during validation.

        This method shares the same signature as :func:`train_step`, but is
        used during val epochs. Note that the evaluation after training epochs
        is not implemented with this method, but with an evaluation hook.
        """
        output = self(**data_batch, **kwargs)
        return output

    @staticmethod
    def _parse_losses(losses):
        """Parse the raw outputs (losses) of the network.

        Args:
            losses (dict): Raw output of the network, which usually contain
                losses and other necessary information.

        Returns:
            tuple[Tensor, dict]: (loss, log_vars), loss is the loss tensor
                which may be a weighted sum of all losses, log_vars contains
                all the variables to be sent to the logger.
        """
        log_vars = OrderedDict()
        for loss_name, loss_value in losses.items():
            if isinstance(loss_value, torch.Tensor):
                log_vars[loss_name] = loss_value.mean()
            elif isinstance(loss_value, list):
                log_vars[loss_name] = sum(_loss.mean() for _loss in loss_value)
            else:
                raise TypeError(
                    f'{loss_name} is not a tensor or list of tensors')

        loss = sum(_value for _key, _value in log_vars.items()
                   if 'loss' in _key)

        log_vars['loss'] = loss
        for loss_name, loss_value in log_vars.items():
            # reduce loss when distributed training
            if dist.is_available() and dist.is_initialized():
                loss_value = loss_value.data.clone()
                dist.all_reduce(loss_value.div_(dist.get_world_size()))
            log_vars[loss_name] = loss_value.item()

        return loss, log_vars

    def show_result(self,
                    img,
                    result,
                    palette=None,
                    win_name='',
                    show=False,
                    wait_time=0,
                    out_file=None,
                    opacity=0.5):
        """Draw `result` over `img`.

        Args:
            img (str or Tensor): The image to be displayed.
            result (Tensor): The semantic segmentation results to draw over
                `img`.
            palette (list[list[int]] | np.ndarray | None): The palette of
                segmentation map. If None is given, a random palette will be
                generated. Default: None
            win_name (str): The window name.
            wait_time (int): Value of waitKey param.
                Default: 0.
            show (bool): Whether to show the image.
                Default: False.
            out_file (str or None): The filename to write the image.
                Default: None.
            opacity (float): Opacity of painted segmentation map.
                Default 0.5. Must be in (0, 1] range.

        Returns:
            img (Tensor): Only if not `show` or `out_file`
        """
        img = mmcv.imread(img)
        img = img.copy()
        seg = result[0]
        if palette is None:
            if self.PALETTE is None:
                palette = np.random.randint(
                    0, 255, size=(len(self.CLASSES), 3))
            else:
                palette = self.PALETTE
        palette = np.array(palette)
        assert palette.shape[0] == len(self.CLASSES)
        assert palette.shape[1] == 3
        assert len(palette.shape) == 2
        assert 0 < opacity <= 1.0
        color_seg = np.zeros((seg.shape[0], seg.shape[1], 3), dtype=np.uint8)
        for label, color in enumerate(palette):
            color_seg[seg == label, :] = color
        # convert to BGR
        color_seg = color_seg[..., ::-1]

        img = img * (1 - opacity) + color_seg * opacity
        img = img.astype(np.uint8)
        # if out_file specified, do not show image in window
        if out_file is not None:
            show = False

        if show:
            mmcv.imshow(img, win_name, wait_time)
        if out_file is not None:
            mmcv.imwrite(img, out_file)

        if not (show or out_file):
            warnings.warn('show==False and out_file is not specified, only '
                          'result image will be returned')
        return img
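The key contract here is `_parse_losses`: only dict keys containing 'loss' are summed into the backprop target, everything else is logged only. A minimal sketch of that behavior (single-process case, no distributed reduce; the key names are illustrative):

from collections import OrderedDict
import torch

losses = {
    'decode.loss_seg': torch.tensor(0.8),
    'aux.loss_seg': [torch.tensor(0.2), torch.tensor(0.1)],  # list entries are mean-summed
    'decode.acc_seg': torch.tensor(93.0),  # logged but not summed: key lacks 'loss'
}
log_vars = OrderedDict()
for name, value in losses.items():
    log_vars[name] = value.mean() if isinstance(value, torch.Tensor) \
        else sum(v.mean() for v in value)
loss = sum(v for k, v in log_vars.items() if 'loss' in k)
print(float(loss))  # ~1.1: only the two 'loss' keys contribute to backprop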
extensions/microsoftexcel-controlnet/annotator/mmpkg/mmseg/models/segmentors/cascade_encoder_decoder.py
ADDED
@@ -0,0 +1,98 @@
from torch import nn

from annotator.mmpkg.mmseg.core import add_prefix
from annotator.mmpkg.mmseg.ops import resize
from .. import builder
from ..builder import SEGMENTORS
from .encoder_decoder import EncoderDecoder


@SEGMENTORS.register_module()
class CascadeEncoderDecoder(EncoderDecoder):
    """Cascade Encoder Decoder segmentors.

    CascadeEncoderDecoder is almost the same as EncoderDecoder, except that
    its decoders are cascaded: the output of the previous decode_head is fed
    as input to the next decode_head.
    """

    def __init__(self,
                 num_stages,
                 backbone,
                 decode_head,
                 neck=None,
                 auxiliary_head=None,
                 train_cfg=None,
                 test_cfg=None,
                 pretrained=None):
        self.num_stages = num_stages
        super(CascadeEncoderDecoder, self).__init__(
            backbone=backbone,
            decode_head=decode_head,
            neck=neck,
            auxiliary_head=auxiliary_head,
            train_cfg=train_cfg,
            test_cfg=test_cfg,
            pretrained=pretrained)

    def _init_decode_head(self, decode_head):
        """Initialize ``decode_head``"""
        assert isinstance(decode_head, list)
        assert len(decode_head) == self.num_stages
        self.decode_head = nn.ModuleList()
        for i in range(self.num_stages):
            self.decode_head.append(builder.build_head(decode_head[i]))
        self.align_corners = self.decode_head[-1].align_corners
        self.num_classes = self.decode_head[-1].num_classes

    def init_weights(self, pretrained=None):
        """Initialize the weights in backbone and heads.

        Args:
            pretrained (str, optional): Path to pre-trained weights.
                Defaults to None.
        """
        self.backbone.init_weights(pretrained=pretrained)
        for i in range(self.num_stages):
            self.decode_head[i].init_weights()
        if self.with_auxiliary_head:
            if isinstance(self.auxiliary_head, nn.ModuleList):
                for aux_head in self.auxiliary_head:
                    aux_head.init_weights()
            else:
                self.auxiliary_head.init_weights()

    def encode_decode(self, img, img_metas):
        """Encode images with backbone and decode into a semantic segmentation
        map of the same size as input."""
        x = self.extract_feat(img)
        out = self.decode_head[0].forward_test(x, img_metas, self.test_cfg)
        for i in range(1, self.num_stages):
            out = self.decode_head[i].forward_test(x, out, img_metas,
                                                   self.test_cfg)
        out = resize(
            input=out,
            size=img.shape[2:],
            mode='bilinear',
            align_corners=self.align_corners)
        return out

    def _decode_head_forward_train(self, x, img_metas, gt_semantic_seg):
        """Run forward function and calculate loss for decode head in
        training."""
        losses = dict()

        loss_decode = self.decode_head[0].forward_train(
            x, img_metas, gt_semantic_seg, self.train_cfg)

        losses.update(add_prefix(loss_decode, 'decode_0'))

        for i in range(1, self.num_stages):
            # forward test again, maybe unnecessary for most methods.
            prev_outputs = self.decode_head[i - 1].forward_test(
                x, img_metas, self.test_cfg)
            loss_decode = self.decode_head[i].forward_train(
                x, prev_outputs, img_metas, gt_semantic_seg, self.train_cfg)
            losses.update(add_prefix(loss_decode, f'decode_{i}'))

        return losses
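The data flow in `encode_decode` is the essence of the cascade: stage 0 sees only the backbone features, and every later stage additionally receives the previous stage's output. A runnable toy sketch of that pattern (the two lambda "heads" are purely illustrative stand-ins for real decode heads):

import torch

def cascade_decode(features, heads):
    out = heads[0](features)       # stage 0: backbone features only
    for head in heads[1:]:
        out = head(features, out)  # later stages also see the previous output
    return out

feats = torch.randn(1, 64, 32, 32)
stage0 = lambda f: f.mean(1, keepdim=True)
stage1 = lambda f, prev: prev + f.mean(1, keepdim=True)
print(cascade_decode(feats, [stage0, stage1]).shape)  # torch.Size([1, 1, 32, 32])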
extensions/microsoftexcel-controlnet/annotator/mmpkg/mmseg/models/segmentors/encoder_decoder.py
ADDED
@@ -0,0 +1,298 @@
import torch
import torch.nn as nn
import torch.nn.functional as F

from annotator.mmpkg.mmseg.core import add_prefix
from annotator.mmpkg.mmseg.ops import resize
from .. import builder
from ..builder import SEGMENTORS
from .base import BaseSegmentor


@SEGMENTORS.register_module()
class EncoderDecoder(BaseSegmentor):
    """Encoder Decoder segmentors.

    EncoderDecoder typically consists of backbone, decode_head, auxiliary_head.
    Note that auxiliary_head is only used for deep supervision during training,
    and can be discarded during inference.
    """

    def __init__(self,
                 backbone,
                 decode_head,
                 neck=None,
                 auxiliary_head=None,
                 train_cfg=None,
                 test_cfg=None,
                 pretrained=None):
        super(EncoderDecoder, self).__init__()
        self.backbone = builder.build_backbone(backbone)
        if neck is not None:
            self.neck = builder.build_neck(neck)
        self._init_decode_head(decode_head)
        self._init_auxiliary_head(auxiliary_head)

        self.train_cfg = train_cfg
        self.test_cfg = test_cfg

        self.init_weights(pretrained=pretrained)

        assert self.with_decode_head

    def _init_decode_head(self, decode_head):
        """Initialize ``decode_head``"""
        self.decode_head = builder.build_head(decode_head)
        self.align_corners = self.decode_head.align_corners
        self.num_classes = self.decode_head.num_classes

    def _init_auxiliary_head(self, auxiliary_head):
        """Initialize ``auxiliary_head``"""
        if auxiliary_head is not None:
            if isinstance(auxiliary_head, list):
                self.auxiliary_head = nn.ModuleList()
                for head_cfg in auxiliary_head:
                    self.auxiliary_head.append(builder.build_head(head_cfg))
            else:
                self.auxiliary_head = builder.build_head(auxiliary_head)

    def init_weights(self, pretrained=None):
        """Initialize the weights in backbone and heads.

        Args:
            pretrained (str, optional): Path to pre-trained weights.
                Defaults to None.
        """

        super(EncoderDecoder, self).init_weights(pretrained)
        self.backbone.init_weights(pretrained=pretrained)
        self.decode_head.init_weights()
        if self.with_auxiliary_head:
            if isinstance(self.auxiliary_head, nn.ModuleList):
                for aux_head in self.auxiliary_head:
                    aux_head.init_weights()
            else:
                self.auxiliary_head.init_weights()

    def extract_feat(self, img):
        """Extract features from images."""
        x = self.backbone(img)
        if self.with_neck:
            x = self.neck(x)
        return x

    def encode_decode(self, img, img_metas):
        """Encode images with backbone and decode into a semantic segmentation
        map of the same size as input."""
        x = self.extract_feat(img)
        out = self._decode_head_forward_test(x, img_metas)
        out = resize(
            input=out,
            size=img.shape[2:],
            mode='bilinear',
            align_corners=self.align_corners)
        return out

    def _decode_head_forward_train(self, x, img_metas, gt_semantic_seg):
        """Run forward function and calculate loss for decode head in
        training."""
        losses = dict()
        loss_decode = self.decode_head.forward_train(x, img_metas,
                                                     gt_semantic_seg,
                                                     self.train_cfg)

        losses.update(add_prefix(loss_decode, 'decode'))
        return losses

    def _decode_head_forward_test(self, x, img_metas):
        """Run forward function and calculate loss for decode head in
        inference."""
        seg_logits = self.decode_head.forward_test(x, img_metas, self.test_cfg)
        return seg_logits

    def _auxiliary_head_forward_train(self, x, img_metas, gt_semantic_seg):
        """Run forward function and calculate loss for auxiliary head in
        training."""
        losses = dict()
        if isinstance(self.auxiliary_head, nn.ModuleList):
            for idx, aux_head in enumerate(self.auxiliary_head):
                loss_aux = aux_head.forward_train(x, img_metas,
                                                  gt_semantic_seg,
                                                  self.train_cfg)
                losses.update(add_prefix(loss_aux, f'aux_{idx}'))
        else:
            loss_aux = self.auxiliary_head.forward_train(
                x, img_metas, gt_semantic_seg, self.train_cfg)
            losses.update(add_prefix(loss_aux, 'aux'))

        return losses

    def forward_dummy(self, img):
        """Dummy forward function."""
        seg_logit = self.encode_decode(img, None)

        return seg_logit

    def forward_train(self, img, img_metas, gt_semantic_seg):
        """Forward function for training.

        Args:
            img (Tensor): Input images.
            img_metas (list[dict]): List of image info dict where each dict
                has: 'img_shape', 'scale_factor', 'flip', and may also contain
                'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.
                For details on the values of these keys see
                `mmseg/datasets/pipelines/formatting.py:Collect`.
            gt_semantic_seg (Tensor): Semantic segmentation masks
                used if the architecture supports semantic segmentation task.

        Returns:
            dict[str, Tensor]: a dictionary of loss components
        """

        x = self.extract_feat(img)

        losses = dict()

        loss_decode = self._decode_head_forward_train(x, img_metas,
                                                      gt_semantic_seg)
        losses.update(loss_decode)

        if self.with_auxiliary_head:
            loss_aux = self._auxiliary_head_forward_train(
                x, img_metas, gt_semantic_seg)
            losses.update(loss_aux)

        return losses

    # TODO refactor
    def slide_inference(self, img, img_meta, rescale):
        """Inference by sliding-window with overlap.

        If h_crop > h_img or w_crop > w_img, the small patch will be used to
        decode without padding.
        """

        h_stride, w_stride = self.test_cfg.stride
        h_crop, w_crop = self.test_cfg.crop_size
        batch_size, _, h_img, w_img = img.size()
        num_classes = self.num_classes
        h_grids = max(h_img - h_crop + h_stride - 1, 0) // h_stride + 1
        w_grids = max(w_img - w_crop + w_stride - 1, 0) // w_stride + 1
        preds = img.new_zeros((batch_size, num_classes, h_img, w_img))
        count_mat = img.new_zeros((batch_size, 1, h_img, w_img))
        for h_idx in range(h_grids):
            for w_idx in range(w_grids):
                y1 = h_idx * h_stride
                x1 = w_idx * w_stride
                y2 = min(y1 + h_crop, h_img)
                x2 = min(x1 + w_crop, w_img)
                y1 = max(y2 - h_crop, 0)
                x1 = max(x2 - w_crop, 0)
                crop_img = img[:, :, y1:y2, x1:x2]
                crop_seg_logit = self.encode_decode(crop_img, img_meta)
                preds += F.pad(crop_seg_logit,
                               (int(x1), int(preds.shape[3] - x2), int(y1),
                                int(preds.shape[2] - y2)))

                count_mat[:, :, y1:y2, x1:x2] += 1
        assert (count_mat == 0).sum() == 0
        if torch.onnx.is_in_onnx_export():
            # cast count_mat to constant while exporting to ONNX
            count_mat = torch.from_numpy(
                count_mat.cpu().detach().numpy()).to(device=img.device)
        preds = preds / count_mat
        if rescale:
            preds = resize(
                preds,
                size=img_meta[0]['ori_shape'][:2],
                mode='bilinear',
                align_corners=self.align_corners,
                warning=False)
        return preds

    def whole_inference(self, img, img_meta, rescale):
        """Inference with full image."""

        seg_logit = self.encode_decode(img, img_meta)
        if rescale:
            # support dynamic shape for onnx
            if torch.onnx.is_in_onnx_export():
                size = img.shape[2:]
            else:
                size = img_meta[0]['ori_shape'][:2]
            seg_logit = resize(
                seg_logit,
                size=size,
                mode='bilinear',
                align_corners=self.align_corners,
                warning=False)

        return seg_logit

    def inference(self, img, img_meta, rescale):
        """Inference with slide/whole style.

        Args:
            img (Tensor): The input image of shape (N, 3, H, W).
            img_meta (list[dict]): Image info dicts where each dict has:
                'img_shape', 'scale_factor', 'flip', and may also contain
                'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.
                For details on the values of these keys see
                `mmseg/datasets/pipelines/formatting.py:Collect`.
            rescale (bool): Whether rescale back to original shape.

        Returns:
            Tensor: The output segmentation map.
        """

        assert self.test_cfg.mode in ['slide', 'whole']
        ori_shape = img_meta[0]['ori_shape']
        assert all(_['ori_shape'] == ori_shape for _ in img_meta)
        if self.test_cfg.mode == 'slide':
            seg_logit = self.slide_inference(img, img_meta, rescale)
        else:
            seg_logit = self.whole_inference(img, img_meta, rescale)
        output = F.softmax(seg_logit, dim=1)
        flip = img_meta[0]['flip']
        if flip:
            flip_direction = img_meta[0]['flip_direction']
            assert flip_direction in ['horizontal', 'vertical']
            if flip_direction == 'horizontal':
                output = output.flip(dims=(3, ))
            elif flip_direction == 'vertical':
                output = output.flip(dims=(2, ))

        return output

    def simple_test(self, img, img_meta, rescale=True):
        """Simple test with single image."""
        seg_logit = self.inference(img, img_meta, rescale)
        seg_pred = seg_logit.argmax(dim=1)
        if torch.onnx.is_in_onnx_export():
            # our inference backend only supports 4D output
            seg_pred = seg_pred.unsqueeze(0)
            return seg_pred
        seg_pred = seg_pred.cpu().numpy()
        # unravel batch dim
        seg_pred = list(seg_pred)
        return seg_pred

    def aug_test(self, imgs, img_metas, rescale=True):
        """Test with augmentations.

        Only rescale=True is supported.
        """
        # aug_test rescale all imgs back to ori_shape for now
        assert rescale
        # to save memory, we get augmented seg logit inplace
        seg_logit = self.inference(imgs[0], img_metas[0], rescale)
        for i in range(1, len(imgs)):
            cur_seg_logit = self.inference(imgs[i], img_metas[i], rescale)
            seg_logit += cur_seg_logit
        seg_logit /= len(imgs)
        seg_pred = seg_logit.argmax(dim=1)
        seg_pred = seg_pred.cpu().numpy()
        # unravel batch dim
        seg_pred = list(seg_pred)
        return seg_pred
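The window arithmetic in `slide_inference` is worth tracing once by hand: the grid count rounds up so the whole image is covered, and the y1/y2 min/max pair clamps the last window flush against the border. A pure-Python worked example with illustrative sizes (the clamped y1 below is equivalent to the code's two-step clamp):

h_img, h_crop, h_stride = 512, 256, 171
h_grids = max(h_img - h_crop + h_stride - 1, 0) // h_stride + 1
print(h_grids)  # 3
for h_idx in range(h_grids):
    y1 = min(h_idx * h_stride, h_img - h_crop)
    print((y1, y1 + h_crop))
# (0, 256), (171, 427), (256, 512): overlapping windows, the last flush with
# the border; count_mat records the overlaps so preds can be averaged.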
extensions/microsoftexcel-controlnet/annotator/mmpkg/mmseg/models/utils/__init__.py
ADDED
@@ -0,0 +1,13 @@
from .drop import DropPath
from .inverted_residual import InvertedResidual, InvertedResidualV3
from .make_divisible import make_divisible
from .res_layer import ResLayer
from .se_layer import SELayer
from .self_attention_block import SelfAttentionBlock
from .up_conv_block import UpConvBlock
from .weight_init import trunc_normal_

__all__ = [
    'ResLayer', 'SelfAttentionBlock', 'make_divisible', 'InvertedResidual',
    'UpConvBlock', 'InvertedResidualV3', 'SELayer', 'DropPath', 'trunc_normal_'
]
extensions/microsoftexcel-controlnet/annotator/mmpkg/mmseg/models/utils/drop.py
ADDED
@@ -0,0 +1,31 @@
"""Modified from
https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/layers/drop.py."""

import torch
from torch import nn


class DropPath(nn.Module):
    """Drop paths (Stochastic Depth) per sample (when applied in main path of
    residual blocks).

    Args:
        drop_prob (float): Drop rate for paths of model. Dropout rate has
            to be between 0 and 1. Default: 0.
    """

    def __init__(self, drop_prob=0.):
        super(DropPath, self).__init__()
        self.drop_prob = drop_prob
        self.keep_prob = 1 - drop_prob

    def forward(self, x):
        if self.drop_prob == 0. or not self.training:
            return x
        shape = (x.shape[0], ) + (1, ) * (
            x.ndim - 1)  # work with diff dim tensors, not just 2D ConvNets
        random_tensor = self.keep_prob + torch.rand(
            shape, dtype=x.dtype, device=x.device)
        random_tensor.floor_()  # binarize
        output = x.div(self.keep_prob) * random_tensor
        return output
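A quick sanity check of the two behaviors above, assuming the `DropPath` class just defined is in scope: identity in eval mode, and per-sample drop with 1/keep_prob rescaling in train mode so the expected value is preserved:

import torch

dp = DropPath(drop_prob=0.5)
x = torch.ones(4, 3, 8, 8)
dp.eval()
assert torch.equal(dp(x), x)  # no-op at inference
dp.train()
y = dp(x)
# each sample is either zeroed or scaled by 1/keep_prob = 2.0
print(y[:, 0, 0, 0])  # e.g. tensor([2., 0., 2., 2.]) -- stochastic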
extensions/microsoftexcel-controlnet/annotator/mmpkg/mmseg/models/utils/inverted_residual.py
ADDED
@@ -0,0 +1,208 @@
from annotator.mmpkg.mmcv.cnn import ConvModule
from torch import nn
from torch.utils import checkpoint as cp

from .se_layer import SELayer


class InvertedResidual(nn.Module):
    """InvertedResidual block for MobileNetV2.

    Args:
        in_channels (int): The input channels of the InvertedResidual block.
        out_channels (int): The output channels of the InvertedResidual block.
        stride (int): Stride of the middle (first) 3x3 convolution.
        expand_ratio (int): Adjusts number of channels of the hidden layer
            in InvertedResidual by this amount.
        dilation (int): Dilation rate of depthwise conv. Default: 1
        conv_cfg (dict): Config dict for convolution layer.
            Default: None, which means using conv2d.
        norm_cfg (dict): Config dict for normalization layer.
            Default: dict(type='BN').
        act_cfg (dict): Config dict for activation layer.
            Default: dict(type='ReLU6').
        with_cp (bool): Use checkpoint or not. Using checkpoint will save some
            memory while slowing down the training speed. Default: False.

    Returns:
        Tensor: The output tensor.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 stride,
                 expand_ratio,
                 dilation=1,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN'),
                 act_cfg=dict(type='ReLU6'),
                 with_cp=False):
        super(InvertedResidual, self).__init__()
        self.stride = stride
        assert stride in [1, 2], f'stride must be in [1, 2]. ' \
            f'But received {stride}.'
        self.with_cp = with_cp
        self.use_res_connect = self.stride == 1 and in_channels == out_channels
        hidden_dim = int(round(in_channels * expand_ratio))

        layers = []
        if expand_ratio != 1:
            layers.append(
                ConvModule(
                    in_channels=in_channels,
                    out_channels=hidden_dim,
                    kernel_size=1,
                    conv_cfg=conv_cfg,
                    norm_cfg=norm_cfg,
                    act_cfg=act_cfg))
        layers.extend([
            ConvModule(
                in_channels=hidden_dim,
                out_channels=hidden_dim,
                kernel_size=3,
                stride=stride,
                padding=dilation,
                dilation=dilation,
                groups=hidden_dim,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                act_cfg=act_cfg),
            ConvModule(
                in_channels=hidden_dim,
                out_channels=out_channels,
                kernel_size=1,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                act_cfg=None)
        ])
        self.conv = nn.Sequential(*layers)

    def forward(self, x):

        def _inner_forward(x):
            if self.use_res_connect:
                return x + self.conv(x)
            else:
                return self.conv(x)

        if self.with_cp and x.requires_grad:
            out = cp.checkpoint(_inner_forward, x)
        else:
            out = _inner_forward(x)

        return out


class InvertedResidualV3(nn.Module):
    """Inverted Residual Block for MobileNetV3.

    Args:
        in_channels (int): The input channels of this Module.
        out_channels (int): The output channels of this Module.
        mid_channels (int): The input channels of the depthwise convolution.
        kernel_size (int): The kernel size of the depthwise convolution.
            Default: 3.
        stride (int): The stride of the depthwise convolution. Default: 1.
        se_cfg (dict): Config dict for se layer. Default: None, which means no
            se layer.
        with_expand_conv (bool): Use expand conv or not. If set False,
            mid_channels must be the same with in_channels. Default: True.
        conv_cfg (dict): Config dict for convolution layer. Default: None,
            which means using conv2d.
        norm_cfg (dict): Config dict for normalization layer.
            Default: dict(type='BN').
        act_cfg (dict): Config dict for activation layer.
            Default: dict(type='ReLU').
        with_cp (bool): Use checkpoint or not. Using checkpoint will save some
            memory while slowing down the training speed. Default: False.

    Returns:
        Tensor: The output tensor.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 mid_channels,
                 kernel_size=3,
                 stride=1,
                 se_cfg=None,
                 with_expand_conv=True,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN'),
                 act_cfg=dict(type='ReLU'),
                 with_cp=False):
        super(InvertedResidualV3, self).__init__()
        self.with_res_shortcut = (stride == 1 and in_channels == out_channels)
        assert stride in [1, 2]
        self.with_cp = with_cp
        self.with_se = se_cfg is not None
        self.with_expand_conv = with_expand_conv

        if self.with_se:
            assert isinstance(se_cfg, dict)
        if not self.with_expand_conv:
            assert mid_channels == in_channels

        if self.with_expand_conv:
            self.expand_conv = ConvModule(
                in_channels=in_channels,
                out_channels=mid_channels,
                kernel_size=1,
                stride=1,
                padding=0,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                act_cfg=act_cfg)
        self.depthwise_conv = ConvModule(
            in_channels=mid_channels,
            out_channels=mid_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=kernel_size // 2,
            groups=mid_channels,
            conv_cfg=dict(
                type='Conv2dAdaptivePadding') if stride == 2 else conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg)

        if self.with_se:
            self.se = SELayer(**se_cfg)

        self.linear_conv = ConvModule(
            in_channels=mid_channels,
            out_channels=out_channels,
            kernel_size=1,
            stride=1,
            padding=0,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=None)

    def forward(self, x):

        def _inner_forward(x):
            out = x

            if self.with_expand_conv:
                out = self.expand_conv(out)

            out = self.depthwise_conv(out)

            if self.with_se:
                out = self.se(out)

            out = self.linear_conv(out)

            if self.with_res_shortcut:
                return x + out
            else:
                return out

        if self.with_cp and x.requires_grad:
            out = cp.checkpoint(_inner_forward, x)
        else:
            out = _inner_forward(x)

        return out
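A hypothetical shape check for the MobileNetV2-style block above; it assumes `InvertedResidual` is in scope and the vendored annotator.mmpkg.mmcv package (ConvModule) is importable:

import torch

block = InvertedResidual(in_channels=32, out_channels=32, stride=1, expand_ratio=6)
x = torch.randn(1, 32, 56, 56)
assert block(x).shape == x.shape  # stride 1 + equal channels -> residual path is used
down = InvertedResidual(in_channels=32, out_channels=64, stride=2, expand_ratio=6)
print(down(x).shape)  # torch.Size([1, 64, 28, 28]); no residual here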
extensions/microsoftexcel-controlnet/annotator/mmpkg/mmseg/models/utils/make_divisible.py
ADDED
@@ -0,0 +1,27 @@
def make_divisible(value, divisor, min_value=None, min_ratio=0.9):
    """Make divisible function.

    This function rounds the channel number to the nearest value that can be
    divisible by the divisor. It is taken from the original tf repo. It ensures
    that all layers have a channel number that is divisible by divisor. It can
    be seen here: https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py  # noqa

    Args:
        value (int): The original channel number.
        divisor (int): The divisor to fully divide the channel number.
        min_value (int): The minimum value of the output channel.
            Default: None, which means the minimum value equals the divisor.
        min_ratio (float): The minimum ratio of the rounded channel number to
            the original channel number. Default: 0.9.

    Returns:
        int: The modified output channel number.
    """

    if min_value is None:
        min_value = divisor
    new_value = max(min_value, int(value + divisor / 2) // divisor * divisor)
    # Make sure that rounding down does not go down by more than (1-min_ratio).
    if new_value < min_ratio * value:
        new_value += divisor
    return new_value
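A few worked examples with divisor 8 (the value SELayer below passes in), covering the three cases the function handles:

print(make_divisible(32, 8))  # 32: already divisible, returned unchanged
print(make_divisible(27, 8))  # 32: nearest multiple is 24, but 24 < 0.9 * 27, so one divisor is added back
print(make_divisible(3, 8))   # 8: clamped up to min_value (= divisor by default)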
extensions/microsoftexcel-controlnet/annotator/mmpkg/mmseg/models/utils/res_layer.py
ADDED
@@ -0,0 +1,94 @@
from annotator.mmpkg.mmcv.cnn import build_conv_layer, build_norm_layer
from torch import nn as nn


class ResLayer(nn.Sequential):
    """ResLayer to build ResNet style backbone.

    Args:
        block (nn.Module): block used to build ResLayer.
        inplanes (int): inplanes of block.
        planes (int): planes of block.
        num_blocks (int): number of blocks.
        stride (int): stride of the first block. Default: 1
        avg_down (bool): Use AvgPool instead of stride conv when
            downsampling in the bottleneck. Default: False
        conv_cfg (dict): dictionary to construct and config conv layer.
            Default: None
        norm_cfg (dict): dictionary to construct and config norm layer.
            Default: dict(type='BN')
        multi_grid (int | None): Multi grid dilation rates of last
            stage. Default: None
        contract_dilation (bool): Whether to contract the first dilation of
            each layer. Default: False
    """

    def __init__(self,
                 block,
                 inplanes,
                 planes,
                 num_blocks,
                 stride=1,
                 dilation=1,
                 avg_down=False,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN'),
                 multi_grid=None,
                 contract_dilation=False,
                 **kwargs):
        self.block = block

        downsample = None
        if stride != 1 or inplanes != planes * block.expansion:
            downsample = []
            conv_stride = stride
            if avg_down:
                conv_stride = 1
                downsample.append(
                    nn.AvgPool2d(
                        kernel_size=stride,
                        stride=stride,
                        ceil_mode=True,
                        count_include_pad=False))
            downsample.extend([
                build_conv_layer(
                    conv_cfg,
                    inplanes,
                    planes * block.expansion,
                    kernel_size=1,
                    stride=conv_stride,
                    bias=False),
                build_norm_layer(norm_cfg, planes * block.expansion)[1]
            ])
            downsample = nn.Sequential(*downsample)

        layers = []
        if multi_grid is None:
            if dilation > 1 and contract_dilation:
                first_dilation = dilation // 2
            else:
                first_dilation = dilation
        else:
            first_dilation = multi_grid[0]
        layers.append(
            block(
                inplanes=inplanes,
                planes=planes,
                stride=stride,
                dilation=first_dilation,
                downsample=downsample,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                **kwargs))
        inplanes = planes * block.expansion
        for i in range(1, num_blocks):
            layers.append(
                block(
                    inplanes=inplanes,
                    planes=planes,
                    stride=1,
                    dilation=dilation if multi_grid is None else multi_grid[i],
                    conv_cfg=conv_cfg,
                    norm_cfg=norm_cfg,
                    **kwargs))
        super(ResLayer, self).__init__(*layers)
extensions/microsoftexcel-controlnet/annotator/mmpkg/mmseg/models/utils/se_layer.py
ADDED
@@ -0,0 +1,57 @@
import annotator.mmpkg.mmcv as mmcv
import torch.nn as nn
from annotator.mmpkg.mmcv.cnn import ConvModule

from .make_divisible import make_divisible


class SELayer(nn.Module):
    """Squeeze-and-Excitation Module.

    Args:
        channels (int): The input (and output) channels of the SE layer.
        ratio (int): Squeeze ratio in SELayer, the intermediate channel will be
            ``int(channels/ratio)``. Default: 16.
        conv_cfg (None or dict): Config dict for convolution layer.
            Default: None, which means using conv2d.
        act_cfg (dict or Sequence[dict]): Config dict for activation layer.
            If act_cfg is a dict, two activation layers will be configured
            by this dict. If act_cfg is a sequence of dicts, the first
            activation layer will be configured by the first dict and the
            second activation layer will be configured by the second dict.
            Default: (dict(type='ReLU'), dict(type='HSigmoid', bias=3.0,
            divisor=6.0)).
    """

    def __init__(self,
                 channels,
                 ratio=16,
                 conv_cfg=None,
                 act_cfg=(dict(type='ReLU'),
                          dict(type='HSigmoid', bias=3.0, divisor=6.0))):
        super(SELayer, self).__init__()
        if isinstance(act_cfg, dict):
            act_cfg = (act_cfg, act_cfg)
        assert len(act_cfg) == 2
        assert mmcv.is_tuple_of(act_cfg, dict)
        self.global_avgpool = nn.AdaptiveAvgPool2d(1)
        self.conv1 = ConvModule(
            in_channels=channels,
            out_channels=make_divisible(channels // ratio, 8),
            kernel_size=1,
            stride=1,
            conv_cfg=conv_cfg,
            act_cfg=act_cfg[0])
        self.conv2 = ConvModule(
            in_channels=make_divisible(channels // ratio, 8),
            out_channels=channels,
            kernel_size=1,
            stride=1,
            conv_cfg=conv_cfg,
            act_cfg=act_cfg[1])

    def forward(self, x):
        out = self.global_avgpool(x)
        out = self.conv1(out)
        out = self.conv2(out)
        return x * out
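A hypothetical shape check for the SE layer; it assumes `SELayer` is in scope and the vendored annotator.mmpkg.mmcv package is importable (it provides ConvModule and the HSigmoid activation):

import torch

se = SELayer(channels=64, ratio=16)  # squeezes to make_divisible(64 // 16, 8) = 8 channels
x = torch.randn(2, 64, 14, 14)
print(se(x).shape)  # torch.Size([2, 64, 14, 14]): a per-channel reweighting of x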
extensions/microsoftexcel-controlnet/annotator/mmpkg/mmseg/models/utils/self_attention_block.py
ADDED
@@ -0,0 +1,159 @@
import torch
from annotator.mmpkg.mmcv.cnn import ConvModule, constant_init
from torch import nn as nn
from torch.nn import functional as F


class SelfAttentionBlock(nn.Module):
    """General self-attention block/non-local block.

    Please refer to https://arxiv.org/abs/1706.03762 for details about key,
    query and value.

    Args:
        key_in_channels (int): Input channels of key feature.
        query_in_channels (int): Input channels of query feature.
        channels (int): Output channels of key/query transform.
        out_channels (int): Output channels.
        share_key_query (bool): Whether share projection weight between key
            and query projection.
        query_downsample (nn.Module): Query downsample module.
        key_downsample (nn.Module): Key downsample module.
        key_query_num_convs (int): Number of convs for key/query projection.
        value_num_convs (int): Number of convs for value projection.
        matmul_norm (bool): Whether normalize attention map with sqrt of
            channels
        with_out (bool): Whether use out projection.
        conv_cfg (dict|None): Config of conv layers.
        norm_cfg (dict|None): Config of norm layers.
        act_cfg (dict|None): Config of activation layers.
    """

    def __init__(self, key_in_channels, query_in_channels, channels,
                 out_channels, share_key_query, query_downsample,
                 key_downsample, key_query_num_convs, value_out_num_convs,
                 key_query_norm, value_out_norm, matmul_norm, with_out,
                 conv_cfg, norm_cfg, act_cfg):
        super(SelfAttentionBlock, self).__init__()
        if share_key_query:
            assert key_in_channels == query_in_channels
        self.key_in_channels = key_in_channels
        self.query_in_channels = query_in_channels
        self.out_channels = out_channels
        self.channels = channels
        self.share_key_query = share_key_query
        self.conv_cfg = conv_cfg
        self.norm_cfg = norm_cfg
        self.act_cfg = act_cfg
        self.key_project = self.build_project(
            key_in_channels,
            channels,
            num_convs=key_query_num_convs,
            use_conv_module=key_query_norm,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg)
        if share_key_query:
            self.query_project = self.key_project
        else:
            self.query_project = self.build_project(
                query_in_channels,
                channels,
                num_convs=key_query_num_convs,
                use_conv_module=key_query_norm,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                act_cfg=act_cfg)
        self.value_project = self.build_project(
            key_in_channels,
            channels if with_out else out_channels,
            num_convs=value_out_num_convs,
            use_conv_module=value_out_norm,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg)
        if with_out:
            self.out_project = self.build_project(
                channels,
                out_channels,
                num_convs=value_out_num_convs,
                use_conv_module=value_out_norm,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                act_cfg=act_cfg)
        else:
            self.out_project = None

        self.query_downsample = query_downsample
        self.key_downsample = key_downsample
        self.matmul_norm = matmul_norm

        self.init_weights()

    def init_weights(self):
        """Initialize weight of later layer."""
        if self.out_project is not None:
            if not isinstance(self.out_project, ConvModule):
                constant_init(self.out_project, 0)

    def build_project(self, in_channels, channels, num_convs, use_conv_module,
                      conv_cfg, norm_cfg, act_cfg):
        """Build projection layer for key/query/value/out."""
        if use_conv_module:
            convs = [
                ConvModule(
                    in_channels,
                    channels,
                    1,
                    conv_cfg=conv_cfg,
                    norm_cfg=norm_cfg,
                    act_cfg=act_cfg)
            ]
            for _ in range(num_convs - 1):
                convs.append(
                    ConvModule(
                        channels,
                        channels,
                        1,
                        conv_cfg=conv_cfg,
                        norm_cfg=norm_cfg,
                        act_cfg=act_cfg))
        else:
            convs = [nn.Conv2d(in_channels, channels, 1)]
            for _ in range(num_convs - 1):
                convs.append(nn.Conv2d(channels, channels, 1))
        if len(convs) > 1:
            convs = nn.Sequential(*convs)
        else:
            convs = convs[0]
        return convs

    def forward(self, query_feats, key_feats):
        """Forward function."""
        batch_size = query_feats.size(0)
        query = self.query_project(query_feats)
        if self.query_downsample is not None:
            query = self.query_downsample(query)
        query = query.reshape(*query.shape[:2], -1)
        query = query.permute(0, 2, 1).contiguous()

        key = self.key_project(key_feats)
        value = self.value_project(key_feats)
        if self.key_downsample is not None:
            key = self.key_downsample(key)
            value = self.key_downsample(value)
        key = key.reshape(*key.shape[:2], -1)
        value = value.reshape(*value.shape[:2], -1)
        value = value.permute(0, 2, 1).contiguous()

        sim_map = torch.matmul(query, key)
        if self.matmul_norm:
            sim_map = (self.channels**-.5) * sim_map
        sim_map = F.softmax(sim_map, dim=-1)

        context = torch.matmul(sim_map, value)
        context = context.permute(0, 2, 1).contiguous()
        context = context.reshape(batch_size, -1, *query_feats.shape[2:])
        if self.out_project is not None:
            context = self.out_project(context)
        return context
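A hedged usage sketch wiring the block as plain self-attention over one feature map (import path mirrors this commit's layout and is an assumption):

```python
# Non-local self-attention: query and key come from the same 256-channel map.
import torch
from annotator.mmpkg.mmseg.models.utils.self_attention_block import SelfAttentionBlock

block = SelfAttentionBlock(
    key_in_channels=256, query_in_channels=256, channels=64,
    out_channels=256, share_key_query=False,
    query_downsample=None, key_downsample=None,
    key_query_num_convs=1, value_out_num_convs=1,
    key_query_norm=False, value_out_norm=False,
    matmul_norm=True, with_out=True,
    conv_cfg=None, norm_cfg=None, act_cfg=None)

feats = torch.randn(2, 256, 32, 32)
context = block(feats, feats)              # self-attention: query == key feats
assert context.shape == (2, 256, 32, 32)   # out_project restores out_channels
```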
extensions/microsoftexcel-controlnet/annotator/mmpkg/mmseg/models/utils/up_conv_block.py
ADDED
@@ -0,0 +1,101 @@
import torch
import torch.nn as nn
from annotator.mmpkg.mmcv.cnn import ConvModule, build_upsample_layer


class UpConvBlock(nn.Module):
    """Upsample convolution block in decoder for UNet.

    This upsample convolution block consists of one upsample module
    followed by one convolution block. The upsample module expands the
    high-level low-resolution feature map and the convolution block fuses
    the upsampled high-level low-resolution feature map and the low-level
    high-resolution feature map from encoder.

    Args:
        conv_block (nn.Sequential): Sequential of convolutional layers.
        in_channels (int): Number of input channels of the high-level
            low-resolution feature map from decoder.
        skip_channels (int): Number of input channels of the low-level
            high-resolution feature map from encoder.
        out_channels (int): Number of output channels.
        num_convs (int): Number of convolutional layers in the conv_block.
            Default: 2.
        stride (int): Stride of convolutional layer in conv_block. Default: 1.
        dilation (int): Dilation rate of convolutional layer in conv_block.
            Default: 1.
        with_cp (bool): Use checkpoint or not. Using checkpoint will save some
            memory while slowing down the training speed. Default: False.
        conv_cfg (dict | None): Config dict for convolution layer.
            Default: None.
        norm_cfg (dict | None): Config dict for normalization layer.
            Default: dict(type='BN').
        act_cfg (dict | None): Config dict for activation layer in ConvModule.
            Default: dict(type='ReLU').
        upsample_cfg (dict): The upsample config of the upsample module in
            decoder. Default: dict(type='InterpConv'). If the size of the
            high-level feature map is the same as that of the skip feature map
            (low-level feature map from encoder), the high-level feature map
            does not need to be upsampled and upsample_cfg may be None.
        dcn (bool): Use deformable convolution in convolutional layer or not.
            Default: None.
        plugins (dict): plugins for convolutional layers. Default: None.
    """

    def __init__(self,
                 conv_block,
                 in_channels,
                 skip_channels,
                 out_channels,
                 num_convs=2,
                 stride=1,
                 dilation=1,
                 with_cp=False,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN'),
                 act_cfg=dict(type='ReLU'),
                 upsample_cfg=dict(type='InterpConv'),
                 dcn=None,
                 plugins=None):
        super(UpConvBlock, self).__init__()
        assert dcn is None, 'Not implemented yet.'
        assert plugins is None, 'Not implemented yet.'

        self.conv_block = conv_block(
            in_channels=2 * skip_channels,
            out_channels=out_channels,
            num_convs=num_convs,
            stride=stride,
            dilation=dilation,
            with_cp=with_cp,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg,
            dcn=None,
            plugins=None)
        if upsample_cfg is not None:
            self.upsample = build_upsample_layer(
                cfg=upsample_cfg,
                in_channels=in_channels,
                out_channels=skip_channels,
                with_cp=with_cp,
                norm_cfg=norm_cfg,
                act_cfg=act_cfg)
        else:
            self.upsample = ConvModule(
                in_channels,
                skip_channels,
                kernel_size=1,
                stride=1,
                padding=0,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                act_cfg=act_cfg)

    def forward(self, skip, x):
        """Forward function."""

        x = self.upsample(x)
        out = torch.cat([skip, x], dim=1)
        out = self.conv_block(out)

        return out
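A hedged sketch of how this block is driven. `TinyConvBlock` below is a hypothetical stand-in I define for illustration, mimicking the keyword signature the real UNet conv block would accept; `upsample_cfg=None` is used so the example does not depend on the `InterpConv` upsample layer being registered:

```python
import torch
import torch.nn as nn
from annotator.mmpkg.mmcv.cnn import ConvModule
from annotator.mmpkg.mmseg.models.utils.up_conv_block import UpConvBlock


class TinyConvBlock(nn.Sequential):
    """Hypothetical stand-in for the conv_block factory UpConvBlock expects."""

    def __init__(self, in_channels, out_channels, num_convs=2, stride=1,
                 dilation=1, with_cp=False, conv_cfg=None,
                 norm_cfg=dict(type='BN'), act_cfg=dict(type='ReLU'),
                 dcn=None, plugins=None):
        convs = [
            ConvModule(in_channels if i == 0 else out_channels, out_channels,
                       3, stride=stride if i == 0 else 1, padding=dilation,
                       dilation=dilation, conv_cfg=conv_cfg,
                       norm_cfg=norm_cfg, act_cfg=act_cfg)
            for i in range(num_convs)
        ]
        super().__init__(*convs)


# upsample_cfg=None: the high-level map already matches the skip's size, so a
# 1x1 ConvModule only aligns its channel count before concatenation.
up = UpConvBlock(TinyConvBlock, in_channels=128, skip_channels=64,
                 out_channels=64, upsample_cfg=None)
skip = torch.randn(1, 64, 32, 32)   # low-level, high-resolution features
x = torch.randn(1, 128, 32, 32)     # high-level features, same spatial size
assert up(skip, x).shape == (1, 64, 32, 32)
```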
extensions/microsoftexcel-controlnet/annotator/mmpkg/mmseg/models/utils/weight_init.py
ADDED
@@ -0,0 +1,62 @@
"""Modified from https://github.com/rwightman/pytorch-image-
models/blob/master/timm/models/layers/drop.py."""

import math
import warnings

import torch


def _no_grad_trunc_normal_(tensor, mean, std, a, b):
    """Reference: https://people.sc.fsu.edu/~jburkardt/presentations
    /truncated_normal.pdf"""

    def norm_cdf(x):
        # Computes standard normal cumulative distribution function
        return (1. + math.erf(x / math.sqrt(2.))) / 2.

    if (mean < a - 2 * std) or (mean > b + 2 * std):
        warnings.warn(
            'mean is more than 2 std from [a, b] in nn.init.trunc_normal_. '
            'The distribution of values may be incorrect.',
            stacklevel=2)

    with torch.no_grad():
        # Values are generated by using a truncated uniform distribution and
        # then using the inverse CDF for the normal distribution.
        # Get upper and lower cdf values
        lower_bound = norm_cdf((a - mean) / std)
        upper_bound = norm_cdf((b - mean) / std)

        # Uniformly fill tensor with values from [l, u], then translate to
        # [2l-1, 2u-1].
        tensor.uniform_(2 * lower_bound - 1, 2 * upper_bound - 1)

        # Use inverse cdf transform for normal distribution to get truncated
        # standard normal
        tensor.erfinv_()

        # Transform to proper mean, std
        tensor.mul_(std * math.sqrt(2.))
        tensor.add_(mean)

        # Clamp to ensure it's in the proper range
        tensor.clamp_(min=a, max=b)
        return tensor


def trunc_normal_(tensor, mean=0., std=1., a=-2., b=2.):
    r"""Fills the input Tensor with values drawn from a truncated
    normal distribution. The values are effectively drawn from the
    normal distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)`
    with values outside :math:`[a, b]` redrawn until they are within
    the bounds. The method used for generating the random values works
    best when :math:`a \leq \text{mean} \leq b`.
    Args:
        tensor (``torch.Tensor``): an n-dimensional `torch.Tensor`
        mean (float): the mean of the normal distribution
        std (float): the standard deviation of the normal distribution
        a (float): the minimum cutoff value
        b (float): the maximum cutoff value
    """
    return _no_grad_trunc_normal_(tensor, mean, std, a, b)
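A short sketch of the typical call (import path per this commit's layout):

```python
# In-place truncated-normal init, the common pattern for transformer weights.
import torch
from annotator.mmpkg.mmseg.models.utils.weight_init import trunc_normal_

w = torch.empty(768, 384)
trunc_normal_(w, mean=0., std=0.02)        # values drawn from N(0, 0.02^2)
assert w.min() >= -2. and w.max() <= 2.    # final clamp enforces the [a, b] cutoffs
```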
extensions/microsoftexcel-controlnet/annotator/mmpkg/mmseg/ops/__init__.py
ADDED
@@ -0,0 +1,4 @@
from .encoding import Encoding
from .wrappers import Upsample, resize

__all__ = ['Upsample', 'resize', 'Encoding']
extensions/microsoftexcel-controlnet/annotator/mmpkg/mmseg/ops/encoding.py
ADDED
@@ -0,0 +1,74 @@
import torch
from torch import nn
from torch.nn import functional as F


class Encoding(nn.Module):
    """Encoding Layer: a learnable residual encoder.

    Input is of shape (batch_size, channels, height, width).
    Output is of shape (batch_size, num_codes, channels).

    Args:
        channels: dimension of the features or feature channels
        num_codes: number of code words
    """

    def __init__(self, channels, num_codes):
        super(Encoding, self).__init__()
        # init codewords and smoothing factor
        self.channels, self.num_codes = channels, num_codes
        std = 1. / ((num_codes * channels)**0.5)
        # [num_codes, channels]
        self.codewords = nn.Parameter(
            torch.empty(num_codes, channels,
                        dtype=torch.float).uniform_(-std, std),
            requires_grad=True)
        # [num_codes]
        self.scale = nn.Parameter(
            torch.empty(num_codes, dtype=torch.float).uniform_(-1, 0),
            requires_grad=True)

    @staticmethod
    def scaled_l2(x, codewords, scale):
        num_codes, channels = codewords.size()
        batch_size = x.size(0)
        reshaped_scale = scale.view((1, 1, num_codes))
        expanded_x = x.unsqueeze(2).expand(
            (batch_size, x.size(1), num_codes, channels))
        reshaped_codewords = codewords.view((1, 1, num_codes, channels))

        scaled_l2_norm = reshaped_scale * (
            expanded_x - reshaped_codewords).pow(2).sum(dim=3)
        return scaled_l2_norm

    @staticmethod
    def aggregate(assignment_weights, x, codewords):
        num_codes, channels = codewords.size()
        reshaped_codewords = codewords.view((1, 1, num_codes, channels))
        batch_size = x.size(0)

        expanded_x = x.unsqueeze(2).expand(
            (batch_size, x.size(1), num_codes, channels))
        encoded_feat = (assignment_weights.unsqueeze(3) *
                        (expanded_x - reshaped_codewords)).sum(dim=1)
        return encoded_feat

    def forward(self, x):
        assert x.dim() == 4 and x.size(1) == self.channels
        # [batch_size, channels, height, width]
        batch_size = x.size(0)
        # [batch_size, height x width, channels]
        x = x.view(batch_size, self.channels, -1).transpose(1, 2).contiguous()
        # assignment_weights: [batch_size, height x width, num_codes]
        assignment_weights = F.softmax(
            self.scaled_l2(x, self.codewords, self.scale), dim=2)
        # aggregate
        encoded_feat = self.aggregate(assignment_weights, x, self.codewords)
        return encoded_feat

    def __repr__(self):
        repr_str = self.__class__.__name__
        repr_str += f'(Nx{self.channels}xHxW =>Nx{self.num_codes}' \
                    f'x{self.channels})'
        return repr_str
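A usage sketch showing the shape contract stated in the docstring (import path mirrors the package __init__ above):

```python
# The layer soft-assigns every spatial position to num_codes learned codewords
# and aggregates the residuals, one code vector per codeword.
import torch
from annotator.mmpkg.mmseg.ops import Encoding

enc = Encoding(channels=512, num_codes=32)
x = torch.randn(4, 512, 16, 16)      # (batch, channels, height, width)
codes = enc(x)
assert codes.shape == (4, 32, 512)   # (batch, num_codes, channels)
```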
extensions/microsoftexcel-controlnet/annotator/mmpkg/mmseg/ops/wrappers.py
ADDED
@@ -0,0 +1,50 @@
import warnings

import torch.nn as nn
import torch.nn.functional as F


def resize(input,
           size=None,
           scale_factor=None,
           mode='nearest',
           align_corners=None,
           warning=True):
    if warning:
        if size is not None and align_corners:
            input_h, input_w = tuple(int(x) for x in input.shape[2:])
            output_h, output_w = tuple(int(x) for x in size)
            if output_h > input_h or output_w > input_w:
                if ((output_h > 1 and output_w > 1 and input_h > 1
                     and input_w > 1) and (output_h - 1) % (input_h - 1)
                        and (output_w - 1) % (input_w - 1)):
                    warnings.warn(
                        f'When align_corners={align_corners}, '
                        'the output would be more aligned if '
                        f'input size {(input_h, input_w)} is `x+1` and '
                        f'out size {(output_h, output_w)} is `nx+1`')
    return F.interpolate(input, size, scale_factor, mode, align_corners)


class Upsample(nn.Module):

    def __init__(self,
                 size=None,
                 scale_factor=None,
                 mode='nearest',
                 align_corners=None):
        super(Upsample, self).__init__()
        self.size = size
        if isinstance(scale_factor, tuple):
            self.scale_factor = tuple(float(factor) for factor in scale_factor)
        else:
            self.scale_factor = float(scale_factor) if scale_factor else None
        self.mode = mode
        self.align_corners = align_corners

    def forward(self, x):
        if not self.size:
            size = [int(t * self.scale_factor) for t in x.shape[-2:]]
        else:
            size = self.size
        return resize(x, size, None, self.mode, self.align_corners)
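A hedged sketch of both wrappers; `resize` only warns when `align_corners=True` and the requested size cannot align corner pixels exactly:

```python
import torch
from annotator.mmpkg.mmseg.ops import Upsample, resize

x = torch.randn(1, 3, 17, 17)
# (33 - 1) % (17 - 1) == 0, so this align_corners=True resize emits no warning.
y = resize(x, size=(33, 33), mode='bilinear', align_corners=True)
up = Upsample(scale_factor=2, mode='bilinear', align_corners=False)
z = up(x)                          # target size computed as int(17 * 2) per side
assert y.shape[-2:] == (33, 33) and z.shape[-2:] == (34, 34)
```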
extensions/microsoftexcel-controlnet/annotator/mmpkg/mmseg/utils/__init__.py
ADDED
@@ -0,0 +1,4 @@
from .collect_env import collect_env
from .logger import get_root_logger

__all__ = ['get_root_logger', 'collect_env']
extensions/microsoftexcel-controlnet/annotator/mmpkg/mmseg/utils/collect_env.py
ADDED
@@ -0,0 +1,17 @@
from annotator.mmpkg.mmcv.utils import collect_env as collect_base_env
from annotator.mmpkg.mmcv.utils import get_git_hash

import annotator.mmpkg.mmseg as mmseg


def collect_env():
    """Collect the information of the running environments."""
    env_info = collect_base_env()
    env_info['MMSegmentation'] = f'{mmseg.__version__}+{get_git_hash()[:7]}'

    return env_info


if __name__ == '__main__':
    for name, val in collect_env().items():
        print('{}: {}'.format(name, val))
extensions/microsoftexcel-controlnet/annotator/mmpkg/mmseg/utils/logger.py
ADDED
@@ -0,0 +1,27 @@
import logging

from annotator.mmpkg.mmcv.utils import get_logger


def get_root_logger(log_file=None, log_level=logging.INFO):
    """Get the root logger.

    The logger will be initialized if it has not been initialized. By default a
    StreamHandler will be added. If `log_file` is specified, a FileHandler will
    also be added. The name of the root logger is the top-level package name,
    e.g., "mmseg".

    Args:
        log_file (str | None): The log filename. If specified, a FileHandler
            will be added to the root logger.
        log_level (int): The root logger level. Note that only the process of
            rank 0 is affected, while other processes will set the level to
            "Error" and be silent most of the time.

    Returns:
        logging.Logger: The root logger.
    """

    logger = get_logger(name='mmseg', log_file=log_file, log_level=log_level)

    return logger
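A minimal sketch; since the logger is looked up by the fixed name 'mmseg', repeated calls hand back the same instance:

```python
from annotator.mmpkg.mmseg.utils import get_root_logger

logger = get_root_logger()          # StreamHandler attached on first call
logger.info('annotator ready')
assert get_root_logger() is logger  # later calls return the same logger object
```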
extensions/microsoftexcel-controlnet/annotator/normalbae/LICENSE
ADDED
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2022 Caroline Chan

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
extensions/microsoftexcel-controlnet/annotator/normalbae/__init__.py
ADDED
@@ -0,0 +1,81 @@
import os
import types
import torch
import numpy as np

from einops import rearrange
from .models.NNET import NNET
from modules import devices
from annotator.annotator_path import models_path
import torchvision.transforms as transforms


# load model
def load_checkpoint(fpath, model):
    ckpt = torch.load(fpath, map_location='cpu')['model']

    load_dict = {}
    for k, v in ckpt.items():
        if k.startswith('module.'):
            k_ = k.replace('module.', '')
            load_dict[k_] = v
        else:
            load_dict[k] = v

    model.load_state_dict(load_dict)
    return model


class NormalBaeDetector:
    model_dir = os.path.join(models_path, "normal_bae")

    def __init__(self):
        self.model = None
        self.device = devices.get_device_for("controlnet")

    def load_model(self):
        remote_model_path = "https://huggingface.co/lllyasviel/Annotators/resolve/main/scannet.pt"
        modelpath = os.path.join(self.model_dir, "scannet.pt")
        if not os.path.exists(modelpath):
            from basicsr.utils.download_util import load_file_from_url
            load_file_from_url(remote_model_path, model_dir=self.model_dir)
        args = types.SimpleNamespace()
        args.mode = 'client'
        args.architecture = 'BN'
        args.pretrained = 'scannet'
        args.sampling_ratio = 0.4
        args.importance_ratio = 0.7
        model = NNET(args)
        model = load_checkpoint(modelpath, model)
        model.eval()
        self.model = model.to(self.device)
        self.norm = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

    def unload_model(self):
        if self.model is not None:
            self.model.cpu()

    def __call__(self, input_image):
        if self.model is None:
            self.load_model()

        self.model.to(self.device)
        assert input_image.ndim == 3
        image_normal = input_image
        with torch.no_grad():
            image_normal = torch.from_numpy(image_normal).float().to(self.device)
            image_normal = image_normal / 255.0
            image_normal = rearrange(image_normal, 'h w c -> 1 c h w')
            image_normal = self.norm(image_normal)

            normal = self.model(image_normal)
            normal = normal[0][-1][:, :3]
            # d = torch.sum(normal ** 2.0, dim=1, keepdim=True) ** 0.5
            # d = torch.maximum(d, torch.ones_like(d) * 1e-5)
            # normal /= d
            normal = ((normal + 1) * 0.5).clip(0, 1)

            normal = rearrange(normal[0], 'c h w -> h w c').cpu().numpy()
            normal_image = (normal * 255.0).clip(0, 255).astype(np.uint8)

        return normal_image
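A hedged usage sketch; this assumes the detector runs inside the WebUI process, where the `modules` package it imports is available, and that the network can fetch scannet.pt on first use:

```python
import numpy as np
from annotator.normalbae import NormalBaeDetector  # assumes WebUI package layout

detector = NormalBaeDetector()                  # lazy: weights load on first call
img = np.zeros((480, 640, 3), dtype=np.uint8)   # any HWC uint8 RGB image
normal_map = detector(img)                      # HWC uint8 normal map, same size
assert normal_map.shape == img.shape and normal_map.dtype == np.uint8
```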
extensions/microsoftexcel-controlnet/annotator/normalbae/models/NNET.py
ADDED
@@ -0,0 +1,22 @@
import torch
import torch.nn as nn
import torch.nn.functional as F

from .submodules.encoder import Encoder
from .submodules.decoder import Decoder


class NNET(nn.Module):
    def __init__(self, args):
        super(NNET, self).__init__()
        self.encoder = Encoder()
        self.decoder = Decoder(args)

    def get_1x_lr_params(self):  # lr/10 learning rate
        return self.encoder.parameters()

    def get_10x_lr_params(self):  # lr learning rate
        return self.decoder.parameters()

    def forward(self, img, **kwargs):
        return self.decoder(self.encoder(img), **kwargs)
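A hedged sketch of what the two parameter-group accessors are for: per-module learning rates, here with AdamW. Note that constructing NNET pulls the EfficientNet backbone via torch.hub, so this needs network access on first run:

```python
import types
import torch
from annotator.normalbae.models.NNET import NNET

# mirrors the namespace built in annotator/normalbae/__init__.py
args = types.SimpleNamespace(mode='client', architecture='BN',
                             pretrained='scannet',
                             sampling_ratio=0.4, importance_ratio=0.7)
model = NNET(args)

base_lr = 1e-4
optimizer = torch.optim.AdamW([
    {'params': model.get_1x_lr_params(), 'lr': base_lr / 10},  # encoder at lr/10
    {'params': model.get_10x_lr_params(), 'lr': base_lr},      # decoder at lr
])
```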
extensions/microsoftexcel-controlnet/annotator/normalbae/models/baseline.py
ADDED
@@ -0,0 +1,85 @@
import torch
import torch.nn as nn
import torch.nn.functional as F

from .submodules.submodules import UpSampleBN, norm_normalize


# This is the baseline encoder-decoder we used in the ablation study
class NNET(nn.Module):
    def __init__(self, args=None):
        super(NNET, self).__init__()
        self.encoder = Encoder()
        self.decoder = Decoder(num_classes=4)

    def forward(self, x, **kwargs):
        out = self.decoder(self.encoder(x), **kwargs)

        # Bilinearly upsample the output to match the input resolution
        up_out = F.interpolate(out, size=[x.size(2), x.size(3)], mode='bilinear', align_corners=False)

        # L2-normalize the first three channels / ensure positive value for concentration parameters (kappa)
        up_out = norm_normalize(up_out)
        return up_out

    def get_1x_lr_params(self):  # lr/10 learning rate
        return self.encoder.parameters()

    def get_10x_lr_params(self):  # lr learning rate
        modules = [self.decoder]
        for m in modules:
            yield from m.parameters()


# Encoder
class Encoder(nn.Module):
    def __init__(self):
        super(Encoder, self).__init__()

        basemodel_name = 'tf_efficientnet_b5_ap'
        basemodel = torch.hub.load('rwightman/gen-efficientnet-pytorch', basemodel_name, pretrained=True)

        # Remove last layer
        basemodel.global_pool = nn.Identity()
        basemodel.classifier = nn.Identity()

        self.original_model = basemodel

    def forward(self, x):
        features = [x]
        for k, v in self.original_model._modules.items():
            if (k == 'blocks'):
                for ki, vi in v._modules.items():
                    features.append(vi(features[-1]))
            else:
                features.append(v(features[-1]))
        return features


# Decoder (no pixel-wise MLP, no uncertainty-guided sampling)
class Decoder(nn.Module):
    def __init__(self, num_classes=4):
        super(Decoder, self).__init__()
        self.conv2 = nn.Conv2d(2048, 2048, kernel_size=1, stride=1, padding=0)
        self.up1 = UpSampleBN(skip_input=2048 + 176, output_features=1024)
        self.up2 = UpSampleBN(skip_input=1024 + 64, output_features=512)
        self.up3 = UpSampleBN(skip_input=512 + 40, output_features=256)
        self.up4 = UpSampleBN(skip_input=256 + 24, output_features=128)
        self.conv3 = nn.Conv2d(128, num_classes, kernel_size=3, stride=1, padding=1)

    def forward(self, features):
        x_block0, x_block1, x_block2, x_block3, x_block4 = features[4], features[5], features[6], features[8], features[11]
        x_d0 = self.conv2(x_block4)
        x_d1 = self.up1(x_d0, x_block3)
        x_d2 = self.up2(x_d1, x_block2)
        x_d3 = self.up3(x_d2, x_block1)
        x_d4 = self.up4(x_d3, x_block0)
        out = self.conv3(x_d4)
        return out


if __name__ == '__main__':
    model = NNET()
    x = torch.rand(2, 3, 480, 640)
    out = model(x)
    print(out.shape)
extensions/microsoftexcel-controlnet/annotator/normalbae/models/submodules/decoder.py
ADDED
@@ -0,0 +1,202 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
from .submodules import UpSampleBN, UpSampleGN, norm_normalize, sample_points


class Decoder(nn.Module):
    def __init__(self, args):
        super(Decoder, self).__init__()

        # hyper-parameter for sampling
        self.sampling_ratio = args.sampling_ratio
        self.importance_ratio = args.importance_ratio

        # feature-map
        self.conv2 = nn.Conv2d(2048, 2048, kernel_size=1, stride=1, padding=0)
        if args.architecture == 'BN':
            self.up1 = UpSampleBN(skip_input=2048 + 176, output_features=1024)
            self.up2 = UpSampleBN(skip_input=1024 + 64, output_features=512)
            self.up3 = UpSampleBN(skip_input=512 + 40, output_features=256)
            self.up4 = UpSampleBN(skip_input=256 + 24, output_features=128)

        elif args.architecture == 'GN':
            self.up1 = UpSampleGN(skip_input=2048 + 176, output_features=1024)
            self.up2 = UpSampleGN(skip_input=1024 + 64, output_features=512)
            self.up3 = UpSampleGN(skip_input=512 + 40, output_features=256)
            self.up4 = UpSampleGN(skip_input=256 + 24, output_features=128)

        else:
            raise Exception('invalid architecture')

        # produces 1/8 res output
        self.out_conv_res8 = nn.Conv2d(512, 4, kernel_size=3, stride=1, padding=1)

        # produces 1/4 res output
        self.out_conv_res4 = nn.Sequential(
            nn.Conv1d(512 + 4, 128, kernel_size=1), nn.ReLU(),
            nn.Conv1d(128, 128, kernel_size=1), nn.ReLU(),
            nn.Conv1d(128, 128, kernel_size=1), nn.ReLU(),
            nn.Conv1d(128, 4, kernel_size=1),
        )

        # produces 1/2 res output
        self.out_conv_res2 = nn.Sequential(
            nn.Conv1d(256 + 4, 128, kernel_size=1), nn.ReLU(),
            nn.Conv1d(128, 128, kernel_size=1), nn.ReLU(),
            nn.Conv1d(128, 128, kernel_size=1), nn.ReLU(),
            nn.Conv1d(128, 4, kernel_size=1),
        )

        # produces 1/1 res output
        self.out_conv_res1 = nn.Sequential(
            nn.Conv1d(128 + 4, 128, kernel_size=1), nn.ReLU(),
            nn.Conv1d(128, 128, kernel_size=1), nn.ReLU(),
            nn.Conv1d(128, 128, kernel_size=1), nn.ReLU(),
            nn.Conv1d(128, 4, kernel_size=1),
        )

    def forward(self, features, gt_norm_mask=None, mode='test'):
        x_block0, x_block1, x_block2, x_block3, x_block4 = features[4], features[5], features[6], features[8], features[11]

        # generate feature-map

        x_d0 = self.conv2(x_block4)      # x_d0 : [2, 2048, 15, 20]      1/32 res
        x_d1 = self.up1(x_d0, x_block3)  # x_d1 : [2, 1024, 30, 40]      1/16 res
        x_d2 = self.up2(x_d1, x_block2)  # x_d2 : [2, 512, 60, 80]       1/8 res
        x_d3 = self.up3(x_d2, x_block1)  # x_d3 : [2, 256, 120, 160]     1/4 res
        x_d4 = self.up4(x_d3, x_block0)  # x_d4 : [2, 128, 240, 320]     1/2 res

        # 1/8 res output
        out_res8 = self.out_conv_res8(x_d2)  # out_res8: [2, 4, 60, 80]      1/8 res output
        out_res8 = norm_normalize(out_res8)  # out_res8: [2, 4, 60, 80]      1/8 res output

        ################################################################################################################
        # out_res4
        ################################################################################################################

        if mode == 'train':
            # upsampling ... out_res8: [2, 4, 60, 80] -> out_res8_res4: [2, 4, 120, 160]
            out_res8_res4 = F.interpolate(out_res8, scale_factor=2, mode='bilinear', align_corners=True)
            B, _, H, W = out_res8_res4.shape

            # samples: [B, 1, N, 2]
            point_coords_res4, rows_int, cols_int = sample_points(out_res8_res4.detach(), gt_norm_mask,
                                                                  sampling_ratio=self.sampling_ratio,
                                                                  beta=self.importance_ratio)

            # output (needed for evaluation / visualization)
            out_res4 = out_res8_res4

            # grid_sample feature-map
            feat_res4 = F.grid_sample(x_d2, point_coords_res4, mode='bilinear', align_corners=True)  # (B, 512, 1, N)
            init_pred = F.grid_sample(out_res8, point_coords_res4, mode='bilinear', align_corners=True)  # (B, 4, 1, N)
            feat_res4 = torch.cat([feat_res4, init_pred], dim=1)  # (B, 512+4, 1, N)

            # prediction (needed to compute loss)
            samples_pred_res4 = self.out_conv_res4(feat_res4[:, :, 0, :])  # (B, 4, N)
            samples_pred_res4 = norm_normalize(samples_pred_res4)  # (B, 4, N) - normalized

            for i in range(B):
                out_res4[i, :, rows_int[i, :], cols_int[i, :]] = samples_pred_res4[i, :, :]

        else:
            # grid_sample feature-map
            feat_map = F.interpolate(x_d2, scale_factor=2, mode='bilinear', align_corners=True)
            init_pred = F.interpolate(out_res8, scale_factor=2, mode='bilinear', align_corners=True)
            feat_map = torch.cat([feat_map, init_pred], dim=1)  # (B, 512+4, H, W)
            B, _, H, W = feat_map.shape

            # try all pixels
            out_res4 = self.out_conv_res4(feat_map.view(B, 512 + 4, -1))  # (B, 4, N)
            out_res4 = norm_normalize(out_res4)  # (B, 4, N) - normalized
            out_res4 = out_res4.view(B, 4, H, W)
            samples_pred_res4 = point_coords_res4 = None

        ################################################################################################################
        # out_res2
        ################################################################################################################

        if mode == 'train':

            # upsampling ... out_res4: [2, 4, 120, 160] -> out_res4_res2: [2, 4, 240, 320]
            out_res4_res2 = F.interpolate(out_res4, scale_factor=2, mode='bilinear', align_corners=True)
            B, _, H, W = out_res4_res2.shape

            # samples: [B, 1, N, 2]
            point_coords_res2, rows_int, cols_int = sample_points(out_res4_res2.detach(), gt_norm_mask,
                                                                  sampling_ratio=self.sampling_ratio,
                                                                  beta=self.importance_ratio)

            # output (needed for evaluation / visualization)
            out_res2 = out_res4_res2

            # grid_sample feature-map
            feat_res2 = F.grid_sample(x_d3, point_coords_res2, mode='bilinear', align_corners=True)  # (B, 256, 1, N)
            init_pred = F.grid_sample(out_res4, point_coords_res2, mode='bilinear', align_corners=True)  # (B, 4, 1, N)
            feat_res2 = torch.cat([feat_res2, init_pred], dim=1)  # (B, 256+4, 1, N)

            # prediction (needed to compute loss)
            samples_pred_res2 = self.out_conv_res2(feat_res2[:, :, 0, :])  # (B, 4, N)
            samples_pred_res2 = norm_normalize(samples_pred_res2)  # (B, 4, N) - normalized

            for i in range(B):
                out_res2[i, :, rows_int[i, :], cols_int[i, :]] = samples_pred_res2[i, :, :]

        else:
            # grid_sample feature-map
            feat_map = F.interpolate(x_d3, scale_factor=2, mode='bilinear', align_corners=True)
            init_pred = F.interpolate(out_res4, scale_factor=2, mode='bilinear', align_corners=True)
            feat_map = torch.cat([feat_map, init_pred], dim=1)  # (B, 256+4, H, W)
            B, _, H, W = feat_map.shape

            out_res2 = self.out_conv_res2(feat_map.view(B, 256 + 4, -1))  # (B, 4, N)
            out_res2 = norm_normalize(out_res2)  # (B, 4, N) - normalized
            out_res2 = out_res2.view(B, 4, H, W)
            samples_pred_res2 = point_coords_res2 = None

        ################################################################################################################
        # out_res1
        ################################################################################################################

        if mode == 'train':
            # upsampling ... out_res2: [2, 4, 240, 320] -> out_res2_res1: [2, 4, 480, 640]
            out_res2_res1 = F.interpolate(out_res2, scale_factor=2, mode='bilinear', align_corners=True)
            B, _, H, W = out_res2_res1.shape

            # samples: [B, 1, N, 2]
            point_coords_res1, rows_int, cols_int = sample_points(out_res2_res1.detach(), gt_norm_mask,
                                                                  sampling_ratio=self.sampling_ratio,
                                                                  beta=self.importance_ratio)

            # output (needed for evaluation / visualization)
            out_res1 = out_res2_res1

            # grid_sample feature-map
            feat_res1 = F.grid_sample(x_d4, point_coords_res1, mode='bilinear', align_corners=True)  # (B, 128, 1, N)
            init_pred = F.grid_sample(out_res2, point_coords_res1, mode='bilinear', align_corners=True)  # (B, 4, 1, N)
            feat_res1 = torch.cat([feat_res1, init_pred], dim=1)  # (B, 128+4, 1, N)

            # prediction (needed to compute loss)
            samples_pred_res1 = self.out_conv_res1(feat_res1[:, :, 0, :])  # (B, 4, N)
            samples_pred_res1 = norm_normalize(samples_pred_res1)  # (B, 4, N) - normalized

            for i in range(B):
                out_res1[i, :, rows_int[i, :], cols_int[i, :]] = samples_pred_res1[i, :, :]

        else:
            # grid_sample feature-map
            feat_map = F.interpolate(x_d4, scale_factor=2, mode='bilinear', align_corners=True)
            init_pred = F.interpolate(out_res2, scale_factor=2, mode='bilinear', align_corners=True)
            feat_map = torch.cat([feat_map, init_pred], dim=1)  # (B, 128+4, H, W)
            B, _, H, W = feat_map.shape

            out_res1 = self.out_conv_res1(feat_map.view(B, 128 + 4, -1))  # (B, 4, N)
            out_res1 = norm_normalize(out_res1)  # (B, 4, N) - normalized
            out_res1 = out_res1.view(B, 4, H, W)
            samples_pred_res1 = point_coords_res1 = None

        return [out_res8, out_res4, out_res2, out_res1], \
               [out_res8, samples_pred_res4, samples_pred_res2, samples_pred_res1], \
               [None, point_coords_res4, point_coords_res2, point_coords_res1]
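A hedged test-mode shape sketch. The channel counts and strides for the five encoder taps (indices 4, 5, 6, 8, 11) are assumptions matching the EfficientNet-B5 encoder used elsewhere in this commit; the random tensors stand in for real encoder features:

```python
import types
import torch
from annotator.normalbae.models.submodules.decoder import Decoder

args = types.SimpleNamespace(architecture='BN', sampling_ratio=0.4,
                             importance_ratio=0.7)
decoder = Decoder(args)

h, w = 96, 128                       # divisible by 32 so every scale is exact
feats = [None] * 12
for idx, ch, stride in [(4, 24, 2), (5, 40, 4), (6, 64, 8),
                        (8, 176, 16), (11, 2048, 32)]:
    feats[idx] = torch.randn(1, ch, h // stride, w // stride)

outs, preds, coords = decoder(feats, mode='test')
assert outs[-1].shape == (1, 4, h, w)  # 3 normal channels + 1 kappa channel
```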
extensions/microsoftexcel-controlnet/annotator/normalbae/models/submodules/efficientnet_repo/.gitignore
ADDED
@@ -0,0 +1,109 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# pytorch stuff
*.pth
*.onnx
*.pb

trained_models/
.fuse_hidden*
extensions/microsoftexcel-controlnet/annotator/normalbae/models/submodules/efficientnet_repo/BENCHMARK.md
ADDED
@@ -0,0 +1,555 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Model Performance Benchmarks
|
2 |
+
|
3 |
+
All benchmarks run as per:
|
4 |
+
|
5 |
+
```
|
6 |
+
python onnx_export.py --model mobilenetv3_100 ./mobilenetv3_100.onnx
|
7 |
+
python onnx_optimize.py ./mobilenetv3_100.onnx --output mobilenetv3_100-opt.onnx
|
8 |
+
python onnx_to_caffe.py ./mobilenetv3_100.onnx --c2-prefix mobilenetv3
|
9 |
+
python onnx_to_caffe.py ./mobilenetv3_100-opt.onnx --c2-prefix mobilenetv3-opt
|
10 |
+
python caffe2_benchmark.py --c2-init ./mobilenetv3.init.pb --c2-predict ./mobilenetv3.predict.pb
|
11 |
+
python caffe2_benchmark.py --c2-init ./mobilenetv3-opt.init.pb --c2-predict ./mobilenetv3-opt.predict.pb
|
12 |
+
```
|
13 |
+
|
14 |
+
## EfficientNet-B0
|
15 |
+
|
16 |
+
### Unoptimized
|
17 |
+
```
|
18 |
+
Main run finished. Milliseconds per iter: 49.2862. Iters per second: 20.2897
|
19 |
+
Time per operator type:
|
20 |
+
29.7378 ms. 60.5145%. Conv
|
21 |
+
12.1785 ms. 24.7824%. Sigmoid
|
22 |
+
3.62811 ms. 7.38297%. SpatialBN
|
23 |
+
2.98444 ms. 6.07314%. Mul
|
24 |
+
0.326902 ms. 0.665225%. AveragePool
|
25 |
+
0.197317 ms. 0.401528%. FC
|
26 |
+
0.0852877 ms. 0.173555%. Add
|
27 |
+
0.0032607 ms. 0.00663532%. Squeeze
|
28 |
+
49.1416 ms in Total
|
29 |
+
FLOP per operator type:
|
30 |
+
0.76907 GFLOP. 95.2696%. Conv
|
31 |
+
0.0269508 GFLOP. 3.33857%. SpatialBN
|
32 |
+
0.00846444 GFLOP. 1.04855%. Mul
|
33 |
+
0.002561 GFLOP. 0.317248%. FC
|
34 |
+
0.000210112 GFLOP. 0.0260279%. Add
|
35 |
+
0.807256 GFLOP in Total
|
36 |
+
Feature Memory Read per operator type:
|
37 |
+
58.5253 MB. 43.0891%. Mul
|
38 |
+
43.2015 MB. 31.807%. Conv
|
39 |
+
27.2869 MB. 20.0899%. SpatialBN
|
40 |
+
5.12912 MB. 3.77631%. FC
|
41 |
+
1.6809 MB. 1.23756%. Add
|
42 |
+
135.824 MB in Total
|
43 |
+
Feature Memory Written per operator type:
|
44 |
+
33.8578 MB. 38.1965%. Mul
|
45 |
+
26.9881 MB. 30.4465%. Conv
|
46 |
+
26.9508 MB. 30.4044%. SpatialBN
|
47 |
+
0.840448 MB. 0.948147%. Add
|
48 |
+
0.004 MB. 0.00451258%. FC
|
49 |
+
88.6412 MB in Total
|
50 |
+
Parameter Memory per operator type:
|
51 |
+
15.8248 MB. 74.9391%. Conv
|
52 |
+
5.124 MB. 24.265%. FC
|
53 |
+
0.168064 MB. 0.795877%. SpatialBN
|
54 |
+
0 MB. 0%. Add
|
55 |
+
0 MB. 0%. Mul
|
56 |
+
21.1168 MB in Total
|
57 |
+
```
|
58 |
+
### Optimized
|
59 |
+
```
|
60 |
+
Main run finished. Milliseconds per iter: 46.0838. Iters per second: 21.6996
|
61 |
+
Time per operator type:
|
62 |
+
29.776 ms. 65.002%. Conv
|
63 |
+
12.2803 ms. 26.8084%. Sigmoid
|
64 |
+
3.15073 ms. 6.87815%. Mul
|
65 |
+
0.328651 ms. 0.717456%. AveragePool
|
66 |
+
0.186237 ms. 0.406563%. FC
|
67 |
+
0.0832429 ms. 0.181722%. Add
|
68 |
+
0.0026184 ms. 0.00571606%. Squeeze
|
69 |
+
45.8078 ms in Total
|
70 |
+
FLOP per operator type:
|
71 |
+
0.76907 GFLOP. 98.5601%. Conv
|
72 |
+
0.00846444 GFLOP. 1.08476%. Mul
|
73 |
+
0.002561 GFLOP. 0.328205%. FC
|
74 |
+
0.000210112 GFLOP. 0.0269269%. Add
|
75 |
+
0.780305 GFLOP in Total
|
76 |
+
Feature Memory Read per operator type:
|
77 |
+
58.5253 MB. 53.8803%. Mul
|
78 |
+
43.2855 MB. 39.8501%. Conv
|
79 |
+
5.12912 MB. 4.72204%. FC
|
80 |
+
1.6809 MB. 1.54749%. Add
|
81 |
+
108.621 MB in Total
|
82 |
+
Feature Memory Written per operator type:
|
83 |
+
33.8578 MB. 54.8834%. Mul
|
84 |
+
26.9881 MB. 43.7477%. Conv
|
85 |
+
0.840448 MB. 1.36237%. Add
|
86 |
+
0.004 MB. 0.00648399%. FC
|
87 |
+
61.6904 MB in Total
|
88 |
+
Parameter Memory per operator type:
|
89 |
+
15.8248 MB. 75.5403%. Conv
|
90 |
+
5.124 MB. 24.4597%. FC
|
91 |
+
0 MB. 0%. Add
|
92 |
+
0 MB. 0%. Mul
|
93 |
+
20.9488 MB in Total
|
94 |
+
```
|
95 |
+
|
96 |
+
## EfficientNet-B1
|
97 |
+
### Optimized
|
98 |
+
```
|
99 |
+
Main run finished. Milliseconds per iter: 71.8102. Iters per second: 13.9256
|
100 |
+
Time per operator type:
|
101 |
+
45.7915 ms. 66.3206%. Conv
|
102 |
+
17.8718 ms. 25.8841%. Sigmoid
|
103 |
+
4.44132 ms. 6.43244%. Mul
|
104 |
+
0.51001 ms. 0.738658%. AveragePool
|
105 |
+
0.233283 ms. 0.337868%. Add
|
106 |
+
0.194986 ms. 0.282402%. FC
|
107 |
+
0.00268255 ms. 0.00388519%. Squeeze
|
108 |
+
69.0456 ms in Total
|
109 |
+
FLOP per operator type:
|
110 |
+
1.37105 GFLOP. 98.7673%. Conv
|
111 |
+
0.0138759 GFLOP. 0.99959%. Mul
|
112 |
+
0.002561 GFLOP. 0.184489%. FC
|
113 |
+
0.000674432 GFLOP. 0.0485847%. Add
|
114 |
+
1.38816 GFLOP in Total
|
115 |
+
Feature Memory Read per operator type:
|
116 |
+
94.624 MB. 54.0789%. Mul
|
117 |
+
69.8255 MB. 39.9062%. Conv
|
118 |
+
5.39546 MB. 3.08357%. Add
|
119 |
+
5.12912 MB. 2.93136%. FC
|
120 |
+
174.974 MB in Total
|
121 |
+
Feature Memory Written per operator type:
|
122 |
+
55.5035 MB. 54.555%. Mul
|
123 |
+
43.5333 MB. 42.7894%. Conv
|
124 |
+
2.69773 MB. 2.65163%. Add
|
125 |
+
0.004 MB. 0.00393165%. FC
|
126 |
+
101.739 MB in Total
|
127 |
+
Parameter Memory per operator type:
|
128 |
+
25.7479 MB. 83.4024%. Conv
|
129 |
+
5.124 MB. 16.5976%. FC
|
130 |
+
0 MB. 0%. Add
|
131 |
+
0 MB. 0%. Mul
|
132 |
+
30.8719 MB in Total
|
133 |
+
```
|
134 |
+
|
135 |
+
## EfficientNet-B2
|
136 |
+
### Optimized
|
137 |
+
```
|
138 |
+
Main run finished. Milliseconds per iter: 92.28. Iters per second: 10.8366
|
139 |
+
Time per operator type:
|
140 |
+
61.4627 ms. 67.5845%. Conv
|
141 |
+
22.7458 ms. 25.0113%. Sigmoid
|
142 |
+
5.59931 ms. 6.15701%. Mul
|
143 |
+
0.642567 ms. 0.706568%. AveragePool
|
144 |
+
0.272795 ms. 0.299965%. Add
|
145 |
+
0.216178 ms. 0.237709%. FC
|
146 |
+
0.00268895 ms. 0.00295677%. Squeeze
|
147 |
+
90.942 ms in Total
|
148 |
+
FLOP per operator type:
|
149 |
+
1.98431 GFLOP. 98.9343%. Conv
|
150 |
+
0.0177039 GFLOP. 0.882686%. Mul
|
151 |
+
0.002817 GFLOP. 0.140451%. FC
|
152 |
+
0.000853984 GFLOP. 0.0425782%. Add
|
153 |
+
2.00568 GFLOP in Total
|
154 |
+
Feature Memory Read per operator type:
|
155 |
+
120.609 MB. 54.9637%. Mul
|
156 |
+
86.3512 MB. 39.3519%. Conv
|
157 |
+
6.83187 MB. 3.11341%. Add
|
158 |
+
5.64163 MB. 2.571%. FC
|
159 |
+
219.433 MB in Total
|
160 |
+
Feature Memory Written per operator type:
|
161 |
+
70.8155 MB. 54.6573%. Mul
|
162 |
+
55.3273 MB. 42.7031%. Conv
|
163 |
+
3.41594 MB. 2.63651%. Add
|
164 |
+
0.004 MB. 0.00308731%. FC
|
165 |
+
129.563 MB in Total
|
166 |
+
Parameter Memory per operator type:
|
167 |
+
30.4721 MB. 84.3913%. Conv
|
168 |
+
5.636 MB. 15.6087%. FC
|
169 |
+
0 MB. 0%. Add
|
170 |
+
0 MB. 0%. Mul
|
171 |
+
36.1081 MB in Total
|
172 |
+
```
|
173 |
+
|
174 |
+
## MixNet-M
|
175 |
+
### Optimized
|
176 |
+
```
|
177 |
+
Main run finished. Milliseconds per iter: 63.1122. Iters per second: 15.8448
|
178 |
+
Time per operator type:
|
179 |
+
48.1139 ms. 75.2052%. Conv
|
180 |
+
7.1341 ms. 11.1511%. Sigmoid
|
181 |
+
2.63706 ms. 4.12189%. SpatialBN
|
182 |
+
1.73186 ms. 2.70701%. Mul
|
183 |
+
1.38707 ms. 2.16809%. Split
|
184 |
+
1.29322 ms. 2.02139%. Concat
|
185 |
+
1.00093 ms. 1.56452%. Relu
|
186 |
+
0.235309 ms. 0.367803%. Add
|
187 |
+
0.221579 ms. 0.346343%. FC
|
188 |
+
0.219315 ms. 0.342803%. AveragePool
|
189 |
+
0.00250145 ms. 0.00390993%. Squeeze
|
190 |
+
63.9768 ms in Total
|
191 |
+
FLOP per operator type:
|
192 |
+
0.675273 GFLOP. 95.5827%. Conv
|
193 |
+
0.0221072 GFLOP. 3.12921%. SpatialBN
|
194 |
+
0.00538445 GFLOP. 0.762152%. Mul
|
195 |
+
0.003073 GFLOP. 0.434973%. FC
|
196 |
+
0.000642488 GFLOP. 0.0909421%. Add
|
197 |
+
0 GFLOP. 0%. Concat
|
198 |
+
0 GFLOP. 0%. Relu
|
199 |
+
0.70648 GFLOP in Total
|
200 |
+
Feature Memory Read per operator type:
|
201 |
+
46.8424 MB. 30.502%. Conv
|
202 |
+
36.8626 MB. 24.0036%. Mul
|
203 |
+
22.3152 MB. 14.5309%. SpatialBN
|
22.1074 MB. 14.3955%. Concat
14.1496 MB. 9.21372%. Relu
6.15414 MB. 4.00735%. FC
5.1399 MB. 3.34692%. Add
153.571 MB in Total
Feature Memory Written per operator type:
32.7672 MB. 28.4331%. Conv
22.1072 MB. 19.1831%. Concat
22.1072 MB. 19.1831%. SpatialBN
21.5378 MB. 18.689%. Mul
14.1496 MB. 12.2781%. Relu
2.56995 MB. 2.23003%. Add
0.004 MB. 0.00347092%. FC
115.243 MB in Total
Parameter Memory per operator type:
13.7059 MB. 68.674%. Conv
6.148 MB. 30.8049%. FC
0.104 MB. 0.521097%. SpatialBN
0 MB. 0%. Add
0 MB. 0%. Concat
0 MB. 0%. Mul
0 MB. 0%. Relu
19.9579 MB in Total
```

## TF MobileNet-V3 Large 1.0

### Optimized
```
Main run finished. Milliseconds per iter: 22.0495. Iters per second: 45.3525
Time per operator type:
17.437 ms. 80.0087%. Conv
1.27662 ms. 5.8577%. Add
1.12759 ms. 5.17387%. Div
0.701155 ms. 3.21721%. Mul
0.562654 ms. 2.58171%. Relu
0.431144 ms. 1.97828%. Clip
0.156902 ms. 0.719936%. FC
0.0996858 ms. 0.457402%. AveragePool
0.00112455 ms. 0.00515993%. Flatten
21.7939 ms in Total
FLOP per operator type:
0.43062 GFLOP. 98.1484%. Conv
0.002561 GFLOP. 0.583713%. FC
0.00210867 GFLOP. 0.480616%. Mul
0.00193868 GFLOP. 0.441871%. Add
0.00151532 GFLOP. 0.345377%. Div
0 GFLOP. 0%. Relu
0.438743 GFLOP in Total
Feature Memory Read per operator type:
34.7967 MB. 43.9391%. Conv
14.496 MB. 18.3046%. Mul
9.44828 MB. 11.9307%. Add
9.26157 MB. 11.6949%. Relu
6.0614 MB. 7.65395%. Div
5.12912 MB. 6.47673%. FC
79.193 MB in Total
Feature Memory Written per operator type:
17.6247 MB. 35.8656%. Conv
9.26157 MB. 18.847%. Relu
8.43469 MB. 17.1643%. Mul
7.75472 MB. 15.7806%. Add
6.06128 MB. 12.3345%. Div
0.004 MB. 0.00813985%. FC
49.1409 MB in Total
Parameter Memory per operator type:
16.6851 MB. 76.5052%. Conv
5.124 MB. 23.4948%. FC
0 MB. 0%. Add
0 MB. 0%. Div
0 MB. 0%. Mul
0 MB. 0%. Relu
21.8091 MB in Total
```

## MobileNet-V3 (RW)

### Unoptimized
```
Main run finished. Milliseconds per iter: 24.8316. Iters per second: 40.2712
Time per operator type:
15.9266 ms. 69.2624%. Conv
2.36551 ms. 10.2873%. SpatialBN
1.39102 ms. 6.04936%. Add
1.30327 ms. 5.66773%. Div
0.737014 ms. 3.20517%. Mul
0.639697 ms. 2.78195%. Relu
0.375681 ms. 1.63378%. Clip
0.153126 ms. 0.665921%. FC
0.0993787 ms. 0.432184%. AveragePool
0.0032632 ms. 0.0141912%. Squeeze
22.9946 ms in Total
FLOP per operator type:
0.430616 GFLOP. 94.4041%. Conv
0.0175992 GFLOP. 3.85829%. SpatialBN
0.002561 GFLOP. 0.561449%. FC
0.00210961 GFLOP. 0.46249%. Mul
0.00173891 GFLOP. 0.381223%. Add
0.00151626 GFLOP. 0.33241%. Div
0 GFLOP. 0%. Relu
0.456141 GFLOP in Total
Feature Memory Read per operator type:
34.7354 MB. 36.4363%. Conv
17.7944 MB. 18.6658%. SpatialBN
14.5035 MB. 15.2137%. Mul
9.25778 MB. 9.71113%. Relu
7.84641 MB. 8.23064%. Add
6.06516 MB. 6.36216%. Div
5.12912 MB. 5.38029%. FC
95.3317 MB in Total
Feature Memory Written per operator type:
17.6246 MB. 26.7264%. Conv
17.5992 MB. 26.6878%. SpatialBN
9.25778 MB. 14.0387%. Relu
8.43843 MB. 12.7962%. Mul
6.95565 MB. 10.5477%. Add
6.06502 MB. 9.19713%. Div
0.004 MB. 0.00606568%. FC
65.9447 MB in Total
Parameter Memory per operator type:
16.6778 MB. 76.1564%. Conv
5.124 MB. 23.3979%. FC
0.0976 MB. 0.445674%. SpatialBN
0 MB. 0%. Add
0 MB. 0%. Div
0 MB. 0%. Mul
0 MB. 0%. Relu
21.8994 MB in Total

```
### Optimized

```
Main run finished. Milliseconds per iter: 22.0981. Iters per second: 45.2527
Time per operator type:
17.146 ms. 78.8965%. Conv
1.38453 ms. 6.37084%. Add
1.30991 ms. 6.02749%. Div
0.685417 ms. 3.15391%. Mul
0.532589 ms. 2.45068%. Relu
0.418263 ms. 1.92461%. Clip
0.15128 ms. 0.696106%. FC
0.102065 ms. 0.469648%. AveragePool
0.0022143 ms. 0.010189%. Squeeze
21.7323 ms in Total
FLOP per operator type:
0.430616 GFLOP. 98.1927%. Conv
0.002561 GFLOP. 0.583981%. FC
0.00210961 GFLOP. 0.481051%. Mul
0.00173891 GFLOP. 0.396522%. Add
0.00151626 GFLOP. 0.34575%. Div
0 GFLOP. 0%. Relu
0.438542 GFLOP in Total
Feature Memory Read per operator type:
34.7842 MB. 44.833%. Conv
14.5035 MB. 18.6934%. Mul
9.25778 MB. 11.9323%. Relu
7.84641 MB. 10.1132%. Add
6.06516 MB. 7.81733%. Div
5.12912 MB. 6.61087%. FC
77.5861 MB in Total
Feature Memory Written per operator type:
17.6246 MB. 36.4556%. Conv
9.25778 MB. 19.1492%. Relu
8.43843 MB. 17.4544%. Mul
6.95565 MB. 14.3874%. Add
6.06502 MB. 12.5452%. Div
0.004 MB. 0.00827378%. FC
48.3455 MB in Total
Parameter Memory per operator type:
16.6778 MB. 76.4973%. Conv
5.124 MB. 23.5027%. FC
0 MB. 0%. Add
0 MB. 0%. Div
0 MB. 0%. Mul
0 MB. 0%. Relu
21.8018 MB in Total

```

## MnasNet-A1

### Unoptimized
```
Main run finished. Milliseconds per iter: 30.0892. Iters per second: 33.2345
Time per operator type:
24.4656 ms. 79.0905%. Conv
4.14958 ms. 13.4144%. SpatialBN
1.60598 ms. 5.19169%. Relu
0.295219 ms. 0.95436%. Mul
0.187609 ms. 0.606486%. FC
0.120556 ms. 0.389724%. AveragePool
0.09036 ms. 0.292109%. Add
0.015727 ms. 0.050841%. Sigmoid
0.00306205 ms. 0.00989875%. Squeeze
30.9337 ms in Total
FLOP per operator type:
0.620598 GFLOP. 95.6434%. Conv
0.0248873 GFLOP. 3.8355%. SpatialBN
0.002561 GFLOP. 0.394688%. FC
0.000597408 GFLOP. 0.0920695%. Mul
0.000222656 GFLOP. 0.0343146%. Add
0 GFLOP. 0%. Relu
0.648867 GFLOP in Total
Feature Memory Read per operator type:
35.5457 MB. 38.4109%. Conv
25.1552 MB. 27.1829%. SpatialBN
22.5235 MB. 24.339%. Relu
5.12912 MB. 5.54256%. FC
2.40586 MB. 2.59978%. Mul
1.78125 MB. 1.92483%. Add
92.5406 MB in Total
Feature Memory Written per operator type:
24.9042 MB. 32.9424%. Conv
24.8873 MB. 32.92%. SpatialBN
22.5235 MB. 29.7932%. Relu
2.38963 MB. 3.16092%. Mul
0.890624 MB. 1.17809%. Add
0.004 MB. 0.00529106%. FC
75.5993 MB in Total
Parameter Memory per operator type:
10.2732 MB. 66.1459%. Conv
5.124 MB. 32.9917%. FC
0.133952 MB. 0.86247%. SpatialBN
0 MB. 0%. Add
0 MB. 0%. Mul
0 MB. 0%. Relu
15.5312 MB in Total
```

### Optimized
```
Main run finished. Milliseconds per iter: 24.2367. Iters per second: 41.2597
Time per operator type:
22.0547 ms. 91.1375%. Conv
1.49096 ms. 6.16116%. Relu
0.253417 ms. 1.0472%. Mul
0.18506 ms. 0.76473%. FC
0.112942 ms. 0.466717%. AveragePool
0.086769 ms. 0.358559%. Add
0.0127889 ms. 0.0528479%. Sigmoid
0.0027346 ms. 0.0113003%. Squeeze
24.1994 ms in Total
FLOP per operator type:
0.620598 GFLOP. 99.4581%. Conv
0.002561 GFLOP. 0.41043%. FC
0.000597408 GFLOP. 0.0957417%. Mul
0.000222656 GFLOP. 0.0356832%. Add
0 GFLOP. 0%. Relu
0.623979 GFLOP in Total
Feature Memory Read per operator type:
35.6127 MB. 52.7968%. Conv
22.5235 MB. 33.3917%. Relu
5.12912 MB. 7.60406%. FC
2.40586 MB. 3.56675%. Mul
1.78125 MB. 2.64075%. Add
67.4524 MB in Total
Feature Memory Written per operator type:
24.9042 MB. 49.1092%. Conv
22.5235 MB. 44.4145%. Relu
2.38963 MB. 4.71216%. Mul
0.890624 MB. 1.75624%. Add
0.004 MB. 0.00788768%. FC
50.712 MB in Total
Parameter Memory per operator type:
10.2732 MB. 66.7213%. Conv
5.124 MB. 33.2787%. FC
0 MB. 0%. Add
0 MB. 0%. Mul
0 MB. 0%. Relu
15.3972 MB in Total
```

## MnasNet-B1

### Unoptimized
```
Main run finished. Milliseconds per iter: 28.3109. Iters per second: 35.322
Time per operator type:
29.1121 ms. 83.3081%. Conv
4.14959 ms. 11.8746%. SpatialBN
1.35823 ms. 3.88675%. Relu
0.186188 ms. 0.532802%. FC
0.116244 ms. 0.332647%. Add
0.018641 ms. 0.0533437%. AveragePool
0.0040904 ms. 0.0117052%. Squeeze
34.9451 ms in Total
FLOP per operator type:
0.626272 GFLOP. 96.2088%. Conv
0.0218266 GFLOP. 3.35303%. SpatialBN
0.002561 GFLOP. 0.393424%. FC
0.000291648 GFLOP. 0.0448034%. Add
0 GFLOP. 0%. Relu
0.650951 GFLOP in Total
Feature Memory Read per operator type:
34.4354 MB. 41.3788%. Conv
22.1299 MB. 26.5921%. SpatialBN
19.1923 MB. 23.0622%. Relu
5.12912 MB. 6.16333%. FC
2.33318 MB. 2.80364%. Add
83.2199 MB in Total
Feature Memory Written per operator type:
21.8266 MB. 34.0955%. Conv
21.8266 MB. 34.0955%. SpatialBN
19.1923 MB. 29.9805%. Relu
1.16659 MB. 1.82234%. Add
0.004 MB. 0.00624844%. FC
64.016 MB in Total
Parameter Memory per operator type:
12.2576 MB. 69.9104%. Conv
5.124 MB. 29.2245%. FC
0.15168 MB. 0.865099%. SpatialBN
0 MB. 0%. Add
0 MB. 0%. Relu
17.5332 MB in Total
```

### Optimized
```
Main run finished. Milliseconds per iter: 26.6364. Iters per second: 37.5426
Time per operator type:
24.9888 ms. 94.0962%. Conv
1.26147 ms. 4.75011%. Relu
0.176234 ms. 0.663619%. FC
0.113309 ms. 0.426672%. Add
0.0138708 ms. 0.0522311%. AveragePool
0.00295685 ms. 0.0111341%. Squeeze
26.5566 ms in Total
FLOP per operator type:
0.626272 GFLOP. 99.5466%. Conv
0.002561 GFLOP. 0.407074%. FC
0.000291648 GFLOP. 0.0463578%. Add
0 GFLOP. 0%. Relu
0.629124 GFLOP in Total
Feature Memory Read per operator type:
34.5112 MB. 56.4224%. Conv
19.1923 MB. 31.3775%. Relu
5.12912 MB. 8.3856%. FC
2.33318 MB. 3.81452%. Add
61.1658 MB in Total
Feature Memory Written per operator type:
21.8266 MB. 51.7346%. Conv
19.1923 MB. 45.4908%. Relu
1.16659 MB. 2.76513%. Add
0.004 MB. 0.00948104%. FC
42.1895 MB in Total
Parameter Memory per operator type:
12.2576 MB. 70.5205%. Conv
5.124 MB. 29.4795%. FC
0 MB. 0%. Add
0 MB. 0%. Relu
17.3816 MB in Total
```
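For context: per the repo README, dumps like the ones above come from exporting a model to ONNX, converting it to Caffe2, and running `caffe2_benchmark.py`, which prints these per-operator time/FLOP/memory breakdowns. A rough reproduction sketch; the `onnx_export.py --model` and `caffe2_benchmark.py --c2-prefix` flags appear in the scripts in this repo, but the flags on the conversion step are assumptions:
```
python onnx_export.py --model mnasnet_b1 ./mnasnet_b1.onnx
python onnx_to_caffe.py ./mnasnet_b1.onnx --c2-prefix ./mnasnet_b1
python caffe2_benchmark.py --c2-prefix ./mnasnet_b1
```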
extensions/microsoftexcel-controlnet/annotator/normalbae/models/submodules/efficientnet_repo/LICENSE
ADDED
@@ -0,0 +1,201 @@
                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "{}"
      replaced with your own identifying information. (Don't include
      the brackets!) The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright 2020 Ross Wightman

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
extensions/microsoftexcel-controlnet/annotator/normalbae/models/submodules/efficientnet_repo/README.md
ADDED
@@ -0,0 +1,323 @@
# (Generic) EfficientNets for PyTorch

A 'generic' implementation of EfficientNet, MixNet, MobileNetV3, etc. that covers most of the compute/parameter efficient architectures derived from the MobileNet V1/V2 block sequence, including those found via automated neural architecture search.

All models are implemented by the GenEfficientNet or MobileNetV3 classes, with string-based architecture definitions to configure the block layouts (idea from [here](https://github.com/tensorflow/tpu/blob/master/models/official/mnasnet/mnasnet_models.py)); a toy decoder for that format is sketched below.
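To make the string format concrete, here is a minimal sketch of a decoder for a single block definition. It is an illustration, not the library's actual parser; the field codes follow the convention of the mnasnet_models.py reference linked above, and `decode_block_str` is a hypothetical name.

```
# Decode one block-definition string such as 'ir_r2_k3_s2_e6_c24':
# 'ir' = inverted residual block, r = repeats, k = kernel size,
# s = stride, e = expansion ratio, c = output channels.
def decode_block_str(block_str):
    parts = block_str.split('_')
    args = {'type': parts[0]}
    for p in parts[1:]:
        if p == 'noskip':
            args['noskip'] = True      # bare flag, no numeric value
        else:
            key, value = p[0], p[1:]   # e.g. 'k3' -> ('k', '3')
            args[key] = float(value) if '.' in value else int(value)
    return args

print(decode_block_str('ir_r2_k3_s2_e6_c24'))
# -> {'type': 'ir', 'r': 2, 'k': 3, 's': 2, 'e': 6, 'c': 24}
```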

## What's New

### Aug 19, 2020
* Add updated PyTorch trained EfficientNet-B3 weights trained by myself with `timm` (82.1 top-1)
* Add PyTorch trained EfficientNet-Lite0 contributed by [@hal-314](https://github.com/hal-314) (75.5 top-1)
* Update ONNX and Caffe2 export / utility scripts to work with latest PyTorch / ONNX
* ONNX runtime based validation script added
* Activations (mostly) brought in sync with `timm` equivalents

### April 5, 2020
* Add some newly trained MobileNet-V2 models trained with latest h-params, rand augment. They compare quite favourably to EfficientNet-Lite
  * 3.5M param MobileNet-V2 100 @ 73%
  * 4.5M param MobileNet-V2 110d @ 75%
  * 6.1M param MobileNet-V2 140 @ 76.5%
  * 5.8M param MobileNet-V2 120d @ 77.3%

### March 23, 2020
* Add EfficientNet-Lite models w/ weights ported from [Tensorflow TPU](https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet/lite)
* Add PyTorch trained MobileNet-V3 Large weights with 75.77% top-1
* IMPORTANT CHANGE (if training from scratch) - weight init changed to better match Tensorflow impl; set `fix_group_fanout=False` in `initialize_weight_goog` for the old behavior

### Feb 12, 2020
* Add EfficientNet-L2 and B0-B7 NoisyStudent weights ported from [Tensorflow TPU](https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet)
* Port new EfficientNet-B8 (RandAugment) weights from TF TPU; these differ from the B8 AdvProp weights and use a different input normalization
* Add RandAugment PyTorch trained EfficientNet-ES (EdgeTPU-Small) weights with 78.1 top-1. Trained by [Andrew Lavin](https://github.com/andravin)

### Jan 22, 2020
* Update weights for EfficientNet B0, B2, B3 and MixNet-XL with latest RandAugment trained weights. Trained with https://github.com/rwightman/pytorch-image-models
* Fix torchscript compatibility for PyTorch 1.4, add torchscript support for MixedConv2d using ModuleDict
* Test models, torchscript, onnx export with PyTorch 1.4 -- no issues

### Nov 22, 2019
* New top-1 high! Ported official TF EfficientNet AdvProp (https://arxiv.org/abs/1911.09665) weights and B8 model spec. Created a new set of `ap` models since they use a different preprocessing (Inception mean/std) from the original EfficientNet base/AA/RA weights.

### Nov 15, 2019
* Ported official TF MobileNet-V3 float32 large/small/minimalistic weights
* Modified the MobileNet-V3 model and components to support the additional config needed to cover the differences between TF MobileNet-V3 and my own

### Oct 30, 2019
* Many of the models will now work with torch.jit.script, MixNet being the biggest exception
* Improved interface for enabling torchscript or ONNX export compatible modes (via config)
* Add JIT optimized mem-efficient Swish/Mish autograd.fn in addition to memory-efficient autograd.fn
* Activation factory to select best version of activation by name or override one globally
* Add pretrained checkpoint load helper that handles input conv and classifier changes

### Oct 27, 2019
* Add CondConv EfficientNet variants ported from https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet/condconv
* Add RandAug weights for TF EfficientNet B5 and B7 from https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet
* Bring over MixNet-XL model and depth scaling algo from my pytorch-image-models code base
* Switch activations and global pooling to modules
* Add memory-efficient Swish/Mish impl
* Add as_sequential() method to all models and allow as an argument in entrypoint fns
* Move MobileNetV3 into own file since it has a different head
* Remove ChamNet, MobileNet V2/V1 since they will likely never be used here

## Models

Implemented models include:
* EfficientNet NoisyStudent (B0-B7, L2) (https://arxiv.org/abs/1911.04252)
* EfficientNet AdvProp (B0-B8) (https://arxiv.org/abs/1911.09665)
* EfficientNet (B0-B8) (https://arxiv.org/abs/1905.11946)
* EfficientNet-EdgeTPU (S, M, L) (https://ai.googleblog.com/2019/08/efficientnet-edgetpu-creating.html)
* EfficientNet-CondConv (https://arxiv.org/abs/1904.04971)
* EfficientNet-Lite (https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet/lite)
* MixNet (https://arxiv.org/abs/1907.09595)
* MNASNet B1, A1 (Squeeze-Excite), and Small (https://arxiv.org/abs/1807.11626)
* MobileNet-V3 (https://arxiv.org/abs/1905.02244)
* FBNet-C (https://arxiv.org/abs/1812.03443)
* Single-Path NAS (https://arxiv.org/abs/1904.02877)

I originally implemented and trained some of these models with code [here](https://github.com/rwightman/pytorch-image-models); this repository contains just the GenEfficientNet models, validation, and associated ONNX/Caffe2 export code.

## Pretrained

I've managed to train several of the models to accuracies close to or above the originating papers and official impl. My training code is here: https://github.com/rwightman/pytorch-image-models

|Model | Prec@1 (Err) | Prec@5 (Err) | Param#(M) | MAdds(M) | Image Scaling | Resolution | Crop |
|---|---|---|---|---|---|---|---|
| efficientnet_b3 | 82.240 (17.760) | 96.116 (3.884) | 12.23 | TBD | bicubic | 320 | 1.0 |
| efficientnet_b3 | 82.076 (17.924) | 96.020 (3.980) | 12.23 | TBD | bicubic | 300 | 0.904 |
| mixnet_xl | 81.074 (18.926) | 95.282 (4.718) | 11.90 | TBD | bicubic | 256 | 1.0 |
| efficientnet_b2 | 80.612 (19.388) | 95.318 (4.682) | 9.1 | TBD | bicubic | 288 | 1.0 |
| mixnet_xl | 80.476 (19.524) | 94.936 (5.064) | 11.90 | TBD | bicubic | 224 | 0.875 |
| efficientnet_b2 | 80.288 (19.712) | 95.166 (4.834) | 9.1 | 1003 | bicubic | 260 | 0.890 |
| mixnet_l | 78.976 (21.024) | 94.184 (5.816) | 7.33 | TBD | bicubic | 224 | 0.875 |
| efficientnet_b1 | 78.692 (21.308) | 94.086 (5.914) | 7.8 | 694 | bicubic | 240 | 0.882 |
| efficientnet_es | 78.066 (21.934) | 93.926 (6.074) | 5.44 | TBD | bicubic | 224 | 0.875 |
| efficientnet_b0 | 77.698 (22.302) | 93.532 (6.468) | 5.3 | 390 | bicubic | 224 | 0.875 |
| mobilenetv2_120d | 77.294 (22.706) | 93.502 (6.498) | 5.8 | TBD | bicubic | 224 | 0.875 |
| mixnet_m | 77.256 (22.744) | 93.418 (6.582) | 5.01 | 353 | bicubic | 224 | 0.875 |
| mobilenetv2_140 | 76.524 (23.476) | 92.990 (7.010) | 6.1 | TBD | bicubic | 224 | 0.875 |
| mixnet_s | 75.988 (24.012) | 92.794 (7.206) | 4.13 | TBD | bicubic | 224 | 0.875 |
| mobilenetv3_large_100 | 75.766 (24.234) | 92.542 (7.458) | 5.5 | TBD | bicubic | 224 | 0.875 |
| mobilenetv3_rw | 75.634 (24.366) | 92.708 (7.292) | 5.5 | 219 | bicubic | 224 | 0.875 |
| efficientnet_lite0 | 75.472 (24.528) | 92.520 (7.480) | 4.65 | TBD | bicubic | 224 | 0.875 |
| mnasnet_a1 | 75.448 (24.552) | 92.604 (7.396) | 3.9 | 312 | bicubic | 224 | 0.875 |
| fbnetc_100 | 75.124 (24.876) | 92.386 (7.614) | 5.6 | 385 | bilinear | 224 | 0.875 |
| mobilenetv2_110d | 75.052 (24.948) | 92.180 (7.820) | 4.5 | TBD | bicubic | 224 | 0.875 |
| mnasnet_b1 | 74.658 (25.342) | 92.114 (7.886) | 4.4 | 315 | bicubic | 224 | 0.875 |
| spnasnet_100 | 74.084 (25.916) | 91.818 (8.182) | 4.4 | TBD | bilinear | 224 | 0.875 |
| mobilenetv2_100 | 72.978 (27.022) | 91.016 (8.984) | 3.5 | TBD | bicubic | 224 | 0.875 |

More pretrained models to come...

## Ported Weights

The weights ported from Tensorflow checkpoints for the EfficientNet models pretty much match the Tensorflow accuracy once a SAME convolution padding equivalent is added, and the same crop factors, image scaling, etc. (see table) are used via cmd line args.

**IMPORTANT:**
* Tensorflow ported weights for EfficientNet AdvProp (AP), EfficientNet EdgeTPU, EfficientNet-CondConv, EfficientNet-Lite, and MobileNet-V3 models use Inception style (0.5, 0.5, 0.5) for mean and std.
* Enabling the Tensorflow preprocessing pipeline with `--tf-preprocessing` at validation time will improve scores by 0.1-0.5%, very close to original TF impl.

To run validation for tf_efficientnet_b5:
`python validate.py /path/to/imagenet/validation/ --model tf_efficientnet_b5 -b 64 --img-size 456 --crop-pct 0.934 --interpolation bicubic`

To run validation w/ TF preprocessing for tf_efficientnet_b5:
`python validate.py /path/to/imagenet/validation/ --model tf_efficientnet_b5 -b 64 --img-size 456 --tf-preprocessing`

To run validation for a model with Inception preprocessing, ie EfficientNet-B8 AdvProp:
`python validate.py /path/to/imagenet/validation/ --model tf_efficientnet_b8_ap -b 48 --num-gpu 2 --img-size 672 --crop-pct 0.954 --mean 0.5 --std 0.5`

|Model | Prec@1 (Err) | Prec@5 (Err) | Param # | Image Scaling | Image Size | Crop |
|---|---|---|---|---|---|---|
| tf_efficientnet_l2_ns *tfp | 88.352 (11.648) | 98.652 (1.348) | 480 | bicubic | 800 | N/A |
| tf_efficientnet_l2_ns | TBD | TBD | 480 | bicubic | 800 | 0.961 |
| tf_efficientnet_l2_ns_475 | 88.234 (11.766) | 98.546 (1.454) | 480 | bicubic | 475 | 0.936 |
| tf_efficientnet_l2_ns_475 *tfp | 88.172 (11.828) | 98.566 (1.434) | 480 | bicubic | 475 | N/A |
| tf_efficientnet_b7_ns *tfp | 86.844 (13.156) | 98.084 (1.916) | 66.35 | bicubic | 600 | N/A |
| tf_efficientnet_b7_ns | 86.840 (13.160) | 98.094 (1.906) | 66.35 | bicubic | 600 | N/A |
| tf_efficientnet_b6_ns | 86.452 (13.548) | 97.882 (2.118) | 43.04 | bicubic | 528 | N/A |
| tf_efficientnet_b6_ns *tfp | 86.444 (13.556) | 97.880 (2.120) | 43.04 | bicubic | 528 | N/A |
| tf_efficientnet_b5_ns *tfp | 86.064 (13.936) | 97.746 (2.254) | 30.39 | bicubic | 456 | N/A |
| tf_efficientnet_b5_ns | 86.088 (13.912) | 97.752 (2.248) | 30.39 | bicubic | 456 | N/A |
| tf_efficientnet_b8_ap *tfp | 85.436 (14.564) | 97.272 (2.728) | 87.4 | bicubic | 672 | N/A |
| tf_efficientnet_b8 *tfp | 85.384 (14.616) | 97.394 (2.606) | 87.4 | bicubic | 672 | N/A |
| tf_efficientnet_b8 | 85.370 (14.630) | 97.390 (2.610) | 87.4 | bicubic | 672 | 0.954 |
| tf_efficientnet_b8_ap | 85.368 (14.632) | 97.294 (2.706) | 87.4 | bicubic | 672 | 0.954 |
| tf_efficientnet_b4_ns *tfp | 85.298 (14.702) | 97.504 (2.496) | 19.34 | bicubic | 380 | N/A |
| tf_efficientnet_b4_ns | 85.162 (14.838) | 97.470 (2.530) | 19.34 | bicubic | 380 | 0.922 |
| tf_efficientnet_b7_ap *tfp | 85.154 (14.846) | 97.244 (2.756) | 66.35 | bicubic | 600 | N/A |
| tf_efficientnet_b7_ap | 85.118 (14.882) | 97.252 (2.748) | 66.35 | bicubic | 600 | 0.949 |
| tf_efficientnet_b7 *tfp | 84.940 (15.060) | 97.214 (2.786) | 66.35 | bicubic | 600 | N/A |
| tf_efficientnet_b7 | 84.932 (15.068) | 97.208 (2.792) | 66.35 | bicubic | 600 | 0.949 |
| tf_efficientnet_b6_ap | 84.786 (15.214) | 97.138 (2.862) | 43.04 | bicubic | 528 | 0.942 |
| tf_efficientnet_b6_ap *tfp | 84.760 (15.240) | 97.124 (2.876) | 43.04 | bicubic | 528 | N/A |
| tf_efficientnet_b5_ap *tfp | 84.276 (15.724) | 96.932 (3.068) | 30.39 | bicubic | 456 | N/A |
| tf_efficientnet_b5_ap | 84.254 (15.746) | 96.976 (3.024) | 30.39 | bicubic | 456 | 0.934 |
| tf_efficientnet_b6 *tfp | 84.140 (15.860) | 96.852 (3.148) | 43.04 | bicubic | 528 | N/A |
| tf_efficientnet_b6 | 84.110 (15.890) | 96.886 (3.114) | 43.04 | bicubic | 528 | 0.942 |
| tf_efficientnet_b3_ns *tfp | 84.054 (15.946) | 96.918 (3.082) | 12.23 | bicubic | 300 | N/A |
| tf_efficientnet_b3_ns | 84.048 (15.952) | 96.910 (3.090) | 12.23 | bicubic | 300 | 0.904 |
| tf_efficientnet_b5 *tfp | 83.822 (16.178) | 96.756 (3.244) | 30.39 | bicubic | 456 | N/A |
| tf_efficientnet_b5 | 83.812 (16.188) | 96.748 (3.252) | 30.39 | bicubic | 456 | 0.934 |
| tf_efficientnet_b4_ap *tfp | 83.278 (16.722) | 96.376 (3.624) | 19.34 | bicubic | 380 | N/A |
| tf_efficientnet_b4_ap | 83.248 (16.752) | 96.388 (3.612) | 19.34 | bicubic | 380 | 0.922 |
| tf_efficientnet_b4 | 83.022 (16.978) | 96.300 (3.700) | 19.34 | bicubic | 380 | 0.922 |
| tf_efficientnet_b4 *tfp | 82.948 (17.052) | 96.308 (3.692) | 19.34 | bicubic | 380 | N/A |
| tf_efficientnet_b2_ns *tfp | 82.436 (17.564) | 96.268 (3.732) | 9.11 | bicubic | 260 | N/A |
| tf_efficientnet_b2_ns | 82.380 (17.620) | 96.248 (3.752) | 9.11 | bicubic | 260 | 0.89 |
| tf_efficientnet_b3_ap *tfp | 81.882 (18.118) | 95.662 (4.338) | 12.23 | bicubic | 300 | N/A |
| tf_efficientnet_b3_ap | 81.828 (18.172) | 95.624 (4.376) | 12.23 | bicubic | 300 | 0.904 |
| tf_efficientnet_b3 | 81.636 (18.364) | 95.718 (4.282) | 12.23 | bicubic | 300 | 0.904 |
| tf_efficientnet_b3 *tfp | 81.576 (18.424) | 95.662 (4.338) | 12.23 | bicubic | 300 | N/A |
| tf_efficientnet_lite4 | 81.528 (18.472) | 95.668 (4.332) | 13.00 | bilinear | 380 | 0.92 |
| tf_efficientnet_b1_ns *tfp | 81.514 (18.486) | 95.776 (4.224) | 7.79 | bicubic | 240 | N/A |
| tf_efficientnet_lite4 *tfp | 81.502 (18.498) | 95.676 (4.324) | 13.00 | bilinear | 380 | N/A |
| tf_efficientnet_b1_ns | 81.388 (18.612) | 95.738 (4.262) | 7.79 | bicubic | 240 | 0.88 |
| tf_efficientnet_el | 80.534 (19.466) | 95.190 (4.810) | 10.59 | bicubic | 300 | 0.904 |
| tf_efficientnet_el *tfp | 80.476 (19.524) | 95.200 (4.800) | 10.59 | bicubic | 300 | N/A |
| tf_efficientnet_b2_ap *tfp | 80.420 (19.580) | 95.040 (4.960) | 9.11 | bicubic | 260 | N/A |
| tf_efficientnet_b2_ap | 80.306 (19.694) | 95.028 (4.972) | 9.11 | bicubic | 260 | 0.890 |
| tf_efficientnet_b2 *tfp | 80.188 (19.812) | 94.974 (5.026) | 9.11 | bicubic | 260 | N/A |
| tf_efficientnet_b2 | 80.086 (19.914) | 94.908 (5.092) | 9.11 | bicubic | 260 | 0.890 |
| tf_efficientnet_lite3 | 79.812 (20.188) | 94.914 (5.086) | 8.20 | bilinear | 300 | 0.904 |
| tf_efficientnet_lite3 *tfp | 79.734 (20.266) | 94.838 (5.162) | 8.20 | bilinear | 300 | N/A |
| tf_efficientnet_b1_ap *tfp | 79.532 (20.468) | 94.378 (5.622) | 7.79 | bicubic | 240 | N/A |
| tf_efficientnet_cc_b1_8e *tfp | 79.464 (20.536) | 94.492 (5.508) | 39.7 | bicubic | 240 | 0.88 |
| tf_efficientnet_cc_b1_8e | 79.298 (20.702) | 94.364 (5.636) | 39.7 | bicubic | 240 | 0.88 |
| tf_efficientnet_b1_ap | 79.278 (20.722) | 94.308 (5.692) | 7.79 | bicubic | 240 | 0.88 |
| tf_efficientnet_b1 *tfp | 79.172 (20.828) | 94.450 (5.550) | 7.79 | bicubic | 240 | N/A |
| tf_efficientnet_em *tfp | 78.958 (21.042) | 94.458 (5.542) | 6.90 | bicubic | 240 | N/A |
| tf_efficientnet_b0_ns *tfp | 78.806 (21.194) | 94.496 (5.504) | 5.29 | bicubic | 224 | N/A |
| tf_mixnet_l *tfp | 78.846 (21.154) | 94.212 (5.788) | 7.33 | bilinear | 224 | N/A |
| tf_efficientnet_b1 | 78.826 (21.174) | 94.198 (5.802) | 7.79 | bicubic | 240 | 0.88 |
| tf_mixnet_l | 78.770 (21.230) | 94.004 (5.996) | 7.33 | bicubic | 224 | 0.875 |
| tf_efficientnet_em | 78.742 (21.258) | 94.332 (5.668) | 6.90 | bicubic | 240 | 0.875 |
| tf_efficientnet_b0_ns | 78.658 (21.342) | 94.376 (5.624) | 5.29 | bicubic | 224 | 0.875 |
| tf_efficientnet_cc_b0_8e *tfp | 78.314 (21.686) | 93.790 (6.210) | 24.0 | bicubic | 224 | 0.875 |
| tf_efficientnet_cc_b0_8e | 77.908 (22.092) | 93.656 (6.344) | 24.0 | bicubic | 224 | 0.875 |
| tf_efficientnet_cc_b0_4e *tfp | 77.746 (22.254) | 93.552 (6.448) | 13.3 | bicubic | 224 | 0.875 |
| tf_efficientnet_cc_b0_4e | 77.304 (22.696) | 93.332 (6.668) | 13.3 | bicubic | 224 | 0.875 |
| tf_efficientnet_es *tfp | 77.616 (22.384) | 93.750 (6.250) | 5.44 | bicubic | 224 | N/A |
| tf_efficientnet_lite2 *tfp | 77.544 (22.456) | 93.800 (6.200) | 6.09 | bilinear | 260 | N/A |
| tf_efficientnet_lite2 | 77.460 (22.540) | 93.746 (6.254) | 6.09 | bicubic | 260 | 0.89 |
| tf_efficientnet_b0_ap *tfp | 77.514 (22.486) | 93.576 (6.424) | 5.29 | bicubic | 224 | N/A |
| tf_efficientnet_es | 77.264 (22.736) | 93.600 (6.400) | 5.44 | bicubic | 224 | N/A |
| tf_efficientnet_b0 *tfp | 77.258 (22.742) | 93.478 (6.522) | 5.29 | bicubic | 224 | N/A |
| tf_efficientnet_b0_ap | 77.084 (22.916) | 93.254 (6.746) | 5.29 | bicubic | 224 | 0.875 |
| tf_mixnet_m *tfp | 77.072 (22.928) | 93.368 (6.632) | 5.01 | bilinear | 224 | N/A |
| tf_mixnet_m | 76.950 (23.050) | 93.156 (6.844) | 5.01 | bicubic | 224 | 0.875 |
| tf_efficientnet_b0 | 76.848 (23.152) | 93.228 (6.772) | 5.29 | bicubic | 224 | 0.875 |
| tf_efficientnet_lite1 *tfp | 76.764 (23.236) | 93.326 (6.674) | 5.42 | bilinear | 240 | N/A |
| tf_efficientnet_lite1 | 76.638 (23.362) | 93.232 (6.768) | 5.42 | bicubic | 240 | 0.882 |
| tf_mixnet_s *tfp | 75.800 (24.200) | 92.788 (7.212) | 4.13 | bilinear | 224 | N/A |
| tf_mobilenetv3_large_100 *tfp | 75.768 (24.232) | 92.710 (7.290) | 5.48 | bilinear | 224 | N/A |
| tf_mixnet_s | 75.648 (24.352) | 92.636 (7.364) | 4.13 | bicubic | 224 | 0.875 |
| tf_mobilenetv3_large_100 | 75.516 (24.484) | 92.600 (7.400) | 5.48 | bilinear | 224 | 0.875 |
| tf_efficientnet_lite0 *tfp | 75.074 (24.926) | 92.314 (7.686) | 4.65 | bilinear | 224 | N/A |
| tf_efficientnet_lite0 | 74.842 (25.158) | 92.170 (7.830) | 4.65 | bicubic | 224 | 0.875 |
| tf_mobilenetv3_large_075 *tfp | 73.730 (26.270) | 91.616 (8.384) | 3.99 | bilinear | 224 | N/A |
| tf_mobilenetv3_large_075 | 73.442 (26.558) | 91.352 (8.648) | 3.99 | bilinear | 224 | 0.875 |
| tf_mobilenetv3_large_minimal_100 *tfp | 72.678 (27.322) | 90.860 (9.140) | 3.92 | bilinear | 224 | N/A |
| tf_mobilenetv3_large_minimal_100 | 72.244 (27.756) | 90.636 (9.364) | 3.92 | bilinear | 224 | 0.875 |
| tf_mobilenetv3_small_100 *tfp | 67.918 (32.082) | 87.958 (12.042) | 2.54 | bilinear | 224 | N/A |
| tf_mobilenetv3_small_100 | 67.918 (32.082) | 87.662 (12.338) | 2.54 | bilinear | 224 | 0.875 |
| tf_mobilenetv3_small_075 *tfp | 66.142 (33.858) | 86.498 (13.502) | 2.04 | bilinear | 224 | N/A |
| tf_mobilenetv3_small_075 | 65.718 (34.282) | 86.136 (13.864) | 2.04 | bilinear | 224 | 0.875 |
| tf_mobilenetv3_small_minimal_100 *tfp | 63.378 (36.622) | 84.802 (15.198) | 2.04 | bilinear | 224 | N/A |
| tf_mobilenetv3_small_minimal_100 | 62.898 (37.102) | 84.230 (15.770) | 2.04 | bilinear | 224 | 0.875 |

*tfp models validated with `tf-preprocessing` pipeline

Google tf and tflite weights ported from official Tensorflow repositories:
* https://github.com/tensorflow/tpu/tree/master/models/official/mnasnet
* https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet
* https://github.com/tensorflow/models/tree/master/research/slim/nets/mobilenet

## Usage

### Environment

All development and testing has been done in Conda Python 3 environments on Linux x86-64 systems, specifically Python 3.6.x, 3.7.x, 3.8.x.

Users have reported that a Python 3 Anaconda install in Windows works. I have not verified this myself.

PyTorch versions 1.4, 1.5, 1.6 have been tested with this code.

I've tried to keep the dependencies minimal; the setup is as per the PyTorch default install instructions for Conda:
```
conda create -n torch-env
conda activate torch-env
conda install -c pytorch pytorch torchvision cudatoolkit=10.2
```

### PyTorch Hub

Models can be accessed via the PyTorch Hub API:

```
>>> torch.hub.list('rwightman/gen-efficientnet-pytorch')
['efficientnet_b0', ...]
>>> model = torch.hub.load('rwightman/gen-efficientnet-pytorch', 'efficientnet_b0', pretrained=True)
>>> model.eval()
>>> output = model(torch.randn(1,3,224,224))
```

### Pip
This package can be installed via pip.

Install (after conda env/install):
```
pip install geffnet
```

Eval use:
```
>>> import geffnet
>>> m = geffnet.create_model('mobilenetv3_large_100', pretrained=True)
>>> m.eval()
```

Train use:
```
>>> import geffnet
>>> # models can also be created by using the entrypoint directly
>>> m = geffnet.efficientnet_b2(pretrained=True, drop_rate=0.25, drop_connect_rate=0.2)
>>> m.train()
```

Create in an `nn.Sequential` container, for fast.ai, etc.:
```
>>> import geffnet
>>> m = geffnet.mixnet_l(pretrained=True, drop_rate=0.25, drop_connect_rate=0.2, as_sequential=True)
```

### Exporting

Scripts are included to:
* export models to ONNX (`onnx_export.py`)
* optimize an exported ONNX graph (`onnx_optimize.py` or `onnx_validate.py` w/ `--onnx-output-opt` arg)
* validate with ONNX runtime (`onnx_validate.py`)
* convert ONNX model to Caffe2 (`onnx_to_caffe.py`)
* validate in Caffe2 (`caffe2_validate.py`)
* benchmark in Caffe2 w/ FLOPs, parameters output (`caffe2_benchmark.py`)

As an example, to export the MobileNet-V3 pretrained model and then run an ImageNet validation:
```
python onnx_export.py --model mobilenetv3_large_100 ./mobilenetv3_100.onnx
python onnx_validate.py /imagenet/validation/ --onnx-input ./mobilenetv3_100.onnx
```

These scripts were tested to be working as of PyTorch 1.6 and ONNX 1.7 w/ ONNX runtime 1.4. Caffe2-compatible export now requires additional args mentioned in the export script (not needed in earlier versions).

#### Export Notes
1. The TF ported weights with the 'SAME' conv padding activated cannot be exported to ONNX unless the `_EXPORTABLE` flag in `config.py` is set to True. Use `config.set_exportable(True)` as in the `onnx_export.py` script (a rough end-to-end sketch follows these notes).
2. TF ported models with 'SAME' padding will have the padding fixed at export time to the resolution used for export. Even though dynamic padding is supported in opset >= 11, I can't get it working.
3. The ONNX optimize facility doesn't work reliably in PyTorch 1.6 / ONNX 1.7. Fortunately, the onnxruntime based inference is working very well now and includes on-the-fly optimization.
4. ONNX / Caffe2 export/import frequently breaks with different PyTorch and ONNX version releases. Please check their respective issue trackers before filing issues here.
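Putting export notes 1 and 2 together, the export path looks roughly like the sketch below. `set_exportable` and `create_model` are the real entry points (re-exported in `geffnet/__init__.py`), while the model name, output path, and input resolution are illustrative:

```
import torch
import geffnet

geffnet.set_exportable(True)   # note 1: set before building TF 'SAME' padding models
model = geffnet.create_model('tf_efficientnet_b0', pretrained=True)
model.eval()

# note 2: 'SAME' padding is frozen at this resolution, so export at the size you will serve
dummy = torch.randn(1, 3, 224, 224)
torch.onnx.export(model, dummy, 'tf_efficientnet_b0.onnx',
                  input_names=['input0'], output_names=['output0'])
```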
extensions/microsoftexcel-controlnet/annotator/normalbae/models/submodules/efficientnet_repo/caffe2_benchmark.py
ADDED
@@ -0,0 +1,65 @@
""" Caffe2 benchmark script

This script runs a Caffe2 benchmark on an exported ONNX model.
It is a useful tool for reporting model FLOPS.

Copyright 2020 Ross Wightman
"""
import argparse

from caffe2.python import core, workspace, model_helper
from caffe2.proto import caffe2_pb2


parser = argparse.ArgumentParser(description='Caffe2 Model Benchmark')
parser.add_argument('--c2-prefix', default='', type=str, metavar='NAME',
                    help='caffe2 model pb name prefix')
parser.add_argument('--c2-init', default='', type=str, metavar='PATH',
                    help='caffe2 model init .pb')
parser.add_argument('--c2-predict', default='', type=str, metavar='PATH',
                    help='caffe2 model predict .pb')
parser.add_argument('-b', '--batch-size', default=1, type=int,
                    metavar='N', help='mini-batch size (default: 1)')
parser.add_argument('--img-size', default=224, type=int,
                    metavar='N', help='Input image dimension, uses model default if empty')


def main():
    args = parser.parse_args()
    args.gpu_id = 0
    if args.c2_prefix:
        args.c2_init = args.c2_prefix + '.init.pb'
        args.c2_predict = args.c2_prefix + '.predict.pb'

    model = model_helper.ModelHelper(name="le_net", init_params=False)

    # Bring in the init net from init_net.pb
    init_net_proto = caffe2_pb2.NetDef()
    with open(args.c2_init, "rb") as f:
        init_net_proto.ParseFromString(f.read())
    model.param_init_net = core.Net(init_net_proto)

    # bring in the predict net from predict_net.pb
    predict_net_proto = caffe2_pb2.NetDef()
    with open(args.c2_predict, "rb") as f:
        predict_net_proto.ParseFromString(f.read())
    model.net = core.Net(predict_net_proto)

    # CUDA performance not impressive
    #device_opts = core.DeviceOption(caffe2_pb2.PROTO_CUDA, args.gpu_id)
    #model.net.RunAllOnGPU(gpu_id=args.gpu_id, use_cudnn=True)
    #model.param_init_net.RunAllOnGPU(gpu_id=args.gpu_id, use_cudnn=True)

    # fill the model's input blob with random data of the requested shape
    input_blob = model.net.external_inputs[0]
    model.param_init_net.GaussianFill(
        [],
        input_blob.GetUnscopedName(),
        shape=(args.batch_size, 3, args.img_size, args.img_size),
        mean=0.0,
        std=1.0)
    workspace.RunNetOnce(model.param_init_net)
    workspace.CreateNet(model.net, overwrite=True)
    workspace.BenchmarkNet(model.net.Proto().name, 5, 20, True)


if __name__ == '__main__':
    main()
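A note on the `BenchmarkNet` call above: the Caffe2 signature is `(net_name, warmup_runs, main_runs, run_individual)`, so this script does 5 warmup passes, times 20 iterations, and prints the per-operator breakdowns collected in BENCHMARK.md. A minimal invocation, assuming an `.init.pb`/`.predict.pb` pair produced by the ONNX-to-Caffe2 conversion step:
```
python caffe2_benchmark.py --c2-prefix ./mobilenetv3_100 --batch-size 1 --img-size 224
```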
extensions/microsoftexcel-controlnet/annotator/normalbae/models/submodules/efficientnet_repo/caffe2_validate.py
ADDED
@@ -0,0 +1,138 @@
1 |
+
""" Caffe2 validation script
|
2 |
+
|
3 |
+
This script is created to verify exported ONNX models running in Caffe2
|
4 |
+
It utilizes the same PyTorch dataloader/processing pipeline for a
|
5 |
+
fair comparison against the originals.
|
6 |
+
|
7 |
+
Copyright 2020 Ross Wightman
|
8 |
+
"""
|
9 |
+
import argparse
|
10 |
+
import numpy as np
|
11 |
+
from caffe2.python import core, workspace, model_helper
|
12 |
+
from caffe2.proto import caffe2_pb2
|
13 |
+
from data import create_loader, resolve_data_config, Dataset
|
14 |
+
from utils import AverageMeter
|
15 |
+
import time
|
16 |
+
|
17 |
+
parser = argparse.ArgumentParser(description='Caffe2 ImageNet Validation')
|
18 |
+
parser.add_argument('data', metavar='DIR',
|
19 |
+
help='path to dataset')
|
20 |
+
parser.add_argument('--c2-prefix', default='', type=str, metavar='NAME',
|
21 |
+
help='caffe2 model pb name prefix')
|
22 |
+
parser.add_argument('--c2-init', default='', type=str, metavar='PATH',
|
23 |
+
help='caffe2 model init .pb')
|
24 |
+
parser.add_argument('--c2-predict', default='', type=str, metavar='PATH',
|
25 |
+
help='caffe2 model predict .pb')
|
26 |
+
parser.add_argument('-j', '--workers', default=2, type=int, metavar='N',
|
27 |
+
help='number of data loading workers (default: 2)')
|
28 |
+
parser.add_argument('-b', '--batch-size', default=256, type=int,
|
29 |
+
metavar='N', help='mini-batch size (default: 256)')
|
30 |
+
parser.add_argument('--img-size', default=None, type=int,
|
31 |
+
                    metavar='N', help='Input image dimension, uses model default if empty')
parser.add_argument('--mean', type=float, nargs='+', default=None, metavar='MEAN',
                    help='Override mean pixel value of dataset')
parser.add_argument('--std', type=float, nargs='+', default=None, metavar='STD',
                    help='Override std deviation of dataset')
parser.add_argument('--crop-pct', type=float, default=None, metavar='PCT',
                    help='Override default crop pct of 0.875')
parser.add_argument('--interpolation', default='', type=str, metavar='NAME',
                    help='Image resize interpolation type (overrides model)')
parser.add_argument('--tf-preprocessing', dest='tf_preprocessing', action='store_true',
                    help='use tensorflow mnasnet preprocessing')
parser.add_argument('--print-freq', '-p', default=10, type=int,
                    metavar='N', help='print frequency (default: 10)')


def main():
    args = parser.parse_args()
    args.gpu_id = 0
    if args.c2_prefix:
        args.c2_init = args.c2_prefix + '.init.pb'
        args.c2_predict = args.c2_prefix + '.predict.pb'

    model = model_helper.ModelHelper(name="validation_net", init_params=False)

    # Bring in the init net from init_net.pb
    init_net_proto = caffe2_pb2.NetDef()
    with open(args.c2_init, "rb") as f:
        init_net_proto.ParseFromString(f.read())
    model.param_init_net = core.Net(init_net_proto)

    # Bring in the predict net from predict_net.pb
    predict_net_proto = caffe2_pb2.NetDef()
    with open(args.c2_predict, "rb") as f:
        predict_net_proto.ParseFromString(f.read())
    model.net = core.Net(predict_net_proto)

    data_config = resolve_data_config(None, args)
    loader = create_loader(
        Dataset(args.data, load_bytes=args.tf_preprocessing),
        input_size=data_config['input_size'],
        batch_size=args.batch_size,
        use_prefetcher=False,
        interpolation=data_config['interpolation'],
        mean=data_config['mean'],
        std=data_config['std'],
        num_workers=args.workers,
        crop_pct=data_config['crop_pct'],
        tensorflow_preprocessing=args.tf_preprocessing)

    # this is so obvious, wonderful interface </sarcasm>
    input_blob = model.net.external_inputs[0]
    output_blob = model.net.external_outputs[0]

    if True:
        device_opts = None
    else:
        # CUDA is crashing, no idea why, awesome error message, give it a try for kicks
        device_opts = core.DeviceOption(caffe2_pb2.PROTO_CUDA, args.gpu_id)
        model.net.RunAllOnGPU(gpu_id=args.gpu_id, use_cudnn=True)
        model.param_init_net.RunAllOnGPU(gpu_id=args.gpu_id, use_cudnn=True)

    model.param_init_net.GaussianFill(
        [], input_blob.GetUnscopedName(),
        shape=(1,) + data_config['input_size'], mean=0.0, std=1.0)
    workspace.RunNetOnce(model.param_init_net)
    workspace.CreateNet(model.net, overwrite=True)

    batch_time = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()
    for i, (input, target) in enumerate(loader):
        # run the net and return prediction
        caffe2_in = input.data.numpy()
        workspace.FeedBlob(input_blob, caffe2_in, device_opts)
        workspace.RunNet(model.net, num_iter=1)
        output = workspace.FetchBlob(output_blob)

        # measure accuracy and record loss
        prec1, prec5 = accuracy_np(output.data, target.numpy())
        top1.update(prec1.item(), input.size(0))
        top5.update(prec5.item(), input.size(0))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print('Test: [{0}/{1}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f}, {rate_avg:.3f}/s, {ms_avg:.3f} ms/sample) \t'
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                  'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                      i, len(loader), batch_time=batch_time, rate_avg=input.size(0) / batch_time.avg,
                      ms_avg=1000 * batch_time.avg / input.size(0), top1=top1, top5=top5))

    print(' * Prec@1 {top1.avg:.3f} ({top1a:.3f}) Prec@5 {top5.avg:.3f} ({top5a:.3f})'.format(
        top1=top1, top1a=100 - top1.avg, top5=top5, top5a=100. - top5.avg))


def accuracy_np(output, target):
    max_indices = np.argsort(output, axis=1)[:, ::-1]
    top5 = 100 * np.equal(max_indices[:, :5], target[:, np.newaxis]).sum(axis=1).mean()
    top1 = 100 * np.equal(max_indices[:, 0], target).mean()
    return top1, top5


if __name__ == '__main__':
    main()
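A minimal sketch of what `accuracy_np` computes, using hypothetical logits for two samples over six classes (values invented for illustration, not part of the commit):

import numpy as np

output = np.array([[0.1, 0.9, 0.0, 0.0, 0.0, 0.0],
                   [0.2, 0.1, 0.7, 0.0, 0.0, 0.0]])
target = np.array([1, 0])
# Rank classes by descending score, then count hits at cutoffs 1 and 5, as accuracy_np does.
max_indices = np.argsort(output, axis=1)[:, ::-1]
top1 = 100 * np.equal(max_indices[:, 0], target).mean()
top5 = 100 * np.equal(max_indices[:, :5], target[:, np.newaxis]).sum(axis=1).mean()
print(top1, top5)  # 50.0 100.0 -- sample 0 is a top-1 hit, sample 1 only a top-5 hit
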
extensions/microsoftexcel-controlnet/annotator/normalbae/models/submodules/efficientnet_repo/geffnet/__init__.py
ADDED
@@ -0,0 +1,5 @@
from .gen_efficientnet import *
from .mobilenetv3 import *
from .model_factory import create_model
from .config import is_exportable, is_scriptable, set_exportable, set_scriptable
from .activations import *
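A minimal usage sketch for the entry points re-exported above (assuming this vendored copy is importable as `geffnet`; `mobilenetv3_large_100` is one plausible registered model name, used here only as an example):

import torch
import geffnet

m = geffnet.create_model('mobilenetv3_large_100', pretrained=False)
m.eval()
with torch.no_grad():
    out = m(torch.randn(1, 3, 224, 224))  # 1000-class ImageNet head by default
print(out.shape)
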
extensions/microsoftexcel-controlnet/annotator/normalbae/models/submodules/efficientnet_repo/geffnet/activations/__init__.py
ADDED
@@ -0,0 +1,137 @@
from geffnet import config
from geffnet.activations.activations_me import *
from geffnet.activations.activations_jit import *
from geffnet.activations.activations import *
import torch

_has_silu = 'silu' in dir(torch.nn.functional)

_ACT_FN_DEFAULT = dict(
    silu=F.silu if _has_silu else swish,
    swish=F.silu if _has_silu else swish,
    mish=mish,
    relu=F.relu,
    relu6=F.relu6,
    sigmoid=sigmoid,
    tanh=tanh,
    hard_sigmoid=hard_sigmoid,
    hard_swish=hard_swish,
)

_ACT_FN_JIT = dict(
    silu=F.silu if _has_silu else swish_jit,
    swish=F.silu if _has_silu else swish_jit,
    mish=mish_jit,
)

_ACT_FN_ME = dict(
    silu=F.silu if _has_silu else swish_me,
    swish=F.silu if _has_silu else swish_me,
    mish=mish_me,
    hard_swish=hard_swish_me,
    hard_sigmoid=hard_sigmoid_me,
)

_ACT_LAYER_DEFAULT = dict(
    silu=nn.SiLU if _has_silu else Swish,
    swish=nn.SiLU if _has_silu else Swish,
    mish=Mish,
    relu=nn.ReLU,
    relu6=nn.ReLU6,
    sigmoid=Sigmoid,
    tanh=Tanh,
    hard_sigmoid=HardSigmoid,
    hard_swish=HardSwish,
)

_ACT_LAYER_JIT = dict(
    silu=nn.SiLU if _has_silu else SwishJit,
    swish=nn.SiLU if _has_silu else SwishJit,
    mish=MishJit,
)

_ACT_LAYER_ME = dict(
    silu=nn.SiLU if _has_silu else SwishMe,
    swish=nn.SiLU if _has_silu else SwishMe,
    mish=MishMe,
    hard_swish=HardSwishMe,
    hard_sigmoid=HardSigmoidMe
)

_OVERRIDE_FN = dict()
_OVERRIDE_LAYER = dict()


def add_override_act_fn(name, fn):
    global _OVERRIDE_FN
    _OVERRIDE_FN[name] = fn


def update_override_act_fn(overrides):
    assert isinstance(overrides, dict)
    global _OVERRIDE_FN
    _OVERRIDE_FN.update(overrides)


def clear_override_act_fn():
    global _OVERRIDE_FN
    _OVERRIDE_FN = dict()


def add_override_act_layer(name, fn):
    _OVERRIDE_LAYER[name] = fn


def update_override_act_layer(overrides):
    assert isinstance(overrides, dict)
    global _OVERRIDE_LAYER
    _OVERRIDE_LAYER.update(overrides)


def clear_override_act_layer():
    global _OVERRIDE_LAYER
    _OVERRIDE_LAYER = dict()


def get_act_fn(name='relu'):
    """ Activation Function Factory
    Fetching activation fns by name with this function allows export or torch script friendly
    functions to be returned dynamically based on current config.
    """
    if name in _OVERRIDE_FN:
        return _OVERRIDE_FN[name]
    use_me = not (config.is_exportable() or config.is_scriptable() or config.is_no_jit())
    if use_me and name in _ACT_FN_ME:
        # If not exporting or scripting the model, first look for a memory optimized version
        # activation with custom autograd, then fallback to jit scripted, then a Python or Torch builtin
        return _ACT_FN_ME[name]
    if config.is_exportable() and name in ('silu', 'swish'):
        # FIXME PyTorch SiLU doesn't ONNX export, this is a temp hack
        return swish
    use_jit = not (config.is_exportable() or config.is_no_jit())
    # NOTE: export tracing should work with jit scripted components, but I keep running into issues
    if use_jit and name in _ACT_FN_JIT:  # jit scripted models should be okay for export/scripting
        return _ACT_FN_JIT[name]
    return _ACT_FN_DEFAULT[name]


def get_act_layer(name='relu'):
    """ Activation Layer Factory
    Fetching activation layers by name with this function allows export or torch script friendly
    functions to be returned dynamically based on current config.
    """
    if name in _OVERRIDE_LAYER:
        return _OVERRIDE_LAYER[name]
    use_me = not (config.is_exportable() or config.is_scriptable() or config.is_no_jit())
    if use_me and name in _ACT_LAYER_ME:
        return _ACT_LAYER_ME[name]
    if config.is_exportable() and name in ('silu', 'swish'):
        # FIXME PyTorch SiLU doesn't ONNX export, this is a temp hack
        return Swish
    use_jit = not (config.is_exportable() or config.is_no_jit())
    # NOTE: export tracing should work with jit scripted components, but I keep running into issues
    if use_jit and name in _ACT_LAYER_JIT:  # jit scripted models should be okay for export/scripting
        return _ACT_LAYER_JIT[name]
    return _ACT_LAYER_DEFAULT[name]
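A short sketch of the factory behavior above: with no config flags set, the memory-efficient variants are preferred, and the returned callables agree numerically with the plain definitions (assuming the vendored `geffnet` package is importable):

import torch
from geffnet import config
from geffnet.activations import get_act_fn, get_act_layer

x = torch.randn(8)
act_fn = get_act_fn('swish')                  # ME / jit / builtin variant chosen from current config
act_layer = get_act_layer('hard_swish')(inplace=False)
print(torch.allclose(act_fn(x), x * torch.sigmoid(x), atol=1e-6))

with config.set_layer_config(scriptable=True):
    fn = get_act_fn('swish')                  # skips the custom-autograd (ME) version
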
extensions/microsoftexcel-controlnet/annotator/normalbae/models/submodules/efficientnet_repo/geffnet/activations/activations.py
ADDED
@@ -0,0 +1,102 @@
""" Activations

A collection of activation functions and modules with a common interface so that they can
easily be swapped. All have an `inplace` arg even if not used.

Copyright 2020 Ross Wightman
"""
from torch import nn as nn
from torch.nn import functional as F


def swish(x, inplace: bool = False):
    """Swish - Described originally as SiLU (https://arxiv.org/abs/1702.03118v3)
    and also as Swish (https://arxiv.org/abs/1710.05941).

    TODO Rename to SiLU with addition to PyTorch
    """
    return x.mul_(x.sigmoid()) if inplace else x.mul(x.sigmoid())


class Swish(nn.Module):
    def __init__(self, inplace: bool = False):
        super(Swish, self).__init__()
        self.inplace = inplace

    def forward(self, x):
        return swish(x, self.inplace)


def mish(x, inplace: bool = False):
    """Mish: A Self Regularized Non-Monotonic Neural Activation Function - https://arxiv.org/abs/1908.08681
    """
    return x.mul(F.softplus(x).tanh())


class Mish(nn.Module):
    def __init__(self, inplace: bool = False):
        super(Mish, self).__init__()
        self.inplace = inplace

    def forward(self, x):
        return mish(x, self.inplace)


def sigmoid(x, inplace: bool = False):
    return x.sigmoid_() if inplace else x.sigmoid()


# PyTorch has this, but not with a consistent inplace argument interface
class Sigmoid(nn.Module):
    def __init__(self, inplace: bool = False):
        super(Sigmoid, self).__init__()
        self.inplace = inplace

    def forward(self, x):
        return x.sigmoid_() if self.inplace else x.sigmoid()


def tanh(x, inplace: bool = False):
    return x.tanh_() if inplace else x.tanh()


# PyTorch has this, but not with a consistent inplace argument interface
class Tanh(nn.Module):
    def __init__(self, inplace: bool = False):
        super(Tanh, self).__init__()
        self.inplace = inplace

    def forward(self, x):
        return x.tanh_() if self.inplace else x.tanh()


def hard_swish(x, inplace: bool = False):
    inner = F.relu6(x + 3.).div_(6.)
    return x.mul_(inner) if inplace else x.mul(inner)


class HardSwish(nn.Module):
    def __init__(self, inplace: bool = False):
        super(HardSwish, self).__init__()
        self.inplace = inplace

    def forward(self, x):
        return hard_swish(x, self.inplace)


def hard_sigmoid(x, inplace: bool = False):
    if inplace:
        return x.add_(3.).clamp_(0., 6.).div_(6.)
    else:
        return F.relu6(x + 3.) / 6.


class HardSigmoid(nn.Module):
    def __init__(self, inplace: bool = False):
        super(HardSigmoid, self).__init__()
        self.inplace = inplace

    def forward(self, x):
        return hard_sigmoid(x, self.inplace)
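A quick numeric sanity check for the definitions above against PyTorch's built-ins (a sketch; `F.silu` and `F.hardswish` require a reasonably recent PyTorch, and the vendored package must be importable):

import torch
import torch.nn.functional as F
from geffnet.activations.activations import swish, hard_swish

x = torch.randn(8)
print(torch.allclose(swish(x), F.silu(x), atol=1e-6))
print(torch.allclose(hard_swish(x), F.hardswish(x), atol=1e-6))
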
extensions/microsoftexcel-controlnet/annotator/normalbae/models/submodules/efficientnet_repo/geffnet/activations/activations_jit.py
ADDED
@@ -0,0 +1,79 @@
""" Activations (jit)

A collection of jit-scripted activation functions and modules with a common interface so that they can
easily be swapped. All have an `inplace` arg even if not used.

All jit scripted activations are lacking in-place variations on purpose; scripted kernel fusion does not
currently work across in-place op boundaries, thus performance is equal to or less than the non-scripted
versions if they contain in-place ops.

Copyright 2020 Ross Wightman
"""

import torch
from torch import nn as nn
from torch.nn import functional as F

__all__ = ['swish_jit', 'SwishJit', 'mish_jit', 'MishJit',
           'hard_sigmoid_jit', 'HardSigmoidJit', 'hard_swish_jit', 'HardSwishJit']


@torch.jit.script
def swish_jit(x, inplace: bool = False):
    """Swish - Described originally as SiLU (https://arxiv.org/abs/1702.03118v3)
    and also as Swish (https://arxiv.org/abs/1710.05941).

    TODO Rename to SiLU with addition to PyTorch
    """
    return x.mul(x.sigmoid())


@torch.jit.script
def mish_jit(x, _inplace: bool = False):
    """Mish: A Self Regularized Non-Monotonic Neural Activation Function - https://arxiv.org/abs/1908.08681
    """
    return x.mul(F.softplus(x).tanh())


class SwishJit(nn.Module):
    def __init__(self, inplace: bool = False):
        super(SwishJit, self).__init__()

    def forward(self, x):
        return swish_jit(x)


class MishJit(nn.Module):
    def __init__(self, inplace: bool = False):
        super(MishJit, self).__init__()

    def forward(self, x):
        return mish_jit(x)


@torch.jit.script
def hard_sigmoid_jit(x, inplace: bool = False):
    # return F.relu6(x + 3.) / 6.
    return (x + 3).clamp(min=0, max=6).div(6.)  # clamp seems ever so slightly faster?


class HardSigmoidJit(nn.Module):
    def __init__(self, inplace: bool = False):
        super(HardSigmoidJit, self).__init__()

    def forward(self, x):
        return hard_sigmoid_jit(x)


@torch.jit.script
def hard_swish_jit(x, inplace: bool = False):
    # return x * (F.relu6(x + 3.) / 6)
    return x * (x + 3).clamp(min=0, max=6).div(6.)  # clamp seems ever so slightly faster?


class HardSwishJit(nn.Module):
    def __init__(self, inplace: bool = False):
        super(HardSwishJit, self).__init__()

    def forward(self, x):
        return hard_swish_jit(x)
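Since the kernels above are already `torch.jit.script`-ed, the wrapper modules themselves also script cleanly; a minimal sketch (assuming the vendored package is importable):

import torch
from geffnet.activations.activations_jit import SwishJit

m = SwishJit()
scripted = torch.jit.script(m)  # forward just calls the scripted swish_jit kernel
x = torch.randn(4)
print(torch.allclose(m(x), scripted(x)))
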
extensions/microsoftexcel-controlnet/annotator/normalbae/models/submodules/efficientnet_repo/geffnet/activations/activations_me.py
ADDED
@@ -0,0 +1,174 @@
""" Activations (memory-efficient w/ custom autograd)

A collection of activation functions and modules with a common interface so that they can
easily be swapped. All have an `inplace` arg even if not used.

These activations are not compatible with jit scripting or ONNX export of the model, please use either
the JIT or basic versions of the activations.

Copyright 2020 Ross Wightman
"""

import torch
from torch import nn as nn
from torch.nn import functional as F


__all__ = ['swish_me', 'SwishMe', 'mish_me', 'MishMe',
           'hard_sigmoid_me', 'HardSigmoidMe', 'hard_swish_me', 'HardSwishMe']


@torch.jit.script
def swish_jit_fwd(x):
    return x.mul(torch.sigmoid(x))


@torch.jit.script
def swish_jit_bwd(x, grad_output):
    x_sigmoid = torch.sigmoid(x)
    return grad_output * (x_sigmoid * (1 + x * (1 - x_sigmoid)))


class SwishJitAutoFn(torch.autograd.Function):
    """ torch.jit.script optimised Swish w/ memory-efficient checkpoint
    Inspired by conversation btw Jeremy Howard & Adam Paszke
    https://twitter.com/jeremyphoward/status/1188251041835315200

    Swish - Described originally as SiLU (https://arxiv.org/abs/1702.03118v3)
    and also as Swish (https://arxiv.org/abs/1710.05941).

    TODO Rename to SiLU with addition to PyTorch
    """

    @staticmethod
    def forward(ctx, x):
        ctx.save_for_backward(x)
        return swish_jit_fwd(x)

    @staticmethod
    def backward(ctx, grad_output):
        x = ctx.saved_tensors[0]
        return swish_jit_bwd(x, grad_output)


def swish_me(x, inplace=False):
    return SwishJitAutoFn.apply(x)


class SwishMe(nn.Module):
    def __init__(self, inplace: bool = False):
        super(SwishMe, self).__init__()

    def forward(self, x):
        return SwishJitAutoFn.apply(x)


@torch.jit.script
def mish_jit_fwd(x):
    return x.mul(torch.tanh(F.softplus(x)))


@torch.jit.script
def mish_jit_bwd(x, grad_output):
    x_sigmoid = torch.sigmoid(x)
    x_tanh_sp = F.softplus(x).tanh()
    return grad_output.mul(x_tanh_sp + x * x_sigmoid * (1 - x_tanh_sp * x_tanh_sp))


class MishJitAutoFn(torch.autograd.Function):
    """ Mish: A Self Regularized Non-Monotonic Neural Activation Function - https://arxiv.org/abs/1908.08681
    A memory efficient, jit scripted variant of Mish
    """
    @staticmethod
    def forward(ctx, x):
        ctx.save_for_backward(x)
        return mish_jit_fwd(x)

    @staticmethod
    def backward(ctx, grad_output):
        x = ctx.saved_tensors[0]
        return mish_jit_bwd(x, grad_output)


def mish_me(x, inplace=False):
    return MishJitAutoFn.apply(x)


class MishMe(nn.Module):
    def __init__(self, inplace: bool = False):
        super(MishMe, self).__init__()

    def forward(self, x):
        return MishJitAutoFn.apply(x)


@torch.jit.script
def hard_sigmoid_jit_fwd(x, inplace: bool = False):
    return (x + 3).clamp(min=0, max=6).div(6.)


@torch.jit.script
def hard_sigmoid_jit_bwd(x, grad_output):
    m = torch.ones_like(x) * ((x >= -3.) & (x <= 3.)) / 6.
    return grad_output * m


class HardSigmoidJitAutoFn(torch.autograd.Function):
    @staticmethod
    def forward(ctx, x):
        ctx.save_for_backward(x)
        return hard_sigmoid_jit_fwd(x)

    @staticmethod
    def backward(ctx, grad_output):
        x = ctx.saved_tensors[0]
        return hard_sigmoid_jit_bwd(x, grad_output)


def hard_sigmoid_me(x, inplace: bool = False):
    return HardSigmoidJitAutoFn.apply(x)


class HardSigmoidMe(nn.Module):
    def __init__(self, inplace: bool = False):
        super(HardSigmoidMe, self).__init__()

    def forward(self, x):
        return HardSigmoidJitAutoFn.apply(x)


@torch.jit.script
def hard_swish_jit_fwd(x):
    return x * (x + 3).clamp(min=0, max=6).div(6.)


@torch.jit.script
def hard_swish_jit_bwd(x, grad_output):
    m = torch.ones_like(x) * (x >= 3.)
    m = torch.where((x >= -3.) & (x <= 3.), x / 3. + .5, m)
    return grad_output * m


class HardSwishJitAutoFn(torch.autograd.Function):
    """A memory efficient, jit-scripted HardSwish activation"""
    @staticmethod
    def forward(ctx, x):
        ctx.save_for_backward(x)
        return hard_swish_jit_fwd(x)

    @staticmethod
    def backward(ctx, grad_output):
        x = ctx.saved_tensors[0]
        return hard_swish_jit_bwd(x, grad_output)


def hard_swish_me(x, inplace=False):
    return HardSwishJitAutoFn.apply(x)


class HardSwishMe(nn.Module):
    def __init__(self, inplace: bool = False):
        super(HardSwishMe, self).__init__()

    def forward(self, x):
        return HardSwishJitAutoFn.apply(x)
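The hand-written backward passes above can be checked against numeric differentiation with `gradcheck`; a minimal sketch in double precision, using the smooth Swish and Mish variants (assuming the vendored package is importable):

import torch
from geffnet.activations.activations_me import SwishJitAutoFn, MishJitAutoFn

x = torch.randn(6, dtype=torch.double, requires_grad=True)
print(torch.autograd.gradcheck(SwishJitAutoFn.apply, (x,)))  # True if bwd matches numeric grad
print(torch.autograd.gradcheck(MishJitAutoFn.apply, (x,)))
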
extensions/microsoftexcel-controlnet/annotator/normalbae/models/submodules/efficientnet_repo/geffnet/config.py
ADDED
@@ -0,0 +1,123 @@
""" Global layer config state
"""
from typing import Any, Optional

__all__ = [
    'is_exportable', 'is_scriptable', 'is_no_jit', 'layer_config_kwargs',
    'set_exportable', 'set_scriptable', 'set_no_jit', 'set_layer_config'
]

# Set to True if prefer to have layers with no jit optimization (includes activations)
_NO_JIT = False

# Set to True if prefer to have activation layers with no jit optimization
# NOTE: not currently used, as there is no difference between no_jit and no_activation_jit while
# activations are the only layers obeying the jit flags. This will change as more layers are updated and/or added.
_NO_ACTIVATION_JIT = False

# Set to True if exporting a model with Same padding via ONNX
_EXPORTABLE = False

# Set to True if wanting to use torch.jit.script on a model
_SCRIPTABLE = False


def is_no_jit():
    return _NO_JIT


class set_no_jit:
    def __init__(self, mode: bool) -> None:
        global _NO_JIT
        self.prev = _NO_JIT
        _NO_JIT = mode

    def __enter__(self) -> None:
        pass

    def __exit__(self, *args: Any) -> bool:
        global _NO_JIT
        _NO_JIT = self.prev
        return False


def is_exportable():
    return _EXPORTABLE


class set_exportable:
    def __init__(self, mode: bool) -> None:
        global _EXPORTABLE
        self.prev = _EXPORTABLE
        _EXPORTABLE = mode

    def __enter__(self) -> None:
        pass

    def __exit__(self, *args: Any) -> bool:
        global _EXPORTABLE
        _EXPORTABLE = self.prev
        return False


def is_scriptable():
    return _SCRIPTABLE


class set_scriptable:
    def __init__(self, mode: bool) -> None:
        global _SCRIPTABLE
        self.prev = _SCRIPTABLE
        _SCRIPTABLE = mode

    def __enter__(self) -> None:
        pass

    def __exit__(self, *args: Any) -> bool:
        global _SCRIPTABLE
        _SCRIPTABLE = self.prev
        return False


class set_layer_config:
    """ Layer config context manager that allows setting all layer config flags at once.
    If a flag arg is None, it will not change the current value.
    """
    def __init__(
            self,
            scriptable: Optional[bool] = None,
            exportable: Optional[bool] = None,
            no_jit: Optional[bool] = None,
            no_activation_jit: Optional[bool] = None):
        global _SCRIPTABLE
        global _EXPORTABLE
        global _NO_JIT
        global _NO_ACTIVATION_JIT
        self.prev = _SCRIPTABLE, _EXPORTABLE, _NO_JIT, _NO_ACTIVATION_JIT
        if scriptable is not None:
            _SCRIPTABLE = scriptable
        if exportable is not None:
            _EXPORTABLE = exportable
        if no_jit is not None:
            _NO_JIT = no_jit
        if no_activation_jit is not None:
            _NO_ACTIVATION_JIT = no_activation_jit

    def __enter__(self) -> None:
        pass

    def __exit__(self, *args: Any) -> bool:
        global _SCRIPTABLE
        global _EXPORTABLE
        global _NO_JIT
        global _NO_ACTIVATION_JIT
        _SCRIPTABLE, _EXPORTABLE, _NO_JIT, _NO_ACTIVATION_JIT = self.prev
        return False


def layer_config_kwargs(kwargs):
    """ Consume config kwargs and return contextmgr obj """
    return set_layer_config(
        scriptable=kwargs.pop('scriptable', None),
        exportable=kwargs.pop('exportable', None),
        no_jit=kwargs.pop('no_jit', None))
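The flag classes above double as context managers; a minimal sketch of the scoping behavior (assuming the vendored package is importable):

from geffnet.config import set_layer_config, is_exportable, is_scriptable

print(is_exportable(), is_scriptable())   # False False by default
with set_layer_config(exportable=True):
    print(is_exportable())                # True inside the context
print(is_exportable())                    # restored to False on exit

Note that the flags flip in `__init__`, not `__enter__`, so constructing one of these objects mutates the global state immediately even outside a `with` block; it is only restored on `__exit__`.
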
extensions/microsoftexcel-controlnet/annotator/normalbae/models/submodules/efficientnet_repo/geffnet/conv2d_layers.py
ADDED
@@ -0,0 +1,304 @@
""" Conv2D w/ SAME padding, CondConv, MixedConv

A collection of conv layers and padding helpers needed by EfficientNet, MixNet, and
MobileNetV3 models that maintain weight compatibility with original Tensorflow models.

Copyright 2020 Ross Wightman
"""
import collections.abc
import math
from functools import partial
from itertools import repeat
from typing import Tuple, Optional

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

from .config import *


# From PyTorch internals
def _ntuple(n):
    def parse(x):
        if isinstance(x, collections.abc.Iterable):
            return x
        return tuple(repeat(x, n))
    return parse


_single = _ntuple(1)
_pair = _ntuple(2)
_triple = _ntuple(3)
_quadruple = _ntuple(4)


def _is_static_pad(kernel_size, stride=1, dilation=1, **_):
    return stride == 1 and (dilation * (kernel_size - 1)) % 2 == 0


def _get_padding(kernel_size, stride=1, dilation=1, **_):
    padding = ((stride - 1) + dilation * (kernel_size - 1)) // 2
    return padding


def _calc_same_pad(i: int, k: int, s: int, d: int):
    return max((-(i // -s) - 1) * s + (k - 1) * d + 1 - i, 0)


def _same_pad_arg(input_size, kernel_size, stride, dilation):
    ih, iw = input_size
    kh, kw = kernel_size
    pad_h = _calc_same_pad(ih, kh, stride[0], dilation[0])
    pad_w = _calc_same_pad(iw, kw, stride[1], dilation[1])
    return [pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2]


def _split_channels(num_chan, num_groups):
    split = [num_chan // num_groups for _ in range(num_groups)]
    split[0] += num_chan - sum(split)
    return split


def conv2d_same(
        x, weight: torch.Tensor, bias: Optional[torch.Tensor] = None, stride: Tuple[int, int] = (1, 1),
        padding: Tuple[int, int] = (0, 0), dilation: Tuple[int, int] = (1, 1), groups: int = 1):
    ih, iw = x.size()[-2:]
    kh, kw = weight.size()[-2:]
    pad_h = _calc_same_pad(ih, kh, stride[0], dilation[0])
    pad_w = _calc_same_pad(iw, kw, stride[1], dilation[1])
    x = F.pad(x, [pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2])
    return F.conv2d(x, weight, bias, stride, (0, 0), dilation, groups)


class Conv2dSame(nn.Conv2d):
    """ Tensorflow like 'SAME' convolution wrapper for 2D convolutions
    """

    # pylint: disable=unused-argument
    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
                 padding=0, dilation=1, groups=1, bias=True):
        super(Conv2dSame, self).__init__(
            in_channels, out_channels, kernel_size, stride, 0, dilation, groups, bias)

    def forward(self, x):
        return conv2d_same(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups)


class Conv2dSameExport(nn.Conv2d):
    """ ONNX export friendly Tensorflow like 'SAME' convolution wrapper for 2D convolutions

    NOTE: This does not currently work with torch.jit.script
    """

    # pylint: disable=unused-argument
    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
                 padding=0, dilation=1, groups=1, bias=True):
        super(Conv2dSameExport, self).__init__(
            in_channels, out_channels, kernel_size, stride, 0, dilation, groups, bias)
        self.pad = None
        self.pad_input_size = (0, 0)

    def forward(self, x):
        input_size = x.size()[-2:]
        if self.pad is None:
            pad_arg = _same_pad_arg(input_size, self.weight.size()[-2:], self.stride, self.dilation)
            self.pad = nn.ZeroPad2d(pad_arg)
            self.pad_input_size = input_size

        if self.pad is not None:
            x = self.pad(x)
        return F.conv2d(
            x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups)


def get_padding_value(padding, kernel_size, **kwargs):
    dynamic = False
    if isinstance(padding, str):
        # for any string padding, the padding will be calculated for you, one of three ways
        padding = padding.lower()
        if padding == 'same':
            # TF compatible 'SAME' padding, has a performance and GPU memory allocation impact
            if _is_static_pad(kernel_size, **kwargs):
                # static case, no extra overhead
                padding = _get_padding(kernel_size, **kwargs)
            else:
                # dynamic padding
                padding = 0
                dynamic = True
        elif padding == 'valid':
            # 'VALID' padding, same as padding=0
            padding = 0
        else:
            # Default to PyTorch style 'same'-ish symmetric padding
            padding = _get_padding(kernel_size, **kwargs)
    return padding, dynamic


def create_conv2d_pad(in_chs, out_chs, kernel_size, **kwargs):
    padding = kwargs.pop('padding', '')
    kwargs.setdefault('bias', False)
    padding, is_dynamic = get_padding_value(padding, kernel_size, **kwargs)
    if is_dynamic:
        if is_exportable():
            assert not is_scriptable()
            return Conv2dSameExport(in_chs, out_chs, kernel_size, **kwargs)
        else:
            return Conv2dSame(in_chs, out_chs, kernel_size, **kwargs)
    else:
        return nn.Conv2d(in_chs, out_chs, kernel_size, padding=padding, **kwargs)


class MixedConv2d(nn.ModuleDict):
    """ Mixed Grouped Convolution
    Based on MDConv and GroupedConv in MixNet impl:
    https://github.com/tensorflow/tpu/blob/master/models/official/mnasnet/mixnet/custom_layers.py
    """

    def __init__(self, in_channels, out_channels, kernel_size=3,
                 stride=1, padding='', dilation=1, depthwise=False, **kwargs):
        super(MixedConv2d, self).__init__()

        kernel_size = kernel_size if isinstance(kernel_size, list) else [kernel_size]
        num_groups = len(kernel_size)
        in_splits = _split_channels(in_channels, num_groups)
        out_splits = _split_channels(out_channels, num_groups)
        self.in_channels = sum(in_splits)
        self.out_channels = sum(out_splits)
        for idx, (k, in_ch, out_ch) in enumerate(zip(kernel_size, in_splits, out_splits)):
            conv_groups = out_ch if depthwise else 1
            self.add_module(
                str(idx),
                create_conv2d_pad(
                    in_ch, out_ch, k, stride=stride,
                    padding=padding, dilation=dilation, groups=conv_groups, **kwargs)
            )
        self.splits = in_splits

    def forward(self, x):
        x_split = torch.split(x, self.splits, 1)
        x_out = [conv(x_split[i]) for i, conv in enumerate(self.values())]
        x = torch.cat(x_out, 1)
        return x


def get_condconv_initializer(initializer, num_experts, expert_shape):
    def condconv_initializer(weight):
        """CondConv initializer function."""
        num_params = np.prod(expert_shape)
        if (len(weight.shape) != 2 or weight.shape[0] != num_experts or
                weight.shape[1] != num_params):
            raise ValueError(
                'CondConv variables must have shape [num_experts, num_params]')
        for i in range(num_experts):
            initializer(weight[i].view(expert_shape))
    return condconv_initializer


class CondConv2d(nn.Module):
    """ Conditional Convolution
    Inspired by: https://github.com/tensorflow/tpu/blob/master/models/official/efficientnet/condconv/condconv_layers.py

    Grouped convolution hackery for parallel execution of the per-sample kernel filters inspired by this discussion:
    https://github.com/pytorch/pytorch/issues/17983
    """
    __constants__ = ['bias', 'in_channels', 'out_channels', 'dynamic_padding']

    def __init__(self, in_channels, out_channels, kernel_size=3,
                 stride=1, padding='', dilation=1, groups=1, bias=False, num_experts=4):
        super(CondConv2d, self).__init__()

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = _pair(kernel_size)
        self.stride = _pair(stride)
        padding_val, is_padding_dynamic = get_padding_value(
            padding, kernel_size, stride=stride, dilation=dilation)
        self.dynamic_padding = is_padding_dynamic  # if in forward to work with torchscript
        self.padding = _pair(padding_val)
        self.dilation = _pair(dilation)
        self.groups = groups
        self.num_experts = num_experts

        self.weight_shape = (self.out_channels, self.in_channels // self.groups) + self.kernel_size
        weight_num_param = 1
        for wd in self.weight_shape:
            weight_num_param *= wd
        self.weight = torch.nn.Parameter(torch.Tensor(self.num_experts, weight_num_param))

        if bias:
            self.bias_shape = (self.out_channels,)
            self.bias = torch.nn.Parameter(torch.Tensor(self.num_experts, self.out_channels))
        else:
            self.register_parameter('bias', None)

        self.reset_parameters()

    def reset_parameters(self):
        init_weight = get_condconv_initializer(
            partial(nn.init.kaiming_uniform_, a=math.sqrt(5)), self.num_experts, self.weight_shape)
        init_weight(self.weight)
        if self.bias is not None:
            fan_in = np.prod(self.weight_shape[1:])
            bound = 1 / math.sqrt(fan_in)
            init_bias = get_condconv_initializer(
                partial(nn.init.uniform_, a=-bound, b=bound), self.num_experts, self.bias_shape)
            init_bias(self.bias)

    def forward(self, x, routing_weights):
        B, C, H, W = x.shape
        weight = torch.matmul(routing_weights, self.weight)
        new_weight_shape = (B * self.out_channels, self.in_channels // self.groups) + self.kernel_size
        weight = weight.view(new_weight_shape)
        bias = None
        if self.bias is not None:
            bias = torch.matmul(routing_weights, self.bias)
            bias = bias.view(B * self.out_channels)
        # move batch elements with channels so each batch element can be efficiently convolved with separate kernel
        x = x.view(1, B * C, H, W)
        if self.dynamic_padding:
            out = conv2d_same(
                x, weight, bias, stride=self.stride, padding=self.padding,
                dilation=self.dilation, groups=self.groups * B)
        else:
            out = F.conv2d(
                x, weight, bias, stride=self.stride, padding=self.padding,
                dilation=self.dilation, groups=self.groups * B)
        out = out.permute([1, 0, 2, 3]).view(B, self.out_channels, out.shape[-2], out.shape[-1])

        # Literal port (from TF definition)
        # x = torch.split(x, 1, 0)
        # weight = torch.split(weight, 1, 0)
        # if self.bias is not None:
        #     bias = torch.matmul(routing_weights, self.bias)
        #     bias = torch.split(bias, 1, 0)
        # else:
        #     bias = [None] * B
        # out = []
        # for xi, wi, bi in zip(x, weight, bias):
        #     wi = wi.view(*self.weight_shape)
        #     if bi is not None:
        #         bi = bi.view(*self.bias_shape)
        #     out.append(self.conv_fn(
        #         xi, wi, bi, stride=self.stride, padding=self.padding,
        #         dilation=self.dilation, groups=self.groups))
        # out = torch.cat(out, 0)
        return out


def select_conv2d(in_chs, out_chs, kernel_size, **kwargs):
    assert 'groups' not in kwargs  # only use 'depthwise' bool arg
    if isinstance(kernel_size, list):
        assert 'num_experts' not in kwargs  # MixNet + CondConv combo not supported currently
        # We're going to use only lists for defining the MixedConv2d kernel groups,
        # ints, tuples, other iterables will continue to pass to normal conv and specify h, w.
        m = MixedConv2d(in_chs, out_chs, kernel_size, **kwargs)
    else:
        depthwise = kwargs.pop('depthwise', False)
        groups = out_chs if depthwise else 1
        if 'num_experts' in kwargs and kwargs['num_experts'] > 0:
            m = CondConv2d(in_chs, out_chs, kernel_size, groups=groups, **kwargs)
        else:
            m = create_conv2d_pad(in_chs, out_chs, kernel_size, groups=groups, **kwargs)
    return m
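A minimal sketch of the dynamic 'SAME' padding path through `select_conv2d`: a stride-2, kernel-3 conv cannot use static symmetric padding, so a `Conv2dSame` instance is returned and the output size follows TF's ceil(H / stride) rule (assuming the vendored package is importable):

import torch
from geffnet.conv2d_layers import select_conv2d

conv = select_conv2d(8, 16, kernel_size=3, stride=2, padding='same')
x = torch.randn(1, 8, 15, 15)
print(type(conv).__name__)   # Conv2dSame (dynamic padding path, not exporting)
print(conv(x).shape)         # torch.Size([1, 16, 8, 8]) -- ceil(15 / 2) = 8
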
extensions/microsoftexcel-controlnet/annotator/normalbae/models/submodules/efficientnet_repo/geffnet/efficientnet_builder.py
ADDED
@@ -0,0 +1,683 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
""" EfficientNet / MobileNetV3 Blocks and Builder
|
2 |
+
|
3 |
+
Copyright 2020 Ross Wightman
|
4 |
+
"""
|
5 |
+
import re
|
6 |
+
from copy import deepcopy
|
7 |
+
|
8 |
+
from .conv2d_layers import *
|
9 |
+
from geffnet.activations import *
|
10 |
+
|
11 |
+
__all__ = ['get_bn_args_tf', 'resolve_bn_args', 'resolve_se_args', 'resolve_act_layer', 'make_divisible',
|
12 |
+
'round_channels', 'drop_connect', 'SqueezeExcite', 'ConvBnAct', 'DepthwiseSeparableConv',
|
13 |
+
'InvertedResidual', 'CondConvResidual', 'EdgeResidual', 'EfficientNetBuilder', 'decode_arch_def',
|
14 |
+
'initialize_weight_default', 'initialize_weight_goog', 'BN_MOMENTUM_TF_DEFAULT', 'BN_EPS_TF_DEFAULT'
|
15 |
+
]
|
16 |
+
|
17 |
+
# Defaults used for Google/Tensorflow training of mobile networks /w RMSprop as per
|
18 |
+
# papers and TF reference implementations. PT momentum equiv for TF decay is (1 - TF decay)
|
19 |
+
# NOTE: momentum varies btw .99 and .9997 depending on source
|
20 |
+
# .99 in official TF TPU impl
|
21 |
+
# .9997 (/w .999 in search space) for paper
|
22 |
+
#
|
23 |
+
# PyTorch defaults are momentum = .1, eps = 1e-5
|
24 |
+
#
|
25 |
+
BN_MOMENTUM_TF_DEFAULT = 1 - 0.99
|
26 |
+
BN_EPS_TF_DEFAULT = 1e-3
|
27 |
+
_BN_ARGS_TF = dict(momentum=BN_MOMENTUM_TF_DEFAULT, eps=BN_EPS_TF_DEFAULT)
|
28 |
+
|
29 |
+
|
30 |
+
def get_bn_args_tf():
|
31 |
+
return _BN_ARGS_TF.copy()
|
32 |
+
|
33 |
+
|
34 |
+
def resolve_bn_args(kwargs):
|
35 |
+
bn_args = get_bn_args_tf() if kwargs.pop('bn_tf', False) else {}
|
36 |
+
bn_momentum = kwargs.pop('bn_momentum', None)
|
37 |
+
if bn_momentum is not None:
|
38 |
+
bn_args['momentum'] = bn_momentum
|
39 |
+
bn_eps = kwargs.pop('bn_eps', None)
|
40 |
+
if bn_eps is not None:
|
41 |
+
bn_args['eps'] = bn_eps
|
42 |
+
return bn_args
|
43 |
+
|
44 |
+
|
45 |
+
_SE_ARGS_DEFAULT = dict(
|
46 |
+
gate_fn=sigmoid,
|
47 |
+
act_layer=None, # None == use containing block's activation layer
|
48 |
+
reduce_mid=False,
|
49 |
+
divisor=1)
|
50 |
+
|
51 |
+
|
52 |
+
def resolve_se_args(kwargs, in_chs, act_layer=None):
|
53 |
+
se_kwargs = kwargs.copy() if kwargs is not None else {}
|
54 |
+
# fill in args that aren't specified with the defaults
|
55 |
+
for k, v in _SE_ARGS_DEFAULT.items():
|
56 |
+
se_kwargs.setdefault(k, v)
|
57 |
+
# some models, like MobilNetV3, calculate SE reduction chs from the containing block's mid_ch instead of in_ch
|
58 |
+
if not se_kwargs.pop('reduce_mid'):
|
59 |
+
se_kwargs['reduced_base_chs'] = in_chs
|
60 |
+
# act_layer override, if it remains None, the containing block's act_layer will be used
|
61 |
+
if se_kwargs['act_layer'] is None:
|
62 |
+
assert act_layer is not None
|
63 |
+
se_kwargs['act_layer'] = act_layer
|
64 |
+
return se_kwargs
|
65 |
+
|
66 |
+
|
67 |
+
def resolve_act_layer(kwargs, default='relu'):
|
68 |
+
act_layer = kwargs.pop('act_layer', default)
|
69 |
+
if isinstance(act_layer, str):
|
70 |
+
act_layer = get_act_layer(act_layer)
|
71 |
+
return act_layer
|
72 |
+
|
73 |
+
|
74 |
+
def make_divisible(v: int, divisor: int = 8, min_value: int = None):
|
75 |
+
min_value = min_value or divisor
|
76 |
+
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
|
77 |
+
if new_v < 0.9 * v: # ensure round down does not go down by more than 10%.
|
78 |
+
new_v += divisor
|
79 |
+
return new_v
|
80 |
+
|
81 |
+
|
82 |
+
def round_channels(channels, multiplier=1.0, divisor=8, channel_min=None):
|
83 |
+
"""Round number of filters based on depth multiplier."""
|
84 |
+
if not multiplier:
|
85 |
+
return channels
|
86 |
+
channels *= multiplier
|
87 |
+
return make_divisible(channels, divisor, channel_min)
|
88 |
+
|
89 |
+
|
90 |
+
def drop_connect(inputs, training: bool = False, drop_connect_rate: float = 0.):
|
91 |
+
"""Apply drop connect."""
|
92 |
+
if not training:
|
93 |
+
return inputs
|
94 |
+
|
95 |
+
keep_prob = 1 - drop_connect_rate
|
96 |
+
random_tensor = keep_prob + torch.rand(
|
97 |
+
(inputs.size()[0], 1, 1, 1), dtype=inputs.dtype, device=inputs.device)
|
98 |
+
random_tensor.floor_() # binarize
|
99 |
+
output = inputs.div(keep_prob) * random_tensor
|
100 |
+
return output
|
101 |
+
|
102 |
+
|
103 |
+
class SqueezeExcite(nn.Module):
|
104 |
+
|
105 |
+
def __init__(self, in_chs, se_ratio=0.25, reduced_base_chs=None, act_layer=nn.ReLU, gate_fn=sigmoid, divisor=1):
|
106 |
+
super(SqueezeExcite, self).__init__()
|
107 |
+
reduced_chs = make_divisible((reduced_base_chs or in_chs) * se_ratio, divisor)
|
108 |
+
self.conv_reduce = nn.Conv2d(in_chs, reduced_chs, 1, bias=True)
|
109 |
+
self.act1 = act_layer(inplace=True)
|
110 |
+
self.conv_expand = nn.Conv2d(reduced_chs, in_chs, 1, bias=True)
|
111 |
+
self.gate_fn = gate_fn
|
112 |
+
|
113 |
+
def forward(self, x):
|
114 |
+
x_se = x.mean((2, 3), keepdim=True)
|
115 |
+
x_se = self.conv_reduce(x_se)
|
116 |
+
x_se = self.act1(x_se)
|
117 |
+
x_se = self.conv_expand(x_se)
|
118 |
+
x = x * self.gate_fn(x_se)
|
119 |
+
return x
|
120 |
+
|
121 |
+
|
122 |
+
class ConvBnAct(nn.Module):
|
123 |
+
def __init__(self, in_chs, out_chs, kernel_size,
|
124 |
+
stride=1, pad_type='', act_layer=nn.ReLU, norm_layer=nn.BatchNorm2d, norm_kwargs=None):
|
125 |
+
super(ConvBnAct, self).__init__()
|
126 |
+
assert stride in [1, 2]
|
127 |
+
norm_kwargs = norm_kwargs or {}
|
128 |
+
self.conv = select_conv2d(in_chs, out_chs, kernel_size, stride=stride, padding=pad_type)
|
129 |
+
self.bn1 = norm_layer(out_chs, **norm_kwargs)
|
130 |
+
self.act1 = act_layer(inplace=True)
|
131 |
+
|
132 |
+
def forward(self, x):
|
133 |
+
x = self.conv(x)
|
134 |
+
x = self.bn1(x)
|
135 |
+
x = self.act1(x)
|
136 |
+
return x
|
137 |
+
|
138 |
+
|
139 |
+
class DepthwiseSeparableConv(nn.Module):
|
140 |
+
""" DepthwiseSeparable block
|
141 |
+
Used for DS convs in MobileNet-V1 and in the place of IR blocks with an expansion
|
142 |
+
factor of 1.0. This is an alternative to having a IR with optional first pw conv.
|
143 |
+
"""
|
144 |
+
def __init__(self, in_chs, out_chs, dw_kernel_size=3,
|
145 |
+
stride=1, pad_type='', act_layer=nn.ReLU, noskip=False,
|
146 |
+
pw_kernel_size=1, pw_act=False, se_ratio=0., se_kwargs=None,
|
147 |
+
norm_layer=nn.BatchNorm2d, norm_kwargs=None, drop_connect_rate=0.):
|
148 |
+
super(DepthwiseSeparableConv, self).__init__()
|
149 |
+
assert stride in [1, 2]
|
150 |
+
norm_kwargs = norm_kwargs or {}
|
151 |
+
self.has_residual = (stride == 1 and in_chs == out_chs) and not noskip
|
152 |
+
self.drop_connect_rate = drop_connect_rate
|
153 |
+
|
154 |
+
self.conv_dw = select_conv2d(
|
155 |
+
in_chs, in_chs, dw_kernel_size, stride=stride, padding=pad_type, depthwise=True)
|
156 |
+
self.bn1 = norm_layer(in_chs, **norm_kwargs)
|
157 |
+
self.act1 = act_layer(inplace=True)
|
158 |
+
|
159 |
+
# Squeeze-and-excitation
|
160 |
+
if se_ratio is not None and se_ratio > 0.:
|
161 |
+
se_kwargs = resolve_se_args(se_kwargs, in_chs, act_layer)
|
162 |
+
self.se = SqueezeExcite(in_chs, se_ratio=se_ratio, **se_kwargs)
|
163 |
+
else:
|
164 |
+
self.se = nn.Identity()
|
165 |
+
|
166 |
+
self.conv_pw = select_conv2d(in_chs, out_chs, pw_kernel_size, padding=pad_type)
|
167 |
+
self.bn2 = norm_layer(out_chs, **norm_kwargs)
|
168 |
+
self.act2 = act_layer(inplace=True) if pw_act else nn.Identity()
|
169 |
+
|
170 |
+
def forward(self, x):
|
171 |
+
residual = x
|
172 |
+
|
173 |
+
x = self.conv_dw(x)
|
174 |
+
x = self.bn1(x)
|
175 |
+
x = self.act1(x)
|
176 |
+
|
177 |
+
x = self.se(x)
|
178 |
+
|
179 |
+
x = self.conv_pw(x)
|
180 |
+
x = self.bn2(x)
|
181 |
+
x = self.act2(x)
|
182 |
+
|
183 |
+
if self.has_residual:
|
184 |
+
if self.drop_connect_rate > 0.:
|
185 |
+
x = drop_connect(x, self.training, self.drop_connect_rate)
|
186 |
+
x += residual
|
187 |
+
return x
|
188 |
+
|
189 |
+
|
190 |
+
class InvertedResidual(nn.Module):
|
191 |
+
""" Inverted residual block w/ optional SE"""
|
192 |
+
|
193 |
+
def __init__(self, in_chs, out_chs, dw_kernel_size=3,
|
194 |
+
stride=1, pad_type='', act_layer=nn.ReLU, noskip=False,
|
195 |
+
exp_ratio=1.0, exp_kernel_size=1, pw_kernel_size=1,
|
196 |
+
se_ratio=0., se_kwargs=None, norm_layer=nn.BatchNorm2d, norm_kwargs=None,
|
197 |
+
conv_kwargs=None, drop_connect_rate=0.):
|
198 |
+
super(InvertedResidual, self).__init__()
|
199 |
+
norm_kwargs = norm_kwargs or {}
|
200 |
+
conv_kwargs = conv_kwargs or {}
|
201 |
+
mid_chs: int = make_divisible(in_chs * exp_ratio)
|
202 |
+
self.has_residual = (in_chs == out_chs and stride == 1) and not noskip
|
203 |
+
self.drop_connect_rate = drop_connect_rate
|
204 |
+
|
205 |
+
# Point-wise expansion
|
206 |
+
self.conv_pw = select_conv2d(in_chs, mid_chs, exp_kernel_size, padding=pad_type, **conv_kwargs)
|
207 |
+
self.bn1 = norm_layer(mid_chs, **norm_kwargs)
|
208 |
+
self.act1 = act_layer(inplace=True)
|
209 |
+
|
210 |
+
# Depth-wise convolution
|
211 |
+
self.conv_dw = select_conv2d(
|
212 |
+
mid_chs, mid_chs, dw_kernel_size, stride=stride, padding=pad_type, depthwise=True, **conv_kwargs)
|
213 |
+
self.bn2 = norm_layer(mid_chs, **norm_kwargs)
|
214 |
+
self.act2 = act_layer(inplace=True)
|
215 |
+
|
216 |
+
# Squeeze-and-excitation
|
217 |
+
if se_ratio is not None and se_ratio > 0.:
|
218 |
+
se_kwargs = resolve_se_args(se_kwargs, in_chs, act_layer)
|
219 |
+
self.se = SqueezeExcite(mid_chs, se_ratio=se_ratio, **se_kwargs)
|
220 |
+
else:
|
221 |
+
self.se = nn.Identity() # for jit.script compat
|
222 |
+
|
223 |
+
# Point-wise linear projection
|
224 |
+
self.conv_pwl = select_conv2d(mid_chs, out_chs, pw_kernel_size, padding=pad_type, **conv_kwargs)
|
225 |
+
self.bn3 = norm_layer(out_chs, **norm_kwargs)
|
226 |
+
|
227 |
+
def forward(self, x):
|
228 |
+
residual = x
|
229 |
+
|
230 |
+
# Point-wise expansion
|
231 |
+
x = self.conv_pw(x)
|
232 |
+
x = self.bn1(x)
|
233 |
+
x = self.act1(x)
|
234 |
+
|
235 |
+
# Depth-wise convolution
|
236 |
+
x = self.conv_dw(x)
|
237 |
+
x = self.bn2(x)
|
238 |
+
x = self.act2(x)
|
239 |
+
|
240 |
+
# Squeeze-and-excitation
|
241 |
+
x = self.se(x)
|
242 |
+
|
243 |
+
# Point-wise linear projection
|
244 |
+
x = self.conv_pwl(x)
|
245 |
+
x = self.bn3(x)
|
246 |
+
|
247 |
+
if self.has_residual:
|
248 |
+
if self.drop_connect_rate > 0.:
|
249 |
+
x = drop_connect(x, self.training, self.drop_connect_rate)
|
250 |
+
x += residual
|
251 |
+
return x
|
252 |
+
|
253 |
+
|
254 |
+
class CondConvResidual(InvertedResidual):
|
255 |
+
""" Inverted residual block w/ CondConv routing"""
|
256 |
+
|
257 |
+
def __init__(self, in_chs, out_chs, dw_kernel_size=3,
|
258 |
+
stride=1, pad_type='', act_layer=nn.ReLU, noskip=False,
|
259 |
+
exp_ratio=1.0, exp_kernel_size=1, pw_kernel_size=1,
|
260 |
+
se_ratio=0., se_kwargs=None, norm_layer=nn.BatchNorm2d, norm_kwargs=None,
|
261 |
+
num_experts=0, drop_connect_rate=0.):
|
262 |
+
|
263 |
+
self.num_experts = num_experts
|
264 |
+
conv_kwargs = dict(num_experts=self.num_experts)
|
265 |
+
|
266 |
+
super(CondConvResidual, self).__init__(
|
267 |
+
in_chs, out_chs, dw_kernel_size=dw_kernel_size, stride=stride, pad_type=pad_type,
|
268 |
+
act_layer=act_layer, noskip=noskip, exp_ratio=exp_ratio, exp_kernel_size=exp_kernel_size,
|
269 |
+
pw_kernel_size=pw_kernel_size, se_ratio=se_ratio, se_kwargs=se_kwargs,
|
270 |
+
norm_layer=norm_layer, norm_kwargs=norm_kwargs, conv_kwargs=conv_kwargs,
|
271 |
+
drop_connect_rate=drop_connect_rate)
|
272 |
+
|
273 |
+
self.routing_fn = nn.Linear(in_chs, self.num_experts)
|
274 |
+
|
275 |
+
def forward(self, x):
|
276 |
+
residual = x
|
277 |
+
|
278 |
+
# CondConv routing
|
279 |
+
pooled_inputs = F.adaptive_avg_pool2d(x, 1).flatten(1)
|
280 |
+
routing_weights = torch.sigmoid(self.routing_fn(pooled_inputs))
|
281 |
+
|
282 |
+
# Point-wise expansion
|
283 |
+
x = self.conv_pw(x, routing_weights)
|
284 |
+
x = self.bn1(x)
|
285 |
+
x = self.act1(x)
|
286 |
+
|
287 |
+
# Depth-wise convolution
|
288 |
+
x = self.conv_dw(x, routing_weights)
|
289 |
+
x = self.bn2(x)
|
290 |
+
x = self.act2(x)
|
291 |
+
|
292 |
+
# Squeeze-and-excitation
|
293 |
+
x = self.se(x)
|
294 |
+
|
295 |
+
# Point-wise linear projection
|
296 |
+
x = self.conv_pwl(x, routing_weights)
|
297 |
+
x = self.bn3(x)
|
298 |
+
|
299 |
+
if self.has_residual:
|
300 |
+
if self.drop_connect_rate > 0.:
|
301 |
+
x = drop_connect(x, self.training, self.drop_connect_rate)
|
302 |
+
x += residual
|
303 |
+
return x


class EdgeResidual(nn.Module):
    """ EdgeTPU Residual block with expansion convolution followed by pointwise-linear w/ stride"""

    def __init__(self, in_chs, out_chs, exp_kernel_size=3, exp_ratio=1.0, fake_in_chs=0,
                 stride=1, pad_type='', act_layer=nn.ReLU, noskip=False, pw_kernel_size=1,
                 se_ratio=0., se_kwargs=None, norm_layer=nn.BatchNorm2d, norm_kwargs=None, drop_connect_rate=0.):
        super(EdgeResidual, self).__init__()
        norm_kwargs = norm_kwargs or {}
        mid_chs = make_divisible(fake_in_chs * exp_ratio) if fake_in_chs > 0 else make_divisible(in_chs * exp_ratio)
        self.has_residual = (in_chs == out_chs and stride == 1) and not noskip
        self.drop_connect_rate = drop_connect_rate

        # Expansion convolution
        self.conv_exp = select_conv2d(in_chs, mid_chs, exp_kernel_size, padding=pad_type)
        self.bn1 = norm_layer(mid_chs, **norm_kwargs)
        self.act1 = act_layer(inplace=True)

        # Squeeze-and-excitation
        if se_ratio is not None and se_ratio > 0.:
            se_kwargs = resolve_se_args(se_kwargs, in_chs, act_layer)
            self.se = SqueezeExcite(mid_chs, se_ratio=se_ratio, **se_kwargs)
        else:
            self.se = nn.Identity()

        # Point-wise linear projection
        self.conv_pwl = select_conv2d(mid_chs, out_chs, pw_kernel_size, stride=stride, padding=pad_type)
        self.bn2 = norm_layer(out_chs, **norm_kwargs)  # use the configured norm_layer, consistent with bn1

    def forward(self, x):
        residual = x

        # Expansion convolution
        x = self.conv_exp(x)
        x = self.bn1(x)
        x = self.act1(x)

        # Squeeze-and-excitation
        x = self.se(x)

        # Point-wise linear projection
        x = self.conv_pwl(x)
        x = self.bn2(x)

        if self.has_residual:
            if self.drop_connect_rate > 0.:
                x = drop_connect(x, self.training, self.drop_connect_rate)
            x += residual

        return x
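
# Illustrative note (an assumption added for clarity, not in the original
# source): 'er' blocks are produced from strings like the EfficientNet-EdgeTPU
# defs, where 'fc24' sets fake_in_chs to mimic the origin impl's input filter
# mismatch. See _decode_block_str further below:
#
#   ba, rep = _decode_block_str('er_r1_k3_s1_e4_c24_fc24_noskip')
#   # ba -> dict(block_type='er', exp_kernel_size=3, exp_ratio=4.0, out_chs=24,
#   #            fake_in_chs=24, stride=1, noskip=True, ...); rep -> 1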


class EfficientNetBuilder:
    """ Build Trunk Blocks for Efficient/Mobile Networks

    This ended up being somewhat of a cross between
    https://github.com/tensorflow/tpu/blob/master/models/official/mnasnet/mnasnet_models.py
    and
    https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/maskrcnn_benchmark/modeling/backbone/fbnet_builder.py
    """

    def __init__(self, channel_multiplier=1.0, channel_divisor=8, channel_min=None,
                 pad_type='', act_layer=None, se_kwargs=None,
                 norm_layer=nn.BatchNorm2d, norm_kwargs=None, drop_connect_rate=0.):
        self.channel_multiplier = channel_multiplier
        self.channel_divisor = channel_divisor
        self.channel_min = channel_min
        self.pad_type = pad_type
        self.act_layer = act_layer
        self.se_kwargs = se_kwargs
        self.norm_layer = norm_layer
        self.norm_kwargs = norm_kwargs
        self.drop_connect_rate = drop_connect_rate

        # updated during build
        self.in_chs = None
        self.block_idx = 0
        self.block_count = 0

    def _round_channels(self, chs):
        return round_channels(chs, self.channel_multiplier, self.channel_divisor, self.channel_min)

    def _make_block(self, ba):
        bt = ba.pop('block_type')
        ba['in_chs'] = self.in_chs
        ba['out_chs'] = self._round_channels(ba['out_chs'])
        if 'fake_in_chs' in ba and ba['fake_in_chs']:
            # FIXME this is a hack to work around mismatch in origin impl input filters for EdgeTPU
            ba['fake_in_chs'] = self._round_channels(ba['fake_in_chs'])
        ba['norm_layer'] = self.norm_layer
        ba['norm_kwargs'] = self.norm_kwargs
        ba['pad_type'] = self.pad_type
        # block act fn overrides the model default
        ba['act_layer'] = ba['act_layer'] if ba['act_layer'] is not None else self.act_layer
        assert ba['act_layer'] is not None
        if bt == 'ir':
            ba['drop_connect_rate'] = self.drop_connect_rate * self.block_idx / self.block_count
            ba['se_kwargs'] = self.se_kwargs
            if ba.get('num_experts', 0) > 0:
                block = CondConvResidual(**ba)
            else:
                block = InvertedResidual(**ba)
        elif bt == 'ds' or bt == 'dsa':
            ba['drop_connect_rate'] = self.drop_connect_rate * self.block_idx / self.block_count
            ba['se_kwargs'] = self.se_kwargs
            block = DepthwiseSeparableConv(**ba)
        elif bt == 'er':
            ba['drop_connect_rate'] = self.drop_connect_rate * self.block_idx / self.block_count
            ba['se_kwargs'] = self.se_kwargs
            block = EdgeResidual(**ba)
        elif bt == 'cn':
            block = ConvBnAct(**ba)
        else:
            assert False, 'Unknown block type (%s) while building model.' % bt
        self.in_chs = ba['out_chs']  # update in_chs for arg of next block
        return block

    def _make_stack(self, stack_args):
        blocks = []
        # each stack (stage) contains a list of block arguments
        for i, ba in enumerate(stack_args):
            if i >= 1:
                # only the first block in any stack can have a stride > 1
                ba['stride'] = 1
            block = self._make_block(ba)
            blocks.append(block)
            self.block_idx += 1  # incr global idx (across all stacks)
        return nn.Sequential(*blocks)

    def __call__(self, in_chs, block_args):
        """ Build the blocks
        Args:
            in_chs: Number of input-channels passed to first block
            block_args: A list of lists, outer list defines stages, inner
                list contains block args (dicts) as produced by decode_arch_def
        Return:
             List of block stacks (each stack wrapped in nn.Sequential)
        """
        self.in_chs = in_chs
        self.block_count = sum([len(x) for x in block_args])
        self.block_idx = 0
        blocks = []
        # outer list of block_args defines the stacks ('stages' by some conventions)
        for stack_idx, stack in enumerate(block_args):
            assert isinstance(stack, list)
            stack = self._make_stack(stack)
            blocks.append(stack)
        return blocks
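
# Usage sketch (a hand-written example, not part of the original source; it
# mirrors how GenEfficientNet in gen_efficientnet.py drives the builder): the
# builder is called with the stem's output channels and the decoded per-stage
# block args, and returns one nn.Sequential per stage.
#
#   builder = EfficientNetBuilder(channel_multiplier=1.0, act_layer=nn.ReLU, norm_kwargs={})
#   stages = builder(32, decode_arch_def([['ds_r1_k3_s1_e1_c16'], ['ir_r2_k3_s2_e6_c24']]))
#   blocks = nn.Sequential(*stages)  # 2 stages; stage 1 holds two InvertedResidual blocks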


def _parse_ksize(ss):
    if ss.isdigit():
        return int(ss)
    else:
        return [int(k) for k in ss.split('.')]
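
# For example (illustrative, not in the original source):
#   _parse_ksize('3')      -> 3          (single kernel size)
#   _parse_ksize('3.5.7')  -> [3, 5, 7]  (mixed kernel sizes, as used in the MixNet defs)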


def _decode_block_str(block_str):
    """ Decode block definition string

    Gets a dict of block args from a string notation of arguments.
    E.g. ir_r2_k3_s2_e1_i32_o16_se0.25_noskip

    All args can exist in any order with the exception of the leading string which
    is assumed to indicate the block type.

    leading string - block type (
      ir = InvertedResidual, ds = DepthwiseSep, dsa = DepthwiseSep with pw act, cn = ConvBnAct)
    r - number of repeat blocks,
    k - kernel size,
    s - strides (1-9),
    e - expansion ratio,
    c - output channels,
    se - squeeze/excitation ratio
    n - activation fn ('re', 'r6', 'hs', or 'sw')
    Args:
        block_str: a string representation of block arguments.
    Returns:
        A dict of block args and an int count of repeats for the block.
    Raises:
        ValueError: if the string def is not properly specified (TODO)
    """
    assert isinstance(block_str, str)
    ops = block_str.split('_')
    block_type = ops[0]  # take the block type off the front
    ops = ops[1:]
    options = {}
    noskip = False
    for op in ops:
        # string options being checked on individual basis, combine if they grow
        if op == 'noskip':
            noskip = True
        elif op.startswith('n'):
            # activation fn
            key = op[0]
            v = op[1:]
            if v == 're':
                value = get_act_layer('relu')
            elif v == 'r6':
                value = get_act_layer('relu6')
            elif v == 'hs':
                value = get_act_layer('hard_swish')
            elif v == 'sw':
                value = get_act_layer('swish')
            else:
                continue
            options[key] = value
        else:
            # all numeric options
            splits = re.split(r'(\d.*)', op)
            if len(splits) >= 2:
                key, value = splits[:2]
                options[key] = value

    # if act_layer is None, the model default (passed to model init) will be used
    act_layer = options['n'] if 'n' in options else None
    exp_kernel_size = _parse_ksize(options['a']) if 'a' in options else 1
    pw_kernel_size = _parse_ksize(options['p']) if 'p' in options else 1
    fake_in_chs = int(options['fc']) if 'fc' in options else 0  # FIXME hack to deal with in_chs issue in TPU def

    num_repeat = int(options['r'])
    # each type of block has different valid arguments, fill accordingly
    if block_type == 'ir':
        block_args = dict(
            block_type=block_type,
            dw_kernel_size=_parse_ksize(options['k']),
            exp_kernel_size=exp_kernel_size,
            pw_kernel_size=pw_kernel_size,
            out_chs=int(options['c']),
            exp_ratio=float(options['e']),
            se_ratio=float(options['se']) if 'se' in options else None,
            stride=int(options['s']),
            act_layer=act_layer,
            noskip=noskip,
        )
        if 'cc' in options:
            block_args['num_experts'] = int(options['cc'])
    elif block_type == 'ds' or block_type == 'dsa':
        block_args = dict(
            block_type=block_type,
            dw_kernel_size=_parse_ksize(options['k']),
            pw_kernel_size=pw_kernel_size,
            out_chs=int(options['c']),
            se_ratio=float(options['se']) if 'se' in options else None,
            stride=int(options['s']),
            act_layer=act_layer,
            pw_act=block_type == 'dsa',
            noskip=block_type == 'dsa' or noskip,
        )
    elif block_type == 'er':
        block_args = dict(
            block_type=block_type,
            exp_kernel_size=_parse_ksize(options['k']),
            pw_kernel_size=pw_kernel_size,
            out_chs=int(options['c']),
            exp_ratio=float(options['e']),
            fake_in_chs=fake_in_chs,
            se_ratio=float(options['se']) if 'se' in options else None,
            stride=int(options['s']),
            act_layer=act_layer,
            noskip=noskip,
        )
    elif block_type == 'cn':
        block_args = dict(
            block_type=block_type,
            kernel_size=int(options['k']),
            out_chs=int(options['c']),
            stride=int(options['s']),
            act_layer=act_layer,
        )
    else:
        assert False, 'Unknown block type (%s)' % block_type

    return block_args, num_repeat
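
# Worked example (illustrative, not in the original source):
#   _decode_block_str('ir_r2_k3_s2_e6_c24_se0.25')
#   -> (dict(block_type='ir', dw_kernel_size=3, exp_kernel_size=1, pw_kernel_size=1,
#            out_chs=24, exp_ratio=6.0, se_ratio=0.25, stride=2, act_layer=None,
#            noskip=False), 2)
# act_layer is None here, so the model-wide default is applied by the builder.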


def _scale_stage_depth(stack_args, repeats, depth_multiplier=1.0, depth_trunc='ceil'):
    """ Per-stage depth scaling
    Scales the block repeats in each stage. This depth scaling impl maintains
    compatibility with the EfficientNet scaling method, while allowing sensible
    scaling for other models that may have multiple block arg definitions in each stage.
    """

    # We scale the total repeat count for each stage, there may be multiple
    # block arg defs per stage so we need to sum.
    num_repeat = sum(repeats)
    if depth_trunc == 'round':
        # Truncating to int by rounding allows stages with few repeats to remain
        # proportionally smaller for longer. This is a good choice when stage definitions
        # include single repeat stages that we'd prefer to keep that way as long as possible
        num_repeat_scaled = max(1, round(num_repeat * depth_multiplier))
    else:
        # The default for EfficientNet truncates repeats to int via 'ceil'.
        # Any multiplier > 1.0 will result in an increased depth for every stage.
        num_repeat_scaled = int(math.ceil(num_repeat * depth_multiplier))

    # Proportionally distribute repeat count scaling to each block definition in the stage.
    # Allocation is done in reverse as it results in the first block being less likely to be scaled.
    # The first block makes less sense to repeat in most of the arch definitions.
    repeats_scaled = []
    for r in repeats[::-1]:
        rs = max(1, round((r / num_repeat * num_repeat_scaled)))
        repeats_scaled.append(rs)
        num_repeat -= r
        num_repeat_scaled -= rs
    repeats_scaled = repeats_scaled[::-1]

    # Apply the calculated scaling to each block arg in the stage
    sa_scaled = []
    for ba, rep in zip(stack_args, repeats_scaled):
        sa_scaled.extend([deepcopy(ba) for _ in range(rep)])
    return sa_scaled
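
# Worked example (illustrative, not in the original source): repeats=[1, 2] with
# depth_multiplier=1.2 and the default 'ceil' trunc gives num_repeat_scaled =
# ceil(3 * 1.2) = 4. Allocating in reverse, the 2-repeat def gets
# max(1, round(2/3 * 4)) = 3 and the leading 1-repeat def keeps 1, so the stage
# scales [1, 2] -> [1, 3] and the first block stays un-repeated.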


def decode_arch_def(arch_def, depth_multiplier=1.0, depth_trunc='ceil', experts_multiplier=1, fix_first_last=False):
    arch_args = []
    for stack_idx, block_strings in enumerate(arch_def):
        assert isinstance(block_strings, list)
        stack_args = []
        repeats = []
        for block_str in block_strings:
            assert isinstance(block_str, str)
            ba, rep = _decode_block_str(block_str)
            if ba.get('num_experts', 0) > 0 and experts_multiplier > 1:
                ba['num_experts'] *= experts_multiplier
            stack_args.append(ba)
            repeats.append(rep)
        if fix_first_last and (stack_idx == 0 or stack_idx == len(arch_def) - 1):
            arch_args.append(_scale_stage_depth(stack_args, repeats, 1.0, depth_trunc))
        else:
            arch_args.append(_scale_stage_depth(stack_args, repeats, depth_multiplier, depth_trunc))
    return arch_args
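
# E.g. (illustrative, not in the original source):
#   decode_arch_def([['ir_r2_k3_s2_e6_c24']], depth_multiplier=2.0)
#   -> one stage containing 4 deep-copied 'ir' block arg dicts (ceil(2 * 2.0) = 4).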


def initialize_weight_goog(m, n='', fix_group_fanout=True):
    # weight init as per Tensorflow Official impl
    # https://github.com/tensorflow/tpu/blob/master/models/official/mnasnet/mnasnet_model.py
    if isinstance(m, CondConv2d):
        fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
        if fix_group_fanout:
            fan_out //= m.groups
        init_weight_fn = get_condconv_initializer(
            lambda w: w.data.normal_(0, math.sqrt(2.0 / fan_out)), m.num_experts, m.weight_shape)
        init_weight_fn(m.weight)
        if m.bias is not None:
            m.bias.data.zero_()
    elif isinstance(m, nn.Conv2d):
        fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
        if fix_group_fanout:
            fan_out //= m.groups
        m.weight.data.normal_(0, math.sqrt(2.0 / fan_out))
        if m.bias is not None:
            m.bias.data.zero_()
    elif isinstance(m, nn.BatchNorm2d):
        m.weight.data.fill_(1.0)
        m.bias.data.zero_()
    elif isinstance(m, nn.Linear):
        fan_out = m.weight.size(0)  # fan-out
        fan_in = 0
        if 'routing_fn' in n:
            fan_in = m.weight.size(1)
        init_range = 1.0 / math.sqrt(fan_in + fan_out)
        m.weight.data.uniform_(-init_range, init_range)
        m.bias.data.zero_()
670 |
+
|
671 |
+
|
672 |
+
def initialize_weight_default(m, n=''):
|
673 |
+
if isinstance(m, CondConv2d):
|
674 |
+
init_fn = get_condconv_initializer(partial(
|
675 |
+
nn.init.kaiming_normal_, mode='fan_out', nonlinearity='relu'), m.num_experts, m.weight_shape)
|
676 |
+
init_fn(m.weight)
|
677 |
+
elif isinstance(m, nn.Conv2d):
|
678 |
+
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
|
679 |
+
elif isinstance(m, nn.BatchNorm2d):
|
680 |
+
m.weight.data.fill_(1.0)
|
681 |
+
m.bias.data.zero_()
|
682 |
+
elif isinstance(m, nn.Linear):
|
683 |
+
nn.init.kaiming_uniform_(m.weight, mode='fan_in', nonlinearity='linear')
|
extensions/microsoftexcel-controlnet/annotator/normalbae/models/submodules/efficientnet_repo/geffnet/gen_efficientnet.py
ADDED
@@ -0,0 +1,1450 @@
""" Generic Efficient Networks

A generic MobileNet class with building blocks to support a variety of models:

* EfficientNet (B0-B8, L2 + Tensorflow pretrained AutoAug/RandAug/AdvProp/NoisyStudent ports)
  - EfficientNet: Rethinking Model Scaling for CNNs - https://arxiv.org/abs/1905.11946
  - CondConv: Conditionally Parameterized Convolutions for Efficient Inference - https://arxiv.org/abs/1904.04971
  - Adversarial Examples Improve Image Recognition - https://arxiv.org/abs/1911.09665
  - Self-training with Noisy Student improves ImageNet classification - https://arxiv.org/abs/1911.04252

* EfficientNet-Lite

* MixNet (Small, Medium, and Large)
  - MixConv: Mixed Depthwise Convolutional Kernels - https://arxiv.org/abs/1907.09595

* MNasNet B1, A1 (SE), Small
  - MnasNet: Platform-Aware Neural Architecture Search for Mobile - https://arxiv.org/abs/1807.11626

* FBNet-C
  - FBNet: Hardware-Aware Efficient ConvNet Design via Differentiable NAS - https://arxiv.org/abs/1812.03443

* Single-Path NAS Pixel1
  - Single-Path NAS: Designing Hardware-Efficient ConvNets - https://arxiv.org/abs/1904.02877

* And likely more...

Hacked together by / Copyright 2020 Ross Wightman
"""
import torch.nn as nn
import torch.nn.functional as F

from .config import layer_config_kwargs, is_scriptable
from .conv2d_layers import select_conv2d
from .helpers import load_pretrained
from .efficientnet_builder import *

__all__ = ['GenEfficientNet', 'mnasnet_050', 'mnasnet_075', 'mnasnet_100', 'mnasnet_b1', 'mnasnet_140',
           'semnasnet_050', 'semnasnet_075', 'semnasnet_100', 'mnasnet_a1', 'semnasnet_140', 'mnasnet_small',
           'mobilenetv2_100', 'mobilenetv2_140', 'mobilenetv2_110d', 'mobilenetv2_120d',
           'fbnetc_100', 'spnasnet_100', 'efficientnet_b0', 'efficientnet_b1', 'efficientnet_b2', 'efficientnet_b3',
           'efficientnet_b4', 'efficientnet_b5', 'efficientnet_b6', 'efficientnet_b7', 'efficientnet_b8',
           'efficientnet_l2', 'efficientnet_es', 'efficientnet_em', 'efficientnet_el',
           'efficientnet_cc_b0_4e', 'efficientnet_cc_b0_8e', 'efficientnet_cc_b1_8e',
           'efficientnet_lite0', 'efficientnet_lite1', 'efficientnet_lite2', 'efficientnet_lite3', 'efficientnet_lite4',
           'tf_efficientnet_b0', 'tf_efficientnet_b1', 'tf_efficientnet_b2', 'tf_efficientnet_b3',
           'tf_efficientnet_b4', 'tf_efficientnet_b5', 'tf_efficientnet_b6', 'tf_efficientnet_b7', 'tf_efficientnet_b8',
           'tf_efficientnet_b0_ap', 'tf_efficientnet_b1_ap', 'tf_efficientnet_b2_ap', 'tf_efficientnet_b3_ap',
           'tf_efficientnet_b4_ap', 'tf_efficientnet_b5_ap', 'tf_efficientnet_b6_ap', 'tf_efficientnet_b7_ap',
           'tf_efficientnet_b8_ap', 'tf_efficientnet_b0_ns', 'tf_efficientnet_b1_ns', 'tf_efficientnet_b2_ns',
           'tf_efficientnet_b3_ns', 'tf_efficientnet_b4_ns', 'tf_efficientnet_b5_ns', 'tf_efficientnet_b6_ns',
           'tf_efficientnet_b7_ns', 'tf_efficientnet_l2_ns', 'tf_efficientnet_l2_ns_475',
           'tf_efficientnet_es', 'tf_efficientnet_em', 'tf_efficientnet_el',
           'tf_efficientnet_cc_b0_4e', 'tf_efficientnet_cc_b0_8e', 'tf_efficientnet_cc_b1_8e',
           'tf_efficientnet_lite0', 'tf_efficientnet_lite1', 'tf_efficientnet_lite2', 'tf_efficientnet_lite3',
           'tf_efficientnet_lite4',
           'mixnet_s', 'mixnet_m', 'mixnet_l', 'mixnet_xl', 'tf_mixnet_s', 'tf_mixnet_m', 'tf_mixnet_l']

model_urls = {
    'mnasnet_050': None,
    'mnasnet_075': None,
    'mnasnet_100':
        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/mnasnet_b1-74cb7081.pth',
    'mnasnet_140': None,
    'mnasnet_small': None,

    'semnasnet_050': None,
    'semnasnet_075': None,
    'semnasnet_100':
        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/mnasnet_a1-d9418771.pth',
    'semnasnet_140': None,

    'mobilenetv2_100':
        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/mobilenetv2_100_ra-b33bc2c4.pth',
    'mobilenetv2_110d':
        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/mobilenetv2_110d_ra-77090ade.pth',
    'mobilenetv2_120d':
        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/mobilenetv2_120d_ra-5987e2ed.pth',
    'mobilenetv2_140':
        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/mobilenetv2_140_ra-21a4e913.pth',

    'fbnetc_100':
        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/fbnetc_100-c345b898.pth',
    'spnasnet_100':
        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/spnasnet_100-048bc3f4.pth',

    'efficientnet_b0':
        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/efficientnet_b0_ra-3dd342df.pth',
    'efficientnet_b1':
        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/efficientnet_b1-533bc792.pth',
    'efficientnet_b2':
        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/efficientnet_b2_ra-bcdf34b7.pth',
    'efficientnet_b3':
        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/efficientnet_b3_ra2-cf984f9c.pth',
    'efficientnet_b4': None,
    'efficientnet_b5': None,
    'efficientnet_b6': None,
    'efficientnet_b7': None,
    'efficientnet_b8': None,
    'efficientnet_l2': None,

    'efficientnet_es':
        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/efficientnet_es_ra-f111e99c.pth',
    'efficientnet_em': None,
    'efficientnet_el': None,

    'efficientnet_cc_b0_4e': None,
    'efficientnet_cc_b0_8e': None,
    'efficientnet_cc_b1_8e': None,

    'efficientnet_lite0': 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/efficientnet_lite0_ra-37913777.pth',
    'efficientnet_lite1': None,
    'efficientnet_lite2': None,
    'efficientnet_lite3': None,
    'efficientnet_lite4': None,

    'tf_efficientnet_b0':
        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b0_aa-827b6e33.pth',
    'tf_efficientnet_b1':
        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b1_aa-ea7a6ee0.pth',
    'tf_efficientnet_b2':
        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b2_aa-60c94f97.pth',
    'tf_efficientnet_b3':
        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b3_aa-84b4657e.pth',
    'tf_efficientnet_b4':
        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b4_aa-818f208c.pth',
    'tf_efficientnet_b5':
        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b5_ra-9a3e5369.pth',
    'tf_efficientnet_b6':
        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b6_aa-80ba17e4.pth',
    'tf_efficientnet_b7':
        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b7_ra-6c08e654.pth',
    'tf_efficientnet_b8':
        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b8_ra-572d5dd9.pth',

    'tf_efficientnet_b0_ap':
        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b0_ap-f262efe1.pth',
    'tf_efficientnet_b1_ap':
        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b1_ap-44ef0a3d.pth',
    'tf_efficientnet_b2_ap':
        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b2_ap-2f8e7636.pth',
    'tf_efficientnet_b3_ap':
        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b3_ap-aad25bdd.pth',
    'tf_efficientnet_b4_ap':
        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b4_ap-dedb23e6.pth',
    'tf_efficientnet_b5_ap':
        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b5_ap-9e82fae8.pth',
    'tf_efficientnet_b6_ap':
        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b6_ap-4ffb161f.pth',
    'tf_efficientnet_b7_ap':
        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b7_ap-ddb28fec.pth',
    'tf_efficientnet_b8_ap':
        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b8_ap-00e169fa.pth',

    'tf_efficientnet_b0_ns':
        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b0_ns-c0e6a31c.pth',
    'tf_efficientnet_b1_ns':
        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b1_ns-99dd0c41.pth',
    'tf_efficientnet_b2_ns':
        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b2_ns-00306e48.pth',
    'tf_efficientnet_b3_ns':
        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b3_ns-9d44bf68.pth',
    'tf_efficientnet_b4_ns':
        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b4_ns-d6313a46.pth',
    'tf_efficientnet_b5_ns':
        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b5_ns-6f26d0cf.pth',
    'tf_efficientnet_b6_ns':
        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b6_ns-51548356.pth',
    'tf_efficientnet_b7_ns':
        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b7_ns-1dbc32de.pth',
    'tf_efficientnet_l2_ns_475':
        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_l2_ns_475-bebbd00a.pth',
    'tf_efficientnet_l2_ns':
        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_l2_ns-df73bb44.pth',

    'tf_efficientnet_es':
        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_es-ca1afbfe.pth',
    'tf_efficientnet_em':
        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_em-e78cfe58.pth',
    'tf_efficientnet_el':
        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_el-5143854e.pth',

    'tf_efficientnet_cc_b0_4e':
        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_cc_b0_4e-4362b6b2.pth',
    'tf_efficientnet_cc_b0_8e':
        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_cc_b0_8e-66184a25.pth',
    'tf_efficientnet_cc_b1_8e':
        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_cc_b1_8e-f7c79ae1.pth',

    'tf_efficientnet_lite0':
        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_lite0-0aa007d2.pth',
    'tf_efficientnet_lite1':
        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_lite1-bde8b488.pth',
    'tf_efficientnet_lite2':
        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_lite2-dcccb7df.pth',
    'tf_efficientnet_lite3':
        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_lite3-b733e338.pth',
    'tf_efficientnet_lite4':
        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_lite4-741542c3.pth',

    'mixnet_s': 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/mixnet_s-a907afbc.pth',
    'mixnet_m': 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/mixnet_m-4647fc68.pth',
    'mixnet_l': 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/mixnet_l-5a9a2ed8.pth',
    'mixnet_xl': 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/mixnet_xl_ra-aac3c00c.pth',

    'tf_mixnet_s':
        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_mixnet_s-89d3354b.pth',
    'tf_mixnet_m':
        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_mixnet_m-0f4d8805.pth',
    'tf_mixnet_l':
        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_mixnet_l-6c92e0c8.pth',
}


class GenEfficientNet(nn.Module):
    """ Generic EfficientNets

    An implementation of mobile optimized networks that covers:
      * EfficientNet (B0-B8, L2, CondConv, EdgeTPU)
      * MixNet (Small, Medium, Large, and XL)
      * MNASNet A1, B1, and small
      * FBNet C
      * Single-Path NAS Pixel1
    """

    def __init__(self, block_args, num_classes=1000, in_chans=3, num_features=1280, stem_size=32, fix_stem=False,
                 channel_multiplier=1.0, channel_divisor=8, channel_min=None,
                 pad_type='', act_layer=nn.ReLU, drop_rate=0., drop_connect_rate=0.,
                 se_kwargs=None, norm_layer=nn.BatchNorm2d, norm_kwargs=None,
                 weight_init='goog'):
        super(GenEfficientNet, self).__init__()
        self.drop_rate = drop_rate
        norm_kwargs = norm_kwargs or {}  # guard the None default; **None would fail in the norm layer calls below

        if not fix_stem:
            stem_size = round_channels(stem_size, channel_multiplier, channel_divisor, channel_min)
        self.conv_stem = select_conv2d(in_chans, stem_size, 3, stride=2, padding=pad_type)
        self.bn1 = norm_layer(stem_size, **norm_kwargs)
        self.act1 = act_layer(inplace=True)
        in_chs = stem_size

        builder = EfficientNetBuilder(
            channel_multiplier, channel_divisor, channel_min,
            pad_type, act_layer, se_kwargs, norm_layer, norm_kwargs, drop_connect_rate)
        self.blocks = nn.Sequential(*builder(in_chs, block_args))
        in_chs = builder.in_chs

        self.conv_head = select_conv2d(in_chs, num_features, 1, padding=pad_type)
        self.bn2 = norm_layer(num_features, **norm_kwargs)
        self.act2 = act_layer(inplace=True)
        self.global_pool = nn.AdaptiveAvgPool2d(1)
        self.classifier = nn.Linear(num_features, num_classes)

        for n, m in self.named_modules():
            if weight_init == 'goog':
                initialize_weight_goog(m, n)
            else:
                initialize_weight_default(m, n)

    def features(self, x):
        x = self.conv_stem(x)
        x = self.bn1(x)
        x = self.act1(x)
        x = self.blocks(x)
        x = self.conv_head(x)
        x = self.bn2(x)
        x = self.act2(x)
        return x

    def as_sequential(self):
        layers = [self.conv_stem, self.bn1, self.act1]
        layers.extend(self.blocks)
        layers.extend([
            self.conv_head, self.bn2, self.act2,
            self.global_pool, nn.Flatten(), nn.Dropout(self.drop_rate), self.classifier])
        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.features(x)
        x = self.global_pool(x)
        x = x.flatten(1)
        if self.drop_rate > 0.:
            x = F.dropout(x, p=self.drop_rate, training=self.training)
        return self.classifier(x)
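
# Shape sketch (an illustration added here, not part of the original source):
# with the stride-2 stem and a single stride-1 stage, a 224x224 input yields a
# 112x112 feature map with num_features channels from features(); forward()
# then pools, flattens, and classifies.
#
#   import torch
#   m = GenEfficientNet(decode_arch_def([['ds_r1_k3_s1_e1_c16']]), norm_kwargs={})
#   f = m.features(torch.randn(1, 3, 224, 224))   # -> (1, 1280, 112, 112)
#   logits = m(torch.randn(1, 3, 224, 224))       # -> (1, 1000)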


def _create_model(model_kwargs, variant, pretrained=False):
    as_sequential = model_kwargs.pop('as_sequential', False)
    model = GenEfficientNet(**model_kwargs)
    if pretrained:
        load_pretrained(model, model_urls[variant])
    if as_sequential:
        model = model.as_sequential()
    return model
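
# Typical entry point (illustrative, not part of the original source): the model
# constructors below all funnel through _create_model, e.g.
#   model = efficientnet_b0(pretrained=True)       # weights fetched via model_urls
#   model = efficientnet_b0(as_sequential=True)    # flattened nn.Sequential variant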


def _gen_mnasnet_a1(variant, channel_multiplier=1.0, pretrained=False, **kwargs):
    """Creates a mnasnet-a1 model.

    Ref impl: https://github.com/tensorflow/tpu/tree/master/models/official/mnasnet
    Paper: https://arxiv.org/pdf/1807.11626.pdf.

    Args:
        channel_multiplier: multiplier to number of channels per layer.
    """
    arch_def = [
        # stage 0, 112x112 in
        ['ds_r1_k3_s1_e1_c16_noskip'],
        # stage 1, 112x112 in
        ['ir_r2_k3_s2_e6_c24'],
        # stage 2, 56x56 in
        ['ir_r3_k5_s2_e3_c40_se0.25'],
        # stage 3, 28x28 in
        ['ir_r4_k3_s2_e6_c80'],
        # stage 4, 14x14 in
        ['ir_r2_k3_s1_e6_c112_se0.25'],
        # stage 5, 14x14 in
        ['ir_r3_k5_s2_e6_c160_se0.25'],
        # stage 6, 7x7 in
        ['ir_r1_k3_s1_e6_c320'],
    ]
    with layer_config_kwargs(kwargs):
        model_kwargs = dict(
            block_args=decode_arch_def(arch_def),
            stem_size=32,
            channel_multiplier=channel_multiplier,
            act_layer=resolve_act_layer(kwargs, 'relu'),
            norm_kwargs=resolve_bn_args(kwargs),
            **kwargs
        )
        model = _create_model(model_kwargs, variant, pretrained)
    return model


def _gen_mnasnet_b1(variant, channel_multiplier=1.0, pretrained=False, **kwargs):
    """Creates a mnasnet-b1 model.

    Ref impl: https://github.com/tensorflow/tpu/tree/master/models/official/mnasnet
    Paper: https://arxiv.org/pdf/1807.11626.pdf.

    Args:
        channel_multiplier: multiplier to number of channels per layer.
    """
    arch_def = [
        # stage 0, 112x112 in
        ['ds_r1_k3_s1_c16_noskip'],
        # stage 1, 112x112 in
        ['ir_r3_k3_s2_e3_c24'],
        # stage 2, 56x56 in
        ['ir_r3_k5_s2_e3_c40'],
        # stage 3, 28x28 in
        ['ir_r3_k5_s2_e6_c80'],
        # stage 4, 14x14 in
        ['ir_r2_k3_s1_e6_c96'],
        # stage 5, 14x14 in
        ['ir_r4_k5_s2_e6_c192'],
        # stage 6, 7x7 in
        ['ir_r1_k3_s1_e6_c320_noskip']
    ]
    with layer_config_kwargs(kwargs):
        model_kwargs = dict(
            block_args=decode_arch_def(arch_def),
            stem_size=32,
            channel_multiplier=channel_multiplier,
            act_layer=resolve_act_layer(kwargs, 'relu'),
            norm_kwargs=resolve_bn_args(kwargs),
            **kwargs
        )
        model = _create_model(model_kwargs, variant, pretrained)
    return model


def _gen_mnasnet_small(variant, channel_multiplier=1.0, pretrained=False, **kwargs):
    """Creates a mnasnet-small model.

    Ref impl: https://github.com/tensorflow/tpu/tree/master/models/official/mnasnet
    Paper: https://arxiv.org/pdf/1807.11626.pdf.

    Args:
        channel_multiplier: multiplier to number of channels per layer.
    """
    arch_def = [
        ['ds_r1_k3_s1_c8'],
        ['ir_r1_k3_s2_e3_c16'],
        ['ir_r2_k3_s2_e6_c16'],
        ['ir_r4_k5_s2_e6_c32_se0.25'],
        ['ir_r3_k3_s1_e6_c32_se0.25'],
        ['ir_r3_k5_s2_e6_c88_se0.25'],
        ['ir_r1_k3_s1_e6_c144']
    ]
    with layer_config_kwargs(kwargs):
        model_kwargs = dict(
            block_args=decode_arch_def(arch_def),
            stem_size=8,
            channel_multiplier=channel_multiplier,
            act_layer=resolve_act_layer(kwargs, 'relu'),
            norm_kwargs=resolve_bn_args(kwargs),
            **kwargs
        )
        model = _create_model(model_kwargs, variant, pretrained)
    return model


def _gen_mobilenet_v2(
        variant, channel_multiplier=1.0, depth_multiplier=1.0, fix_stem_head=False, pretrained=False, **kwargs):
    """ Generate MobileNet-V2 network
    Ref impl: https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet_v2.py
    Paper: https://arxiv.org/abs/1801.04381
    """
    arch_def = [
        ['ds_r1_k3_s1_c16'],
        ['ir_r2_k3_s2_e6_c24'],
        ['ir_r3_k3_s2_e6_c32'],
        ['ir_r4_k3_s2_e6_c64'],
        ['ir_r3_k3_s1_e6_c96'],
        ['ir_r3_k3_s2_e6_c160'],
        ['ir_r1_k3_s1_e6_c320'],
    ]
    with layer_config_kwargs(kwargs):
        model_kwargs = dict(
            block_args=decode_arch_def(arch_def, depth_multiplier=depth_multiplier, fix_first_last=fix_stem_head),
            num_features=1280 if fix_stem_head else round_channels(1280, channel_multiplier, 8, None),
            stem_size=32,
            fix_stem=fix_stem_head,
            channel_multiplier=channel_multiplier,
            norm_kwargs=resolve_bn_args(kwargs),
            act_layer=nn.ReLU6,
            **kwargs
        )
        model = _create_model(model_kwargs, variant, pretrained)
    return model


def _gen_fbnetc(variant, channel_multiplier=1.0, pretrained=False, **kwargs):
    """ FBNet-C

    Paper: https://arxiv.org/abs/1812.03443
    Ref Impl: https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/maskrcnn_benchmark/modeling/backbone/fbnet_modeldef.py

    NOTE: the ref impl above does not directly correspond to the 'C' variant here; the
    variant was derived from the paper, and the impl was used to confirm some building
    block details.
    """
    arch_def = [
        ['ir_r1_k3_s1_e1_c16'],
        ['ir_r1_k3_s2_e6_c24', 'ir_r2_k3_s1_e1_c24'],
        ['ir_r1_k5_s2_e6_c32', 'ir_r1_k5_s1_e3_c32', 'ir_r1_k5_s1_e6_c32', 'ir_r1_k3_s1_e6_c32'],
        ['ir_r1_k5_s2_e6_c64', 'ir_r1_k5_s1_e3_c64', 'ir_r2_k5_s1_e6_c64'],
        ['ir_r3_k5_s1_e6_c112', 'ir_r1_k5_s1_e3_c112'],
        ['ir_r4_k5_s2_e6_c184'],
        ['ir_r1_k3_s1_e6_c352'],
    ]
    with layer_config_kwargs(kwargs):
        model_kwargs = dict(
            block_args=decode_arch_def(arch_def),
            stem_size=16,
            num_features=1984,  # paper suggests this, but is not 100% clear
            channel_multiplier=channel_multiplier,
            act_layer=resolve_act_layer(kwargs, 'relu'),
            norm_kwargs=resolve_bn_args(kwargs),
            **kwargs
        )
        model = _create_model(model_kwargs, variant, pretrained)
    return model


def _gen_spnasnet(variant, channel_multiplier=1.0, pretrained=False, **kwargs):
    """Creates the Single-Path NAS model from search targeted for Pixel1 phone.

    Paper: https://arxiv.org/abs/1904.02877

    Args:
        channel_multiplier: multiplier to number of channels per layer.
    """
    arch_def = [
        # stage 0, 112x112 in
        ['ds_r1_k3_s1_c16_noskip'],
        # stage 1, 112x112 in
        ['ir_r3_k3_s2_e3_c24'],
        # stage 2, 56x56 in
        ['ir_r1_k5_s2_e6_c40', 'ir_r3_k3_s1_e3_c40'],
        # stage 3, 28x28 in
        ['ir_r1_k5_s2_e6_c80', 'ir_r3_k3_s1_e3_c80'],
        # stage 4, 14x14 in
        ['ir_r1_k5_s1_e6_c96', 'ir_r3_k5_s1_e3_c96'],
        # stage 5, 14x14 in
        ['ir_r4_k5_s2_e6_c192'],
        # stage 6, 7x7 in
        ['ir_r1_k3_s1_e6_c320_noskip']
    ]
    with layer_config_kwargs(kwargs):
        model_kwargs = dict(
            block_args=decode_arch_def(arch_def),
            stem_size=32,
            channel_multiplier=channel_multiplier,
            act_layer=resolve_act_layer(kwargs, 'relu'),
            norm_kwargs=resolve_bn_args(kwargs),
            **kwargs
        )
        model = _create_model(model_kwargs, variant, pretrained)
    return model


def _gen_efficientnet(variant, channel_multiplier=1.0, depth_multiplier=1.0, pretrained=False, **kwargs):
    """Creates an EfficientNet model.

    Ref impl: https://github.com/tensorflow/tpu/blob/master/models/official/efficientnet/efficientnet_model.py
    Paper: https://arxiv.org/abs/1905.11946

    EfficientNet params
    name: (channel_multiplier, depth_multiplier, resolution, dropout_rate)
    'efficientnet-b0': (1.0, 1.0, 224, 0.2),
    'efficientnet-b1': (1.0, 1.1, 240, 0.2),
    'efficientnet-b2': (1.1, 1.2, 260, 0.3),
    'efficientnet-b3': (1.2, 1.4, 300, 0.3),
    'efficientnet-b4': (1.4, 1.8, 380, 0.4),
    'efficientnet-b5': (1.6, 2.2, 456, 0.4),
    'efficientnet-b6': (1.8, 2.6, 528, 0.5),
    'efficientnet-b7': (2.0, 3.1, 600, 0.5),
    'efficientnet-b8': (2.2, 3.6, 672, 0.5),

    Args:
        channel_multiplier: multiplier to number of channels per layer
        depth_multiplier: multiplier to number of repeats per stage
    """
    arch_def = [
        ['ds_r1_k3_s1_e1_c16_se0.25'],
        ['ir_r2_k3_s2_e6_c24_se0.25'],
        ['ir_r2_k5_s2_e6_c40_se0.25'],
        ['ir_r3_k3_s2_e6_c80_se0.25'],
        ['ir_r3_k5_s1_e6_c112_se0.25'],
        ['ir_r4_k5_s2_e6_c192_se0.25'],
        ['ir_r1_k3_s1_e6_c320_se0.25'],
    ]
    with layer_config_kwargs(kwargs):
        model_kwargs = dict(
            block_args=decode_arch_def(arch_def, depth_multiplier),
            num_features=round_channels(1280, channel_multiplier, 8, None),
            stem_size=32,
            channel_multiplier=channel_multiplier,
            act_layer=resolve_act_layer(kwargs, 'swish'),
            norm_kwargs=resolve_bn_args(kwargs),
            **kwargs,
        )
        model = _create_model(model_kwargs, variant, pretrained)
    return model
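
# Scaling example (illustrative, not in the original source): for
# 'efficientnet_b2' below, channel_multiplier=1.1 and depth_multiplier=1.2, so
# the stage ['ir_r3_k3_s2_e6_c80_se0.25'] becomes ceil(3 * 1.2) = 4 repeats and
# round_channels(80, 1.1, 8) = 88 output channels.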


def _gen_efficientnet_edge(variant, channel_multiplier=1.0, depth_multiplier=1.0, pretrained=False, **kwargs):
    arch_def = [
        # NOTE `fc` is present to override a mismatch between stem channels and in chs not
        # present in other models
        ['er_r1_k3_s1_e4_c24_fc24_noskip'],
        ['er_r2_k3_s2_e8_c32'],
        ['er_r4_k3_s2_e8_c48'],
        ['ir_r5_k5_s2_e8_c96'],
        ['ir_r4_k5_s1_e8_c144'],
        ['ir_r2_k5_s2_e8_c192'],
    ]
    with layer_config_kwargs(kwargs):
        model_kwargs = dict(
            block_args=decode_arch_def(arch_def, depth_multiplier),
            num_features=round_channels(1280, channel_multiplier, 8, None),
            stem_size=32,
            channel_multiplier=channel_multiplier,
            act_layer=resolve_act_layer(kwargs, 'relu'),
            norm_kwargs=resolve_bn_args(kwargs),
            **kwargs,
        )
        model = _create_model(model_kwargs, variant, pretrained)
    return model


def _gen_efficientnet_condconv(
        variant, channel_multiplier=1.0, depth_multiplier=1.0, experts_multiplier=1, pretrained=False, **kwargs):
    """Creates an efficientnet-condconv model."""
    arch_def = [
        ['ds_r1_k3_s1_e1_c16_se0.25'],
        ['ir_r2_k3_s2_e6_c24_se0.25'],
        ['ir_r2_k5_s2_e6_c40_se0.25'],
        ['ir_r3_k3_s2_e6_c80_se0.25'],
        ['ir_r3_k5_s1_e6_c112_se0.25_cc4'],
        ['ir_r4_k5_s2_e6_c192_se0.25_cc4'],
        ['ir_r1_k3_s1_e6_c320_se0.25_cc4'],
    ]
    with layer_config_kwargs(kwargs):
        model_kwargs = dict(
            block_args=decode_arch_def(arch_def, depth_multiplier, experts_multiplier=experts_multiplier),
            num_features=round_channels(1280, channel_multiplier, 8, None),
            stem_size=32,
            channel_multiplier=channel_multiplier,
            act_layer=resolve_act_layer(kwargs, 'swish'),
            norm_kwargs=resolve_bn_args(kwargs),
            **kwargs,
        )
        model = _create_model(model_kwargs, variant, pretrained)
    return model
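
# CondConv note (illustrative, not in the original source): the '_cc4' suffix in
# the 'ir' strings above sets num_experts=4 via _decode_block_str, so the builder
# emits CondConvResidual blocks; passing experts_multiplier=2 to this generator
# would scale each of those stages to 8 experts in decode_arch_def.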


def _gen_efficientnet_lite(variant, channel_multiplier=1.0, depth_multiplier=1.0, pretrained=False, **kwargs):
    """Creates an EfficientNet-Lite model.

    Ref impl: https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet/lite
    Paper: https://arxiv.org/abs/1905.11946

    EfficientNet params
    name: (channel_multiplier, depth_multiplier, resolution, dropout_rate)
    'efficientnet-lite0': (1.0, 1.0, 224, 0.2),
    'efficientnet-lite1': (1.0, 1.1, 240, 0.2),
    'efficientnet-lite2': (1.1, 1.2, 260, 0.3),
    'efficientnet-lite3': (1.2, 1.4, 280, 0.3),
    'efficientnet-lite4': (1.4, 1.8, 300, 0.3),

    Args:
        channel_multiplier: multiplier to number of channels per layer
        depth_multiplier: multiplier to number of repeats per stage
    """
    arch_def = [
        ['ds_r1_k3_s1_e1_c16'],
        ['ir_r2_k3_s2_e6_c24'],
        ['ir_r2_k5_s2_e6_c40'],
        ['ir_r3_k3_s2_e6_c80'],
        ['ir_r3_k5_s1_e6_c112'],
        ['ir_r4_k5_s2_e6_c192'],
        ['ir_r1_k3_s1_e6_c320'],
    ]
    with layer_config_kwargs(kwargs):
        model_kwargs = dict(
            block_args=decode_arch_def(arch_def, depth_multiplier, fix_first_last=True),
            num_features=1280,
            stem_size=32,
            fix_stem=True,
            channel_multiplier=channel_multiplier,
            act_layer=nn.ReLU6,
            norm_kwargs=resolve_bn_args(kwargs),
            **kwargs,
        )
        model = _create_model(model_kwargs, variant, pretrained)
    return model


def _gen_mixnet_s(variant, channel_multiplier=1.0, pretrained=False, **kwargs):
    """Creates a MixNet Small model.

    Ref impl: https://github.com/tensorflow/tpu/tree/master/models/official/mnasnet/mixnet
    Paper: https://arxiv.org/abs/1907.09595
    """
    arch_def = [
        # stage 0, 112x112 in
        ['ds_r1_k3_s1_e1_c16'],  # relu
        # stage 1, 112x112 in
        ['ir_r1_k3_a1.1_p1.1_s2_e6_c24', 'ir_r1_k3_a1.1_p1.1_s1_e3_c24'],  # relu
        # stage 2, 56x56 in
        ['ir_r1_k3.5.7_s2_e6_c40_se0.5_nsw', 'ir_r3_k3.5_a1.1_p1.1_s1_e6_c40_se0.5_nsw'],  # swish
        # stage 3, 28x28 in
        ['ir_r1_k3.5.7_p1.1_s2_e6_c80_se0.25_nsw', 'ir_r2_k3.5_p1.1_s1_e6_c80_se0.25_nsw'],  # swish
        # stage 4, 14x14 in
        ['ir_r1_k3.5.7_a1.1_p1.1_s1_e6_c120_se0.5_nsw', 'ir_r2_k3.5.7.9_a1.1_p1.1_s1_e3_c120_se0.5_nsw'],  # swish
        # stage 5, 14x14 in
        ['ir_r1_k3.5.7.9.11_s2_e6_c200_se0.5_nsw', 'ir_r2_k3.5.7.9_p1.1_s1_e6_c200_se0.5_nsw'],  # swish
        # 7x7
    ]
    with layer_config_kwargs(kwargs):
        model_kwargs = dict(
            block_args=decode_arch_def(arch_def),
            num_features=1536,
            stem_size=16,
            channel_multiplier=channel_multiplier,
            act_layer=resolve_act_layer(kwargs, 'relu'),
            norm_kwargs=resolve_bn_args(kwargs),
            **kwargs
        )
        model = _create_model(model_kwargs, variant, pretrained)
    return model


def _gen_mixnet_m(variant, channel_multiplier=1.0, depth_multiplier=1.0, pretrained=False, **kwargs):
    """Creates a MixNet Medium-Large model.

    Ref impl: https://github.com/tensorflow/tpu/tree/master/models/official/mnasnet/mixnet
    Paper: https://arxiv.org/abs/1907.09595
    """
    arch_def = [
        # stage 0, 112x112 in
        ['ds_r1_k3_s1_e1_c24'],  # relu
        # stage 1, 112x112 in
        ['ir_r1_k3.5.7_a1.1_p1.1_s2_e6_c32', 'ir_r1_k3_a1.1_p1.1_s1_e3_c32'],  # relu
        # stage 2, 56x56 in
        ['ir_r1_k3.5.7.9_s2_e6_c40_se0.5_nsw', 'ir_r3_k3.5_a1.1_p1.1_s1_e6_c40_se0.5_nsw'],  # swish
        # stage 3, 28x28 in
        ['ir_r1_k3.5.7_s2_e6_c80_se0.25_nsw', 'ir_r3_k3.5.7.9_a1.1_p1.1_s1_e6_c80_se0.25_nsw'],  # swish
        # stage 4, 14x14 in
        ['ir_r1_k3_s1_e6_c120_se0.5_nsw', 'ir_r3_k3.5.7.9_a1.1_p1.1_s1_e3_c120_se0.5_nsw'],  # swish
        # stage 5, 14x14 in
        ['ir_r1_k3.5.7.9_s2_e6_c200_se0.5_nsw', 'ir_r3_k3.5.7.9_p1.1_s1_e6_c200_se0.5_nsw'],  # swish
        # 7x7
    ]
    with layer_config_kwargs(kwargs):
        model_kwargs = dict(
            block_args=decode_arch_def(arch_def, depth_multiplier, depth_trunc='round'),
            num_features=1536,
            stem_size=24,
            channel_multiplier=channel_multiplier,
            act_layer=resolve_act_layer(kwargs, 'relu'),
            norm_kwargs=resolve_bn_args(kwargs),
            **kwargs
        )
        model = _create_model(model_kwargs, variant, pretrained)
    return model
|
709 |
+
|
710 |
+
|
711 |
+
def mnasnet_050(pretrained=False, **kwargs):
    """ MNASNet B1, depth multiplier of 0.5. """
    model = _gen_mnasnet_b1('mnasnet_050', 0.5, pretrained=pretrained, **kwargs)
    return model


def mnasnet_075(pretrained=False, **kwargs):
    """ MNASNet B1, depth multiplier of 0.75. """
    model = _gen_mnasnet_b1('mnasnet_075', 0.75, pretrained=pretrained, **kwargs)
    return model


def mnasnet_100(pretrained=False, **kwargs):
    """ MNASNet B1, depth multiplier of 1.0. """
    model = _gen_mnasnet_b1('mnasnet_100', 1.0, pretrained=pretrained, **kwargs)
    return model


def mnasnet_b1(pretrained=False, **kwargs):
    """ MNASNet B1, depth multiplier of 1.0. """
    return mnasnet_100(pretrained, **kwargs)


def mnasnet_140(pretrained=False, **kwargs):
    """ MNASNet B1, depth multiplier of 1.4. """
    model = _gen_mnasnet_b1('mnasnet_140', 1.4, pretrained=pretrained, **kwargs)
    return model


def semnasnet_050(pretrained=False, **kwargs):
    """ MNASNet A1 (w/ SE), depth multiplier of 0.5. """
    model = _gen_mnasnet_a1('semnasnet_050', 0.5, pretrained=pretrained, **kwargs)
    return model


def semnasnet_075(pretrained=False, **kwargs):
    """ MNASNet A1 (w/ SE), depth multiplier of 0.75. """
    model = _gen_mnasnet_a1('semnasnet_075', 0.75, pretrained=pretrained, **kwargs)
    return model


def semnasnet_100(pretrained=False, **kwargs):
    """ MNASNet A1 (w/ SE), depth multiplier of 1.0. """
    model = _gen_mnasnet_a1('semnasnet_100', 1.0, pretrained=pretrained, **kwargs)
    return model


def mnasnet_a1(pretrained=False, **kwargs):
    """ MNASNet A1 (w/ SE), depth multiplier of 1.0. """
    return semnasnet_100(pretrained, **kwargs)


def semnasnet_140(pretrained=False, **kwargs):
    """ MNASNet A1 (w/ SE), depth multiplier of 1.4. """
    model = _gen_mnasnet_a1('semnasnet_140', 1.4, pretrained=pretrained, **kwargs)
    return model


def mnasnet_small(pretrained=False, **kwargs):
    """ MNASNet Small, depth multiplier of 1.0. """
    model = _gen_mnasnet_small('mnasnet_small', 1.0, pretrained=pretrained, **kwargs)
    return model


def mobilenetv2_100(pretrained=False, **kwargs):
    """ MobileNet V2 w/ 1.0 channel multiplier """
    model = _gen_mobilenet_v2('mobilenetv2_100', 1.0, pretrained=pretrained, **kwargs)
    return model


def mobilenetv2_140(pretrained=False, **kwargs):
    """ MobileNet V2 w/ 1.4 channel multiplier """
    model = _gen_mobilenet_v2('mobilenetv2_140', 1.4, pretrained=pretrained, **kwargs)
    return model


def mobilenetv2_110d(pretrained=False, **kwargs):
    """ MobileNet V2 w/ 1.1 channel, 1.2 depth multipliers """
    model = _gen_mobilenet_v2(
        'mobilenetv2_110d', 1.1, depth_multiplier=1.2, fix_stem_head=True, pretrained=pretrained, **kwargs)
    return model


def mobilenetv2_120d(pretrained=False, **kwargs):
    """ MobileNet V2 w/ 1.2 channel, 1.4 depth multipliers """
    model = _gen_mobilenet_v2(
        'mobilenetv2_120d', 1.2, depth_multiplier=1.4, fix_stem_head=True, pretrained=pretrained, **kwargs)
    return model


def fbnetc_100(pretrained=False, **kwargs):
    """ FBNet-C """
    if pretrained:
        # pretrained model trained with non-default BN epsilon
        kwargs['bn_eps'] = BN_EPS_TF_DEFAULT
    model = _gen_fbnetc('fbnetc_100', 1.0, pretrained=pretrained, **kwargs)
    return model


def spnasnet_100(pretrained=False, **kwargs):
    """ Single-Path NAS Pixel1 """
    model = _gen_spnasnet('spnasnet_100', 1.0, pretrained=pretrained, **kwargs)
    return model

def efficientnet_b0(pretrained=False, **kwargs):
    """ EfficientNet-B0 """
    # NOTE for train set drop_rate=0.2, drop_connect_rate=0.2
    model = _gen_efficientnet(
        'efficientnet_b0', channel_multiplier=1.0, depth_multiplier=1.0, pretrained=pretrained, **kwargs)
    return model


def efficientnet_b1(pretrained=False, **kwargs):
    """ EfficientNet-B1 """
    # NOTE for train set drop_rate=0.2, drop_connect_rate=0.2
    model = _gen_efficientnet(
        'efficientnet_b1', channel_multiplier=1.0, depth_multiplier=1.1, pretrained=pretrained, **kwargs)
    return model


def efficientnet_b2(pretrained=False, **kwargs):
    """ EfficientNet-B2 """
    # NOTE for train set drop_rate=0.3, drop_connect_rate=0.2
    model = _gen_efficientnet(
        'efficientnet_b2', channel_multiplier=1.1, depth_multiplier=1.2, pretrained=pretrained, **kwargs)
    return model


def efficientnet_b3(pretrained=False, **kwargs):
    """ EfficientNet-B3 """
    # NOTE for train set drop_rate=0.3, drop_connect_rate=0.2
    model = _gen_efficientnet(
        'efficientnet_b3', channel_multiplier=1.2, depth_multiplier=1.4, pretrained=pretrained, **kwargs)
    return model


def efficientnet_b4(pretrained=False, **kwargs):
    """ EfficientNet-B4 """
    # NOTE for train set drop_rate=0.4, drop_connect_rate=0.2
    model = _gen_efficientnet(
        'efficientnet_b4', channel_multiplier=1.4, depth_multiplier=1.8, pretrained=pretrained, **kwargs)
    return model


def efficientnet_b5(pretrained=False, **kwargs):
    """ EfficientNet-B5 """
    # NOTE for train set drop_rate=0.4, drop_connect_rate=0.2
    model = _gen_efficientnet(
        'efficientnet_b5', channel_multiplier=1.6, depth_multiplier=2.2, pretrained=pretrained, **kwargs)
    return model


def efficientnet_b6(pretrained=False, **kwargs):
    """ EfficientNet-B6 """
    # NOTE for train set drop_rate=0.5, drop_connect_rate=0.2
    model = _gen_efficientnet(
        'efficientnet_b6', channel_multiplier=1.8, depth_multiplier=2.6, pretrained=pretrained, **kwargs)
    return model


def efficientnet_b7(pretrained=False, **kwargs):
    """ EfficientNet-B7 """
    # NOTE for train set drop_rate=0.5, drop_connect_rate=0.2
    model = _gen_efficientnet(
        'efficientnet_b7', channel_multiplier=2.0, depth_multiplier=3.1, pretrained=pretrained, **kwargs)
    return model


def efficientnet_b8(pretrained=False, **kwargs):
    """ EfficientNet-B8 """
    # NOTE for train set drop_rate=0.5, drop_connect_rate=0.2
    model = _gen_efficientnet(
        'efficientnet_b8', channel_multiplier=2.2, depth_multiplier=3.6, pretrained=pretrained, **kwargs)
    return model


def efficientnet_l2(pretrained=False, **kwargs):
    """ EfficientNet-L2. """
    # NOTE for train, drop_rate should be 0.5
    model = _gen_efficientnet(
        'efficientnet_l2', channel_multiplier=4.3, depth_multiplier=5.3, pretrained=pretrained, **kwargs)
    return model

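
# The b0..l2 entrypoints above differ only in their compound-scaling
# coefficients; a sketch tabulating the (channel_multiplier, depth_multiplier)
# pairs exactly as passed in the calls above:
efficientnet_scaling = {
    'b0': (1.0, 1.0), 'b1': (1.0, 1.1), 'b2': (1.1, 1.2), 'b3': (1.2, 1.4),
    'b4': (1.4, 1.8), 'b5': (1.6, 2.2), 'b6': (1.8, 2.6), 'b7': (2.0, 3.1),
    'b8': (2.2, 3.6), 'l2': (4.3, 5.3),
}
for _name, (_width, _depth) in efficientnet_scaling.items():
    print('{}: channel_multiplier={}, depth_multiplier={}'.format(_name, _width, _depth))
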
def efficientnet_es(pretrained=False, **kwargs):
    """ EfficientNet-Edge Small. """
    model = _gen_efficientnet_edge(
        'efficientnet_es', channel_multiplier=1.0, depth_multiplier=1.0, pretrained=pretrained, **kwargs)
    return model


def efficientnet_em(pretrained=False, **kwargs):
    """ EfficientNet-Edge-Medium. """
    model = _gen_efficientnet_edge(
        'efficientnet_em', channel_multiplier=1.0, depth_multiplier=1.1, pretrained=pretrained, **kwargs)
    return model


def efficientnet_el(pretrained=False, **kwargs):
    """ EfficientNet-Edge-Large. """
    model = _gen_efficientnet_edge(
        'efficientnet_el', channel_multiplier=1.2, depth_multiplier=1.4, pretrained=pretrained, **kwargs)
    return model


def efficientnet_cc_b0_4e(pretrained=False, **kwargs):
    """ EfficientNet-CondConv-B0 w/ 4 Experts """
    # NOTE for train set drop_rate=0.25, drop_connect_rate=0.2
    model = _gen_efficientnet_condconv(
        'efficientnet_cc_b0_4e', channel_multiplier=1.0, depth_multiplier=1.0, pretrained=pretrained, **kwargs)
    return model


def efficientnet_cc_b0_8e(pretrained=False, **kwargs):
    """ EfficientNet-CondConv-B0 w/ 8 Experts """
    # NOTE for train set drop_rate=0.25, drop_connect_rate=0.2
    model = _gen_efficientnet_condconv(
        'efficientnet_cc_b0_8e', channel_multiplier=1.0, depth_multiplier=1.0, experts_multiplier=2,
        pretrained=pretrained, **kwargs)
    return model


def efficientnet_cc_b1_8e(pretrained=False, **kwargs):
    """ EfficientNet-CondConv-B1 w/ 8 Experts """
    # NOTE for train set drop_rate=0.25, drop_connect_rate=0.2
    model = _gen_efficientnet_condconv(
        'efficientnet_cc_b1_8e', channel_multiplier=1.0, depth_multiplier=1.1, experts_multiplier=2,
        pretrained=pretrained, **kwargs)
    return model


def efficientnet_lite0(pretrained=False, **kwargs):
    """ EfficientNet-Lite0 """
    model = _gen_efficientnet_lite(
        'efficientnet_lite0', channel_multiplier=1.0, depth_multiplier=1.0, pretrained=pretrained, **kwargs)
    return model


def efficientnet_lite1(pretrained=False, **kwargs):
    """ EfficientNet-Lite1 """
    model = _gen_efficientnet_lite(
        'efficientnet_lite1', channel_multiplier=1.0, depth_multiplier=1.1, pretrained=pretrained, **kwargs)
    return model


def efficientnet_lite2(pretrained=False, **kwargs):
    """ EfficientNet-Lite2 """
    model = _gen_efficientnet_lite(
        'efficientnet_lite2', channel_multiplier=1.1, depth_multiplier=1.2, pretrained=pretrained, **kwargs)
    return model


def efficientnet_lite3(pretrained=False, **kwargs):
    """ EfficientNet-Lite3 """
    model = _gen_efficientnet_lite(
        'efficientnet_lite3', channel_multiplier=1.2, depth_multiplier=1.4, pretrained=pretrained, **kwargs)
    return model


def efficientnet_lite4(pretrained=False, **kwargs):
    """ EfficientNet-Lite4 """
    model = _gen_efficientnet_lite(
        'efficientnet_lite4', channel_multiplier=1.4, depth_multiplier=1.8, pretrained=pretrained, **kwargs)
    return model

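
# Every 'tf_' entrypoint below applies the same two compatibility tweaks before
# delegating to the shared generator, so converted TF weights line up
# numerically. A minimal sketch, assuming the geffnet package in this diff is
# importable and that BN_EPS_TF_DEFAULT carries TF's usual 1e-3 BatchNorm epsilon:
#
#     import geffnet
#     model = geffnet.tf_efficientnet_b0(pretrained=False)
#     # internally equivalent to efficientnet_b0 plus:
#     #   kwargs['bn_eps'] = BN_EPS_TF_DEFAULT  # TF BatchNorm epsilon (assumed 1e-3)
#     #   kwargs['pad_type'] = 'same'           # TF 'SAME' padding semantics
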
def tf_efficientnet_b0(pretrained=False, **kwargs):
    """ EfficientNet-B0 AutoAug. Tensorflow compatible variant """
    kwargs['bn_eps'] = BN_EPS_TF_DEFAULT
    kwargs['pad_type'] = 'same'
    model = _gen_efficientnet(
        'tf_efficientnet_b0', channel_multiplier=1.0, depth_multiplier=1.0, pretrained=pretrained, **kwargs)
    return model


def tf_efficientnet_b1(pretrained=False, **kwargs):
    """ EfficientNet-B1 AutoAug. Tensorflow compatible variant """
    kwargs['bn_eps'] = BN_EPS_TF_DEFAULT
    kwargs['pad_type'] = 'same'
    model = _gen_efficientnet(
        'tf_efficientnet_b1', channel_multiplier=1.0, depth_multiplier=1.1, pretrained=pretrained, **kwargs)
    return model


def tf_efficientnet_b2(pretrained=False, **kwargs):
    """ EfficientNet-B2 AutoAug. Tensorflow compatible variant """
    kwargs['bn_eps'] = BN_EPS_TF_DEFAULT
    kwargs['pad_type'] = 'same'
    model = _gen_efficientnet(
        'tf_efficientnet_b2', channel_multiplier=1.1, depth_multiplier=1.2, pretrained=pretrained, **kwargs)
    return model


def tf_efficientnet_b3(pretrained=False, **kwargs):
    """ EfficientNet-B3 AutoAug. Tensorflow compatible variant """
    kwargs['bn_eps'] = BN_EPS_TF_DEFAULT
    kwargs['pad_type'] = 'same'
    model = _gen_efficientnet(
        'tf_efficientnet_b3', channel_multiplier=1.2, depth_multiplier=1.4, pretrained=pretrained, **kwargs)
    return model


def tf_efficientnet_b4(pretrained=False, **kwargs):
    """ EfficientNet-B4 AutoAug. Tensorflow compatible variant """
    kwargs['bn_eps'] = BN_EPS_TF_DEFAULT
    kwargs['pad_type'] = 'same'
    model = _gen_efficientnet(
        'tf_efficientnet_b4', channel_multiplier=1.4, depth_multiplier=1.8, pretrained=pretrained, **kwargs)
    return model


def tf_efficientnet_b5(pretrained=False, **kwargs):
    """ EfficientNet-B5 RandAug. Tensorflow compatible variant """
    kwargs['bn_eps'] = BN_EPS_TF_DEFAULT
    kwargs['pad_type'] = 'same'
    model = _gen_efficientnet(
        'tf_efficientnet_b5', channel_multiplier=1.6, depth_multiplier=2.2, pretrained=pretrained, **kwargs)
    return model


def tf_efficientnet_b6(pretrained=False, **kwargs):
    """ EfficientNet-B6 AutoAug. Tensorflow compatible variant """
    kwargs['bn_eps'] = BN_EPS_TF_DEFAULT
    kwargs['pad_type'] = 'same'
    model = _gen_efficientnet(
        'tf_efficientnet_b6', channel_multiplier=1.8, depth_multiplier=2.6, pretrained=pretrained, **kwargs)
    return model


def tf_efficientnet_b7(pretrained=False, **kwargs):
    """ EfficientNet-B7 RandAug. Tensorflow compatible variant """
    kwargs['bn_eps'] = BN_EPS_TF_DEFAULT
    kwargs['pad_type'] = 'same'
    model = _gen_efficientnet(
        'tf_efficientnet_b7', channel_multiplier=2.0, depth_multiplier=3.1, pretrained=pretrained, **kwargs)
    return model


def tf_efficientnet_b8(pretrained=False, **kwargs):
    """ EfficientNet-B8 RandAug. Tensorflow compatible variant """
    kwargs['bn_eps'] = BN_EPS_TF_DEFAULT
    kwargs['pad_type'] = 'same'
    model = _gen_efficientnet(
        'tf_efficientnet_b8', channel_multiplier=2.2, depth_multiplier=3.6, pretrained=pretrained, **kwargs)
    return model

def tf_efficientnet_b0_ap(pretrained=False, **kwargs):
    """ EfficientNet-B0 AdvProp. Tensorflow compatible variant
    Paper: Adversarial Examples Improve Image Recognition (https://arxiv.org/abs/1911.09665)
    """
    kwargs['bn_eps'] = BN_EPS_TF_DEFAULT
    kwargs['pad_type'] = 'same'
    model = _gen_efficientnet(
        'tf_efficientnet_b0_ap', channel_multiplier=1.0, depth_multiplier=1.0, pretrained=pretrained, **kwargs)
    return model


def tf_efficientnet_b1_ap(pretrained=False, **kwargs):
    """ EfficientNet-B1 AdvProp. Tensorflow compatible variant
    Paper: Adversarial Examples Improve Image Recognition (https://arxiv.org/abs/1911.09665)
    """
    kwargs['bn_eps'] = BN_EPS_TF_DEFAULT
    kwargs['pad_type'] = 'same'
    model = _gen_efficientnet(
        'tf_efficientnet_b1_ap', channel_multiplier=1.0, depth_multiplier=1.1, pretrained=pretrained, **kwargs)
    return model


def tf_efficientnet_b2_ap(pretrained=False, **kwargs):
    """ EfficientNet-B2 AdvProp. Tensorflow compatible variant
    Paper: Adversarial Examples Improve Image Recognition (https://arxiv.org/abs/1911.09665)
    """
    kwargs['bn_eps'] = BN_EPS_TF_DEFAULT
    kwargs['pad_type'] = 'same'
    model = _gen_efficientnet(
        'tf_efficientnet_b2_ap', channel_multiplier=1.1, depth_multiplier=1.2, pretrained=pretrained, **kwargs)
    return model


def tf_efficientnet_b3_ap(pretrained=False, **kwargs):
    """ EfficientNet-B3 AdvProp. Tensorflow compatible variant
    Paper: Adversarial Examples Improve Image Recognition (https://arxiv.org/abs/1911.09665)
    """
    kwargs['bn_eps'] = BN_EPS_TF_DEFAULT
    kwargs['pad_type'] = 'same'
    model = _gen_efficientnet(
        'tf_efficientnet_b3_ap', channel_multiplier=1.2, depth_multiplier=1.4, pretrained=pretrained, **kwargs)
    return model


def tf_efficientnet_b4_ap(pretrained=False, **kwargs):
    """ EfficientNet-B4 AdvProp. Tensorflow compatible variant
    Paper: Adversarial Examples Improve Image Recognition (https://arxiv.org/abs/1911.09665)
    """
    kwargs['bn_eps'] = BN_EPS_TF_DEFAULT
    kwargs['pad_type'] = 'same'
    model = _gen_efficientnet(
        'tf_efficientnet_b4_ap', channel_multiplier=1.4, depth_multiplier=1.8, pretrained=pretrained, **kwargs)
    return model


def tf_efficientnet_b5_ap(pretrained=False, **kwargs):
    """ EfficientNet-B5 AdvProp. Tensorflow compatible variant
    Paper: Adversarial Examples Improve Image Recognition (https://arxiv.org/abs/1911.09665)
    """
    kwargs['bn_eps'] = BN_EPS_TF_DEFAULT
    kwargs['pad_type'] = 'same'
    model = _gen_efficientnet(
        'tf_efficientnet_b5_ap', channel_multiplier=1.6, depth_multiplier=2.2, pretrained=pretrained, **kwargs)
    return model


def tf_efficientnet_b6_ap(pretrained=False, **kwargs):
    """ EfficientNet-B6 AdvProp. Tensorflow compatible variant
    Paper: Adversarial Examples Improve Image Recognition (https://arxiv.org/abs/1911.09665)
    """
    # NOTE for train, drop_rate should be 0.5
    kwargs['bn_eps'] = BN_EPS_TF_DEFAULT
    kwargs['pad_type'] = 'same'
    model = _gen_efficientnet(
        'tf_efficientnet_b6_ap', channel_multiplier=1.8, depth_multiplier=2.6, pretrained=pretrained, **kwargs)
    return model


def tf_efficientnet_b7_ap(pretrained=False, **kwargs):
    """ EfficientNet-B7 AdvProp. Tensorflow compatible variant
    Paper: Adversarial Examples Improve Image Recognition (https://arxiv.org/abs/1911.09665)
    """
    # NOTE for train, drop_rate should be 0.5
    kwargs['bn_eps'] = BN_EPS_TF_DEFAULT
    kwargs['pad_type'] = 'same'
    model = _gen_efficientnet(
        'tf_efficientnet_b7_ap', channel_multiplier=2.0, depth_multiplier=3.1, pretrained=pretrained, **kwargs)
    return model


def tf_efficientnet_b8_ap(pretrained=False, **kwargs):
    """ EfficientNet-B8 AdvProp. Tensorflow compatible variant
    Paper: Adversarial Examples Improve Image Recognition (https://arxiv.org/abs/1911.09665)
    """
    # NOTE for train, drop_rate should be 0.5
    kwargs['bn_eps'] = BN_EPS_TF_DEFAULT
    kwargs['pad_type'] = 'same'
    model = _gen_efficientnet(
        'tf_efficientnet_b8_ap', channel_multiplier=2.2, depth_multiplier=3.6, pretrained=pretrained, **kwargs)
    return model

def tf_efficientnet_b0_ns(pretrained=False, **kwargs):
    """ EfficientNet-B0 NoisyStudent. Tensorflow compatible variant
    Paper: Self-training with Noisy Student improves ImageNet classification (https://arxiv.org/abs/1911.04252)
    """
    kwargs['bn_eps'] = BN_EPS_TF_DEFAULT
    kwargs['pad_type'] = 'same'
    model = _gen_efficientnet(
        'tf_efficientnet_b0_ns', channel_multiplier=1.0, depth_multiplier=1.0, pretrained=pretrained, **kwargs)
    return model


def tf_efficientnet_b1_ns(pretrained=False, **kwargs):
    """ EfficientNet-B1 NoisyStudent. Tensorflow compatible variant
    Paper: Self-training with Noisy Student improves ImageNet classification (https://arxiv.org/abs/1911.04252)
    """
    kwargs['bn_eps'] = BN_EPS_TF_DEFAULT
    kwargs['pad_type'] = 'same'
    model = _gen_efficientnet(
        'tf_efficientnet_b1_ns', channel_multiplier=1.0, depth_multiplier=1.1, pretrained=pretrained, **kwargs)
    return model


def tf_efficientnet_b2_ns(pretrained=False, **kwargs):
    """ EfficientNet-B2 NoisyStudent. Tensorflow compatible variant
    Paper: Self-training with Noisy Student improves ImageNet classification (https://arxiv.org/abs/1911.04252)
    """
    kwargs['bn_eps'] = BN_EPS_TF_DEFAULT
    kwargs['pad_type'] = 'same'
    model = _gen_efficientnet(
        'tf_efficientnet_b2_ns', channel_multiplier=1.1, depth_multiplier=1.2, pretrained=pretrained, **kwargs)
    return model


def tf_efficientnet_b3_ns(pretrained=False, **kwargs):
    """ EfficientNet-B3 NoisyStudent. Tensorflow compatible variant
    Paper: Self-training with Noisy Student improves ImageNet classification (https://arxiv.org/abs/1911.04252)
    """
    kwargs['bn_eps'] = BN_EPS_TF_DEFAULT
    kwargs['pad_type'] = 'same'
    model = _gen_efficientnet(
        'tf_efficientnet_b3_ns', channel_multiplier=1.2, depth_multiplier=1.4, pretrained=pretrained, **kwargs)
    return model


def tf_efficientnet_b4_ns(pretrained=False, **kwargs):
    """ EfficientNet-B4 NoisyStudent. Tensorflow compatible variant
    Paper: Self-training with Noisy Student improves ImageNet classification (https://arxiv.org/abs/1911.04252)
    """
    kwargs['bn_eps'] = BN_EPS_TF_DEFAULT
    kwargs['pad_type'] = 'same'
    model = _gen_efficientnet(
        'tf_efficientnet_b4_ns', channel_multiplier=1.4, depth_multiplier=1.8, pretrained=pretrained, **kwargs)
    return model


def tf_efficientnet_b5_ns(pretrained=False, **kwargs):
    """ EfficientNet-B5 NoisyStudent. Tensorflow compatible variant
    Paper: Self-training with Noisy Student improves ImageNet classification (https://arxiv.org/abs/1911.04252)
    """
    kwargs['bn_eps'] = BN_EPS_TF_DEFAULT
    kwargs['pad_type'] = 'same'
    model = _gen_efficientnet(
        'tf_efficientnet_b5_ns', channel_multiplier=1.6, depth_multiplier=2.2, pretrained=pretrained, **kwargs)
    return model


def tf_efficientnet_b6_ns(pretrained=False, **kwargs):
    """ EfficientNet-B6 NoisyStudent. Tensorflow compatible variant
    Paper: Self-training with Noisy Student improves ImageNet classification (https://arxiv.org/abs/1911.04252)
    """
    # NOTE for train, drop_rate should be 0.5
    kwargs['bn_eps'] = BN_EPS_TF_DEFAULT
    kwargs['pad_type'] = 'same'
    model = _gen_efficientnet(
        'tf_efficientnet_b6_ns', channel_multiplier=1.8, depth_multiplier=2.6, pretrained=pretrained, **kwargs)
    return model


def tf_efficientnet_b7_ns(pretrained=False, **kwargs):
    """ EfficientNet-B7 NoisyStudent. Tensorflow compatible variant
    Paper: Self-training with Noisy Student improves ImageNet classification (https://arxiv.org/abs/1911.04252)
    """
    # NOTE for train, drop_rate should be 0.5
    kwargs['bn_eps'] = BN_EPS_TF_DEFAULT
    kwargs['pad_type'] = 'same'
    model = _gen_efficientnet(
        'tf_efficientnet_b7_ns', channel_multiplier=2.0, depth_multiplier=3.1, pretrained=pretrained, **kwargs)
    return model


def tf_efficientnet_l2_ns_475(pretrained=False, **kwargs):
    """ EfficientNet-L2 NoisyStudent @ 475x475. Tensorflow compatible variant
    Paper: Self-training with Noisy Student improves ImageNet classification (https://arxiv.org/abs/1911.04252)
    """
    # NOTE for train, drop_rate should be 0.5
    kwargs['bn_eps'] = BN_EPS_TF_DEFAULT
    kwargs['pad_type'] = 'same'
    model = _gen_efficientnet(
        'tf_efficientnet_l2_ns_475', channel_multiplier=4.3, depth_multiplier=5.3, pretrained=pretrained, **kwargs)
    return model


def tf_efficientnet_l2_ns(pretrained=False, **kwargs):
    """ EfficientNet-L2 NoisyStudent. Tensorflow compatible variant
    Paper: Self-training with Noisy Student improves ImageNet classification (https://arxiv.org/abs/1911.04252)
    """
    # NOTE for train, drop_rate should be 0.5
    kwargs['bn_eps'] = BN_EPS_TF_DEFAULT
    kwargs['pad_type'] = 'same'
    model = _gen_efficientnet(
        'tf_efficientnet_l2_ns', channel_multiplier=4.3, depth_multiplier=5.3, pretrained=pretrained, **kwargs)
    return model

def tf_efficientnet_es(pretrained=False, **kwargs):
    """ EfficientNet-Edge Small. Tensorflow compatible variant """
    kwargs['bn_eps'] = BN_EPS_TF_DEFAULT
    kwargs['pad_type'] = 'same'
    model = _gen_efficientnet_edge(
        'tf_efficientnet_es', channel_multiplier=1.0, depth_multiplier=1.0, pretrained=pretrained, **kwargs)
    return model


def tf_efficientnet_em(pretrained=False, **kwargs):
    """ EfficientNet-Edge-Medium. Tensorflow compatible variant """
    kwargs['bn_eps'] = BN_EPS_TF_DEFAULT
    kwargs['pad_type'] = 'same'
    model = _gen_efficientnet_edge(
        'tf_efficientnet_em', channel_multiplier=1.0, depth_multiplier=1.1, pretrained=pretrained, **kwargs)
    return model


def tf_efficientnet_el(pretrained=False, **kwargs):
    """ EfficientNet-Edge-Large. Tensorflow compatible variant """
    kwargs['bn_eps'] = BN_EPS_TF_DEFAULT
    kwargs['pad_type'] = 'same'
    model = _gen_efficientnet_edge(
        'tf_efficientnet_el', channel_multiplier=1.2, depth_multiplier=1.4, pretrained=pretrained, **kwargs)
    return model


def tf_efficientnet_cc_b0_4e(pretrained=False, **kwargs):
    """ EfficientNet-CondConv-B0 w/ 4 Experts """
    kwargs['bn_eps'] = BN_EPS_TF_DEFAULT
    kwargs['pad_type'] = 'same'
    model = _gen_efficientnet_condconv(
        'tf_efficientnet_cc_b0_4e', channel_multiplier=1.0, depth_multiplier=1.0, pretrained=pretrained, **kwargs)
    return model


def tf_efficientnet_cc_b0_8e(pretrained=False, **kwargs):
    """ EfficientNet-CondConv-B0 w/ 8 Experts """
    kwargs['bn_eps'] = BN_EPS_TF_DEFAULT
    kwargs['pad_type'] = 'same'
    model = _gen_efficientnet_condconv(
        'tf_efficientnet_cc_b0_8e', channel_multiplier=1.0, depth_multiplier=1.0, experts_multiplier=2,
        pretrained=pretrained, **kwargs)
    return model


def tf_efficientnet_cc_b1_8e(pretrained=False, **kwargs):
    """ EfficientNet-CondConv-B1 w/ 8 Experts """
    kwargs['bn_eps'] = BN_EPS_TF_DEFAULT
    kwargs['pad_type'] = 'same'
    model = _gen_efficientnet_condconv(
        'tf_efficientnet_cc_b1_8e', channel_multiplier=1.0, depth_multiplier=1.1, experts_multiplier=2,
        pretrained=pretrained, **kwargs)
    return model


def tf_efficientnet_lite0(pretrained=False, **kwargs):
    """ EfficientNet-Lite0. Tensorflow compatible variant """
    kwargs['bn_eps'] = BN_EPS_TF_DEFAULT
    kwargs['pad_type'] = 'same'
    model = _gen_efficientnet_lite(
        'tf_efficientnet_lite0', channel_multiplier=1.0, depth_multiplier=1.0, pretrained=pretrained, **kwargs)
    return model


def tf_efficientnet_lite1(pretrained=False, **kwargs):
    """ EfficientNet-Lite1. Tensorflow compatible variant """
    kwargs['bn_eps'] = BN_EPS_TF_DEFAULT
    kwargs['pad_type'] = 'same'
    model = _gen_efficientnet_lite(
        'tf_efficientnet_lite1', channel_multiplier=1.0, depth_multiplier=1.1, pretrained=pretrained, **kwargs)
    return model


def tf_efficientnet_lite2(pretrained=False, **kwargs):
    """ EfficientNet-Lite2. Tensorflow compatible variant """
    kwargs['bn_eps'] = BN_EPS_TF_DEFAULT
    kwargs['pad_type'] = 'same'
    model = _gen_efficientnet_lite(
        'tf_efficientnet_lite2', channel_multiplier=1.1, depth_multiplier=1.2, pretrained=pretrained, **kwargs)
    return model


def tf_efficientnet_lite3(pretrained=False, **kwargs):
    """ EfficientNet-Lite3. Tensorflow compatible variant """
    kwargs['bn_eps'] = BN_EPS_TF_DEFAULT
    kwargs['pad_type'] = 'same'
    model = _gen_efficientnet_lite(
        'tf_efficientnet_lite3', channel_multiplier=1.2, depth_multiplier=1.4, pretrained=pretrained, **kwargs)
    return model


def tf_efficientnet_lite4(pretrained=False, **kwargs):
    """ EfficientNet-Lite4. Tensorflow compatible variant """
    kwargs['bn_eps'] = BN_EPS_TF_DEFAULT
    kwargs['pad_type'] = 'same'
    model = _gen_efficientnet_lite(
        'tf_efficientnet_lite4', channel_multiplier=1.4, depth_multiplier=1.8, pretrained=pretrained, **kwargs)
    return model

def mixnet_s(pretrained=False, **kwargs):
    """Creates a MixNet Small model.
    """
    # NOTE for train set drop_rate=0.2
    model = _gen_mixnet_s(
        'mixnet_s', channel_multiplier=1.0, pretrained=pretrained, **kwargs)
    return model


def mixnet_m(pretrained=False, **kwargs):
    """Creates a MixNet Medium model.
    """
    # NOTE for train set drop_rate=0.25
    model = _gen_mixnet_m(
        'mixnet_m', channel_multiplier=1.0, pretrained=pretrained, **kwargs)
    return model


def mixnet_l(pretrained=False, **kwargs):
    """Creates a MixNet Large model.
    """
    # NOTE for train set drop_rate=0.25
    model = _gen_mixnet_m(
        'mixnet_l', channel_multiplier=1.3, pretrained=pretrained, **kwargs)
    return model


def mixnet_xl(pretrained=False, **kwargs):
    """Creates a MixNet Extra-Large model.
    Not a paper spec, experimental def by RW w/ depth scaling.
    """
    # NOTE for train set drop_rate=0.25, drop_connect_rate=0.2
    model = _gen_mixnet_m(
        'mixnet_xl', channel_multiplier=1.6, depth_multiplier=1.2, pretrained=pretrained, **kwargs)
    return model


def mixnet_xxl(pretrained=False, **kwargs):
    """Creates a MixNet Double Extra Large model.
    Not a paper spec, experimental def by RW w/ depth scaling.
    """
    # NOTE for train set drop_rate=0.3, drop_connect_rate=0.2
    model = _gen_mixnet_m(
        'mixnet_xxl', channel_multiplier=2.4, depth_multiplier=1.3, pretrained=pretrained, **kwargs)
    return model


def tf_mixnet_s(pretrained=False, **kwargs):
    """Creates a MixNet Small model. Tensorflow compatible variant
    """
    kwargs['bn_eps'] = BN_EPS_TF_DEFAULT
    kwargs['pad_type'] = 'same'
    model = _gen_mixnet_s(
        'tf_mixnet_s', channel_multiplier=1.0, pretrained=pretrained, **kwargs)
    return model


def tf_mixnet_m(pretrained=False, **kwargs):
    """Creates a MixNet Medium model. Tensorflow compatible variant
    """
    kwargs['bn_eps'] = BN_EPS_TF_DEFAULT
    kwargs['pad_type'] = 'same'
    model = _gen_mixnet_m(
        'tf_mixnet_m', channel_multiplier=1.0, pretrained=pretrained, **kwargs)
    return model


def tf_mixnet_l(pretrained=False, **kwargs):
    """Creates a MixNet Large model. Tensorflow compatible variant
    """
    kwargs['bn_eps'] = BN_EPS_TF_DEFAULT
    kwargs['pad_type'] = 'same'
    model = _gen_mixnet_m(
        'tf_mixnet_l', channel_multiplier=1.3, pretrained=pretrained, **kwargs)
    return model
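
A minimal forward-pass sketch for the entrypoints above, assuming the geffnet package from this diff is on the import path; the drop_rate/drop_connect_rate values mirror the training-time settings the NOTE comments recommend and flow through **kwargs:

import torch
import geffnet

model = geffnet.efficientnet_b0(pretrained=False, drop_rate=0.2, drop_connect_rate=0.2)
model.eval()
with torch.no_grad():
    logits = model(torch.randn(1, 3, 224, 224))
print(logits.shape)  # torch.Size([1, 1000])
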
extensions/microsoftexcel-controlnet/annotator/normalbae/models/submodules/efficientnet_repo/geffnet/helpers.py
ADDED
@@ -0,0 +1,71 @@
""" Checkpoint loading / state_dict helpers
Copyright 2020 Ross Wightman
"""
import torch
import os
from collections import OrderedDict
try:
    from torch.hub import load_state_dict_from_url
except ImportError:
    from torch.utils.model_zoo import load_url as load_state_dict_from_url


def load_checkpoint(model, checkpoint_path):
    if checkpoint_path and os.path.isfile(checkpoint_path):
        print("=> Loading checkpoint '{}'".format(checkpoint_path))
        checkpoint = torch.load(checkpoint_path)
        if isinstance(checkpoint, dict) and 'state_dict' in checkpoint:
            new_state_dict = OrderedDict()
            for k, v in checkpoint['state_dict'].items():
                if k.startswith('module'):
                    name = k[7:]  # remove `module.`
                else:
                    name = k
                new_state_dict[name] = v
            model.load_state_dict(new_state_dict)
        else:
            model.load_state_dict(checkpoint)
        print("=> Loaded checkpoint '{}'".format(checkpoint_path))
    else:
        print("=> Error: No checkpoint found at '{}'".format(checkpoint_path))
        raise FileNotFoundError()


def load_pretrained(model, url, filter_fn=None, strict=True):
    if not url:
        print("=> Warning: Pretrained model URL is empty, using random initialization.")
        return

    state_dict = load_state_dict_from_url(url, progress=False, map_location='cpu')

    input_conv = 'conv_stem'
    classifier = 'classifier'
    in_chans = getattr(model, input_conv).weight.shape[1]
    num_classes = getattr(model, classifier).weight.shape[0]

    input_conv_weight = input_conv + '.weight'
    pretrained_in_chans = state_dict[input_conv_weight].shape[1]
    if in_chans != pretrained_in_chans:
        if in_chans == 1:
            print('=> Converting pretrained input conv {} from {} to 1 channel'.format(
                input_conv_weight, pretrained_in_chans))
            conv1_weight = state_dict[input_conv_weight]
            state_dict[input_conv_weight] = conv1_weight.sum(dim=1, keepdim=True)
        else:
            print('=> Discarding pretrained input conv {} since input channel count != {}'.format(
                input_conv_weight, pretrained_in_chans))
            del state_dict[input_conv_weight]
            strict = False

    classifier_weight = classifier + '.weight'
    pretrained_num_classes = state_dict[classifier_weight].shape[0]
    if num_classes != pretrained_num_classes:
        print('=> Discarding pretrained classifier since num_classes != {}'.format(pretrained_num_classes))
        del state_dict[classifier_weight]
        del state_dict[classifier + '.bias']
        strict = False

    if filter_fn is not None:
        state_dict = filter_fn(state_dict)

    model.load_state_dict(state_dict, strict=strict)
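
The in_chans == 1 branch of load_pretrained above adapts an RGB stem to grayscale input by summing the pretrained kernel over its input-channel dimension; a self-contained sketch of just that transform:

import torch

conv1_weight = torch.randn(32, 3, 3, 3)              # pretrained stem: out=32, in=3, 3x3
gray_weight = conv1_weight.sum(dim=1, keepdim=True)  # collapse RGB -> 1 channel
assert gray_weight.shape == (32, 1, 3, 3)
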
extensions/microsoftexcel-controlnet/annotator/normalbae/models/submodules/efficientnet_repo/geffnet/mobilenetv3.py
ADDED
@@ -0,0 +1,364 @@
""" MobileNet-V3

A PyTorch impl of MobileNet-V3, compatible with TF weights from official impl.

Paper: Searching for MobileNetV3 - https://arxiv.org/abs/1905.02244

Hacked together by / Copyright 2020 Ross Wightman
"""
import torch.nn as nn
import torch.nn.functional as F

from .activations import get_act_fn, get_act_layer, HardSwish
from .config import layer_config_kwargs
from .conv2d_layers import select_conv2d
from .helpers import load_pretrained
from .efficientnet_builder import *

__all__ = ['mobilenetv3_rw', 'mobilenetv3_large_075', 'mobilenetv3_large_100', 'mobilenetv3_large_minimal_100',
           'mobilenetv3_small_075', 'mobilenetv3_small_100', 'mobilenetv3_small_minimal_100',
           'tf_mobilenetv3_large_075', 'tf_mobilenetv3_large_100', 'tf_mobilenetv3_large_minimal_100',
           'tf_mobilenetv3_small_075', 'tf_mobilenetv3_small_100', 'tf_mobilenetv3_small_minimal_100']

model_urls = {
    'mobilenetv3_rw':
        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/mobilenetv3_100-35495452.pth',
    'mobilenetv3_large_075': None,
    'mobilenetv3_large_100':
        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/mobilenetv3_large_100_ra-f55367f5.pth',
    'mobilenetv3_large_minimal_100': None,
    'mobilenetv3_small_075': None,
    'mobilenetv3_small_100': None,
    'mobilenetv3_small_minimal_100': None,
    'tf_mobilenetv3_large_075':
        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_mobilenetv3_large_075-150ee8b0.pth',
    'tf_mobilenetv3_large_100':
        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_mobilenetv3_large_100-427764d5.pth',
    'tf_mobilenetv3_large_minimal_100':
        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_mobilenetv3_large_minimal_100-8596ae28.pth',
    'tf_mobilenetv3_small_075':
        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_mobilenetv3_small_075-da427f52.pth',
    'tf_mobilenetv3_small_100':
        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_mobilenetv3_small_100-37f49e2b.pth',
    'tf_mobilenetv3_small_minimal_100':
        'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_mobilenetv3_small_minimal_100-922a7843.pth',
}


class MobileNetV3(nn.Module):
    """ MobileNet-V3

    This model utilizes the MobileNet-V3 specific 'efficient head', where global pooling is done before the
    head convolution, without a final batch-norm layer before the classifier.

    Paper: https://arxiv.org/abs/1905.02244
    """

    def __init__(self, block_args, num_classes=1000, in_chans=3, stem_size=16, num_features=1280, head_bias=True,
                 channel_multiplier=1.0, pad_type='', act_layer=HardSwish, drop_rate=0., drop_connect_rate=0.,
                 se_kwargs=None, norm_layer=nn.BatchNorm2d, norm_kwargs=None, weight_init='goog'):
        super(MobileNetV3, self).__init__()
        self.drop_rate = drop_rate

        stem_size = round_channels(stem_size, channel_multiplier)
        self.conv_stem = select_conv2d(in_chans, stem_size, 3, stride=2, padding=pad_type)
        self.bn1 = nn.BatchNorm2d(stem_size, **norm_kwargs)
        self.act1 = act_layer(inplace=True)
        in_chs = stem_size

        builder = EfficientNetBuilder(
            channel_multiplier, pad_type=pad_type, act_layer=act_layer, se_kwargs=se_kwargs,
            norm_layer=norm_layer, norm_kwargs=norm_kwargs, drop_connect_rate=drop_connect_rate)
        self.blocks = nn.Sequential(*builder(in_chs, block_args))
        in_chs = builder.in_chs

        self.global_pool = nn.AdaptiveAvgPool2d(1)
        self.conv_head = select_conv2d(in_chs, num_features, 1, padding=pad_type, bias=head_bias)
        self.act2 = act_layer(inplace=True)
        self.classifier = nn.Linear(num_features, num_classes)

        for m in self.modules():
            if weight_init == 'goog':
                initialize_weight_goog(m)
            else:
                initialize_weight_default(m)

    def as_sequential(self):
        layers = [self.conv_stem, self.bn1, self.act1]
        layers.extend(self.blocks)
        layers.extend([
            self.global_pool, self.conv_head, self.act2,
            nn.Flatten(), nn.Dropout(self.drop_rate), self.classifier])
        return nn.Sequential(*layers)

    def features(self, x):
        x = self.conv_stem(x)
        x = self.bn1(x)
        x = self.act1(x)
        x = self.blocks(x)
        x = self.global_pool(x)
        x = self.conv_head(x)
        x = self.act2(x)
        return x

    def forward(self, x):
        x = self.features(x)
        x = x.flatten(1)
        if self.drop_rate > 0.:
            x = F.dropout(x, p=self.drop_rate, training=self.training)
        return self.classifier(x)


def _create_model(model_kwargs, variant, pretrained=False):
    as_sequential = model_kwargs.pop('as_sequential', False)
    model = MobileNetV3(**model_kwargs)
    if pretrained and model_urls[variant]:
        load_pretrained(model, model_urls[variant])
    if as_sequential:
        model = model.as_sequential()
    return model

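
# A sketch of the 'efficient head' ordering the class docstring describes: global
# pooling runs before conv_head, so the 1x1 head convolution sees a 1x1 map and no
# batch norm sits between it and the classifier. Channel counts below are
# hypothetical, chosen only for illustration:
#
#     pool = nn.AdaptiveAvgPool2d(1)
#     head = nn.Conv2d(160, 1280, 1)
#     x = torch.randn(1, 160, 7, 7)
#     y = head(pool(x))              # conv on a 1x1 map: (1, 1280, 1, 1)
#     y.flatten(1)                   # (1, 1280), ready for nn.Linear
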
def _gen_mobilenet_v3_rw(variant, channel_multiplier=1.0, pretrained=False, **kwargs):
    """Creates a MobileNet-V3 model (RW variant).

    Paper: https://arxiv.org/abs/1905.02244

    This was my first attempt at reproducing the MobileNet-V3 from paper alone. It came close to the
    eventual Tensorflow reference impl but has a few differences:
    1. This model has no bias on the head convolution
    2. This model forces no residual (noskip) on the first DWS block; this is different from MnasNet
    3. This model always uses ReLU for the SE activation layer; other models in the family inherit their act layer
       from their parent block
    4. This model does not enforce the divisible-by-8 limitation on the SE reduction channel count

    Overall the changes are fairly minor and result in a very small parameter count difference and no
    significant difference in top-1/5 accuracy.

    Args:
      channel_multiplier: multiplier to number of channels per layer.
    """
    arch_def = [
        # stage 0, 112x112 in
        ['ds_r1_k3_s1_e1_c16_nre_noskip'],  # relu
        # stage 1, 112x112 in
        ['ir_r1_k3_s2_e4_c24_nre', 'ir_r1_k3_s1_e3_c24_nre'],  # relu
        # stage 2, 56x56 in
        ['ir_r3_k5_s2_e3_c40_se0.25_nre'],  # relu
        # stage 3, 28x28 in
        ['ir_r1_k3_s2_e6_c80', 'ir_r1_k3_s1_e2.5_c80', 'ir_r2_k3_s1_e2.3_c80'],  # hard-swish
        # stage 4, 14x14in
        ['ir_r2_k3_s1_e6_c112_se0.25'],  # hard-swish
        # stage 5, 14x14in
        ['ir_r3_k5_s2_e6_c160_se0.25'],  # hard-swish
        # stage 6, 7x7 in
        ['cn_r1_k1_s1_c960'],  # hard-swish
    ]
    with layer_config_kwargs(kwargs):
        model_kwargs = dict(
            block_args=decode_arch_def(arch_def),
            head_bias=False,  # one of my mistakes
            channel_multiplier=channel_multiplier,
            act_layer=resolve_act_layer(kwargs, 'hard_swish'),
            se_kwargs=dict(gate_fn=get_act_fn('hard_sigmoid'), reduce_mid=True),
            norm_kwargs=resolve_bn_args(kwargs),
            **kwargs,
        )
        model = _create_model(model_kwargs, variant, pretrained)
    return model


def _gen_mobilenet_v3(variant, channel_multiplier=1.0, pretrained=False, **kwargs):
    """Creates MobileNet-V3 large/small/minimal models.

    Ref impl: https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet_v3.py
    Paper: https://arxiv.org/abs/1905.02244

    Args:
      channel_multiplier: multiplier to number of channels per layer.
    """
    if 'small' in variant:
        num_features = 1024
        if 'minimal' in variant:
            act_layer = 'relu'
            arch_def = [
                # stage 0, 112x112 in
                ['ds_r1_k3_s2_e1_c16'],
                # stage 1, 56x56 in
                ['ir_r1_k3_s2_e4.5_c24', 'ir_r1_k3_s1_e3.67_c24'],
                # stage 2, 28x28 in
                ['ir_r1_k3_s2_e4_c40', 'ir_r2_k3_s1_e6_c40'],
                # stage 3, 14x14 in
                ['ir_r2_k3_s1_e3_c48'],
                # stage 4, 14x14in
                ['ir_r3_k3_s2_e6_c96'],
                # stage 6, 7x7 in
                ['cn_r1_k1_s1_c576'],
            ]
        else:
            act_layer = 'hard_swish'
            arch_def = [
                # stage 0, 112x112 in
                ['ds_r1_k3_s2_e1_c16_se0.25_nre'],  # relu
                # stage 1, 56x56 in
                ['ir_r1_k3_s2_e4.5_c24_nre', 'ir_r1_k3_s1_e3.67_c24_nre'],  # relu
                # stage 2, 28x28 in
                ['ir_r1_k5_s2_e4_c40_se0.25', 'ir_r2_k5_s1_e6_c40_se0.25'],  # hard-swish
                # stage 3, 14x14 in
                ['ir_r2_k5_s1_e3_c48_se0.25'],  # hard-swish
                # stage 4, 14x14in
                ['ir_r3_k5_s2_e6_c96_se0.25'],  # hard-swish
                # stage 6, 7x7 in
                ['cn_r1_k1_s1_c576'],  # hard-swish
            ]
    else:
        num_features = 1280
        if 'minimal' in variant:
            act_layer = 'relu'
            arch_def = [
                # stage 0, 112x112 in
                ['ds_r1_k3_s1_e1_c16'],
                # stage 1, 112x112 in
                ['ir_r1_k3_s2_e4_c24', 'ir_r1_k3_s1_e3_c24'],
                # stage 2, 56x56 in
                ['ir_r3_k3_s2_e3_c40'],
                # stage 3, 28x28 in
                ['ir_r1_k3_s2_e6_c80', 'ir_r1_k3_s1_e2.5_c80', 'ir_r2_k3_s1_e2.3_c80'],
                # stage 4, 14x14in
                ['ir_r2_k3_s1_e6_c112'],
                # stage 5, 14x14in
                ['ir_r3_k3_s2_e6_c160'],
                # stage 6, 7x7 in
                ['cn_r1_k1_s1_c960'],
            ]
        else:
            act_layer = 'hard_swish'
            arch_def = [
                # stage 0, 112x112 in
                ['ds_r1_k3_s1_e1_c16_nre'],  # relu
                # stage 1, 112x112 in
                ['ir_r1_k3_s2_e4_c24_nre', 'ir_r1_k3_s1_e3_c24_nre'],  # relu
                # stage 2, 56x56 in
                ['ir_r3_k5_s2_e3_c40_se0.25_nre'],  # relu
                # stage 3, 28x28 in
                ['ir_r1_k3_s2_e6_c80', 'ir_r1_k3_s1_e2.5_c80', 'ir_r2_k3_s1_e2.3_c80'],  # hard-swish
                # stage 4, 14x14in
                ['ir_r2_k3_s1_e6_c112_se0.25'],  # hard-swish
                # stage 5, 14x14in
                ['ir_r3_k5_s2_e6_c160_se0.25'],  # hard-swish
                # stage 6, 7x7 in
                ['cn_r1_k1_s1_c960'],  # hard-swish
            ]
    with layer_config_kwargs(kwargs):
        model_kwargs = dict(
            block_args=decode_arch_def(arch_def),
            num_features=num_features,
            stem_size=16,
            channel_multiplier=channel_multiplier,
            act_layer=resolve_act_layer(kwargs, act_layer),
            se_kwargs=dict(
                act_layer=get_act_layer('relu'), gate_fn=get_act_fn('hard_sigmoid'), reduce_mid=True, divisor=8),
            norm_kwargs=resolve_bn_args(kwargs),
            **kwargs,
        )
        model = _create_model(model_kwargs, variant, pretrained)
    return model


def mobilenetv3_rw(pretrained=False, **kwargs):
    """ MobileNet-V3 RW
    Attn: See note in gen function for this variant.
    """
    # NOTE for train set drop_rate=0.2
    if pretrained:
        # pretrained model trained with non-default BN epsilon
        kwargs['bn_eps'] = BN_EPS_TF_DEFAULT
    model = _gen_mobilenet_v3_rw('mobilenetv3_rw', 1.0, pretrained=pretrained, **kwargs)
    return model


def mobilenetv3_large_075(pretrained=False, **kwargs):
    """ MobileNet V3 Large 0.75 """
    # NOTE for train set drop_rate=0.2
    model = _gen_mobilenet_v3('mobilenetv3_large_075', 0.75, pretrained=pretrained, **kwargs)
    return model


def mobilenetv3_large_100(pretrained=False, **kwargs):
    """ MobileNet V3 Large 1.0 """
    # NOTE for train set drop_rate=0.2
    model = _gen_mobilenet_v3('mobilenetv3_large_100', 1.0, pretrained=pretrained, **kwargs)
    return model


def mobilenetv3_large_minimal_100(pretrained=False, **kwargs):
    """ MobileNet V3 Large (Minimalistic) 1.0 """
    # NOTE for train set drop_rate=0.2
    model = _gen_mobilenet_v3('mobilenetv3_large_minimal_100', 1.0, pretrained=pretrained, **kwargs)
    return model


def mobilenetv3_small_075(pretrained=False, **kwargs):
    """ MobileNet V3 Small 0.75 """
    model = _gen_mobilenet_v3('mobilenetv3_small_075', 0.75, pretrained=pretrained, **kwargs)
    return model


def mobilenetv3_small_100(pretrained=False, **kwargs):
    """ MobileNet V3 Small 1.0 """
    model = _gen_mobilenet_v3('mobilenetv3_small_100', 1.0, pretrained=pretrained, **kwargs)
    return model


def mobilenetv3_small_minimal_100(pretrained=False, **kwargs):
    """ MobileNet V3 Small (Minimalistic) 1.0 """
    model = _gen_mobilenet_v3('mobilenetv3_small_minimal_100', 1.0, pretrained=pretrained, **kwargs)
    return model


def tf_mobilenetv3_large_075(pretrained=False, **kwargs):
    """ MobileNet V3 Large 0.75. Tensorflow compat variant. """
    kwargs['bn_eps'] = BN_EPS_TF_DEFAULT
    kwargs['pad_type'] = 'same'
    model = _gen_mobilenet_v3('tf_mobilenetv3_large_075', 0.75, pretrained=pretrained, **kwargs)
    return model


def tf_mobilenetv3_large_100(pretrained=False, **kwargs):
    """ MobileNet V3 Large 1.0. Tensorflow compat variant. """
    kwargs['bn_eps'] = BN_EPS_TF_DEFAULT
    kwargs['pad_type'] = 'same'
    model = _gen_mobilenet_v3('tf_mobilenetv3_large_100', 1.0, pretrained=pretrained, **kwargs)
    return model


def tf_mobilenetv3_large_minimal_100(pretrained=False, **kwargs):
    """ MobileNet V3 Large Minimalistic 1.0. Tensorflow compat variant. """
    kwargs['bn_eps'] = BN_EPS_TF_DEFAULT
    kwargs['pad_type'] = 'same'
    model = _gen_mobilenet_v3('tf_mobilenetv3_large_minimal_100', 1.0, pretrained=pretrained, **kwargs)
    return model


def tf_mobilenetv3_small_075(pretrained=False, **kwargs):
    """ MobileNet V3 Small 0.75. Tensorflow compat variant. """
    kwargs['bn_eps'] = BN_EPS_TF_DEFAULT
    kwargs['pad_type'] = 'same'
    model = _gen_mobilenet_v3('tf_mobilenetv3_small_075', 0.75, pretrained=pretrained, **kwargs)
    return model


def tf_mobilenetv3_small_100(pretrained=False, **kwargs):
    """ MobileNet V3 Small 1.0. Tensorflow compat variant. """
    kwargs['bn_eps'] = BN_EPS_TF_DEFAULT
    kwargs['pad_type'] = 'same'
    model = _gen_mobilenet_v3('tf_mobilenetv3_small_100', 1.0, pretrained=pretrained, **kwargs)
    return model


def tf_mobilenetv3_small_minimal_100(pretrained=False, **kwargs):
    """ MobileNet V3 Small Minimalistic 1.0. Tensorflow compat variant. """
    kwargs['bn_eps'] = BN_EPS_TF_DEFAULT
    kwargs['pad_type'] = 'same'
    model = _gen_mobilenet_v3('tf_mobilenetv3_small_minimal_100', 1.0, pretrained=pretrained, **kwargs)
    return model
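
An end-to-end sketch of the entrypoints above, assuming geffnet is importable; for the small variants, features() returns the pooled head activations, a (1, 1024, 1, 1) tensor, since num_features is 1024 there:

import torch
import geffnet

m = geffnet.mobilenetv3_small_100(pretrained=False)
m.eval()
with torch.no_grad():
    feats = m.features(torch.randn(1, 3, 224, 224))   # (1, 1024, 1, 1)
    logits = m(torch.randn(1, 3, 224, 224))           # (1, 1000)
print(feats.shape, logits.shape)
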
extensions/microsoftexcel-controlnet/annotator/normalbae/models/submodules/efficientnet_repo/geffnet/model_factory.py
ADDED
@@ -0,0 +1,27 @@
from .config import set_layer_config
from .helpers import load_checkpoint

from .gen_efficientnet import *
from .mobilenetv3 import *


def create_model(
        model_name='mnasnet_100',
        pretrained=None,
        num_classes=1000,
        in_chans=3,
        checkpoint_path='',
        **kwargs):

    model_kwargs = dict(num_classes=num_classes, in_chans=in_chans, pretrained=pretrained, **kwargs)

    if model_name in globals():
        create_fn = globals()[model_name]
        model = create_fn(**model_kwargs)
    else:
        raise RuntimeError('Unknown model (%s)' % model_name)

    if checkpoint_path and not pretrained:
        load_checkpoint(model, checkpoint_path)

    return model
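
A usage sketch for the factory above: create_model resolves the entrypoint by name from the star-imports in this module's globals(), so any function name defined in gen_efficientnet.py or mobilenetv3.py works as model_name:

from geffnet.model_factory import create_model

# builds mobilenetv3_large_100 with a fresh 10-class head; num_classes and
# in_chans are forwarded into the entrypoint's kwargs
model = create_model('mobilenetv3_large_100', pretrained=False, num_classes=10)
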
extensions/microsoftexcel-controlnet/annotator/normalbae/models/submodules/efficientnet_repo/geffnet/version.py
ADDED
@@ -0,0 +1 @@
__version__ = '1.0.2'
extensions/microsoftexcel-controlnet/annotator/normalbae/models/submodules/efficientnet_repo/hubconf.py
ADDED
@@ -0,0 +1,84 @@
+dependencies = ['torch', 'math']
+
+from geffnet import efficientnet_b0
+from geffnet import efficientnet_b1
+from geffnet import efficientnet_b2
+from geffnet import efficientnet_b3
+
+from geffnet import efficientnet_es
+
+from geffnet import efficientnet_lite0
+
+from geffnet import mixnet_s
+from geffnet import mixnet_m
+from geffnet import mixnet_l
+from geffnet import mixnet_xl
+
+from geffnet import mobilenetv2_100
+from geffnet import mobilenetv2_110d
+from geffnet import mobilenetv2_120d
+from geffnet import mobilenetv2_140
+
+from geffnet import mobilenetv3_large_100
+from geffnet import mobilenetv3_rw
+from geffnet import mnasnet_a1
+from geffnet import mnasnet_b1
+from geffnet import fbnetc_100
+from geffnet import spnasnet_100
+
+from geffnet import tf_efficientnet_b0
+from geffnet import tf_efficientnet_b1
+from geffnet import tf_efficientnet_b2
+from geffnet import tf_efficientnet_b3
+from geffnet import tf_efficientnet_b4
+from geffnet import tf_efficientnet_b5
+from geffnet import tf_efficientnet_b6
+from geffnet import tf_efficientnet_b7
+from geffnet import tf_efficientnet_b8
+
+from geffnet import tf_efficientnet_b0_ap
+from geffnet import tf_efficientnet_b1_ap
+from geffnet import tf_efficientnet_b2_ap
+from geffnet import tf_efficientnet_b3_ap
+from geffnet import tf_efficientnet_b4_ap
+from geffnet import tf_efficientnet_b5_ap
+from geffnet import tf_efficientnet_b6_ap
+from geffnet import tf_efficientnet_b7_ap
+from geffnet import tf_efficientnet_b8_ap
+
+from geffnet import tf_efficientnet_b0_ns
+from geffnet import tf_efficientnet_b1_ns
+from geffnet import tf_efficientnet_b2_ns
+from geffnet import tf_efficientnet_b3_ns
+from geffnet import tf_efficientnet_b4_ns
+from geffnet import tf_efficientnet_b5_ns
+from geffnet import tf_efficientnet_b6_ns
+from geffnet import tf_efficientnet_b7_ns
+from geffnet import tf_efficientnet_l2_ns_475
+from geffnet import tf_efficientnet_l2_ns
+
+from geffnet import tf_efficientnet_es
+from geffnet import tf_efficientnet_em
+from geffnet import tf_efficientnet_el
+
+from geffnet import tf_efficientnet_cc_b0_4e
+from geffnet import tf_efficientnet_cc_b0_8e
+from geffnet import tf_efficientnet_cc_b1_8e
+
+from geffnet import tf_efficientnet_lite0
+from geffnet import tf_efficientnet_lite1
+from geffnet import tf_efficientnet_lite2
+from geffnet import tf_efficientnet_lite3
+from geffnet import tf_efficientnet_lite4
+
+from geffnet import tf_mixnet_s
+from geffnet import tf_mixnet_m
+from geffnet import tf_mixnet_l
+
+from geffnet import tf_mobilenetv3_large_075
+from geffnet import tf_mobilenetv3_large_100
+from geffnet import tf_mobilenetv3_large_minimal_100
+from geffnet import tf_mobilenetv3_small_075
+from geffnet import tf_mobilenetv3_small_100
+from geffnet import tf_mobilenetv3_small_minimal_100
+
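
Since hubconf.py re-exports these geffnet entrypoints, the models are loadable through torch.hub; a sketch, assuming the upstream repo path `rwightman/gen-efficientnet-pytorch`:

    import torch

    # entrypoint names match the geffnet imports listed above
    model = torch.hub.load('rwightman/gen-efficientnet-pytorch', 'mobilenetv3_large_100', pretrained=True)
    model.eval()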
extensions/microsoftexcel-controlnet/annotator/normalbae/models/submodules/efficientnet_repo/onnx_export.py
ADDED
@@ -0,0 +1,120 @@
+""" ONNX export script
+
+Export PyTorch models as ONNX graphs.
+
+This export script originally started as an adaptation of code snippets found at
+https://pytorch.org/tutorials/advanced/super_resolution_with_onnxruntime.html
+
+The default parameters work with PyTorch 1.6 and ONNX 1.7 and produce an optimal ONNX graph
+for hosting in the ONNX runtime (see onnx_validate.py). To export an ONNX model compatible
+with caffe2 (see caffe2_benchmark.py and caffe2_validate.py), the --keep-init and --aten-fallback
+flags are currently required.
+
+Older versions of PyTorch/ONNX (tested PyTorch 1.4, ONNX 1.5) do not need extra flags for
+caffe2 compatibility, but they produce a model that isn't as fast running on ONNX runtime.
+
+Most new releases of PyTorch and ONNX cause some sort of breakage in the export / usage of ONNX models.
+Please do your research and search the ONNX and PyTorch issue trackers before asking me. Thanks.
+
+Copyright 2020 Ross Wightman
+"""
+import argparse
+import torch
+import numpy as np
+
+import onnx
+import geffnet
+
+parser = argparse.ArgumentParser(description='PyTorch to ONNX model export')
+parser.add_argument('output', metavar='ONNX_FILE',
+                    help='output model filename')
+parser.add_argument('--model', '-m', metavar='MODEL', default='mobilenetv3_large_100',
+                    help='model architecture (default: mobilenetv3_large_100)')
+parser.add_argument('--opset', type=int, default=10,
+                    help='ONNX opset to use (default: 10)')
+parser.add_argument('--keep-init', action='store_true', default=False,
+                    help='Keep initializers as input. Needed for Caffe2 compatible export in newer PyTorch/ONNX.')
+parser.add_argument('--aten-fallback', action='store_true', default=False,
+                    help='Fallback to ATEN ops. Helps fix AdaptiveAvgPool issue with Caffe2 in newer PyTorch/ONNX.')
+parser.add_argument('--dynamic-size', action='store_true', default=False,
+                    help='Export model with dynamic width/height. Not recommended for "tf" models with SAME padding.')
+parser.add_argument('-b', '--batch-size', default=1, type=int,
+                    metavar='N', help='mini-batch size (default: 1)')
+parser.add_argument('--img-size', default=None, type=int,
+                    metavar='N', help='Input image dimension, uses model default if empty')
+parser.add_argument('--mean', type=float, nargs='+', default=None, metavar='MEAN',
+                    help='Override mean pixel value of dataset')
+parser.add_argument('--std', type=float, nargs='+', default=None, metavar='STD',
+                    help='Override std deviation of dataset')
+parser.add_argument('--num-classes', type=int, default=1000,
+                    help='Number of classes in dataset')
+parser.add_argument('--checkpoint', default='', type=str, metavar='PATH',
+                    help='path to checkpoint (default: none)')
+
+
+def main():
+    args = parser.parse_args()
+
+    args.pretrained = True
+    if args.checkpoint:
+        args.pretrained = False
+
+    print("==> Creating PyTorch {} model".format(args.model))
+    # NOTE exportable=True flag disables autofn/jit scripted activations and uses Conv2dSameExport layers
+    # for models using SAME padding
+    model = geffnet.create_model(
+        args.model,
+        num_classes=args.num_classes,
+        in_chans=3,
+        pretrained=args.pretrained,
+        checkpoint_path=args.checkpoint,
+        exportable=True)
+
+    model.eval()
+
+    example_input = torch.randn((args.batch_size, 3, args.img_size or 224, args.img_size or 224), requires_grad=True)
+
+    # Run model once before export trace; this sets padding for models with Conv2dSameExport. This means
+    # that the padding for models with Conv2dSameExport (most models with tf_ prefix) is fixed for
+    # the input img_size specified in this script.
+    # Opset >= 11 should allow for dynamic padding, however I cannot get it to work due to
+    # issues in the tracing of the dynamic padding or errors attempting to export the model after jit
+    # scripting it (an approach that should work). Perhaps in future PyTorch or ONNX versions...
+    model(example_input)
+
+    print("==> Exporting model to ONNX format at '{}'".format(args.output))
+    input_names = ["input0"]
+    output_names = ["output0"]
+    dynamic_axes = {'input0': {0: 'batch'}, 'output0': {0: 'batch'}}
+    if args.dynamic_size:
+        dynamic_axes['input0'][2] = 'height'
+        dynamic_axes['input0'][3] = 'width'
+    if args.aten_fallback:
+        export_type = torch.onnx.OperatorExportTypes.ONNX_ATEN_FALLBACK
+    else:
+        export_type = torch.onnx.OperatorExportTypes.ONNX
+
+    torch_out = torch.onnx._export(
+        model, example_input, args.output, export_params=True, verbose=True, input_names=input_names,
+        output_names=output_names, keep_initializers_as_inputs=args.keep_init, dynamic_axes=dynamic_axes,
+        opset_version=args.opset, operator_export_type=export_type)
+
+    print("==> Loading and checking exported model from '{}'".format(args.output))
+    onnx_model = onnx.load(args.output)
+    onnx.checker.check_model(onnx_model)  # assumed to throw on error
+    print("==> Passed")
+
+    if args.keep_init and args.aten_fallback:
+        import caffe2.python.onnx.backend as onnx_caffe2
+        # Caffe2 loading only works properly in newer PyTorch/ONNX combos when
+        # keep_initializers_as_inputs and aten_fallback are set to True.
+        print("==> Loading model into Caffe2 backend and comparing forward pass.")
+        caffe2_backend = onnx_caffe2.prepare(onnx_model)
+        B = {onnx_model.graph.input[0].name: example_input.data.numpy()}
+        c2_out = caffe2_backend.run(B)[0]
+        np.testing.assert_almost_equal(torch_out.data.numpy(), c2_out, decimal=5)
+        print("==> Passed")
+
+
+if __name__ == '__main__':
+    main()
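
An example invocation matching the defaults described in the docstring (model name and output filename are illustrative):

    python onnx_export.py --model mobilenetv3_large_100 --opset 10 ./mobilenetv3_large_100.onnx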
extensions/microsoftexcel-controlnet/annotator/normalbae/models/submodules/efficientnet_repo/onnx_optimize.py
ADDED
@@ -0,0 +1,84 @@
+""" ONNX optimization script
+
+Run ONNX models through the optimizer to prune unneeded nodes, fuse batchnorm layers into conv, etc.
+
+NOTE: This isn't working consistently in recent PyTorch/ONNX combos (ie PyTorch 1.6 and ONNX 1.7),
+it seems time to switch to using the onnxruntime online optimizer (can also be saved for offline).
+
+Copyright 2020 Ross Wightman
+"""
+import argparse
+import warnings
+
+import onnx
+from onnx import optimizer
+
+
+parser = argparse.ArgumentParser(description="Optimize ONNX model")
+
+parser.add_argument("model", help="The ONNX model")
+parser.add_argument("--output", required=True, help="The optimized model output filename")
+
+
+def traverse_graph(graph, prefix=''):
+    content = []
+    indent = prefix + '  '
+    graphs = []
+    num_nodes = 0
+    for node in graph.node:
+        pn, gs = onnx.helper.printable_node(node, indent, subgraphs=True)
+        assert isinstance(gs, list)
+        content.append(pn)
+        graphs.extend(gs)
+        num_nodes += 1
+    for g in graphs:
+        g_count, g_str = traverse_graph(g)
+        content.append('\n' + g_str)
+        num_nodes += g_count
+    return num_nodes, '\n'.join(content)
+
+
+def main():
+    args = parser.parse_args()
+    onnx_model = onnx.load(args.model)
+    num_original_nodes, original_graph_str = traverse_graph(onnx_model.graph)
+
+    # Optimizer passes to perform
+    passes = [
+        #'eliminate_deadend',
+        'eliminate_identity',
+        'eliminate_nop_dropout',
+        'eliminate_nop_pad',
+        'eliminate_nop_transpose',
+        'eliminate_unused_initializer',
+        'extract_constant_to_initializer',
+        'fuse_add_bias_into_conv',
+        'fuse_bn_into_conv',
+        'fuse_consecutive_concats',
+        'fuse_consecutive_reduce_unsqueeze',
+        'fuse_consecutive_squeezes',
+        'fuse_consecutive_transposes',
+        #'fuse_matmul_add_bias_into_gemm',
+        'fuse_pad_into_conv',
+        #'fuse_transpose_into_gemm',
+        #'lift_lexical_references',
+    ]
+
+    # Apply the optimization on the original serialized model
+    # WARNING I've had issues with optimizer in recent versions of PyTorch / ONNX causing
+    # 'duplicate definition of name' errors, see: https://github.com/onnx/onnx/issues/2401
+    # It may be better to rely on onnxruntime optimizations, see onnx_validate.py script.
+    warnings.warn("I've had issues with optimizer in recent versions of PyTorch / ONNX. "
+                  "Try onnxruntime optimization if this doesn't work.")
+    optimized_model = optimizer.optimize(onnx_model, passes)
+
+    num_optimized_nodes, optimized_graph_str = traverse_graph(optimized_model.graph)
+    print('==> The model after optimization:\n{}\n'.format(optimized_graph_str))
+    print('==> The optimized model has {} nodes, the original had {}.'.format(num_optimized_nodes, num_original_nodes))
+
+    # Save the ONNX model
+    onnx.save(optimized_model, args.output)
+
+
+if __name__ == "__main__":
+    main()
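
As the docstring warns, `from onnx import optimizer` is fragile on newer stacks; the pass-based optimizer was later split out of the onnx package into the separate onnxoptimizer project. A sketch of the equivalent call, assuming `pip install onnxoptimizer` and illustrative filenames:

    import onnx
    import onnxoptimizer  # assumed separate package on newer ONNX versions

    model = onnx.load('model.onnx')
    # same pass-list interface as the legacy onnx.optimizer.optimize()
    optimized = onnxoptimizer.optimize(model, ['eliminate_identity', 'fuse_bn_into_conv'])
    onnx.save(optimized, 'model_opt.onnx')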
extensions/microsoftexcel-controlnet/annotator/normalbae/models/submodules/efficientnet_repo/onnx_to_caffe.py
ADDED
@@ -0,0 +1,27 @@
+import argparse
+
+import onnx
+from caffe2.python.onnx.backend import Caffe2Backend
+
+
+parser = argparse.ArgumentParser(description="Convert ONNX to Caffe2")
+
+parser.add_argument("model", help="The ONNX model")
+parser.add_argument("--c2-prefix", required=True,
+                    help="The output file prefix for the caffe2 model init and predict file.")
+
+
+def main():
+    args = parser.parse_args()
+    onnx_model = onnx.load(args.model)
+    caffe2_init, caffe2_predict = Caffe2Backend.onnx_graph_to_caffe2_net(onnx_model)
+    caffe2_init_str = caffe2_init.SerializeToString()
+    with open(args.c2_prefix + '.init.pb', "wb") as f:
+        f.write(caffe2_init_str)
+    caffe2_predict_str = caffe2_predict.SerializeToString()
+    with open(args.c2_prefix + '.predict.pb', "wb") as f:
+        f.write(caffe2_predict_str)
+
+
+if __name__ == "__main__":
+    main()
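
An example invocation; the script writes `<prefix>.init.pb` and `<prefix>.predict.pb` (filenames illustrative):

    python onnx_to_caffe.py mobilenetv3_large_100.onnx --c2-prefix mobilenetv3_large_100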
extensions/microsoftexcel-controlnet/annotator/normalbae/models/submodules/efficientnet_repo/onnx_validate.py
ADDED
@@ -0,0 +1,112 @@
+""" ONNX-runtime validation script
+
+This script was created to verify accuracy and performance of exported ONNX
+models running with the onnxruntime. It utilizes the PyTorch dataloader/processing
+pipeline for a fair comparison against the originals.
+
+Copyright 2020 Ross Wightman
+"""
+import argparse
+import numpy as np
+import onnxruntime
+from data import create_loader, resolve_data_config, Dataset
+from utils import AverageMeter
+import time
+
+parser = argparse.ArgumentParser(description='ONNX Runtime ImageNet Validation')
+parser.add_argument('data', metavar='DIR',
+                    help='path to dataset')
+parser.add_argument('--onnx-input', default='', type=str, metavar='PATH',
+                    help='path to onnx model/weights file')
+parser.add_argument('--onnx-output-opt', default='', type=str, metavar='PATH',
+                    help='path to output optimized onnx graph')
+parser.add_argument('--profile', action='store_true', default=False,
+                    help='Enable profiler output.')
+parser.add_argument('-j', '--workers', default=2, type=int, metavar='N',
+                    help='number of data loading workers (default: 2)')
+parser.add_argument('-b', '--batch-size', default=256, type=int,
+                    metavar='N', help='mini-batch size (default: 256)')
+parser.add_argument('--img-size', default=None, type=int,
+                    metavar='N', help='Input image dimension, uses model default if empty')
+parser.add_argument('--mean', type=float, nargs='+', default=None, metavar='MEAN',
+                    help='Override mean pixel value of dataset')
+parser.add_argument('--std', type=float, nargs='+', default=None, metavar='STD',
+                    help='Override std deviation of dataset')
+parser.add_argument('--crop-pct', type=float, default=None, metavar='PCT',
+                    help='Override default crop pct of 0.875')
+parser.add_argument('--interpolation', default='', type=str, metavar='NAME',
+                    help='Image resize interpolation type (overrides model)')
+parser.add_argument('--tf-preprocessing', dest='tf_preprocessing', action='store_true',
+                    help='use tensorflow mnasnet preprocessing')
+parser.add_argument('--print-freq', '-p', default=10, type=int,
+                    metavar='N', help='print frequency (default: 10)')
+
+
+def main():
+    args = parser.parse_args()
+    args.gpu_id = 0
+
+    # Set graph optimization level
+    sess_options = onnxruntime.SessionOptions()
+    sess_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_ALL
+    if args.profile:
+        sess_options.enable_profiling = True
+    if args.onnx_output_opt:
+        sess_options.optimized_model_filepath = args.onnx_output_opt
+
+    session = onnxruntime.InferenceSession(args.onnx_input, sess_options)
+
+    data_config = resolve_data_config(None, args)
+    loader = create_loader(
+        Dataset(args.data, load_bytes=args.tf_preprocessing),
+        input_size=data_config['input_size'],
+        batch_size=args.batch_size,
+        use_prefetcher=False,
+        interpolation=data_config['interpolation'],
+        mean=data_config['mean'],
+        std=data_config['std'],
+        num_workers=args.workers,
+        crop_pct=data_config['crop_pct'],
+        tensorflow_preprocessing=args.tf_preprocessing)
+
+    input_name = session.get_inputs()[0].name
+
+    batch_time = AverageMeter()
+    top1 = AverageMeter()
+    top5 = AverageMeter()
+    end = time.time()
+    for i, (input, target) in enumerate(loader):
+        # run the net and return prediction
+        output = session.run([], {input_name: input.data.numpy()})
+        output = output[0]
+
+        # measure accuracy and record loss
+        prec1, prec5 = accuracy_np(output, target.numpy())
+        top1.update(prec1.item(), input.size(0))
+        top5.update(prec5.item(), input.size(0))
+
+        # measure elapsed time
+        batch_time.update(time.time() - end)
+        end = time.time()
+
+        if i % args.print_freq == 0:
+            print('Test: [{0}/{1}]\t'
+                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f}, {rate_avg:.3f}/s, {ms_avg:.3f} ms/sample) \t'
+                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
+                  'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
+                      i, len(loader), batch_time=batch_time, rate_avg=input.size(0) / batch_time.avg,
+                      ms_avg=1000 * batch_time.avg / input.size(0), top1=top1, top5=top5))
+
+    print(' * Prec@1 {top1.avg:.3f} ({top1a:.3f}) Prec@5 {top5.avg:.3f} ({top5a:.3f})'.format(
+        top1=top1, top1a=100 - top1.avg, top5=top5, top5a=100. - top5.avg))
+
+
+def accuracy_np(output, target):
+    max_indices = np.argsort(output, axis=1)[:, ::-1]
+    top5 = 100 * np.equal(max_indices[:, :5], target[:, np.newaxis]).sum(axis=1).mean()
+    top1 = 100 * np.equal(max_indices[:, 0], target).mean()
+    return top1, top5
+
+
+if __name__ == '__main__':
+    main()
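
An example invocation against an ImageNet-layout validation folder (paths illustrative):

    python onnx_validate.py /data/imagenet/validation --onnx-input ./mobilenetv3_large_100.onnx -b 64 -j 2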
extensions/microsoftexcel-controlnet/annotator/normalbae/models/submodules/efficientnet_repo/requirements.txt
ADDED
@@ -0,0 +1,2 @@
+torch>=1.2.0
+torchvision>=0.4.0