diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..ff1446e90bffc5485e346d9fd8eefdeb28cdf646
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,146 @@
+.idea/
+
+training/
+lightning_logs/
+image_log/
+
+*.pth
+*.pt
+*.ckpt
+*.safetensors
+
+gradio_pose2image_private.py
+gradio_canny2image_private.py
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# dlib
+**/*.dat
diff --git a/annotator/canny/__init__.py b/annotator/canny/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..cb0da951dc838ec9dec2131007e036113281800b
--- /dev/null
+++ b/annotator/canny/__init__.py
@@ -0,0 +1,6 @@
+import cv2
+
+
+class CannyDetector:
+    def __call__(self, img, low_threshold, high_threshold):
+        return cv2.Canny(img, low_threshold, high_threshold)
diff --git a/annotator/ckpts/ckpts.txt b/annotator/ckpts/ckpts.txt
new file mode 100644
index 0000000000000000000000000000000000000000..1978551fb2a9226814eaf58459f414fcfac4e69b
--- /dev/null
+++ b/annotator/ckpts/ckpts.txt
@@ -0,0 +1 @@
+Weights here.
\ No newline at end of file diff --git a/annotator/hed/__init__.py b/annotator/hed/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..56532c374df5c26f9ec53e2ac0dd924f4534bbdd --- /dev/null +++ b/annotator/hed/__init__.py @@ -0,0 +1,132 @@ +import numpy as np +import cv2 +import os +import torch +from einops import rearrange +from annotator.util import annotator_ckpts_path + + +class Network(torch.nn.Module): + def __init__(self, model_path): + super().__init__() + + self.netVggOne = torch.nn.Sequential( + torch.nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=1, padding=1), + torch.nn.ReLU(inplace=False), + torch.nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1), + torch.nn.ReLU(inplace=False) + ) + + self.netVggTwo = torch.nn.Sequential( + torch.nn.MaxPool2d(kernel_size=2, stride=2), + torch.nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1), + torch.nn.ReLU(inplace=False), + torch.nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=1), + torch.nn.ReLU(inplace=False) + ) + + self.netVggThr = torch.nn.Sequential( + torch.nn.MaxPool2d(kernel_size=2, stride=2), + torch.nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1), + torch.nn.ReLU(inplace=False), + torch.nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1), + torch.nn.ReLU(inplace=False), + torch.nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1), + torch.nn.ReLU(inplace=False) + ) + + self.netVggFou = torch.nn.Sequential( + torch.nn.MaxPool2d(kernel_size=2, stride=2), + torch.nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=1), + torch.nn.ReLU(inplace=False), + torch.nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1), + torch.nn.ReLU(inplace=False), + torch.nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1), + torch.nn.ReLU(inplace=False) + ) + + self.netVggFiv = torch.nn.Sequential( + torch.nn.MaxPool2d(kernel_size=2, stride=2), + torch.nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1), + torch.nn.ReLU(inplace=False), + torch.nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1), + torch.nn.ReLU(inplace=False), + torch.nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1), + torch.nn.ReLU(inplace=False) + ) + + self.netScoreOne = torch.nn.Conv2d(in_channels=64, out_channels=1, kernel_size=1, stride=1, padding=0) + self.netScoreTwo = torch.nn.Conv2d(in_channels=128, out_channels=1, kernel_size=1, stride=1, padding=0) + self.netScoreThr = torch.nn.Conv2d(in_channels=256, out_channels=1, kernel_size=1, stride=1, padding=0) + self.netScoreFou = torch.nn.Conv2d(in_channels=512, out_channels=1, kernel_size=1, stride=1, padding=0) + self.netScoreFiv = torch.nn.Conv2d(in_channels=512, out_channels=1, kernel_size=1, stride=1, padding=0) + + self.netCombine = torch.nn.Sequential( + torch.nn.Conv2d(in_channels=5, out_channels=1, kernel_size=1, stride=1, padding=0), + torch.nn.Sigmoid() + ) + + self.load_state_dict({strKey.replace('module', 'net'): tenWeight for strKey, tenWeight in torch.load(model_path).items()}) + + def forward(self, tenInput): + tenInput = tenInput * 255.0 + tenInput = tenInput - torch.tensor(data=[104.00698793, 116.66876762, 122.67891434], dtype=tenInput.dtype, device=tenInput.device).view(1, 3, 1, 1) + + tenVggOne = self.netVggOne(tenInput) 
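+        # Each following VGG stage halves the spatial resolution. The score
+        # branches read a one-channel edge map off every stage, and netCombine
+        # fuses the five maps once they are upsampled back to input size.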
+ tenVggTwo = self.netVggTwo(tenVggOne) + tenVggThr = self.netVggThr(tenVggTwo) + tenVggFou = self.netVggFou(tenVggThr) + tenVggFiv = self.netVggFiv(tenVggFou) + + tenScoreOne = self.netScoreOne(tenVggOne) + tenScoreTwo = self.netScoreTwo(tenVggTwo) + tenScoreThr = self.netScoreThr(tenVggThr) + tenScoreFou = self.netScoreFou(tenVggFou) + tenScoreFiv = self.netScoreFiv(tenVggFiv) + + tenScoreOne = torch.nn.functional.interpolate(input=tenScoreOne, size=(tenInput.shape[2], tenInput.shape[3]), mode='bilinear', align_corners=False) + tenScoreTwo = torch.nn.functional.interpolate(input=tenScoreTwo, size=(tenInput.shape[2], tenInput.shape[3]), mode='bilinear', align_corners=False) + tenScoreThr = torch.nn.functional.interpolate(input=tenScoreThr, size=(tenInput.shape[2], tenInput.shape[3]), mode='bilinear', align_corners=False) + tenScoreFou = torch.nn.functional.interpolate(input=tenScoreFou, size=(tenInput.shape[2], tenInput.shape[3]), mode='bilinear', align_corners=False) + tenScoreFiv = torch.nn.functional.interpolate(input=tenScoreFiv, size=(tenInput.shape[2], tenInput.shape[3]), mode='bilinear', align_corners=False) + + return self.netCombine(torch.cat([ tenScoreOne, tenScoreTwo, tenScoreThr, tenScoreFou, tenScoreFiv ], 1)) + + +class HEDdetector: + def __init__(self): + remote_model_path = "https://huggingface.co/lllyasviel/ControlNet/resolve/main/annotator/ckpts/network-bsds500.pth" + modelpath = os.path.join(annotator_ckpts_path, "network-bsds500.pth") + if not os.path.exists(modelpath): + from basicsr.utils.download_util import load_file_from_url + load_file_from_url(remote_model_path, model_dir=annotator_ckpts_path) + self.netNetwork = Network(modelpath).cuda().eval() + + def __call__(self, input_image): + assert input_image.ndim == 3 + input_image = input_image[:, :, ::-1].copy() + with torch.no_grad(): + image_hed = torch.from_numpy(input_image).float().cuda() + image_hed = image_hed / 255.0 + image_hed = rearrange(image_hed, 'h w c -> 1 c h w') + edge = self.netNetwork(image_hed)[0] + edge = (edge.cpu().numpy() * 255.0).clip(0, 255).astype(np.uint8) + return edge[0] + + +def nms(x, t, s): + x = cv2.GaussianBlur(x.astype(np.float32), (0, 0), s) + + f1 = np.array([[0, 0, 0], [1, 1, 1], [0, 0, 0]], dtype=np.uint8) + f2 = np.array([[0, 1, 0], [0, 1, 0], [0, 1, 0]], dtype=np.uint8) + f3 = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]], dtype=np.uint8) + f4 = np.array([[0, 0, 1], [0, 1, 0], [1, 0, 0]], dtype=np.uint8) + + y = np.zeros_like(x) + + for f in [f1, f2, f3, f4]: + np.putmask(y, cv2.dilate(x, kernel=f) == x, x) + + z = np.zeros_like(y, dtype=np.uint8) + z[y > t] = 255 + return z diff --git a/annotator/midas/__init__.py b/annotator/midas/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..dc5ac03eea6f5ba7968706f1863c8bc4f8aaaf6a --- /dev/null +++ b/annotator/midas/__init__.py @@ -0,0 +1,38 @@ +import cv2 +import numpy as np +import torch + +from einops import rearrange +from .api import MiDaSInference + + +class MidasDetector: + def __init__(self): + self.model = MiDaSInference(model_type="dpt_hybrid").cuda() + + def __call__(self, input_image, a=np.pi * 2.0, bg_th=0.1): + assert input_image.ndim == 3 + image_depth = input_image + with torch.no_grad(): + image_depth = torch.from_numpy(image_depth).float().cuda() + image_depth = image_depth / 127.5 - 1.0 + image_depth = rearrange(image_depth, 'h w c -> 1 c h w') + depth = self.model(image_depth)[0] + + depth_pt = depth.clone() + depth_pt -= torch.min(depth_pt) + depth_pt /= torch.max(depth_pt) + depth_pt 
= depth_pt.cpu().numpy() + depth_image = (depth_pt * 255.0).clip(0, 255).astype(np.uint8) + + depth_np = depth.cpu().numpy() + x = cv2.Sobel(depth_np, cv2.CV_32F, 1, 0, ksize=3) + y = cv2.Sobel(depth_np, cv2.CV_32F, 0, 1, ksize=3) + z = np.ones_like(x) * a + x[depth_pt < bg_th] = 0 + y[depth_pt < bg_th] = 0 + normal = np.stack([x, y, z], axis=2) + normal /= np.sum(normal ** 2.0, axis=2, keepdims=True) ** 0.5 + normal_image = (normal * 127.5 + 127.5).clip(0, 255).astype(np.uint8) + + return depth_image, normal_image diff --git a/annotator/midas/api.py b/annotator/midas/api.py new file mode 100644 index 0000000000000000000000000000000000000000..1ab9f15bf96bbaffcee0e3e29fc9d3979d6c32e8 --- /dev/null +++ b/annotator/midas/api.py @@ -0,0 +1,169 @@ +# based on https://github.com/isl-org/MiDaS + +import cv2 +import os +import torch +import torch.nn as nn +from torchvision.transforms import Compose + +from .midas.dpt_depth import DPTDepthModel +from .midas.midas_net import MidasNet +from .midas.midas_net_custom import MidasNet_small +from .midas.transforms import Resize, NormalizeImage, PrepareForNet +from annotator.util import annotator_ckpts_path + + +ISL_PATHS = { + "dpt_large": os.path.join(annotator_ckpts_path, "dpt_large-midas-2f21e586.pt"), + "dpt_hybrid": os.path.join(annotator_ckpts_path, "dpt_hybrid-midas-501f0c75.pt"), + "midas_v21": "", + "midas_v21_small": "", +} + +remote_model_path = "https://huggingface.co/lllyasviel/ControlNet/resolve/main/annotator/ckpts/dpt_hybrid-midas-501f0c75.pt" + + +def disabled_train(self, mode=True): + """Overwrite model.train with this function to make sure train/eval mode + does not change anymore.""" + return self + + +def load_midas_transform(model_type): + # https://github.com/isl-org/MiDaS/blob/master/run.py + # load transform only + if model_type == "dpt_large": # DPT-Large + net_w, net_h = 384, 384 + resize_mode = "minimal" + normalization = NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]) + + elif model_type == "dpt_hybrid": # DPT-Hybrid + net_w, net_h = 384, 384 + resize_mode = "minimal" + normalization = NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]) + + elif model_type == "midas_v21": + net_w, net_h = 384, 384 + resize_mode = "upper_bound" + normalization = NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + + elif model_type == "midas_v21_small": + net_w, net_h = 256, 256 + resize_mode = "upper_bound" + normalization = NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + + else: + assert False, f"model_type '{model_type}' not implemented, use: --model_type large" + + transform = Compose( + [ + Resize( + net_w, + net_h, + resize_target=None, + keep_aspect_ratio=True, + ensure_multiple_of=32, + resize_method=resize_mode, + image_interpolation_method=cv2.INTER_CUBIC, + ), + normalization, + PrepareForNet(), + ] + ) + + return transform + + +def load_model(model_type): + # https://github.com/isl-org/MiDaS/blob/master/run.py + # load network + model_path = ISL_PATHS[model_type] + if model_type == "dpt_large": # DPT-Large + model = DPTDepthModel( + path=model_path, + backbone="vitl16_384", + non_negative=True, + ) + net_w, net_h = 384, 384 + resize_mode = "minimal" + normalization = NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]) + + elif model_type == "dpt_hybrid": # DPT-Hybrid + if not os.path.exists(model_path): + from basicsr.utils.download_util import load_file_from_url + load_file_from_url(remote_model_path, model_dir=annotator_ckpts_path) + + model = DPTDepthModel( + 
path=model_path, + backbone="vitb_rn50_384", + non_negative=True, + ) + net_w, net_h = 384, 384 + resize_mode = "minimal" + normalization = NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]) + + elif model_type == "midas_v21": + model = MidasNet(model_path, non_negative=True) + net_w, net_h = 384, 384 + resize_mode = "upper_bound" + normalization = NormalizeImage( + mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225] + ) + + elif model_type == "midas_v21_small": + model = MidasNet_small(model_path, features=64, backbone="efficientnet_lite3", exportable=True, + non_negative=True, blocks={'expand': True}) + net_w, net_h = 256, 256 + resize_mode = "upper_bound" + normalization = NormalizeImage( + mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225] + ) + + else: + print(f"model_type '{model_type}' not implemented, use: --model_type large") + assert False + + transform = Compose( + [ + Resize( + net_w, + net_h, + resize_target=None, + keep_aspect_ratio=True, + ensure_multiple_of=32, + resize_method=resize_mode, + image_interpolation_method=cv2.INTER_CUBIC, + ), + normalization, + PrepareForNet(), + ] + ) + + return model.eval(), transform + + +class MiDaSInference(nn.Module): + MODEL_TYPES_TORCH_HUB = [ + "DPT_Large", + "DPT_Hybrid", + "MiDaS_small" + ] + MODEL_TYPES_ISL = [ + "dpt_large", + "dpt_hybrid", + "midas_v21", + "midas_v21_small", + ] + + def __init__(self, model_type): + super().__init__() + assert (model_type in self.MODEL_TYPES_ISL) + model, _ = load_model(model_type) + self.model = model + self.model.train = disabled_train + + def forward(self, x): + with torch.no_grad(): + prediction = self.model(x) + return prediction + diff --git a/annotator/midas/midas/__init__.py b/annotator/midas/midas/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/annotator/midas/midas/base_model.py b/annotator/midas/midas/base_model.py new file mode 100644 index 0000000000000000000000000000000000000000..5cf430239b47ec5ec07531263f26f5c24a2311cd --- /dev/null +++ b/annotator/midas/midas/base_model.py @@ -0,0 +1,16 @@ +import torch + + +class BaseModel(torch.nn.Module): + def load(self, path): + """Load model from file. 
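+
+        Checkpoints that also carry optimizer state are unwrapped to the
+        bare model weights before loading.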
+ + Args: + path (str): file path + """ + parameters = torch.load(path, map_location=torch.device('cpu')) + + if "optimizer" in parameters: + parameters = parameters["model"] + + self.load_state_dict(parameters) diff --git a/annotator/midas/midas/blocks.py b/annotator/midas/midas/blocks.py new file mode 100644 index 0000000000000000000000000000000000000000..2145d18fa98060a618536d9a64fe6589e9be4f78 --- /dev/null +++ b/annotator/midas/midas/blocks.py @@ -0,0 +1,342 @@ +import torch +import torch.nn as nn + +from .vit import ( + _make_pretrained_vitb_rn50_384, + _make_pretrained_vitl16_384, + _make_pretrained_vitb16_384, + forward_vit, +) + +def _make_encoder(backbone, features, use_pretrained, groups=1, expand=False, exportable=True, hooks=None, use_vit_only=False, use_readout="ignore",): + if backbone == "vitl16_384": + pretrained = _make_pretrained_vitl16_384( + use_pretrained, hooks=hooks, use_readout=use_readout + ) + scratch = _make_scratch( + [256, 512, 1024, 1024], features, groups=groups, expand=expand + ) # ViT-L/16 - 85.0% Top1 (backbone) + elif backbone == "vitb_rn50_384": + pretrained = _make_pretrained_vitb_rn50_384( + use_pretrained, + hooks=hooks, + use_vit_only=use_vit_only, + use_readout=use_readout, + ) + scratch = _make_scratch( + [256, 512, 768, 768], features, groups=groups, expand=expand + ) # ViT-H/16 - 85.0% Top1 (backbone) + elif backbone == "vitb16_384": + pretrained = _make_pretrained_vitb16_384( + use_pretrained, hooks=hooks, use_readout=use_readout + ) + scratch = _make_scratch( + [96, 192, 384, 768], features, groups=groups, expand=expand + ) # ViT-B/16 - 84.6% Top1 (backbone) + elif backbone == "resnext101_wsl": + pretrained = _make_pretrained_resnext101_wsl(use_pretrained) + scratch = _make_scratch([256, 512, 1024, 2048], features, groups=groups, expand=expand) # efficientnet_lite3 + elif backbone == "efficientnet_lite3": + pretrained = _make_pretrained_efficientnet_lite3(use_pretrained, exportable=exportable) + scratch = _make_scratch([32, 48, 136, 384], features, groups=groups, expand=expand) # efficientnet_lite3 + else: + print(f"Backbone '{backbone}' not implemented") + assert False + + return pretrained, scratch + + +def _make_scratch(in_shape, out_shape, groups=1, expand=False): + scratch = nn.Module() + + out_shape1 = out_shape + out_shape2 = out_shape + out_shape3 = out_shape + out_shape4 = out_shape + if expand==True: + out_shape1 = out_shape + out_shape2 = out_shape*2 + out_shape3 = out_shape*4 + out_shape4 = out_shape*8 + + scratch.layer1_rn = nn.Conv2d( + in_shape[0], out_shape1, kernel_size=3, stride=1, padding=1, bias=False, groups=groups + ) + scratch.layer2_rn = nn.Conv2d( + in_shape[1], out_shape2, kernel_size=3, stride=1, padding=1, bias=False, groups=groups + ) + scratch.layer3_rn = nn.Conv2d( + in_shape[2], out_shape3, kernel_size=3, stride=1, padding=1, bias=False, groups=groups + ) + scratch.layer4_rn = nn.Conv2d( + in_shape[3], out_shape4, kernel_size=3, stride=1, padding=1, bias=False, groups=groups + ) + + return scratch + + +def _make_pretrained_efficientnet_lite3(use_pretrained, exportable=False): + efficientnet = torch.hub.load( + "rwightman/gen-efficientnet-pytorch", + "tf_efficientnet_lite3", + pretrained=use_pretrained, + exportable=exportable + ) + return _make_efficientnet_backbone(efficientnet) + + +def _make_efficientnet_backbone(effnet): + pretrained = nn.Module() + + pretrained.layer1 = nn.Sequential( + effnet.conv_stem, effnet.bn1, effnet.act1, *effnet.blocks[0:2] + ) + pretrained.layer2 = 
nn.Sequential(*effnet.blocks[2:3]) + pretrained.layer3 = nn.Sequential(*effnet.blocks[3:5]) + pretrained.layer4 = nn.Sequential(*effnet.blocks[5:9]) + + return pretrained + + +def _make_resnet_backbone(resnet): + pretrained = nn.Module() + pretrained.layer1 = nn.Sequential( + resnet.conv1, resnet.bn1, resnet.relu, resnet.maxpool, resnet.layer1 + ) + + pretrained.layer2 = resnet.layer2 + pretrained.layer3 = resnet.layer3 + pretrained.layer4 = resnet.layer4 + + return pretrained + + +def _make_pretrained_resnext101_wsl(use_pretrained): + resnet = torch.hub.load("facebookresearch/WSL-Images", "resnext101_32x8d_wsl") + return _make_resnet_backbone(resnet) + + + +class Interpolate(nn.Module): + """Interpolation module. + """ + + def __init__(self, scale_factor, mode, align_corners=False): + """Init. + + Args: + scale_factor (float): scaling + mode (str): interpolation mode + """ + super(Interpolate, self).__init__() + + self.interp = nn.functional.interpolate + self.scale_factor = scale_factor + self.mode = mode + self.align_corners = align_corners + + def forward(self, x): + """Forward pass. + + Args: + x (tensor): input + + Returns: + tensor: interpolated data + """ + + x = self.interp( + x, scale_factor=self.scale_factor, mode=self.mode, align_corners=self.align_corners + ) + + return x + + +class ResidualConvUnit(nn.Module): + """Residual convolution module. + """ + + def __init__(self, features): + """Init. + + Args: + features (int): number of features + """ + super().__init__() + + self.conv1 = nn.Conv2d( + features, features, kernel_size=3, stride=1, padding=1, bias=True + ) + + self.conv2 = nn.Conv2d( + features, features, kernel_size=3, stride=1, padding=1, bias=True + ) + + self.relu = nn.ReLU(inplace=True) + + def forward(self, x): + """Forward pass. + + Args: + x (tensor): input + + Returns: + tensor: output + """ + out = self.relu(x) + out = self.conv1(out) + out = self.relu(out) + out = self.conv2(out) + + return out + x + + +class FeatureFusionBlock(nn.Module): + """Feature fusion block. + """ + + def __init__(self, features): + """Init. + + Args: + features (int): number of features + """ + super(FeatureFusionBlock, self).__init__() + + self.resConfUnit1 = ResidualConvUnit(features) + self.resConfUnit2 = ResidualConvUnit(features) + + def forward(self, *xs): + """Forward pass. + + Returns: + tensor: output + """ + output = xs[0] + + if len(xs) == 2: + output += self.resConfUnit1(xs[1]) + + output = self.resConfUnit2(output) + + output = nn.functional.interpolate( + output, scale_factor=2, mode="bilinear", align_corners=True + ) + + return output + + + + +class ResidualConvUnit_custom(nn.Module): + """Residual convolution module. + """ + + def __init__(self, features, activation, bn): + """Init. + + Args: + features (int): number of features + """ + super().__init__() + + self.bn = bn + + self.groups=1 + + self.conv1 = nn.Conv2d( + features, features, kernel_size=3, stride=1, padding=1, bias=True, groups=self.groups + ) + + self.conv2 = nn.Conv2d( + features, features, kernel_size=3, stride=1, padding=1, bias=True, groups=self.groups + ) + + if self.bn==True: + self.bn1 = nn.BatchNorm2d(features) + self.bn2 = nn.BatchNorm2d(features) + + self.activation = activation + + self.skip_add = nn.quantized.FloatFunctional() + + def forward(self, x): + """Forward pass. 
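+
+        Pre-activation residual unit: the activation runs before each
+        convolution, and the input is added back through skip_add.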
+ + Args: + x (tensor): input + + Returns: + tensor: output + """ + + out = self.activation(x) + out = self.conv1(out) + if self.bn==True: + out = self.bn1(out) + + out = self.activation(out) + out = self.conv2(out) + if self.bn==True: + out = self.bn2(out) + + if self.groups > 1: + out = self.conv_merge(out) + + return self.skip_add.add(out, x) + + # return out + x + + +class FeatureFusionBlock_custom(nn.Module): + """Feature fusion block. + """ + + def __init__(self, features, activation, deconv=False, bn=False, expand=False, align_corners=True): + """Init. + + Args: + features (int): number of features + """ + super(FeatureFusionBlock_custom, self).__init__() + + self.deconv = deconv + self.align_corners = align_corners + + self.groups=1 + + self.expand = expand + out_features = features + if self.expand==True: + out_features = features//2 + + self.out_conv = nn.Conv2d(features, out_features, kernel_size=1, stride=1, padding=0, bias=True, groups=1) + + self.resConfUnit1 = ResidualConvUnit_custom(features, activation, bn) + self.resConfUnit2 = ResidualConvUnit_custom(features, activation, bn) + + self.skip_add = nn.quantized.FloatFunctional() + + def forward(self, *xs): + """Forward pass. + + Returns: + tensor: output + """ + output = xs[0] + + if len(xs) == 2: + res = self.resConfUnit1(xs[1]) + output = self.skip_add.add(output, res) + # output += res + + output = self.resConfUnit2(output) + + output = nn.functional.interpolate( + output, scale_factor=2, mode="bilinear", align_corners=self.align_corners + ) + + output = self.out_conv(output) + + return output + diff --git a/annotator/midas/midas/dpt_depth.py b/annotator/midas/midas/dpt_depth.py new file mode 100644 index 0000000000000000000000000000000000000000..4e9aab5d2767dffea39da5b3f30e2798688216f1 --- /dev/null +++ b/annotator/midas/midas/dpt_depth.py @@ -0,0 +1,109 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +from .base_model import BaseModel +from .blocks import ( + FeatureFusionBlock, + FeatureFusionBlock_custom, + Interpolate, + _make_encoder, + forward_vit, +) + + +def _make_fusion_block(features, use_bn): + return FeatureFusionBlock_custom( + features, + nn.ReLU(False), + deconv=False, + bn=use_bn, + expand=False, + align_corners=True, + ) + + +class DPT(BaseModel): + def __init__( + self, + head, + features=256, + backbone="vitb_rn50_384", + readout="project", + channels_last=False, + use_bn=False, + ): + + super(DPT, self).__init__() + + self.channels_last = channels_last + + hooks = { + "vitb_rn50_384": [0, 1, 8, 11], + "vitb16_384": [2, 5, 8, 11], + "vitl16_384": [5, 11, 17, 23], + } + + # Instantiate backbone and reassemble blocks + self.pretrained, self.scratch = _make_encoder( + backbone, + features, + False, # Set to true of you want to train from scratch, uses ImageNet weights + groups=1, + expand=False, + exportable=False, + hooks=hooks[backbone], + use_readout=readout, + ) + + self.scratch.refinenet1 = _make_fusion_block(features, use_bn) + self.scratch.refinenet2 = _make_fusion_block(features, use_bn) + self.scratch.refinenet3 = _make_fusion_block(features, use_bn) + self.scratch.refinenet4 = _make_fusion_block(features, use_bn) + + self.scratch.output_conv = head + + + def forward(self, x): + if self.channels_last == True: + x.contiguous(memory_format=torch.channels_last) + + layer_1, layer_2, layer_3, layer_4 = forward_vit(self.pretrained, x) + + layer_1_rn = self.scratch.layer1_rn(layer_1) + layer_2_rn = self.scratch.layer2_rn(layer_2) + layer_3_rn = 
self.scratch.layer3_rn(layer_3) + layer_4_rn = self.scratch.layer4_rn(layer_4) + + path_4 = self.scratch.refinenet4(layer_4_rn) + path_3 = self.scratch.refinenet3(path_4, layer_3_rn) + path_2 = self.scratch.refinenet2(path_3, layer_2_rn) + path_1 = self.scratch.refinenet1(path_2, layer_1_rn) + + out = self.scratch.output_conv(path_1) + + return out + + +class DPTDepthModel(DPT): + def __init__(self, path=None, non_negative=True, **kwargs): + features = kwargs["features"] if "features" in kwargs else 256 + + head = nn.Sequential( + nn.Conv2d(features, features // 2, kernel_size=3, stride=1, padding=1), + Interpolate(scale_factor=2, mode="bilinear", align_corners=True), + nn.Conv2d(features // 2, 32, kernel_size=3, stride=1, padding=1), + nn.ReLU(True), + nn.Conv2d(32, 1, kernel_size=1, stride=1, padding=0), + nn.ReLU(True) if non_negative else nn.Identity(), + nn.Identity(), + ) + + super().__init__(head, **kwargs) + + if path is not None: + self.load(path) + + def forward(self, x): + return super().forward(x).squeeze(dim=1) + diff --git a/annotator/midas/midas/midas_net.py b/annotator/midas/midas/midas_net.py new file mode 100644 index 0000000000000000000000000000000000000000..8a954977800b0a0f48807e80fa63041910e33c1f --- /dev/null +++ b/annotator/midas/midas/midas_net.py @@ -0,0 +1,76 @@ +"""MidashNet: Network for monocular depth estimation trained by mixing several datasets. +This file contains code that is adapted from +https://github.com/thomasjpfan/pytorch_refinenet/blob/master/pytorch_refinenet/refinenet/refinenet_4cascade.py +""" +import torch +import torch.nn as nn + +from .base_model import BaseModel +from .blocks import FeatureFusionBlock, Interpolate, _make_encoder + + +class MidasNet(BaseModel): + """Network for monocular depth estimation. + """ + + def __init__(self, path=None, features=256, non_negative=True): + """Init. + + Args: + path (str, optional): Path to saved model. Defaults to None. + features (int, optional): Number of features. Defaults to 256. + backbone (str, optional): Backbone network for encoder. Defaults to resnet50 + """ + print("Loading weights: ", path) + + super(MidasNet, self).__init__() + + use_pretrained = False if path is None else True + + self.pretrained, self.scratch = _make_encoder(backbone="resnext101_wsl", features=features, use_pretrained=use_pretrained) + + self.scratch.refinenet4 = FeatureFusionBlock(features) + self.scratch.refinenet3 = FeatureFusionBlock(features) + self.scratch.refinenet2 = FeatureFusionBlock(features) + self.scratch.refinenet1 = FeatureFusionBlock(features) + + self.scratch.output_conv = nn.Sequential( + nn.Conv2d(features, 128, kernel_size=3, stride=1, padding=1), + Interpolate(scale_factor=2, mode="bilinear"), + nn.Conv2d(128, 32, kernel_size=3, stride=1, padding=1), + nn.ReLU(True), + nn.Conv2d(32, 1, kernel_size=1, stride=1, padding=0), + nn.ReLU(True) if non_negative else nn.Identity(), + ) + + if path: + self.load(path) + + def forward(self, x): + """Forward pass. 
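+
+        Runs the ResNeXt encoder, then fuses the four feature maps top-down
+        through the refinenet blocks before the output head.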
+ + Args: + x (tensor): input data (image) + + Returns: + tensor: depth + """ + + layer_1 = self.pretrained.layer1(x) + layer_2 = self.pretrained.layer2(layer_1) + layer_3 = self.pretrained.layer3(layer_2) + layer_4 = self.pretrained.layer4(layer_3) + + layer_1_rn = self.scratch.layer1_rn(layer_1) + layer_2_rn = self.scratch.layer2_rn(layer_2) + layer_3_rn = self.scratch.layer3_rn(layer_3) + layer_4_rn = self.scratch.layer4_rn(layer_4) + + path_4 = self.scratch.refinenet4(layer_4_rn) + path_3 = self.scratch.refinenet3(path_4, layer_3_rn) + path_2 = self.scratch.refinenet2(path_3, layer_2_rn) + path_1 = self.scratch.refinenet1(path_2, layer_1_rn) + + out = self.scratch.output_conv(path_1) + + return torch.squeeze(out, dim=1) diff --git a/annotator/midas/midas/midas_net_custom.py b/annotator/midas/midas/midas_net_custom.py new file mode 100644 index 0000000000000000000000000000000000000000..50e4acb5e53d5fabefe3dde16ab49c33c2b7797c --- /dev/null +++ b/annotator/midas/midas/midas_net_custom.py @@ -0,0 +1,128 @@ +"""MidashNet: Network for monocular depth estimation trained by mixing several datasets. +This file contains code that is adapted from +https://github.com/thomasjpfan/pytorch_refinenet/blob/master/pytorch_refinenet/refinenet/refinenet_4cascade.py +""" +import torch +import torch.nn as nn + +from .base_model import BaseModel +from .blocks import FeatureFusionBlock, FeatureFusionBlock_custom, Interpolate, _make_encoder + + +class MidasNet_small(BaseModel): + """Network for monocular depth estimation. + """ + + def __init__(self, path=None, features=64, backbone="efficientnet_lite3", non_negative=True, exportable=True, channels_last=False, align_corners=True, + blocks={'expand': True}): + """Init. + + Args: + path (str, optional): Path to saved model. Defaults to None. + features (int, optional): Number of features. Defaults to 256. + backbone (str, optional): Backbone network for encoder. 
Defaults to resnet50 + """ + print("Loading weights: ", path) + + super(MidasNet_small, self).__init__() + + use_pretrained = False if path else True + + self.channels_last = channels_last + self.blocks = blocks + self.backbone = backbone + + self.groups = 1 + + features1=features + features2=features + features3=features + features4=features + self.expand = False + if "expand" in self.blocks and self.blocks['expand'] == True: + self.expand = True + features1=features + features2=features*2 + features3=features*4 + features4=features*8 + + self.pretrained, self.scratch = _make_encoder(self.backbone, features, use_pretrained, groups=self.groups, expand=self.expand, exportable=exportable) + + self.scratch.activation = nn.ReLU(False) + + self.scratch.refinenet4 = FeatureFusionBlock_custom(features4, self.scratch.activation, deconv=False, bn=False, expand=self.expand, align_corners=align_corners) + self.scratch.refinenet3 = FeatureFusionBlock_custom(features3, self.scratch.activation, deconv=False, bn=False, expand=self.expand, align_corners=align_corners) + self.scratch.refinenet2 = FeatureFusionBlock_custom(features2, self.scratch.activation, deconv=False, bn=False, expand=self.expand, align_corners=align_corners) + self.scratch.refinenet1 = FeatureFusionBlock_custom(features1, self.scratch.activation, deconv=False, bn=False, align_corners=align_corners) + + + self.scratch.output_conv = nn.Sequential( + nn.Conv2d(features, features//2, kernel_size=3, stride=1, padding=1, groups=self.groups), + Interpolate(scale_factor=2, mode="bilinear"), + nn.Conv2d(features//2, 32, kernel_size=3, stride=1, padding=1), + self.scratch.activation, + nn.Conv2d(32, 1, kernel_size=1, stride=1, padding=0), + nn.ReLU(True) if non_negative else nn.Identity(), + nn.Identity(), + ) + + if path: + self.load(path) + + + def forward(self, x): + """Forward pass. 
+ + Args: + x (tensor): input data (image) + + Returns: + tensor: depth + """ + if self.channels_last==True: + print("self.channels_last = ", self.channels_last) + x.contiguous(memory_format=torch.channels_last) + + + layer_1 = self.pretrained.layer1(x) + layer_2 = self.pretrained.layer2(layer_1) + layer_3 = self.pretrained.layer3(layer_2) + layer_4 = self.pretrained.layer4(layer_3) + + layer_1_rn = self.scratch.layer1_rn(layer_1) + layer_2_rn = self.scratch.layer2_rn(layer_2) + layer_3_rn = self.scratch.layer3_rn(layer_3) + layer_4_rn = self.scratch.layer4_rn(layer_4) + + + path_4 = self.scratch.refinenet4(layer_4_rn) + path_3 = self.scratch.refinenet3(path_4, layer_3_rn) + path_2 = self.scratch.refinenet2(path_3, layer_2_rn) + path_1 = self.scratch.refinenet1(path_2, layer_1_rn) + + out = self.scratch.output_conv(path_1) + + return torch.squeeze(out, dim=1) + + + +def fuse_model(m): + prev_previous_type = nn.Identity() + prev_previous_name = '' + previous_type = nn.Identity() + previous_name = '' + for name, module in m.named_modules(): + if prev_previous_type == nn.Conv2d and previous_type == nn.BatchNorm2d and type(module) == nn.ReLU: + # print("FUSED ", prev_previous_name, previous_name, name) + torch.quantization.fuse_modules(m, [prev_previous_name, previous_name, name], inplace=True) + elif prev_previous_type == nn.Conv2d and previous_type == nn.BatchNorm2d: + # print("FUSED ", prev_previous_name, previous_name) + torch.quantization.fuse_modules(m, [prev_previous_name, previous_name], inplace=True) + # elif previous_type == nn.Conv2d and type(module) == nn.ReLU: + # print("FUSED ", previous_name, name) + # torch.quantization.fuse_modules(m, [previous_name, name], inplace=True) + + prev_previous_type = previous_type + prev_previous_name = previous_name + previous_type = type(module) + previous_name = name \ No newline at end of file diff --git a/annotator/midas/midas/transforms.py b/annotator/midas/midas/transforms.py new file mode 100644 index 0000000000000000000000000000000000000000..350cbc11662633ad7f8968eb10be2e7de6e384e9 --- /dev/null +++ b/annotator/midas/midas/transforms.py @@ -0,0 +1,234 @@ +import numpy as np +import cv2 +import math + + +def apply_min_size(sample, size, image_interpolation_method=cv2.INTER_AREA): + """Rezise the sample to ensure the given size. Keeps aspect ratio. + + Args: + sample (dict): sample + size (tuple): image size + + Returns: + tuple: new size + """ + shape = list(sample["disparity"].shape) + + if shape[0] >= size[0] and shape[1] >= size[1]: + return sample + + scale = [0, 0] + scale[0] = size[0] / shape[0] + scale[1] = size[1] / shape[1] + + scale = max(scale) + + shape[0] = math.ceil(scale * shape[0]) + shape[1] = math.ceil(scale * shape[1]) + + # resize + sample["image"] = cv2.resize( + sample["image"], tuple(shape[::-1]), interpolation=image_interpolation_method + ) + + sample["disparity"] = cv2.resize( + sample["disparity"], tuple(shape[::-1]), interpolation=cv2.INTER_NEAREST + ) + sample["mask"] = cv2.resize( + sample["mask"].astype(np.float32), + tuple(shape[::-1]), + interpolation=cv2.INTER_NEAREST, + ) + sample["mask"] = sample["mask"].astype(bool) + + return tuple(shape) + + +class Resize(object): + """Resize sample to given size (width, height). + """ + + def __init__( + self, + width, + height, + resize_target=True, + keep_aspect_ratio=False, + ensure_multiple_of=1, + resize_method="lower_bound", + image_interpolation_method=cv2.INTER_AREA, + ): + """Init. 
+ + Args: + width (int): desired output width + height (int): desired output height + resize_target (bool, optional): + True: Resize the full sample (image, mask, target). + False: Resize image only. + Defaults to True. + keep_aspect_ratio (bool, optional): + True: Keep the aspect ratio of the input sample. + Output sample might not have the given width and height, and + resize behaviour depends on the parameter 'resize_method'. + Defaults to False. + ensure_multiple_of (int, optional): + Output width and height is constrained to be multiple of this parameter. + Defaults to 1. + resize_method (str, optional): + "lower_bound": Output will be at least as large as the given size. + "upper_bound": Output will be at max as large as the given size. (Output size might be smaller than given size.) + "minimal": Scale as least as possible. (Output size might be smaller than given size.) + Defaults to "lower_bound". + """ + self.__width = width + self.__height = height + + self.__resize_target = resize_target + self.__keep_aspect_ratio = keep_aspect_ratio + self.__multiple_of = ensure_multiple_of + self.__resize_method = resize_method + self.__image_interpolation_method = image_interpolation_method + + def constrain_to_multiple_of(self, x, min_val=0, max_val=None): + y = (np.round(x / self.__multiple_of) * self.__multiple_of).astype(int) + + if max_val is not None and y > max_val: + y = (np.floor(x / self.__multiple_of) * self.__multiple_of).astype(int) + + if y < min_val: + y = (np.ceil(x / self.__multiple_of) * self.__multiple_of).astype(int) + + return y + + def get_size(self, width, height): + # determine new height and width + scale_height = self.__height / height + scale_width = self.__width / width + + if self.__keep_aspect_ratio: + if self.__resize_method == "lower_bound": + # scale such that output size is lower bound + if scale_width > scale_height: + # fit width + scale_height = scale_width + else: + # fit height + scale_width = scale_height + elif self.__resize_method == "upper_bound": + # scale such that output size is upper bound + if scale_width < scale_height: + # fit width + scale_height = scale_width + else: + # fit height + scale_width = scale_height + elif self.__resize_method == "minimal": + # scale as least as possbile + if abs(1 - scale_width) < abs(1 - scale_height): + # fit width + scale_height = scale_width + else: + # fit height + scale_width = scale_height + else: + raise ValueError( + f"resize_method {self.__resize_method} not implemented" + ) + + if self.__resize_method == "lower_bound": + new_height = self.constrain_to_multiple_of( + scale_height * height, min_val=self.__height + ) + new_width = self.constrain_to_multiple_of( + scale_width * width, min_val=self.__width + ) + elif self.__resize_method == "upper_bound": + new_height = self.constrain_to_multiple_of( + scale_height * height, max_val=self.__height + ) + new_width = self.constrain_to_multiple_of( + scale_width * width, max_val=self.__width + ) + elif self.__resize_method == "minimal": + new_height = self.constrain_to_multiple_of(scale_height * height) + new_width = self.constrain_to_multiple_of(scale_width * width) + else: + raise ValueError(f"resize_method {self.__resize_method} not implemented") + + return (new_width, new_height) + + def __call__(self, sample): + width, height = self.get_size( + sample["image"].shape[1], sample["image"].shape[0] + ) + + # resize sample + sample["image"] = cv2.resize( + sample["image"], + (width, height), + interpolation=self.__image_interpolation_method, + ) + + if 
self.__resize_target: + if "disparity" in sample: + sample["disparity"] = cv2.resize( + sample["disparity"], + (width, height), + interpolation=cv2.INTER_NEAREST, + ) + + if "depth" in sample: + sample["depth"] = cv2.resize( + sample["depth"], (width, height), interpolation=cv2.INTER_NEAREST + ) + + sample["mask"] = cv2.resize( + sample["mask"].astype(np.float32), + (width, height), + interpolation=cv2.INTER_NEAREST, + ) + sample["mask"] = sample["mask"].astype(bool) + + return sample + + +class NormalizeImage(object): + """Normlize image by given mean and std. + """ + + def __init__(self, mean, std): + self.__mean = mean + self.__std = std + + def __call__(self, sample): + sample["image"] = (sample["image"] - self.__mean) / self.__std + + return sample + + +class PrepareForNet(object): + """Prepare sample for usage as network input. + """ + + def __init__(self): + pass + + def __call__(self, sample): + image = np.transpose(sample["image"], (2, 0, 1)) + sample["image"] = np.ascontiguousarray(image).astype(np.float32) + + if "mask" in sample: + sample["mask"] = sample["mask"].astype(np.float32) + sample["mask"] = np.ascontiguousarray(sample["mask"]) + + if "disparity" in sample: + disparity = sample["disparity"].astype(np.float32) + sample["disparity"] = np.ascontiguousarray(disparity) + + if "depth" in sample: + depth = sample["depth"].astype(np.float32) + sample["depth"] = np.ascontiguousarray(depth) + + return sample diff --git a/annotator/midas/midas/vit.py b/annotator/midas/midas/vit.py new file mode 100644 index 0000000000000000000000000000000000000000..ea46b1be88b261b0dec04f3da0256f5f66f88a74 --- /dev/null +++ b/annotator/midas/midas/vit.py @@ -0,0 +1,491 @@ +import torch +import torch.nn as nn +import timm +import types +import math +import torch.nn.functional as F + + +class Slice(nn.Module): + def __init__(self, start_index=1): + super(Slice, self).__init__() + self.start_index = start_index + + def forward(self, x): + return x[:, self.start_index :] + + +class AddReadout(nn.Module): + def __init__(self, start_index=1): + super(AddReadout, self).__init__() + self.start_index = start_index + + def forward(self, x): + if self.start_index == 2: + readout = (x[:, 0] + x[:, 1]) / 2 + else: + readout = x[:, 0] + return x[:, self.start_index :] + readout.unsqueeze(1) + + +class ProjectReadout(nn.Module): + def __init__(self, in_features, start_index=1): + super(ProjectReadout, self).__init__() + self.start_index = start_index + + self.project = nn.Sequential(nn.Linear(2 * in_features, in_features), nn.GELU()) + + def forward(self, x): + readout = x[:, 0].unsqueeze(1).expand_as(x[:, self.start_index :]) + features = torch.cat((x[:, self.start_index :], readout), -1) + + return self.project(features) + + +class Transpose(nn.Module): + def __init__(self, dim0, dim1): + super(Transpose, self).__init__() + self.dim0 = dim0 + self.dim1 = dim1 + + def forward(self, x): + x = x.transpose(self.dim0, self.dim1) + return x + + +def forward_vit(pretrained, x): + b, c, h, w = x.shape + + glob = pretrained.model.forward_flex(x) + + layer_1 = pretrained.activations["1"] + layer_2 = pretrained.activations["2"] + layer_3 = pretrained.activations["3"] + layer_4 = pretrained.activations["4"] + + layer_1 = pretrained.act_postprocess1[0:2](layer_1) + layer_2 = pretrained.act_postprocess2[0:2](layer_2) + layer_3 = pretrained.act_postprocess3[0:2](layer_3) + layer_4 = pretrained.act_postprocess4[0:2](layer_4) + + unflatten = nn.Sequential( + nn.Unflatten( + 2, + torch.Size( + [ + h // 
pretrained.model.patch_size[1], + w // pretrained.model.patch_size[0], + ] + ), + ) + ) + + if layer_1.ndim == 3: + layer_1 = unflatten(layer_1) + if layer_2.ndim == 3: + layer_2 = unflatten(layer_2) + if layer_3.ndim == 3: + layer_3 = unflatten(layer_3) + if layer_4.ndim == 3: + layer_4 = unflatten(layer_4) + + layer_1 = pretrained.act_postprocess1[3 : len(pretrained.act_postprocess1)](layer_1) + layer_2 = pretrained.act_postprocess2[3 : len(pretrained.act_postprocess2)](layer_2) + layer_3 = pretrained.act_postprocess3[3 : len(pretrained.act_postprocess3)](layer_3) + layer_4 = pretrained.act_postprocess4[3 : len(pretrained.act_postprocess4)](layer_4) + + return layer_1, layer_2, layer_3, layer_4 + + +def _resize_pos_embed(self, posemb, gs_h, gs_w): + posemb_tok, posemb_grid = ( + posemb[:, : self.start_index], + posemb[0, self.start_index :], + ) + + gs_old = int(math.sqrt(len(posemb_grid))) + + posemb_grid = posemb_grid.reshape(1, gs_old, gs_old, -1).permute(0, 3, 1, 2) + posemb_grid = F.interpolate(posemb_grid, size=(gs_h, gs_w), mode="bilinear") + posemb_grid = posemb_grid.permute(0, 2, 3, 1).reshape(1, gs_h * gs_w, -1) + + posemb = torch.cat([posemb_tok, posemb_grid], dim=1) + + return posemb + + +def forward_flex(self, x): + b, c, h, w = x.shape + + pos_embed = self._resize_pos_embed( + self.pos_embed, h // self.patch_size[1], w // self.patch_size[0] + ) + + B = x.shape[0] + + if hasattr(self.patch_embed, "backbone"): + x = self.patch_embed.backbone(x) + if isinstance(x, (list, tuple)): + x = x[-1] # last feature if backbone outputs list/tuple of features + + x = self.patch_embed.proj(x).flatten(2).transpose(1, 2) + + if getattr(self, "dist_token", None) is not None: + cls_tokens = self.cls_token.expand( + B, -1, -1 + ) # stole cls_tokens impl from Phil Wang, thanks + dist_token = self.dist_token.expand(B, -1, -1) + x = torch.cat((cls_tokens, dist_token, x), dim=1) + else: + cls_tokens = self.cls_token.expand( + B, -1, -1 + ) # stole cls_tokens impl from Phil Wang, thanks + x = torch.cat((cls_tokens, x), dim=1) + + x = x + pos_embed + x = self.pos_drop(x) + + for blk in self.blocks: + x = blk(x) + + x = self.norm(x) + + return x + + +activations = {} + + +def get_activation(name): + def hook(model, input, output): + activations[name] = output + + return hook + + +def get_readout_oper(vit_features, features, use_readout, start_index=1): + if use_readout == "ignore": + readout_oper = [Slice(start_index)] * len(features) + elif use_readout == "add": + readout_oper = [AddReadout(start_index)] * len(features) + elif use_readout == "project": + readout_oper = [ + ProjectReadout(vit_features, start_index) for out_feat in features + ] + else: + assert ( + False + ), "wrong operation for readout token, use_readout can be 'ignore', 'add', or 'project'" + + return readout_oper + + +def _make_vit_b16_backbone( + model, + features=[96, 192, 384, 768], + size=[384, 384], + hooks=[2, 5, 8, 11], + vit_features=768, + use_readout="ignore", + start_index=1, +): + pretrained = nn.Module() + + pretrained.model = model + pretrained.model.blocks[hooks[0]].register_forward_hook(get_activation("1")) + pretrained.model.blocks[hooks[1]].register_forward_hook(get_activation("2")) + pretrained.model.blocks[hooks[2]].register_forward_hook(get_activation("3")) + pretrained.model.blocks[hooks[3]].register_forward_hook(get_activation("4")) + + pretrained.activations = activations + + readout_oper = get_readout_oper(vit_features, features, use_readout, start_index) + + # 32, 48, 136, 384 + 
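+    # Each act_postprocess branch below projects the ViT token grid
+    # (vit_features channels) to one pyramid level: 4x and 2x transposed-conv
+    # upsampling for the two shallow taps, a plain 1x1 projection for the
+    # third, and a strided-conv downsampling for the deepest tap.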
pretrained.act_postprocess1 = nn.Sequential( + readout_oper[0], + Transpose(1, 2), + nn.Unflatten(2, torch.Size([size[0] // 16, size[1] // 16])), + nn.Conv2d( + in_channels=vit_features, + out_channels=features[0], + kernel_size=1, + stride=1, + padding=0, + ), + nn.ConvTranspose2d( + in_channels=features[0], + out_channels=features[0], + kernel_size=4, + stride=4, + padding=0, + bias=True, + dilation=1, + groups=1, + ), + ) + + pretrained.act_postprocess2 = nn.Sequential( + readout_oper[1], + Transpose(1, 2), + nn.Unflatten(2, torch.Size([size[0] // 16, size[1] // 16])), + nn.Conv2d( + in_channels=vit_features, + out_channels=features[1], + kernel_size=1, + stride=1, + padding=0, + ), + nn.ConvTranspose2d( + in_channels=features[1], + out_channels=features[1], + kernel_size=2, + stride=2, + padding=0, + bias=True, + dilation=1, + groups=1, + ), + ) + + pretrained.act_postprocess3 = nn.Sequential( + readout_oper[2], + Transpose(1, 2), + nn.Unflatten(2, torch.Size([size[0] // 16, size[1] // 16])), + nn.Conv2d( + in_channels=vit_features, + out_channels=features[2], + kernel_size=1, + stride=1, + padding=0, + ), + ) + + pretrained.act_postprocess4 = nn.Sequential( + readout_oper[3], + Transpose(1, 2), + nn.Unflatten(2, torch.Size([size[0] // 16, size[1] // 16])), + nn.Conv2d( + in_channels=vit_features, + out_channels=features[3], + kernel_size=1, + stride=1, + padding=0, + ), + nn.Conv2d( + in_channels=features[3], + out_channels=features[3], + kernel_size=3, + stride=2, + padding=1, + ), + ) + + pretrained.model.start_index = start_index + pretrained.model.patch_size = [16, 16] + + # We inject this function into the VisionTransformer instances so that + # we can use it with interpolated position embeddings without modifying the library source. + pretrained.model.forward_flex = types.MethodType(forward_flex, pretrained.model) + pretrained.model._resize_pos_embed = types.MethodType( + _resize_pos_embed, pretrained.model + ) + + return pretrained + + +def _make_pretrained_vitl16_384(pretrained, use_readout="ignore", hooks=None): + model = timm.create_model("vit_large_patch16_384", pretrained=pretrained) + + hooks = [5, 11, 17, 23] if hooks == None else hooks + return _make_vit_b16_backbone( + model, + features=[256, 512, 1024, 1024], + hooks=hooks, + vit_features=1024, + use_readout=use_readout, + ) + + +def _make_pretrained_vitb16_384(pretrained, use_readout="ignore", hooks=None): + model = timm.create_model("vit_base_patch16_384", pretrained=pretrained) + + hooks = [2, 5, 8, 11] if hooks == None else hooks + return _make_vit_b16_backbone( + model, features=[96, 192, 384, 768], hooks=hooks, use_readout=use_readout + ) + + +def _make_pretrained_deitb16_384(pretrained, use_readout="ignore", hooks=None): + model = timm.create_model("vit_deit_base_patch16_384", pretrained=pretrained) + + hooks = [2, 5, 8, 11] if hooks == None else hooks + return _make_vit_b16_backbone( + model, features=[96, 192, 384, 768], hooks=hooks, use_readout=use_readout + ) + + +def _make_pretrained_deitb16_distil_384(pretrained, use_readout="ignore", hooks=None): + model = timm.create_model( + "vit_deit_base_distilled_patch16_384", pretrained=pretrained + ) + + hooks = [2, 5, 8, 11] if hooks == None else hooks + return _make_vit_b16_backbone( + model, + features=[96, 192, 384, 768], + hooks=hooks, + use_readout=use_readout, + start_index=2, + ) + + +def _make_vit_b_rn50_backbone( + model, + features=[256, 512, 768, 768], + size=[384, 384], + hooks=[0, 1, 8, 11], + vit_features=768, + use_vit_only=False, + 
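+    # use_vit_only swaps the two ResNet-stage taps for ViT-block activations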
use_readout="ignore", + start_index=1, +): + pretrained = nn.Module() + + pretrained.model = model + + if use_vit_only == True: + pretrained.model.blocks[hooks[0]].register_forward_hook(get_activation("1")) + pretrained.model.blocks[hooks[1]].register_forward_hook(get_activation("2")) + else: + pretrained.model.patch_embed.backbone.stages[0].register_forward_hook( + get_activation("1") + ) + pretrained.model.patch_embed.backbone.stages[1].register_forward_hook( + get_activation("2") + ) + + pretrained.model.blocks[hooks[2]].register_forward_hook(get_activation("3")) + pretrained.model.blocks[hooks[3]].register_forward_hook(get_activation("4")) + + pretrained.activations = activations + + readout_oper = get_readout_oper(vit_features, features, use_readout, start_index) + + if use_vit_only == True: + pretrained.act_postprocess1 = nn.Sequential( + readout_oper[0], + Transpose(1, 2), + nn.Unflatten(2, torch.Size([size[0] // 16, size[1] // 16])), + nn.Conv2d( + in_channels=vit_features, + out_channels=features[0], + kernel_size=1, + stride=1, + padding=0, + ), + nn.ConvTranspose2d( + in_channels=features[0], + out_channels=features[0], + kernel_size=4, + stride=4, + padding=0, + bias=True, + dilation=1, + groups=1, + ), + ) + + pretrained.act_postprocess2 = nn.Sequential( + readout_oper[1], + Transpose(1, 2), + nn.Unflatten(2, torch.Size([size[0] // 16, size[1] // 16])), + nn.Conv2d( + in_channels=vit_features, + out_channels=features[1], + kernel_size=1, + stride=1, + padding=0, + ), + nn.ConvTranspose2d( + in_channels=features[1], + out_channels=features[1], + kernel_size=2, + stride=2, + padding=0, + bias=True, + dilation=1, + groups=1, + ), + ) + else: + pretrained.act_postprocess1 = nn.Sequential( + nn.Identity(), nn.Identity(), nn.Identity() + ) + pretrained.act_postprocess2 = nn.Sequential( + nn.Identity(), nn.Identity(), nn.Identity() + ) + + pretrained.act_postprocess3 = nn.Sequential( + readout_oper[2], + Transpose(1, 2), + nn.Unflatten(2, torch.Size([size[0] // 16, size[1] // 16])), + nn.Conv2d( + in_channels=vit_features, + out_channels=features[2], + kernel_size=1, + stride=1, + padding=0, + ), + ) + + pretrained.act_postprocess4 = nn.Sequential( + readout_oper[3], + Transpose(1, 2), + nn.Unflatten(2, torch.Size([size[0] // 16, size[1] // 16])), + nn.Conv2d( + in_channels=vit_features, + out_channels=features[3], + kernel_size=1, + stride=1, + padding=0, + ), + nn.Conv2d( + in_channels=features[3], + out_channels=features[3], + kernel_size=3, + stride=2, + padding=1, + ), + ) + + pretrained.model.start_index = start_index + pretrained.model.patch_size = [16, 16] + + # We inject this function into the VisionTransformer instances so that + # we can use it with interpolated position embeddings without modifying the library source. + pretrained.model.forward_flex = types.MethodType(forward_flex, pretrained.model) + + # We inject this function into the VisionTransformer instances so that + # we can use it with interpolated position embeddings without modifying the library source. 
+    pretrained.model._resize_pos_embed = types.MethodType(
+        _resize_pos_embed, pretrained.model
+    )
+
+    return pretrained
+
+
+def _make_pretrained_vitb_rn50_384(
+    pretrained, use_readout="ignore", hooks=None, use_vit_only=False
+):
+    model = timm.create_model("vit_base_resnet50_384", pretrained=pretrained)
+
+    hooks = [0, 1, 8, 11] if hooks == None else hooks
+    return _make_vit_b_rn50_backbone(
+        model,
+        features=[256, 512, 768, 768],
+        size=[384, 384],
+        hooks=hooks,
+        use_vit_only=use_vit_only,
+        use_readout=use_readout,
+    )
diff --git a/annotator/midas/utils.py b/annotator/midas/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..9a9d3b5b66370fa98da9e067ba53ead848ea9a59
--- /dev/null
+++ b/annotator/midas/utils.py
@@ -0,0 +1,189 @@
+"""Utils for monoDepth."""
+import sys
+import re
+import numpy as np
+import cv2
+import torch
+
+
+def read_pfm(path):
+    """Read pfm file.
+
+    Args:
+        path (str): path to file
+
+    Returns:
+        tuple: (data, scale)
+    """
+    with open(path, "rb") as file:
+
+        color = None
+        width = None
+        height = None
+        scale = None
+        endian = None
+
+        header = file.readline().rstrip()
+        if header.decode("ascii") == "PF":
+            color = True
+        elif header.decode("ascii") == "Pf":
+            color = False
+        else:
+            raise Exception("Not a PFM file: " + path)
+
+        dim_match = re.match(r"^(\d+)\s(\d+)\s$", file.readline().decode("ascii"))
+        if dim_match:
+            width, height = list(map(int, dim_match.groups()))
+        else:
+            raise Exception("Malformed PFM header.")
+
+        scale = float(file.readline().decode("ascii").rstrip())
+        if scale < 0:
+            # little-endian
+            endian = "<"
+            scale = -scale
+        else:
+            # big-endian
+            endian = ">"
+
+        data = np.fromfile(file, endian + "f")
+        shape = (height, width, 3) if color else (height, width)
+
+        data = np.reshape(data, shape)
+        data = np.flipud(data)
+
+        return data, scale
+
+
+def write_pfm(path, image, scale=1):
+    """Write pfm file.
+
+    Args:
+        path (str): path to file
+        image (array): data
+        scale (int, optional): Scale. Defaults to 1.
+    """
+
+    with open(path, "wb") as file:
+        color = None
+
+        if image.dtype.name != "float32":
+            raise Exception("Image dtype must be float32.")
+
+        image = np.flipud(image)
+
+        if len(image.shape) == 3 and image.shape[2] == 3:  # color image
+            color = True
+        elif (
+            len(image.shape) == 2 or len(image.shape) == 3 and image.shape[2] == 1
+        ):  # greyscale
+            color = False
+        else:
+            raise Exception("Image must have H x W x 3, H x W x 1 or H x W dimensions.")
+
+        file.write(("PF\n" if color else "Pf\n").encode())
+        file.write("%d %d\n".encode() % (image.shape[1], image.shape[0]))
+
+        endian = image.dtype.byteorder
+
+        if endian == "<" or (endian == "=" and sys.byteorder == "little"):
+            scale = -scale
+
+        file.write("%f\n".encode() % scale)
+
+        image.tofile(file)
+
+
+def read_image(path):
+    """Read image and output RGB image (0-1).
+
+    Args:
+        path (str): path to file
+
+    Returns:
+        array: RGB image (0-1)
+    """
+    img = cv2.imread(path)
+
+    if img.ndim == 2:
+        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
+
+    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) / 255.0
+
+    return img
+
+
+def resize_image(img):
+    """Resize image and make it fit for network.
+
+    Args:
+        img (array): image
+
+    Returns:
+        tensor: data ready for network
+    """
+    height_orig = img.shape[0]
+    width_orig = img.shape[1]
+
+    if width_orig > height_orig:
+        scale = width_orig / 384
+    else:
+        scale = height_orig / 384
+
+    height = (np.ceil(height_orig / scale / 32) * 32).astype(int)
+    width = (np.ceil(width_orig / scale / 32) * 32).astype(int)
+
+    img_resized = cv2.resize(img, (width, height), interpolation=cv2.INTER_AREA)
+
+    img_resized = (
+        torch.from_numpy(np.transpose(img_resized, (2, 0, 1))).contiguous().float()
+    )
+    img_resized = img_resized.unsqueeze(0)
+
+    return img_resized
+
+
+def resize_depth(depth, width, height):
+    """Resize depth map and bring to CPU (numpy).
+
+    Args:
+        depth (tensor): depth
+        width (int): image width
+        height (int): image height
+
+    Returns:
+        array: processed depth
+    """
+    depth = torch.squeeze(depth[0, :, :, :]).to("cpu")
+
+    depth_resized = cv2.resize(
+        depth.numpy(), (width, height), interpolation=cv2.INTER_CUBIC
+    )
+
+    return depth_resized
+
+
+def write_depth(path, depth, bits=1):
+    """Write depth map to pfm and png file.
+
+    Args:
+        path (str): filepath without extension
+        depth (array): depth
+    """
+    write_pfm(path + ".pfm", depth.astype(np.float32))
+
+    depth_min = depth.min()
+    depth_max = depth.max()
+
+    max_val = (2 ** (8 * bits)) - 1
+
+    if depth_max - depth_min > np.finfo("float").eps:
+        out = max_val * (depth - depth_min) / (depth_max - depth_min)
+    else:
+        out = np.zeros(depth.shape, dtype=depth.dtype)
+
+    if bits == 1:
+        cv2.imwrite(path + ".png", out.astype("uint8"))
+    elif bits == 2:
+        cv2.imwrite(path + ".png", out.astype("uint16"))
+
+    return
diff --git a/annotator/mlsd/__init__.py b/annotator/mlsd/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..42af28c682e781b30f691f65a475b53c9f3adc8b
--- /dev/null
+++ b/annotator/mlsd/__init__.py
@@ -0,0 +1,39 @@
+import cv2
+import numpy as np
+import torch
+import os
+
+from einops import rearrange
+from .models.mbv2_mlsd_tiny import MobileV2_MLSD_Tiny
+from .models.mbv2_mlsd_large import MobileV2_MLSD_Large
+from .utils import pred_lines
+
+from annotator.util import annotator_ckpts_path
+
+
+remote_model_path = "https://huggingface.co/lllyasviel/ControlNet/resolve/main/annotator/ckpts/mlsd_large_512_fp32.pth"
+
+
+class MLSDdetector:
+    def __init__(self):
+        model_path = os.path.join(annotator_ckpts_path, "mlsd_large_512_fp32.pth")
+        if not os.path.exists(model_path):
+            from basicsr.utils.download_util import load_file_from_url
+            load_file_from_url(remote_model_path, model_dir=annotator_ckpts_path)
+        model = MobileV2_MLSD_Large()
+        model.load_state_dict(torch.load(model_path), strict=True)
+        self.model = model.cuda().eval()
+
+    def __call__(self, input_image, thr_v, thr_d):
+        assert input_image.ndim == 3
+        img = input_image
+        img_output = np.zeros_like(img)
+        try:
+            with torch.no_grad():
+                lines = pred_lines(img, self.model, [img.shape[0], img.shape[1]], thr_v, thr_d)
+                for line in lines:
+                    x_start, y_start, x_end, y_end = [int(val) for val in line]
+                    cv2.line(img_output, (x_start, y_start), (x_end, y_end), [255, 255, 255], 1)
+        except Exception:
+            pass
+        return img_output[:, :, 0]
diff --git a/annotator/mlsd/models/mbv2_mlsd_large.py b/annotator/mlsd/models/mbv2_mlsd_large.py
new file mode 100644
index 0000000000000000000000000000000000000000..5b9799e7573ca41549b3c3b13ac47b906b369603
--- /dev/null
+++ b/annotator/mlsd/models/mbv2_mlsd_large.py
@@ -0,0 +1,292 @@
+import os
+import sys
+import torch
+import torch.nn as nn
+import
torch.utils.model_zoo as model_zoo +from torch.nn import functional as F + + +class BlockTypeA(nn.Module): + def __init__(self, in_c1, in_c2, out_c1, out_c2, upscale = True): + super(BlockTypeA, self).__init__() + self.conv1 = nn.Sequential( + nn.Conv2d(in_c2, out_c2, kernel_size=1), + nn.BatchNorm2d(out_c2), + nn.ReLU(inplace=True) + ) + self.conv2 = nn.Sequential( + nn.Conv2d(in_c1, out_c1, kernel_size=1), + nn.BatchNorm2d(out_c1), + nn.ReLU(inplace=True) + ) + self.upscale = upscale + + def forward(self, a, b): + b = self.conv1(b) + a = self.conv2(a) + if self.upscale: + b = F.interpolate(b, scale_factor=2.0, mode='bilinear', align_corners=True) + return torch.cat((a, b), dim=1) + + +class BlockTypeB(nn.Module): + def __init__(self, in_c, out_c): + super(BlockTypeB, self).__init__() + self.conv1 = nn.Sequential( + nn.Conv2d(in_c, in_c, kernel_size=3, padding=1), + nn.BatchNorm2d(in_c), + nn.ReLU() + ) + self.conv2 = nn.Sequential( + nn.Conv2d(in_c, out_c, kernel_size=3, padding=1), + nn.BatchNorm2d(out_c), + nn.ReLU() + ) + + def forward(self, x): + x = self.conv1(x) + x + x = self.conv2(x) + return x + +class BlockTypeC(nn.Module): + def __init__(self, in_c, out_c): + super(BlockTypeC, self).__init__() + self.conv1 = nn.Sequential( + nn.Conv2d(in_c, in_c, kernel_size=3, padding=5, dilation=5), + nn.BatchNorm2d(in_c), + nn.ReLU() + ) + self.conv2 = nn.Sequential( + nn.Conv2d(in_c, in_c, kernel_size=3, padding=1), + nn.BatchNorm2d(in_c), + nn.ReLU() + ) + self.conv3 = nn.Conv2d(in_c, out_c, kernel_size=1) + + def forward(self, x): + x = self.conv1(x) + x = self.conv2(x) + x = self.conv3(x) + return x + +def _make_divisible(v, divisor, min_value=None): + """ + This function is taken from the original tf repo. + It ensures that all layers have a channel number that is divisible by 8 + It can be seen here: + https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py + :param v: + :param divisor: + :param min_value: + :return: + """ + if min_value is None: + min_value = divisor + new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) + # Make sure that round down does not go down by more than 10%. 
+ if new_v < 0.9 * v: + new_v += divisor + return new_v + + +class ConvBNReLU(nn.Sequential): + def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1): + self.channel_pad = out_planes - in_planes + self.stride = stride + #padding = (kernel_size - 1) // 2 + + # TFLite uses slightly different padding than PyTorch + if stride == 2: + padding = 0 + else: + padding = (kernel_size - 1) // 2 + + super(ConvBNReLU, self).__init__( + nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding, groups=groups, bias=False), + nn.BatchNorm2d(out_planes), + nn.ReLU6(inplace=True) + ) + self.max_pool = nn.MaxPool2d(kernel_size=stride, stride=stride) + + + def forward(self, x): + # TFLite uses different padding + if self.stride == 2: + x = F.pad(x, (0, 1, 0, 1), "constant", 0) + #print(x.shape) + + for module in self: + if not isinstance(module, nn.MaxPool2d): + x = module(x) + return x + + +class InvertedResidual(nn.Module): + def __init__(self, inp, oup, stride, expand_ratio): + super(InvertedResidual, self).__init__() + self.stride = stride + assert stride in [1, 2] + + hidden_dim = int(round(inp * expand_ratio)) + self.use_res_connect = self.stride == 1 and inp == oup + + layers = [] + if expand_ratio != 1: + # pw + layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1)) + layers.extend([ + # dw + ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim), + # pw-linear + nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), + nn.BatchNorm2d(oup), + ]) + self.conv = nn.Sequential(*layers) + + def forward(self, x): + if self.use_res_connect: + return x + self.conv(x) + else: + return self.conv(x) + + +class MobileNetV2(nn.Module): + def __init__(self, pretrained=True): + """ + MobileNet V2 main class + Args: + num_classes (int): Number of classes + width_mult (float): Width multiplier - adjusts number of channels in each layer by this amount + inverted_residual_setting: Network structure + round_nearest (int): Round the number of channels in each layer to be a multiple of this number + Set to 1 to turn off rounding + block: Module specifying inverted residual building block for mobilenet + """ + super(MobileNetV2, self).__init__() + + block = InvertedResidual + input_channel = 32 + last_channel = 1280 + width_mult = 1.0 + round_nearest = 8 + + inverted_residual_setting = [ + # t, c, n, s + [1, 16, 1, 1], + [6, 24, 2, 2], + [6, 32, 3, 2], + [6, 64, 4, 2], + [6, 96, 3, 1], + #[6, 160, 3, 2], + #[6, 320, 1, 1], + ] + + # only check the first element, assuming user knows t,c,n,s are required + if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4: + raise ValueError("inverted_residual_setting should be non-empty " + "or a 4-element list, got {}".format(inverted_residual_setting)) + + # building first layer + input_channel = _make_divisible(input_channel * width_mult, round_nearest) + self.last_channel = _make_divisible(last_channel * max(1.0, width_mult), round_nearest) + features = [ConvBNReLU(4, input_channel, stride=2)] + # building inverted residual blocks + for t, c, n, s in inverted_residual_setting: + output_channel = _make_divisible(c * width_mult, round_nearest) + for i in range(n): + stride = s if i == 0 else 1 + features.append(block(input_channel, output_channel, stride, expand_ratio=t)) + input_channel = output_channel + + self.features = nn.Sequential(*features) + self.fpn_selected = [1, 3, 6, 10, 13] + # weight initialization + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode='fan_out') + 
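                # Conv2d layers in this backbone are created with bias=False, so this guard is a harmless no-op here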
if m.bias is not None: + nn.init.zeros_(m.bias) + elif isinstance(m, nn.BatchNorm2d): + nn.init.ones_(m.weight) + nn.init.zeros_(m.bias) + elif isinstance(m, nn.Linear): + nn.init.normal_(m.weight, 0, 0.01) + nn.init.zeros_(m.bias) + if pretrained: + self._load_pretrained_model() + + def _forward_impl(self, x): + # This exists since TorchScript doesn't support inheritance, so the superclass method + # (this one) needs to have a name other than `forward` that can be accessed in a subclass + fpn_features = [] + for i, f in enumerate(self.features): + if i > self.fpn_selected[-1]: + break + x = f(x) + if i in self.fpn_selected: + fpn_features.append(x) + + c1, c2, c3, c4, c5 = fpn_features + return c1, c2, c3, c4, c5 + + + def forward(self, x): + return self._forward_impl(x) + + def _load_pretrained_model(self): + pretrain_dict = model_zoo.load_url('https://download.pytorch.org/models/mobilenet_v2-b0353104.pth') + model_dict = {} + state_dict = self.state_dict() + for k, v in pretrain_dict.items(): + if k in state_dict: + model_dict[k] = v + state_dict.update(model_dict) + self.load_state_dict(state_dict) + + +class MobileV2_MLSD_Large(nn.Module): + def __init__(self): + super(MobileV2_MLSD_Large, self).__init__() + + self.backbone = MobileNetV2(pretrained=False) + ## A, B + self.block15 = BlockTypeA(in_c1= 64, in_c2= 96, + out_c1= 64, out_c2=64, + upscale=False) + self.block16 = BlockTypeB(128, 64) + + ## A, B + self.block17 = BlockTypeA(in_c1 = 32, in_c2 = 64, + out_c1= 64, out_c2= 64) + self.block18 = BlockTypeB(128, 64) + + ## A, B + self.block19 = BlockTypeA(in_c1=24, in_c2=64, + out_c1=64, out_c2=64) + self.block20 = BlockTypeB(128, 64) + + ## A, B, C + self.block21 = BlockTypeA(in_c1=16, in_c2=64, + out_c1=64, out_c2=64) + self.block22 = BlockTypeB(128, 64) + + self.block23 = BlockTypeC(64, 16) + + def forward(self, x): + c1, c2, c3, c4, c5 = self.backbone(x) + + x = self.block15(c4, c5) + x = self.block16(x) + + x = self.block17(c3, x) + x = self.block18(x) + + x = self.block19(c2, x) + x = self.block20(x) + + x = self.block21(c1, x) + x = self.block22(x) + x = self.block23(x) + x = x[:, 7:, :, :] + + return x \ No newline at end of file diff --git a/annotator/mlsd/models/mbv2_mlsd_tiny.py b/annotator/mlsd/models/mbv2_mlsd_tiny.py new file mode 100644 index 0000000000000000000000000000000000000000..e3ed633f2cc23ea1829a627fdb879ab39f641f83 --- /dev/null +++ b/annotator/mlsd/models/mbv2_mlsd_tiny.py @@ -0,0 +1,275 @@ +import os +import sys +import torch +import torch.nn as nn +import torch.utils.model_zoo as model_zoo +from torch.nn import functional as F + + +class BlockTypeA(nn.Module): + def __init__(self, in_c1, in_c2, out_c1, out_c2, upscale = True): + super(BlockTypeA, self).__init__() + self.conv1 = nn.Sequential( + nn.Conv2d(in_c2, out_c2, kernel_size=1), + nn.BatchNorm2d(out_c2), + nn.ReLU(inplace=True) + ) + self.conv2 = nn.Sequential( + nn.Conv2d(in_c1, out_c1, kernel_size=1), + nn.BatchNorm2d(out_c1), + nn.ReLU(inplace=True) + ) + self.upscale = upscale + + def forward(self, a, b): + b = self.conv1(b) + a = self.conv2(a) + b = F.interpolate(b, scale_factor=2.0, mode='bilinear', align_corners=True) + return torch.cat((a, b), dim=1) + + +class BlockTypeB(nn.Module): + def __init__(self, in_c, out_c): + super(BlockTypeB, self).__init__() + self.conv1 = nn.Sequential( + nn.Conv2d(in_c, in_c, kernel_size=3, padding=1), + nn.BatchNorm2d(in_c), + nn.ReLU() + ) + self.conv2 = nn.Sequential( + nn.Conv2d(in_c, out_c, kernel_size=3, padding=1), + nn.BatchNorm2d(out_c), + nn.ReLU() + ) 
+ + def forward(self, x): + x = self.conv1(x) + x + x = self.conv2(x) + return x + +class BlockTypeC(nn.Module): + def __init__(self, in_c, out_c): + super(BlockTypeC, self).__init__() + self.conv1 = nn.Sequential( + nn.Conv2d(in_c, in_c, kernel_size=3, padding=5, dilation=5), + nn.BatchNorm2d(in_c), + nn.ReLU() + ) + self.conv2 = nn.Sequential( + nn.Conv2d(in_c, in_c, kernel_size=3, padding=1), + nn.BatchNorm2d(in_c), + nn.ReLU() + ) + self.conv3 = nn.Conv2d(in_c, out_c, kernel_size=1) + + def forward(self, x): + x = self.conv1(x) + x = self.conv2(x) + x = self.conv3(x) + return x + +def _make_divisible(v, divisor, min_value=None): + """ + This function is taken from the original tf repo. + It ensures that all layers have a channel number that is divisible by 8 + It can be seen here: + https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py + :param v: + :param divisor: + :param min_value: + :return: + """ + if min_value is None: + min_value = divisor + new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) + # Make sure that round down does not go down by more than 10%. + if new_v < 0.9 * v: + new_v += divisor + return new_v + + +class ConvBNReLU(nn.Sequential): + def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1): + self.channel_pad = out_planes - in_planes + self.stride = stride + #padding = (kernel_size - 1) // 2 + + # TFLite uses slightly different padding than PyTorch + if stride == 2: + padding = 0 + else: + padding = (kernel_size - 1) // 2 + + super(ConvBNReLU, self).__init__( + nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding, groups=groups, bias=False), + nn.BatchNorm2d(out_planes), + nn.ReLU6(inplace=True) + ) + self.max_pool = nn.MaxPool2d(kernel_size=stride, stride=stride) + + + def forward(self, x): + # TFLite uses different padding + if self.stride == 2: + x = F.pad(x, (0, 1, 0, 1), "constant", 0) + #print(x.shape) + + for module in self: + if not isinstance(module, nn.MaxPool2d): + x = module(x) + return x + + +class InvertedResidual(nn.Module): + def __init__(self, inp, oup, stride, expand_ratio): + super(InvertedResidual, self).__init__() + self.stride = stride + assert stride in [1, 2] + + hidden_dim = int(round(inp * expand_ratio)) + self.use_res_connect = self.stride == 1 and inp == oup + + layers = [] + if expand_ratio != 1: + # pw + layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1)) + layers.extend([ + # dw + ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim), + # pw-linear + nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), + nn.BatchNorm2d(oup), + ]) + self.conv = nn.Sequential(*layers) + + def forward(self, x): + if self.use_res_connect: + return x + self.conv(x) + else: + return self.conv(x) + + +class MobileNetV2(nn.Module): + def __init__(self, pretrained=True): + """ + MobileNet V2 main class + Args: + num_classes (int): Number of classes + width_mult (float): Width multiplier - adjusts number of channels in each layer by this amount + inverted_residual_setting: Network structure + round_nearest (int): Round the number of channels in each layer to be a multiple of this number + Set to 1 to turn off rounding + block: Module specifying inverted residual building block for mobilenet + """ + super(MobileNetV2, self).__init__() + + block = InvertedResidual + input_channel = 32 + last_channel = 1280 + width_mult = 1.0 + round_nearest = 8 + + inverted_residual_setting = [ + # t, c, n, s + [1, 16, 1, 1], + [6, 24, 2, 2], + [6, 32, 3, 2], + [6, 64, 4, 2], + 
#[6, 96, 3, 1], + #[6, 160, 3, 2], + #[6, 320, 1, 1], + ] + + # only check the first element, assuming user knows t,c,n,s are required + if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4: + raise ValueError("inverted_residual_setting should be non-empty " + "or a 4-element list, got {}".format(inverted_residual_setting)) + + # building first layer + input_channel = _make_divisible(input_channel * width_mult, round_nearest) + self.last_channel = _make_divisible(last_channel * max(1.0, width_mult), round_nearest) + features = [ConvBNReLU(4, input_channel, stride=2)] + # building inverted residual blocks + for t, c, n, s in inverted_residual_setting: + output_channel = _make_divisible(c * width_mult, round_nearest) + for i in range(n): + stride = s if i == 0 else 1 + features.append(block(input_channel, output_channel, stride, expand_ratio=t)) + input_channel = output_channel + self.features = nn.Sequential(*features) + + self.fpn_selected = [3, 6, 10] + # weight initialization + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode='fan_out') + if m.bias is not None: + nn.init.zeros_(m.bias) + elif isinstance(m, nn.BatchNorm2d): + nn.init.ones_(m.weight) + nn.init.zeros_(m.bias) + elif isinstance(m, nn.Linear): + nn.init.normal_(m.weight, 0, 0.01) + nn.init.zeros_(m.bias) + + #if pretrained: + # self._load_pretrained_model() + + def _forward_impl(self, x): + # This exists since TorchScript doesn't support inheritance, so the superclass method + # (this one) needs to have a name other than `forward` that can be accessed in a subclass + fpn_features = [] + for i, f in enumerate(self.features): + if i > self.fpn_selected[-1]: + break + x = f(x) + if i in self.fpn_selected: + fpn_features.append(x) + + c2, c3, c4 = fpn_features + return c2, c3, c4 + + + def forward(self, x): + return self._forward_impl(x) + + def _load_pretrained_model(self): + pretrain_dict = model_zoo.load_url('https://download.pytorch.org/models/mobilenet_v2-b0353104.pth') + model_dict = {} + state_dict = self.state_dict() + for k, v in pretrain_dict.items(): + if k in state_dict: + model_dict[k] = v + state_dict.update(model_dict) + self.load_state_dict(state_dict) + + +class MobileV2_MLSD_Tiny(nn.Module): + def __init__(self): + super(MobileV2_MLSD_Tiny, self).__init__() + + self.backbone = MobileNetV2(pretrained=True) + + self.block12 = BlockTypeA(in_c1= 32, in_c2= 64, + out_c1= 64, out_c2=64) + self.block13 = BlockTypeB(128, 64) + + self.block14 = BlockTypeA(in_c1 = 24, in_c2 = 64, + out_c1= 32, out_c2= 32) + self.block15 = BlockTypeB(64, 64) + + self.block16 = BlockTypeC(64, 16) + + def forward(self, x): + c2, c3, c4 = self.backbone(x) + + x = self.block12(c3, c4) + x = self.block13(x) + x = self.block14(c2, x) + x = self.block15(x) + x = self.block16(x) + x = x[:, 7:, :, :] + #print(x.shape) + x = F.interpolate(x, scale_factor=2.0, mode='bilinear', align_corners=True) + + return x \ No newline at end of file diff --git a/annotator/mlsd/utils.py b/annotator/mlsd/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..ae3cf9420a33a4abae27c48ac4b90938c7d63cc3 --- /dev/null +++ b/annotator/mlsd/utils.py @@ -0,0 +1,580 @@ +''' +modified by lihaoweicv +pytorch version +''' + +''' +M-LSD +Copyright 2021-present NAVER Corp. 
+Apache License v2.0 +''' + +import os +import numpy as np +import cv2 +import torch +from torch.nn import functional as F + + +def deccode_output_score_and_ptss(tpMap, topk_n = 200, ksize = 5): + ''' + tpMap: + center: tpMap[1, 0, :, :] + displacement: tpMap[1, 1:5, :, :] + ''' + b, c, h, w = tpMap.shape + assert b==1, 'only support bsize==1' + displacement = tpMap[:, 1:5, :, :][0] + center = tpMap[:, 0, :, :] + heat = torch.sigmoid(center) + hmax = F.max_pool2d( heat, (ksize, ksize), stride=1, padding=(ksize-1)//2) + keep = (hmax == heat).float() + heat = heat * keep + heat = heat.reshape(-1, ) + + scores, indices = torch.topk(heat, topk_n, dim=-1, largest=True) + yy = torch.floor_divide(indices, w).unsqueeze(-1) + xx = torch.fmod(indices, w).unsqueeze(-1) + ptss = torch.cat((yy, xx),dim=-1) + + ptss = ptss.detach().cpu().numpy() + scores = scores.detach().cpu().numpy() + displacement = displacement.detach().cpu().numpy() + displacement = displacement.transpose((1,2,0)) + return ptss, scores, displacement + + +def pred_lines(image, model, + input_shape=[512, 512], + score_thr=0.10, + dist_thr=20.0): + h, w, _ = image.shape + h_ratio, w_ratio = [h / input_shape[0], w / input_shape[1]] + + resized_image = np.concatenate([cv2.resize(image, (input_shape[1], input_shape[0]), interpolation=cv2.INTER_AREA), + np.ones([input_shape[0], input_shape[1], 1])], axis=-1) + + resized_image = resized_image.transpose((2,0,1)) + batch_image = np.expand_dims(resized_image, axis=0).astype('float32') + batch_image = (batch_image / 127.5) - 1.0 + + batch_image = torch.from_numpy(batch_image).float().cuda() + outputs = model(batch_image) + pts, pts_score, vmap = deccode_output_score_and_ptss(outputs, 200, 3) + start = vmap[:, :, :2] + end = vmap[:, :, 2:] + dist_map = np.sqrt(np.sum((start - end) ** 2, axis=-1)) + + segments_list = [] + for center, score in zip(pts, pts_score): + y, x = center + distance = dist_map[y, x] + if score > score_thr and distance > dist_thr: + disp_x_start, disp_y_start, disp_x_end, disp_y_end = vmap[y, x, :] + x_start = x + disp_x_start + y_start = y + disp_y_start + x_end = x + disp_x_end + y_end = y + disp_y_end + segments_list.append([x_start, y_start, x_end, y_end]) + + lines = 2 * np.array(segments_list) # 256 > 512 + lines[:, 0] = lines[:, 0] * w_ratio + lines[:, 1] = lines[:, 1] * h_ratio + lines[:, 2] = lines[:, 2] * w_ratio + lines[:, 3] = lines[:, 3] * h_ratio + + return lines + + +def pred_squares(image, + model, + input_shape=[512, 512], + params={'score': 0.06, + 'outside_ratio': 0.28, + 'inside_ratio': 0.45, + 'w_overlap': 0.0, + 'w_degree': 1.95, + 'w_length': 0.0, + 'w_area': 1.86, + 'w_center': 0.14}): + ''' + shape = [height, width] + ''' + h, w, _ = image.shape + original_shape = [h, w] + + resized_image = np.concatenate([cv2.resize(image, (input_shape[0], input_shape[1]), interpolation=cv2.INTER_AREA), + np.ones([input_shape[0], input_shape[1], 1])], axis=-1) + resized_image = resized_image.transpose((2, 0, 1)) + batch_image = np.expand_dims(resized_image, axis=0).astype('float32') + batch_image = (batch_image / 127.5) - 1.0 + + batch_image = torch.from_numpy(batch_image).float().cuda() + outputs = model(batch_image) + + pts, pts_score, vmap = deccode_output_score_and_ptss(outputs, 200, 3) + start = vmap[:, :, :2] # (x, y) + end = vmap[:, :, 2:] # (x, y) + dist_map = np.sqrt(np.sum((start - end) ** 2, axis=-1)) + + junc_list = [] + segments_list = [] + for junc, score in zip(pts, pts_score): + y, x = junc + distance = dist_map[y, x] + if score > params['score'] 
and distance > 20.0: + junc_list.append([x, y]) + disp_x_start, disp_y_start, disp_x_end, disp_y_end = vmap[y, x, :] + d_arrow = 1.0 + x_start = x + d_arrow * disp_x_start + y_start = y + d_arrow * disp_y_start + x_end = x + d_arrow * disp_x_end + y_end = y + d_arrow * disp_y_end + segments_list.append([x_start, y_start, x_end, y_end]) + + segments = np.array(segments_list) + + ####### post processing for squares + # 1. get unique lines + point = np.array([[0, 0]]) + point = point[0] + start = segments[:, :2] + end = segments[:, 2:] + diff = start - end + a = diff[:, 1] + b = -diff[:, 0] + c = a * start[:, 0] + b * start[:, 1] + + d = np.abs(a * point[0] + b * point[1] - c) / np.sqrt(a ** 2 + b ** 2 + 1e-10) + theta = np.arctan2(diff[:, 0], diff[:, 1]) * 180 / np.pi + theta[theta < 0.0] += 180 + hough = np.concatenate([d[:, None], theta[:, None]], axis=-1) + + d_quant = 1 + theta_quant = 2 + hough[:, 0] //= d_quant + hough[:, 1] //= theta_quant + _, indices, counts = np.unique(hough, axis=0, return_index=True, return_counts=True) + + acc_map = np.zeros([512 // d_quant + 1, 360 // theta_quant + 1], dtype='float32') + idx_map = np.zeros([512 // d_quant + 1, 360 // theta_quant + 1], dtype='int32') - 1 + yx_indices = hough[indices, :].astype('int32') + acc_map[yx_indices[:, 0], yx_indices[:, 1]] = counts + idx_map[yx_indices[:, 0], yx_indices[:, 1]] = indices + + acc_map_np = acc_map + # acc_map = acc_map[None, :, :, None] + # + # ### fast suppression using tensorflow op + # acc_map = tf.constant(acc_map, dtype=tf.float32) + # max_acc_map = tf.keras.layers.MaxPool2D(pool_size=(5, 5), strides=1, padding='same')(acc_map) + # acc_map = acc_map * tf.cast(tf.math.equal(acc_map, max_acc_map), tf.float32) + # flatten_acc_map = tf.reshape(acc_map, [1, -1]) + # topk_values, topk_indices = tf.math.top_k(flatten_acc_map, k=len(pts)) + # _, h, w, _ = acc_map.shape + # y = tf.expand_dims(topk_indices // w, axis=-1) + # x = tf.expand_dims(topk_indices % w, axis=-1) + # yx = tf.concat([y, x], axis=-1) + + ### fast suppression using pytorch op + acc_map = torch.from_numpy(acc_map_np).unsqueeze(0).unsqueeze(0) + _,_, h, w = acc_map.shape + max_acc_map = F.max_pool2d(acc_map,kernel_size=5, stride=1, padding=2) + acc_map = acc_map * ( (acc_map == max_acc_map).float() ) + flatten_acc_map = acc_map.reshape([-1, ]) + + scores, indices = torch.topk(flatten_acc_map, len(pts), dim=-1, largest=True) + yy = torch.div(indices, w, rounding_mode='floor').unsqueeze(-1) + xx = torch.fmod(indices, w).unsqueeze(-1) + yx = torch.cat((yy, xx), dim=-1) + + yx = yx.detach().cpu().numpy() + + topk_values = scores.detach().cpu().numpy() + indices = idx_map[yx[:, 0], yx[:, 1]] + basis = 5 // 2 + + merged_segments = [] + for yx_pt, max_indice, value in zip(yx, indices, topk_values): + y, x = yx_pt + if max_indice == -1 or value == 0: + continue + segment_list = [] + for y_offset in range(-basis, basis + 1): + for x_offset in range(-basis, basis + 1): + indice = idx_map[y + y_offset, x + x_offset] + cnt = int(acc_map_np[y + y_offset, x + x_offset]) + if indice != -1: + segment_list.append(segments[indice]) + if cnt > 1: + check_cnt = 1 + current_hough = hough[indice] + for new_indice, new_hough in enumerate(hough): + if (current_hough == new_hough).all() and indice != new_indice: + segment_list.append(segments[new_indice]) + check_cnt += 1 + if check_cnt == cnt: + break + group_segments = np.array(segment_list).reshape([-1, 2]) + sorted_group_segments = np.sort(group_segments, axis=0) + x_min, y_min = sorted_group_segments[0, :] + 
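        # endpoint coordinates of the grouped segments are sorted per column, so rows 0 and -1 give the bounding-box corners of the merged line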
x_max, y_max = sorted_group_segments[-1, :]
+
+        deg = theta[max_indice]
+        if deg >= 90:
+            merged_segments.append([x_min, y_max, x_max, y_min])
+        else:
+            merged_segments.append([x_min, y_min, x_max, y_max])
+
+    # 2. get intersections
+    new_segments = np.array(merged_segments)  # (x1, y1, x2, y2)
+    start = new_segments[:, :2]  # (x1, y1)
+    end = new_segments[:, 2:]  # (x2, y2)
+    new_centers = (start + end) / 2.0
+    diff = start - end
+    dist_segments = np.sqrt(np.sum(diff ** 2, axis=-1))
+
+    # ax + by = c
+    a = diff[:, 1]
+    b = -diff[:, 0]
+    c = a * start[:, 0] + b * start[:, 1]
+    pre_det = a[:, None] * b[None, :]
+    det = pre_det - np.transpose(pre_det)
+
+    pre_inter_y = a[:, None] * c[None, :]
+    inter_y = (pre_inter_y - np.transpose(pre_inter_y)) / (det + 1e-10)
+    pre_inter_x = c[:, None] * b[None, :]
+    inter_x = (pre_inter_x - np.transpose(pre_inter_x)) / (det + 1e-10)
+    inter_pts = np.concatenate([inter_x[:, :, None], inter_y[:, :, None]], axis=-1).astype('int32')
+
+    # 3. get corner information
+    # 3.1 get distance
+    '''
+    dist_segments:
+        | dist(0), dist(1), dist(2), ...|
+    dist_inter_to_segment1:
+        | dist(inter,0), dist(inter,0), dist(inter,0), ... |
+        | dist(inter,1), dist(inter,1), dist(inter,1), ... |
+        ...
+    dist_inter_to_segment2:
+        | dist(inter,0), dist(inter,1), dist(inter,2), ... |
+        | dist(inter,0), dist(inter,1), dist(inter,2), ... |
+        ...
+    '''
+
+    dist_inter_to_segment1_start = np.sqrt(
+        np.sum(((inter_pts - start[:, None, :]) ** 2), axis=-1, keepdims=True))  # [n_batch, n_batch, 1]
+    dist_inter_to_segment1_end = np.sqrt(
+        np.sum(((inter_pts - end[:, None, :]) ** 2), axis=-1, keepdims=True))  # [n_batch, n_batch, 1]
+    dist_inter_to_segment2_start = np.sqrt(
+        np.sum(((inter_pts - start[None, :, :]) ** 2), axis=-1, keepdims=True))  # [n_batch, n_batch, 1]
+    dist_inter_to_segment2_end = np.sqrt(
+        np.sum(((inter_pts - end[None, :, :]) ** 2), axis=-1, keepdims=True))  # [n_batch, n_batch, 1]
+
+    # sort ascending
+    dist_inter_to_segment1 = np.sort(
+        np.concatenate([dist_inter_to_segment1_start, dist_inter_to_segment1_end], axis=-1),
+        axis=-1)  # [n_batch, n_batch, 2]
+    dist_inter_to_segment2 = np.sort(
+        np.concatenate([dist_inter_to_segment2_start, dist_inter_to_segment2_end], axis=-1),
+        axis=-1)  # [n_batch, n_batch, 2]
+
+    # 3.2 get degree
+    inter_to_start = new_centers[:, None, :] - inter_pts
+    deg_inter_to_start = np.arctan2(inter_to_start[:, :, 1], inter_to_start[:, :, 0]) * 180 / np.pi
+    deg_inter_to_start[deg_inter_to_start < 0.0] += 360
+    inter_to_end = new_centers[None, :, :] - inter_pts
+    deg_inter_to_end = np.arctan2(inter_to_end[:, :, 1], inter_to_end[:, :, 0]) * 180 / np.pi
+    deg_inter_to_end[deg_inter_to_end < 0.0] += 360
+
+    '''
+    B -- G
+    |    |
+    C -- R
+    B : blue / G: green / C: cyan / R: red
+
+    0 -- 1
+    |    |
+    3 -- 2
+    '''
+    # rename variables
+    deg1_map, deg2_map = deg_inter_to_start, deg_inter_to_end
+    # sort deg ascending
+    deg_sort = np.sort(np.concatenate([deg1_map[:, :, None], deg2_map[:, :, None]], axis=-1), axis=-1)
+
+    deg_diff_map = np.abs(deg1_map - deg2_map)
+    # we only consider the smaller angle of intersection
+    deg_diff_map[deg_diff_map > 180] = 360 - deg_diff_map[deg_diff_map > 180]
+
+    # define available degree range
+    deg_range = [60, 120]
+
+    corner_dict = {corner_info: [] for corner_info in range(4)}
+    inter_points = []
+    for i in range(inter_pts.shape[0]):
+        for j in range(i + 1, inter_pts.shape[1]):
+            # i, j > line index, always i < j
+            x, y = inter_pts[i, j, :]
+            deg1, deg2 = deg_sort[i, j, :]
+            deg_diff = deg_diff_map[i, j]
+
+            check_degree =
deg_diff > deg_range[0] and deg_diff < deg_range[1] + + outside_ratio = params['outside_ratio'] # over ratio >>> drop it! + inside_ratio = params['inside_ratio'] # over ratio >>> drop it! + check_distance = ((dist_inter_to_segment1[i, j, 1] >= dist_segments[i] and \ + dist_inter_to_segment1[i, j, 0] <= dist_segments[i] * outside_ratio) or \ + (dist_inter_to_segment1[i, j, 1] <= dist_segments[i] and \ + dist_inter_to_segment1[i, j, 0] <= dist_segments[i] * inside_ratio)) and \ + ((dist_inter_to_segment2[i, j, 1] >= dist_segments[j] and \ + dist_inter_to_segment2[i, j, 0] <= dist_segments[j] * outside_ratio) or \ + (dist_inter_to_segment2[i, j, 1] <= dist_segments[j] and \ + dist_inter_to_segment2[i, j, 0] <= dist_segments[j] * inside_ratio)) + + if check_degree and check_distance: + corner_info = None + + if (deg1 >= 0 and deg1 <= 45 and deg2 >= 45 and deg2 <= 120) or \ + (deg2 >= 315 and deg1 >= 45 and deg1 <= 120): + corner_info, color_info = 0, 'blue' + elif (deg1 >= 45 and deg1 <= 125 and deg2 >= 125 and deg2 <= 225): + corner_info, color_info = 1, 'green' + elif (deg1 >= 125 and deg1 <= 225 and deg2 >= 225 and deg2 <= 315): + corner_info, color_info = 2, 'black' + elif (deg1 >= 0 and deg1 <= 45 and deg2 >= 225 and deg2 <= 315) or \ + (deg2 >= 315 and deg1 >= 225 and deg1 <= 315): + corner_info, color_info = 3, 'cyan' + else: + corner_info, color_info = 4, 'red' # we don't use it + continue + + corner_dict[corner_info].append([x, y, i, j]) + inter_points.append([x, y]) + + square_list = [] + connect_list = [] + segments_list = [] + for corner0 in corner_dict[0]: + for corner1 in corner_dict[1]: + connect01 = False + for corner0_line in corner0[2:]: + if corner0_line in corner1[2:]: + connect01 = True + break + if connect01: + for corner2 in corner_dict[2]: + connect12 = False + for corner1_line in corner1[2:]: + if corner1_line in corner2[2:]: + connect12 = True + break + if connect12: + for corner3 in corner_dict[3]: + connect23 = False + for corner2_line in corner2[2:]: + if corner2_line in corner3[2:]: + connect23 = True + break + if connect23: + for corner3_line in corner3[2:]: + if corner3_line in corner0[2:]: + # SQUARE!!! + ''' + 0 -- 1 + | | + 3 -- 2 + square_list: + order: 0 > 1 > 2 > 3 + | x0, y0, x1, y1, x2, y2, x3, y3 | + | x0, y0, x1, y1, x2, y2, x3, y3 | + ... + connect_list: + order: 01 > 12 > 23 > 30 + | line_idx01, line_idx12, line_idx23, line_idx30 | + | line_idx01, line_idx12, line_idx23, line_idx30 | + ... + segments_list: + order: 0 > 1 > 2 > 3 + | line_idx0_i, line_idx0_j, line_idx1_i, line_idx1_j, line_idx2_i, line_idx2_j, line_idx3_i, line_idx3_j | + | line_idx0_i, line_idx0_j, line_idx1_i, line_idx1_j, line_idx2_i, line_idx2_j, line_idx3_i, line_idx3_j | + ... 
+ ''' + square_list.append(corner0[:2] + corner1[:2] + corner2[:2] + corner3[:2]) + connect_list.append([corner0_line, corner1_line, corner2_line, corner3_line]) + segments_list.append(corner0[2:] + corner1[2:] + corner2[2:] + corner3[2:]) + + def check_outside_inside(segments_info, connect_idx): + # return 'outside or inside', min distance, cover_param, peri_param + if connect_idx == segments_info[0]: + check_dist_mat = dist_inter_to_segment1 + else: + check_dist_mat = dist_inter_to_segment2 + + i, j = segments_info + min_dist, max_dist = check_dist_mat[i, j, :] + connect_dist = dist_segments[connect_idx] + if max_dist > connect_dist: + return 'outside', min_dist, 0, 1 + else: + return 'inside', min_dist, -1, -1 + + top_square = None + + try: + map_size = input_shape[0] / 2 + squares = np.array(square_list).reshape([-1, 4, 2]) + score_array = [] + connect_array = np.array(connect_list) + segments_array = np.array(segments_list).reshape([-1, 4, 2]) + + # get degree of corners: + squares_rollup = np.roll(squares, 1, axis=1) + squares_rolldown = np.roll(squares, -1, axis=1) + vec1 = squares_rollup - squares + normalized_vec1 = vec1 / (np.linalg.norm(vec1, axis=-1, keepdims=True) + 1e-10) + vec2 = squares_rolldown - squares + normalized_vec2 = vec2 / (np.linalg.norm(vec2, axis=-1, keepdims=True) + 1e-10) + inner_products = np.sum(normalized_vec1 * normalized_vec2, axis=-1) # [n_squares, 4] + squares_degree = np.arccos(inner_products) * 180 / np.pi # [n_squares, 4] + + # get square score + overlap_scores = [] + degree_scores = [] + length_scores = [] + + for connects, segments, square, degree in zip(connect_array, segments_array, squares, squares_degree): + ''' + 0 -- 1 + | | + 3 -- 2 + + # segments: [4, 2] + # connects: [4] + ''' + + ###################################### OVERLAP SCORES + cover = 0 + perimeter = 0 + # check 0 > 1 > 2 > 3 + square_length = [] + + for start_idx in range(4): + end_idx = (start_idx + 1) % 4 + + connect_idx = connects[start_idx] # segment idx of segment01 + start_segments = segments[start_idx] + end_segments = segments[end_idx] + + start_point = square[start_idx] + end_point = square[end_idx] + + # check whether outside or inside + start_position, start_min, start_cover_param, start_peri_param = check_outside_inside(start_segments, + connect_idx) + end_position, end_min, end_cover_param, end_peri_param = check_outside_inside(end_segments, connect_idx) + + cover += dist_segments[connect_idx] + start_cover_param * start_min + end_cover_param * end_min + perimeter += dist_segments[connect_idx] + start_peri_param * start_min + end_peri_param * end_min + + square_length.append( + dist_segments[connect_idx] + start_peri_param * start_min + end_peri_param * end_min) + + overlap_scores.append(cover / perimeter) + ###################################### + ###################################### DEGREE SCORES + ''' + deg0 vs deg2 + deg1 vs deg3 + ''' + deg0, deg1, deg2, deg3 = degree + deg_ratio1 = deg0 / deg2 + if deg_ratio1 > 1.0: + deg_ratio1 = 1 / deg_ratio1 + deg_ratio2 = deg1 / deg3 + if deg_ratio2 > 1.0: + deg_ratio2 = 1 / deg_ratio2 + degree_scores.append((deg_ratio1 + deg_ratio2) / 2) + ###################################### + ###################################### LENGTH SCORES + ''' + len0 vs len2 + len1 vs len3 + ''' + len0, len1, len2, len3 = square_length + len_ratio1 = len0 / len2 if len2 > len0 else len2 / len0 + len_ratio2 = len1 / len3 if len3 > len1 else len3 / len1 + length_scores.append((len_ratio1 + len_ratio2) / 2) + + 
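            # each ratio is folded into (0, 1]; 1.0 means opposite sides (or opposite corner angles) match exactly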
###################################### + + overlap_scores = np.array(overlap_scores) + overlap_scores /= np.max(overlap_scores) + + degree_scores = np.array(degree_scores) + # degree_scores /= np.max(degree_scores) + + length_scores = np.array(length_scores) + + ###################################### AREA SCORES + area_scores = np.reshape(squares, [-1, 4, 2]) + area_x = area_scores[:, :, 0] + area_y = area_scores[:, :, 1] + correction = area_x[:, -1] * area_y[:, 0] - area_y[:, -1] * area_x[:, 0] + area_scores = np.sum(area_x[:, :-1] * area_y[:, 1:], axis=-1) - np.sum(area_y[:, :-1] * area_x[:, 1:], axis=-1) + area_scores = 0.5 * np.abs(area_scores + correction) + area_scores /= (map_size * map_size) # np.max(area_scores) + ###################################### + + ###################################### CENTER SCORES + centers = np.array([[256 // 2, 256 // 2]], dtype='float32') # [1, 2] + # squares: [n, 4, 2] + square_centers = np.mean(squares, axis=1) # [n, 2] + center2center = np.sqrt(np.sum((centers - square_centers) ** 2)) + center_scores = center2center / (map_size / np.sqrt(2.0)) + + ''' + score_w = [overlap, degree, area, center, length] + ''' + score_w = [0.0, 1.0, 10.0, 0.5, 1.0] + score_array = params['w_overlap'] * overlap_scores \ + + params['w_degree'] * degree_scores \ + + params['w_area'] * area_scores \ + - params['w_center'] * center_scores \ + + params['w_length'] * length_scores + + best_square = [] + + sorted_idx = np.argsort(score_array)[::-1] + score_array = score_array[sorted_idx] + squares = squares[sorted_idx] + + except Exception as e: + pass + + '''return list + merged_lines, squares, scores + ''' + + try: + new_segments[:, 0] = new_segments[:, 0] * 2 / input_shape[1] * original_shape[1] + new_segments[:, 1] = new_segments[:, 1] * 2 / input_shape[0] * original_shape[0] + new_segments[:, 2] = new_segments[:, 2] * 2 / input_shape[1] * original_shape[1] + new_segments[:, 3] = new_segments[:, 3] * 2 / input_shape[0] * original_shape[0] + except: + new_segments = [] + + try: + squares[:, :, 0] = squares[:, :, 0] * 2 / input_shape[1] * original_shape[1] + squares[:, :, 1] = squares[:, :, 1] * 2 / input_shape[0] * original_shape[0] + except: + squares = [] + score_array = [] + + try: + inter_points = np.array(inter_points) + inter_points[:, 0] = inter_points[:, 0] * 2 / input_shape[1] * original_shape[1] + inter_points[:, 1] = inter_points[:, 1] * 2 / input_shape[0] * original_shape[0] + except: + inter_points = [] + + return new_segments, squares, score_array, inter_points diff --git a/annotator/openpose/__init__.py b/annotator/openpose/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8c26f1b37dae854f51da938da2fa67a8ef48ce5a --- /dev/null +++ b/annotator/openpose/__init__.py @@ -0,0 +1,44 @@ +import os +os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE" + +import torch +import numpy as np +from . 
import util +from .body import Body +from .hand import Hand +from annotator.util import annotator_ckpts_path + + +body_model_path = "https://huggingface.co/lllyasviel/ControlNet/resolve/main/annotator/ckpts/body_pose_model.pth" +hand_model_path = "https://huggingface.co/lllyasviel/ControlNet/resolve/main/annotator/ckpts/hand_pose_model.pth" + + +class OpenposeDetector: + def __init__(self): + body_modelpath = os.path.join(annotator_ckpts_path, "body_pose_model.pth") + hand_modelpath = os.path.join(annotator_ckpts_path, "hand_pose_model.pth") + + if not os.path.exists(hand_modelpath): + from basicsr.utils.download_util import load_file_from_url + load_file_from_url(body_model_path, model_dir=annotator_ckpts_path) + load_file_from_url(hand_model_path, model_dir=annotator_ckpts_path) + + self.body_estimation = Body(body_modelpath) + self.hand_estimation = Hand(hand_modelpath) + + def __call__(self, oriImg, hand=False): + oriImg = oriImg[:, :, ::-1].copy() + with torch.no_grad(): + candidate, subset = self.body_estimation(oriImg) + canvas = np.zeros_like(oriImg) + canvas = util.draw_bodypose(canvas, candidate, subset) + if hand: + hands_list = util.handDetect(candidate, subset, oriImg) + all_hand_peaks = [] + for x, y, w, is_left in hands_list: + peaks = self.hand_estimation(oriImg[y:y+w, x:x+w, :]) + peaks[:, 0] = np.where(peaks[:, 0] == 0, peaks[:, 0], peaks[:, 0] + x) + peaks[:, 1] = np.where(peaks[:, 1] == 0, peaks[:, 1], peaks[:, 1] + y) + all_hand_peaks.append(peaks) + canvas = util.draw_handpose(canvas, all_hand_peaks) + return canvas, dict(candidate=candidate.tolist(), subset=subset.tolist()) diff --git a/annotator/openpose/body.py b/annotator/openpose/body.py new file mode 100644 index 0000000000000000000000000000000000000000..7c3cf7a388b4ac81004524e64125e383bdd455bd --- /dev/null +++ b/annotator/openpose/body.py @@ -0,0 +1,219 @@ +import cv2 +import numpy as np +import math +import time +from scipy.ndimage.filters import gaussian_filter +import matplotlib.pyplot as plt +import matplotlib +import torch +from torchvision import transforms + +from . 
import util
+from .model import bodypose_model
+
+class Body(object):
+    def __init__(self, model_path):
+        self.model = bodypose_model()
+        if torch.cuda.is_available():
+            self.model = self.model.cuda()
+            print('cuda')
+        model_dict = util.transfer(self.model, torch.load(model_path))
+        self.model.load_state_dict(model_dict)
+        self.model.eval()
+
+    def __call__(self, oriImg):
+        # scale_search = [0.5, 1.0, 1.5, 2.0]
+        scale_search = [0.5]
+        boxsize = 368
+        stride = 8
+        padValue = 128
+        thre1 = 0.1
+        thre2 = 0.05
+        multiplier = [x * boxsize / oriImg.shape[0] for x in scale_search]
+        heatmap_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 19))
+        paf_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 38))
+
+        for m in range(len(multiplier)):
+            scale = multiplier[m]
+            imageToTest = cv2.resize(oriImg, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
+            imageToTest_padded, pad = util.padRightDownCorner(imageToTest, stride, padValue)
+            im = np.transpose(np.float32(imageToTest_padded[:, :, :, np.newaxis]), (3, 2, 0, 1)) / 256 - 0.5
+            im = np.ascontiguousarray(im)
+
+            data = torch.from_numpy(im).float()
+            if torch.cuda.is_available():
+                data = data.cuda()
+            # data = data.permute([2, 0, 1]).unsqueeze(0).float()
+            with torch.no_grad():
+                Mconv7_stage6_L1, Mconv7_stage6_L2 = self.model(data)
+            Mconv7_stage6_L1 = Mconv7_stage6_L1.cpu().numpy()
+            Mconv7_stage6_L2 = Mconv7_stage6_L2.cpu().numpy()
+
+            # extract outputs, resize, and remove padding
+            # heatmap = np.transpose(np.squeeze(net.blobs[output_blobs.keys()[1]].data), (1, 2, 0))  # output 1 is heatmaps
+            heatmap = np.transpose(np.squeeze(Mconv7_stage6_L2), (1, 2, 0))  # output 1 is heatmaps
+            heatmap = cv2.resize(heatmap, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC)
+            heatmap = heatmap[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :]
+            heatmap = cv2.resize(heatmap, (oriImg.shape[1], oriImg.shape[0]), interpolation=cv2.INTER_CUBIC)
+
+            # paf = np.transpose(np.squeeze(net.blobs[output_blobs.keys()[0]].data), (1, 2, 0))  # output 0 is PAFs
+            paf = np.transpose(np.squeeze(Mconv7_stage6_L1), (1, 2, 0))  # output 0 is PAFs
+            paf = cv2.resize(paf, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC)
+            paf = paf[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :]
+            paf = cv2.resize(paf, (oriImg.shape[1], oriImg.shape[0]), interpolation=cv2.INTER_CUBIC)
+
+            # running average over the search scales
+            heatmap_avg += heatmap / len(multiplier)
+            paf_avg += paf / len(multiplier)
+
+        all_peaks = []
+        peak_counter = 0
+
+        for part in range(18):
+            map_ori = heatmap_avg[:, :, part]
+            one_heatmap = gaussian_filter(map_ori, sigma=3)
+
+            map_left = np.zeros(one_heatmap.shape)
+            map_left[1:, :] = one_heatmap[:-1, :]
+            map_right = np.zeros(one_heatmap.shape)
+            map_right[:-1, :] = one_heatmap[1:, :]
+            map_up = np.zeros(one_heatmap.shape)
+            map_up[:, 1:] = one_heatmap[:, :-1]
+            map_down = np.zeros(one_heatmap.shape)
+            map_down[:, :-1] = one_heatmap[:, 1:]
+
+            peaks_binary = np.logical_and.reduce(
+                (one_heatmap >= map_left, one_heatmap >= map_right, one_heatmap >= map_up, one_heatmap >= map_down, one_heatmap > thre1))
+            peaks = list(zip(np.nonzero(peaks_binary)[1], np.nonzero(peaks_binary)[0]))  # note reverse
+            peaks_with_score = [x + (map_ori[x[1], x[0]],) for x in peaks]
+            peak_id = range(peak_counter, peak_counter + len(peaks))
+            peaks_with_score_and_id = [peaks_with_score[i] + (peak_id[i],) for i in range(len(peak_id))]
+
+            all_peaks.append(peaks_with_score_and_id)
+            peak_counter += len(peaks)
+
+        # find connection in the specified sequence, center 29 is in the position 15
+        limbSeq = [[2, 3], [2, 6], [3, 4], [4, 5], [6, 7], [7, 8], [2, 9], [9, 10], \
+                   [10, 11], [2, 12], [12, 13], [13, 14], [2, 1], [1, 15], [15, 17], \
+                   [1, 16], [16, 18], [3, 17], [6, 18]]
+        # the middle joints heatmap correspondence
+        mapIdx = [[31, 32], [39, 40], [33, 34], [35, 36], [41, 42], [43, 44], [19, 20], [21, 22], \
+                  [23, 24], [25, 26], [27, 28], [29, 30], [47, 48], [49, 50], [53, 54], [51, 52], \
+                  [55, 56], [37, 38], [45, 46]]
+
+        connection_all = []
+        special_k = []
+        mid_num = 10
+
+        for k in range(len(mapIdx)):
+            score_mid = paf_avg[:, :, [x - 19 for x in mapIdx[k]]]
+            candA = all_peaks[limbSeq[k][0] - 1]
+            candB = all_peaks[limbSeq[k][1] - 1]
+            nA = len(candA)
+            nB = len(candB)
+            indexA, indexB = limbSeq[k]
+            if (nA != 0 and nB != 0):
+                connection_candidate = []
+                for i in range(nA):
+                    for j in range(nB):
+                        vec = np.subtract(candB[j][:2], candA[i][:2])
+                        norm = math.sqrt(vec[0] * vec[0] + vec[1] * vec[1])
+                        norm = max(0.001, norm)
+                        vec = np.divide(vec, norm)
+
+                        startend = list(zip(np.linspace(candA[i][0], candB[j][0], num=mid_num), \
+                                            np.linspace(candA[i][1], candB[j][1], num=mid_num)))
+
+                        vec_x = np.array([score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 0] \
+                                          for I in range(len(startend))])
+                        vec_y = np.array([score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 1] \
+                                          for I in range(len(startend))])
+
+                        score_midpts = np.multiply(vec_x, vec[0]) + np.multiply(vec_y, vec[1])
+                        score_with_dist_prior = sum(score_midpts) / len(score_midpts) + min(
+                            0.5 * oriImg.shape[0] / norm - 1, 0)
+                        criterion1 = len(np.nonzero(score_midpts > thre2)[0]) > 0.8 * len(score_midpts)
+                        criterion2 = score_with_dist_prior > 0
+                        if criterion1 and criterion2:
+                            connection_candidate.append(
+                                [i, j, score_with_dist_prior, score_with_dist_prior + candA[i][2] + candB[j][2]])
+
+                connection_candidate = sorted(connection_candidate, key=lambda x: x[2], reverse=True)
+                connection = np.zeros((0, 5))
+                for c in range(len(connection_candidate)):
+                    i, j, s = connection_candidate[c][0:3]
+                    if (i not in connection[:, 3] and j not in connection[:, 4]):
+                        connection = np.vstack([connection, [candA[i][3], candB[j][3], s, i, j]])
+                        if (len(connection) >= min(nA, nB)):
+                            break
+
+                connection_all.append(connection)
+            else:
+                special_k.append(k)
+                connection_all.append([])
+
+        # last number in each row is the total parts number of that person
+        # the second last number in each row is the score of the overall configuration
+        subset = -1 * np.ones((0, 20))
+        candidate = np.array([item for sublist in all_peaks for item in sublist])
+
+        for k in range(len(mapIdx)):
+            if k not in special_k:
+                partAs = connection_all[k][:, 0]
+                partBs = connection_all[k][:, 1]
+                indexA, indexB = np.array(limbSeq[k]) - 1
+
+                for i in range(len(connection_all[k])):  # = 1:size(temp,1)
+                    found = 0
+                    subset_idx = [-1, -1]
+                    for j in range(len(subset)):  # 1:size(subset,1):
+                        if subset[j][indexA] == partAs[i] or subset[j][indexB] == partBs[i]:
+                            subset_idx[found] = j
+                            found += 1
+
+                    if found == 1:
+                        j = subset_idx[0]
+                        if subset[j][indexB] != partBs[i]:
+                            subset[j][indexB] = partBs[i]
+                            subset[j][-1] += 1
+                            subset[j][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]
+                    elif found == 2:  # if found 2 and disjoint, merge them
+                        j1, j2 = subset_idx
+                        membership = ((subset[j1] >= 0).astype(int) + (subset[j2] >= 0).astype(int))[:-2]
+                        if len(np.nonzero(membership == 2)[0]) == 0:  # merge
+                            subset[j1][:-2] += (subset[j2][:-2] + 1)
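+                            # -1 marks an empty slot: adding (slot + 1) copies j2's candidate indices into
+                            # j1's empty slots (-1 + idx + 1 == idx) and leaves j1's filled slots unchanged (idx + 0)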
+                            subset[j1][-2:] += subset[j2][-2:]
+                            subset[j1][-2] += connection_all[k][i][2]
+                            subset = np.delete(subset, j2, 0)
+                        else:  # same as the found == 1 case
+                            subset[j1][indexB] = partBs[i]
+                            subset[j1][-1] += 1
+                            subset[j1][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]
+
+                    # if partA is not found in any existing subset, create a new one
+                    elif not found and k < 17:
+                        row = -1 * np.ones(20)
+                        row[indexA] = partAs[i]
+                        row[indexB] = partBs[i]
+                        row[-1] = 2
+                        row[-2] = sum(candidate[connection_all[k][i, :2].astype(int), 2]) + connection_all[k][i][2]
+                        subset = np.vstack([subset, row])
+        # delete rows of subset with too few detected parts or a low average score
+        deleteIdx = []
+        for i in range(len(subset)):
+            if subset[i][-1] < 4 or subset[i][-2] / subset[i][-1] < 0.4:
+                deleteIdx.append(i)
+        subset = np.delete(subset, deleteIdx, axis=0)
+
+        # subset: n*20 array, 0-17 is the index in candidate, 18 is the total score, 19 is the total parts
+        # candidate: x, y, score, id
+        return candidate, subset
+
+if __name__ == "__main__":
+    body_estimation = Body('../model/body_pose_model.pth')
+
+    test_image = '../images/ski.jpg'
+    oriImg = cv2.imread(test_image)  # B,G,R order
+    candidate, subset = body_estimation(oriImg)
+    canvas = util.draw_bodypose(oriImg, candidate, subset)
+    plt.imshow(canvas[:, :, [2, 1, 0]])
+    plt.show()
diff --git a/annotator/openpose/hand.py b/annotator/openpose/hand.py
new file mode 100644
index 0000000000000000000000000000000000000000..3d0bf17165ad7eb225332b51f4a2aa16718664b2
--- /dev/null
+++ b/annotator/openpose/hand.py
@@ -0,0 +1,86 @@
+import cv2
+import json
+import numpy as np
+import math
+import time
+from scipy.ndimage.filters import gaussian_filter
+import matplotlib.pyplot as plt
+import matplotlib
+import torch
+from skimage.measure import label
+
+from .model import handpose_model
+from .
import util
+
+class Hand(object):
+    def __init__(self, model_path):
+        self.model = handpose_model()
+        if torch.cuda.is_available():
+            self.model = self.model.cuda()
+            print('cuda')
+        model_dict = util.transfer(self.model, torch.load(model_path))
+        self.model.load_state_dict(model_dict)
+        self.model.eval()
+
+    def __call__(self, oriImg):
+        scale_search = [0.5, 1.0, 1.5, 2.0]
+        # scale_search = [0.5]
+        boxsize = 368
+        stride = 8
+        padValue = 128
+        thre = 0.05
+        multiplier = [x * boxsize / oriImg.shape[0] for x in scale_search]
+        heatmap_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 22))
+        # paf_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 38))
+
+        for m in range(len(multiplier)):
+            scale = multiplier[m]
+            imageToTest = cv2.resize(oriImg, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
+            imageToTest_padded, pad = util.padRightDownCorner(imageToTest, stride, padValue)
+            im = np.transpose(np.float32(imageToTest_padded[:, :, :, np.newaxis]), (3, 2, 0, 1)) / 256 - 0.5
+            im = np.ascontiguousarray(im)
+
+            data = torch.from_numpy(im).float()
+            if torch.cuda.is_available():
+                data = data.cuda()
+            # data = data.permute([2, 0, 1]).unsqueeze(0).float()
+            with torch.no_grad():
+                output = self.model(data).cpu().numpy()
+                # output = self.model(data).numpy()
+
+            # extract outputs, resize, and remove padding
+            heatmap = np.transpose(np.squeeze(output), (1, 2, 0))  # output 1 is heatmaps
+            heatmap = cv2.resize(heatmap, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC)
+            heatmap = heatmap[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :]
+            heatmap = cv2.resize(heatmap, (oriImg.shape[1], oriImg.shape[0]), interpolation=cv2.INTER_CUBIC)
+
+            heatmap_avg += heatmap / len(multiplier)
+
+        all_peaks = []
+        for part in range(21):
+            map_ori = heatmap_avg[:, :, part]
+            one_heatmap = gaussian_filter(map_ori, sigma=3)
+            binary = np.ascontiguousarray(one_heatmap > thre, dtype=np.uint8)
+            # all values below threshold
+            if np.sum(binary) == 0:
+                all_peaks.append([0, 0])
+                continue
+            label_img, label_numbers = label(binary, return_num=True, connectivity=binary.ndim)
+            max_index = np.argmax([np.sum(map_ori[label_img == i]) for i in range(1, label_numbers + 1)]) + 1
+            label_img[label_img != max_index] = 0
+            map_ori[label_img == 0] = 0
+
+            y, x = util.npmax(map_ori)
+            all_peaks.append([x, y])
+        return np.array(all_peaks)
+
+if __name__ == "__main__":
+    hand_estimation = Hand('../model/hand_pose_model.pth')
+
+    test_image = '../images/hand.jpg'
+    oriImg = cv2.imread(test_image)  # B,G,R order
+    peaks = hand_estimation(oriImg)
+    canvas = util.draw_handpose(oriImg, peaks, True)
+    cv2.imshow('', canvas)
+    cv2.waitKey(0)
\ No newline at end of file
diff --git a/annotator/openpose/model.py b/annotator/openpose/model.py
new file mode 100644
index 0000000000000000000000000000000000000000..5dfc80de827a17beccb9b0f3f7588545be78c9de
--- /dev/null
+++ b/annotator/openpose/model.py
@@ -0,0 +1,219 @@
+import torch
+from collections import OrderedDict
+
+import torch.nn as nn
+
+def make_layers(block, no_relu_layers):
+    layers = []
+    for layer_name, v in block.items():
+        if 'pool' in layer_name:
+            layer = nn.MaxPool2d(kernel_size=v[0], stride=v[1],
+                                 padding=v[2])
+            layers.append((layer_name, layer))
+        else:
+            conv2d = nn.Conv2d(in_channels=v[0], out_channels=v[1],
+                               kernel_size=v[2], stride=v[3],
+                               padding=v[4])
+            layers.append((layer_name, conv2d))
+            if layer_name not in no_relu_layers:
+                layers.append(('relu_'+layer_name, nn.ReLU(inplace=True)))
+
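    # layer names are preserved in the OrderedDict so pretrained Caffe weights can later be matched by name (see util.transfer)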
return nn.Sequential(OrderedDict(layers))
+
+class bodypose_model(nn.Module):
+    def __init__(self):
+        super(bodypose_model, self).__init__()
+
+        # these layers have no relu layer
+        no_relu_layers = ['conv5_5_CPM_L1', 'conv5_5_CPM_L2', 'Mconv7_stage2_L1',\
+                          'Mconv7_stage2_L2', 'Mconv7_stage3_L1', 'Mconv7_stage3_L2',\
+                          'Mconv7_stage4_L1', 'Mconv7_stage4_L2', 'Mconv7_stage5_L1',\
+                          'Mconv7_stage5_L2', 'Mconv7_stage6_L1', 'Mconv7_stage6_L2']
+        blocks = {}
+        block0 = OrderedDict([
+            ('conv1_1', [3, 64, 3, 1, 1]),
+            ('conv1_2', [64, 64, 3, 1, 1]),
+            ('pool1_stage1', [2, 2, 0]),
+            ('conv2_1', [64, 128, 3, 1, 1]),
+            ('conv2_2', [128, 128, 3, 1, 1]),
+            ('pool2_stage1', [2, 2, 0]),
+            ('conv3_1', [128, 256, 3, 1, 1]),
+            ('conv3_2', [256, 256, 3, 1, 1]),
+            ('conv3_3', [256, 256, 3, 1, 1]),
+            ('conv3_4', [256, 256, 3, 1, 1]),
+            ('pool3_stage1', [2, 2, 0]),
+            ('conv4_1', [256, 512, 3, 1, 1]),
+            ('conv4_2', [512, 512, 3, 1, 1]),
+            ('conv4_3_CPM', [512, 256, 3, 1, 1]),
+            ('conv4_4_CPM', [256, 128, 3, 1, 1])
+        ])
+
+
+        # Stage 1
+        block1_1 = OrderedDict([
+            ('conv5_1_CPM_L1', [128, 128, 3, 1, 1]),
+            ('conv5_2_CPM_L1', [128, 128, 3, 1, 1]),
+            ('conv5_3_CPM_L1', [128, 128, 3, 1, 1]),
+            ('conv5_4_CPM_L1', [128, 512, 1, 1, 0]),
+            ('conv5_5_CPM_L1', [512, 38, 1, 1, 0])
+        ])
+
+        block1_2 = OrderedDict([
+            ('conv5_1_CPM_L2', [128, 128, 3, 1, 1]),
+            ('conv5_2_CPM_L2', [128, 128, 3, 1, 1]),
+            ('conv5_3_CPM_L2', [128, 128, 3, 1, 1]),
+            ('conv5_4_CPM_L2', [128, 512, 1, 1, 0]),
+            ('conv5_5_CPM_L2', [512, 19, 1, 1, 0])
+        ])
+        blocks['block1_1'] = block1_1
+        blocks['block1_2'] = block1_2
+
+        self.model0 = make_layers(block0, no_relu_layers)
+
+        # Stages 2 - 6
+        for i in range(2, 7):
+            blocks['block%d_1' % i] = OrderedDict([
+                ('Mconv1_stage%d_L1' % i, [185, 128, 7, 1, 3]),
+                ('Mconv2_stage%d_L1' % i, [128, 128, 7, 1, 3]),
+                ('Mconv3_stage%d_L1' % i, [128, 128, 7, 1, 3]),
+                ('Mconv4_stage%d_L1' % i, [128, 128, 7, 1, 3]),
+                ('Mconv5_stage%d_L1' % i, [128, 128, 7, 1, 3]),
+                ('Mconv6_stage%d_L1' % i, [128, 128, 1, 1, 0]),
+                ('Mconv7_stage%d_L1' % i, [128, 38, 1, 1, 0])
+            ])
+
+            blocks['block%d_2' % i] = OrderedDict([
+                ('Mconv1_stage%d_L2' % i, [185, 128, 7, 1, 3]),
+                ('Mconv2_stage%d_L2' % i, [128, 128, 7, 1, 3]),
+                ('Mconv3_stage%d_L2' % i, [128, 128, 7, 1, 3]),
+                ('Mconv4_stage%d_L2' % i, [128, 128, 7, 1, 3]),
+                ('Mconv5_stage%d_L2' % i, [128, 128, 7, 1, 3]),
+                ('Mconv6_stage%d_L2' % i, [128, 128, 1, 1, 0]),
+                ('Mconv7_stage%d_L2' % i, [128, 19, 1, 1, 0])
+            ])
+
+        for k in blocks.keys():
+            blocks[k] = make_layers(blocks[k], no_relu_layers)
+
+        self.model1_1 = blocks['block1_1']
+        self.model2_1 = blocks['block2_1']
+        self.model3_1 = blocks['block3_1']
+        self.model4_1 = blocks['block4_1']
+        self.model5_1 = blocks['block5_1']
+        self.model6_1 = blocks['block6_1']
+
+        self.model1_2 = blocks['block1_2']
+        self.model2_2 = blocks['block2_2']
+        self.model3_2 = blocks['block3_2']
+        self.model4_2 = blocks['block4_2']
+        self.model5_2 = blocks['block5_2']
+        self.model6_2 = blocks['block6_2']
+
+
+    def forward(self, x):
+
+        out1 = self.model0(x)
+
+        out1_1 = self.model1_1(out1)
+        out1_2 = self.model1_2(out1)
+        out2 = torch.cat([out1_1, out1_2, out1], 1)
+
+        out2_1 = self.model2_1(out2)
+        out2_2 = self.model2_2(out2)
+        out3 = torch.cat([out2_1, out2_2, out1], 1)
+
+        out3_1 = self.model3_1(out3)
+        out3_2 = self.model3_2(out3)
+        out4 = torch.cat([out3_1, out3_2, out1], 1)
+
+        out4_1 = self.model4_1(out4)
+        out4_2 = self.model4_2(out4)
+        out5 = torch.cat([out4_1, out4_2, out1], 1)
+
+        out5_1 = self.model5_1(out5)
+        out5_2 =
self.model5_2(out5) + out6 = torch.cat([out5_1, out5_2, out1], 1) + + out6_1 = self.model6_1(out6) + out6_2 = self.model6_2(out6) + + return out6_1, out6_2 + +class handpose_model(nn.Module): + def __init__(self): + super(handpose_model, self).__init__() + + # these layers have no relu layer + no_relu_layers = ['conv6_2_CPM', 'Mconv7_stage2', 'Mconv7_stage3',\ + 'Mconv7_stage4', 'Mconv7_stage5', 'Mconv7_stage6'] + # stage 1 + block1_0 = OrderedDict([ + ('conv1_1', [3, 64, 3, 1, 1]), + ('conv1_2', [64, 64, 3, 1, 1]), + ('pool1_stage1', [2, 2, 0]), + ('conv2_1', [64, 128, 3, 1, 1]), + ('conv2_2', [128, 128, 3, 1, 1]), + ('pool2_stage1', [2, 2, 0]), + ('conv3_1', [128, 256, 3, 1, 1]), + ('conv3_2', [256, 256, 3, 1, 1]), + ('conv3_3', [256, 256, 3, 1, 1]), + ('conv3_4', [256, 256, 3, 1, 1]), + ('pool3_stage1', [2, 2, 0]), + ('conv4_1', [256, 512, 3, 1, 1]), + ('conv4_2', [512, 512, 3, 1, 1]), + ('conv4_3', [512, 512, 3, 1, 1]), + ('conv4_4', [512, 512, 3, 1, 1]), + ('conv5_1', [512, 512, 3, 1, 1]), + ('conv5_2', [512, 512, 3, 1, 1]), + ('conv5_3_CPM', [512, 128, 3, 1, 1]) + ]) + + block1_1 = OrderedDict([ + ('conv6_1_CPM', [128, 512, 1, 1, 0]), + ('conv6_2_CPM', [512, 22, 1, 1, 0]) + ]) + + blocks = {} + blocks['block1_0'] = block1_0 + blocks['block1_1'] = block1_1 + + # stage 2-6 + for i in range(2, 7): + blocks['block%d' % i] = OrderedDict([ + ('Mconv1_stage%d' % i, [150, 128, 7, 1, 3]), + ('Mconv2_stage%d' % i, [128, 128, 7, 1, 3]), + ('Mconv3_stage%d' % i, [128, 128, 7, 1, 3]), + ('Mconv4_stage%d' % i, [128, 128, 7, 1, 3]), + ('Mconv5_stage%d' % i, [128, 128, 7, 1, 3]), + ('Mconv6_stage%d' % i, [128, 128, 1, 1, 0]), + ('Mconv7_stage%d' % i, [128, 22, 1, 1, 0]) + ]) + + for k in blocks.keys(): + blocks[k] = make_layers(blocks[k], no_relu_layers) + + self.model1_0 = blocks['block1_0'] + self.model1_1 = blocks['block1_1'] + self.model2 = blocks['block2'] + self.model3 = blocks['block3'] + self.model4 = blocks['block4'] + self.model5 = blocks['block5'] + self.model6 = blocks['block6'] + + def forward(self, x): + out1_0 = self.model1_0(x) + out1_1 = self.model1_1(out1_0) + concat_stage2 = torch.cat([out1_1, out1_0], 1) + out_stage2 = self.model2(concat_stage2) + concat_stage3 = torch.cat([out_stage2, out1_0], 1) + out_stage3 = self.model3(concat_stage3) + concat_stage4 = torch.cat([out_stage3, out1_0], 1) + out_stage4 = self.model4(concat_stage4) + concat_stage5 = torch.cat([out_stage4, out1_0], 1) + out_stage5 = self.model5(concat_stage5) + concat_stage6 = torch.cat([out_stage5, out1_0], 1) + out_stage6 = self.model6(concat_stage6) + return out_stage6 + + diff --git a/annotator/openpose/util.py b/annotator/openpose/util.py new file mode 100644 index 0000000000000000000000000000000000000000..6f91ae0e65abaf0cbd62d803f56498991141e61b --- /dev/null +++ b/annotator/openpose/util.py @@ -0,0 +1,164 @@ +import math +import numpy as np +import matplotlib +import cv2 + + +def padRightDownCorner(img, stride, padValue): + h = img.shape[0] + w = img.shape[1] + + pad = 4 * [None] + pad[0] = 0 # up + pad[1] = 0 # left + pad[2] = 0 if (h % stride == 0) else stride - (h % stride) # down + pad[3] = 0 if (w % stride == 0) else stride - (w % stride) # right + + img_padded = img + pad_up = np.tile(img_padded[0:1, :, :]*0 + padValue, (pad[0], 1, 1)) + img_padded = np.concatenate((pad_up, img_padded), axis=0) + pad_left = np.tile(img_padded[:, 0:1, :]*0 + padValue, (1, pad[1], 1)) + img_padded = np.concatenate((pad_left, img_padded), axis=1) + pad_down = np.tile(img_padded[-2:-1, :, :]*0 + padValue, (pad[2], 1, 1)) 
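+    # each pad strip reuses the edge slice shape (*0) and fills it with the constant padValue,
+    # so only the bottom/right are padded and output sizes stay multiples of stride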
+    img_padded = np.concatenate((img_padded, pad_down), axis=0)
+    pad_right = np.tile(img_padded[:, -2:-1, :]*0 + padValue, (1, pad[3], 1))
+    img_padded = np.concatenate((img_padded, pad_right), axis=1)
+
+    return img_padded, pad
+
+# map the Caffe-style weight names onto the layer names of the PyTorch model
+def transfer(model, model_weights):
+    transferred_model_weights = {}
+    for weights_name in model.state_dict().keys():
+        transferred_model_weights[weights_name] = model_weights['.'.join(weights_name.split('.')[1:])]
+    return transferred_model_weights
+
+# draw the body keypoints and limbs
+def draw_bodypose(canvas, candidate, subset):
+    stickwidth = 4
+    limbSeq = [[2, 3], [2, 6], [3, 4], [4, 5], [6, 7], [7, 8], [2, 9], [9, 10], \
+               [10, 11], [2, 12], [12, 13], [13, 14], [2, 1], [1, 15], [15, 17], \
+               [1, 16], [16, 18], [3, 17], [6, 18]]
+
+    colors = [[255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0], [85, 255, 0], [0, 255, 0], \
+              [0, 255, 85], [0, 255, 170], [0, 255, 255], [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255], \
+              [170, 0, 255], [255, 0, 255], [255, 0, 170], [255, 0, 85]]
+    for i in range(18):
+        for n in range(len(subset)):
+            index = int(subset[n][i])
+            if index == -1:
+                continue
+            x, y = candidate[index][0:2]
+            cv2.circle(canvas, (int(x), int(y)), 4, colors[i], thickness=-1)
+    for i in range(17):
+        for n in range(len(subset)):
+            index = subset[n][np.array(limbSeq[i]) - 1]
+            if -1 in index:
+                continue
+            cur_canvas = canvas.copy()
+            Y = candidate[index.astype(int), 0]
+            X = candidate[index.astype(int), 1]
+            mX = np.mean(X)
+            mY = np.mean(Y)
+            length = ((X[0] - X[1]) ** 2 + (Y[0] - Y[1]) ** 2) ** 0.5
+            angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1]))
+            polygon = cv2.ellipse2Poly((int(mY), int(mX)), (int(length / 2), stickwidth), int(angle), 0, 360, 1)
+            cv2.fillConvexPoly(cur_canvas, polygon, colors[i])
+            canvas = cv2.addWeighted(canvas, 0.4, cur_canvas, 0.6, 0)
+    # plt.imsave("preview.jpg", canvas[:, :, [2, 1, 0]])
+    # plt.imshow(canvas[:, :, [2, 1, 0]])
+    return canvas
+
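+
+# A minimal usage sketch of draw_bodypose (illustrative only, not part of the
+# original OpenPose port): `candidate` holds one row per detected keypoint as
+# [x, y, score, id], and each row of `subset` describes one person with 18
+# indices into `candidate` (-1 when a keypoint is missing) plus two
+# bookkeeping columns. The dummy values below are made up.
+#
+#   canvas = np.zeros((368, 368, 3), dtype=np.uint8)
+#   candidate = np.array([[100.0, 50.0, 0.9, 0], [120.0, 90.0, 0.8, 1]])
+#   subset = -1 * np.ones((1, 20))
+#   subset[0, 0] = 0  # nose -> candidate row 0
+#   subset[0, 1] = 1  # neck -> candidate row 1
+#   canvas = draw_bodypose(canvas, candidate, subset)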
+
+# images drawn with opencv's default colors do not look good, so the hand
+# edges below are colored with matplotlib's HSV palette instead.
+def draw_handpose(canvas, all_hand_peaks, show_number=False):
+    edges = [[0, 1], [1, 2], [2, 3], [3, 4], [0, 5], [5, 6], [6, 7], [7, 8], [0, 9], [9, 10], \
+             [10, 11], [11, 12], [0, 13], [13, 14], [14, 15], [15, 16], [0, 17], [17, 18], [18, 19], [19, 20]]
+
+    for peaks in all_hand_peaks:
+        for ie, e in enumerate(edges):
+            # draw an edge only if both of its endpoints were detected (non-zero)
+            if np.sum(np.all(peaks[e], axis=1)==0)==0:
+                x1, y1 = peaks[e[0]]
+                x2, y2 = peaks[e[1]]
+                cv2.line(canvas, (x1, y1), (x2, y2), matplotlib.colors.hsv_to_rgb([ie/float(len(edges)), 1.0, 1.0])*255, thickness=2)
+
+        for i, keypoint in enumerate(peaks):
+            x, y = keypoint
+            cv2.circle(canvas, (x, y), 4, (0, 0, 255), thickness=-1)
+            if show_number:
+                cv2.putText(canvas, str(i), (x, y), cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0, 0, 0), lineType=cv2.LINE_AA)
+    return canvas
+
+# detect hands from the body pose keypoints
+# please refer to https://github.com/CMU-Perceptual-Computing-Lab/openpose/blob/master/src/openpose/hand/handDetector.cpp
+def handDetect(candidate, subset, oriImg):
+    # right hand: wrist 4, elbow 3, shoulder 2
+    # left hand: wrist 7, elbow 6, shoulder 5
+    ratioWristElbow = 0.33
+    detect_result = []
+    image_height, image_width = oriImg.shape[0:2]
+    for person in subset.astype(int):
+        # a side is usable only if its shoulder, elbow and wrist were all detected
+        has_left = np.sum(person[[5, 6, 7]] == -1) == 0
+        has_right = np.sum(person[[2, 3, 4]] == -1) == 0
+        if not (has_left or has_right):
+            continue
+        hands = []
+        # left hand
+        if has_left:
+            left_shoulder_index, left_elbow_index, left_wrist_index = person[[5, 6, 7]]
+            x1, y1 = candidate[left_shoulder_index][:2]
+            x2, y2 = candidate[left_elbow_index][:2]
+            x3, y3 = candidate[left_wrist_index][:2]
+            hands.append([x1, y1, x2, y2, x3, y3, True])
+        # right hand
+        if has_right:
+            right_shoulder_index, right_elbow_index, right_wrist_index = person[[2, 3, 4]]
+            x1, y1 = candidate[right_shoulder_index][:2]
+            x2, y2 = candidate[right_elbow_index][:2]
+            x3, y3 = candidate[right_wrist_index][:2]
+            hands.append([x1, y1, x2, y2, x3, y3, False])
+
+        for x1, y1, x2, y2, x3, y3, is_left in hands:
+            # pos_hand = pos_wrist + ratio * (pos_wrist - pos_elbow) = (1 + ratio) * pos_wrist - ratio * pos_elbow
+            # handRectangle.x = posePtr[wrist*3] + ratioWristElbow * (posePtr[wrist*3] - posePtr[elbow*3]);
+            # handRectangle.y = posePtr[wrist*3+1] + ratioWristElbow * (posePtr[wrist*3+1] - posePtr[elbow*3+1]);
+            # const auto distanceWristElbow = getDistance(poseKeypoints, person, wrist, elbow);
+            # const auto distanceElbowShoulder = getDistance(poseKeypoints, person, elbow, shoulder);
+            # handRectangle.width = 1.5f * fastMax(distanceWristElbow, 0.9f * distanceElbowShoulder);
+            x = x3 + ratioWristElbow * (x3 - x2)
+            y = y3 + ratioWristElbow * (y3 - y2)
+            distanceWristElbow = math.sqrt((x3 - x2) ** 2 + (y3 - y2) ** 2)
+            distanceElbowShoulder = math.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2)
+            width = 1.5 * max(distanceWristElbow, 0.9 * distanceElbowShoulder)
+            # (x, y) is the box center --> offset it to the top-left corner
+            # handRectangle.x -= handRectangle.width / 2.f;
+            # handRectangle.y -= handRectangle.height / 2.f;
+            x -= width / 2
+            y -= width / 2  # width = height
+            # clip the box so it does not overflow the image
+            if x < 0: x = 0
+            if y < 0: y = 0
+            width1 = width
+            width2 = width
+            if x + width > image_width: width1 = image_width - x
+            if y + width > image_height: width2 = image_height - y
+            width = min(width1, width2)
+            # discard hand boxes smaller than 20 pixels
+            if width >= 20:
+                detect_result.append([int(x), int(y), int(width), is_left])
+
+    '''
+    return value: [[x, y, w, True if left hand else False]].
+    width = height since the network requires square input.
+    (x, y) is the coordinate of the top-left corner of the box.
+    '''
+    return detect_result
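+
+# Worked example of the box geometry above (all numbers are made up for
+# illustration and assume the image is large enough that no clipping
+# happens): with shoulder (x1, y1) = (100, 100), elbow (x2, y2) = (130, 160)
+# and wrist (x3, y3) = (160, 220), the box center is pushed past the wrist
+# by a third of the elbow->wrist offset:
+#
+#   x = 160 + 0.33 * (160 - 130) = 169.9
+#   y = 220 + 0.33 * (220 - 160) = 239.8
+#   distanceWristElbow    = sqrt(30**2 + 60**2) ~= 67.1
+#   distanceElbowShoulder = sqrt(30**2 + 60**2) ~= 67.1
+#   width = 1.5 * max(67.1, 0.9 * 67.1) ~= 100.6
+#
+# After shifting the center to the top-left corner and truncating, the
+# returned entry is roughly [119, 189, 100, is_left].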
+
+# return the (row, column) index of the global maximum of a 2d array
+def npmax(array):
+    arrayindex = array.argmax(1)
+    arrayvalue = array.max(1)
+    i = arrayvalue.argmax()
+    j = arrayindex[i]
+    return i, j
diff --git a/annotator/uniformer/__init__.py b/annotator/uniformer/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..6be429542e4908c2b7648e7ee7c9c5f8253e7c94
--- /dev/null
+++ b/annotator/uniformer/__init__.py
@@ -0,0 +1,23 @@
+import os
+
+from annotator.uniformer.mmseg.apis import init_segmentor, inference_segmentor, show_result_pyplot
+from annotator.uniformer.mmseg.core.evaluation import get_palette
+from annotator.util import annotator_ckpts_path
+
+
+checkpoint_file = "https://huggingface.co/lllyasviel/ControlNet/resolve/main/annotator/ckpts/upernet_global_small.pth"
+
+
+class UniformerDetector:
+    def __init__(self):
+        modelpath = os.path.join(annotator_ckpts_path, "upernet_global_small.pth")
+        if not os.path.exists(modelpath):
+            from basicsr.utils.download_util import load_file_from_url
+            load_file_from_url(checkpoint_file, model_dir=annotator_ckpts_path)
+        config_file = os.path.join(os.path.dirname(annotator_ckpts_path), "uniformer", "exp", "upernet_global_small", "config.py")
+        self.model = init_segmentor(config_file, modelpath).cuda()
+
+    def __call__(self, img):
+        result = inference_segmentor(self.model, img)
+        res_img = show_result_pyplot(self.model, img, result, get_palette('ade'), opacity=1)
+        return res_img
diff --git a/annotator/uniformer/configs/_base_/datasets/ade20k.py b/annotator/uniformer/configs/_base_/datasets/ade20k.py
new file mode 100644
index 0000000000000000000000000000000000000000..efc8b4bb20c981f3db6df7eb52b3dc0744c94cc0
--- /dev/null
+++ b/annotator/uniformer/configs/_base_/datasets/ade20k.py
@@ -0,0 +1,54 @@
+# dataset settings
+dataset_type = 'ADE20KDataset'
+data_root = 'data/ade/ADEChallengeData2016'
+img_norm_cfg = dict(
+    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+crop_size = (512, 512)
+train_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadAnnotations', reduce_zero_label=True),
+    dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
+    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
+    dict(type='RandomFlip', prob=0.5),
+    dict(type='PhotoMetricDistortion'),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
+    dict(type='DefaultFormatBundle'),
+    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
+]
+test_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(
+        type='MultiScaleFlipAug',
+        img_scale=(2048, 512),
+        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
+        flip=False,
+        transforms=[
+            dict(type='Resize', keep_ratio=True),
+            dict(type='RandomFlip'),
+            dict(type='Normalize', **img_norm_cfg),
+            dict(type='ImageToTensor', keys=['img']),
+            dict(type='Collect', keys=['img']),
+        ])
+]
+data = dict(
+    samples_per_gpu=4,
+    workers_per_gpu=4,
+    train=dict(
+        type=dataset_type,
+        data_root=data_root,
+        img_dir='images/training',
+        ann_dir='annotations/training',
+        pipeline=train_pipeline),
+    val=dict(
+        type=dataset_type,
+        data_root=data_root,
+        img_dir='images/validation',
+        ann_dir='annotations/validation',
+        pipeline=test_pipeline),
+    test=dict(
+        type=dataset_type,
+        data_root=data_root,
+        img_dir='images/validation',
+        ann_dir='annotations/validation',
+        pipeline=test_pipeline))
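The dataset and model files that follow are `_base_` config fragments in the mmsegmentation style: a top-level experiment config pulls them in through `_base_` inheritance (the same mechanism `cityscapes_769x769.py` below uses with `_base_ = './cityscapes.py'`), and the merged result is what `init_segmentor` ultimately loads. A minimal sketch of that pattern, assuming the vendored mmcv keeps upstream's `Config` API; the paths in the sketch are illustrative, not files from this diff:

    from annotator.uniformer.mmcv import Config

    # hypothetical experiment config combining _base_ fragments, e.g.:
    #   _base_ = [
    #       '../_base_/models/fpn_uniformer.py',
    #       '../_base_/datasets/ade20k.py',
    #       '../_base_/default_runtime.py',
    #   ]
    cfg = Config.fromfile('path/to/experiment_config.py')
    print(cfg.data.train.pipeline)  # fully merged training pipeline

diff --git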
a/annotator/uniformer/configs/_base_/datasets/chase_db1.py b/annotator/uniformer/configs/_base_/datasets/chase_db1.py new file mode 100644 index 0000000000000000000000000000000000000000..298594ea925f87f22b37094a2ec50e370aec96a0 --- /dev/null +++ b/annotator/uniformer/configs/_base_/datasets/chase_db1.py @@ -0,0 +1,59 @@ +# dataset settings +dataset_type = 'ChaseDB1Dataset' +data_root = 'data/CHASE_DB1' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +img_scale = (960, 999) +crop_size = (128, 128) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=img_scale, + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) +] + +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type='RepeatDataset', + times=40000, + dataset=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/training', + ann_dir='annotations/training', + pipeline=train_pipeline)), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline)) diff --git a/annotator/uniformer/configs/_base_/datasets/cityscapes.py b/annotator/uniformer/configs/_base_/datasets/cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..f21867c63e1835f6fceb61f066e802fd8fd2a735 --- /dev/null +++ b/annotator/uniformer/configs/_base_/datasets/cityscapes.py @@ -0,0 +1,54 @@ +# dataset settings +dataset_type = 'CityscapesDataset' +data_root = 'data/cityscapes/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +crop_size = (512, 1024) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2048, 1024), + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict( + type=dataset_type, + 
data_root=data_root, + img_dir='leftImg8bit/train', + ann_dir='gtFine/train', + pipeline=train_pipeline), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='leftImg8bit/val', + ann_dir='gtFine/val', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='leftImg8bit/val', + ann_dir='gtFine/val', + pipeline=test_pipeline)) diff --git a/annotator/uniformer/configs/_base_/datasets/cityscapes_769x769.py b/annotator/uniformer/configs/_base_/datasets/cityscapes_769x769.py new file mode 100644 index 0000000000000000000000000000000000000000..336c7b254fe392b4703039fec86a83acdbd2e1a5 --- /dev/null +++ b/annotator/uniformer/configs/_base_/datasets/cityscapes_769x769.py @@ -0,0 +1,35 @@ +_base_ = './cityscapes.py' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +crop_size = (769, 769) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(2049, 1025), ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2049, 1025), + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/annotator/uniformer/configs/_base_/datasets/drive.py b/annotator/uniformer/configs/_base_/datasets/drive.py new file mode 100644 index 0000000000000000000000000000000000000000..06e8ff606e0d2a4514ec8b7d2c6c436a32efcbf4 --- /dev/null +++ b/annotator/uniformer/configs/_base_/datasets/drive.py @@ -0,0 +1,59 @@ +# dataset settings +dataset_type = 'DRIVEDataset' +data_root = 'data/DRIVE' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +img_scale = (584, 565) +crop_size = (64, 64) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=img_scale, + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) +] + +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type='RepeatDataset', + times=40000, + dataset=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/training', + 
ann_dir='annotations/training', + pipeline=train_pipeline)), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline)) diff --git a/annotator/uniformer/configs/_base_/datasets/hrf.py b/annotator/uniformer/configs/_base_/datasets/hrf.py new file mode 100644 index 0000000000000000000000000000000000000000..242d790eb1b83e75cf6b7eaa7a35c674099311ad --- /dev/null +++ b/annotator/uniformer/configs/_base_/datasets/hrf.py @@ -0,0 +1,59 @@ +# dataset settings +dataset_type = 'HRFDataset' +data_root = 'data/HRF' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +img_scale = (2336, 3504) +crop_size = (256, 256) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=img_scale, + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) +] + +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type='RepeatDataset', + times=40000, + dataset=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/training', + ann_dir='annotations/training', + pipeline=train_pipeline)), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline)) diff --git a/annotator/uniformer/configs/_base_/datasets/pascal_context.py b/annotator/uniformer/configs/_base_/datasets/pascal_context.py new file mode 100644 index 0000000000000000000000000000000000000000..ff65bad1b86d7e3a5980bb5b9fc55798dc8df5f4 --- /dev/null +++ b/annotator/uniformer/configs/_base_/datasets/pascal_context.py @@ -0,0 +1,60 @@ +# dataset settings +dataset_type = 'PascalContextDataset' +data_root = 'data/VOCdevkit/VOC2010/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) + +img_scale = (520, 520) +crop_size = (480, 480) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + 
img_scale=img_scale, + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type=dataset_type, + data_root=data_root, + img_dir='JPEGImages', + ann_dir='SegmentationClassContext', + split='ImageSets/SegmentationContext/train.txt', + pipeline=train_pipeline), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='JPEGImages', + ann_dir='SegmentationClassContext', + split='ImageSets/SegmentationContext/val.txt', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='JPEGImages', + ann_dir='SegmentationClassContext', + split='ImageSets/SegmentationContext/val.txt', + pipeline=test_pipeline)) diff --git a/annotator/uniformer/configs/_base_/datasets/pascal_context_59.py b/annotator/uniformer/configs/_base_/datasets/pascal_context_59.py new file mode 100644 index 0000000000000000000000000000000000000000..37585abab89834b95cd5bdd993b994fca1db65f6 --- /dev/null +++ b/annotator/uniformer/configs/_base_/datasets/pascal_context_59.py @@ -0,0 +1,60 @@ +# dataset settings +dataset_type = 'PascalContextDataset59' +data_root = 'data/VOCdevkit/VOC2010/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) + +img_scale = (520, 520) +crop_size = (480, 480) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', reduce_zero_label=True), + dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=img_scale, + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type=dataset_type, + data_root=data_root, + img_dir='JPEGImages', + ann_dir='SegmentationClassContext', + split='ImageSets/SegmentationContext/train.txt', + pipeline=train_pipeline), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='JPEGImages', + ann_dir='SegmentationClassContext', + split='ImageSets/SegmentationContext/val.txt', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='JPEGImages', + ann_dir='SegmentationClassContext', + split='ImageSets/SegmentationContext/val.txt', + pipeline=test_pipeline)) diff --git a/annotator/uniformer/configs/_base_/datasets/pascal_voc12.py b/annotator/uniformer/configs/_base_/datasets/pascal_voc12.py new file mode 100644 index 0000000000000000000000000000000000000000..ba1d42d0c5781f56dc177d860d856bb34adce555 --- /dev/null +++ b/annotator/uniformer/configs/_base_/datasets/pascal_voc12.py @@ -0,0 +1,57 @@ +# dataset settings +dataset_type = 'PascalVOCDataset' +data_root = 'data/VOCdevkit/VOC2012' 
+img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +crop_size = (512, 512) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2048, 512), + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type=dataset_type, + data_root=data_root, + img_dir='JPEGImages', + ann_dir='SegmentationClass', + split='ImageSets/Segmentation/train.txt', + pipeline=train_pipeline), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='JPEGImages', + ann_dir='SegmentationClass', + split='ImageSets/Segmentation/val.txt', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='JPEGImages', + ann_dir='SegmentationClass', + split='ImageSets/Segmentation/val.txt', + pipeline=test_pipeline)) diff --git a/annotator/uniformer/configs/_base_/datasets/pascal_voc12_aug.py b/annotator/uniformer/configs/_base_/datasets/pascal_voc12_aug.py new file mode 100644 index 0000000000000000000000000000000000000000..3f23b6717d53ad29f02dd15046802a2631a5076b --- /dev/null +++ b/annotator/uniformer/configs/_base_/datasets/pascal_voc12_aug.py @@ -0,0 +1,9 @@ +_base_ = './pascal_voc12.py' +# dataset settings +data = dict( + train=dict( + ann_dir=['SegmentationClass', 'SegmentationClassAug'], + split=[ + 'ImageSets/Segmentation/train.txt', + 'ImageSets/Segmentation/aug.txt' + ])) diff --git a/annotator/uniformer/configs/_base_/datasets/stare.py b/annotator/uniformer/configs/_base_/datasets/stare.py new file mode 100644 index 0000000000000000000000000000000000000000..3f71b25488cc11a6b4d582ac52b5a24e1ad1cf8e --- /dev/null +++ b/annotator/uniformer/configs/_base_/datasets/stare.py @@ -0,0 +1,59 @@ +# dataset settings +dataset_type = 'STAREDataset' +data_root = 'data/STARE' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +img_scale = (605, 700) +crop_size = (128, 128) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=img_scale, + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + 
dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) +] + +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type='RepeatDataset', + times=40000, + dataset=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/training', + ann_dir='annotations/training', + pipeline=train_pipeline)), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline)) diff --git a/annotator/uniformer/configs/_base_/default_runtime.py b/annotator/uniformer/configs/_base_/default_runtime.py new file mode 100644 index 0000000000000000000000000000000000000000..b564cc4e7e7d9a67dacaaddecb100e4d8f5c005b --- /dev/null +++ b/annotator/uniformer/configs/_base_/default_runtime.py @@ -0,0 +1,14 @@ +# yapf:disable +log_config = dict( + interval=50, + hooks=[ + dict(type='TextLoggerHook', by_epoch=False), + # dict(type='TensorboardLoggerHook') + ]) +# yapf:enable +dist_params = dict(backend='nccl') +log_level = 'INFO' +load_from = None +resume_from = None +workflow = [('train', 1)] +cudnn_benchmark = True diff --git a/annotator/uniformer/configs/_base_/models/ann_r50-d8.py b/annotator/uniformer/configs/_base_/models/ann_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..a2cb653827e44e6015b3b83bc578003e614a6aa1 --- /dev/null +++ b/annotator/uniformer/configs/_base_/models/ann_r50-d8.py @@ -0,0 +1,46 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='ANNHead', + in_channels=[1024, 2048], + in_index=[2, 3], + channels=512, + project_channels=256, + query_scales=(1, ), + key_pool_scales=(1, 3, 6, 8), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/annotator/uniformer/configs/_base_/models/apcnet_r50-d8.py b/annotator/uniformer/configs/_base_/models/apcnet_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..c8f5316cbcf3896ba9de7ca2c801eba512f01d5e --- /dev/null +++ b/annotator/uniformer/configs/_base_/models/apcnet_r50-d8.py @@ -0,0 +1,44 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='APCHead', + in_channels=2048, + in_index=3, + channels=512, + 
pool_scales=(1, 2, 3, 6), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=dict(type='SyncBN', requires_grad=True), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/annotator/uniformer/configs/_base_/models/ccnet_r50-d8.py b/annotator/uniformer/configs/_base_/models/ccnet_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..794148f576b9e215c3c6963e73dffe98204b7717 --- /dev/null +++ b/annotator/uniformer/configs/_base_/models/ccnet_r50-d8.py @@ -0,0 +1,44 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='CCHead', + in_channels=2048, + in_index=3, + channels=512, + recurrence=2, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/annotator/uniformer/configs/_base_/models/cgnet.py b/annotator/uniformer/configs/_base_/models/cgnet.py new file mode 100644 index 0000000000000000000000000000000000000000..eff8d9458c877c5db894957e0b1b4597e40da6ab --- /dev/null +++ b/annotator/uniformer/configs/_base_/models/cgnet.py @@ -0,0 +1,35 @@ +# model settings +norm_cfg = dict(type='SyncBN', eps=1e-03, requires_grad=True) +model = dict( + type='EncoderDecoder', + backbone=dict( + type='CGNet', + norm_cfg=norm_cfg, + in_channels=3, + num_channels=(32, 64, 128), + num_blocks=(3, 21), + dilations=(2, 4), + reductions=(8, 16)), + decode_head=dict( + type='FCNHead', + in_channels=256, + in_index=2, + channels=256, + num_convs=0, + concat_input=False, + dropout_ratio=0, + num_classes=19, + norm_cfg=norm_cfg, + loss_decode=dict( + type='CrossEntropyLoss', + use_sigmoid=False, + loss_weight=1.0, + class_weight=[ + 2.5959933, 6.7415504, 3.5354059, 9.8663225, 9.690899, 9.369352, + 10.289121, 9.953208, 4.3097677, 9.490387, 7.674431, 9.396905, + 10.347791, 6.3927646, 10.226669, 10.241062, 10.280587, + 10.396974, 10.055647 + ])), + # model training and testing settings + train_cfg=dict(sampler=None), + test_cfg=dict(mode='whole')) diff --git a/annotator/uniformer/configs/_base_/models/danet_r50-d8.py b/annotator/uniformer/configs/_base_/models/danet_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..2c934939fac48525f22ad86f489a041dd7db7d09 --- /dev/null +++ b/annotator/uniformer/configs/_base_/models/danet_r50-d8.py @@ -0,0 +1,44 @@ +# model settings 
+norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='DAHead', + in_channels=2048, + in_index=3, + channels=512, + pam_channels=64, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/annotator/uniformer/configs/_base_/models/deeplabv3_r50-d8.py b/annotator/uniformer/configs/_base_/models/deeplabv3_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..d7a43bee01422ad4795dd27874e0cd4bb6cbfecf --- /dev/null +++ b/annotator/uniformer/configs/_base_/models/deeplabv3_r50-d8.py @@ -0,0 +1,44 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='ASPPHead', + in_channels=2048, + in_index=3, + channels=512, + dilations=(1, 12, 24, 36), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/annotator/uniformer/configs/_base_/models/deeplabv3_unet_s5-d16.py b/annotator/uniformer/configs/_base_/models/deeplabv3_unet_s5-d16.py new file mode 100644 index 0000000000000000000000000000000000000000..0cd262999d8b2cb8e14a5c32190ae73f479d8e81 --- /dev/null +++ b/annotator/uniformer/configs/_base_/models/deeplabv3_unet_s5-d16.py @@ -0,0 +1,50 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained=None, + backbone=dict( + type='UNet', + in_channels=3, + base_channels=64, + num_stages=5, + strides=(1, 1, 1, 1, 1), + enc_num_convs=(2, 2, 2, 2, 2), + dec_num_convs=(2, 2, 2, 2), + downsamples=(True, True, True, True), + enc_dilations=(1, 1, 1, 1, 1), + dec_dilations=(1, 1, 1, 1), + with_cp=False, + conv_cfg=None, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + upsample_cfg=dict(type='InterpConv'), + norm_eval=False), + decode_head=dict( + type='ASPPHead', + in_channels=64, + in_index=4, + channels=16, + dilations=(1, 12, 24, 36), + dropout_ratio=0.1, + num_classes=2, + norm_cfg=norm_cfg, + align_corners=False, + 
loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=128, + in_index=3, + channels=64, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=2, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='slide', crop_size=256, stride=170)) diff --git a/annotator/uniformer/configs/_base_/models/deeplabv3plus_r50-d8.py b/annotator/uniformer/configs/_base_/models/deeplabv3plus_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..050e39e091d816df9028d23aa3ecf9db74e441e1 --- /dev/null +++ b/annotator/uniformer/configs/_base_/models/deeplabv3plus_r50-d8.py @@ -0,0 +1,46 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='DepthwiseSeparableASPPHead', + in_channels=2048, + in_index=3, + channels=512, + dilations=(1, 12, 24, 36), + c1_in_channels=256, + c1_channels=48, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/annotator/uniformer/configs/_base_/models/dmnet_r50-d8.py b/annotator/uniformer/configs/_base_/models/dmnet_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..d22ba52640bebd805b3b8d07025e276dfb023759 --- /dev/null +++ b/annotator/uniformer/configs/_base_/models/dmnet_r50-d8.py @@ -0,0 +1,44 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='DMHead', + in_channels=2048, + in_index=3, + channels=512, + filter_sizes=(1, 3, 5, 7), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=dict(type='SyncBN', requires_grad=True), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/annotator/uniformer/configs/_base_/models/dnl_r50-d8.py b/annotator/uniformer/configs/_base_/models/dnl_r50-d8.py new file mode 100644 index 
0000000000000000000000000000000000000000..edb4c174c51e34c103737ba39bfc48bf831e561d --- /dev/null +++ b/annotator/uniformer/configs/_base_/models/dnl_r50-d8.py @@ -0,0 +1,46 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='DNLHead', + in_channels=2048, + in_index=3, + channels=512, + dropout_ratio=0.1, + reduction=2, + use_scale=True, + mode='embedded_gaussian', + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/annotator/uniformer/configs/_base_/models/emanet_r50-d8.py b/annotator/uniformer/configs/_base_/models/emanet_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..26adcd430926de0862204a71d345f2543167f27b --- /dev/null +++ b/annotator/uniformer/configs/_base_/models/emanet_r50-d8.py @@ -0,0 +1,47 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='EMAHead', + in_channels=2048, + in_index=3, + channels=256, + ema_channels=512, + num_bases=64, + num_stages=3, + momentum=0.1, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/annotator/uniformer/configs/_base_/models/encnet_r50-d8.py b/annotator/uniformer/configs/_base_/models/encnet_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..be777123a886503172a95fe0719e956a147bbd68 --- /dev/null +++ b/annotator/uniformer/configs/_base_/models/encnet_r50-d8.py @@ -0,0 +1,48 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='EncHead', + in_channels=[512, 1024, 2048], + in_index=(1, 2, 3), + channels=512, + num_codes=32, + 
use_se_loss=True, + add_lateral=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_se_decode=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.2)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/annotator/uniformer/configs/_base_/models/fast_scnn.py b/annotator/uniformer/configs/_base_/models/fast_scnn.py new file mode 100644 index 0000000000000000000000000000000000000000..32fdeb659355a5ce5ef2cc7c2f30742703811cdf --- /dev/null +++ b/annotator/uniformer/configs/_base_/models/fast_scnn.py @@ -0,0 +1,57 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True, momentum=0.01) +model = dict( + type='EncoderDecoder', + backbone=dict( + type='FastSCNN', + downsample_dw_channels=(32, 48), + global_in_channels=64, + global_block_channels=(64, 96, 128), + global_block_strides=(2, 2, 1), + global_out_channels=128, + higher_in_channels=64, + lower_in_channels=128, + fusion_out_channels=128, + out_indices=(0, 1, 2), + norm_cfg=norm_cfg, + align_corners=False), + decode_head=dict( + type='DepthwiseSeparableFCNHead', + in_channels=128, + channels=128, + concat_input=False, + num_classes=19, + in_index=-1, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4)), + auxiliary_head=[ + dict( + type='FCNHead', + in_channels=128, + channels=32, + num_convs=1, + num_classes=19, + in_index=-2, + norm_cfg=norm_cfg, + concat_input=False, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4)), + dict( + type='FCNHead', + in_channels=64, + channels=32, + num_convs=1, + num_classes=19, + in_index=-3, + norm_cfg=norm_cfg, + concat_input=False, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4)), + ], + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/annotator/uniformer/configs/_base_/models/fcn_hr18.py b/annotator/uniformer/configs/_base_/models/fcn_hr18.py new file mode 100644 index 0000000000000000000000000000000000000000..c3e299bc89ada56ca14bbffcbdb08a586b8ed9e9 --- /dev/null +++ b/annotator/uniformer/configs/_base_/models/fcn_hr18.py @@ -0,0 +1,52 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://msra/hrnetv2_w18', + backbone=dict( + type='HRNet', + norm_cfg=norm_cfg, + norm_eval=False, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(18, 36)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(18, 36, 72)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(18, 36, 72, 144)))), + decode_head=dict( + type='FCNHead', + in_channels=[18, 36, 72, 144], + in_index=(0, 1, 2, 3), + channels=sum([18, 36, 72, 
144]), + input_transform='resize_concat', + kernel_size=1, + num_convs=1, + concat_input=False, + dropout_ratio=-1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/annotator/uniformer/configs/_base_/models/fcn_r50-d8.py b/annotator/uniformer/configs/_base_/models/fcn_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..5e98f6cc918b6146fc6d613c6918e825ef1355c3 --- /dev/null +++ b/annotator/uniformer/configs/_base_/models/fcn_r50-d8.py @@ -0,0 +1,45 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='FCNHead', + in_channels=2048, + in_index=3, + channels=512, + num_convs=2, + concat_input=True, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/annotator/uniformer/configs/_base_/models/fcn_unet_s5-d16.py b/annotator/uniformer/configs/_base_/models/fcn_unet_s5-d16.py new file mode 100644 index 0000000000000000000000000000000000000000..a33e7972877f902d0e7d18401ca675e3e4e60a18 --- /dev/null +++ b/annotator/uniformer/configs/_base_/models/fcn_unet_s5-d16.py @@ -0,0 +1,51 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained=None, + backbone=dict( + type='UNet', + in_channels=3, + base_channels=64, + num_stages=5, + strides=(1, 1, 1, 1, 1), + enc_num_convs=(2, 2, 2, 2, 2), + dec_num_convs=(2, 2, 2, 2), + downsamples=(True, True, True, True), + enc_dilations=(1, 1, 1, 1, 1), + dec_dilations=(1, 1, 1, 1), + with_cp=False, + conv_cfg=None, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + upsample_cfg=dict(type='InterpConv'), + norm_eval=False), + decode_head=dict( + type='FCNHead', + in_channels=64, + in_index=4, + channels=64, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=2, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=128, + in_index=3, + channels=64, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=2, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='slide', crop_size=256, stride=170)) diff --git a/annotator/uniformer/configs/_base_/models/fpn_r50.py b/annotator/uniformer/configs/_base_/models/fpn_r50.py new file mode 100644 index 
0000000000000000000000000000000000000000..86ab327db92e44c14822d65f1c9277cb007f17c1 --- /dev/null +++ b/annotator/uniformer/configs/_base_/models/fpn_r50.py @@ -0,0 +1,36 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 1, 1), + strides=(1, 2, 2, 2), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=4), + decode_head=dict( + type='FPNHead', + in_channels=[256, 256, 256, 256], + in_index=[0, 1, 2, 3], + feature_strides=[4, 8, 16, 32], + channels=128, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/annotator/uniformer/configs/_base_/models/fpn_uniformer.py b/annotator/uniformer/configs/_base_/models/fpn_uniformer.py new file mode 100644 index 0000000000000000000000000000000000000000..8aae98c5991055bfcc08e82ccdc09f8b1d9f8a8d --- /dev/null +++ b/annotator/uniformer/configs/_base_/models/fpn_uniformer.py @@ -0,0 +1,35 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + backbone=dict( + type='UniFormer', + embed_dim=[64, 128, 320, 512], + layers=[3, 4, 8, 3], + head_dim=64, + mlp_ratio=4., + qkv_bias=True, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=0.1), + neck=dict( + type='FPN', + in_channels=[64, 128, 320, 512], + out_channels=256, + num_outs=4), + decode_head=dict( + type='FPNHead', + in_channels=[256, 256, 256, 256], + in_index=[0, 1, 2, 3], + feature_strides=[4, 8, 16, 32], + channels=128, + dropout_ratio=0.1, + num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole') +) diff --git a/annotator/uniformer/configs/_base_/models/gcnet_r50-d8.py b/annotator/uniformer/configs/_base_/models/gcnet_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..3d2ad69f5c22adfe79d5fdabf920217628987166 --- /dev/null +++ b/annotator/uniformer/configs/_base_/models/gcnet_r50-d8.py @@ -0,0 +1,46 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='GCHead', + in_channels=2048, + in_index=3, + channels=512, + ratio=1 / 4., + pooling_type='att', + fusion_types=('channel_add', ), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, 
loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/annotator/uniformer/configs/_base_/models/lraspp_m-v3-d8.py b/annotator/uniformer/configs/_base_/models/lraspp_m-v3-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..93258242a90695cc94a7c6bd41562d6a75988771 --- /dev/null +++ b/annotator/uniformer/configs/_base_/models/lraspp_m-v3-d8.py @@ -0,0 +1,25 @@ +# model settings +norm_cfg = dict(type='SyncBN', eps=0.001, requires_grad=True) +model = dict( + type='EncoderDecoder', + backbone=dict( + type='MobileNetV3', + arch='large', + out_indices=(1, 3, 16), + norm_cfg=norm_cfg), + decode_head=dict( + type='LRASPPHead', + in_channels=(16, 24, 960), + in_index=(0, 1, 2), + channels=128, + input_transform='multiple_select', + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/annotator/uniformer/configs/_base_/models/nonlocal_r50-d8.py b/annotator/uniformer/configs/_base_/models/nonlocal_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..5674a39854cafd1f2e363bac99c58ccae62f24da --- /dev/null +++ b/annotator/uniformer/configs/_base_/models/nonlocal_r50-d8.py @@ -0,0 +1,46 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='NLHead', + in_channels=2048, + in_index=3, + channels=512, + dropout_ratio=0.1, + reduction=2, + use_scale=True, + mode='embedded_gaussian', + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/annotator/uniformer/configs/_base_/models/ocrnet_hr18.py b/annotator/uniformer/configs/_base_/models/ocrnet_hr18.py new file mode 100644 index 0000000000000000000000000000000000000000..c60f62a7cdf3f5c5096a7a7e725e8268fddcb057 --- /dev/null +++ b/annotator/uniformer/configs/_base_/models/ocrnet_hr18.py @@ -0,0 +1,68 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='CascadeEncoderDecoder', + num_stages=2, + pretrained='open-mmlab://msra/hrnetv2_w18', + backbone=dict( + type='HRNet', + norm_cfg=norm_cfg, + norm_eval=False, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(18, 36)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(18, 36, 72)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', 
+ num_blocks=(4, 4, 4, 4), + num_channels=(18, 36, 72, 144)))), + decode_head=[ + dict( + type='FCNHead', + in_channels=[18, 36, 72, 144], + channels=sum([18, 36, 72, 144]), + in_index=(0, 1, 2, 3), + input_transform='resize_concat', + kernel_size=1, + num_convs=1, + concat_input=False, + dropout_ratio=-1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='OCRHead', + in_channels=[18, 36, 72, 144], + in_index=(0, 1, 2, 3), + input_transform='resize_concat', + channels=512, + ocr_channels=256, + dropout_ratio=-1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + ], + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/annotator/uniformer/configs/_base_/models/ocrnet_r50-d8.py b/annotator/uniformer/configs/_base_/models/ocrnet_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..615aa3ff703942b6c22b2d6e9642504dd3e41ebd --- /dev/null +++ b/annotator/uniformer/configs/_base_/models/ocrnet_r50-d8.py @@ -0,0 +1,47 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='CascadeEncoderDecoder', + num_stages=2, + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=[ + dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='OCRHead', + in_channels=2048, + in_index=3, + channels=512, + ocr_channels=256, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)) + ], + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/annotator/uniformer/configs/_base_/models/pointrend_r50.py b/annotator/uniformer/configs/_base_/models/pointrend_r50.py new file mode 100644 index 0000000000000000000000000000000000000000..9d323dbf9466d41e0800aa57ef84045f3d874bdf --- /dev/null +++ b/annotator/uniformer/configs/_base_/models/pointrend_r50.py @@ -0,0 +1,56 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='CascadeEncoderDecoder', + num_stages=2, + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 1, 1), + strides=(1, 2, 2, 2), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=4), + decode_head=[ + dict( + type='FPNHead', + in_channels=[256, 256, 256, 256], + in_index=[0, 1, 2, 3], + feature_strides=[4, 8, 16, 32], + channels=128, + dropout_ratio=-1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + dict( + type='PointHead', + in_channels=[256], + in_index=[0], + channels=256, + num_fcs=3, + 
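+            # PointRend refinement MLP: 3 fc layers over sampled point
+            # features; the next flag re-appends the coarse prediction at
+            # each fc layer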
coarse_pred_each_layer=True, + dropout_ratio=-1, + num_classes=19, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)) + ], + # model training and testing settings + train_cfg=dict( + num_points=2048, oversample_ratio=3, importance_sample_ratio=0.75), + test_cfg=dict( + mode='whole', + subdivision_steps=2, + subdivision_num_points=8196, + scale_factor=2)) diff --git a/annotator/uniformer/configs/_base_/models/psanet_r50-d8.py b/annotator/uniformer/configs/_base_/models/psanet_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..689513fa9d2a40f14bf0ae4ae61f38f0dcc1b3da --- /dev/null +++ b/annotator/uniformer/configs/_base_/models/psanet_r50-d8.py @@ -0,0 +1,49 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='PSAHead', + in_channels=2048, + in_index=3, + channels=512, + mask_size=(97, 97), + psa_type='bi-direction', + compact=False, + shrink_factor=2, + normalization_factor=1.0, + psa_softmax=True, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/annotator/uniformer/configs/_base_/models/pspnet_r50-d8.py b/annotator/uniformer/configs/_base_/models/pspnet_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..f451e08ad2eb0732dcb806b1851eb978d4acf136 --- /dev/null +++ b/annotator/uniformer/configs/_base_/models/pspnet_r50-d8.py @@ -0,0 +1,44 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='PSPHead', + in_channels=2048, + in_index=3, + channels=512, + pool_scales=(1, 2, 3, 6), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/annotator/uniformer/configs/_base_/models/pspnet_unet_s5-d16.py b/annotator/uniformer/configs/_base_/models/pspnet_unet_s5-d16.py new file mode 100644 index 0000000000000000000000000000000000000000..fcff9ec4f41fad158344ecd77313dc14564f3682 
--- /dev/null +++ b/annotator/uniformer/configs/_base_/models/pspnet_unet_s5-d16.py @@ -0,0 +1,50 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained=None, + backbone=dict( + type='UNet', + in_channels=3, + base_channels=64, + num_stages=5, + strides=(1, 1, 1, 1, 1), + enc_num_convs=(2, 2, 2, 2, 2), + dec_num_convs=(2, 2, 2, 2), + downsamples=(True, True, True, True), + enc_dilations=(1, 1, 1, 1, 1), + dec_dilations=(1, 1, 1, 1), + with_cp=False, + conv_cfg=None, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + upsample_cfg=dict(type='InterpConv'), + norm_eval=False), + decode_head=dict( + type='PSPHead', + in_channels=64, + in_index=4, + channels=16, + pool_scales=(1, 2, 3, 6), + dropout_ratio=0.1, + num_classes=2, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=128, + in_index=3, + channels=64, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=2, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='slide', crop_size=256, stride=170)) diff --git a/annotator/uniformer/configs/_base_/models/upernet_r50.py b/annotator/uniformer/configs/_base_/models/upernet_r50.py new file mode 100644 index 0000000000000000000000000000000000000000..10974962fdd7136031fd06de1700f497d355ceaa --- /dev/null +++ b/annotator/uniformer/configs/_base_/models/upernet_r50.py @@ -0,0 +1,44 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 1, 1), + strides=(1, 2, 2, 2), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='UPerHead', + in_channels=[256, 512, 1024, 2048], + in_index=[0, 1, 2, 3], + pool_scales=(1, 2, 3, 6), + channels=512, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/annotator/uniformer/configs/_base_/models/upernet_uniformer.py b/annotator/uniformer/configs/_base_/models/upernet_uniformer.py new file mode 100644 index 0000000000000000000000000000000000000000..41aa4db809dc6e2c508e98051f61807d07477903 --- /dev/null +++ b/annotator/uniformer/configs/_base_/models/upernet_uniformer.py @@ -0,0 +1,43 @@ +# model settings +norm_cfg = dict(type='BN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained=None, + backbone=dict( + type='UniFormer', + embed_dim=[64, 128, 320, 512], + layers=[3, 4, 8, 3], + head_dim=64, + mlp_ratio=4., + qkv_bias=True, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=0.1), + decode_head=dict( + type='UPerHead', + in_channels=[64, 128, 320, 512], + in_index=[0, 1, 2, 3], + pool_scales=(1, 2, 
3, 6), + channels=512, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=320, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) \ No newline at end of file diff --git a/annotator/uniformer/configs/_base_/schedules/schedule_160k.py b/annotator/uniformer/configs/_base_/schedules/schedule_160k.py new file mode 100644 index 0000000000000000000000000000000000000000..52603890b10f25faf8eec9f9e5a4468fae09b811 --- /dev/null +++ b/annotator/uniformer/configs/_base_/schedules/schedule_160k.py @@ -0,0 +1,9 @@ +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) +optimizer_config = dict() +# learning policy +lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) +# runtime settings +runner = dict(type='IterBasedRunner', max_iters=160000) +checkpoint_config = dict(by_epoch=False, interval=16000) +evaluation = dict(interval=16000, metric='mIoU') diff --git a/annotator/uniformer/configs/_base_/schedules/schedule_20k.py b/annotator/uniformer/configs/_base_/schedules/schedule_20k.py new file mode 100644 index 0000000000000000000000000000000000000000..bf780a1b6f6521833c6a5859675147824efa599d --- /dev/null +++ b/annotator/uniformer/configs/_base_/schedules/schedule_20k.py @@ -0,0 +1,9 @@ +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) +optimizer_config = dict() +# learning policy +lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) +# runtime settings +runner = dict(type='IterBasedRunner', max_iters=20000) +checkpoint_config = dict(by_epoch=False, interval=2000) +evaluation = dict(interval=2000, metric='mIoU') diff --git a/annotator/uniformer/configs/_base_/schedules/schedule_40k.py b/annotator/uniformer/configs/_base_/schedules/schedule_40k.py new file mode 100644 index 0000000000000000000000000000000000000000..cdbf841abcb26eed87bf76ab816aff4bae0630ee --- /dev/null +++ b/annotator/uniformer/configs/_base_/schedules/schedule_40k.py @@ -0,0 +1,9 @@ +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) +optimizer_config = dict() +# learning policy +lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) +# runtime settings +runner = dict(type='IterBasedRunner', max_iters=40000) +checkpoint_config = dict(by_epoch=False, interval=4000) +evaluation = dict(interval=4000, metric='mIoU') diff --git a/annotator/uniformer/configs/_base_/schedules/schedule_80k.py b/annotator/uniformer/configs/_base_/schedules/schedule_80k.py new file mode 100644 index 0000000000000000000000000000000000000000..c190cee6bdc7922b688ea75dc8f152fa15c24617 --- /dev/null +++ b/annotator/uniformer/configs/_base_/schedules/schedule_80k.py @@ -0,0 +1,9 @@ +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) +optimizer_config = dict() +# learning policy +lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) +# runtime settings +runner = dict(type='IterBasedRunner', max_iters=80000) +checkpoint_config = dict(by_epoch=False, interval=8000) +evaluation = dict(interval=8000, metric='mIoU') diff --git 
a/annotator/uniformer/exp/upernet_global_small/config.py b/annotator/uniformer/exp/upernet_global_small/config.py new file mode 100644 index 0000000000000000000000000000000000000000..01db96bf9b0be531aa0eaf62fee51543712f8670 --- /dev/null +++ b/annotator/uniformer/exp/upernet_global_small/config.py @@ -0,0 +1,38 @@ +_base_ = [ + '../../configs/_base_/models/upernet_uniformer.py', + '../../configs/_base_/datasets/ade20k.py', + '../../configs/_base_/default_runtime.py', + '../../configs/_base_/schedules/schedule_160k.py' +] +model = dict( + backbone=dict( + type='UniFormer', + embed_dim=[64, 128, 320, 512], + layers=[3, 4, 8, 3], + head_dim=64, + drop_path_rate=0.25, + windows=False, + hybrid=False + ), + decode_head=dict( + in_channels=[64, 128, 320, 512], + num_classes=150 + ), + auxiliary_head=dict( + in_channels=320, + num_classes=150 + )) + +# AdamW optimizer, no weight decay for position embedding & layer norm in backbone +optimizer = dict(_delete_=True, type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01, + paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.), + 'relative_position_bias_table': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.)})) + +lr_config = dict(_delete_=True, policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, min_lr=0.0, by_epoch=False) + +data=dict(samples_per_gpu=2) \ No newline at end of file diff --git a/annotator/uniformer/exp/upernet_global_small/run.sh b/annotator/uniformer/exp/upernet_global_small/run.sh new file mode 100644 index 0000000000000000000000000000000000000000..9fb22edfa7a32624ea08a63fe7d720c40db3b696 --- /dev/null +++ b/annotator/uniformer/exp/upernet_global_small/run.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash + +work_path=$(dirname $0) +PYTHONPATH="$(dirname $0)/../../":$PYTHONPATH \ +python -m torch.distributed.launch --nproc_per_node=8 \ + tools/train.py ${work_path}/config.py \ + --launcher pytorch \ + --options model.backbone.pretrained_path='your_model_path/uniformer_small_in1k.pth' \ + --work-dir ${work_path}/ckpt \ + 2>&1 | tee -a ${work_path}/log.txt diff --git a/annotator/uniformer/exp/upernet_global_small/test.sh b/annotator/uniformer/exp/upernet_global_small/test.sh new file mode 100644 index 0000000000000000000000000000000000000000..d9a85e7a0d3b7c96b060f473d41254b37a382fcb --- /dev/null +++ b/annotator/uniformer/exp/upernet_global_small/test.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash + +work_path=$(dirname $0) +PYTHONPATH="$(dirname $0)/../../":$PYTHONPATH \ +python -m torch.distributed.launch --nproc_per_node=8 \ + tools/test.py ${work_path}/test_config_h32.py \ + ${work_path}/ckpt/latest.pth \ + --launcher pytorch \ + --eval mIoU \ + 2>&1 | tee -a ${work_path}/log.txt diff --git a/annotator/uniformer/exp/upernet_global_small/test_config_g.py b/annotator/uniformer/exp/upernet_global_small/test_config_g.py new file mode 100644 index 0000000000000000000000000000000000000000..e43737a98a3b174a9f2fe059c06d511144686459 --- /dev/null +++ b/annotator/uniformer/exp/upernet_global_small/test_config_g.py @@ -0,0 +1,38 @@ +_base_ = [ + '../../configs/_base_/models/upernet_uniformer.py', + '../../configs/_base_/datasets/ade20k.py', + '../../configs/_base_/default_runtime.py', + '../../configs/_base_/schedules/schedule_160k.py' +] +model = dict( + backbone=dict( + type='UniFormer', + embed_dim=[64, 128, 320, 512], + layers=[3, 4, 8, 3], + head_dim=64, + drop_path_rate=0.25, + windows=False, + hybrid=False, + ), + decode_head=dict( + in_channels=[64, 128, 320, 512], + 
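+        # 150 classes: ADE20K, supplied by the _base_ dataset config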
num_classes=150 + ), + auxiliary_head=dict( + in_channels=320, + num_classes=150 + )) + +# AdamW optimizer, no weight decay for position embedding & layer norm in backbone +optimizer = dict(_delete_=True, type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01, + paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.), + 'relative_position_bias_table': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.)})) + +lr_config = dict(_delete_=True, policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, min_lr=0.0, by_epoch=False) + +data=dict(samples_per_gpu=2) \ No newline at end of file diff --git a/annotator/uniformer/exp/upernet_global_small/test_config_h32.py b/annotator/uniformer/exp/upernet_global_small/test_config_h32.py new file mode 100644 index 0000000000000000000000000000000000000000..a31e3874f76f9f7b089ac8834d85df2441af9b0e --- /dev/null +++ b/annotator/uniformer/exp/upernet_global_small/test_config_h32.py @@ -0,0 +1,39 @@ +_base_ = [ + '../../configs/_base_/models/upernet_uniformer.py', + '../../configs/_base_/datasets/ade20k.py', + '../../configs/_base_/default_runtime.py', + '../../configs/_base_/schedules/schedule_160k.py' +] +model = dict( + backbone=dict( + type='UniFormer', + embed_dim=[64, 128, 320, 512], + layers=[3, 4, 8, 3], + head_dim=64, + drop_path_rate=0.25, + windows=False, + hybrid=True, + window_size=32 + ), + decode_head=dict( + in_channels=[64, 128, 320, 512], + num_classes=150 + ), + auxiliary_head=dict( + in_channels=320, + num_classes=150 + )) + +# AdamW optimizer, no weight decay for position embedding & layer norm in backbone +optimizer = dict(_delete_=True, type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01, + paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.), + 'relative_position_bias_table': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.)})) + +lr_config = dict(_delete_=True, policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, min_lr=0.0, by_epoch=False) + +data=dict(samples_per_gpu=2) \ No newline at end of file diff --git a/annotator/uniformer/exp/upernet_global_small/test_config_w32.py b/annotator/uniformer/exp/upernet_global_small/test_config_w32.py new file mode 100644 index 0000000000000000000000000000000000000000..3d9e06f029e46c14cb9ddb39319cabe86fef9b44 --- /dev/null +++ b/annotator/uniformer/exp/upernet_global_small/test_config_w32.py @@ -0,0 +1,39 @@ +_base_ = [ + '../../configs/_base_/models/upernet_uniformer.py', + '../../configs/_base_/datasets/ade20k.py', + '../../configs/_base_/default_runtime.py', + '../../configs/_base_/schedules/schedule_160k.py' +] +model = dict( + backbone=dict( + type='UniFormer', + embed_dim=[64, 128, 320, 512], + layers=[3, 4, 8, 3], + head_dim=64, + drop_path_rate=0.25, + windows=True, + hybrid=False, + window_size=32 + ), + decode_head=dict( + in_channels=[64, 128, 320, 512], + num_classes=150 + ), + auxiliary_head=dict( + in_channels=320, + num_classes=150 + )) + +# AdamW optimizer, no weight decay for position embedding & layer norm in backbone +optimizer = dict(_delete_=True, type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01, + paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.), + 'relative_position_bias_table': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.)})) + +lr_config = dict(_delete_=True, policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, min_lr=0.0, by_epoch=False) + 
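+# per-GPU batch size; with the 8-GPU launch scripts in this exp folder this
+# gives an effective batch size of 16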
+data=dict(samples_per_gpu=2)
\ No newline at end of file
diff --git a/annotator/uniformer/mmcv/__init__.py b/annotator/uniformer/mmcv/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..210a2989138380559f23045b568d0fbbeb918c03
--- /dev/null
+++ b/annotator/uniformer/mmcv/__init__.py
@@ -0,0 +1,15 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+# flake8: noqa
+from .arraymisc import *
+from .fileio import *
+from .image import *
+from .utils import *
+from .version import *
+from .video import *
+from .visualization import *
+
+# The following modules are not imported to this level, so mmcv may be used
+# without PyTorch.
+# - runner
+# - parallel
+# - op
diff --git a/annotator/uniformer/mmcv/arraymisc/__init__.py b/annotator/uniformer/mmcv/arraymisc/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..4b4700d6139ae3d604ff6e542468cce4200c020c
--- /dev/null
+++ b/annotator/uniformer/mmcv/arraymisc/__init__.py
@@ -0,0 +1,4 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .quantization import dequantize, quantize
+
+__all__ = ['quantize', 'dequantize']
diff --git a/annotator/uniformer/mmcv/arraymisc/quantization.py b/annotator/uniformer/mmcv/arraymisc/quantization.py
new file mode 100644
index 0000000000000000000000000000000000000000..8e47a3545780cf071a1ef8195efb0b7b662c8186
--- /dev/null
+++ b/annotator/uniformer/mmcv/arraymisc/quantization.py
@@ -0,0 +1,55 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import numpy as np
+
+
+def quantize(arr, min_val, max_val, levels, dtype=np.int64):
+    """Quantize an array of (-inf, inf) to [0, levels-1].
+
+    Args:
+        arr (ndarray): Input array.
+        min_val (scalar): Minimum value to be clipped.
+        max_val (scalar): Maximum value to be clipped.
+        levels (int): Quantization levels.
+        dtype (np.dtype): The type of the quantized array.
+
+    Returns:
+        ndarray: Quantized array.
+    """
+    if not (isinstance(levels, int) and levels > 1):
+        raise ValueError(
+            f'levels must be an integer greater than 1, but got {levels}')
+    if min_val >= max_val:
+        raise ValueError(
+            f'min_val ({min_val}) must be smaller than max_val ({max_val})')
+
+    arr = np.clip(arr, min_val, max_val) - min_val
+    quantized_arr = np.minimum(
+        np.floor(levels * arr / (max_val - min_val)).astype(dtype), levels - 1)
+
+    return quantized_arr
+
+
+def dequantize(arr, min_val, max_val, levels, dtype=np.float64):
+    """Dequantize an array.
+
+    Args:
+        arr (ndarray): Input array.
+        min_val (scalar): Minimum value to be clipped.
+        max_val (scalar): Maximum value to be clipped.
+        levels (int): Quantization levels.
+        dtype (np.dtype): The type of the dequantized array.
+
+    Returns:
+        ndarray: Dequantized array.
+    """
+    if not (isinstance(levels, int) and levels > 1):
+        raise ValueError(
+            f'levels must be an integer greater than 1, but got {levels}')
+    if min_val >= max_val:
+        raise ValueError(
+            f'min_val ({min_val}) must be smaller than max_val ({max_val})')
+
+    dequantized_arr = (arr + 0.5).astype(dtype) * (max_val -
+                                                   min_val) / levels + min_val
+
+    return dequantized_arr
diff --git a/annotator/uniformer/mmcv/cnn/__init__.py b/annotator/uniformer/mmcv/cnn/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..7246c897430f0cc7ce12719ad8608824fc734446
--- /dev/null
+++ b/annotator/uniformer/mmcv/cnn/__init__.py
@@ -0,0 +1,41 @@
+# Copyright (c) OpenMMLab. All rights reserved.
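+# This vendored copy of mmcv.cnn re-exports the layer builders
+# (build_conv_layer, build_norm_layer, build_activation_layer, ...) and the
+# reference backbones (AlexNet, VGG, ResNet) listed in __all__ below.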
+from .alexnet import AlexNet +# yapf: disable +from .bricks import (ACTIVATION_LAYERS, CONV_LAYERS, NORM_LAYERS, + PADDING_LAYERS, PLUGIN_LAYERS, UPSAMPLE_LAYERS, + ContextBlock, Conv2d, Conv3d, ConvAWS2d, ConvModule, + ConvTranspose2d, ConvTranspose3d, ConvWS2d, + DepthwiseSeparableConvModule, GeneralizedAttention, + HSigmoid, HSwish, Linear, MaxPool2d, MaxPool3d, + NonLocal1d, NonLocal2d, NonLocal3d, Scale, Swish, + build_activation_layer, build_conv_layer, + build_norm_layer, build_padding_layer, build_plugin_layer, + build_upsample_layer, conv_ws_2d, is_norm) +from .builder import MODELS, build_model_from_cfg +# yapf: enable +from .resnet import ResNet, make_res_layer +from .utils import (INITIALIZERS, Caffe2XavierInit, ConstantInit, KaimingInit, + NormalInit, PretrainedInit, TruncNormalInit, UniformInit, + XavierInit, bias_init_with_prob, caffe2_xavier_init, + constant_init, fuse_conv_bn, get_model_complexity_info, + initialize, kaiming_init, normal_init, trunc_normal_init, + uniform_init, xavier_init) +from .vgg import VGG, make_vgg_layer + +__all__ = [ + 'AlexNet', 'VGG', 'make_vgg_layer', 'ResNet', 'make_res_layer', + 'constant_init', 'xavier_init', 'normal_init', 'trunc_normal_init', + 'uniform_init', 'kaiming_init', 'caffe2_xavier_init', + 'bias_init_with_prob', 'ConvModule', 'build_activation_layer', + 'build_conv_layer', 'build_norm_layer', 'build_padding_layer', + 'build_upsample_layer', 'build_plugin_layer', 'is_norm', 'NonLocal1d', + 'NonLocal2d', 'NonLocal3d', 'ContextBlock', 'HSigmoid', 'Swish', 'HSwish', + 'GeneralizedAttention', 'ACTIVATION_LAYERS', 'CONV_LAYERS', 'NORM_LAYERS', + 'PADDING_LAYERS', 'UPSAMPLE_LAYERS', 'PLUGIN_LAYERS', 'Scale', + 'get_model_complexity_info', 'conv_ws_2d', 'ConvAWS2d', 'ConvWS2d', + 'fuse_conv_bn', 'DepthwiseSeparableConvModule', 'Linear', 'Conv2d', + 'ConvTranspose2d', 'MaxPool2d', 'ConvTranspose3d', 'MaxPool3d', 'Conv3d', + 'initialize', 'INITIALIZERS', 'ConstantInit', 'XavierInit', 'NormalInit', + 'TruncNormalInit', 'UniformInit', 'KaimingInit', 'PretrainedInit', + 'Caffe2XavierInit', 'MODELS', 'build_model_from_cfg' +] diff --git a/annotator/uniformer/mmcv/cnn/alexnet.py b/annotator/uniformer/mmcv/cnn/alexnet.py new file mode 100644 index 0000000000000000000000000000000000000000..89e36b8c7851f895d9ae7f07149f0e707456aab0 --- /dev/null +++ b/annotator/uniformer/mmcv/cnn/alexnet.py @@ -0,0 +1,61 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import logging + +import torch.nn as nn + + +class AlexNet(nn.Module): + """AlexNet backbone. + + Args: + num_classes (int): number of classes for classification. 
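+
+    Examples::
+
+        >>> import torch
+        >>> # illustrative only (editor's addition): a 10-way classifier on
+        >>> # the standard 224x224 input
+        >>> model = AlexNet(num_classes=10)
+        >>> model(torch.randn(1, 3, 224, 224)).shape
+        torch.Size([1, 10])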
+ """ + + def __init__(self, num_classes=-1): + super(AlexNet, self).__init__() + self.num_classes = num_classes + self.features = nn.Sequential( + nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2), + nn.ReLU(inplace=True), + nn.MaxPool2d(kernel_size=3, stride=2), + nn.Conv2d(64, 192, kernel_size=5, padding=2), + nn.ReLU(inplace=True), + nn.MaxPool2d(kernel_size=3, stride=2), + nn.Conv2d(192, 384, kernel_size=3, padding=1), + nn.ReLU(inplace=True), + nn.Conv2d(384, 256, kernel_size=3, padding=1), + nn.ReLU(inplace=True), + nn.Conv2d(256, 256, kernel_size=3, padding=1), + nn.ReLU(inplace=True), + nn.MaxPool2d(kernel_size=3, stride=2), + ) + if self.num_classes > 0: + self.classifier = nn.Sequential( + nn.Dropout(), + nn.Linear(256 * 6 * 6, 4096), + nn.ReLU(inplace=True), + nn.Dropout(), + nn.Linear(4096, 4096), + nn.ReLU(inplace=True), + nn.Linear(4096, num_classes), + ) + + def init_weights(self, pretrained=None): + if isinstance(pretrained, str): + logger = logging.getLogger() + from ..runner import load_checkpoint + load_checkpoint(self, pretrained, strict=False, logger=logger) + elif pretrained is None: + # use default initializer + pass + else: + raise TypeError('pretrained must be a str or None') + + def forward(self, x): + + x = self.features(x) + if self.num_classes > 0: + x = x.view(x.size(0), 256 * 6 * 6) + x = self.classifier(x) + + return x diff --git a/annotator/uniformer/mmcv/cnn/bricks/__init__.py b/annotator/uniformer/mmcv/cnn/bricks/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..0f33124ed23fc6f27119a37bcb5ab004d3572be0 --- /dev/null +++ b/annotator/uniformer/mmcv/cnn/bricks/__init__.py @@ -0,0 +1,35 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .activation import build_activation_layer +from .context_block import ContextBlock +from .conv import build_conv_layer +from .conv2d_adaptive_padding import Conv2dAdaptivePadding +from .conv_module import ConvModule +from .conv_ws import ConvAWS2d, ConvWS2d, conv_ws_2d +from .depthwise_separable_conv_module import DepthwiseSeparableConvModule +from .drop import Dropout, DropPath +from .generalized_attention import GeneralizedAttention +from .hsigmoid import HSigmoid +from .hswish import HSwish +from .non_local import NonLocal1d, NonLocal2d, NonLocal3d +from .norm import build_norm_layer, is_norm +from .padding import build_padding_layer +from .plugin import build_plugin_layer +from .registry import (ACTIVATION_LAYERS, CONV_LAYERS, NORM_LAYERS, + PADDING_LAYERS, PLUGIN_LAYERS, UPSAMPLE_LAYERS) +from .scale import Scale +from .swish import Swish +from .upsample import build_upsample_layer +from .wrappers import (Conv2d, Conv3d, ConvTranspose2d, ConvTranspose3d, + Linear, MaxPool2d, MaxPool3d) + +__all__ = [ + 'ConvModule', 'build_activation_layer', 'build_conv_layer', + 'build_norm_layer', 'build_padding_layer', 'build_upsample_layer', + 'build_plugin_layer', 'is_norm', 'HSigmoid', 'HSwish', 'NonLocal1d', + 'NonLocal2d', 'NonLocal3d', 'ContextBlock', 'GeneralizedAttention', + 'ACTIVATION_LAYERS', 'CONV_LAYERS', 'NORM_LAYERS', 'PADDING_LAYERS', + 'UPSAMPLE_LAYERS', 'PLUGIN_LAYERS', 'Scale', 'ConvAWS2d', 'ConvWS2d', + 'conv_ws_2d', 'DepthwiseSeparableConvModule', 'Swish', 'Linear', + 'Conv2dAdaptivePadding', 'Conv2d', 'ConvTranspose2d', 'MaxPool2d', + 'ConvTranspose3d', 'MaxPool3d', 'Conv3d', 'Dropout', 'DropPath' +] diff --git a/annotator/uniformer/mmcv/cnn/bricks/activation.py b/annotator/uniformer/mmcv/cnn/bricks/activation.py new file mode 100644 index 
0000000000000000000000000000000000000000..cab2712287d5ef7be2f079dcb54a94b96394eab5 --- /dev/null +++ b/annotator/uniformer/mmcv/cnn/bricks/activation.py @@ -0,0 +1,92 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn +import torch.nn.functional as F + +from annotator.uniformer.mmcv.utils import TORCH_VERSION, build_from_cfg, digit_version +from .registry import ACTIVATION_LAYERS + +for module in [ + nn.ReLU, nn.LeakyReLU, nn.PReLU, nn.RReLU, nn.ReLU6, nn.ELU, + nn.Sigmoid, nn.Tanh +]: + ACTIVATION_LAYERS.register_module(module=module) + + +@ACTIVATION_LAYERS.register_module(name='Clip') +@ACTIVATION_LAYERS.register_module() +class Clamp(nn.Module): + """Clamp activation layer. + + This activation function is to clamp the feature map value within + :math:`[min, max]`. More details can be found in ``torch.clamp()``. + + Args: + min (Number | optional): Lower-bound of the range to be clamped to. + Default to -1. + max (Number | optional): Upper-bound of the range to be clamped to. + Default to 1. + """ + + def __init__(self, min=-1., max=1.): + super(Clamp, self).__init__() + self.min = min + self.max = max + + def forward(self, x): + """Forward function. + + Args: + x (torch.Tensor): The input tensor. + + Returns: + torch.Tensor: Clamped tensor. + """ + return torch.clamp(x, min=self.min, max=self.max) + + +class GELU(nn.Module): + r"""Applies the Gaussian Error Linear Units function: + + .. math:: + \text{GELU}(x) = x * \Phi(x) + where :math:`\Phi(x)` is the Cumulative Distribution Function for + Gaussian Distribution. + + Shape: + - Input: :math:`(N, *)` where `*` means, any number of additional + dimensions + - Output: :math:`(N, *)`, same shape as the input + + .. image:: scripts/activation_images/GELU.png + + Examples:: + + >>> m = nn.GELU() + >>> input = torch.randn(2) + >>> output = m(input) + """ + + def forward(self, input): + return F.gelu(input) + + +if (TORCH_VERSION == 'parrots' + or digit_version(TORCH_VERSION) < digit_version('1.4')): + ACTIVATION_LAYERS.register_module(module=GELU) +else: + ACTIVATION_LAYERS.register_module(module=nn.GELU) + + +def build_activation_layer(cfg): + """Build activation layer. + + Args: + cfg (dict): The activation layer config, which should contain: + - type (str): Layer type. + - layer args: Args needed to instantiate an activation layer. + + Returns: + nn.Module: Created activation layer. + """ + return build_from_cfg(cfg, ACTIVATION_LAYERS) diff --git a/annotator/uniformer/mmcv/cnn/bricks/context_block.py b/annotator/uniformer/mmcv/cnn/bricks/context_block.py new file mode 100644 index 0000000000000000000000000000000000000000..d60fdb904c749ce3b251510dff3cc63cea70d42e --- /dev/null +++ b/annotator/uniformer/mmcv/cnn/bricks/context_block.py @@ -0,0 +1,125 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +from torch import nn + +from ..utils import constant_init, kaiming_init +from .registry import PLUGIN_LAYERS + + +def last_zero_init(m): + if isinstance(m, nn.Sequential): + constant_init(m[-1], val=0) + else: + constant_init(m, val=0) + + +@PLUGIN_LAYERS.register_module() +class ContextBlock(nn.Module): + """ContextBlock module in GCNet. + + See 'GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond' + (https://arxiv.org/abs/1904.11492) for details. + + Args: + in_channels (int): Channels of the input feature map. + ratio (float): Ratio of channels of transform bottleneck + pooling_type (str): Pooling method for context modeling. 
+            Options are 'att' and 'avg', which stand for attention pooling
+            and average pooling respectively. Default: 'att'.
+        fusion_types (Sequence[str]): Fusion method for feature fusion.
+            Options are 'channel_add' and 'channel_mul', which stand for
+            channel-wise addition and multiplication. Default: ('channel_add',)
+    """
+
+    _abbr_ = 'context_block'
+
+    def __init__(self,
+                 in_channels,
+                 ratio,
+                 pooling_type='att',
+                 fusion_types=('channel_add', )):
+        super(ContextBlock, self).__init__()
+        assert pooling_type in ['avg', 'att']
+        assert isinstance(fusion_types, (list, tuple))
+        valid_fusion_types = ['channel_add', 'channel_mul']
+        assert all([f in valid_fusion_types for f in fusion_types])
+        assert len(fusion_types) > 0, 'at least one fusion should be used'
+        self.in_channels = in_channels
+        self.ratio = ratio
+        self.planes = int(in_channels * ratio)
+        self.pooling_type = pooling_type
+        self.fusion_types = fusion_types
+        if pooling_type == 'att':
+            self.conv_mask = nn.Conv2d(in_channels, 1, kernel_size=1)
+            self.softmax = nn.Softmax(dim=2)
+        else:
+            self.avg_pool = nn.AdaptiveAvgPool2d(1)
+        if 'channel_add' in fusion_types:
+            self.channel_add_conv = nn.Sequential(
+                nn.Conv2d(self.in_channels, self.planes, kernel_size=1),
+                nn.LayerNorm([self.planes, 1, 1]),
+                nn.ReLU(inplace=True),  # yapf: disable
+                nn.Conv2d(self.planes, self.in_channels, kernel_size=1))
+        else:
+            self.channel_add_conv = None
+        if 'channel_mul' in fusion_types:
+            self.channel_mul_conv = nn.Sequential(
+                nn.Conv2d(self.in_channels, self.planes, kernel_size=1),
+                nn.LayerNorm([self.planes, 1, 1]),
+                nn.ReLU(inplace=True),  # yapf: disable
+                nn.Conv2d(self.planes, self.in_channels, kernel_size=1))
+        else:
+            self.channel_mul_conv = None
+        self.reset_parameters()
+
+    def reset_parameters(self):
+        if self.pooling_type == 'att':
+            kaiming_init(self.conv_mask, mode='fan_in')
+            self.conv_mask.inited = True
+
+        if self.channel_add_conv is not None:
+            last_zero_init(self.channel_add_conv)
+        if self.channel_mul_conv is not None:
+            last_zero_init(self.channel_mul_conv)
+
+    def spatial_pool(self, x):
+        batch, channel, height, width = x.size()
+        if self.pooling_type == 'att':
+            input_x = x
+            # [N, C, H * W]
+            input_x = input_x.view(batch, channel, height * width)
+            # [N, 1, C, H * W]
+            input_x = input_x.unsqueeze(1)
+            # [N, 1, H, W]
+            context_mask = self.conv_mask(x)
+            # [N, 1, H * W]
+            context_mask = context_mask.view(batch, 1, height * width)
+            # [N, 1, H * W]
+            context_mask = self.softmax(context_mask)
+            # [N, 1, H * W, 1]
+            context_mask = context_mask.unsqueeze(-1)
+            # [N, 1, C, 1]
+            context = torch.matmul(input_x, context_mask)
+            # [N, C, 1, 1]
+            context = context.view(batch, channel, 1, 1)
+        else:
+            # [N, C, 1, 1]
+            context = self.avg_pool(x)
+
+        return context
+
+    def forward(self, x):
+        # [N, C, 1, 1]
+        context = self.spatial_pool(x)
+
+        out = x
+        if self.channel_mul_conv is not None:
+            # [N, C, 1, 1]
+            channel_mul_term = torch.sigmoid(self.channel_mul_conv(context))
+            out = out * channel_mul_term
+        if self.channel_add_conv is not None:
+            # [N, C, 1, 1]
+            channel_add_term = self.channel_add_conv(context)
+            out = out + channel_add_term
+
+        return out
diff --git a/annotator/uniformer/mmcv/cnn/bricks/conv.py b/annotator/uniformer/mmcv/cnn/bricks/conv.py
new file mode 100644
index 0000000000000000000000000000000000000000..cf54491997a48ac3e7fadc4183ab7bf3e831024c
--- /dev/null
+++ b/annotator/uniformer/mmcv/cnn/bricks/conv.py
@@ -0,0 +1,44 @@
+# Copyright (c) OpenMMLab. All rights reserved.
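+# Illustrative usage of the builder below (editor's sketch; both calls rely
+# only on the registrations made in this module):
+#   >>> conv = build_conv_layer(dict(type='Conv2d'), 3, 64, kernel_size=3)
+#   >>> conv = build_conv_layer(None, 3, 64, kernel_size=3)  # defaults to Conv2d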
+from torch import nn
+
+from .registry import CONV_LAYERS
+
+CONV_LAYERS.register_module('Conv1d', module=nn.Conv1d)
+CONV_LAYERS.register_module('Conv2d', module=nn.Conv2d)
+CONV_LAYERS.register_module('Conv3d', module=nn.Conv3d)
+CONV_LAYERS.register_module('Conv', module=nn.Conv2d)
+
+
+def build_conv_layer(cfg, *args, **kwargs):
+    """Build convolution layer.
+
+    Args:
+        cfg (None or dict): The conv layer config, which should contain:
+            - type (str): Layer type.
+            - layer args: Args needed to instantiate a conv layer.
+        args (argument list): Arguments passed to the `__init__`
+            method of the corresponding conv layer.
+        kwargs (keyword arguments): Keyword arguments passed to the `__init__`
+            method of the corresponding conv layer.
+
+    Returns:
+        nn.Module: Created conv layer.
+    """
+    if cfg is None:
+        cfg_ = dict(type='Conv2d')
+    else:
+        if not isinstance(cfg, dict):
+            raise TypeError('cfg must be a dict')
+        if 'type' not in cfg:
+            raise KeyError('the cfg dict must contain the key "type"')
+        cfg_ = cfg.copy()
+
+    layer_type = cfg_.pop('type')
+    if layer_type not in CONV_LAYERS:
+        raise KeyError(f'Unrecognized conv type {layer_type}')
+    else:
+        conv_layer = CONV_LAYERS.get(layer_type)
+
+    layer = conv_layer(*args, **kwargs, **cfg_)
+
+    return layer
diff --git a/annotator/uniformer/mmcv/cnn/bricks/conv2d_adaptive_padding.py b/annotator/uniformer/mmcv/cnn/bricks/conv2d_adaptive_padding.py
new file mode 100644
index 0000000000000000000000000000000000000000..b45e758ac6cf8dfb0382d072fe09125bc7e9b888
--- /dev/null
+++ b/annotator/uniformer/mmcv/cnn/bricks/conv2d_adaptive_padding.py
@@ -0,0 +1,62 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import math
+
+from torch import nn
+from torch.nn import functional as F
+
+from .registry import CONV_LAYERS
+
+
+@CONV_LAYERS.register_module()
+class Conv2dAdaptivePadding(nn.Conv2d):
+    """TensorFlow-style 2D convolution with ``padding`` as "same", which
+    applies padding to the input (if needed) so that the input image is fully
+    covered by the specified filter and stride. For stride 1, this ensures
+    that the output image size is the same as the input. For stride 2, the
+    output dimensions are halved, for example.
+
+    Args:
+        in_channels (int): Number of channels in the input image
+        out_channels (int): Number of channels produced by the convolution
+        kernel_size (int or tuple): Size of the convolving kernel
+        stride (int or tuple, optional): Stride of the convolution. Default: 1
+        padding (int or tuple, optional): Zero-padding added to both sides of
+            the input. Default: 0
+        dilation (int or tuple, optional): Spacing between kernel elements.
+            Default: 1
+        groups (int, optional): Number of blocked connections from input
+            channels to output channels. Default: 1
+        bias (bool, optional): If ``True``, adds a learnable bias to the
+            output.
Default: ``True`` + """ + + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + bias=True): + super().__init__(in_channels, out_channels, kernel_size, stride, 0, + dilation, groups, bias) + + def forward(self, x): + img_h, img_w = x.size()[-2:] + kernel_h, kernel_w = self.weight.size()[-2:] + stride_h, stride_w = self.stride + output_h = math.ceil(img_h / stride_h) + output_w = math.ceil(img_w / stride_w) + pad_h = ( + max((output_h - 1) * self.stride[0] + + (kernel_h - 1) * self.dilation[0] + 1 - img_h, 0)) + pad_w = ( + max((output_w - 1) * self.stride[1] + + (kernel_w - 1) * self.dilation[1] + 1 - img_w, 0)) + if pad_h > 0 or pad_w > 0: + x = F.pad(x, [ + pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2 + ]) + return F.conv2d(x, self.weight, self.bias, self.stride, self.padding, + self.dilation, self.groups) diff --git a/annotator/uniformer/mmcv/cnn/bricks/conv_module.py b/annotator/uniformer/mmcv/cnn/bricks/conv_module.py new file mode 100644 index 0000000000000000000000000000000000000000..e60e7e62245071c77b652093fddebff3948d7c3e --- /dev/null +++ b/annotator/uniformer/mmcv/cnn/bricks/conv_module.py @@ -0,0 +1,206 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import warnings + +import torch.nn as nn + +from annotator.uniformer.mmcv.utils import _BatchNorm, _InstanceNorm +from ..utils import constant_init, kaiming_init +from .activation import build_activation_layer +from .conv import build_conv_layer +from .norm import build_norm_layer +from .padding import build_padding_layer +from .registry import PLUGIN_LAYERS + + +@PLUGIN_LAYERS.register_module() +class ConvModule(nn.Module): + """A conv block that bundles conv/norm/activation layers. + + This block simplifies the usage of convolution layers, which are commonly + used with a norm layer (e.g., BatchNorm) and activation layer (e.g., ReLU). + It is based upon three build methods: `build_conv_layer()`, + `build_norm_layer()` and `build_activation_layer()`. + + Besides, we add some additional features in this module. + 1. Automatically set `bias` of the conv layer. + 2. Spectral norm is supported. + 3. More padding modes are supported. Before PyTorch 1.5, nn.Conv2d only + supports zero and circular padding, and we add "reflect" padding mode. + + Args: + in_channels (int): Number of channels in the input feature map. + Same as that in ``nn._ConvNd``. + out_channels (int): Number of channels produced by the convolution. + Same as that in ``nn._ConvNd``. + kernel_size (int | tuple[int]): Size of the convolving kernel. + Same as that in ``nn._ConvNd``. + stride (int | tuple[int]): Stride of the convolution. + Same as that in ``nn._ConvNd``. + padding (int | tuple[int]): Zero-padding added to both sides of + the input. Same as that in ``nn._ConvNd``. + dilation (int | tuple[int]): Spacing between kernel elements. + Same as that in ``nn._ConvNd``. + groups (int): Number of blocked connections from input channels to + output channels. Same as that in ``nn._ConvNd``. + bias (bool | str): If specified as `auto`, it will be decided by the + norm_cfg. Bias will be set as True if `norm_cfg` is None, otherwise + False. Default: "auto". + conv_cfg (dict): Config dict for convolution layer. Default: None, + which means using conv2d. + norm_cfg (dict): Config dict for normalization layer. Default: None. + act_cfg (dict): Config dict for activation layer. + Default: dict(type='ReLU'). + inplace (bool): Whether to use inplace mode for activation. + Default: True. 
+ with_spectral_norm (bool): Whether use spectral norm in conv module. + Default: False. + padding_mode (str): If the `padding_mode` has not been supported by + current `Conv2d` in PyTorch, we will use our own padding layer + instead. Currently, we support ['zeros', 'circular'] with official + implementation and ['reflect'] with our own implementation. + Default: 'zeros'. + order (tuple[str]): The order of conv/norm/activation layers. It is a + sequence of "conv", "norm" and "act". Common examples are + ("conv", "norm", "act") and ("act", "conv", "norm"). + Default: ('conv', 'norm', 'act'). + """ + + _abbr_ = 'conv_block' + + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + bias='auto', + conv_cfg=None, + norm_cfg=None, + act_cfg=dict(type='ReLU'), + inplace=True, + with_spectral_norm=False, + padding_mode='zeros', + order=('conv', 'norm', 'act')): + super(ConvModule, self).__init__() + assert conv_cfg is None or isinstance(conv_cfg, dict) + assert norm_cfg is None or isinstance(norm_cfg, dict) + assert act_cfg is None or isinstance(act_cfg, dict) + official_padding_mode = ['zeros', 'circular'] + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + self.inplace = inplace + self.with_spectral_norm = with_spectral_norm + self.with_explicit_padding = padding_mode not in official_padding_mode + self.order = order + assert isinstance(self.order, tuple) and len(self.order) == 3 + assert set(order) == set(['conv', 'norm', 'act']) + + self.with_norm = norm_cfg is not None + self.with_activation = act_cfg is not None + # if the conv layer is before a norm layer, bias is unnecessary. + if bias == 'auto': + bias = not self.with_norm + self.with_bias = bias + + if self.with_explicit_padding: + pad_cfg = dict(type=padding_mode) + self.padding_layer = build_padding_layer(pad_cfg, padding) + + # reset padding to 0 for conv module + conv_padding = 0 if self.with_explicit_padding else padding + # build convolution layer + self.conv = build_conv_layer( + conv_cfg, + in_channels, + out_channels, + kernel_size, + stride=stride, + padding=conv_padding, + dilation=dilation, + groups=groups, + bias=bias) + # export the attributes of self.conv to a higher level for convenience + self.in_channels = self.conv.in_channels + self.out_channels = self.conv.out_channels + self.kernel_size = self.conv.kernel_size + self.stride = self.conv.stride + self.padding = padding + self.dilation = self.conv.dilation + self.transposed = self.conv.transposed + self.output_padding = self.conv.output_padding + self.groups = self.conv.groups + + if self.with_spectral_norm: + self.conv = nn.utils.spectral_norm(self.conv) + + # build normalization layers + if self.with_norm: + # norm layer is after conv layer + if order.index('norm') > order.index('conv'): + norm_channels = out_channels + else: + norm_channels = in_channels + self.norm_name, norm = build_norm_layer(norm_cfg, norm_channels) + self.add_module(self.norm_name, norm) + if self.with_bias: + if isinstance(norm, (_BatchNorm, _InstanceNorm)): + warnings.warn( + 'Unnecessary conv bias before batch/instance norm') + else: + self.norm_name = None + + # build activation layer + if self.with_activation: + act_cfg_ = act_cfg.copy() + # nn.Tanh has no 'inplace' argument + if act_cfg_['type'] not in [ + 'Tanh', 'PReLU', 'Sigmoid', 'HSigmoid', 'Swish' + ]: + act_cfg_.setdefault('inplace', inplace) + self.activate = build_activation_layer(act_cfg_) + + # Use msra init by default + 
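+        # i.e. kaiming_init for self.conv and constant_init(1, bias=0) for the
+        # norm; conv layers defining their own init_weights() are left as-is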
self.init_weights() + + @property + def norm(self): + if self.norm_name: + return getattr(self, self.norm_name) + else: + return None + + def init_weights(self): + # 1. It is mainly for customized conv layers with their own + # initialization manners by calling their own ``init_weights()``, + # and we do not want ConvModule to override the initialization. + # 2. For customized conv layers without their own initialization + # manners (that is, they don't have their own ``init_weights()``) + # and PyTorch's conv layers, they will be initialized by + # this method with default ``kaiming_init``. + # Note: For PyTorch's conv layers, they will be overwritten by our + # initialization implementation using default ``kaiming_init``. + if not hasattr(self.conv, 'init_weights'): + if self.with_activation and self.act_cfg['type'] == 'LeakyReLU': + nonlinearity = 'leaky_relu' + a = self.act_cfg.get('negative_slope', 0.01) + else: + nonlinearity = 'relu' + a = 0 + kaiming_init(self.conv, a=a, nonlinearity=nonlinearity) + if self.with_norm: + constant_init(self.norm, 1, bias=0) + + def forward(self, x, activate=True, norm=True): + for layer in self.order: + if layer == 'conv': + if self.with_explicit_padding: + x = self.padding_layer(x) + x = self.conv(x) + elif layer == 'norm' and norm and self.with_norm: + x = self.norm(x) + elif layer == 'act' and activate and self.with_activation: + x = self.activate(x) + return x diff --git a/annotator/uniformer/mmcv/cnn/bricks/conv_ws.py b/annotator/uniformer/mmcv/cnn/bricks/conv_ws.py new file mode 100644 index 0000000000000000000000000000000000000000..a3941e27874993418b3b5708d5a7485f175ff9c8 --- /dev/null +++ b/annotator/uniformer/mmcv/cnn/bricks/conv_ws.py @@ -0,0 +1,148 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn +import torch.nn.functional as F + +from .registry import CONV_LAYERS + + +def conv_ws_2d(input, + weight, + bias=None, + stride=1, + padding=0, + dilation=1, + groups=1, + eps=1e-5): + c_in = weight.size(0) + weight_flat = weight.view(c_in, -1) + mean = weight_flat.mean(dim=1, keepdim=True).view(c_in, 1, 1, 1) + std = weight_flat.std(dim=1, keepdim=True).view(c_in, 1, 1, 1) + weight = (weight - mean) / (std + eps) + return F.conv2d(input, weight, bias, stride, padding, dilation, groups) + + +@CONV_LAYERS.register_module('ConvWS') +class ConvWS2d(nn.Conv2d): + + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + bias=True, + eps=1e-5): + super(ConvWS2d, self).__init__( + in_channels, + out_channels, + kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + groups=groups, + bias=bias) + self.eps = eps + + def forward(self, x): + return conv_ws_2d(x, self.weight, self.bias, self.stride, self.padding, + self.dilation, self.groups, self.eps) + + +@CONV_LAYERS.register_module(name='ConvAWS') +class ConvAWS2d(nn.Conv2d): + """AWS (Adaptive Weight Standardization) + + This is a variant of Weight Standardization + (https://arxiv.org/pdf/1903.10520.pdf) + It is used in DetectoRS to avoid NaN + (https://arxiv.org/pdf/2006.02334.pdf) + + Args: + in_channels (int): Number of channels in the input image + out_channels (int): Number of channels produced by the convolution + kernel_size (int or tuple): Size of the conv kernel + stride (int or tuple, optional): Stride of the convolution. Default: 1 + padding (int or tuple, optional): Zero-padding added to both sides of + the input. 
Default: 0 + dilation (int or tuple, optional): Spacing between kernel elements. + Default: 1 + groups (int, optional): Number of blocked connections from input + channels to output channels. Default: 1 + bias (bool, optional): If set True, adds a learnable bias to the + output. Default: True + """ + + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + bias=True): + super().__init__( + in_channels, + out_channels, + kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + groups=groups, + bias=bias) + self.register_buffer('weight_gamma', + torch.ones(self.out_channels, 1, 1, 1)) + self.register_buffer('weight_beta', + torch.zeros(self.out_channels, 1, 1, 1)) + + def _get_weight(self, weight): + weight_flat = weight.view(weight.size(0), -1) + mean = weight_flat.mean(dim=1).view(-1, 1, 1, 1) + std = torch.sqrt(weight_flat.var(dim=1) + 1e-5).view(-1, 1, 1, 1) + weight = (weight - mean) / std + weight = self.weight_gamma * weight + self.weight_beta + return weight + + def forward(self, x): + weight = self._get_weight(self.weight) + return F.conv2d(x, weight, self.bias, self.stride, self.padding, + self.dilation, self.groups) + + def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict, + missing_keys, unexpected_keys, error_msgs): + """Override default load function. + + AWS overrides the function _load_from_state_dict to recover + weight_gamma and weight_beta if they are missing. If weight_gamma and + weight_beta are found in the checkpoint, this function will return + after super()._load_from_state_dict. Otherwise, it will compute the + mean and std of the pretrained weights and store them in weight_beta + and weight_gamma. + """ + + self.weight_gamma.data.fill_(-1) + local_missing_keys = [] + super()._load_from_state_dict(state_dict, prefix, local_metadata, + strict, local_missing_keys, + unexpected_keys, error_msgs) + if self.weight_gamma.data.mean() > 0: + for k in local_missing_keys: + missing_keys.append(k) + return + weight = self.weight.data + weight_flat = weight.view(weight.size(0), -1) + mean = weight_flat.mean(dim=1).view(-1, 1, 1, 1) + std = torch.sqrt(weight_flat.var(dim=1) + 1e-5).view(-1, 1, 1, 1) + self.weight_beta.data.copy_(mean) + self.weight_gamma.data.copy_(std) + missing_gamma_beta = [ + k for k in local_missing_keys + if k.endswith('weight_gamma') or k.endswith('weight_beta') + ] + for k in missing_gamma_beta: + local_missing_keys.remove(k) + for k in local_missing_keys: + missing_keys.append(k) diff --git a/annotator/uniformer/mmcv/cnn/bricks/depthwise_separable_conv_module.py b/annotator/uniformer/mmcv/cnn/bricks/depthwise_separable_conv_module.py new file mode 100644 index 0000000000000000000000000000000000000000..722d5d8d71f75486e2db3008907c4eadfca41d63 --- /dev/null +++ b/annotator/uniformer/mmcv/cnn/bricks/depthwise_separable_conv_module.py @@ -0,0 +1,96 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch.nn as nn + +from .conv_module import ConvModule + + +class DepthwiseSeparableConvModule(nn.Module): + """Depthwise separable convolution module. + + See https://arxiv.org/pdf/1704.04861.pdf for details. + + This module can replace a ConvModule with the conv block replaced by two + conv block: depthwise conv block and pointwise conv block. The depthwise + conv block contains depthwise-conv/norm/activation layers. The pointwise + conv block contains pointwise-conv/norm/activation layers. 
It should be + noted that there will be norm/activation layer in the depthwise conv block + if `norm_cfg` and `act_cfg` are specified. + + Args: + in_channels (int): Number of channels in the input feature map. + Same as that in ``nn._ConvNd``. + out_channels (int): Number of channels produced by the convolution. + Same as that in ``nn._ConvNd``. + kernel_size (int | tuple[int]): Size of the convolving kernel. + Same as that in ``nn._ConvNd``. + stride (int | tuple[int]): Stride of the convolution. + Same as that in ``nn._ConvNd``. Default: 1. + padding (int | tuple[int]): Zero-padding added to both sides of + the input. Same as that in ``nn._ConvNd``. Default: 0. + dilation (int | tuple[int]): Spacing between kernel elements. + Same as that in ``nn._ConvNd``. Default: 1. + norm_cfg (dict): Default norm config for both depthwise ConvModule and + pointwise ConvModule. Default: None. + act_cfg (dict): Default activation config for both depthwise ConvModule + and pointwise ConvModule. Default: dict(type='ReLU'). + dw_norm_cfg (dict): Norm config of depthwise ConvModule. If it is + 'default', it will be the same as `norm_cfg`. Default: 'default'. + dw_act_cfg (dict): Activation config of depthwise ConvModule. If it is + 'default', it will be the same as `act_cfg`. Default: 'default'. + pw_norm_cfg (dict): Norm config of pointwise ConvModule. If it is + 'default', it will be the same as `norm_cfg`. Default: 'default'. + pw_act_cfg (dict): Activation config of pointwise ConvModule. If it is + 'default', it will be the same as `act_cfg`. Default: 'default'. + kwargs (optional): Other shared arguments for depthwise and pointwise + ConvModule. See ConvModule for ref. + """ + + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + norm_cfg=None, + act_cfg=dict(type='ReLU'), + dw_norm_cfg='default', + dw_act_cfg='default', + pw_norm_cfg='default', + pw_act_cfg='default', + **kwargs): + super(DepthwiseSeparableConvModule, self).__init__() + assert 'groups' not in kwargs, 'groups should not be specified' + + # if norm/activation config of depthwise/pointwise ConvModule is not + # specified, use default config. + dw_norm_cfg = dw_norm_cfg if dw_norm_cfg != 'default' else norm_cfg + dw_act_cfg = dw_act_cfg if dw_act_cfg != 'default' else act_cfg + pw_norm_cfg = pw_norm_cfg if pw_norm_cfg != 'default' else norm_cfg + pw_act_cfg = pw_act_cfg if pw_act_cfg != 'default' else act_cfg + + # depthwise convolution + self.depthwise_conv = ConvModule( + in_channels, + in_channels, + kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + groups=in_channels, + norm_cfg=dw_norm_cfg, + act_cfg=dw_act_cfg, + **kwargs) + + self.pointwise_conv = ConvModule( + in_channels, + out_channels, + 1, + norm_cfg=pw_norm_cfg, + act_cfg=pw_act_cfg, + **kwargs) + + def forward(self, x): + x = self.depthwise_conv(x) + x = self.pointwise_conv(x) + return x diff --git a/annotator/uniformer/mmcv/cnn/bricks/drop.py b/annotator/uniformer/mmcv/cnn/bricks/drop.py new file mode 100644 index 0000000000000000000000000000000000000000..b7b4fccd457a0d51fb10c789df3c8537fe7b67c1 --- /dev/null +++ b/annotator/uniformer/mmcv/cnn/bricks/drop.py @@ -0,0 +1,65 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn + +from annotator.uniformer.mmcv import build_from_cfg +from .registry import DROPOUT_LAYERS + + +def drop_path(x, drop_prob=0., training=False): + """Drop paths (Stochastic Depth) per sample (when applied in main path of + residual blocks). 
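+
+    Each sample's residual path is zeroed with probability ``drop_prob``; the
+    surviving paths are rescaled by ``1 / (1 - drop_prob)`` so the expected
+    value of the output is unchanged.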
+
+    We follow the implementation
+    https://github.com/rwightman/pytorch-image-models/blob/a2727c1bf78ba0d7b5727f5f95e37fb7f8866b1f/timm/models/layers/drop.py # noqa: E501
+    """
+    if drop_prob == 0. or not training:
+        return x
+    keep_prob = 1 - drop_prob
+    # handle tensors with different dimensions, not just 4D tensors.
+    shape = (x.shape[0], ) + (1, ) * (x.ndim - 1)
+    random_tensor = keep_prob + torch.rand(
+        shape, dtype=x.dtype, device=x.device)
+    output = x.div(keep_prob) * random_tensor.floor()
+    return output
+
+
+@DROPOUT_LAYERS.register_module()
+class DropPath(nn.Module):
+    """Drop paths (Stochastic Depth) per sample (when applied in main path of
+    residual blocks).
+
+    We follow the implementation
+    https://github.com/rwightman/pytorch-image-models/blob/a2727c1bf78ba0d7b5727f5f95e37fb7f8866b1f/timm/models/layers/drop.py # noqa: E501
+
+    Args:
+        drop_prob (float): Probability of the path to be zeroed. Default: 0.1
+    """
+
+    def __init__(self, drop_prob=0.1):
+        super(DropPath, self).__init__()
+        self.drop_prob = drop_prob
+
+    def forward(self, x):
+        return drop_path(x, self.drop_prob, self.training)
+
+
+@DROPOUT_LAYERS.register_module()
+class Dropout(nn.Dropout):
+    """A wrapper for ``torch.nn.Dropout``. We rename the ``p`` of
+    ``torch.nn.Dropout`` to ``drop_prob`` so as to be consistent with
+    ``DropPath``.
+
+    Args:
+        drop_prob (float): Probability of the elements to be
+            zeroed. Default: 0.5.
+        inplace (bool): Do the operation inplace or not. Default: False.
+    """
+
+    def __init__(self, drop_prob=0.5, inplace=False):
+        super().__init__(p=drop_prob, inplace=inplace)
+
+
+def build_dropout(cfg, default_args=None):
+    """Builder for drop out layers."""
+    return build_from_cfg(cfg, DROPOUT_LAYERS, default_args)
diff --git a/annotator/uniformer/mmcv/cnn/bricks/generalized_attention.py b/annotator/uniformer/mmcv/cnn/bricks/generalized_attention.py
new file mode 100644
index 0000000000000000000000000000000000000000..988d9adf2f289ef223bd1c680a5ae1d3387f0269
--- /dev/null
+++ b/annotator/uniformer/mmcv/cnn/bricks/generalized_attention.py
@@ -0,0 +1,412 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import math
+
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+from ..utils import kaiming_init
+from .registry import PLUGIN_LAYERS
+
+
+@PLUGIN_LAYERS.register_module()
+class GeneralizedAttention(nn.Module):
+    """GeneralizedAttention module.
+
+    See 'An Empirical Study of Spatial Attention Mechanisms in Deep Networks'
+    (https://arxiv.org/abs/1904.05873) for details.
+
+    Args:
+        in_channels (int): Channels of the input feature map.
+        spatial_range (int): The spatial range. -1 indicates no spatial range
+            constraint. Default: -1.
+        num_heads (int): The head number of empirical_attention module.
+            Default: 9.
+        position_embedding_dim (int): The position embedding dimension.
+            Default: -1.
+        position_magnitude (int): A multiplier acting on coord difference.
+            Default: 1.
+        kv_stride (int): The feature stride acting on key/value feature map.
+            Default: 2.
+        q_stride (int): The feature stride acting on query feature map.
+            Default: 1.
+        attention_type (str): A binary indicator string for indicating which
+            items in generalized empirical_attention module are used.
+            Default: '1111'.
+
+            - '1000' indicates 'query and key content' (appr - appr) item,
+            - '0100' indicates 'query content and relative position'
+              (appr - position) item,
+            - '0010' indicates 'key content only' (bias - appr) item,
+            - '0001' indicates 'relative position only' (bias - position)
+              item.
+    """
+
+    _abbr_ = 'gen_attention_block'
+
+    def __init__(self,
+                 in_channels,
+                 spatial_range=-1,
+                 num_heads=9,
+                 position_embedding_dim=-1,
+                 position_magnitude=1,
+                 kv_stride=2,
+                 q_stride=1,
+                 attention_type='1111'):
+
+        super(GeneralizedAttention, self).__init__()
+
+        # hard range means local range for non-local operation
+        self.position_embedding_dim = (
+            position_embedding_dim
+            if position_embedding_dim > 0 else in_channels)
+
+        self.position_magnitude = position_magnitude
+        self.num_heads = num_heads
+        self.in_channels = in_channels
+        self.spatial_range = spatial_range
+        self.kv_stride = kv_stride
+        self.q_stride = q_stride
+        self.attention_type = [bool(int(_)) for _ in attention_type]
+        self.qk_embed_dim = in_channels // num_heads
+        out_c = self.qk_embed_dim * num_heads
+
+        if self.attention_type[0] or self.attention_type[1]:
+            self.query_conv = nn.Conv2d(
+                in_channels=in_channels,
+                out_channels=out_c,
+                kernel_size=1,
+                bias=False)
+            self.query_conv.kaiming_init = True
+
+        if self.attention_type[0] or self.attention_type[2]:
+            self.key_conv = nn.Conv2d(
+                in_channels=in_channels,
+                out_channels=out_c,
+                kernel_size=1,
+                bias=False)
+            self.key_conv.kaiming_init = True
+
+        self.v_dim = in_channels // num_heads
+        self.value_conv = nn.Conv2d(
+            in_channels=in_channels,
+            out_channels=self.v_dim * num_heads,
+            kernel_size=1,
+            bias=False)
+        self.value_conv.kaiming_init = True
+
+        if self.attention_type[1] or self.attention_type[3]:
+            self.appr_geom_fc_x = nn.Linear(
+                self.position_embedding_dim // 2, out_c, bias=False)
+            self.appr_geom_fc_x.kaiming_init = True
+
+            self.appr_geom_fc_y = nn.Linear(
+                self.position_embedding_dim // 2, out_c, bias=False)
+            self.appr_geom_fc_y.kaiming_init = True
+
+        if self.attention_type[2]:
+            stdv = 1.0 / math.sqrt(self.qk_embed_dim * 2)
+            appr_bias_value = -2 * stdv * torch.rand(out_c) + stdv
+            self.appr_bias = nn.Parameter(appr_bias_value)
+
+        if self.attention_type[3]:
+            stdv = 1.0 / math.sqrt(self.qk_embed_dim * 2)
+            geom_bias_value = -2 * stdv * torch.rand(out_c) + stdv
+            self.geom_bias = nn.Parameter(geom_bias_value)
+
+        self.proj_conv = nn.Conv2d(
+            in_channels=self.v_dim * num_heads,
+            out_channels=in_channels,
+            kernel_size=1,
+            bias=True)
+        self.proj_conv.kaiming_init = True
+        self.gamma = nn.Parameter(torch.zeros(1))
+
+        if self.spatial_range >= 0:
+            # only works when non local is after 3*3 conv
+            if in_channels == 256:
+                max_len = 84
+            elif in_channels == 512:
+                max_len = 42
+
+            max_len_kv = int((max_len - 1.0) / self.kv_stride + 1)
+            local_constraint_map = np.ones(
+                (max_len, max_len, max_len_kv, max_len_kv), dtype=int)
+            for iy in range(max_len):
+                for ix in range(max_len):
+                    local_constraint_map[
+                        iy, ix,
+                        max((iy - self.spatial_range) //
+                            self.kv_stride, 0):min((iy + self.spatial_range +
+                                                    1) // self.kv_stride +
+                                                   1, max_len),
+                        max((ix - self.spatial_range) //
+                            self.kv_stride, 0):min((ix + self.spatial_range +
+                                                    1) // self.kv_stride +
+                                                   1, max_len)] = 0
+
+            self.local_constraint_map = nn.Parameter(
+                torch.from_numpy(local_constraint_map).byte(),
+                requires_grad=False)
+
+        if self.q_stride > 1:
+            self.q_downsample = nn.AvgPool2d(
+                kernel_size=1, stride=self.q_stride)
+        else:
+            self.q_downsample = None
+
+        if self.kv_stride > 1:
+
self.kv_downsample = nn.AvgPool2d( + kernel_size=1, stride=self.kv_stride) + else: + self.kv_downsample = None + + self.init_weights() + + def get_position_embedding(self, + h, + w, + h_kv, + w_kv, + q_stride, + kv_stride, + device, + dtype, + feat_dim, + wave_length=1000): + # the default type of Tensor is float32, leading to type mismatch + # in fp16 mode. Cast it to support fp16 mode. + h_idxs = torch.linspace(0, h - 1, h).to(device=device, dtype=dtype) + h_idxs = h_idxs.view((h, 1)) * q_stride + + w_idxs = torch.linspace(0, w - 1, w).to(device=device, dtype=dtype) + w_idxs = w_idxs.view((w, 1)) * q_stride + + h_kv_idxs = torch.linspace(0, h_kv - 1, h_kv).to( + device=device, dtype=dtype) + h_kv_idxs = h_kv_idxs.view((h_kv, 1)) * kv_stride + + w_kv_idxs = torch.linspace(0, w_kv - 1, w_kv).to( + device=device, dtype=dtype) + w_kv_idxs = w_kv_idxs.view((w_kv, 1)) * kv_stride + + # (h, h_kv, 1) + h_diff = h_idxs.unsqueeze(1) - h_kv_idxs.unsqueeze(0) + h_diff *= self.position_magnitude + + # (w, w_kv, 1) + w_diff = w_idxs.unsqueeze(1) - w_kv_idxs.unsqueeze(0) + w_diff *= self.position_magnitude + + feat_range = torch.arange(0, feat_dim / 4).to( + device=device, dtype=dtype) + + dim_mat = torch.Tensor([wave_length]).to(device=device, dtype=dtype) + dim_mat = dim_mat**((4. / feat_dim) * feat_range) + dim_mat = dim_mat.view((1, 1, -1)) + + embedding_x = torch.cat( + ((w_diff / dim_mat).sin(), (w_diff / dim_mat).cos()), dim=2) + + embedding_y = torch.cat( + ((h_diff / dim_mat).sin(), (h_diff / dim_mat).cos()), dim=2) + + return embedding_x, embedding_y + + def forward(self, x_input): + num_heads = self.num_heads + + # use empirical_attention + if self.q_downsample is not None: + x_q = self.q_downsample(x_input) + else: + x_q = x_input + n, _, h, w = x_q.shape + + if self.kv_downsample is not None: + x_kv = self.kv_downsample(x_input) + else: + x_kv = x_input + _, _, h_kv, w_kv = x_kv.shape + + if self.attention_type[0] or self.attention_type[1]: + proj_query = self.query_conv(x_q).view( + (n, num_heads, self.qk_embed_dim, h * w)) + proj_query = proj_query.permute(0, 1, 3, 2) + + if self.attention_type[0] or self.attention_type[2]: + proj_key = self.key_conv(x_kv).view( + (n, num_heads, self.qk_embed_dim, h_kv * w_kv)) + + if self.attention_type[1] or self.attention_type[3]: + position_embed_x, position_embed_y = self.get_position_embedding( + h, w, h_kv, w_kv, self.q_stride, self.kv_stride, + x_input.device, x_input.dtype, self.position_embedding_dim) + # (n, num_heads, w, w_kv, dim) + position_feat_x = self.appr_geom_fc_x(position_embed_x).\ + view(1, w, w_kv, num_heads, self.qk_embed_dim).\ + permute(0, 3, 1, 2, 4).\ + repeat(n, 1, 1, 1, 1) + + # (n, num_heads, h, h_kv, dim) + position_feat_y = self.appr_geom_fc_y(position_embed_y).\ + view(1, h, h_kv, num_heads, self.qk_embed_dim).\ + permute(0, 3, 1, 2, 4).\ + repeat(n, 1, 1, 1, 1) + + position_feat_x /= math.sqrt(2) + position_feat_y /= math.sqrt(2) + + # accelerate for saliency only + if (np.sum(self.attention_type) == 1) and self.attention_type[2]: + appr_bias = self.appr_bias.\ + view(1, num_heads, 1, self.qk_embed_dim).\ + repeat(n, 1, 1, 1) + + energy = torch.matmul(appr_bias, proj_key).\ + view(n, num_heads, 1, h_kv * w_kv) + + h = 1 + w = 1 + else: + # (n, num_heads, h*w, h_kv*w_kv), query before key, 540mb for + if not self.attention_type[0]: + energy = torch.zeros( + n, + num_heads, + h, + w, + h_kv, + w_kv, + dtype=x_input.dtype, + device=x_input.device) + + # attention_type[0]: appr - appr + # attention_type[1]: appr - position 
+ # attention_type[2]: bias - appr + # attention_type[3]: bias - position + if self.attention_type[0] or self.attention_type[2]: + if self.attention_type[0] and self.attention_type[2]: + appr_bias = self.appr_bias.\ + view(1, num_heads, 1, self.qk_embed_dim) + energy = torch.matmul(proj_query + appr_bias, proj_key).\ + view(n, num_heads, h, w, h_kv, w_kv) + + elif self.attention_type[0]: + energy = torch.matmul(proj_query, proj_key).\ + view(n, num_heads, h, w, h_kv, w_kv) + + elif self.attention_type[2]: + appr_bias = self.appr_bias.\ + view(1, num_heads, 1, self.qk_embed_dim).\ + repeat(n, 1, 1, 1) + + energy += torch.matmul(appr_bias, proj_key).\ + view(n, num_heads, 1, 1, h_kv, w_kv) + + if self.attention_type[1] or self.attention_type[3]: + if self.attention_type[1] and self.attention_type[3]: + geom_bias = self.geom_bias.\ + view(1, num_heads, 1, self.qk_embed_dim) + + proj_query_reshape = (proj_query + geom_bias).\ + view(n, num_heads, h, w, self.qk_embed_dim) + + energy_x = torch.matmul( + proj_query_reshape.permute(0, 1, 3, 2, 4), + position_feat_x.permute(0, 1, 2, 4, 3)) + energy_x = energy_x.\ + permute(0, 1, 3, 2, 4).unsqueeze(4) + + energy_y = torch.matmul( + proj_query_reshape, + position_feat_y.permute(0, 1, 2, 4, 3)) + energy_y = energy_y.unsqueeze(5) + + energy += energy_x + energy_y + + elif self.attention_type[1]: + proj_query_reshape = proj_query.\ + view(n, num_heads, h, w, self.qk_embed_dim) + proj_query_reshape = proj_query_reshape.\ + permute(0, 1, 3, 2, 4) + position_feat_x_reshape = position_feat_x.\ + permute(0, 1, 2, 4, 3) + position_feat_y_reshape = position_feat_y.\ + permute(0, 1, 2, 4, 3) + + energy_x = torch.matmul(proj_query_reshape, + position_feat_x_reshape) + energy_x = energy_x.permute(0, 1, 3, 2, 4).unsqueeze(4) + + energy_y = torch.matmul(proj_query_reshape, + position_feat_y_reshape) + energy_y = energy_y.unsqueeze(5) + + energy += energy_x + energy_y + + elif self.attention_type[3]: + geom_bias = self.geom_bias.\ + view(1, num_heads, self.qk_embed_dim, 1).\ + repeat(n, 1, 1, 1) + + position_feat_x_reshape = position_feat_x.\ + view(n, num_heads, w*w_kv, self.qk_embed_dim) + + position_feat_y_reshape = position_feat_y.\ + view(n, num_heads, h * h_kv, self.qk_embed_dim) + + energy_x = torch.matmul(position_feat_x_reshape, geom_bias) + energy_x = energy_x.view(n, num_heads, 1, w, 1, w_kv) + + energy_y = torch.matmul(position_feat_y_reshape, geom_bias) + energy_y = energy_y.view(n, num_heads, h, 1, h_kv, 1) + + energy += energy_x + energy_y + + energy = energy.view(n, num_heads, h * w, h_kv * w_kv) + + if self.spatial_range >= 0: + cur_local_constraint_map = \ + self.local_constraint_map[:h, :w, :h_kv, :w_kv].\ + contiguous().\ + view(1, 1, h*w, h_kv*w_kv) + + energy = energy.masked_fill_(cur_local_constraint_map, + float('-inf')) + + attention = F.softmax(energy, 3) + + proj_value = self.value_conv(x_kv) + proj_value_reshape = proj_value.\ + view((n, num_heads, self.v_dim, h_kv * w_kv)).\ + permute(0, 1, 3, 2) + + out = torch.matmul(attention, proj_value_reshape).\ + permute(0, 1, 3, 2).\ + contiguous().\ + view(n, self.v_dim * self.num_heads, h, w) + + out = self.proj_conv(out) + + # output is downsampled, upsample back to input size + if self.q_downsample is not None: + out = F.interpolate( + out, + size=x_input.shape[2:], + mode='bilinear', + align_corners=False) + + out = self.gamma * out + x_input + return out + + def init_weights(self): + for m in self.modules(): + if hasattr(m, 'kaiming_init') and m.kaiming_init: + kaiming_init( + m, + 
mode='fan_in', + nonlinearity='leaky_relu', + bias=0, + distribution='uniform', + a=1) diff --git a/annotator/uniformer/mmcv/cnn/bricks/hsigmoid.py b/annotator/uniformer/mmcv/cnn/bricks/hsigmoid.py new file mode 100644 index 0000000000000000000000000000000000000000..30b1a3d6580cf0360710426fbea1f05acdf07b4b --- /dev/null +++ b/annotator/uniformer/mmcv/cnn/bricks/hsigmoid.py @@ -0,0 +1,34 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch.nn as nn + +from .registry import ACTIVATION_LAYERS + + +@ACTIVATION_LAYERS.register_module() +class HSigmoid(nn.Module): + """Hard Sigmoid Module. Apply the hard sigmoid function: + Hsigmoid(x) = min(max((x + bias) / divisor, min_value), max_value) + Default: Hsigmoid(x) = min(max((x + 1) / 2, 0), 1) + + Args: + bias (float): Bias of the input feature map. Default: 1.0. + divisor (float): Divisor of the input feature map. Default: 2.0. + min_value (float): Lower bound value. Default: 0.0. + max_value (float): Upper bound value. Default: 1.0. + + Returns: + Tensor: The output tensor. + """ + + def __init__(self, bias=1.0, divisor=2.0, min_value=0.0, max_value=1.0): + super(HSigmoid, self).__init__() + self.bias = bias + self.divisor = divisor + assert self.divisor != 0 + self.min_value = min_value + self.max_value = max_value + + def forward(self, x): + x = (x + self.bias) / self.divisor + + return x.clamp_(self.min_value, self.max_value) diff --git a/annotator/uniformer/mmcv/cnn/bricks/hswish.py b/annotator/uniformer/mmcv/cnn/bricks/hswish.py new file mode 100644 index 0000000000000000000000000000000000000000..7e0c090ff037c99ee6c5c84c4592e87beae02208 --- /dev/null +++ b/annotator/uniformer/mmcv/cnn/bricks/hswish.py @@ -0,0 +1,29 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch.nn as nn + +from .registry import ACTIVATION_LAYERS + + +@ACTIVATION_LAYERS.register_module() +class HSwish(nn.Module): + """Hard Swish Module. + + This module applies the hard swish function: + + .. math:: + Hswish(x) = x * ReLU6(x + 3) / 6 + + Args: + inplace (bool): can optionally do the operation in-place. + Default: False. + + Returns: + Tensor: The output tensor. + """ + + def __init__(self, inplace=False): + super(HSwish, self).__init__() + self.act = nn.ReLU6(inplace) + + def forward(self, x): + return x * self.act(x + 3) / 6 diff --git a/annotator/uniformer/mmcv/cnn/bricks/non_local.py b/annotator/uniformer/mmcv/cnn/bricks/non_local.py new file mode 100644 index 0000000000000000000000000000000000000000..92d00155ef275c1201ea66bba30470a1785cc5d7 --- /dev/null +++ b/annotator/uniformer/mmcv/cnn/bricks/non_local.py @@ -0,0 +1,306 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from abc import ABCMeta + +import torch +import torch.nn as nn + +from ..utils import constant_init, normal_init +from .conv_module import ConvModule +from .registry import PLUGIN_LAYERS + + +class _NonLocalNd(nn.Module, metaclass=ABCMeta): + """Basic Non-local module. + + This module is proposed in + "Non-local Neural Networks" + Paper reference: https://arxiv.org/abs/1711.07971 + Code reference: https://github.com/AlexHex7/Non-local_pytorch + + Args: + in_channels (int): Channels of the input feature map. + reduction (int): Channel reduction ratio. Default: 2. + use_scale (bool): Whether to scale pairwise_weight by + `1/sqrt(inter_channels)` when the mode is `embedded_gaussian`. + Default: True. + conv_cfg (None | dict): The config dict for convolution layers. + If not specified, it will use `nn.Conv2d` for convolution layers. + Default: None. 
+ norm_cfg (None | dict): The config dict for normalization layers. + Default: None. (This parameter is only applicable to conv_out.) + mode (str): Options are `gaussian`, `concatenation`, + `embedded_gaussian` and `dot_product`. Default: embedded_gaussian. + """ + + def __init__(self, + in_channels, + reduction=2, + use_scale=True, + conv_cfg=None, + norm_cfg=None, + mode='embedded_gaussian', + **kwargs): + super(_NonLocalNd, self).__init__() + self.in_channels = in_channels + self.reduction = reduction + self.use_scale = use_scale + self.inter_channels = max(in_channels // reduction, 1) + self.mode = mode + + if mode not in [ + 'gaussian', 'embedded_gaussian', 'dot_product', 'concatenation' + ]: + raise ValueError("Mode should be in 'gaussian', 'concatenation', " + f"'embedded_gaussian' or 'dot_product', but got " + f'{mode} instead.') + + # g, theta, phi are defaulted as `nn.ConvNd`. + # Here we use ConvModule for potential usage. + self.g = ConvModule( + self.in_channels, + self.inter_channels, + kernel_size=1, + conv_cfg=conv_cfg, + act_cfg=None) + self.conv_out = ConvModule( + self.inter_channels, + self.in_channels, + kernel_size=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=None) + + if self.mode != 'gaussian': + self.theta = ConvModule( + self.in_channels, + self.inter_channels, + kernel_size=1, + conv_cfg=conv_cfg, + act_cfg=None) + self.phi = ConvModule( + self.in_channels, + self.inter_channels, + kernel_size=1, + conv_cfg=conv_cfg, + act_cfg=None) + + if self.mode == 'concatenation': + self.concat_project = ConvModule( + self.inter_channels * 2, + 1, + kernel_size=1, + stride=1, + padding=0, + bias=False, + act_cfg=dict(type='ReLU')) + + self.init_weights(**kwargs) + + def init_weights(self, std=0.01, zeros_init=True): + if self.mode != 'gaussian': + for m in [self.g, self.theta, self.phi]: + normal_init(m.conv, std=std) + else: + normal_init(self.g.conv, std=std) + if zeros_init: + if self.conv_out.norm_cfg is None: + constant_init(self.conv_out.conv, 0) + else: + constant_init(self.conv_out.norm, 0) + else: + if self.conv_out.norm_cfg is None: + normal_init(self.conv_out.conv, std=std) + else: + normal_init(self.conv_out.norm, std=std) + + def gaussian(self, theta_x, phi_x): + # NonLocal1d pairwise_weight: [N, H, H] + # NonLocal2d pairwise_weight: [N, HxW, HxW] + # NonLocal3d pairwise_weight: [N, TxHxW, TxHxW] + pairwise_weight = torch.matmul(theta_x, phi_x) + pairwise_weight = pairwise_weight.softmax(dim=-1) + return pairwise_weight + + def embedded_gaussian(self, theta_x, phi_x): + # NonLocal1d pairwise_weight: [N, H, H] + # NonLocal2d pairwise_weight: [N, HxW, HxW] + # NonLocal3d pairwise_weight: [N, TxHxW, TxHxW] + pairwise_weight = torch.matmul(theta_x, phi_x) + if self.use_scale: + # theta_x.shape[-1] is `self.inter_channels` + pairwise_weight /= theta_x.shape[-1]**0.5 + pairwise_weight = pairwise_weight.softmax(dim=-1) + return pairwise_weight + + def dot_product(self, theta_x, phi_x): + # NonLocal1d pairwise_weight: [N, H, H] + # NonLocal2d pairwise_weight: [N, HxW, HxW] + # NonLocal3d pairwise_weight: [N, TxHxW, TxHxW] + pairwise_weight = torch.matmul(theta_x, phi_x) + pairwise_weight /= pairwise_weight.shape[-1] + return pairwise_weight + + def concatenation(self, theta_x, phi_x): + # NonLocal1d pairwise_weight: [N, H, H] + # NonLocal2d pairwise_weight: [N, HxW, HxW] + # NonLocal3d pairwise_weight: [N, TxHxW, TxHxW] + h = theta_x.size(2) + w = phi_x.size(3) + theta_x = theta_x.repeat(1, 1, 1, w) + phi_x = phi_x.repeat(1, 1, h, 1) + + concat_feature = 
torch.cat([theta_x, phi_x], dim=1) + pairwise_weight = self.concat_project(concat_feature) + n, _, h, w = pairwise_weight.size() + pairwise_weight = pairwise_weight.view(n, h, w) + pairwise_weight /= pairwise_weight.shape[-1] + + return pairwise_weight + + def forward(self, x): + # Assume `reduction = 1`, then `inter_channels = C` + # or `inter_channels = C` when `mode="gaussian"` + + # NonLocal1d x: [N, C, H] + # NonLocal2d x: [N, C, H, W] + # NonLocal3d x: [N, C, T, H, W] + n = x.size(0) + + # NonLocal1d g_x: [N, H, C] + # NonLocal2d g_x: [N, HxW, C] + # NonLocal3d g_x: [N, TxHxW, C] + g_x = self.g(x).view(n, self.inter_channels, -1) + g_x = g_x.permute(0, 2, 1) + + # NonLocal1d theta_x: [N, H, C], phi_x: [N, C, H] + # NonLocal2d theta_x: [N, HxW, C], phi_x: [N, C, HxW] + # NonLocal3d theta_x: [N, TxHxW, C], phi_x: [N, C, TxHxW] + if self.mode == 'gaussian': + theta_x = x.view(n, self.in_channels, -1) + theta_x = theta_x.permute(0, 2, 1) + if self.sub_sample: + phi_x = self.phi(x).view(n, self.in_channels, -1) + else: + phi_x = x.view(n, self.in_channels, -1) + elif self.mode == 'concatenation': + theta_x = self.theta(x).view(n, self.inter_channels, -1, 1) + phi_x = self.phi(x).view(n, self.inter_channels, 1, -1) + else: + theta_x = self.theta(x).view(n, self.inter_channels, -1) + theta_x = theta_x.permute(0, 2, 1) + phi_x = self.phi(x).view(n, self.inter_channels, -1) + + pairwise_func = getattr(self, self.mode) + # NonLocal1d pairwise_weight: [N, H, H] + # NonLocal2d pairwise_weight: [N, HxW, HxW] + # NonLocal3d pairwise_weight: [N, TxHxW, TxHxW] + pairwise_weight = pairwise_func(theta_x, phi_x) + + # NonLocal1d y: [N, H, C] + # NonLocal2d y: [N, HxW, C] + # NonLocal3d y: [N, TxHxW, C] + y = torch.matmul(pairwise_weight, g_x) + # NonLocal1d y: [N, C, H] + # NonLocal2d y: [N, C, H, W] + # NonLocal3d y: [N, C, T, H, W] + y = y.permute(0, 2, 1).contiguous().reshape(n, self.inter_channels, + *x.size()[2:]) + + output = x + self.conv_out(y) + + return output + + +class NonLocal1d(_NonLocalNd): + """1D Non-local module. + + Args: + in_channels (int): Same as `NonLocalND`. + sub_sample (bool): Whether to apply max pooling after pairwise + function (Note that the `sub_sample` is applied on spatial only). + Default: False. + conv_cfg (None | dict): Same as `NonLocalND`. + Default: dict(type='Conv1d'). + """ + + def __init__(self, + in_channels, + sub_sample=False, + conv_cfg=dict(type='Conv1d'), + **kwargs): + super(NonLocal1d, self).__init__( + in_channels, conv_cfg=conv_cfg, **kwargs) + + self.sub_sample = sub_sample + + if sub_sample: + max_pool_layer = nn.MaxPool1d(kernel_size=2) + self.g = nn.Sequential(self.g, max_pool_layer) + if self.mode != 'gaussian': + self.phi = nn.Sequential(self.phi, max_pool_layer) + else: + self.phi = max_pool_layer + + +@PLUGIN_LAYERS.register_module() +class NonLocal2d(_NonLocalNd): + """2D Non-local module. + + Args: + in_channels (int): Same as `NonLocalND`. + sub_sample (bool): Whether to apply max pooling after pairwise + function (Note that the `sub_sample` is applied on spatial only). + Default: False. + conv_cfg (None | dict): Same as `NonLocalND`. + Default: dict(type='Conv2d'). 
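+
+    Example (an illustrative sketch; the default ``embedded_gaussian`` mode
+    preserves the input shape)::
+
+        >>> import torch
+        >>> block = NonLocal2d(in_channels=16)
+        >>> x = torch.rand(1, 16, 20, 20)
+        >>> block(x).shape
+        torch.Size([1, 16, 20, 20])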
+ """ + + _abbr_ = 'nonlocal_block' + + def __init__(self, + in_channels, + sub_sample=False, + conv_cfg=dict(type='Conv2d'), + **kwargs): + super(NonLocal2d, self).__init__( + in_channels, conv_cfg=conv_cfg, **kwargs) + + self.sub_sample = sub_sample + + if sub_sample: + max_pool_layer = nn.MaxPool2d(kernel_size=(2, 2)) + self.g = nn.Sequential(self.g, max_pool_layer) + if self.mode != 'gaussian': + self.phi = nn.Sequential(self.phi, max_pool_layer) + else: + self.phi = max_pool_layer + + +class NonLocal3d(_NonLocalNd): + """3D Non-local module. + + Args: + in_channels (int): Same as `NonLocalND`. + sub_sample (bool): Whether to apply max pooling after pairwise + function (Note that the `sub_sample` is applied on spatial only). + Default: False. + conv_cfg (None | dict): Same as `NonLocalND`. + Default: dict(type='Conv3d'). + """ + + def __init__(self, + in_channels, + sub_sample=False, + conv_cfg=dict(type='Conv3d'), + **kwargs): + super(NonLocal3d, self).__init__( + in_channels, conv_cfg=conv_cfg, **kwargs) + self.sub_sample = sub_sample + + if sub_sample: + max_pool_layer = nn.MaxPool3d(kernel_size=(1, 2, 2)) + self.g = nn.Sequential(self.g, max_pool_layer) + if self.mode != 'gaussian': + self.phi = nn.Sequential(self.phi, max_pool_layer) + else: + self.phi = max_pool_layer diff --git a/annotator/uniformer/mmcv/cnn/bricks/norm.py b/annotator/uniformer/mmcv/cnn/bricks/norm.py new file mode 100644 index 0000000000000000000000000000000000000000..408f4b42731b19a3beeef68b6a5e610d0bbc18b3 --- /dev/null +++ b/annotator/uniformer/mmcv/cnn/bricks/norm.py @@ -0,0 +1,144 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import inspect + +import torch.nn as nn + +from annotator.uniformer.mmcv.utils import is_tuple_of +from annotator.uniformer.mmcv.utils.parrots_wrapper import SyncBatchNorm, _BatchNorm, _InstanceNorm +from .registry import NORM_LAYERS + +NORM_LAYERS.register_module('BN', module=nn.BatchNorm2d) +NORM_LAYERS.register_module('BN1d', module=nn.BatchNorm1d) +NORM_LAYERS.register_module('BN2d', module=nn.BatchNorm2d) +NORM_LAYERS.register_module('BN3d', module=nn.BatchNorm3d) +NORM_LAYERS.register_module('SyncBN', module=SyncBatchNorm) +NORM_LAYERS.register_module('GN', module=nn.GroupNorm) +NORM_LAYERS.register_module('LN', module=nn.LayerNorm) +NORM_LAYERS.register_module('IN', module=nn.InstanceNorm2d) +NORM_LAYERS.register_module('IN1d', module=nn.InstanceNorm1d) +NORM_LAYERS.register_module('IN2d', module=nn.InstanceNorm2d) +NORM_LAYERS.register_module('IN3d', module=nn.InstanceNorm3d) + + +def infer_abbr(class_type): + """Infer abbreviation from the class name. + + When we build a norm layer with `build_norm_layer()`, we want to preserve + the norm type in variable names, e.g, self.bn1, self.gn. This method will + infer the abbreviation to map class types to abbreviations. + + Rule 1: If the class has the property "_abbr_", return the property. + Rule 2: If the parent class is _BatchNorm, GroupNorm, LayerNorm or + InstanceNorm, the abbreviation of this layer will be "bn", "gn", "ln" and + "in" respectively. + Rule 3: If the class name contains "batch", "group", "layer" or "instance", + the abbreviation of this layer will be "bn", "gn", "ln" and "in" + respectively. + Rule 4: Otherwise, the abbreviation falls back to "norm". + + Args: + class_type (type): The norm layer type. + + Returns: + str: The inferred abbreviation. 
+ """ + if not inspect.isclass(class_type): + raise TypeError( + f'class_type must be a type, but got {type(class_type)}') + if hasattr(class_type, '_abbr_'): + return class_type._abbr_ + if issubclass(class_type, _InstanceNorm): # IN is a subclass of BN + return 'in' + elif issubclass(class_type, _BatchNorm): + return 'bn' + elif issubclass(class_type, nn.GroupNorm): + return 'gn' + elif issubclass(class_type, nn.LayerNorm): + return 'ln' + else: + class_name = class_type.__name__.lower() + if 'batch' in class_name: + return 'bn' + elif 'group' in class_name: + return 'gn' + elif 'layer' in class_name: + return 'ln' + elif 'instance' in class_name: + return 'in' + else: + return 'norm_layer' + + +def build_norm_layer(cfg, num_features, postfix=''): + """Build normalization layer. + + Args: + cfg (dict): The norm layer config, which should contain: + + - type (str): Layer type. + - layer args: Args needed to instantiate a norm layer. + - requires_grad (bool, optional): Whether stop gradient updates. + num_features (int): Number of input channels. + postfix (int | str): The postfix to be appended into norm abbreviation + to create named layer. + + Returns: + (str, nn.Module): The first element is the layer name consisting of + abbreviation and postfix, e.g., bn1, gn. The second element is the + created norm layer. + """ + if not isinstance(cfg, dict): + raise TypeError('cfg must be a dict') + if 'type' not in cfg: + raise KeyError('the cfg dict must contain the key "type"') + cfg_ = cfg.copy() + + layer_type = cfg_.pop('type') + if layer_type not in NORM_LAYERS: + raise KeyError(f'Unrecognized norm type {layer_type}') + + norm_layer = NORM_LAYERS.get(layer_type) + abbr = infer_abbr(norm_layer) + + assert isinstance(postfix, (int, str)) + name = abbr + str(postfix) + + requires_grad = cfg_.pop('requires_grad', True) + cfg_.setdefault('eps', 1e-5) + if layer_type != 'GN': + layer = norm_layer(num_features, **cfg_) + if layer_type == 'SyncBN' and hasattr(layer, '_specify_ddp_gpu_num'): + layer._specify_ddp_gpu_num(1) + else: + assert 'num_groups' in cfg_ + layer = norm_layer(num_channels=num_features, **cfg_) + + for param in layer.parameters(): + param.requires_grad = requires_grad + + return name, layer + + +def is_norm(layer, exclude=None): + """Check if a layer is a normalization layer. + + Args: + layer (nn.Module): The layer to be checked. + exclude (type | tuple[type]): Types to be excluded. + + Returns: + bool: Whether the layer is a norm layer. + """ + if exclude is not None: + if not isinstance(exclude, tuple): + exclude = (exclude, ) + if not is_tuple_of(exclude, type): + raise TypeError( + f'"exclude" must be either None or type or a tuple of types, ' + f'but got {type(exclude)}: {exclude}') + + if exclude and isinstance(layer, exclude): + return False + + all_norm_bases = (_BatchNorm, _InstanceNorm, nn.GroupNorm, nn.LayerNorm) + return isinstance(layer, all_norm_bases) diff --git a/annotator/uniformer/mmcv/cnn/bricks/padding.py b/annotator/uniformer/mmcv/cnn/bricks/padding.py new file mode 100644 index 0000000000000000000000000000000000000000..e4ac6b28a1789bd551c613a7d3e7b622433ac7ec --- /dev/null +++ b/annotator/uniformer/mmcv/cnn/bricks/padding.py @@ -0,0 +1,36 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+import torch.nn as nn + +from .registry import PADDING_LAYERS + +PADDING_LAYERS.register_module('zero', module=nn.ZeroPad2d) +PADDING_LAYERS.register_module('reflect', module=nn.ReflectionPad2d) +PADDING_LAYERS.register_module('replicate', module=nn.ReplicationPad2d) + + +def build_padding_layer(cfg, *args, **kwargs): + """Build padding layer. + + Args: + cfg (None or dict): The padding layer config, which should contain: + - type (str): Layer type. + - layer args: Args needed to instantiate a padding layer. + + Returns: + nn.Module: Created padding layer. + """ + if not isinstance(cfg, dict): + raise TypeError('cfg must be a dict') + if 'type' not in cfg: + raise KeyError('the cfg dict must contain the key "type"') + + cfg_ = cfg.copy() + padding_type = cfg_.pop('type') + if padding_type not in PADDING_LAYERS: + raise KeyError(f'Unrecognized padding type {padding_type}.') + else: + padding_layer = PADDING_LAYERS.get(padding_type) + + layer = padding_layer(*args, **kwargs, **cfg_) + + return layer diff --git a/annotator/uniformer/mmcv/cnn/bricks/plugin.py b/annotator/uniformer/mmcv/cnn/bricks/plugin.py new file mode 100644 index 0000000000000000000000000000000000000000..07c010d4053174dd41107aa654ea67e82b46a25c --- /dev/null +++ b/annotator/uniformer/mmcv/cnn/bricks/plugin.py @@ -0,0 +1,88 @@ +import inspect +import platform + +from .registry import PLUGIN_LAYERS + +if platform.system() == 'Windows': + import regex as re +else: + import re + + +def infer_abbr(class_type): + """Infer abbreviation from the class name. + + This method will infer the abbreviation to map class types to + abbreviations. + + Rule 1: If the class has the property "abbr", return the property. + Rule 2: Otherwise, the abbreviation falls back to snake case of class + name, e.g. the abbreviation of ``FancyBlock`` will be ``fancy_block``. + + Args: + class_type (type): The norm layer type. + + Returns: + str: The inferred abbreviation. + """ + + def camel2snack(word): + """Convert camel case word into snack case. + + Modified from `inflection lib + `_. + + Example:: + + >>> camel2snack("FancyBlock") + 'fancy_block' + """ + + word = re.sub(r'([A-Z]+)([A-Z][a-z])', r'\1_\2', word) + word = re.sub(r'([a-z\d])([A-Z])', r'\1_\2', word) + word = word.replace('-', '_') + return word.lower() + + if not inspect.isclass(class_type): + raise TypeError( + f'class_type must be a type, but got {type(class_type)}') + if hasattr(class_type, '_abbr_'): + return class_type._abbr_ + else: + return camel2snack(class_type.__name__) + + +def build_plugin_layer(cfg, postfix='', **kwargs): + """Build plugin layer. + + Args: + cfg (None or dict): cfg should contain: + type (str): identify plugin layer type. + layer args: args needed to instantiate a plugin layer. + postfix (int, str): appended into norm abbreviation to + create named layer. Default: ''. 
+ + Returns: + tuple[str, nn.Module]: + name (str): abbreviation + postfix + layer (nn.Module): created plugin layer + """ + if not isinstance(cfg, dict): + raise TypeError('cfg must be a dict') + if 'type' not in cfg: + raise KeyError('the cfg dict must contain the key "type"') + cfg_ = cfg.copy() + + layer_type = cfg_.pop('type') + if layer_type not in PLUGIN_LAYERS: + raise KeyError(f'Unrecognized plugin type {layer_type}') + + plugin_layer = PLUGIN_LAYERS.get(layer_type) + abbr = infer_abbr(plugin_layer) + + assert isinstance(postfix, (int, str)) + name = abbr + str(postfix) + + layer = plugin_layer(**kwargs, **cfg_) + + return name, layer diff --git a/annotator/uniformer/mmcv/cnn/bricks/registry.py b/annotator/uniformer/mmcv/cnn/bricks/registry.py new file mode 100644 index 0000000000000000000000000000000000000000..39eabc58db4b5954478a2ac1ab91cea5e45ab055 --- /dev/null +++ b/annotator/uniformer/mmcv/cnn/bricks/registry.py @@ -0,0 +1,16 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from annotator.uniformer.mmcv.utils import Registry + +CONV_LAYERS = Registry('conv layer') +NORM_LAYERS = Registry('norm layer') +ACTIVATION_LAYERS = Registry('activation layer') +PADDING_LAYERS = Registry('padding layer') +UPSAMPLE_LAYERS = Registry('upsample layer') +PLUGIN_LAYERS = Registry('plugin layer') + +DROPOUT_LAYERS = Registry('drop out layers') +POSITIONAL_ENCODING = Registry('position encoding') +ATTENTION = Registry('attention') +FEEDFORWARD_NETWORK = Registry('feed-forward Network') +TRANSFORMER_LAYER = Registry('transformerLayer') +TRANSFORMER_LAYER_SEQUENCE = Registry('transformer-layers sequence') diff --git a/annotator/uniformer/mmcv/cnn/bricks/scale.py b/annotator/uniformer/mmcv/cnn/bricks/scale.py new file mode 100644 index 0000000000000000000000000000000000000000..c905fffcc8bf998d18d94f927591963c428025e2 --- /dev/null +++ b/annotator/uniformer/mmcv/cnn/bricks/scale.py @@ -0,0 +1,21 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn + + +class Scale(nn.Module): + """A learnable scale parameter. + + This layer scales the input by a learnable factor. It multiplies a + learnable scale parameter of shape (1,) with input of any shape. + + Args: + scale (float): Initial value of scale factor. Default: 1.0 + """ + + def __init__(self, scale=1.0): + super(Scale, self).__init__() + self.scale = nn.Parameter(torch.tensor(scale, dtype=torch.float)) + + def forward(self, x): + return x * self.scale diff --git a/annotator/uniformer/mmcv/cnn/bricks/swish.py b/annotator/uniformer/mmcv/cnn/bricks/swish.py new file mode 100644 index 0000000000000000000000000000000000000000..e2ca8ed7b749413f011ae54aac0cab27e6f0b51f --- /dev/null +++ b/annotator/uniformer/mmcv/cnn/bricks/swish.py @@ -0,0 +1,25 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn + +from .registry import ACTIVATION_LAYERS + + +@ACTIVATION_LAYERS.register_module() +class Swish(nn.Module): + """Swish Module. + + This module applies the swish function: + + .. math:: + Swish(x) = x * Sigmoid(x) + + Returns: + Tensor: The output tensor. 
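+
+    Example (illustrative; Swish maps zero to zero)::
+
+        >>> import torch
+        >>> act = Swish()
+        >>> torch.allclose(act(torch.zeros(3)), torch.zeros(3))
+        True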
+ """ + + def __init__(self): + super(Swish, self).__init__() + + def forward(self, x): + return x * torch.sigmoid(x) diff --git a/annotator/uniformer/mmcv/cnn/bricks/transformer.py b/annotator/uniformer/mmcv/cnn/bricks/transformer.py new file mode 100644 index 0000000000000000000000000000000000000000..e61ae0dd941a7be00b3e41a3de833ec50470a45f --- /dev/null +++ b/annotator/uniformer/mmcv/cnn/bricks/transformer.py @@ -0,0 +1,595 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import copy +import warnings + +import torch +import torch.nn as nn + +from annotator.uniformer.mmcv import ConfigDict, deprecated_api_warning +from annotator.uniformer.mmcv.cnn import Linear, build_activation_layer, build_norm_layer +from annotator.uniformer.mmcv.runner.base_module import BaseModule, ModuleList, Sequential +from annotator.uniformer.mmcv.utils import build_from_cfg +from .drop import build_dropout +from .registry import (ATTENTION, FEEDFORWARD_NETWORK, POSITIONAL_ENCODING, + TRANSFORMER_LAYER, TRANSFORMER_LAYER_SEQUENCE) + +# Avoid BC-breaking of importing MultiScaleDeformableAttention from this file +try: + from annotator.uniformer.mmcv.ops.multi_scale_deform_attn import MultiScaleDeformableAttention # noqa F401 + warnings.warn( + ImportWarning( + '``MultiScaleDeformableAttention`` has been moved to ' + '``mmcv.ops.multi_scale_deform_attn``, please change original path ' # noqa E501 + '``from annotator.uniformer.mmcv.cnn.bricks.transformer import MultiScaleDeformableAttention`` ' # noqa E501 + 'to ``from annotator.uniformer.mmcv.ops.multi_scale_deform_attn import MultiScaleDeformableAttention`` ' # noqa E501 + )) + +except ImportError: + warnings.warn('Fail to import ``MultiScaleDeformableAttention`` from ' + '``mmcv.ops.multi_scale_deform_attn``, ' + 'You should install ``mmcv-full`` if you need this module. ') + + +def build_positional_encoding(cfg, default_args=None): + """Builder for Position Encoding.""" + return build_from_cfg(cfg, POSITIONAL_ENCODING, default_args) + + +def build_attention(cfg, default_args=None): + """Builder for attention.""" + return build_from_cfg(cfg, ATTENTION, default_args) + + +def build_feedforward_network(cfg, default_args=None): + """Builder for feed-forward network (FFN).""" + return build_from_cfg(cfg, FEEDFORWARD_NETWORK, default_args) + + +def build_transformer_layer(cfg, default_args=None): + """Builder for transformer layer.""" + return build_from_cfg(cfg, TRANSFORMER_LAYER, default_args) + + +def build_transformer_layer_sequence(cfg, default_args=None): + """Builder for transformer encoder and transformer decoder.""" + return build_from_cfg(cfg, TRANSFORMER_LAYER_SEQUENCE, default_args) + + +@ATTENTION.register_module() +class MultiheadAttention(BaseModule): + """A wrapper for ``torch.nn.MultiheadAttention``. + + This module implements MultiheadAttention with identity connection, + and positional encoding is also passed as input. + + Args: + embed_dims (int): The embedding dimension. + num_heads (int): Parallel attention heads. + attn_drop (float): A Dropout layer on attn_output_weights. + Default: 0.0. + proj_drop (float): A Dropout layer after `nn.MultiheadAttention`. + Default: 0.0. + dropout_layer (obj:`ConfigDict`): The dropout_layer used + when adding the shortcut. + init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization. + Default: None. + batch_first (bool): When it is True, Key, Query and Value are shape of + (batch, n, embed_dim), otherwise (n, batch, embed_dim). + Default to False. 
+ """ + + def __init__(self, + embed_dims, + num_heads, + attn_drop=0., + proj_drop=0., + dropout_layer=dict(type='Dropout', drop_prob=0.), + init_cfg=None, + batch_first=False, + **kwargs): + super(MultiheadAttention, self).__init__(init_cfg) + if 'dropout' in kwargs: + warnings.warn('The arguments `dropout` in MultiheadAttention ' + 'has been deprecated, now you can separately ' + 'set `attn_drop`(float), proj_drop(float), ' + 'and `dropout_layer`(dict) ') + attn_drop = kwargs['dropout'] + dropout_layer['drop_prob'] = kwargs.pop('dropout') + + self.embed_dims = embed_dims + self.num_heads = num_heads + self.batch_first = batch_first + + self.attn = nn.MultiheadAttention(embed_dims, num_heads, attn_drop, + **kwargs) + + self.proj_drop = nn.Dropout(proj_drop) + self.dropout_layer = build_dropout( + dropout_layer) if dropout_layer else nn.Identity() + + @deprecated_api_warning({'residual': 'identity'}, + cls_name='MultiheadAttention') + def forward(self, + query, + key=None, + value=None, + identity=None, + query_pos=None, + key_pos=None, + attn_mask=None, + key_padding_mask=None, + **kwargs): + """Forward function for `MultiheadAttention`. + + **kwargs allow passing a more general data flow when combining + with other operations in `transformerlayer`. + + Args: + query (Tensor): The input query with shape [num_queries, bs, + embed_dims] if self.batch_first is False, else + [bs, num_queries embed_dims]. + key (Tensor): The key tensor with shape [num_keys, bs, + embed_dims] if self.batch_first is False, else + [bs, num_keys, embed_dims] . + If None, the ``query`` will be used. Defaults to None. + value (Tensor): The value tensor with same shape as `key`. + Same in `nn.MultiheadAttention.forward`. Defaults to None. + If None, the `key` will be used. + identity (Tensor): This tensor, with the same shape as x, + will be used for the identity link. + If None, `x` will be used. Defaults to None. + query_pos (Tensor): The positional encoding for query, with + the same shape as `x`. If not None, it will + be added to `x` before forward function. Defaults to None. + key_pos (Tensor): The positional encoding for `key`, with the + same shape as `key`. Defaults to None. If not None, it will + be added to `key` before forward function. If None, and + `query_pos` has the same shape as `key`, then `query_pos` + will be used for `key_pos`. Defaults to None. + attn_mask (Tensor): ByteTensor mask with shape [num_queries, + num_keys]. Same in `nn.MultiheadAttention.forward`. + Defaults to None. + key_padding_mask (Tensor): ByteTensor with shape [bs, num_keys]. + Defaults to None. + + Returns: + Tensor: forwarded results with shape + [num_queries, bs, embed_dims] + if self.batch_first is False, else + [bs, num_queries embed_dims]. 
+ """ + + if key is None: + key = query + if value is None: + value = key + if identity is None: + identity = query + if key_pos is None: + if query_pos is not None: + # use query_pos if key_pos is not available + if query_pos.shape == key.shape: + key_pos = query_pos + else: + warnings.warn(f'position encoding of key is' + f'missing in {self.__class__.__name__}.') + if query_pos is not None: + query = query + query_pos + if key_pos is not None: + key = key + key_pos + + # Because the dataflow('key', 'query', 'value') of + # ``torch.nn.MultiheadAttention`` is (num_query, batch, + # embed_dims), We should adjust the shape of dataflow from + # batch_first (batch, num_query, embed_dims) to num_query_first + # (num_query ,batch, embed_dims), and recover ``attn_output`` + # from num_query_first to batch_first. + if self.batch_first: + query = query.transpose(0, 1) + key = key.transpose(0, 1) + value = value.transpose(0, 1) + + out = self.attn( + query=query, + key=key, + value=value, + attn_mask=attn_mask, + key_padding_mask=key_padding_mask)[0] + + if self.batch_first: + out = out.transpose(0, 1) + + return identity + self.dropout_layer(self.proj_drop(out)) + + +@FEEDFORWARD_NETWORK.register_module() +class FFN(BaseModule): + """Implements feed-forward networks (FFNs) with identity connection. + + Args: + embed_dims (int): The feature dimension. Same as + `MultiheadAttention`. Defaults: 256. + feedforward_channels (int): The hidden dimension of FFNs. + Defaults: 1024. + num_fcs (int, optional): The number of fully-connected layers in + FFNs. Default: 2. + act_cfg (dict, optional): The activation config for FFNs. + Default: dict(type='ReLU') + ffn_drop (float, optional): Probability of an element to be + zeroed in FFN. Default 0.0. + add_identity (bool, optional): Whether to add the + identity connection. Default: `True`. + dropout_layer (obj:`ConfigDict`): The dropout_layer used + when adding the shortcut. + init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization. + Default: None. + """ + + @deprecated_api_warning( + { + 'dropout': 'ffn_drop', + 'add_residual': 'add_identity' + }, + cls_name='FFN') + def __init__(self, + embed_dims=256, + feedforward_channels=1024, + num_fcs=2, + act_cfg=dict(type='ReLU', inplace=True), + ffn_drop=0., + dropout_layer=None, + add_identity=True, + init_cfg=None, + **kwargs): + super(FFN, self).__init__(init_cfg) + assert num_fcs >= 2, 'num_fcs should be no less ' \ + f'than 2. got {num_fcs}.' + self.embed_dims = embed_dims + self.feedforward_channels = feedforward_channels + self.num_fcs = num_fcs + self.act_cfg = act_cfg + self.activate = build_activation_layer(act_cfg) + + layers = [] + in_channels = embed_dims + for _ in range(num_fcs - 1): + layers.append( + Sequential( + Linear(in_channels, feedforward_channels), self.activate, + nn.Dropout(ffn_drop))) + in_channels = feedforward_channels + layers.append(Linear(feedforward_channels, embed_dims)) + layers.append(nn.Dropout(ffn_drop)) + self.layers = Sequential(*layers) + self.dropout_layer = build_dropout( + dropout_layer) if dropout_layer else torch.nn.Identity() + self.add_identity = add_identity + + @deprecated_api_warning({'residual': 'identity'}, cls_name='FFN') + def forward(self, x, identity=None): + """Forward function for `FFN`. + + The function would add x to the output tensor if residue is None. 
+ """ + out = self.layers(x) + if not self.add_identity: + return self.dropout_layer(out) + if identity is None: + identity = x + return identity + self.dropout_layer(out) + + +@TRANSFORMER_LAYER.register_module() +class BaseTransformerLayer(BaseModule): + """Base `TransformerLayer` for vision transformer. + + It can be built from `mmcv.ConfigDict` and support more flexible + customization, for example, using any number of `FFN or LN ` and + use different kinds of `attention` by specifying a list of `ConfigDict` + named `attn_cfgs`. It is worth mentioning that it supports `prenorm` + when you specifying `norm` as the first element of `operation_order`. + More details about the `prenorm`: `On Layer Normalization in the + Transformer Architecture `_ . + + Args: + attn_cfgs (list[`mmcv.ConfigDict`] | obj:`mmcv.ConfigDict` | None )): + Configs for `self_attention` or `cross_attention` modules, + The order of the configs in the list should be consistent with + corresponding attentions in operation_order. + If it is a dict, all of the attention modules in operation_order + will be built with this config. Default: None. + ffn_cfgs (list[`mmcv.ConfigDict`] | obj:`mmcv.ConfigDict` | None )): + Configs for FFN, The order of the configs in the list should be + consistent with corresponding ffn in operation_order. + If it is a dict, all of the attention modules in operation_order + will be built with this config. + operation_order (tuple[str]): The execution order of operation + in transformer. Such as ('self_attn', 'norm', 'ffn', 'norm'). + Support `prenorm` when you specifying first element as `norm`. + Default:None. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='LN'). + init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization. + Default: None. + batch_first (bool): Key, Query and Value are shape + of (batch, n, embed_dim) + or (n, batch, embed_dim). Default to False. + """ + + def __init__(self, + attn_cfgs=None, + ffn_cfgs=dict( + type='FFN', + embed_dims=256, + feedforward_channels=1024, + num_fcs=2, + ffn_drop=0., + act_cfg=dict(type='ReLU', inplace=True), + ), + operation_order=None, + norm_cfg=dict(type='LN'), + init_cfg=None, + batch_first=False, + **kwargs): + + deprecated_args = dict( + feedforward_channels='feedforward_channels', + ffn_dropout='ffn_drop', + ffn_num_fcs='num_fcs') + for ori_name, new_name in deprecated_args.items(): + if ori_name in kwargs: + warnings.warn( + f'The arguments `{ori_name}` in BaseTransformerLayer ' + f'has been deprecated, now you should set `{new_name}` ' + f'and other FFN related arguments ' + f'to a dict named `ffn_cfgs`. ') + ffn_cfgs[new_name] = kwargs[ori_name] + + super(BaseTransformerLayer, self).__init__(init_cfg) + + self.batch_first = batch_first + + assert set(operation_order) & set( + ['self_attn', 'norm', 'ffn', 'cross_attn']) == \ + set(operation_order), f'The operation_order of' \ + f' {self.__class__.__name__} should ' \ + f'contains all four operation type ' \ + f"{['self_attn', 'norm', 'ffn', 'cross_attn']}" + + num_attn = operation_order.count('self_attn') + operation_order.count( + 'cross_attn') + if isinstance(attn_cfgs, dict): + attn_cfgs = [copy.deepcopy(attn_cfgs) for _ in range(num_attn)] + else: + assert num_attn == len(attn_cfgs), f'The length ' \ + f'of attn_cfg {num_attn} is ' \ + f'not consistent with the number of attention' \ + f'in operation_order {operation_order}.' 
+        self.num_attn = num_attn
+        self.operation_order = operation_order
+        self.norm_cfg = norm_cfg
+        self.pre_norm = operation_order[0] == 'norm'
+        self.attentions = ModuleList()
+
+        index = 0
+        for operation_name in operation_order:
+            if operation_name in ['self_attn', 'cross_attn']:
+                if 'batch_first' in attn_cfgs[index]:
+                    assert self.batch_first == attn_cfgs[index]['batch_first']
+                else:
+                    attn_cfgs[index]['batch_first'] = self.batch_first
+                attention = build_attention(attn_cfgs[index])
+                # Some custom attentions used as `self_attn`
+                # or `cross_attn` can have different behavior.
+                attention.operation_name = operation_name
+                self.attentions.append(attention)
+                index += 1
+
+        self.embed_dims = self.attentions[0].embed_dims
+
+        self.ffns = ModuleList()
+        num_ffns = operation_order.count('ffn')
+        if isinstance(ffn_cfgs, dict):
+            ffn_cfgs = ConfigDict(ffn_cfgs)
+        if isinstance(ffn_cfgs, dict):
+            ffn_cfgs = [copy.deepcopy(ffn_cfgs) for _ in range(num_ffns)]
+        assert len(ffn_cfgs) == num_ffns
+        for ffn_index in range(num_ffns):
+            if 'embed_dims' not in ffn_cfgs[ffn_index]:
+                ffn_cfgs[ffn_index]['embed_dims'] = self.embed_dims
+            else:
+                assert ffn_cfgs[ffn_index]['embed_dims'] == self.embed_dims
+            self.ffns.append(
+                build_feedforward_network(ffn_cfgs[ffn_index],
+                                          dict(type='FFN')))
+
+        self.norms = ModuleList()
+        num_norms = operation_order.count('norm')
+        for _ in range(num_norms):
+            self.norms.append(build_norm_layer(norm_cfg, self.embed_dims)[1])
+
+    def forward(self,
+                query,
+                key=None,
+                value=None,
+                query_pos=None,
+                key_pos=None,
+                attn_masks=None,
+                query_key_padding_mask=None,
+                key_padding_mask=None,
+                **kwargs):
+        """Forward function for `BaseTransformerLayer`.
+
+        **kwargs contains some specific arguments of attentions.
+
+        Args:
+            query (Tensor): The input query with shape
+                [num_queries, bs, embed_dims] if
+                self.batch_first is False, else
+                [bs, num_queries, embed_dims].
+            key (Tensor): The key tensor with shape [num_keys, bs,
+                embed_dims] if self.batch_first is False, else
+                [bs, num_keys, embed_dims].
+            value (Tensor): The value tensor with same shape as `key`.
+            query_pos (Tensor): The positional encoding for `query`.
+                Default: None.
+            key_pos (Tensor): The positional encoding for `key`.
+                Default: None.
+            attn_masks (List[Tensor] | None): 2D Tensor used in
+                calculation of corresponding attention. Its length
+                should equal the number of `attention` entries in
+                `operation_order`. Default: None.
+            query_key_padding_mask (Tensor): ByteTensor for `query`, with
+                shape [bs, num_queries]. Only used in `self_attn` layer.
+                Defaults to None.
+            key_padding_mask (Tensor): ByteTensor for `key`, with
+                shape [bs, num_keys]. Default: None.
+
+        Returns:
+            Tensor: forwarded results with shape [num_queries, bs, embed_dims].
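+
+        Example (an illustrative sketch; the attention config is an
+        assumption, using the ``MultiheadAttention`` defined in this file)::
+
+            >>> import torch
+            >>> layer = BaseTransformerLayer(
+            ...     attn_cfgs=dict(
+            ...         type='MultiheadAttention', embed_dims=256,
+            ...         num_heads=8),
+            ...     operation_order=('self_attn', 'norm', 'ffn', 'norm'))
+            >>> query = torch.rand(10, 2, 256)
+            >>> layer(query).shape
+            torch.Size([10, 2, 256])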
+ """ + + norm_index = 0 + attn_index = 0 + ffn_index = 0 + identity = query + if attn_masks is None: + attn_masks = [None for _ in range(self.num_attn)] + elif isinstance(attn_masks, torch.Tensor): + attn_masks = [ + copy.deepcopy(attn_masks) for _ in range(self.num_attn) + ] + warnings.warn(f'Use same attn_mask in all attentions in ' + f'{self.__class__.__name__} ') + else: + assert len(attn_masks) == self.num_attn, f'The length of ' \ + f'attn_masks {len(attn_masks)} must be equal ' \ + f'to the number of attention in ' \ + f'operation_order {self.num_attn}' + + for layer in self.operation_order: + if layer == 'self_attn': + temp_key = temp_value = query + query = self.attentions[attn_index]( + query, + temp_key, + temp_value, + identity if self.pre_norm else None, + query_pos=query_pos, + key_pos=query_pos, + attn_mask=attn_masks[attn_index], + key_padding_mask=query_key_padding_mask, + **kwargs) + attn_index += 1 + identity = query + + elif layer == 'norm': + query = self.norms[norm_index](query) + norm_index += 1 + + elif layer == 'cross_attn': + query = self.attentions[attn_index]( + query, + key, + value, + identity if self.pre_norm else None, + query_pos=query_pos, + key_pos=key_pos, + attn_mask=attn_masks[attn_index], + key_padding_mask=key_padding_mask, + **kwargs) + attn_index += 1 + identity = query + + elif layer == 'ffn': + query = self.ffns[ffn_index]( + query, identity if self.pre_norm else None) + ffn_index += 1 + + return query + + +@TRANSFORMER_LAYER_SEQUENCE.register_module() +class TransformerLayerSequence(BaseModule): + """Base class for TransformerEncoder and TransformerDecoder in vision + transformer. + + As base-class of Encoder and Decoder in vision transformer. + Support customization such as specifying different kind + of `transformer_layer` in `transformer_coder`. + + Args: + transformerlayer (list[obj:`mmcv.ConfigDict`] | + obj:`mmcv.ConfigDict`): Config of transformerlayer + in TransformerCoder. If it is obj:`mmcv.ConfigDict`, + it would be repeated `num_layer` times to a + list[`mmcv.ConfigDict`]. Default: None. + num_layers (int): The number of `TransformerLayer`. Default: None. + init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization. + Default: None. + """ + + def __init__(self, transformerlayers=None, num_layers=None, init_cfg=None): + super(TransformerLayerSequence, self).__init__(init_cfg) + if isinstance(transformerlayers, dict): + transformerlayers = [ + copy.deepcopy(transformerlayers) for _ in range(num_layers) + ] + else: + assert isinstance(transformerlayers, list) and \ + len(transformerlayers) == num_layers + self.num_layers = num_layers + self.layers = ModuleList() + for i in range(num_layers): + self.layers.append(build_transformer_layer(transformerlayers[i])) + self.embed_dims = self.layers[0].embed_dims + self.pre_norm = self.layers[0].pre_norm + + def forward(self, + query, + key, + value, + query_pos=None, + key_pos=None, + attn_masks=None, + query_key_padding_mask=None, + key_padding_mask=None, + **kwargs): + """Forward function for `TransformerCoder`. + + Args: + query (Tensor): Input query with shape + `(num_queries, bs, embed_dims)`. + key (Tensor): The key tensor with shape + `(num_keys, bs, embed_dims)`. + value (Tensor): The value tensor with shape + `(num_keys, bs, embed_dims)`. + query_pos (Tensor): The positional encoding for `query`. + Default: None. + key_pos (Tensor): The positional encoding for `key`. + Default: None. 
+ attn_masks (List[Tensor], optional): Each element is 2D Tensor + which is used in calculation of corresponding attention in + operation_order. Default: None. + query_key_padding_mask (Tensor): ByteTensor for `query`, with + shape [bs, num_queries]. Only used in self-attention + Default: None. + key_padding_mask (Tensor): ByteTensor for `query`, with + shape [bs, num_keys]. Default: None. + + Returns: + Tensor: results with shape [num_queries, bs, embed_dims]. + """ + for layer in self.layers: + query = layer( + query, + key, + value, + query_pos=query_pos, + key_pos=key_pos, + attn_masks=attn_masks, + query_key_padding_mask=query_key_padding_mask, + key_padding_mask=key_padding_mask, + **kwargs) + return query diff --git a/annotator/uniformer/mmcv/cnn/bricks/upsample.py b/annotator/uniformer/mmcv/cnn/bricks/upsample.py new file mode 100644 index 0000000000000000000000000000000000000000..a1a353767d0ce8518f0d7289bed10dba0178ed12 --- /dev/null +++ b/annotator/uniformer/mmcv/cnn/bricks/upsample.py @@ -0,0 +1,84 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch.nn as nn +import torch.nn.functional as F + +from ..utils import xavier_init +from .registry import UPSAMPLE_LAYERS + +UPSAMPLE_LAYERS.register_module('nearest', module=nn.Upsample) +UPSAMPLE_LAYERS.register_module('bilinear', module=nn.Upsample) + + +@UPSAMPLE_LAYERS.register_module(name='pixel_shuffle') +class PixelShufflePack(nn.Module): + """Pixel Shuffle upsample layer. + + This module packs `F.pixel_shuffle()` and a nn.Conv2d module together to + achieve a simple upsampling with pixel shuffle. + + Args: + in_channels (int): Number of input channels. + out_channels (int): Number of output channels. + scale_factor (int): Upsample ratio. + upsample_kernel (int): Kernel size of the conv layer to expand the + channels. + """ + + def __init__(self, in_channels, out_channels, scale_factor, + upsample_kernel): + super(PixelShufflePack, self).__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.scale_factor = scale_factor + self.upsample_kernel = upsample_kernel + self.upsample_conv = nn.Conv2d( + self.in_channels, + self.out_channels * scale_factor * scale_factor, + self.upsample_kernel, + padding=(self.upsample_kernel - 1) // 2) + self.init_weights() + + def init_weights(self): + xavier_init(self.upsample_conv, distribution='uniform') + + def forward(self, x): + x = self.upsample_conv(x) + x = F.pixel_shuffle(x, self.scale_factor) + return x + + +def build_upsample_layer(cfg, *args, **kwargs): + """Build upsample layer. + + Args: + cfg (dict): The upsample layer config, which should contain: + + - type (str): Layer type. + - scale_factor (int): Upsample ratio, which is not applicable to + deconv. + - layer args: Args needed to instantiate a upsample layer. + args (argument list): Arguments passed to the ``__init__`` + method of the corresponding conv layer. + kwargs (keyword arguments): Keyword arguments passed to the + ``__init__`` method of the corresponding conv layer. + + Returns: + nn.Module: Created upsample layer. 
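+
+    Example (illustrative)::
+
+        >>> import torch.nn as nn
+        >>> layer = build_upsample_layer(dict(type='nearest', scale_factor=2))
+        >>> isinstance(layer, nn.Upsample)
+        True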
+ """ + if not isinstance(cfg, dict): + raise TypeError(f'cfg must be a dict, but got {type(cfg)}') + if 'type' not in cfg: + raise KeyError( + f'the cfg dict must contain the key "type", but got {cfg}') + cfg_ = cfg.copy() + + layer_type = cfg_.pop('type') + if layer_type not in UPSAMPLE_LAYERS: + raise KeyError(f'Unrecognized upsample type {layer_type}') + else: + upsample = UPSAMPLE_LAYERS.get(layer_type) + + if upsample is nn.Upsample: + cfg_['mode'] = layer_type + layer = upsample(*args, **kwargs, **cfg_) + return layer diff --git a/annotator/uniformer/mmcv/cnn/bricks/wrappers.py b/annotator/uniformer/mmcv/cnn/bricks/wrappers.py new file mode 100644 index 0000000000000000000000000000000000000000..8aebf67bf52355a513f21756ee74fe510902d075 --- /dev/null +++ b/annotator/uniformer/mmcv/cnn/bricks/wrappers.py @@ -0,0 +1,180 @@ +# Copyright (c) OpenMMLab. All rights reserved. +r"""Modified from https://github.com/facebookresearch/detectron2/blob/master/detectron2/layers/wrappers.py # noqa: E501 + +Wrap some nn modules to support empty tensor input. Currently, these wrappers +are mainly used in mask heads like fcn_mask_head and maskiou_heads since mask +heads are trained on only positive RoIs. +""" +import math + +import torch +import torch.nn as nn +from torch.nn.modules.utils import _pair, _triple + +from .registry import CONV_LAYERS, UPSAMPLE_LAYERS + +if torch.__version__ == 'parrots': + TORCH_VERSION = torch.__version__ +else: + # torch.__version__ could be 1.3.1+cu92, we only need the first two + # for comparison + TORCH_VERSION = tuple(int(x) for x in torch.__version__.split('.')[:2]) + + +def obsolete_torch_version(torch_version, version_threshold): + return torch_version == 'parrots' or torch_version <= version_threshold + + +class NewEmptyTensorOp(torch.autograd.Function): + + @staticmethod + def forward(ctx, x, new_shape): + ctx.shape = x.shape + return x.new_empty(new_shape) + + @staticmethod + def backward(ctx, grad): + shape = ctx.shape + return NewEmptyTensorOp.apply(grad, shape), None + + +@CONV_LAYERS.register_module('Conv', force=True) +class Conv2d(nn.Conv2d): + + def forward(self, x): + if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 4)): + out_shape = [x.shape[0], self.out_channels] + for i, k, p, s, d in zip(x.shape[-2:], self.kernel_size, + self.padding, self.stride, self.dilation): + o = (i + 2 * p - (d * (k - 1) + 1)) // s + 1 + out_shape.append(o) + empty = NewEmptyTensorOp.apply(x, out_shape) + if self.training: + # produce dummy gradient to avoid DDP warning. + dummy = sum(x.view(-1)[0] for x in self.parameters()) * 0.0 + return empty + dummy + else: + return empty + + return super().forward(x) + + +@CONV_LAYERS.register_module('Conv3d', force=True) +class Conv3d(nn.Conv3d): + + def forward(self, x): + if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 4)): + out_shape = [x.shape[0], self.out_channels] + for i, k, p, s, d in zip(x.shape[-3:], self.kernel_size, + self.padding, self.stride, self.dilation): + o = (i + 2 * p - (d * (k - 1) + 1)) // s + 1 + out_shape.append(o) + empty = NewEmptyTensorOp.apply(x, out_shape) + if self.training: + # produce dummy gradient to avoid DDP warning. 
+ dummy = sum(x.view(-1)[0] for x in self.parameters()) * 0.0 + return empty + dummy + else: + return empty + + return super().forward(x) + + +@CONV_LAYERS.register_module() +@CONV_LAYERS.register_module('deconv') +@UPSAMPLE_LAYERS.register_module('deconv', force=True) +class ConvTranspose2d(nn.ConvTranspose2d): + + def forward(self, x): + if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 4)): + out_shape = [x.shape[0], self.out_channels] + for i, k, p, s, d, op in zip(x.shape[-2:], self.kernel_size, + self.padding, self.stride, + self.dilation, self.output_padding): + out_shape.append((i - 1) * s - 2 * p + (d * (k - 1) + 1) + op) + empty = NewEmptyTensorOp.apply(x, out_shape) + if self.training: + # produce dummy gradient to avoid DDP warning. + dummy = sum(x.view(-1)[0] for x in self.parameters()) * 0.0 + return empty + dummy + else: + return empty + + return super().forward(x) + + +@CONV_LAYERS.register_module() +@CONV_LAYERS.register_module('deconv3d') +@UPSAMPLE_LAYERS.register_module('deconv3d', force=True) +class ConvTranspose3d(nn.ConvTranspose3d): + + def forward(self, x): + if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 4)): + out_shape = [x.shape[0], self.out_channels] + for i, k, p, s, d, op in zip(x.shape[-3:], self.kernel_size, + self.padding, self.stride, + self.dilation, self.output_padding): + out_shape.append((i - 1) * s - 2 * p + (d * (k - 1) + 1) + op) + empty = NewEmptyTensorOp.apply(x, out_shape) + if self.training: + # produce dummy gradient to avoid DDP warning. + dummy = sum(x.view(-1)[0] for x in self.parameters()) * 0.0 + return empty + dummy + else: + return empty + + return super().forward(x) + + +class MaxPool2d(nn.MaxPool2d): + + def forward(self, x): + # PyTorch 1.9 does not support empty tensor inference yet + if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 9)): + out_shape = list(x.shape[:2]) + for i, k, p, s, d in zip(x.shape[-2:], _pair(self.kernel_size), + _pair(self.padding), _pair(self.stride), + _pair(self.dilation)): + o = (i + 2 * p - (d * (k - 1) + 1)) / s + 1 + o = math.ceil(o) if self.ceil_mode else math.floor(o) + out_shape.append(o) + empty = NewEmptyTensorOp.apply(x, out_shape) + return empty + + return super().forward(x) + + +class MaxPool3d(nn.MaxPool3d): + + def forward(self, x): + # PyTorch 1.9 does not support empty tensor inference yet + if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 9)): + out_shape = list(x.shape[:2]) + for i, k, p, s, d in zip(x.shape[-3:], _triple(self.kernel_size), + _triple(self.padding), + _triple(self.stride), + _triple(self.dilation)): + o = (i + 2 * p - (d * (k - 1) + 1)) / s + 1 + o = math.ceil(o) if self.ceil_mode else math.floor(o) + out_shape.append(o) + empty = NewEmptyTensorOp.apply(x, out_shape) + return empty + + return super().forward(x) + + +class Linear(torch.nn.Linear): + + def forward(self, x): + # empty tensor forward of Linear layer is supported in Pytorch 1.6 + if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 5)): + out_shape = [x.shape[0], self.out_features] + empty = NewEmptyTensorOp.apply(x, out_shape) + if self.training: + # produce dummy gradient to avoid DDP warning. 
+            dummy = sum(x.view(-1)[0] for x in self.parameters()) * 0.0
+            return empty + dummy
+        else:
+            return empty
+
+        return super().forward(x)
diff --git a/annotator/uniformer/mmcv/cnn/builder.py b/annotator/uniformer/mmcv/cnn/builder.py
new file mode 100644
index 0000000000000000000000000000000000000000..7567316c566bd3aca6d8f65a84b00e9e890948a7
--- /dev/null
+++ b/annotator/uniformer/mmcv/cnn/builder.py
@@ -0,0 +1,30 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from ..runner import Sequential
+from ..utils import Registry, build_from_cfg
+
+
+def build_model_from_cfg(cfg, registry, default_args=None):
+    """Build a PyTorch model from config dict(s). Different from
+    ``build_from_cfg``, if cfg is a list, a ``nn.Sequential`` will be built.
+
+    Args:
+        cfg (dict, list[dict]): The config of modules; it is either a config
+            dict or a list of config dicts. If cfg is a list, the built
+            modules will be wrapped with ``nn.Sequential``.
+        registry (:obj:`Registry`): A registry the module belongs to.
+        default_args (dict, optional): Default arguments to build the module.
+            Defaults to None.
+
+    Returns:
+        nn.Module: A built nn module.
+    """
+    if isinstance(cfg, list):
+        modules = [
+            build_from_cfg(cfg_, registry, default_args) for cfg_ in cfg
+        ]
+        return Sequential(*modules)
+    else:
+        return build_from_cfg(cfg, registry, default_args)
+
+
+MODELS = Registry('model', build_func=build_model_from_cfg)
diff --git a/annotator/uniformer/mmcv/cnn/resnet.py b/annotator/uniformer/mmcv/cnn/resnet.py
new file mode 100644
index 0000000000000000000000000000000000000000..1cb3ac057ee2d52c46fc94685b5d4e698aad8d5f
--- /dev/null
+++ b/annotator/uniformer/mmcv/cnn/resnet.py
@@ -0,0 +1,316 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import logging
+
+import torch.nn as nn
+import torch.utils.checkpoint as cp
+
+from .utils import constant_init, kaiming_init
+
+
+def conv3x3(in_planes, out_planes, stride=1, dilation=1):
+    """3x3 convolution with padding."""
+    return nn.Conv2d(
+        in_planes,
+        out_planes,
+        kernel_size=3,
+        stride=stride,
+        padding=dilation,
+        dilation=dilation,
+        bias=False)
+
+
+class BasicBlock(nn.Module):
+    expansion = 1
+
+    def __init__(self,
+                 inplanes,
+                 planes,
+                 stride=1,
+                 dilation=1,
+                 downsample=None,
+                 style='pytorch',
+                 with_cp=False):
+        super(BasicBlock, self).__init__()
+        assert style in ['pytorch', 'caffe']
+        self.conv1 = conv3x3(inplanes, planes, stride, dilation)
+        self.bn1 = nn.BatchNorm2d(planes)
+        self.relu = nn.ReLU(inplace=True)
+        self.conv2 = conv3x3(planes, planes)
+        self.bn2 = nn.BatchNorm2d(planes)
+        self.downsample = downsample
+        self.stride = stride
+        self.dilation = dilation
+        assert not with_cp
+
+    def forward(self, x):
+        residual = x
+
+        out = self.conv1(x)
+        out = self.bn1(out)
+        out = self.relu(out)
+
+        out = self.conv2(out)
+        out = self.bn2(out)
+
+        if self.downsample is not None:
+            residual = self.downsample(x)
+
+        out += residual
+        out = self.relu(out)
+
+        return out
+
+
+class Bottleneck(nn.Module):
+    expansion = 4
+
+    def __init__(self,
+                 inplanes,
+                 planes,
+                 stride=1,
+                 dilation=1,
+                 downsample=None,
+                 style='pytorch',
+                 with_cp=False):
+        """Bottleneck block.
+
+        If style is "pytorch", the stride-two layer is the 3x3 conv layer;
+        if it is "caffe", the stride-two layer is the first 1x1 conv layer.
+ """ + super(Bottleneck, self).__init__() + assert style in ['pytorch', 'caffe'] + if style == 'pytorch': + conv1_stride = 1 + conv2_stride = stride + else: + conv1_stride = stride + conv2_stride = 1 + self.conv1 = nn.Conv2d( + inplanes, planes, kernel_size=1, stride=conv1_stride, bias=False) + self.conv2 = nn.Conv2d( + planes, + planes, + kernel_size=3, + stride=conv2_stride, + padding=dilation, + dilation=dilation, + bias=False) + + self.bn1 = nn.BatchNorm2d(planes) + self.bn2 = nn.BatchNorm2d(planes) + self.conv3 = nn.Conv2d( + planes, planes * self.expansion, kernel_size=1, bias=False) + self.bn3 = nn.BatchNorm2d(planes * self.expansion) + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + self.dilation = dilation + self.with_cp = with_cp + + def forward(self, x): + + def _inner_forward(x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + + return out + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(_inner_forward, x) + else: + out = _inner_forward(x) + + out = self.relu(out) + + return out + + +def make_res_layer(block, + inplanes, + planes, + blocks, + stride=1, + dilation=1, + style='pytorch', + with_cp=False): + downsample = None + if stride != 1 or inplanes != planes * block.expansion: + downsample = nn.Sequential( + nn.Conv2d( + inplanes, + planes * block.expansion, + kernel_size=1, + stride=stride, + bias=False), + nn.BatchNorm2d(planes * block.expansion), + ) + + layers = [] + layers.append( + block( + inplanes, + planes, + stride, + dilation, + downsample, + style=style, + with_cp=with_cp)) + inplanes = planes * block.expansion + for _ in range(1, blocks): + layers.append( + block(inplanes, planes, 1, dilation, style=style, with_cp=with_cp)) + + return nn.Sequential(*layers) + + +class ResNet(nn.Module): + """ResNet backbone. + + Args: + depth (int): Depth of resnet, from {18, 34, 50, 101, 152}. + num_stages (int): Resnet stages, normally 4. + strides (Sequence[int]): Strides of the first block of each stage. + dilations (Sequence[int]): Dilation of each stage. + out_indices (Sequence[int]): Output from which stages. + style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two + layer is the 3x3 conv layer, otherwise the stride-two layer is + the first 1x1 conv layer. + frozen_stages (int): Stages to be frozen (all param fixed). -1 means + not freezing any parameters. + bn_eval (bool): Whether to set BN layers as eval mode, namely, freeze + running stats (mean and var). + bn_frozen (bool): Whether to freeze weight and bias of BN layers. + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. 
+ """ + + arch_settings = { + 18: (BasicBlock, (2, 2, 2, 2)), + 34: (BasicBlock, (3, 4, 6, 3)), + 50: (Bottleneck, (3, 4, 6, 3)), + 101: (Bottleneck, (3, 4, 23, 3)), + 152: (Bottleneck, (3, 8, 36, 3)) + } + + def __init__(self, + depth, + num_stages=4, + strides=(1, 2, 2, 2), + dilations=(1, 1, 1, 1), + out_indices=(0, 1, 2, 3), + style='pytorch', + frozen_stages=-1, + bn_eval=True, + bn_frozen=False, + with_cp=False): + super(ResNet, self).__init__() + if depth not in self.arch_settings: + raise KeyError(f'invalid depth {depth} for resnet') + assert num_stages >= 1 and num_stages <= 4 + block, stage_blocks = self.arch_settings[depth] + stage_blocks = stage_blocks[:num_stages] + assert len(strides) == len(dilations) == num_stages + assert max(out_indices) < num_stages + + self.out_indices = out_indices + self.style = style + self.frozen_stages = frozen_stages + self.bn_eval = bn_eval + self.bn_frozen = bn_frozen + self.with_cp = with_cp + + self.inplanes = 64 + self.conv1 = nn.Conv2d( + 3, 64, kernel_size=7, stride=2, padding=3, bias=False) + self.bn1 = nn.BatchNorm2d(64) + self.relu = nn.ReLU(inplace=True) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + + self.res_layers = [] + for i, num_blocks in enumerate(stage_blocks): + stride = strides[i] + dilation = dilations[i] + planes = 64 * 2**i + res_layer = make_res_layer( + block, + self.inplanes, + planes, + num_blocks, + stride=stride, + dilation=dilation, + style=self.style, + with_cp=with_cp) + self.inplanes = planes * block.expansion + layer_name = f'layer{i + 1}' + self.add_module(layer_name, res_layer) + self.res_layers.append(layer_name) + + self.feat_dim = block.expansion * 64 * 2**(len(stage_blocks) - 1) + + def init_weights(self, pretrained=None): + if isinstance(pretrained, str): + logger = logging.getLogger() + from ..runner import load_checkpoint + load_checkpoint(self, pretrained, strict=False, logger=logger) + elif pretrained is None: + for m in self.modules(): + if isinstance(m, nn.Conv2d): + kaiming_init(m) + elif isinstance(m, nn.BatchNorm2d): + constant_init(m, 1) + else: + raise TypeError('pretrained must be a str or None') + + def forward(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.maxpool(x) + outs = [] + for i, layer_name in enumerate(self.res_layers): + res_layer = getattr(self, layer_name) + x = res_layer(x) + if i in self.out_indices: + outs.append(x) + if len(outs) == 1: + return outs[0] + else: + return tuple(outs) + + def train(self, mode=True): + super(ResNet, self).train(mode) + if self.bn_eval: + for m in self.modules(): + if isinstance(m, nn.BatchNorm2d): + m.eval() + if self.bn_frozen: + for params in m.parameters(): + params.requires_grad = False + if mode and self.frozen_stages >= 0: + for param in self.conv1.parameters(): + param.requires_grad = False + for param in self.bn1.parameters(): + param.requires_grad = False + self.bn1.eval() + self.bn1.weight.requires_grad = False + self.bn1.bias.requires_grad = False + for i in range(1, self.frozen_stages + 1): + mod = getattr(self, f'layer{i}') + mod.eval() + for param in mod.parameters(): + param.requires_grad = False diff --git a/annotator/uniformer/mmcv/cnn/utils/__init__.py b/annotator/uniformer/mmcv/cnn/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a263e31c1e3977712827ca229bbc04910b4e928e --- /dev/null +++ b/annotator/uniformer/mmcv/cnn/utils/__init__.py @@ -0,0 +1,19 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+from .flops_counter import get_model_complexity_info +from .fuse_conv_bn import fuse_conv_bn +from .sync_bn import revert_sync_batchnorm +from .weight_init import (INITIALIZERS, Caffe2XavierInit, ConstantInit, + KaimingInit, NormalInit, PretrainedInit, + TruncNormalInit, UniformInit, XavierInit, + bias_init_with_prob, caffe2_xavier_init, + constant_init, initialize, kaiming_init, normal_init, + trunc_normal_init, uniform_init, xavier_init) + +__all__ = [ + 'get_model_complexity_info', 'bias_init_with_prob', 'caffe2_xavier_init', + 'constant_init', 'kaiming_init', 'normal_init', 'trunc_normal_init', + 'uniform_init', 'xavier_init', 'fuse_conv_bn', 'initialize', + 'INITIALIZERS', 'ConstantInit', 'XavierInit', 'NormalInit', + 'TruncNormalInit', 'UniformInit', 'KaimingInit', 'PretrainedInit', + 'Caffe2XavierInit', 'revert_sync_batchnorm' +] diff --git a/annotator/uniformer/mmcv/cnn/utils/flops_counter.py b/annotator/uniformer/mmcv/cnn/utils/flops_counter.py new file mode 100644 index 0000000000000000000000000000000000000000..d10af5feca7f4b8c0ba359b7b1c826f754e048be --- /dev/null +++ b/annotator/uniformer/mmcv/cnn/utils/flops_counter.py @@ -0,0 +1,599 @@ +# Modified from flops-counter.pytorch by Vladislav Sovrasov +# original repo: https://github.com/sovrasov/flops-counter.pytorch + +# MIT License + +# Copyright (c) 2018 Vladislav Sovrasov + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +import sys +from functools import partial + +import numpy as np +import torch +import torch.nn as nn + +import annotator.uniformer.mmcv as mmcv + + +def get_model_complexity_info(model, + input_shape, + print_per_layer_stat=True, + as_strings=True, + input_constructor=None, + flush=False, + ost=sys.stdout): + """Get complexity information of a model. + + This method can calculate FLOPs and parameter counts of a model with + corresponding input shape. It can also print complexity information for + each layer in a model. + + Supported layers are listed as below: + - Convolutions: ``nn.Conv1d``, ``nn.Conv2d``, ``nn.Conv3d``. + - Activations: ``nn.ReLU``, ``nn.PReLU``, ``nn.ELU``, ``nn.LeakyReLU``, + ``nn.ReLU6``. + - Poolings: ``nn.MaxPool1d``, ``nn.MaxPool2d``, ``nn.MaxPool3d``, + ``nn.AvgPool1d``, ``nn.AvgPool2d``, ``nn.AvgPool3d``, + ``nn.AdaptiveMaxPool1d``, ``nn.AdaptiveMaxPool2d``, + ``nn.AdaptiveMaxPool3d``, ``nn.AdaptiveAvgPool1d``, + ``nn.AdaptiveAvgPool2d``, ``nn.AdaptiveAvgPool3d``. 
+ - BatchNorms: ``nn.BatchNorm1d``, ``nn.BatchNorm2d``, + ``nn.BatchNorm3d``, ``nn.GroupNorm``, ``nn.InstanceNorm1d``, + ``InstanceNorm2d``, ``InstanceNorm3d``, ``nn.LayerNorm``. + - Linear: ``nn.Linear``. + - Deconvolution: ``nn.ConvTranspose2d``. + - Upsample: ``nn.Upsample``. + + Args: + model (nn.Module): The model for complexity calculation. + input_shape (tuple): Input shape used for calculation. + print_per_layer_stat (bool): Whether to print complexity information + for each layer in a model. Default: True. + as_strings (bool): Output FLOPs and params counts in a string form. + Default: True. + input_constructor (None | callable): If specified, it takes a callable + method that generates input. otherwise, it will generate a random + tensor with input shape to calculate FLOPs. Default: None. + flush (bool): same as that in :func:`print`. Default: False. + ost (stream): same as ``file`` param in :func:`print`. + Default: sys.stdout. + + Returns: + tuple[float | str]: If ``as_strings`` is set to True, it will return + FLOPs and parameter counts in a string format. otherwise, it will + return those in a float number format. + """ + assert type(input_shape) is tuple + assert len(input_shape) >= 1 + assert isinstance(model, nn.Module) + flops_model = add_flops_counting_methods(model) + flops_model.eval() + flops_model.start_flops_count() + if input_constructor: + input = input_constructor(input_shape) + _ = flops_model(**input) + else: + try: + batch = torch.ones(()).new_empty( + (1, *input_shape), + dtype=next(flops_model.parameters()).dtype, + device=next(flops_model.parameters()).device) + except StopIteration: + # Avoid StopIteration for models which have no parameters, + # like `nn.Relu()`, `nn.AvgPool2d`, etc. + batch = torch.ones(()).new_empty((1, *input_shape)) + + _ = flops_model(batch) + + flops_count, params_count = flops_model.compute_average_flops_cost() + if print_per_layer_stat: + print_model_with_flops( + flops_model, flops_count, params_count, ost=ost, flush=flush) + flops_model.stop_flops_count() + + if as_strings: + return flops_to_string(flops_count), params_to_string(params_count) + + return flops_count, params_count + + +def flops_to_string(flops, units='GFLOPs', precision=2): + """Convert FLOPs number into a string. + + Note that Here we take a multiply-add counts as one FLOP. + + Args: + flops (float): FLOPs number to be converted. + units (str | None): Converted FLOPs units. Options are None, 'GFLOPs', + 'MFLOPs', 'KFLOPs', 'FLOPs'. If set to None, it will automatically + choose the most suitable unit for FLOPs. Default: 'GFLOPs'. + precision (int): Digit number after the decimal point. Default: 2. + + Returns: + str: The converted FLOPs number with units. 
+ + Examples: + >>> flops_to_string(1e9) + '1.0 GFLOPs' + >>> flops_to_string(2e5, 'MFLOPs') + '0.2 MFLOPs' + >>> flops_to_string(3e-9, None) + '3e-09 FLOPs' + """ + if units is None: + if flops // 10**9 > 0: + return str(round(flops / 10.**9, precision)) + ' GFLOPs' + elif flops // 10**6 > 0: + return str(round(flops / 10.**6, precision)) + ' MFLOPs' + elif flops // 10**3 > 0: + return str(round(flops / 10.**3, precision)) + ' KFLOPs' + else: + return str(flops) + ' FLOPs' + else: + if units == 'GFLOPs': + return str(round(flops / 10.**9, precision)) + ' ' + units + elif units == 'MFLOPs': + return str(round(flops / 10.**6, precision)) + ' ' + units + elif units == 'KFLOPs': + return str(round(flops / 10.**3, precision)) + ' ' + units + else: + return str(flops) + ' FLOPs' + + +def params_to_string(num_params, units=None, precision=2): + """Convert parameter number into a string. + + Args: + num_params (float): Parameter number to be converted. + units (str | None): Converted FLOPs units. Options are None, 'M', + 'K' and ''. If set to None, it will automatically choose the most + suitable unit for Parameter number. Default: None. + precision (int): Digit number after the decimal point. Default: 2. + + Returns: + str: The converted parameter number with units. + + Examples: + >>> params_to_string(1e9) + '1000.0 M' + >>> params_to_string(2e5) + '200.0 k' + >>> params_to_string(3e-9) + '3e-09' + """ + if units is None: + if num_params // 10**6 > 0: + return str(round(num_params / 10**6, precision)) + ' M' + elif num_params // 10**3: + return str(round(num_params / 10**3, precision)) + ' k' + else: + return str(num_params) + else: + if units == 'M': + return str(round(num_params / 10.**6, precision)) + ' ' + units + elif units == 'K': + return str(round(num_params / 10.**3, precision)) + ' ' + units + else: + return str(num_params) + + +def print_model_with_flops(model, + total_flops, + total_params, + units='GFLOPs', + precision=3, + ost=sys.stdout, + flush=False): + """Print a model with FLOPs for each layer. + + Args: + model (nn.Module): The model to be printed. + total_flops (float): Total FLOPs of the model. + total_params (float): Total parameter counts of the model. + units (str | None): Converted FLOPs units. Default: 'GFLOPs'. + precision (int): Digit number after the decimal point. Default: 3. + ost (stream): same as `file` param in :func:`print`. + Default: sys.stdout. + flush (bool): same as that in :func:`print`. Default: False. 
+ + Example: + >>> class ExampleModel(nn.Module): + + >>> def __init__(self): + >>> super().__init__() + >>> self.conv1 = nn.Conv2d(3, 8, 3) + >>> self.conv2 = nn.Conv2d(8, 256, 3) + >>> self.conv3 = nn.Conv2d(256, 8, 3) + >>> self.avg_pool = nn.AdaptiveAvgPool2d((1, 1)) + >>> self.flatten = nn.Flatten() + >>> self.fc = nn.Linear(8, 1) + + >>> def forward(self, x): + >>> x = self.conv1(x) + >>> x = self.conv2(x) + >>> x = self.conv3(x) + >>> x = self.avg_pool(x) + >>> x = self.flatten(x) + >>> x = self.fc(x) + >>> return x + + >>> model = ExampleModel() + >>> x = (3, 16, 16) + to print the complexity information state for each layer, you can use + >>> get_model_complexity_info(model, x) + or directly use + >>> print_model_with_flops(model, 4579784.0, 37361) + ExampleModel( + 0.037 M, 100.000% Params, 0.005 GFLOPs, 100.000% FLOPs, + (conv1): Conv2d(0.0 M, 0.600% Params, 0.0 GFLOPs, 0.959% FLOPs, 3, 8, kernel_size=(3, 3), stride=(1, 1)) # noqa: E501 + (conv2): Conv2d(0.019 M, 50.020% Params, 0.003 GFLOPs, 58.760% FLOPs, 8, 256, kernel_size=(3, 3), stride=(1, 1)) + (conv3): Conv2d(0.018 M, 49.356% Params, 0.002 GFLOPs, 40.264% FLOPs, 256, 8, kernel_size=(3, 3), stride=(1, 1)) + (avg_pool): AdaptiveAvgPool2d(0.0 M, 0.000% Params, 0.0 GFLOPs, 0.017% FLOPs, output_size=(1, 1)) + (flatten): Flatten(0.0 M, 0.000% Params, 0.0 GFLOPs, 0.000% FLOPs, ) + (fc): Linear(0.0 M, 0.024% Params, 0.0 GFLOPs, 0.000% FLOPs, in_features=8, out_features=1, bias=True) + ) + """ + + def accumulate_params(self): + if is_supported_instance(self): + return self.__params__ + else: + sum = 0 + for m in self.children(): + sum += m.accumulate_params() + return sum + + def accumulate_flops(self): + if is_supported_instance(self): + return self.__flops__ / model.__batch_counter__ + else: + sum = 0 + for m in self.children(): + sum += m.accumulate_flops() + return sum + + def flops_repr(self): + accumulated_num_params = self.accumulate_params() + accumulated_flops_cost = self.accumulate_flops() + return ', '.join([ + params_to_string( + accumulated_num_params, units='M', precision=precision), + '{:.3%} Params'.format(accumulated_num_params / total_params), + flops_to_string( + accumulated_flops_cost, units=units, precision=precision), + '{:.3%} FLOPs'.format(accumulated_flops_cost / total_flops), + self.original_extra_repr() + ]) + + def add_extra_repr(m): + m.accumulate_flops = accumulate_flops.__get__(m) + m.accumulate_params = accumulate_params.__get__(m) + flops_extra_repr = flops_repr.__get__(m) + if m.extra_repr != flops_extra_repr: + m.original_extra_repr = m.extra_repr + m.extra_repr = flops_extra_repr + assert m.extra_repr != m.original_extra_repr + + def del_extra_repr(m): + if hasattr(m, 'original_extra_repr'): + m.extra_repr = m.original_extra_repr + del m.original_extra_repr + if hasattr(m, 'accumulate_flops'): + del m.accumulate_flops + + model.apply(add_extra_repr) + print(model, file=ost, flush=flush) + model.apply(del_extra_repr) + + +def get_model_parameters_number(model): + """Calculate parameter number of a model. + + Args: + model (nn.module): The model for parameter number calculation. + + Returns: + float: Parameter number of the model. 
+ """ + num_params = sum(p.numel() for p in model.parameters() if p.requires_grad) + return num_params + + +def add_flops_counting_methods(net_main_module): + # adding additional methods to the existing module object, + # this is done this way so that each function has access to self object + net_main_module.start_flops_count = start_flops_count.__get__( + net_main_module) + net_main_module.stop_flops_count = stop_flops_count.__get__( + net_main_module) + net_main_module.reset_flops_count = reset_flops_count.__get__( + net_main_module) + net_main_module.compute_average_flops_cost = compute_average_flops_cost.__get__( # noqa: E501 + net_main_module) + + net_main_module.reset_flops_count() + + return net_main_module + + +def compute_average_flops_cost(self): + """Compute average FLOPs cost. + + A method to compute average FLOPs cost, which will be available after + `add_flops_counting_methods()` is called on a desired net object. + + Returns: + float: Current mean flops consumption per image. + """ + batches_count = self.__batch_counter__ + flops_sum = 0 + for module in self.modules(): + if is_supported_instance(module): + flops_sum += module.__flops__ + params_sum = get_model_parameters_number(self) + return flops_sum / batches_count, params_sum + + +def start_flops_count(self): + """Activate the computation of mean flops consumption per image. + + A method to activate the computation of mean flops consumption per image. + which will be available after ``add_flops_counting_methods()`` is called on + a desired net object. It should be called before running the network. + """ + add_batch_counter_hook_function(self) + + def add_flops_counter_hook_function(module): + if is_supported_instance(module): + if hasattr(module, '__flops_handle__'): + return + + else: + handle = module.register_forward_hook( + get_modules_mapping()[type(module)]) + + module.__flops_handle__ = handle + + self.apply(partial(add_flops_counter_hook_function)) + + +def stop_flops_count(self): + """Stop computing the mean flops consumption per image. + + A method to stop computing the mean flops consumption per image, which will + be available after ``add_flops_counting_methods()`` is called on a desired + net object. It can be called to pause the computation whenever. + """ + remove_batch_counter_hook_function(self) + self.apply(remove_flops_counter_hook_function) + + +def reset_flops_count(self): + """Reset statistics computed so far. + + A method to Reset computed statistics, which will be available after + `add_flops_counting_methods()` is called on a desired net object. 
+ """ + add_batch_counter_variables_or_reset(self) + self.apply(add_flops_counter_variable_or_reset) + + +# ---- Internal functions +def empty_flops_counter_hook(module, input, output): + module.__flops__ += 0 + + +def upsample_flops_counter_hook(module, input, output): + output_size = output[0] + batch_size = output_size.shape[0] + output_elements_count = batch_size + for val in output_size.shape[1:]: + output_elements_count *= val + module.__flops__ += int(output_elements_count) + + +def relu_flops_counter_hook(module, input, output): + active_elements_count = output.numel() + module.__flops__ += int(active_elements_count) + + +def linear_flops_counter_hook(module, input, output): + input = input[0] + output_last_dim = output.shape[ + -1] # pytorch checks dimensions, so here we don't care much + module.__flops__ += int(np.prod(input.shape) * output_last_dim) + + +def pool_flops_counter_hook(module, input, output): + input = input[0] + module.__flops__ += int(np.prod(input.shape)) + + +def norm_flops_counter_hook(module, input, output): + input = input[0] + + batch_flops = np.prod(input.shape) + if (getattr(module, 'affine', False) + or getattr(module, 'elementwise_affine', False)): + batch_flops *= 2 + module.__flops__ += int(batch_flops) + + +def deconv_flops_counter_hook(conv_module, input, output): + # Can have multiple inputs, getting the first one + input = input[0] + + batch_size = input.shape[0] + input_height, input_width = input.shape[2:] + + kernel_height, kernel_width = conv_module.kernel_size + in_channels = conv_module.in_channels + out_channels = conv_module.out_channels + groups = conv_module.groups + + filters_per_channel = out_channels // groups + conv_per_position_flops = ( + kernel_height * kernel_width * in_channels * filters_per_channel) + + active_elements_count = batch_size * input_height * input_width + overall_conv_flops = conv_per_position_flops * active_elements_count + bias_flops = 0 + if conv_module.bias is not None: + output_height, output_width = output.shape[2:] + bias_flops = out_channels * batch_size * output_height * output_height + overall_flops = overall_conv_flops + bias_flops + + conv_module.__flops__ += int(overall_flops) + + +def conv_flops_counter_hook(conv_module, input, output): + # Can have multiple inputs, getting the first one + input = input[0] + + batch_size = input.shape[0] + output_dims = list(output.shape[2:]) + + kernel_dims = list(conv_module.kernel_size) + in_channels = conv_module.in_channels + out_channels = conv_module.out_channels + groups = conv_module.groups + + filters_per_channel = out_channels // groups + conv_per_position_flops = int( + np.prod(kernel_dims)) * in_channels * filters_per_channel + + active_elements_count = batch_size * int(np.prod(output_dims)) + + overall_conv_flops = conv_per_position_flops * active_elements_count + + bias_flops = 0 + + if conv_module.bias is not None: + + bias_flops = out_channels * active_elements_count + + overall_flops = overall_conv_flops + bias_flops + + conv_module.__flops__ += int(overall_flops) + + +def batch_counter_hook(module, input, output): + batch_size = 1 + if len(input) > 0: + # Can have multiple inputs, getting the first one + input = input[0] + batch_size = len(input) + else: + pass + print('Warning! 
No positional inputs found for a module, ' + 'assuming batch size is 1.') + module.__batch_counter__ += batch_size + + +def add_batch_counter_variables_or_reset(module): + + module.__batch_counter__ = 0 + + +def add_batch_counter_hook_function(module): + if hasattr(module, '__batch_counter_handle__'): + return + + handle = module.register_forward_hook(batch_counter_hook) + module.__batch_counter_handle__ = handle + + +def remove_batch_counter_hook_function(module): + if hasattr(module, '__batch_counter_handle__'): + module.__batch_counter_handle__.remove() + del module.__batch_counter_handle__ + + +def add_flops_counter_variable_or_reset(module): + if is_supported_instance(module): + if hasattr(module, '__flops__') or hasattr(module, '__params__'): + print('Warning: variables __flops__ or __params__ are already ' + 'defined for the module' + type(module).__name__ + + ' ptflops can affect your code!') + module.__flops__ = 0 + module.__params__ = get_model_parameters_number(module) + + +def is_supported_instance(module): + if type(module) in get_modules_mapping(): + return True + return False + + +def remove_flops_counter_hook_function(module): + if is_supported_instance(module): + if hasattr(module, '__flops_handle__'): + module.__flops_handle__.remove() + del module.__flops_handle__ + + +def get_modules_mapping(): + return { + # convolutions + nn.Conv1d: conv_flops_counter_hook, + nn.Conv2d: conv_flops_counter_hook, + mmcv.cnn.bricks.Conv2d: conv_flops_counter_hook, + nn.Conv3d: conv_flops_counter_hook, + mmcv.cnn.bricks.Conv3d: conv_flops_counter_hook, + # activations + nn.ReLU: relu_flops_counter_hook, + nn.PReLU: relu_flops_counter_hook, + nn.ELU: relu_flops_counter_hook, + nn.LeakyReLU: relu_flops_counter_hook, + nn.ReLU6: relu_flops_counter_hook, + # poolings + nn.MaxPool1d: pool_flops_counter_hook, + nn.AvgPool1d: pool_flops_counter_hook, + nn.AvgPool2d: pool_flops_counter_hook, + nn.MaxPool2d: pool_flops_counter_hook, + mmcv.cnn.bricks.MaxPool2d: pool_flops_counter_hook, + nn.MaxPool3d: pool_flops_counter_hook, + mmcv.cnn.bricks.MaxPool3d: pool_flops_counter_hook, + nn.AvgPool3d: pool_flops_counter_hook, + nn.AdaptiveMaxPool1d: pool_flops_counter_hook, + nn.AdaptiveAvgPool1d: pool_flops_counter_hook, + nn.AdaptiveMaxPool2d: pool_flops_counter_hook, + nn.AdaptiveAvgPool2d: pool_flops_counter_hook, + nn.AdaptiveMaxPool3d: pool_flops_counter_hook, + nn.AdaptiveAvgPool3d: pool_flops_counter_hook, + # normalizations + nn.BatchNorm1d: norm_flops_counter_hook, + nn.BatchNorm2d: norm_flops_counter_hook, + nn.BatchNorm3d: norm_flops_counter_hook, + nn.GroupNorm: norm_flops_counter_hook, + nn.InstanceNorm1d: norm_flops_counter_hook, + nn.InstanceNorm2d: norm_flops_counter_hook, + nn.InstanceNorm3d: norm_flops_counter_hook, + nn.LayerNorm: norm_flops_counter_hook, + # FC + nn.Linear: linear_flops_counter_hook, + mmcv.cnn.bricks.Linear: linear_flops_counter_hook, + # Upscale + nn.Upsample: upsample_flops_counter_hook, + # Deconvolution + nn.ConvTranspose2d: deconv_flops_counter_hook, + mmcv.cnn.bricks.ConvTranspose2d: deconv_flops_counter_hook, + } diff --git a/annotator/uniformer/mmcv/cnn/utils/fuse_conv_bn.py b/annotator/uniformer/mmcv/cnn/utils/fuse_conv_bn.py new file mode 100644 index 0000000000000000000000000000000000000000..cb7076f80bf37f7931185bf0293ffcc1ce19c8ef --- /dev/null +++ b/annotator/uniformer/mmcv/cnn/utils/fuse_conv_bn.py @@ -0,0 +1,59 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
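Before the conv-bn fusion helper below, here is a hedged usage sketch for the FLOPs counter that `flops_counter.py` just defined (hypothetical and not part of the diff; it assumes the vendored package imports cleanly, and note the docstring's convention of counting one multiply-add as one FLOP):

```python
import torch.nn as nn
from annotator.uniformer.mmcv.cnn.utils import get_model_complexity_info

model = nn.Sequential(
    nn.Conv2d(3, 16, 3, padding=1),  # counted by conv_flops_counter_hook
    nn.ReLU(inplace=True),           # counted by relu_flops_counter_hook
    nn.AdaptiveAvgPool2d(1),         # counted by pool_flops_counter_hook
    nn.Flatten(),                    # unsupported -> contributes 0 FLOPs
    nn.Linear(16, 10),               # counted by linear_flops_counter_hook
)

# as_strings=True would round this tiny model to '0.0 GFLOPs';
# raw floats are clearer here.
flops, params = get_model_complexity_info(
    model, (3, 32, 32), print_per_layer_stat=False, as_strings=False)
print(f'{flops:.0f} FLOPs (multiply-adds), {int(params)} parameters')
```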
+import torch
+import torch.nn as nn
+
+
+def _fuse_conv_bn(conv, bn):
+    """Fuse conv and bn into one module.
+
+    Args:
+        conv (nn.Module): Conv to be fused.
+        bn (nn.Module): BN to be fused.
+
+    Returns:
+        nn.Module: Fused module.
+    """
+    conv_w = conv.weight
+    conv_b = conv.bias if conv.bias is not None else torch.zeros_like(
+        bn.running_mean)
+
+    factor = bn.weight / torch.sqrt(bn.running_var + bn.eps)
+    conv.weight = nn.Parameter(conv_w *
+                               factor.reshape([conv.out_channels, 1, 1, 1]))
+    conv.bias = nn.Parameter((conv_b - bn.running_mean) * factor + bn.bias)
+    return conv
+
+
+def fuse_conv_bn(module):
+    """Recursively fuse conv and bn in a module.
+
+    During inference, batch norm layers apply only their stored per-channel
+    mean and variance, which makes it possible to fuse them into the
+    preceding conv layers to save computation and simplify the network
+    structure.
+
+    Args:
+        module (nn.Module): Module to be fused.
+
+    Returns:
+        nn.Module: Fused module.
+    """
+    last_conv = None
+    last_conv_name = None
+
+    for name, child in module.named_children():
+        if isinstance(child,
+                      (nn.modules.batchnorm._BatchNorm, nn.SyncBatchNorm)):
+            if last_conv is None:  # only fuse BN that is after Conv
+                continue
+            fused_conv = _fuse_conv_bn(last_conv, child)
+            module._modules[last_conv_name] = fused_conv
+            # To reduce changes, set BN as Identity instead of deleting it.
+            module._modules[name] = nn.Identity()
+            last_conv = None
+        elif isinstance(child, nn.Conv2d):
+            last_conv = child
+            last_conv_name = name
+        else:
+            fuse_conv_bn(child)
+    return module
diff --git a/annotator/uniformer/mmcv/cnn/utils/sync_bn.py b/annotator/uniformer/mmcv/cnn/utils/sync_bn.py
new file mode 100644
index 0000000000000000000000000000000000000000..f78f39181d75bb85c53e8c7c8eaf45690e9f0bee
--- /dev/null
+++ b/annotator/uniformer/mmcv/cnn/utils/sync_bn.py
@@ -0,0 +1,59 @@
+import torch
+
+import annotator.uniformer.mmcv as mmcv
+
+
+class _BatchNormXd(torch.nn.modules.batchnorm._BatchNorm):
+    """A general BatchNorm layer without input dimension check.
+
+    Reproduced from @kapily's work:
+    (https://github.com/pytorch/pytorch/issues/41081#issuecomment-783961547)
+    The only difference between BatchNorm1d, BatchNorm2d, BatchNorm3d, etc.
+    is `_check_input_dim`, which is designed for tensor sanity checks.
+    The check has been bypassed in this class for the convenience of
+    converting SyncBatchNorm.
+    """
+
+    def _check_input_dim(self, input):
+        return
+
+
+def revert_sync_batchnorm(module):
+    """Helper function to convert all `SyncBatchNorm` (SyncBN) and
+    `mmcv.ops.sync_bn.SyncBatchNorm` (MMSyncBN) layers in the model to
+    `BatchNormXd` layers.
+
+    Adapted from @kapily's work:
+    (https://github.com/pytorch/pytorch/issues/41081#issuecomment-783961547)
+
+    Args:
+        module (nn.Module): The module containing `SyncBatchNorm` layers.
+
+    Returns:
+        module_output: The converted module with `BatchNormXd` layers.
+ """ + module_output = module + module_checklist = [torch.nn.modules.batchnorm.SyncBatchNorm] + if hasattr(mmcv, 'ops'): + module_checklist.append(mmcv.ops.SyncBatchNorm) + if isinstance(module, tuple(module_checklist)): + module_output = _BatchNormXd(module.num_features, module.eps, + module.momentum, module.affine, + module.track_running_stats) + if module.affine: + # no_grad() may not be needed here but + # just to be consistent with `convert_sync_batchnorm()` + with torch.no_grad(): + module_output.weight = module.weight + module_output.bias = module.bias + module_output.running_mean = module.running_mean + module_output.running_var = module.running_var + module_output.num_batches_tracked = module.num_batches_tracked + module_output.training = module.training + # qconfig exists in quantized models + if hasattr(module, 'qconfig'): + module_output.qconfig = module.qconfig + for name, child in module.named_children(): + module_output.add_module(name, revert_sync_batchnorm(child)) + del module + return module_output diff --git a/annotator/uniformer/mmcv/cnn/utils/weight_init.py b/annotator/uniformer/mmcv/cnn/utils/weight_init.py new file mode 100644 index 0000000000000000000000000000000000000000..287a1d0bffe26e023029d48634d9b761deda7ba4 --- /dev/null +++ b/annotator/uniformer/mmcv/cnn/utils/weight_init.py @@ -0,0 +1,684 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import copy +import math +import warnings + +import numpy as np +import torch +import torch.nn as nn +from torch import Tensor + +from annotator.uniformer.mmcv.utils import Registry, build_from_cfg, get_logger, print_log + +INITIALIZERS = Registry('initializer') + + +def update_init_info(module, init_info): + """Update the `_params_init_info` in the module if the value of parameters + are changed. + + Args: + module (obj:`nn.Module`): The module of PyTorch with a user-defined + attribute `_params_init_info` which records the initialization + information. + init_info (str): The string that describes the initialization. + """ + assert hasattr( + module, + '_params_init_info'), f'Can not find `_params_init_info` in {module}' + for name, param in module.named_parameters(): + + assert param in module._params_init_info, ( + f'Find a new :obj:`Parameter` ' + f'named `{name}` during executing the ' + f'`init_weights` of ' + f'`{module.__class__.__name__}`. ' + f'Please do not add or ' + f'replace parameters during executing ' + f'the `init_weights`. 
') + + # The parameter has been changed during executing the + # `init_weights` of module + mean_value = param.data.mean() + if module._params_init_info[param]['tmp_mean_value'] != mean_value: + module._params_init_info[param]['init_info'] = init_info + module._params_init_info[param]['tmp_mean_value'] = mean_value + + +def constant_init(module, val, bias=0): + if hasattr(module, 'weight') and module.weight is not None: + nn.init.constant_(module.weight, val) + if hasattr(module, 'bias') and module.bias is not None: + nn.init.constant_(module.bias, bias) + + +def xavier_init(module, gain=1, bias=0, distribution='normal'): + assert distribution in ['uniform', 'normal'] + if hasattr(module, 'weight') and module.weight is not None: + if distribution == 'uniform': + nn.init.xavier_uniform_(module.weight, gain=gain) + else: + nn.init.xavier_normal_(module.weight, gain=gain) + if hasattr(module, 'bias') and module.bias is not None: + nn.init.constant_(module.bias, bias) + + +def normal_init(module, mean=0, std=1, bias=0): + if hasattr(module, 'weight') and module.weight is not None: + nn.init.normal_(module.weight, mean, std) + if hasattr(module, 'bias') and module.bias is not None: + nn.init.constant_(module.bias, bias) + + +def trunc_normal_init(module: nn.Module, + mean: float = 0, + std: float = 1, + a: float = -2, + b: float = 2, + bias: float = 0) -> None: + if hasattr(module, 'weight') and module.weight is not None: + trunc_normal_(module.weight, mean, std, a, b) # type: ignore + if hasattr(module, 'bias') and module.bias is not None: + nn.init.constant_(module.bias, bias) # type: ignore + + +def uniform_init(module, a=0, b=1, bias=0): + if hasattr(module, 'weight') and module.weight is not None: + nn.init.uniform_(module.weight, a, b) + if hasattr(module, 'bias') and module.bias is not None: + nn.init.constant_(module.bias, bias) + + +def kaiming_init(module, + a=0, + mode='fan_out', + nonlinearity='relu', + bias=0, + distribution='normal'): + assert distribution in ['uniform', 'normal'] + if hasattr(module, 'weight') and module.weight is not None: + if distribution == 'uniform': + nn.init.kaiming_uniform_( + module.weight, a=a, mode=mode, nonlinearity=nonlinearity) + else: + nn.init.kaiming_normal_( + module.weight, a=a, mode=mode, nonlinearity=nonlinearity) + if hasattr(module, 'bias') and module.bias is not None: + nn.init.constant_(module.bias, bias) + + +def caffe2_xavier_init(module, bias=0): + # `XavierFill` in Caffe2 corresponds to `kaiming_uniform_` in PyTorch + # Acknowledgment to FAIR's internal code + kaiming_init( + module, + a=1, + mode='fan_in', + nonlinearity='leaky_relu', + bias=bias, + distribution='uniform') + + +def bias_init_with_prob(prior_prob): + """initialize conv/fc bias value according to a given probability value.""" + bias_init = float(-np.log((1 - prior_prob) / prior_prob)) + return bias_init + + +def _get_bases_name(m): + return [b.__name__ for b in m.__class__.__bases__] + + +class BaseInit(object): + + def __init__(self, *, bias=0, bias_prob=None, layer=None): + self.wholemodule = False + if not isinstance(bias, (int, float)): + raise TypeError(f'bias must be a number, but got a {type(bias)}') + + if bias_prob is not None: + if not isinstance(bias_prob, float): + raise TypeError(f'bias_prob type must be float, \ + but got {type(bias_prob)}') + + if layer is not None: + if not isinstance(layer, (str, list)): + raise TypeError(f'layer must be a str or a list of str, \ + but got a {type(layer)}') + else: + layer = [] + + if bias_prob is not None: + self.bias 
= bias_init_with_prob(bias_prob) + else: + self.bias = bias + self.layer = [layer] if isinstance(layer, str) else layer + + def _get_init_info(self): + info = f'{self.__class__.__name__}, bias={self.bias}' + return info + + +@INITIALIZERS.register_module(name='Constant') +class ConstantInit(BaseInit): + """Initialize module parameters with constant values. + + Args: + val (int | float): the value to fill the weights in the module with + bias (int | float): the value to fill the bias. Defaults to 0. + bias_prob (float, optional): the probability for bias initialization. + Defaults to None. + layer (str | list[str], optional): the layer will be initialized. + Defaults to None. + """ + + def __init__(self, val, **kwargs): + super().__init__(**kwargs) + self.val = val + + def __call__(self, module): + + def init(m): + if self.wholemodule: + constant_init(m, self.val, self.bias) + else: + layername = m.__class__.__name__ + basesname = _get_bases_name(m) + if len(set(self.layer) & set([layername] + basesname)): + constant_init(m, self.val, self.bias) + + module.apply(init) + if hasattr(module, '_params_init_info'): + update_init_info(module, init_info=self._get_init_info()) + + def _get_init_info(self): + info = f'{self.__class__.__name__}: val={self.val}, bias={self.bias}' + return info + + +@INITIALIZERS.register_module(name='Xavier') +class XavierInit(BaseInit): + r"""Initialize module parameters with values according to the method + described in `Understanding the difficulty of training deep feedforward + neural networks - Glorot, X. & Bengio, Y. (2010). + `_ + + Args: + gain (int | float): an optional scaling factor. Defaults to 1. + bias (int | float): the value to fill the bias. Defaults to 0. + bias_prob (float, optional): the probability for bias initialization. + Defaults to None. + distribution (str): distribution either be ``'normal'`` + or ``'uniform'``. Defaults to ``'normal'``. + layer (str | list[str], optional): the layer will be initialized. + Defaults to None. + """ + + def __init__(self, gain=1, distribution='normal', **kwargs): + super().__init__(**kwargs) + self.gain = gain + self.distribution = distribution + + def __call__(self, module): + + def init(m): + if self.wholemodule: + xavier_init(m, self.gain, self.bias, self.distribution) + else: + layername = m.__class__.__name__ + basesname = _get_bases_name(m) + if len(set(self.layer) & set([layername] + basesname)): + xavier_init(m, self.gain, self.bias, self.distribution) + + module.apply(init) + if hasattr(module, '_params_init_info'): + update_init_info(module, init_info=self._get_init_info()) + + def _get_init_info(self): + info = f'{self.__class__.__name__}: gain={self.gain}, ' \ + f'distribution={self.distribution}, bias={self.bias}' + return info + + +@INITIALIZERS.register_module(name='Normal') +class NormalInit(BaseInit): + r"""Initialize module parameters with the values drawn from the normal + distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)`. + + Args: + mean (int | float):the mean of the normal distribution. Defaults to 0. + std (int | float): the standard deviation of the normal distribution. + Defaults to 1. + bias (int | float): the value to fill the bias. Defaults to 0. + bias_prob (float, optional): the probability for bias initialization. + Defaults to None. + layer (str | list[str], optional): the layer will be initialized. + Defaults to None. 
+ + """ + + def __init__(self, mean=0, std=1, **kwargs): + super().__init__(**kwargs) + self.mean = mean + self.std = std + + def __call__(self, module): + + def init(m): + if self.wholemodule: + normal_init(m, self.mean, self.std, self.bias) + else: + layername = m.__class__.__name__ + basesname = _get_bases_name(m) + if len(set(self.layer) & set([layername] + basesname)): + normal_init(m, self.mean, self.std, self.bias) + + module.apply(init) + if hasattr(module, '_params_init_info'): + update_init_info(module, init_info=self._get_init_info()) + + def _get_init_info(self): + info = f'{self.__class__.__name__}: mean={self.mean},' \ + f' std={self.std}, bias={self.bias}' + return info + + +@INITIALIZERS.register_module(name='TruncNormal') +class TruncNormalInit(BaseInit): + r"""Initialize module parameters with the values drawn from the normal + distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)` with values + outside :math:`[a, b]`. + + Args: + mean (float): the mean of the normal distribution. Defaults to 0. + std (float): the standard deviation of the normal distribution. + Defaults to 1. + a (float): The minimum cutoff value. + b ( float): The maximum cutoff value. + bias (float): the value to fill the bias. Defaults to 0. + bias_prob (float, optional): the probability for bias initialization. + Defaults to None. + layer (str | list[str], optional): the layer will be initialized. + Defaults to None. + + """ + + def __init__(self, + mean: float = 0, + std: float = 1, + a: float = -2, + b: float = 2, + **kwargs) -> None: + super().__init__(**kwargs) + self.mean = mean + self.std = std + self.a = a + self.b = b + + def __call__(self, module: nn.Module) -> None: + + def init(m): + if self.wholemodule: + trunc_normal_init(m, self.mean, self.std, self.a, self.b, + self.bias) + else: + layername = m.__class__.__name__ + basesname = _get_bases_name(m) + if len(set(self.layer) & set([layername] + basesname)): + trunc_normal_init(m, self.mean, self.std, self.a, self.b, + self.bias) + + module.apply(init) + if hasattr(module, '_params_init_info'): + update_init_info(module, init_info=self._get_init_info()) + + def _get_init_info(self): + info = f'{self.__class__.__name__}: a={self.a}, b={self.b},' \ + f' mean={self.mean}, std={self.std}, bias={self.bias}' + return info + + +@INITIALIZERS.register_module(name='Uniform') +class UniformInit(BaseInit): + r"""Initialize module parameters with values drawn from the uniform + distribution :math:`\mathcal{U}(a, b)`. + + Args: + a (int | float): the lower bound of the uniform distribution. + Defaults to 0. + b (int | float): the upper bound of the uniform distribution. + Defaults to 1. + bias (int | float): the value to fill the bias. Defaults to 0. + bias_prob (float, optional): the probability for bias initialization. + Defaults to None. + layer (str | list[str], optional): the layer will be initialized. + Defaults to None. 
+ """ + + def __init__(self, a=0, b=1, **kwargs): + super().__init__(**kwargs) + self.a = a + self.b = b + + def __call__(self, module): + + def init(m): + if self.wholemodule: + uniform_init(m, self.a, self.b, self.bias) + else: + layername = m.__class__.__name__ + basesname = _get_bases_name(m) + if len(set(self.layer) & set([layername] + basesname)): + uniform_init(m, self.a, self.b, self.bias) + + module.apply(init) + if hasattr(module, '_params_init_info'): + update_init_info(module, init_info=self._get_init_info()) + + def _get_init_info(self): + info = f'{self.__class__.__name__}: a={self.a},' \ + f' b={self.b}, bias={self.bias}' + return info + + +@INITIALIZERS.register_module(name='Kaiming') +class KaimingInit(BaseInit): + r"""Initialize module parameters with the values according to the method + described in `Delving deep into rectifiers: Surpassing human-level + performance on ImageNet classification - He, K. et al. (2015). + `_ + + Args: + a (int | float): the negative slope of the rectifier used after this + layer (only used with ``'leaky_relu'``). Defaults to 0. + mode (str): either ``'fan_in'`` or ``'fan_out'``. Choosing + ``'fan_in'`` preserves the magnitude of the variance of the weights + in the forward pass. Choosing ``'fan_out'`` preserves the + magnitudes in the backwards pass. Defaults to ``'fan_out'``. + nonlinearity (str): the non-linear function (`nn.functional` name), + recommended to use only with ``'relu'`` or ``'leaky_relu'`` . + Defaults to 'relu'. + bias (int | float): the value to fill the bias. Defaults to 0. + bias_prob (float, optional): the probability for bias initialization. + Defaults to None. + distribution (str): distribution either be ``'normal'`` or + ``'uniform'``. Defaults to ``'normal'``. + layer (str | list[str], optional): the layer will be initialized. + Defaults to None. + """ + + def __init__(self, + a=0, + mode='fan_out', + nonlinearity='relu', + distribution='normal', + **kwargs): + super().__init__(**kwargs) + self.a = a + self.mode = mode + self.nonlinearity = nonlinearity + self.distribution = distribution + + def __call__(self, module): + + def init(m): + if self.wholemodule: + kaiming_init(m, self.a, self.mode, self.nonlinearity, + self.bias, self.distribution) + else: + layername = m.__class__.__name__ + basesname = _get_bases_name(m) + if len(set(self.layer) & set([layername] + basesname)): + kaiming_init(m, self.a, self.mode, self.nonlinearity, + self.bias, self.distribution) + + module.apply(init) + if hasattr(module, '_params_init_info'): + update_init_info(module, init_info=self._get_init_info()) + + def _get_init_info(self): + info = f'{self.__class__.__name__}: a={self.a}, mode={self.mode}, ' \ + f'nonlinearity={self.nonlinearity}, ' \ + f'distribution ={self.distribution}, bias={self.bias}' + return info + + +@INITIALIZERS.register_module(name='Caffe2Xavier') +class Caffe2XavierInit(KaimingInit): + # `XavierFill` in Caffe2 corresponds to `kaiming_uniform_` in PyTorch + # Acknowledgment to FAIR's internal code + def __init__(self, **kwargs): + super().__init__( + a=1, + mode='fan_in', + nonlinearity='leaky_relu', + distribution='uniform', + **kwargs) + + def __call__(self, module): + super().__call__(module) + + +@INITIALIZERS.register_module(name='Pretrained') +class PretrainedInit(object): + """Initialize module by loading a pretrained model. + + Args: + checkpoint (str): the checkpoint file of the pretrained model should + be load. + prefix (str, optional): the prefix of a sub-module in the pretrained + model. 
it is for loading a part of the pretrained model to + initialize. For example, if we would like to only load the + backbone of a detector model, we can set ``prefix='backbone.'``. + Defaults to None. + map_location (str): map tensors into proper locations. + """ + + def __init__(self, checkpoint, prefix=None, map_location=None): + self.checkpoint = checkpoint + self.prefix = prefix + self.map_location = map_location + + def __call__(self, module): + from annotator.uniformer.mmcv.runner import (_load_checkpoint_with_prefix, load_checkpoint, + load_state_dict) + logger = get_logger('mmcv') + if self.prefix is None: + print_log(f'load model from: {self.checkpoint}', logger=logger) + load_checkpoint( + module, + self.checkpoint, + map_location=self.map_location, + strict=False, + logger=logger) + else: + print_log( + f'load {self.prefix} in model from: {self.checkpoint}', + logger=logger) + state_dict = _load_checkpoint_with_prefix( + self.prefix, self.checkpoint, map_location=self.map_location) + load_state_dict(module, state_dict, strict=False, logger=logger) + + if hasattr(module, '_params_init_info'): + update_init_info(module, init_info=self._get_init_info()) + + def _get_init_info(self): + info = f'{self.__class__.__name__}: load from {self.checkpoint}' + return info + + +def _initialize(module, cfg, wholemodule=False): + func = build_from_cfg(cfg, INITIALIZERS) + # wholemodule flag is for override mode, there is no layer key in override + # and initializer will give init values for the whole module with the name + # in override. + func.wholemodule = wholemodule + func(module) + + +def _initialize_override(module, override, cfg): + if not isinstance(override, (dict, list)): + raise TypeError(f'override must be a dict or a list of dict, \ + but got {type(override)}') + + override = [override] if isinstance(override, dict) else override + + for override_ in override: + + cp_override = copy.deepcopy(override_) + name = cp_override.pop('name', None) + if name is None: + raise ValueError('`override` must contain the key "name",' + f'but got {cp_override}') + # if override only has name key, it means use args in init_cfg + if not cp_override: + cp_override.update(cfg) + # if override has name key and other args except type key, it will + # raise error + elif 'type' not in cp_override.keys(): + raise ValueError( + f'`override` need "type" key, but got {cp_override}') + + if hasattr(module, name): + _initialize(getattr(module, name), cp_override, wholemodule=True) + else: + raise RuntimeError(f'module did not have attribute {name}, ' + f'but init_cfg is {cp_override}.') + + +def initialize(module, init_cfg): + """Initialize a module. + + Args: + module (``torch.nn.Module``): the module will be initialized. + init_cfg (dict | list[dict]): initialization configuration dict to + define initializer. OpenMMLab has implemented 6 initializers + including ``Constant``, ``Xavier``, ``Normal``, ``Uniform``, + ``Kaiming``, and ``Pretrained``. 
+ Example: + >>> module = nn.Linear(2, 3, bias=True) + >>> init_cfg = dict(type='Constant', layer='Linear', val =1 , bias =2) + >>> initialize(module, init_cfg) + + >>> module = nn.Sequential(nn.Conv1d(3, 1, 3), nn.Linear(1,2)) + >>> # define key ``'layer'`` for initializing layer with different + >>> # configuration + >>> init_cfg = [dict(type='Constant', layer='Conv1d', val=1), + dict(type='Constant', layer='Linear', val=2)] + >>> initialize(module, init_cfg) + + >>> # define key``'override'`` to initialize some specific part in + >>> # module + >>> class FooNet(nn.Module): + >>> def __init__(self): + >>> super().__init__() + >>> self.feat = nn.Conv2d(3, 16, 3) + >>> self.reg = nn.Conv2d(16, 10, 3) + >>> self.cls = nn.Conv2d(16, 5, 3) + >>> model = FooNet() + >>> init_cfg = dict(type='Constant', val=1, bias=2, layer='Conv2d', + >>> override=dict(type='Constant', name='reg', val=3, bias=4)) + >>> initialize(model, init_cfg) + + >>> model = ResNet(depth=50) + >>> # Initialize weights with the pretrained model. + >>> init_cfg = dict(type='Pretrained', + checkpoint='torchvision://resnet50') + >>> initialize(model, init_cfg) + + >>> # Initialize weights of a sub-module with the specific part of + >>> # a pretrained model by using "prefix". + >>> url = 'http://download.openmmlab.com/mmdetection/v2.0/retinanet/'\ + >>> 'retinanet_r50_fpn_1x_coco/'\ + >>> 'retinanet_r50_fpn_1x_coco_20200130-c2398f9e.pth' + >>> init_cfg = dict(type='Pretrained', + checkpoint=url, prefix='backbone.') + """ + if not isinstance(init_cfg, (dict, list)): + raise TypeError(f'init_cfg must be a dict or a list of dict, \ + but got {type(init_cfg)}') + + if isinstance(init_cfg, dict): + init_cfg = [init_cfg] + + for cfg in init_cfg: + # should deeply copy the original config because cfg may be used by + # other modules, e.g., one init_cfg shared by multiple bottleneck + # blocks, the expected cfg will be changed after pop and will change + # the initialization behavior of other modules + cp_cfg = copy.deepcopy(cfg) + override = cp_cfg.pop('override', None) + _initialize(module, cp_cfg) + + if override is not None: + cp_cfg.pop('layer', None) + _initialize_override(module, override, cp_cfg) + else: + # All attributes in module have same initialization. + pass + + +def _no_grad_trunc_normal_(tensor: Tensor, mean: float, std: float, a: float, + b: float) -> Tensor: + # Method based on + # https://people.sc.fsu.edu/~jburkardt/presentations/truncated_normal.pdf + # Modified from + # https://github.com/pytorch/pytorch/blob/master/torch/nn/init.py + def norm_cdf(x): + # Computes standard normal cumulative distribution function + return (1. + math.erf(x / math.sqrt(2.))) / 2. + + if (mean < a - 2 * std) or (mean > b + 2 * std): + warnings.warn( + 'mean is more than 2 std from [a, b] in nn.init.trunc_normal_. ' + 'The distribution of values may be incorrect.', + stacklevel=2) + + with torch.no_grad(): + # Values are generated by using a truncated uniform distribution and + # then using the inverse CDF for the normal distribution. + # Get upper and lower cdf values + lower = norm_cdf((a - mean) / std) + upper = norm_cdf((b - mean) / std) + + # Uniformly fill tensor with values from [lower, upper], then translate + # to [2lower-1, 2upper-1]. 
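+        # (Pushing this uniform sample through erfinv below is the inverse
+        # CDF of the standard normal, up to the sqrt(2) factor applied in
+        # mul_; the result is a normal sample truncated to [a, b].)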
+ tensor.uniform_(2 * lower - 1, 2 * upper - 1) + + # Use inverse cdf transform for normal distribution to get truncated + # standard normal + tensor.erfinv_() + + # Transform to proper mean, std + tensor.mul_(std * math.sqrt(2.)) + tensor.add_(mean) + + # Clamp to ensure it's in the proper range + tensor.clamp_(min=a, max=b) + return tensor + + +def trunc_normal_(tensor: Tensor, + mean: float = 0., + std: float = 1., + a: float = -2., + b: float = 2.) -> Tensor: + r"""Fills the input Tensor with values drawn from a truncated + normal distribution. The values are effectively drawn from the + normal distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)` + with values outside :math:`[a, b]` redrawn until they are within + the bounds. The method used for generating the random values works + best when :math:`a \leq \text{mean} \leq b`. + + Modified from + https://github.com/pytorch/pytorch/blob/master/torch/nn/init.py + + Args: + tensor (``torch.Tensor``): an n-dimensional `torch.Tensor`. + mean (float): the mean of the normal distribution. + std (float): the standard deviation of the normal distribution. + a (float): the minimum cutoff value. + b (float): the maximum cutoff value. + """ + return _no_grad_trunc_normal_(tensor, mean, std, a, b) diff --git a/annotator/uniformer/mmcv/cnn/vgg.py b/annotator/uniformer/mmcv/cnn/vgg.py new file mode 100644 index 0000000000000000000000000000000000000000..8778b649561a45a9652b1a15a26c2d171e58f3e1 --- /dev/null +++ b/annotator/uniformer/mmcv/cnn/vgg.py @@ -0,0 +1,175 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import logging + +import torch.nn as nn + +from .utils import constant_init, kaiming_init, normal_init + + +def conv3x3(in_planes, out_planes, dilation=1): + """3x3 convolution with padding.""" + return nn.Conv2d( + in_planes, + out_planes, + kernel_size=3, + padding=dilation, + dilation=dilation) + + +def make_vgg_layer(inplanes, + planes, + num_blocks, + dilation=1, + with_bn=False, + ceil_mode=False): + layers = [] + for _ in range(num_blocks): + layers.append(conv3x3(inplanes, planes, dilation)) + if with_bn: + layers.append(nn.BatchNorm2d(planes)) + layers.append(nn.ReLU(inplace=True)) + inplanes = planes + layers.append(nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=ceil_mode)) + + return layers + + +class VGG(nn.Module): + """VGG backbone. + + Args: + depth (int): Depth of vgg, from {11, 13, 16, 19}. + with_bn (bool): Use BatchNorm or not. + num_classes (int): number of classes for classification. + num_stages (int): VGG stages, normally 5. + dilations (Sequence[int]): Dilation of each stage. + out_indices (Sequence[int]): Output from which stages. + frozen_stages (int): Stages to be frozen (all param fixed). -1 means + not freezing any parameters. + bn_eval (bool): Whether to set BN layers as eval mode, namely, freeze + running stats (mean and var). + bn_frozen (bool): Whether to freeze weight and bias of BN layers. 
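+
+    Example (a minimal sketch; the input size is only illustrative):
+
+        >>> import torch
+        >>> model = VGG(depth=11)
+        >>> x = torch.randn(1, 3, 224, 224)
+        >>> outs = model(x)  # tuple with one feature map per stage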
+ """ + + arch_settings = { + 11: (1, 1, 2, 2, 2), + 13: (2, 2, 2, 2, 2), + 16: (2, 2, 3, 3, 3), + 19: (2, 2, 4, 4, 4) + } + + def __init__(self, + depth, + with_bn=False, + num_classes=-1, + num_stages=5, + dilations=(1, 1, 1, 1, 1), + out_indices=(0, 1, 2, 3, 4), + frozen_stages=-1, + bn_eval=True, + bn_frozen=False, + ceil_mode=False, + with_last_pool=True): + super(VGG, self).__init__() + if depth not in self.arch_settings: + raise KeyError(f'invalid depth {depth} for vgg') + assert num_stages >= 1 and num_stages <= 5 + stage_blocks = self.arch_settings[depth] + self.stage_blocks = stage_blocks[:num_stages] + assert len(dilations) == num_stages + assert max(out_indices) <= num_stages + + self.num_classes = num_classes + self.out_indices = out_indices + self.frozen_stages = frozen_stages + self.bn_eval = bn_eval + self.bn_frozen = bn_frozen + + self.inplanes = 3 + start_idx = 0 + vgg_layers = [] + self.range_sub_modules = [] + for i, num_blocks in enumerate(self.stage_blocks): + num_modules = num_blocks * (2 + with_bn) + 1 + end_idx = start_idx + num_modules + dilation = dilations[i] + planes = 64 * 2**i if i < 4 else 512 + vgg_layer = make_vgg_layer( + self.inplanes, + planes, + num_blocks, + dilation=dilation, + with_bn=with_bn, + ceil_mode=ceil_mode) + vgg_layers.extend(vgg_layer) + self.inplanes = planes + self.range_sub_modules.append([start_idx, end_idx]) + start_idx = end_idx + if not with_last_pool: + vgg_layers.pop(-1) + self.range_sub_modules[-1][1] -= 1 + self.module_name = 'features' + self.add_module(self.module_name, nn.Sequential(*vgg_layers)) + + if self.num_classes > 0: + self.classifier = nn.Sequential( + nn.Linear(512 * 7 * 7, 4096), + nn.ReLU(True), + nn.Dropout(), + nn.Linear(4096, 4096), + nn.ReLU(True), + nn.Dropout(), + nn.Linear(4096, num_classes), + ) + + def init_weights(self, pretrained=None): + if isinstance(pretrained, str): + logger = logging.getLogger() + from ..runner import load_checkpoint + load_checkpoint(self, pretrained, strict=False, logger=logger) + elif pretrained is None: + for m in self.modules(): + if isinstance(m, nn.Conv2d): + kaiming_init(m) + elif isinstance(m, nn.BatchNorm2d): + constant_init(m, 1) + elif isinstance(m, nn.Linear): + normal_init(m, std=0.01) + else: + raise TypeError('pretrained must be a str or None') + + def forward(self, x): + outs = [] + vgg_layers = getattr(self, self.module_name) + for i in range(len(self.stage_blocks)): + for j in range(*self.range_sub_modules[i]): + vgg_layer = vgg_layers[j] + x = vgg_layer(x) + if i in self.out_indices: + outs.append(x) + if self.num_classes > 0: + x = x.view(x.size(0), -1) + x = self.classifier(x) + outs.append(x) + if len(outs) == 1: + return outs[0] + else: + return tuple(outs) + + def train(self, mode=True): + super(VGG, self).train(mode) + if self.bn_eval: + for m in self.modules(): + if isinstance(m, nn.BatchNorm2d): + m.eval() + if self.bn_frozen: + for params in m.parameters(): + params.requires_grad = False + vgg_layers = getattr(self, self.module_name) + if mode and self.frozen_stages >= 0: + for i in range(self.frozen_stages): + for j in range(*self.range_sub_modules[i]): + mod = vgg_layers[j] + mod.eval() + for param in mod.parameters(): + param.requires_grad = False diff --git a/annotator/uniformer/mmcv/engine/__init__.py b/annotator/uniformer/mmcv/engine/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..3193b7f664e19ce2458d81c836597fa22e4bb082 --- /dev/null +++ b/annotator/uniformer/mmcv/engine/__init__.py @@ -0,0 +1,8 @@ +# Copyright 
(c) OpenMMLab. All rights reserved. +from .test import (collect_results_cpu, collect_results_gpu, multi_gpu_test, + single_gpu_test) + +__all__ = [ + 'collect_results_cpu', 'collect_results_gpu', 'multi_gpu_test', + 'single_gpu_test' +] diff --git a/annotator/uniformer/mmcv/engine/test.py b/annotator/uniformer/mmcv/engine/test.py new file mode 100644 index 0000000000000000000000000000000000000000..8dbeef271db634ec2dadfda3bc0b5ef9c7a677ff --- /dev/null +++ b/annotator/uniformer/mmcv/engine/test.py @@ -0,0 +1,202 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import os.path as osp +import pickle +import shutil +import tempfile +import time + +import torch +import torch.distributed as dist + +import annotator.uniformer.mmcv as mmcv +from annotator.uniformer.mmcv.runner import get_dist_info + + +def single_gpu_test(model, data_loader): + """Test model with a single gpu. + + This method tests model with a single gpu and displays test progress bar. + + Args: + model (nn.Module): Model to be tested. + data_loader (nn.Dataloader): Pytorch data loader. + + Returns: + list: The prediction results. + """ + model.eval() + results = [] + dataset = data_loader.dataset + prog_bar = mmcv.ProgressBar(len(dataset)) + for data in data_loader: + with torch.no_grad(): + result = model(return_loss=False, **data) + results.extend(result) + + # Assume result has the same length of batch_size + # refer to https://github.com/open-mmlab/mmcv/issues/985 + batch_size = len(result) + for _ in range(batch_size): + prog_bar.update() + return results + + +def multi_gpu_test(model, data_loader, tmpdir=None, gpu_collect=False): + """Test model with multiple gpus. + + This method tests model with multiple gpus and collects the results + under two different modes: gpu and cpu modes. By setting + ``gpu_collect=True``, it encodes results to gpu tensors and use gpu + communication for results collection. On cpu mode it saves the results on + different gpus to ``tmpdir`` and collects them by the rank 0 worker. + + Args: + model (nn.Module): Model to be tested. + data_loader (nn.Dataloader): Pytorch data loader. + tmpdir (str): Path of directory to save the temporary results from + different gpus under cpu mode. + gpu_collect (bool): Option to use either gpu or cpu to collect results. + + Returns: + list: The prediction results. + """ + model.eval() + results = [] + dataset = data_loader.dataset + rank, world_size = get_dist_info() + if rank == 0: + prog_bar = mmcv.ProgressBar(len(dataset)) + time.sleep(2) # This line can prevent deadlock problem in some cases. + for i, data in enumerate(data_loader): + with torch.no_grad(): + result = model(return_loss=False, **data) + results.extend(result) + + if rank == 0: + batch_size = len(result) + batch_size_all = batch_size * world_size + if batch_size_all + prog_bar.completed > len(dataset): + batch_size_all = len(dataset) - prog_bar.completed + for _ in range(batch_size_all): + prog_bar.update() + + # collect results from all ranks + if gpu_collect: + results = collect_results_gpu(results, len(dataset)) + else: + results = collect_results_cpu(results, len(dataset), tmpdir) + return results + + +def collect_results_cpu(result_part, size, tmpdir=None): + """Collect results under cpu mode. + + On cpu mode, this function will save the results on different gpus to + ``tmpdir`` and collect them by the rank 0 worker. + + Args: + result_part (list): Result list containing result parts + to be collected. + size (int): Size of the results, commonly equal to length of + the results. 
+ tmpdir (str | None): temporal directory for collected results to + store. If set to None, it will create a random temporal directory + for it. + + Returns: + list: The collected results. + """ + rank, world_size = get_dist_info() + # create a tmp dir if it is not specified + if tmpdir is None: + MAX_LEN = 512 + # 32 is whitespace + dir_tensor = torch.full((MAX_LEN, ), + 32, + dtype=torch.uint8, + device='cuda') + if rank == 0: + mmcv.mkdir_or_exist('.dist_test') + tmpdir = tempfile.mkdtemp(dir='.dist_test') + tmpdir = torch.tensor( + bytearray(tmpdir.encode()), dtype=torch.uint8, device='cuda') + dir_tensor[:len(tmpdir)] = tmpdir + dist.broadcast(dir_tensor, 0) + tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip() + else: + mmcv.mkdir_or_exist(tmpdir) + # dump the part result to the dir + mmcv.dump(result_part, osp.join(tmpdir, f'part_{rank}.pkl')) + dist.barrier() + # collect all parts + if rank != 0: + return None + else: + # load results of all parts from tmp dir + part_list = [] + for i in range(world_size): + part_file = osp.join(tmpdir, f'part_{i}.pkl') + part_result = mmcv.load(part_file) + # When data is severely insufficient, an empty part_result + # on a certain gpu could makes the overall outputs empty. + if part_result: + part_list.append(part_result) + # sort the results + ordered_results = [] + for res in zip(*part_list): + ordered_results.extend(list(res)) + # the dataloader may pad some samples + ordered_results = ordered_results[:size] + # remove tmp dir + shutil.rmtree(tmpdir) + return ordered_results + + +def collect_results_gpu(result_part, size): + """Collect results under gpu mode. + + On gpu mode, this function will encode results to gpu tensors and use gpu + communication for results collection. + + Args: + result_part (list): Result list containing result parts + to be collected. + size (int): Size of the results, commonly equal to length of + the results. + + Returns: + list: The collected results. + """ + rank, world_size = get_dist_info() + # dump result part to tensor with pickle + part_tensor = torch.tensor( + bytearray(pickle.dumps(result_part)), dtype=torch.uint8, device='cuda') + # gather all result part tensor shape + shape_tensor = torch.tensor(part_tensor.shape, device='cuda') + shape_list = [shape_tensor.clone() for _ in range(world_size)] + dist.all_gather(shape_list, shape_tensor) + # padding result part tensor to max length + shape_max = torch.tensor(shape_list).max() + part_send = torch.zeros(shape_max, dtype=torch.uint8, device='cuda') + part_send[:shape_tensor[0]] = part_tensor + part_recv_list = [ + part_tensor.new_zeros(shape_max) for _ in range(world_size) + ] + # gather all result part + dist.all_gather(part_recv_list, part_send) + + if rank == 0: + part_list = [] + for recv, shape in zip(part_recv_list, shape_list): + part_result = pickle.loads(recv[:shape[0]].cpu().numpy().tobytes()) + # When data is severely insufficient, an empty part_result + # on a certain gpu could makes the overall outputs empty. 
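+            # (Skipping the empty part keeps zip(*part_list) below from
+            # truncating every other rank's results to length zero.)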
+ if part_result: + part_list.append(part_result) + # sort the results + ordered_results = [] + for res in zip(*part_list): + ordered_results.extend(list(res)) + # the dataloader may pad some samples + ordered_results = ordered_results[:size] + return ordered_results diff --git a/annotator/uniformer/mmcv/fileio/__init__.py b/annotator/uniformer/mmcv/fileio/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..2051b85f7e59bff7bdbaa131849ce8cd31f059a4 --- /dev/null +++ b/annotator/uniformer/mmcv/fileio/__init__.py @@ -0,0 +1,11 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .file_client import BaseStorageBackend, FileClient +from .handlers import BaseFileHandler, JsonHandler, PickleHandler, YamlHandler +from .io import dump, load, register_handler +from .parse import dict_from_file, list_from_file + +__all__ = [ + 'BaseStorageBackend', 'FileClient', 'load', 'dump', 'register_handler', + 'BaseFileHandler', 'JsonHandler', 'PickleHandler', 'YamlHandler', + 'list_from_file', 'dict_from_file' +] diff --git a/annotator/uniformer/mmcv/fileio/file_client.py b/annotator/uniformer/mmcv/fileio/file_client.py new file mode 100644 index 0000000000000000000000000000000000000000..950f0c1aeab14b8e308a7455ccd64a95b5d98add --- /dev/null +++ b/annotator/uniformer/mmcv/fileio/file_client.py @@ -0,0 +1,1148 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import inspect +import os +import os.path as osp +import re +import tempfile +import warnings +from abc import ABCMeta, abstractmethod +from contextlib import contextmanager +from pathlib import Path +from typing import Iterable, Iterator, Optional, Tuple, Union +from urllib.request import urlopen + +import annotator.uniformer.mmcv as mmcv +from annotator.uniformer.mmcv.utils.misc import has_method +from annotator.uniformer.mmcv.utils.path import is_filepath + + +class BaseStorageBackend(metaclass=ABCMeta): + """Abstract class of storage backends. + + All backends need to implement two apis: ``get()`` and ``get_text()``. + ``get()`` reads the file as a byte stream and ``get_text()`` reads the file + as texts. + """ + + # a flag to indicate whether the backend can create a symlink for a file + _allow_symlink = False + + @property + def name(self): + return self.__class__.__name__ + + @property + def allow_symlink(self): + return self._allow_symlink + + @abstractmethod + def get(self, filepath): + pass + + @abstractmethod + def get_text(self, filepath): + pass + + +class CephBackend(BaseStorageBackend): + """Ceph storage backend (for internal use). + + Args: + path_mapping (dict|None): path mapping dict from local path to Petrel + path. When ``path_mapping={'src': 'dst'}``, ``src`` in ``filepath`` + will be replaced by ``dst``. Default: None. + + .. warning:: + :class:`mmcv.fileio.file_client.CephBackend` will be deprecated, + please use :class:`mmcv.fileio.file_client.PetrelBackend` instead. 
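+
+    Example (illustrative only; the mapping and path are placeholders):
+
+        >>> client = CephBackend(path_mapping={'data/': 's3://bucket/data/'})
+        >>> value_buf = client.get('data/imgs/000001.jpg')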
+ """ + + def __init__(self, path_mapping=None): + try: + import ceph + except ImportError: + raise ImportError('Please install ceph to enable CephBackend.') + + warnings.warn( + 'CephBackend will be deprecated, please use PetrelBackend instead') + self._client = ceph.S3Client() + assert isinstance(path_mapping, dict) or path_mapping is None + self.path_mapping = path_mapping + + def get(self, filepath): + filepath = str(filepath) + if self.path_mapping is not None: + for k, v in self.path_mapping.items(): + filepath = filepath.replace(k, v) + value = self._client.Get(filepath) + value_buf = memoryview(value) + return value_buf + + def get_text(self, filepath, encoding=None): + raise NotImplementedError + + +class PetrelBackend(BaseStorageBackend): + """Petrel storage backend (for internal use). + + PetrelBackend supports reading and writing data to multiple clusters. + If the file path contains the cluster name, PetrelBackend will read data + from specified cluster or write data to it. Otherwise, PetrelBackend will + access the default cluster. + + Args: + path_mapping (dict, optional): Path mapping dict from local path to + Petrel path. When ``path_mapping={'src': 'dst'}``, ``src`` in + ``filepath`` will be replaced by ``dst``. Default: None. + enable_mc (bool, optional): Whether to enable memcached support. + Default: True. + + Examples: + >>> filepath1 = 's3://path/of/file' + >>> filepath2 = 'cluster-name:s3://path/of/file' + >>> client = PetrelBackend() + >>> client.get(filepath1) # get data from default cluster + >>> client.get(filepath2) # get data from 'cluster-name' cluster + """ + + def __init__(self, + path_mapping: Optional[dict] = None, + enable_mc: bool = True): + try: + from petrel_client import client + except ImportError: + raise ImportError('Please install petrel_client to enable ' + 'PetrelBackend.') + + self._client = client.Client(enable_mc=enable_mc) + assert isinstance(path_mapping, dict) or path_mapping is None + self.path_mapping = path_mapping + + def _map_path(self, filepath: Union[str, Path]) -> str: + """Map ``filepath`` to a string path whose prefix will be replaced by + :attr:`self.path_mapping`. + + Args: + filepath (str): Path to be mapped. + """ + filepath = str(filepath) + if self.path_mapping is not None: + for k, v in self.path_mapping.items(): + filepath = filepath.replace(k, v) + return filepath + + def _format_path(self, filepath: str) -> str: + """Convert a ``filepath`` to standard format of petrel oss. + + If the ``filepath`` is concatenated by ``os.path.join``, in a Windows + environment, the ``filepath`` will be the format of + 's3://bucket_name\\image.jpg'. By invoking :meth:`_format_path`, the + above ``filepath`` will be converted to 's3://bucket_name/image.jpg'. + + Args: + filepath (str): Path to be formatted. + """ + return re.sub(r'\\+', '/', filepath) + + def get(self, filepath: Union[str, Path]) -> memoryview: + """Read data from a given ``filepath`` with 'rb' mode. + + Args: + filepath (str or Path): Path to read data. + + Returns: + memoryview: A memory view of expected bytes object to avoid + copying. The memoryview object can be converted to bytes by + ``value_buf.tobytes()``. + """ + filepath = self._map_path(filepath) + filepath = self._format_path(filepath) + value = self._client.Get(filepath) + value_buf = memoryview(value) + return value_buf + + def get_text(self, + filepath: Union[str, Path], + encoding: str = 'utf-8') -> str: + """Read data from a given ``filepath`` with 'r' mode. 
+ + Args: + filepath (str or Path): Path to read data. + encoding (str): The encoding format used to open the ``filepath``. + Default: 'utf-8'. + + Returns: + str: Expected text reading from ``filepath``. + """ + return str(self.get(filepath), encoding=encoding) + + def put(self, obj: bytes, filepath: Union[str, Path]) -> None: + """Save data to a given ``filepath``. + + Args: + obj (bytes): Data to be saved. + filepath (str or Path): Path to write data. + """ + filepath = self._map_path(filepath) + filepath = self._format_path(filepath) + self._client.put(filepath, obj) + + def put_text(self, + obj: str, + filepath: Union[str, Path], + encoding: str = 'utf-8') -> None: + """Save data to a given ``filepath``. + + Args: + obj (str): Data to be written. + filepath (str or Path): Path to write data. + encoding (str): The encoding format used to encode the ``obj``. + Default: 'utf-8'. + """ + self.put(bytes(obj, encoding=encoding), filepath) + + def remove(self, filepath: Union[str, Path]) -> None: + """Remove a file. + + Args: + filepath (str or Path): Path to be removed. + """ + if not has_method(self._client, 'delete'): + raise NotImplementedError( + ('Current version of Petrel Python SDK has not supported ' + 'the `delete` method, please use a higher version or dev' + ' branch instead.')) + + filepath = self._map_path(filepath) + filepath = self._format_path(filepath) + self._client.delete(filepath) + + def exists(self, filepath: Union[str, Path]) -> bool: + """Check whether a file path exists. + + Args: + filepath (str or Path): Path to be checked whether exists. + + Returns: + bool: Return ``True`` if ``filepath`` exists, ``False`` otherwise. + """ + if not (has_method(self._client, 'contains') + and has_method(self._client, 'isdir')): + raise NotImplementedError( + ('Current version of Petrel Python SDK has not supported ' + 'the `contains` and `isdir` methods, please use a higher' + 'version or dev branch instead.')) + + filepath = self._map_path(filepath) + filepath = self._format_path(filepath) + return self._client.contains(filepath) or self._client.isdir(filepath) + + def isdir(self, filepath: Union[str, Path]) -> bool: + """Check whether a file path is a directory. + + Args: + filepath (str or Path): Path to be checked whether it is a + directory. + + Returns: + bool: Return ``True`` if ``filepath`` points to a directory, + ``False`` otherwise. + """ + if not has_method(self._client, 'isdir'): + raise NotImplementedError( + ('Current version of Petrel Python SDK has not supported ' + 'the `isdir` method, please use a higher version or dev' + ' branch instead.')) + + filepath = self._map_path(filepath) + filepath = self._format_path(filepath) + return self._client.isdir(filepath) + + def isfile(self, filepath: Union[str, Path]) -> bool: + """Check whether a file path is a file. + + Args: + filepath (str or Path): Path to be checked whether it is a file. + + Returns: + bool: Return ``True`` if ``filepath`` points to a file, ``False`` + otherwise. + """ + if not has_method(self._client, 'contains'): + raise NotImplementedError( + ('Current version of Petrel Python SDK has not supported ' + 'the `contains` method, please use a higher version or ' + 'dev branch instead.')) + + filepath = self._map_path(filepath) + filepath = self._format_path(filepath) + return self._client.contains(filepath) + + def join_path(self, filepath: Union[str, Path], + *filepaths: Union[str, Path]) -> str: + """Concatenate all file paths. + + Args: + filepath (str or Path): Path to be concatenated. 
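+            *filepaths (str or Path): Other paths to be concatenated.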
+ + Returns: + str: The result after concatenation. + """ + filepath = self._format_path(self._map_path(filepath)) + if filepath.endswith('/'): + filepath = filepath[:-1] + formatted_paths = [filepath] + for path in filepaths: + formatted_paths.append(self._format_path(self._map_path(path))) + return '/'.join(formatted_paths) + + @contextmanager + def get_local_path(self, filepath: Union[str, Path]) -> Iterable[str]: + """Download a file from ``filepath`` and return a temporary path. + + ``get_local_path`` is decorated by :meth:`contxtlib.contextmanager`. It + can be called with ``with`` statement, and when exists from the + ``with`` statement, the temporary path will be released. + + Args: + filepath (str | Path): Download a file from ``filepath``. + + Examples: + >>> client = PetrelBackend() + >>> # After existing from the ``with`` clause, + >>> # the path will be removed + >>> with client.get_local_path('s3://path/of/your/file') as path: + ... # do something here + + Yields: + Iterable[str]: Only yield one temporary path. + """ + filepath = self._map_path(filepath) + filepath = self._format_path(filepath) + assert self.isfile(filepath) + try: + f = tempfile.NamedTemporaryFile(delete=False) + f.write(self.get(filepath)) + f.close() + yield f.name + finally: + os.remove(f.name) + + def list_dir_or_file(self, + dir_path: Union[str, Path], + list_dir: bool = True, + list_file: bool = True, + suffix: Optional[Union[str, Tuple[str]]] = None, + recursive: bool = False) -> Iterator[str]: + """Scan a directory to find the interested directories or files in + arbitrary order. + + Note: + Petrel has no concept of directories but it simulates the directory + hierarchy in the filesystem through public prefixes. In addition, + if the returned path ends with '/', it means the path is a public + prefix which is a logical directory. + + Note: + :meth:`list_dir_or_file` returns the path relative to ``dir_path``. + In addition, the returned path of directory will not contains the + suffix '/' which is consistent with other backends. + + Args: + dir_path (str | Path): Path of the directory. + list_dir (bool): List the directories. Default: True. + list_file (bool): List the path of files. Default: True. + suffix (str or tuple[str], optional): File suffix + that we are interested in. Default: None. + recursive (bool): If set to True, recursively scan the + directory. Default: False. + + Yields: + Iterable[str]: A relative path to ``dir_path``. 
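+
+        Example (illustrative only; the bucket layout is hypothetical):
+
+            >>> client = PetrelBackend()
+            >>> for path in client.list_dir_or_file('s3://bucket/dir',
+            >>>                                     list_dir=False,
+            >>>                                     suffix='.jpg'):
+            ...     print(path)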
+ """ + if not has_method(self._client, 'list'): + raise NotImplementedError( + ('Current version of Petrel Python SDK has not supported ' + 'the `list` method, please use a higher version or dev' + ' branch instead.')) + + dir_path = self._map_path(dir_path) + dir_path = self._format_path(dir_path) + if list_dir and suffix is not None: + raise TypeError( + '`list_dir` should be False when `suffix` is not None') + + if (suffix is not None) and not isinstance(suffix, (str, tuple)): + raise TypeError('`suffix` must be a string or tuple of strings') + + # Petrel's simulated directory hierarchy assumes that directory paths + # should end with `/` + if not dir_path.endswith('/'): + dir_path += '/' + + root = dir_path + + def _list_dir_or_file(dir_path, list_dir, list_file, suffix, + recursive): + for path in self._client.list(dir_path): + # the `self.isdir` is not used here to determine whether path + # is a directory, because `self.isdir` relies on + # `self._client.list` + if path.endswith('/'): # a directory path + next_dir_path = self.join_path(dir_path, path) + if list_dir: + # get the relative path and exclude the last + # character '/' + rel_dir = next_dir_path[len(root):-1] + yield rel_dir + if recursive: + yield from _list_dir_or_file(next_dir_path, list_dir, + list_file, suffix, + recursive) + else: # a file path + absolute_path = self.join_path(dir_path, path) + rel_path = absolute_path[len(root):] + if (suffix is None + or rel_path.endswith(suffix)) and list_file: + yield rel_path + + return _list_dir_or_file(dir_path, list_dir, list_file, suffix, + recursive) + + +class MemcachedBackend(BaseStorageBackend): + """Memcached storage backend. + + Attributes: + server_list_cfg (str): Config file for memcached server list. + client_cfg (str): Config file for memcached client. + sys_path (str | None): Additional path to be appended to `sys.path`. + Default: None. + """ + + def __init__(self, server_list_cfg, client_cfg, sys_path=None): + if sys_path is not None: + import sys + sys.path.append(sys_path) + try: + import mc + except ImportError: + raise ImportError( + 'Please install memcached to enable MemcachedBackend.') + + self.server_list_cfg = server_list_cfg + self.client_cfg = client_cfg + self._client = mc.MemcachedClient.GetInstance(self.server_list_cfg, + self.client_cfg) + # mc.pyvector servers as a point which points to a memory cache + self._mc_buffer = mc.pyvector() + + def get(self, filepath): + filepath = str(filepath) + import mc + self._client.Get(filepath, self._mc_buffer) + value_buf = mc.ConvertBuffer(self._mc_buffer) + return value_buf + + def get_text(self, filepath, encoding=None): + raise NotImplementedError + + +class LmdbBackend(BaseStorageBackend): + """Lmdb storage backend. + + Args: + db_path (str): Lmdb database path. + readonly (bool, optional): Lmdb environment parameter. If True, + disallow any write operations. Default: True. + lock (bool, optional): Lmdb environment parameter. If False, when + concurrent access occurs, do not lock the database. Default: False. + readahead (bool, optional): Lmdb environment parameter. If False, + disable the OS filesystem readahead mechanism, which may improve + random read performance when a database is larger than RAM. + Default: False. + + Attributes: + db_path (str): Lmdb database path. 
+ """ + + def __init__(self, + db_path, + readonly=True, + lock=False, + readahead=False, + **kwargs): + try: + import lmdb + except ImportError: + raise ImportError('Please install lmdb to enable LmdbBackend.') + + self.db_path = str(db_path) + self._client = lmdb.open( + self.db_path, + readonly=readonly, + lock=lock, + readahead=readahead, + **kwargs) + + def get(self, filepath): + """Get values according to the filepath. + + Args: + filepath (str | obj:`Path`): Here, filepath is the lmdb key. + """ + filepath = str(filepath) + with self._client.begin(write=False) as txn: + value_buf = txn.get(filepath.encode('ascii')) + return value_buf + + def get_text(self, filepath, encoding=None): + raise NotImplementedError + + +class HardDiskBackend(BaseStorageBackend): + """Raw hard disks storage backend.""" + + _allow_symlink = True + + def get(self, filepath: Union[str, Path]) -> bytes: + """Read data from a given ``filepath`` with 'rb' mode. + + Args: + filepath (str or Path): Path to read data. + + Returns: + bytes: Expected bytes object. + """ + with open(filepath, 'rb') as f: + value_buf = f.read() + return value_buf + + def get_text(self, + filepath: Union[str, Path], + encoding: str = 'utf-8') -> str: + """Read data from a given ``filepath`` with 'r' mode. + + Args: + filepath (str or Path): Path to read data. + encoding (str): The encoding format used to open the ``filepath``. + Default: 'utf-8'. + + Returns: + str: Expected text reading from ``filepath``. + """ + with open(filepath, 'r', encoding=encoding) as f: + value_buf = f.read() + return value_buf + + def put(self, obj: bytes, filepath: Union[str, Path]) -> None: + """Write data to a given ``filepath`` with 'wb' mode. + + Note: + ``put`` will create a directory if the directory of ``filepath`` + does not exist. + + Args: + obj (bytes): Data to be written. + filepath (str or Path): Path to write data. + """ + mmcv.mkdir_or_exist(osp.dirname(filepath)) + with open(filepath, 'wb') as f: + f.write(obj) + + def put_text(self, + obj: str, + filepath: Union[str, Path], + encoding: str = 'utf-8') -> None: + """Write data to a given ``filepath`` with 'w' mode. + + Note: + ``put_text`` will create a directory if the directory of + ``filepath`` does not exist. + + Args: + obj (str): Data to be written. + filepath (str or Path): Path to write data. + encoding (str): The encoding format used to open the ``filepath``. + Default: 'utf-8'. + """ + mmcv.mkdir_or_exist(osp.dirname(filepath)) + with open(filepath, 'w', encoding=encoding) as f: + f.write(obj) + + def remove(self, filepath: Union[str, Path]) -> None: + """Remove a file. + + Args: + filepath (str or Path): Path to be removed. + """ + os.remove(filepath) + + def exists(self, filepath: Union[str, Path]) -> bool: + """Check whether a file path exists. + + Args: + filepath (str or Path): Path to be checked whether exists. + + Returns: + bool: Return ``True`` if ``filepath`` exists, ``False`` otherwise. + """ + return osp.exists(filepath) + + def isdir(self, filepath: Union[str, Path]) -> bool: + """Check whether a file path is a directory. + + Args: + filepath (str or Path): Path to be checked whether it is a + directory. + + Returns: + bool: Return ``True`` if ``filepath`` points to a directory, + ``False`` otherwise. + """ + return osp.isdir(filepath) + + def isfile(self, filepath: Union[str, Path]) -> bool: + """Check whether a file path is a file. + + Args: + filepath (str or Path): Path to be checked whether it is a file. 
+ + Returns: + bool: Return ``True`` if ``filepath`` points to a file, ``False`` + otherwise. + """ + return osp.isfile(filepath) + + def join_path(self, filepath: Union[str, Path], + *filepaths: Union[str, Path]) -> str: + """Concatenate all file paths. + + Join one or more filepath components intelligently. The return value + is the concatenation of filepath and any members of *filepaths. + + Args: + filepath (str or Path): Path to be concatenated. + + Returns: + str: The result of concatenation. + """ + return osp.join(filepath, *filepaths) + + @contextmanager + def get_local_path( + self, filepath: Union[str, Path]) -> Iterable[Union[str, Path]]: + """Only for unified API and do nothing.""" + yield filepath + + def list_dir_or_file(self, + dir_path: Union[str, Path], + list_dir: bool = True, + list_file: bool = True, + suffix: Optional[Union[str, Tuple[str]]] = None, + recursive: bool = False) -> Iterator[str]: + """Scan a directory to find the interested directories or files in + arbitrary order. + + Note: + :meth:`list_dir_or_file` returns the path relative to ``dir_path``. + + Args: + dir_path (str | Path): Path of the directory. + list_dir (bool): List the directories. Default: True. + list_file (bool): List the path of files. Default: True. + suffix (str or tuple[str], optional): File suffix + that we are interested in. Default: None. + recursive (bool): If set to True, recursively scan the + directory. Default: False. + + Yields: + Iterable[str]: A relative path to ``dir_path``. + """ + if list_dir and suffix is not None: + raise TypeError('`suffix` should be None when `list_dir` is True') + + if (suffix is not None) and not isinstance(suffix, (str, tuple)): + raise TypeError('`suffix` must be a string or tuple of strings') + + root = dir_path + + def _list_dir_or_file(dir_path, list_dir, list_file, suffix, + recursive): + for entry in os.scandir(dir_path): + if not entry.name.startswith('.') and entry.is_file(): + rel_path = osp.relpath(entry.path, root) + if (suffix is None + or rel_path.endswith(suffix)) and list_file: + yield rel_path + elif osp.isdir(entry.path): + if list_dir: + rel_dir = osp.relpath(entry.path, root) + yield rel_dir + if recursive: + yield from _list_dir_or_file(entry.path, list_dir, + list_file, suffix, + recursive) + + return _list_dir_or_file(dir_path, list_dir, list_file, suffix, + recursive) + + +class HTTPBackend(BaseStorageBackend): + """HTTP and HTTPS storage bachend.""" + + def get(self, filepath): + value_buf = urlopen(filepath).read() + return value_buf + + def get_text(self, filepath, encoding='utf-8'): + value_buf = urlopen(filepath).read() + return value_buf.decode(encoding) + + @contextmanager + def get_local_path(self, filepath: str) -> Iterable[str]: + """Download a file from ``filepath``. + + ``get_local_path`` is decorated by :meth:`contxtlib.contextmanager`. It + can be called with ``with`` statement, and when exists from the + ``with`` statement, the temporary path will be released. + + Args: + filepath (str): Download a file from ``filepath``. + + Examples: + >>> client = HTTPBackend() + >>> # After existing from the ``with`` clause, + >>> # the path will be removed + >>> with client.get_local_path('http://path/of/your/file') as path: + ... # do something here + """ + try: + f = tempfile.NamedTemporaryFile(delete=False) + f.write(self.get(filepath)) + f.close() + yield f.name + finally: + os.remove(f.name) + + +class FileClient: + """A general file client to access files in different backends. 
+ + The client loads a file or text in a specified backend from its path + and returns it as a binary or text file. There are two ways to choose a + backend, the name of backend and the prefix of path. Although both of them + can be used to choose a storage backend, ``backend`` has a higher priority + that is if they are all set, the storage backend will be chosen by the + backend argument. If they are all `None`, the disk backend will be chosen. + Note that It can also register other backend accessor with a given name, + prefixes, and backend class. In addition, We use the singleton pattern to + avoid repeated object creation. If the arguments are the same, the same + object will be returned. + + Args: + backend (str, optional): The storage backend type. Options are "disk", + "ceph", "memcached", "lmdb", "http" and "petrel". Default: None. + prefix (str, optional): The prefix of the registered storage backend. + Options are "s3", "http", "https". Default: None. + + Examples: + >>> # only set backend + >>> file_client = FileClient(backend='petrel') + >>> # only set prefix + >>> file_client = FileClient(prefix='s3') + >>> # set both backend and prefix but use backend to choose client + >>> file_client = FileClient(backend='petrel', prefix='s3') + >>> # if the arguments are the same, the same object is returned + >>> file_client1 = FileClient(backend='petrel') + >>> file_client1 is file_client + True + + Attributes: + client (:obj:`BaseStorageBackend`): The backend object. + """ + + _backends = { + 'disk': HardDiskBackend, + 'ceph': CephBackend, + 'memcached': MemcachedBackend, + 'lmdb': LmdbBackend, + 'petrel': PetrelBackend, + 'http': HTTPBackend, + } + # This collection is used to record the overridden backends, and when a + # backend appears in the collection, the singleton pattern is disabled for + # that backend, because if the singleton pattern is used, then the object + # returned will be the backend before overwriting + _overridden_backends = set() + _prefix_to_backends = { + 's3': PetrelBackend, + 'http': HTTPBackend, + 'https': HTTPBackend, + } + _overridden_prefixes = set() + + _instances = {} + + def __new__(cls, backend=None, prefix=None, **kwargs): + if backend is None and prefix is None: + backend = 'disk' + if backend is not None and backend not in cls._backends: + raise ValueError( + f'Backend {backend} is not supported. Currently supported ones' + f' are {list(cls._backends.keys())}') + if prefix is not None and prefix not in cls._prefix_to_backends: + raise ValueError( + f'prefix {prefix} is not supported. 
Currently supported ones ' + f'are {list(cls._prefix_to_backends.keys())}') + + # concatenate the arguments to a unique key for determining whether + # objects with the same arguments were created + arg_key = f'{backend}:{prefix}' + for key, value in kwargs.items(): + arg_key += f':{key}:{value}' + + # if a backend was overridden, it will create a new object + if (arg_key in cls._instances + and backend not in cls._overridden_backends + and prefix not in cls._overridden_prefixes): + _instance = cls._instances[arg_key] + else: + # create a new object and put it to _instance + _instance = super().__new__(cls) + if backend is not None: + _instance.client = cls._backends[backend](**kwargs) + else: + _instance.client = cls._prefix_to_backends[prefix](**kwargs) + + cls._instances[arg_key] = _instance + + return _instance + + @property + def name(self): + return self.client.name + + @property + def allow_symlink(self): + return self.client.allow_symlink + + @staticmethod + def parse_uri_prefix(uri: Union[str, Path]) -> Optional[str]: + """Parse the prefix of a uri. + + Args: + uri (str | Path): Uri to be parsed that contains the file prefix. + + Examples: + >>> FileClient.parse_uri_prefix('s3://path/of/your/file') + 's3' + + Returns: + str | None: Return the prefix of uri if the uri contains '://' + else ``None``. + """ + assert is_filepath(uri) + uri = str(uri) + if '://' not in uri: + return None + else: + prefix, _ = uri.split('://') + # In the case of PetrelBackend, the prefix may contains the cluster + # name like clusterName:s3 + if ':' in prefix: + _, prefix = prefix.split(':') + return prefix + + @classmethod + def infer_client(cls, + file_client_args: Optional[dict] = None, + uri: Optional[Union[str, Path]] = None) -> 'FileClient': + """Infer a suitable file client based on the URI and arguments. + + Args: + file_client_args (dict, optional): Arguments to instantiate a + FileClient. Default: None. + uri (str | Path, optional): Uri to be parsed that contains the file + prefix. Default: None. + + Examples: + >>> uri = 's3://path/of/your/file' + >>> file_client = FileClient.infer_client(uri=uri) + >>> file_client_args = {'backend': 'petrel'} + >>> file_client = FileClient.infer_client(file_client_args) + + Returns: + FileClient: Instantiated FileClient object. 
+ """ + assert file_client_args is not None or uri is not None + if file_client_args is None: + file_prefix = cls.parse_uri_prefix(uri) # type: ignore + return cls(prefix=file_prefix) + else: + return cls(**file_client_args) + + @classmethod + def _register_backend(cls, name, backend, force=False, prefixes=None): + if not isinstance(name, str): + raise TypeError('the backend name should be a string, ' + f'but got {type(name)}') + if not inspect.isclass(backend): + raise TypeError( + f'backend should be a class but got {type(backend)}') + if not issubclass(backend, BaseStorageBackend): + raise TypeError( + f'backend {backend} is not a subclass of BaseStorageBackend') + if not force and name in cls._backends: + raise KeyError( + f'{name} is already registered as a storage backend, ' + 'add "force=True" if you want to override it') + + if name in cls._backends and force: + cls._overridden_backends.add(name) + cls._backends[name] = backend + + if prefixes is not None: + if isinstance(prefixes, str): + prefixes = [prefixes] + else: + assert isinstance(prefixes, (list, tuple)) + for prefix in prefixes: + if prefix not in cls._prefix_to_backends: + cls._prefix_to_backends[prefix] = backend + elif (prefix in cls._prefix_to_backends) and force: + cls._overridden_prefixes.add(prefix) + cls._prefix_to_backends[prefix] = backend + else: + raise KeyError( + f'{prefix} is already registered as a storage backend,' + ' add "force=True" if you want to override it') + + @classmethod + def register_backend(cls, name, backend=None, force=False, prefixes=None): + """Register a backend to FileClient. + + This method can be used as a normal class method or a decorator. + + .. code-block:: python + + class NewBackend(BaseStorageBackend): + + def get(self, filepath): + return filepath + + def get_text(self, filepath): + return filepath + + FileClient.register_backend('new', NewBackend) + + or + + .. code-block:: python + + @FileClient.register_backend('new') + class NewBackend(BaseStorageBackend): + + def get(self, filepath): + return filepath + + def get_text(self, filepath): + return filepath + + Args: + name (str): The name of the registered backend. + backend (class, optional): The backend class to be registered, + which must be a subclass of :class:`BaseStorageBackend`. + When this method is used as a decorator, backend is None. + Defaults to None. + force (bool, optional): Whether to override the backend if the name + has already been registered. Defaults to False. + prefixes (str or list[str] or tuple[str], optional): The prefixes + of the registered storage backend. Default: None. + `New in version 1.3.15.` + """ + if backend is not None: + cls._register_backend( + name, backend, force=force, prefixes=prefixes) + return + + def _register(backend_cls): + cls._register_backend( + name, backend_cls, force=force, prefixes=prefixes) + return backend_cls + + return _register + + def get(self, filepath: Union[str, Path]) -> Union[bytes, memoryview]: + """Read data from a given ``filepath`` with 'rb' mode. + + Note: + There are two types of return values for ``get``, one is ``bytes`` + and the other is ``memoryview``. The advantage of using memoryview + is that you can avoid copying, and if you want to convert it to + ``bytes``, you can use ``.tobytes()``. + + Args: + filepath (str or Path): Path to read data. + + Returns: + bytes | memoryview: Expected bytes object or a memory view of the + bytes object. 
+ """ + return self.client.get(filepath) + + def get_text(self, filepath: Union[str, Path], encoding='utf-8') -> str: + """Read data from a given ``filepath`` with 'r' mode. + + Args: + filepath (str or Path): Path to read data. + encoding (str): The encoding format used to open the ``filepath``. + Default: 'utf-8'. + + Returns: + str: Expected text reading from ``filepath``. + """ + return self.client.get_text(filepath, encoding) + + def put(self, obj: bytes, filepath: Union[str, Path]) -> None: + """Write data to a given ``filepath`` with 'wb' mode. + + Note: + ``put`` should create a directory if the directory of ``filepath`` + does not exist. + + Args: + obj (bytes): Data to be written. + filepath (str or Path): Path to write data. + """ + self.client.put(obj, filepath) + + def put_text(self, obj: str, filepath: Union[str, Path]) -> None: + """Write data to a given ``filepath`` with 'w' mode. + + Note: + ``put_text`` should create a directory if the directory of + ``filepath`` does not exist. + + Args: + obj (str): Data to be written. + filepath (str or Path): Path to write data. + encoding (str, optional): The encoding format used to open the + `filepath`. Default: 'utf-8'. + """ + self.client.put_text(obj, filepath) + + def remove(self, filepath: Union[str, Path]) -> None: + """Remove a file. + + Args: + filepath (str, Path): Path to be removed. + """ + self.client.remove(filepath) + + def exists(self, filepath: Union[str, Path]) -> bool: + """Check whether a file path exists. + + Args: + filepath (str or Path): Path to be checked whether exists. + + Returns: + bool: Return ``True`` if ``filepath`` exists, ``False`` otherwise. + """ + return self.client.exists(filepath) + + def isdir(self, filepath: Union[str, Path]) -> bool: + """Check whether a file path is a directory. + + Args: + filepath (str or Path): Path to be checked whether it is a + directory. + + Returns: + bool: Return ``True`` if ``filepath`` points to a directory, + ``False`` otherwise. + """ + return self.client.isdir(filepath) + + def isfile(self, filepath: Union[str, Path]) -> bool: + """Check whether a file path is a file. + + Args: + filepath (str or Path): Path to be checked whether it is a file. + + Returns: + bool: Return ``True`` if ``filepath`` points to a file, ``False`` + otherwise. + """ + return self.client.isfile(filepath) + + def join_path(self, filepath: Union[str, Path], + *filepaths: Union[str, Path]) -> str: + """Concatenate all file paths. + + Join one or more filepath components intelligently. The return value + is the concatenation of filepath and any members of *filepaths. + + Args: + filepath (str or Path): Path to be concatenated. + + Returns: + str: The result of concatenation. + """ + return self.client.join_path(filepath, *filepaths) + + @contextmanager + def get_local_path(self, filepath: Union[str, Path]) -> Iterable[str]: + """Download data from ``filepath`` and write the data to local path. + + ``get_local_path`` is decorated by :meth:`contxtlib.contextmanager`. It + can be called with ``with`` statement, and when exists from the + ``with`` statement, the temporary path will be released. + + Note: + If the ``filepath`` is a local path, just return itself. + + .. warning:: + ``get_local_path`` is an experimental interface that may change in + the future. + + Args: + filepath (str or Path): Path to be read data. + + Examples: + >>> file_client = FileClient(prefix='s3') + >>> with file_client.get_local_path('s3://bucket/abc.jpg') as path: + ... 
# do something here + + Yields: + Iterable[str]: Only yield one path. + """ + with self.client.get_local_path(str(filepath)) as local_path: + yield local_path + + def list_dir_or_file(self, + dir_path: Union[str, Path], + list_dir: bool = True, + list_file: bool = True, + suffix: Optional[Union[str, Tuple[str]]] = None, + recursive: bool = False) -> Iterator[str]: + """Scan a directory to find the interested directories or files in + arbitrary order. + + Note: + :meth:`list_dir_or_file` returns the path relative to ``dir_path``. + + Args: + dir_path (str | Path): Path of the directory. + list_dir (bool): List the directories. Default: True. + list_file (bool): List the path of files. Default: True. + suffix (str or tuple[str], optional): File suffix + that we are interested in. Default: None. + recursive (bool): If set to True, recursively scan the + directory. Default: False. + + Yields: + Iterable[str]: A relative path to ``dir_path``. + """ + yield from self.client.list_dir_or_file(dir_path, list_dir, list_file, + suffix, recursive) diff --git a/annotator/uniformer/mmcv/fileio/handlers/__init__.py b/annotator/uniformer/mmcv/fileio/handlers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..aa24d91972837b8756b225f4879bac20436eb72a --- /dev/null +++ b/annotator/uniformer/mmcv/fileio/handlers/__init__.py @@ -0,0 +1,7 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .base import BaseFileHandler +from .json_handler import JsonHandler +from .pickle_handler import PickleHandler +from .yaml_handler import YamlHandler + +__all__ = ['BaseFileHandler', 'JsonHandler', 'PickleHandler', 'YamlHandler'] diff --git a/annotator/uniformer/mmcv/fileio/handlers/base.py b/annotator/uniformer/mmcv/fileio/handlers/base.py new file mode 100644 index 0000000000000000000000000000000000000000..288878bc57282fbb2f12b32290152ca8e9d3cab0 --- /dev/null +++ b/annotator/uniformer/mmcv/fileio/handlers/base.py @@ -0,0 +1,30 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from abc import ABCMeta, abstractmethod + + +class BaseFileHandler(metaclass=ABCMeta): + # `str_like` is a flag to indicate whether the type of file object is + # str-like object or bytes-like object. Pickle only processes bytes-like + # objects but json only processes str-like object. If it is str-like + # object, `StringIO` will be used to process the buffer. + str_like = True + + @abstractmethod + def load_from_fileobj(self, file, **kwargs): + pass + + @abstractmethod + def dump_to_fileobj(self, obj, file, **kwargs): + pass + + @abstractmethod + def dump_to_str(self, obj, **kwargs): + pass + + def load_from_path(self, filepath, mode='r', **kwargs): + with open(filepath, mode) as f: + return self.load_from_fileobj(f, **kwargs) + + def dump_to_path(self, obj, filepath, mode='w', **kwargs): + with open(filepath, mode) as f: + self.dump_to_fileobj(obj, f, **kwargs) diff --git a/annotator/uniformer/mmcv/fileio/handlers/json_handler.py b/annotator/uniformer/mmcv/fileio/handlers/json_handler.py new file mode 100644 index 0000000000000000000000000000000000000000..18d4f15f74139d20adff18b20be5529c592a66b6 --- /dev/null +++ b/annotator/uniformer/mmcv/fileio/handlers/json_handler.py @@ -0,0 +1,36 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import json + +import numpy as np + +from .base import BaseFileHandler + + +def set_default(obj): + """Set default json values for non-serializable values. + + It helps convert ``set``, ``range`` and ``np.ndarray`` data types to list. 
+ It also converts ``np.generic`` (including ``np.int32``, ``np.float32``, + etc.) into plain numbers of plain python built-in types. + """ + if isinstance(obj, (set, range)): + return list(obj) + elif isinstance(obj, np.ndarray): + return obj.tolist() + elif isinstance(obj, np.generic): + return obj.item() + raise TypeError(f'{type(obj)} is unsupported for json dump') + + +class JsonHandler(BaseFileHandler): + + def load_from_fileobj(self, file): + return json.load(file) + + def dump_to_fileobj(self, obj, file, **kwargs): + kwargs.setdefault('default', set_default) + json.dump(obj, file, **kwargs) + + def dump_to_str(self, obj, **kwargs): + kwargs.setdefault('default', set_default) + return json.dumps(obj, **kwargs) diff --git a/annotator/uniformer/mmcv/fileio/handlers/pickle_handler.py b/annotator/uniformer/mmcv/fileio/handlers/pickle_handler.py new file mode 100644 index 0000000000000000000000000000000000000000..b37c79bed4ef9fd8913715e62dbe3fc5cafdc3aa --- /dev/null +++ b/annotator/uniformer/mmcv/fileio/handlers/pickle_handler.py @@ -0,0 +1,28 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import pickle + +from .base import BaseFileHandler + + +class PickleHandler(BaseFileHandler): + + str_like = False + + def load_from_fileobj(self, file, **kwargs): + return pickle.load(file, **kwargs) + + def load_from_path(self, filepath, **kwargs): + return super(PickleHandler, self).load_from_path( + filepath, mode='rb', **kwargs) + + def dump_to_str(self, obj, **kwargs): + kwargs.setdefault('protocol', 2) + return pickle.dumps(obj, **kwargs) + + def dump_to_fileobj(self, obj, file, **kwargs): + kwargs.setdefault('protocol', 2) + pickle.dump(obj, file, **kwargs) + + def dump_to_path(self, obj, filepath, **kwargs): + super(PickleHandler, self).dump_to_path( + obj, filepath, mode='wb', **kwargs) diff --git a/annotator/uniformer/mmcv/fileio/handlers/yaml_handler.py b/annotator/uniformer/mmcv/fileio/handlers/yaml_handler.py new file mode 100644 index 0000000000000000000000000000000000000000..c5aa2eea1e8c76f8baf753d1c8c959dee665e543 --- /dev/null +++ b/annotator/uniformer/mmcv/fileio/handlers/yaml_handler.py @@ -0,0 +1,24 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import yaml + +try: + from yaml import CLoader as Loader, CDumper as Dumper +except ImportError: + from yaml import Loader, Dumper + +from .base import BaseFileHandler # isort:skip + + +class YamlHandler(BaseFileHandler): + + def load_from_fileobj(self, file, **kwargs): + kwargs.setdefault('Loader', Loader) + return yaml.load(file, **kwargs) + + def dump_to_fileobj(self, obj, file, **kwargs): + kwargs.setdefault('Dumper', Dumper) + yaml.dump(obj, file, **kwargs) + + def dump_to_str(self, obj, **kwargs): + kwargs.setdefault('Dumper', Dumper) + return yaml.dump(obj, **kwargs) diff --git a/annotator/uniformer/mmcv/fileio/io.py b/annotator/uniformer/mmcv/fileio/io.py new file mode 100644 index 0000000000000000000000000000000000000000..aaefde58aa3ea5b58f86249ce7e1c40c186eb8dd --- /dev/null +++ b/annotator/uniformer/mmcv/fileio/io.py @@ -0,0 +1,151 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
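+# Unified load/dump helpers: the file format is inferred from the file
+# extension (or passed explicitly) and dispatched to the matching handler
+# registered in ``file_handlers`` below.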
+from io import BytesIO, StringIO +from pathlib import Path + +from ..utils import is_list_of, is_str +from .file_client import FileClient +from .handlers import BaseFileHandler, JsonHandler, PickleHandler, YamlHandler + +file_handlers = { + 'json': JsonHandler(), + 'yaml': YamlHandler(), + 'yml': YamlHandler(), + 'pickle': PickleHandler(), + 'pkl': PickleHandler() +} + + +def load(file, file_format=None, file_client_args=None, **kwargs): + """Load data from json/yaml/pickle files. + + This method provides a unified api for loading data from serialized files. + + Note: + In v1.3.16 and later, ``load`` supports loading data from serialized + files those can be storaged in different backends. + + Args: + file (str or :obj:`Path` or file-like object): Filename or a file-like + object. + file_format (str, optional): If not specified, the file format will be + inferred from the file extension, otherwise use the specified one. + Currently supported formats include "json", "yaml/yml" and + "pickle/pkl". + file_client_args (dict, optional): Arguments to instantiate a + FileClient. See :class:`mmcv.fileio.FileClient` for details. + Default: None. + + Examples: + >>> load('/path/of/your/file') # file is storaged in disk + >>> load('https://path/of/your/file') # file is storaged in Internet + >>> load('s3://path/of/your/file') # file is storaged in petrel + + Returns: + The content from the file. + """ + if isinstance(file, Path): + file = str(file) + if file_format is None and is_str(file): + file_format = file.split('.')[-1] + if file_format not in file_handlers: + raise TypeError(f'Unsupported format: {file_format}') + + handler = file_handlers[file_format] + if is_str(file): + file_client = FileClient.infer_client(file_client_args, file) + if handler.str_like: + with StringIO(file_client.get_text(file)) as f: + obj = handler.load_from_fileobj(f, **kwargs) + else: + with BytesIO(file_client.get(file)) as f: + obj = handler.load_from_fileobj(f, **kwargs) + elif hasattr(file, 'read'): + obj = handler.load_from_fileobj(file, **kwargs) + else: + raise TypeError('"file" must be a filepath str or a file-object') + return obj + + +def dump(obj, file=None, file_format=None, file_client_args=None, **kwargs): + """Dump data to json/yaml/pickle strings or files. + + This method provides a unified api for dumping data as strings or to files, + and also supports custom arguments for each file format. + + Note: + In v1.3.16 and later, ``dump`` supports dumping data as strings or to + files which is saved to different backends. + + Args: + obj (any): The python object to be dumped. + file (str or :obj:`Path` or file-like object, optional): If not + specified, then the object is dumped to a str, otherwise to a file + specified by the filename or file-like object. + file_format (str, optional): Same as :func:`load`. + file_client_args (dict, optional): Arguments to instantiate a + FileClient. See :class:`mmcv.fileio.FileClient` for details. + Default: None. + + Examples: + >>> dump('hello world', '/path/of/your/file') # disk + >>> dump('hello world', 's3://path/of/your/file') # ceph or petrel + + Returns: + bool: True for success, False otherwise. 
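+
+    Examples:
+        >>> # when ``file`` is None, the encoded result is returned as a str
+        >>> dump([1, 2, 3], file_format='json')
+        '[1, 2, 3]'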
+ """ + if isinstance(file, Path): + file = str(file) + if file_format is None: + if is_str(file): + file_format = file.split('.')[-1] + elif file is None: + raise ValueError( + 'file_format must be specified since file is None') + if file_format not in file_handlers: + raise TypeError(f'Unsupported format: {file_format}') + + handler = file_handlers[file_format] + if file is None: + return handler.dump_to_str(obj, **kwargs) + elif is_str(file): + file_client = FileClient.infer_client(file_client_args, file) + if handler.str_like: + with StringIO() as f: + handler.dump_to_fileobj(obj, f, **kwargs) + file_client.put_text(f.getvalue(), file) + else: + with BytesIO() as f: + handler.dump_to_fileobj(obj, f, **kwargs) + file_client.put(f.getvalue(), file) + elif hasattr(file, 'write'): + handler.dump_to_fileobj(obj, file, **kwargs) + else: + raise TypeError('"file" must be a filename str or a file-object') + + +def _register_handler(handler, file_formats): + """Register a handler for some file extensions. + + Args: + handler (:obj:`BaseFileHandler`): Handler to be registered. + file_formats (str or list[str]): File formats to be handled by this + handler. + """ + if not isinstance(handler, BaseFileHandler): + raise TypeError( + f'handler must be a child of BaseFileHandler, not {type(handler)}') + if isinstance(file_formats, str): + file_formats = [file_formats] + if not is_list_of(file_formats, str): + raise TypeError('file_formats must be a str or a list of str') + for ext in file_formats: + file_handlers[ext] = handler + + +def register_handler(file_formats, **kwargs): + + def wrap(cls): + _register_handler(cls(**kwargs), file_formats) + return cls + + return wrap diff --git a/annotator/uniformer/mmcv/fileio/parse.py b/annotator/uniformer/mmcv/fileio/parse.py new file mode 100644 index 0000000000000000000000000000000000000000..f60f0d611b8d75692221d0edd7dc993b0a6445c9 --- /dev/null +++ b/annotator/uniformer/mmcv/fileio/parse.py @@ -0,0 +1,97 @@ +# Copyright (c) OpenMMLab. All rights reserved. + +from io import StringIO + +from .file_client import FileClient + + +def list_from_file(filename, + prefix='', + offset=0, + max_num=0, + encoding='utf-8', + file_client_args=None): + """Load a text file and parse the content as a list of strings. + + Note: + In v1.3.16 and later, ``list_from_file`` supports loading a text file + which can be storaged in different backends and parsing the content as + a list for strings. + + Args: + filename (str): Filename. + prefix (str): The prefix to be inserted to the beginning of each item. + offset (int): The offset of lines. + max_num (int): The maximum number of lines to be read, + zeros and negatives mean no limitation. + encoding (str): Encoding used to open the file. Default utf-8. + file_client_args (dict, optional): Arguments to instantiate a + FileClient. See :class:`mmcv.fileio.FileClient` for details. + Default: None. + + Examples: + >>> list_from_file('/path/of/your/file') # disk + ['hello', 'world'] + >>> list_from_file('s3://path/of/your/file') # ceph or petrel + ['hello', 'world'] + + Returns: + list[str]: A list of strings. 
+ """ + cnt = 0 + item_list = [] + file_client = FileClient.infer_client(file_client_args, filename) + with StringIO(file_client.get_text(filename, encoding)) as f: + for _ in range(offset): + f.readline() + for line in f: + if 0 < max_num <= cnt: + break + item_list.append(prefix + line.rstrip('\n\r')) + cnt += 1 + return item_list + + +def dict_from_file(filename, + key_type=str, + encoding='utf-8', + file_client_args=None): + """Load a text file and parse the content as a dict. + + Each line of the text file will be two or more columns split by + whitespaces or tabs. The first column will be parsed as dict keys, and + the following columns will be parsed as dict values. + + Note: + In v1.3.16 and later, ``dict_from_file`` supports loading a text file + which can be storaged in different backends and parsing the content as + a dict. + + Args: + filename(str): Filename. + key_type(type): Type of the dict keys. str is user by default and + type conversion will be performed if specified. + encoding (str): Encoding used to open the file. Default utf-8. + file_client_args (dict, optional): Arguments to instantiate a + FileClient. See :class:`mmcv.fileio.FileClient` for details. + Default: None. + + Examples: + >>> dict_from_file('/path/of/your/file') # disk + {'key1': 'value1', 'key2': 'value2'} + >>> dict_from_file('s3://path/of/your/file') # ceph or petrel + {'key1': 'value1', 'key2': 'value2'} + + Returns: + dict: The parsed contents. + """ + mapping = {} + file_client = FileClient.infer_client(file_client_args, filename) + with StringIO(file_client.get_text(filename, encoding)) as f: + for line in f: + items = line.rstrip('\n').split() + assert len(items) >= 2 + key = key_type(items[0]) + val = items[1:] if len(items) > 2 else items[1] + mapping[key] = val + return mapping diff --git a/annotator/uniformer/mmcv/image/__init__.py b/annotator/uniformer/mmcv/image/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..d0051d609d3de4e7562e3fe638335c66617c4d91 --- /dev/null +++ b/annotator/uniformer/mmcv/image/__init__.py @@ -0,0 +1,28 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+from .colorspace import (bgr2gray, bgr2hls, bgr2hsv, bgr2rgb, bgr2ycbcr, + gray2bgr, gray2rgb, hls2bgr, hsv2bgr, imconvert, + rgb2bgr, rgb2gray, rgb2ycbcr, ycbcr2bgr, ycbcr2rgb) +from .geometric import (cutout, imcrop, imflip, imflip_, impad, + impad_to_multiple, imrescale, imresize, imresize_like, + imresize_to_multiple, imrotate, imshear, imtranslate, + rescale_size) +from .io import imfrombytes, imread, imwrite, supported_backends, use_backend +from .misc import tensor2imgs +from .photometric import (adjust_brightness, adjust_color, adjust_contrast, + adjust_lighting, adjust_sharpness, auto_contrast, + clahe, imdenormalize, imequalize, iminvert, + imnormalize, imnormalize_, lut_transform, posterize, + solarize) + +__all__ = [ + 'bgr2gray', 'bgr2hls', 'bgr2hsv', 'bgr2rgb', 'gray2bgr', 'gray2rgb', + 'hls2bgr', 'hsv2bgr', 'imconvert', 'rgb2bgr', 'rgb2gray', 'imrescale', + 'imresize', 'imresize_like', 'imresize_to_multiple', 'rescale_size', + 'imcrop', 'imflip', 'imflip_', 'impad', 'impad_to_multiple', 'imrotate', + 'imfrombytes', 'imread', 'imwrite', 'supported_backends', 'use_backend', + 'imdenormalize', 'imnormalize', 'imnormalize_', 'iminvert', 'posterize', + 'solarize', 'rgb2ycbcr', 'bgr2ycbcr', 'ycbcr2rgb', 'ycbcr2bgr', + 'tensor2imgs', 'imshear', 'imtranslate', 'adjust_color', 'imequalize', + 'adjust_brightness', 'adjust_contrast', 'lut_transform', 'clahe', + 'adjust_sharpness', 'auto_contrast', 'cutout', 'adjust_lighting' +] diff --git a/annotator/uniformer/mmcv/image/colorspace.py b/annotator/uniformer/mmcv/image/colorspace.py new file mode 100644 index 0000000000000000000000000000000000000000..814533952fdfda23d67cb6a3073692d8c1156add --- /dev/null +++ b/annotator/uniformer/mmcv/image/colorspace.py @@ -0,0 +1,306 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import cv2 +import numpy as np + + +def imconvert(img, src, dst): + """Convert an image from the src colorspace to dst colorspace. + + Args: + img (ndarray): The input image. + src (str): The source colorspace, e.g., 'rgb', 'hsv'. + dst (str): The destination colorspace, e.g., 'rgb', 'hsv'. + + Returns: + ndarray: The converted image. + """ + code = getattr(cv2, f'COLOR_{src.upper()}2{dst.upper()}') + out_img = cv2.cvtColor(img, code) + return out_img + + +def bgr2gray(img, keepdim=False): + """Convert a BGR image to grayscale image. + + Args: + img (ndarray): The input image. + keepdim (bool): If False (by default), then return the grayscale image + with 2 dims, otherwise 3 dims. + + Returns: + ndarray: The converted grayscale image. + """ + out_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + if keepdim: + out_img = out_img[..., None] + return out_img + + +def rgb2gray(img, keepdim=False): + """Convert a RGB image to grayscale image. + + Args: + img (ndarray): The input image. + keepdim (bool): If False (by default), then return the grayscale image + with 2 dims, otherwise 3 dims. + + Returns: + ndarray: The converted grayscale image. + """ + out_img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY) + if keepdim: + out_img = out_img[..., None] + return out_img + + +def gray2bgr(img): + """Convert a grayscale image to BGR image. + + Args: + img (ndarray): The input image. + + Returns: + ndarray: The converted BGR image. + """ + img = img[..., None] if img.ndim == 2 else img + out_img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) + return out_img + + +def gray2rgb(img): + """Convert a grayscale image to RGB image. + + Args: + img (ndarray): The input image. + + Returns: + ndarray: The converted RGB image. 
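+
+    Examples:
+        >>> import numpy as np
+        >>> gray = np.zeros((4, 4), dtype=np.uint8)
+        >>> gray2rgb(gray).shape
+        (4, 4, 3)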
+ """ + img = img[..., None] if img.ndim == 2 else img + out_img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB) + return out_img + + +def _convert_input_type_range(img): + """Convert the type and range of the input image. + + It converts the input image to np.float32 type and range of [0, 1]. + It is mainly used for pre-processing the input image in colorspace + conversion functions such as rgb2ycbcr and ycbcr2rgb. + + Args: + img (ndarray): The input image. It accepts: + 1. np.uint8 type with range [0, 255]; + 2. np.float32 type with range [0, 1]. + + Returns: + (ndarray): The converted image with type of np.float32 and range of + [0, 1]. + """ + img_type = img.dtype + img = img.astype(np.float32) + if img_type == np.float32: + pass + elif img_type == np.uint8: + img /= 255. + else: + raise TypeError('The img type should be np.float32 or np.uint8, ' + f'but got {img_type}') + return img + + +def _convert_output_type_range(img, dst_type): + """Convert the type and range of the image according to dst_type. + + It converts the image to desired type and range. If `dst_type` is np.uint8, + images will be converted to np.uint8 type with range [0, 255]. If + `dst_type` is np.float32, it converts the image to np.float32 type with + range [0, 1]. + It is mainly used for post-processing images in colorspace conversion + functions such as rgb2ycbcr and ycbcr2rgb. + + Args: + img (ndarray): The image to be converted with np.float32 type and + range [0, 255]. + dst_type (np.uint8 | np.float32): If dst_type is np.uint8, it + converts the image to np.uint8 type with range [0, 255]. If + dst_type is np.float32, it converts the image to np.float32 type + with range [0, 1]. + + Returns: + (ndarray): The converted image with desired type and range. + """ + if dst_type not in (np.uint8, np.float32): + raise TypeError('The dst_type should be np.float32 or np.uint8, ' + f'but got {dst_type}') + if dst_type == np.uint8: + img = img.round() + else: + img /= 255. + return img.astype(dst_type) + + +def rgb2ycbcr(img, y_only=False): + """Convert a RGB image to YCbCr image. + + This function produces the same results as Matlab's `rgb2ycbcr` function. + It implements the ITU-R BT.601 conversion for standard-definition + television. See more details in + https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion. + + It differs from a similar function in cv2.cvtColor: `RGB <-> YCrCb`. + In OpenCV, it implements a JPEG conversion. See more details in + https://en.wikipedia.org/wiki/YCbCr#JPEG_conversion. + + Args: + img (ndarray): The input image. It accepts: + 1. np.uint8 type with range [0, 255]; + 2. np.float32 type with range [0, 1]. + y_only (bool): Whether to only return Y channel. Default: False. + + Returns: + ndarray: The converted YCbCr image. The output image has the same type + and range as input image. + """ + img_type = img.dtype + img = _convert_input_type_range(img) + if y_only: + out_img = np.dot(img, [65.481, 128.553, 24.966]) + 16.0 + else: + out_img = np.matmul( + img, [[65.481, -37.797, 112.0], [128.553, -74.203, -93.786], + [24.966, 112.0, -18.214]]) + [16, 128, 128] + out_img = _convert_output_type_range(out_img, img_type) + return out_img + + +def bgr2ycbcr(img, y_only=False): + """Convert a BGR image to YCbCr image. + + The bgr version of rgb2ycbcr. + It implements the ITU-R BT.601 conversion for standard-definition + television. See more details in + https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion. + + It differs from a similar function in cv2.cvtColor: `BGR <-> YCrCb`. 
+ In OpenCV, it implements a JPEG conversion. See more details in + https://en.wikipedia.org/wiki/YCbCr#JPEG_conversion. + + Args: + img (ndarray): The input image. It accepts: + 1. np.uint8 type with range [0, 255]; + 2. np.float32 type with range [0, 1]. + y_only (bool): Whether to only return Y channel. Default: False. + + Returns: + ndarray: The converted YCbCr image. The output image has the same type + and range as input image. + """ + img_type = img.dtype + img = _convert_input_type_range(img) + if y_only: + out_img = np.dot(img, [24.966, 128.553, 65.481]) + 16.0 + else: + out_img = np.matmul( + img, [[24.966, 112.0, -18.214], [128.553, -74.203, -93.786], + [65.481, -37.797, 112.0]]) + [16, 128, 128] + out_img = _convert_output_type_range(out_img, img_type) + return out_img + + +def ycbcr2rgb(img): + """Convert a YCbCr image to RGB image. + + This function produces the same results as Matlab's ycbcr2rgb function. + It implements the ITU-R BT.601 conversion for standard-definition + television. See more details in + https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion. + + It differs from a similar function in cv2.cvtColor: `YCrCb <-> RGB`. + In OpenCV, it implements a JPEG conversion. See more details in + https://en.wikipedia.org/wiki/YCbCr#JPEG_conversion. + + Args: + img (ndarray): The input image. It accepts: + 1. np.uint8 type with range [0, 255]; + 2. np.float32 type with range [0, 1]. + + Returns: + ndarray: The converted RGB image. The output image has the same type + and range as input image. + """ + img_type = img.dtype + img = _convert_input_type_range(img) * 255 + out_img = np.matmul(img, [[0.00456621, 0.00456621, 0.00456621], + [0, -0.00153632, 0.00791071], + [0.00625893, -0.00318811, 0]]) * 255.0 + [ + -222.921, 135.576, -276.836 + ] + out_img = _convert_output_type_range(out_img, img_type) + return out_img + + +def ycbcr2bgr(img): + """Convert a YCbCr image to BGR image. + + The bgr version of ycbcr2rgb. + It implements the ITU-R BT.601 conversion for standard-definition + television. See more details in + https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion. + + It differs from a similar function in cv2.cvtColor: `YCrCb <-> BGR`. + In OpenCV, it implements a JPEG conversion. See more details in + https://en.wikipedia.org/wiki/YCbCr#JPEG_conversion. + + Args: + img (ndarray): The input image. It accepts: + 1. np.uint8 type with range [0, 255]; + 2. np.float32 type with range [0, 1]. + + Returns: + ndarray: The converted BGR image. The output image has the same type + and range as input image. + """ + img_type = img.dtype + img = _convert_input_type_range(img) * 255 + out_img = np.matmul(img, [[0.00456621, 0.00456621, 0.00456621], + [0.00791071, -0.00153632, 0], + [0, -0.00318811, 0.00625893]]) * 255.0 + [ + -276.836, 135.576, -222.921 + ] + out_img = _convert_output_type_range(out_img, img_type) + return out_img + + +def convert_color_factory(src, dst): + + code = getattr(cv2, f'COLOR_{src.upper()}2{dst.upper()}') + + def convert_color(img): + out_img = cv2.cvtColor(img, code) + return out_img + + convert_color.__doc__ = f"""Convert a {src.upper()} image to {dst.upper()} + image. + + Args: + img (ndarray or str): The input image. + + Returns: + ndarray: The converted {dst.upper()} image. 
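+
+    Examples:
+        >>> out_img = {src}2{dst}(img)  # one of the factory products below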
+ """ + + return convert_color + + +bgr2rgb = convert_color_factory('bgr', 'rgb') + +rgb2bgr = convert_color_factory('rgb', 'bgr') + +bgr2hsv = convert_color_factory('bgr', 'hsv') + +hsv2bgr = convert_color_factory('hsv', 'bgr') + +bgr2hls = convert_color_factory('bgr', 'hls') + +hls2bgr = convert_color_factory('hls', 'bgr') diff --git a/annotator/uniformer/mmcv/image/geometric.py b/annotator/uniformer/mmcv/image/geometric.py new file mode 100644 index 0000000000000000000000000000000000000000..cf97c201cb4e43796c911919d03fb26a07ed817d --- /dev/null +++ b/annotator/uniformer/mmcv/image/geometric.py @@ -0,0 +1,728 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import numbers + +import cv2 +import numpy as np + +from ..utils import to_2tuple +from .io import imread_backend + +try: + from PIL import Image +except ImportError: + Image = None + + +def _scale_size(size, scale): + """Rescale a size by a ratio. + + Args: + size (tuple[int]): (w, h). + scale (float | tuple(float)): Scaling factor. + + Returns: + tuple[int]: scaled size. + """ + if isinstance(scale, (float, int)): + scale = (scale, scale) + w, h = size + return int(w * float(scale[0]) + 0.5), int(h * float(scale[1]) + 0.5) + + +cv2_interp_codes = { + 'nearest': cv2.INTER_NEAREST, + 'bilinear': cv2.INTER_LINEAR, + 'bicubic': cv2.INTER_CUBIC, + 'area': cv2.INTER_AREA, + 'lanczos': cv2.INTER_LANCZOS4 +} + +if Image is not None: + pillow_interp_codes = { + 'nearest': Image.NEAREST, + 'bilinear': Image.BILINEAR, + 'bicubic': Image.BICUBIC, + 'box': Image.BOX, + 'lanczos': Image.LANCZOS, + 'hamming': Image.HAMMING + } + + +def imresize(img, + size, + return_scale=False, + interpolation='bilinear', + out=None, + backend=None): + """Resize image to a given size. + + Args: + img (ndarray): The input image. + size (tuple[int]): Target size (w, h). + return_scale (bool): Whether to return `w_scale` and `h_scale`. + interpolation (str): Interpolation method, accepted values are + "nearest", "bilinear", "bicubic", "area", "lanczos" for 'cv2' + backend, "nearest", "bilinear" for 'pillow' backend. + out (ndarray): The output destination. + backend (str | None): The image resize backend type. Options are `cv2`, + `pillow`, `None`. If backend is None, the global imread_backend + specified by ``mmcv.use_backend()`` will be used. Default: None. + + Returns: + tuple | ndarray: (`resized_img`, `w_scale`, `h_scale`) or + `resized_img`. + """ + h, w = img.shape[:2] + if backend is None: + backend = imread_backend + if backend not in ['cv2', 'pillow']: + raise ValueError(f'backend: {backend} is not supported for resize.' + f"Supported backends are 'cv2', 'pillow'") + + if backend == 'pillow': + assert img.dtype == np.uint8, 'Pillow backend only support uint8 type' + pil_image = Image.fromarray(img) + pil_image = pil_image.resize(size, pillow_interp_codes[interpolation]) + resized_img = np.array(pil_image) + else: + resized_img = cv2.resize( + img, size, dst=out, interpolation=cv2_interp_codes[interpolation]) + if not return_scale: + return resized_img + else: + w_scale = size[0] / w + h_scale = size[1] / h + return resized_img, w_scale, h_scale + + +def imresize_to_multiple(img, + divisor, + size=None, + scale_factor=None, + keep_ratio=False, + return_scale=False, + interpolation='bilinear', + out=None, + backend=None): + """Resize image according to a given size or scale factor and then rounds + up the the resized or rescaled image size to the nearest value that can be + divided by the divisor. + + Args: + img (ndarray): The input image. 
+ divisor (int | tuple): Resized image size will be a multiple of + divisor. If divisor is a tuple, divisor should be + (w_divisor, h_divisor). + size (None | int | tuple[int]): Target size (w, h). Default: None. + scale_factor (None | float | tuple[float]): Multiplier for spatial + size. Should match input size if it is a tuple and the 2D style is + (w_scale_factor, h_scale_factor). Default: None. + keep_ratio (bool): Whether to keep the aspect ratio when resizing the + image. Default: False. + return_scale (bool): Whether to return `w_scale` and `h_scale`. + interpolation (str): Interpolation method, accepted values are + "nearest", "bilinear", "bicubic", "area", "lanczos" for 'cv2' + backend, "nearest", "bilinear" for 'pillow' backend. + out (ndarray): The output destination. + backend (str | None): The image resize backend type. Options are `cv2`, + `pillow`, `None`. If backend is None, the global imread_backend + specified by ``mmcv.use_backend()`` will be used. Default: None. + + Returns: + tuple | ndarray: (`resized_img`, `w_scale`, `h_scale`) or + `resized_img`. + """ + h, w = img.shape[:2] + if size is not None and scale_factor is not None: + raise ValueError('only one of size or scale_factor should be defined') + elif size is None and scale_factor is None: + raise ValueError('one of size or scale_factor should be defined') + elif size is not None: + size = to_2tuple(size) + if keep_ratio: + size = rescale_size((w, h), size, return_scale=False) + else: + size = _scale_size((w, h), scale_factor) + + divisor = to_2tuple(divisor) + size = tuple([int(np.ceil(s / d)) * d for s, d in zip(size, divisor)]) + resized_img, w_scale, h_scale = imresize( + img, + size, + return_scale=True, + interpolation=interpolation, + out=out, + backend=backend) + if return_scale: + return resized_img, w_scale, h_scale + else: + return resized_img + + +def imresize_like(img, + dst_img, + return_scale=False, + interpolation='bilinear', + backend=None): + """Resize image to the same size of a given image. + + Args: + img (ndarray): The input image. + dst_img (ndarray): The target image. + return_scale (bool): Whether to return `w_scale` and `h_scale`. + interpolation (str): Same as :func:`resize`. + backend (str | None): Same as :func:`resize`. + + Returns: + tuple or ndarray: (`resized_img`, `w_scale`, `h_scale`) or + `resized_img`. + """ + h, w = dst_img.shape[:2] + return imresize(img, (w, h), return_scale, interpolation, backend=backend) + + +def rescale_size(old_size, scale, return_scale=False): + """Calculate the new size to be rescaled to. + + Args: + old_size (tuple[int]): The old size (w, h) of image. + scale (float | tuple[int]): The scaling factor or maximum size. + If it is a float number, then the image will be rescaled by this + factor, else if it is a tuple of 2 integers, then the image will + be rescaled as large as possible within the scale. + return_scale (bool): Whether to return the scaling factor besides the + rescaled image size. + + Returns: + tuple[int]: The new rescaled image size. 
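+
+    Examples:
+        >>> rescale_size((1000, 600), 0.5)
+        (500, 300)
+        >>> rescale_size((1000, 600), (800, 512))  # fit inside the bound
+        (800, 480)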
+ """ + w, h = old_size + if isinstance(scale, (float, int)): + if scale <= 0: + raise ValueError(f'Invalid scale {scale}, must be positive.') + scale_factor = scale + elif isinstance(scale, tuple): + max_long_edge = max(scale) + max_short_edge = min(scale) + scale_factor = min(max_long_edge / max(h, w), + max_short_edge / min(h, w)) + else: + raise TypeError( + f'Scale must be a number or tuple of int, but got {type(scale)}') + + new_size = _scale_size((w, h), scale_factor) + + if return_scale: + return new_size, scale_factor + else: + return new_size + + +def imrescale(img, + scale, + return_scale=False, + interpolation='bilinear', + backend=None): + """Resize image while keeping the aspect ratio. + + Args: + img (ndarray): The input image. + scale (float | tuple[int]): The scaling factor or maximum size. + If it is a float number, then the image will be rescaled by this + factor, else if it is a tuple of 2 integers, then the image will + be rescaled as large as possible within the scale. + return_scale (bool): Whether to return the scaling factor besides the + rescaled image. + interpolation (str): Same as :func:`resize`. + backend (str | None): Same as :func:`resize`. + + Returns: + ndarray: The rescaled image. + """ + h, w = img.shape[:2] + new_size, scale_factor = rescale_size((w, h), scale, return_scale=True) + rescaled_img = imresize( + img, new_size, interpolation=interpolation, backend=backend) + if return_scale: + return rescaled_img, scale_factor + else: + return rescaled_img + + +def imflip(img, direction='horizontal'): + """Flip an image horizontally or vertically. + + Args: + img (ndarray): Image to be flipped. + direction (str): The flip direction, either "horizontal" or + "vertical" or "diagonal". + + Returns: + ndarray: The flipped image. + """ + assert direction in ['horizontal', 'vertical', 'diagonal'] + if direction == 'horizontal': + return np.flip(img, axis=1) + elif direction == 'vertical': + return np.flip(img, axis=0) + else: + return np.flip(img, axis=(0, 1)) + + +def imflip_(img, direction='horizontal'): + """Inplace flip an image horizontally or vertically. + + Args: + img (ndarray): Image to be flipped. + direction (str): The flip direction, either "horizontal" or + "vertical" or "diagonal". + + Returns: + ndarray: The flipped image (inplace). + """ + assert direction in ['horizontal', 'vertical', 'diagonal'] + if direction == 'horizontal': + return cv2.flip(img, 1, img) + elif direction == 'vertical': + return cv2.flip(img, 0, img) + else: + return cv2.flip(img, -1, img) + + +def imrotate(img, + angle, + center=None, + scale=1.0, + border_value=0, + interpolation='bilinear', + auto_bound=False): + """Rotate an image. + + Args: + img (ndarray): Image to be rotated. + angle (float): Rotation angle in degrees, positive values mean + clockwise rotation. + center (tuple[float], optional): Center point (w, h) of the rotation in + the source image. If not specified, the center of the image will be + used. + scale (float): Isotropic scale factor. + border_value (int): Border value. + interpolation (str): Same as :func:`resize`. + auto_bound (bool): Whether to adjust the image size to cover the whole + rotated image. + + Returns: + ndarray: The rotated image. 
+ """ + if center is not None and auto_bound: + raise ValueError('`auto_bound` conflicts with `center`') + h, w = img.shape[:2] + if center is None: + center = ((w - 1) * 0.5, (h - 1) * 0.5) + assert isinstance(center, tuple) + + matrix = cv2.getRotationMatrix2D(center, -angle, scale) + if auto_bound: + cos = np.abs(matrix[0, 0]) + sin = np.abs(matrix[0, 1]) + new_w = h * sin + w * cos + new_h = h * cos + w * sin + matrix[0, 2] += (new_w - w) * 0.5 + matrix[1, 2] += (new_h - h) * 0.5 + w = int(np.round(new_w)) + h = int(np.round(new_h)) + rotated = cv2.warpAffine( + img, + matrix, (w, h), + flags=cv2_interp_codes[interpolation], + borderValue=border_value) + return rotated + + +def bbox_clip(bboxes, img_shape): + """Clip bboxes to fit the image shape. + + Args: + bboxes (ndarray): Shape (..., 4*k) + img_shape (tuple[int]): (height, width) of the image. + + Returns: + ndarray: Clipped bboxes. + """ + assert bboxes.shape[-1] % 4 == 0 + cmin = np.empty(bboxes.shape[-1], dtype=bboxes.dtype) + cmin[0::2] = img_shape[1] - 1 + cmin[1::2] = img_shape[0] - 1 + clipped_bboxes = np.maximum(np.minimum(bboxes, cmin), 0) + return clipped_bboxes + + +def bbox_scaling(bboxes, scale, clip_shape=None): + """Scaling bboxes w.r.t the box center. + + Args: + bboxes (ndarray): Shape(..., 4). + scale (float): Scaling factor. + clip_shape (tuple[int], optional): If specified, bboxes that exceed the + boundary will be clipped according to the given shape (h, w). + + Returns: + ndarray: Scaled bboxes. + """ + if float(scale) == 1.0: + scaled_bboxes = bboxes.copy() + else: + w = bboxes[..., 2] - bboxes[..., 0] + 1 + h = bboxes[..., 3] - bboxes[..., 1] + 1 + dw = (w * (scale - 1)) * 0.5 + dh = (h * (scale - 1)) * 0.5 + scaled_bboxes = bboxes + np.stack((-dw, -dh, dw, dh), axis=-1) + if clip_shape is not None: + return bbox_clip(scaled_bboxes, clip_shape) + else: + return scaled_bboxes + + +def imcrop(img, bboxes, scale=1.0, pad_fill=None): + """Crop image patches. + + 3 steps: scale the bboxes -> clip bboxes -> crop and pad. + + Args: + img (ndarray): Image to be cropped. + bboxes (ndarray): Shape (k, 4) or (4, ), location of cropped bboxes. + scale (float, optional): Scale ratio of bboxes, the default value + 1.0 means no padding. + pad_fill (Number | list[Number]): Value to be filled for padding. + Default: None, which means no padding. + + Returns: + list[ndarray] | ndarray: The cropped image patches. + """ + chn = 1 if img.ndim == 2 else img.shape[2] + if pad_fill is not None: + if isinstance(pad_fill, (int, float)): + pad_fill = [pad_fill for _ in range(chn)] + assert len(pad_fill) == chn + + _bboxes = bboxes[None, ...] if bboxes.ndim == 1 else bboxes + scaled_bboxes = bbox_scaling(_bboxes, scale).astype(np.int32) + clipped_bbox = bbox_clip(scaled_bboxes, img.shape) + + patches = [] + for i in range(clipped_bbox.shape[0]): + x1, y1, x2, y2 = tuple(clipped_bbox[i, :]) + if pad_fill is None: + patch = img[y1:y2 + 1, x1:x2 + 1, ...] + else: + _x1, _y1, _x2, _y2 = tuple(scaled_bboxes[i, :]) + if chn == 1: + patch_shape = (_y2 - _y1 + 1, _x2 - _x1 + 1) + else: + patch_shape = (_y2 - _y1 + 1, _x2 - _x1 + 1, chn) + patch = np.array( + pad_fill, dtype=img.dtype) * np.ones( + patch_shape, dtype=img.dtype) + x_start = 0 if _x1 >= 0 else -_x1 + y_start = 0 if _y1 >= 0 else -_y1 + w = x2 - x1 + 1 + h = y2 - y1 + 1 + patch[y_start:y_start + h, x_start:x_start + w, + ...] = img[y1:y1 + h, x1:x1 + w, ...] 
+ patches.append(patch) + + if bboxes.ndim == 1: + return patches[0] + else: + return patches + + +def impad(img, + *, + shape=None, + padding=None, + pad_val=0, + padding_mode='constant'): + """Pad the given image to a certain shape or pad on all sides with + specified padding mode and padding value. + + Args: + img (ndarray): Image to be padded. + shape (tuple[int]): Expected padding shape (h, w). Default: None. + padding (int or tuple[int]): Padding on each border. If a single int is + provided this is used to pad all borders. If tuple of length 2 is + provided this is the padding on left/right and top/bottom + respectively. If a tuple of length 4 is provided this is the + padding for the left, top, right and bottom borders respectively. + Default: None. Note that `shape` and `padding` can not be both + set. + pad_val (Number | Sequence[Number]): Values to be filled in padding + areas when padding_mode is 'constant'. Default: 0. + padding_mode (str): Type of padding. Should be: constant, edge, + reflect or symmetric. Default: constant. + + - constant: pads with a constant value, this value is specified + with pad_val. + - edge: pads with the last value at the edge of the image. + - reflect: pads with reflection of image without repeating the + last value on the edge. For example, padding [1, 2, 3, 4] + with 2 elements on both sides in reflect mode will result + in [3, 2, 1, 2, 3, 4, 3, 2]. + - symmetric: pads with reflection of image repeating the last + value on the edge. For example, padding [1, 2, 3, 4] with + 2 elements on both sides in symmetric mode will result in + [2, 1, 1, 2, 3, 4, 4, 3] + + Returns: + ndarray: The padded image. + """ + + assert (shape is not None) ^ (padding is not None) + if shape is not None: + padding = (0, 0, shape[1] - img.shape[1], shape[0] - img.shape[0]) + + # check pad_val + if isinstance(pad_val, tuple): + assert len(pad_val) == img.shape[-1] + elif not isinstance(pad_val, numbers.Number): + raise TypeError('pad_val must be a int or a tuple. ' + f'But received {type(pad_val)}') + + # check padding + if isinstance(padding, tuple) and len(padding) in [2, 4]: + if len(padding) == 2: + padding = (padding[0], padding[1], padding[0], padding[1]) + elif isinstance(padding, numbers.Number): + padding = (padding, padding, padding, padding) + else: + raise ValueError('Padding must be a int or a 2, or 4 element tuple.' + f'But received {padding}') + + # check padding mode + assert padding_mode in ['constant', 'edge', 'reflect', 'symmetric'] + + border_type = { + 'constant': cv2.BORDER_CONSTANT, + 'edge': cv2.BORDER_REPLICATE, + 'reflect': cv2.BORDER_REFLECT_101, + 'symmetric': cv2.BORDER_REFLECT + } + img = cv2.copyMakeBorder( + img, + padding[1], + padding[3], + padding[0], + padding[2], + border_type[padding_mode], + value=pad_val) + + return img + + +def impad_to_multiple(img, divisor, pad_val=0): + """Pad an image to ensure each edge to be multiple to some number. + + Args: + img (ndarray): Image to be padded. + divisor (int): Padded image edges will be multiple to divisor. + pad_val (Number | Sequence[Number]): Same as :func:`impad`. + + Returns: + ndarray: The padded image. + """ + pad_h = int(np.ceil(img.shape[0] / divisor)) * divisor + pad_w = int(np.ceil(img.shape[1] / divisor)) * divisor + return impad(img, shape=(pad_h, pad_w), pad_val=pad_val) + + +def cutout(img, shape, pad_val=0): + """Randomly cut out a rectangle from the original img. + + Args: + img (ndarray): Image to be cutout. + shape (int | tuple[int]): Expected cutout shape (h, w). 
If given as a + int, the value will be used for both h and w. + pad_val (int | float | tuple[int | float]): Values to be filled in the + cut area. Defaults to 0. + + Returns: + ndarray: The cutout image. + """ + + channels = 1 if img.ndim == 2 else img.shape[2] + if isinstance(shape, int): + cut_h, cut_w = shape, shape + else: + assert isinstance(shape, tuple) and len(shape) == 2, \ + f'shape must be a int or a tuple with length 2, but got type ' \ + f'{type(shape)} instead.' + cut_h, cut_w = shape + if isinstance(pad_val, (int, float)): + pad_val = tuple([pad_val] * channels) + elif isinstance(pad_val, tuple): + assert len(pad_val) == channels, \ + 'Expected the num of elements in tuple equals the channels' \ + 'of input image. Found {} vs {}'.format( + len(pad_val), channels) + else: + raise TypeError(f'Invalid type {type(pad_val)} for `pad_val`') + + img_h, img_w = img.shape[:2] + y0 = np.random.uniform(img_h) + x0 = np.random.uniform(img_w) + + y1 = int(max(0, y0 - cut_h / 2.)) + x1 = int(max(0, x0 - cut_w / 2.)) + y2 = min(img_h, y1 + cut_h) + x2 = min(img_w, x1 + cut_w) + + if img.ndim == 2: + patch_shape = (y2 - y1, x2 - x1) + else: + patch_shape = (y2 - y1, x2 - x1, channels) + + img_cutout = img.copy() + patch = np.array( + pad_val, dtype=img.dtype) * np.ones( + patch_shape, dtype=img.dtype) + img_cutout[y1:y2, x1:x2, ...] = patch + + return img_cutout + + +def _get_shear_matrix(magnitude, direction='horizontal'): + """Generate the shear matrix for transformation. + + Args: + magnitude (int | float): The magnitude used for shear. + direction (str): The flip direction, either "horizontal" + or "vertical". + + Returns: + ndarray: The shear matrix with dtype float32. + """ + if direction == 'horizontal': + shear_matrix = np.float32([[1, magnitude, 0], [0, 1, 0]]) + elif direction == 'vertical': + shear_matrix = np.float32([[1, 0, 0], [magnitude, 1, 0]]) + return shear_matrix + + +def imshear(img, + magnitude, + direction='horizontal', + border_value=0, + interpolation='bilinear'): + """Shear an image. + + Args: + img (ndarray): Image to be sheared with format (h, w) + or (h, w, c). + magnitude (int | float): The magnitude used for shear. + direction (str): The flip direction, either "horizontal" + or "vertical". + border_value (int | tuple[int]): Value used in case of a + constant border. + interpolation (str): Same as :func:`resize`. + + Returns: + ndarray: The sheared image. + """ + assert direction in ['horizontal', + 'vertical'], f'Invalid direction: {direction}' + height, width = img.shape[:2] + if img.ndim == 2: + channels = 1 + elif img.ndim == 3: + channels = img.shape[-1] + if isinstance(border_value, int): + border_value = tuple([border_value] * channels) + elif isinstance(border_value, tuple): + assert len(border_value) == channels, \ + 'Expected the num of elements in tuple equals the channels' \ + 'of input image. Found {} vs {}'.format( + len(border_value), channels) + else: + raise ValueError( + f'Invalid type {type(border_value)} for `border_value`') + shear_matrix = _get_shear_matrix(magnitude, direction) + sheared = cv2.warpAffine( + img, + shear_matrix, + (width, height), + # Note case when the number elements in `border_value` + # greater than 3 (e.g. shearing masks whose channels large + # than 3) will raise TypeError in `cv2.warpAffine`. + # Here simply slice the first 3 values in `border_value`. 
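+        # (for a 4-channel image this means the border of the 4th channel
+        # is filled with 0, since the sliced scalar only covers 3 values)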
+ borderValue=border_value[:3], + flags=cv2_interp_codes[interpolation]) + return sheared + + +def _get_translate_matrix(offset, direction='horizontal'): + """Generate the translate matrix. + + Args: + offset (int | float): The offset used for translate. + direction (str): The translate direction, either + "horizontal" or "vertical". + + Returns: + ndarray: The translate matrix with dtype float32. + """ + if direction == 'horizontal': + translate_matrix = np.float32([[1, 0, offset], [0, 1, 0]]) + elif direction == 'vertical': + translate_matrix = np.float32([[1, 0, 0], [0, 1, offset]]) + return translate_matrix + + +def imtranslate(img, + offset, + direction='horizontal', + border_value=0, + interpolation='bilinear'): + """Translate an image. + + Args: + img (ndarray): Image to be translated with format + (h, w) or (h, w, c). + offset (int | float): The offset used for translate. + direction (str): The translate direction, either "horizontal" + or "vertical". + border_value (int | tuple[int]): Value used in case of a + constant border. + interpolation (str): Same as :func:`resize`. + + Returns: + ndarray: The translated image. + """ + assert direction in ['horizontal', + 'vertical'], f'Invalid direction: {direction}' + height, width = img.shape[:2] + if img.ndim == 2: + channels = 1 + elif img.ndim == 3: + channels = img.shape[-1] + if isinstance(border_value, int): + border_value = tuple([border_value] * channels) + elif isinstance(border_value, tuple): + assert len(border_value) == channels, \ + 'Expected the num of elements in tuple equals the channels' \ + 'of input image. Found {} vs {}'.format( + len(border_value), channels) + else: + raise ValueError( + f'Invalid type {type(border_value)} for `border_value`.') + translate_matrix = _get_translate_matrix(offset, direction) + translated = cv2.warpAffine( + img, + translate_matrix, + (width, height), + # Note case when the number elements in `border_value` + # greater than 3 (e.g. translating masks whose channels + # large than 3) will raise TypeError in `cv2.warpAffine`. + # Here simply slice the first 3 values in `border_value`. + borderValue=border_value[:3], + flags=cv2_interp_codes[interpolation]) + return translated diff --git a/annotator/uniformer/mmcv/image/io.py b/annotator/uniformer/mmcv/image/io.py new file mode 100644 index 0000000000000000000000000000000000000000..d3fa2e8cc06b1a7b0b69de6406980b15d61a1e5d --- /dev/null +++ b/annotator/uniformer/mmcv/image/io.py @@ -0,0 +1,258 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import io +import os.path as osp +from pathlib import Path + +import cv2 +import numpy as np +from cv2 import (IMREAD_COLOR, IMREAD_GRAYSCALE, IMREAD_IGNORE_ORIENTATION, + IMREAD_UNCHANGED) + +from annotator.uniformer.mmcv.utils import check_file_exist, is_str, mkdir_or_exist + +try: + from turbojpeg import TJCS_RGB, TJPF_BGR, TJPF_GRAY, TurboJPEG +except ImportError: + TJCS_RGB = TJPF_GRAY = TJPF_BGR = TurboJPEG = None + +try: + from PIL import Image, ImageOps +except ImportError: + Image = None + +try: + import tifffile +except ImportError: + tifffile = None + +jpeg = None +supported_backends = ['cv2', 'turbojpeg', 'pillow', 'tifffile'] + +imread_flags = { + 'color': IMREAD_COLOR, + 'grayscale': IMREAD_GRAYSCALE, + 'unchanged': IMREAD_UNCHANGED, + 'color_ignore_orientation': IMREAD_IGNORE_ORIENTATION | IMREAD_COLOR, + 'grayscale_ignore_orientation': + IMREAD_IGNORE_ORIENTATION | IMREAD_GRAYSCALE +} + +imread_backend = 'cv2' + + +def use_backend(backend): + """Select a backend for image decoding. 
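+
+    The choice is stored in the module-level ``imread_backend`` variable and
+    affects later :func:`imread` / :func:`imfrombytes` calls that do not pass
+    an explicit ``backend``.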
+ + Args: + backend (str): The image decoding backend type. Options are `cv2`, + `pillow`, `turbojpeg` (see https://github.com/lilohuang/PyTurboJPEG) + and `tifffile`. `turbojpeg` is faster but it only supports `.jpeg` + file format. + """ + assert backend in supported_backends + global imread_backend + imread_backend = backend + if imread_backend == 'turbojpeg': + if TurboJPEG is None: + raise ImportError('`PyTurboJPEG` is not installed') + global jpeg + if jpeg is None: + jpeg = TurboJPEG() + elif imread_backend == 'pillow': + if Image is None: + raise ImportError('`Pillow` is not installed') + elif imread_backend == 'tifffile': + if tifffile is None: + raise ImportError('`tifffile` is not installed') + + +def _jpegflag(flag='color', channel_order='bgr'): + channel_order = channel_order.lower() + if channel_order not in ['rgb', 'bgr']: + raise ValueError('channel order must be either "rgb" or "bgr"') + + if flag == 'color': + if channel_order == 'bgr': + return TJPF_BGR + elif channel_order == 'rgb': + return TJCS_RGB + elif flag == 'grayscale': + return TJPF_GRAY + else: + raise ValueError('flag must be "color" or "grayscale"') + + +def _pillow2array(img, flag='color', channel_order='bgr'): + """Convert a pillow image to numpy array. + + Args: + img (:obj:`PIL.Image.Image`): The image loaded using PIL + flag (str): Flags specifying the color type of a loaded image, + candidates are 'color', 'grayscale' and 'unchanged'. + Default to 'color'. + channel_order (str): The channel order of the output image array, + candidates are 'bgr' and 'rgb'. Default to 'bgr'. + + Returns: + np.ndarray: The converted numpy array + """ + channel_order = channel_order.lower() + if channel_order not in ['rgb', 'bgr']: + raise ValueError('channel order must be either "rgb" or "bgr"') + + if flag == 'unchanged': + array = np.array(img) + if array.ndim >= 3 and array.shape[2] >= 3: # color image + array[:, :, :3] = array[:, :, (2, 1, 0)] # RGB to BGR + else: + # Handle exif orientation tag + if flag in ['color', 'grayscale']: + img = ImageOps.exif_transpose(img) + # If the image mode is not 'RGB', convert it to 'RGB' first. + if img.mode != 'RGB': + if img.mode != 'LA': + # Most formats except 'LA' can be directly converted to RGB + img = img.convert('RGB') + else: + # When the mode is 'LA', the default conversion will fill in + # the canvas with black, which sometimes shadows black objects + # in the foreground. + # + # Therefore, a random color (124, 117, 104) is used for canvas + img_rgba = img.convert('RGBA') + img = Image.new('RGB', img_rgba.size, (124, 117, 104)) + img.paste(img_rgba, mask=img_rgba.split()[3]) # 3 is alpha + if flag in ['color', 'color_ignore_orientation']: + array = np.array(img) + if channel_order != 'rgb': + array = array[:, :, ::-1] # RGB to BGR + elif flag in ['grayscale', 'grayscale_ignore_orientation']: + img = img.convert('L') + array = np.array(img) + else: + raise ValueError( + 'flag must be "color", "grayscale", "unchanged", ' + f'"color_ignore_orientation" or "grayscale_ignore_orientation"' + f' but got {flag}') + return array + + +def imread(img_or_path, flag='color', channel_order='bgr', backend=None): + """Read an image. + + Args: + img_or_path (ndarray or str or Path): Either a numpy array or str or + pathlib.Path. If it is a numpy array (loaded image), then + it will be returned as is. + flag (str): Flags specifying the color type of a loaded image, + candidates are `color`, `grayscale`, `unchanged`, + `color_ignore_orientation` and `grayscale_ignore_orientation`. 
+ By default, `cv2` and `pillow` backend would rotate the image + according to its EXIF info unless called with `unchanged` or + `*_ignore_orientation` flags. `turbojpeg` and `tifffile` backend + always ignore image's EXIF info regardless of the flag. + The `turbojpeg` backend only supports `color` and `grayscale`. + channel_order (str): Order of channel, candidates are `bgr` and `rgb`. + backend (str | None): The image decoding backend type. Options are + `cv2`, `pillow`, `turbojpeg`, `tifffile`, `None`. + If backend is None, the global imread_backend specified by + ``mmcv.use_backend()`` will be used. Default: None. + + Returns: + ndarray: Loaded image array. + """ + + if backend is None: + backend = imread_backend + if backend not in supported_backends: + raise ValueError(f'backend: {backend} is not supported. Supported ' + "backends are 'cv2', 'turbojpeg', 'pillow'") + if isinstance(img_or_path, Path): + img_or_path = str(img_or_path) + + if isinstance(img_or_path, np.ndarray): + return img_or_path + elif is_str(img_or_path): + check_file_exist(img_or_path, + f'img file does not exist: {img_or_path}') + if backend == 'turbojpeg': + with open(img_or_path, 'rb') as in_file: + img = jpeg.decode(in_file.read(), + _jpegflag(flag, channel_order)) + if img.shape[-1] == 1: + img = img[:, :, 0] + return img + elif backend == 'pillow': + img = Image.open(img_or_path) + img = _pillow2array(img, flag, channel_order) + return img + elif backend == 'tifffile': + img = tifffile.imread(img_or_path) + return img + else: + flag = imread_flags[flag] if is_str(flag) else flag + img = cv2.imread(img_or_path, flag) + if flag == IMREAD_COLOR and channel_order == 'rgb': + cv2.cvtColor(img, cv2.COLOR_BGR2RGB, img) + return img + else: + raise TypeError('"img" must be a numpy array or a str or ' + 'a pathlib.Path object') + + +def imfrombytes(content, flag='color', channel_order='bgr', backend=None): + """Read an image from bytes. + + Args: + content (bytes): Image bytes got from files or other streams. + flag (str): Same as :func:`imread`. + backend (str | None): The image decoding backend type. Options are + `cv2`, `pillow`, `turbojpeg`, `None`. If backend is None, the + global imread_backend specified by ``mmcv.use_backend()`` will be + used. Default: None. + + Returns: + ndarray: Loaded image array. + """ + + if backend is None: + backend = imread_backend + if backend not in supported_backends: + raise ValueError(f'backend: {backend} is not supported. Supported ' + "backends are 'cv2', 'turbojpeg', 'pillow'") + if backend == 'turbojpeg': + img = jpeg.decode(content, _jpegflag(flag, channel_order)) + if img.shape[-1] == 1: + img = img[:, :, 0] + return img + elif backend == 'pillow': + buff = io.BytesIO(content) + img = Image.open(buff) + img = _pillow2array(img, flag, channel_order) + return img + else: + img_np = np.frombuffer(content, np.uint8) + flag = imread_flags[flag] if is_str(flag) else flag + img = cv2.imdecode(img_np, flag) + if flag == IMREAD_COLOR and channel_order == 'rgb': + cv2.cvtColor(img, cv2.COLOR_BGR2RGB, img) + return img + + +def imwrite(img, file_path, params=None, auto_mkdir=True): + """Write image to file. + + Args: + img (ndarray): Image array to be written. + file_path (str): Image file path. + params (None or list): Same as opencv :func:`imwrite` interface. + auto_mkdir (bool): If the parent folder of `file_path` does not exist, + whether to create it automatically. + + Returns: + bool: Successful or not. 
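+
+    Examples:
+        >>> import numpy as np
+        >>> img = np.zeros((64, 64, 3), dtype=np.uint8)
+        >>> # hypothetical path; '/tmp/out' is created when auto_mkdir=True
+        >>> imwrite(img, '/tmp/out/img.png')
+        True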
+ """ + if auto_mkdir: + dir_name = osp.abspath(osp.dirname(file_path)) + mkdir_or_exist(dir_name) + return cv2.imwrite(file_path, img, params) diff --git a/annotator/uniformer/mmcv/image/misc.py b/annotator/uniformer/mmcv/image/misc.py new file mode 100644 index 0000000000000000000000000000000000000000..3e61f05e3b05e4c7b40de4eb6c8eb100e6da41d0 --- /dev/null +++ b/annotator/uniformer/mmcv/image/misc.py @@ -0,0 +1,44 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import numpy as np + +import annotator.uniformer.mmcv as mmcv + +try: + import torch +except ImportError: + torch = None + + +def tensor2imgs(tensor, mean=(0, 0, 0), std=(1, 1, 1), to_rgb=True): + """Convert tensor to 3-channel images. + + Args: + tensor (torch.Tensor): Tensor that contains multiple images, shape ( + N, C, H, W). + mean (tuple[float], optional): Mean of images. Defaults to (0, 0, 0). + std (tuple[float], optional): Standard deviation of images. + Defaults to (1, 1, 1). + to_rgb (bool, optional): Whether the tensor was converted to RGB + format in the first place. If so, convert it back to BGR. + Defaults to True. + + Returns: + list[np.ndarray]: A list that contains multiple images. + """ + + if torch is None: + raise RuntimeError('pytorch is not installed') + assert torch.is_tensor(tensor) and tensor.ndim == 4 + assert len(mean) == 3 + assert len(std) == 3 + + num_imgs = tensor.size(0) + mean = np.array(mean, dtype=np.float32) + std = np.array(std, dtype=np.float32) + imgs = [] + for img_id in range(num_imgs): + img = tensor[img_id, ...].cpu().numpy().transpose(1, 2, 0) + img = mmcv.imdenormalize( + img, mean, std, to_bgr=to_rgb).astype(np.uint8) + imgs.append(np.ascontiguousarray(img)) + return imgs diff --git a/annotator/uniformer/mmcv/image/photometric.py b/annotator/uniformer/mmcv/image/photometric.py new file mode 100644 index 0000000000000000000000000000000000000000..5085d012019c0cbf56f66f421a378278c1a058ae --- /dev/null +++ b/annotator/uniformer/mmcv/image/photometric.py @@ -0,0 +1,428 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import cv2 +import numpy as np + +from ..utils import is_tuple_of +from .colorspace import bgr2gray, gray2bgr + + +def imnormalize(img, mean, std, to_rgb=True): + """Normalize an image with mean and std. + + Args: + img (ndarray): Image to be normalized. + mean (ndarray): The mean to be used for normalize. + std (ndarray): The std to be used for normalize. + to_rgb (bool): Whether to convert to rgb. + + Returns: + ndarray: The normalized image. + """ + img = img.copy().astype(np.float32) + return imnormalize_(img, mean, std, to_rgb) + + +def imnormalize_(img, mean, std, to_rgb=True): + """Inplace normalize an image with mean and std. + + Args: + img (ndarray): Image to be normalized. + mean (ndarray): The mean to be used for normalize. + std (ndarray): The std to be used for normalize. + to_rgb (bool): Whether to convert to rgb. + + Returns: + ndarray: The normalized image. 
+ """ + # cv2 inplace normalization does not accept uint8 + assert img.dtype != np.uint8 + mean = np.float64(mean.reshape(1, -1)) + stdinv = 1 / np.float64(std.reshape(1, -1)) + if to_rgb: + cv2.cvtColor(img, cv2.COLOR_BGR2RGB, img) # inplace + cv2.subtract(img, mean, img) # inplace + cv2.multiply(img, stdinv, img) # inplace + return img + + +def imdenormalize(img, mean, std, to_bgr=True): + assert img.dtype != np.uint8 + mean = mean.reshape(1, -1).astype(np.float64) + std = std.reshape(1, -1).astype(np.float64) + img = cv2.multiply(img, std) # make a copy + cv2.add(img, mean, img) # inplace + if to_bgr: + cv2.cvtColor(img, cv2.COLOR_RGB2BGR, img) # inplace + return img + + +def iminvert(img): + """Invert (negate) an image. + + Args: + img (ndarray): Image to be inverted. + + Returns: + ndarray: The inverted image. + """ + return np.full_like(img, 255) - img + + +def solarize(img, thr=128): + """Solarize an image (invert all pixel values above a threshold) + + Args: + img (ndarray): Image to be solarized. + thr (int): Threshold for solarizing (0 - 255). + + Returns: + ndarray: The solarized image. + """ + img = np.where(img < thr, img, 255 - img) + return img + + +def posterize(img, bits): + """Posterize an image (reduce the number of bits for each color channel) + + Args: + img (ndarray): Image to be posterized. + bits (int): Number of bits (1 to 8) to use for posterizing. + + Returns: + ndarray: The posterized image. + """ + shift = 8 - bits + img = np.left_shift(np.right_shift(img, shift), shift) + return img + + +def adjust_color(img, alpha=1, beta=None, gamma=0): + r"""It blends the source image and its gray image: + + .. math:: + output = img * alpha + gray\_img * beta + gamma + + Args: + img (ndarray): The input source image. + alpha (int | float): Weight for the source image. Default 1. + beta (int | float): Weight for the converted gray image. + If None, it's assigned the value (1 - `alpha`). + gamma (int | float): Scalar added to each sum. + Same as :func:`cv2.addWeighted`. Default 0. + + Returns: + ndarray: Colored image which has the same size and dtype as input. + """ + gray_img = bgr2gray(img) + gray_img = np.tile(gray_img[..., None], [1, 1, 3]) + if beta is None: + beta = 1 - alpha + colored_img = cv2.addWeighted(img, alpha, gray_img, beta, gamma) + if not colored_img.dtype == np.uint8: + # Note when the dtype of `img` is not the default `np.uint8` + # (e.g. np.float32), the value in `colored_img` got from cv2 + # is not guaranteed to be in range [0, 255], so here clip + # is needed. + colored_img = np.clip(colored_img, 0, 255) + return colored_img + + +def imequalize(img): + """Equalize the image histogram. + + This function applies a non-linear mapping to the input image, + in order to create a uniform distribution of grayscale values + in the output image. + + Args: + img (ndarray): Image to be equalized. + + Returns: + ndarray: The equalized image. + """ + + def _scale_channel(im, c): + """Scale the data in the corresponding channel.""" + im = im[:, :, c] + # Compute the histogram of the image channel. + histo = np.histogram(im, 256, (0, 255))[0] + # For computing the step, filter out the nonzeros. + nonzero_histo = histo[histo > 0] + step = (np.sum(nonzero_histo) - nonzero_histo[-1]) // 255 + if not step: + lut = np.array(range(256)) + else: + # Compute the cumulative sum, shifted by step // 2 + # and then normalized by step. + lut = (np.cumsum(histo) + (step // 2)) // step + # Shift lut, prepending with 0. 
+ lut = np.concatenate([[0], lut[:-1]], 0) + # handle potential integer overflow + lut[lut > 255] = 255 + # If step is zero, return the original image. + # Otherwise, index from lut. + return np.where(np.equal(step, 0), im, lut[im]) + + # Scales each channel independently and then stacks + # the result. + s1 = _scale_channel(img, 0) + s2 = _scale_channel(img, 1) + s3 = _scale_channel(img, 2) + equalized_img = np.stack([s1, s2, s3], axis=-1) + return equalized_img.astype(img.dtype) + + +def adjust_brightness(img, factor=1.): + """Adjust image brightness. + + This function controls the brightness of an image. An + enhancement factor of 0.0 gives a black image. + A factor of 1.0 gives the original image. This function + blends the source image and the degenerated black image: + + .. math:: + output = img * factor + degenerated * (1 - factor) + + Args: + img (ndarray): Image to be brightened. + factor (float): A value controls the enhancement. + Factor 1.0 returns the original image, lower + factors mean less color (brightness, contrast, + etc), and higher values more. Default 1. + + Returns: + ndarray: The brightened image. + """ + degenerated = np.zeros_like(img) + # Note manually convert the dtype to np.float32, to + # achieve as close results as PIL.ImageEnhance.Brightness. + # Set beta=1-factor, and gamma=0 + brightened_img = cv2.addWeighted( + img.astype(np.float32), factor, degenerated.astype(np.float32), + 1 - factor, 0) + brightened_img = np.clip(brightened_img, 0, 255) + return brightened_img.astype(img.dtype) + + +def adjust_contrast(img, factor=1.): + """Adjust image contrast. + + This function controls the contrast of an image. An + enhancement factor of 0.0 gives a solid grey + image. A factor of 1.0 gives the original image. It + blends the source image and the degenerated mean image: + + .. math:: + output = img * factor + degenerated * (1 - factor) + + Args: + img (ndarray): Image to be contrasted. BGR order. + factor (float): Same as :func:`mmcv.adjust_brightness`. + + Returns: + ndarray: The contrasted image. + """ + gray_img = bgr2gray(img) + hist = np.histogram(gray_img, 256, (0, 255))[0] + mean = round(np.sum(gray_img) / np.sum(hist)) + degenerated = (np.ones_like(img[..., 0]) * mean).astype(img.dtype) + degenerated = gray2bgr(degenerated) + contrasted_img = cv2.addWeighted( + img.astype(np.float32), factor, degenerated.astype(np.float32), + 1 - factor, 0) + contrasted_img = np.clip(contrasted_img, 0, 255) + return contrasted_img.astype(img.dtype) + + +def auto_contrast(img, cutoff=0): + """Auto adjust image contrast. + + This function maximize (normalize) image contrast by first removing cutoff + percent of the lightest and darkest pixels from the histogram and remapping + the image so that the darkest pixel becomes black (0), and the lightest + becomes white (255). + + Args: + img (ndarray): Image to be contrasted. BGR order. + cutoff (int | float | tuple): The cutoff percent of the lightest and + darkest pixels to be removed. If given as tuple, it shall be + (low, high). Otherwise, the single value will be used for both. + Defaults to 0. + + Returns: + ndarray: The contrasted image. + """ + + def _auto_contrast_channel(im, c, cutoff): + im = im[:, :, c] + # Compute the histogram of the image channel. 
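+        # (256 bins over [0, 255], effectively one bin per uint8 intensity)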
+        histo = np.histogram(im, 256, (0, 255))[0]
+        # Remove cut-off percent pixels from histo
+        histo_sum = np.cumsum(histo)
+        cut_low = histo_sum[-1] * cutoff[0] // 100
+        cut_high = histo_sum[-1] - histo_sum[-1] * cutoff[1] // 100
+        histo_sum = np.clip(histo_sum, cut_low, cut_high) - cut_low
+        histo = np.concatenate([[histo_sum[0]], np.diff(histo_sum)], 0)
+
+        # Compute mapping
+        low, high = np.nonzero(histo)[0][0], np.nonzero(histo)[0][-1]
+        # If all the values have been cut off, return the original image
+        if low >= high:
+            return im
+        scale = 255.0 / (high - low)
+        offset = -low * scale
+        lut = np.array(range(256))
+        lut = lut * scale + offset
+        lut = np.clip(lut, 0, 255)
+        return lut[im]
+
+    if isinstance(cutoff, (int, float)):
+        cutoff = (cutoff, cutoff)
+    else:
+        assert isinstance(cutoff, tuple), 'cutoff must be of type int, ' \
+            f'float or tuple, but got {type(cutoff)} instead.'
+    # Auto adjusts contrast for each channel independently and then stacks
+    # the result.
+    s1 = _auto_contrast_channel(img, 0, cutoff)
+    s2 = _auto_contrast_channel(img, 1, cutoff)
+    s3 = _auto_contrast_channel(img, 2, cutoff)
+    contrasted_img = np.stack([s1, s2, s3], axis=-1)
+    return contrasted_img.astype(img.dtype)
+
+
+def adjust_sharpness(img, factor=1., kernel=None):
+    """Adjust image sharpness.
+
+    This function controls the sharpness of an image. An
+    enhancement factor of 0.0 gives a blurred image. A
+    factor of 1.0 gives the original image. And a factor
+    of 2.0 gives a sharpened image. It blends the source
+    image and the degenerated mean image:
+
+    .. math::
+        output = img * factor + degenerated * (1 - factor)
+
+    Args:
+        img (ndarray): Image to be sharpened. BGR order.
+        factor (float): Same as :func:`mmcv.adjust_brightness`.
+        kernel (np.ndarray, optional): Filter kernel to be applied on the img
+            to obtain the degenerated img. Defaults to None.
+
+    Note:
+        No value sanity check is enforced on the kernel set by users. So with
+        an inappropriate kernel, ``adjust_sharpness`` may fail to perform
+        the function its name indicates, and instead perform whatever
+        transform is determined by the kernel.
+
+    Returns:
+        ndarray: The sharpened image.
+    """
+
+    if kernel is None:
+        # adapted from PIL.ImageFilter.SMOOTH
+        kernel = np.array([[1., 1., 1.], [1., 5., 1.], [1., 1., 1.]]) / 13
+    assert isinstance(kernel, np.ndarray), \
+        f'kernel must be of type np.ndarray, but got {type(kernel)} instead.'
+    assert kernel.ndim == 2, \
+        f'kernel must have a dimension of 2, but got {kernel.ndim} instead.'
+
+    degenerated = cv2.filter2D(img, -1, kernel)
+    sharpened_img = cv2.addWeighted(
+        img.astype(np.float32), factor, degenerated.astype(np.float32),
+        1 - factor, 0)
+    sharpened_img = np.clip(sharpened_img, 0, 255)
+    return sharpened_img.astype(img.dtype)
+
+
+def adjust_lighting(img, eigval, eigvec, alphastd=0.1, to_rgb=True):
+    """AlexNet-style PCA jitter.
+
+    This data augmentation is proposed in `ImageNet Classification with Deep
+    Convolutional Neural Networks
+    `_.
+
+    Args:
+        img (ndarray): Image whose lighting is to be adjusted. BGR order.
+        eigval (ndarray): the eigenvalues of the covariance matrix of pixel
+            values.
+        eigvec (ndarray): the eigenvectors of the covariance matrix of pixel
+            values.
+        alphastd (float): The standard deviation for distribution of alpha.
+            Defaults to 0.1.
+        to_rgb (bool): Whether to convert img to rgb.
+
+    Returns:
+        ndarray: The adjusted image.
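+
+    Example (an illustrative sketch; the ``eigval``/``eigvec`` numbers below
+    are the commonly quoted AlexNet ImageNet PCA statistics, assumed here
+    purely for demonstration):
+        >>> img = np.random.randint(0, 256, (32, 32, 3)).astype(np.uint8)
+        >>> eigval = np.array([0.2175, 0.0188, 0.0045])
+        >>> eigvec = np.array([[-0.5675, 0.7192, 0.4009],
+        >>>                    [-0.5808, -0.0045, -0.8140],
+        >>>                    [-0.5836, -0.6948, 0.4203]])
+        >>> out = adjust_lighting(img, eigval, eigvec, alphastd=0.1)
+        >>> out.shape
+        (32, 32, 3)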
+ """ + assert isinstance(eigval, np.ndarray) and isinstance(eigvec, np.ndarray), \ + f'eigval and eigvec should both be of type np.ndarray, got ' \ + f'{type(eigval)} and {type(eigvec)} instead.' + + assert eigval.ndim == 1 and eigvec.ndim == 2 + assert eigvec.shape == (3, eigval.shape[0]) + n_eigval = eigval.shape[0] + assert isinstance(alphastd, float), 'alphastd should be of type float, ' \ + f'got {type(alphastd)} instead.' + + img = img.copy().astype(np.float32) + if to_rgb: + cv2.cvtColor(img, cv2.COLOR_BGR2RGB, img) # inplace + + alpha = np.random.normal(0, alphastd, n_eigval) + alter = eigvec \ + * np.broadcast_to(alpha.reshape(1, n_eigval), (3, n_eigval)) \ + * np.broadcast_to(eigval.reshape(1, n_eigval), (3, n_eigval)) + alter = np.broadcast_to(alter.sum(axis=1).reshape(1, 1, 3), img.shape) + img_adjusted = img + alter + return img_adjusted + + +def lut_transform(img, lut_table): + """Transform array by look-up table. + + The function lut_transform fills the output array with values from the + look-up table. Indices of the entries are taken from the input array. + + Args: + img (ndarray): Image to be transformed. + lut_table (ndarray): look-up table of 256 elements; in case of + multi-channel input array, the table should either have a single + channel (in this case the same table is used for all channels) or + the same number of channels as in the input array. + + Returns: + ndarray: The transformed image. + """ + assert isinstance(img, np.ndarray) + assert 0 <= np.min(img) and np.max(img) <= 255 + assert isinstance(lut_table, np.ndarray) + assert lut_table.shape == (256, ) + + return cv2.LUT(np.array(img, dtype=np.uint8), lut_table) + + +def clahe(img, clip_limit=40.0, tile_grid_size=(8, 8)): + """Use CLAHE method to process the image. + + See `ZUIDERVELD,K. Contrast Limited Adaptive Histogram Equalization[J]. + Graphics Gems, 1994:474-485.` for more information. + + Args: + img (ndarray): Image to be processed. + clip_limit (float): Threshold for contrast limiting. Default: 40.0. + tile_grid_size (tuple[int]): Size of grid for histogram equalization. + Input image will be divided into equally sized rectangular tiles. + It defines the number of tiles in row and column. Default: (8, 8). + + Returns: + ndarray: The processed image. 
+ """ + assert isinstance(img, np.ndarray) + assert img.ndim == 2 + assert isinstance(clip_limit, (float, int)) + assert is_tuple_of(tile_grid_size, int) + assert len(tile_grid_size) == 2 + + clahe = cv2.createCLAHE(clip_limit, tile_grid_size) + return clahe.apply(np.array(img, dtype=np.uint8)) diff --git a/annotator/uniformer/mmcv/model_zoo/deprecated.json b/annotator/uniformer/mmcv/model_zoo/deprecated.json new file mode 100644 index 0000000000000000000000000000000000000000..25cf6f28caecc22a77e3136fefa6b8dfc0e6cb5b --- /dev/null +++ b/annotator/uniformer/mmcv/model_zoo/deprecated.json @@ -0,0 +1,6 @@ +{ + "resnet50_caffe": "detectron/resnet50_caffe", + "resnet50_caffe_bgr": "detectron2/resnet50_caffe_bgr", + "resnet101_caffe": "detectron/resnet101_caffe", + "resnet101_caffe_bgr": "detectron2/resnet101_caffe_bgr" +} diff --git a/annotator/uniformer/mmcv/model_zoo/mmcls.json b/annotator/uniformer/mmcv/model_zoo/mmcls.json new file mode 100644 index 0000000000000000000000000000000000000000..bdb311d9fe6d9f317290feedc9e37236c6cf6e8f --- /dev/null +++ b/annotator/uniformer/mmcv/model_zoo/mmcls.json @@ -0,0 +1,31 @@ +{ + "vgg11": "https://download.openmmlab.com/mmclassification/v0/vgg/vgg11_batch256_imagenet_20210208-4271cd6c.pth", + "vgg13": "https://download.openmmlab.com/mmclassification/v0/vgg/vgg13_batch256_imagenet_20210208-4d1d6080.pth", + "vgg16": "https://download.openmmlab.com/mmclassification/v0/vgg/vgg16_batch256_imagenet_20210208-db26f1a5.pth", + "vgg19": "https://download.openmmlab.com/mmclassification/v0/vgg/vgg19_batch256_imagenet_20210208-e6920e4a.pth", + "vgg11_bn": "https://download.openmmlab.com/mmclassification/v0/vgg/vgg11_bn_batch256_imagenet_20210207-f244902c.pth", + "vgg13_bn": "https://download.openmmlab.com/mmclassification/v0/vgg/vgg13_bn_batch256_imagenet_20210207-1a8b7864.pth", + "vgg16_bn": "https://download.openmmlab.com/mmclassification/v0/vgg/vgg16_bn_batch256_imagenet_20210208-7e55cd29.pth", + "vgg19_bn": "https://download.openmmlab.com/mmclassification/v0/vgg/vgg19_bn_batch256_imagenet_20210208-da620c4f.pth", + "resnet18": "https://download.openmmlab.com/mmclassification/v0/resnet/resnet18_batch256_imagenet_20200708-34ab8f90.pth", + "resnet34": "https://download.openmmlab.com/mmclassification/v0/resnet/resnet34_batch256_imagenet_20200708-32ffb4f7.pth", + "resnet50": "https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_batch256_imagenet_20200708-cfb998bf.pth", + "resnet101": "https://download.openmmlab.com/mmclassification/v0/resnet/resnet101_batch256_imagenet_20200708-753f3608.pth", + "resnet152": "https://download.openmmlab.com/mmclassification/v0/resnet/resnet152_batch256_imagenet_20200708-ec25b1f9.pth", + "resnet50_v1d": "https://download.openmmlab.com/mmclassification/v0/resnet/resnetv1d50_batch256_imagenet_20200708-1ad0ce94.pth", + "resnet101_v1d": "https://download.openmmlab.com/mmclassification/v0/resnet/resnetv1d101_batch256_imagenet_20200708-9cb302ef.pth", + "resnet152_v1d": "https://download.openmmlab.com/mmclassification/v0/resnet/resnetv1d152_batch256_imagenet_20200708-e79cb6a2.pth", + "resnext50_32x4d": "https://download.openmmlab.com/mmclassification/v0/resnext/resnext50_32x4d_b32x8_imagenet_20210429-56066e27.pth", + "resnext101_32x4d": "https://download.openmmlab.com/mmclassification/v0/resnext/resnext101_32x4d_b32x8_imagenet_20210506-e0fa3dd5.pth", + "resnext101_32x8d": "https://download.openmmlab.com/mmclassification/v0/resnext/resnext101_32x8d_b32x8_imagenet_20210506-23a247d5.pth", + "resnext152_32x4d": 
"https://download.openmmlab.com/mmclassification/v0/resnext/resnext152_32x4d_b32x8_imagenet_20210524-927787be.pth", + "se-resnet50": "https://download.openmmlab.com/mmclassification/v0/se-resnet/se-resnet50_batch256_imagenet_20200804-ae206104.pth", + "se-resnet101": "https://download.openmmlab.com/mmclassification/v0/se-resnet/se-resnet101_batch256_imagenet_20200804-ba5b51d4.pth", + "resnest50": "https://download.openmmlab.com/mmclassification/v0/resnest/resnest50_imagenet_converted-1ebf0afe.pth", + "resnest101": "https://download.openmmlab.com/mmclassification/v0/resnest/resnest101_imagenet_converted-032caa52.pth", + "resnest200": "https://download.openmmlab.com/mmclassification/v0/resnest/resnest200_imagenet_converted-581a60f2.pth", + "resnest269": "https://download.openmmlab.com/mmclassification/v0/resnest/resnest269_imagenet_converted-59930960.pth", + "shufflenet_v1": "https://download.openmmlab.com/mmclassification/v0/shufflenet_v1/shufflenet_v1_batch1024_imagenet_20200804-5d6cec73.pth", + "shufflenet_v2": "https://download.openmmlab.com/mmclassification/v0/shufflenet_v2/shufflenet_v2_batch1024_imagenet_20200812-5bf4721e.pth", + "mobilenet_v2": "https://download.openmmlab.com/mmclassification/v0/mobilenet_v2/mobilenet_v2_batch256_imagenet_20200708-3b2dc3af.pth" +} diff --git a/annotator/uniformer/mmcv/model_zoo/open_mmlab.json b/annotator/uniformer/mmcv/model_zoo/open_mmlab.json new file mode 100644 index 0000000000000000000000000000000000000000..8311db4feef92faa0841c697d75efbee8430c3a0 --- /dev/null +++ b/annotator/uniformer/mmcv/model_zoo/open_mmlab.json @@ -0,0 +1,50 @@ +{ + "vgg16_caffe": "https://download.openmmlab.com/pretrain/third_party/vgg16_caffe-292e1171.pth", + "detectron/resnet50_caffe": "https://download.openmmlab.com/pretrain/third_party/resnet50_caffe-788b5fa3.pth", + "detectron2/resnet50_caffe": "https://download.openmmlab.com/pretrain/third_party/resnet50_msra-5891d200.pth", + "detectron/resnet101_caffe": "https://download.openmmlab.com/pretrain/third_party/resnet101_caffe-3ad79236.pth", + "detectron2/resnet101_caffe": "https://download.openmmlab.com/pretrain/third_party/resnet101_msra-6cc46731.pth", + "detectron2/resnext101_32x8d": "https://download.openmmlab.com/pretrain/third_party/resnext101_32x8d-1516f1aa.pth", + "resnext50_32x4d": "https://download.openmmlab.com/pretrain/third_party/resnext50-32x4d-0ab1a123.pth", + "resnext101_32x4d": "https://download.openmmlab.com/pretrain/third_party/resnext101_32x4d-a5af3160.pth", + "resnext101_64x4d": "https://download.openmmlab.com/pretrain/third_party/resnext101_64x4d-ee2c6f71.pth", + "contrib/resnet50_gn": "https://download.openmmlab.com/pretrain/third_party/resnet50_gn_thangvubk-ad1730dd.pth", + "detectron/resnet50_gn": "https://download.openmmlab.com/pretrain/third_party/resnet50_gn-9186a21c.pth", + "detectron/resnet101_gn": "https://download.openmmlab.com/pretrain/third_party/resnet101_gn-cac0ab98.pth", + "jhu/resnet50_gn_ws": "https://download.openmmlab.com/pretrain/third_party/resnet50_gn_ws-15beedd8.pth", + "jhu/resnet101_gn_ws": "https://download.openmmlab.com/pretrain/third_party/resnet101_gn_ws-3e3c308c.pth", + "jhu/resnext50_32x4d_gn_ws": "https://download.openmmlab.com/pretrain/third_party/resnext50_32x4d_gn_ws-0d87ac85.pth", + "jhu/resnext101_32x4d_gn_ws": "https://download.openmmlab.com/pretrain/third_party/resnext101_32x4d_gn_ws-34ac1a9e.pth", + "jhu/resnext50_32x4d_gn": "https://download.openmmlab.com/pretrain/third_party/resnext50_32x4d_gn-c7e8b754.pth", + "jhu/resnext101_32x4d_gn": 
"https://download.openmmlab.com/pretrain/third_party/resnext101_32x4d_gn-ac3bb84e.pth", + "msra/hrnetv2_w18_small": "https://download.openmmlab.com/pretrain/third_party/hrnetv2_w18_small-b5a04e21.pth", + "msra/hrnetv2_w18": "https://download.openmmlab.com/pretrain/third_party/hrnetv2_w18-00eb2006.pth", + "msra/hrnetv2_w32": "https://download.openmmlab.com/pretrain/third_party/hrnetv2_w32-dc9eeb4f.pth", + "msra/hrnetv2_w40": "https://download.openmmlab.com/pretrain/third_party/hrnetv2_w40-ed0b031c.pth", + "msra/hrnetv2_w48": "https://download.openmmlab.com/pretrain/third_party/hrnetv2_w48-d2186c55.pth", + "bninception_caffe": "https://download.openmmlab.com/pretrain/third_party/bn_inception_caffe-ed2e8665.pth", + "kin400/i3d_r50_f32s2_k400": "https://download.openmmlab.com/pretrain/third_party/i3d_r50_f32s2_k400-2c57e077.pth", + "kin400/nl3d_r50_f32s2_k400": "https://download.openmmlab.com/pretrain/third_party/nl3d_r50_f32s2_k400-fa7e7caa.pth", + "res2net101_v1d_26w_4s": "https://download.openmmlab.com/pretrain/third_party/res2net101_v1d_26w_4s_mmdetv2-f0a600f9.pth", + "regnetx_400mf": "https://download.openmmlab.com/pretrain/third_party/regnetx_400mf-a5b10d96.pth", + "regnetx_800mf": "https://download.openmmlab.com/pretrain/third_party/regnetx_800mf-1f4be4c7.pth", + "regnetx_1.6gf": "https://download.openmmlab.com/pretrain/third_party/regnetx_1.6gf-5791c176.pth", + "regnetx_3.2gf": "https://download.openmmlab.com/pretrain/third_party/regnetx_3.2gf-c2599b0f.pth", + "regnetx_4.0gf": "https://download.openmmlab.com/pretrain/third_party/regnetx_4.0gf-a88f671e.pth", + "regnetx_6.4gf": "https://download.openmmlab.com/pretrain/third_party/regnetx_6.4gf-006af45d.pth", + "regnetx_8.0gf": "https://download.openmmlab.com/pretrain/third_party/regnetx_8.0gf-3c68abe7.pth", + "regnetx_12gf": "https://download.openmmlab.com/pretrain/third_party/regnetx_12gf-4c2a3350.pth", + "resnet18_v1c": "https://download.openmmlab.com/pretrain/third_party/resnet18_v1c-b5776b93.pth", + "resnet50_v1c": "https://download.openmmlab.com/pretrain/third_party/resnet50_v1c-2cccc1ad.pth", + "resnet101_v1c": "https://download.openmmlab.com/pretrain/third_party/resnet101_v1c-e67eebb6.pth", + "mmedit/vgg16": "https://download.openmmlab.com/mmediting/third_party/vgg_state_dict.pth", + "mmedit/res34_en_nomixup": "https://download.openmmlab.com/mmediting/third_party/model_best_resnet34_En_nomixup.pth", + "mmedit/mobilenet_v2": "https://download.openmmlab.com/mmediting/third_party/mobilenet_v2.pth", + "contrib/mobilenet_v3_large": "https://download.openmmlab.com/pretrain/third_party/mobilenet_v3_large-bc2c3fd3.pth", + "contrib/mobilenet_v3_small": "https://download.openmmlab.com/pretrain/third_party/mobilenet_v3_small-47085aa1.pth", + "resnest50": "https://download.openmmlab.com/pretrain/third_party/resnest50_d2-7497a55b.pth", + "resnest101": "https://download.openmmlab.com/pretrain/third_party/resnest101_d2-f3b931b2.pth", + "resnest200": "https://download.openmmlab.com/pretrain/third_party/resnest200_d2-ca88e41f.pth", + "darknet53": "https://download.openmmlab.com/pretrain/third_party/darknet53-a628ea1b.pth", + "mmdet/mobilenet_v2": "https://download.openmmlab.com/mmdetection/v2.0/third_party/mobilenet_v2_batch256_imagenet-ff34753d.pth" +} diff --git a/annotator/uniformer/mmcv/ops/__init__.py b/annotator/uniformer/mmcv/ops/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..999e090a458ee148ceca0649f1e3806a40e909bd --- /dev/null +++ b/annotator/uniformer/mmcv/ops/__init__.py @@ -0,0 +1,81 @@ +# Copyright 
(c) OpenMMLab. All rights reserved. +from .assign_score_withk import assign_score_withk +from .ball_query import ball_query +from .bbox import bbox_overlaps +from .border_align import BorderAlign, border_align +from .box_iou_rotated import box_iou_rotated +from .carafe import CARAFE, CARAFENaive, CARAFEPack, carafe, carafe_naive +from .cc_attention import CrissCrossAttention +from .contour_expand import contour_expand +from .corner_pool import CornerPool +from .correlation import Correlation +from .deform_conv import DeformConv2d, DeformConv2dPack, deform_conv2d +from .deform_roi_pool import (DeformRoIPool, DeformRoIPoolPack, + ModulatedDeformRoIPoolPack, deform_roi_pool) +from .deprecated_wrappers import Conv2d_deprecated as Conv2d +from .deprecated_wrappers import ConvTranspose2d_deprecated as ConvTranspose2d +from .deprecated_wrappers import Linear_deprecated as Linear +from .deprecated_wrappers import MaxPool2d_deprecated as MaxPool2d +from .focal_loss import (SigmoidFocalLoss, SoftmaxFocalLoss, + sigmoid_focal_loss, softmax_focal_loss) +from .furthest_point_sample import (furthest_point_sample, + furthest_point_sample_with_dist) +from .fused_bias_leakyrelu import FusedBiasLeakyReLU, fused_bias_leakyrelu +from .gather_points import gather_points +from .group_points import GroupAll, QueryAndGroup, grouping_operation +from .info import (get_compiler_version, get_compiling_cuda_version, + get_onnxruntime_op_path) +from .iou3d import boxes_iou_bev, nms_bev, nms_normal_bev +from .knn import knn +from .masked_conv import MaskedConv2d, masked_conv2d +from .modulated_deform_conv import (ModulatedDeformConv2d, + ModulatedDeformConv2dPack, + modulated_deform_conv2d) +from .multi_scale_deform_attn import MultiScaleDeformableAttention +from .nms import batched_nms, nms, nms_match, nms_rotated, soft_nms +from .pixel_group import pixel_group +from .point_sample import (SimpleRoIAlign, point_sample, + rel_roi_point_to_rel_img_point) +from .points_in_boxes import (points_in_boxes_all, points_in_boxes_cpu, + points_in_boxes_part) +from .points_sampler import PointsSampler +from .psa_mask import PSAMask +from .roi_align import RoIAlign, roi_align +from .roi_align_rotated import RoIAlignRotated, roi_align_rotated +from .roi_pool import RoIPool, roi_pool +from .roiaware_pool3d import RoIAwarePool3d +from .roipoint_pool3d import RoIPointPool3d +from .saconv import SAConv2d +from .scatter_points import DynamicScatter, dynamic_scatter +from .sync_bn import SyncBatchNorm +from .three_interpolate import three_interpolate +from .three_nn import three_nn +from .tin_shift import TINShift, tin_shift +from .upfirdn2d import upfirdn2d +from .voxelize import Voxelization, voxelization + +__all__ = [ + 'bbox_overlaps', 'CARAFE', 'CARAFENaive', 'CARAFEPack', 'carafe', + 'carafe_naive', 'CornerPool', 'DeformConv2d', 'DeformConv2dPack', + 'deform_conv2d', 'DeformRoIPool', 'DeformRoIPoolPack', + 'ModulatedDeformRoIPoolPack', 'deform_roi_pool', 'SigmoidFocalLoss', + 'SoftmaxFocalLoss', 'sigmoid_focal_loss', 'softmax_focal_loss', + 'get_compiler_version', 'get_compiling_cuda_version', + 'get_onnxruntime_op_path', 'MaskedConv2d', 'masked_conv2d', + 'ModulatedDeformConv2d', 'ModulatedDeformConv2dPack', + 'modulated_deform_conv2d', 'batched_nms', 'nms', 'soft_nms', 'nms_match', + 'RoIAlign', 'roi_align', 'RoIPool', 'roi_pool', 'SyncBatchNorm', 'Conv2d', + 'ConvTranspose2d', 'Linear', 'MaxPool2d', 'CrissCrossAttention', 'PSAMask', + 'point_sample', 'rel_roi_point_to_rel_img_point', 'SimpleRoIAlign', + 'SAConv2d', 'TINShift', 
'tin_shift', 'assign_score_withk', + 'box_iou_rotated', 'RoIPointPool3d', 'nms_rotated', 'knn', 'ball_query', + 'upfirdn2d', 'FusedBiasLeakyReLU', 'fused_bias_leakyrelu', + 'RoIAlignRotated', 'roi_align_rotated', 'pixel_group', 'QueryAndGroup', + 'GroupAll', 'grouping_operation', 'contour_expand', 'three_nn', + 'three_interpolate', 'MultiScaleDeformableAttention', 'BorderAlign', + 'border_align', 'gather_points', 'furthest_point_sample', + 'furthest_point_sample_with_dist', 'PointsSampler', 'Correlation', + 'boxes_iou_bev', 'nms_bev', 'nms_normal_bev', 'Voxelization', + 'voxelization', 'dynamic_scatter', 'DynamicScatter', 'RoIAwarePool3d', + 'points_in_boxes_part', 'points_in_boxes_cpu', 'points_in_boxes_all' +] diff --git a/annotator/uniformer/mmcv/ops/assign_score_withk.py b/annotator/uniformer/mmcv/ops/assign_score_withk.py new file mode 100644 index 0000000000000000000000000000000000000000..4906adaa2cffd1b46912fbe7d4f87ef2f9fa0012 --- /dev/null +++ b/annotator/uniformer/mmcv/ops/assign_score_withk.py @@ -0,0 +1,123 @@ +from torch.autograd import Function + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext( + '_ext', ['assign_score_withk_forward', 'assign_score_withk_backward']) + + +class AssignScoreWithK(Function): + r"""Perform weighted sum to generate output features according to scores. + Modified from `PAConv `_. + + This is a memory-efficient CUDA implementation of assign_scores operation, + which first transform all point features with weight bank, then assemble + neighbor features with ``knn_idx`` and perform weighted sum of ``scores``. + + See the `paper `_ appendix Sec. D for + more detailed descriptions. + + Note: + This implementation assumes using ``neighbor`` kernel input, which is + (point_features - center_features, point_features). + See https://github.com/CVMI-Lab/PAConv/blob/main/scene_seg/model/ + pointnet2/paconv.py#L128 for more details. + """ + + @staticmethod + def forward(ctx, + scores, + point_features, + center_features, + knn_idx, + aggregate='sum'): + """ + Args: + scores (torch.Tensor): (B, npoint, K, M), predicted scores to + aggregate weight matrices in the weight bank. + ``npoint`` is the number of sampled centers. + ``K`` is the number of queried neighbors. + ``M`` is the number of weight matrices in the weight bank. + point_features (torch.Tensor): (B, N, M, out_dim) + Pre-computed point features to be aggregated. + center_features (torch.Tensor): (B, N, M, out_dim) + Pre-computed center features to be aggregated. + knn_idx (torch.Tensor): (B, npoint, K), index of sampled kNN. + We assume the first idx in each row is the idx of the center. + aggregate (str, optional): Aggregation method. + Can be 'sum', 'avg' or 'max'. Defaults: 'sum'. + + Returns: + torch.Tensor: (B, out_dim, npoint, K), the aggregated features. 
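+
+        Example (a shape-only sketch; running it requires the compiled
+        ``_ext`` CUDA extension and CUDA tensors):
+            >>> B, N, npoint, K, M, out_dim = 2, 64, 16, 8, 4, 32
+            >>> scores = torch.rand(B, npoint, K, M).cuda()
+            >>> point_feats = torch.rand(B, N, M, out_dim).cuda()
+            >>> center_feats = torch.rand(B, N, M, out_dim).cuda()
+            >>> knn_idx = torch.randint(0, N, (B, npoint, K)).long().cuda()
+            >>> out = assign_score_withk(scores, point_feats,
+            >>>                          center_feats, knn_idx)
+            >>> out.shape
+            torch.Size([2, 32, 16, 8])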
+ """ + agg = {'sum': 0, 'avg': 1, 'max': 2} + + B, N, M, out_dim = point_features.size() + _, npoint, K, _ = scores.size() + + output = point_features.new_zeros((B, out_dim, npoint, K)) + ext_module.assign_score_withk_forward( + point_features.contiguous(), + center_features.contiguous(), + scores.contiguous(), + knn_idx.contiguous(), + output, + B=B, + N0=N, + N1=npoint, + M=M, + K=K, + O=out_dim, + aggregate=agg[aggregate]) + + ctx.save_for_backward(output, point_features, center_features, scores, + knn_idx) + ctx.agg = agg[aggregate] + + return output + + @staticmethod + def backward(ctx, grad_out): + """ + Args: + grad_out (torch.Tensor): (B, out_dim, npoint, K) + + Returns: + grad_scores (torch.Tensor): (B, npoint, K, M) + grad_point_features (torch.Tensor): (B, N, M, out_dim) + grad_center_features (torch.Tensor): (B, N, M, out_dim) + """ + _, point_features, center_features, scores, knn_idx = ctx.saved_tensors + + agg = ctx.agg + + B, N, M, out_dim = point_features.size() + _, npoint, K, _ = scores.size() + + grad_point_features = point_features.new_zeros(point_features.shape) + grad_center_features = center_features.new_zeros(center_features.shape) + grad_scores = scores.new_zeros(scores.shape) + + ext_module.assign_score_withk_backward( + grad_out.contiguous(), + point_features.contiguous(), + center_features.contiguous(), + scores.contiguous(), + knn_idx.contiguous(), + grad_point_features, + grad_center_features, + grad_scores, + B=B, + N0=N, + N1=npoint, + M=M, + K=K, + O=out_dim, + aggregate=agg) + + return grad_scores, grad_point_features, \ + grad_center_features, None, None + + +assign_score_withk = AssignScoreWithK.apply diff --git a/annotator/uniformer/mmcv/ops/ball_query.py b/annotator/uniformer/mmcv/ops/ball_query.py new file mode 100644 index 0000000000000000000000000000000000000000..d0466847c6e5c1239e359a0397568413ebc1504a --- /dev/null +++ b/annotator/uniformer/mmcv/ops/ball_query.py @@ -0,0 +1,55 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +from torch.autograd import Function + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', ['ball_query_forward']) + + +class BallQuery(Function): + """Find nearby points in spherical space.""" + + @staticmethod + def forward(ctx, min_radius: float, max_radius: float, sample_num: int, + xyz: torch.Tensor, center_xyz: torch.Tensor) -> torch.Tensor: + """ + Args: + min_radius (float): minimum radius of the balls. + max_radius (float): maximum radius of the balls. + sample_num (int): maximum number of features in the balls. + xyz (Tensor): (B, N, 3) xyz coordinates of the features. + center_xyz (Tensor): (B, npoint, 3) centers of the ball query. + + Returns: + Tensor: (B, npoint, nsample) tensor with the indices of + the features that form the query balls. 
+ """ + assert center_xyz.is_contiguous() + assert xyz.is_contiguous() + assert min_radius < max_radius + + B, N, _ = xyz.size() + npoint = center_xyz.size(1) + idx = xyz.new_zeros(B, npoint, sample_num, dtype=torch.int) + + ext_module.ball_query_forward( + center_xyz, + xyz, + idx, + b=B, + n=N, + m=npoint, + min_radius=min_radius, + max_radius=max_radius, + nsample=sample_num) + if torch.__version__ != 'parrots': + ctx.mark_non_differentiable(idx) + return idx + + @staticmethod + def backward(ctx, a=None): + return None, None, None, None + + +ball_query = BallQuery.apply diff --git a/annotator/uniformer/mmcv/ops/bbox.py b/annotator/uniformer/mmcv/ops/bbox.py new file mode 100644 index 0000000000000000000000000000000000000000..0c4d58b6c91f652933974f519acd3403a833e906 --- /dev/null +++ b/annotator/uniformer/mmcv/ops/bbox.py @@ -0,0 +1,72 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', ['bbox_overlaps']) + + +def bbox_overlaps(bboxes1, bboxes2, mode='iou', aligned=False, offset=0): + """Calculate overlap between two set of bboxes. + + If ``aligned`` is ``False``, then calculate the ious between each bbox + of bboxes1 and bboxes2, otherwise the ious between each aligned pair of + bboxes1 and bboxes2. + + Args: + bboxes1 (Tensor): shape (m, 4) in format or empty. + bboxes2 (Tensor): shape (n, 4) in format or empty. + If aligned is ``True``, then m and n must be equal. + mode (str): "iou" (intersection over union) or iof (intersection over + foreground). + + Returns: + ious(Tensor): shape (m, n) if aligned == False else shape (m, 1) + + Example: + >>> bboxes1 = torch.FloatTensor([ + >>> [0, 0, 10, 10], + >>> [10, 10, 20, 20], + >>> [32, 32, 38, 42], + >>> ]) + >>> bboxes2 = torch.FloatTensor([ + >>> [0, 0, 10, 20], + >>> [0, 10, 10, 19], + >>> [10, 10, 20, 20], + >>> ]) + >>> bbox_overlaps(bboxes1, bboxes2) + tensor([[0.5000, 0.0000, 0.0000], + [0.0000, 0.0000, 1.0000], + [0.0000, 0.0000, 0.0000]]) + + Example: + >>> empty = torch.FloatTensor([]) + >>> nonempty = torch.FloatTensor([ + >>> [0, 0, 10, 9], + >>> ]) + >>> assert tuple(bbox_overlaps(empty, nonempty).shape) == (0, 1) + >>> assert tuple(bbox_overlaps(nonempty, empty).shape) == (1, 0) + >>> assert tuple(bbox_overlaps(empty, empty).shape) == (0, 0) + """ + + mode_dict = {'iou': 0, 'iof': 1} + assert mode in mode_dict.keys() + mode_flag = mode_dict[mode] + # Either the boxes are empty or the length of boxes' last dimension is 4 + assert (bboxes1.size(-1) == 4 or bboxes1.size(0) == 0) + assert (bboxes2.size(-1) == 4 or bboxes2.size(0) == 0) + assert offset == 1 or offset == 0 + + rows = bboxes1.size(0) + cols = bboxes2.size(0) + if aligned: + assert rows == cols + + if rows * cols == 0: + return bboxes1.new(rows, 1) if aligned else bboxes1.new(rows, cols) + + if aligned: + ious = bboxes1.new_zeros(rows) + else: + ious = bboxes1.new_zeros((rows, cols)) + ext_module.bbox_overlaps( + bboxes1, bboxes2, ious, mode=mode_flag, aligned=aligned, offset=offset) + return ious diff --git a/annotator/uniformer/mmcv/ops/border_align.py b/annotator/uniformer/mmcv/ops/border_align.py new file mode 100644 index 0000000000000000000000000000000000000000..ff305be328e9b0a15e1bbb5e6b41beb940f55c81 --- /dev/null +++ b/annotator/uniformer/mmcv/ops/border_align.py @@ -0,0 +1,109 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+# modified from +# https://github.com/Megvii-BaseDetection/cvpods/blob/master/cvpods/layers/border_align.py + +import torch +import torch.nn as nn +from torch.autograd import Function +from torch.autograd.function import once_differentiable + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext( + '_ext', ['border_align_forward', 'border_align_backward']) + + +class BorderAlignFunction(Function): + + @staticmethod + def symbolic(g, input, boxes, pool_size): + return g.op( + 'mmcv::MMCVBorderAlign', input, boxes, pool_size_i=pool_size) + + @staticmethod + def forward(ctx, input, boxes, pool_size): + ctx.pool_size = pool_size + ctx.input_shape = input.size() + + assert boxes.ndim == 3, 'boxes must be with shape [B, H*W, 4]' + assert boxes.size(2) == 4, \ + 'the last dimension of boxes must be (x1, y1, x2, y2)' + assert input.size(1) % 4 == 0, \ + 'the channel for input feature must be divisible by factor 4' + + # [B, C//4, H*W, 4] + output_shape = (input.size(0), input.size(1) // 4, boxes.size(1), 4) + output = input.new_zeros(output_shape) + # `argmax_idx` only used for backward + argmax_idx = input.new_zeros(output_shape).to(torch.int) + + ext_module.border_align_forward( + input, boxes, output, argmax_idx, pool_size=ctx.pool_size) + + ctx.save_for_backward(boxes, argmax_idx) + return output + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + boxes, argmax_idx = ctx.saved_tensors + grad_input = grad_output.new_zeros(ctx.input_shape) + # complex head architecture may cause grad_output uncontiguous + grad_output = grad_output.contiguous() + ext_module.border_align_backward( + grad_output, + boxes, + argmax_idx, + grad_input, + pool_size=ctx.pool_size) + return grad_input, None, None + + +border_align = BorderAlignFunction.apply + + +class BorderAlign(nn.Module): + r"""Border align pooling layer. + + Applies border_align over the input feature based on predicted bboxes. + The details were described in the paper + `BorderDet: Border Feature for Dense Object Detection + `_. + + For each border line (e.g. top, left, bottom or right) of each box, + border_align does the following: + 1. uniformly samples `pool_size`+1 positions on this line, involving \ + the start and end points. + 2. the corresponding features on these points are computed by \ + bilinear interpolation. + 3. max pooling over all the `pool_size`+1 positions are used for \ + computing pooled feature. + + Args: + pool_size (int): number of positions sampled over the boxes' borders + (e.g. top, bottom, left, right). + + """ + + def __init__(self, pool_size): + super(BorderAlign, self).__init__() + self.pool_size = pool_size + + def forward(self, input, boxes): + """ + Args: + input: Features with shape [N,4C,H,W]. Channels ranged in [0,C), + [C,2C), [2C,3C), [3C,4C) represent the top, left, bottom, + right features respectively. + boxes: Boxes with shape [N,H*W,4]. Coordinate format (x1,y1,x2,y2). + + Returns: + Tensor: Pooled features with shape [N,C,H*W,4]. The order is + (top,left,bottom,right) for the last dimension. 
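+
+        Example (a shape-only sketch; running it requires the compiled
+        ``_ext`` CUDA extension, and the input channel number must be
+        divisible by 4):
+            >>> feats = torch.rand(1, 16, 10, 10).cuda()
+            >>> boxes = torch.rand(1, 100, 4).cuda()
+            >>> pooled = BorderAlign(pool_size=10)(feats, boxes)
+            >>> pooled.shape
+            torch.Size([1, 4, 100, 4])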
+ """ + return border_align(input, boxes, self.pool_size) + + def __repr__(self): + s = self.__class__.__name__ + s += f'(pool_size={self.pool_size})' + return s diff --git a/annotator/uniformer/mmcv/ops/box_iou_rotated.py b/annotator/uniformer/mmcv/ops/box_iou_rotated.py new file mode 100644 index 0000000000000000000000000000000000000000..2d78015e9c2a9e7a52859b4e18f84a9aa63481a0 --- /dev/null +++ b/annotator/uniformer/mmcv/ops/box_iou_rotated.py @@ -0,0 +1,45 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', ['box_iou_rotated']) + + +def box_iou_rotated(bboxes1, bboxes2, mode='iou', aligned=False): + """Return intersection-over-union (Jaccard index) of boxes. + + Both sets of boxes are expected to be in + (x_center, y_center, width, height, angle) format. + + If ``aligned`` is ``False``, then calculate the ious between each bbox + of bboxes1 and bboxes2, otherwise the ious between each aligned pair of + bboxes1 and bboxes2. + + Arguments: + boxes1 (Tensor): rotated bboxes 1. \ + It has shape (N, 5), indicating (x, y, w, h, theta) for each row. + Note that theta is in radian. + boxes2 (Tensor): rotated bboxes 2. \ + It has shape (M, 5), indicating (x, y, w, h, theta) for each row. + Note that theta is in radian. + mode (str): "iou" (intersection over union) or iof (intersection over + foreground). + + Returns: + ious(Tensor): shape (N, M) if aligned == False else shape (N,) + """ + assert mode in ['iou', 'iof'] + mode_dict = {'iou': 0, 'iof': 1} + mode_flag = mode_dict[mode] + rows = bboxes1.size(0) + cols = bboxes2.size(0) + if aligned: + ious = bboxes1.new_zeros(rows) + else: + ious = bboxes1.new_zeros((rows * cols)) + bboxes1 = bboxes1.contiguous() + bboxes2 = bboxes2.contiguous() + ext_module.box_iou_rotated( + bboxes1, bboxes2, ious, mode_flag=mode_flag, aligned=aligned) + if not aligned: + ious = ious.view(rows, cols) + return ious diff --git a/annotator/uniformer/mmcv/ops/carafe.py b/annotator/uniformer/mmcv/ops/carafe.py new file mode 100644 index 0000000000000000000000000000000000000000..5154cb3abfccfbbe0a1b2daa67018dbf80aaf6d2 --- /dev/null +++ b/annotator/uniformer/mmcv/ops/carafe.py @@ -0,0 +1,287 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.autograd import Function +from torch.nn.modules.module import Module + +from ..cnn import UPSAMPLE_LAYERS, normal_init, xavier_init +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', [ + 'carafe_naive_forward', 'carafe_naive_backward', 'carafe_forward', + 'carafe_backward' +]) + + +class CARAFENaiveFunction(Function): + + @staticmethod + def symbolic(g, features, masks, kernel_size, group_size, scale_factor): + return g.op( + 'mmcv::MMCVCARAFENaive', + features, + masks, + kernel_size_i=kernel_size, + group_size_i=group_size, + scale_factor_f=scale_factor) + + @staticmethod + def forward(ctx, features, masks, kernel_size, group_size, scale_factor): + assert scale_factor >= 1 + assert masks.size(1) == kernel_size * kernel_size * group_size + assert masks.size(-1) == features.size(-1) * scale_factor + assert masks.size(-2) == features.size(-2) * scale_factor + assert features.size(1) % group_size == 0 + assert (kernel_size - 1) % 2 == 0 and kernel_size >= 1 + ctx.kernel_size = kernel_size + ctx.group_size = group_size + ctx.scale_factor = scale_factor + ctx.feature_size = features.size() + ctx.mask_size = masks.size() + + n, c, h, w = features.size() + output = features.new_zeros((n, c, h * scale_factor, w * scale_factor)) + ext_module.carafe_naive_forward( + features, + masks, + output, + kernel_size=kernel_size, + group_size=group_size, + scale_factor=scale_factor) + + if features.requires_grad or masks.requires_grad: + ctx.save_for_backward(features, masks) + return output + + @staticmethod + def backward(ctx, grad_output): + assert grad_output.is_cuda + + features, masks = ctx.saved_tensors + kernel_size = ctx.kernel_size + group_size = ctx.group_size + scale_factor = ctx.scale_factor + + grad_input = torch.zeros_like(features) + grad_masks = torch.zeros_like(masks) + ext_module.carafe_naive_backward( + grad_output.contiguous(), + features, + masks, + grad_input, + grad_masks, + kernel_size=kernel_size, + group_size=group_size, + scale_factor=scale_factor) + + return grad_input, grad_masks, None, None, None + + +carafe_naive = CARAFENaiveFunction.apply + + +class CARAFENaive(Module): + + def __init__(self, kernel_size, group_size, scale_factor): + super(CARAFENaive, self).__init__() + + assert isinstance(kernel_size, int) and isinstance( + group_size, int) and isinstance(scale_factor, int) + self.kernel_size = kernel_size + self.group_size = group_size + self.scale_factor = scale_factor + + def forward(self, features, masks): + return carafe_naive(features, masks, self.kernel_size, self.group_size, + self.scale_factor) + + +class CARAFEFunction(Function): + + @staticmethod + def symbolic(g, features, masks, kernel_size, group_size, scale_factor): + return g.op( + 'mmcv::MMCVCARAFE', + features, + masks, + kernel_size_i=kernel_size, + group_size_i=group_size, + scale_factor_f=scale_factor) + + @staticmethod + def forward(ctx, features, masks, kernel_size, group_size, scale_factor): + assert scale_factor >= 1 + assert masks.size(1) == kernel_size * kernel_size * group_size + assert masks.size(-1) == features.size(-1) * scale_factor + assert masks.size(-2) == features.size(-2) * scale_factor + assert features.size(1) % group_size == 0 + assert (kernel_size - 1) % 2 == 0 and kernel_size >= 1 + ctx.kernel_size = kernel_size + ctx.group_size = group_size + ctx.scale_factor = scale_factor + ctx.feature_size = features.size() + ctx.mask_size = masks.size() + + n, c, h, w = features.size() + 
output = features.new_zeros((n, c, h * scale_factor, w * scale_factor)) + routput = features.new_zeros(output.size(), requires_grad=False) + rfeatures = features.new_zeros(features.size(), requires_grad=False) + rmasks = masks.new_zeros(masks.size(), requires_grad=False) + ext_module.carafe_forward( + features, + masks, + rfeatures, + routput, + rmasks, + output, + kernel_size=kernel_size, + group_size=group_size, + scale_factor=scale_factor) + + if features.requires_grad or masks.requires_grad: + ctx.save_for_backward(features, masks, rfeatures) + return output + + @staticmethod + def backward(ctx, grad_output): + assert grad_output.is_cuda + + features, masks, rfeatures = ctx.saved_tensors + kernel_size = ctx.kernel_size + group_size = ctx.group_size + scale_factor = ctx.scale_factor + + rgrad_output = torch.zeros_like(grad_output, requires_grad=False) + rgrad_input_hs = torch.zeros_like(grad_output, requires_grad=False) + rgrad_input = torch.zeros_like(features, requires_grad=False) + rgrad_masks = torch.zeros_like(masks, requires_grad=False) + grad_input = torch.zeros_like(features, requires_grad=False) + grad_masks = torch.zeros_like(masks, requires_grad=False) + ext_module.carafe_backward( + grad_output.contiguous(), + rfeatures, + masks, + rgrad_output, + rgrad_input_hs, + rgrad_input, + rgrad_masks, + grad_input, + grad_masks, + kernel_size=kernel_size, + group_size=group_size, + scale_factor=scale_factor) + return grad_input, grad_masks, None, None, None + + +carafe = CARAFEFunction.apply + + +class CARAFE(Module): + """ CARAFE: Content-Aware ReAssembly of FEatures + + Please refer to https://arxiv.org/abs/1905.02188 for more details. + + Args: + kernel_size (int): reassemble kernel size + group_size (int): reassemble group size + scale_factor (int): upsample ratio + + Returns: + upsampled feature map + """ + + def __init__(self, kernel_size, group_size, scale_factor): + super(CARAFE, self).__init__() + + assert isinstance(kernel_size, int) and isinstance( + group_size, int) and isinstance(scale_factor, int) + self.kernel_size = kernel_size + self.group_size = group_size + self.scale_factor = scale_factor + + def forward(self, features, masks): + return carafe(features, masks, self.kernel_size, self.group_size, + self.scale_factor) + + +@UPSAMPLE_LAYERS.register_module(name='carafe') +class CARAFEPack(nn.Module): + """A unified package of CARAFE upsampler that contains: 1) channel + compressor 2) content encoder 3) CARAFE op. + + Official implementation of ICCV 2019 paper + CARAFE: Content-Aware ReAssembly of FEatures + Please refer to https://arxiv.org/abs/1905.02188 for more details. 
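+
+    In a typical sketch, ``CARAFEPack(channels=64, scale_factor=2)`` maps an
+    input of shape (N, 64, H, W) to (N, 64, 2H, 2W), predicting the
+    softmax-normalized reassembly kernels from the input itself.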
+
+    Args:
+        channels (int): input feature channels
+        scale_factor (int): upsample ratio
+        up_kernel (int): kernel size of CARAFE op
+        up_group (int): group size of CARAFE op
+        encoder_kernel (int): kernel size of content encoder
+        encoder_dilation (int): dilation of content encoder
+        compressed_channels (int): output channels of channels compressor
+
+    Returns:
+        upsampled feature map
+    """
+
+    def __init__(self,
+                 channels,
+                 scale_factor,
+                 up_kernel=5,
+                 up_group=1,
+                 encoder_kernel=3,
+                 encoder_dilation=1,
+                 compressed_channels=64):
+        super(CARAFEPack, self).__init__()
+        self.channels = channels
+        self.scale_factor = scale_factor
+        self.up_kernel = up_kernel
+        self.up_group = up_group
+        self.encoder_kernel = encoder_kernel
+        self.encoder_dilation = encoder_dilation
+        self.compressed_channels = compressed_channels
+        self.channel_compressor = nn.Conv2d(channels, self.compressed_channels,
+                                            1)
+        self.content_encoder = nn.Conv2d(
+            self.compressed_channels,
+            self.up_kernel * self.up_kernel * self.up_group *
+            self.scale_factor * self.scale_factor,
+            self.encoder_kernel,
+            padding=int((self.encoder_kernel - 1) * self.encoder_dilation / 2),
+            dilation=self.encoder_dilation,
+            groups=1)
+        self.init_weights()
+
+    def init_weights(self):
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                xavier_init(m, distribution='uniform')
+        normal_init(self.content_encoder, std=0.001)
+
+    def kernel_normalizer(self, mask):
+        mask = F.pixel_shuffle(mask, self.scale_factor)
+        n, mask_c, h, w = mask.size()
+        # use float division explicitly,
+        # to avoid inconsistency while exporting to onnx
+        mask_channel = int(mask_c / float(self.up_kernel**2))
+        mask = mask.view(n, mask_channel, -1, h, w)
+
+        mask = F.softmax(mask, dim=2, dtype=mask.dtype)
+        mask = mask.view(n, mask_c, h, w).contiguous()
+
+        return mask
+
+    def feature_reassemble(self, x, mask):
+        x = carafe(x, mask, self.up_kernel, self.up_group, self.scale_factor)
+        return x
+
+    def forward(self, x):
+        compressed_x = self.channel_compressor(x)
+        mask = self.content_encoder(compressed_x)
+        mask = self.kernel_normalizer(mask)
+
+        x = self.feature_reassemble(x, mask)
+        return x
diff --git a/annotator/uniformer/mmcv/ops/cc_attention.py b/annotator/uniformer/mmcv/ops/cc_attention.py
new file mode 100644
index 0000000000000000000000000000000000000000..9207aa95e6730bd9b3362dee612059a5f0ce1c5e
--- /dev/null
+++ b/annotator/uniformer/mmcv/ops/cc_attention.py
@@ -0,0 +1,83 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+from annotator.uniformer.mmcv.cnn import PLUGIN_LAYERS, Scale
+
+
+def NEG_INF_DIAG(n, device):
+    """Returns a diagonal matrix of size [n, n] whose diagonal entries
+    are all "-inf".
+
+    This avoids counting the overlapping element of the criss-cross
+    attention twice.
+    """
+    return torch.diag(torch.tensor(float('-inf')).to(device).repeat(n), 0)
+
+
+@PLUGIN_LAYERS.register_module()
+class CrissCrossAttention(nn.Module):
+    """Criss-Cross Attention Module.
+
+    .. note::
+        Before v1.3.13, we use a CUDA op. Since v1.3.13, we switch
+        to a pure PyTorch and equivalent implementation. For more
+        details, please refer to https://github.com/open-mmlab/mmcv/pull/1201.
+
+    Speed comparison for one forward pass
+
+    - Input size: [2,512,97,97]
+    - Device: 1 NVIDIA GeForce RTX 2080 Ti
+
+    +-----------------------+---------------+------------+---------------+
+    |                       |PyTorch version|CUDA version|Relative speed |
+    +=======================+===============+============+===============+
+    |with torch.no_grad()   |0.00554402 s   |0.0299619 s |5.4x           |
+    +-----------------------+---------------+------------+---------------+
+    |without torch.no_grad()|0.00562803 s   |0.0301349 s |5.4x           |
+    +-----------------------+---------------+------------+---------------+
+
+    Args:
+        in_channels (int): Channels of the input feature map.
+    """
+
+    def __init__(self, in_channels):
+        super().__init__()
+        self.query_conv = nn.Conv2d(in_channels, in_channels // 8, 1)
+        self.key_conv = nn.Conv2d(in_channels, in_channels // 8, 1)
+        self.value_conv = nn.Conv2d(in_channels, in_channels, 1)
+        self.gamma = Scale(0.)
+        self.in_channels = in_channels
+
+    def forward(self, x):
+        """Forward function of Criss-Cross Attention.
+
+        Args:
+            x (Tensor): Input feature. \
+                shape (batch_size, in_channels, height, width)
+        Returns:
+            Tensor: Output of the layer, with shape of \
+                (batch_size, in_channels, height, width)
+        """
+        B, C, H, W = x.size()
+        query = self.query_conv(x)
+        key = self.key_conv(x)
+        value = self.value_conv(x)
+        energy_H = torch.einsum('bchw,bciw->bwhi', query, key) + NEG_INF_DIAG(
+            H, query.device)
+        energy_H = energy_H.transpose(1, 2)
+        energy_W = torch.einsum('bchw,bchj->bhwj', query, key)
+        attn = F.softmax(
+            torch.cat([energy_H, energy_W], dim=-1), dim=-1)  # [B,H,W,(H+W)]
+        out = torch.einsum('bciw,bhwi->bchw', value, attn[..., :H])
+        out += torch.einsum('bchj,bhwj->bchw', value, attn[..., H:])
+
+        out = self.gamma(out) + x
+        out = out.contiguous()
+
+        return out
+
+    def __repr__(self):
+        s = self.__class__.__name__
+        s += f'(in_channels={self.in_channels})'
+        return s
diff --git a/annotator/uniformer/mmcv/ops/contour_expand.py b/annotator/uniformer/mmcv/ops/contour_expand.py
new file mode 100644
index 0000000000000000000000000000000000000000..ea1111e1768b5f27e118bf7dbc0d9c70a7afd6d7
--- /dev/null
+++ b/annotator/uniformer/mmcv/ops/contour_expand.py
@@ -0,0 +1,49 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import numpy as np
+import torch
+
+from ..utils import ext_loader
+
+ext_module = ext_loader.load_ext('_ext', ['contour_expand'])
+
+
+def contour_expand(kernel_mask, internal_kernel_label, min_kernel_area,
+                   kernel_num):
+    """Expand kernel contours so that foreground pixels are assigned into
+    instances.
+
+    Arguments:
+        kernel_mask (np.array or Tensor): The instance kernel mask with
+            size hxw.
+        internal_kernel_label (np.array or Tensor): The instance internal
+            kernel label with size hxw.
+        min_kernel_area (int): The minimum kernel area.
+        kernel_num (int): The instance kernel number.
+
+    Returns:
+        label (list): The instance index map with size hxw.
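+
+    Example (an illustrative sketch with assumed toy inputs; running it
+    requires the compiled ``_ext`` extension):
+        >>> kernel_mask = np.zeros((10, 10), dtype=np.uint8)
+        >>> kernel_mask[2:8, 2:8] = 1
+        >>> internal_kernel_label = np.zeros((10, 10), dtype=np.int32)
+        >>> internal_kernel_label[4:6, 4:6] = 1
+        >>> label = contour_expand(kernel_mask, internal_kernel_label,
+        >>>                        min_kernel_area=0, kernel_num=2)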
+ """ + assert isinstance(kernel_mask, (torch.Tensor, np.ndarray)) + assert isinstance(internal_kernel_label, (torch.Tensor, np.ndarray)) + assert isinstance(min_kernel_area, int) + assert isinstance(kernel_num, int) + + if isinstance(kernel_mask, np.ndarray): + kernel_mask = torch.from_numpy(kernel_mask) + if isinstance(internal_kernel_label, np.ndarray): + internal_kernel_label = torch.from_numpy(internal_kernel_label) + + if torch.__version__ == 'parrots': + if kernel_mask.shape[0] == 0 or internal_kernel_label.shape[0] == 0: + label = [] + else: + label = ext_module.contour_expand( + kernel_mask, + internal_kernel_label, + min_kernel_area=min_kernel_area, + kernel_num=kernel_num) + label = label.tolist() + else: + label = ext_module.contour_expand(kernel_mask, internal_kernel_label, + min_kernel_area, kernel_num) + return label diff --git a/annotator/uniformer/mmcv/ops/corner_pool.py b/annotator/uniformer/mmcv/ops/corner_pool.py new file mode 100644 index 0000000000000000000000000000000000000000..a33d798b43d405e4c86bee4cd6389be21ca9c637 --- /dev/null +++ b/annotator/uniformer/mmcv/ops/corner_pool.py @@ -0,0 +1,161 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +from torch import nn +from torch.autograd import Function + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', [ + 'top_pool_forward', 'top_pool_backward', 'bottom_pool_forward', + 'bottom_pool_backward', 'left_pool_forward', 'left_pool_backward', + 'right_pool_forward', 'right_pool_backward' +]) + +_mode_dict = {'top': 0, 'bottom': 1, 'left': 2, 'right': 3} + + +class TopPoolFunction(Function): + + @staticmethod + def symbolic(g, input): + output = g.op( + 'mmcv::MMCVCornerPool', input, mode_i=int(_mode_dict['top'])) + return output + + @staticmethod + def forward(ctx, input): + output = ext_module.top_pool_forward(input) + ctx.save_for_backward(input) + return output + + @staticmethod + def backward(ctx, grad_output): + input, = ctx.saved_tensors + output = ext_module.top_pool_backward(input, grad_output) + return output + + +class BottomPoolFunction(Function): + + @staticmethod + def symbolic(g, input): + output = g.op( + 'mmcv::MMCVCornerPool', input, mode_i=int(_mode_dict['bottom'])) + return output + + @staticmethod + def forward(ctx, input): + output = ext_module.bottom_pool_forward(input) + ctx.save_for_backward(input) + return output + + @staticmethod + def backward(ctx, grad_output): + input, = ctx.saved_tensors + output = ext_module.bottom_pool_backward(input, grad_output) + return output + + +class LeftPoolFunction(Function): + + @staticmethod + def symbolic(g, input): + output = g.op( + 'mmcv::MMCVCornerPool', input, mode_i=int(_mode_dict['left'])) + return output + + @staticmethod + def forward(ctx, input): + output = ext_module.left_pool_forward(input) + ctx.save_for_backward(input) + return output + + @staticmethod + def backward(ctx, grad_output): + input, = ctx.saved_tensors + output = ext_module.left_pool_backward(input, grad_output) + return output + + +class RightPoolFunction(Function): + + @staticmethod + def symbolic(g, input): + output = g.op( + 'mmcv::MMCVCornerPool', input, mode_i=int(_mode_dict['right'])) + return output + + @staticmethod + def forward(ctx, input): + output = ext_module.right_pool_forward(input) + ctx.save_for_backward(input) + return output + + @staticmethod + def backward(ctx, grad_output): + input, = ctx.saved_tensors + output = ext_module.right_pool_backward(input, grad_output) + return output + + +class CornerPool(nn.Module): + """Corner 
Pooling. + + Corner Pooling is a new type of pooling layer that helps a + convolutional network better localize corners of bounding boxes. + + Please refer to https://arxiv.org/abs/1808.01244 for more details. + Code is modified from https://github.com/princeton-vl/CornerNet-Lite. + + Args: + mode(str): Pooling orientation for the pooling layer + + - 'bottom': Bottom Pooling + - 'left': Left Pooling + - 'right': Right Pooling + - 'top': Top Pooling + + Returns: + Feature map after pooling. + """ + + pool_functions = { + 'bottom': BottomPoolFunction, + 'left': LeftPoolFunction, + 'right': RightPoolFunction, + 'top': TopPoolFunction, + } + + cummax_dim_flip = { + 'bottom': (2, False), + 'left': (3, True), + 'right': (3, False), + 'top': (2, True), + } + + def __init__(self, mode): + super(CornerPool, self).__init__() + assert mode in self.pool_functions + self.mode = mode + self.corner_pool = self.pool_functions[mode] + + def forward(self, x): + if torch.__version__ != 'parrots' and torch.__version__ >= '1.5.0': + if torch.onnx.is_in_onnx_export(): + assert torch.__version__ >= '1.7.0', \ + 'When `cummax` serves as an intermediate component whose '\ + 'outputs is used as inputs for another modules, it\'s '\ + 'expected that pytorch version must be >= 1.7.0, '\ + 'otherwise Error appears like: `RuntimeError: tuple '\ + 'appears in op that does not forward tuples, unsupported '\ + 'kind: prim::PythonOp`.' + + dim, flip = self.cummax_dim_flip[self.mode] + if flip: + x = x.flip(dim) + pool_tensor, _ = torch.cummax(x, dim=dim) + if flip: + pool_tensor = pool_tensor.flip(dim) + return pool_tensor + else: + return self.corner_pool.apply(x) diff --git a/annotator/uniformer/mmcv/ops/correlation.py b/annotator/uniformer/mmcv/ops/correlation.py new file mode 100644 index 0000000000000000000000000000000000000000..3d0b79c301b29915dfaf4d2b1846c59be73127d3 --- /dev/null +++ b/annotator/uniformer/mmcv/ops/correlation.py @@ -0,0 +1,196 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
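+# Usage sketch (illustrative only; assumes the compiled `_ext` extension
+# and a CUDA device):
+#   feat1 = torch.rand(1, 32, 24, 24).cuda()
+#   feat2 = torch.rand(1, 32, 24, 24).cuda()
+#   corr = Correlation(max_displacement=4)
+#   out = corr(feat1, feat2)  # out: (1, 9, 9, 24, 24)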
+import torch +from torch import Tensor, nn +from torch.autograd import Function +from torch.autograd.function import once_differentiable +from torch.nn.modules.utils import _pair + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext( + '_ext', ['correlation_forward', 'correlation_backward']) + + +class CorrelationFunction(Function): + + @staticmethod + def forward(ctx, + input1, + input2, + kernel_size=1, + max_displacement=1, + stride=1, + padding=1, + dilation=1, + dilation_patch=1): + + ctx.save_for_backward(input1, input2) + + kH, kW = ctx.kernel_size = _pair(kernel_size) + patch_size = max_displacement * 2 + 1 + ctx.patch_size = patch_size + dH, dW = ctx.stride = _pair(stride) + padH, padW = ctx.padding = _pair(padding) + dilationH, dilationW = ctx.dilation = _pair(dilation) + dilation_patchH, dilation_patchW = ctx.dilation_patch = _pair( + dilation_patch) + + output_size = CorrelationFunction._output_size(ctx, input1) + + output = input1.new_zeros(output_size) + + ext_module.correlation_forward( + input1, + input2, + output, + kH=kH, + kW=kW, + patchH=patch_size, + patchW=patch_size, + padH=padH, + padW=padW, + dilationH=dilationH, + dilationW=dilationW, + dilation_patchH=dilation_patchH, + dilation_patchW=dilation_patchW, + dH=dH, + dW=dW) + + return output + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + input1, input2 = ctx.saved_tensors + + kH, kW = ctx.kernel_size + patch_size = ctx.patch_size + padH, padW = ctx.padding + dilationH, dilationW = ctx.dilation + dilation_patchH, dilation_patchW = ctx.dilation_patch + dH, dW = ctx.stride + grad_input1 = torch.zeros_like(input1) + grad_input2 = torch.zeros_like(input2) + + ext_module.correlation_backward( + grad_output, + input1, + input2, + grad_input1, + grad_input2, + kH=kH, + kW=kW, + patchH=patch_size, + patchW=patch_size, + padH=padH, + padW=padW, + dilationH=dilationH, + dilationW=dilationW, + dilation_patchH=dilation_patchH, + dilation_patchW=dilation_patchW, + dH=dH, + dW=dW) + return grad_input1, grad_input2, None, None, None, None, None, None + + @staticmethod + def _output_size(ctx, input1): + iH, iW = input1.size(2), input1.size(3) + batch_size = input1.size(0) + kH, kW = ctx.kernel_size + patch_size = ctx.patch_size + dH, dW = ctx.stride + padH, padW = ctx.padding + dilationH, dilationW = ctx.dilation + dilatedKH = (kH - 1) * dilationH + 1 + dilatedKW = (kW - 1) * dilationW + 1 + + oH = int((iH + 2 * padH - dilatedKH) / dH + 1) + oW = int((iW + 2 * padW - dilatedKW) / dW + 1) + + output_size = (batch_size, patch_size, patch_size, oH, oW) + return output_size + + +class Correlation(nn.Module): + r"""Correlation operator + + This correlation operator works for optical flow correlation computation. + + There are two batched tensors with shape :math:`(N, C, H, W)`, + and the correlation output's shape is :math:`(N, max\_displacement \times + 2 + 1, max\_displacement * 2 + 1, H_{out}, W_{out})` + + where + + .. math:: + H_{out} = \left\lfloor\frac{H_{in} + 2 \times padding - + dilation \times (kernel\_size - 1) - 1} + {stride} + 1\right\rfloor + + .. math:: + W_{out} = \left\lfloor\frac{W_{in} + 2 \times padding - dilation + \times (kernel\_size - 1) - 1} + {stride} + 1\right\rfloor + + the correlation item :math:`(N_i, dy, dx)` is formed by taking the sliding + window convolution between input1 and shifted input2, + + .. 
math::
+        Corr(N_i, dx, dy) =
+        \sum_{c=0}^{C-1}
+        input1(N_i, c) \star
+        \mathcal{S}(input2(N_i, c), dy, dx)
+
+    where :math:`\star` is the valid 2d sliding window convolution operator,
+    and :math:`\mathcal{S}` means shifting the input features (auto-complete
+    zero marginal), and :math:`dx, dy` are shifting distance, :math:`dx, dy \in
+    [-max\_displacement \times dilation\_patch, max\_displacement \times
+    dilation\_patch]`.
+
+    Args:
+        kernel_size (int): The size of the sliding window, i.e. the local
+            neighborhood representing the center points involved in the
+            correlation computation. Defaults to 1.
+        max_displacement (int): The radius for computing correlation volume,
+            but the actual working space can be dilated by dilation_patch.
+            Defaults to 1.
+        stride (int): The stride of the sliding blocks in the input spatial
+            dimensions. Defaults to 1.
+        padding (int): Zero padding added to all four sides of the input1.
+            Defaults to 0.
+        dilation (int): The spacing of the local neighborhood that will be
+            involved in the correlation. Defaults to 1.
+        dilation_patch (int): The spacing between positions at which the
+            correlation is computed. Defaults to 1.
+    """
+
+    def __init__(self,
+                 kernel_size: int = 1,
+                 max_displacement: int = 1,
+                 stride: int = 1,
+                 padding: int = 0,
+                 dilation: int = 1,
+                 dilation_patch: int = 1) -> None:
+        super().__init__()
+        self.kernel_size = kernel_size
+        self.max_displacement = max_displacement
+        self.stride = stride
+        self.padding = padding
+        self.dilation = dilation
+        self.dilation_patch = dilation_patch
+
+    def forward(self, input1: Tensor, input2: Tensor) -> Tensor:
+        return CorrelationFunction.apply(input1, input2, self.kernel_size,
+                                         self.max_displacement, self.stride,
+                                         self.padding, self.dilation,
+                                         self.dilation_patch)
+
+    def __repr__(self) -> str:
+        s = self.__class__.__name__
+        s += f'(kernel_size={self.kernel_size}, '
+        s += f'max_displacement={self.max_displacement}, '
+        s += f'stride={self.stride}, '
+        s += f'padding={self.padding}, '
+        s += f'dilation={self.dilation}, '
+        s += f'dilation_patch={self.dilation_patch})'
+        return s
diff --git a/annotator/uniformer/mmcv/ops/deform_conv.py b/annotator/uniformer/mmcv/ops/deform_conv.py
new file mode 100644
index 0000000000000000000000000000000000000000..a3f8c75ee774823eea334e3b3732af6a18f55038
--- /dev/null
+++ b/annotator/uniformer/mmcv/ops/deform_conv.py
@@ -0,0 +1,405 @@
+# Copyright (c) OpenMMLab. All rights reserved.
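+# Usage sketch (illustrative only; assumes the compiled `_ext` extension
+# and a CUDA device; `offset` has 2 * deform_groups * kH * kW channels):
+#   x = torch.rand(1, 3, 8, 8).cuda()
+#   offset = torch.zeros(1, 18, 8, 8).cuda()  # 2 * 1 * 3 * 3 = 18
+#   conv = DeformConv2d(3, 16, kernel_size=3, padding=1).cuda()
+#   y = conv(x, offset)  # y: (1, 16, 8, 8)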
+from typing import Tuple, Union + +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch import Tensor +from torch.autograd import Function +from torch.autograd.function import once_differentiable +from torch.nn.modules.utils import _pair, _single + +from annotator.uniformer.mmcv.utils import deprecated_api_warning +from ..cnn import CONV_LAYERS +from ..utils import ext_loader, print_log + +ext_module = ext_loader.load_ext('_ext', [ + 'deform_conv_forward', 'deform_conv_backward_input', + 'deform_conv_backward_parameters' +]) + + +class DeformConv2dFunction(Function): + + @staticmethod + def symbolic(g, + input, + offset, + weight, + stride, + padding, + dilation, + groups, + deform_groups, + bias=False, + im2col_step=32): + return g.op( + 'mmcv::MMCVDeformConv2d', + input, + offset, + weight, + stride_i=stride, + padding_i=padding, + dilation_i=dilation, + groups_i=groups, + deform_groups_i=deform_groups, + bias_i=bias, + im2col_step_i=im2col_step) + + @staticmethod + def forward(ctx, + input, + offset, + weight, + stride=1, + padding=0, + dilation=1, + groups=1, + deform_groups=1, + bias=False, + im2col_step=32): + if input is not None and input.dim() != 4: + raise ValueError( + f'Expected 4D tensor as input, got {input.dim()}D tensor \ + instead.') + assert bias is False, 'Only support bias is False.' + ctx.stride = _pair(stride) + ctx.padding = _pair(padding) + ctx.dilation = _pair(dilation) + ctx.groups = groups + ctx.deform_groups = deform_groups + ctx.im2col_step = im2col_step + + # When pytorch version >= 1.6.0, amp is adopted for fp16 mode; + # amp won't cast the type of model (float32), but "offset" is cast + # to float16 by nn.Conv2d automatically, leading to the type + # mismatch with input (when it is float32) or weight. + # The flag for whether to use fp16 or amp is the type of "offset", + # we cast weight and input to temporarily support fp16 and amp + # whatever the pytorch version is. 
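+        # For example, under torch.cuda.amp.autocast a float32 input paired
+        # with a float16 offset is cast to float16 here, so input, weight
+        # and offset share one dtype before reaching the CUDA kernel.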
+ input = input.type_as(offset) + weight = weight.type_as(input) + ctx.save_for_backward(input, offset, weight) + + output = input.new_empty( + DeformConv2dFunction._output_size(ctx, input, weight)) + + ctx.bufs_ = [input.new_empty(0), input.new_empty(0)] # columns, ones + + cur_im2col_step = min(ctx.im2col_step, input.size(0)) + assert (input.size(0) % + cur_im2col_step) == 0, 'im2col step must divide batchsize' + ext_module.deform_conv_forward( + input, + weight, + offset, + output, + ctx.bufs_[0], + ctx.bufs_[1], + kW=weight.size(3), + kH=weight.size(2), + dW=ctx.stride[1], + dH=ctx.stride[0], + padW=ctx.padding[1], + padH=ctx.padding[0], + dilationW=ctx.dilation[1], + dilationH=ctx.dilation[0], + group=ctx.groups, + deformable_group=ctx.deform_groups, + im2col_step=cur_im2col_step) + return output + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + input, offset, weight = ctx.saved_tensors + + grad_input = grad_offset = grad_weight = None + + cur_im2col_step = min(ctx.im2col_step, input.size(0)) + assert (input.size(0) % cur_im2col_step + ) == 0, 'batch size must be divisible by im2col_step' + + grad_output = grad_output.contiguous() + if ctx.needs_input_grad[0] or ctx.needs_input_grad[1]: + grad_input = torch.zeros_like(input) + grad_offset = torch.zeros_like(offset) + ext_module.deform_conv_backward_input( + input, + offset, + grad_output, + grad_input, + grad_offset, + weight, + ctx.bufs_[0], + kW=weight.size(3), + kH=weight.size(2), + dW=ctx.stride[1], + dH=ctx.stride[0], + padW=ctx.padding[1], + padH=ctx.padding[0], + dilationW=ctx.dilation[1], + dilationH=ctx.dilation[0], + group=ctx.groups, + deformable_group=ctx.deform_groups, + im2col_step=cur_im2col_step) + + if ctx.needs_input_grad[2]: + grad_weight = torch.zeros_like(weight) + ext_module.deform_conv_backward_parameters( + input, + offset, + grad_output, + grad_weight, + ctx.bufs_[0], + ctx.bufs_[1], + kW=weight.size(3), + kH=weight.size(2), + dW=ctx.stride[1], + dH=ctx.stride[0], + padW=ctx.padding[1], + padH=ctx.padding[0], + dilationW=ctx.dilation[1], + dilationH=ctx.dilation[0], + group=ctx.groups, + deformable_group=ctx.deform_groups, + scale=1, + im2col_step=cur_im2col_step) + + return grad_input, grad_offset, grad_weight, \ + None, None, None, None, None, None, None + + @staticmethod + def _output_size(ctx, input, weight): + channels = weight.size(0) + output_size = (input.size(0), channels) + for d in range(input.dim() - 2): + in_size = input.size(d + 2) + pad = ctx.padding[d] + kernel = ctx.dilation[d] * (weight.size(d + 2) - 1) + 1 + stride_ = ctx.stride[d] + output_size += ((in_size + (2 * pad) - kernel) // stride_ + 1, ) + if not all(map(lambda s: s > 0, output_size)): + raise ValueError( + 'convolution input is too small (output would be ' + + 'x'.join(map(str, output_size)) + ')') + return output_size + + +deform_conv2d = DeformConv2dFunction.apply + + +class DeformConv2d(nn.Module): + r"""Deformable 2D convolution. + + Applies a deformable 2D convolution over an input signal composed of + several input planes. DeformConv2d was described in the paper + `Deformable Convolutional Networks + `_ + + Note: + The argument ``im2col_step`` was added in version 1.3.17, which means + number of samples processed by the ``im2col_cuda_kernel`` per call. + It enables users to define ``batch_size`` and ``im2col_step`` more + flexibly and solved `issue mmcv#1440 + `_. + + Args: + in_channels (int): Number of channels in the input image. 
+        out_channels (int): Number of channels produced by the convolution.
+        kernel_size (int, tuple): Size of the convolving kernel.
+        stride (int, tuple): Stride of the convolution. Default: 1.
+        padding (int or tuple): Zero-padding added to both sides of the
+            input. Default: 0.
+        dilation (int or tuple): Spacing between kernel elements. Default: 1.
+        groups (int): Number of blocked connections from input channels to
+            output channels. Default: 1.
+        deform_groups (int): Number of deformable group partitions.
+            Default: 1.
+        bias (bool): If True, adds a learnable bias to the output.
+            Default: False.
+        im2col_step (int): Number of samples processed by im2col_cuda_kernel
+            per call. It will work when ``batch_size`` > ``im2col_step``, but
+            ``batch_size`` must be divisible by ``im2col_step``. Default: 32.
+            `New in version 1.3.17.`
+    """
+
+    @deprecated_api_warning({'deformable_groups': 'deform_groups'},
+                            cls_name='DeformConv2d')
+    def __init__(self,
+                 in_channels: int,
+                 out_channels: int,
+                 kernel_size: Union[int, Tuple[int, ...]],
+                 stride: Union[int, Tuple[int, ...]] = 1,
+                 padding: Union[int, Tuple[int, ...]] = 0,
+                 dilation: Union[int, Tuple[int, ...]] = 1,
+                 groups: int = 1,
+                 deform_groups: int = 1,
+                 bias: bool = False,
+                 im2col_step: int = 32) -> None:
+        super(DeformConv2d, self).__init__()
+
+        assert not bias, \
+            f'bias={bias} is not supported in DeformConv2d.'
+        assert in_channels % groups == 0, \
+            f'in_channels {in_channels} is not divisible by groups {groups}'
+        assert out_channels % groups == 0, \
+            f'out_channels {out_channels} is not divisible by groups \
+            {groups}'
+
+        self.in_channels = in_channels
+        self.out_channels = out_channels
+        self.kernel_size = _pair(kernel_size)
+        self.stride = _pair(stride)
+        self.padding = _pair(padding)
+        self.dilation = _pair(dilation)
+        self.groups = groups
+        self.deform_groups = deform_groups
+        self.im2col_step = im2col_step
+        # enable compatibility with nn.Conv2d
+        self.transposed = False
+        self.output_padding = _single(0)
+
+        # only weight, no bias
+        self.weight = nn.Parameter(
+            torch.Tensor(out_channels, in_channels // self.groups,
+                         *self.kernel_size))
+
+        self.reset_parameters()
+
+    def reset_parameters(self):
+        # switch the initialization of `self.weight` to the standard kaiming
+        # method described in `Delving deep into rectifiers: Surpassing
+        # human-level performance on ImageNet classification` - He, K. et al.
+        # (2015), using a uniform distribution
+        nn.init.kaiming_uniform_(self.weight, nonlinearity='relu')
+
+    def forward(self, x: Tensor, offset: Tensor) -> Tensor:
+        """Deformable Convolutional forward function.
+
+        Args:
+            x (Tensor): Input feature, shape (B, C_in, H_in, W_in)
+            offset (Tensor): Offset for deformable convolution, shape
+                (B, deform_groups*kernel_size[0]*kernel_size[1]*2,
+                H_out, W_out), where H_out and W_out match the output's
+                spatial size.
+
+                An offset is like `[y0, x0, y1, x1, y2, x2, ..., y8, x8]`.
+                The spatial arrangement is like:
+
+                .. code:: text
+
+                    (x0, y0) (x1, y1) (x2, y2)
+                    (x3, y3) (x4, y4) (x5, y5)
+                    (x6, y6) (x7, y7) (x8, y8)
+
+        Returns:
+            Tensor: Output of the layer.
+ """ + # To fix an assert error in deform_conv_cuda.cpp:128 + # input image is smaller than kernel + input_pad = (x.size(2) < self.kernel_size[0]) or (x.size(3) < + self.kernel_size[1]) + if input_pad: + pad_h = max(self.kernel_size[0] - x.size(2), 0) + pad_w = max(self.kernel_size[1] - x.size(3), 0) + x = F.pad(x, (0, pad_w, 0, pad_h), 'constant', 0).contiguous() + offset = F.pad(offset, (0, pad_w, 0, pad_h), 'constant', 0) + offset = offset.contiguous() + out = deform_conv2d(x, offset, self.weight, self.stride, self.padding, + self.dilation, self.groups, self.deform_groups, + False, self.im2col_step) + if input_pad: + out = out[:, :, :out.size(2) - pad_h, :out.size(3) - + pad_w].contiguous() + return out + + def __repr__(self): + s = self.__class__.__name__ + s += f'(in_channels={self.in_channels},\n' + s += f'out_channels={self.out_channels},\n' + s += f'kernel_size={self.kernel_size},\n' + s += f'stride={self.stride},\n' + s += f'padding={self.padding},\n' + s += f'dilation={self.dilation},\n' + s += f'groups={self.groups},\n' + s += f'deform_groups={self.deform_groups},\n' + # bias is not supported in DeformConv2d. + s += 'bias=False)' + return s + + +@CONV_LAYERS.register_module('DCN') +class DeformConv2dPack(DeformConv2d): + """A Deformable Conv Encapsulation that acts as normal Conv layers. + + The offset tensor is like `[y0, x0, y1, x1, y2, x2, ..., y8, x8]`. + The spatial arrangement is like: + + .. code:: text + + (x0, y0) (x1, y1) (x2, y2) + (x3, y3) (x4, y4) (x5, y5) + (x6, y6) (x7, y7) (x8, y8) + + Args: + in_channels (int): Same as nn.Conv2d. + out_channels (int): Same as nn.Conv2d. + kernel_size (int or tuple[int]): Same as nn.Conv2d. + stride (int or tuple[int]): Same as nn.Conv2d. + padding (int or tuple[int]): Same as nn.Conv2d. + dilation (int or tuple[int]): Same as nn.Conv2d. + groups (int): Same as nn.Conv2d. + bias (bool or str): If specified as `auto`, it will be decided by the + norm_cfg. Bias will be set as True if norm_cfg is None, otherwise + False. + """ + + _version = 2 + + def __init__(self, *args, **kwargs): + super(DeformConv2dPack, self).__init__(*args, **kwargs) + self.conv_offset = nn.Conv2d( + self.in_channels, + self.deform_groups * 2 * self.kernel_size[0] * self.kernel_size[1], + kernel_size=self.kernel_size, + stride=_pair(self.stride), + padding=_pair(self.padding), + dilation=_pair(self.dilation), + bias=True) + self.init_offset() + + def init_offset(self): + self.conv_offset.weight.data.zero_() + self.conv_offset.bias.data.zero_() + + def forward(self, x): + offset = self.conv_offset(x) + return deform_conv2d(x, offset, self.weight, self.stride, self.padding, + self.dilation, self.groups, self.deform_groups, + False, self.im2col_step) + + def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict, + missing_keys, unexpected_keys, error_msgs): + version = local_metadata.get('version', None) + + if version is None or version < 2: + # the key is different in early versions + # In version < 2, DeformConvPack loads previous benchmark models. 
+ if (prefix + 'conv_offset.weight' not in state_dict + and prefix[:-1] + '_offset.weight' in state_dict): + state_dict[prefix + 'conv_offset.weight'] = state_dict.pop( + prefix[:-1] + '_offset.weight') + if (prefix + 'conv_offset.bias' not in state_dict + and prefix[:-1] + '_offset.bias' in state_dict): + state_dict[prefix + + 'conv_offset.bias'] = state_dict.pop(prefix[:-1] + + '_offset.bias') + + if version is not None and version > 1: + print_log( + f'DeformConv2dPack {prefix.rstrip(".")} is upgraded to ' + 'version 2.', + logger='root') + + super()._load_from_state_dict(state_dict, prefix, local_metadata, + strict, missing_keys, unexpected_keys, + error_msgs) diff --git a/annotator/uniformer/mmcv/ops/deform_roi_pool.py b/annotator/uniformer/mmcv/ops/deform_roi_pool.py new file mode 100644 index 0000000000000000000000000000000000000000..cc245ba91fee252226ba22e76bb94a35db9a629b --- /dev/null +++ b/annotator/uniformer/mmcv/ops/deform_roi_pool.py @@ -0,0 +1,204 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from torch import nn +from torch.autograd import Function +from torch.autograd.function import once_differentiable +from torch.nn.modules.utils import _pair + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext( + '_ext', ['deform_roi_pool_forward', 'deform_roi_pool_backward']) + + +class DeformRoIPoolFunction(Function): + + @staticmethod + def symbolic(g, input, rois, offset, output_size, spatial_scale, + sampling_ratio, gamma): + return g.op( + 'mmcv::MMCVDeformRoIPool', + input, + rois, + offset, + pooled_height_i=output_size[0], + pooled_width_i=output_size[1], + spatial_scale_f=spatial_scale, + sampling_ratio_f=sampling_ratio, + gamma_f=gamma) + + @staticmethod + def forward(ctx, + input, + rois, + offset, + output_size, + spatial_scale=1.0, + sampling_ratio=0, + gamma=0.1): + if offset is None: + offset = input.new_zeros(0) + ctx.output_size = _pair(output_size) + ctx.spatial_scale = float(spatial_scale) + ctx.sampling_ratio = int(sampling_ratio) + ctx.gamma = float(gamma) + + assert rois.size(1) == 5, 'RoI must be (idx, x1, y1, x2, y2)!' 
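+        # One pooled map is produced per RoI, so the output is
+        # (num_rois, C, pooled_h, pooled_w); `offset` (scaled by `gamma`)
+        # shifts the sampling bins inside each RoI.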
+ + output_shape = (rois.size(0), input.size(1), ctx.output_size[0], + ctx.output_size[1]) + output = input.new_zeros(output_shape) + + ext_module.deform_roi_pool_forward( + input, + rois, + offset, + output, + pooled_height=ctx.output_size[0], + pooled_width=ctx.output_size[1], + spatial_scale=ctx.spatial_scale, + sampling_ratio=ctx.sampling_ratio, + gamma=ctx.gamma) + + ctx.save_for_backward(input, rois, offset) + return output + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + input, rois, offset = ctx.saved_tensors + grad_input = grad_output.new_zeros(input.shape) + grad_offset = grad_output.new_zeros(offset.shape) + + ext_module.deform_roi_pool_backward( + grad_output, + input, + rois, + offset, + grad_input, + grad_offset, + pooled_height=ctx.output_size[0], + pooled_width=ctx.output_size[1], + spatial_scale=ctx.spatial_scale, + sampling_ratio=ctx.sampling_ratio, + gamma=ctx.gamma) + if grad_offset.numel() == 0: + grad_offset = None + return grad_input, None, grad_offset, None, None, None, None + + +deform_roi_pool = DeformRoIPoolFunction.apply + + +class DeformRoIPool(nn.Module): + + def __init__(self, + output_size, + spatial_scale=1.0, + sampling_ratio=0, + gamma=0.1): + super(DeformRoIPool, self).__init__() + self.output_size = _pair(output_size) + self.spatial_scale = float(spatial_scale) + self.sampling_ratio = int(sampling_ratio) + self.gamma = float(gamma) + + def forward(self, input, rois, offset=None): + return deform_roi_pool(input, rois, offset, self.output_size, + self.spatial_scale, self.sampling_ratio, + self.gamma) + + +class DeformRoIPoolPack(DeformRoIPool): + + def __init__(self, + output_size, + output_channels, + deform_fc_channels=1024, + spatial_scale=1.0, + sampling_ratio=0, + gamma=0.1): + super(DeformRoIPoolPack, self).__init__(output_size, spatial_scale, + sampling_ratio, gamma) + + self.output_channels = output_channels + self.deform_fc_channels = deform_fc_channels + + self.offset_fc = nn.Sequential( + nn.Linear( + self.output_size[0] * self.output_size[1] * + self.output_channels, self.deform_fc_channels), + nn.ReLU(inplace=True), + nn.Linear(self.deform_fc_channels, self.deform_fc_channels), + nn.ReLU(inplace=True), + nn.Linear(self.deform_fc_channels, + self.output_size[0] * self.output_size[1] * 2)) + self.offset_fc[-1].weight.data.zero_() + self.offset_fc[-1].bias.data.zero_() + + def forward(self, input, rois): + assert input.size(1) == self.output_channels + x = deform_roi_pool(input, rois, None, self.output_size, + self.spatial_scale, self.sampling_ratio, + self.gamma) + rois_num = rois.size(0) + offset = self.offset_fc(x.view(rois_num, -1)) + offset = offset.view(rois_num, 2, self.output_size[0], + self.output_size[1]) + return deform_roi_pool(input, rois, offset, self.output_size, + self.spatial_scale, self.sampling_ratio, + self.gamma) + + +class ModulatedDeformRoIPoolPack(DeformRoIPool): + + def __init__(self, + output_size, + output_channels, + deform_fc_channels=1024, + spatial_scale=1.0, + sampling_ratio=0, + gamma=0.1): + super(ModulatedDeformRoIPoolPack, + self).__init__(output_size, spatial_scale, sampling_ratio, gamma) + + self.output_channels = output_channels + self.deform_fc_channels = deform_fc_channels + + self.offset_fc = nn.Sequential( + nn.Linear( + self.output_size[0] * self.output_size[1] * + self.output_channels, self.deform_fc_channels), + nn.ReLU(inplace=True), + nn.Linear(self.deform_fc_channels, self.deform_fc_channels), + nn.ReLU(inplace=True), + nn.Linear(self.deform_fc_channels, + 
self.output_size[0] * self.output_size[1] * 2)) + self.offset_fc[-1].weight.data.zero_() + self.offset_fc[-1].bias.data.zero_() + + self.mask_fc = nn.Sequential( + nn.Linear( + self.output_size[0] * self.output_size[1] * + self.output_channels, self.deform_fc_channels), + nn.ReLU(inplace=True), + nn.Linear(self.deform_fc_channels, + self.output_size[0] * self.output_size[1] * 1), + nn.Sigmoid()) + self.mask_fc[2].weight.data.zero_() + self.mask_fc[2].bias.data.zero_() + + def forward(self, input, rois): + assert input.size(1) == self.output_channels + x = deform_roi_pool(input, rois, None, self.output_size, + self.spatial_scale, self.sampling_ratio, + self.gamma) + rois_num = rois.size(0) + offset = self.offset_fc(x.view(rois_num, -1)) + offset = offset.view(rois_num, 2, self.output_size[0], + self.output_size[1]) + mask = self.mask_fc(x.view(rois_num, -1)) + mask = mask.view(rois_num, 1, self.output_size[0], self.output_size[1]) + d = deform_roi_pool(input, rois, offset, self.output_size, + self.spatial_scale, self.sampling_ratio, + self.gamma) + return d * mask diff --git a/annotator/uniformer/mmcv/ops/deprecated_wrappers.py b/annotator/uniformer/mmcv/ops/deprecated_wrappers.py new file mode 100644 index 0000000000000000000000000000000000000000..a2e593df9ee57637038683d7a1efaa347b2b69e7 --- /dev/null +++ b/annotator/uniformer/mmcv/ops/deprecated_wrappers.py @@ -0,0 +1,43 @@ +# Copyright (c) OpenMMLab. All rights reserved. +# This file is for backward compatibility. +# Module wrappers for empty tensor have been moved to mmcv.cnn.bricks. +import warnings + +from ..cnn.bricks.wrappers import Conv2d, ConvTranspose2d, Linear, MaxPool2d + + +class Conv2d_deprecated(Conv2d): + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + warnings.warn( + 'Importing Conv2d wrapper from "mmcv.ops" will be deprecated in' + ' the future. Please import them from "mmcv.cnn" instead') + + +class ConvTranspose2d_deprecated(ConvTranspose2d): + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + warnings.warn( + 'Importing ConvTranspose2d wrapper from "mmcv.ops" will be ' + 'deprecated in the future. Please import them from "mmcv.cnn" ' + 'instead') + + +class MaxPool2d_deprecated(MaxPool2d): + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + warnings.warn( + 'Importing MaxPool2d wrapper from "mmcv.ops" will be deprecated in' + ' the future. Please import them from "mmcv.cnn" instead') + + +class Linear_deprecated(Linear): + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + warnings.warn( + 'Importing Linear wrapper from "mmcv.ops" will be deprecated in' + ' the future. Please import them from "mmcv.cnn" instead') diff --git a/annotator/uniformer/mmcv/ops/focal_loss.py b/annotator/uniformer/mmcv/ops/focal_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..763bc93bd2575c49ca8ccf20996bbd92d1e0d1a4 --- /dev/null +++ b/annotator/uniformer/mmcv/ops/focal_loss.py @@ -0,0 +1,212 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
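+# Illustrative usage sketch (editorial addition, not part of the upstream
+# OpenMMLab sources); the focal-loss kernels below are GPU implementations,
+# so the compiled `_ext` extension and CUDA tensors are assumed. Note that
+# `sigmoid_focal_loss` is a `Function.apply`, so arguments are positional:
+#
+#     import torch
+#     from annotator.uniformer.mmcv.ops.focal_loss import sigmoid_focal_loss
+#
+#     logits = torch.randn(8, 4, device='cuda')           # (N, num_classes)
+#     labels = torch.randint(0, 4, (8, ), device='cuda')  # (N, ) long
+#     loss = sigmoid_focal_loss(logits, labels, 2.0, 0.25, None, 'mean')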
+import torch +import torch.nn as nn +from torch.autograd import Function +from torch.autograd.function import once_differentiable + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', [ + 'sigmoid_focal_loss_forward', 'sigmoid_focal_loss_backward', + 'softmax_focal_loss_forward', 'softmax_focal_loss_backward' +]) + + +class SigmoidFocalLossFunction(Function): + + @staticmethod + def symbolic(g, input, target, gamma, alpha, weight, reduction): + return g.op( + 'mmcv::MMCVSigmoidFocalLoss', + input, + target, + gamma_f=gamma, + alpha_f=alpha, + weight_f=weight, + reduction_s=reduction) + + @staticmethod + def forward(ctx, + input, + target, + gamma=2.0, + alpha=0.25, + weight=None, + reduction='mean'): + + assert isinstance(target, (torch.LongTensor, torch.cuda.LongTensor)) + assert input.dim() == 2 + assert target.dim() == 1 + assert input.size(0) == target.size(0) + if weight is None: + weight = input.new_empty(0) + else: + assert weight.dim() == 1 + assert input.size(1) == weight.size(0) + ctx.reduction_dict = {'none': 0, 'mean': 1, 'sum': 2} + assert reduction in ctx.reduction_dict.keys() + + ctx.gamma = float(gamma) + ctx.alpha = float(alpha) + ctx.reduction = ctx.reduction_dict[reduction] + + output = input.new_zeros(input.size()) + + ext_module.sigmoid_focal_loss_forward( + input, target, weight, output, gamma=ctx.gamma, alpha=ctx.alpha) + if ctx.reduction == ctx.reduction_dict['mean']: + output = output.sum() / input.size(0) + elif ctx.reduction == ctx.reduction_dict['sum']: + output = output.sum() + ctx.save_for_backward(input, target, weight) + return output + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + input, target, weight = ctx.saved_tensors + + grad_input = input.new_zeros(input.size()) + + ext_module.sigmoid_focal_loss_backward( + input, + target, + weight, + grad_input, + gamma=ctx.gamma, + alpha=ctx.alpha) + + grad_input *= grad_output + if ctx.reduction == ctx.reduction_dict['mean']: + grad_input /= input.size(0) + return grad_input, None, None, None, None, None + + +sigmoid_focal_loss = SigmoidFocalLossFunction.apply + + +class SigmoidFocalLoss(nn.Module): + + def __init__(self, gamma, alpha, weight=None, reduction='mean'): + super(SigmoidFocalLoss, self).__init__() + self.gamma = gamma + self.alpha = alpha + self.register_buffer('weight', weight) + self.reduction = reduction + + def forward(self, input, target): + return sigmoid_focal_loss(input, target, self.gamma, self.alpha, + self.weight, self.reduction) + + def __repr__(self): + s = self.__class__.__name__ + s += f'(gamma={self.gamma}, ' + s += f'alpha={self.alpha}, ' + s += f'reduction={self.reduction})' + return s + + +class SoftmaxFocalLossFunction(Function): + + @staticmethod + def symbolic(g, input, target, gamma, alpha, weight, reduction): + return g.op( + 'mmcv::MMCVSoftmaxFocalLoss', + input, + target, + gamma_f=gamma, + alpha_f=alpha, + weight_f=weight, + reduction_s=reduction) + + @staticmethod + def forward(ctx, + input, + target, + gamma=2.0, + alpha=0.25, + weight=None, + reduction='mean'): + + assert isinstance(target, (torch.LongTensor, torch.cuda.LongTensor)) + assert input.dim() == 2 + assert target.dim() == 1 + assert input.size(0) == target.size(0) + if weight is None: + weight = input.new_empty(0) + else: + assert weight.dim() == 1 + assert input.size(1) == weight.size(0) + ctx.reduction_dict = {'none': 0, 'mean': 1, 'sum': 2} + assert reduction in ctx.reduction_dict.keys() + + ctx.gamma = float(gamma) + ctx.alpha = float(alpha) + 
ctx.reduction = ctx.reduction_dict[reduction] + + channel_stats, _ = torch.max(input, dim=1) + input_softmax = input - channel_stats.unsqueeze(1).expand_as(input) + input_softmax.exp_() + + channel_stats = input_softmax.sum(dim=1) + input_softmax /= channel_stats.unsqueeze(1).expand_as(input) + + output = input.new_zeros(input.size(0)) + ext_module.softmax_focal_loss_forward( + input_softmax, + target, + weight, + output, + gamma=ctx.gamma, + alpha=ctx.alpha) + + if ctx.reduction == ctx.reduction_dict['mean']: + output = output.sum() / input.size(0) + elif ctx.reduction == ctx.reduction_dict['sum']: + output = output.sum() + ctx.save_for_backward(input_softmax, target, weight) + return output + + @staticmethod + def backward(ctx, grad_output): + input_softmax, target, weight = ctx.saved_tensors + buff = input_softmax.new_zeros(input_softmax.size(0)) + grad_input = input_softmax.new_zeros(input_softmax.size()) + + ext_module.softmax_focal_loss_backward( + input_softmax, + target, + weight, + buff, + grad_input, + gamma=ctx.gamma, + alpha=ctx.alpha) + + grad_input *= grad_output + if ctx.reduction == ctx.reduction_dict['mean']: + grad_input /= input_softmax.size(0) + return grad_input, None, None, None, None, None + + +softmax_focal_loss = SoftmaxFocalLossFunction.apply + + +class SoftmaxFocalLoss(nn.Module): + + def __init__(self, gamma, alpha, weight=None, reduction='mean'): + super(SoftmaxFocalLoss, self).__init__() + self.gamma = gamma + self.alpha = alpha + self.register_buffer('weight', weight) + self.reduction = reduction + + def forward(self, input, target): + return softmax_focal_loss(input, target, self.gamma, self.alpha, + self.weight, self.reduction) + + def __repr__(self): + s = self.__class__.__name__ + s += f'(gamma={self.gamma}, ' + s += f'alpha={self.alpha}, ' + s += f'reduction={self.reduction})' + return s diff --git a/annotator/uniformer/mmcv/ops/furthest_point_sample.py b/annotator/uniformer/mmcv/ops/furthest_point_sample.py new file mode 100644 index 0000000000000000000000000000000000000000..374b7a878f1972c183941af28ba1df216ac1a60f --- /dev/null +++ b/annotator/uniformer/mmcv/ops/furthest_point_sample.py @@ -0,0 +1,83 @@ +import torch +from torch.autograd import Function + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', [ + 'furthest_point_sampling_forward', + 'furthest_point_sampling_with_dist_forward' +]) + + +class FurthestPointSampling(Function): + """Uses iterative furthest point sampling to select a set of features whose + corresponding points have the furthest distance.""" + + @staticmethod + def forward(ctx, points_xyz: torch.Tensor, + num_points: int) -> torch.Tensor: + """ + Args: + points_xyz (Tensor): (B, N, 3) where N > num_points. + num_points (int): Number of points in the sampled set. + + Returns: + Tensor: (B, num_points) indices of the sampled points. 
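+
+        Note:
+            The implementation below allocates its buffers with
+            `torch.cuda.IntTensor` / `torch.cuda.FloatTensor`, so the
+            input must live on a CUDA device.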
+ """ + assert points_xyz.is_contiguous() + + B, N = points_xyz.size()[:2] + output = torch.cuda.IntTensor(B, num_points) + temp = torch.cuda.FloatTensor(B, N).fill_(1e10) + + ext_module.furthest_point_sampling_forward( + points_xyz, + temp, + output, + b=B, + n=N, + m=num_points, + ) + if torch.__version__ != 'parrots': + ctx.mark_non_differentiable(output) + return output + + @staticmethod + def backward(xyz, a=None): + return None, None + + +class FurthestPointSamplingWithDist(Function): + """Uses iterative furthest point sampling to select a set of features whose + corresponding points have the furthest distance.""" + + @staticmethod + def forward(ctx, points_dist: torch.Tensor, + num_points: int) -> torch.Tensor: + """ + Args: + points_dist (Tensor): (B, N, N) Distance between each point pair. + num_points (int): Number of points in the sampled set. + + Returns: + Tensor: (B, num_points) indices of the sampled points. + """ + assert points_dist.is_contiguous() + + B, N, _ = points_dist.size() + output = points_dist.new_zeros([B, num_points], dtype=torch.int32) + temp = points_dist.new_zeros([B, N]).fill_(1e10) + + ext_module.furthest_point_sampling_with_dist_forward( + points_dist, temp, output, b=B, n=N, m=num_points) + if torch.__version__ != 'parrots': + ctx.mark_non_differentiable(output) + return output + + @staticmethod + def backward(xyz, a=None): + return None, None + + +furthest_point_sample = FurthestPointSampling.apply +furthest_point_sample_with_dist = FurthestPointSamplingWithDist.apply diff --git a/annotator/uniformer/mmcv/ops/fused_bias_leakyrelu.py b/annotator/uniformer/mmcv/ops/fused_bias_leakyrelu.py new file mode 100644 index 0000000000000000000000000000000000000000..6d12508469c6c8fa1884debece44c58d158cb6fa --- /dev/null +++ b/annotator/uniformer/mmcv/ops/fused_bias_leakyrelu.py @@ -0,0 +1,268 @@ +# modified from https://github.com/rosinality/stylegan2-pytorch/blob/master/op/fused_act.py # noqa:E501 + +# Copyright (c) 2021, NVIDIA Corporation. All rights reserved. +# NVIDIA Source Code License for StyleGAN2 with Adaptive Discriminator +# Augmentation (ADA) +# ======================================================================= + +# 1. Definitions + +# "Licensor" means any person or entity that distributes its Work. + +# "Software" means the original work of authorship made available under +# this License. + +# "Work" means the Software and any additions to or derivative works of +# the Software that are made available under this License. + +# The terms "reproduce," "reproduction," "derivative works," and +# "distribution" have the meaning as provided under U.S. copyright law; +# provided, however, that for the purposes of this License, derivative +# works shall not include works that remain separable from, or merely +# link (or bind by name) to the interfaces of, the Work. + +# Works, including the Software, are "made available" under this License +# by including in or with the Work either (a) a copyright notice +# referencing the applicability of this License to the Work, or (b) a +# copy of this License. + +# 2. License Grants + +# 2.1 Copyright Grant. Subject to the terms and conditions of this +# License, each Licensor grants to you a perpetual, worldwide, +# non-exclusive, royalty-free, copyright license to reproduce, +# prepare derivative works of, publicly display, publicly perform, +# sublicense and distribute its Work and any resulting derivative +# works in any form. + +# 3. Limitations + +# 3.1 Redistribution. 
You may reproduce or distribute the Work only +# if (a) you do so under this License, (b) you include a complete +# copy of this License with your distribution, and (c) you retain +# without modification any copyright, patent, trademark, or +# attribution notices that are present in the Work. + +# 3.2 Derivative Works. You may specify that additional or different +# terms apply to the use, reproduction, and distribution of your +# derivative works of the Work ("Your Terms") only if (a) Your Terms +# provide that the use limitation in Section 3.3 applies to your +# derivative works, and (b) you identify the specific derivative +# works that are subject to Your Terms. Notwithstanding Your Terms, +# this License (including the redistribution requirements in Section +# 3.1) will continue to apply to the Work itself. + +# 3.3 Use Limitation. The Work and any derivative works thereof only +# may be used or intended for use non-commercially. Notwithstanding +# the foregoing, NVIDIA and its affiliates may use the Work and any +# derivative works commercially. As used herein, "non-commercially" +# means for research or evaluation purposes only. + +# 3.4 Patent Claims. If you bring or threaten to bring a patent claim +# against any Licensor (including any claim, cross-claim or +# counterclaim in a lawsuit) to enforce any patents that you allege +# are infringed by any Work, then your rights under this License from +# such Licensor (including the grant in Section 2.1) will terminate +# immediately. + +# 3.5 Trademarks. This License does not grant any rights to use any +# Licensor’s or its affiliates’ names, logos, or trademarks, except +# as necessary to reproduce the notices described in this License. + +# 3.6 Termination. If you violate any term of this License, then your +# rights under this License (including the grant in Section 2.1) will +# terminate immediately. + +# 4. Disclaimer of Warranty. + +# THE WORK IS PROVIDED "AS IS" WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR +# NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER +# THIS LICENSE. + +# 5. Limitation of Liability. + +# EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL +# THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE +# SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT, +# INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF +# OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK +# (INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION, +# LOST PROFITS OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER +# COMMERCIAL DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF +# THE POSSIBILITY OF SUCH DAMAGES. + +# ======================================================================= + +import torch +import torch.nn.functional as F +from torch import nn +from torch.autograd import Function + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', ['fused_bias_leakyrelu']) + + +class FusedBiasLeakyReLUFunctionBackward(Function): + """Calculate second order deviation. + + This function is to compute the second order deviation for the fused leaky + relu operation. 
+ """ + + @staticmethod + def forward(ctx, grad_output, out, negative_slope, scale): + ctx.save_for_backward(out) + ctx.negative_slope = negative_slope + ctx.scale = scale + + empty = grad_output.new_empty(0) + + grad_input = ext_module.fused_bias_leakyrelu( + grad_output, + empty, + out, + act=3, + grad=1, + alpha=negative_slope, + scale=scale) + + dim = [0] + + if grad_input.ndim > 2: + dim += list(range(2, grad_input.ndim)) + + grad_bias = grad_input.sum(dim).detach() + + return grad_input, grad_bias + + @staticmethod + def backward(ctx, gradgrad_input, gradgrad_bias): + out, = ctx.saved_tensors + + # The second order deviation, in fact, contains two parts, while the + # the first part is zero. Thus, we direct consider the second part + # which is similar with the first order deviation in implementation. + gradgrad_out = ext_module.fused_bias_leakyrelu( + gradgrad_input, + gradgrad_bias.to(out.dtype), + out, + act=3, + grad=1, + alpha=ctx.negative_slope, + scale=ctx.scale) + + return gradgrad_out, None, None, None + + +class FusedBiasLeakyReLUFunction(Function): + + @staticmethod + def forward(ctx, input, bias, negative_slope, scale): + empty = input.new_empty(0) + + out = ext_module.fused_bias_leakyrelu( + input, + bias, + empty, + act=3, + grad=0, + alpha=negative_slope, + scale=scale) + ctx.save_for_backward(out) + ctx.negative_slope = negative_slope + ctx.scale = scale + + return out + + @staticmethod + def backward(ctx, grad_output): + out, = ctx.saved_tensors + + grad_input, grad_bias = FusedBiasLeakyReLUFunctionBackward.apply( + grad_output, out, ctx.negative_slope, ctx.scale) + + return grad_input, grad_bias, None, None + + +class FusedBiasLeakyReLU(nn.Module): + """Fused bias leaky ReLU. + + This function is introduced in the StyleGAN2: + http://arxiv.org/abs/1912.04958 + + The bias term comes from the convolution operation. In addition, to keep + the variance of the feature map or gradients unchanged, they also adopt a + scale similarly with Kaiming initialization. However, since the + :math:`1+{alpha}^2` : is too small, we can just ignore it. Therefore, the + final scale is just :math:`\sqrt{2}`:. Of course, you may change it with # noqa: W605, E501 + your own scale. + + TODO: Implement the CPU version. + + Args: + channel (int): The channel number of the feature map. + negative_slope (float, optional): Same as nn.LeakyRelu. + Defaults to 0.2. + scale (float, optional): A scalar to adjust the variance of the feature + map. Defaults to 2**0.5. + """ + + def __init__(self, num_channels, negative_slope=0.2, scale=2**0.5): + super(FusedBiasLeakyReLU, self).__init__() + + self.bias = nn.Parameter(torch.zeros(num_channels)) + self.negative_slope = negative_slope + self.scale = scale + + def forward(self, input): + return fused_bias_leakyrelu(input, self.bias, self.negative_slope, + self.scale) + + +def fused_bias_leakyrelu(input, bias, negative_slope=0.2, scale=2**0.5): + """Fused bias leaky ReLU function. + + This function is introduced in the StyleGAN2: + http://arxiv.org/abs/1912.04958 + + The bias term comes from the convolution operation. In addition, to keep + the variance of the feature map or gradients unchanged, they also adopt a + scale similarly with Kaiming initialization. However, since the + :math:`1+{alpha}^2` : is too small, we can just ignore it. Therefore, the + final scale is just :math:`\sqrt{2}`:. Of course, you may change it with # noqa: W605, E501 + your own scale. + + Args: + input (torch.Tensor): Input feature map. 
+ bias (nn.Parameter): The bias from convolution operation. + negative_slope (float, optional): Same as nn.LeakyRelu. + Defaults to 0.2. + scale (float, optional): A scalar to adjust the variance of the feature + map. Defaults to 2**0.5. + + Returns: + torch.Tensor: Feature map after non-linear activation. + """ + + if not input.is_cuda: + return bias_leakyrelu_ref(input, bias, negative_slope, scale) + + return FusedBiasLeakyReLUFunction.apply(input, bias.to(input.dtype), + negative_slope, scale) + + +def bias_leakyrelu_ref(x, bias, negative_slope=0.2, scale=2**0.5): + + if bias is not None: + assert bias.ndim == 1 + assert bias.shape[0] == x.shape[1] + x = x + bias.reshape([-1 if i == 1 else 1 for i in range(x.ndim)]) + + x = F.leaky_relu(x, negative_slope) + if scale != 1: + x = x * scale + + return x diff --git a/annotator/uniformer/mmcv/ops/gather_points.py b/annotator/uniformer/mmcv/ops/gather_points.py new file mode 100644 index 0000000000000000000000000000000000000000..f52f1677d8ea0facafc56a3672d37adb44677ff3 --- /dev/null +++ b/annotator/uniformer/mmcv/ops/gather_points.py @@ -0,0 +1,57 @@ +import torch +from torch.autograd import Function + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext( + '_ext', ['gather_points_forward', 'gather_points_backward']) + + +class GatherPoints(Function): + """Gather points with given index.""" + + @staticmethod + def forward(ctx, features: torch.Tensor, + indices: torch.Tensor) -> torch.Tensor: + """ + Args: + features (Tensor): (B, C, N) features to gather. + indices (Tensor): (B, M) where M is the number of points. + + Returns: + Tensor: (B, C, M) where M is the number of points. + """ + assert features.is_contiguous() + assert indices.is_contiguous() + + B, npoint = indices.size() + _, C, N = features.size() + output = torch.cuda.FloatTensor(B, C, npoint) + + ext_module.gather_points_forward( + features, indices, output, b=B, c=C, n=N, npoints=npoint) + + ctx.for_backwards = (indices, C, N) + if torch.__version__ != 'parrots': + ctx.mark_non_differentiable(indices) + return output + + @staticmethod + def backward(ctx, grad_out): + idx, C, N = ctx.for_backwards + B, npoint = idx.size() + + grad_features = torch.cuda.FloatTensor(B, C, N).zero_() + grad_out_data = grad_out.data.contiguous() + ext_module.gather_points_backward( + grad_out_data, + idx, + grad_features.data, + b=B, + c=C, + n=N, + npoints=npoint) + return grad_features, None + + +gather_points = GatherPoints.apply diff --git a/annotator/uniformer/mmcv/ops/group_points.py b/annotator/uniformer/mmcv/ops/group_points.py new file mode 100644 index 0000000000000000000000000000000000000000..6c3ec9d758ebe4e1c2205882af4be154008253a5 --- /dev/null +++ b/annotator/uniformer/mmcv/ops/group_points.py @@ -0,0 +1,224 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from typing import Tuple + +import torch +from torch import nn as nn +from torch.autograd import Function + +from ..utils import ext_loader +from .ball_query import ball_query +from .knn import knn + +ext_module = ext_loader.load_ext( + '_ext', ['group_points_forward', 'group_points_backward']) + + +class QueryAndGroup(nn.Module): + """Groups points with a ball query of radius. + + Args: + max_radius (float): The maximum radius of the balls. + If None is given, we will use kNN sampling instead of ball query. + sample_num (int): Maximum number of features to gather in the ball. + min_radius (float, optional): The minimum radius of the balls. + Default: 0. + use_xyz (bool, optional): Whether to use xyz. 
+ Default: True. + return_grouped_xyz (bool, optional): Whether to return grouped xyz. + Default: False. + normalize_xyz (bool, optional): Whether to normalize xyz. + Default: False. + uniform_sample (bool, optional): Whether to sample uniformly. + Default: False + return_unique_cnt (bool, optional): Whether to return the count of + unique samples. Default: False. + return_grouped_idx (bool, optional): Whether to return grouped idx. + Default: False. + """ + + def __init__(self, + max_radius, + sample_num, + min_radius=0, + use_xyz=True, + return_grouped_xyz=False, + normalize_xyz=False, + uniform_sample=False, + return_unique_cnt=False, + return_grouped_idx=False): + super().__init__() + self.max_radius = max_radius + self.min_radius = min_radius + self.sample_num = sample_num + self.use_xyz = use_xyz + self.return_grouped_xyz = return_grouped_xyz + self.normalize_xyz = normalize_xyz + self.uniform_sample = uniform_sample + self.return_unique_cnt = return_unique_cnt + self.return_grouped_idx = return_grouped_idx + if self.return_unique_cnt: + assert self.uniform_sample, \ + 'uniform_sample should be True when ' \ + 'returning the count of unique samples' + if self.max_radius is None: + assert not self.normalize_xyz, \ + 'can not normalize grouped xyz when max_radius is None' + + def forward(self, points_xyz, center_xyz, features=None): + """ + Args: + points_xyz (Tensor): (B, N, 3) xyz coordinates of the features. + center_xyz (Tensor): (B, npoint, 3) coordinates of the centriods. + features (Tensor): (B, C, N) Descriptors of the features. + + Returns: + Tensor: (B, 3 + C, npoint, sample_num) Grouped feature. + """ + # if self.max_radius is None, we will perform kNN instead of ball query + # idx is of shape [B, npoint, sample_num] + if self.max_radius is None: + idx = knn(self.sample_num, points_xyz, center_xyz, False) + idx = idx.transpose(1, 2).contiguous() + else: + idx = ball_query(self.min_radius, self.max_radius, self.sample_num, + points_xyz, center_xyz) + + if self.uniform_sample: + unique_cnt = torch.zeros((idx.shape[0], idx.shape[1])) + for i_batch in range(idx.shape[0]): + for i_region in range(idx.shape[1]): + unique_ind = torch.unique(idx[i_batch, i_region, :]) + num_unique = unique_ind.shape[0] + unique_cnt[i_batch, i_region] = num_unique + sample_ind = torch.randint( + 0, + num_unique, (self.sample_num - num_unique, ), + dtype=torch.long) + all_ind = torch.cat((unique_ind, unique_ind[sample_ind])) + idx[i_batch, i_region, :] = all_ind + + xyz_trans = points_xyz.transpose(1, 2).contiguous() + # (B, 3, npoint, sample_num) + grouped_xyz = grouping_operation(xyz_trans, idx) + grouped_xyz_diff = grouped_xyz - \ + center_xyz.transpose(1, 2).unsqueeze(-1) # relative offsets + if self.normalize_xyz: + grouped_xyz_diff /= self.max_radius + + if features is not None: + grouped_features = grouping_operation(features, idx) + if self.use_xyz: + # (B, C + 3, npoint, sample_num) + new_features = torch.cat([grouped_xyz_diff, grouped_features], + dim=1) + else: + new_features = grouped_features + else: + assert (self.use_xyz + ), 'Cannot have not features and not use xyz as a feature!' + new_features = grouped_xyz_diff + + ret = [new_features] + if self.return_grouped_xyz: + ret.append(grouped_xyz) + if self.return_unique_cnt: + ret.append(unique_cnt) + if self.return_grouped_idx: + ret.append(idx) + if len(ret) == 1: + return ret[0] + else: + return tuple(ret) + + +class GroupAll(nn.Module): + """Group xyz with feature. + + Args: + use_xyz (bool): Whether to use xyz. 
+ """ + + def __init__(self, use_xyz: bool = True): + super().__init__() + self.use_xyz = use_xyz + + def forward(self, + xyz: torch.Tensor, + new_xyz: torch.Tensor, + features: torch.Tensor = None): + """ + Args: + xyz (Tensor): (B, N, 3) xyz coordinates of the features. + new_xyz (Tensor): new xyz coordinates of the features. + features (Tensor): (B, C, N) features to group. + + Returns: + Tensor: (B, C + 3, 1, N) Grouped feature. + """ + grouped_xyz = xyz.transpose(1, 2).unsqueeze(2) + if features is not None: + grouped_features = features.unsqueeze(2) + if self.use_xyz: + # (B, 3 + C, 1, N) + new_features = torch.cat([grouped_xyz, grouped_features], + dim=1) + else: + new_features = grouped_features + else: + new_features = grouped_xyz + + return new_features + + +class GroupingOperation(Function): + """Group feature with given index.""" + + @staticmethod + def forward(ctx, features: torch.Tensor, + indices: torch.Tensor) -> torch.Tensor: + """ + Args: + features (Tensor): (B, C, N) tensor of features to group. + indices (Tensor): (B, npoint, nsample) the indices of + features to group with. + + Returns: + Tensor: (B, C, npoint, nsample) Grouped features. + """ + features = features.contiguous() + indices = indices.contiguous() + + B, nfeatures, nsample = indices.size() + _, C, N = features.size() + output = torch.cuda.FloatTensor(B, C, nfeatures, nsample) + + ext_module.group_points_forward(B, C, N, nfeatures, nsample, features, + indices, output) + + ctx.for_backwards = (indices, N) + return output + + @staticmethod + def backward(ctx, + grad_out: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: + """ + Args: + grad_out (Tensor): (B, C, npoint, nsample) tensor of the gradients + of the output from forward. + + Returns: + Tensor: (B, C, N) gradient of the features. + """ + idx, N = ctx.for_backwards + + B, C, npoint, nsample = grad_out.size() + grad_features = torch.cuda.FloatTensor(B, C, N).zero_() + + grad_out_data = grad_out.data.contiguous() + ext_module.group_points_backward(B, C, N, npoint, nsample, + grad_out_data, idx, + grad_features.data) + return grad_features, None + + +grouping_operation = GroupingOperation.apply diff --git a/annotator/uniformer/mmcv/ops/info.py b/annotator/uniformer/mmcv/ops/info.py new file mode 100644 index 0000000000000000000000000000000000000000..29f2e5598ae2bb5866ccd15a7d3b4de33c0cd14d --- /dev/null +++ b/annotator/uniformer/mmcv/ops/info.py @@ -0,0 +1,36 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+import glob +import os + +import torch + +if torch.__version__ == 'parrots': + import parrots + + def get_compiler_version(): + return 'GCC ' + parrots.version.compiler + + def get_compiling_cuda_version(): + return parrots.version.cuda +else: + from ..utils import ext_loader + ext_module = ext_loader.load_ext( + '_ext', ['get_compiler_version', 'get_compiling_cuda_version']) + + def get_compiler_version(): + return ext_module.get_compiler_version() + + def get_compiling_cuda_version(): + return ext_module.get_compiling_cuda_version() + + +def get_onnxruntime_op_path(): + wildcard = os.path.join( + os.path.abspath(os.path.dirname(os.path.dirname(__file__))), + '_ext_ort.*.so') + + paths = glob.glob(wildcard) + if len(paths) > 0: + return paths[0] + else: + return '' diff --git a/annotator/uniformer/mmcv/ops/iou3d.py b/annotator/uniformer/mmcv/ops/iou3d.py new file mode 100644 index 0000000000000000000000000000000000000000..6fc71979190323f44c09f8b7e1761cf49cd2d76b --- /dev/null +++ b/annotator/uniformer/mmcv/ops/iou3d.py @@ -0,0 +1,85 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', [ + 'iou3d_boxes_iou_bev_forward', 'iou3d_nms_forward', + 'iou3d_nms_normal_forward' +]) + + +def boxes_iou_bev(boxes_a, boxes_b): + """Calculate boxes IoU in the Bird's Eye View. + + Args: + boxes_a (torch.Tensor): Input boxes a with shape (M, 5). + boxes_b (torch.Tensor): Input boxes b with shape (N, 5). + + Returns: + ans_iou (torch.Tensor): IoU result with shape (M, N). + """ + ans_iou = boxes_a.new_zeros( + torch.Size((boxes_a.shape[0], boxes_b.shape[0]))) + + ext_module.iou3d_boxes_iou_bev_forward(boxes_a.contiguous(), + boxes_b.contiguous(), ans_iou) + + return ans_iou + + +def nms_bev(boxes, scores, thresh, pre_max_size=None, post_max_size=None): + """NMS function GPU implementation (for BEV boxes). The overlap of two + boxes for IoU calculation is defined as the exact overlapping area of the + two boxes. In this function, one can also set ``pre_max_size`` and + ``post_max_size``. + + Args: + boxes (torch.Tensor): Input boxes with the shape of [N, 5] + ([x1, y1, x2, y2, ry]). + scores (torch.Tensor): Scores of boxes with the shape of [N]. + thresh (float): Overlap threshold of NMS. + pre_max_size (int, optional): Max size of boxes before NMS. + Default: None. + post_max_size (int, optional): Max size of boxes after NMS. + Default: None. + + Returns: + torch.Tensor: Indexes after NMS. + """ + assert boxes.size(1) == 5, 'Input boxes shape should be [N, 5]' + order = scores.sort(0, descending=True)[1] + + if pre_max_size is not None: + order = order[:pre_max_size] + boxes = boxes[order].contiguous() + + keep = torch.zeros(boxes.size(0), dtype=torch.long) + num_out = ext_module.iou3d_nms_forward(boxes, keep, thresh) + keep = order[keep[:num_out].cuda(boxes.device)].contiguous() + if post_max_size is not None: + keep = keep[:post_max_size] + return keep + + +def nms_normal_bev(boxes, scores, thresh): + """Normal NMS function GPU implementation (for BEV boxes). The overlap of + two boxes for IoU calculation is defined as the exact overlapping area of + the two boxes WITH their yaw angle set to 0. + + Args: + boxes (torch.Tensor): Input boxes with shape (N, 5). + scores (torch.Tensor): Scores of predicted boxes with shape (N). + thresh (float): Overlap threshold of NMS. + + Returns: + torch.Tensor: Remaining indices with scores in descending order. 
+ """ + assert boxes.shape[1] == 5, 'Input boxes shape should be [N, 5]' + order = scores.sort(0, descending=True)[1] + + boxes = boxes[order].contiguous() + + keep = torch.zeros(boxes.size(0), dtype=torch.long) + num_out = ext_module.iou3d_nms_normal_forward(boxes, keep, thresh) + return order[keep[:num_out].cuda(boxes.device)].contiguous() diff --git a/annotator/uniformer/mmcv/ops/knn.py b/annotator/uniformer/mmcv/ops/knn.py new file mode 100644 index 0000000000000000000000000000000000000000..f335785036669fc19239825b0aae6dde3f73bf92 --- /dev/null +++ b/annotator/uniformer/mmcv/ops/knn.py @@ -0,0 +1,77 @@ +import torch +from torch.autograd import Function + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', ['knn_forward']) + + +class KNN(Function): + r"""KNN (CUDA) based on heap data structure. + Modified from `PAConv `_. + + Find k-nearest points. + """ + + @staticmethod + def forward(ctx, + k: int, + xyz: torch.Tensor, + center_xyz: torch.Tensor = None, + transposed: bool = False) -> torch.Tensor: + """ + Args: + k (int): number of nearest neighbors. + xyz (Tensor): (B, N, 3) if transposed == False, else (B, 3, N). + xyz coordinates of the features. + center_xyz (Tensor, optional): (B, npoint, 3) if transposed == + False, else (B, 3, npoint). centers of the knn query. + Default: None. + transposed (bool, optional): whether the input tensors are + transposed. Should not explicitly use this keyword when + calling knn (=KNN.apply), just add the fourth param. + Default: False. + + Returns: + Tensor: (B, k, npoint) tensor with the indices of + the features that form k-nearest neighbours. + """ + assert (k > 0) & (k < 100), 'k should be in range(0, 100)' + + if center_xyz is None: + center_xyz = xyz + + if transposed: + xyz = xyz.transpose(2, 1).contiguous() + center_xyz = center_xyz.transpose(2, 1).contiguous() + + assert xyz.is_contiguous() # [B, N, 3] + assert center_xyz.is_contiguous() # [B, npoint, 3] + + center_xyz_device = center_xyz.get_device() + assert center_xyz_device == xyz.get_device(), \ + 'center_xyz and xyz should be put on the same device' + if torch.cuda.current_device() != center_xyz_device: + torch.cuda.set_device(center_xyz_device) + + B, npoint, _ = center_xyz.shape + N = xyz.shape[1] + + idx = center_xyz.new_zeros((B, npoint, k)).int() + dist2 = center_xyz.new_zeros((B, npoint, k)).float() + + ext_module.knn_forward( + xyz, center_xyz, idx, dist2, b=B, n=N, m=npoint, nsample=k) + # idx shape to [B, k, npoint] + idx = idx.transpose(2, 1).contiguous() + if torch.__version__ != 'parrots': + ctx.mark_non_differentiable(idx) + return idx + + @staticmethod + def backward(ctx, a=None): + return None, None, None + + +knn = KNN.apply diff --git a/annotator/uniformer/mmcv/ops/masked_conv.py b/annotator/uniformer/mmcv/ops/masked_conv.py new file mode 100644 index 0000000000000000000000000000000000000000..cd514cc204c1d571ea5dc7e74b038c0f477a008b --- /dev/null +++ b/annotator/uniformer/mmcv/ops/masked_conv.py @@ -0,0 +1,111 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+import math
+
+import torch
+import torch.nn as nn
+from torch.autograd import Function
+from torch.autograd.function import once_differentiable
+from torch.nn.modules.utils import _pair
+
+from ..utils import ext_loader
+
+ext_module = ext_loader.load_ext(
+    '_ext', ['masked_im2col_forward', 'masked_col2im_forward'])
+
+
+class MaskedConv2dFunction(Function):
+
+    @staticmethod
+    def symbolic(g, features, mask, weight, bias, padding, stride):
+        return g.op(
+            'mmcv::MMCVMaskedConv2d',
+            features,
+            mask,
+            weight,
+            bias,
+            padding_i=padding,
+            stride_i=stride)
+
+    @staticmethod
+    def forward(ctx, features, mask, weight, bias, padding=0, stride=1):
+        assert mask.dim() == 3 and mask.size(0) == 1
+        assert features.dim() == 4 and features.size(0) == 1
+        assert features.size()[2:] == mask.size()[1:]
+        pad_h, pad_w = _pair(padding)
+        stride_h, stride_w = _pair(stride)
+        if stride_h != 1 or stride_w != 1:
+            raise ValueError(
+                'Stride must be 1 in masked_conv2d currently.')
+        out_channel, in_channel, kernel_h, kernel_w = weight.size()
+
+        batch_size = features.size(0)
+        out_h = int(
+            math.floor((features.size(2) + 2 * pad_h -
+                        (kernel_h - 1) - 1) / stride_h + 1))
+        out_w = int(
+            math.floor((features.size(3) + 2 * pad_w -
+                        (kernel_w - 1) - 1) / stride_w + 1))
+        mask_inds = torch.nonzero(mask[0] > 0, as_tuple=False)
+        output = features.new_zeros(batch_size, out_channel, out_h, out_w)
+        if mask_inds.numel() > 0:
+            mask_h_idx = mask_inds[:, 0].contiguous()
+            mask_w_idx = mask_inds[:, 1].contiguous()
+            data_col = features.new_zeros(in_channel * kernel_h * kernel_w,
+                                          mask_inds.size(0))
+            ext_module.masked_im2col_forward(
+                features,
+                mask_h_idx,
+                mask_w_idx,
+                data_col,
+                kernel_h=kernel_h,
+                kernel_w=kernel_w,
+                pad_h=pad_h,
+                pad_w=pad_w)
+
+            # bias + weight @ data_col, evaluated for the masked columns only
+            masked_output = torch.addmm(bias[:, None],
+                                        weight.view(out_channel, -1),
+                                        data_col)
+            ext_module.masked_col2im_forward(
+                masked_output,
+                mask_h_idx,
+                mask_w_idx,
+                output,
+                height=out_h,
+                width=out_w,
+                channels=out_channel)
+        return output
+
+    @staticmethod
+    @once_differentiable
+    def backward(ctx, grad_output):
+        return (None, ) * 5
+
+
+masked_conv2d = MaskedConv2dFunction.apply
+
+
+class MaskedConv2d(nn.Conv2d):
+    """A MaskedConv2d which inherits the official Conv2d.
+
+    The masked forward does not implement a backward function and currently
+    only supports a stride of 1.
+    """
+
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 stride=1,
+                 padding=0,
+                 dilation=1,
+                 groups=1,
+                 bias=True):
+        super(MaskedConv2d,
+              self).__init__(in_channels, out_channels, kernel_size, stride,
+                             padding, dilation, groups, bias)
+
+    def forward(self, input, mask=None):
+        if mask is None:  # fallback to the normal Conv2d
+            return super(MaskedConv2d, self).forward(input)
+        else:
+            return masked_conv2d(input, mask, self.weight, self.bias,
+                                 self.padding)
diff --git a/annotator/uniformer/mmcv/ops/merge_cells.py b/annotator/uniformer/mmcv/ops/merge_cells.py
new file mode 100644
index 0000000000000000000000000000000000000000..48ca8cc0a8aca8432835bd760c0403a3c35b34cf
--- /dev/null
+++ b/annotator/uniformer/mmcv/ops/merge_cells.py
@@ -0,0 +1,149 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from abc import abstractmethod
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+from ..cnn import ConvModule
+
+
+class BaseMergeCell(nn.Module):
+    """The basic class for cells used in NAS-FPN and NAS-FCOS.
+
+    BaseMergeCell takes 2 inputs.
After applying convolution + on them, they are resized to the target size. Then, + they go through binary_op, which depends on the type of cell. + If with_out_conv is True, the result of output will go through + another convolution layer. + + Args: + in_channels (int): number of input channels in out_conv layer. + out_channels (int): number of output channels in out_conv layer. + with_out_conv (bool): Whether to use out_conv layer + out_conv_cfg (dict): Config dict for convolution layer, which should + contain "groups", "kernel_size", "padding", "bias" to build + out_conv layer. + out_norm_cfg (dict): Config dict for normalization layer in out_conv. + out_conv_order (tuple): The order of conv/norm/activation layers in + out_conv. + with_input1_conv (bool): Whether to use convolution on input1. + with_input2_conv (bool): Whether to use convolution on input2. + input_conv_cfg (dict): Config dict for building input1_conv layer and + input2_conv layer, which is expected to contain the type of + convolution. + Default: None, which means using conv2d. + input_norm_cfg (dict): Config dict for normalization layer in + input1_conv and input2_conv layer. Default: None. + upsample_mode (str): Interpolation method used to resize the output + of input1_conv and input2_conv to target size. Currently, we + support ['nearest', 'bilinear']. Default: 'nearest'. + """ + + def __init__(self, + fused_channels=256, + out_channels=256, + with_out_conv=True, + out_conv_cfg=dict( + groups=1, kernel_size=3, padding=1, bias=True), + out_norm_cfg=None, + out_conv_order=('act', 'conv', 'norm'), + with_input1_conv=False, + with_input2_conv=False, + input_conv_cfg=None, + input_norm_cfg=None, + upsample_mode='nearest'): + super(BaseMergeCell, self).__init__() + assert upsample_mode in ['nearest', 'bilinear'] + self.with_out_conv = with_out_conv + self.with_input1_conv = with_input1_conv + self.with_input2_conv = with_input2_conv + self.upsample_mode = upsample_mode + + if self.with_out_conv: + self.out_conv = ConvModule( + fused_channels, + out_channels, + **out_conv_cfg, + norm_cfg=out_norm_cfg, + order=out_conv_order) + + self.input1_conv = self._build_input_conv( + out_channels, input_conv_cfg, + input_norm_cfg) if with_input1_conv else nn.Sequential() + self.input2_conv = self._build_input_conv( + out_channels, input_conv_cfg, + input_norm_cfg) if with_input2_conv else nn.Sequential() + + def _build_input_conv(self, channel, conv_cfg, norm_cfg): + return ConvModule( + channel, + channel, + 3, + padding=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + bias=True) + + @abstractmethod + def _binary_op(self, x1, x2): + pass + + def _resize(self, x, size): + if x.shape[-2:] == size: + return x + elif x.shape[-2:] < size: + return F.interpolate(x, size=size, mode=self.upsample_mode) + else: + assert x.shape[-2] % size[-2] == 0 and x.shape[-1] % size[-1] == 0 + kernel_size = x.shape[-1] // size[-1] + x = F.max_pool2d(x, kernel_size=kernel_size, stride=kernel_size) + return x + + def forward(self, x1, x2, out_size=None): + assert x1.shape[:2] == x2.shape[:2] + assert out_size is None or len(out_size) == 2 + if out_size is None: # resize to larger one + out_size = max(x1.size()[2:], x2.size()[2:]) + + x1 = self.input1_conv(x1) + x2 = self.input2_conv(x2) + + x1 = self._resize(x1, out_size) + x2 = self._resize(x2, out_size) + + x = self._binary_op(x1, x2) + if self.with_out_conv: + x = self.out_conv(x) + return x + + +class SumCell(BaseMergeCell): + + def __init__(self, in_channels, out_channels, **kwargs): + super(SumCell, 
self).__init__(in_channels, out_channels, **kwargs) + + def _binary_op(self, x1, x2): + return x1 + x2 + + +class ConcatCell(BaseMergeCell): + + def __init__(self, in_channels, out_channels, **kwargs): + super(ConcatCell, self).__init__(in_channels * 2, out_channels, + **kwargs) + + def _binary_op(self, x1, x2): + ret = torch.cat([x1, x2], dim=1) + return ret + + +class GlobalPoolingCell(BaseMergeCell): + + def __init__(self, in_channels=None, out_channels=None, **kwargs): + super().__init__(in_channels, out_channels, **kwargs) + self.global_pool = nn.AdaptiveAvgPool2d((1, 1)) + + def _binary_op(self, x1, x2): + x2_att = self.global_pool(x2).sigmoid() + return x2 + x2_att * x1 diff --git a/annotator/uniformer/mmcv/ops/modulated_deform_conv.py b/annotator/uniformer/mmcv/ops/modulated_deform_conv.py new file mode 100644 index 0000000000000000000000000000000000000000..75559579cf053abcc99538606cbb88c723faf783 --- /dev/null +++ b/annotator/uniformer/mmcv/ops/modulated_deform_conv.py @@ -0,0 +1,282 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import math + +import torch +import torch.nn as nn +from torch.autograd import Function +from torch.autograd.function import once_differentiable +from torch.nn.modules.utils import _pair, _single + +from annotator.uniformer.mmcv.utils import deprecated_api_warning +from ..cnn import CONV_LAYERS +from ..utils import ext_loader, print_log + +ext_module = ext_loader.load_ext( + '_ext', + ['modulated_deform_conv_forward', 'modulated_deform_conv_backward']) + + +class ModulatedDeformConv2dFunction(Function): + + @staticmethod + def symbolic(g, input, offset, mask, weight, bias, stride, padding, + dilation, groups, deform_groups): + input_tensors = [input, offset, mask, weight] + if bias is not None: + input_tensors.append(bias) + return g.op( + 'mmcv::MMCVModulatedDeformConv2d', + *input_tensors, + stride_i=stride, + padding_i=padding, + dilation_i=dilation, + groups_i=groups, + deform_groups_i=deform_groups) + + @staticmethod + def forward(ctx, + input, + offset, + mask, + weight, + bias=None, + stride=1, + padding=0, + dilation=1, + groups=1, + deform_groups=1): + if input is not None and input.dim() != 4: + raise ValueError( + f'Expected 4D tensor as input, got {input.dim()}D tensor \ + instead.') + ctx.stride = _pair(stride) + ctx.padding = _pair(padding) + ctx.dilation = _pair(dilation) + ctx.groups = groups + ctx.deform_groups = deform_groups + ctx.with_bias = bias is not None + if not ctx.with_bias: + bias = input.new_empty(0) # fake tensor + # When pytorch version >= 1.6.0, amp is adopted for fp16 mode; + # amp won't cast the type of model (float32), but "offset" is cast + # to float16 by nn.Conv2d automatically, leading to the type + # mismatch with input (when it is float32) or weight. + # The flag for whether to use fp16 or amp is the type of "offset", + # we cast weight and input to temporarily support fp16 and amp + # whatever the pytorch version is. 
+ input = input.type_as(offset) + weight = weight.type_as(input) + ctx.save_for_backward(input, offset, mask, weight, bias) + output = input.new_empty( + ModulatedDeformConv2dFunction._output_size(ctx, input, weight)) + ctx._bufs = [input.new_empty(0), input.new_empty(0)] + ext_module.modulated_deform_conv_forward( + input, + weight, + bias, + ctx._bufs[0], + offset, + mask, + output, + ctx._bufs[1], + kernel_h=weight.size(2), + kernel_w=weight.size(3), + stride_h=ctx.stride[0], + stride_w=ctx.stride[1], + pad_h=ctx.padding[0], + pad_w=ctx.padding[1], + dilation_h=ctx.dilation[0], + dilation_w=ctx.dilation[1], + group=ctx.groups, + deformable_group=ctx.deform_groups, + with_bias=ctx.with_bias) + return output + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + input, offset, mask, weight, bias = ctx.saved_tensors + grad_input = torch.zeros_like(input) + grad_offset = torch.zeros_like(offset) + grad_mask = torch.zeros_like(mask) + grad_weight = torch.zeros_like(weight) + grad_bias = torch.zeros_like(bias) + grad_output = grad_output.contiguous() + ext_module.modulated_deform_conv_backward( + input, + weight, + bias, + ctx._bufs[0], + offset, + mask, + ctx._bufs[1], + grad_input, + grad_weight, + grad_bias, + grad_offset, + grad_mask, + grad_output, + kernel_h=weight.size(2), + kernel_w=weight.size(3), + stride_h=ctx.stride[0], + stride_w=ctx.stride[1], + pad_h=ctx.padding[0], + pad_w=ctx.padding[1], + dilation_h=ctx.dilation[0], + dilation_w=ctx.dilation[1], + group=ctx.groups, + deformable_group=ctx.deform_groups, + with_bias=ctx.with_bias) + if not ctx.with_bias: + grad_bias = None + + return (grad_input, grad_offset, grad_mask, grad_weight, grad_bias, + None, None, None, None, None) + + @staticmethod + def _output_size(ctx, input, weight): + channels = weight.size(0) + output_size = (input.size(0), channels) + for d in range(input.dim() - 2): + in_size = input.size(d + 2) + pad = ctx.padding[d] + kernel = ctx.dilation[d] * (weight.size(d + 2) - 1) + 1 + stride_ = ctx.stride[d] + output_size += ((in_size + (2 * pad) - kernel) // stride_ + 1, ) + if not all(map(lambda s: s > 0, output_size)): + raise ValueError( + 'convolution input is too small (output would be ' + + 'x'.join(map(str, output_size)) + ')') + return output_size + + +modulated_deform_conv2d = ModulatedDeformConv2dFunction.apply + + +class ModulatedDeformConv2d(nn.Module): + + @deprecated_api_warning({'deformable_groups': 'deform_groups'}, + cls_name='ModulatedDeformConv2d') + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + deform_groups=1, + bias=True): + super(ModulatedDeformConv2d, self).__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.kernel_size = _pair(kernel_size) + self.stride = _pair(stride) + self.padding = _pair(padding) + self.dilation = _pair(dilation) + self.groups = groups + self.deform_groups = deform_groups + # enable compatibility with nn.Conv2d + self.transposed = False + self.output_padding = _single(0) + + self.weight = nn.Parameter( + torch.Tensor(out_channels, in_channels // groups, + *self.kernel_size)) + if bias: + self.bias = nn.Parameter(torch.Tensor(out_channels)) + else: + self.register_parameter('bias', None) + self.init_weights() + + def init_weights(self): + n = self.in_channels + for k in self.kernel_size: + n *= k + stdv = 1. 
/ math.sqrt(n) + self.weight.data.uniform_(-stdv, stdv) + if self.bias is not None: + self.bias.data.zero_() + + def forward(self, x, offset, mask): + return modulated_deform_conv2d(x, offset, mask, self.weight, self.bias, + self.stride, self.padding, + self.dilation, self.groups, + self.deform_groups) + + +@CONV_LAYERS.register_module('DCNv2') +class ModulatedDeformConv2dPack(ModulatedDeformConv2d): + """A ModulatedDeformable Conv Encapsulation that acts as normal Conv + layers. + + Args: + in_channels (int): Same as nn.Conv2d. + out_channels (int): Same as nn.Conv2d. + kernel_size (int or tuple[int]): Same as nn.Conv2d. + stride (int): Same as nn.Conv2d, while tuple is not supported. + padding (int): Same as nn.Conv2d, while tuple is not supported. + dilation (int): Same as nn.Conv2d, while tuple is not supported. + groups (int): Same as nn.Conv2d. + bias (bool or str): If specified as `auto`, it will be decided by the + norm_cfg. Bias will be set as True if norm_cfg is None, otherwise + False. + """ + + _version = 2 + + def __init__(self, *args, **kwargs): + super(ModulatedDeformConv2dPack, self).__init__(*args, **kwargs) + self.conv_offset = nn.Conv2d( + self.in_channels, + self.deform_groups * 3 * self.kernel_size[0] * self.kernel_size[1], + kernel_size=self.kernel_size, + stride=self.stride, + padding=self.padding, + dilation=self.dilation, + bias=True) + self.init_weights() + + def init_weights(self): + super(ModulatedDeformConv2dPack, self).init_weights() + if hasattr(self, 'conv_offset'): + self.conv_offset.weight.data.zero_() + self.conv_offset.bias.data.zero_() + + def forward(self, x): + out = self.conv_offset(x) + o1, o2, mask = torch.chunk(out, 3, dim=1) + offset = torch.cat((o1, o2), dim=1) + mask = torch.sigmoid(mask) + return modulated_deform_conv2d(x, offset, mask, self.weight, self.bias, + self.stride, self.padding, + self.dilation, self.groups, + self.deform_groups) + + def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict, + missing_keys, unexpected_keys, error_msgs): + version = local_metadata.get('version', None) + + if version is None or version < 2: + # the key is different in early versions + # In version < 2, ModulatedDeformConvPack + # loads previous benchmark models. + if (prefix + 'conv_offset.weight' not in state_dict + and prefix[:-1] + '_offset.weight' in state_dict): + state_dict[prefix + 'conv_offset.weight'] = state_dict.pop( + prefix[:-1] + '_offset.weight') + if (prefix + 'conv_offset.bias' not in state_dict + and prefix[:-1] + '_offset.bias' in state_dict): + state_dict[prefix + + 'conv_offset.bias'] = state_dict.pop(prefix[:-1] + + '_offset.bias') + + if version is not None and version > 1: + print_log( + f'ModulatedDeformConvPack {prefix.rstrip(".")} is upgraded to ' + 'version 2.', + logger='root') + + super()._load_from_state_dict(state_dict, prefix, local_metadata, + strict, missing_keys, unexpected_keys, + error_msgs) diff --git a/annotator/uniformer/mmcv/ops/multi_scale_deform_attn.py b/annotator/uniformer/mmcv/ops/multi_scale_deform_attn.py new file mode 100644 index 0000000000000000000000000000000000000000..c52dda18b41705705b47dd0e995b124048c16fba --- /dev/null +++ b/annotator/uniformer/mmcv/ops/multi_scale_deform_attn.py @@ -0,0 +1,358 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
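+# This module provides both a CUDA kernel wrapper
+# (MultiScaleDeformableAttnFunction, backed by the compiled `_ext`
+# extension) and a pure-PyTorch fallback
+# (multi_scale_deformable_attn_pytorch) that is used when CUDA is
+# unavailable.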
+import math
+import warnings
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch.autograd.function import Function, once_differentiable
+
+from annotator.uniformer.mmcv import deprecated_api_warning
+from annotator.uniformer.mmcv.cnn import constant_init, xavier_init
+from annotator.uniformer.mmcv.cnn.bricks.registry import ATTENTION
+from annotator.uniformer.mmcv.runner import BaseModule
+from ..utils import ext_loader
+
+ext_module = ext_loader.load_ext(
+    '_ext', ['ms_deform_attn_backward', 'ms_deform_attn_forward'])
+
+
+class MultiScaleDeformableAttnFunction(Function):
+
+    @staticmethod
+    def forward(ctx, value, value_spatial_shapes, value_level_start_index,
+                sampling_locations, attention_weights, im2col_step):
+        """GPU version of multi-scale deformable attention.
+
+        Args:
+            value (Tensor): The value has shape
+                (bs, num_keys, num_heads, embed_dims//num_heads)
+            value_spatial_shapes (Tensor): Spatial shape of
+                each feature map, has shape (num_levels, 2),
+                the last dimension 2 represents (h, w)
+            value_level_start_index (Tensor): The start index of each level,
+                has shape (num_levels, )
+            sampling_locations (Tensor): The location of sampling points,
+                has shape
+                (bs, num_queries, num_heads, num_levels, num_points, 2),
+                the last dimension 2 represents (x, y).
+            attention_weights (Tensor): The weight of sampling points used
+                when calculating the attention, has shape
+                (bs, num_queries, num_heads, num_levels, num_points).
+            im2col_step (int): The step used in image to column.
+
+        Returns:
+            Tensor: has shape (bs, num_queries, embed_dims)
+        """
+
+        ctx.im2col_step = im2col_step
+        output = ext_module.ms_deform_attn_forward(
+            value,
+            value_spatial_shapes,
+            value_level_start_index,
+            sampling_locations,
+            attention_weights,
+            im2col_step=ctx.im2col_step)
+        ctx.save_for_backward(value, value_spatial_shapes,
+                              value_level_start_index, sampling_locations,
+                              attention_weights)
+        return output
+
+    @staticmethod
+    @once_differentiable
+    def backward(ctx, grad_output):
+        """GPU version of backward function.
+
+        Args:
+            grad_output (Tensor): Gradient
+                of output tensor of forward.
+
+        Returns:
+            Tuple[Tensor]: Gradient
+                of input tensors in forward.
+        """
+        value, value_spatial_shapes, value_level_start_index,\
+            sampling_locations, attention_weights = ctx.saved_tensors
+        grad_value = torch.zeros_like(value)
+        grad_sampling_loc = torch.zeros_like(sampling_locations)
+        grad_attn_weight = torch.zeros_like(attention_weights)
+
+        ext_module.ms_deform_attn_backward(
+            value,
+            value_spatial_shapes,
+            value_level_start_index,
+            sampling_locations,
+            attention_weights,
+            grad_output.contiguous(),
+            grad_value,
+            grad_sampling_loc,
+            grad_attn_weight,
+            im2col_step=ctx.im2col_step)
+
+        return grad_value, None, None, \
+            grad_sampling_loc, grad_attn_weight, None
+
+
+def multi_scale_deformable_attn_pytorch(value, value_spatial_shapes,
+                                        sampling_locations, attention_weights):
+    """CPU version of multi-scale deformable attention.
+
+    Args:
+        value (Tensor): The value has shape
+            (bs, num_keys, num_heads, embed_dims//num_heads)
+        value_spatial_shapes (Tensor): Spatial shape of
+            each feature map, has shape (num_levels, 2),
+            the last dimension 2 represents (h, w)
+        sampling_locations (Tensor): The location of sampling points,
+            has shape
+            (bs, num_queries, num_heads, num_levels, num_points, 2),
+            the last dimension 2 represents (x, y).
+        attention_weights (Tensor): The weight of sampling points used
+            when calculating the attention, has shape
+            (bs, num_queries, num_heads, num_levels, num_points).
+
+    Returns:
+        Tensor: has shape (bs, num_queries, embed_dims)
+    """
+
+    bs, _, num_heads, embed_dims = value.shape
+    _, num_queries, num_heads, num_levels, num_points, _ =\
+        sampling_locations.shape
+    value_list = value.split([H_ * W_ for H_, W_ in value_spatial_shapes],
+                             dim=1)
+    sampling_grids = 2 * sampling_locations - 1
+    sampling_value_list = []
+    for level, (H_, W_) in enumerate(value_spatial_shapes):
+        # bs, H_*W_, num_heads, embed_dims ->
+        # bs, H_*W_, num_heads*embed_dims ->
+        # bs, num_heads*embed_dims, H_*W_ ->
+        # bs*num_heads, embed_dims, H_, W_
+        value_l_ = value_list[level].flatten(2).transpose(1, 2).reshape(
+            bs * num_heads, embed_dims, H_, W_)
+        # bs, num_queries, num_heads, num_points, 2 ->
+        # bs, num_heads, num_queries, num_points, 2 ->
+        # bs*num_heads, num_queries, num_points, 2
+        sampling_grid_l_ = sampling_grids[:, :, :,
+                                          level].transpose(1, 2).flatten(0, 1)
+        # bs*num_heads, embed_dims, num_queries, num_points
+        sampling_value_l_ = F.grid_sample(
+            value_l_,
+            sampling_grid_l_,
+            mode='bilinear',
+            padding_mode='zeros',
+            align_corners=False)
+        sampling_value_list.append(sampling_value_l_)
+    # (bs, num_queries, num_heads, num_levels, num_points) ->
+    # (bs, num_heads, num_queries, num_levels, num_points) ->
+    # (bs*num_heads, 1, num_queries, num_levels*num_points)
+    attention_weights = attention_weights.transpose(1, 2).reshape(
+        bs * num_heads, 1, num_queries, num_levels * num_points)
+    output = (torch.stack(sampling_value_list, dim=-2).flatten(-2) *
+              attention_weights).sum(-1).view(bs, num_heads * embed_dims,
+                                              num_queries)
+    return output.transpose(1, 2).contiguous()
+
+
+@ATTENTION.register_module()
+class MultiScaleDeformableAttention(BaseModule):
+    """An attention module used in Deformable DETR.
+
+    `Deformable DETR: Deformable Transformers for End-to-End Object Detection.
+    <https://arxiv.org/abs/2010.04159>`_.
+
+    Args:
+        embed_dims (int): The embedding dimension of Attention.
+            Default: 256.
+        num_heads (int): Parallel attention heads. Default: 8.
+        num_levels (int): The number of feature maps used in
+            Attention. Default: 4.
+        num_points (int): The number of sampling points for
+            each query in each head. Default: 4.
+        im2col_step (int): The step used in image_to_column.
+            Default: 64.
+        dropout (float): A Dropout layer on `inp_identity`.
+            Default: 0.1.
+        batch_first (bool): Key, Query and Value are shape of
+            (batch, n, embed_dim)
+            or (n, batch, embed_dim). Default: False.
+        norm_cfg (dict): Config dict for normalization layer.
+            Default: None.
+        init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization.
+            Default: None.
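+
+    Example:
+        >>> # A minimal CPU sketch: with no CUDA tensors the module falls
+        >>> # back to multi_scale_deformable_attn_pytorch. All shapes are
+        >>> # illustrative; 4*4 + 2*2 = 20 keys span the two levels.
+        >>> import torch
+        >>> attn = MultiScaleDeformableAttention(
+        ...     embed_dims=64, num_heads=4, num_levels=2, num_points=2)
+        >>> query = torch.rand(5, 1, 64)   # (num_query, bs, embed_dims)
+        >>> value = torch.rand(20, 1, 64)  # (num_key, bs, embed_dims)
+        >>> spatial_shapes = torch.tensor([[4, 4], [2, 2]])
+        >>> level_start_index = torch.tensor([0, 16])
+        >>> reference_points = torch.rand(1, 5, 2, 2)
+        >>> out = attn(query, value=value,
+        ...            reference_points=reference_points,
+        ...            spatial_shapes=spatial_shapes,
+        ...            level_start_index=level_start_index)
+        >>> assert tuple(out.shape) == (5, 1, 64)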
+ """ + + def __init__(self, + embed_dims=256, + num_heads=8, + num_levels=4, + num_points=4, + im2col_step=64, + dropout=0.1, + batch_first=False, + norm_cfg=None, + init_cfg=None): + super().__init__(init_cfg) + if embed_dims % num_heads != 0: + raise ValueError(f'embed_dims must be divisible by num_heads, ' + f'but got {embed_dims} and {num_heads}') + dim_per_head = embed_dims // num_heads + self.norm_cfg = norm_cfg + self.dropout = nn.Dropout(dropout) + self.batch_first = batch_first + + # you'd better set dim_per_head to a power of 2 + # which is more efficient in the CUDA implementation + def _is_power_of_2(n): + if (not isinstance(n, int)) or (n < 0): + raise ValueError( + 'invalid input for _is_power_of_2: {} (type: {})'.format( + n, type(n))) + return (n & (n - 1) == 0) and n != 0 + + if not _is_power_of_2(dim_per_head): + warnings.warn( + "You'd better set embed_dims in " + 'MultiScaleDeformAttention to make ' + 'the dimension of each attention head a power of 2 ' + 'which is more efficient in our CUDA implementation.') + + self.im2col_step = im2col_step + self.embed_dims = embed_dims + self.num_levels = num_levels + self.num_heads = num_heads + self.num_points = num_points + self.sampling_offsets = nn.Linear( + embed_dims, num_heads * num_levels * num_points * 2) + self.attention_weights = nn.Linear(embed_dims, + num_heads * num_levels * num_points) + self.value_proj = nn.Linear(embed_dims, embed_dims) + self.output_proj = nn.Linear(embed_dims, embed_dims) + self.init_weights() + + def init_weights(self): + """Default initialization for Parameters of Module.""" + constant_init(self.sampling_offsets, 0.) + thetas = torch.arange( + self.num_heads, + dtype=torch.float32) * (2.0 * math.pi / self.num_heads) + grid_init = torch.stack([thetas.cos(), thetas.sin()], -1) + grid_init = (grid_init / + grid_init.abs().max(-1, keepdim=True)[0]).view( + self.num_heads, 1, 1, + 2).repeat(1, self.num_levels, self.num_points, 1) + for i in range(self.num_points): + grid_init[:, :, i, :] *= i + 1 + + self.sampling_offsets.bias.data = grid_init.view(-1) + constant_init(self.attention_weights, val=0., bias=0.) + xavier_init(self.value_proj, distribution='uniform', bias=0.) + xavier_init(self.output_proj, distribution='uniform', bias=0.) + self._is_init = True + + @deprecated_api_warning({'residual': 'identity'}, + cls_name='MultiScaleDeformableAttention') + def forward(self, + query, + key=None, + value=None, + identity=None, + query_pos=None, + key_padding_mask=None, + reference_points=None, + spatial_shapes=None, + level_start_index=None, + **kwargs): + """Forward Function of MultiScaleDeformAttention. + + Args: + query (Tensor): Query of Transformer with shape + (num_query, bs, embed_dims). + key (Tensor): The key tensor with shape + `(num_key, bs, embed_dims)`. + value (Tensor): The value tensor with shape + `(num_key, bs, embed_dims)`. + identity (Tensor): The tensor used for addition, with the + same shape as `query`. Default None. If None, + `query` will be used. + query_pos (Tensor): The positional encoding for `query`. + Default: None. + key_pos (Tensor): The positional encoding for `key`. Default + None. + reference_points (Tensor): The normalized reference + points with shape (bs, num_query, num_levels, 2), + all elements is range in [0, 1], top-left (0,0), + bottom-right (1, 1), including padding area. + or (N, Length_{query}, num_levels, 4), add + additional two dimensions is (w, h) to + form reference boxes. 
+ key_padding_mask (Tensor): ByteTensor for `query`, with + shape [bs, num_key]. + spatial_shapes (Tensor): Spatial shape of features in + different levels. With shape (num_levels, 2), + last dimension represents (h, w). + level_start_index (Tensor): The start index of each level. + A tensor has shape ``(num_levels, )`` and can be represented + as [0, h_0*w_0, h_0*w_0+h_1*w_1, ...]. + + Returns: + Tensor: forwarded results with shape [num_query, bs, embed_dims]. + """ + + if value is None: + value = query + + if identity is None: + identity = query + if query_pos is not None: + query = query + query_pos + if not self.batch_first: + # change to (bs, num_query ,embed_dims) + query = query.permute(1, 0, 2) + value = value.permute(1, 0, 2) + + bs, num_query, _ = query.shape + bs, num_value, _ = value.shape + assert (spatial_shapes[:, 0] * spatial_shapes[:, 1]).sum() == num_value + + value = self.value_proj(value) + if key_padding_mask is not None: + value = value.masked_fill(key_padding_mask[..., None], 0.0) + value = value.view(bs, num_value, self.num_heads, -1) + sampling_offsets = self.sampling_offsets(query).view( + bs, num_query, self.num_heads, self.num_levels, self.num_points, 2) + attention_weights = self.attention_weights(query).view( + bs, num_query, self.num_heads, self.num_levels * self.num_points) + attention_weights = attention_weights.softmax(-1) + + attention_weights = attention_weights.view(bs, num_query, + self.num_heads, + self.num_levels, + self.num_points) + if reference_points.shape[-1] == 2: + offset_normalizer = torch.stack( + [spatial_shapes[..., 1], spatial_shapes[..., 0]], -1) + sampling_locations = reference_points[:, :, None, :, None, :] \ + + sampling_offsets \ + / offset_normalizer[None, None, None, :, None, :] + elif reference_points.shape[-1] == 4: + sampling_locations = reference_points[:, :, None, :, None, :2] \ + + sampling_offsets / self.num_points \ + * reference_points[:, :, None, :, None, 2:] \ + * 0.5 + else: + raise ValueError( + f'Last dim of reference_points must be' + f' 2 or 4, but get {reference_points.shape[-1]} instead.') + if torch.cuda.is_available() and value.is_cuda: + output = MultiScaleDeformableAttnFunction.apply( + value, spatial_shapes, level_start_index, sampling_locations, + attention_weights, self.im2col_step) + else: + output = multi_scale_deformable_attn_pytorch( + value, spatial_shapes, sampling_locations, attention_weights) + + output = self.output_proj(output) + + if not self.batch_first: + # (num_query, bs ,embed_dims) + output = output.permute(1, 0, 2) + + return self.dropout(output) + identity diff --git a/annotator/uniformer/mmcv/ops/nms.py b/annotator/uniformer/mmcv/ops/nms.py new file mode 100644 index 0000000000000000000000000000000000000000..6d9634281f486ab284091786886854c451368052 --- /dev/null +++ b/annotator/uniformer/mmcv/ops/nms.py @@ -0,0 +1,417 @@ +import os + +import numpy as np +import torch + +from annotator.uniformer.mmcv.utils import deprecated_api_warning +from ..utils import ext_loader + +ext_module = ext_loader.load_ext( + '_ext', ['nms', 'softnms', 'nms_match', 'nms_rotated']) + + +# This function is modified from: https://github.com/pytorch/vision/ +class NMSop(torch.autograd.Function): + + @staticmethod + def forward(ctx, bboxes, scores, iou_threshold, offset, score_threshold, + max_num): + is_filtering_by_score = score_threshold > 0 + if is_filtering_by_score: + valid_mask = scores > score_threshold + bboxes, scores = bboxes[valid_mask], scores[valid_mask] + valid_inds = torch.nonzero( + valid_mask, 
as_tuple=False).squeeze(dim=1) + + inds = ext_module.nms( + bboxes, scores, iou_threshold=float(iou_threshold), offset=offset) + + if max_num > 0: + inds = inds[:max_num] + if is_filtering_by_score: + inds = valid_inds[inds] + return inds + + @staticmethod + def symbolic(g, bboxes, scores, iou_threshold, offset, score_threshold, + max_num): + from ..onnx import is_custom_op_loaded + has_custom_op = is_custom_op_loaded() + # TensorRT nms plugin is aligned with original nms in ONNXRuntime + is_trt_backend = os.environ.get('ONNX_BACKEND') == 'MMCVTensorRT' + if has_custom_op and (not is_trt_backend): + return g.op( + 'mmcv::NonMaxSuppression', + bboxes, + scores, + iou_threshold_f=float(iou_threshold), + offset_i=int(offset)) + else: + from torch.onnx.symbolic_opset9 import select, squeeze, unsqueeze + from ..onnx.onnx_utils.symbolic_helper import _size_helper + + boxes = unsqueeze(g, bboxes, 0) + scores = unsqueeze(g, unsqueeze(g, scores, 0), 0) + + if max_num > 0: + max_num = g.op( + 'Constant', + value_t=torch.tensor(max_num, dtype=torch.long)) + else: + dim = g.op('Constant', value_t=torch.tensor(0)) + max_num = _size_helper(g, bboxes, dim) + max_output_per_class = max_num + iou_threshold = g.op( + 'Constant', + value_t=torch.tensor([iou_threshold], dtype=torch.float)) + score_threshold = g.op( + 'Constant', + value_t=torch.tensor([score_threshold], dtype=torch.float)) + nms_out = g.op('NonMaxSuppression', boxes, scores, + max_output_per_class, iou_threshold, + score_threshold) + return squeeze( + g, + select( + g, nms_out, 1, + g.op( + 'Constant', + value_t=torch.tensor([2], dtype=torch.long))), 1) + + +class SoftNMSop(torch.autograd.Function): + + @staticmethod + def forward(ctx, boxes, scores, iou_threshold, sigma, min_score, method, + offset): + dets = boxes.new_empty((boxes.size(0), 5), device='cpu') + inds = ext_module.softnms( + boxes.cpu(), + scores.cpu(), + dets.cpu(), + iou_threshold=float(iou_threshold), + sigma=float(sigma), + min_score=float(min_score), + method=int(method), + offset=int(offset)) + return dets, inds + + @staticmethod + def symbolic(g, boxes, scores, iou_threshold, sigma, min_score, method, + offset): + from packaging import version + assert version.parse(torch.__version__) >= version.parse('1.7.0') + nms_out = g.op( + 'mmcv::SoftNonMaxSuppression', + boxes, + scores, + iou_threshold_f=float(iou_threshold), + sigma_f=float(sigma), + min_score_f=float(min_score), + method_i=int(method), + offset_i=int(offset), + outputs=2) + return nms_out + + +@deprecated_api_warning({'iou_thr': 'iou_threshold'}) +def nms(boxes, scores, iou_threshold, offset=0, score_threshold=0, max_num=-1): + """Dispatch to either CPU or GPU NMS implementations. + + The input can be either torch tensor or numpy array. GPU NMS will be used + if the input is gpu tensor, otherwise CPU NMS + will be used. The returned type will always be the same as inputs. + + Arguments: + boxes (torch.Tensor or np.ndarray): boxes in shape (N, 4). + scores (torch.Tensor or np.ndarray): scores in shape (N, ). + iou_threshold (float): IoU threshold for NMS. + offset (int, 0 or 1): boxes' width or height is (x2 - x1 + offset). + score_threshold (float): score threshold for NMS. + max_num (int): maximum number of boxes after NMS. + + Returns: + tuple: kept dets(boxes and scores) and indice, which is always the \ + same data type as the input. 
+ + Example: + >>> boxes = np.array([[49.1, 32.4, 51.0, 35.9], + >>> [49.3, 32.9, 51.0, 35.3], + >>> [49.2, 31.8, 51.0, 35.4], + >>> [35.1, 11.5, 39.1, 15.7], + >>> [35.6, 11.8, 39.3, 14.2], + >>> [35.3, 11.5, 39.9, 14.5], + >>> [35.2, 11.7, 39.7, 15.7]], dtype=np.float32) + >>> scores = np.array([0.9, 0.9, 0.5, 0.5, 0.5, 0.4, 0.3],\ + dtype=np.float32) + >>> iou_threshold = 0.6 + >>> dets, inds = nms(boxes, scores, iou_threshold) + >>> assert len(inds) == len(dets) == 3 + """ + assert isinstance(boxes, (torch.Tensor, np.ndarray)) + assert isinstance(scores, (torch.Tensor, np.ndarray)) + is_numpy = False + if isinstance(boxes, np.ndarray): + is_numpy = True + boxes = torch.from_numpy(boxes) + if isinstance(scores, np.ndarray): + scores = torch.from_numpy(scores) + assert boxes.size(1) == 4 + assert boxes.size(0) == scores.size(0) + assert offset in (0, 1) + + if torch.__version__ == 'parrots': + indata_list = [boxes, scores] + indata_dict = { + 'iou_threshold': float(iou_threshold), + 'offset': int(offset) + } + inds = ext_module.nms(*indata_list, **indata_dict) + else: + inds = NMSop.apply(boxes, scores, iou_threshold, offset, + score_threshold, max_num) + dets = torch.cat((boxes[inds], scores[inds].reshape(-1, 1)), dim=1) + if is_numpy: + dets = dets.cpu().numpy() + inds = inds.cpu().numpy() + return dets, inds + + +@deprecated_api_warning({'iou_thr': 'iou_threshold'}) +def soft_nms(boxes, + scores, + iou_threshold=0.3, + sigma=0.5, + min_score=1e-3, + method='linear', + offset=0): + """Dispatch to only CPU Soft NMS implementations. + + The input can be either a torch tensor or numpy array. + The returned type will always be the same as inputs. + + Arguments: + boxes (torch.Tensor or np.ndarray): boxes in shape (N, 4). + scores (torch.Tensor or np.ndarray): scores in shape (N, ). + iou_threshold (float): IoU threshold for NMS. + sigma (float): hyperparameter for gaussian method + min_score (float): score filter threshold + method (str): either 'linear' or 'gaussian' + offset (int, 0 or 1): boxes' width or height is (x2 - x1 + offset). + + Returns: + tuple: kept dets(boxes and scores) and indice, which is always the \ + same data type as the input. 
+ + Example: + >>> boxes = np.array([[4., 3., 5., 3.], + >>> [4., 3., 5., 4.], + >>> [3., 1., 3., 1.], + >>> [3., 1., 3., 1.], + >>> [3., 1., 3., 1.], + >>> [3., 1., 3., 1.]], dtype=np.float32) + >>> scores = np.array([0.9, 0.9, 0.5, 0.5, 0.4, 0.0], dtype=np.float32) + >>> iou_threshold = 0.6 + >>> dets, inds = soft_nms(boxes, scores, iou_threshold, sigma=0.5) + >>> assert len(inds) == len(dets) == 5 + """ + + assert isinstance(boxes, (torch.Tensor, np.ndarray)) + assert isinstance(scores, (torch.Tensor, np.ndarray)) + is_numpy = False + if isinstance(boxes, np.ndarray): + is_numpy = True + boxes = torch.from_numpy(boxes) + if isinstance(scores, np.ndarray): + scores = torch.from_numpy(scores) + assert boxes.size(1) == 4 + assert boxes.size(0) == scores.size(0) + assert offset in (0, 1) + method_dict = {'naive': 0, 'linear': 1, 'gaussian': 2} + assert method in method_dict.keys() + + if torch.__version__ == 'parrots': + dets = boxes.new_empty((boxes.size(0), 5), device='cpu') + indata_list = [boxes.cpu(), scores.cpu(), dets.cpu()] + indata_dict = { + 'iou_threshold': float(iou_threshold), + 'sigma': float(sigma), + 'min_score': min_score, + 'method': method_dict[method], + 'offset': int(offset) + } + inds = ext_module.softnms(*indata_list, **indata_dict) + else: + dets, inds = SoftNMSop.apply(boxes.cpu(), scores.cpu(), + float(iou_threshold), float(sigma), + float(min_score), method_dict[method], + int(offset)) + + dets = dets[:inds.size(0)] + + if is_numpy: + dets = dets.cpu().numpy() + inds = inds.cpu().numpy() + return dets, inds + else: + return dets.to(device=boxes.device), inds.to(device=boxes.device) + + +def batched_nms(boxes, scores, idxs, nms_cfg, class_agnostic=False): + """Performs non-maximum suppression in a batched fashion. + + Modified from https://github.com/pytorch/vision/blob + /505cd6957711af790211896d32b40291bea1bc21/torchvision/ops/boxes.py#L39. + In order to perform NMS independently per class, we add an offset to all + the boxes. The offset is dependent only on the class idx, and is large + enough so that boxes from different classes do not overlap. + + Arguments: + boxes (torch.Tensor): boxes in shape (N, 4). + scores (torch.Tensor): scores in shape (N, ). + idxs (torch.Tensor): each index value correspond to a bbox cluster, + and NMS will not be applied between elements of different idxs, + shape (N, ). + nms_cfg (dict): specify nms type and other parameters like iou_thr. + Possible keys includes the following. + + - iou_thr (float): IoU threshold used for NMS. + - split_thr (float): threshold number of boxes. In some cases the + number of boxes is large (e.g., 200k). To avoid OOM during + training, the users could set `split_thr` to a small value. + If the number of boxes is greater than the threshold, it will + perform NMS on each group of boxes separately and sequentially. + Defaults to 10000. + class_agnostic (bool): if true, nms is class agnostic, + i.e. IoU thresholding happens over all boxes, + regardless of the predicted class. + + Returns: + tuple: kept dets and indice. 
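+
+    Example:
+        >>> # An illustrative sketch; running it requires the compiled
+        >>> # ``_ext`` extension that backs the underlying nms op.
+        >>> boxes = torch.rand(16, 4) * 100
+        >>> boxes[:, 2:] += boxes[:, :2]  # ensure x2 >= x1 and y2 >= y1
+        >>> scores = torch.rand(16)
+        >>> idxs = torch.randint(0, 3, (16, ))
+        >>> nms_cfg = dict(type='nms', iou_threshold=0.5)
+        >>> dets, keep = batched_nms(boxes, scores, idxs, nms_cfg)
+        >>> assert dets.size(1) == 5  # boxes with their scores appended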
+ """ + nms_cfg_ = nms_cfg.copy() + class_agnostic = nms_cfg_.pop('class_agnostic', class_agnostic) + if class_agnostic: + boxes_for_nms = boxes + else: + max_coordinate = boxes.max() + offsets = idxs.to(boxes) * (max_coordinate + torch.tensor(1).to(boxes)) + boxes_for_nms = boxes + offsets[:, None] + + nms_type = nms_cfg_.pop('type', 'nms') + nms_op = eval(nms_type) + + split_thr = nms_cfg_.pop('split_thr', 10000) + # Won't split to multiple nms nodes when exporting to onnx + if boxes_for_nms.shape[0] < split_thr or torch.onnx.is_in_onnx_export(): + dets, keep = nms_op(boxes_for_nms, scores, **nms_cfg_) + boxes = boxes[keep] + # -1 indexing works abnormal in TensorRT + # This assumes `dets` has 5 dimensions where + # the last dimension is score. + # TODO: more elegant way to handle the dimension issue. + # Some type of nms would reweight the score, such as SoftNMS + scores = dets[:, 4] + else: + max_num = nms_cfg_.pop('max_num', -1) + total_mask = scores.new_zeros(scores.size(), dtype=torch.bool) + # Some type of nms would reweight the score, such as SoftNMS + scores_after_nms = scores.new_zeros(scores.size()) + for id in torch.unique(idxs): + mask = (idxs == id).nonzero(as_tuple=False).view(-1) + dets, keep = nms_op(boxes_for_nms[mask], scores[mask], **nms_cfg_) + total_mask[mask[keep]] = True + scores_after_nms[mask[keep]] = dets[:, -1] + keep = total_mask.nonzero(as_tuple=False).view(-1) + + scores, inds = scores_after_nms[keep].sort(descending=True) + keep = keep[inds] + boxes = boxes[keep] + + if max_num > 0: + keep = keep[:max_num] + boxes = boxes[:max_num] + scores = scores[:max_num] + + return torch.cat([boxes, scores[:, None]], -1), keep + + +def nms_match(dets, iou_threshold): + """Matched dets into different groups by NMS. + + NMS match is Similar to NMS but when a bbox is suppressed, nms match will + record the indice of suppressed bbox and form a group with the indice of + kept bbox. In each group, indice is sorted as score order. + + Arguments: + dets (torch.Tensor | np.ndarray): Det boxes with scores, shape (N, 5). + iou_thr (float): IoU thresh for NMS. + + Returns: + List[torch.Tensor | np.ndarray]: The outer list corresponds different + matched group, the inner Tensor corresponds the indices for a group + in score order. + """ + if dets.shape[0] == 0: + matched = [] + else: + assert dets.shape[-1] == 5, 'inputs dets.shape should be (N, 5), ' \ + f'but get {dets.shape}' + if isinstance(dets, torch.Tensor): + dets_t = dets.detach().cpu() + else: + dets_t = torch.from_numpy(dets) + indata_list = [dets_t] + indata_dict = {'iou_threshold': float(iou_threshold)} + matched = ext_module.nms_match(*indata_list, **indata_dict) + if torch.__version__ == 'parrots': + matched = matched.tolist() + + if isinstance(dets, torch.Tensor): + return [dets.new_tensor(m, dtype=torch.long) for m in matched] + else: + return [np.array(m, dtype=np.int) for m in matched] + + +def nms_rotated(dets, scores, iou_threshold, labels=None): + """Performs non-maximum suppression (NMS) on the rotated boxes according to + their intersection-over-union (IoU). + + Rotated NMS iteratively removes lower scoring rotated boxes which have an + IoU greater than iou_threshold with another (higher scoring) rotated box. + + Args: + boxes (Tensor): Rotated boxes in shape (N, 5). They are expected to \ + be in (x_ctr, y_ctr, width, height, angle_radian) format. + scores (Tensor): scores in shape (N, ). + iou_threshold (float): IoU thresh for NMS. + labels (Tensor): boxes' label in shape (N,). 
+ + Returns: + tuple: kept dets(boxes and scores) and indice, which is always the \ + same data type as the input. + """ + if dets.shape[0] == 0: + return dets, None + multi_label = labels is not None + if multi_label: + dets_wl = torch.cat((dets, labels.unsqueeze(1)), 1) + else: + dets_wl = dets + _, order = scores.sort(0, descending=True) + dets_sorted = dets_wl.index_select(0, order) + + if torch.__version__ == 'parrots': + keep_inds = ext_module.nms_rotated( + dets_wl, + scores, + order, + dets_sorted, + iou_threshold=iou_threshold, + multi_label=multi_label) + else: + keep_inds = ext_module.nms_rotated(dets_wl, scores, order, dets_sorted, + iou_threshold, multi_label) + dets = torch.cat((dets[keep_inds], scores[keep_inds].reshape(-1, 1)), + dim=1) + return dets, keep_inds diff --git a/annotator/uniformer/mmcv/ops/pixel_group.py b/annotator/uniformer/mmcv/ops/pixel_group.py new file mode 100644 index 0000000000000000000000000000000000000000..2143c75f835a467c802fc3c37ecd3ac0f85bcda4 --- /dev/null +++ b/annotator/uniformer/mmcv/ops/pixel_group.py @@ -0,0 +1,75 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import numpy as np +import torch + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', ['pixel_group']) + + +def pixel_group(score, mask, embedding, kernel_label, kernel_contour, + kernel_region_num, distance_threshold): + """Group pixels into text instances, which is widely used text detection + methods. + + Arguments: + score (np.array or Tensor): The foreground score with size hxw. + mask (np.array or Tensor): The foreground mask with size hxw. + embedding (np.array or Tensor): The embedding with size hxwxc to + distinguish instances. + kernel_label (np.array or Tensor): The instance kernel index with + size hxw. + kernel_contour (np.array or Tensor): The kernel contour with size hxw. + kernel_region_num (int): The instance kernel region number. + distance_threshold (float): The embedding distance threshold between + kernel and pixel in one instance. + + Returns: + pixel_assignment (List[List[float]]): The instance coordinate list. + Each element consists of averaged confidence, pixel number, and + coordinates (x_i, y_i for all pixels) in order. 
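+
+    Example:
+        >>> # An illustrative sketch; running it requires the compiled
+        >>> # ``_ext`` extension. Inputs are random, so the grouping is
+        >>> # meaningless and only the call signature is demonstrated.
+        >>> h, w, c = 10, 10, 4
+        >>> score = np.random.rand(h, w).astype(np.float32)
+        >>> mask = (score > 0.5).astype(np.float32)
+        >>> embedding = np.random.rand(h, w, c).astype(np.float32)
+        >>> kernel_label = (score > 0.8).astype(np.int32)
+        >>> kernel_contour = np.zeros((h, w), dtype=np.uint8)
+        >>> pixel_assignment = pixel_group(score, mask, embedding,
+        ...                                kernel_label, kernel_contour,
+        ...                                kernel_region_num=2,
+        ...                                distance_threshold=0.8)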
+ """ + assert isinstance(score, (torch.Tensor, np.ndarray)) + assert isinstance(mask, (torch.Tensor, np.ndarray)) + assert isinstance(embedding, (torch.Tensor, np.ndarray)) + assert isinstance(kernel_label, (torch.Tensor, np.ndarray)) + assert isinstance(kernel_contour, (torch.Tensor, np.ndarray)) + assert isinstance(kernel_region_num, int) + assert isinstance(distance_threshold, float) + + if isinstance(score, np.ndarray): + score = torch.from_numpy(score) + if isinstance(mask, np.ndarray): + mask = torch.from_numpy(mask) + if isinstance(embedding, np.ndarray): + embedding = torch.from_numpy(embedding) + if isinstance(kernel_label, np.ndarray): + kernel_label = torch.from_numpy(kernel_label) + if isinstance(kernel_contour, np.ndarray): + kernel_contour = torch.from_numpy(kernel_contour) + + if torch.__version__ == 'parrots': + label = ext_module.pixel_group( + score, + mask, + embedding, + kernel_label, + kernel_contour, + kernel_region_num=kernel_region_num, + distance_threshold=distance_threshold) + label = label.tolist() + label = label[0] + list_index = kernel_region_num + pixel_assignment = [] + for x in range(kernel_region_num): + pixel_assignment.append( + np.array( + label[list_index:list_index + int(label[x])], + dtype=np.float)) + list_index = list_index + int(label[x]) + else: + pixel_assignment = ext_module.pixel_group(score, mask, embedding, + kernel_label, kernel_contour, + kernel_region_num, + distance_threshold) + return pixel_assignment diff --git a/annotator/uniformer/mmcv/ops/point_sample.py b/annotator/uniformer/mmcv/ops/point_sample.py new file mode 100644 index 0000000000000000000000000000000000000000..267f4b3c56630acd85f9bdc630b7be09abab0aba --- /dev/null +++ b/annotator/uniformer/mmcv/ops/point_sample.py @@ -0,0 +1,336 @@ +# Modified from https://github.com/facebookresearch/detectron2/tree/master/projects/PointRend # noqa + +from os import path as osp + +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.nn.modules.utils import _pair +from torch.onnx.operators import shape_as_tensor + + +def bilinear_grid_sample(im, grid, align_corners=False): + """Given an input and a flow-field grid, computes the output using input + values and pixel locations from grid. Supported only bilinear interpolation + method to sample the input pixels. + + Args: + im (torch.Tensor): Input feature map, shape (N, C, H, W) + grid (torch.Tensor): Point coordinates, shape (N, Hg, Wg, 2) + align_corners {bool}: If set to True, the extrema (-1 and 1) are + considered as referring to the center points of the input’s + corner pixels. If set to False, they are instead considered as + referring to the corner points of the input’s corner pixels, + making the sampling more resolution agnostic. 
+ Returns: + torch.Tensor: A tensor with sampled points, shape (N, C, Hg, Wg) + """ + n, c, h, w = im.shape + gn, gh, gw, _ = grid.shape + assert n == gn + + x = grid[:, :, :, 0] + y = grid[:, :, :, 1] + + if align_corners: + x = ((x + 1) / 2) * (w - 1) + y = ((y + 1) / 2) * (h - 1) + else: + x = ((x + 1) * w - 1) / 2 + y = ((y + 1) * h - 1) / 2 + + x = x.view(n, -1) + y = y.view(n, -1) + + x0 = torch.floor(x).long() + y0 = torch.floor(y).long() + x1 = x0 + 1 + y1 = y0 + 1 + + wa = ((x1 - x) * (y1 - y)).unsqueeze(1) + wb = ((x1 - x) * (y - y0)).unsqueeze(1) + wc = ((x - x0) * (y1 - y)).unsqueeze(1) + wd = ((x - x0) * (y - y0)).unsqueeze(1) + + # Apply default for grid_sample function zero padding + im_padded = F.pad(im, pad=[1, 1, 1, 1], mode='constant', value=0) + padded_h = h + 2 + padded_w = w + 2 + # save points positions after padding + x0, x1, y0, y1 = x0 + 1, x1 + 1, y0 + 1, y1 + 1 + + # Clip coordinates to padded image size + x0 = torch.where(x0 < 0, torch.tensor(0), x0) + x0 = torch.where(x0 > padded_w - 1, torch.tensor(padded_w - 1), x0) + x1 = torch.where(x1 < 0, torch.tensor(0), x1) + x1 = torch.where(x1 > padded_w - 1, torch.tensor(padded_w - 1), x1) + y0 = torch.where(y0 < 0, torch.tensor(0), y0) + y0 = torch.where(y0 > padded_h - 1, torch.tensor(padded_h - 1), y0) + y1 = torch.where(y1 < 0, torch.tensor(0), y1) + y1 = torch.where(y1 > padded_h - 1, torch.tensor(padded_h - 1), y1) + + im_padded = im_padded.view(n, c, -1) + + x0_y0 = (x0 + y0 * padded_w).unsqueeze(1).expand(-1, c, -1) + x0_y1 = (x0 + y1 * padded_w).unsqueeze(1).expand(-1, c, -1) + x1_y0 = (x1 + y0 * padded_w).unsqueeze(1).expand(-1, c, -1) + x1_y1 = (x1 + y1 * padded_w).unsqueeze(1).expand(-1, c, -1) + + Ia = torch.gather(im_padded, 2, x0_y0) + Ib = torch.gather(im_padded, 2, x0_y1) + Ic = torch.gather(im_padded, 2, x1_y0) + Id = torch.gather(im_padded, 2, x1_y1) + + return (Ia * wa + Ib * wb + Ic * wc + Id * wd).reshape(n, c, gh, gw) + + +def is_in_onnx_export_without_custom_ops(): + from annotator.uniformer.mmcv.ops import get_onnxruntime_op_path + ort_custom_op_path = get_onnxruntime_op_path() + return torch.onnx.is_in_onnx_export( + ) and not osp.exists(ort_custom_op_path) + + +def normalize(grid): + """Normalize input grid from [-1, 1] to [0, 1] + Args: + grid (Tensor): The grid to be normalize, range [-1, 1]. + Returns: + Tensor: Normalized grid, range [0, 1]. + """ + + return (grid + 1.0) / 2.0 + + +def denormalize(grid): + """Denormalize input grid from range [0, 1] to [-1, 1] + Args: + grid (Tensor): The grid to be denormalize, range [0, 1]. + Returns: + Tensor: Denormalized grid, range [-1, 1]. + """ + + return grid * 2.0 - 1.0 + + +def generate_grid(num_grid, size, device): + """Generate regular square grid of points in [0, 1] x [0, 1] coordinate + space. + + Args: + num_grid (int): The number of grids to sample, one for each region. + size (tuple(int, int)): The side size of the regular grid. + device (torch.device): Desired device of returned tensor. + + Returns: + (torch.Tensor): A tensor of shape (num_grid, size[0]*size[1], 2) that + contains coordinates for the regular grids. + """ + + affine_trans = torch.tensor([[[1., 0., 0.], [0., 1., 0.]]], device=device) + grid = F.affine_grid( + affine_trans, torch.Size((1, 1, *size)), align_corners=False) + grid = normalize(grid) + return grid.view(1, -1, 2).expand(num_grid, -1, -1) + + +def rel_roi_point_to_abs_img_point(rois, rel_roi_points): + """Convert roi based relative point coordinates to image based absolute + point coordinates. 
+ + Args: + rois (Tensor): RoIs or BBoxes, shape (N, 4) or (N, 5) + rel_roi_points (Tensor): Point coordinates inside RoI, relative to + RoI, location, range (0, 1), shape (N, P, 2) + Returns: + Tensor: Image based absolute point coordinates, shape (N, P, 2) + """ + + with torch.no_grad(): + assert rel_roi_points.size(0) == rois.size(0) + assert rois.dim() == 2 + assert rel_roi_points.dim() == 3 + assert rel_roi_points.size(2) == 2 + # remove batch idx + if rois.size(1) == 5: + rois = rois[:, 1:] + abs_img_points = rel_roi_points.clone() + # To avoid an error during exporting to onnx use independent + # variables instead inplace computation + xs = abs_img_points[:, :, 0] * (rois[:, None, 2] - rois[:, None, 0]) + ys = abs_img_points[:, :, 1] * (rois[:, None, 3] - rois[:, None, 1]) + xs += rois[:, None, 0] + ys += rois[:, None, 1] + abs_img_points = torch.stack([xs, ys], dim=2) + return abs_img_points + + +def get_shape_from_feature_map(x): + """Get spatial resolution of input feature map considering exporting to + onnx mode. + + Args: + x (torch.Tensor): Input tensor, shape (N, C, H, W) + Returns: + torch.Tensor: Spatial resolution (width, height), shape (1, 1, 2) + """ + if torch.onnx.is_in_onnx_export(): + img_shape = shape_as_tensor(x)[2:].flip(0).view(1, 1, 2).to( + x.device).float() + else: + img_shape = torch.tensor(x.shape[2:]).flip(0).view(1, 1, 2).to( + x.device).float() + return img_shape + + +def abs_img_point_to_rel_img_point(abs_img_points, img, spatial_scale=1.): + """Convert image based absolute point coordinates to image based relative + coordinates for sampling. + + Args: + abs_img_points (Tensor): Image based absolute point coordinates, + shape (N, P, 2) + img (tuple/Tensor): (height, width) of image or feature map. + spatial_scale (float): Scale points by this factor. Default: 1. + + Returns: + Tensor: Image based relative point coordinates for sampling, + shape (N, P, 2) + """ + + assert (isinstance(img, tuple) and len(img) == 2) or \ + (isinstance(img, torch.Tensor) and len(img.shape) == 4) + + if isinstance(img, tuple): + h, w = img + scale = torch.tensor([w, h], + dtype=torch.float, + device=abs_img_points.device) + scale = scale.view(1, 1, 2) + else: + scale = get_shape_from_feature_map(img) + + return abs_img_points / scale * spatial_scale + + +def rel_roi_point_to_rel_img_point(rois, + rel_roi_points, + img, + spatial_scale=1.): + """Convert roi based relative point coordinates to image based absolute + point coordinates. + + Args: + rois (Tensor): RoIs or BBoxes, shape (N, 4) or (N, 5) + rel_roi_points (Tensor): Point coordinates inside RoI, relative to + RoI, location, range (0, 1), shape (N, P, 2) + img (tuple/Tensor): (height, width) of image or feature map. + spatial_scale (float): Scale points by this factor. Default: 1. + + Returns: + Tensor: Image based relative point coordinates for sampling, + shape (N, P, 2) + """ + + abs_img_point = rel_roi_point_to_abs_img_point(rois, rel_roi_points) + rel_img_point = abs_img_point_to_rel_img_point(abs_img_point, img, + spatial_scale) + + return rel_img_point + + +def point_sample(input, points, align_corners=False, **kwargs): + """A wrapper around :func:`grid_sample` to support 3D point_coords tensors + Unlike :func:`torch.nn.functional.grid_sample` it assumes point_coords to + lie inside ``[0, 1] x [0, 1]`` square. + + Args: + input (Tensor): Feature map, shape (N, C, H, W). + points (Tensor): Image based absolute point coordinates (normalized), + range [0, 1] x [0, 1], shape (N, P, 2) or (N, Hgrid, Wgrid, 2). 
+ align_corners (bool): Whether align_corners. Default: False + + Returns: + Tensor: Features of `point` on `input`, shape (N, C, P) or + (N, C, Hgrid, Wgrid). + """ + + add_dim = False + if points.dim() == 3: + add_dim = True + points = points.unsqueeze(2) + if is_in_onnx_export_without_custom_ops(): + # If custom ops for onnx runtime not compiled use python + # implementation of grid_sample function to make onnx graph + # with supported nodes + output = bilinear_grid_sample( + input, denormalize(points), align_corners=align_corners) + else: + output = F.grid_sample( + input, denormalize(points), align_corners=align_corners, **kwargs) + if add_dim: + output = output.squeeze(3) + return output + + +class SimpleRoIAlign(nn.Module): + + def __init__(self, output_size, spatial_scale, aligned=True): + """Simple RoI align in PointRend, faster than standard RoIAlign. + + Args: + output_size (tuple[int]): h, w + spatial_scale (float): scale the input boxes by this number + aligned (bool): if False, use the legacy implementation in + MMDetection, align_corners=True will be used in F.grid_sample. + If True, align the results more perfectly. + """ + + super(SimpleRoIAlign, self).__init__() + self.output_size = _pair(output_size) + self.spatial_scale = float(spatial_scale) + # to be consistent with other RoI ops + self.use_torchvision = False + self.aligned = aligned + + def forward(self, features, rois): + num_imgs = features.size(0) + num_rois = rois.size(0) + rel_roi_points = generate_grid( + num_rois, self.output_size, device=rois.device) + + if torch.onnx.is_in_onnx_export(): + rel_img_points = rel_roi_point_to_rel_img_point( + rois, rel_roi_points, features, self.spatial_scale) + rel_img_points = rel_img_points.reshape(num_imgs, -1, + *rel_img_points.shape[1:]) + point_feats = point_sample( + features, rel_img_points, align_corners=not self.aligned) + point_feats = point_feats.transpose(1, 2) + else: + point_feats = [] + for batch_ind in range(num_imgs): + # unravel batch dim + feat = features[batch_ind].unsqueeze(0) + inds = (rois[:, 0].long() == batch_ind) + if inds.any(): + rel_img_points = rel_roi_point_to_rel_img_point( + rois[inds], rel_roi_points[inds], feat, + self.spatial_scale).unsqueeze(0) + point_feat = point_sample( + feat, rel_img_points, align_corners=not self.aligned) + point_feat = point_feat.squeeze(0).transpose(0, 1) + point_feats.append(point_feat) + + point_feats = torch.cat(point_feats, dim=0) + + channels = features.size(1) + roi_feats = point_feats.reshape(num_rois, channels, *self.output_size) + + return roi_feats + + def __repr__(self): + format_str = self.__class__.__name__ + format_str += '(output_size={}, spatial_scale={}'.format( + self.output_size, self.spatial_scale) + return format_str diff --git a/annotator/uniformer/mmcv/ops/points_in_boxes.py b/annotator/uniformer/mmcv/ops/points_in_boxes.py new file mode 100644 index 0000000000000000000000000000000000000000..4003173a53052161dbcd687a2fa1d755642fdab8 --- /dev/null +++ b/annotator/uniformer/mmcv/ops/points_in_boxes.py @@ -0,0 +1,133 @@ +import torch + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', [ + 'points_in_boxes_part_forward', 'points_in_boxes_cpu_forward', + 'points_in_boxes_all_forward' +]) + + +def points_in_boxes_part(points, boxes): + """Find the box in which each point is (CUDA). 
+ + Args: + points (torch.Tensor): [B, M, 3], [x, y, z] in LiDAR/DEPTH coordinate + boxes (torch.Tensor): [B, T, 7], + num_valid_boxes <= T, [x, y, z, x_size, y_size, z_size, rz] in + LiDAR/DEPTH coordinate, (x, y, z) is the bottom center + + Returns: + box_idxs_of_pts (torch.Tensor): (B, M), default background = -1 + """ + assert points.shape[0] == boxes.shape[0], \ + 'Points and boxes should have the same batch size, ' \ + f'but got {points.shape[0]} and {boxes.shape[0]}' + assert boxes.shape[2] == 7, \ + 'boxes dimension should be 7, ' \ + f'but got unexpected shape {boxes.shape[2]}' + assert points.shape[2] == 3, \ + 'points dimension should be 3, ' \ + f'but got unexpected shape {points.shape[2]}' + batch_size, num_points, _ = points.shape + + box_idxs_of_pts = points.new_zeros((batch_size, num_points), + dtype=torch.int).fill_(-1) + + # If manually put the tensor 'points' or 'boxes' on a device + # which is not the current device, some temporary variables + # will be created on the current device in the cuda op, + # and the output will be incorrect. + # Therefore, we force the current device to be the same + # as the device of the tensors if it was not. + # Please refer to https://github.com/open-mmlab/mmdetection3d/issues/305 + # for the incorrect output before the fix. + points_device = points.get_device() + assert points_device == boxes.get_device(), \ + 'Points and boxes should be put on the same device' + if torch.cuda.current_device() != points_device: + torch.cuda.set_device(points_device) + + ext_module.points_in_boxes_part_forward(boxes.contiguous(), + points.contiguous(), + box_idxs_of_pts) + + return box_idxs_of_pts + + +def points_in_boxes_cpu(points, boxes): + """Find all boxes in which each point is (CPU). The CPU version of + :meth:`points_in_boxes_all`. + + Args: + points (torch.Tensor): [B, M, 3], [x, y, z] in + LiDAR/DEPTH coordinate + boxes (torch.Tensor): [B, T, 7], + num_valid_boxes <= T, [x, y, z, x_size, y_size, z_size, rz], + (x, y, z) is the bottom center. + + Returns: + box_idxs_of_pts (torch.Tensor): (B, M, T), default background = 0. + """ + assert points.shape[0] == boxes.shape[0], \ + 'Points and boxes should have the same batch size, ' \ + f'but got {points.shape[0]} and {boxes.shape[0]}' + assert boxes.shape[2] == 7, \ + 'boxes dimension should be 7, ' \ + f'but got unexpected shape {boxes.shape[2]}' + assert points.shape[2] == 3, \ + 'points dimension should be 3, ' \ + f'but got unexpected shape {points.shape[2]}' + batch_size, num_points, _ = points.shape + num_boxes = boxes.shape[1] + + point_indices = points.new_zeros((batch_size, num_boxes, num_points), + dtype=torch.int) + for b in range(batch_size): + ext_module.points_in_boxes_cpu_forward(boxes[b].float().contiguous(), + points[b].float().contiguous(), + point_indices[b]) + point_indices = point_indices.transpose(1, 2) + + return point_indices + + +def points_in_boxes_all(points, boxes): + """Find all boxes in which each point is (CUDA). + + Args: + points (torch.Tensor): [B, M, 3], [x, y, z] in LiDAR/DEPTH coordinate + boxes (torch.Tensor): [B, T, 7], + num_valid_boxes <= T, [x, y, z, x_size, y_size, z_size, rz], + (x, y, z) is the bottom center. + + Returns: + box_idxs_of_pts (torch.Tensor): (B, M, T), default background = 0. 
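+
+    Example:
+        >>> # An illustrative sketch; this op requires a CUDA device and
+        >>> # the compiled ``_ext`` extension. Boxes are random, so most
+        >>> # entries stay at the background value 0.
+        >>> points = torch.rand(1, 128, 3).cuda()
+        >>> boxes = torch.rand(1, 8, 7).cuda()
+        >>> box_idxs_of_pts = points_in_boxes_all(points, boxes)
+        >>> assert box_idxs_of_pts.shape == (1, 128, 8)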
+ """ + assert boxes.shape[0] == points.shape[0], \ + 'Points and boxes should have the same batch size, ' \ + f'but got {boxes.shape[0]} and {boxes.shape[0]}' + assert boxes.shape[2] == 7, \ + 'boxes dimension should be 7, ' \ + f'but got unexpected shape {boxes.shape[2]}' + assert points.shape[2] == 3, \ + 'points dimension should be 3, ' \ + f'but got unexpected shape {points.shape[2]}' + batch_size, num_points, _ = points.shape + num_boxes = boxes.shape[1] + + box_idxs_of_pts = points.new_zeros((batch_size, num_points, num_boxes), + dtype=torch.int).fill_(0) + + # Same reason as line 25-32 + points_device = points.get_device() + assert points_device == boxes.get_device(), \ + 'Points and boxes should be put on the same device' + if torch.cuda.current_device() != points_device: + torch.cuda.set_device(points_device) + + ext_module.points_in_boxes_all_forward(boxes.contiguous(), + points.contiguous(), + box_idxs_of_pts) + + return box_idxs_of_pts diff --git a/annotator/uniformer/mmcv/ops/points_sampler.py b/annotator/uniformer/mmcv/ops/points_sampler.py new file mode 100644 index 0000000000000000000000000000000000000000..a802a74fd6c3610d9ae178e6201f47423eca7ad1 --- /dev/null +++ b/annotator/uniformer/mmcv/ops/points_sampler.py @@ -0,0 +1,177 @@ +from typing import List + +import torch +from torch import nn as nn + +from annotator.uniformer.mmcv.runner import force_fp32 +from .furthest_point_sample import (furthest_point_sample, + furthest_point_sample_with_dist) + + +def calc_square_dist(point_feat_a, point_feat_b, norm=True): + """Calculating square distance between a and b. + + Args: + point_feat_a (Tensor): (B, N, C) Feature vector of each point. + point_feat_b (Tensor): (B, M, C) Feature vector of each point. + norm (Bool, optional): Whether to normalize the distance. + Default: True. + + Returns: + Tensor: (B, N, M) Distance between each pair points. + """ + num_channel = point_feat_a.shape[-1] + # [bs, n, 1] + a_square = torch.sum(point_feat_a.unsqueeze(dim=2).pow(2), dim=-1) + # [bs, 1, m] + b_square = torch.sum(point_feat_b.unsqueeze(dim=1).pow(2), dim=-1) + + corr_matrix = torch.matmul(point_feat_a, point_feat_b.transpose(1, 2)) + + dist = a_square + b_square - 2 * corr_matrix + if norm: + dist = torch.sqrt(dist) / num_channel + return dist + + +def get_sampler_cls(sampler_type): + """Get the type and mode of points sampler. + + Args: + sampler_type (str): The type of points sampler. + The valid value are "D-FPS", "F-FPS", or "FS". + + Returns: + class: Points sampler type. + """ + sampler_mappings = { + 'D-FPS': DFPSSampler, + 'F-FPS': FFPSSampler, + 'FS': FSSampler, + } + try: + return sampler_mappings[sampler_type] + except KeyError: + raise KeyError( + f'Supported `sampler_type` are {sampler_mappings.keys()}, but got \ + {sampler_type}') + + +class PointsSampler(nn.Module): + """Points sampling. + + Args: + num_point (list[int]): Number of sample points. + fps_mod_list (list[str], optional): Type of FPS method, valid mod + ['F-FPS', 'D-FPS', 'FS'], Default: ['D-FPS']. + F-FPS: using feature distances for FPS. + D-FPS: using Euclidean distances of points for FPS. + FS: using F-FPS and D-FPS simultaneously. + fps_sample_range_list (list[int], optional): + Range of points to apply FPS. Default: [-1]. 
+ """ + + def __init__(self, + num_point: List[int], + fps_mod_list: List[str] = ['D-FPS'], + fps_sample_range_list: List[int] = [-1]): + super().__init__() + # FPS would be applied to different fps_mod in the list, + # so the length of the num_point should be equal to + # fps_mod_list and fps_sample_range_list. + assert len(num_point) == len(fps_mod_list) == len( + fps_sample_range_list) + self.num_point = num_point + self.fps_sample_range_list = fps_sample_range_list + self.samplers = nn.ModuleList() + for fps_mod in fps_mod_list: + self.samplers.append(get_sampler_cls(fps_mod)()) + self.fp16_enabled = False + + @force_fp32() + def forward(self, points_xyz, features): + """ + Args: + points_xyz (Tensor): (B, N, 3) xyz coordinates of the features. + features (Tensor): (B, C, N) Descriptors of the features. + + Returns: + Tensor: (B, npoint, sample_num) Indices of sampled points. + """ + indices = [] + last_fps_end_index = 0 + + for fps_sample_range, sampler, npoint in zip( + self.fps_sample_range_list, self.samplers, self.num_point): + assert fps_sample_range < points_xyz.shape[1] + + if fps_sample_range == -1: + sample_points_xyz = points_xyz[:, last_fps_end_index:] + if features is not None: + sample_features = features[:, :, last_fps_end_index:] + else: + sample_features = None + else: + sample_points_xyz = \ + points_xyz[:, last_fps_end_index:fps_sample_range] + if features is not None: + sample_features = features[:, :, last_fps_end_index: + fps_sample_range] + else: + sample_features = None + + fps_idx = sampler(sample_points_xyz.contiguous(), sample_features, + npoint) + + indices.append(fps_idx + last_fps_end_index) + last_fps_end_index += fps_sample_range + indices = torch.cat(indices, dim=1) + + return indices + + +class DFPSSampler(nn.Module): + """Using Euclidean distances of points for FPS.""" + + def __init__(self): + super().__init__() + + def forward(self, points, features, npoint): + """Sampling points with D-FPS.""" + fps_idx = furthest_point_sample(points.contiguous(), npoint) + return fps_idx + + +class FFPSSampler(nn.Module): + """Using feature distances for FPS.""" + + def __init__(self): + super().__init__() + + def forward(self, points, features, npoint): + """Sampling points with F-FPS.""" + assert features is not None, \ + 'feature input to FFPS_Sampler should not be None' + features_for_fps = torch.cat([points, features.transpose(1, 2)], dim=2) + features_dist = calc_square_dist( + features_for_fps, features_for_fps, norm=False) + fps_idx = furthest_point_sample_with_dist(features_dist, npoint) + return fps_idx + + +class FSSampler(nn.Module): + """Using F-FPS and D-FPS simultaneously.""" + + def __init__(self): + super().__init__() + + def forward(self, points, features, npoint): + """Sampling points with FS_Sampling.""" + assert features is not None, \ + 'feature input to FS_Sampler should not be None' + ffps_sampler = FFPSSampler() + dfps_sampler = DFPSSampler() + fps_idx_ffps = ffps_sampler(points, features, npoint) + fps_idx_dfps = dfps_sampler(points, features, npoint) + fps_idx = torch.cat([fps_idx_ffps, fps_idx_dfps], dim=1) + return fps_idx diff --git a/annotator/uniformer/mmcv/ops/psa_mask.py b/annotator/uniformer/mmcv/ops/psa_mask.py new file mode 100644 index 0000000000000000000000000000000000000000..cdf14e62b50e8d4dd6856c94333c703bcc4c9ab6 --- /dev/null +++ b/annotator/uniformer/mmcv/ops/psa_mask.py @@ -0,0 +1,92 @@ +# Modified from https://github.com/hszhao/semseg/blob/master/lib/psa +from torch import nn +from torch.autograd import Function +from 
torch.nn.modules.utils import _pair + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', + ['psamask_forward', 'psamask_backward']) + + +class PSAMaskFunction(Function): + + @staticmethod + def symbolic(g, input, psa_type, mask_size): + return g.op( + 'mmcv::MMCVPSAMask', + input, + psa_type_i=psa_type, + mask_size_i=mask_size) + + @staticmethod + def forward(ctx, input, psa_type, mask_size): + ctx.psa_type = psa_type + ctx.mask_size = _pair(mask_size) + ctx.save_for_backward(input) + + h_mask, w_mask = ctx.mask_size + batch_size, channels, h_feature, w_feature = input.size() + assert channels == h_mask * w_mask + output = input.new_zeros( + (batch_size, h_feature * w_feature, h_feature, w_feature)) + + ext_module.psamask_forward( + input, + output, + psa_type=psa_type, + num_=batch_size, + h_feature=h_feature, + w_feature=w_feature, + h_mask=h_mask, + w_mask=w_mask, + half_h_mask=(h_mask - 1) // 2, + half_w_mask=(w_mask - 1) // 2) + return output + + @staticmethod + def backward(ctx, grad_output): + input = ctx.saved_tensors[0] + psa_type = ctx.psa_type + h_mask, w_mask = ctx.mask_size + batch_size, channels, h_feature, w_feature = input.size() + grad_input = grad_output.new_zeros( + (batch_size, channels, h_feature, w_feature)) + ext_module.psamask_backward( + grad_output, + grad_input, + psa_type=psa_type, + num_=batch_size, + h_feature=h_feature, + w_feature=w_feature, + h_mask=h_mask, + w_mask=w_mask, + half_h_mask=(h_mask - 1) // 2, + half_w_mask=(w_mask - 1) // 2) + return grad_input, None, None, None + + +psa_mask = PSAMaskFunction.apply + + +class PSAMask(nn.Module): + + def __init__(self, psa_type, mask_size=None): + super(PSAMask, self).__init__() + assert psa_type in ['collect', 'distribute'] + if psa_type == 'collect': + psa_type_enum = 0 + else: + psa_type_enum = 1 + self.psa_type_enum = psa_type_enum + self.mask_size = mask_size + self.psa_type = psa_type + + def forward(self, input): + return psa_mask(input, self.psa_type_enum, self.mask_size) + + def __repr__(self): + s = self.__class__.__name__ + s += f'(psa_type={self.psa_type}, ' + s += f'mask_size={self.mask_size})' + return s diff --git a/annotator/uniformer/mmcv/ops/roi_align.py b/annotator/uniformer/mmcv/ops/roi_align.py new file mode 100644 index 0000000000000000000000000000000000000000..0755aefc66e67233ceae0f4b77948301c443e9fb --- /dev/null +++ b/annotator/uniformer/mmcv/ops/roi_align.py @@ -0,0 +1,223 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
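+# A minimal usage sketch of the `RoIAlign` module defined below
+# (illustrative only; assumes the compiled `_ext` extension is available
+# and that the feature map lives on the GPU):
+#
+#     feats = torch.rand(2, 256, 32, 32).cuda()
+#     rois = torch.tensor([[0., 4., 4., 20., 20.]]).cuda()  # (idx, x1, y1, x2, y2)
+#     pooled = RoIAlign(output_size=(7, 7), spatial_scale=1.0)(feats, rois)
+#     # pooled: (1, 256, 7, 7)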
+import torch +import torch.nn as nn +from torch.autograd import Function +from torch.autograd.function import once_differentiable +from torch.nn.modules.utils import _pair + +from ..utils import deprecated_api_warning, ext_loader + +ext_module = ext_loader.load_ext('_ext', + ['roi_align_forward', 'roi_align_backward']) + + +class RoIAlignFunction(Function): + + @staticmethod + def symbolic(g, input, rois, output_size, spatial_scale, sampling_ratio, + pool_mode, aligned): + from ..onnx import is_custom_op_loaded + has_custom_op = is_custom_op_loaded() + if has_custom_op: + return g.op( + 'mmcv::MMCVRoiAlign', + input, + rois, + output_height_i=output_size[0], + output_width_i=output_size[1], + spatial_scale_f=spatial_scale, + sampling_ratio_i=sampling_ratio, + mode_s=pool_mode, + aligned_i=aligned) + else: + from torch.onnx.symbolic_opset9 import sub, squeeze + from torch.onnx.symbolic_helper import _slice_helper + from torch.onnx import TensorProtoDataType + # batch_indices = rois[:, 0].long() + batch_indices = _slice_helper( + g, rois, axes=[1], starts=[0], ends=[1]) + batch_indices = squeeze(g, batch_indices, 1) + batch_indices = g.op( + 'Cast', batch_indices, to_i=TensorProtoDataType.INT64) + # rois = rois[:, 1:] + rois = _slice_helper(g, rois, axes=[1], starts=[1], ends=[5]) + if aligned: + # rois -= 0.5/spatial_scale + aligned_offset = g.op( + 'Constant', + value_t=torch.tensor([0.5 / spatial_scale], + dtype=torch.float32)) + rois = sub(g, rois, aligned_offset) + # roi align + return g.op( + 'RoiAlign', + input, + rois, + batch_indices, + output_height_i=output_size[0], + output_width_i=output_size[1], + spatial_scale_f=spatial_scale, + sampling_ratio_i=max(0, sampling_ratio), + mode_s=pool_mode) + + @staticmethod + def forward(ctx, + input, + rois, + output_size, + spatial_scale=1.0, + sampling_ratio=0, + pool_mode='avg', + aligned=True): + ctx.output_size = _pair(output_size) + ctx.spatial_scale = spatial_scale + ctx.sampling_ratio = sampling_ratio + assert pool_mode in ('max', 'avg') + ctx.pool_mode = 0 if pool_mode == 'max' else 1 + ctx.aligned = aligned + ctx.input_shape = input.size() + + assert rois.size(1) == 5, 'RoI must be (idx, x1, y1, x2, y2)!' + + output_shape = (rois.size(0), input.size(1), ctx.output_size[0], + ctx.output_size[1]) + output = input.new_zeros(output_shape) + if ctx.pool_mode == 0: + argmax_y = input.new_zeros(output_shape) + argmax_x = input.new_zeros(output_shape) + else: + argmax_y = input.new_zeros(0) + argmax_x = input.new_zeros(0) + + ext_module.roi_align_forward( + input, + rois, + output, + argmax_y, + argmax_x, + aligned_height=ctx.output_size[0], + aligned_width=ctx.output_size[1], + spatial_scale=ctx.spatial_scale, + sampling_ratio=ctx.sampling_ratio, + pool_mode=ctx.pool_mode, + aligned=ctx.aligned) + + ctx.save_for_backward(rois, argmax_y, argmax_x) + return output + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + rois, argmax_y, argmax_x = ctx.saved_tensors + grad_input = grad_output.new_zeros(ctx.input_shape) + # complex head architecture may cause grad_output uncontiguous. 
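+        # (`.contiguous()` returns the tensor itself when it is already
+        # contiguous, so the call below only copies in the uncontiguous case.)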
+ grad_output = grad_output.contiguous() + ext_module.roi_align_backward( + grad_output, + rois, + argmax_y, + argmax_x, + grad_input, + aligned_height=ctx.output_size[0], + aligned_width=ctx.output_size[1], + spatial_scale=ctx.spatial_scale, + sampling_ratio=ctx.sampling_ratio, + pool_mode=ctx.pool_mode, + aligned=ctx.aligned) + return grad_input, None, None, None, None, None, None + + +roi_align = RoIAlignFunction.apply + + +class RoIAlign(nn.Module): + """RoI align pooling layer. + + Args: + output_size (tuple): h, w + spatial_scale (float): scale the input boxes by this number + sampling_ratio (int): number of inputs samples to take for each + output sample. 0 to take samples densely for current models. + pool_mode (str, 'avg' or 'max'): pooling mode in each bin. + aligned (bool): if False, use the legacy implementation in + MMDetection. If True, align the results more perfectly. + use_torchvision (bool): whether to use roi_align from torchvision. + + Note: + The implementation of RoIAlign when aligned=True is modified from + https://github.com/facebookresearch/detectron2/ + + The meaning of aligned=True: + + Given a continuous coordinate c, its two neighboring pixel + indices (in our pixel model) are computed by floor(c - 0.5) and + ceil(c - 0.5). For example, c=1.3 has pixel neighbors with discrete + indices [0] and [1] (which are sampled from the underlying signal + at continuous coordinates 0.5 and 1.5). But the original roi_align + (aligned=False) does not subtract the 0.5 when computing + neighboring pixel indices and therefore it uses pixels with a + slightly incorrect alignment (relative to our pixel model) when + performing bilinear interpolation. + + With `aligned=True`, + we first appropriately scale the ROI and then shift it by -0.5 + prior to calling roi_align. This produces the correct neighbors; + + The difference does not make a difference to the model's + performance if ROIAlign is used together with conv layers. + """ + + @deprecated_api_warning( + { + 'out_size': 'output_size', + 'sample_num': 'sampling_ratio' + }, + cls_name='RoIAlign') + def __init__(self, + output_size, + spatial_scale=1.0, + sampling_ratio=0, + pool_mode='avg', + aligned=True, + use_torchvision=False): + super(RoIAlign, self).__init__() + + self.output_size = _pair(output_size) + self.spatial_scale = float(spatial_scale) + self.sampling_ratio = int(sampling_ratio) + self.pool_mode = pool_mode + self.aligned = aligned + self.use_torchvision = use_torchvision + + def forward(self, input, rois): + """ + Args: + input: NCHW images + rois: Bx5 boxes. First column is the index into N.\ + The other 4 columns are xyxy. + """ + if self.use_torchvision: + from torchvision.ops import roi_align as tv_roi_align + if 'aligned' in tv_roi_align.__code__.co_varnames: + return tv_roi_align(input, rois, self.output_size, + self.spatial_scale, self.sampling_ratio, + self.aligned) + else: + if self.aligned: + rois -= rois.new_tensor([0.] 
+ + [0.5 / self.spatial_scale] * 4)
+                return tv_roi_align(input, rois, self.output_size,
+                                    self.spatial_scale, self.sampling_ratio)
+        else:
+            return roi_align(input, rois, self.output_size, self.spatial_scale,
+                             self.sampling_ratio, self.pool_mode, self.aligned)
+
+    def __repr__(self):
+        s = self.__class__.__name__
+        s += f'(output_size={self.output_size}, '
+        s += f'spatial_scale={self.spatial_scale}, '
+        s += f'sampling_ratio={self.sampling_ratio}, '
+        s += f'pool_mode={self.pool_mode}, '
+        s += f'aligned={self.aligned}, '
+        s += f'use_torchvision={self.use_torchvision})'
+        return s
diff --git a/annotator/uniformer/mmcv/ops/roi_align_rotated.py b/annotator/uniformer/mmcv/ops/roi_align_rotated.py
new file mode 100644
index 0000000000000000000000000000000000000000..0ce4961a3555d4da8bc3e32f1f7d5ad50036587d
--- /dev/null
+++ b/annotator/uniformer/mmcv/ops/roi_align_rotated.py
@@ -0,0 +1,177 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch.nn as nn
+from torch.autograd import Function
+
+from ..utils import ext_loader
+
+ext_module = ext_loader.load_ext(
+    '_ext', ['roi_align_rotated_forward', 'roi_align_rotated_backward'])
+
+
+class RoIAlignRotatedFunction(Function):
+
+    @staticmethod
+    def symbolic(g, features, rois, out_size, spatial_scale, sample_num,
+                 aligned, clockwise):
+        if isinstance(out_size, int):
+            out_h = out_size
+            out_w = out_size
+        elif isinstance(out_size, tuple):
+            assert len(out_size) == 2
+            assert isinstance(out_size[0], int)
+            assert isinstance(out_size[1], int)
+            out_h, out_w = out_size
+        else:
+            raise TypeError(
+                '"out_size" must be an integer or tuple of integers')
+        return g.op(
+            'mmcv::MMCVRoIAlignRotated',
+            features,
+            rois,
+            output_height_i=out_h,
+            output_width_i=out_w,
+            spatial_scale_f=spatial_scale,
+            sampling_ratio_i=sample_num,
+            aligned_i=aligned,
+            clockwise_i=clockwise)
+
+    @staticmethod
+    def forward(ctx,
+                features,
+                rois,
+                out_size,
+                spatial_scale,
+                sample_num=0,
+                aligned=True,
+                clockwise=False):
+        if isinstance(out_size, int):
+            out_h = out_size
+            out_w = out_size
+        elif isinstance(out_size, tuple):
+            assert len(out_size) == 2
+            assert isinstance(out_size[0], int)
+            assert isinstance(out_size[1], int)
+            out_h, out_w = out_size
+        else:
+            raise TypeError(
+                '"out_size" must be an integer or tuple of integers')
+        ctx.spatial_scale = spatial_scale
+        ctx.sample_num = sample_num
+        ctx.aligned = aligned
+        ctx.clockwise = clockwise
+        ctx.save_for_backward(rois)
+        ctx.feature_size = features.size()
+
+        batch_size, num_channels, data_height, data_width = features.size()
+        num_rois = rois.size(0)
+
+        output = features.new_zeros(num_rois, num_channels, out_h, out_w)
+        ext_module.roi_align_rotated_forward(
+            features,
+            rois,
+            output,
+            pooled_height=out_h,
+            pooled_width=out_w,
+            spatial_scale=spatial_scale,
+            sample_num=sample_num,
+            aligned=aligned,
+            clockwise=clockwise)
+        return output
+
+    @staticmethod
+    def backward(ctx, grad_output):
+        feature_size = ctx.feature_size
+        spatial_scale = ctx.spatial_scale
+        aligned = ctx.aligned
+        clockwise = ctx.clockwise
+        sample_num = ctx.sample_num
+        rois = ctx.saved_tensors[0]
+        assert feature_size is not None
+        batch_size, num_channels, data_height, data_width = feature_size
+
+        out_w = grad_output.size(3)
+        out_h = grad_output.size(2)
+
+        grad_input = grad_rois = None
+
+        if ctx.needs_input_grad[0]:
+            grad_input = rois.new_zeros(batch_size, num_channels, data_height,
+                                        data_width)
+            ext_module.roi_align_rotated_backward(
+                grad_output.contiguous(),
+                rois,
+                grad_input,
pooled_height=out_h, + pooled_width=out_w, + spatial_scale=spatial_scale, + sample_num=sample_num, + aligned=aligned, + clockwise=clockwise) + return grad_input, grad_rois, None, None, None, None, None + + +roi_align_rotated = RoIAlignRotatedFunction.apply + + +class RoIAlignRotated(nn.Module): + """RoI align pooling layer for rotated proposals. + + It accepts a feature map of shape (N, C, H, W) and rois with shape + (n, 6) with each roi decoded as (batch_index, center_x, center_y, + w, h, angle). The angle is in radian. + + Args: + out_size (tuple): h, w + spatial_scale (float): scale the input boxes by this number + sample_num (int): number of inputs samples to take for each + output sample. 0 to take samples densely for current models. + aligned (bool): if False, use the legacy implementation in + MMDetection. If True, align the results more perfectly. + Default: True. + clockwise (bool): If True, the angle in each proposal follows a + clockwise fashion in image space, otherwise, the angle is + counterclockwise. Default: False. + + Note: + The implementation of RoIAlign when aligned=True is modified from + https://github.com/facebookresearch/detectron2/ + + The meaning of aligned=True: + + Given a continuous coordinate c, its two neighboring pixel + indices (in our pixel model) are computed by floor(c - 0.5) and + ceil(c - 0.5). For example, c=1.3 has pixel neighbors with discrete + indices [0] and [1] (which are sampled from the underlying signal + at continuous coordinates 0.5 and 1.5). But the original roi_align + (aligned=False) does not subtract the 0.5 when computing + neighboring pixel indices and therefore it uses pixels with a + slightly incorrect alignment (relative to our pixel model) when + performing bilinear interpolation. + + With `aligned=True`, + we first appropriately scale the ROI and then shift it by -0.5 + prior to calling roi_align. This produces the correct neighbors; + + The difference does not make a difference to the model's + performance if ROIAlign is used together with conv layers. + """ + + def __init__(self, + out_size, + spatial_scale, + sample_num=0, + aligned=True, + clockwise=False): + super(RoIAlignRotated, self).__init__() + + self.out_size = out_size + self.spatial_scale = float(spatial_scale) + self.sample_num = int(sample_num) + self.aligned = aligned + self.clockwise = clockwise + + def forward(self, features, rois): + return RoIAlignRotatedFunction.apply(features, rois, self.out_size, + self.spatial_scale, + self.sample_num, self.aligned, + self.clockwise) diff --git a/annotator/uniformer/mmcv/ops/roi_pool.py b/annotator/uniformer/mmcv/ops/roi_pool.py new file mode 100644 index 0000000000000000000000000000000000000000..d339d8f2941eabc1cbe181a9c6c5ab5ff4ff4e5f --- /dev/null +++ b/annotator/uniformer/mmcv/ops/roi_pool.py @@ -0,0 +1,86 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
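+# A minimal usage sketch of the `RoIPool` module defined below
+# (illustrative only; assumes the compiled `_ext` extension is available):
+#
+#     feats = torch.rand(2, 256, 32, 32).cuda()
+#     rois = torch.tensor([[0., 4., 4., 20., 20.]]).cuda()  # (idx, x1, y1, x2, y2)
+#     pooled = RoIPool(output_size=(7, 7), spatial_scale=1.0)(feats, rois)
+#     # pooled: (1, 256, 7, 7)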
+import torch +import torch.nn as nn +from torch.autograd import Function +from torch.autograd.function import once_differentiable +from torch.nn.modules.utils import _pair + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', + ['roi_pool_forward', 'roi_pool_backward']) + + +class RoIPoolFunction(Function): + + @staticmethod + def symbolic(g, input, rois, output_size, spatial_scale): + return g.op( + 'MaxRoiPool', + input, + rois, + pooled_shape_i=output_size, + spatial_scale_f=spatial_scale) + + @staticmethod + def forward(ctx, input, rois, output_size, spatial_scale=1.0): + ctx.output_size = _pair(output_size) + ctx.spatial_scale = spatial_scale + ctx.input_shape = input.size() + + assert rois.size(1) == 5, 'RoI must be (idx, x1, y1, x2, y2)!' + + output_shape = (rois.size(0), input.size(1), ctx.output_size[0], + ctx.output_size[1]) + output = input.new_zeros(output_shape) + argmax = input.new_zeros(output_shape, dtype=torch.int) + + ext_module.roi_pool_forward( + input, + rois, + output, + argmax, + pooled_height=ctx.output_size[0], + pooled_width=ctx.output_size[1], + spatial_scale=ctx.spatial_scale) + + ctx.save_for_backward(rois, argmax) + return output + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + rois, argmax = ctx.saved_tensors + grad_input = grad_output.new_zeros(ctx.input_shape) + + ext_module.roi_pool_backward( + grad_output, + rois, + argmax, + grad_input, + pooled_height=ctx.output_size[0], + pooled_width=ctx.output_size[1], + spatial_scale=ctx.spatial_scale) + + return grad_input, None, None, None + + +roi_pool = RoIPoolFunction.apply + + +class RoIPool(nn.Module): + + def __init__(self, output_size, spatial_scale=1.0): + super(RoIPool, self).__init__() + + self.output_size = _pair(output_size) + self.spatial_scale = float(spatial_scale) + + def forward(self, input, rois): + return roi_pool(input, rois, self.output_size, self.spatial_scale) + + def __repr__(self): + s = self.__class__.__name__ + s += f'(output_size={self.output_size}, ' + s += f'spatial_scale={self.spatial_scale})' + return s diff --git a/annotator/uniformer/mmcv/ops/roiaware_pool3d.py b/annotator/uniformer/mmcv/ops/roiaware_pool3d.py new file mode 100644 index 0000000000000000000000000000000000000000..291b0e5a9b692492c7d7e495ea639c46042e2f18 --- /dev/null +++ b/annotator/uniformer/mmcv/ops/roiaware_pool3d.py @@ -0,0 +1,114 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +from torch import nn as nn +from torch.autograd import Function + +import annotator.uniformer.mmcv as mmcv +from ..utils import ext_loader + +ext_module = ext_loader.load_ext( + '_ext', ['roiaware_pool3d_forward', 'roiaware_pool3d_backward']) + + +class RoIAwarePool3d(nn.Module): + """Encode the geometry-specific features of each 3D proposal. + + Please refer to `PartA2 `_ for more + details. + + Args: + out_size (int or tuple): The size of output features. n or + [n1, n2, n3]. + max_pts_per_voxel (int, optional): The maximum number of points per + voxel. Default: 128. + mode (str, optional): Pooling method of RoIAware, 'max' or 'avg'. + Default: 'max'. 
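+
+    Example (an illustrative sketch; requires the compiled ``_ext`` CUDA
+        ops):
+        >>> pool = RoIAwarePool3d(out_size=4, max_pts_per_voxel=128,
+        ...                       mode='max')
+        >>> # rois: (N, 7), pts: (npoints, 3), pts_feature: (npoints, C)
+        >>> # pooled = pool(rois, pts, pts_feature)  # (N, 4, 4, 4, C)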
+ """ + + def __init__(self, out_size, max_pts_per_voxel=128, mode='max'): + super().__init__() + + self.out_size = out_size + self.max_pts_per_voxel = max_pts_per_voxel + assert mode in ['max', 'avg'] + pool_mapping = {'max': 0, 'avg': 1} + self.mode = pool_mapping[mode] + + def forward(self, rois, pts, pts_feature): + """ + Args: + rois (torch.Tensor): [N, 7], in LiDAR coordinate, + (x, y, z) is the bottom center of rois. + pts (torch.Tensor): [npoints, 3], coordinates of input points. + pts_feature (torch.Tensor): [npoints, C], features of input points. + + Returns: + pooled_features (torch.Tensor): [N, out_x, out_y, out_z, C] + """ + + return RoIAwarePool3dFunction.apply(rois, pts, pts_feature, + self.out_size, + self.max_pts_per_voxel, self.mode) + + +class RoIAwarePool3dFunction(Function): + + @staticmethod + def forward(ctx, rois, pts, pts_feature, out_size, max_pts_per_voxel, + mode): + """ + Args: + rois (torch.Tensor): [N, 7], in LiDAR coordinate, + (x, y, z) is the bottom center of rois. + pts (torch.Tensor): [npoints, 3], coordinates of input points. + pts_feature (torch.Tensor): [npoints, C], features of input points. + out_size (int or tuple): The size of output features. n or + [n1, n2, n3]. + max_pts_per_voxel (int): The maximum number of points per voxel. + Default: 128. + mode (int): Pooling method of RoIAware, 0 (max pool) or 1 (average + pool). + + Returns: + pooled_features (torch.Tensor): [N, out_x, out_y, out_z, C], output + pooled features. + """ + + if isinstance(out_size, int): + out_x = out_y = out_z = out_size + else: + assert len(out_size) == 3 + assert mmcv.is_tuple_of(out_size, int) + out_x, out_y, out_z = out_size + + num_rois = rois.shape[0] + num_channels = pts_feature.shape[-1] + num_pts = pts.shape[0] + + pooled_features = pts_feature.new_zeros( + (num_rois, out_x, out_y, out_z, num_channels)) + argmax = pts_feature.new_zeros( + (num_rois, out_x, out_y, out_z, num_channels), dtype=torch.int) + pts_idx_of_voxels = pts_feature.new_zeros( + (num_rois, out_x, out_y, out_z, max_pts_per_voxel), + dtype=torch.int) + + ext_module.roiaware_pool3d_forward(rois, pts, pts_feature, argmax, + pts_idx_of_voxels, pooled_features, + mode) + + ctx.roiaware_pool3d_for_backward = (pts_idx_of_voxels, argmax, mode, + num_pts, num_channels) + return pooled_features + + @staticmethod + def backward(ctx, grad_out): + ret = ctx.roiaware_pool3d_for_backward + pts_idx_of_voxels, argmax, mode, num_pts, num_channels = ret + + grad_in = grad_out.new_zeros((num_pts, num_channels)) + ext_module.roiaware_pool3d_backward(pts_idx_of_voxels, argmax, + grad_out.contiguous(), grad_in, + mode) + + return None, None, grad_in, None, None, None diff --git a/annotator/uniformer/mmcv/ops/roipoint_pool3d.py b/annotator/uniformer/mmcv/ops/roipoint_pool3d.py new file mode 100644 index 0000000000000000000000000000000000000000..0a21412c0728431c04b84245bc2e3109eea9aefc --- /dev/null +++ b/annotator/uniformer/mmcv/ops/roipoint_pool3d.py @@ -0,0 +1,77 @@ +from torch import nn as nn +from torch.autograd import Function + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', ['roipoint_pool3d_forward']) + + +class RoIPointPool3d(nn.Module): + """Encode the geometry-specific features of each 3D proposal. + + Please refer to `Paper of PartA2 `_ + for more details. + + Args: + num_sampled_points (int, optional): Number of samples in each roi. + Default: 512. 
+ """ + + def __init__(self, num_sampled_points=512): + super().__init__() + self.num_sampled_points = num_sampled_points + + def forward(self, points, point_features, boxes3d): + """ + Args: + points (torch.Tensor): Input points whose shape is (B, N, C). + point_features (torch.Tensor): Features of input points whose shape + is (B, N, C). + boxes3d (B, M, 7), Input bounding boxes whose shape is (B, M, 7). + + Returns: + pooled_features (torch.Tensor): The output pooled features whose + shape is (B, M, 512, 3 + C). + pooled_empty_flag (torch.Tensor): Empty flag whose shape is (B, M). + """ + return RoIPointPool3dFunction.apply(points, point_features, boxes3d, + self.num_sampled_points) + + +class RoIPointPool3dFunction(Function): + + @staticmethod + def forward(ctx, points, point_features, boxes3d, num_sampled_points=512): + """ + Args: + points (torch.Tensor): Input points whose shape is (B, N, C). + point_features (torch.Tensor): Features of input points whose shape + is (B, N, C). + boxes3d (B, M, 7), Input bounding boxes whose shape is (B, M, 7). + num_sampled_points (int, optional): The num of sampled points. + Default: 512. + + Returns: + pooled_features (torch.Tensor): The output pooled features whose + shape is (B, M, 512, 3 + C). + pooled_empty_flag (torch.Tensor): Empty flag whose shape is (B, M). + """ + assert len(points.shape) == 3 and points.shape[2] == 3 + batch_size, boxes_num, feature_len = points.shape[0], boxes3d.shape[ + 1], point_features.shape[2] + pooled_boxes3d = boxes3d.view(batch_size, -1, 7) + pooled_features = point_features.new_zeros( + (batch_size, boxes_num, num_sampled_points, 3 + feature_len)) + pooled_empty_flag = point_features.new_zeros( + (batch_size, boxes_num)).int() + + ext_module.roipoint_pool3d_forward(points.contiguous(), + pooled_boxes3d.contiguous(), + point_features.contiguous(), + pooled_features, pooled_empty_flag) + + return pooled_features, pooled_empty_flag + + @staticmethod + def backward(ctx, grad_out): + raise NotImplementedError diff --git a/annotator/uniformer/mmcv/ops/saconv.py b/annotator/uniformer/mmcv/ops/saconv.py new file mode 100644 index 0000000000000000000000000000000000000000..b4ee3978e097fca422805db4e31ae481006d7971 --- /dev/null +++ b/annotator/uniformer/mmcv/ops/saconv.py @@ -0,0 +1,145 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn +import torch.nn.functional as F + +from annotator.uniformer.mmcv.cnn import CONV_LAYERS, ConvAWS2d, constant_init +from annotator.uniformer.mmcv.ops.deform_conv import deform_conv2d +from annotator.uniformer.mmcv.utils import TORCH_VERSION, digit_version + + +@CONV_LAYERS.register_module(name='SAC') +class SAConv2d(ConvAWS2d): + """SAC (Switchable Atrous Convolution) + + This is an implementation of SAC in DetectoRS + (https://arxiv.org/pdf/2006.02334.pdf). + + Args: + in_channels (int): Number of channels in the input image + out_channels (int): Number of channels produced by the convolution + kernel_size (int or tuple): Size of the convolving kernel + stride (int or tuple, optional): Stride of the convolution. Default: 1 + padding (int or tuple, optional): Zero-padding added to both sides of + the input. Default: 0 + padding_mode (string, optional): ``'zeros'``, ``'reflect'``, + ``'replicate'`` or ``'circular'``. Default: ``'zeros'`` + dilation (int or tuple, optional): Spacing between kernel elements. + Default: 1 + groups (int, optional): Number of blocked connections from input + channels to output channels. 
Default: 1 + bias (bool, optional): If ``True``, adds a learnable bias to the + output. Default: ``True`` + use_deform: If ``True``, replace convolution with deformable + convolution. Default: ``False``. + """ + + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + bias=True, + use_deform=False): + super().__init__( + in_channels, + out_channels, + kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + groups=groups, + bias=bias) + self.use_deform = use_deform + self.switch = nn.Conv2d( + self.in_channels, 1, kernel_size=1, stride=stride, bias=True) + self.weight_diff = nn.Parameter(torch.Tensor(self.weight.size())) + self.pre_context = nn.Conv2d( + self.in_channels, self.in_channels, kernel_size=1, bias=True) + self.post_context = nn.Conv2d( + self.out_channels, self.out_channels, kernel_size=1, bias=True) + if self.use_deform: + self.offset_s = nn.Conv2d( + self.in_channels, + 18, + kernel_size=3, + padding=1, + stride=stride, + bias=True) + self.offset_l = nn.Conv2d( + self.in_channels, + 18, + kernel_size=3, + padding=1, + stride=stride, + bias=True) + self.init_weights() + + def init_weights(self): + constant_init(self.switch, 0, bias=1) + self.weight_diff.data.zero_() + constant_init(self.pre_context, 0) + constant_init(self.post_context, 0) + if self.use_deform: + constant_init(self.offset_s, 0) + constant_init(self.offset_l, 0) + + def forward(self, x): + # pre-context + avg_x = F.adaptive_avg_pool2d(x, output_size=1) + avg_x = self.pre_context(avg_x) + avg_x = avg_x.expand_as(x) + x = x + avg_x + # switch + avg_x = F.pad(x, pad=(2, 2, 2, 2), mode='reflect') + avg_x = F.avg_pool2d(avg_x, kernel_size=5, stride=1, padding=0) + switch = self.switch(avg_x) + # sac + weight = self._get_weight(self.weight) + zero_bias = torch.zeros( + self.out_channels, device=weight.device, dtype=weight.dtype) + + if self.use_deform: + offset = self.offset_s(avg_x) + out_s = deform_conv2d(x, offset, weight, self.stride, self.padding, + self.dilation, self.groups, 1) + else: + if (TORCH_VERSION == 'parrots' + or digit_version(TORCH_VERSION) < digit_version('1.5.0')): + out_s = super().conv2d_forward(x, weight) + elif digit_version(TORCH_VERSION) >= digit_version('1.8.0'): + # bias is a required argument of _conv_forward in torch 1.8.0 + out_s = super()._conv_forward(x, weight, zero_bias) + else: + out_s = super()._conv_forward(x, weight) + ori_p = self.padding + ori_d = self.dilation + self.padding = tuple(3 * p for p in self.padding) + self.dilation = tuple(3 * d for d in self.dilation) + weight = weight + self.weight_diff + if self.use_deform: + offset = self.offset_l(avg_x) + out_l = deform_conv2d(x, offset, weight, self.stride, self.padding, + self.dilation, self.groups, 1) + else: + if (TORCH_VERSION == 'parrots' + or digit_version(TORCH_VERSION) < digit_version('1.5.0')): + out_l = super().conv2d_forward(x, weight) + elif digit_version(TORCH_VERSION) >= digit_version('1.8.0'): + # bias is a required argument of _conv_forward in torch 1.8.0 + out_l = super()._conv_forward(x, weight, zero_bias) + else: + out_l = super()._conv_forward(x, weight) + + out = switch * out_s + (1 - switch) * out_l + self.padding = ori_p + self.dilation = ori_d + # post-context + avg_x = F.adaptive_avg_pool2d(out, output_size=1) + avg_x = self.post_context(avg_x) + avg_x = avg_x.expand_as(out) + out = out + avg_x + return out diff --git a/annotator/uniformer/mmcv/ops/scatter_points.py b/annotator/uniformer/mmcv/ops/scatter_points.py new file 
mode 100644 index 0000000000000000000000000000000000000000..2b8aa4169e9f6ca4a6f845ce17d6d1e4db416bb8 --- /dev/null +++ b/annotator/uniformer/mmcv/ops/scatter_points.py @@ -0,0 +1,135 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +from torch import nn +from torch.autograd import Function + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext( + '_ext', + ['dynamic_point_to_voxel_forward', 'dynamic_point_to_voxel_backward']) + + +class _DynamicScatter(Function): + + @staticmethod + def forward(ctx, feats, coors, reduce_type='max'): + """convert kitti points(N, >=3) to voxels. + + Args: + feats (torch.Tensor): [N, C]. Points features to be reduced + into voxels. + coors (torch.Tensor): [N, ndim]. Corresponding voxel coordinates + (specifically multi-dim voxel index) of each points. + reduce_type (str, optional): Reduce op. support 'max', 'sum' and + 'mean'. Default: 'max'. + + Returns: + voxel_feats (torch.Tensor): [M, C]. Reduced features, input + features that shares the same voxel coordinates are reduced to + one row. + voxel_coors (torch.Tensor): [M, ndim]. Voxel coordinates. + """ + results = ext_module.dynamic_point_to_voxel_forward( + feats, coors, reduce_type) + (voxel_feats, voxel_coors, point2voxel_map, + voxel_points_count) = results + ctx.reduce_type = reduce_type + ctx.save_for_backward(feats, voxel_feats, point2voxel_map, + voxel_points_count) + ctx.mark_non_differentiable(voxel_coors) + return voxel_feats, voxel_coors + + @staticmethod + def backward(ctx, grad_voxel_feats, grad_voxel_coors=None): + (feats, voxel_feats, point2voxel_map, + voxel_points_count) = ctx.saved_tensors + grad_feats = torch.zeros_like(feats) + # TODO: whether to use index put or use cuda_backward + # To use index put, need point to voxel index + ext_module.dynamic_point_to_voxel_backward( + grad_feats, grad_voxel_feats.contiguous(), feats, voxel_feats, + point2voxel_map, voxel_points_count, ctx.reduce_type) + return grad_feats, None, None + + +dynamic_scatter = _DynamicScatter.apply + + +class DynamicScatter(nn.Module): + """Scatters points into voxels, used in the voxel encoder with dynamic + voxelization. + + Note: + The CPU and GPU implementation get the same output, but have numerical + difference after summation and division (e.g., 5e-7). + + Args: + voxel_size (list): list [x, y, z] size of three dimension. + point_cloud_range (list): The coordinate range of points, [x_min, + y_min, z_min, x_max, y_max, z_max]. + average_points (bool): whether to use avg pooling to scatter points + into voxel. + """ + + def __init__(self, voxel_size, point_cloud_range, average_points: bool): + super().__init__() + + self.voxel_size = voxel_size + self.point_cloud_range = point_cloud_range + self.average_points = average_points + + def forward_single(self, points, coors): + """Scatters points into voxels. + + Args: + points (torch.Tensor): Points to be reduced into voxels. + coors (torch.Tensor): Corresponding voxel coordinates (specifically + multi-dim voxel index) of each points. + + Returns: + voxel_feats (torch.Tensor): Reduced features, input features that + shares the same voxel coordinates are reduced to one row. + voxel_coors (torch.Tensor): Voxel coordinates. + """ + reduce = 'mean' if self.average_points else 'max' + return dynamic_scatter(points.contiguous(), coors.contiguous(), reduce) + + def forward(self, points, coors): + """Scatters points/features into voxels. + + Args: + points (torch.Tensor): Points to be reduced into voxels. 
+ coors (torch.Tensor): Corresponding voxel coordinates (specifically + multi-dim voxel index) of each points. + + Returns: + voxel_feats (torch.Tensor): Reduced features, input features that + shares the same voxel coordinates are reduced to one row. + voxel_coors (torch.Tensor): Voxel coordinates. + """ + if coors.size(-1) == 3: + return self.forward_single(points, coors) + else: + batch_size = coors[-1, 0] + 1 + voxels, voxel_coors = [], [] + for i in range(batch_size): + inds = torch.where(coors[:, 0] == i) + voxel, voxel_coor = self.forward_single( + points[inds], coors[inds][:, 1:]) + coor_pad = nn.functional.pad( + voxel_coor, (1, 0), mode='constant', value=i) + voxel_coors.append(coor_pad) + voxels.append(voxel) + features = torch.cat(voxels, dim=0) + feature_coors = torch.cat(voxel_coors, dim=0) + + return features, feature_coors + + def __repr__(self): + s = self.__class__.__name__ + '(' + s += 'voxel_size=' + str(self.voxel_size) + s += ', point_cloud_range=' + str(self.point_cloud_range) + s += ', average_points=' + str(self.average_points) + s += ')' + return s diff --git a/annotator/uniformer/mmcv/ops/sync_bn.py b/annotator/uniformer/mmcv/ops/sync_bn.py new file mode 100644 index 0000000000000000000000000000000000000000..c9b016fcbe860989c56cd1040034bcfa60e146d2 --- /dev/null +++ b/annotator/uniformer/mmcv/ops/sync_bn.py @@ -0,0 +1,279 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.distributed as dist +import torch.nn.functional as F +from torch.autograd import Function +from torch.autograd.function import once_differentiable +from torch.nn.modules.module import Module +from torch.nn.parameter import Parameter + +from annotator.uniformer.mmcv.cnn import NORM_LAYERS +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', [ + 'sync_bn_forward_mean', 'sync_bn_forward_var', 'sync_bn_forward_output', + 'sync_bn_backward_param', 'sync_bn_backward_data' +]) + + +class SyncBatchNormFunction(Function): + + @staticmethod + def symbolic(g, input, running_mean, running_var, weight, bias, momentum, + eps, group, group_size, stats_mode): + return g.op( + 'mmcv::MMCVSyncBatchNorm', + input, + running_mean, + running_var, + weight, + bias, + momentum_f=momentum, + eps_f=eps, + group_i=group, + group_size_i=group_size, + stats_mode=stats_mode) + + @staticmethod + def forward(self, input, running_mean, running_var, weight, bias, momentum, + eps, group, group_size, stats_mode): + self.momentum = momentum + self.eps = eps + self.group = group + self.group_size = group_size + self.stats_mode = stats_mode + + assert isinstance( + input, (torch.HalfTensor, torch.FloatTensor, + torch.cuda.HalfTensor, torch.cuda.FloatTensor)), \ + f'only support Half or Float Tensor, but {input.type()}' + output = torch.zeros_like(input) + input3d = input.flatten(start_dim=2) + output3d = output.view_as(input3d) + num_channels = input3d.size(1) + + # ensure mean/var/norm/std are initialized as zeros + # ``torch.empty()`` does not guarantee that + mean = torch.zeros( + num_channels, dtype=torch.float, device=input3d.device) + var = torch.zeros( + num_channels, dtype=torch.float, device=input3d.device) + norm = torch.zeros_like( + input3d, dtype=torch.float, device=input3d.device) + std = torch.zeros( + num_channels, dtype=torch.float, device=input3d.device) + + batch_size = input3d.size(0) + if batch_size > 0: + ext_module.sync_bn_forward_mean(input3d, mean) + batch_flag = torch.ones([1], device=mean.device, dtype=mean.dtype) + else: + # skip updating mean and leave 
it as zeros when the input is empty
+            batch_flag = torch.zeros([1], device=mean.device, dtype=mean.dtype)
+
+        # synchronize mean and the batch flag
+        vec = torch.cat([mean, batch_flag])
+        if self.stats_mode == 'N':
+            vec *= batch_size
+        if self.group_size > 1:
+            dist.all_reduce(vec, group=self.group)
+        total_batch = vec[-1].detach()
+        mean = vec[:num_channels]
+
+        if self.stats_mode == 'default':
+            mean = mean / self.group_size
+        elif self.stats_mode == 'N':
+            mean = mean / total_batch.clamp(min=1)
+        else:
+            raise NotImplementedError
+
+        # leave var as zeros when the input is empty
+        if batch_size > 0:
+            ext_module.sync_bn_forward_var(input3d, mean, var)
+
+        if self.stats_mode == 'N':
+            var *= batch_size
+        if self.group_size > 1:
+            dist.all_reduce(var, group=self.group)
+
+        if self.stats_mode == 'default':
+            var /= self.group_size
+        elif self.stats_mode == 'N':
+            var /= total_batch.clamp(min=1)
+        else:
+            raise NotImplementedError
+
+        # if the total batch size over all the ranks is zero,
+        # we should not update the statistics in the current batch
+        update_flag = total_batch.clamp(max=1)
+        momentum = update_flag * self.momentum
+        ext_module.sync_bn_forward_output(
+            input3d,
+            mean,
+            var,
+            weight,
+            bias,
+            running_mean,
+            running_var,
+            norm,
+            std,
+            output3d,
+            eps=self.eps,
+            momentum=momentum,
+            group_size=self.group_size)
+        self.save_for_backward(norm, std, weight)
+        return output
+
+    @staticmethod
+    @once_differentiable
+    def backward(self, grad_output):
+        norm, std, weight = self.saved_tensors
+        grad_weight = torch.zeros_like(weight)
+        grad_bias = torch.zeros_like(weight)
+        grad_input = torch.zeros_like(grad_output)
+        grad_output3d = grad_output.flatten(start_dim=2)
+        grad_input3d = grad_input.view_as(grad_output3d)
+
+        batch_size = grad_input3d.size(0)
+        if batch_size > 0:
+            ext_module.sync_bn_backward_param(grad_output3d, norm, grad_weight,
+                                              grad_bias)
+
+        # all reduce
+        if self.group_size > 1:
+            dist.all_reduce(grad_weight, group=self.group)
+            dist.all_reduce(grad_bias, group=self.group)
+            grad_weight /= self.group_size
+            grad_bias /= self.group_size
+
+        if batch_size > 0:
+            ext_module.sync_bn_backward_data(grad_output3d, weight,
+                                             grad_weight, grad_bias, norm, std,
+                                             grad_input3d)
+
+        return grad_input, None, None, grad_weight, grad_bias, \
+            None, None, None, None, None
+
+
+@NORM_LAYERS.register_module(name='MMSyncBN')
+class SyncBatchNorm(Module):
+    """Synchronized Batch Normalization.
+
+    Args:
+        num_features (int): number of features/channels in input tensor
+        eps (float, optional): a value added to the denominator for numerical
+            stability. Defaults to 1e-5.
+        momentum (float, optional): the value used for the running_mean and
+            running_var computation. Defaults to 0.1.
+        affine (bool, optional): whether to use learnable affine parameters.
+            Defaults to True.
+        track_running_stats (bool, optional): whether to track the running
+            mean and variance during training. When set to False, this
+            module does not track such statistics, and initializes statistics
+            buffers ``running_mean`` and ``running_var`` as ``None``. When
+            these buffers are ``None``, this module always uses batch
+            statistics in both training and eval modes. Defaults to True.
+        group (int, optional): synchronization of stats happens within
+            each process group individually. By default it is synchronization
+            across the whole world. Defaults to None.
+        stats_mode (str, optional): The statistical mode. Available options
+            include ``'default'`` and ``'N'``. Defaults to 'default'.
+            When ``stats_mode=='default'``, it computes the overall statistics
+            using those from each worker with equal weight, i.e., the
+            statistics are synchronized and simply divided by ``group``. This
+            mode will produce inaccurate statistics when empty tensors occur.
+            When ``stats_mode=='N'``, it computes the overall statistics using
+            the total number of batches in each worker ignoring the number of
+            group, i.e., the statistics are synchronized and then divided by
+            the total batch ``N``. This mode is beneficial when empty tensors
+            occur during training, as it averages the total mean by the real
+            number of batches.
+    """
+
+    def __init__(self,
+                 num_features,
+                 eps=1e-5,
+                 momentum=0.1,
+                 affine=True,
+                 track_running_stats=True,
+                 group=None,
+                 stats_mode='default'):
+        super(SyncBatchNorm, self).__init__()
+        self.num_features = num_features
+        self.eps = eps
+        self.momentum = momentum
+        self.affine = affine
+        self.track_running_stats = track_running_stats
+        group = dist.group.WORLD if group is None else group
+        self.group = group
+        self.group_size = dist.get_world_size(group)
+        assert stats_mode in ['default', 'N'], \
+            f'"stats_mode" only accepts "default" and "N", got "{stats_mode}"'
+        self.stats_mode = stats_mode
+        if self.affine:
+            self.weight = Parameter(torch.Tensor(num_features))
+            self.bias = Parameter(torch.Tensor(num_features))
+        else:
+            self.register_parameter('weight', None)
+            self.register_parameter('bias', None)
+        if self.track_running_stats:
+            self.register_buffer('running_mean', torch.zeros(num_features))
+            self.register_buffer('running_var', torch.ones(num_features))
+            self.register_buffer('num_batches_tracked',
+                                 torch.tensor(0, dtype=torch.long))
+        else:
+            self.register_buffer('running_mean', None)
+            self.register_buffer('running_var', None)
+            self.register_buffer('num_batches_tracked', None)
+        self.reset_parameters()
+
+    def reset_running_stats(self):
+        if self.track_running_stats:
+            self.running_mean.zero_()
+            self.running_var.fill_(1)
+            self.num_batches_tracked.zero_()
+
+    def reset_parameters(self):
+        self.reset_running_stats()
+        if self.affine:
+            self.weight.data.uniform_()  # pytorch uses ones_()
+            self.bias.data.zero_()
+
+    def forward(self, input):
+        if input.dim() < 2:
+            raise ValueError(
+                f'expected at least 2D input, got {input.dim()}D input')
+        if self.momentum is None:
+            exponential_average_factor = 0.0
+        else:
+            exponential_average_factor = self.momentum
+
+        if self.training and self.track_running_stats:
+            if self.num_batches_tracked is not None:
+                self.num_batches_tracked += 1
+                if self.momentum is None:  # use cumulative moving average
+                    exponential_average_factor = 1.0 / float(
+                        self.num_batches_tracked)
+                else:  # use exponential moving average
+                    exponential_average_factor = self.momentum
+
+        if self.training or not self.track_running_stats:
+            return SyncBatchNormFunction.apply(
+                input, self.running_mean, self.running_var, self.weight,
+                self.bias, exponential_average_factor, self.eps, self.group,
+                self.group_size, self.stats_mode)
+        else:
+            return F.batch_norm(input, self.running_mean, self.running_var,
+                                self.weight, self.bias, False,
+                                exponential_average_factor, self.eps)
+
+    def __repr__(self):
+        s = self.__class__.__name__
+        s += f'({self.num_features}, '
+        s += f'eps={self.eps}, '
+        s += f'momentum={self.momentum}, '
+        s += f'affine={self.affine}, '
+        s += f'track_running_stats={self.track_running_stats}, '
+        s += f'group_size={self.group_size}, '
+        s += f'stats_mode={self.stats_mode})'
+        return s
diff --git
a/annotator/uniformer/mmcv/ops/three_interpolate.py b/annotator/uniformer/mmcv/ops/three_interpolate.py new file mode 100644 index 0000000000000000000000000000000000000000..203f47f05d58087e034fb3cd8cd6a09233947b4a --- /dev/null +++ b/annotator/uniformer/mmcv/ops/three_interpolate.py @@ -0,0 +1,68 @@ +from typing import Tuple + +import torch +from torch.autograd import Function + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext( + '_ext', ['three_interpolate_forward', 'three_interpolate_backward']) + + +class ThreeInterpolate(Function): + """Performs weighted linear interpolation on 3 features. + + Please refer to `Paper of PointNet++ `_ + for more details. + """ + + @staticmethod + def forward(ctx, features: torch.Tensor, indices: torch.Tensor, + weight: torch.Tensor) -> torch.Tensor: + """ + Args: + features (Tensor): (B, C, M) Features descriptors to be + interpolated + indices (Tensor): (B, n, 3) index three nearest neighbors + of the target features in features + weight (Tensor): (B, n, 3) weights of interpolation + + Returns: + Tensor: (B, C, N) tensor of the interpolated features + """ + assert features.is_contiguous() + assert indices.is_contiguous() + assert weight.is_contiguous() + + B, c, m = features.size() + n = indices.size(1) + ctx.three_interpolate_for_backward = (indices, weight, m) + output = torch.cuda.FloatTensor(B, c, n) + + ext_module.three_interpolate_forward( + features, indices, weight, output, b=B, c=c, m=m, n=n) + return output + + @staticmethod + def backward( + ctx, grad_out: torch.Tensor + ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: + """ + Args: + grad_out (Tensor): (B, C, N) tensor with gradients of outputs + + Returns: + Tensor: (B, C, M) tensor with gradients of features + """ + idx, weight, m = ctx.three_interpolate_for_backward + B, c, n = grad_out.size() + + grad_features = torch.cuda.FloatTensor(B, c, m).zero_() + grad_out_data = grad_out.data.contiguous() + + ext_module.three_interpolate_backward( + grad_out_data, idx, weight, grad_features.data, b=B, c=c, n=n, m=m) + return grad_features, None, None + + +three_interpolate = ThreeInterpolate.apply diff --git a/annotator/uniformer/mmcv/ops/three_nn.py b/annotator/uniformer/mmcv/ops/three_nn.py new file mode 100644 index 0000000000000000000000000000000000000000..2b01047a129989cd5545a0a86f23a487f4a13ce1 --- /dev/null +++ b/annotator/uniformer/mmcv/ops/three_nn.py @@ -0,0 +1,51 @@ +from typing import Tuple + +import torch +from torch.autograd import Function + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', ['three_nn_forward']) + + +class ThreeNN(Function): + """Find the top-3 nearest neighbors of the target set from the source set. + + Please refer to `Paper of PointNet++ `_ + for more details. + """ + + @staticmethod + def forward(ctx, target: torch.Tensor, + source: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: + """ + Args: + target (Tensor): shape (B, N, 3), points set that needs to + find the nearest neighbors. + source (Tensor): shape (B, M, 3), points set that is used + to find the nearest neighbors of points in target set. + + Returns: + Tensor: shape (B, N, 3), L2 distance of each point in target + set to their corresponding nearest neighbors. 
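+            Tensor: shape (B, N, 3), index of the 3 nearest neighbors in
+                the source set for each point in the target set.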
+ """ + target = target.contiguous() + source = source.contiguous() + + B, N, _ = target.size() + m = source.size(1) + dist2 = torch.cuda.FloatTensor(B, N, 3) + idx = torch.cuda.IntTensor(B, N, 3) + + ext_module.three_nn_forward(target, source, dist2, idx, b=B, n=N, m=m) + if torch.__version__ != 'parrots': + ctx.mark_non_differentiable(idx) + + return torch.sqrt(dist2), idx + + @staticmethod + def backward(ctx, a=None, b=None): + return None, None + + +three_nn = ThreeNN.apply diff --git a/annotator/uniformer/mmcv/ops/tin_shift.py b/annotator/uniformer/mmcv/ops/tin_shift.py new file mode 100644 index 0000000000000000000000000000000000000000..472c9fcfe45a124e819b7ed5653e585f94a8811e --- /dev/null +++ b/annotator/uniformer/mmcv/ops/tin_shift.py @@ -0,0 +1,68 @@ +# Copyright (c) OpenMMLab. All rights reserved. +# Code reference from "Temporal Interlacing Network" +# https://github.com/deepcs233/TIN/blob/master/cuda_shift/rtc_wrap.py +# Hao Shao, Shengju Qian, Yu Liu +# shaoh19@mails.tsinghua.edu.cn, sjqian@cse.cuhk.edu.hk, yuliu@ee.cuhk.edu.hk + +import torch +import torch.nn as nn +from torch.autograd import Function + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', + ['tin_shift_forward', 'tin_shift_backward']) + + +class TINShiftFunction(Function): + + @staticmethod + def forward(ctx, input, shift): + C = input.size(2) + num_segments = shift.size(1) + if C // num_segments <= 0 or C % num_segments != 0: + raise ValueError('C should be a multiple of num_segments, ' + f'but got C={C} and num_segments={num_segments}.') + + ctx.save_for_backward(shift) + + out = torch.zeros_like(input) + ext_module.tin_shift_forward(input, shift, out) + + return out + + @staticmethod + def backward(ctx, grad_output): + + shift = ctx.saved_tensors[0] + data_grad_input = grad_output.new(*grad_output.size()).zero_() + shift_grad_input = shift.new(*shift.size()).zero_() + ext_module.tin_shift_backward(grad_output, shift, data_grad_input) + + return data_grad_input, shift_grad_input + + +tin_shift = TINShiftFunction.apply + + +class TINShift(nn.Module): + """Temporal Interlace Shift. + + Temporal Interlace shift is a differentiable temporal-wise frame shifting + which is proposed in "Temporal Interlacing Network" + + Please refer to https://arxiv.org/abs/2001.06499 for more details. + Code is modified from https://github.com/mit-han-lab/temporal-shift-module + """ + + def forward(self, input, shift): + """Perform temporal interlace shift. + + Args: + input (Tensor): Feature map with shape [N, num_segments, C, H * W]. + shift (Tensor): Shift tensor with shape [N, num_segments]. + + Returns: + Feature map after temporal interlace shift. + """ + return tin_shift(input, shift) diff --git a/annotator/uniformer/mmcv/ops/upfirdn2d.py b/annotator/uniformer/mmcv/ops/upfirdn2d.py new file mode 100644 index 0000000000000000000000000000000000000000..c8bb2c3c949eed38a6465ed369fa881538dca010 --- /dev/null +++ b/annotator/uniformer/mmcv/ops/upfirdn2d.py @@ -0,0 +1,330 @@ +# modified from https://github.com/rosinality/stylegan2-pytorch/blob/master/op/upfirdn2d.py # noqa:E501 + +# Copyright (c) 2021, NVIDIA Corporation. All rights reserved. +# NVIDIA Source Code License for StyleGAN2 with Adaptive Discriminator +# Augmentation (ADA) +# ======================================================================= + +# 1. Definitions + +# "Licensor" means any person or entity that distributes its Work. + +# "Software" means the original work of authorship made available under +# this License. 
+ +# "Work" means the Software and any additions to or derivative works of +# the Software that are made available under this License. + +# The terms "reproduce," "reproduction," "derivative works," and +# "distribution" have the meaning as provided under U.S. copyright law; +# provided, however, that for the purposes of this License, derivative +# works shall not include works that remain separable from, or merely +# link (or bind by name) to the interfaces of, the Work. + +# Works, including the Software, are "made available" under this License +# by including in or with the Work either (a) a copyright notice +# referencing the applicability of this License to the Work, or (b) a +# copy of this License. + +# 2. License Grants + +# 2.1 Copyright Grant. Subject to the terms and conditions of this +# License, each Licensor grants to you a perpetual, worldwide, +# non-exclusive, royalty-free, copyright license to reproduce, +# prepare derivative works of, publicly display, publicly perform, +# sublicense and distribute its Work and any resulting derivative +# works in any form. + +# 3. Limitations + +# 3.1 Redistribution. You may reproduce or distribute the Work only +# if (a) you do so under this License, (b) you include a complete +# copy of this License with your distribution, and (c) you retain +# without modification any copyright, patent, trademark, or +# attribution notices that are present in the Work. + +# 3.2 Derivative Works. You may specify that additional or different +# terms apply to the use, reproduction, and distribution of your +# derivative works of the Work ("Your Terms") only if (a) Your Terms +# provide that the use limitation in Section 3.3 applies to your +# derivative works, and (b) you identify the specific derivative +# works that are subject to Your Terms. Notwithstanding Your Terms, +# this License (including the redistribution requirements in Section +# 3.1) will continue to apply to the Work itself. + +# 3.3 Use Limitation. The Work and any derivative works thereof only +# may be used or intended for use non-commercially. Notwithstanding +# the foregoing, NVIDIA and its affiliates may use the Work and any +# derivative works commercially. As used herein, "non-commercially" +# means for research or evaluation purposes only. + +# 3.4 Patent Claims. If you bring or threaten to bring a patent claim +# against any Licensor (including any claim, cross-claim or +# counterclaim in a lawsuit) to enforce any patents that you allege +# are infringed by any Work, then your rights under this License from +# such Licensor (including the grant in Section 2.1) will terminate +# immediately. + +# 3.5 Trademarks. This License does not grant any rights to use any +# Licensor’s or its affiliates’ names, logos, or trademarks, except +# as necessary to reproduce the notices described in this License. + +# 3.6 Termination. If you violate any term of this License, then your +# rights under this License (including the grant in Section 2.1) will +# terminate immediately. + +# 4. Disclaimer of Warranty. + +# THE WORK IS PROVIDED "AS IS" WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR +# NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER +# THIS LICENSE. + +# 5. Limitation of Liability. 
+ +# EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL +# THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE +# SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT, +# INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF +# OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK +# (INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION, +# LOST PROFITS OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER +# COMMERCIAL DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF +# THE POSSIBILITY OF SUCH DAMAGES. + +# ======================================================================= + +import torch +from torch.autograd import Function +from torch.nn import functional as F + +from annotator.uniformer.mmcv.utils import to_2tuple +from ..utils import ext_loader + +upfirdn2d_ext = ext_loader.load_ext('_ext', ['upfirdn2d']) + + +class UpFirDn2dBackward(Function): + + @staticmethod + def forward(ctx, grad_output, kernel, grad_kernel, up, down, pad, g_pad, + in_size, out_size): + + up_x, up_y = up + down_x, down_y = down + g_pad_x0, g_pad_x1, g_pad_y0, g_pad_y1 = g_pad + + grad_output = grad_output.reshape(-1, out_size[0], out_size[1], 1) + + grad_input = upfirdn2d_ext.upfirdn2d( + grad_output, + grad_kernel, + up_x=down_x, + up_y=down_y, + down_x=up_x, + down_y=up_y, + pad_x0=g_pad_x0, + pad_x1=g_pad_x1, + pad_y0=g_pad_y0, + pad_y1=g_pad_y1) + grad_input = grad_input.view(in_size[0], in_size[1], in_size[2], + in_size[3]) + + ctx.save_for_backward(kernel) + + pad_x0, pad_x1, pad_y0, pad_y1 = pad + + ctx.up_x = up_x + ctx.up_y = up_y + ctx.down_x = down_x + ctx.down_y = down_y + ctx.pad_x0 = pad_x0 + ctx.pad_x1 = pad_x1 + ctx.pad_y0 = pad_y0 + ctx.pad_y1 = pad_y1 + ctx.in_size = in_size + ctx.out_size = out_size + + return grad_input + + @staticmethod + def backward(ctx, gradgrad_input): + kernel, = ctx.saved_tensors + + gradgrad_input = gradgrad_input.reshape(-1, ctx.in_size[2], + ctx.in_size[3], 1) + + gradgrad_out = upfirdn2d_ext.upfirdn2d( + gradgrad_input, + kernel, + up_x=ctx.up_x, + up_y=ctx.up_y, + down_x=ctx.down_x, + down_y=ctx.down_y, + pad_x0=ctx.pad_x0, + pad_x1=ctx.pad_x1, + pad_y0=ctx.pad_y0, + pad_y1=ctx.pad_y1) + # gradgrad_out = gradgrad_out.view(ctx.in_size[0], ctx.out_size[0], + # ctx.out_size[1], ctx.in_size[3]) + gradgrad_out = gradgrad_out.view(ctx.in_size[0], ctx.in_size[1], + ctx.out_size[0], ctx.out_size[1]) + + return gradgrad_out, None, None, None, None, None, None, None, None + + +class UpFirDn2d(Function): + + @staticmethod + def forward(ctx, input, kernel, up, down, pad): + up_x, up_y = up + down_x, down_y = down + pad_x0, pad_x1, pad_y0, pad_y1 = pad + + kernel_h, kernel_w = kernel.shape + batch, channel, in_h, in_w = input.shape + ctx.in_size = input.shape + + input = input.reshape(-1, in_h, in_w, 1) + + ctx.save_for_backward(kernel, torch.flip(kernel, [0, 1])) + + out_h = (in_h * up_y + pad_y0 + pad_y1 - kernel_h) // down_y + 1 + out_w = (in_w * up_x + pad_x0 + pad_x1 - kernel_w) // down_x + 1 + ctx.out_size = (out_h, out_w) + + ctx.up = (up_x, up_y) + ctx.down = (down_x, down_y) + ctx.pad = (pad_x0, pad_x1, pad_y0, pad_y1) + + g_pad_x0 = kernel_w - pad_x0 - 1 + g_pad_y0 = kernel_h - pad_y0 - 1 + g_pad_x1 = in_w * up_x - out_w * down_x + pad_x0 - up_x + 1 + g_pad_y1 = in_h * up_y - out_h * down_y + pad_y0 - up_y + 1 + + ctx.g_pad = (g_pad_x0, g_pad_x1, g_pad_y0, g_pad_y1) + + out = upfirdn2d_ext.upfirdn2d( + input, + kernel, + up_x=up_x, + up_y=up_y, + 
down_x=down_x,
+            down_y=down_y,
+            pad_x0=pad_x0,
+            pad_x1=pad_x1,
+            pad_y0=pad_y0,
+            pad_y1=pad_y1)
+        # out = out.view(major, out_h, out_w, minor)
+        out = out.view(-1, channel, out_h, out_w)
+
+        return out
+
+    @staticmethod
+    def backward(ctx, grad_output):
+        kernel, grad_kernel = ctx.saved_tensors
+
+        grad_input = UpFirDn2dBackward.apply(
+            grad_output,
+            kernel,
+            grad_kernel,
+            ctx.up,
+            ctx.down,
+            ctx.pad,
+            ctx.g_pad,
+            ctx.in_size,
+            ctx.out_size,
+        )
+
+        return grad_input, None, None, None, None
+
+
+def upfirdn2d(input, kernel, up=1, down=1, pad=(0, 0)):
+    """UpFIRDn for 2d features.
+
+    UpFIRDn is short for upsample, apply FIR filter and downsample. More
+    details can be found in:
+    https://www.mathworks.com/help/signal/ref/upfirdn.html
+
+    Args:
+        input (Tensor): Tensor with shape of (n, c, h, w).
+        kernel (Tensor): Filter kernel.
+        up (int | tuple[int], optional): Upsampling factor. If given a number,
+            we will use this factor for both the height and width sides.
+            Defaults to 1.
+        down (int | tuple[int], optional): Downsampling factor. If given a
+            number, we will use this factor for both the height and width
+            sides. Defaults to 1.
+        pad (tuple[int], optional): Padding for tensors, (x_pad, y_pad) or
+            (x_pad_0, x_pad_1, y_pad_0, y_pad_1). Defaults to (0, 0).
+
+    Returns:
+        Tensor: Tensor after UpFIRDn.
+    """
+    if input.device.type == 'cpu':
+        if len(pad) == 2:
+            pad = (pad[0], pad[1], pad[0], pad[1])
+
+        up = to_2tuple(up)
+
+        down = to_2tuple(down)
+
+        out = upfirdn2d_native(input, kernel, up[0], up[1], down[0], down[1],
+                               pad[0], pad[1], pad[2], pad[3])
+    else:
+        _up = to_2tuple(up)
+
+        _down = to_2tuple(down)
+
+        if len(pad) == 4:
+            _pad = pad
+        elif len(pad) == 2:
+            _pad = (pad[0], pad[1], pad[0], pad[1])
+
+        out = UpFirDn2d.apply(input, kernel, _up, _down, _pad)
+
+    return out
+
+
+def upfirdn2d_native(input, kernel, up_x, up_y, down_x, down_y, pad_x0, pad_x1,
+                     pad_y0, pad_y1):
+    _, channel, in_h, in_w = input.shape
+    input = input.reshape(-1, in_h, in_w, 1)
+
+    _, in_h, in_w, minor = input.shape
+    kernel_h, kernel_w = kernel.shape
+
+    out = input.view(-1, in_h, 1, in_w, 1, minor)
+    out = F.pad(out, [0, 0, 0, up_x - 1, 0, 0, 0, up_y - 1])
+    out = out.view(-1, in_h * up_y, in_w * up_x, minor)
+
+    out = F.pad(
+        out,
+        [0, 0,
+         max(pad_x0, 0),
+         max(pad_x1, 0),
+         max(pad_y0, 0),
+         max(pad_y1, 0)])
+    out = out[:,
+              max(-pad_y0, 0):out.shape[1] - max(-pad_y1, 0),
+              max(-pad_x0, 0):out.shape[2] - max(-pad_x1, 0), :, ]
+
+    out = out.permute(0, 3, 1, 2)
+    out = out.reshape(
+        [-1, 1, in_h * up_y + pad_y0 + pad_y1, in_w * up_x + pad_x0 + pad_x1])
+    w = torch.flip(kernel, [0, 1]).view(1, 1, kernel_h, kernel_w)
+    out = F.conv2d(out, w)
+    out = out.reshape(
+        -1,
+        minor,
+        in_h * up_y + pad_y0 + pad_y1 - kernel_h + 1,
+        in_w * up_x + pad_x0 + pad_x1 - kernel_w + 1,
+    )
+    out = out.permute(0, 2, 3, 1)
+    out = out[:, ::down_y, ::down_x, :]
+
+    out_h = (in_h * up_y + pad_y0 + pad_y1 - kernel_h) // down_y + 1
+    out_w = (in_w * up_x + pad_x0 + pad_x1 - kernel_w) // down_x + 1
+
+    return out.view(-1, channel, out_h, out_w)
diff --git a/annotator/uniformer/mmcv/ops/voxelize.py b/annotator/uniformer/mmcv/ops/voxelize.py
new file mode 100644
index 0000000000000000000000000000000000000000..ca3226a4fbcbfe58490fa2ea8e1c16b531214121
--- /dev/null
+++ b/annotator/uniformer/mmcv/ops/voxelize.py
@@ -0,0 +1,132 @@
+# Copyright (c) OpenMMLab. All rights reserved.
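+# --- Illustrative sketch (not upstream code): a minimal usage example for the
+# `upfirdn2d` wrapper defined in the previous file (assumed to be
+# ops/upfirdn2d.py, as in upstream mmcv). It exercises the CPU path, which
+# dispatches to `upfirdn2d_native` and needs no compiled extension; the pad
+# values and kernel are assumptions of this sketch.
+#
+#     import torch
+#     from annotator.uniformer.mmcv.ops.upfirdn2d import upfirdn2d
+#
+#     x = torch.randn(1, 3, 16, 16)         # (n, c, h, w) input
+#     k = torch.tensor([1., 3., 3., 1.])    # 4-tap binomial filter
+#     kernel = (k[:, None] * k[None, :]) / k.sum() ** 2  # normalized 2-D FIR
+#     y = upfirdn2d(x, kernel, up=2, down=1, pad=(2, 1))
+#     # out_h = (16 * 2 + 2 + 1 - 4) // 1 + 1 = 32, so y is (1, 3, 32, 32)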
+import torch
+from torch import nn
+from torch.autograd import Function
+from torch.nn.modules.utils import _pair
+
+from ..utils import ext_loader
+
+ext_module = ext_loader.load_ext(
+    '_ext', ['dynamic_voxelize_forward', 'hard_voxelize_forward'])
+
+
+class _Voxelization(Function):
+
+    @staticmethod
+    def forward(ctx,
+                points,
+                voxel_size,
+                coors_range,
+                max_points=35,
+                max_voxels=20000):
+        """Convert KITTI points (N, >=3) to voxels.
+
+        Args:
+            points (torch.Tensor): [N, ndim]. Points[:, :3] contain xyz points
+                and points[:, 3:] contain other information like reflectivity.
+            voxel_size (tuple or float): The size of voxel with the shape of
+                [3].
+            coors_range (tuple or float): The coordinate range of voxel with
+                the shape of [6].
+            max_points (int, optional): Maximum number of points contained in
+                a voxel. If max_points=-1, dynamic voxelization is used.
+                Default: 35.
+            max_voxels (int, optional): Maximum number of voxels this function
+                creates. For SECOND, 20000 is a good choice. Users should
+                shuffle points before calling this function because max_voxels
+                may drop points. Default: 20000.
+
+        Returns:
+            voxels_out (torch.Tensor): Output voxels with the shape of [M,
+                max_points, ndim]. Only contains points and is returned when
+                max_points != -1.
+            coors_out (torch.Tensor): Output coordinates with the shape of
+                [M, 3].
+            num_points_per_voxel_out (torch.Tensor): Num points per voxel with
+                the shape of [M]. Only returned when max_points != -1.
+        """
+        if max_points == -1 or max_voxels == -1:
+            coors = points.new_zeros(size=(points.size(0), 3), dtype=torch.int)
+            ext_module.dynamic_voxelize_forward(points, coors, voxel_size,
+                                                coors_range, 3)
+            return coors
+        else:
+            voxels = points.new_zeros(
+                size=(max_voxels, max_points, points.size(1)))
+            coors = points.new_zeros(size=(max_voxels, 3), dtype=torch.int)
+            num_points_per_voxel = points.new_zeros(
+                size=(max_voxels, ), dtype=torch.int)
+            voxel_num = ext_module.hard_voxelize_forward(
+                points, voxels, coors, num_points_per_voxel, voxel_size,
+                coors_range, max_points, max_voxels, 3)
+            # select the valid voxels
+            voxels_out = voxels[:voxel_num]
+            coors_out = coors[:voxel_num]
+            num_points_per_voxel_out = num_points_per_voxel[:voxel_num]
+            return voxels_out, coors_out, num_points_per_voxel_out
+
+
+voxelization = _Voxelization.apply
+
+
+class Voxelization(nn.Module):
+    """Convert KITTI points (N, >=3) to voxels.
+
+    Please refer to `PVCNN `_ for more
+    details.
+
+    Args:
+        voxel_size (tuple or float): The size of voxel with the shape of [3].
+        point_cloud_range (tuple or float): The coordinate range of voxel with
+            the shape of [6].
+        max_num_points (int): Maximum number of points contained in a voxel.
+            If max_num_points=-1, dynamic voxelization is used.
+        max_voxels (int, optional): Maximum number of voxels this function
+            creates. For SECOND, 20000 is a good choice. Users should shuffle
+            points before calling this function because max_voxels may drop
+            points. Default: 20000.
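+
+    Example (illustrative, not from upstream mmcv; the numbers are a typical
+    KITTI/PointPillars-style configuration and ``points`` is an assumed
+    (N, 4) float tensor of x, y, z, reflectance):
+
+        >>> voxel_layer = Voxelization(
+        ...     voxel_size=[0.16, 0.16, 4],
+        ...     point_cloud_range=[0, -39.68, -3, 69.12, 39.68, 1],
+        ...     max_num_points=32,
+        ...     max_voxels=(16000, 40000))
+        >>> voxels, coors, num_points = voxel_layer(points)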
+ """ + + def __init__(self, + voxel_size, + point_cloud_range, + max_num_points, + max_voxels=20000): + super().__init__() + + self.voxel_size = voxel_size + self.point_cloud_range = point_cloud_range + self.max_num_points = max_num_points + if isinstance(max_voxels, tuple): + self.max_voxels = max_voxels + else: + self.max_voxels = _pair(max_voxels) + + point_cloud_range = torch.tensor( + point_cloud_range, dtype=torch.float32) + voxel_size = torch.tensor(voxel_size, dtype=torch.float32) + grid_size = (point_cloud_range[3:] - + point_cloud_range[:3]) / voxel_size + grid_size = torch.round(grid_size).long() + input_feat_shape = grid_size[:2] + self.grid_size = grid_size + # the origin shape is as [x-len, y-len, z-len] + # [w, h, d] -> [d, h, w] + self.pcd_shape = [*input_feat_shape, 1][::-1] + + def forward(self, input): + if self.training: + max_voxels = self.max_voxels[0] + else: + max_voxels = self.max_voxels[1] + + return voxelization(input, self.voxel_size, self.point_cloud_range, + self.max_num_points, max_voxels) + + def __repr__(self): + s = self.__class__.__name__ + '(' + s += 'voxel_size=' + str(self.voxel_size) + s += ', point_cloud_range=' + str(self.point_cloud_range) + s += ', max_num_points=' + str(self.max_num_points) + s += ', max_voxels=' + str(self.max_voxels) + s += ')' + return s diff --git a/annotator/uniformer/mmcv/parallel/__init__.py b/annotator/uniformer/mmcv/parallel/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..2ed2c17ad357742e423beeaf4d35db03fe9af469 --- /dev/null +++ b/annotator/uniformer/mmcv/parallel/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .collate import collate +from .data_container import DataContainer +from .data_parallel import MMDataParallel +from .distributed import MMDistributedDataParallel +from .registry import MODULE_WRAPPERS +from .scatter_gather import scatter, scatter_kwargs +from .utils import is_module_wrapper + +__all__ = [ + 'collate', 'DataContainer', 'MMDataParallel', 'MMDistributedDataParallel', + 'scatter', 'scatter_kwargs', 'is_module_wrapper', 'MODULE_WRAPPERS' +] diff --git a/annotator/uniformer/mmcv/parallel/_functions.py b/annotator/uniformer/mmcv/parallel/_functions.py new file mode 100644 index 0000000000000000000000000000000000000000..9b5a8a44483ab991411d07122b22a1d027e4be8e --- /dev/null +++ b/annotator/uniformer/mmcv/parallel/_functions.py @@ -0,0 +1,79 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +from torch.nn.parallel._functions import _get_stream + + +def scatter(input, devices, streams=None): + """Scatters tensor across multiple GPUs.""" + if streams is None: + streams = [None] * len(devices) + + if isinstance(input, list): + chunk_size = (len(input) - 1) // len(devices) + 1 + outputs = [ + scatter(input[i], [devices[i // chunk_size]], + [streams[i // chunk_size]]) for i in range(len(input)) + ] + return outputs + elif isinstance(input, torch.Tensor): + output = input.contiguous() + # TODO: copy to a pinned buffer first (if copying from CPU) + stream = streams[0] if output.numel() > 0 else None + if devices != [-1]: + with torch.cuda.device(devices[0]), torch.cuda.stream(stream): + output = output.cuda(devices[0], non_blocking=True) + else: + # unsqueeze the first dimension thus the tensor's shape is the + # same as those scattered with GPU. 
+ output = output.unsqueeze(0) + return output + else: + raise Exception(f'Unknown type {type(input)}.') + + +def synchronize_stream(output, devices, streams): + if isinstance(output, list): + chunk_size = len(output) // len(devices) + for i in range(len(devices)): + for j in range(chunk_size): + synchronize_stream(output[i * chunk_size + j], [devices[i]], + [streams[i]]) + elif isinstance(output, torch.Tensor): + if output.numel() != 0: + with torch.cuda.device(devices[0]): + main_stream = torch.cuda.current_stream() + main_stream.wait_stream(streams[0]) + output.record_stream(main_stream) + else: + raise Exception(f'Unknown type {type(output)}.') + + +def get_input_device(input): + if isinstance(input, list): + for item in input: + input_device = get_input_device(item) + if input_device != -1: + return input_device + return -1 + elif isinstance(input, torch.Tensor): + return input.get_device() if input.is_cuda else -1 + else: + raise Exception(f'Unknown type {type(input)}.') + + +class Scatter: + + @staticmethod + def forward(target_gpus, input): + input_device = get_input_device(input) + streams = None + if input_device == -1 and target_gpus != [-1]: + # Perform CPU to GPU copies in a background stream + streams = [_get_stream(device) for device in target_gpus] + + outputs = scatter(input, target_gpus, streams) + # Synchronize with the copy stream + if streams is not None: + synchronize_stream(outputs, target_gpus, streams) + + return tuple(outputs) diff --git a/annotator/uniformer/mmcv/parallel/collate.py b/annotator/uniformer/mmcv/parallel/collate.py new file mode 100644 index 0000000000000000000000000000000000000000..ad749197df21b0d74297548be5f66a696adebf7f --- /dev/null +++ b/annotator/uniformer/mmcv/parallel/collate.py @@ -0,0 +1,84 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from collections.abc import Mapping, Sequence + +import torch +import torch.nn.functional as F +from torch.utils.data.dataloader import default_collate + +from .data_container import DataContainer + + +def collate(batch, samples_per_gpu=1): + """Puts each data field into a tensor/DataContainer with outer dimension + batch size. + + Extend default_collate to add support for + :type:`~mmcv.parallel.DataContainer`. There are 3 cases. + + 1. cpu_only = True, e.g., meta data + 2. cpu_only = False, stack = True, e.g., images tensors + 3. 
cpu_only = False, stack = False, e.g., gt bboxes + """ + + if not isinstance(batch, Sequence): + raise TypeError(f'{batch.dtype} is not supported.') + + if isinstance(batch[0], DataContainer): + stacked = [] + if batch[0].cpu_only: + for i in range(0, len(batch), samples_per_gpu): + stacked.append( + [sample.data for sample in batch[i:i + samples_per_gpu]]) + return DataContainer( + stacked, batch[0].stack, batch[0].padding_value, cpu_only=True) + elif batch[0].stack: + for i in range(0, len(batch), samples_per_gpu): + assert isinstance(batch[i].data, torch.Tensor) + + if batch[i].pad_dims is not None: + ndim = batch[i].dim() + assert ndim > batch[i].pad_dims + max_shape = [0 for _ in range(batch[i].pad_dims)] + for dim in range(1, batch[i].pad_dims + 1): + max_shape[dim - 1] = batch[i].size(-dim) + for sample in batch[i:i + samples_per_gpu]: + for dim in range(0, ndim - batch[i].pad_dims): + assert batch[i].size(dim) == sample.size(dim) + for dim in range(1, batch[i].pad_dims + 1): + max_shape[dim - 1] = max(max_shape[dim - 1], + sample.size(-dim)) + padded_samples = [] + for sample in batch[i:i + samples_per_gpu]: + pad = [0 for _ in range(batch[i].pad_dims * 2)] + for dim in range(1, batch[i].pad_dims + 1): + pad[2 * dim - + 1] = max_shape[dim - 1] - sample.size(-dim) + padded_samples.append( + F.pad( + sample.data, pad, value=sample.padding_value)) + stacked.append(default_collate(padded_samples)) + elif batch[i].pad_dims is None: + stacked.append( + default_collate([ + sample.data + for sample in batch[i:i + samples_per_gpu] + ])) + else: + raise ValueError( + 'pad_dims should be either None or integers (1-3)') + + else: + for i in range(0, len(batch), samples_per_gpu): + stacked.append( + [sample.data for sample in batch[i:i + samples_per_gpu]]) + return DataContainer(stacked, batch[0].stack, batch[0].padding_value) + elif isinstance(batch[0], Sequence): + transposed = zip(*batch) + return [collate(samples, samples_per_gpu) for samples in transposed] + elif isinstance(batch[0], Mapping): + return { + key: collate([d[key] for d in batch], samples_per_gpu) + for key in batch[0] + } + else: + return default_collate(batch) diff --git a/annotator/uniformer/mmcv/parallel/data_container.py b/annotator/uniformer/mmcv/parallel/data_container.py new file mode 100644 index 0000000000000000000000000000000000000000..cedb0d32a51a1f575a622b38de2cee3ab4757821 --- /dev/null +++ b/annotator/uniformer/mmcv/parallel/data_container.py @@ -0,0 +1,89 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import functools + +import torch + + +def assert_tensor_type(func): + + @functools.wraps(func) + def wrapper(*args, **kwargs): + if not isinstance(args[0].data, torch.Tensor): + raise AttributeError( + f'{args[0].__class__.__name__} has no attribute ' + f'{func.__name__} for type {args[0].datatype}') + return func(*args, **kwargs) + + return wrapper + + +class DataContainer: + """A container for any type of objects. + + Typically tensors will be stacked in the collate function and sliced along + some dimension in the scatter function. This behavior has some limitations. + 1. All tensors have to be the same size. + 2. Types are limited (numpy array or Tensor). + + We design `DataContainer` and `MMDataParallel` to overcome these + limitations. The behavior can be either of the following. 
+
+    - copy to GPU, pad all tensors to the same size and stack them
+    - copy to GPU without stacking
+    - leave the objects as is and pass them to the model
+    - pad_dims specifies the number of last few dimensions to do padding
+    """
+
+    def __init__(self,
+                 data,
+                 stack=False,
+                 padding_value=0,
+                 cpu_only=False,
+                 pad_dims=2):
+        self._data = data
+        self._cpu_only = cpu_only
+        self._stack = stack
+        self._padding_value = padding_value
+        assert pad_dims in [None, 1, 2, 3]
+        self._pad_dims = pad_dims
+
+    def __repr__(self):
+        return f'{self.__class__.__name__}({repr(self.data)})'
+
+    def __len__(self):
+        return len(self._data)
+
+    @property
+    def data(self):
+        return self._data
+
+    @property
+    def datatype(self):
+        if isinstance(self.data, torch.Tensor):
+            return self.data.type()
+        else:
+            return type(self.data)
+
+    @property
+    def cpu_only(self):
+        return self._cpu_only
+
+    @property
+    def stack(self):
+        return self._stack
+
+    @property
+    def padding_value(self):
+        return self._padding_value
+
+    @property
+    def pad_dims(self):
+        return self._pad_dims
+
+    @assert_tensor_type
+    def size(self, *args, **kwargs):
+        return self.data.size(*args, **kwargs)
+
+    @assert_tensor_type
+    def dim(self):
+        return self.data.dim()
diff --git a/annotator/uniformer/mmcv/parallel/data_parallel.py b/annotator/uniformer/mmcv/parallel/data_parallel.py
new file mode 100644
index 0000000000000000000000000000000000000000..79b5f69b654cf647dc7ae9174223781ab5c607d2
--- /dev/null
+++ b/annotator/uniformer/mmcv/parallel/data_parallel.py
@@ -0,0 +1,89 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from itertools import chain
+
+from torch.nn.parallel import DataParallel
+
+from .scatter_gather import scatter_kwargs
+
+
+class MMDataParallel(DataParallel):
+    """The DataParallel module that supports DataContainer.
+
+    MMDataParallel has two main differences from PyTorch DataParallel:
+
+    - It supports a custom type :class:`DataContainer` which allows more
+      flexible control of input data during both GPU and CPU inference.
+    - It implements two more APIs ``train_step()`` and ``val_step()``.
+
+    Args:
+        module (:class:`nn.Module`): Module to be encapsulated.
+        device_ids (list[int]): Device IDs of modules to be scattered to.
+            Defaults to None when GPU is not available.
+        output_device (str | int): Device ID for output. Defaults to None.
+        dim (int): Dimension used to scatter the data. Defaults to 0.
+    """
+
+    def __init__(self, *args, dim=0, **kwargs):
+        super(MMDataParallel, self).__init__(*args, dim=dim, **kwargs)
+        self.dim = dim
+
+    def forward(self, *inputs, **kwargs):
+        """Override the original forward function.
+
+        The main difference lies in the CPU inference where the data in
+        :class:`DataContainers` will still be gathered.
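+
+        Example (illustrative; ``net``, ``imgs`` and ``metas`` are assumed
+        placeholders, not defined in this file):
+
+            >>> model = MMDataParallel(net.cuda(0), device_ids=[0])
+            >>> out = model(img=imgs, img_metas=metas)  # kwargs are scattered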
+ """ + if not self.device_ids: + # We add the following line thus the module could gather and + # convert data containers as those in GPU inference + inputs, kwargs = self.scatter(inputs, kwargs, [-1]) + return self.module(*inputs[0], **kwargs[0]) + else: + return super().forward(*inputs, **kwargs) + + def scatter(self, inputs, kwargs, device_ids): + return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim) + + def train_step(self, *inputs, **kwargs): + if not self.device_ids: + # We add the following line thus the module could gather and + # convert data containers as those in GPU inference + inputs, kwargs = self.scatter(inputs, kwargs, [-1]) + return self.module.train_step(*inputs[0], **kwargs[0]) + + assert len(self.device_ids) == 1, \ + ('MMDataParallel only supports single GPU training, if you need to' + ' train with multiple GPUs, please use MMDistributedDataParallel' + 'instead.') + + for t in chain(self.module.parameters(), self.module.buffers()): + if t.device != self.src_device_obj: + raise RuntimeError( + 'module must have its parameters and buffers ' + f'on device {self.src_device_obj} (device_ids[0]) but ' + f'found one of them on device: {t.device}') + + inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids) + return self.module.train_step(*inputs[0], **kwargs[0]) + + def val_step(self, *inputs, **kwargs): + if not self.device_ids: + # We add the following line thus the module could gather and + # convert data containers as those in GPU inference + inputs, kwargs = self.scatter(inputs, kwargs, [-1]) + return self.module.val_step(*inputs[0], **kwargs[0]) + + assert len(self.device_ids) == 1, \ + ('MMDataParallel only supports single GPU training, if you need to' + ' train with multiple GPUs, please use MMDistributedDataParallel' + ' instead.') + + for t in chain(self.module.parameters(), self.module.buffers()): + if t.device != self.src_device_obj: + raise RuntimeError( + 'module must have its parameters and buffers ' + f'on device {self.src_device_obj} (device_ids[0]) but ' + f'found one of them on device: {t.device}') + + inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids) + return self.module.val_step(*inputs[0], **kwargs[0]) diff --git a/annotator/uniformer/mmcv/parallel/distributed.py b/annotator/uniformer/mmcv/parallel/distributed.py new file mode 100644 index 0000000000000000000000000000000000000000..1e4c27903db58a54d37ea1ed9ec0104098b486f2 --- /dev/null +++ b/annotator/uniformer/mmcv/parallel/distributed.py @@ -0,0 +1,112 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +from torch.nn.parallel.distributed import (DistributedDataParallel, + _find_tensors) + +from annotator.uniformer.mmcv import print_log +from annotator.uniformer.mmcv.utils import TORCH_VERSION, digit_version +from .scatter_gather import scatter_kwargs + + +class MMDistributedDataParallel(DistributedDataParallel): + """The DDP module that supports DataContainer. + + MMDDP has two main differences with PyTorch DDP: + + - It supports a custom type :class:`DataContainer` which allows more + flexible control of input data. + - It implement two APIs ``train_step()`` and ``val_step()``. 
+ """ + + def to_kwargs(self, inputs, kwargs, device_id): + # Use `self.to_kwargs` instead of `self.scatter` in pytorch1.8 + # to move all tensors to device_id + return scatter_kwargs(inputs, kwargs, [device_id], dim=self.dim) + + def scatter(self, inputs, kwargs, device_ids): + return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim) + + def train_step(self, *inputs, **kwargs): + """train_step() API for module wrapped by DistributedDataParallel. + + This method is basically the same as + ``DistributedDataParallel.forward()``, while replacing + ``self.module.forward()`` with ``self.module.train_step()``. + It is compatible with PyTorch 1.1 - 1.5. + """ + + # In PyTorch >= 1.7, ``reducer._rebuild_buckets()`` is moved from the + # end of backward to the beginning of forward. + if ('parrots' not in TORCH_VERSION + and digit_version(TORCH_VERSION) >= digit_version('1.7') + and self.reducer._rebuild_buckets()): + print_log( + 'Reducer buckets have been rebuilt in this iteration.', + logger='mmcv') + + if getattr(self, 'require_forward_param_sync', True): + self._sync_params() + if self.device_ids: + inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids) + if len(self.device_ids) == 1: + output = self.module.train_step(*inputs[0], **kwargs[0]) + else: + outputs = self.parallel_apply( + self._module_copies[:len(inputs)], inputs, kwargs) + output = self.gather(outputs, self.output_device) + else: + output = self.module.train_step(*inputs, **kwargs) + + if torch.is_grad_enabled() and getattr( + self, 'require_backward_grad_sync', True): + if self.find_unused_parameters: + self.reducer.prepare_for_backward(list(_find_tensors(output))) + else: + self.reducer.prepare_for_backward([]) + else: + if ('parrots' not in TORCH_VERSION + and digit_version(TORCH_VERSION) > digit_version('1.2')): + self.require_forward_param_sync = False + return output + + def val_step(self, *inputs, **kwargs): + """val_step() API for module wrapped by DistributedDataParallel. + + This method is basically the same as + ``DistributedDataParallel.forward()``, while replacing + ``self.module.forward()`` with ``self.module.val_step()``. + It is compatible with PyTorch 1.1 - 1.5. + """ + # In PyTorch >= 1.7, ``reducer._rebuild_buckets()`` is moved from the + # end of backward to the beginning of forward. 
+ if ('parrots' not in TORCH_VERSION + and digit_version(TORCH_VERSION) >= digit_version('1.7') + and self.reducer._rebuild_buckets()): + print_log( + 'Reducer buckets have been rebuilt in this iteration.', + logger='mmcv') + + if getattr(self, 'require_forward_param_sync', True): + self._sync_params() + if self.device_ids: + inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids) + if len(self.device_ids) == 1: + output = self.module.val_step(*inputs[0], **kwargs[0]) + else: + outputs = self.parallel_apply( + self._module_copies[:len(inputs)], inputs, kwargs) + output = self.gather(outputs, self.output_device) + else: + output = self.module.val_step(*inputs, **kwargs) + + if torch.is_grad_enabled() and getattr( + self, 'require_backward_grad_sync', True): + if self.find_unused_parameters: + self.reducer.prepare_for_backward(list(_find_tensors(output))) + else: + self.reducer.prepare_for_backward([]) + else: + if ('parrots' not in TORCH_VERSION + and digit_version(TORCH_VERSION) > digit_version('1.2')): + self.require_forward_param_sync = False + return output diff --git a/annotator/uniformer/mmcv/parallel/distributed_deprecated.py b/annotator/uniformer/mmcv/parallel/distributed_deprecated.py new file mode 100644 index 0000000000000000000000000000000000000000..676937a2085d4da20fa87923041a200fca6214eb --- /dev/null +++ b/annotator/uniformer/mmcv/parallel/distributed_deprecated.py @@ -0,0 +1,70 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.distributed as dist +import torch.nn as nn +from torch._utils import (_flatten_dense_tensors, _take_tensors, + _unflatten_dense_tensors) + +from annotator.uniformer.mmcv.utils import TORCH_VERSION, digit_version +from .registry import MODULE_WRAPPERS +from .scatter_gather import scatter_kwargs + + +@MODULE_WRAPPERS.register_module() +class MMDistributedDataParallel(nn.Module): + + def __init__(self, + module, + dim=0, + broadcast_buffers=True, + bucket_cap_mb=25): + super(MMDistributedDataParallel, self).__init__() + self.module = module + self.dim = dim + self.broadcast_buffers = broadcast_buffers + + self.broadcast_bucket_size = bucket_cap_mb * 1024 * 1024 + self._sync_params() + + def _dist_broadcast_coalesced(self, tensors, buffer_size): + for tensors in _take_tensors(tensors, buffer_size): + flat_tensors = _flatten_dense_tensors(tensors) + dist.broadcast(flat_tensors, 0) + for tensor, synced in zip( + tensors, _unflatten_dense_tensors(flat_tensors, tensors)): + tensor.copy_(synced) + + def _sync_params(self): + module_states = list(self.module.state_dict().values()) + if len(module_states) > 0: + self._dist_broadcast_coalesced(module_states, + self.broadcast_bucket_size) + if self.broadcast_buffers: + if (TORCH_VERSION != 'parrots' + and digit_version(TORCH_VERSION) < digit_version('1.0')): + buffers = [b.data for b in self.module._all_buffers()] + else: + buffers = [b.data for b in self.module.buffers()] + if len(buffers) > 0: + self._dist_broadcast_coalesced(buffers, + self.broadcast_bucket_size) + + def scatter(self, inputs, kwargs, device_ids): + return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim) + + def forward(self, *inputs, **kwargs): + inputs, kwargs = self.scatter(inputs, kwargs, + [torch.cuda.current_device()]) + return self.module(*inputs[0], **kwargs[0]) + + def train_step(self, *inputs, **kwargs): + inputs, kwargs = self.scatter(inputs, kwargs, + [torch.cuda.current_device()]) + output = self.module.train_step(*inputs[0], **kwargs[0]) + return output + + def val_step(self, *inputs, 
**kwargs): + inputs, kwargs = self.scatter(inputs, kwargs, + [torch.cuda.current_device()]) + output = self.module.val_step(*inputs[0], **kwargs[0]) + return output diff --git a/annotator/uniformer/mmcv/parallel/registry.py b/annotator/uniformer/mmcv/parallel/registry.py new file mode 100644 index 0000000000000000000000000000000000000000..a204a07fba10e614223f090d1a57cf9c4d74d4a1 --- /dev/null +++ b/annotator/uniformer/mmcv/parallel/registry.py @@ -0,0 +1,8 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from torch.nn.parallel import DataParallel, DistributedDataParallel + +from annotator.uniformer.mmcv.utils import Registry + +MODULE_WRAPPERS = Registry('module wrapper') +MODULE_WRAPPERS.register_module(module=DataParallel) +MODULE_WRAPPERS.register_module(module=DistributedDataParallel) diff --git a/annotator/uniformer/mmcv/parallel/scatter_gather.py b/annotator/uniformer/mmcv/parallel/scatter_gather.py new file mode 100644 index 0000000000000000000000000000000000000000..900ff88566f8f14830590459dc4fd16d4b382e47 --- /dev/null +++ b/annotator/uniformer/mmcv/parallel/scatter_gather.py @@ -0,0 +1,59 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +from torch.nn.parallel._functions import Scatter as OrigScatter + +from ._functions import Scatter +from .data_container import DataContainer + + +def scatter(inputs, target_gpus, dim=0): + """Scatter inputs to target gpus. + + The only difference from original :func:`scatter` is to add support for + :type:`~mmcv.parallel.DataContainer`. + """ + + def scatter_map(obj): + if isinstance(obj, torch.Tensor): + if target_gpus != [-1]: + return OrigScatter.apply(target_gpus, None, dim, obj) + else: + # for CPU inference we use self-implemented scatter + return Scatter.forward(target_gpus, obj) + if isinstance(obj, DataContainer): + if obj.cpu_only: + return obj.data + else: + return Scatter.forward(target_gpus, obj.data) + if isinstance(obj, tuple) and len(obj) > 0: + return list(zip(*map(scatter_map, obj))) + if isinstance(obj, list) and len(obj) > 0: + out = list(map(list, zip(*map(scatter_map, obj)))) + return out + if isinstance(obj, dict) and len(obj) > 0: + out = list(map(type(obj), zip(*map(scatter_map, obj.items())))) + return out + return [obj for targets in target_gpus] + + # After scatter_map is called, a scatter_map cell will exist. This cell + # has a reference to the actual function scatter_map, which has references + # to a closure that has a reference to the scatter_map cell (because the + # fn is recursive). To avoid this reference cycle, we set the function to + # None, clearing the cell + try: + return scatter_map(inputs) + finally: + scatter_map = None + + +def scatter_kwargs(inputs, kwargs, target_gpus, dim=0): + """Scatter with support for kwargs dictionary.""" + inputs = scatter(inputs, target_gpus, dim) if inputs else [] + kwargs = scatter(kwargs, target_gpus, dim) if kwargs else [] + if len(inputs) < len(kwargs): + inputs.extend([() for _ in range(len(kwargs) - len(inputs))]) + elif len(kwargs) < len(inputs): + kwargs.extend([{} for _ in range(len(inputs) - len(kwargs))]) + inputs = tuple(inputs) + kwargs = tuple(kwargs) + return inputs, kwargs diff --git a/annotator/uniformer/mmcv/parallel/utils.py b/annotator/uniformer/mmcv/parallel/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..0f5712cb42c38a2e8563bf563efb6681383cab9b --- /dev/null +++ b/annotator/uniformer/mmcv/parallel/utils.py @@ -0,0 +1,20 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
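+# --- Illustrative sketch (assumption-laden example, not part of upstream
+# mmcv) of the DataContainer/collate pairing defined in collate.py and
+# data_container.py above: two images of different sizes are padded to a
+# common shape and stacked because stack=True and pad_dims=2.
+#
+#     import torch
+#     from annotator.uniformer.mmcv.parallel import DataContainer, collate
+#
+#     batch = [DataContainer(torch.rand(3, 10, 12), stack=True, pad_dims=2),
+#              DataContainer(torch.rand(3, 11, 9), stack=True, pad_dims=2)]
+#     out = collate(batch, samples_per_gpu=2)
+#     # out.data[0].shape == (2, 3, 11, 12): each sample is padded (with
+#     # padding_value=0) to the per-dim max of the last two dims, then
+#     # stacked by default_collate into one chunk for the single GPU group.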
+from .registry import MODULE_WRAPPERS
+
+
+def is_module_wrapper(module):
+    """Check if a module is a module wrapper.
+
+    The following 3 modules in MMCV (and their subclasses) are regarded as
+    module wrappers: DataParallel, DistributedDataParallel,
+    MMDistributedDataParallel (the deprecated version). You may add your own
+    module wrapper by registering it to mmcv.parallel.MODULE_WRAPPERS.
+
+    Args:
+        module (nn.Module): The module to be checked.
+
+    Returns:
+        bool: True if the input module is a module wrapper.
+    """
+    module_wrappers = tuple(MODULE_WRAPPERS.module_dict.values())
+    return isinstance(module, module_wrappers)
diff --git a/annotator/uniformer/mmcv/runner/__init__.py b/annotator/uniformer/mmcv/runner/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..52e4b48d383a84a055dcd7f6236f6e8e58eab924
--- /dev/null
+++ b/annotator/uniformer/mmcv/runner/__init__.py
@@ -0,0 +1,47 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .base_module import BaseModule, ModuleList, Sequential
+from .base_runner import BaseRunner
+from .builder import RUNNERS, build_runner
+from .checkpoint import (CheckpointLoader, _load_checkpoint,
+                         _load_checkpoint_with_prefix, load_checkpoint,
+                         load_state_dict, save_checkpoint, weights_to_cpu)
+from .default_constructor import DefaultRunnerConstructor
+from .dist_utils import (allreduce_grads, allreduce_params, get_dist_info,
+                         init_dist, master_only)
+from .epoch_based_runner import EpochBasedRunner, Runner
+from .fp16_utils import LossScaler, auto_fp16, force_fp32, wrap_fp16_model
+from .hooks import (HOOKS, CheckpointHook, ClosureHook, DistEvalHook,
+                    DistSamplerSeedHook, DvcliveLoggerHook, EMAHook, EvalHook,
+                    Fp16OptimizerHook, GradientCumulativeFp16OptimizerHook,
+                    GradientCumulativeOptimizerHook, Hook, IterTimerHook,
+                    LoggerHook, LrUpdaterHook, MlflowLoggerHook,
+                    NeptuneLoggerHook, OptimizerHook, PaviLoggerHook,
+                    SyncBuffersHook, TensorboardLoggerHook, TextLoggerHook,
+                    WandbLoggerHook)
+from .iter_based_runner import IterBasedRunner, IterLoader
+from .log_buffer import LogBuffer
+from .optimizer import (OPTIMIZER_BUILDERS, OPTIMIZERS,
+                        DefaultOptimizerConstructor, build_optimizer,
+                        build_optimizer_constructor)
+from .priority import Priority, get_priority
+from .utils import get_host_info, get_time_str, obj_from_dict, set_random_seed
+
+__all__ = [
+    'BaseRunner', 'Runner', 'EpochBasedRunner', 'IterBasedRunner', 'LogBuffer',
+    'HOOKS', 'Hook', 'CheckpointHook', 'ClosureHook', 'LrUpdaterHook',
+    'OptimizerHook', 'IterTimerHook', 'DistSamplerSeedHook', 'LoggerHook',
+    'PaviLoggerHook', 'TextLoggerHook', 'TensorboardLoggerHook',
+    'NeptuneLoggerHook', 'WandbLoggerHook', 'MlflowLoggerHook',
+    'DvcliveLoggerHook', '_load_checkpoint', 'load_state_dict',
+    'load_checkpoint', 'weights_to_cpu', 'save_checkpoint', 'Priority',
+    'get_priority', 'get_host_info', 'get_time_str', 'obj_from_dict',
+    'init_dist', 'get_dist_info', 'master_only', 'OPTIMIZER_BUILDERS',
+    'OPTIMIZERS', 'DefaultOptimizerConstructor', 'build_optimizer',
+    'build_optimizer_constructor', 'IterLoader', 'set_random_seed',
+    'auto_fp16', 'force_fp32', 'wrap_fp16_model', 'Fp16OptimizerHook',
+    'SyncBuffersHook', 'EMAHook', 'build_runner', 'RUNNERS', 'allreduce_grads',
+    'allreduce_params', 'LossScaler', 'CheckpointLoader', 'BaseModule',
+    '_load_checkpoint_with_prefix', 'EvalHook', 'DistEvalHook', 'Sequential',
+    'ModuleList', 'GradientCumulativeOptimizerHook',
+    'GradientCumulativeFp16OptimizerHook', 'DefaultRunnerConstructor'
+]
diff --git
a/annotator/uniformer/mmcv/runner/base_module.py b/annotator/uniformer/mmcv/runner/base_module.py new file mode 100644 index 0000000000000000000000000000000000000000..617fad9bb89f10a9a0911d962dfb3bc8f3a3628c --- /dev/null +++ b/annotator/uniformer/mmcv/runner/base_module.py @@ -0,0 +1,195 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import copy +import warnings +from abc import ABCMeta +from collections import defaultdict +from logging import FileHandler + +import torch.nn as nn + +from annotator.uniformer.mmcv.runner.dist_utils import master_only +from annotator.uniformer.mmcv.utils.logging import get_logger, logger_initialized, print_log + + +class BaseModule(nn.Module, metaclass=ABCMeta): + """Base module for all modules in openmmlab. + + ``BaseModule`` is a wrapper of ``torch.nn.Module`` with additional + functionality of parameter initialization. Compared with + ``torch.nn.Module``, ``BaseModule`` mainly adds three attributes. + + - ``init_cfg``: the config to control the initialization. + - ``init_weights``: The function of parameter + initialization and recording initialization + information. + - ``_params_init_info``: Used to track the parameter + initialization information. This attribute only + exists during executing the ``init_weights``. + + Args: + init_cfg (dict, optional): Initialization config dict. + """ + + def __init__(self, init_cfg=None): + """Initialize BaseModule, inherited from `torch.nn.Module`""" + + # NOTE init_cfg can be defined in different levels, but init_cfg + # in low levels has a higher priority. + + super(BaseModule, self).__init__() + # define default value of init_cfg instead of hard code + # in init_weights() function + self._is_init = False + + self.init_cfg = copy.deepcopy(init_cfg) + + # Backward compatibility in derived classes + # if pretrained is not None: + # warnings.warn('DeprecationWarning: pretrained is a deprecated \ + # key, please consider using init_cfg') + # self.init_cfg = dict(type='Pretrained', checkpoint=pretrained) + + @property + def is_init(self): + return self._is_init + + def init_weights(self): + """Initialize the weights.""" + + is_top_level_module = False + # check if it is top-level module + if not hasattr(self, '_params_init_info'): + # The `_params_init_info` is used to record the initialization + # information of the parameters + # the key should be the obj:`nn.Parameter` of model and the value + # should be a dict containing + # - init_info (str): The string that describes the initialization. + # - tmp_mean_value (FloatTensor): The mean of the parameter, + # which indicates whether the parameter has been modified. + # this attribute would be deleted after all parameters + # is initialized. + self._params_init_info = defaultdict(dict) + is_top_level_module = True + + # Initialize the `_params_init_info`, + # When detecting the `tmp_mean_value` of + # the corresponding parameter is changed, update related + # initialization information + for name, param in self.named_parameters(): + self._params_init_info[param][ + 'init_info'] = f'The value is the same before and ' \ + f'after calling `init_weights` ' \ + f'of {self.__class__.__name__} ' + self._params_init_info[param][ + 'tmp_mean_value'] = param.data.mean() + + # pass `params_init_info` to all submodules + # All submodules share the same `params_init_info`, + # so it will be updated when parameters are + # modified at any level of the model. 
+ for sub_module in self.modules(): + sub_module._params_init_info = self._params_init_info + + # Get the initialized logger, if not exist, + # create a logger named `mmcv` + logger_names = list(logger_initialized.keys()) + logger_name = logger_names[0] if logger_names else 'mmcv' + + from ..cnn import initialize + from ..cnn.utils.weight_init import update_init_info + module_name = self.__class__.__name__ + if not self._is_init: + if self.init_cfg: + print_log( + f'initialize {module_name} with init_cfg {self.init_cfg}', + logger=logger_name) + initialize(self, self.init_cfg) + if isinstance(self.init_cfg, dict): + # prevent the parameters of + # the pre-trained model + # from being overwritten by + # the `init_weights` + if self.init_cfg['type'] == 'Pretrained': + return + + for m in self.children(): + if hasattr(m, 'init_weights'): + m.init_weights() + # users may overload the `init_weights` + update_init_info( + m, + init_info=f'Initialized by ' + f'user-defined `init_weights`' + f' in {m.__class__.__name__} ') + + self._is_init = True + else: + warnings.warn(f'init_weights of {self.__class__.__name__} has ' + f'been called more than once.') + + if is_top_level_module: + self._dump_init_info(logger_name) + + for sub_module in self.modules(): + del sub_module._params_init_info + + @master_only + def _dump_init_info(self, logger_name): + """Dump the initialization information to a file named + `initialization.log.json` in workdir. + + Args: + logger_name (str): The name of logger. + """ + + logger = get_logger(logger_name) + + with_file_handler = False + # dump the information to the logger file if there is a `FileHandler` + for handler in logger.handlers: + if isinstance(handler, FileHandler): + handler.stream.write( + 'Name of parameter - Initialization information\n') + for name, param in self.named_parameters(): + handler.stream.write( + f'\n{name} - {param.shape}: ' + f"\n{self._params_init_info[param]['init_info']} \n") + handler.stream.flush() + with_file_handler = True + if not with_file_handler: + for name, param in self.named_parameters(): + print_log( + f'\n{name} - {param.shape}: ' + f"\n{self._params_init_info[param]['init_info']} \n ", + logger=logger_name) + + def __repr__(self): + s = super().__repr__() + if self.init_cfg: + s += f'\ninit_cfg={self.init_cfg}' + return s + + +class Sequential(BaseModule, nn.Sequential): + """Sequential module in openmmlab. + + Args: + init_cfg (dict, optional): Initialization config dict. + """ + + def __init__(self, *args, init_cfg=None): + BaseModule.__init__(self, init_cfg) + nn.Sequential.__init__(self, *args) + + +class ModuleList(BaseModule, nn.ModuleList): + """ModuleList in openmmlab. + + Args: + modules (iterable, optional): an iterable of modules to add. + init_cfg (dict, optional): Initialization config dict. + """ + + def __init__(self, modules=None, init_cfg=None): + BaseModule.__init__(self, init_cfg) + nn.ModuleList.__init__(self, modules) diff --git a/annotator/uniformer/mmcv/runner/base_runner.py b/annotator/uniformer/mmcv/runner/base_runner.py new file mode 100644 index 0000000000000000000000000000000000000000..4928db0a73b56fe0218a4bf66ec4ffa082d31ccc --- /dev/null +++ b/annotator/uniformer/mmcv/runner/base_runner.py @@ -0,0 +1,542 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
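+# --- Illustrative sketch of the `BaseModule`/`init_cfg` mechanism from
+# base_module.py above (hypothetical module and config, not upstream code):
+# weight initialization is deferred until `init_weights()` is called, and the
+# init_cfg dict is resolved through mmcv.cnn's `initialize`.
+#
+#     import torch.nn as nn
+#     from annotator.uniformer.mmcv.runner import BaseModule
+#
+#     class ToyHead(BaseModule):
+#         def __init__(self):
+#             super().__init__(
+#                 init_cfg=dict(type='Normal', layer='Conv2d', std=0.01))
+#             self.conv = nn.Conv2d(16, 16, 3, padding=1)
+#
+#     head = ToyHead()
+#     head.init_weights()  # conv weights ~ N(0, 0.01), recorded in the log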
+import copy
+import logging
+import os.path as osp
+import warnings
+from abc import ABCMeta, abstractmethod
+
+import torch
+from torch.optim import Optimizer
+
+import annotator.uniformer.mmcv as mmcv
+from ..parallel import is_module_wrapper
+from .checkpoint import load_checkpoint
+from .dist_utils import get_dist_info
+from .hooks import HOOKS, Hook
+from .log_buffer import LogBuffer
+from .priority import Priority, get_priority
+from .utils import get_time_str
+
+
+class BaseRunner(metaclass=ABCMeta):
+    """The base class of Runner, a training helper for PyTorch.
+
+    All subclasses should implement the following APIs:
+
+    - ``run()``
+    - ``train()``
+    - ``val()``
+    - ``save_checkpoint()``
+
+    Args:
+        model (:obj:`torch.nn.Module`): The model to be run.
+        batch_processor (callable): A callable method that processes a data
+            batch. The interface of this method should be
+            `batch_processor(model, data, train_mode) -> dict`
+        optimizer (dict or :obj:`torch.optim.Optimizer`): It can be either an
+            optimizer (in most cases) or a dict of optimizers (in models that
+            require more than one optimizer, e.g., GAN).
+        work_dir (str, optional): The working directory to save checkpoints
+            and logs. Defaults to None.
+        logger (:obj:`logging.Logger`): Logger used during training.
+            Defaults to None. (The default value is just for backward
+            compatibility)
+        meta (dict | None): A dict that records some important information,
+            such as environment info and seed, which will be logged in the
+            logger hook. Defaults to None.
+        max_epochs (int, optional): Total training epochs.
+        max_iters (int, optional): Total training iterations.
+    """
+
+    def __init__(self,
+                 model,
+                 batch_processor=None,
+                 optimizer=None,
+                 work_dir=None,
+                 logger=None,
+                 meta=None,
+                 max_iters=None,
+                 max_epochs=None):
+        if batch_processor is not None:
+            if not callable(batch_processor):
+                raise TypeError('batch_processor must be callable, '
+                                f'but got {type(batch_processor)}')
+            warnings.warn('batch_processor is deprecated, please implement '
+                          'train_step() and val_step() in the model instead.')
+            # raise an error if `batch_processor` is not None and
+            # `model.train_step()` exists.
+ if is_module_wrapper(model): + _model = model.module + else: + _model = model + if hasattr(_model, 'train_step') or hasattr(_model, 'val_step'): + raise RuntimeError( + 'batch_processor and model.train_step()/model.val_step() ' + 'cannot be both available.') + else: + assert hasattr(model, 'train_step') + + # check the type of `optimizer` + if isinstance(optimizer, dict): + for name, optim in optimizer.items(): + if not isinstance(optim, Optimizer): + raise TypeError( + f'optimizer must be a dict of torch.optim.Optimizers, ' + f'but optimizer["{name}"] is a {type(optim)}') + elif not isinstance(optimizer, Optimizer) and optimizer is not None: + raise TypeError( + f'optimizer must be a torch.optim.Optimizer object ' + f'or dict or None, but got {type(optimizer)}') + + # check the type of `logger` + if not isinstance(logger, logging.Logger): + raise TypeError(f'logger must be a logging.Logger object, ' + f'but got {type(logger)}') + + # check the type of `meta` + if meta is not None and not isinstance(meta, dict): + raise TypeError( + f'meta must be a dict or None, but got {type(meta)}') + + self.model = model + self.batch_processor = batch_processor + self.optimizer = optimizer + self.logger = logger + self.meta = meta + # create work_dir + if mmcv.is_str(work_dir): + self.work_dir = osp.abspath(work_dir) + mmcv.mkdir_or_exist(self.work_dir) + elif work_dir is None: + self.work_dir = None + else: + raise TypeError('"work_dir" must be a str or None') + + # get model name from the model class + if hasattr(self.model, 'module'): + self._model_name = self.model.module.__class__.__name__ + else: + self._model_name = self.model.__class__.__name__ + + self._rank, self._world_size = get_dist_info() + self.timestamp = get_time_str() + self.mode = None + self._hooks = [] + self._epoch = 0 + self._iter = 0 + self._inner_iter = 0 + + if max_epochs is not None and max_iters is not None: + raise ValueError( + 'Only one of `max_epochs` or `max_iters` can be set.') + + self._max_epochs = max_epochs + self._max_iters = max_iters + # TODO: Redesign LogBuffer, it is not flexible and elegant enough + self.log_buffer = LogBuffer() + + @property + def model_name(self): + """str: Name of the model, usually the module class name.""" + return self._model_name + + @property + def rank(self): + """int: Rank of current process. (distributed training)""" + return self._rank + + @property + def world_size(self): + """int: Number of processes participating in the job. + (distributed training)""" + return self._world_size + + @property + def hooks(self): + """list[:obj:`Hook`]: A list of registered hooks.""" + return self._hooks + + @property + def epoch(self): + """int: Current epoch.""" + return self._epoch + + @property + def iter(self): + """int: Current iteration.""" + return self._iter + + @property + def inner_iter(self): + """int: Iteration in an epoch.""" + return self._inner_iter + + @property + def max_epochs(self): + """int: Maximum training epochs.""" + return self._max_epochs + + @property + def max_iters(self): + """int: Maximum training iterations.""" + return self._max_iters + + @abstractmethod + def train(self): + pass + + @abstractmethod + def val(self): + pass + + @abstractmethod + def run(self, data_loaders, workflow, **kwargs): + pass + + @abstractmethod + def save_checkpoint(self, + out_dir, + filename_tmpl, + save_optimizer=True, + meta=None, + create_symlink=True): + pass + + def current_lr(self): + """Get current learning rates. 
+ + Returns: + list[float] | dict[str, list[float]]: Current learning rates of all + param groups. If the runner has a dict of optimizers, this + method will return a dict. + """ + if isinstance(self.optimizer, torch.optim.Optimizer): + lr = [group['lr'] for group in self.optimizer.param_groups] + elif isinstance(self.optimizer, dict): + lr = dict() + for name, optim in self.optimizer.items(): + lr[name] = [group['lr'] for group in optim.param_groups] + else: + raise RuntimeError( + 'lr is not applicable because optimizer does not exist.') + return lr + + def current_momentum(self): + """Get current momentums. + + Returns: + list[float] | dict[str, list[float]]: Current momentums of all + param groups. If the runner has a dict of optimizers, this + method will return a dict. + """ + + def _get_momentum(optimizer): + momentums = [] + for group in optimizer.param_groups: + if 'momentum' in group.keys(): + momentums.append(group['momentum']) + elif 'betas' in group.keys(): + momentums.append(group['betas'][0]) + else: + momentums.append(0) + return momentums + + if self.optimizer is None: + raise RuntimeError( + 'momentum is not applicable because optimizer does not exist.') + elif isinstance(self.optimizer, torch.optim.Optimizer): + momentums = _get_momentum(self.optimizer) + elif isinstance(self.optimizer, dict): + momentums = dict() + for name, optim in self.optimizer.items(): + momentums[name] = _get_momentum(optim) + return momentums + + def register_hook(self, hook, priority='NORMAL'): + """Register a hook into the hook list. + + The hook will be inserted into a priority queue, with the specified + priority (See :class:`Priority` for details of priorities). + For hooks with the same priority, they will be triggered in the same + order as they are registered. + + Args: + hook (:obj:`Hook`): The hook to be registered. + priority (int or str or :obj:`Priority`): Hook priority. + Lower value means higher priority. + """ + assert isinstance(hook, Hook) + if hasattr(hook, 'priority'): + raise ValueError('"priority" is a reserved attribute for hooks') + priority = get_priority(priority) + hook.priority = priority + # insert the hook to a sorted list + inserted = False + for i in range(len(self._hooks) - 1, -1, -1): + if priority >= self._hooks[i].priority: + self._hooks.insert(i + 1, hook) + inserted = True + break + if not inserted: + self._hooks.insert(0, hook) + + def register_hook_from_cfg(self, hook_cfg): + """Register a hook from its cfg. + + Args: + hook_cfg (dict): Hook config. It should have at least keys 'type' + and 'priority' indicating its type and priority. + + Notes: + The specific hook class to register should not use 'type' and + 'priority' arguments during initialization. + """ + hook_cfg = hook_cfg.copy() + priority = hook_cfg.pop('priority', 'NORMAL') + hook = mmcv.build_from_cfg(hook_cfg, HOOKS) + self.register_hook(hook, priority=priority) + + def call_hook(self, fn_name): + """Call all hooks. + + Args: + fn_name (str): The function name in each hook to be called, such as + "before_train_epoch". 
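+
+        Example (illustrative; runs every registered hook's
+        ``after_train_iter(runner)`` in priority order)::
+
+            >>> runner.call_hook('after_train_iter')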
+ """ + for hook in self._hooks: + getattr(hook, fn_name)(self) + + def get_hook_info(self): + # Get hooks info in each stage + stage_hook_map = {stage: [] for stage in Hook.stages} + for hook in self.hooks: + try: + priority = Priority(hook.priority).name + except ValueError: + priority = hook.priority + classname = hook.__class__.__name__ + hook_info = f'({priority:<12}) {classname:<35}' + for trigger_stage in hook.get_triggered_stages(): + stage_hook_map[trigger_stage].append(hook_info) + + stage_hook_infos = [] + for stage in Hook.stages: + hook_infos = stage_hook_map[stage] + if len(hook_infos) > 0: + info = f'{stage}:\n' + info += '\n'.join(hook_infos) + info += '\n -------------------- ' + stage_hook_infos.append(info) + return '\n'.join(stage_hook_infos) + + def load_checkpoint(self, + filename, + map_location='cpu', + strict=False, + revise_keys=[(r'^module.', '')]): + return load_checkpoint( + self.model, + filename, + map_location, + strict, + self.logger, + revise_keys=revise_keys) + + def resume(self, + checkpoint, + resume_optimizer=True, + map_location='default'): + if map_location == 'default': + if torch.cuda.is_available(): + device_id = torch.cuda.current_device() + checkpoint = self.load_checkpoint( + checkpoint, + map_location=lambda storage, loc: storage.cuda(device_id)) + else: + checkpoint = self.load_checkpoint(checkpoint) + else: + checkpoint = self.load_checkpoint( + checkpoint, map_location=map_location) + + self._epoch = checkpoint['meta']['epoch'] + self._iter = checkpoint['meta']['iter'] + if self.meta is None: + self.meta = {} + self.meta.setdefault('hook_msgs', {}) + # load `last_ckpt`, `best_score`, `best_ckpt`, etc. for hook messages + self.meta['hook_msgs'].update(checkpoint['meta'].get('hook_msgs', {})) + + # Re-calculate the number of iterations when resuming + # models with different number of GPUs + if 'config' in checkpoint['meta']: + config = mmcv.Config.fromstring( + checkpoint['meta']['config'], file_format='.py') + previous_gpu_ids = config.get('gpu_ids', None) + if previous_gpu_ids and len(previous_gpu_ids) > 0 and len( + previous_gpu_ids) != self.world_size: + self._iter = int(self._iter * len(previous_gpu_ids) / + self.world_size) + self.logger.info('the iteration number is changed due to ' + 'change of GPU number') + + # resume meta information meta + self.meta = checkpoint['meta'] + + if 'optimizer' in checkpoint and resume_optimizer: + if isinstance(self.optimizer, Optimizer): + self.optimizer.load_state_dict(checkpoint['optimizer']) + elif isinstance(self.optimizer, dict): + for k in self.optimizer.keys(): + self.optimizer[k].load_state_dict( + checkpoint['optimizer'][k]) + else: + raise TypeError( + 'Optimizer should be dict or torch.optim.Optimizer ' + f'but got {type(self.optimizer)}') + + self.logger.info('resumed epoch %d, iter %d', self.epoch, self.iter) + + def register_lr_hook(self, lr_config): + if lr_config is None: + return + elif isinstance(lr_config, dict): + assert 'policy' in lr_config + policy_type = lr_config.pop('policy') + # If the type of policy is all in lower case, e.g., 'cyclic', + # then its first letter will be capitalized, e.g., to be 'Cyclic'. + # This is for the convenient usage of Lr updater. + # Since this is not applicable for ` + # CosineAnnealingLrUpdater`, + # the string will not be changed if it contains capital letters. 
+ if policy_type == policy_type.lower(): + policy_type = policy_type.title() + hook_type = policy_type + 'LrUpdaterHook' + lr_config['type'] = hook_type + hook = mmcv.build_from_cfg(lr_config, HOOKS) + else: + hook = lr_config + self.register_hook(hook, priority='VERY_HIGH') + + def register_momentum_hook(self, momentum_config): + if momentum_config is None: + return + if isinstance(momentum_config, dict): + assert 'policy' in momentum_config + policy_type = momentum_config.pop('policy') + # If the type of policy is all in lower case, e.g., 'cyclic', + # then its first letter will be capitalized, e.g., to be 'Cyclic'. + # This is for the convenient usage of momentum updater. + # Since this is not applicable for + # `CosineAnnealingMomentumUpdater`, + # the string will not be changed if it contains capital letters. + if policy_type == policy_type.lower(): + policy_type = policy_type.title() + hook_type = policy_type + 'MomentumUpdaterHook' + momentum_config['type'] = hook_type + hook = mmcv.build_from_cfg(momentum_config, HOOKS) + else: + hook = momentum_config + self.register_hook(hook, priority='HIGH') + + def register_optimizer_hook(self, optimizer_config): + if optimizer_config is None: + return + if isinstance(optimizer_config, dict): + optimizer_config.setdefault('type', 'OptimizerHook') + hook = mmcv.build_from_cfg(optimizer_config, HOOKS) + else: + hook = optimizer_config + self.register_hook(hook, priority='ABOVE_NORMAL') + + def register_checkpoint_hook(self, checkpoint_config): + if checkpoint_config is None: + return + if isinstance(checkpoint_config, dict): + checkpoint_config.setdefault('type', 'CheckpointHook') + hook = mmcv.build_from_cfg(checkpoint_config, HOOKS) + else: + hook = checkpoint_config + self.register_hook(hook, priority='NORMAL') + + def register_logger_hooks(self, log_config): + if log_config is None: + return + log_interval = log_config['interval'] + for info in log_config['hooks']: + logger_hook = mmcv.build_from_cfg( + info, HOOKS, default_args=dict(interval=log_interval)) + self.register_hook(logger_hook, priority='VERY_LOW') + + def register_timer_hook(self, timer_config): + if timer_config is None: + return + if isinstance(timer_config, dict): + timer_config_ = copy.deepcopy(timer_config) + hook = mmcv.build_from_cfg(timer_config_, HOOKS) + else: + hook = timer_config + self.register_hook(hook, priority='LOW') + + def register_custom_hooks(self, custom_config): + if custom_config is None: + return + + if not isinstance(custom_config, list): + custom_config = [custom_config] + + for item in custom_config: + if isinstance(item, dict): + self.register_hook_from_cfg(item) + else: + self.register_hook(item, priority='NORMAL') + + def register_profiler_hook(self, profiler_config): + if profiler_config is None: + return + if isinstance(profiler_config, dict): + profiler_config.setdefault('type', 'ProfilerHook') + hook = mmcv.build_from_cfg(profiler_config, HOOKS) + else: + hook = profiler_config + self.register_hook(hook) + + def register_training_hooks(self, + lr_config, + optimizer_config=None, + checkpoint_config=None, + log_config=None, + momentum_config=None, + timer_config=dict(type='IterTimerHook'), + custom_hooks_config=None): + """Register default and custom hooks for training. 
+ + Default and custom hooks include: + + +----------------------+-------------------------+ + | Hooks | Priority | + +======================+=========================+ + | LrUpdaterHook | VERY_HIGH (10) | + +----------------------+-------------------------+ + | MomentumUpdaterHook | HIGH (30) | + +----------------------+-------------------------+ + | OptimizerStepperHook | ABOVE_NORMAL (40) | + +----------------------+-------------------------+ + | CheckpointSaverHook | NORMAL (50) | + +----------------------+-------------------------+ + | IterTimerHook | LOW (70) | + +----------------------+-------------------------+ + | LoggerHook(s) | VERY_LOW (90) | + +----------------------+-------------------------+ + | CustomHook(s) | defaults to NORMAL (50) | + +----------------------+-------------------------+ + + If custom hooks have same priority with default hooks, custom hooks + will be triggered after default hooks. + """ + self.register_lr_hook(lr_config) + self.register_momentum_hook(momentum_config) + self.register_optimizer_hook(optimizer_config) + self.register_checkpoint_hook(checkpoint_config) + self.register_timer_hook(timer_config) + self.register_logger_hooks(log_config) + self.register_custom_hooks(custom_hooks_config) diff --git a/annotator/uniformer/mmcv/runner/builder.py b/annotator/uniformer/mmcv/runner/builder.py new file mode 100644 index 0000000000000000000000000000000000000000..77c96ba0b2f30ead9da23f293c5dc84dd3e4a74f --- /dev/null +++ b/annotator/uniformer/mmcv/runner/builder.py @@ -0,0 +1,24 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import copy + +from ..utils import Registry + +RUNNERS = Registry('runner') +RUNNER_BUILDERS = Registry('runner builder') + + +def build_runner_constructor(cfg): + return RUNNER_BUILDERS.build(cfg) + + +def build_runner(cfg, default_args=None): + runner_cfg = copy.deepcopy(cfg) + constructor_type = runner_cfg.pop('constructor', + 'DefaultRunnerConstructor') + runner_constructor = build_runner_constructor( + dict( + type=constructor_type, + runner_cfg=runner_cfg, + default_args=default_args)) + runner = runner_constructor() + return runner diff --git a/annotator/uniformer/mmcv/runner/checkpoint.py b/annotator/uniformer/mmcv/runner/checkpoint.py new file mode 100644 index 0000000000000000000000000000000000000000..b29ca320679164432f446adad893e33fb2b4b29e --- /dev/null +++ b/annotator/uniformer/mmcv/runner/checkpoint.py @@ -0,0 +1,707 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import io +import os +import os.path as osp +import pkgutil +import re +import time +import warnings +from collections import OrderedDict +from importlib import import_module +from tempfile import TemporaryDirectory + +import torch +import torchvision +from torch.optim import Optimizer +from torch.utils import model_zoo + +import annotator.uniformer.mmcv as mmcv +from ..fileio import FileClient +from ..fileio import load as load_file +from ..parallel import is_module_wrapper +from ..utils import mkdir_or_exist +from .dist_utils import get_dist_info + +ENV_MMCV_HOME = 'MMCV_HOME' +ENV_XDG_CACHE_HOME = 'XDG_CACHE_HOME' +DEFAULT_CACHE_DIR = '~/.cache' + + +def _get_mmcv_home(): + mmcv_home = os.path.expanduser( + os.getenv( + ENV_MMCV_HOME, + os.path.join( + os.getenv(ENV_XDG_CACHE_HOME, DEFAULT_CACHE_DIR), 'mmcv'))) + + mkdir_or_exist(mmcv_home) + return mmcv_home + + +def load_state_dict(module, state_dict, strict=False, logger=None): + """Load state_dict to a module. + + This method is modified from :meth:`torch.nn.Module.load_state_dict`. 
+ Default value for ``strict`` is set to ``False`` and the message for + param mismatch will be shown even if strict is False. + + Args: + module (Module): Module that receives the state_dict. + state_dict (OrderedDict): Weights. + strict (bool): whether to strictly enforce that the keys + in :attr:`state_dict` match the keys returned by this module's + :meth:`~torch.nn.Module.state_dict` function. Default: ``False``. + logger (:obj:`logging.Logger`, optional): Logger to log the error + message. If not specified, print function will be used. + """ + unexpected_keys = [] + all_missing_keys = [] + err_msg = [] + + metadata = getattr(state_dict, '_metadata', None) + state_dict = state_dict.copy() + if metadata is not None: + state_dict._metadata = metadata + + # use _load_from_state_dict to enable checkpoint version control + def load(module, prefix=''): + # recursively check parallel module in case that the model has a + # complicated structure, e.g., nn.Module(nn.Module(DDP)) + if is_module_wrapper(module): + module = module.module + local_metadata = {} if metadata is None else metadata.get( + prefix[:-1], {}) + module._load_from_state_dict(state_dict, prefix, local_metadata, True, + all_missing_keys, unexpected_keys, + err_msg) + for name, child in module._modules.items(): + if child is not None: + load(child, prefix + name + '.') + + load(module) + load = None # break load->load reference cycle + + # ignore "num_batches_tracked" of BN layers + missing_keys = [ + key for key in all_missing_keys if 'num_batches_tracked' not in key + ] + + if unexpected_keys: + err_msg.append('unexpected key in source ' + f'state_dict: {", ".join(unexpected_keys)}\n') + if missing_keys: + err_msg.append( + f'missing keys in source state_dict: {", ".join(missing_keys)}\n') + + rank, _ = get_dist_info() + if len(err_msg) > 0 and rank == 0: + err_msg.insert( + 0, 'The model and loaded state dict do not match exactly\n') + err_msg = '\n'.join(err_msg) + if strict: + raise RuntimeError(err_msg) + elif logger is not None: + logger.warning(err_msg) + else: + print(err_msg) + + +def get_torchvision_models(): + model_urls = dict() + for _, name, ispkg in pkgutil.walk_packages(torchvision.models.__path__): + if ispkg: + continue + _zoo = import_module(f'torchvision.models.{name}') + if hasattr(_zoo, 'model_urls'): + _urls = getattr(_zoo, 'model_urls') + model_urls.update(_urls) + return model_urls + + +def get_external_models(): + mmcv_home = _get_mmcv_home() + default_json_path = osp.join(mmcv.__path__[0], 'model_zoo/open_mmlab.json') + default_urls = load_file(default_json_path) + assert isinstance(default_urls, dict) + external_json_path = osp.join(mmcv_home, 'open_mmlab.json') + if osp.exists(external_json_path): + external_urls = load_file(external_json_path) + assert isinstance(external_urls, dict) + default_urls.update(external_urls) + + return default_urls + + +def get_mmcls_models(): + mmcls_json_path = osp.join(mmcv.__path__[0], 'model_zoo/mmcls.json') + mmcls_urls = load_file(mmcls_json_path) + + return mmcls_urls + + +def get_deprecated_model_names(): + deprecate_json_path = osp.join(mmcv.__path__[0], + 'model_zoo/deprecated.json') + deprecate_urls = load_file(deprecate_json_path) + assert isinstance(deprecate_urls, dict) + + return deprecate_urls + + +def _process_mmcls_checkpoint(checkpoint): + state_dict = checkpoint['state_dict'] + new_state_dict = OrderedDict() + for k, v in state_dict.items(): + if k.startswith('backbone.'): + new_state_dict[k[9:]] = v + new_checkpoint = 
dict(state_dict=new_state_dict) + + return new_checkpoint + + +class CheckpointLoader: + """A general checkpoint loader to manage all schemes.""" + + _schemes = {} + + @classmethod + def _register_scheme(cls, prefixes, loader, force=False): + if isinstance(prefixes, str): + prefixes = [prefixes] + else: + assert isinstance(prefixes, (list, tuple)) + for prefix in prefixes: + if (prefix not in cls._schemes) or force: + cls._schemes[prefix] = loader + else: + raise KeyError( + f'{prefix} is already registered as a loader backend, ' + 'add "force=True" if you want to override it') + # sort, longer prefixes take priority + cls._schemes = OrderedDict( + sorted(cls._schemes.items(), key=lambda t: t[0], reverse=True)) + + @classmethod + def register_scheme(cls, prefixes, loader=None, force=False): + """Register a loader to CheckpointLoader. + + This method can be used as a normal class method or a decorator. + + Args: + prefixes (str or list[str] or tuple[str]): + The prefix of the registered loader. + loader (function, optional): The loader function to be registered. + When this method is used as a decorator, loader is None. + Defaults to None. + force (bool, optional): Whether to override the loader + if the prefix has already been registered. Defaults to False. + """ + + if loader is not None: + cls._register_scheme(prefixes, loader, force=force) + return + + def _register(loader_cls): + cls._register_scheme(prefixes, loader_cls, force=force) + return loader_cls + + return _register + + @classmethod + def _get_checkpoint_loader(cls, path): + """Finds a loader that supports the given path. Falls back to the local + loader if no other loader is found. + + Args: + path (str): checkpoint path + + Returns: + loader (function): checkpoint loader + """ + + for p in cls._schemes: + if path.startswith(p): + return cls._schemes[p] + + @classmethod + def load_checkpoint(cls, filename, map_location=None, logger=None): + """load checkpoint through URL scheme path. + + Args: + filename (str): checkpoint file name with given prefix + map_location (str, optional): Same as :func:`torch.load`. + Default: None + logger (:mod:`logging.Logger`, optional): The logger for message. + Default: None + + Returns: + dict or OrderedDict: The loaded checkpoint. + """ + + checkpoint_loader = cls._get_checkpoint_loader(filename) + class_name = checkpoint_loader.__name__ + mmcv.print_log( + f'load checkpoint from {class_name[10:]} path: {filename}', logger) + return checkpoint_loader(filename, map_location) + + +@CheckpointLoader.register_scheme(prefixes='') +def load_from_local(filename, map_location): + """load checkpoint by local file path. + + Args: + filename (str): local checkpoint file path + map_location (str, optional): Same as :func:`torch.load`. + + Returns: + dict or OrderedDict: The loaded checkpoint. + """ + + if not osp.isfile(filename): + raise IOError(f'{filename} is not a checkpoint file') + checkpoint = torch.load(filename, map_location=map_location) + return checkpoint + + +@CheckpointLoader.register_scheme(prefixes=('http://', 'https://')) +def load_from_http(filename, map_location=None, model_dir=None): + """load checkpoint through HTTP or HTTPS scheme path. In distributed + setting, this function only download checkpoint at local rank 0. + + Args: + filename (str): checkpoint file path with modelzoo or + torchvision prefix + map_location (str, optional): Same as :func:`torch.load`. 
+ model_dir (string, optional): directory in which to save the object, + Default: None + + Returns: + dict or OrderedDict: The loaded checkpoint. + """ + rank, world_size = get_dist_info() + rank = int(os.environ.get('LOCAL_RANK', rank)) + if rank == 0: + checkpoint = model_zoo.load_url( + filename, model_dir=model_dir, map_location=map_location) + if world_size > 1: + torch.distributed.barrier() + if rank > 0: + checkpoint = model_zoo.load_url( + filename, model_dir=model_dir, map_location=map_location) + return checkpoint + + +@CheckpointLoader.register_scheme(prefixes='pavi://') +def load_from_pavi(filename, map_location=None): + """load checkpoint through the file path prefixed with pavi. In distributed + setting, this function download ckpt at all ranks to different temporary + directories. + + Args: + filename (str): checkpoint file path with pavi prefix + map_location (str, optional): Same as :func:`torch.load`. + Default: None + + Returns: + dict or OrderedDict: The loaded checkpoint. + """ + assert filename.startswith('pavi://'), \ + f'Expected filename startswith `pavi://`, but get {filename}' + model_path = filename[7:] + + try: + from pavi import modelcloud + except ImportError: + raise ImportError( + 'Please install pavi to load checkpoint from modelcloud.') + + model = modelcloud.get(model_path) + with TemporaryDirectory() as tmp_dir: + downloaded_file = osp.join(tmp_dir, model.name) + model.download(downloaded_file) + checkpoint = torch.load(downloaded_file, map_location=map_location) + return checkpoint + + +@CheckpointLoader.register_scheme(prefixes='s3://') +def load_from_ceph(filename, map_location=None, backend='petrel'): + """load checkpoint through the file path prefixed with s3. In distributed + setting, this function download ckpt at all ranks to different temporary + directories. + + Args: + filename (str): checkpoint file path with s3 prefix + map_location (str, optional): Same as :func:`torch.load`. + backend (str, optional): The storage backend type. Options are 'ceph', + 'petrel'. Default: 'petrel'. + + .. warning:: + :class:`mmcv.fileio.file_client.CephBackend` will be deprecated, + please use :class:`mmcv.fileio.file_client.PetrelBackend` instead. + + Returns: + dict or OrderedDict: The loaded checkpoint. + """ + allowed_backends = ['ceph', 'petrel'] + if backend not in allowed_backends: + raise ValueError(f'Load from Backend {backend} is not supported.') + + if backend == 'ceph': + warnings.warn( + 'CephBackend will be deprecated, please use PetrelBackend instead') + + # CephClient and PetrelBackend have the same prefix 's3://' and the latter + # will be chosen as default. If PetrelBackend can not be instantiated + # successfully, the CephClient will be chosen. + try: + file_client = FileClient(backend=backend) + except ImportError: + allowed_backends.remove(backend) + file_client = FileClient(backend=allowed_backends[0]) + + with io.BytesIO(file_client.get(filename)) as buffer: + checkpoint = torch.load(buffer, map_location=map_location) + return checkpoint + + +@CheckpointLoader.register_scheme(prefixes=('modelzoo://', 'torchvision://')) +def load_from_torchvision(filename, map_location=None): + """load checkpoint through the file path prefixed with modelzoo or + torchvision. + + Args: + filename (str): checkpoint file path with modelzoo or + torchvision prefix + map_location (str, optional): Same as :func:`torch.load`. + + Returns: + dict or OrderedDict: The loaded checkpoint. 
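+
+    Example (illustrative; downloads the weights on first use):
+
+        >>> checkpoint = load_from_torchvision('torchvision://resnet50')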
+ """ + model_urls = get_torchvision_models() + if filename.startswith('modelzoo://'): + warnings.warn('The URL scheme of "modelzoo://" is deprecated, please ' + 'use "torchvision://" instead') + model_name = filename[11:] + else: + model_name = filename[14:] + return load_from_http(model_urls[model_name], map_location=map_location) + + +@CheckpointLoader.register_scheme(prefixes=('open-mmlab://', 'openmmlab://')) +def load_from_openmmlab(filename, map_location=None): + """load checkpoint through the file path prefixed with open-mmlab or + openmmlab. + + Args: + filename (str): checkpoint file path with open-mmlab or + openmmlab prefix + map_location (str, optional): Same as :func:`torch.load`. + Default: None + + Returns: + dict or OrderedDict: The loaded checkpoint. + """ + + model_urls = get_external_models() + prefix_str = 'open-mmlab://' + if filename.startswith(prefix_str): + model_name = filename[13:] + else: + model_name = filename[12:] + prefix_str = 'openmmlab://' + + deprecated_urls = get_deprecated_model_names() + if model_name in deprecated_urls: + warnings.warn(f'{prefix_str}{model_name} is deprecated in favor ' + f'of {prefix_str}{deprecated_urls[model_name]}') + model_name = deprecated_urls[model_name] + model_url = model_urls[model_name] + # check if is url + if model_url.startswith(('http://', 'https://')): + checkpoint = load_from_http(model_url, map_location=map_location) + else: + filename = osp.join(_get_mmcv_home(), model_url) + if not osp.isfile(filename): + raise IOError(f'{filename} is not a checkpoint file') + checkpoint = torch.load(filename, map_location=map_location) + return checkpoint + + +@CheckpointLoader.register_scheme(prefixes='mmcls://') +def load_from_mmcls(filename, map_location=None): + """load checkpoint through the file path prefixed with mmcls. + + Args: + filename (str): checkpoint file path with mmcls prefix + map_location (str, optional): Same as :func:`torch.load`. + + Returns: + dict or OrderedDict: The loaded checkpoint. + """ + + model_urls = get_mmcls_models() + model_name = filename[8:] + checkpoint = load_from_http( + model_urls[model_name], map_location=map_location) + checkpoint = _process_mmcls_checkpoint(checkpoint) + return checkpoint + + +def _load_checkpoint(filename, map_location=None, logger=None): + """Load checkpoint from somewhere (modelzoo, file, url). + + Args: + filename (str): Accept local filepath, URL, ``torchvision://xxx``, + ``open-mmlab://xxx``. Please refer to ``docs/model_zoo.md`` for + details. + map_location (str, optional): Same as :func:`torch.load`. + Default: None. + logger (:mod:`logging.Logger`, optional): The logger for error message. + Default: None + + Returns: + dict or OrderedDict: The loaded checkpoint. It can be either an + OrderedDict storing model weights or a dict containing other + information, which depends on the checkpoint. + """ + return CheckpointLoader.load_checkpoint(filename, map_location, logger) + + +def _load_checkpoint_with_prefix(prefix, filename, map_location=None): + """Load partial pretrained model with specific prefix. + + Args: + prefix (str): The prefix of sub-module. + filename (str): Accept local filepath, URL, ``torchvision://xxx``, + ``open-mmlab://xxx``. Please refer to ``docs/model_zoo.md`` for + details. + map_location (str | None): Same as :func:`torch.load`. Default: None. + + Returns: + dict or OrderedDict: The loaded checkpoint. 
+ """ + + checkpoint = _load_checkpoint(filename, map_location=map_location) + + if 'state_dict' in checkpoint: + state_dict = checkpoint['state_dict'] + else: + state_dict = checkpoint + if not prefix.endswith('.'): + prefix += '.' + prefix_len = len(prefix) + + state_dict = { + k[prefix_len:]: v + for k, v in state_dict.items() if k.startswith(prefix) + } + + assert state_dict, f'{prefix} is not in the pretrained model' + return state_dict + + +def load_checkpoint(model, + filename, + map_location=None, + strict=False, + logger=None, + revise_keys=[(r'^module\.', '')]): + """Load checkpoint from a file or URI. + + Args: + model (Module): Module to load checkpoint. + filename (str): Accept local filepath, URL, ``torchvision://xxx``, + ``open-mmlab://xxx``. Please refer to ``docs/model_zoo.md`` for + details. + map_location (str): Same as :func:`torch.load`. + strict (bool): Whether to allow different params for the model and + checkpoint. + logger (:mod:`logging.Logger` or None): The logger for error message. + revise_keys (list): A list of customized keywords to modify the + state_dict in checkpoint. Each item is a (pattern, replacement) + pair of the regular expression operations. Default: strip + the prefix 'module.' by [(r'^module\\.', '')]. + + Returns: + dict or OrderedDict: The loaded checkpoint. + """ + checkpoint = _load_checkpoint(filename, map_location, logger) + # OrderedDict is a subclass of dict + if not isinstance(checkpoint, dict): + raise RuntimeError( + f'No state_dict found in checkpoint file {filename}') + # get state_dict from checkpoint + if 'state_dict' in checkpoint: + state_dict = checkpoint['state_dict'] + else: + state_dict = checkpoint + + # strip prefix of state_dict + metadata = getattr(state_dict, '_metadata', OrderedDict()) + for p, r in revise_keys: + state_dict = OrderedDict( + {re.sub(p, r, k): v + for k, v in state_dict.items()}) + # Keep metadata in state_dict + state_dict._metadata = metadata + + # load state_dict + load_state_dict(model, state_dict, strict, logger) + return checkpoint + + +def weights_to_cpu(state_dict): + """Copy a model state_dict to cpu. + + Args: + state_dict (OrderedDict): Model weights on GPU. + + Returns: + OrderedDict: Model weights on GPU. + """ + state_dict_cpu = OrderedDict() + for key, val in state_dict.items(): + state_dict_cpu[key] = val.cpu() + # Keep metadata in state_dict + state_dict_cpu._metadata = getattr(state_dict, '_metadata', OrderedDict()) + return state_dict_cpu + + +def _save_to_state_dict(module, destination, prefix, keep_vars): + """Saves module state to `destination` dictionary. + + This method is modified from :meth:`torch.nn.Module._save_to_state_dict`. + + Args: + module (nn.Module): The module to generate state_dict. + destination (dict): A dict where state will be stored. + prefix (str): The prefix for parameters and buffers used in this + module. + """ + for name, param in module._parameters.items(): + if param is not None: + destination[prefix + name] = param if keep_vars else param.detach() + for name, buf in module._buffers.items(): + # remove check of _non_persistent_buffers_set to allow nn.BatchNorm2d + if buf is not None: + destination[prefix + name] = buf if keep_vars else buf.detach() + + +def get_state_dict(module, destination=None, prefix='', keep_vars=False): + """Returns a dictionary containing a whole state of the module. + + Both parameters and persistent buffers (e.g. running averages) are + included. Keys are corresponding parameter and buffer names. 
+ + This method is modified from :meth:`torch.nn.Module.state_dict` to + recursively check parallel module in case that the model has a complicated + structure, e.g., nn.Module(nn.Module(DDP)). + + Args: + module (nn.Module): The module to generate state_dict. + destination (OrderedDict): Returned dict for the state of the + module. + prefix (str): Prefix of the key. + keep_vars (bool): Whether to keep the variable property of the + parameters. Default: False. + + Returns: + dict: A dictionary containing a whole state of the module. + """ + # recursively check parallel module in case that the model has a + # complicated structure, e.g., nn.Module(nn.Module(DDP)) + if is_module_wrapper(module): + module = module.module + + # below is the same as torch.nn.Module.state_dict() + if destination is None: + destination = OrderedDict() + destination._metadata = OrderedDict() + destination._metadata[prefix[:-1]] = local_metadata = dict( + version=module._version) + _save_to_state_dict(module, destination, prefix, keep_vars) + for name, child in module._modules.items(): + if child is not None: + get_state_dict( + child, destination, prefix + name + '.', keep_vars=keep_vars) + for hook in module._state_dict_hooks.values(): + hook_result = hook(module, destination, prefix, local_metadata) + if hook_result is not None: + destination = hook_result + return destination + + +def save_checkpoint(model, + filename, + optimizer=None, + meta=None, + file_client_args=None): + """Save checkpoint to file. + + The checkpoint will have 3 fields: ``meta``, ``state_dict`` and + ``optimizer``. By default ``meta`` will contain version and time info. + + Args: + model (Module): Module whose params are to be saved. + filename (str): Checkpoint filename. + optimizer (:obj:`Optimizer`, optional): Optimizer to be saved. + meta (dict, optional): Metadata to be saved in checkpoint. + file_client_args (dict, optional): Arguments to instantiate a + FileClient. See :class:`mmcv.fileio.FileClient` for details. + Default: None. 
+            `New in version 1.3.16.`
+    """
+    if meta is None:
+        meta = {}
+    elif not isinstance(meta, dict):
+        raise TypeError(f'meta must be a dict or None, but got {type(meta)}')
+    meta.update(mmcv_version=mmcv.__version__, time=time.asctime())
+
+    if is_module_wrapper(model):
+        model = model.module
+
+    if hasattr(model, 'CLASSES') and model.CLASSES is not None:
+        # save class name to the meta
+        meta.update(CLASSES=model.CLASSES)
+
+    checkpoint = {
+        'meta': meta,
+        'state_dict': weights_to_cpu(get_state_dict(model))
+    }
+    # save optimizer state dict in the checkpoint
+    if isinstance(optimizer, Optimizer):
+        checkpoint['optimizer'] = optimizer.state_dict()
+    elif isinstance(optimizer, dict):
+        checkpoint['optimizer'] = {}
+        for name, optim in optimizer.items():
+            checkpoint['optimizer'][name] = optim.state_dict()
+
+    if filename.startswith('pavi://'):
+        if file_client_args is not None:
+            raise ValueError(
+                'file_client_args should be "None" if filename starts with '
+                f'"pavi://", but got {file_client_args}')
+        try:
+            from pavi import modelcloud
+            from pavi import exception
+        except ImportError:
+            raise ImportError(
+                'Please install pavi to save checkpoint to modelcloud.')
+        model_path = filename[7:]
+        root = modelcloud.Folder()
+        model_dir, model_name = osp.split(model_path)
+        try:
+            model = modelcloud.get(model_dir)
+        except exception.NodeNotFoundError:
+            model = root.create_training_model(model_dir)
+        with TemporaryDirectory() as tmp_dir:
+            checkpoint_file = osp.join(tmp_dir, model_name)
+            with open(checkpoint_file, 'wb') as f:
+                torch.save(checkpoint, f)
+                f.flush()
+            model.create_file(checkpoint_file, name=model_name)
+    else:
+        file_client = FileClient.infer_client(file_client_args, filename)
+        with io.BytesIO() as f:
+            torch.save(checkpoint, f)
+            file_client.put(f.getvalue(), filename)
diff --git a/annotator/uniformer/mmcv/runner/default_constructor.py b/annotator/uniformer/mmcv/runner/default_constructor.py
new file mode 100644
index 0000000000000000000000000000000000000000..3f1f5b44168768dfda3947393a63a6cf9cf50b41
--- /dev/null
+++ b/annotator/uniformer/mmcv/runner/default_constructor.py
@@ -0,0 +1,44 @@
+from .builder import RUNNER_BUILDERS, RUNNERS
+
+
+@RUNNER_BUILDERS.register_module()
+class DefaultRunnerConstructor:
+    """Default constructor for runners.
+
+    Customize an existing `Runner` like `EpochBasedRunner` through a
+    `RunnerConstructor`. For example, we can inject new properties and
+    functions into the `Runner`.
+
+    Example:
+        >>> from annotator.uniformer.mmcv.runner import RUNNER_BUILDERS, build_runner
+        >>> # Define a new RunnerConstructor
+        >>> @RUNNER_BUILDERS.register_module()
+        >>> class MyRunnerConstructor:
+        ...     def __init__(self, runner_cfg, default_args=None):
+        ...         if not isinstance(runner_cfg, dict):
+        ...             raise TypeError('runner_cfg should be a dict',
+        ...                             f'but got {type(runner_cfg)}')
+        ...         self.runner_cfg = runner_cfg
+        ...         self.default_args = default_args
+        ...
+        ...     def __call__(self):
+        ...         runner = RUNNERS.build(self.runner_cfg,
+        ...                                default_args=self.default_args)
+        ...         # Add new properties for existing runner
+        ...         runner.my_name = 'my_runner'
+        ...         runner.my_function = lambda self: print(self.my_name)
+        ...         ...
+        >>> # build your runner
+        >>> runner_cfg = dict(type='EpochBasedRunner', max_epochs=40,
+        ...
constructor='MyRunnerConstructor') + >>> runner = build_runner(runner_cfg) + """ + + def __init__(self, runner_cfg, default_args=None): + if not isinstance(runner_cfg, dict): + raise TypeError('runner_cfg should be a dict', + f'but got {type(runner_cfg)}') + self.runner_cfg = runner_cfg + self.default_args = default_args + + def __call__(self): + return RUNNERS.build(self.runner_cfg, default_args=self.default_args) diff --git a/annotator/uniformer/mmcv/runner/dist_utils.py b/annotator/uniformer/mmcv/runner/dist_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..d3a1ef3fda5ceeb31bf15a73779da1b1903ab0fe --- /dev/null +++ b/annotator/uniformer/mmcv/runner/dist_utils.py @@ -0,0 +1,164 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import functools +import os +import subprocess +from collections import OrderedDict + +import torch +import torch.multiprocessing as mp +from torch import distributed as dist +from torch._utils import (_flatten_dense_tensors, _take_tensors, + _unflatten_dense_tensors) + + +def init_dist(launcher, backend='nccl', **kwargs): + if mp.get_start_method(allow_none=True) is None: + mp.set_start_method('spawn') + if launcher == 'pytorch': + _init_dist_pytorch(backend, **kwargs) + elif launcher == 'mpi': + _init_dist_mpi(backend, **kwargs) + elif launcher == 'slurm': + _init_dist_slurm(backend, **kwargs) + else: + raise ValueError(f'Invalid launcher type: {launcher}') + + +def _init_dist_pytorch(backend, **kwargs): + # TODO: use local_rank instead of rank % num_gpus + rank = int(os.environ['RANK']) + num_gpus = torch.cuda.device_count() + torch.cuda.set_device(rank % num_gpus) + dist.init_process_group(backend=backend, **kwargs) + + +def _init_dist_mpi(backend, **kwargs): + # TODO: use local_rank instead of rank % num_gpus + rank = int(os.environ['OMPI_COMM_WORLD_RANK']) + num_gpus = torch.cuda.device_count() + torch.cuda.set_device(rank % num_gpus) + dist.init_process_group(backend=backend, **kwargs) + + +def _init_dist_slurm(backend, port=None): + """Initialize slurm distributed training environment. + + If argument ``port`` is not specified, then the master port will be system + environment variable ``MASTER_PORT``. If ``MASTER_PORT`` is not in system + environment variable, then a default port ``29500`` will be used. + + Args: + backend (str): Backend of torch.distributed. + port (int, optional): Master port. Defaults to None. 
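+
+    Example (illustrative; only works inside a SLURM job environment):
+
+        >>> init_dist('slurm', backend='nccl', port=29510)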
+ """ + proc_id = int(os.environ['SLURM_PROCID']) + ntasks = int(os.environ['SLURM_NTASKS']) + node_list = os.environ['SLURM_NODELIST'] + num_gpus = torch.cuda.device_count() + torch.cuda.set_device(proc_id % num_gpus) + addr = subprocess.getoutput( + f'scontrol show hostname {node_list} | head -n1') + # specify master port + if port is not None: + os.environ['MASTER_PORT'] = str(port) + elif 'MASTER_PORT' in os.environ: + pass # use MASTER_PORT in the environment variable + else: + # 29500 is torch.distributed default port + os.environ['MASTER_PORT'] = '29500' + # use MASTER_ADDR in the environment variable if it already exists + if 'MASTER_ADDR' not in os.environ: + os.environ['MASTER_ADDR'] = addr + os.environ['WORLD_SIZE'] = str(ntasks) + os.environ['LOCAL_RANK'] = str(proc_id % num_gpus) + os.environ['RANK'] = str(proc_id) + dist.init_process_group(backend=backend) + + +def get_dist_info(): + if dist.is_available() and dist.is_initialized(): + rank = dist.get_rank() + world_size = dist.get_world_size() + else: + rank = 0 + world_size = 1 + return rank, world_size + + +def master_only(func): + + @functools.wraps(func) + def wrapper(*args, **kwargs): + rank, _ = get_dist_info() + if rank == 0: + return func(*args, **kwargs) + + return wrapper + + +def allreduce_params(params, coalesce=True, bucket_size_mb=-1): + """Allreduce parameters. + + Args: + params (list[torch.Parameters]): List of parameters or buffers of a + model. + coalesce (bool, optional): Whether allreduce parameters as a whole. + Defaults to True. + bucket_size_mb (int, optional): Size of bucket, the unit is MB. + Defaults to -1. + """ + _, world_size = get_dist_info() + if world_size == 1: + return + params = [param.data for param in params] + if coalesce: + _allreduce_coalesced(params, world_size, bucket_size_mb) + else: + for tensor in params: + dist.all_reduce(tensor.div_(world_size)) + + +def allreduce_grads(params, coalesce=True, bucket_size_mb=-1): + """Allreduce gradients. + + Args: + params (list[torch.Parameters]): List of parameters of a model + coalesce (bool, optional): Whether allreduce parameters as a whole. + Defaults to True. + bucket_size_mb (int, optional): Size of bucket, the unit is MB. + Defaults to -1. + """ + grads = [ + param.grad.data for param in params + if param.requires_grad and param.grad is not None + ] + _, world_size = get_dist_info() + if world_size == 1: + return + if coalesce: + _allreduce_coalesced(grads, world_size, bucket_size_mb) + else: + for tensor in grads: + dist.all_reduce(tensor.div_(world_size)) + + +def _allreduce_coalesced(tensors, world_size, bucket_size_mb=-1): + if bucket_size_mb > 0: + bucket_size_bytes = bucket_size_mb * 1024 * 1024 + buckets = _take_tensors(tensors, bucket_size_bytes) + else: + buckets = OrderedDict() + for tensor in tensors: + tp = tensor.type() + if tp not in buckets: + buckets[tp] = [] + buckets[tp].append(tensor) + buckets = buckets.values() + + for bucket in buckets: + flat_tensors = _flatten_dense_tensors(bucket) + dist.all_reduce(flat_tensors) + flat_tensors.div_(world_size) + for tensor, synced in zip( + bucket, _unflatten_dense_tensors(flat_tensors, bucket)): + tensor.copy_(synced) diff --git a/annotator/uniformer/mmcv/runner/epoch_based_runner.py b/annotator/uniformer/mmcv/runner/epoch_based_runner.py new file mode 100644 index 0000000000000000000000000000000000000000..766a9ce6afdf09cd11b1b15005f5132583011348 --- /dev/null +++ b/annotator/uniformer/mmcv/runner/epoch_based_runner.py @@ -0,0 +1,187 @@ +# Copyright (c) OpenMMLab. 
All rights reserved.
+import os.path as osp
+import platform
+import shutil
+import time
+import warnings
+
+import torch
+
+import annotator.uniformer.mmcv as mmcv
+from .base_runner import BaseRunner
+from .builder import RUNNERS
+from .checkpoint import save_checkpoint
+from .utils import get_host_info
+
+
+@RUNNERS.register_module()
+class EpochBasedRunner(BaseRunner):
+    """Epoch-based Runner.
+
+    This runner trains models epoch by epoch.
+    """
+
+    def run_iter(self, data_batch, train_mode, **kwargs):
+        if self.batch_processor is not None:
+            outputs = self.batch_processor(
+                self.model, data_batch, train_mode=train_mode, **kwargs)
+        elif train_mode:
+            outputs = self.model.train_step(data_batch, self.optimizer,
+                                            **kwargs)
+        else:
+            outputs = self.model.val_step(data_batch, self.optimizer, **kwargs)
+        if not isinstance(outputs, dict):
+            raise TypeError('"batch_processor()" or "model.train_step()" '
+                            'and "model.val_step()" must return a dict')
+        if 'log_vars' in outputs:
+            self.log_buffer.update(outputs['log_vars'], outputs['num_samples'])
+        self.outputs = outputs
+
+    def train(self, data_loader, **kwargs):
+        self.model.train()
+        self.mode = 'train'
+        self.data_loader = data_loader
+        self._max_iters = self._max_epochs * len(self.data_loader)
+        self.call_hook('before_train_epoch')
+        time.sleep(2)  # Prevent possible deadlock during epoch transition
+        for i, data_batch in enumerate(self.data_loader):
+            self._inner_iter = i
+            self.call_hook('before_train_iter')
+            self.run_iter(data_batch, train_mode=True, **kwargs)
+            self.call_hook('after_train_iter')
+            self._iter += 1
+
+        self.call_hook('after_train_epoch')
+        self._epoch += 1
+
+    @torch.no_grad()
+    def val(self, data_loader, **kwargs):
+        self.model.eval()
+        self.mode = 'val'
+        self.data_loader = data_loader
+        self.call_hook('before_val_epoch')
+        time.sleep(2)  # Prevent possible deadlock during epoch transition
+        for i, data_batch in enumerate(self.data_loader):
+            self._inner_iter = i
+            self.call_hook('before_val_iter')
+            self.run_iter(data_batch, train_mode=False)
+            self.call_hook('after_val_iter')
+
+        self.call_hook('after_val_epoch')
+
+    def run(self, data_loaders, workflow, max_epochs=None, **kwargs):
+        """Start running.
+
+        Args:
+            data_loaders (list[:obj:`DataLoader`]): Dataloaders for training
+                and validation.
+            workflow (list[tuple]): A list of (phase, epochs) to specify the
+                running order and epochs. E.g., [('train', 2), ('val', 1)]
+                means running 2 epochs for training and 1 epoch for
+                validation, iteratively.
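+
+        Example (illustrative; assumes prepared train/val dataloaders):
+
+            >>> runner.run([train_loader, val_loader],
+            ...            [('train', 2), ('val', 1)])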
+ """ + assert isinstance(data_loaders, list) + assert mmcv.is_list_of(workflow, tuple) + assert len(data_loaders) == len(workflow) + if max_epochs is not None: + warnings.warn( + 'setting max_epochs in run is deprecated, ' + 'please set max_epochs in runner_config', DeprecationWarning) + self._max_epochs = max_epochs + + assert self._max_epochs is not None, ( + 'max_epochs must be specified during instantiation') + + for i, flow in enumerate(workflow): + mode, epochs = flow + if mode == 'train': + self._max_iters = self._max_epochs * len(data_loaders[i]) + break + + work_dir = self.work_dir if self.work_dir is not None else 'NONE' + self.logger.info('Start running, host: %s, work_dir: %s', + get_host_info(), work_dir) + self.logger.info('Hooks will be executed in the following order:\n%s', + self.get_hook_info()) + self.logger.info('workflow: %s, max: %d epochs', workflow, + self._max_epochs) + self.call_hook('before_run') + + while self.epoch < self._max_epochs: + for i, flow in enumerate(workflow): + mode, epochs = flow + if isinstance(mode, str): # self.train() + if not hasattr(self, mode): + raise ValueError( + f'runner has no method named "{mode}" to run an ' + 'epoch') + epoch_runner = getattr(self, mode) + else: + raise TypeError( + 'mode in workflow must be a str, but got {}'.format( + type(mode))) + + for _ in range(epochs): + if mode == 'train' and self.epoch >= self._max_epochs: + break + epoch_runner(data_loaders[i], **kwargs) + + time.sleep(1) # wait for some hooks like loggers to finish + self.call_hook('after_run') + + def save_checkpoint(self, + out_dir, + filename_tmpl='epoch_{}.pth', + save_optimizer=True, + meta=None, + create_symlink=True): + """Save the checkpoint. + + Args: + out_dir (str): The directory that checkpoints are saved. + filename_tmpl (str, optional): The checkpoint filename template, + which contains a placeholder for the epoch number. + Defaults to 'epoch_{}.pth'. + save_optimizer (bool, optional): Whether to save the optimizer to + the checkpoint. Defaults to True. + meta (dict, optional): The meta information to be saved in the + checkpoint. Defaults to None. + create_symlink (bool, optional): Whether to create a symlink + "latest.pth" to point to the latest checkpoint. + Defaults to True. + """ + if meta is None: + meta = {} + elif not isinstance(meta, dict): + raise TypeError( + f'meta should be a dict or None, but got {type(meta)}') + if self.meta is not None: + meta.update(self.meta) + # Note: meta.update(self.meta) should be done before + # meta.update(epoch=self.epoch + 1, iter=self.iter) otherwise + # there will be problems with resumed checkpoints. 
+        # More details in https://github.com/open-mmlab/mmcv/pull/1108
+        meta.update(epoch=self.epoch + 1, iter=self.iter)
+
+        filename = filename_tmpl.format(self.epoch + 1)
+        filepath = osp.join(out_dir, filename)
+        optimizer = self.optimizer if save_optimizer else None
+        save_checkpoint(self.model, filepath, optimizer=optimizer, meta=meta)
+        # in some environments, `os.symlink` is not supported, you may need to
+        # set `create_symlink` to False
+        if create_symlink:
+            dst_file = osp.join(out_dir, 'latest.pth')
+            if platform.system() != 'Windows':
+                mmcv.symlink(filename, dst_file)
+            else:
+                shutil.copy(filepath, dst_file)
+
+
+@RUNNERS.register_module()
+class Runner(EpochBasedRunner):
+    """Deprecated name of EpochBasedRunner."""
+
+    def __init__(self, *args, **kwargs):
+        warnings.warn(
+            'Runner was deprecated, please use EpochBasedRunner instead')
+        super().__init__(*args, **kwargs)
diff --git a/annotator/uniformer/mmcv/runner/fp16_utils.py b/annotator/uniformer/mmcv/runner/fp16_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..1981011d6859192e3e663e29d13500d56ba47f6c
--- /dev/null
+++ b/annotator/uniformer/mmcv/runner/fp16_utils.py
@@ -0,0 +1,410 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import functools
+import warnings
+from collections import abc
+from inspect import getfullargspec
+
+import numpy as np
+import torch
+import torch.nn as nn
+
+from annotator.uniformer.mmcv.utils import TORCH_VERSION, digit_version
+from .dist_utils import allreduce_grads as _allreduce_grads
+
+try:
+    # If PyTorch version >= 1.6.0, torch.cuda.amp.autocast would be imported
+    # and used; otherwise, auto fp16 will adopt mmcv's implementation.
+    # Note that when PyTorch >= 1.6.0, we still cast tensor types to fp16
+    # manually, so the behavior may not be consistent with real amp.
+    from torch.cuda.amp import autocast
+except ImportError:
+    pass
+
+
+def cast_tensor_type(inputs, src_type, dst_type):
+    """Recursively convert Tensors in inputs from src_type to dst_type.
+
+    Args:
+        inputs: Inputs to be cast.
+        src_type (torch.dtype): Source type.
+        dst_type (torch.dtype): Destination type.
+
+    Returns:
+        The same type as inputs, but all contained Tensors have been cast.
+    """
+    if isinstance(inputs, nn.Module):
+        return inputs
+    elif isinstance(inputs, torch.Tensor):
+        return inputs.to(dst_type)
+    elif isinstance(inputs, str):
+        return inputs
+    elif isinstance(inputs, np.ndarray):
+        return inputs
+    elif isinstance(inputs, abc.Mapping):
+        return type(inputs)({
+            k: cast_tensor_type(v, src_type, dst_type)
+            for k, v in inputs.items()
+        })
+    elif isinstance(inputs, abc.Iterable):
+        return type(inputs)(
+            cast_tensor_type(item, src_type, dst_type) for item in inputs)
+    else:
+        return inputs
+
+
+def auto_fp16(apply_to=None, out_fp32=False):
+    """Decorator to enable fp16 training automatically.
+
+    This decorator is useful when you write custom modules and want to support
+    mixed precision training. If input arguments are fp32 tensors, they will
+    be converted to fp16 automatically. Arguments other than fp32 tensors are
+    ignored. If you are using PyTorch >= 1.6, torch.cuda.amp is used as the
+    backend, otherwise, the original mmcv implementation will be adopted.
+
+    Args:
+        apply_to (Iterable, optional): The argument names to be converted.
+            `None` indicates all arguments.
+        out_fp32 (bool): Whether to convert the output back to fp32.
+ + Example: + + >>> import torch.nn as nn + >>> class MyModule1(nn.Module): + >>> + >>> # Convert x and y to fp16 + >>> @auto_fp16() + >>> def forward(self, x, y): + >>> pass + + >>> import torch.nn as nn + >>> class MyModule2(nn.Module): + >>> + >>> # convert pred to fp16 + >>> @auto_fp16(apply_to=('pred', )) + >>> def do_something(self, pred, others): + >>> pass + """ + + def auto_fp16_wrapper(old_func): + + @functools.wraps(old_func) + def new_func(*args, **kwargs): + # check if the module has set the attribute `fp16_enabled`, if not, + # just fallback to the original method. + if not isinstance(args[0], torch.nn.Module): + raise TypeError('@auto_fp16 can only be used to decorate the ' + 'method of nn.Module') + if not (hasattr(args[0], 'fp16_enabled') and args[0].fp16_enabled): + return old_func(*args, **kwargs) + + # get the arg spec of the decorated method + args_info = getfullargspec(old_func) + # get the argument names to be casted + args_to_cast = args_info.args if apply_to is None else apply_to + # convert the args that need to be processed + new_args = [] + # NOTE: default args are not taken into consideration + if args: + arg_names = args_info.args[:len(args)] + for i, arg_name in enumerate(arg_names): + if arg_name in args_to_cast: + new_args.append( + cast_tensor_type(args[i], torch.float, torch.half)) + else: + new_args.append(args[i]) + # convert the kwargs that need to be processed + new_kwargs = {} + if kwargs: + for arg_name, arg_value in kwargs.items(): + if arg_name in args_to_cast: + new_kwargs[arg_name] = cast_tensor_type( + arg_value, torch.float, torch.half) + else: + new_kwargs[arg_name] = arg_value + # apply converted arguments to the decorated method + if (TORCH_VERSION != 'parrots' and + digit_version(TORCH_VERSION) >= digit_version('1.6.0')): + with autocast(enabled=True): + output = old_func(*new_args, **new_kwargs) + else: + output = old_func(*new_args, **new_kwargs) + # cast the results back to fp32 if necessary + if out_fp32: + output = cast_tensor_type(output, torch.half, torch.float) + return output + + return new_func + + return auto_fp16_wrapper + + +def force_fp32(apply_to=None, out_fp16=False): + """Decorator to convert input arguments to fp32 in force. + + This decorator is useful when you write custom modules and want to support + mixed precision training. If there are some inputs that must be processed + in fp32 mode, then this decorator can handle it. If inputs arguments are + fp16 tensors, they will be converted to fp32 automatically. Arguments other + than fp16 tensors are ignored. If you are using PyTorch >= 1.6, + torch.cuda.amp is used as the backend, otherwise, original mmcv + implementation will be adopted. + + Args: + apply_to (Iterable, optional): The argument names to be converted. + `None` indicates all arguments. + out_fp16 (bool): Whether to convert the output back to fp16. + + Example: + + >>> import torch.nn as nn + >>> class MyModule1(nn.Module): + >>> + >>> # Convert x and y to fp32 + >>> @force_fp32() + >>> def loss(self, x, y): + >>> pass + + >>> import torch.nn as nn + >>> class MyModule2(nn.Module): + >>> + >>> # convert pred to fp32 + >>> @force_fp32(apply_to=('pred', )) + >>> def post_process(self, pred, others): + >>> pass + """ + + def force_fp32_wrapper(old_func): + + @functools.wraps(old_func) + def new_func(*args, **kwargs): + # check if the module has set the attribute `fp16_enabled`, if not, + # just fallback to the original method. 
+            if not isinstance(args[0], torch.nn.Module):
+                raise TypeError('@force_fp32 can only be used to decorate the '
+                                'method of nn.Module')
+            if not (hasattr(args[0], 'fp16_enabled') and args[0].fp16_enabled):
+                return old_func(*args, **kwargs)
+            # get the arg spec of the decorated method
+            args_info = getfullargspec(old_func)
+            # get the argument names to be cast
+            args_to_cast = args_info.args if apply_to is None else apply_to
+            # convert the args that need to be processed
+            new_args = []
+            if args:
+                arg_names = args_info.args[:len(args)]
+                for i, arg_name in enumerate(arg_names):
+                    if arg_name in args_to_cast:
+                        new_args.append(
+                            cast_tensor_type(args[i], torch.half, torch.float))
+                    else:
+                        new_args.append(args[i])
+            # convert the kwargs that need to be processed
+            new_kwargs = dict()
+            if kwargs:
+                for arg_name, arg_value in kwargs.items():
+                    if arg_name in args_to_cast:
+                        new_kwargs[arg_name] = cast_tensor_type(
+                            arg_value, torch.half, torch.float)
+                    else:
+                        new_kwargs[arg_name] = arg_value
+            # apply converted arguments to the decorated method
+            if (TORCH_VERSION != 'parrots' and
+                    digit_version(TORCH_VERSION) >= digit_version('1.6.0')):
+                with autocast(enabled=False):
+                    output = old_func(*new_args, **new_kwargs)
+            else:
+                output = old_func(*new_args, **new_kwargs)
+            # cast the results back to fp32 if necessary
+            if out_fp16:
+                output = cast_tensor_type(output, torch.float, torch.half)
+            return output
+
+        return new_func
+
+    return force_fp32_wrapper
+
+
+def allreduce_grads(params, coalesce=True, bucket_size_mb=-1):
+    warnings.warn(
+        '"mmcv.runner.fp16_utils.allreduce_grads" is deprecated, and will be '
+        'removed in v2.8. Please switch to "mmcv.runner.allreduce_grads')
+    _allreduce_grads(params, coalesce=coalesce, bucket_size_mb=bucket_size_mb)
+
+
+def wrap_fp16_model(model):
+    """Wrap the FP32 model to FP16.
+
+    If you are using PyTorch >= 1.6, torch.cuda.amp is used as the
+    backend, otherwise, the original mmcv implementation will be adopted.
+
+    For PyTorch >= 1.6, this function will
+    1. Set the fp16 flag inside the model to True.
+
+    Otherwise:
+    1. Convert the FP32 model to FP16.
+    2. Keep some necessary layers in FP32, e.g., normalization layers.
+    3. Set the `fp16_enabled` flag inside the model to True.
+
+    Args:
+        model (nn.Module): Model in FP32.
+    """
+    if (TORCH_VERSION == 'parrots'
+            or digit_version(TORCH_VERSION) < digit_version('1.6.0')):
+        # convert model to fp16
+        model.half()
+        # patch the normalization layers to make it work in fp32 mode
+        patch_norm_fp32(model)
+    # set `fp16_enabled` flag
+    for m in model.modules():
+        if hasattr(m, 'fp16_enabled'):
+            m.fp16_enabled = True
+
+
+def patch_norm_fp32(module):
+    """Recursively convert normalization layers from FP16 to FP32.
+
+    Args:
+        module (nn.Module): The modules to be converted in FP16.
+
+    Returns:
+        nn.Module: The converted module, the normalization layers have been
+            converted to FP32.
+    """
+    if isinstance(module, (nn.modules.batchnorm._BatchNorm, nn.GroupNorm)):
+        module.float()
+        if isinstance(module, nn.GroupNorm) or torch.__version__ < '1.3':
+            module.forward = patch_forward_method(module.forward, torch.half,
+                                                  torch.float)
+    for child in module.children():
+        patch_norm_fp32(child)
+    return module
+
+
+def patch_forward_method(func, src_type, dst_type, convert_output=True):
+    """Patch the forward method of a module.
+
+    Args:
+        func (callable): The original forward method.
+        src_type (torch.dtype): Type of input arguments to be converted from.
+        dst_type (torch.dtype): Type of input arguments to be converted to.
+        convert_output (bool): Whether to convert the output back to src_type.
+
+    Returns:
+        callable: The patched forward method.
+    """
+
+    def new_forward(*args, **kwargs):
+        output = func(*cast_tensor_type(args, src_type, dst_type),
+                      **cast_tensor_type(kwargs, src_type, dst_type))
+        if convert_output:
+            output = cast_tensor_type(output, dst_type, src_type)
+        return output
+
+    return new_forward
+
+
+class LossScaler:
+    """Class that manages loss scaling in mixed precision training which
+    supports both dynamic and static modes.
+
+    The implementation refers to
+    https://github.com/NVIDIA/apex/blob/master/apex/fp16_utils/loss_scaler.py.
+    Dynamic loss scaling is selected by supplying ``mode='dynamic'``.
+    It's important to understand how :class:`LossScaler` operates.
+    Loss scaling is designed to combat the problem of underflowing
+    gradients encountered when training fp16 networks for a long time.
+    Dynamic loss scaling begins by attempting a very high loss
+    scale. Ironically, this may result in OVERflowing gradients.
+    If overflowing gradients are encountered, the optimizer hook (e.g.,
+    :class:`Fp16OptimizerHook`) then skips the update step for this
+    particular iteration/minibatch,
+    and :class:`LossScaler` adjusts the loss scale to a lower value.
+    If a certain number of iterations occur without overflowing gradients
+    detected, :class:`LossScaler` increases the loss scale once more.
+    In this way :class:`LossScaler` attempts to "ride the edge" of always
+    using the highest loss scale possible without incurring overflow.
+
+    Args:
+        init_scale (float): Initial loss scale value, default: 2**32.
+        scale_factor (float): Factor used when adjusting the loss scale.
+            Default: 2.
+        mode (str): Loss scaling mode. 'dynamic' or 'static'.
+        scale_window (int): Number of consecutive iterations without an
+            overflow to wait before increasing the loss scale. Default: 1000.
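+
+    Example (an illustrative dynamic-scaling step; ``model``, ``loss`` and
+    ``optimizer`` are assumed to exist):
+
+        >>> loss_scaler = LossScaler(mode='dynamic')
+        >>> scaled_loss = loss * loss_scaler.loss_scale
+        >>> scaled_loss.backward()
+        >>> overflow = loss_scaler.has_overflow(model.parameters())
+        >>> loss_scaler.update_scale(overflow)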
+ """ + + def __init__(self, + init_scale=2**32, + mode='dynamic', + scale_factor=2., + scale_window=1000): + self.cur_scale = init_scale + self.cur_iter = 0 + assert mode in ('dynamic', + 'static'), 'mode can only be dynamic or static' + self.mode = mode + self.last_overflow_iter = -1 + self.scale_factor = scale_factor + self.scale_window = scale_window + + def has_overflow(self, params): + """Check if params contain overflow.""" + if self.mode != 'dynamic': + return False + for p in params: + if p.grad is not None and LossScaler._has_inf_or_nan(p.grad.data): + return True + return False + + def _has_inf_or_nan(x): + """Check if params contain NaN.""" + try: + cpu_sum = float(x.float().sum()) + except RuntimeError as instance: + if 'value cannot be converted' not in instance.args[0]: + raise + return True + else: + if cpu_sum == float('inf') or cpu_sum == -float('inf') \ + or cpu_sum != cpu_sum: + return True + return False + + def update_scale(self, overflow): + """update the current loss scale value when overflow happens.""" + if self.mode != 'dynamic': + return + if overflow: + self.cur_scale = max(self.cur_scale / self.scale_factor, 1) + self.last_overflow_iter = self.cur_iter + else: + if (self.cur_iter - self.last_overflow_iter) % \ + self.scale_window == 0: + self.cur_scale *= self.scale_factor + self.cur_iter += 1 + + def state_dict(self): + """Returns the state of the scaler as a :class:`dict`.""" + return dict( + cur_scale=self.cur_scale, + cur_iter=self.cur_iter, + mode=self.mode, + last_overflow_iter=self.last_overflow_iter, + scale_factor=self.scale_factor, + scale_window=self.scale_window) + + def load_state_dict(self, state_dict): + """Loads the loss_scaler state dict. + + Args: + state_dict (dict): scaler state. + """ + self.cur_scale = state_dict['cur_scale'] + self.cur_iter = state_dict['cur_iter'] + self.mode = state_dict['mode'] + self.last_overflow_iter = state_dict['last_overflow_iter'] + self.scale_factor = state_dict['scale_factor'] + self.scale_window = state_dict['scale_window'] + + @property + def loss_scale(self): + return self.cur_scale diff --git a/annotator/uniformer/mmcv/runner/hooks/__init__.py b/annotator/uniformer/mmcv/runner/hooks/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..915af28cefab14a14c1188ed861161080fd138a3 --- /dev/null +++ b/annotator/uniformer/mmcv/runner/hooks/__init__.py @@ -0,0 +1,29 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+from .checkpoint import CheckpointHook +from .closure import ClosureHook +from .ema import EMAHook +from .evaluation import DistEvalHook, EvalHook +from .hook import HOOKS, Hook +from .iter_timer import IterTimerHook +from .logger import (DvcliveLoggerHook, LoggerHook, MlflowLoggerHook, + NeptuneLoggerHook, PaviLoggerHook, TensorboardLoggerHook, + TextLoggerHook, WandbLoggerHook) +from .lr_updater import LrUpdaterHook +from .memory import EmptyCacheHook +from .momentum_updater import MomentumUpdaterHook +from .optimizer import (Fp16OptimizerHook, GradientCumulativeFp16OptimizerHook, + GradientCumulativeOptimizerHook, OptimizerHook) +from .profiler import ProfilerHook +from .sampler_seed import DistSamplerSeedHook +from .sync_buffer import SyncBuffersHook + +__all__ = [ + 'HOOKS', 'Hook', 'CheckpointHook', 'ClosureHook', 'LrUpdaterHook', + 'OptimizerHook', 'Fp16OptimizerHook', 'IterTimerHook', + 'DistSamplerSeedHook', 'EmptyCacheHook', 'LoggerHook', 'MlflowLoggerHook', + 'PaviLoggerHook', 'TextLoggerHook', 'TensorboardLoggerHook', + 'NeptuneLoggerHook', 'WandbLoggerHook', 'DvcliveLoggerHook', + 'MomentumUpdaterHook', 'SyncBuffersHook', 'EMAHook', 'EvalHook', + 'DistEvalHook', 'ProfilerHook', 'GradientCumulativeOptimizerHook', + 'GradientCumulativeFp16OptimizerHook' +] diff --git a/annotator/uniformer/mmcv/runner/hooks/checkpoint.py b/annotator/uniformer/mmcv/runner/hooks/checkpoint.py new file mode 100644 index 0000000000000000000000000000000000000000..6af3fae43ac4b35532641a81eb13557edfc7dfba --- /dev/null +++ b/annotator/uniformer/mmcv/runner/hooks/checkpoint.py @@ -0,0 +1,167 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import os.path as osp +import warnings + +from annotator.uniformer.mmcv.fileio import FileClient +from ..dist_utils import allreduce_params, master_only +from .hook import HOOKS, Hook + + +@HOOKS.register_module() +class CheckpointHook(Hook): + """Save checkpoints periodically. + + Args: + interval (int): The saving period. If ``by_epoch=True``, interval + indicates epochs, otherwise it indicates iterations. + Default: -1, which means "never". + by_epoch (bool): Saving checkpoints by epoch or by iteration. + Default: True. + save_optimizer (bool): Whether to save optimizer state_dict in the + checkpoint. It is usually used for resuming experiments. + Default: True. + out_dir (str, optional): The root directory to save checkpoints. If not + specified, ``runner.work_dir`` will be used by default. If + specified, the ``out_dir`` will be the concatenation of ``out_dir`` + and the last level directory of ``runner.work_dir``. + `Changed in version 1.3.16.` + max_keep_ckpts (int, optional): The maximum checkpoints to keep. + In some cases we want only the latest few checkpoints and would + like to delete old ones to save the disk space. + Default: -1, which means unlimited. + save_last (bool, optional): Whether to force the last checkpoint to be + saved regardless of interval. Default: True. + sync_buffer (bool, optional): Whether to synchronize buffers in + different gpus. Default: False. + file_client_args (dict, optional): Arguments to instantiate a + FileClient. See :class:`mmcv.fileio.FileClient` for details. + Default: None. + `New in version 1.3.16.` + + .. warning:: + Before v1.3.16, the ``out_dir`` argument indicates the path where the + checkpoint is stored. However, since v1.3.16, ``out_dir`` indicates the + root directory and the final path to save checkpoint is the + concatenation of ``out_dir`` and the last level directory of + ``runner.work_dir``. 
Suppose the value of ``out_dir`` is "/path/of/A" + and the value of ``runner.work_dir`` is "/path/of/B", then the final + path will be "/path/of/A/B". + """ + + def __init__(self, + interval=-1, + by_epoch=True, + save_optimizer=True, + out_dir=None, + max_keep_ckpts=-1, + save_last=True, + sync_buffer=False, + file_client_args=None, + **kwargs): + self.interval = interval + self.by_epoch = by_epoch + self.save_optimizer = save_optimizer + self.out_dir = out_dir + self.max_keep_ckpts = max_keep_ckpts + self.save_last = save_last + self.args = kwargs + self.sync_buffer = sync_buffer + self.file_client_args = file_client_args + + def before_run(self, runner): + if not self.out_dir: + self.out_dir = runner.work_dir + + self.file_client = FileClient.infer_client(self.file_client_args, + self.out_dir) + + # if `self.out_dir` is not equal to `runner.work_dir`, it means that + # `self.out_dir` is set so the final `self.out_dir` is the + # concatenation of `self.out_dir` and the last level directory of + # `runner.work_dir` + if self.out_dir != runner.work_dir: + basename = osp.basename(runner.work_dir.rstrip(osp.sep)) + self.out_dir = self.file_client.join_path(self.out_dir, basename) + + runner.logger.info((f'Checkpoints will be saved to {self.out_dir} by ' + f'{self.file_client.name}.')) + + # disable the create_symlink option because some file backends do not + # allow to create a symlink + if 'create_symlink' in self.args: + if self.args[ + 'create_symlink'] and not self.file_client.allow_symlink: + self.args['create_symlink'] = False + warnings.warn( + ('create_symlink is set as True by the user but is changed' + 'to be False because creating symbolic link is not ' + f'allowed in {self.file_client.name}')) + else: + self.args['create_symlink'] = self.file_client.allow_symlink + + def after_train_epoch(self, runner): + if not self.by_epoch: + return + + # save checkpoint for following cases: + # 1. every ``self.interval`` epochs + # 2. 
reach the last epoch of training
+        if self.every_n_epochs(
+                runner, self.interval) or (self.save_last
+                                           and self.is_last_epoch(runner)):
+            runner.logger.info(
+                f'Saving checkpoint at {runner.epoch + 1} epochs')
+            if self.sync_buffer:
+                allreduce_params(runner.model.buffers())
+            self._save_checkpoint(runner)
+
+    @master_only
+    def _save_checkpoint(self, runner):
+        """Save the current checkpoint and delete unwanted checkpoints."""
+        runner.save_checkpoint(
+            self.out_dir, save_optimizer=self.save_optimizer, **self.args)
+        if runner.meta is not None:
+            if self.by_epoch:
+                cur_ckpt_filename = self.args.get(
+                    'filename_tmpl', 'epoch_{}.pth').format(runner.epoch + 1)
+            else:
+                cur_ckpt_filename = self.args.get(
+                    'filename_tmpl', 'iter_{}.pth').format(runner.iter + 1)
+            runner.meta.setdefault('hook_msgs', dict())
+            runner.meta['hook_msgs']['last_ckpt'] = self.file_client.join_path(
+                self.out_dir, cur_ckpt_filename)
+        # remove other checkpoints
+        if self.max_keep_ckpts > 0:
+            if self.by_epoch:
+                name = 'epoch_{}.pth'
+                current_ckpt = runner.epoch + 1
+            else:
+                name = 'iter_{}.pth'
+                current_ckpt = runner.iter + 1
+            redundant_ckpts = range(
+                current_ckpt - self.max_keep_ckpts * self.interval, 0,
+                -self.interval)
+            filename_tmpl = self.args.get('filename_tmpl', name)
+            for _step in redundant_ckpts:
+                ckpt_path = self.file_client.join_path(
+                    self.out_dir, filename_tmpl.format(_step))
+                if self.file_client.isfile(ckpt_path):
+                    self.file_client.remove(ckpt_path)
+                else:
+                    break
+
+    def after_train_iter(self, runner):
+        if self.by_epoch:
+            return
+
+        # save checkpoint for following cases:
+        # 1. every ``self.interval`` iterations
+        # 2. reach the last iteration of training
+        if self.every_n_iters(
+                runner, self.interval) or (self.save_last
+                                           and self.is_last_iter(runner)):
+            runner.logger.info(
+                f'Saving checkpoint at {runner.iter + 1} iterations')
+            if self.sync_buffer:
+                allreduce_params(runner.model.buffers())
+            self._save_checkpoint(runner)
diff --git a/annotator/uniformer/mmcv/runner/hooks/closure.py b/annotator/uniformer/mmcv/runner/hooks/closure.py
new file mode 100644
index 0000000000000000000000000000000000000000..b955f81f425be4ac3e6bb3f4aac653887989e872
--- /dev/null
+++ b/annotator/uniformer/mmcv/runner/hooks/closure.py
@@ -0,0 +1,11 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .hook import HOOKS, Hook
+
+
+@HOOKS.register_module()
+class ClosureHook(Hook):
+
+    def __init__(self, fn_name, fn):
+        assert hasattr(self, fn_name)
+        assert callable(fn)
+        setattr(self, fn_name, fn)
diff --git a/annotator/uniformer/mmcv/runner/hooks/ema.py b/annotator/uniformer/mmcv/runner/hooks/ema.py
new file mode 100644
index 0000000000000000000000000000000000000000..15c7e68088f019802a59e7ae41cc1fe0c7f28f96
--- /dev/null
+++ b/annotator/uniformer/mmcv/runner/hooks/ema.py
@@ -0,0 +1,89 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from ...parallel import is_module_wrapper
+from ..hooks.hook import HOOKS, Hook
+
+
+@HOOKS.register_module()
+class EMAHook(Hook):
+    r"""Exponential Moving Average Hook.
+
+    Use Exponential Moving Average on all parameters of the model during
+    training. Every parameter has an EMA backup, which is updated by the
+    formula below. EMAHook takes priority over EvalHook and CheckpointHook.
+
+    .. math::
+
+        X_{\text{ema},t+1} = (1 - \text{momentum}) \times
+            X_{\text{ema},t} + \text{momentum} \times X_t
+
+    Args:
+        momentum (float): The momentum used for updating the ema parameters.
+            Defaults to 0.0002.
+        interval (int): Update the EMA parameters every ``interval``
+            iterations. Defaults to 1.
+        warm_up (int): During the first ``warm_up`` steps, a smaller momentum
+            may be used so that the EMA parameters update more slowly.
+            Defaults to 100.
+        resume_from (str): The checkpoint path. Defaults to None.
+    """
+
+    def __init__(self,
+                 momentum=0.0002,
+                 interval=1,
+                 warm_up=100,
+                 resume_from=None):
+        assert isinstance(interval, int) and interval > 0
+        self.warm_up = warm_up
+        self.interval = interval
+        assert momentum > 0 and momentum < 1
+        self.momentum = momentum**interval
+        self.checkpoint = resume_from
+
+    def before_run(self, runner):
+        """Make it friendlier to resume a model with its EMA parameters.
+
+        Register each EMA parameter as a ``named_buffer`` of the model.
+        """
+        model = runner.model
+        if is_module_wrapper(model):
+            model = model.module
+        self.param_ema_buffer = {}
+        self.model_parameters = dict(model.named_parameters(recurse=True))
+        for name, value in self.model_parameters.items():
+            # "." is not allowed in module's buffer name
+            buffer_name = f"ema_{name.replace('.', '_')}"
+            self.param_ema_buffer[name] = buffer_name
+            model.register_buffer(buffer_name, value.data.clone())
+        self.model_buffers = dict(model.named_buffers(recurse=True))
+        if self.checkpoint is not None:
+            runner.resume(self.checkpoint)
+
+    def after_train_iter(self, runner):
+        """Update the EMA parameters every ``self.interval`` iterations."""
+        curr_step = runner.iter
+        # We warm up the momentum considering the instability at the beginning
+        momentum = min(self.momentum,
+                       (1 + curr_step) / (self.warm_up + curr_step))
+        if curr_step % self.interval != 0:
+            return
+        for name, parameter in self.model_parameters.items():
+            buffer_name = self.param_ema_buffer[name]
+            buffer_parameter = self.model_buffers[buffer_name]
+            buffer_parameter.mul_(1 - momentum).add_(
+                parameter.data, alpha=momentum)
+
+    def after_train_epoch(self, runner):
+        """Load parameter values from the EMA backup into the model before
+        the EvalHook runs."""
+        self._swap_ema_parameters()
+
+    def before_train_epoch(self, runner):
+        """Recover the model's parameters from the EMA backup after the last
+        epoch's EvalHook."""
+        self._swap_ema_parameters()
+
+    def _swap_ema_parameters(self):
+        """Swap the model's parameters with the parameters in ema_buffer."""
+        for name, value in self.model_parameters.items():
+            temp = value.data.clone()
+            ema_buffer = self.model_buffers[self.param_ema_buffer[name]]
+            value.data.copy_(ema_buffer.data)
+            ema_buffer.data.copy_(temp)
diff --git a/annotator/uniformer/mmcv/runner/hooks/evaluation.py b/annotator/uniformer/mmcv/runner/hooks/evaluation.py
new file mode 100644
index 0000000000000000000000000000000000000000..4d00999ce5665c53bded8de9e084943eee2d230d
--- /dev/null
+++ b/annotator/uniformer/mmcv/runner/hooks/evaluation.py
@@ -0,0 +1,509 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import warnings
+from math import inf
+
+import torch.distributed as dist
+from torch.nn.modules.batchnorm import _BatchNorm
+from torch.utils.data import DataLoader
+
+from annotator.uniformer.mmcv.fileio import FileClient
+from annotator.uniformer.mmcv.utils import is_seq_of
+from .hook import Hook
+from .logger import LoggerHook
+
+
+class EvalHook(Hook):
+    """Non-distributed evaluation hook.
+
+    This hook will regularly perform evaluation in a given interval when
+    running in a non-distributed environment.
+
+    Args:
+        dataloader (DataLoader): A PyTorch dataloader, whose dataset has
+            implemented ``evaluate`` function.
+        start (int | None, optional): Evaluation starting epoch. It enables
+            evaluation before the training starts if ``start`` <= the
+            resuming epoch. If None, whether to evaluate is merely decided
+            by ``interval``. Default: None.
+        interval (int): Evaluation interval. Default: 1.
+        by_epoch (bool): Whether to perform evaluation by epoch or by
+            iteration. If set to True, it will perform by epoch. Otherwise,
+            by iteration. Default: True.
+        save_best (str, optional): If a metric is specified, the best
+            checkpoint during evaluation will be tracked. The information
+            about the best checkpoint will be saved in
+            ``runner.meta['hook_msgs']`` to keep the best score value and
+            best checkpoint path, which will also be loaded when resuming a
+            checkpoint. Options are the evaluation metrics on the test
+            dataset, e.g., ``bbox_mAP``, ``segm_mAP`` for bbox detection and
+            instance segmentation, ``AR@100`` for proposal recall. If
+            ``save_best`` is ``auto``, the first key of the returned
+            ``OrderedDict`` result will be used. Default: None.
+        rule (str | None, optional): Comparison rule for the best score. If
+            set to None, it will infer a reasonable rule. Keys such as
+            'acc', 'top', etc. will be inferred by the 'greater' rule. Keys
+            containing 'loss' will be inferred by the 'less' rule. Options
+            are 'greater', 'less', None. Default: None.
+        test_fn (callable, optional): Tests a model with samples from a
+            dataloader and returns the test results. If ``None``, the
+            default test function ``mmcv.engine.single_gpu_test`` will be
+            used. (default: ``None``)
+        greater_keys (List[str] | None, optional): Metric keys that will be
+            inferred by the 'greater' comparison rule. If ``None``,
+            ``_default_greater_keys`` will be used. (default: ``None``)
+        less_keys (List[str] | None, optional): Metric keys that will be
+            inferred by the 'less' comparison rule. If ``None``,
+            ``_default_less_keys`` will be used. (default: ``None``)
+        out_dir (str, optional): The root directory to save checkpoints. If
+            not specified, `runner.work_dir` will be used by default. If
+            specified, the `out_dir` will be the concatenation of `out_dir`
+            and the last-level directory of `runner.work_dir`.
+            `New in version 1.3.16.`
+        file_client_args (dict): Arguments to instantiate a FileClient.
+            See :class:`mmcv.fileio.FileClient` for details. Default: None.
+            `New in version 1.3.16.`
+        **eval_kwargs: Evaluation arguments fed into the evaluate function of
+            the dataset.
+
+    Note:
+        If new arguments are added for EvalHook, tools/test.py and
+        tools/eval_metric.py may be affected.
+    """
+
+    # Since the keys for determining greater or less are related to the
+    # downstream tasks, downstream repos may need to overwrite the following
+    # inner variables accordingly, e.g. as sketched below.
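+    # A hypothetical subclass sketch (not defined in this file), showing how
+    # a downstream repo could extend the default keys:
+    #
+    #     class F1EvalHook(EvalHook):
+    #         _default_greater_keys = EvalHook._default_greater_keys + ['F1']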
+
+    rule_map = {'greater': lambda x, y: x > y, 'less': lambda x, y: x < y}
+    init_value_map = {'greater': -inf, 'less': inf}
+    _default_greater_keys = [
+        'acc', 'top', 'AR@', 'auc', 'precision', 'mAP', 'mDice', 'mIoU',
+        'mAcc', 'aAcc'
+    ]
+    _default_less_keys = ['loss']
+
+    def __init__(self,
+                 dataloader,
+                 start=None,
+                 interval=1,
+                 by_epoch=True,
+                 save_best=None,
+                 rule=None,
+                 test_fn=None,
+                 greater_keys=None,
+                 less_keys=None,
+                 out_dir=None,
+                 file_client_args=None,
+                 **eval_kwargs):
+        if not isinstance(dataloader, DataLoader):
+            raise TypeError(f'dataloader must be a pytorch DataLoader, '
+                            f'but got {type(dataloader)}')
+
+        if interval <= 0:
+            raise ValueError(f'interval must be a positive number, '
+                             f'but got {interval}')
+
+        assert isinstance(by_epoch, bool), '``by_epoch`` should be a boolean'
+
+        if start is not None and start < 0:
+            raise ValueError(f'The evaluation start epoch {start} is smaller '
+                             f'than 0')
+
+        self.dataloader = dataloader
+        self.interval = interval
+        self.start = start
+        self.by_epoch = by_epoch
+
+        assert isinstance(save_best, str) or save_best is None, \
+            '``save_best`` should be a str or None ' \
+            f'rather than {type(save_best)}'
+        self.save_best = save_best
+        self.eval_kwargs = eval_kwargs
+        self.initial_flag = True
+
+        if test_fn is None:
+            from annotator.uniformer.mmcv.engine import single_gpu_test
+            self.test_fn = single_gpu_test
+        else:
+            self.test_fn = test_fn
+
+        if greater_keys is None:
+            self.greater_keys = self._default_greater_keys
+        else:
+            if not isinstance(greater_keys, (list, tuple)):
+                greater_keys = (greater_keys, )
+            assert is_seq_of(greater_keys, str)
+            self.greater_keys = greater_keys
+
+        if less_keys is None:
+            self.less_keys = self._default_less_keys
+        else:
+            if not isinstance(less_keys, (list, tuple)):
+                less_keys = (less_keys, )
+            assert is_seq_of(less_keys, str)
+            self.less_keys = less_keys
+
+        if self.save_best is not None:
+            self.best_ckpt_path = None
+            self._init_rule(rule, self.save_best)
+
+        self.out_dir = out_dir
+        self.file_client_args = file_client_args
+
+    def _init_rule(self, rule, key_indicator):
+        """Initialize rule, key_indicator, comparison_func, and best score.
+
+        Here is how the comparison rule is determined for a key indicator
+        when ``rule`` is not specified (note that the key indicator matching
+        is case-insensitive):
+        1. If the key indicator is in ``self.greater_keys``, the rule will
+           be 'greater'.
+        2. Or if the key indicator is in ``self.less_keys``, the rule will
+           be 'less'.
+        3. Or if any item in ``self.greater_keys`` is a substring of the key
+           indicator, the rule will be 'greater'.
+        4. Or if any item in ``self.less_keys`` is a substring of the key
+           indicator, the rule will be 'less'.
+
+        Args:
+            rule (str | None): Comparison rule for the best score.
+            key_indicator (str | None): Key indicator to determine the
+                comparison rule.
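+
+        Example::
+
+            # A minimal sketch of the inference, assuming the default
+            # greater/less keys and an existing EvalHook instance ``hook``:
+            >>> hook._init_rule(None, 'bbox_mAP')
+            >>> hook.rule  # 'mAP' is a substring of the key indicator
+            'greater'
+            >>> hook._init_rule(None, 'val_loss')
+            >>> hook.rule  # 'loss' is a substring of the key indicator
+            'less'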
+ """ + if rule not in self.rule_map and rule is not None: + raise KeyError(f'rule must be greater, less or None, ' + f'but got {rule}.') + + if rule is None: + if key_indicator != 'auto': + # `_lc` here means we use the lower case of keys for + # case-insensitive matching + key_indicator_lc = key_indicator.lower() + greater_keys = [key.lower() for key in self.greater_keys] + less_keys = [key.lower() for key in self.less_keys] + + if key_indicator_lc in greater_keys: + rule = 'greater' + elif key_indicator_lc in less_keys: + rule = 'less' + elif any(key in key_indicator_lc for key in greater_keys): + rule = 'greater' + elif any(key in key_indicator_lc for key in less_keys): + rule = 'less' + else: + raise ValueError(f'Cannot infer the rule for key ' + f'{key_indicator}, thus a specific rule ' + f'must be specified.') + self.rule = rule + self.key_indicator = key_indicator + if self.rule is not None: + self.compare_func = self.rule_map[self.rule] + + def before_run(self, runner): + if not self.out_dir: + self.out_dir = runner.work_dir + + self.file_client = FileClient.infer_client(self.file_client_args, + self.out_dir) + + # if `self.out_dir` is not equal to `runner.work_dir`, it means that + # `self.out_dir` is set so the final `self.out_dir` is the + # concatenation of `self.out_dir` and the last level directory of + # `runner.work_dir` + if self.out_dir != runner.work_dir: + basename = osp.basename(runner.work_dir.rstrip(osp.sep)) + self.out_dir = self.file_client.join_path(self.out_dir, basename) + runner.logger.info( + (f'The best checkpoint will be saved to {self.out_dir} by ' + f'{self.file_client.name}')) + + if self.save_best is not None: + if runner.meta is None: + warnings.warn('runner.meta is None. Creating an empty one.') + runner.meta = dict() + runner.meta.setdefault('hook_msgs', dict()) + self.best_ckpt_path = runner.meta['hook_msgs'].get( + 'best_ckpt', None) + + def before_train_iter(self, runner): + """Evaluate the model only at the start of training by iteration.""" + if self.by_epoch or not self.initial_flag: + return + if self.start is not None and runner.iter >= self.start: + self.after_train_iter(runner) + self.initial_flag = False + + def before_train_epoch(self, runner): + """Evaluate the model only at the start of training by epoch.""" + if not (self.by_epoch and self.initial_flag): + return + if self.start is not None and runner.epoch >= self.start: + self.after_train_epoch(runner) + self.initial_flag = False + + def after_train_iter(self, runner): + """Called after every training iter to evaluate the results.""" + if not self.by_epoch and self._should_evaluate(runner): + # Because the priority of EvalHook is higher than LoggerHook, the + # training log and the evaluating log are mixed. Therefore, + # we need to dump the training log and clear it before evaluating + # log is generated. In addition, this problem will only appear in + # `IterBasedRunner` whose `self.by_epoch` is False, because + # `EpochBasedRunner` whose `self.by_epoch` is True calls + # `_do_evaluate` in `after_train_epoch` stage, and at this stage + # the training log has been printed, so it will not cause any + # problem. 
More details at
+            # https://github.com/open-mmlab/mmsegmentation/issues/694
+            for hook in runner._hooks:
+                if isinstance(hook, LoggerHook):
+                    hook.after_train_iter(runner)
+            runner.log_buffer.clear()
+
+            self._do_evaluate(runner)
+
+    def after_train_epoch(self, runner):
+        """Called after every training epoch to evaluate the results."""
+        if self.by_epoch and self._should_evaluate(runner):
+            self._do_evaluate(runner)
+
+    def _do_evaluate(self, runner):
+        """Perform evaluation and save checkpoints."""
+        results = self.test_fn(runner.model, self.dataloader)
+        runner.log_buffer.output['eval_iter_num'] = len(self.dataloader)
+        key_score = self.evaluate(runner, results)
+        # the key_score may be `None` so it needs to skip the action to save
+        # the best checkpoint
+        if self.save_best and key_score:
+            self._save_ckpt(runner, key_score)
+
+    def _should_evaluate(self, runner):
+        """Judge whether to perform evaluation.
+
+        Here is the rule to judge whether to perform evaluation:
+        1. It will not perform evaluation during the epoch/iteration
+           interval, which is determined by ``self.interval``.
+        2. It will not perform evaluation if the start time is later than
+           the current time.
+        3. It will not perform evaluation when the current time is past the
+           start time but still falls within the epoch/iteration interval.
+
+        Returns:
+            bool: The flag indicating whether to perform evaluation.
+        """
+        if self.by_epoch:
+            current = runner.epoch
+            check_time = self.every_n_epochs
+        else:
+            current = runner.iter
+            check_time = self.every_n_iters
+
+        if self.start is None:
+            if not check_time(runner, self.interval):
+                # No evaluation during the interval.
+                return False
+        elif (current + 1) < self.start:
+            # No evaluation if start is larger than the current time.
+            return False
+        else:
+            # Evaluation only at epochs/iters 3, 5, 7...
+            # if start==3 and interval==2
+            if (current + 1 - self.start) % self.interval:
+                return False
+        return True
+
+    def _save_ckpt(self, runner, key_score):
+        """Save the best checkpoint.
+
+        It will compare the score according to the compare function, write
+        related information (best score, best checkpoint path) and save the
+        best checkpoint into ``work_dir``.
+        """
+        if self.by_epoch:
+            current = f'epoch_{runner.epoch + 1}'
+            cur_type, cur_time = 'epoch', runner.epoch + 1
+        else:
+            current = f'iter_{runner.iter + 1}'
+            cur_type, cur_time = 'iter', runner.iter + 1
+
+        best_score = runner.meta['hook_msgs'].get(
+            'best_score', self.init_value_map[self.rule])
+        if self.compare_func(key_score, best_score):
+            best_score = key_score
+            runner.meta['hook_msgs']['best_score'] = best_score
+
+            if self.best_ckpt_path and self.file_client.isfile(
+                    self.best_ckpt_path):
+                self.file_client.remove(self.best_ckpt_path)
+                runner.logger.info(
+                    (f'The previous best checkpoint {self.best_ckpt_path} '
+                     'was removed'))
+
+            best_ckpt_name = f'best_{self.key_indicator}_{current}.pth'
+            self.best_ckpt_path = self.file_client.join_path(
+                self.out_dir, best_ckpt_name)
+            runner.meta['hook_msgs']['best_ckpt'] = self.best_ckpt_path
+
+            runner.save_checkpoint(
+                self.out_dir, best_ckpt_name, create_symlink=False)
+            runner.logger.info(
+                f'Now best checkpoint is saved as {best_ckpt_name}.')
+            runner.logger.info(
+                f'Best {self.key_indicator} is {best_score:0.4f} '
+                f'at {cur_time} {cur_type}.')
+
+    def evaluate(self, runner, results):
+        """Evaluate the results.
+
+        Args:
+            runner (:obj:`mmcv.Runner`): The underlying training runner.
+            results (list): Output results.
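+
+        Returns:
+            float | None: The score of ``self.key_indicator`` if
+                ``save_best`` is set and the evaluation results are
+                non-empty, otherwise None.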
+ """ + eval_res = self.dataloader.dataset.evaluate( + results, logger=runner.logger, **self.eval_kwargs) + + for name, val in eval_res.items(): + runner.log_buffer.output[name] = val + runner.log_buffer.ready = True + + if self.save_best is not None: + # If the performance of model is pool, the `eval_res` may be an + # empty dict and it will raise exception when `self.save_best` is + # not None. More details at + # https://github.com/open-mmlab/mmdetection/issues/6265. + if not eval_res: + warnings.warn( + 'Since `eval_res` is an empty dict, the behavior to save ' + 'the best checkpoint will be skipped in this evaluation.') + return None + + if self.key_indicator == 'auto': + # infer from eval_results + self._init_rule(self.rule, list(eval_res.keys())[0]) + return eval_res[self.key_indicator] + + return None + + +class DistEvalHook(EvalHook): + """Distributed evaluation hook. + + This hook will regularly perform evaluation in a given interval when + performing in distributed environment. + + Args: + dataloader (DataLoader): A PyTorch dataloader, whose dataset has + implemented ``evaluate`` function. + start (int | None, optional): Evaluation starting epoch. It enables + evaluation before the training starts if ``start`` <= the resuming + epoch. If None, whether to evaluate is merely decided by + ``interval``. Default: None. + interval (int): Evaluation interval. Default: 1. + by_epoch (bool): Determine perform evaluation by epoch or by iteration. + If set to True, it will perform by epoch. Otherwise, by iteration. + default: True. + save_best (str, optional): If a metric is specified, it would measure + the best checkpoint during evaluation. The information about best + checkpoint would be saved in ``runner.meta['hook_msgs']`` to keep + best score value and best checkpoint path, which will be also + loaded when resume checkpoint. Options are the evaluation metrics + on the test dataset. e.g., ``bbox_mAP``, ``segm_mAP`` for bbox + detection and instance segmentation. ``AR@100`` for proposal + recall. If ``save_best`` is ``auto``, the first key of the returned + ``OrderedDict`` result will be used. Default: None. + rule (str | None, optional): Comparison rule for best score. If set to + None, it will infer a reasonable rule. Keys such as 'acc', 'top' + .etc will be inferred by 'greater' rule. Keys contain 'loss' will + be inferred by 'less' rule. Options are 'greater', 'less', None. + Default: None. + test_fn (callable, optional): test a model with samples from a + dataloader in a multi-gpu manner, and return the test results. If + ``None``, the default test function ``mmcv.engine.multi_gpu_test`` + will be used. (default: ``None``) + tmpdir (str | None): Temporary directory to save the results of all + processes. Default: None. + gpu_collect (bool): Whether to use gpu or cpu to collect results. + Default: False. + broadcast_bn_buffer (bool): Whether to broadcast the + buffer(running_mean and running_var) of rank 0 to other rank + before evaluation. Default: True. + out_dir (str, optional): The root directory to save checkpoints. If not + specified, `runner.work_dir` will be used by default. If specified, + the `out_dir` will be the concatenation of `out_dir` and the last + level directory of `runner.work_dir`. + file_client_args (dict): Arguments to instantiate a FileClient. + See :class:`mmcv.fileio.FileClient` for details. Default: None. + **eval_kwargs: Evaluation arguments fed into the evaluate function of + the dataset. 
+ """ + + def __init__(self, + dataloader, + start=None, + interval=1, + by_epoch=True, + save_best=None, + rule=None, + test_fn=None, + greater_keys=None, + less_keys=None, + broadcast_bn_buffer=True, + tmpdir=None, + gpu_collect=False, + out_dir=None, + file_client_args=None, + **eval_kwargs): + + if test_fn is None: + from annotator.uniformer.mmcv.engine import multi_gpu_test + test_fn = multi_gpu_test + + super().__init__( + dataloader, + start=start, + interval=interval, + by_epoch=by_epoch, + save_best=save_best, + rule=rule, + test_fn=test_fn, + greater_keys=greater_keys, + less_keys=less_keys, + out_dir=out_dir, + file_client_args=file_client_args, + **eval_kwargs) + + self.broadcast_bn_buffer = broadcast_bn_buffer + self.tmpdir = tmpdir + self.gpu_collect = gpu_collect + + def _do_evaluate(self, runner): + """perform evaluation and save ckpt.""" + # Synchronization of BatchNorm's buffer (running_mean + # and running_var) is not supported in the DDP of pytorch, + # which may cause the inconsistent performance of models in + # different ranks, so we broadcast BatchNorm's buffers + # of rank 0 to other ranks to avoid this. + if self.broadcast_bn_buffer: + model = runner.model + for name, module in model.named_modules(): + if isinstance(module, + _BatchNorm) and module.track_running_stats: + dist.broadcast(module.running_var, 0) + dist.broadcast(module.running_mean, 0) + + tmpdir = self.tmpdir + if tmpdir is None: + tmpdir = osp.join(runner.work_dir, '.eval_hook') + + results = self.test_fn( + runner.model, + self.dataloader, + tmpdir=tmpdir, + gpu_collect=self.gpu_collect) + if runner.rank == 0: + print('\n') + runner.log_buffer.output['eval_iter_num'] = len(self.dataloader) + key_score = self.evaluate(runner, results) + # the key_score may be `None` so it needs to skip the action to + # save the best checkpoint + if self.save_best and key_score: + self._save_ckpt(runner, key_score) diff --git a/annotator/uniformer/mmcv/runner/hooks/hook.py b/annotator/uniformer/mmcv/runner/hooks/hook.py new file mode 100644 index 0000000000000000000000000000000000000000..b8855c107727ecf85b917c890fc8b7f6359238a4 --- /dev/null +++ b/annotator/uniformer/mmcv/runner/hooks/hook.py @@ -0,0 +1,92 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+from annotator.uniformer.mmcv.utils import Registry, is_method_overridden + +HOOKS = Registry('hook') + + +class Hook: + stages = ('before_run', 'before_train_epoch', 'before_train_iter', + 'after_train_iter', 'after_train_epoch', 'before_val_epoch', + 'before_val_iter', 'after_val_iter', 'after_val_epoch', + 'after_run') + + def before_run(self, runner): + pass + + def after_run(self, runner): + pass + + def before_epoch(self, runner): + pass + + def after_epoch(self, runner): + pass + + def before_iter(self, runner): + pass + + def after_iter(self, runner): + pass + + def before_train_epoch(self, runner): + self.before_epoch(runner) + + def before_val_epoch(self, runner): + self.before_epoch(runner) + + def after_train_epoch(self, runner): + self.after_epoch(runner) + + def after_val_epoch(self, runner): + self.after_epoch(runner) + + def before_train_iter(self, runner): + self.before_iter(runner) + + def before_val_iter(self, runner): + self.before_iter(runner) + + def after_train_iter(self, runner): + self.after_iter(runner) + + def after_val_iter(self, runner): + self.after_iter(runner) + + def every_n_epochs(self, runner, n): + return (runner.epoch + 1) % n == 0 if n > 0 else False + + def every_n_inner_iters(self, runner, n): + return (runner.inner_iter + 1) % n == 0 if n > 0 else False + + def every_n_iters(self, runner, n): + return (runner.iter + 1) % n == 0 if n > 0 else False + + def end_of_epoch(self, runner): + return runner.inner_iter + 1 == len(runner.data_loader) + + def is_last_epoch(self, runner): + return runner.epoch + 1 == runner._max_epochs + + def is_last_iter(self, runner): + return runner.iter + 1 == runner._max_iters + + def get_triggered_stages(self): + trigger_stages = set() + for stage in Hook.stages: + if is_method_overridden(stage, Hook, self): + trigger_stages.add(stage) + + # some methods will be triggered in multi stages + # use this dict to map method to stages. + method_stages_map = { + 'before_epoch': ['before_train_epoch', 'before_val_epoch'], + 'after_epoch': ['after_train_epoch', 'after_val_epoch'], + 'before_iter': ['before_train_iter', 'before_val_iter'], + 'after_iter': ['after_train_iter', 'after_val_iter'], + } + + for method, map_stages in method_stages_map.items(): + if is_method_overridden(method, Hook, self): + trigger_stages.update(map_stages) + + return [stage for stage in Hook.stages if stage in trigger_stages] diff --git a/annotator/uniformer/mmcv/runner/hooks/iter_timer.py b/annotator/uniformer/mmcv/runner/hooks/iter_timer.py new file mode 100644 index 0000000000000000000000000000000000000000..cfd5002fe85ffc6992155ac01003878064a1d9be --- /dev/null +++ b/annotator/uniformer/mmcv/runner/hooks/iter_timer.py @@ -0,0 +1,18 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import time + +from .hook import HOOKS, Hook + + +@HOOKS.register_module() +class IterTimerHook(Hook): + + def before_epoch(self, runner): + self.t = time.time() + + def before_iter(self, runner): + runner.log_buffer.update({'data_time': time.time() - self.t}) + + def after_iter(self, runner): + runner.log_buffer.update({'time': time.time() - self.t}) + self.t = time.time() diff --git a/annotator/uniformer/mmcv/runner/hooks/logger/__init__.py b/annotator/uniformer/mmcv/runner/hooks/logger/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a0b6b345640a895368ac8a647afef6f24333d90e --- /dev/null +++ b/annotator/uniformer/mmcv/runner/hooks/logger/__init__.py @@ -0,0 +1,15 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
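+# All logger hooks below are registered in the HOOKS registry, so they are
+# usually enabled from a config rather than instantiated directly, e.g. (a
+# hypothetical config snippet following mmcv's usual conventions):
+#
+#     log_config = dict(
+#         interval=50,
+#         hooks=[
+#             dict(type='TextLoggerHook'),
+#             dict(type='TensorboardLoggerHook'),
+#         ])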
+from .base import LoggerHook +from .dvclive import DvcliveLoggerHook +from .mlflow import MlflowLoggerHook +from .neptune import NeptuneLoggerHook +from .pavi import PaviLoggerHook +from .tensorboard import TensorboardLoggerHook +from .text import TextLoggerHook +from .wandb import WandbLoggerHook + +__all__ = [ + 'LoggerHook', 'MlflowLoggerHook', 'PaviLoggerHook', + 'TensorboardLoggerHook', 'TextLoggerHook', 'WandbLoggerHook', + 'NeptuneLoggerHook', 'DvcliveLoggerHook' +] diff --git a/annotator/uniformer/mmcv/runner/hooks/logger/base.py b/annotator/uniformer/mmcv/runner/hooks/logger/base.py new file mode 100644 index 0000000000000000000000000000000000000000..f845256729458ced821762a1b8ef881e17ff9955 --- /dev/null +++ b/annotator/uniformer/mmcv/runner/hooks/logger/base.py @@ -0,0 +1,166 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import numbers +from abc import ABCMeta, abstractmethod + +import numpy as np +import torch + +from ..hook import Hook + + +class LoggerHook(Hook): + """Base class for logger hooks. + + Args: + interval (int): Logging interval (every k iterations). + ignore_last (bool): Ignore the log of last iterations in each epoch + if less than `interval`. + reset_flag (bool): Whether to clear the output buffer after logging. + by_epoch (bool): Whether EpochBasedRunner is used. + """ + + __metaclass__ = ABCMeta + + def __init__(self, + interval=10, + ignore_last=True, + reset_flag=False, + by_epoch=True): + self.interval = interval + self.ignore_last = ignore_last + self.reset_flag = reset_flag + self.by_epoch = by_epoch + + @abstractmethod + def log(self, runner): + pass + + @staticmethod + def is_scalar(val, include_np=True, include_torch=True): + """Tell the input variable is a scalar or not. + + Args: + val: Input variable. + include_np (bool): Whether include 0-d np.ndarray as a scalar. + include_torch (bool): Whether include 0-d torch.Tensor as a scalar. + + Returns: + bool: True or False. 
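+
+        Example::
+
+            >>> LoggerHook.is_scalar(3.14)
+            True
+            >>> LoggerHook.is_scalar(np.zeros(()))  # 0-d ndarray
+            True
+            >>> LoggerHook.is_scalar(torch.ones(2))  # 1-d tensor of length 2
+            False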
+ """ + if isinstance(val, numbers.Number): + return True + elif include_np and isinstance(val, np.ndarray) and val.ndim == 0: + return True + elif include_torch and isinstance(val, torch.Tensor) and len(val) == 1: + return True + else: + return False + + def get_mode(self, runner): + if runner.mode == 'train': + if 'time' in runner.log_buffer.output: + mode = 'train' + else: + mode = 'val' + elif runner.mode == 'val': + mode = 'val' + else: + raise ValueError(f"runner mode should be 'train' or 'val', " + f'but got {runner.mode}') + return mode + + def get_epoch(self, runner): + if runner.mode == 'train': + epoch = runner.epoch + 1 + elif runner.mode == 'val': + # normal val mode + # runner.epoch += 1 has been done before val workflow + epoch = runner.epoch + else: + raise ValueError(f"runner mode should be 'train' or 'val', " + f'but got {runner.mode}') + return epoch + + def get_iter(self, runner, inner_iter=False): + """Get the current training iteration step.""" + if self.by_epoch and inner_iter: + current_iter = runner.inner_iter + 1 + else: + current_iter = runner.iter + 1 + return current_iter + + def get_lr_tags(self, runner): + tags = {} + lrs = runner.current_lr() + if isinstance(lrs, dict): + for name, value in lrs.items(): + tags[f'learning_rate/{name}'] = value[0] + else: + tags['learning_rate'] = lrs[0] + return tags + + def get_momentum_tags(self, runner): + tags = {} + momentums = runner.current_momentum() + if isinstance(momentums, dict): + for name, value in momentums.items(): + tags[f'momentum/{name}'] = value[0] + else: + tags['momentum'] = momentums[0] + return tags + + def get_loggable_tags(self, + runner, + allow_scalar=True, + allow_text=False, + add_mode=True, + tags_to_skip=('time', 'data_time')): + tags = {} + for var, val in runner.log_buffer.output.items(): + if var in tags_to_skip: + continue + if self.is_scalar(val) and not allow_scalar: + continue + if isinstance(val, str) and not allow_text: + continue + if add_mode: + var = f'{self.get_mode(runner)}/{var}' + tags[var] = val + tags.update(self.get_lr_tags(runner)) + tags.update(self.get_momentum_tags(runner)) + return tags + + def before_run(self, runner): + for hook in runner.hooks[::-1]: + if isinstance(hook, LoggerHook): + hook.reset_flag = True + break + + def before_epoch(self, runner): + runner.log_buffer.clear() # clear logs of last epoch + + def after_train_iter(self, runner): + if self.by_epoch and self.every_n_inner_iters(runner, self.interval): + runner.log_buffer.average(self.interval) + elif not self.by_epoch and self.every_n_iters(runner, self.interval): + runner.log_buffer.average(self.interval) + elif self.end_of_epoch(runner) and not self.ignore_last: + # not precise but more stable + runner.log_buffer.average(self.interval) + + if runner.log_buffer.ready: + self.log(runner) + if self.reset_flag: + runner.log_buffer.clear_output() + + def after_train_epoch(self, runner): + if runner.log_buffer.ready: + self.log(runner) + if self.reset_flag: + runner.log_buffer.clear_output() + + def after_val_epoch(self, runner): + runner.log_buffer.average() + self.log(runner) + if self.reset_flag: + runner.log_buffer.clear_output() diff --git a/annotator/uniformer/mmcv/runner/hooks/logger/dvclive.py b/annotator/uniformer/mmcv/runner/hooks/logger/dvclive.py new file mode 100644 index 0000000000000000000000000000000000000000..687cdc58c0336c92b1e4f9a410ba67ebaab2bc7a --- /dev/null +++ b/annotator/uniformer/mmcv/runner/hooks/logger/dvclive.py @@ -0,0 +1,58 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+from ...dist_utils import master_only +from ..hook import HOOKS +from .base import LoggerHook + + +@HOOKS.register_module() +class DvcliveLoggerHook(LoggerHook): + """Class to log metrics with dvclive. + + It requires `dvclive`_ to be installed. + + Args: + path (str): Directory where dvclive will write TSV log files. + interval (int): Logging interval (every k iterations). + Default 10. + ignore_last (bool): Ignore the log of last iterations in each epoch + if less than `interval`. + Default: True. + reset_flag (bool): Whether to clear the output buffer after logging. + Default: True. + by_epoch (bool): Whether EpochBasedRunner is used. + Default: True. + + .. _dvclive: + https://dvc.org/doc/dvclive + """ + + def __init__(self, + path, + interval=10, + ignore_last=True, + reset_flag=True, + by_epoch=True): + + super(DvcliveLoggerHook, self).__init__(interval, ignore_last, + reset_flag, by_epoch) + self.path = path + self.import_dvclive() + + def import_dvclive(self): + try: + import dvclive + except ImportError: + raise ImportError( + 'Please run "pip install dvclive" to install dvclive') + self.dvclive = dvclive + + @master_only + def before_run(self, runner): + self.dvclive.init(self.path) + + @master_only + def log(self, runner): + tags = self.get_loggable_tags(runner) + if tags: + for k, v in tags.items(): + self.dvclive.log(k, v, step=self.get_iter(runner)) diff --git a/annotator/uniformer/mmcv/runner/hooks/logger/mlflow.py b/annotator/uniformer/mmcv/runner/hooks/logger/mlflow.py new file mode 100644 index 0000000000000000000000000000000000000000..f9a72592be47b534ce22573775fd5a7e8e86d72d --- /dev/null +++ b/annotator/uniformer/mmcv/runner/hooks/logger/mlflow.py @@ -0,0 +1,78 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from ...dist_utils import master_only +from ..hook import HOOKS +from .base import LoggerHook + + +@HOOKS.register_module() +class MlflowLoggerHook(LoggerHook): + + def __init__(self, + exp_name=None, + tags=None, + log_model=True, + interval=10, + ignore_last=True, + reset_flag=False, + by_epoch=True): + """Class to log metrics and (optionally) a trained model to MLflow. + + It requires `MLflow`_ to be installed. + + Args: + exp_name (str, optional): Name of the experiment to be used. + Default None. + If not None, set the active experiment. + If experiment does not exist, an experiment with provided name + will be created. + tags (dict of str: str, optional): Tags for the current run. + Default None. + If not None, set tags for the current run. + log_model (bool, optional): Whether to log an MLflow artifact. + Default True. + If True, log runner.model as an MLflow artifact + for the current run. + interval (int): Logging interval (every k iterations). + ignore_last (bool): Ignore the log of last iterations in each epoch + if less than `interval`. + reset_flag (bool): Whether to clear the output buffer after logging + by_epoch (bool): Whether EpochBasedRunner is used. + + .. 
_MLflow: + https://www.mlflow.org/docs/latest/index.html + """ + super(MlflowLoggerHook, self).__init__(interval, ignore_last, + reset_flag, by_epoch) + self.import_mlflow() + self.exp_name = exp_name + self.tags = tags + self.log_model = log_model + + def import_mlflow(self): + try: + import mlflow + import mlflow.pytorch as mlflow_pytorch + except ImportError: + raise ImportError( + 'Please run "pip install mlflow" to install mlflow') + self.mlflow = mlflow + self.mlflow_pytorch = mlflow_pytorch + + @master_only + def before_run(self, runner): + super(MlflowLoggerHook, self).before_run(runner) + if self.exp_name is not None: + self.mlflow.set_experiment(self.exp_name) + if self.tags is not None: + self.mlflow.set_tags(self.tags) + + @master_only + def log(self, runner): + tags = self.get_loggable_tags(runner) + if tags: + self.mlflow.log_metrics(tags, step=self.get_iter(runner)) + + @master_only + def after_run(self, runner): + if self.log_model: + self.mlflow_pytorch.log_model(runner.model, 'models') diff --git a/annotator/uniformer/mmcv/runner/hooks/logger/neptune.py b/annotator/uniformer/mmcv/runner/hooks/logger/neptune.py new file mode 100644 index 0000000000000000000000000000000000000000..7a38772b0c93a8608f32c6357b8616e77c139dc9 --- /dev/null +++ b/annotator/uniformer/mmcv/runner/hooks/logger/neptune.py @@ -0,0 +1,82 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from ...dist_utils import master_only +from ..hook import HOOKS +from .base import LoggerHook + + +@HOOKS.register_module() +class NeptuneLoggerHook(LoggerHook): + """Class to log metrics to NeptuneAI. + + It requires `neptune-client` to be installed. + + Args: + init_kwargs (dict): a dict contains the initialization keys as below: + - project (str): Name of a project in a form of + namespace/project_name. If None, the value of + NEPTUNE_PROJECT environment variable will be taken. + - api_token (str): User’s API token. + If None, the value of NEPTUNE_API_TOKEN environment + variable will be taken. Note: It is strongly recommended + to use NEPTUNE_API_TOKEN environment variable rather than + placing your API token in plain text in your source code. + - name (str, optional, default is 'Untitled'): Editable name of + the run. Name is displayed in the run's Details and in + Runs table as a column. + Check https://docs.neptune.ai/api-reference/neptune#init for + more init arguments. + interval (int): Logging interval (every k iterations). + ignore_last (bool): Ignore the log of last iterations in each epoch + if less than `interval`. + reset_flag (bool): Whether to clear the output buffer after logging + by_epoch (bool): Whether EpochBasedRunner is used. + + .. 
_NeptuneAI:
+        https://docs.neptune.ai/you-should-know/logging-metadata
+    """
+
+    def __init__(self,
+                 init_kwargs=None,
+                 interval=10,
+                 ignore_last=True,
+                 reset_flag=True,
+                 with_step=True,
+                 by_epoch=True):
+
+        super(NeptuneLoggerHook, self).__init__(interval, ignore_last,
+                                                reset_flag, by_epoch)
+        self.import_neptune()
+        self.init_kwargs = init_kwargs
+        self.with_step = with_step
+
+    def import_neptune(self):
+        try:
+            import neptune.new as neptune
+        except ImportError:
+            raise ImportError(
+                'Please run "pip install neptune-client" to install neptune')
+        self.neptune = neptune
+        self.run = None
+
+    @master_only
+    def before_run(self, runner):
+        if self.init_kwargs:
+            self.run = self.neptune.init(**self.init_kwargs)
+        else:
+            self.run = self.neptune.init()
+
+    @master_only
+    def log(self, runner):
+        tags = self.get_loggable_tags(runner)
+        if tags:
+            if not self.with_step:
+                # add the step to the payload up front instead of mutating
+                # `tags` while iterating over it
+                tags['global_step'] = self.get_iter(runner)
+            for tag_name, tag_value in tags.items():
+                if self.with_step:
+                    self.run[tag_name].log(
+                        tag_value, step=self.get_iter(runner))
+                else:
+                    self.run[tag_name].log(tags)
+
+    @master_only
+    def after_run(self, runner):
+        self.run.stop()
diff --git a/annotator/uniformer/mmcv/runner/hooks/logger/pavi.py b/annotator/uniformer/mmcv/runner/hooks/logger/pavi.py
new file mode 100644
index 0000000000000000000000000000000000000000..1dcf146d8163aff1363e9764999b0a74d674a595
--- /dev/null
+++ b/annotator/uniformer/mmcv/runner/hooks/logger/pavi.py
@@ -0,0 +1,117 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import json
+import os
+import os.path as osp
+
+import torch
+import yaml
+
+import annotator.uniformer.mmcv as mmcv
+from ....parallel.utils import is_module_wrapper
+from ...dist_utils import master_only
+from ..hook import HOOKS
+from .base import LoggerHook
+
+
+@HOOKS.register_module()
+class PaviLoggerHook(LoggerHook):
+
+    def __init__(self,
+                 init_kwargs=None,
+                 add_graph=False,
+                 add_last_ckpt=False,
+                 interval=10,
+                 ignore_last=True,
+                 reset_flag=False,
+                 by_epoch=True,
+                 img_key='img_info'):
+        super(PaviLoggerHook, self).__init__(interval, ignore_last, reset_flag,
+                                             by_epoch)
+        self.init_kwargs = init_kwargs
+        self.add_graph = add_graph
+        self.add_last_ckpt = add_last_ckpt
+        self.img_key = img_key
+
+    @master_only
+    def before_run(self, runner):
+        super(PaviLoggerHook, self).before_run(runner)
+        try:
+            from pavi import SummaryWriter
+        except ImportError:
+            raise ImportError('Please run "pip install pavi" to install pavi.')
+
+        self.run_name = runner.work_dir.split('/')[-1]
+
+        if not self.init_kwargs:
+            self.init_kwargs = dict()
+        self.init_kwargs['name'] = self.run_name
+        self.init_kwargs['model'] = runner._model_name
+        if runner.meta is not None:
+            if 'config_dict' in runner.meta:
+                config_dict = runner.meta['config_dict']
+                assert isinstance(
+                    config_dict,
+                    dict), ('meta["config_dict"] has to be a dict, '
+                            f'but got {type(config_dict)}')
+            elif 'config_file' in runner.meta:
+                config_file = runner.meta['config_file']
+                config_dict = dict(mmcv.Config.fromfile(config_file))
+            else:
+                config_dict = None
+            if config_dict is not None:
+                # 'max_.*iter' is parsed in pavi sdk as the maximum iterations
+                # to properly set up the progress bar.
+ config_dict = config_dict.copy() + config_dict.setdefault('max_iter', runner.max_iters) + # non-serializable values are first converted in + # mmcv.dump to json + config_dict = json.loads( + mmcv.dump(config_dict, file_format='json')) + session_text = yaml.dump(config_dict) + self.init_kwargs['session_text'] = session_text + self.writer = SummaryWriter(**self.init_kwargs) + + def get_step(self, runner): + """Get the total training step/epoch.""" + if self.get_mode(runner) == 'val' and self.by_epoch: + return self.get_epoch(runner) + else: + return self.get_iter(runner) + + @master_only + def log(self, runner): + tags = self.get_loggable_tags(runner, add_mode=False) + if tags: + self.writer.add_scalars( + self.get_mode(runner), tags, self.get_step(runner)) + + @master_only + def after_run(self, runner): + if self.add_last_ckpt: + ckpt_path = osp.join(runner.work_dir, 'latest.pth') + if osp.islink(ckpt_path): + ckpt_path = osp.join(runner.work_dir, os.readlink(ckpt_path)) + + if osp.isfile(ckpt_path): + # runner.epoch += 1 has been done before `after_run`. + iteration = runner.epoch if self.by_epoch else runner.iter + return self.writer.add_snapshot_file( + tag=self.run_name, + snapshot_file_path=ckpt_path, + iteration=iteration) + + # flush the buffer and send a task ending signal to Pavi + self.writer.close() + + @master_only + def before_epoch(self, runner): + if runner.epoch == 0 and self.add_graph: + if is_module_wrapper(runner.model): + _model = runner.model.module + else: + _model = runner.model + device = next(_model.parameters()).device + data = next(iter(runner.data_loader)) + image = data[self.img_key][0:1].to(device) + with torch.no_grad(): + self.writer.add_graph(_model, image) diff --git a/annotator/uniformer/mmcv/runner/hooks/logger/tensorboard.py b/annotator/uniformer/mmcv/runner/hooks/logger/tensorboard.py new file mode 100644 index 0000000000000000000000000000000000000000..4dd5011dc08def6c09eef86d3ce5b124c9fc5372 --- /dev/null +++ b/annotator/uniformer/mmcv/runner/hooks/logger/tensorboard.py @@ -0,0 +1,57 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
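+# Typical usage (a sketch; with log_dir=None the writer falls back to
+# <runner.work_dir>/tf_logs, as implemented in before_run below):
+#
+#     runner.register_hook(TensorboardLoggerHook(log_dir=None, interval=10))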
+import os.path as osp + +from annotator.uniformer.mmcv.utils import TORCH_VERSION, digit_version +from ...dist_utils import master_only +from ..hook import HOOKS +from .base import LoggerHook + + +@HOOKS.register_module() +class TensorboardLoggerHook(LoggerHook): + + def __init__(self, + log_dir=None, + interval=10, + ignore_last=True, + reset_flag=False, + by_epoch=True): + super(TensorboardLoggerHook, self).__init__(interval, ignore_last, + reset_flag, by_epoch) + self.log_dir = log_dir + + @master_only + def before_run(self, runner): + super(TensorboardLoggerHook, self).before_run(runner) + if (TORCH_VERSION == 'parrots' + or digit_version(TORCH_VERSION) < digit_version('1.1')): + try: + from tensorboardX import SummaryWriter + except ImportError: + raise ImportError('Please install tensorboardX to use ' + 'TensorboardLoggerHook.') + else: + try: + from torch.utils.tensorboard import SummaryWriter + except ImportError: + raise ImportError( + 'Please run "pip install future tensorboard" to install ' + 'the dependencies to use torch.utils.tensorboard ' + '(applicable to PyTorch 1.1 or higher)') + + if self.log_dir is None: + self.log_dir = osp.join(runner.work_dir, 'tf_logs') + self.writer = SummaryWriter(self.log_dir) + + @master_only + def log(self, runner): + tags = self.get_loggable_tags(runner, allow_text=True) + for tag, val in tags.items(): + if isinstance(val, str): + self.writer.add_text(tag, val, self.get_iter(runner)) + else: + self.writer.add_scalar(tag, val, self.get_iter(runner)) + + @master_only + def after_run(self, runner): + self.writer.close() diff --git a/annotator/uniformer/mmcv/runner/hooks/logger/text.py b/annotator/uniformer/mmcv/runner/hooks/logger/text.py new file mode 100644 index 0000000000000000000000000000000000000000..87b1a3eca9595a130121526f8b4c29915387ab35 --- /dev/null +++ b/annotator/uniformer/mmcv/runner/hooks/logger/text.py @@ -0,0 +1,256 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import datetime +import os +import os.path as osp +from collections import OrderedDict + +import torch +import torch.distributed as dist + +import annotator.uniformer.mmcv as mmcv +from annotator.uniformer.mmcv.fileio.file_client import FileClient +from annotator.uniformer.mmcv.utils import is_tuple_of, scandir +from ..hook import HOOKS +from .base import LoggerHook + + +@HOOKS.register_module() +class TextLoggerHook(LoggerHook): + """Logger hook in text. + + In this logger hook, the information will be printed on terminal and + saved in json file. + + Args: + by_epoch (bool, optional): Whether EpochBasedRunner is used. + Default: True. + interval (int, optional): Logging interval (every k iterations). + Default: 10. + ignore_last (bool, optional): Ignore the log of last iterations in each + epoch if less than :attr:`interval`. Default: True. + reset_flag (bool, optional): Whether to clear the output buffer after + logging. Default: False. + interval_exp_name (int, optional): Logging interval for experiment + name. This feature is to help users conveniently get the experiment + information from screen or log file. Default: 1000. + out_dir (str, optional): Logs are saved in ``runner.work_dir`` default. + If ``out_dir`` is specified, logs will be copied to a new directory + which is the concatenation of ``out_dir`` and the last level + directory of ``runner.work_dir``. Default: None. + `New in version 1.3.16.` + out_suffix (str or tuple[str], optional): Those filenames ending with + ``out_suffix`` will be copied to ``out_dir``. 
+            Default: ('.log.json', '.log', '.py').
+            `New in version 1.3.16.`
+        keep_local (bool, optional): Whether to keep local logs when
+            :attr:`out_dir` is specified. If False, the local log will be
+            removed. Default: True.
+            `New in version 1.3.16.`
+        file_client_args (dict, optional): Arguments to instantiate a
+            FileClient. See :class:`mmcv.fileio.FileClient` for details.
+            Default: None.
+            `New in version 1.3.16.`
+    """
+
+    def __init__(self,
+                 by_epoch=True,
+                 interval=10,
+                 ignore_last=True,
+                 reset_flag=False,
+                 interval_exp_name=1000,
+                 out_dir=None,
+                 out_suffix=('.log.json', '.log', '.py'),
+                 keep_local=True,
+                 file_client_args=None):
+        super(TextLoggerHook, self).__init__(interval, ignore_last, reset_flag,
+                                             by_epoch)
+        self.by_epoch = by_epoch
+        self.time_sec_tot = 0
+        self.interval_exp_name = interval_exp_name
+
+        if out_dir is None and file_client_args is not None:
+            raise ValueError(
+                'file_client_args should be "None" when `out_dir` is not '
+                'specified.')
+        self.out_dir = out_dir
+
+        if not (out_dir is None or isinstance(out_dir, str)
+                or is_tuple_of(out_dir, str)):
+            raise TypeError('out_dir should be "None" or string or tuple of '
+                            f'string, but got {out_dir}')
+        self.out_suffix = out_suffix
+
+        self.keep_local = keep_local
+        self.file_client_args = file_client_args
+        if self.out_dir is not None:
+            self.file_client = FileClient.infer_client(file_client_args,
+                                                       self.out_dir)
+
+    def before_run(self, runner):
+        super(TextLoggerHook, self).before_run(runner)
+
+        if self.out_dir is not None:
+            self.file_client = FileClient.infer_client(self.file_client_args,
+                                                       self.out_dir)
+            # The final `self.out_dir` is the concatenation of `self.out_dir`
+            # and the last-level directory of `runner.work_dir`
+            basename = osp.basename(runner.work_dir.rstrip(osp.sep))
+            self.out_dir = self.file_client.join_path(self.out_dir, basename)
+            runner.logger.info(
+                (f'Text logs will be saved to {self.out_dir} by '
+                 f'{self.file_client.name} after the training process.'))
+
+        self.start_iter = runner.iter
+        self.json_log_path = osp.join(runner.work_dir,
+                                      f'{runner.timestamp}.log.json')
+        if runner.meta is not None:
+            self._dump_log(runner.meta, runner)
+
+    def _get_max_memory(self, runner):
+        device = getattr(runner.model, 'output_device', None)
+        mem = torch.cuda.max_memory_allocated(device=device)
+        mem_mb = torch.tensor([mem / (1024 * 1024)],
+                              dtype=torch.int,
+                              device=device)
+        if runner.world_size > 1:
+            dist.reduce(mem_mb, 0, op=dist.ReduceOp.MAX)
+        return mem_mb.item()
+
+    def _log_info(self, log_dict, runner):
+        # print exp name for users to distinguish experiments
+        # at every ``interval_exp_name`` iterations and the end of each epoch
+        if runner.meta is not None and 'exp_name' in runner.meta:
+            if (self.every_n_iters(runner, self.interval_exp_name)) or (
+                    self.by_epoch and self.end_of_epoch(runner)):
+                exp_info = f'Exp name: {runner.meta["exp_name"]}'
+                runner.logger.info(exp_info)
+
+        if log_dict['mode'] == 'train':
+            if isinstance(log_dict['lr'], dict):
+                lr_str = []
+                for k, val in log_dict['lr'].items():
+                    lr_str.append(f'lr_{k}: {val:.3e}')
+                lr_str = ' '.join(lr_str)
+            else:
+                lr_str = f'lr: {log_dict["lr"]:.3e}'
+
+            # by epoch: Epoch [4][100/1000]
+            # by iter:  Iter [100/100000]
+            if self.by_epoch:
+                log_str = f'Epoch [{log_dict["epoch"]}]' \
+                          f'[{log_dict["iter"]}/{len(runner.data_loader)}]\t'
+            else:
+                log_str = f'Iter [{log_dict["iter"]}/{runner.max_iters}]\t'
+            log_str += f'{lr_str}, '
+
+            if 'time' in log_dict.keys():
+                self.time_sec_tot += (log_dict['time'] * self.interval)
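+                # average per-iteration wall time since logging started; used
+                # just below to estimate the remaining training time (ETA)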
+ time_sec_avg = self.time_sec_tot / ( + runner.iter - self.start_iter + 1) + eta_sec = time_