Spaces:
Sleeping
Sleeping
File size: 5,632 Bytes
08d7644 7508475 08d7644 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 |
"""
coding=utf-8
Copyright 2018, Antonio Mendoza Hao Tan, Mohit Bansal
Adapted From Facebook Inc, Detectron2
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.import copy
"""
import sys
from typing import Tuple
import numpy as np
import torch
import torch.nn.functional as F
from PIL import Image
from .vqa_utils import img_tensorize
class ResizeShortestEdge:
def __init__(self, short_edge_length, max_size=sys.maxsize):
"""
Args:
short_edge_length (list[min, max])
max_size (int): maximum allowed longest edge length.
"""
self.interp_method = "bilinear"
self.max_size = max_size
self.short_edge_length = short_edge_length
def __call__(self, imgs):
img_augs = []
for img in imgs:
h, w = img.shape[:2]
# later: provide list and randomly choose index for resize
size = np.random.randint(self.short_edge_length[0], self.short_edge_length[1] + 1)
if size == 0:
return img
scale = size * 1.0 / min(h, w)
if h < w:
newh, neww = size, scale * w
else:
newh, neww = scale * h, size
if max(newh, neww) > self.max_size:
scale = self.max_size * 1.0 / max(newh, neww)
newh = newh * scale
neww = neww * scale
neww = int(neww + 0.5)
newh = int(newh + 0.5)
if img.dtype == np.uint8:
pil_image = Image.fromarray(img)
pil_image = pil_image.resize((neww, newh), Image.BILINEAR)
img = np.asarray(pil_image)
else:
img = img.permute(2, 0, 1).unsqueeze(0) # 3, 0, 1) # hw(c) -> nchw
img = F.interpolate(img, (newh, neww), mode=self.interp_method, align_corners=False).squeeze(0)
img_augs.append(img)
return img_augs
class Preprocess:
def __init__(self, cfg):
self.aug = ResizeShortestEdge([cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MIN_SIZE_TEST], cfg.INPUT.MAX_SIZE_TEST)
self.input_format = cfg.INPUT.FORMAT
self.size_divisibility = cfg.SIZE_DIVISIBILITY
self.pad_value = cfg.PAD_VALUE
self.max_image_size = cfg.INPUT.MAX_SIZE_TEST
self.device = cfg.MODEL.DEVICE
self.pixel_std = torch.tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(len(cfg.MODEL.PIXEL_STD), 1, 1)
self.pixel_mean = torch.tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(len(cfg.MODEL.PIXEL_STD), 1, 1)
self.normalizer = lambda x: (x - self.pixel_mean) / self.pixel_std
def pad(self, images):
max_size = tuple(max(s) for s in zip(*[img.shape for img in images]))
image_sizes = [im.shape[-2:] for im in images]
images = [
F.pad(
im,
[0, max_size[-1] - size[1], 0, max_size[-2] - size[0]],
value=self.pad_value,
)
for size, im in zip(image_sizes, images)
]
return torch.stack(images), torch.tensor(image_sizes)
def __call__(self, images, single_image=False):
with torch.no_grad():
if not isinstance(images, list):
images = [images]
if single_image:
assert len(images) == 1
for i in range(len(images)):
if isinstance(images[i], torch.Tensor):
images.insert(i, images.pop(i).to(self.device).float())
elif not isinstance(images[i], torch.Tensor):
images.insert(
i,
torch.as_tensor(img_tensorize(images.pop(i), input_format=self.input_format))
.to(self.device)
.float(),
)
# resize smallest edge
raw_sizes = torch.tensor([im.shape[:2] for im in images])
images = self.aug(images)
# transpose images and convert to torch tensors
# images = [torch.as_tensor(i.astype("float32")).permute(2, 0, 1).to(self.device) for i in images]
# now normalize before pad to avoid useless arithmetic
images = [self.normalizer(x) for x in images]
# now pad them to do the following operations
images, sizes = self.pad(images)
# Normalize
if self.size_divisibility > 0:
raise NotImplementedError()
# pad
scales_yx = torch.true_divide(raw_sizes, sizes)
if single_image:
return images[0], sizes[0], scales_yx[0]
else:
return images, sizes, scales_yx
def _scale_box(boxes, scale_yx):
boxes[:, 0::2] *= scale_yx[:, 1]
boxes[:, 1::2] *= scale_yx[:, 0]
return boxes
def _clip_box(tensor, box_size: Tuple[int, int]):
assert torch.isfinite(tensor).all(), "Box tensor contains infinite or NaN!"
h, w = box_size
tensor[:, 0].clamp_(min=0, max=w)
tensor[:, 1].clamp_(min=0, max=h)
tensor[:, 2].clamp_(min=0, max=w)
tensor[:, 3].clamp_(min=0, max=h)
|