Bounding box detection
PyTorch
Ontocord.AI commited on
Commit
6e696e8
1 Parent(s): 31412c2

Create processing_image.py

Browse files
Files changed (1) hide show
  1. processing_image.py +157 -0
processing_image.py ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ coding=utf-8
3
+ Copyright 2018, Antonio Mendoza, Hao Tan, Mohit Bansal
4
+ Adapted From Facebook Inc, Detectron2
5
+
6
+ Adapted from https://github.com/j-min
7
+
8
+ Copyright 2022, Ontocord LLC
9
+ Licensed under the Apache License, Version 2.0 (the "License");
10
+ you may not use this file except in compliance with the License.
11
+ You may obtain a copy of the License at
12
+
13
+ http://www.apache.org/licenses/LICENSE-2.0
14
+
15
+ Unless required by applicable law or agreed to in writing, software
16
+ distributed under the License is distributed on an "AS IS" BASIS,
17
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18
+ See the License for the specific language governing permissions and
19
+ limitations under the License.
20
+ """
21
+ import sys
22
+ from typing import Tuple
23
+
24
+ import numpy as np
25
+ import torch
26
+ from PIL import Image
27
+ from torch import nn
28
+
29
+ from .utils import img_tensorize
30
+
31
class ResizeShortestEdge:
    """Resize each image so its shorter edge matches a randomly drawn length.

    The aspect ratio is kept; if the longer edge would exceed ``max_size``
    the whole image is shrunk further so that it fits.
    """

    def __init__(self, short_edge_length, max_size=sys.maxsize):
        """
        Args:
            short_edge_length (list[min, max]): inclusive range from which the
                target shortest-edge length is sampled for every image.
            max_size (int): maximum allowed longest edge length.
        """
        self.interp_method = "bilinear"
        self.max_size = max_size
        self.short_edge_length = short_edge_length

    def _target_hw(self, h, w, size):
        """Return the rounded ``(new_h, new_w)`` for a shortest edge of ``size``."""
        scale = size * 1.0 / min(h, w)
        if h < w:
            newh, neww = size, scale * w
        else:
            newh, neww = scale * h, size
        longest = max(newh, neww)
        if longest > self.max_size:
            # Shrink both edges by the same factor so the longest one fits.
            shrink = self.max_size * 1.0 / longest
            newh, neww = newh * shrink, neww * shrink
        # Round half up to the nearest integer pixel count.
        return int(newh + 0.5), int(neww + 0.5)

    def __call__(self, imgs):
        """Resize every image in ``imgs`` and return the results as a list.

        Args:
            imgs: iterable of images whose first two dimensions are
                (height, width). ``np.uint8`` arrays are resized through PIL
                and stay arrays; any other input is treated as a torch tensor
                in (H, W, C) layout and comes back as a (C, H, W) tensor.

        Returns:
            list: one resized image per input, in the same order.
        """
        img_augs = []
        for img in imgs:
            h, w = img.shape[:2]
            # later: provide list and randomly choose index for resize
            size = np.random.randint(self.short_edge_length[0], self.short_edge_length[1] + 1)
            if size == 0:
                # A sampled size of 0 means "do not resize". Keep the original
                # dimensions but stay in the loop, so the caller still gets one
                # entry per input in the usual output layout.
                newh, neww = h, w
            else:
                newh, neww = self._target_hw(h, w, size)

            if img.dtype == np.uint8:
                pil_image = Image.fromarray(img)
                # PIL expects the size as (width, height).
                pil_image = pil_image.resize((neww, newh), Image.BILINEAR)
                img = np.asarray(pil_image)
            else:
                # hw(c) -> nchw, as required by nn.functional.interpolate
                img = img.permute(2, 0, 1).unsqueeze(0)
                img = nn.functional.interpolate(
                    img, (newh, neww), mode=self.interp_method, align_corners=False
                ).squeeze(0)
            img_augs.append(img)

        return img_augs
74
+
75
+
76
class Preprocess(nn.Module):
    """Turn raw images into a normalized, padded batch tensor.

    The pipeline in ``forward`` is: convert to float tensors, resize with
    ``ResizeShortestEdge``, normalize with the configured mean/std, then pad
    every image to the largest size in the batch and stack.
    """

    def __init__(self, cfg):
        """
        Args:
            cfg: configuration object providing ``INPUT.MIN_SIZE_TEST``,
                ``INPUT.MAX_SIZE_TEST``, ``INPUT.FORMAT``, ``SIZE_DIVISIBILITY``,
                ``PAD_VALUE``, ``MODEL.PIXEL_STD`` and ``MODEL.PIXEL_MEAN``.
        """
        super().__init__()
        # min == max, so the resize target is fixed rather than random.
        self.aug = ResizeShortestEdge([cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MIN_SIZE_TEST], cfg.INPUT.MAX_SIZE_TEST)
        self.input_format = cfg.INPUT.FORMAT
        self.size_divisibility = cfg.SIZE_DIVISIBILITY
        self.pad_value = cfg.PAD_VALUE
        self.max_image_size = cfg.INPUT.MAX_SIZE_TEST

        # Shape (C, 1, 1) so the statistics broadcast over (C, H, W) images.
        pixel_std = torch.tensor(cfg.MODEL.PIXEL_STD).view(-1, 1, 1)
        pixel_mean = torch.tensor(cfg.MODEL.PIXEL_MEAN).view(-1, 1, 1)
        self.register_buffer("pixel_std", pixel_std)
        self.register_buffer("pixel_mean", pixel_mean)

    def normalizer(self, x):
        """Return ``(x - pixel_mean) / pixel_std``.

        This is a method rather than a lambda stored on the instance so that
        it always reads this module's own buffers (also after copying the
        module) and does not prevent the module from being pickled.
        """
        return (x - self.pixel_mean) / self.pixel_std

    def pad(self, images):
        """Pad images on the bottom/right to a common size and stack them.

        Args:
            images: list of tensors whose last two dimensions are (H, W).

        Returns:
            tuple: the stacked batch tensor and a tensor holding each image's
            (H, W) before padding.
        """
        max_size = tuple(max(s) for s in zip(*[img.shape for img in images]))
        image_sizes = [im.shape[-2:] for im in images]
        images = [
            nn.functional.pad(
                im,
                # Pad spec is (left, right, top, bottom) for the last two dims.
                [0, max_size[-1] - size[1], 0, max_size[-2] - size[0]],
                value=self.pad_value,
            )
            for size, im in zip(image_sizes, images)
        ]

        return torch.stack(images), torch.tensor(image_sizes)

    def _as_float_tensor(self, image, device):
        """Convert one input image to a float tensor on ``device``."""
        if isinstance(image, np.ndarray):
            tensor = torch.tensor(image)
        elif isinstance(image, torch.Tensor):
            tensor = image
        else:
            # Anything else is handed to the project's img_tensorize helper.
            tensor = torch.as_tensor(img_tensorize(image, input_format=self.input_format))
        return tensor.to(device).float()

    def forward(self, images, single_image=False):
        """Preprocess one image or a list of images.

        Args:
            images: a single image or a list of images. Each may be a numpy
                array, a torch tensor, or any input accepted by
                ``img_tensorize``. The original sizes are read from
                ``shape[:2]``, i.e. images are assumed to be (H, W, C).
                The caller's list is not modified.
            single_image (bool): if True, exactly one image must be given and
                the results are returned without the batch dimension.

        Returns:
            tuple: ``(images, sizes, scales_yx)`` where ``images`` is the padded
            batch, ``sizes`` the per-image (H, W) after resizing and
            ``scales_yx`` the ratio of original size to resized size.

        Raises:
            NotImplementedError: if ``size_divisibility`` is greater than 0.
        """
        with torch.no_grad():
            if self.size_divisibility > 0:
                # Unsupported configuration: fail before doing any work.
                raise NotImplementedError()
            if not isinstance(images, list):
                images = [images]
            if single_image:
                assert len(images) == 1
            device = self.pixel_std.device
            # Build a new list instead of rewriting the caller's list in place.
            images = [self._as_float_tensor(im, device) for im in images]
            # resize smallest edge
            raw_sizes = torch.tensor([im.shape[:2] for im in images])
            images = self.aug(images)
            # now normalize before pad to avoid useless arithmetic
            images = [self.normalizer(x) for x in images]
            # now pad them to do the following operations
            images, sizes = self.pad(images)
            scales_yx = torch.true_divide(raw_sizes, sizes)
            if single_image:
                return images[0], sizes[0], scales_yx[0]
            return images, sizes, scales_yx
142
+
143
+
144
def _scale_box(boxes, scale_yx):
    """Scale box coordinates in place and return the same tensor.

    Args:
        boxes: 2-D tensor whose even columns are x coordinates and whose odd
            columns are y coordinates (e.g. ``x1, y1, x2, y2``).
        scale_yx: ``(y_scale, x_scale)`` factors, either a single pair of
            shape ``(2,)`` / ``(1, 2)`` applied to every box, or one pair per
            box with shape ``(N, 2)``.

    Returns:
        ``boxes``, modified in place.
    """
    # Accept a bare (y, x) pair as well as a batch of pairs.
    scale_yx = scale_yx.reshape(-1, 2)
    # Keep the factors as column vectors so they broadcast across the
    # coordinate columns of each row instead of across rows.
    boxes[:, 0::2] *= scale_yx[:, 1:2]
    boxes[:, 1::2] *= scale_yx[:, 0:1]
    return boxes
148
+
149
+
150
def _clip_box(tensor, box_size: Tuple[int, int]):
    """Clamp box coordinates in place to lie inside an image.

    Args:
        tensor: 2-D tensor whose first four columns are ``x1, y1, x2, y2``.
        box_size: ``(height, width)`` of the image the boxes belong to.

    Returns:
        ``tensor``, modified in place (returned for parity with ``_scale_box``).

    Raises:
        AssertionError: if ``tensor`` contains an infinite or NaN value.
    """
    # Raised explicitly so the check is not stripped when Python runs with -O.
    if not torch.isfinite(tensor).all():
        raise AssertionError("Box tensor contains infinite or NaN!")
    h, w = box_size
    # x coordinates are limited by the width, y coordinates by the height.
    tensor[:, 0].clamp_(min=0, max=w)
    tensor[:, 1].clamp_(min=0, max=h)
    tensor[:, 2].clamp_(min=0, max=w)
    tensor[:, 3].clamp_(min=0, max=h)
    return tensor
157
+