Kaori1707 committed
Commit 5eff22e
Parent(s): 3aecb94

update application
.gitignore ADDED
@@ -0,0 +1,166 @@
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ share/python-wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ *.py,cover
+ .hypothesis/
+ .pytest_cache/
+ cover/
+
+ # Translations
+ *.mo
+ *.pot
+
+ # Django stuff:
+ *.log
+ local_settings.py
+ db.sqlite3
+ db.sqlite3-journal
+
+ # Flask stuff:
+ instance/
+ .webassets-cache
+
+ # Scrapy stuff:
+ .scrapy
+
+ # Sphinx documentation
+ docs/_build/
+
+ # PyBuilder
+ .pybuilder/
+ target/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # IPython
+ profile_default/
+ ipython_config.py
+
+ # pyenv
+ # For a library or package, you might want to ignore these files since the code is
+ # intended to run in multiple environments; otherwise, check them in:
+ # .python-version
+
+ # pipenv
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
+ # install all needed dependencies.
+ #Pipfile.lock
+
+ # poetry
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
+ # commonly ignored for libraries.
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+ #poetry.lock
+
+ # pdm
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+ #pdm.lock
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+ # in version control.
+ # https://pdm.fming.dev/#use-with-ide
+ .pdm.toml
+
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+ __pypackages__/
+
+ # Celery stuff
+ celerybeat-schedule
+ celerybeat.pid
+
+ # SageMath parsed files
+ *.sage.py
+
+ # Environments
+ .env
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+
+ # Rope project settings
+ .ropeproject
+
+ # mkdocs documentation
+ /site
+
+ # mypy
+ .mypy_cache/
+ .dmypy.json
+ dmypy.json
+
+ # Pyre type checker
+ .pyre/
+
+ # pytype static type analyzer
+ .pytype/
+
+ # Cython debug symbols
+ cython_debug/
+
+ # PyCharm
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
+ #.idea/
+
+ #weights
+ weights/
+
+ #examples
+ examples/
app.py CHANGED
@@ -3,6 +3,66 @@ import numpy as np
  import torch
  import cv2
  import os
+ from random import randint
+ from vision.ssd.mobilenetv1_ssd import create_mobilenetv1_ssd, create_mobilenetv1_ssd_predictor

  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
- print("device: %s" % device)
+ print("device: %s" % device)
+ torch.backends.cudnn.enabled = True
+ torch.backends.cudnn.benchmark = True
+ default_models = {
+     "ssd": "weights/mb1-ssd-bestmodel.pth",
+     "label_path": "weights/labels.txt"
+ }
+
+ class_names = [name.strip() for name in open(default_models["label_path"]).readlines()]
+ net = create_mobilenetv1_ssd(len(class_names), is_test=True)
+ try:
+     net.load(default_models["ssd"])
+     predictor = create_mobilenetv1_ssd_predictor(net, candidate_size=200)
+ except Exception:
+     print("The net type is wrong. It should be one of mb1-ssd and mb1-ssd-lite.")
+
+ colors = [np.random.choice(range(256), size=3) for _ in range(len(class_names))]
+
+
+ def detection(image):
+     boxes, labels, probs = predictor.predict(image, 10, 0.4)
+     for i in range(boxes.size(0)):
+         box = boxes[i, :]
+         box = box.numpy()
+         box = np.array(box, dtype=int)
+         color = colors[labels[i]]
+         cv2.rectangle(image, (box[0], box[1]), (box[2], box[3]), (int(color[0]), int(color[1]), int(color[2])), thickness=4)
+         label = f"{class_names[labels[i]]}: {probs[i]:.2f}"
+         # cv2.putText(image, label,
+         #             (box[0] + 20, box[1] + 40),
+         #             cv2.FONT_HERSHEY_SIMPLEX,
+         #             1,  # font scale
+         #             (255, 0, 255),
+         #             2)  # line type
+     s = f"Found {len(probs)} objects"
+     return image, s
+
+ title = " AISeed AI Application Demo "
+ description = "# A Demo of Deep Learning for Object Detection"
+ example_list = [["examples/" + example] for example in os.listdir("examples")]
+
+ with gr.Blocks() as demo:
+     demo.title = title
+     gr.Markdown(description)
+     with gr.Row():
+         with gr.Column():
+             im = gr.Image(label="Input Image")
+             im_2 = gr.Image(label="Output Image")
+         with gr.Column():
+             text = gr.Textbox(label="Number of objects")
+             btn1 = gr.Button(value="Who wears mask?")
+             btn1.click(detection, inputs=[im], outputs=[im_2, text])
+
+     gr.Examples(examples=example_list,
+                 inputs=[im],
+                 outputs=[im_2])
+
+ if __name__ == "__main__":
+     demo.launch()
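
For reference, the same predictor can be exercised outside the Gradio UI. A minimal sketch, assuming the weight and label files above are present; "examples/sample.jpg" is a hypothetical image path for illustration:

import cv2
from vision.ssd.mobilenetv1_ssd import create_mobilenetv1_ssd, create_mobilenetv1_ssd_predictor

class_names = [name.strip() for name in open("weights/labels.txt").readlines()]
net = create_mobilenetv1_ssd(len(class_names), is_test=True)
net.load("weights/mb1-ssd-bestmodel.pth")
predictor = create_mobilenetv1_ssd_predictor(net, candidate_size=200)

image = cv2.cvtColor(cv2.imread("examples/sample.jpg"), cv2.COLOR_BGR2RGB)  # hypothetical path
boxes, labels, probs = predictor.predict(image, 10, 0.4)  # top_k=10, prob_threshold=0.4
print(f"Found {len(probs)} objects")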
vision/__init__.py ADDED
File without changes
vision/nn/__init__.py ADDED
File without changes
vision/nn/mobilenet.py ADDED
@@ -0,0 +1,52 @@
+ # borrowed from "https://github.com/marvis/pytorch-mobilenet"
+
+ import torch.nn as nn
+ import torch.nn.functional as F
+
+
+ class MobileNetV1(nn.Module):
+     def __init__(self, num_classes=1024):
+         super(MobileNetV1, self).__init__()
+
+         def conv_bn(inp, oup, stride):
+             return nn.Sequential(
+                 nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
+                 nn.BatchNorm2d(oup),
+                 nn.ReLU(inplace=True)
+             )
+
+         def conv_dw(inp, oup, stride):
+             return nn.Sequential(
+                 nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False),
+                 nn.BatchNorm2d(inp),
+                 nn.ReLU(inplace=True),
+
+                 nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
+                 nn.BatchNorm2d(oup),
+                 nn.ReLU(inplace=True),
+             )
+
+         self.model = nn.Sequential(
+             conv_bn(3, 32, 2),
+             conv_dw(32, 64, 1),
+             conv_dw(64, 128, 2),
+             conv_dw(128, 128, 1),
+             conv_dw(128, 256, 2),
+             conv_dw(256, 256, 1),
+             conv_dw(256, 512, 2),
+             conv_dw(512, 512, 1),
+             conv_dw(512, 512, 1),
+             conv_dw(512, 512, 1),
+             conv_dw(512, 512, 1),
+             conv_dw(512, 512, 1),
+             conv_dw(512, 1024, 2),
+             conv_dw(1024, 1024, 1),
+         )
+         self.fc = nn.Linear(1024, num_classes)
+
+     def forward(self, x):
+         x = self.model(x)
+         x = F.avg_pool2d(x, 7)
+         x = x.view(-1, 1024)
+         x = self.fc(x)
+         return x
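
The conv_dw blocks are depthwise-separable convolutions: a 3×3 per-channel (grouped) convolution followed by a 1×1 pointwise convolution. A quick sketch of why this is cheap compared to a standard 3×3 convolution (the counts are exact; the layer sizes are illustrative):

import torch.nn as nn

def count_params(m):
    return sum(p.numel() for p in m.parameters())

standard = nn.Conv2d(64, 128, 3, padding=1, bias=False)       # 64*128*3*3 = 73728 weights
depthwise_separable = nn.Sequential(
    nn.Conv2d(64, 64, 3, padding=1, groups=64, bias=False),   # 64*3*3 = 576 weights
    nn.Conv2d(64, 128, 1, bias=False),                        # 64*128 = 8192 weights
)
print(count_params(standard), count_params(depthwise_separable))  # 73728 8768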
vision/nn/multibox_loss.py ADDED
@@ -0,0 +1,47 @@
+ import torch.nn as nn
+ import torch.nn.functional as F
+ import torch
+
+
+ from ..utils import box_utils
+
+
+ class MultiboxLoss(nn.Module):
+     def __init__(self, priors, iou_threshold, neg_pos_ratio,
+                  center_variance, size_variance, device):
+         """Implement SSD Multibox Loss.
+
+         Basically, Multibox loss combines classification loss
+         and Smooth L1 regression loss.
+         """
+         super(MultiboxLoss, self).__init__()
+         self.iou_threshold = iou_threshold
+         self.neg_pos_ratio = neg_pos_ratio
+         self.center_variance = center_variance
+         self.size_variance = size_variance
+         self.priors = priors.to(device)  # Tensor.to() is not in-place, so keep the result
+
+     def forward(self, confidence, predicted_locations, labels, gt_locations):
+         """Compute classification loss and smooth l1 loss.
+
+         Args:
+             confidence (batch_size, num_priors, num_classes): class predictions.
+             predicted_locations (batch_size, num_priors, 4): predicted locations.
+             labels (batch_size, num_priors): real labels of all the priors.
+             gt_locations (batch_size, num_priors, 4): real boxes corresponding to all the priors.
+         """
+         num_classes = confidence.size(2)
+         with torch.no_grad():
+             # derived from cross_entropy=sum(log(p))
+             loss = -F.log_softmax(confidence, dim=2)[:, :, 0]
+             mask = box_utils.hard_negative_mining(loss, labels, self.neg_pos_ratio)
+
+         confidence = confidence[mask, :]
+         classification_loss = F.cross_entropy(confidence.reshape(-1, num_classes), labels[mask], reduction='sum')
+         pos_mask = labels > 0
+         predicted_locations = predicted_locations[pos_mask, :].reshape(-1, 4)
+         gt_locations = gt_locations[pos_mask, :].reshape(-1, 4)
+         smooth_l1_loss = F.smooth_l1_loss(predicted_locations, gt_locations, reduction='sum')
+         num_pos = gt_locations.size(0)
+         return smooth_l1_loss / num_pos, classification_loss / num_pos
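
A shape-level sketch of calling the loss with dummy tensors; the batch size and class count are illustrative assumptions, and gt_locations would normally come from MatchPrior (defined in vision/ssd/ssd.py below):

import torch
from vision.nn.multibox_loss import MultiboxLoss
from vision.ssd.config import mobilenetv1_ssd_config as config

criterion = MultiboxLoss(config.priors, iou_threshold=0.5, neg_pos_ratio=3,
                         center_variance=0.1, size_variance=0.2,
                         device=torch.device("cpu"))
batch, num_priors, num_classes = 2, config.priors.size(0), 3   # illustrative sizes
confidence = torch.randn(batch, num_priors, num_classes)
predicted_locations = torch.randn(batch, num_priors, 4)
labels = torch.randint(0, num_classes, (batch, num_priors))
gt_locations = torch.randn(batch, num_priors, 4)
regression_loss, classification_loss = criterion(confidence, predicted_locations, labels, gt_locations)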
vision/nn/scaled_l2_norm.py ADDED
@@ -0,0 +1,19 @@
+ import torch.nn as nn
+ import torch
+ import torch.nn.functional as F
+
+
+ class ScaledL2Norm(nn.Module):
+     def __init__(self, in_channels, initial_scale):
+         super(ScaledL2Norm, self).__init__()
+         self.in_channels = in_channels
+         self.scale = nn.Parameter(torch.Tensor(in_channels))
+         self.initial_scale = initial_scale
+         self.reset_parameters()
+
+     def forward(self, x):
+         return (F.normalize(x, p=2, dim=1)
+                 * self.scale.unsqueeze(0).unsqueeze(2).unsqueeze(3))
+
+     def reset_parameters(self):
+         self.scale.data.fill_(self.initial_scale)
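
A quick check of the behaviour: F.normalize makes every spatial position's channel vector unit-length, so after rescaling its L2 norm equals the learned scale (still at its initial value here):

import torch
from vision.nn.scaled_l2_norm import ScaledL2Norm

layer = ScaledL2Norm(in_channels=512, initial_scale=20.0)
y = layer(torch.randn(1, 512, 38, 38))
print(y.norm(p=2, dim=1).mean())   # ~20.0 at every spatial location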
vision/ssd/__init__.py ADDED
File without changes
vision/ssd/config/__init__.py ADDED
File without changes
vision/ssd/config/mobilenetv1_ssd_config.py ADDED
@@ -0,0 +1,34 @@
+ import numpy as np
+
+ from vision.utils.box_utils import SSDSpec, SSDBoxSizes, generate_ssd_priors
+
+
+ image_size = 300
+ image_mean = np.array([127, 127, 127])  # RGB layout
+ image_std = 128.0
+ iou_threshold = 0.45
+ center_variance = 0.1
+ size_variance = 0.2
+
+ specs = [
+     SSDSpec(19, 16, SSDBoxSizes(60, 105), [2, 3]),
+     SSDSpec(10, 32, SSDBoxSizes(105, 150), [2, 3]),
+     SSDSpec(5, 64, SSDBoxSizes(150, 195), [2, 3]),
+     SSDSpec(3, 100, SSDBoxSizes(195, 240), [2, 3]),
+     SSDSpec(2, 150, SSDBoxSizes(240, 285), [2, 3]),
+     SSDSpec(1, 300, SSDBoxSizes(285, 330), [2, 3])
+ ]
+
+
+ priors = generate_ssd_priors(specs, image_size)
+
+ # print(' ')
+ # print('SSD-Mobilenet-v1 priors:')
+ # print(priors.shape)
+ # print(priors)
+ # print(' ')
+
+ # import torch
+ # torch.save(priors, 'mb1-ssd-priors.pt')
+
+ # np.savetxt('mb1-ssd-priors.txt', priors.numpy())
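
Each spec contributes feature_map_size² grid locations, and every location gets two square priors plus two per aspect ratio (six here), so this config yields 6 × (19² + 10² + 5² + 3² + 2² + 1²) = 6 × 500 = 3000 priors. A sketch verifying the count:

from vision.ssd.config.mobilenetv1_ssd_config import priors, specs

per_location = 2 + 2 * len(specs[0].aspect_ratios)                 # 6
total = sum(per_location * s.feature_map_size ** 2 for s in specs)
print(total, tuple(priors.shape))                                  # 3000 (3000, 4)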
vision/ssd/config/squeezenet_ssd_config.py ADDED
@@ -0,0 +1,23 @@
+ import numpy as np
+
+ from vision.utils.box_utils import SSDSpec, SSDBoxSizes, generate_ssd_priors
+
+
+ image_size = 300
+ image_mean = np.array([127, 127, 127])  # RGB layout
+ image_std = 128.0
+ iou_threshold = 0.45
+ center_variance = 0.1
+ size_variance = 0.2
+
+ specs = [
+     SSDSpec(17, 16, SSDBoxSizes(60, 105), [2, 3]),
+     SSDSpec(10, 32, SSDBoxSizes(105, 150), [2, 3]),
+     SSDSpec(5, 64, SSDBoxSizes(150, 195), [2, 3]),
+     SSDSpec(3, 100, SSDBoxSizes(195, 240), [2, 3]),
+     SSDSpec(2, 150, SSDBoxSizes(240, 285), [2, 3]),
+     SSDSpec(1, 300, SSDBoxSizes(285, 330), [2, 3])
+ ]
+
+
+ priors = generate_ssd_priors(specs, image_size)
vision/ssd/config/vgg_ssd_config.py ADDED
@@ -0,0 +1,24 @@
+ import numpy as np
+
+ from vision.utils.box_utils import SSDSpec, SSDBoxSizes, generate_ssd_priors
+
+
+ image_size = 300
+ image_mean = np.array([123, 117, 104])  # RGB layout
+ image_std = 1.0
+
+ iou_threshold = 0.45
+ center_variance = 0.1
+ size_variance = 0.2
+
+ specs = [
+     SSDSpec(38, 8, SSDBoxSizes(30, 60), [2]),
+     SSDSpec(19, 16, SSDBoxSizes(60, 111), [2, 3]),
+     SSDSpec(10, 32, SSDBoxSizes(111, 162), [2, 3]),
+     SSDSpec(5, 64, SSDBoxSizes(162, 213), [2, 3]),
+     SSDSpec(3, 100, SSDBoxSizes(213, 264), [2]),
+     SSDSpec(1, 300, SSDBoxSizes(264, 315), [2])
+ ]
+
+
+ priors = generate_ssd_priors(specs, image_size)
vision/ssd/data_preprocessing.py ADDED
@@ -0,0 +1,62 @@
+ from ..transforms.transforms import *
+
+
+ class TrainAugmentation:
+     def __init__(self, size, mean=0, std=1.0):
+         """
+         Args:
+             size: the size of the final image.
+             mean: mean pixel value per channel.
+         """
+         self.mean = mean
+         self.size = size
+         self.augment = Compose([
+             ConvertFromInts(),
+             PhotometricDistort(),
+             Expand(self.mean),
+             RandomSampleCrop(),
+             RandomMirror(),
+             ToPercentCoords(),
+             Resize(self.size),
+             SubtractMeans(self.mean),
+             lambda img, boxes=None, labels=None: (img / std, boxes, labels),
+             ToTensor(),
+         ])
+
+     def __call__(self, img, boxes, labels):
+         """
+
+         Args:
+             img: the output of cv.imread in RGB layout.
+             boxes: bounding boxes in the form of (x1, y1, x2, y2).
+             labels: labels of boxes.
+         """
+         return self.augment(img, boxes, labels)
+
+
+ class TestTransform:
+     def __init__(self, size, mean=0.0, std=1.0):
+         self.transform = Compose([
+             ToPercentCoords(),
+             Resize(size),
+             SubtractMeans(mean),
+             lambda img, boxes=None, labels=None: (img / std, boxes, labels),
+             ToTensor(),
+         ])
+
+     def __call__(self, image, boxes, labels):
+         return self.transform(image, boxes, labels)
+
+
+ class PredictionTransform:
+     def __init__(self, size, mean=0.0, std=1.0):
+         self.transform = Compose([
+             Resize(size),
+             SubtractMeans(mean),
+             lambda img, boxes=None, labels=None: (img / std, boxes, labels),
+             ToTensor()
+         ])
+
+     def __call__(self, image):
+         image, _, _ = self.transform(image)
+         return image
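
A minimal sketch of the inference-time path, assuming the package is importable: PredictionTransform resizes to 300×300, normalizes with the config mean/std, and returns a CHW float tensor:

import numpy as np
from vision.ssd.data_preprocessing import PredictionTransform
from vision.ssd.config import mobilenetv1_ssd_config as config

transform = PredictionTransform(config.image_size, config.image_mean, config.image_std)
dummy = np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8)   # dummy HWC image
print(transform(dummy).shape)                                      # torch.Size([3, 300, 300])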
vision/ssd/mobilenetv1_ssd.py ADDED
@@ -0,0 +1,74 @@
+ import torch
+ from torch.nn import Conv2d, Sequential, ModuleList, ReLU
+ from ..nn.mobilenet import MobileNetV1
+
+ from .ssd import SSD
+ from .predictor import Predictor
+ from .config import mobilenetv1_ssd_config as config
+
+
+ def create_mobilenetv1_ssd(num_classes, is_test=False):
+     base_net = MobileNetV1(1001).model  # disable dropout layer
+
+     source_layer_indexes = [
+         12,
+         14,
+     ]
+     extras = ModuleList([
+         Sequential(
+             Conv2d(in_channels=1024, out_channels=256, kernel_size=1),
+             ReLU(),
+             Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=2, padding=1),
+             ReLU()
+         ),
+         Sequential(
+             Conv2d(in_channels=512, out_channels=128, kernel_size=1),
+             ReLU(),
+             Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=2, padding=1),
+             ReLU()
+         ),
+         Sequential(
+             Conv2d(in_channels=256, out_channels=128, kernel_size=1),
+             ReLU(),
+             Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=2, padding=1),
+             ReLU()
+         ),
+         Sequential(
+             Conv2d(in_channels=256, out_channels=128, kernel_size=1),
+             ReLU(),
+             Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=2, padding=1),
+             ReLU()
+         )
+     ])
+
+     regression_headers = ModuleList([
+         Conv2d(in_channels=512, out_channels=6 * 4, kernel_size=3, padding=1),
+         Conv2d(in_channels=1024, out_channels=6 * 4, kernel_size=3, padding=1),
+         Conv2d(in_channels=512, out_channels=6 * 4, kernel_size=3, padding=1),
+         Conv2d(in_channels=256, out_channels=6 * 4, kernel_size=3, padding=1),
+         Conv2d(in_channels=256, out_channels=6 * 4, kernel_size=3, padding=1),
+         Conv2d(in_channels=256, out_channels=6 * 4, kernel_size=3, padding=1),  # TODO: change to kernel_size=1, padding=0?
+     ])
+
+     classification_headers = ModuleList([
+         Conv2d(in_channels=512, out_channels=6 * num_classes, kernel_size=3, padding=1),
+         Conv2d(in_channels=1024, out_channels=6 * num_classes, kernel_size=3, padding=1),
+         Conv2d(in_channels=512, out_channels=6 * num_classes, kernel_size=3, padding=1),
+         Conv2d(in_channels=256, out_channels=6 * num_classes, kernel_size=3, padding=1),
+         Conv2d(in_channels=256, out_channels=6 * num_classes, kernel_size=3, padding=1),
+         Conv2d(in_channels=256, out_channels=6 * num_classes, kernel_size=3, padding=1),  # TODO: change to kernel_size=1, padding=0?
+     ])
+
+     return SSD(num_classes, base_net, source_layer_indexes,
+                extras, classification_headers, regression_headers, is_test=is_test, config=config)
+
+
+ def create_mobilenetv1_ssd_predictor(net, candidate_size=200, nms_method=None, sigma=0.5, device=None):
+     predictor = Predictor(net, config.image_size, config.image_mean,
+                           config.image_std,
+                           nms_method=nms_method,
+                           iou_threshold=config.iou_threshold,
+                           candidate_size=candidate_size,
+                           sigma=sigma,
+                           device=device)
+     return predictor
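
With is_test=True the network decodes its regression output against the priors, so a forward pass on a 300×300 input returns per-prior class scores and corner-form boxes. A shape sketch with untrained weights (illustrative only):

import torch
from vision.ssd.mobilenetv1_ssd import create_mobilenetv1_ssd

net = create_mobilenetv1_ssd(num_classes=3, is_test=True)
net.eval()
with torch.no_grad():
    scores, boxes = net(torch.randn(1, 3, 300, 300))
print(scores.shape, boxes.shape)   # torch.Size([1, 3000, 3]) torch.Size([1, 3000, 4])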
vision/ssd/predictor.py ADDED
@@ -0,0 +1,72 @@
+ import torch
+
+ from ..utils import box_utils
+ from .data_preprocessing import PredictionTransform
+ from ..utils.misc import Timer
+
+
+ class Predictor:
+     def __init__(self, net, size, mean=0.0, std=1.0, nms_method=None,
+                  iou_threshold=0.45, filter_threshold=0.01, candidate_size=200, sigma=0.5, device=None):
+         self.net = net
+         self.transform = PredictionTransform(size, mean, std)
+         self.iou_threshold = iou_threshold
+         self.filter_threshold = filter_threshold
+         self.candidate_size = candidate_size
+         self.nms_method = nms_method
+
+         self.sigma = sigma
+         if device:
+             self.device = device
+         else:
+             self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+
+         self.net.to(self.device)
+         self.net.eval()
+
+         self.timer = Timer()
+
+     def predict(self, image, top_k=-1, prob_threshold=None):
+         cpu_device = torch.device("cpu")
+         height, width, _ = image.shape
+         image = self.transform(image)
+         # print(image)
+         images = image.unsqueeze(0)
+         images = images.to(self.device)
+         with torch.no_grad():
+             self.timer.start()
+             scores, boxes = self.net.forward(images)
+             print("Inference time: ", self.timer.end())
+         boxes = boxes[0]
+         scores = scores[0]
+         if not prob_threshold:
+             prob_threshold = self.filter_threshold
+         # this version of nms is slower on GPU, so we move data to CPU.
+         boxes = boxes.to(cpu_device)
+         scores = scores.to(cpu_device)
+         picked_box_probs = []
+         picked_labels = []
+         for class_index in range(1, scores.size(1)):
+             probs = scores[:, class_index]
+             mask = probs > prob_threshold
+             probs = probs[mask]
+             if probs.size(0) == 0:
+                 continue
+             subset_boxes = boxes[mask, :]
+             box_probs = torch.cat([subset_boxes, probs.reshape(-1, 1)], dim=1)
+             box_probs = box_utils.nms(box_probs, self.nms_method,
+                                       score_threshold=prob_threshold,
+                                       iou_threshold=self.iou_threshold,
+                                       sigma=self.sigma,
+                                       top_k=top_k,
+                                       candidate_size=self.candidate_size)
+             picked_box_probs.append(box_probs)
+             picked_labels.extend([class_index] * box_probs.size(0))
+         if not picked_box_probs:
+             return torch.tensor([]), torch.tensor([]), torch.tensor([])
+         picked_box_probs = torch.cat(picked_box_probs)
+         picked_box_probs[:, 0] *= width
+         picked_box_probs[:, 1] *= height
+         picked_box_probs[:, 2] *= width
+         picked_box_probs[:, 3] *= height
+         return picked_box_probs[:, :4], torch.tensor(picked_labels), picked_box_probs[:, 4]
vision/ssd/ssd.py ADDED
@@ -0,0 +1,163 @@
+ import torch.nn as nn
+ import torch
+ import numpy as np
+ from typing import List, Tuple
+ import torch.nn.functional as F
+
+ from ..utils import box_utils
+ from collections import namedtuple
+ GraphPath = namedtuple("GraphPath", ['s0', 'name', 's1'])
+
+
+ class SSD(nn.Module):
+     def __init__(self, num_classes: int, base_net: nn.ModuleList, source_layer_indexes: List[int],
+                  extras: nn.ModuleList, classification_headers: nn.ModuleList,
+                  regression_headers: nn.ModuleList, is_test=False, config=None, device=None):
+         """Compose an SSD model using the given components.
+         """
+         super(SSD, self).__init__()
+
+         self.num_classes = num_classes
+         self.base_net = base_net
+         self.source_layer_indexes = source_layer_indexes
+         self.extras = extras
+         self.classification_headers = classification_headers
+         self.regression_headers = regression_headers
+         self.is_test = is_test
+         self.config = config
+
+         # register layers in source_layer_indexes by adding them to a module list
+         self.source_layer_add_ons = nn.ModuleList([t[1] for t in source_layer_indexes
+                                                    if isinstance(t, tuple) and not isinstance(t, GraphPath)])
+         if device:
+             self.device = device
+         else:
+             self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+         if is_test:
+             self.config = config
+             self.priors = config.priors.to(self.device)
+
+     def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
+         confidences = []
+         locations = []
+         start_layer_index = 0
+         header_index = 0
+         for end_layer_index in self.source_layer_indexes:
+             if isinstance(end_layer_index, GraphPath):
+                 path = end_layer_index
+                 end_layer_index = end_layer_index.s0
+                 added_layer = None
+             elif isinstance(end_layer_index, tuple):
+                 added_layer = end_layer_index[1]
+                 end_layer_index = end_layer_index[0]
+                 path = None
+             else:
+                 added_layer = None
+                 path = None
+             for layer in self.base_net[start_layer_index: end_layer_index]:
+                 x = layer(x)
+             if added_layer:
+                 y = added_layer(x)
+             else:
+                 y = x
+             if path:
+                 sub = getattr(self.base_net[end_layer_index], path.name)
+                 for layer in sub[:path.s1]:
+                     x = layer(x)
+                 y = x
+                 for layer in sub[path.s1:]:
+                     x = layer(x)
+                 end_layer_index += 1
+             start_layer_index = end_layer_index
+             confidence, location = self.compute_header(header_index, y)
+             header_index += 1
+             confidences.append(confidence)
+             locations.append(location)
+
+         for layer in self.base_net[end_layer_index:]:
+             x = layer(x)
+
+         for layer in self.extras:
+             x = layer(x)
+             confidence, location = self.compute_header(header_index, x)
+             header_index += 1
+             confidences.append(confidence)
+             locations.append(location)
+
+         confidences = torch.cat(confidences, 1)
+         locations = torch.cat(locations, 1)
+
+         if self.is_test:
+             confidences = F.softmax(confidences, dim=2)
+             boxes = box_utils.convert_locations_to_boxes(
+                 locations, self.priors, self.config.center_variance, self.config.size_variance
+             )
+             boxes = box_utils.center_form_to_corner_form(boxes)
+             return confidences, boxes
+         else:
+             return confidences, locations
+
+     def compute_header(self, i, x):
+         confidence = self.classification_headers[i](x)
+         confidence = confidence.permute(0, 2, 3, 1).contiguous()
+         confidence = confidence.view(confidence.size(0), -1, self.num_classes)
+
+         location = self.regression_headers[i](x)
+         location = location.permute(0, 2, 3, 1).contiguous()
+         location = location.view(location.size(0), -1, 4)
+
+         return confidence, location
+
+     def init_from_base_net(self, model):
+         self.base_net.load_state_dict(torch.load(model, map_location=lambda storage, loc: storage), strict=True)
+         self.source_layer_add_ons.apply(_xavier_init_)
+         self.extras.apply(_xavier_init_)
+         self.classification_headers.apply(_xavier_init_)
+         self.regression_headers.apply(_xavier_init_)
+
+     def init_from_pretrained_ssd(self, model):
+         state_dict = torch.load(model, map_location=lambda storage, loc: storage)
+         state_dict = {k: v for k, v in state_dict.items() if not (k.startswith("classification_headers") or k.startswith("regression_headers"))}
+         model_dict = self.state_dict()
+         model_dict.update(state_dict)
+         self.load_state_dict(model_dict)
+         self.classification_headers.apply(_xavier_init_)
+         self.regression_headers.apply(_xavier_init_)
+
+     def init(self):
+         self.base_net.apply(_xavier_init_)
+         self.source_layer_add_ons.apply(_xavier_init_)
+         self.extras.apply(_xavier_init_)
+         self.classification_headers.apply(_xavier_init_)
+         self.regression_headers.apply(_xavier_init_)
+
+     def load(self, model):
+         self.load_state_dict(torch.load(model, map_location=lambda storage, loc: storage))
+
+     def save(self, model_path):
+         torch.save(self.state_dict(), model_path)
+
+
+ class MatchPrior(object):
+     def __init__(self, center_form_priors, center_variance, size_variance, iou_threshold):
+         self.center_form_priors = center_form_priors
+         self.corner_form_priors = box_utils.center_form_to_corner_form(center_form_priors)
+         self.center_variance = center_variance
+         self.size_variance = size_variance
+         self.iou_threshold = iou_threshold
+
+     def __call__(self, gt_boxes, gt_labels):
+         if type(gt_boxes) is np.ndarray:
+             gt_boxes = torch.from_numpy(gt_boxes)
+         if type(gt_labels) is np.ndarray:
+             gt_labels = torch.from_numpy(gt_labels)
+         boxes, labels = box_utils.assign_priors(gt_boxes, gt_labels,
+                                                 self.corner_form_priors, self.iou_threshold)
+         boxes = box_utils.corner_form_to_center_form(boxes)
+         locations = box_utils.convert_boxes_to_locations(boxes, self.center_form_priors, self.center_variance, self.size_variance)
+         return locations, labels
+
+
+ def _xavier_init_(m: nn.Module):
+     if isinstance(m, nn.Conv2d):
+         nn.init.xavier_uniform_(m.weight)
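
MatchPrior is the training-side counterpart of the decoding in forward(): it assigns each prior a class label and an encoded regression target. A small sketch with a single ground-truth box (coordinates are illustrative, in relative corner form):

import torch
from vision.ssd.ssd import MatchPrior
from vision.ssd.config import mobilenetv1_ssd_config as config

match = MatchPrior(config.priors, config.center_variance,
                   config.size_variance, iou_threshold=0.5)
gt_boxes = torch.tensor([[0.1, 0.1, 0.4, 0.5]])   # illustrative box
gt_labels = torch.tensor([1])
locations, labels = match(gt_boxes, gt_labels)
print(locations.shape, int((labels > 0).sum()))   # (3000, 4) and the number of matched priors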
vision/transforms/__init__.py ADDED
File without changes
vision/transforms/transforms.py ADDED
@@ -0,0 +1,409 @@
+ # from https://github.com/amdegroot/ssd.pytorch
+
+
+ import torch
+ from torchvision import transforms
+ import cv2
+ import numpy as np
+ import types
+ from numpy import random
+
+
+ def intersect(box_a, box_b):
+     max_xy = np.minimum(box_a[:, 2:], box_b[2:])
+     min_xy = np.maximum(box_a[:, :2], box_b[:2])
+     inter = np.clip((max_xy - min_xy), a_min=0, a_max=np.inf)
+     return inter[:, 0] * inter[:, 1]
+
+
+ def jaccard_numpy(box_a, box_b):
+     """Compute the jaccard overlap of two sets of boxes. The jaccard overlap
+     is simply the intersection over union of two boxes.
+     E.g.:
+         A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B)
+     Args:
+         box_a: Multiple bounding boxes, Shape: [num_boxes, 4]
+         box_b: Single bounding box, Shape: [4]
+     Return:
+         jaccard overlap: Shape: [box_a.shape[0]]
+     """
+     inter = intersect(box_a, box_b)
+     area_a = ((box_a[:, 2] - box_a[:, 0]) *
+               (box_a[:, 3] - box_a[:, 1]))  # [A,B]
+     area_b = ((box_b[2] - box_b[0]) *
+               (box_b[3] - box_b[1]))  # [A,B]
+     union = area_a + area_b - inter
+     return inter / union  # [A,B]
+
+
+ class Compose(object):
+     """Composes several augmentations together.
+     Args:
+         transforms (List[Transform]): list of transforms to compose.
+     Example:
+         >>> augmentations.Compose([
+         >>>     transforms.CenterCrop(10),
+         >>>     transforms.ToTensor(),
+         >>> ])
+     """
+
+     def __init__(self, transforms):
+         self.transforms = transforms
+
+     def __call__(self, img, boxes=None, labels=None):
+         for t in self.transforms:
+             img, boxes, labels = t(img, boxes, labels)
+         return img, boxes, labels
+
+
+ class Lambda(object):
+     """Applies a lambda as a transform."""
+
+     def __init__(self, lambd):
+         assert isinstance(lambd, types.LambdaType)
+         self.lambd = lambd
+
+     def __call__(self, img, boxes=None, labels=None):
+         return self.lambd(img, boxes, labels)
+
+
+ class ConvertFromInts(object):
+     def __call__(self, image, boxes=None, labels=None):
+         return image.astype(np.float32), boxes, labels
+
+
+ class SubtractMeans(object):
+     def __init__(self, mean):
+         self.mean = np.array(mean, dtype=np.float32)
+
+     def __call__(self, image, boxes=None, labels=None):
+         image = image.astype(np.float32)
+         image -= self.mean
+         return image.astype(np.float32), boxes, labels
+
+
+ class ToAbsoluteCoords(object):
+     def __call__(self, image, boxes=None, labels=None):
+         height, width, channels = image.shape
+         boxes[:, 0] *= width
+         boxes[:, 2] *= width
+         boxes[:, 1] *= height
+         boxes[:, 3] *= height
+
+         return image, boxes, labels
+
+
+ class ToPercentCoords(object):
+     def __call__(self, image, boxes=None, labels=None):
+         height, width, channels = image.shape
+         boxes[:, 0] /= width
+         boxes[:, 2] /= width
+         boxes[:, 1] /= height
+         boxes[:, 3] /= height
+
+         return image, boxes, labels
+
+
+ class Resize(object):
+     def __init__(self, size=300):
+         self.size = size
+
+     def __call__(self, image, boxes=None, labels=None):
+         image = cv2.resize(image, (self.size, self.size))
+         return image, boxes, labels
+
+
+ class RandomSaturation(object):
+     def __init__(self, lower=0.5, upper=1.5):
+         self.lower = lower
+         self.upper = upper
+         assert self.upper >= self.lower, "saturation upper must be >= lower."
+         assert self.lower >= 0, "saturation lower must be non-negative."
+
+     def __call__(self, image, boxes=None, labels=None):
+         if random.randint(2):
+             image[:, :, 1] *= random.uniform(self.lower, self.upper)
+
+         return image, boxes, labels
+
+
+ class RandomHue(object):
+     def __init__(self, delta=18.0):
+         assert delta >= 0.0 and delta <= 360.0
+         self.delta = delta
+
+     def __call__(self, image, boxes=None, labels=None):
+         if random.randint(2):
+             image[:, :, 0] += random.uniform(-self.delta, self.delta)
+             image[:, :, 0][image[:, :, 0] > 360.0] -= 360.0
+             image[:, :, 0][image[:, :, 0] < 0.0] += 360.0
+         return image, boxes, labels
+
+
+ class RandomLightingNoise(object):
+     def __init__(self):
+         self.perms = ((0, 1, 2), (0, 2, 1),
+                       (1, 0, 2), (1, 2, 0),
+                       (2, 0, 1), (2, 1, 0))
+
+     def __call__(self, image, boxes=None, labels=None):
+         if random.randint(2):
+             swap = self.perms[random.randint(len(self.perms))]
+             shuffle = SwapChannels(swap)  # shuffle channels
+             image = shuffle(image)
+         return image, boxes, labels
+
+
+ class ConvertColor(object):
+     def __init__(self, current, transform):
+         self.transform = transform
+         self.current = current
+
+     def __call__(self, image, boxes=None, labels=None):
+         if self.current == 'BGR' and self.transform == 'HSV':
+             image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
+         elif self.current == 'RGB' and self.transform == 'HSV':
+             image = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)
+         elif self.current == 'BGR' and self.transform == 'RGB':
+             image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+         elif self.current == 'HSV' and self.transform == 'BGR':
+             image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR)
+         elif self.current == 'HSV' and self.transform == "RGB":
+             image = cv2.cvtColor(image, cv2.COLOR_HSV2RGB)
+         else:
+             raise NotImplementedError
+         return image, boxes, labels
+
+
+ class RandomContrast(object):
+     def __init__(self, lower=0.5, upper=1.5):
+         self.lower = lower
+         self.upper = upper
+         assert self.upper >= self.lower, "contrast upper must be >= lower."
+         assert self.lower >= 0, "contrast lower must be non-negative."
+
+     # expects float image
+     def __call__(self, image, boxes=None, labels=None):
+         if random.randint(2):
+             alpha = random.uniform(self.lower, self.upper)
+             image *= alpha
+         return image, boxes, labels
+
+
+ class RandomBrightness(object):
+     def __init__(self, delta=32):
+         assert delta >= 0.0
+         assert delta <= 255.0
+         self.delta = delta
+
+     def __call__(self, image, boxes=None, labels=None):
+         if random.randint(2):
+             delta = random.uniform(-self.delta, self.delta)
+             image += delta
+         return image, boxes, labels
+
+
+ class ToCV2Image(object):
+     def __call__(self, tensor, boxes=None, labels=None):
+         return tensor.cpu().numpy().astype(np.float32).transpose((1, 2, 0)), boxes, labels
+
+
+ class ToTensor(object):
+     def __call__(self, cvimage, boxes=None, labels=None):
+         return torch.from_numpy(cvimage.astype(np.float32)).permute(2, 0, 1), boxes, labels
+
+
+ class RandomSampleCrop(object):
+     """Crop
+     Arguments:
+         img (Image): the image being input during training
+         boxes (Tensor): the original bounding boxes in pt form
+         labels (Tensor): the class labels for each bbox
+         mode (float tuple): the min and max jaccard overlaps
+     Return:
+         (img, boxes, classes)
+             img (Image): the cropped image
+             boxes (Tensor): the adjusted bounding boxes in pt form
+             labels (Tensor): the class labels for each bbox
+     """
+     def __init__(self):
+         self.sample_options = (
+             # using entire original input image
+             None,
+             # sample a patch s.t. MIN jaccard w/ obj in .1,.3,.4,.7,.9
+             (0.1, None),
+             (0.3, None),
+             (0.7, None),
+             (0.9, None),
+             # randomly sample a patch
+             (None, None),
+         )
+
+     def __call__(self, image, boxes=None, labels=None):
+         height, width, _ = image.shape
+         while True:
+             # randomly choose a mode
+             # mode = random.choice(self.sample_options)  # throws numpy deprecation warning
+             mode = self.sample_options[random.randint(len(self.sample_options))]
+
+             if mode is None:
+                 return image, boxes, labels
+
+             min_iou, max_iou = mode
+             if min_iou is None:
+                 min_iou = float('-inf')
+             if max_iou is None:
+                 max_iou = float('inf')
+
+             # max trials (50)
+             for _ in range(50):
+                 current_image = image
+
+                 w = random.uniform(0.3 * width, width)
+                 h = random.uniform(0.3 * height, height)
+
+                 # aspect ratio constraint b/t .5 & 2
+                 if h / w < 0.5 or h / w > 2:
+                     continue
+
+                 left = random.uniform(width - w)
+                 top = random.uniform(height - h)
+
+                 # convert to integer rect x1,y1,x2,y2
+                 rect = np.array([int(left), int(top), int(left + w), int(top + h)])
+
+                 # calculate IoU (jaccard overlap) b/t the cropped and gt boxes
+                 overlap = jaccard_numpy(boxes, rect)
+
+                 # is min and max overlap constraint satisfied? if not try again
+                 if overlap.min() < min_iou and max_iou < overlap.max():
+                     continue
+
+                 # cut the crop from the image
+                 current_image = current_image[rect[1]:rect[3], rect[0]:rect[2], :]
+
+                 # keep overlap with gt box IF center in sampled patch
+                 centers = (boxes[:, :2] + boxes[:, 2:]) / 2.0
+
+                 # mask in all gt boxes whose centers are below and to the right of the crop's top-left corner
+                 m1 = (rect[0] < centers[:, 0]) * (rect[1] < centers[:, 1])
+
+                 # mask in all gt boxes whose centers are above and to the left of the crop's bottom-right corner
+                 m2 = (rect[2] > centers[:, 0]) * (rect[3] > centers[:, 1])
+
+                 # mask where both m1 and m2 are true
+                 mask = m1 * m2
+
+                 # have any valid boxes? try again if not
+                 if not mask.any():
+                     continue
+
+                 # take only matching gt boxes
+                 current_boxes = boxes[mask, :].copy()
+
+                 # take only matching gt labels
+                 current_labels = labels[mask]
+
+                 # should we use the box left and top corner or the crop's
+                 current_boxes[:, :2] = np.maximum(current_boxes[:, :2], rect[:2])
+                 # adjust to crop (by subtracting crop's left, top)
+                 current_boxes[:, :2] -= rect[:2]
+
+                 current_boxes[:, 2:] = np.minimum(current_boxes[:, 2:], rect[2:])
+                 # adjust to crop (by subtracting crop's left, top)
+                 current_boxes[:, 2:] -= rect[:2]
+
+                 return current_image, current_boxes, current_labels
+
+
+ class Expand(object):
+     def __init__(self, mean):
+         self.mean = mean
+
+     def __call__(self, image, boxes, labels):
+         if random.randint(2):
+             return image, boxes, labels
+
+         height, width, depth = image.shape
+         ratio = random.uniform(1, 4)
+         left = random.uniform(0, width * ratio - width)
+         top = random.uniform(0, height * ratio - height)
+
+         expand_image = np.zeros(
+             (int(height * ratio), int(width * ratio), depth),
+             dtype=image.dtype)
+         expand_image[:, :, :] = self.mean
+         expand_image[int(top):int(top + height),
+                      int(left):int(left + width)] = image
+         image = expand_image
+
+         boxes = boxes.copy()
+         boxes[:, :2] += (int(left), int(top))
+         boxes[:, 2:] += (int(left), int(top))
+
+         return image, boxes, labels
+
+
+ class RandomMirror(object):
+     def __call__(self, image, boxes, classes):
+         _, width, _ = image.shape
+         if random.randint(2):
+             image = image[:, ::-1]
+             boxes = boxes.copy()
+             boxes[:, 0::2] = width - boxes[:, 2::-2]
+         return image, boxes, classes
+
+
+ class SwapChannels(object):
+     """Transforms a tensorized image by swapping the channels in the order
+     specified in the swap tuple.
+     Args:
+         swaps (int triple): final order of channels
+             eg: (2, 1, 0)
+     """
+
+     def __init__(self, swaps):
+         self.swaps = swaps
+
+     def __call__(self, image):
+         """
+         Args:
+             image (Tensor): image tensor to be transformed
+         Return:
+             a tensor with channels swapped according to swap
+         """
+         # if torch.is_tensor(image):
+         #     image = image.data.cpu().numpy()
+         # else:
+         #     image = np.array(image)
+         image = image[:, :, self.swaps]
+         return image
+
+
+ class PhotometricDistort(object):
+     def __init__(self):
+         self.pd = [
+             RandomContrast(),  # RGB
+             ConvertColor(current="RGB", transform='HSV'),  # HSV
+             RandomSaturation(),  # HSV
+             RandomHue(),  # HSV
+             ConvertColor(current='HSV', transform='RGB'),  # RGB
+             RandomContrast()  # RGB
+         ]
+         self.rand_brightness = RandomBrightness()
+         self.rand_light_noise = RandomLightingNoise()
+
+     def __call__(self, image, boxes, labels):
+         im = image.copy()
+         im, boxes, labels = self.rand_brightness(im, boxes, labels)
+         if random.randint(2):
+             distort = Compose(self.pd[:-1])
+         else:
+             distort = Compose(self.pd[1:])
+         im, boxes, labels = distort(im, boxes, labels)
+         return self.rand_light_noise(im, boxes, labels)
+
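
A worked example of the jaccard_numpy helper that drives the crop sampling above: two 2×2 boxes overlapping in a 1×1 region give IoU = 1 / (4 + 4 − 1) ≈ 0.143:

import numpy as np
from vision.transforms.transforms import jaccard_numpy

box_a = np.array([[0., 0., 2., 2.]])   # one box of area 4
box_b = np.array([1., 1., 3., 3.])     # a single box of area 4, overlap area 1
print(jaccard_numpy(box_a, box_b))     # [0.14285714]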
vision/utils/__init__.py ADDED
@@ -0,0 +1 @@
+ from .misc import *
vision/utils/box_utils.py ADDED
@@ -0,0 +1,295 @@
+ import collections
+ import torch
+ import itertools
+ from typing import List
+ import math
+
+ SSDBoxSizes = collections.namedtuple('SSDBoxSizes', ['min', 'max'])
+
+ SSDSpec = collections.namedtuple('SSDSpec', ['feature_map_size', 'shrinkage', 'box_sizes', 'aspect_ratios'])
+
+
+ def generate_ssd_priors(specs: List[SSDSpec], image_size, clamp=True) -> torch.Tensor:
+     """Generate SSD Prior Boxes.
+
+     It returns the center, height and width of the priors. The values are relative to the image size.
+     Args:
+         specs: SSDSpecs about the shapes and sizes of the prior boxes, i.e.
+             specs = [
+                 SSDSpec(38, 8, SSDBoxSizes(30, 60), [2]),
+                 SSDSpec(19, 16, SSDBoxSizes(60, 111), [2, 3]),
+                 SSDSpec(10, 32, SSDBoxSizes(111, 162), [2, 3]),
+                 SSDSpec(5, 64, SSDBoxSizes(162, 213), [2, 3]),
+                 SSDSpec(3, 100, SSDBoxSizes(213, 264), [2]),
+                 SSDSpec(1, 300, SSDBoxSizes(264, 315), [2])
+             ]
+         image_size: image size.
+         clamp: if true, clamp the values so they fall between [0.0, 1.0]
+     Returns:
+         priors (num_priors, 4): The prior boxes represented as [[center_x, center_y, w, h]]. All the values
+             are relative to the image size.
+     """
+     priors = []
+     for spec in specs:
+         scale = image_size / spec.shrinkage
+         for j, i in itertools.product(range(spec.feature_map_size), repeat=2):
+             x_center = (i + 0.5) / scale
+             y_center = (j + 0.5) / scale
+
+             # small sized square box
+             size = spec.box_sizes.min
+             h = w = size / image_size
+             priors.append([x_center, y_center, w, h])
+
+             # big sized square box
+             size = math.sqrt(spec.box_sizes.max * spec.box_sizes.min)
+             h = w = size / image_size
+             priors.append([x_center, y_center, w, h])
+
+             # change h/w ratio of the small sized box
+             size = spec.box_sizes.min
+             h = w = size / image_size
+             for ratio in spec.aspect_ratios:
+                 ratio = math.sqrt(ratio)
+                 priors.append([x_center, y_center, w * ratio, h / ratio])
+                 priors.append([x_center, y_center, w / ratio, h * ratio])
+
+     priors = torch.tensor(priors)
+     if clamp:
+         torch.clamp(priors, 0.0, 1.0, out=priors)
+     return priors
+
+
+ def convert_locations_to_boxes(locations, priors, center_variance,
+                                size_variance):
+     """Convert regressional location results of SSD into boxes in the form of (center_x, center_y, h, w).
+
+     The conversion:
+         $$predicted\_center * center\_variance = \frac{real\_center - prior\_center}{prior\_hw}$$
+         $$exp(predicted\_hw * size\_variance) = \frac{real\_hw}{prior\_hw}$$
+     We do it in the inverse direction here.
+     Args:
+         locations (batch_size, num_priors, 4): the regression output of SSD. It will contain the outputs as well.
+         priors (num_priors, 4) or (batch_size/1, num_priors, 4): prior boxes.
+         center_variance: a float used to change the scale of center.
+         size_variance: a float used to change the scale of size.
+     Returns:
+         boxes: priors [[center_x, center_y, h, w]]. All the values
+             are relative to the image size.
+     """
+     # priors can have one dimension less.
+     if priors.dim() + 1 == locations.dim():
+         priors = priors.unsqueeze(0)
+     return torch.cat([
+         locations[..., :2] * center_variance * priors[..., 2:] + priors[..., :2],
+         torch.exp(locations[..., 2:] * size_variance) * priors[..., 2:]
+     ], dim=locations.dim() - 1)
+
+
+ def convert_boxes_to_locations(center_form_boxes, center_form_priors, center_variance, size_variance):
+     # priors can have one dimension less
+     if center_form_priors.dim() + 1 == center_form_boxes.dim():
+         center_form_priors = center_form_priors.unsqueeze(0)
+     return torch.cat([
+         (center_form_boxes[..., :2] - center_form_priors[..., :2]) / center_form_priors[..., 2:] / center_variance,
+         torch.log(center_form_boxes[..., 2:] / center_form_priors[..., 2:]) / size_variance
+     ], dim=center_form_boxes.dim() - 1)
+
+
+ def area_of(left_top, right_bottom) -> torch.Tensor:
+     """Compute the areas of rectangles given two corners.
+
+     Args:
+         left_top (N, 2): left top corner.
+         right_bottom (N, 2): right bottom corner.
+
+     Returns:
+         area (N): return the area.
+     """
+     hw = torch.clamp(right_bottom - left_top, min=0.0)
+     return hw[..., 0] * hw[..., 1]
+
+
+ def iou_of(boxes0, boxes1, eps=1e-5):
+     """Return intersection-over-union (Jaccard index) of boxes.
+
+     Args:
+         boxes0 (N, 4): ground truth boxes.
+         boxes1 (N or 1, 4): predicted boxes.
+         eps: a small number to avoid 0 as denominator.
+     Returns:
+         iou (N): IoU values.
+     """
+     overlap_left_top = torch.max(boxes0[..., :2], boxes1[..., :2])
+     overlap_right_bottom = torch.min(boxes0[..., 2:], boxes1[..., 2:])
+
+     overlap_area = area_of(overlap_left_top, overlap_right_bottom)
+     area0 = area_of(boxes0[..., :2], boxes0[..., 2:])
+     area1 = area_of(boxes1[..., :2], boxes1[..., 2:])
+     return overlap_area / (area0 + area1 - overlap_area + eps)
+
+
+ def assign_priors(gt_boxes, gt_labels, corner_form_priors,
+                   iou_threshold):
+     """Assign ground truth boxes and targets to priors.
+
+     Args:
+         gt_boxes (num_targets, 4): ground truth boxes.
+         gt_labels (num_targets): labels of targets.
+         corner_form_priors (num_priors, 4): corner form priors.
+     Returns:
+         boxes (num_priors, 4): real values for priors.
+         labels (num_priors): labels for priors.
+     """
+     # size: num_priors x num_targets
+     ious = iou_of(gt_boxes.unsqueeze(0), corner_form_priors.unsqueeze(1))
+     # size: num_priors
+     best_target_per_prior, best_target_per_prior_index = ious.max(1)
+     # size: num_targets
+     best_prior_per_target, best_prior_per_target_index = ious.max(0)
+
+     for target_index, prior_index in enumerate(best_prior_per_target_index):
+         best_target_per_prior_index[prior_index] = target_index
+     # 2.0 is used to make sure every target has a prior assigned
+     best_target_per_prior.index_fill_(0, best_prior_per_target_index, 2)
+     # size: num_priors
+     labels = gt_labels[best_target_per_prior_index]
+     labels[best_target_per_prior < iou_threshold] = 0  # the background id
+     boxes = gt_boxes[best_target_per_prior_index]
+     return boxes, labels
+
+
+ def hard_negative_mining(loss, labels, neg_pos_ratio):
+     """
+     It is used to suppress the presence of a large number of negative predictions.
+     It works on image level, not batch level.
+     For any example/image, it keeps all the positive predictions and
+     cuts the number of negative predictions to make sure the ratio
+     between the negative examples and positive examples is no more than
+     the given ratio for an image.
+
+     Args:
+         loss (N, num_priors): the loss for each example.
+         labels (N, num_priors): the labels.
+         neg_pos_ratio: the ratio between the negative examples and positive examples.
+     """
+     pos_mask = labels > 0
+     num_pos = pos_mask.long().sum(dim=1, keepdim=True)
+     num_neg = num_pos * neg_pos_ratio
+
+     loss[pos_mask] = -math.inf
+     _, indexes = loss.sort(dim=1, descending=True)
+     _, orders = indexes.sort(dim=1)
+     neg_mask = orders < num_neg
+     return pos_mask | neg_mask
+
+
+ def center_form_to_corner_form(locations):
+     return torch.cat([locations[..., :2] - locations[..., 2:] / 2,
+                       locations[..., :2] + locations[..., 2:] / 2], locations.dim() - 1)
+
+
+ def corner_form_to_center_form(boxes):
+     return torch.cat([
+         (boxes[..., :2] + boxes[..., 2:]) / 2,
+         boxes[..., 2:] - boxes[..., :2]
+     ], boxes.dim() - 1)
+
+
+ def hard_nms(box_scores, iou_threshold, top_k=-1, candidate_size=200):
+     """
+
+     Args:
+         box_scores (N, 5): boxes in corner-form and probabilities.
+         iou_threshold: intersection over union threshold.
+         top_k: keep top_k results. If k <= 0, keep all the results.
+         candidate_size: only consider the candidates with the highest scores.
+     Returns:
+         picked (K, 5): the kept rows of box_scores.
+     """
+     scores = box_scores[:, -1]
+     boxes = box_scores[:, :-1]
+     picked = []
+     _, indexes = scores.sort(descending=True)
+     indexes = indexes[:candidate_size]
+     while len(indexes) > 0:
+         current = indexes[0]
+         picked.append(current.item())
+         if 0 < top_k == len(picked) or len(indexes) == 1:
+             break
+         current_box = boxes[current, :]
+         indexes = indexes[1:]
+         rest_boxes = boxes[indexes, :]
+         iou = iou_of(
+             rest_boxes,
+             current_box.unsqueeze(0),
+         )
+         indexes = indexes[iou <= iou_threshold]
+
+     return box_scores[picked, :]
+
+
+ def nms(box_scores, nms_method=None, score_threshold=None, iou_threshold=None,
+         sigma=0.5, top_k=-1, candidate_size=200):
+     if nms_method == "soft":
+         return soft_nms(box_scores, score_threshold, sigma, top_k)
+     else:
+         return hard_nms(box_scores, iou_threshold, top_k, candidate_size=candidate_size)
+
+
+ def soft_nms(box_scores, score_threshold, sigma=0.5, top_k=-1):
+     """Soft NMS implementation.
+
+     References:
+         https://arxiv.org/abs/1704.04503
+         https://github.com/facebookresearch/Detectron/blob/master/detectron/utils/cython_nms.pyx
+
+     Args:
+         box_scores (N, 5): boxes in corner-form and probabilities.
+         score_threshold: boxes with scores less than this value are not considered.
+         sigma: the parameter in score re-computation.
+             scores[i] = scores[i] * exp(-(iou_i)^2 / sigma)
+         top_k: keep top_k results. If k <= 0, keep all the results.
+     Returns:
+         picked_box_scores (K, 5): results of NMS.
+     """
+     picked_box_scores = []
+     while box_scores.size(0) > 0:
+         max_score_index = torch.argmax(box_scores[:, 4])
+         cur_box_prob = torch.tensor(box_scores[max_score_index, :])
+         picked_box_scores.append(cur_box_prob)
+         if len(picked_box_scores) == top_k > 0 or box_scores.size(0) == 1:
+             break
+         cur_box = cur_box_prob[:-1]
+         box_scores[max_score_index, :] = box_scores[-1, :]
+         box_scores = box_scores[:-1, :]
+         ious = iou_of(cur_box.unsqueeze(0), box_scores[:, :-1])
+         box_scores[:, -1] = box_scores[:, -1] * torch.exp(-(ious * ious) / sigma)
+         box_scores = box_scores[box_scores[:, -1] > score_threshold, :]
+     if len(picked_box_scores) > 0:
+         return torch.stack(picked_box_scores)
+     else:
+         return torch.tensor([])
+
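
A round-trip sketch of the location encoding: convert_boxes_to_locations and convert_locations_to_boxes are exact inverses for the variances used in the configs. Importing vision.utils assumes vision/utils/misc.py (pulled in by the package __init__) is present:

import torch
from vision.utils import box_utils

prior = torch.tensor([[0.50, 0.50, 0.20, 0.20]])   # center-form (cx, cy, w, h)
box = torch.tensor([[0.52, 0.48, 0.25, 0.15]])     # illustrative box
loc = box_utils.convert_boxes_to_locations(box, prior, 0.1, 0.2)
decoded = box_utils.convert_locations_to_boxes(loc, prior, 0.1, 0.2)
print(torch.allclose(decoded, box))   # True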
vision/utils/box_utils_numpy.py ADDED
@@ -0,0 +1,238 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .box_utils import SSDSpec
2
+
3
+ from typing import List
4
+ import itertools
5
+ import math
6
+ import numpy as np
7
+
8
+
9
+ def generate_ssd_priors(specs: List[SSDSpec], image_size, clamp=True):
10
+ """Generate SSD Prior Boxes.
11
+
12
+ It returns the center, height and width of the priors. The values are relative to the image size
13
+ Args:
14
+ specs: SSDSpecs about the shapes of sizes of prior boxes. i.e.
15
+ specs = [
16
+ SSDSpec(38, 8, SSDBoxSizes(30, 60), [2]),
17
+ SSDSpec(19, 16, SSDBoxSizes(60, 111), [2, 3]),
18
+ SSDSpec(10, 32, SSDBoxSizes(111, 162), [2, 3]),
19
+ SSDSpec(5, 64, SSDBoxSizes(162, 213), [2, 3]),
20
+ SSDSpec(3, 100, SSDBoxSizes(213, 264), [2]),
21
+ SSDSpec(1, 300, SSDBoxSizes(264, 315), [2])
22
+ ]
23
+ image_size: image size.
24
+ clamp: if true, clamp the values to make fall between [0.0, 1.0]
25
+ Returns:
26
+ priors (num_priors, 4): The prior boxes represented as [[center_x, center_y, w, h]]. All the values
27
+ are relative to the image size.
28
+ """
29
+ priors = []
30
+ for spec in specs:
31
+ scale = image_size / spec.shrinkage
32
+ for j, i in itertools.product(range(spec.feature_map_size), repeat=2):
33
+ x_center = (i + 0.5) / scale
34
+ y_center = (j + 0.5) / scale
35
+
36
+ # small sized square box
37
+ size = spec.box_sizes.min
38
+ h = w = size / image_size
39
+ priors.append([
40
+ x_center,
41
+ y_center,
42
+ w,
43
+ h
44
+ ])
45
+
46
+ # big sized square box
47
+ size = math.sqrt(spec.box_sizes.max * spec.box_sizes.min)
48
+ h = w = size / image_size
49
+ priors.append([
50
+ x_center,
51
+ y_center,
52
+ w,
53
+ h
54
+ ])
55
+
56
+ # change h/w ratio of the small sized box
57
+ size = spec.box_sizes.min
58
+ h = w = size / image_size
59
+ for ratio in spec.aspect_ratios:
60
+ ratio = math.sqrt(ratio)
61
+ priors.append([
62
+ x_center,
63
+ y_center,
64
+ w * ratio,
65
+ h / ratio
66
+ ])
67
+ priors.append([
68
+ x_center,
69
+ y_center,
70
+ w / ratio,
71
+ h * ratio
72
+ ])
73
+
74
+ priors = np.array(priors, dtype=np.float32)
75
+ if clamp:
76
+ np.clip(priors, 0.0, 1.0, out=priors)
77
+ return priors
78
+
79
+
80
+ def convert_locations_to_boxes(locations, priors, center_variance,
+                                size_variance):
+     """Convert the regression location results of SSD into boxes in the form of (center_x, center_y, w, h).
+
+     The conversion:
+         $$predicted\_center \times center\_variance = \frac{real\_center - prior\_center}{prior\_hw}$$
+         $$\exp(predicted\_hw \times size\_variance) = \frac{real\_hw}{prior\_hw}$$
+     We do it in the inverse direction here.
+     Args:
+         locations (batch_size, num_priors, 4): the regression output of SSD.
+         priors (num_priors, 4) or (batch_size/1, num_priors, 4): prior boxes.
+         center_variance: a float used to change the scale of the centers.
+         size_variance: a float used to change the scale of the sizes.
+     Returns:
+         boxes: boxes in center form, [[center_x, center_y, w, h]]. All the values
+             are relative to the image size.
+     """
+     # priors can have one dimension less.
+     if len(priors.shape) + 1 == len(locations.shape):
+         priors = np.expand_dims(priors, 0)
+     return np.concatenate([
+         locations[..., :2] * center_variance * priors[..., 2:] + priors[..., :2],
+         np.exp(locations[..., 2:] * size_variance) * priors[..., 2:]
+     ], axis=len(locations.shape) - 1)
+
+
+ def convert_boxes_to_locations(center_form_boxes, center_form_priors, center_variance, size_variance):
+     # priors can have one dimension less
+     if len(center_form_priors.shape) + 1 == len(center_form_boxes.shape):
+         center_form_priors = np.expand_dims(center_form_priors, 0)
+     return np.concatenate([
+         (center_form_boxes[..., :2] - center_form_priors[..., :2]) / center_form_priors[..., 2:] / center_variance,
+         np.log(center_form_boxes[..., 2:] / center_form_priors[..., 2:]) / size_variance
+     ], axis=len(center_form_boxes.shape) - 1)
+
+
+ def area_of(left_top, right_bottom):
+     """Compute the areas of rectangles given two corners.
+
+     Args:
+         left_top (N, 2): left top corner.
+         right_bottom (N, 2): right bottom corner.
+
+     Returns:
+         area (N): return the area.
+     """
+     hw = np.clip(right_bottom - left_top, 0.0, None)
+     return hw[..., 0] * hw[..., 1]
+
+
+ def iou_of(boxes0, boxes1, eps=1e-5):
+     """Return intersection-over-union (Jaccard index) of boxes.
+
+     Args:
+         boxes0 (N, 4): ground truth boxes.
+         boxes1 (N or 1, 4): predicted boxes.
+         eps: a small number to avoid 0 as denominator.
+     Returns:
+         iou (N): IoU values.
+     """
+     overlap_left_top = np.maximum(boxes0[..., :2], boxes1[..., :2])
+     overlap_right_bottom = np.minimum(boxes0[..., 2:], boxes1[..., 2:])
+
+     overlap_area = area_of(overlap_left_top, overlap_right_bottom)
+     area0 = area_of(boxes0[..., :2], boxes0[..., 2:])
+     area1 = area_of(boxes1[..., :2], boxes1[..., 2:])
+     return overlap_area / (area0 + area1 - overlap_area + eps)
+
+
+ def center_form_to_corner_form(locations):
+     return np.concatenate([locations[..., :2] - locations[..., 2:] / 2,
+                            locations[..., :2] + locations[..., 2:] / 2], len(locations.shape) - 1)
+
+
+ def corner_form_to_center_form(boxes):
+     return np.concatenate([
+         (boxes[..., :2] + boxes[..., 2:]) / 2,
+         boxes[..., 2:] - boxes[..., :2]
+     ], len(boxes.shape) - 1)
+
+
+ def hard_nms(box_scores, iou_threshold, top_k=-1, candidate_size=200):
+     """Perform hard non-maximum suppression.
+
+     Args:
+         box_scores (N, 5): boxes in corner-form and probabilities.
+         iou_threshold: intersection over union threshold.
+         top_k: keep top_k results. If k <= 0, keep all the results.
+         candidate_size: only consider the candidates with the highest scores.
+     Returns:
+         picked_box_scores (K, 5): the kept boxes with their scores.
+     """
+     scores = box_scores[:, -1]
+     boxes = box_scores[:, :-1]
+     picked = []
+     # np.argsort is ascending, so the best-scoring candidates sit at the end
+     indexes = np.argsort(scores)
+     indexes = indexes[-candidate_size:]
+     while len(indexes) > 0:
+         current = indexes[-1]
+         picked.append(current)
+         if 0 < top_k == len(picked) or len(indexes) == 1:
+             break
+         current_box = boxes[current, :]
+         indexes = indexes[:-1]
+         rest_boxes = boxes[indexes, :]
+         iou = iou_of(
+             rest_boxes,
+             np.expand_dims(current_box, axis=0),
+         )
+         indexes = indexes[iou <= iou_threshold]
+
+     return box_scores[picked, :]
+
+
+ # def nms(box_scores, nms_method=None, score_threshold=None, iou_threshold=None,
+ #         sigma=0.5, top_k=-1, candidate_size=200):
+ #     if nms_method == "soft":
+ #         return soft_nms(box_scores, score_threshold, sigma, top_k)
+ #     else:
+ #         return hard_nms(box_scores, iou_threshold, top_k, candidate_size=candidate_size)
+
+ #
+ # def soft_nms(box_scores, score_threshold, sigma=0.5, top_k=-1):
+ #     """Soft NMS implementation.
+ #
+ #     References:
+ #         https://arxiv.org/abs/1704.04503
+ #         https://github.com/facebookresearch/Detectron/blob/master/detectron/utils/cython_nms.pyx
+ #
+ #     Args:
+ #         box_scores (N, 5): boxes in corner-form and probabilities.
+ #         score_threshold: boxes with scores less than value are not considered.
+ #         sigma: the parameter in score re-computation.
+ #             scores[i] = scores[i] * exp(-(iou_i)^2 / sigma)
+ #         top_k: keep top_k results. If k <= 0, keep all the results.
+ #     Returns:
+ #         picked_box_scores (K, 5): results of NMS.
+ #     """
+ #     picked_box_scores = []
+ #     while box_scores.size(0) > 0:
+ #         max_score_index = torch.argmax(box_scores[:, 4])
+ #         cur_box_prob = torch.tensor(box_scores[max_score_index, :])
+ #         picked_box_scores.append(cur_box_prob)
+ #         if len(picked_box_scores) == top_k > 0 or box_scores.size(0) == 1:
+ #             break
+ #         cur_box = cur_box_prob[:-1]
+ #         box_scores[max_score_index, :] = box_scores[-1, :]
+ #         box_scores = box_scores[:-1, :]
+ #         ious = iou_of(cur_box.unsqueeze(0), box_scores[:, :-1])
+ #         box_scores[:, -1] = box_scores[:, -1] * torch.exp(-(ious * ious) / sigma)
+ #         box_scores = box_scores[box_scores[:, -1] > score_threshold, :]
+ #     if len(picked_box_scores) > 0:
+ #         return torch.stack(picked_box_scores)
+ #     else:
+ #         return torch.tensor([])
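For orientation, a minimal end-to-end sketch of how these numpy utilities fit together at inference time; the single SSDSpec, the variance values, and the dummy zero locations are illustrative only, and SSDBoxSizes is assumed to be the namedtuple defined alongside SSDSpec in vision/utils/box_utils.py:

    import numpy as np
    from vision.utils.box_utils import SSDSpec, SSDBoxSizes
    import vision.utils.box_utils_numpy as box_utils

    specs = [SSDSpec(19, 16, SSDBoxSizes(60, 105), [2, 3])]
    priors = box_utils.generate_ssd_priors(specs, image_size=300)      # (num_priors, 4), center form

    # decode raw SSD regression output into center-form boxes, then corner form
    locations = np.zeros((1, priors.shape[0], 4), dtype=np.float32)    # stand-in for the network output
    boxes = box_utils.convert_locations_to_boxes(locations, priors, 0.1, 0.2)
    boxes = box_utils.center_form_to_corner_form(boxes)[0]

    # attach per-box scores and suppress overlapping detections
    scores = np.random.rand(boxes.shape[0], 1).astype(np.float32)
    box_scores = np.concatenate([boxes, scores], axis=1)               # (N, 5)
    kept = box_utils.hard_nms(box_scores, iou_threshold=0.45, top_k=10)

The variances 0.1 and 0.2 are the center/size values commonly used with SSD configs, not values fixed by this file.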
vision/utils/measurements.py ADDED
@@ -0,0 +1,32 @@
+ import numpy as np
+
+
+ def compute_average_precision(precision, recall):
+     """Compute average precision as defined by the PASCAL VOC competition.
+
+     It computes the area under the precision-recall curve. Recall follows the usual definition;
+     precision is first made monotonically decreasing:
+         pascal_precision[i] = typical_precision[i:].max()
+     """
+     # identical but faster version of new_precision[i] = old_precision[i:].max()
+     precision = np.concatenate([[0.0], precision, [0.0]])
+     for i in range(len(precision) - 1, 0, -1):
+         precision[i - 1] = np.maximum(precision[i - 1], precision[i])
+
+     # find the indexes where the recall value changes
+     recall = np.concatenate([[0.0], recall, [1.0]])
+     changing_points = np.where(recall[1:] != recall[:-1])[0]
+
+     # compute the area under the curve
+     areas = (recall[changing_points + 1] - recall[changing_points]) * precision[changing_points + 1]
+     return areas.sum()
+
+
+ def compute_voc2007_average_precision(precision, recall):
+     # 11-point interpolation used by the VOC2007 evaluation protocol
+     ap = 0.
+     for t in np.arange(0., 1.1, 0.1):
+         if np.sum(recall >= t) == 0:
+             p = 0
+         else:
+             p = np.max(precision[recall >= t])
+         ap = ap + p / 11.
+     return ap
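A quick sanity check of the two AP variants on a toy precision/recall curve (the numbers are invented for illustration):

    import numpy as np
    from vision.utils.measurements import compute_average_precision, compute_voc2007_average_precision

    precision = np.array([1.0, 0.5, 0.67, 0.5])
    recall = np.array([0.25, 0.25, 0.5, 0.5])
    print(compute_average_precision(precision, recall))          # area under the interpolated PR curve
    print(compute_voc2007_average_precision(precision, recall))  # 11-point VOC2007 interpolation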
vision/utils/misc.py ADDED
@@ -0,0 +1,45 @@
+ import time
+ import torch
+
+
+ def str2bool(s):
+     return s.lower() in ('true', '1')
+
+
+ class Timer:
+     def __init__(self):
+         self.clock = {}
+
+     def start(self, key="default"):
+         self.clock[key] = time.time()
+
+     def end(self, key="default"):
+         if key not in self.clock:
+             raise Exception(f"{key} is not in the clock.")
+         interval = time.time() - self.clock[key]
+         del self.clock[key]
+         return interval
+
+
+ def save_checkpoint(epoch, net_state_dict, optimizer_state_dict, best_score, checkpoint_path, model_path):
+     torch.save({
+         'epoch': epoch,
+         'model': net_state_dict,
+         'optimizer': optimizer_state_dict,
+         'best_score': best_score
+     }, checkpoint_path)
+     torch.save(net_state_dict, model_path)
+
+
+ def load_checkpoint(checkpoint_path):
+     return torch.load(checkpoint_path)
+
+
+ def freeze_net_layers(net):
+     for param in net.parameters():
+         param.requires_grad = False
+
+
+ def store_labels(path, labels):
+     with open(path, "w") as f:
+         f.write("\n".join(labels))
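A small sketch of the Timer and freeze helpers in use; the nn.Linear module is just a stand-in network:

    import torch.nn as nn
    from vision.utils.misc import Timer, freeze_net_layers

    timer = Timer()
    timer.start("epoch")
    net = nn.Linear(4, 2)
    freeze_net_layers(net)                    # every parameter now has requires_grad=False
    print(f"took {timer.end('epoch'):.4f}s")  # end() returns the interval and clears the key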
vision/utils/model_book.py ADDED
@@ -0,0 +1,81 @@
+ from collections import OrderedDict
+
+ import torch.nn as nn
+
+
+ class ModelBook:
+     """Maintain the mapping between modules and their paths.
+
+     Example:
+         book = ModelBook(model_ft)
+         for p, m in book.conv2d_modules():
+             print('path:', p, 'num of filters:', m.out_channels)
+             assert m is book.get_module(p)
+     """
+
+     def __init__(self, model):
+         self._model = model
+         self._modules = OrderedDict()
+         self._paths = OrderedDict()
+         path = []
+         self._construct(self._model, path)
+
+     def _construct(self, module, path):
+         if not module._modules:
+             return
+         for name, m in module._modules.items():
+             cur_path = tuple(path + [name])
+             self._paths[m] = cur_path
+             self._modules[cur_path] = m
+             self._construct(m, path + [name])
+
+     def conv2d_modules(self):
+         return self.modules(nn.Conv2d)
+
+     def linear_modules(self):
+         return self.modules(nn.Linear)
+
+     def modules(self, module_type=None):
+         for p, m in self._modules.items():
+             if not module_type or isinstance(m, module_type):
+                 yield p, m
+
+     def num_of_conv2d_modules(self):
+         return self.num_of_modules(nn.Conv2d)
+
+     def num_of_conv2d_filters(self):
+         """Return the sum of out_channels of all conv2d layers.
+
+         Here we treat the sub weight with size of [in_channels, h, w] as a single filter.
+         """
+         num_filters = 0
+         for _, m in self.conv2d_modules():
+             num_filters += m.out_channels
+         return num_filters
+
+     def num_of_linear_modules(self):
+         return self.num_of_modules(nn.Linear)
+
+     def num_of_linear_filters(self):
+         num_filters = 0
+         for _, m in self.linear_modules():
+             num_filters += m.out_features
+         return num_filters
+
+     def num_of_modules(self, module_type=None):
+         num = 0
+         for p, m in self._modules.items():
+             if not module_type or isinstance(m, module_type):
+                 num += 1
+         return num
+
+     def get_module(self, path):
+         return self._modules.get(path)
+
+     def get_path(self, module):
+         return self._paths.get(module)
+
+     def update(self, path, module):
+         old_module = self._modules[path]
+         del self._paths[old_module]
+         self._paths[module] = path
+         self._modules[path] = module
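Finally, a short sketch of ModelBook walking a small network; the two-layer Sequential is illustrative only:

    import torch.nn as nn
    from vision.utils.model_book import ModelBook

    net = nn.Sequential(nn.Conv2d(3, 8, 3), nn.ReLU(), nn.Linear(8, 2))
    book = ModelBook(net)
    print(book.num_of_conv2d_modules())   # 1
    print(book.num_of_conv2d_filters())   # 8, the sum of out_channels
    for path, m in book.conv2d_modules():
        assert m is book.get_module(path) and book.get_path(m) == path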