toto10 commited on
Commit
05c7648
1 Parent(s): 9c5ffc0

a2f60cfcfe82ab0eaeb3c1f2ca16d93a6e5568a958e466e532d32ae8c92178c0

Browse files
Files changed (50) hide show
  1. extensions/microsoftexcel-controlnet/annotator/leres/leres/LICENSE +23 -0
  2. extensions/microsoftexcel-controlnet/annotator/leres/leres/Resnet.py +199 -0
  3. extensions/microsoftexcel-controlnet/annotator/leres/leres/Resnext_torch.py +237 -0
  4. extensions/microsoftexcel-controlnet/annotator/leres/leres/depthmap.py +546 -0
  5. extensions/microsoftexcel-controlnet/annotator/leres/leres/multi_depth_model_woauxi.py +34 -0
  6. extensions/microsoftexcel-controlnet/annotator/leres/leres/net_tools.py +54 -0
  7. extensions/microsoftexcel-controlnet/annotator/leres/leres/network_auxi.py +417 -0
  8. extensions/microsoftexcel-controlnet/annotator/leres/pix2pix/LICENSE +19 -0
  9. extensions/microsoftexcel-controlnet/annotator/leres/pix2pix/models/__init__.py +67 -0
  10. extensions/microsoftexcel-controlnet/annotator/leres/pix2pix/models/base_model.py +241 -0
  11. extensions/microsoftexcel-controlnet/annotator/leres/pix2pix/models/base_model_hg.py +58 -0
  12. extensions/microsoftexcel-controlnet/annotator/leres/pix2pix/models/networks.py +623 -0
  13. extensions/microsoftexcel-controlnet/annotator/leres/pix2pix/models/pix2pix4depth_model.py +155 -0
  14. extensions/microsoftexcel-controlnet/annotator/leres/pix2pix/options/__init__.py +1 -0
  15. extensions/microsoftexcel-controlnet/annotator/leres/pix2pix/options/base_options.py +156 -0
  16. extensions/microsoftexcel-controlnet/annotator/leres/pix2pix/options/test_options.py +22 -0
  17. extensions/microsoftexcel-controlnet/annotator/leres/pix2pix/util/__init__.py +1 -0
  18. extensions/microsoftexcel-controlnet/annotator/leres/pix2pix/util/get_data.py +110 -0
  19. extensions/microsoftexcel-controlnet/annotator/leres/pix2pix/util/guidedfilter.py +47 -0
  20. extensions/microsoftexcel-controlnet/annotator/leres/pix2pix/util/html.py +86 -0
  21. extensions/microsoftexcel-controlnet/annotator/leres/pix2pix/util/image_pool.py +54 -0
  22. extensions/microsoftexcel-controlnet/annotator/leres/pix2pix/util/util.py +105 -0
  23. extensions/microsoftexcel-controlnet/annotator/leres/pix2pix/util/visualizer.py +166 -0
  24. extensions/microsoftexcel-controlnet/annotator/lineart/LICENSE +21 -0
  25. extensions/microsoftexcel-controlnet/annotator/lineart/__init__.py +133 -0
  26. extensions/microsoftexcel-controlnet/annotator/lineart_anime/LICENSE +21 -0
  27. extensions/microsoftexcel-controlnet/annotator/lineart_anime/__init__.py +161 -0
  28. extensions/microsoftexcel-controlnet/annotator/manga_line/LICENSE +21 -0
  29. extensions/microsoftexcel-controlnet/annotator/manga_line/__init__.py +248 -0
  30. extensions/microsoftexcel-controlnet/annotator/mediapipe_face/__init__.py +5 -0
  31. extensions/microsoftexcel-controlnet/annotator/mediapipe_face/mediapipe_face_common.py +155 -0
  32. extensions/microsoftexcel-controlnet/annotator/midas/LICENSE +21 -0
  33. extensions/microsoftexcel-controlnet/annotator/midas/__init__.py +49 -0
  34. extensions/microsoftexcel-controlnet/annotator/midas/api.py +181 -0
  35. extensions/microsoftexcel-controlnet/annotator/midas/midas/__init__.py +0 -0
  36. extensions/microsoftexcel-controlnet/annotator/midas/midas/base_model.py +16 -0
  37. extensions/microsoftexcel-controlnet/annotator/midas/midas/blocks.py +342 -0
  38. extensions/microsoftexcel-controlnet/annotator/midas/midas/dpt_depth.py +109 -0
  39. extensions/microsoftexcel-controlnet/annotator/midas/midas/midas_net.py +76 -0
  40. extensions/microsoftexcel-controlnet/annotator/midas/midas/midas_net_custom.py +128 -0
  41. extensions/microsoftexcel-controlnet/annotator/midas/midas/transforms.py +234 -0
  42. extensions/microsoftexcel-controlnet/annotator/midas/midas/vit.py +491 -0
  43. extensions/microsoftexcel-controlnet/annotator/midas/utils.py +189 -0
  44. extensions/microsoftexcel-controlnet/annotator/mlsd/LICENSE +201 -0
  45. extensions/microsoftexcel-controlnet/annotator/mlsd/__init__.py +49 -0
  46. extensions/microsoftexcel-controlnet/annotator/mlsd/models/mbv2_mlsd_large.py +292 -0
  47. extensions/microsoftexcel-controlnet/annotator/mlsd/models/mbv2_mlsd_tiny.py +275 -0
  48. extensions/microsoftexcel-controlnet/annotator/mlsd/utils.py +581 -0
  49. extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/__init__.py +15 -0
  50. extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/arraymisc/__init__.py +4 -0
extensions/microsoftexcel-controlnet/annotator/leres/leres/LICENSE ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ https://github.com/thygate/stable-diffusion-webui-depthmap-script
2
+
3
+ MIT License
4
+
5
+ Copyright (c) 2023 Bob Thiry
6
+
7
+ Permission is hereby granted, free of charge, to any person obtaining a copy
8
+ of this software and associated documentation files (the "Software"), to deal
9
+ in the Software without restriction, including without limitation the rights
10
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11
+ copies of the Software, and to permit persons to whom the Software is
12
+ furnished to do so, subject to the following conditions:
13
+
14
+ The above copyright notice and this permission notice shall be included in all
15
+ copies or substantial portions of the Software.
16
+
17
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23
+ SOFTWARE.
extensions/microsoftexcel-controlnet/annotator/leres/leres/Resnet.py ADDED
@@ -0,0 +1,199 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch.nn as nn
2
+ import torch.nn as NN
3
+
4
+ __all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
5
+ 'resnet152']
6
+
7
+
8
+ model_urls = {
9
+ 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
10
+ 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
11
+ 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
12
+ 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
13
+ 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
14
+ }
15
+
16
+
17
+ def conv3x3(in_planes, out_planes, stride=1):
18
+ """3x3 convolution with padding"""
19
+ return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
20
+ padding=1, bias=False)
21
+
22
+
23
+ class BasicBlock(nn.Module):
24
+ expansion = 1
25
+
26
+ def __init__(self, inplanes, planes, stride=1, downsample=None):
27
+ super(BasicBlock, self).__init__()
28
+ self.conv1 = conv3x3(inplanes, planes, stride)
29
+ self.bn1 = NN.BatchNorm2d(planes) #NN.BatchNorm2d
30
+ self.relu = nn.ReLU(inplace=True)
31
+ self.conv2 = conv3x3(planes, planes)
32
+ self.bn2 = NN.BatchNorm2d(planes) #NN.BatchNorm2d
33
+ self.downsample = downsample
34
+ self.stride = stride
35
+
36
+ def forward(self, x):
37
+ residual = x
38
+
39
+ out = self.conv1(x)
40
+ out = self.bn1(out)
41
+ out = self.relu(out)
42
+
43
+ out = self.conv2(out)
44
+ out = self.bn2(out)
45
+
46
+ if self.downsample is not None:
47
+ residual = self.downsample(x)
48
+
49
+ out += residual
50
+ out = self.relu(out)
51
+
52
+ return out
53
+
54
+
55
+ class Bottleneck(nn.Module):
56
+ expansion = 4
57
+
58
+ def __init__(self, inplanes, planes, stride=1, downsample=None):
59
+ super(Bottleneck, self).__init__()
60
+ self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
61
+ self.bn1 = NN.BatchNorm2d(planes) #NN.BatchNorm2d
62
+ self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
63
+ padding=1, bias=False)
64
+ self.bn2 = NN.BatchNorm2d(planes) #NN.BatchNorm2d
65
+ self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, bias=False)
66
+ self.bn3 = NN.BatchNorm2d(planes * self.expansion) #NN.BatchNorm2d
67
+ self.relu = nn.ReLU(inplace=True)
68
+ self.downsample = downsample
69
+ self.stride = stride
70
+
71
+ def forward(self, x):
72
+ residual = x
73
+
74
+ out = self.conv1(x)
75
+ out = self.bn1(out)
76
+ out = self.relu(out)
77
+
78
+ out = self.conv2(out)
79
+ out = self.bn2(out)
80
+ out = self.relu(out)
81
+
82
+ out = self.conv3(out)
83
+ out = self.bn3(out)
84
+
85
+ if self.downsample is not None:
86
+ residual = self.downsample(x)
87
+
88
+ out += residual
89
+ out = self.relu(out)
90
+
91
+ return out
92
+
93
+
94
+ class ResNet(nn.Module):
95
+
96
+ def __init__(self, block, layers, num_classes=1000):
97
+ self.inplanes = 64
98
+ super(ResNet, self).__init__()
99
+ self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
100
+ bias=False)
101
+ self.bn1 = NN.BatchNorm2d(64) #NN.BatchNorm2d
102
+ self.relu = nn.ReLU(inplace=True)
103
+ self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
104
+ self.layer1 = self._make_layer(block, 64, layers[0])
105
+ self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
106
+ self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
107
+ self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
108
+ #self.avgpool = nn.AvgPool2d(7, stride=1)
109
+ #self.fc = nn.Linear(512 * block.expansion, num_classes)
110
+
111
+ for m in self.modules():
112
+ if isinstance(m, nn.Conv2d):
113
+ nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
114
+ elif isinstance(m, nn.BatchNorm2d):
115
+ nn.init.constant_(m.weight, 1)
116
+ nn.init.constant_(m.bias, 0)
117
+
118
+ def _make_layer(self, block, planes, blocks, stride=1):
119
+ downsample = None
120
+ if stride != 1 or self.inplanes != planes * block.expansion:
121
+ downsample = nn.Sequential(
122
+ nn.Conv2d(self.inplanes, planes * block.expansion,
123
+ kernel_size=1, stride=stride, bias=False),
124
+ NN.BatchNorm2d(planes * block.expansion), #NN.BatchNorm2d
125
+ )
126
+
127
+ layers = []
128
+ layers.append(block(self.inplanes, planes, stride, downsample))
129
+ self.inplanes = planes * block.expansion
130
+ for i in range(1, blocks):
131
+ layers.append(block(self.inplanes, planes))
132
+
133
+ return nn.Sequential(*layers)
134
+
135
+ def forward(self, x):
136
+ features = []
137
+
138
+ x = self.conv1(x)
139
+ x = self.bn1(x)
140
+ x = self.relu(x)
141
+ x = self.maxpool(x)
142
+
143
+ x = self.layer1(x)
144
+ features.append(x)
145
+ x = self.layer2(x)
146
+ features.append(x)
147
+ x = self.layer3(x)
148
+ features.append(x)
149
+ x = self.layer4(x)
150
+ features.append(x)
151
+
152
+ return features
153
+
154
+
155
+ def resnet18(pretrained=True, **kwargs):
156
+ """Constructs a ResNet-18 model.
157
+ Args:
158
+ pretrained (bool): If True, returns a model pre-trained on ImageNet
159
+ """
160
+ model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
161
+ return model
162
+
163
+
164
+ def resnet34(pretrained=True, **kwargs):
165
+ """Constructs a ResNet-34 model.
166
+ Args:
167
+ pretrained (bool): If True, returns a model pre-trained on ImageNet
168
+ """
169
+ model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs)
170
+ return model
171
+
172
+
173
+ def resnet50(pretrained=True, **kwargs):
174
+ """Constructs a ResNet-50 model.
175
+ Args:
176
+ pretrained (bool): If True, returns a model pre-trained on ImageNet
177
+ """
178
+ model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
179
+
180
+ return model
181
+
182
+
183
+ def resnet101(pretrained=True, **kwargs):
184
+ """Constructs a ResNet-101 model.
185
+ Args:
186
+ pretrained (bool): If True, returns a model pre-trained on ImageNet
187
+ """
188
+ model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)
189
+
190
+ return model
191
+
192
+
193
+ def resnet152(pretrained=True, **kwargs):
194
+ """Constructs a ResNet-152 model.
195
+ Args:
196
+ pretrained (bool): If True, returns a model pre-trained on ImageNet
197
+ """
198
+ model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs)
199
+ return model
extensions/microsoftexcel-controlnet/annotator/leres/leres/Resnext_torch.py ADDED
@@ -0,0 +1,237 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ # coding: utf-8
3
+ import torch.nn as nn
4
+
5
+ try:
6
+ from urllib import urlretrieve
7
+ except ImportError:
8
+ from urllib.request import urlretrieve
9
+
10
+ __all__ = ['resnext101_32x8d']
11
+
12
+
13
+ model_urls = {
14
+ 'resnext50_32x4d': 'https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth',
15
+ 'resnext101_32x8d': 'https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth',
16
+ }
17
+
18
+
19
+ def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
20
+ """3x3 convolution with padding"""
21
+ return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
22
+ padding=dilation, groups=groups, bias=False, dilation=dilation)
23
+
24
+
25
+ def conv1x1(in_planes, out_planes, stride=1):
26
+ """1x1 convolution"""
27
+ return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
28
+
29
+
30
+ class BasicBlock(nn.Module):
31
+ expansion = 1
32
+
33
+ def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
34
+ base_width=64, dilation=1, norm_layer=None):
35
+ super(BasicBlock, self).__init__()
36
+ if norm_layer is None:
37
+ norm_layer = nn.BatchNorm2d
38
+ if groups != 1 or base_width != 64:
39
+ raise ValueError('BasicBlock only supports groups=1 and base_width=64')
40
+ if dilation > 1:
41
+ raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
42
+ # Both self.conv1 and self.downsample layers downsample the input when stride != 1
43
+ self.conv1 = conv3x3(inplanes, planes, stride)
44
+ self.bn1 = norm_layer(planes)
45
+ self.relu = nn.ReLU(inplace=True)
46
+ self.conv2 = conv3x3(planes, planes)
47
+ self.bn2 = norm_layer(planes)
48
+ self.downsample = downsample
49
+ self.stride = stride
50
+
51
+ def forward(self, x):
52
+ identity = x
53
+
54
+ out = self.conv1(x)
55
+ out = self.bn1(out)
56
+ out = self.relu(out)
57
+
58
+ out = self.conv2(out)
59
+ out = self.bn2(out)
60
+
61
+ if self.downsample is not None:
62
+ identity = self.downsample(x)
63
+
64
+ out += identity
65
+ out = self.relu(out)
66
+
67
+ return out
68
+
69
+
70
+ class Bottleneck(nn.Module):
71
+ # Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2)
72
+ # while original implementation places the stride at the first 1x1 convolution(self.conv1)
73
+ # according to "Deep residual learning for image recognition"https://arxiv.org/abs/1512.03385.
74
+ # This variant is also known as ResNet V1.5 and improves accuracy according to
75
+ # https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch.
76
+
77
+ expansion = 4
78
+
79
+ def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
80
+ base_width=64, dilation=1, norm_layer=None):
81
+ super(Bottleneck, self).__init__()
82
+ if norm_layer is None:
83
+ norm_layer = nn.BatchNorm2d
84
+ width = int(planes * (base_width / 64.)) * groups
85
+ # Both self.conv2 and self.downsample layers downsample the input when stride != 1
86
+ self.conv1 = conv1x1(inplanes, width)
87
+ self.bn1 = norm_layer(width)
88
+ self.conv2 = conv3x3(width, width, stride, groups, dilation)
89
+ self.bn2 = norm_layer(width)
90
+ self.conv3 = conv1x1(width, planes * self.expansion)
91
+ self.bn3 = norm_layer(planes * self.expansion)
92
+ self.relu = nn.ReLU(inplace=True)
93
+ self.downsample = downsample
94
+ self.stride = stride
95
+
96
+ def forward(self, x):
97
+ identity = x
98
+
99
+ out = self.conv1(x)
100
+ out = self.bn1(out)
101
+ out = self.relu(out)
102
+
103
+ out = self.conv2(out)
104
+ out = self.bn2(out)
105
+ out = self.relu(out)
106
+
107
+ out = self.conv3(out)
108
+ out = self.bn3(out)
109
+
110
+ if self.downsample is not None:
111
+ identity = self.downsample(x)
112
+
113
+ out += identity
114
+ out = self.relu(out)
115
+
116
+ return out
117
+
118
+
119
+ class ResNet(nn.Module):
120
+
121
+ def __init__(self, block, layers, num_classes=1000, zero_init_residual=False,
122
+ groups=1, width_per_group=64, replace_stride_with_dilation=None,
123
+ norm_layer=None):
124
+ super(ResNet, self).__init__()
125
+ if norm_layer is None:
126
+ norm_layer = nn.BatchNorm2d
127
+ self._norm_layer = norm_layer
128
+
129
+ self.inplanes = 64
130
+ self.dilation = 1
131
+ if replace_stride_with_dilation is None:
132
+ # each element in the tuple indicates if we should replace
133
+ # the 2x2 stride with a dilated convolution instead
134
+ replace_stride_with_dilation = [False, False, False]
135
+ if len(replace_stride_with_dilation) != 3:
136
+ raise ValueError("replace_stride_with_dilation should be None "
137
+ "or a 3-element tuple, got {}".format(replace_stride_with_dilation))
138
+ self.groups = groups
139
+ self.base_width = width_per_group
140
+ self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3,
141
+ bias=False)
142
+ self.bn1 = norm_layer(self.inplanes)
143
+ self.relu = nn.ReLU(inplace=True)
144
+ self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
145
+ self.layer1 = self._make_layer(block, 64, layers[0])
146
+ self.layer2 = self._make_layer(block, 128, layers[1], stride=2,
147
+ dilate=replace_stride_with_dilation[0])
148
+ self.layer3 = self._make_layer(block, 256, layers[2], stride=2,
149
+ dilate=replace_stride_with_dilation[1])
150
+ self.layer4 = self._make_layer(block, 512, layers[3], stride=2,
151
+ dilate=replace_stride_with_dilation[2])
152
+ #self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
153
+ #self.fc = nn.Linear(512 * block.expansion, num_classes)
154
+
155
+ for m in self.modules():
156
+ if isinstance(m, nn.Conv2d):
157
+ nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
158
+ elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
159
+ nn.init.constant_(m.weight, 1)
160
+ nn.init.constant_(m.bias, 0)
161
+
162
+ # Zero-initialize the last BN in each residual branch,
163
+ # so that the residual branch starts with zeros, and each residual block behaves like an identity.
164
+ # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
165
+ if zero_init_residual:
166
+ for m in self.modules():
167
+ if isinstance(m, Bottleneck):
168
+ nn.init.constant_(m.bn3.weight, 0)
169
+ elif isinstance(m, BasicBlock):
170
+ nn.init.constant_(m.bn2.weight, 0)
171
+
172
+ def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
173
+ norm_layer = self._norm_layer
174
+ downsample = None
175
+ previous_dilation = self.dilation
176
+ if dilate:
177
+ self.dilation *= stride
178
+ stride = 1
179
+ if stride != 1 or self.inplanes != planes * block.expansion:
180
+ downsample = nn.Sequential(
181
+ conv1x1(self.inplanes, planes * block.expansion, stride),
182
+ norm_layer(planes * block.expansion),
183
+ )
184
+
185
+ layers = []
186
+ layers.append(block(self.inplanes, planes, stride, downsample, self.groups,
187
+ self.base_width, previous_dilation, norm_layer))
188
+ self.inplanes = planes * block.expansion
189
+ for _ in range(1, blocks):
190
+ layers.append(block(self.inplanes, planes, groups=self.groups,
191
+ base_width=self.base_width, dilation=self.dilation,
192
+ norm_layer=norm_layer))
193
+
194
+ return nn.Sequential(*layers)
195
+
196
+ def _forward_impl(self, x):
197
+ # See note [TorchScript super()]
198
+ features = []
199
+ x = self.conv1(x)
200
+ x = self.bn1(x)
201
+ x = self.relu(x)
202
+ x = self.maxpool(x)
203
+
204
+ x = self.layer1(x)
205
+ features.append(x)
206
+
207
+ x = self.layer2(x)
208
+ features.append(x)
209
+
210
+ x = self.layer3(x)
211
+ features.append(x)
212
+
213
+ x = self.layer4(x)
214
+ features.append(x)
215
+
216
+ #x = self.avgpool(x)
217
+ #x = torch.flatten(x, 1)
218
+ #x = self.fc(x)
219
+
220
+ return features
221
+
222
+ def forward(self, x):
223
+ return self._forward_impl(x)
224
+
225
+
226
+
227
+ def resnext101_32x8d(pretrained=True, **kwargs):
228
+ """Constructs a ResNet-152 model.
229
+ Args:
230
+ pretrained (bool): If True, returns a model pre-trained on ImageNet
231
+ """
232
+ kwargs['groups'] = 32
233
+ kwargs['width_per_group'] = 8
234
+
235
+ model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)
236
+ return model
237
+
extensions/microsoftexcel-controlnet/annotator/leres/leres/depthmap.py ADDED
@@ -0,0 +1,546 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Author: thygate
2
+ # https://github.com/thygate/stable-diffusion-webui-depthmap-script
3
+
4
+ from modules import devices
5
+ from modules.shared import opts
6
+ from torchvision.transforms import transforms
7
+ from operator import getitem
8
+
9
+ import torch, gc
10
+ import cv2
11
+ import numpy as np
12
+ import skimage.measure
13
+
14
+ whole_size_threshold = 1600 # R_max from the paper
15
+ pix2pixsize = 1024
16
+
17
+ def scale_torch(img):
18
+ """
19
+ Scale the image and output it in torch.tensor.
20
+ :param img: input rgb is in shape [H, W, C], input depth/disp is in shape [H, W]
21
+ :param scale: the scale factor. float
22
+ :return: img. [C, H, W]
23
+ """
24
+ if len(img.shape) == 2:
25
+ img = img[np.newaxis, :, :]
26
+ if img.shape[2] == 3:
27
+ transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.485, 0.456, 0.406) , (0.229, 0.224, 0.225) )])
28
+ img = transform(img.astype(np.float32))
29
+ else:
30
+ img = img.astype(np.float32)
31
+ img = torch.from_numpy(img)
32
+ return img
33
+
34
+ def estimateleres(img, model, w, h):
35
+ # leres transform input
36
+ rgb_c = img[:, :, ::-1].copy()
37
+ A_resize = cv2.resize(rgb_c, (w, h))
38
+ img_torch = scale_torch(A_resize)[None, :, :, :]
39
+
40
+ # compute
41
+ with torch.no_grad():
42
+ img_torch = img_torch.to(devices.get_device_for("controlnet"))
43
+ prediction = model.depth_model(img_torch)
44
+
45
+ prediction = prediction.squeeze().cpu().numpy()
46
+ prediction = cv2.resize(prediction, (img.shape[1], img.shape[0]), interpolation=cv2.INTER_CUBIC)
47
+
48
+ return prediction
49
+
50
+ def generatemask(size):
51
+ # Generates a Guassian mask
52
+ mask = np.zeros(size, dtype=np.float32)
53
+ sigma = int(size[0]/16)
54
+ k_size = int(2 * np.ceil(2 * int(size[0]/16)) + 1)
55
+ mask[int(0.15*size[0]):size[0] - int(0.15*size[0]), int(0.15*size[1]): size[1] - int(0.15*size[1])] = 1
56
+ mask = cv2.GaussianBlur(mask, (int(k_size), int(k_size)), sigma)
57
+ mask = (mask - mask.min()) / (mask.max() - mask.min())
58
+ mask = mask.astype(np.float32)
59
+ return mask
60
+
61
+ def resizewithpool(img, size):
62
+ i_size = img.shape[0]
63
+ n = int(np.floor(i_size/size))
64
+
65
+ out = skimage.measure.block_reduce(img, (n, n), np.max)
66
+ return out
67
+
68
+ def rgb2gray(rgb):
69
+ # Converts rgb to gray
70
+ return np.dot(rgb[..., :3], [0.2989, 0.5870, 0.1140])
71
+
72
+ def calculateprocessingres(img, basesize, confidence=0.1, scale_threshold=3, whole_size_threshold=3000):
73
+ # Returns the R_x resolution described in section 5 of the main paper.
74
+
75
+ # Parameters:
76
+ # img :input rgb image
77
+ # basesize : size the dilation kernel which is equal to receptive field of the network.
78
+ # confidence: value of x in R_x; allowed percentage of pixels that are not getting any contextual cue.
79
+ # scale_threshold: maximum allowed upscaling on the input image ; it has been set to 3.
80
+ # whole_size_threshold: maximum allowed resolution. (R_max from section 6 of the main paper)
81
+
82
+ # Returns:
83
+ # outputsize_scale*speed_scale :The computed R_x resolution
84
+ # patch_scale: K parameter from section 6 of the paper
85
+
86
+ # speed scale parameter is to process every image in a smaller size to accelerate the R_x resolution search
87
+ speed_scale = 32
88
+ image_dim = int(min(img.shape[0:2]))
89
+
90
+ gray = rgb2gray(img)
91
+ grad = np.abs(cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3)) + np.abs(cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3))
92
+ grad = cv2.resize(grad, (image_dim, image_dim), cv2.INTER_AREA)
93
+
94
+ # thresholding the gradient map to generate the edge-map as a proxy of the contextual cues
95
+ m = grad.min()
96
+ M = grad.max()
97
+ middle = m + (0.4 * (M - m))
98
+ grad[grad < middle] = 0
99
+ grad[grad >= middle] = 1
100
+
101
+ # dilation kernel with size of the receptive field
102
+ kernel = np.ones((int(basesize/speed_scale), int(basesize/speed_scale)), float)
103
+ # dilation kernel with size of the a quarter of receptive field used to compute k
104
+ # as described in section 6 of main paper
105
+ kernel2 = np.ones((int(basesize / (4*speed_scale)), int(basesize / (4*speed_scale))), float)
106
+
107
+ # Output resolution limit set by the whole_size_threshold and scale_threshold.
108
+ threshold = min(whole_size_threshold, scale_threshold * max(img.shape[:2]))
109
+
110
+ outputsize_scale = basesize / speed_scale
111
+ for p_size in range(int(basesize/speed_scale), int(threshold/speed_scale), int(basesize / (2*speed_scale))):
112
+ grad_resized = resizewithpool(grad, p_size)
113
+ grad_resized = cv2.resize(grad_resized, (p_size, p_size), cv2.INTER_NEAREST)
114
+ grad_resized[grad_resized >= 0.5] = 1
115
+ grad_resized[grad_resized < 0.5] = 0
116
+
117
+ dilated = cv2.dilate(grad_resized, kernel, iterations=1)
118
+ meanvalue = (1-dilated).mean()
119
+ if meanvalue > confidence:
120
+ break
121
+ else:
122
+ outputsize_scale = p_size
123
+
124
+ grad_region = cv2.dilate(grad_resized, kernel2, iterations=1)
125
+ patch_scale = grad_region.mean()
126
+
127
+ return int(outputsize_scale*speed_scale), patch_scale
128
+
129
+ # Generate a double-input depth estimation
130
+ def doubleestimate(img, size1, size2, pix2pixsize, model, net_type, pix2pixmodel):
131
+ # Generate the low resolution estimation
132
+ estimate1 = singleestimate(img, size1, model, net_type)
133
+ # Resize to the inference size of merge network.
134
+ estimate1 = cv2.resize(estimate1, (pix2pixsize, pix2pixsize), interpolation=cv2.INTER_CUBIC)
135
+
136
+ # Generate the high resolution estimation
137
+ estimate2 = singleestimate(img, size2, model, net_type)
138
+ # Resize to the inference size of merge network.
139
+ estimate2 = cv2.resize(estimate2, (pix2pixsize, pix2pixsize), interpolation=cv2.INTER_CUBIC)
140
+
141
+ # Inference on the merge model
142
+ pix2pixmodel.set_input(estimate1, estimate2)
143
+ pix2pixmodel.test()
144
+ visuals = pix2pixmodel.get_current_visuals()
145
+ prediction_mapped = visuals['fake_B']
146
+ prediction_mapped = (prediction_mapped+1)/2
147
+ prediction_mapped = (prediction_mapped - torch.min(prediction_mapped)) / (
148
+ torch.max(prediction_mapped) - torch.min(prediction_mapped))
149
+ prediction_mapped = prediction_mapped.squeeze().cpu().numpy()
150
+
151
+ return prediction_mapped
152
+
153
+ # Generate a single-input depth estimation
154
+ def singleestimate(img, msize, model, net_type):
155
+ # if net_type == 0:
156
+ return estimateleres(img, model, msize, msize)
157
+ # else:
158
+ # return estimatemidasBoost(img, model, msize, msize)
159
+
160
+ def applyGridpatch(blsize, stride, img, box):
161
+ # Extract a simple grid patch.
162
+ counter1 = 0
163
+ patch_bound_list = {}
164
+ for k in range(blsize, img.shape[1] - blsize, stride):
165
+ for j in range(blsize, img.shape[0] - blsize, stride):
166
+ patch_bound_list[str(counter1)] = {}
167
+ patchbounds = [j - blsize, k - blsize, j - blsize + 2 * blsize, k - blsize + 2 * blsize]
168
+ patch_bound = [box[0] + patchbounds[1], box[1] + patchbounds[0], patchbounds[3] - patchbounds[1],
169
+ patchbounds[2] - patchbounds[0]]
170
+ patch_bound_list[str(counter1)]['rect'] = patch_bound
171
+ patch_bound_list[str(counter1)]['size'] = patch_bound[2]
172
+ counter1 = counter1 + 1
173
+ return patch_bound_list
174
+
175
+ # Generating local patches to perform the local refinement described in section 6 of the main paper.
176
+ def generatepatchs(img, base_size):
177
+
178
+ # Compute the gradients as a proxy of the contextual cues.
179
+ img_gray = rgb2gray(img)
180
+ whole_grad = np.abs(cv2.Sobel(img_gray, cv2.CV_64F, 0, 1, ksize=3)) +\
181
+ np.abs(cv2.Sobel(img_gray, cv2.CV_64F, 1, 0, ksize=3))
182
+
183
+ threshold = whole_grad[whole_grad > 0].mean()
184
+ whole_grad[whole_grad < threshold] = 0
185
+
186
+ # We use the integral image to speed-up the evaluation of the amount of gradients for each patch.
187
+ gf = whole_grad.sum()/len(whole_grad.reshape(-1))
188
+ grad_integral_image = cv2.integral(whole_grad)
189
+
190
+ # Variables are selected such that the initial patch size would be the receptive field size
191
+ # and the stride is set to 1/3 of the receptive field size.
192
+ blsize = int(round(base_size/2))
193
+ stride = int(round(blsize*0.75))
194
+
195
+ # Get initial Grid
196
+ patch_bound_list = applyGridpatch(blsize, stride, img, [0, 0, 0, 0])
197
+
198
+ # Refine initial Grid of patches by discarding the flat (in terms of gradients of the rgb image) ones. Refine
199
+ # each patch size to ensure that there will be enough depth cues for the network to generate a consistent depth map.
200
+ print("Selecting patches ...")
201
+ patch_bound_list = adaptiveselection(grad_integral_image, patch_bound_list, gf)
202
+
203
+ # Sort the patch list to make sure the merging operation will be done with the correct order: starting from biggest
204
+ # patch
205
+ patchset = sorted(patch_bound_list.items(), key=lambda x: getitem(x[1], 'size'), reverse=True)
206
+ return patchset
207
+
208
+ def getGF_fromintegral(integralimage, rect):
209
+ # Computes the gradient density of a given patch from the gradient integral image.
210
+ x1 = rect[1]
211
+ x2 = rect[1]+rect[3]
212
+ y1 = rect[0]
213
+ y2 = rect[0]+rect[2]
214
+ value = integralimage[x2, y2]-integralimage[x1, y2]-integralimage[x2, y1]+integralimage[x1, y1]
215
+ return value
216
+
217
+ # Adaptively select patches
218
+ def adaptiveselection(integral_grad, patch_bound_list, gf):
219
+ patchlist = {}
220
+ count = 0
221
+ height, width = integral_grad.shape
222
+
223
+ search_step = int(32/factor)
224
+
225
+ # Go through all patches
226
+ for c in range(len(patch_bound_list)):
227
+ # Get patch
228
+ bbox = patch_bound_list[str(c)]['rect']
229
+
230
+ # Compute the amount of gradients present in the patch from the integral image.
231
+ cgf = getGF_fromintegral(integral_grad, bbox)/(bbox[2]*bbox[3])
232
+
233
+ # Check if patching is beneficial by comparing the gradient density of the patch to
234
+ # the gradient density of the whole image
235
+ if cgf >= gf:
236
+ bbox_test = bbox.copy()
237
+ patchlist[str(count)] = {}
238
+
239
+ # Enlarge each patch until the gradient density of the patch is equal
240
+ # to the whole image gradient density
241
+ while True:
242
+
243
+ bbox_test[0] = bbox_test[0] - int(search_step/2)
244
+ bbox_test[1] = bbox_test[1] - int(search_step/2)
245
+
246
+ bbox_test[2] = bbox_test[2] + search_step
247
+ bbox_test[3] = bbox_test[3] + search_step
248
+
249
+ # Check if we are still within the image
250
+ if bbox_test[0] < 0 or bbox_test[1] < 0 or bbox_test[1] + bbox_test[3] >= height \
251
+ or bbox_test[0] + bbox_test[2] >= width:
252
+ break
253
+
254
+ # Compare gradient density
255
+ cgf = getGF_fromintegral(integral_grad, bbox_test)/(bbox_test[2]*bbox_test[3])
256
+ if cgf < gf:
257
+ break
258
+ bbox = bbox_test.copy()
259
+
260
+ # Add patch to selected patches
261
+ patchlist[str(count)]['rect'] = bbox
262
+ patchlist[str(count)]['size'] = bbox[2]
263
+ count = count + 1
264
+
265
+ # Return selected patches
266
+ return patchlist
267
+
268
+ def impatch(image, rect):
269
+ # Extract the given patch pixels from a given image.
270
+ w1 = rect[0]
271
+ h1 = rect[1]
272
+ w2 = w1 + rect[2]
273
+ h2 = h1 + rect[3]
274
+ image_patch = image[h1:h2, w1:w2]
275
+ return image_patch
276
+
277
+ class ImageandPatchs:
278
+ def __init__(self, root_dir, name, patchsinfo, rgb_image, scale=1):
279
+ self.root_dir = root_dir
280
+ self.patchsinfo = patchsinfo
281
+ self.name = name
282
+ self.patchs = patchsinfo
283
+ self.scale = scale
284
+
285
+ self.rgb_image = cv2.resize(rgb_image, (round(rgb_image.shape[1]*scale), round(rgb_image.shape[0]*scale)),
286
+ interpolation=cv2.INTER_CUBIC)
287
+
288
+ self.do_have_estimate = False
289
+ self.estimation_updated_image = None
290
+ self.estimation_base_image = None
291
+
292
+ def __len__(self):
293
+ return len(self.patchs)
294
+
295
+ def set_base_estimate(self, est):
296
+ self.estimation_base_image = est
297
+ if self.estimation_updated_image is not None:
298
+ self.do_have_estimate = True
299
+
300
+ def set_updated_estimate(self, est):
301
+ self.estimation_updated_image = est
302
+ if self.estimation_base_image is not None:
303
+ self.do_have_estimate = True
304
+
305
+ def __getitem__(self, index):
306
+ patch_id = int(self.patchs[index][0])
307
+ rect = np.array(self.patchs[index][1]['rect'])
308
+ msize = self.patchs[index][1]['size']
309
+
310
+ ## applying scale to rect:
311
+ rect = np.round(rect * self.scale)
312
+ rect = rect.astype('int')
313
+ msize = round(msize * self.scale)
314
+
315
+ patch_rgb = impatch(self.rgb_image, rect)
316
+ if self.do_have_estimate:
317
+ patch_whole_estimate_base = impatch(self.estimation_base_image, rect)
318
+ patch_whole_estimate_updated = impatch(self.estimation_updated_image, rect)
319
+ return {'patch_rgb': patch_rgb, 'patch_whole_estimate_base': patch_whole_estimate_base,
320
+ 'patch_whole_estimate_updated': patch_whole_estimate_updated, 'rect': rect,
321
+ 'size': msize, 'id': patch_id}
322
+ else:
323
+ return {'patch_rgb': patch_rgb, 'rect': rect, 'size': msize, 'id': patch_id}
324
+
325
+ def print_options(self, opt):
326
+ """Print and save options
327
+
328
+ It will print both current options and default values(if different).
329
+ It will save options into a text file / [checkpoints_dir] / opt.txt
330
+ """
331
+ message = ''
332
+ message += '----------------- Options ---------------\n'
333
+ for k, v in sorted(vars(opt).items()):
334
+ comment = ''
335
+ default = self.parser.get_default(k)
336
+ if v != default:
337
+ comment = '\t[default: %s]' % str(default)
338
+ message += '{:>25}: {:<30}{}\n'.format(str(k), str(v), comment)
339
+ message += '----------------- End -------------------'
340
+ print(message)
341
+
342
+ # save to the disk
343
+ """
344
+ expr_dir = os.path.join(opt.checkpoints_dir, opt.name)
345
+ util.mkdirs(expr_dir)
346
+ file_name = os.path.join(expr_dir, '{}_opt.txt'.format(opt.phase))
347
+ with open(file_name, 'wt') as opt_file:
348
+ opt_file.write(message)
349
+ opt_file.write('\n')
350
+ """
351
+
352
+ def parse(self):
353
+ """Parse our options, create checkpoints directory suffix, and set up gpu device."""
354
+ opt = self.gather_options()
355
+ opt.isTrain = self.isTrain # train or test
356
+
357
+ # process opt.suffix
358
+ if opt.suffix:
359
+ suffix = ('_' + opt.suffix.format(**vars(opt))) if opt.suffix != '' else ''
360
+ opt.name = opt.name + suffix
361
+
362
+ #self.print_options(opt)
363
+
364
+ # set gpu ids
365
+ str_ids = opt.gpu_ids.split(',')
366
+ opt.gpu_ids = []
367
+ for str_id in str_ids:
368
+ id = int(str_id)
369
+ if id >= 0:
370
+ opt.gpu_ids.append(id)
371
+ #if len(opt.gpu_ids) > 0:
372
+ # torch.cuda.set_device(opt.gpu_ids[0])
373
+
374
+ self.opt = opt
375
+ return self.opt
376
+
377
+
378
+ def estimateboost(img, model, model_type, pix2pixmodel, max_res=512):
379
+ global whole_size_threshold
380
+
381
+ # get settings
382
+ if hasattr(opts, 'depthmap_script_boost_rmax'):
383
+ whole_size_threshold = opts.depthmap_script_boost_rmax
384
+
385
+ if model_type == 0: #leres
386
+ net_receptive_field_size = 448
387
+ patch_netsize = 2 * net_receptive_field_size
388
+ elif model_type == 1: #dpt_beit_large_512
389
+ net_receptive_field_size = 512
390
+ patch_netsize = 2 * net_receptive_field_size
391
+ else: #other midas
392
+ net_receptive_field_size = 384
393
+ patch_netsize = 2 * net_receptive_field_size
394
+
395
+ gc.collect()
396
+ devices.torch_gc()
397
+
398
+ # Generate mask used to smoothly blend the local pathc estimations to the base estimate.
399
+ # It is arbitrarily large to avoid artifacts during rescaling for each crop.
400
+ mask_org = generatemask((3000, 3000))
401
+ mask = mask_org.copy()
402
+
403
+ # Value x of R_x defined in the section 5 of the main paper.
404
+ r_threshold_value = 0.2
405
+ #if R0:
406
+ # r_threshold_value = 0
407
+
408
+ input_resolution = img.shape
409
+ scale_threshold = 3 # Allows up-scaling with a scale up to 3
410
+
411
+ # Find the best input resolution R-x. The resolution search described in section 5-double estimation of the main paper and section B of the
412
+ # supplementary material.
413
+ whole_image_optimal_size, patch_scale = calculateprocessingres(img, net_receptive_field_size, r_threshold_value, scale_threshold, whole_size_threshold)
414
+
415
+ # print('wholeImage being processed in :', whole_image_optimal_size)
416
+
417
+ # Generate the base estimate using the double estimation.
418
+ whole_estimate = doubleestimate(img, net_receptive_field_size, whole_image_optimal_size, pix2pixsize, model, model_type, pix2pixmodel)
419
+
420
+ # Compute the multiplier described in section 6 of the main paper to make sure our initial patch can select
421
+ # small high-density regions of the image.
422
+ global factor
423
+ factor = max(min(1, 4 * patch_scale * whole_image_optimal_size / whole_size_threshold), 0.2)
424
+ # print('Adjust factor is:', 1/factor)
425
+
426
+ # Check if Local boosting is beneficial.
427
+ if max_res < whole_image_optimal_size:
428
+ # print("No Local boosting. Specified Max Res is smaller than R20, Returning doubleestimate result")
429
+ return cv2.resize(whole_estimate, (input_resolution[1], input_resolution[0]), interpolation=cv2.INTER_CUBIC)
430
+
431
+ # Compute the default target resolution.
432
+ if img.shape[0] > img.shape[1]:
433
+ a = 2 * whole_image_optimal_size
434
+ b = round(2 * whole_image_optimal_size * img.shape[1] / img.shape[0])
435
+ else:
436
+ a = round(2 * whole_image_optimal_size * img.shape[0] / img.shape[1])
437
+ b = 2 * whole_image_optimal_size
438
+ b = int(round(b / factor))
439
+ a = int(round(a / factor))
440
+
441
+ """
442
+ # recompute a, b and saturate to max res.
443
+ if max(a,b) > max_res:
444
+ print('Default Res is higher than max-res: Reducing final resolution')
445
+ if img.shape[0] > img.shape[1]:
446
+ a = max_res
447
+ b = round(max_res * img.shape[1] / img.shape[0])
448
+ else:
449
+ a = round(max_res * img.shape[0] / img.shape[1])
450
+ b = max_res
451
+ b = int(b)
452
+ a = int(a)
453
+ """
454
+
455
+ img = cv2.resize(img, (b, a), interpolation=cv2.INTER_CUBIC)
456
+
457
+ # Extract selected patches for local refinement
458
+ base_size = net_receptive_field_size * 2
459
+ patchset = generatepatchs(img, base_size)
460
+
461
+ # print('Target resolution: ', img.shape)
462
+
463
+ # Computing a scale in case user prompted to generate the results as the same resolution of the input.
464
+ # Notice that our method output resolution is independent of the input resolution and this parameter will only
465
+ # enable a scaling operation during the local patch merge implementation to generate results with the same resolution
466
+ # as the input.
467
+ """
468
+ if output_resolution == 1:
469
+ mergein_scale = input_resolution[0] / img.shape[0]
470
+ print('Dynamicly change merged-in resolution; scale:', mergein_scale)
471
+ else:
472
+ mergein_scale = 1
473
+ """
474
+ # always rescale to input res for now
475
+ mergein_scale = input_resolution[0] / img.shape[0]
476
+
477
+ imageandpatchs = ImageandPatchs('', '', patchset, img, mergein_scale)
478
+ whole_estimate_resized = cv2.resize(whole_estimate, (round(img.shape[1]*mergein_scale),
479
+ round(img.shape[0]*mergein_scale)), interpolation=cv2.INTER_CUBIC)
480
+ imageandpatchs.set_base_estimate(whole_estimate_resized.copy())
481
+ imageandpatchs.set_updated_estimate(whole_estimate_resized.copy())
482
+
483
+ print('Resulting depthmap resolution will be :', whole_estimate_resized.shape[:2])
484
+ print('Patches to process: '+str(len(imageandpatchs)))
485
+
486
+ # Enumerate through all patches, generate their estimations and refining the base estimate.
487
+ for patch_ind in range(len(imageandpatchs)):
488
+
489
+ # Get patch information
490
+ patch = imageandpatchs[patch_ind] # patch object
491
+ patch_rgb = patch['patch_rgb'] # rgb patch
492
+ patch_whole_estimate_base = patch['patch_whole_estimate_base'] # corresponding patch from base
493
+ rect = patch['rect'] # patch size and location
494
+ patch_id = patch['id'] # patch ID
495
+ org_size = patch_whole_estimate_base.shape # the original size from the unscaled input
496
+ print('\t Processing patch', patch_ind, '/', len(imageandpatchs)-1, '|', rect)
497
+
498
+ # We apply double estimation for patches. The high resolution value is fixed to twice the receptive
499
+ # field size of the network for patches to accelerate the process.
500
+ patch_estimation = doubleestimate(patch_rgb, net_receptive_field_size, patch_netsize, pix2pixsize, model, model_type, pix2pixmodel)
501
+ patch_estimation = cv2.resize(patch_estimation, (pix2pixsize, pix2pixsize), interpolation=cv2.INTER_CUBIC)
502
+ patch_whole_estimate_base = cv2.resize(patch_whole_estimate_base, (pix2pixsize, pix2pixsize), interpolation=cv2.INTER_CUBIC)
503
+
504
+ # Merging the patch estimation into the base estimate using our merge network:
505
+ # We feed the patch estimation and the same region from the updated base estimate to the merge network
506
+ # to generate the target estimate for the corresponding region.
507
+ pix2pixmodel.set_input(patch_whole_estimate_base, patch_estimation)
508
+
509
+ # Run merging network
510
+ pix2pixmodel.test()
511
+ visuals = pix2pixmodel.get_current_visuals()
512
+
513
+ prediction_mapped = visuals['fake_B']
514
+ prediction_mapped = (prediction_mapped+1)/2
515
+ prediction_mapped = prediction_mapped.squeeze().cpu().numpy()
516
+
517
+ mapped = prediction_mapped
518
+
519
+ # We use a simple linear polynomial to make sure the result of the merge network would match the values of
520
+ # base estimate
521
+ p_coef = np.polyfit(mapped.reshape(-1), patch_whole_estimate_base.reshape(-1), deg=1)
522
+ merged = np.polyval(p_coef, mapped.reshape(-1)).reshape(mapped.shape)
523
+
524
+ merged = cv2.resize(merged, (org_size[1],org_size[0]), interpolation=cv2.INTER_CUBIC)
525
+
526
+ # Get patch size and location
527
+ w1 = rect[0]
528
+ h1 = rect[1]
529
+ w2 = w1 + rect[2]
530
+ h2 = h1 + rect[3]
531
+
532
+ # To speed up the implementation, we only generate the Gaussian mask once with a sufficiently large size
533
+ # and resize it to our needed size while merging the patches.
534
+ if mask.shape != org_size:
535
+ mask = cv2.resize(mask_org, (org_size[1],org_size[0]), interpolation=cv2.INTER_LINEAR)
536
+
537
+ tobemergedto = imageandpatchs.estimation_updated_image
538
+
539
+ # Update the whole estimation:
540
+ # We use a simple Gaussian mask to blend the merged patch region with the base estimate to ensure seamless
541
+ # blending at the boundaries of the patch region.
542
+ tobemergedto[h1:h2, w1:w2] = np.multiply(tobemergedto[h1:h2, w1:w2], 1 - mask) + np.multiply(merged, mask)
543
+ imageandpatchs.set_updated_estimate(tobemergedto)
544
+
545
+ # output
546
+ return cv2.resize(imageandpatchs.estimation_updated_image, (input_resolution[1], input_resolution[0]), interpolation=cv2.INTER_CUBIC)
extensions/microsoftexcel-controlnet/annotator/leres/leres/multi_depth_model_woauxi.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from . import network_auxi as network
2
+ from .net_tools import get_func
3
+ import torch
4
+ import torch.nn as nn
5
+ from modules import devices
6
+
7
+ class RelDepthModel(nn.Module):
8
+ def __init__(self, backbone='resnet50'):
9
+ super(RelDepthModel, self).__init__()
10
+ if backbone == 'resnet50':
11
+ encoder = 'resnet50_stride32'
12
+ elif backbone == 'resnext101':
13
+ encoder = 'resnext101_stride32x8d'
14
+ self.depth_model = DepthModel(encoder)
15
+
16
+ def inference(self, rgb):
17
+ with torch.no_grad():
18
+ input = rgb.to(self.depth_model.device)
19
+ depth = self.depth_model(input)
20
+ #pred_depth_out = depth - depth.min() + 0.01
21
+ return depth #pred_depth_out
22
+
23
+
24
+ class DepthModel(nn.Module):
25
+ def __init__(self, encoder):
26
+ super(DepthModel, self).__init__()
27
+ backbone = network.__name__.split('.')[-1] + '.' + encoder
28
+ self.encoder_modules = get_func(backbone)()
29
+ self.decoder_modules = network.Decoder()
30
+
31
+ def forward(self, x):
32
+ lateral_out = self.encoder_modules(x)
33
+ out_logit = self.decoder_modules(lateral_out)
34
+ return out_logit
extensions/microsoftexcel-controlnet/annotator/leres/leres/net_tools.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import importlib
2
+ import torch
3
+ import os
4
+ from collections import OrderedDict
5
+
6
+
7
+ def get_func(func_name):
8
+ """Helper to return a function object by name. func_name must identify a
9
+ function in this module or the path to a function relative to the base
10
+ 'modeling' module.
11
+ """
12
+ if func_name == '':
13
+ return None
14
+ try:
15
+ parts = func_name.split('.')
16
+ # Refers to a function in this module
17
+ if len(parts) == 1:
18
+ return globals()[parts[0]]
19
+ # Otherwise, assume we're referencing a module under modeling
20
+ module_name = 'annotator.leres.leres.' + '.'.join(parts[:-1])
21
+ module = importlib.import_module(module_name)
22
+ return getattr(module, parts[-1])
23
+ except Exception:
24
+ print('Failed to f1ind function: %s', func_name)
25
+ raise
26
+
27
+ def load_ckpt(args, depth_model, shift_model, focal_model):
28
+ """
29
+ Load checkpoint.
30
+ """
31
+ if os.path.isfile(args.load_ckpt):
32
+ print("loading checkpoint %s" % args.load_ckpt)
33
+ checkpoint = torch.load(args.load_ckpt)
34
+ if shift_model is not None:
35
+ shift_model.load_state_dict(strip_prefix_if_present(checkpoint['shift_model'], 'module.'),
36
+ strict=True)
37
+ if focal_model is not None:
38
+ focal_model.load_state_dict(strip_prefix_if_present(checkpoint['focal_model'], 'module.'),
39
+ strict=True)
40
+ depth_model.load_state_dict(strip_prefix_if_present(checkpoint['depth_model'], "module."),
41
+ strict=True)
42
+ del checkpoint
43
+ if torch.cuda.is_available():
44
+ torch.cuda.empty_cache()
45
+
46
+
47
+ def strip_prefix_if_present(state_dict, prefix):
48
+ keys = sorted(state_dict.keys())
49
+ if not all(key.startswith(prefix) for key in keys):
50
+ return state_dict
51
+ stripped_state_dict = OrderedDict()
52
+ for key, value in state_dict.items():
53
+ stripped_state_dict[key.replace(prefix, "")] = value
54
+ return stripped_state_dict
extensions/microsoftexcel-controlnet/annotator/leres/leres/network_auxi.py ADDED
@@ -0,0 +1,417 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.init as init
4
+
5
+ from . import Resnet, Resnext_torch
6
+
7
+
8
+ def resnet50_stride32():
9
+ return DepthNet(backbone='resnet', depth=50, upfactors=[2, 2, 2, 2])
10
+
11
+ def resnext101_stride32x8d():
12
+ return DepthNet(backbone='resnext101_32x8d', depth=101, upfactors=[2, 2, 2, 2])
13
+
14
+
15
+ class Decoder(nn.Module):
16
+ def __init__(self):
17
+ super(Decoder, self).__init__()
18
+ self.inchannels = [256, 512, 1024, 2048]
19
+ self.midchannels = [256, 256, 256, 512]
20
+ self.upfactors = [2,2,2,2]
21
+ self.outchannels = 1
22
+
23
+ self.conv = FTB(inchannels=self.inchannels[3], midchannels=self.midchannels[3])
24
+ self.conv1 = nn.Conv2d(in_channels=self.midchannels[3], out_channels=self.midchannels[2], kernel_size=3, padding=1, stride=1, bias=True)
25
+ self.upsample = nn.Upsample(scale_factor=self.upfactors[3], mode='bilinear', align_corners=True)
26
+
27
+ self.ffm2 = FFM(inchannels=self.inchannels[2], midchannels=self.midchannels[2], outchannels = self.midchannels[2], upfactor=self.upfactors[2])
28
+ self.ffm1 = FFM(inchannels=self.inchannels[1], midchannels=self.midchannels[1], outchannels = self.midchannels[1], upfactor=self.upfactors[1])
29
+ self.ffm0 = FFM(inchannels=self.inchannels[0], midchannels=self.midchannels[0], outchannels = self.midchannels[0], upfactor=self.upfactors[0])
30
+
31
+ self.outconv = AO(inchannels=self.midchannels[0], outchannels=self.outchannels, upfactor=2)
32
+ self._init_params()
33
+
34
+ def _init_params(self):
35
+ for m in self.modules():
36
+ if isinstance(m, nn.Conv2d):
37
+ init.normal_(m.weight, std=0.01)
38
+ if m.bias is not None:
39
+ init.constant_(m.bias, 0)
40
+ elif isinstance(m, nn.ConvTranspose2d):
41
+ init.normal_(m.weight, std=0.01)
42
+ if m.bias is not None:
43
+ init.constant_(m.bias, 0)
44
+ elif isinstance(m, nn.BatchNorm2d): #NN.BatchNorm2d
45
+ init.constant_(m.weight, 1)
46
+ init.constant_(m.bias, 0)
47
+ elif isinstance(m, nn.Linear):
48
+ init.normal_(m.weight, std=0.01)
49
+ if m.bias is not None:
50
+ init.constant_(m.bias, 0)
51
+
52
+ def forward(self, features):
53
+ x_32x = self.conv(features[3]) # 1/32
54
+ x_32 = self.conv1(x_32x)
55
+ x_16 = self.upsample(x_32) # 1/16
56
+
57
+ x_8 = self.ffm2(features[2], x_16) # 1/8
58
+ x_4 = self.ffm1(features[1], x_8) # 1/4
59
+ x_2 = self.ffm0(features[0], x_4) # 1/2
60
+ #-----------------------------------------
61
+ x = self.outconv(x_2) # original size
62
+ return x
63
+
64
+ class DepthNet(nn.Module):
65
+ __factory = {
66
+ 18: Resnet.resnet18,
67
+ 34: Resnet.resnet34,
68
+ 50: Resnet.resnet50,
69
+ 101: Resnet.resnet101,
70
+ 152: Resnet.resnet152
71
+ }
72
+ def __init__(self,
73
+ backbone='resnet',
74
+ depth=50,
75
+ upfactors=[2, 2, 2, 2]):
76
+ super(DepthNet, self).__init__()
77
+ self.backbone = backbone
78
+ self.depth = depth
79
+ self.pretrained = False
80
+ self.inchannels = [256, 512, 1024, 2048]
81
+ self.midchannels = [256, 256, 256, 512]
82
+ self.upfactors = upfactors
83
+ self.outchannels = 1
84
+
85
+ # Build model
86
+ if self.backbone == 'resnet':
87
+ if self.depth not in DepthNet.__factory:
88
+ raise KeyError("Unsupported depth:", self.depth)
89
+ self.encoder = DepthNet.__factory[depth](pretrained=self.pretrained)
90
+ elif self.backbone == 'resnext101_32x8d':
91
+ self.encoder = Resnext_torch.resnext101_32x8d(pretrained=self.pretrained)
92
+ else:
93
+ self.encoder = Resnext_torch.resnext101(pretrained=self.pretrained)
94
+
95
+ def forward(self, x):
96
+ x = self.encoder(x) # 1/32, 1/16, 1/8, 1/4
97
+ return x
98
+
99
+
100
+ class FTB(nn.Module):
101
+ def __init__(self, inchannels, midchannels=512):
102
+ super(FTB, self).__init__()
103
+ self.in1 = inchannels
104
+ self.mid = midchannels
105
+ self.conv1 = nn.Conv2d(in_channels=self.in1, out_channels=self.mid, kernel_size=3, padding=1, stride=1,
106
+ bias=True)
107
+ # NN.BatchNorm2d
108
+ self.conv_branch = nn.Sequential(nn.ReLU(inplace=True), \
109
+ nn.Conv2d(in_channels=self.mid, out_channels=self.mid, kernel_size=3,
110
+ padding=1, stride=1, bias=True), \
111
+ nn.BatchNorm2d(num_features=self.mid), \
112
+ nn.ReLU(inplace=True), \
113
+ nn.Conv2d(in_channels=self.mid, out_channels=self.mid, kernel_size=3,
114
+ padding=1, stride=1, bias=True))
115
+ self.relu = nn.ReLU(inplace=True)
116
+
117
+ self.init_params()
118
+
119
+ def forward(self, x):
120
+ x = self.conv1(x)
121
+ x = x + self.conv_branch(x)
122
+ x = self.relu(x)
123
+
124
+ return x
125
+
126
+ def init_params(self):
127
+ for m in self.modules():
128
+ if isinstance(m, nn.Conv2d):
129
+ init.normal_(m.weight, std=0.01)
130
+ if m.bias is not None:
131
+ init.constant_(m.bias, 0)
132
+ elif isinstance(m, nn.ConvTranspose2d):
133
+ # init.kaiming_normal_(m.weight, mode='fan_out')
134
+ init.normal_(m.weight, std=0.01)
135
+ # init.xavier_normal_(m.weight)
136
+ if m.bias is not None:
137
+ init.constant_(m.bias, 0)
138
+ elif isinstance(m, nn.BatchNorm2d): # NN.BatchNorm2d
139
+ init.constant_(m.weight, 1)
140
+ init.constant_(m.bias, 0)
141
+ elif isinstance(m, nn.Linear):
142
+ init.normal_(m.weight, std=0.01)
143
+ if m.bias is not None:
144
+ init.constant_(m.bias, 0)
145
+
146
+
147
+ class ATA(nn.Module):
148
+ def __init__(self, inchannels, reduction=8):
149
+ super(ATA, self).__init__()
150
+ self.inchannels = inchannels
151
+ self.avg_pool = nn.AdaptiveAvgPool2d(1)
152
+ self.fc = nn.Sequential(nn.Linear(self.inchannels * 2, self.inchannels // reduction),
153
+ nn.ReLU(inplace=True),
154
+ nn.Linear(self.inchannels // reduction, self.inchannels),
155
+ nn.Sigmoid())
156
+ self.init_params()
157
+
158
+ def forward(self, low_x, high_x):
159
+ n, c, _, _ = low_x.size()
160
+ x = torch.cat([low_x, high_x], 1)
161
+ x = self.avg_pool(x)
162
+ x = x.view(n, -1)
163
+ x = self.fc(x).view(n, c, 1, 1)
164
+ x = low_x * x + high_x
165
+
166
+ return x
167
+
168
+ def init_params(self):
169
+ for m in self.modules():
170
+ if isinstance(m, nn.Conv2d):
171
+ # init.kaiming_normal_(m.weight, mode='fan_out')
172
+ # init.normal(m.weight, std=0.01)
173
+ init.xavier_normal_(m.weight)
174
+ if m.bias is not None:
175
+ init.constant_(m.bias, 0)
176
+ elif isinstance(m, nn.ConvTranspose2d):
177
+ # init.kaiming_normal_(m.weight, mode='fan_out')
178
+ # init.normal_(m.weight, std=0.01)
179
+ init.xavier_normal_(m.weight)
180
+ if m.bias is not None:
181
+ init.constant_(m.bias, 0)
182
+ elif isinstance(m, nn.BatchNorm2d): # NN.BatchNorm2d
183
+ init.constant_(m.weight, 1)
184
+ init.constant_(m.bias, 0)
185
+ elif isinstance(m, nn.Linear):
186
+ init.normal_(m.weight, std=0.01)
187
+ if m.bias is not None:
188
+ init.constant_(m.bias, 0)
189
+
190
+
191
+ class FFM(nn.Module):
192
+ def __init__(self, inchannels, midchannels, outchannels, upfactor=2):
193
+ super(FFM, self).__init__()
194
+ self.inchannels = inchannels
195
+ self.midchannels = midchannels
196
+ self.outchannels = outchannels
197
+ self.upfactor = upfactor
198
+
199
+ self.ftb1 = FTB(inchannels=self.inchannels, midchannels=self.midchannels)
200
+ # self.ata = ATA(inchannels = self.midchannels)
201
+ self.ftb2 = FTB(inchannels=self.midchannels, midchannels=self.outchannels)
202
+
203
+ self.upsample = nn.Upsample(scale_factor=self.upfactor, mode='bilinear', align_corners=True)
204
+
205
+ self.init_params()
206
+
207
+ def forward(self, low_x, high_x):
208
+ x = self.ftb1(low_x)
209
+ x = x + high_x
210
+ x = self.ftb2(x)
211
+ x = self.upsample(x)
212
+
213
+ return x
214
+
215
+ def init_params(self):
216
+ for m in self.modules():
217
+ if isinstance(m, nn.Conv2d):
218
+ # init.kaiming_normal_(m.weight, mode='fan_out')
219
+ init.normal_(m.weight, std=0.01)
220
+ # init.xavier_normal_(m.weight)
221
+ if m.bias is not None:
222
+ init.constant_(m.bias, 0)
223
+ elif isinstance(m, nn.ConvTranspose2d):
224
+ # init.kaiming_normal_(m.weight, mode='fan_out')
225
+ init.normal_(m.weight, std=0.01)
226
+ # init.xavier_normal_(m.weight)
227
+ if m.bias is not None:
228
+ init.constant_(m.bias, 0)
229
+ elif isinstance(m, nn.BatchNorm2d): # NN.Batchnorm2d
230
+ init.constant_(m.weight, 1)
231
+ init.constant_(m.bias, 0)
232
+ elif isinstance(m, nn.Linear):
233
+ init.normal_(m.weight, std=0.01)
234
+ if m.bias is not None:
235
+ init.constant_(m.bias, 0)
236
+
237
+
238
+ class AO(nn.Module):
239
+ # Adaptive output module
240
+ def __init__(self, inchannels, outchannels, upfactor=2):
241
+ super(AO, self).__init__()
242
+ self.inchannels = inchannels
243
+ self.outchannels = outchannels
244
+ self.upfactor = upfactor
245
+
246
+ self.adapt_conv = nn.Sequential(
247
+ nn.Conv2d(in_channels=self.inchannels, out_channels=self.inchannels // 2, kernel_size=3, padding=1,
248
+ stride=1, bias=True), \
249
+ nn.BatchNorm2d(num_features=self.inchannels // 2), \
250
+ nn.ReLU(inplace=True), \
251
+ nn.Conv2d(in_channels=self.inchannels // 2, out_channels=self.outchannels, kernel_size=3, padding=1,
252
+ stride=1, bias=True), \
253
+ nn.Upsample(scale_factor=self.upfactor, mode='bilinear', align_corners=True))
254
+
255
+ self.init_params()
256
+
257
+ def forward(self, x):
258
+ x = self.adapt_conv(x)
259
+ return x
260
+
261
+ def init_params(self):
262
+ for m in self.modules():
263
+ if isinstance(m, nn.Conv2d):
264
+ # init.kaiming_normal_(m.weight, mode='fan_out')
265
+ init.normal_(m.weight, std=0.01)
266
+ # init.xavier_normal_(m.weight)
267
+ if m.bias is not None:
268
+ init.constant_(m.bias, 0)
269
+ elif isinstance(m, nn.ConvTranspose2d):
270
+ # init.kaiming_normal_(m.weight, mode='fan_out')
271
+ init.normal_(m.weight, std=0.01)
272
+ # init.xavier_normal_(m.weight)
273
+ if m.bias is not None:
274
+ init.constant_(m.bias, 0)
275
+ elif isinstance(m, nn.BatchNorm2d): # NN.Batchnorm2d
276
+ init.constant_(m.weight, 1)
277
+ init.constant_(m.bias, 0)
278
+ elif isinstance(m, nn.Linear):
279
+ init.normal_(m.weight, std=0.01)
280
+ if m.bias is not None:
281
+ init.constant_(m.bias, 0)
282
+
283
+
284
+
285
+ # ==============================================================================================================
286
+
287
+
288
+ class ResidualConv(nn.Module):
289
+ def __init__(self, inchannels):
290
+ super(ResidualConv, self).__init__()
291
+ # NN.BatchNorm2d
292
+ self.conv = nn.Sequential(
293
+ # nn.BatchNorm2d(num_features=inchannels),
294
+ nn.ReLU(inplace=False),
295
+ # nn.Conv2d(in_channels=inchannels, out_channels=inchannels, kernel_size=3, padding=1, stride=1, groups=inchannels,bias=True),
296
+ # nn.Conv2d(in_channels=inchannels, out_channels=inchannels, kernel_size=1, padding=0, stride=1, groups=1,bias=True)
297
+ nn.Conv2d(in_channels=inchannels, out_channels=inchannels // 2, kernel_size=3, padding=1, stride=1,
298
+ bias=False),
299
+ nn.BatchNorm2d(num_features=inchannels // 2),  # integer division keeps the channel count an int under Python 3
300
+ nn.ReLU(inplace=False),
301
+ nn.Conv2d(in_channels=inchannels // 2, out_channels=inchannels, kernel_size=3, padding=1, stride=1,
302
+ bias=False)
303
+ )
304
+ self.init_params()
305
+
306
+ def forward(self, x):
307
+ x = self.conv(x) + x
308
+ return x
309
+
310
+ def init_params(self):
311
+ for m in self.modules():
312
+ if isinstance(m, nn.Conv2d):
313
+ # init.kaiming_normal_(m.weight, mode='fan_out')
314
+ init.normal_(m.weight, std=0.01)
315
+ # init.xavier_normal_(m.weight)
316
+ if m.bias is not None:
317
+ init.constant_(m.bias, 0)
318
+ elif isinstance(m, nn.ConvTranspose2d):
319
+ # init.kaiming_normal_(m.weight, mode='fan_out')
320
+ init.normal_(m.weight, std=0.01)
321
+ # init.xavier_normal_(m.weight)
322
+ if m.bias is not None:
323
+ init.constant_(m.bias, 0)
324
+ elif isinstance(m, nn.BatchNorm2d): # NN.BatchNorm2d
325
+ init.constant_(m.weight, 1)
326
+ init.constant_(m.bias, 0)
327
+ elif isinstance(m, nn.Linear):
328
+ init.normal_(m.weight, std=0.01)
329
+ if m.bias is not None:
330
+ init.constant_(m.bias, 0)
331
+
332
+
333
+ class FeatureFusion(nn.Module):
334
+ def __init__(self, inchannels, outchannels):
335
+ super(FeatureFusion, self).__init__()
336
+ self.conv = ResidualConv(inchannels=inchannels)
337
+ # NN.BatchNorm2d
338
+ self.up = nn.Sequential(ResidualConv(inchannels=inchannels),
339
+ nn.ConvTranspose2d(in_channels=inchannels, out_channels=outchannels, kernel_size=3,
340
+ stride=2, padding=1, output_padding=1),
341
+ nn.BatchNorm2d(num_features=outchannels),
342
+ nn.ReLU(inplace=True))
343
+
344
+ def forward(self, lowfeat, highfeat):
345
+ return self.up(highfeat + self.conv(lowfeat))
346
+
347
+ def init_params(self):
348
+ for m in self.modules():
349
+ if isinstance(m, nn.Conv2d):
350
+ # init.kaiming_normal_(m.weight, mode='fan_out')
351
+ init.normal_(m.weight, std=0.01)
352
+ # init.xavier_normal_(m.weight)
353
+ if m.bias is not None:
354
+ init.constant_(m.bias, 0)
355
+ elif isinstance(m, nn.ConvTranspose2d):
356
+ # init.kaiming_normal_(m.weight, mode='fan_out')
357
+ init.normal_(m.weight, std=0.01)
358
+ # init.xavier_normal_(m.weight)
359
+ if m.bias is not None:
360
+ init.constant_(m.bias, 0)
361
+ elif isinstance(m, nn.BatchNorm2d): # NN.BatchNorm2d
362
+ init.constant_(m.weight, 1)
363
+ init.constant_(m.bias, 0)
364
+ elif isinstance(m, nn.Linear):
365
+ init.normal_(m.weight, std=0.01)
366
+ if m.bias is not None:
367
+ init.constant_(m.bias, 0)
368
+
369
+
370
+ class SenceUnderstand(nn.Module):
371
+ def __init__(self, channels):
372
+ super(SenceUnderstand, self).__init__()
373
+ self.channels = channels
374
+ self.conv1 = nn.Sequential(nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1),
375
+ nn.ReLU(inplace=True))
376
+ self.pool = nn.AdaptiveAvgPool2d(8)
377
+ self.fc = nn.Sequential(nn.Linear(512 * 8 * 8, self.channels),
378
+ nn.ReLU(inplace=True))
379
+ self.conv2 = nn.Sequential(
380
+ nn.Conv2d(in_channels=self.channels, out_channels=self.channels, kernel_size=1, padding=0),
381
+ nn.ReLU(inplace=True))
382
+ self.initial_params()
383
+
384
+ def forward(self, x):
385
+ n, c, h, w = x.size()
386
+ x = self.conv1(x)
387
+ x = self.pool(x)
388
+ x = x.view(n, -1)
389
+ x = self.fc(x)
390
+ x = x.view(n, self.channels, 1, 1)
391
+ x = self.conv2(x)
392
+ x = x.repeat(1, 1, h, w)
393
+ return x
394
+
395
+ def initial_params(self, dev=0.01):
396
+ for m in self.modules():
397
+ if isinstance(m, nn.Conv2d):
398
+ # print torch.sum(m.weight)
399
+ m.weight.data.normal_(0, dev)
400
+ if m.bias is not None:
401
+ m.bias.data.fill_(0)
402
+ elif isinstance(m, nn.ConvTranspose2d):
403
+ # print torch.sum(m.weight)
404
+ m.weight.data.normal_(0, dev)
405
+ if m.bias is not None:
406
+ m.bias.data.fill_(0)
407
+ elif isinstance(m, nn.Linear):
408
+ m.weight.data.normal_(0, dev)
409
+
410
+
411
+ if __name__ == '__main__':
412
+ net = DepthNet(depth=50, pretrained=True)
413
+ print(net)
414
+ inputs = torch.ones(4,3,128,128)
415
+ out = net(inputs)
416
+ print(out.size())
417
+
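# Illustrative sketch (not part of this commit): a rough shape check for the decoder
# blocks defined above. The import path is an assumption based on this extension's
# layout; the tensor sizes are arbitrary.
import torch
from annotator.leres.leres.network_auxi import FFM, AO

low = torch.randn(1, 256, 32, 32)     # lateral (encoder) feature
high = torch.randn(1, 256, 32, 32)    # decoder feature at the same resolution
ffm = FFM(inchannels=256, midchannels=256, outchannels=256, upfactor=2)
fused = ffm(low, high)                # internally: FTB -> add -> FTB -> 2x bilinear upsample
print(fused.shape)                    # torch.Size([1, 256, 64, 64])

ao = AO(inchannels=256, outchannels=1, upfactor=2)
print(ao(fused).shape)                # torch.Size([1, 1, 128, 128])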
extensions/microsoftexcel-controlnet/annotator/leres/pix2pix/LICENSE ADDED
@@ -0,0 +1,19 @@
1
+ https://github.com/compphoto/BoostingMonocularDepth
2
+
3
+ Copyright 2021, Seyed Mahdi Hosseini Miangoleh, Sebastian Dille, Computational Photography Laboratory. All rights reserved.
4
+
5
+ This software is for academic use only. A redistribution of this
6
+ software, with or without modifications, has to be for academic
7
+ use only, while giving the appropriate credit to the original
8
+ authors of the software. The methods implemented as a part of
9
+ this software may be covered under patents or patent applications.
10
+
11
+ THIS SOFTWARE IS PROVIDED BY THE AUTHOR ''AS IS'' AND ANY EXPRESS OR IMPLIED
12
+ WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
13
+ FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR
14
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
15
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
16
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
17
+ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
18
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
19
+ ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
extensions/microsoftexcel-controlnet/annotator/leres/pix2pix/models/__init__.py ADDED
@@ -0,0 +1,67 @@
1
+ """This package contains modules related to objective functions, optimizations, and network architectures.
2
+
3
+ To add a custom model class called 'dummy', you need to add a file called 'dummy_model.py' and define a subclass DummyModel inherited from BaseModel.
4
+ You need to implement the following five functions:
5
+ -- <__init__>: initialize the class; first call BaseModel.__init__(self, opt).
6
+ -- <set_input>: unpack data from dataset and apply preprocessing.
7
+ -- <forward>: produce intermediate results.
8
+ -- <optimize_parameters>: calculate loss, gradients, and update network weights.
9
+ -- <modify_commandline_options>: (optionally) add model-specific options and set default options.
10
+
11
+ In the function <__init__>, you need to define four lists:
12
+ -- self.loss_names (str list): specify the training losses that you want to plot and save.
13
+ -- self.model_names (str list): define networks used in our training.
14
+ -- self.visual_names (str list): specify the images that you want to display and save.
15
+ -- self.optimizers (optimizer list): define and initialize optimizers. You can define one optimizer for each network. If two networks are updated at the same time, you can use itertools.chain to group them. See cycle_gan_model.py for a usage example.
16
+
17
+ Now you can use the model class by specifying flag '--model dummy'.
18
+ See our template model class 'template_model.py' for more details.
19
+ """
20
+
21
+ import importlib
22
+ from .base_model import BaseModel
23
+
24
+
25
+ def find_model_using_name(model_name):
26
+ """Import the module "models/[model_name]_model.py".
27
+
28
+ In the file, the class whose lowercased name matches
29
+ '[model_name]model' will be looked up and returned. It has to be
30
+ a subclass of BaseModel.
31
+ """
32
+ model_filename = "annotator.leres.pix2pix.models." + model_name + "_model"
33
+ modellib = importlib.import_module(model_filename)
34
+ model = None
35
+ target_model_name = model_name.replace('_', '') + 'model'
36
+ for name, cls in modellib.__dict__.items():
37
+ if name.lower() == target_model_name.lower() \
38
+ and issubclass(cls, BaseModel):
39
+ model = cls
40
+
41
+ if model is None:
42
+ print("In %s.py, there should be a subclass of BaseModel with class name that matches %s in lowercase." % (model_filename, target_model_name))
43
+ exit(0)
44
+
45
+ return model
46
+
47
+
48
+ def get_option_setter(model_name):
49
+ """Return the static method <modify_commandline_options> of the model class."""
50
+ model_class = find_model_using_name(model_name)
51
+ return model_class.modify_commandline_options
52
+
53
+
54
+ def create_model(opt):
55
+ """Create a model given the option.
56
+
57
+ This function wraps the model class selected by opt.model.
58
+ This is the main interface between this package and 'train.py'/'test.py'
59
+
60
+ Example:
61
+ >>> from models import create_model
62
+ >>> model = create_model(opt)
63
+ """
64
+ model = find_model_using_name(opt.model)
65
+ instance = model(opt)
66
+ print("model [%s] was created" % type(instance).__name__)
67
+ return instance
extensions/microsoftexcel-controlnet/annotator/leres/pix2pix/models/base_model.py ADDED
@@ -0,0 +1,241 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import torch, gc
3
+ from modules import devices
4
+ from collections import OrderedDict
5
+ from abc import ABC, abstractmethod
6
+ from . import networks
7
+
8
+
9
+ class BaseModel(ABC):
10
+ """This class is an abstract base class (ABC) for models.
11
+ To create a subclass, you need to implement the following five functions:
12
+ -- <__init__>: initialize the class; first call BaseModel.__init__(self, opt).
13
+ -- <set_input>: unpack data from dataset and apply preprocessing.
14
+ -- <forward>: produce intermediate results.
15
+ -- <optimize_parameters>: calculate losses, gradients, and update network weights.
16
+ -- <modify_commandline_options>: (optionally) add model-specific options and set default options.
17
+ """
18
+
19
+ def __init__(self, opt):
20
+ """Initialize the BaseModel class.
21
+
22
+ Parameters:
23
+ opt (Option class)-- stores all the experiment flags; needs to be a subclass of BaseOptions
24
+
25
+ When creating your custom class, you need to implement your own initialization.
26
+ In this function, you should first call <BaseModel.__init__(self, opt)>
27
+ Then, you need to define four lists:
28
+ -- self.loss_names (str list): specify the training losses that you want to plot and save.
29
+ -- self.model_names (str list): define networks used in our training.
30
+ -- self.visual_names (str list): specify the images that you want to display and save.
31
+ -- self.optimizers (optimizer list): define and initialize optimizers. You can define one optimizer for each network. If two networks are updated at the same time, you can use itertools.chain to group them. See cycle_gan_model.py for an example.
32
+ """
33
+ self.opt = opt
34
+ self.gpu_ids = opt.gpu_ids
35
+ self.isTrain = opt.isTrain
36
+ self.device = torch.device('cuda:{}'.format(self.gpu_ids[0])) if self.gpu_ids else torch.device('cpu') # get device name: CPU or GPU
37
+ self.save_dir = os.path.join(opt.checkpoints_dir, opt.name) # save all the checkpoints to save_dir
38
+ if opt.preprocess != 'scale_width': # with [scale_width], input images might have different sizes, which hurts the performance of cudnn.benchmark.
39
+ torch.backends.cudnn.benchmark = True
40
+ self.loss_names = []
41
+ self.model_names = []
42
+ self.visual_names = []
43
+ self.optimizers = []
44
+ self.image_paths = []
45
+ self.metric = 0 # used for learning rate policy 'plateau'
46
+
47
+ @staticmethod
48
+ def modify_commandline_options(parser, is_train):
49
+ """Add new model-specific options, and rewrite default values for existing options.
50
+
51
+ Parameters:
52
+ parser -- original option parser
53
+ is_train (bool) -- whether training phase or test phase. You can use this flag to add training-specific or test-specific options.
54
+
55
+ Returns:
56
+ the modified parser.
57
+ """
58
+ return parser
59
+
60
+ @abstractmethod
61
+ def set_input(self, input):
62
+ """Unpack input data from the dataloader and perform necessary pre-processing steps.
63
+
64
+ Parameters:
65
+ input (dict): includes the data itself and its metadata information.
66
+ """
67
+ pass
68
+
69
+ @abstractmethod
70
+ def forward(self):
71
+ """Run forward pass; called by both functions <optimize_parameters> and <test>."""
72
+ pass
73
+
74
+ @abstractmethod
75
+ def optimize_parameters(self):
76
+ """Calculate losses, gradients, and update network weights; called in every training iteration"""
77
+ pass
78
+
79
+ def setup(self, opt):
80
+ """Load and print networks; create schedulers
81
+
82
+ Parameters:
83
+ opt (Option class) -- stores all the experiment flags; needs to be a subclass of BaseOptions
84
+ """
85
+ if self.isTrain:
86
+ self.schedulers = [networks.get_scheduler(optimizer, opt) for optimizer in self.optimizers]
87
+ if not self.isTrain or opt.continue_train:
88
+ load_suffix = 'iter_%d' % opt.load_iter if opt.load_iter > 0 else opt.epoch
89
+ self.load_networks(load_suffix)
90
+ self.print_networks(opt.verbose)
91
+
92
+ def eval(self):
93
+ """Make models eval mode during test time"""
94
+ for name in self.model_names:
95
+ if isinstance(name, str):
96
+ net = getattr(self, 'net' + name)
97
+ net.eval()
98
+
99
+ def test(self):
100
+ """Forward function used in test time.
101
+
102
+ This function wraps <forward> function in no_grad() so we don't save intermediate steps for backprop
103
+ It also calls <compute_visuals> to produce additional visualization results
104
+ """
105
+ with torch.no_grad():
106
+ self.forward()
107
+ self.compute_visuals()
108
+
109
+ def compute_visuals(self):
110
+ """Calculate additional output images for visdom and HTML visualization"""
111
+ pass
112
+
113
+ def get_image_paths(self):
114
+ """ Return image paths that are used to load current data"""
115
+ return self.image_paths
116
+
117
+ def update_learning_rate(self):
118
+ """Update learning rates for all the networks; called at the end of every epoch"""
119
+ old_lr = self.optimizers[0].param_groups[0]['lr']
120
+ for scheduler in self.schedulers:
121
+ if self.opt.lr_policy == 'plateau':
122
+ scheduler.step(self.metric)
123
+ else:
124
+ scheduler.step()
125
+
126
+ lr = self.optimizers[0].param_groups[0]['lr']
127
+ print('learning rate %.7f -> %.7f' % (old_lr, lr))
128
+
129
+ def get_current_visuals(self):
130
+ """Return visualization images. train.py will display these images with visdom, and save the images to a HTML"""
131
+ visual_ret = OrderedDict()
132
+ for name in self.visual_names:
133
+ if isinstance(name, str):
134
+ visual_ret[name] = getattr(self, name)
135
+ return visual_ret
136
+
137
+ def get_current_losses(self):
138
+ """Return traning losses / errors. train.py will print out these errors on console, and save them to a file"""
139
+ errors_ret = OrderedDict()
140
+ for name in self.loss_names:
141
+ if isinstance(name, str):
142
+ errors_ret[name] = float(getattr(self, 'loss_' + name)) # float(...) works for both scalar tensor and float number
143
+ return errors_ret
144
+
145
+ def save_networks(self, epoch):
146
+ """Save all the networks to the disk.
147
+
148
+ Parameters:
149
+ epoch (int) -- current epoch; used in the file name '%s_net_%s.pth' % (epoch, name)
150
+ """
151
+ for name in self.model_names:
152
+ if isinstance(name, str):
153
+ save_filename = '%s_net_%s.pth' % (epoch, name)
154
+ save_path = os.path.join(self.save_dir, save_filename)
155
+ net = getattr(self, 'net' + name)
156
+
157
+ if len(self.gpu_ids) > 0 and torch.cuda.is_available():
158
+ torch.save(net.module.cpu().state_dict(), save_path)
159
+ net.cuda(self.gpu_ids[0])
160
+ else:
161
+ torch.save(net.cpu().state_dict(), save_path)
162
+
163
+ def unload_network(self, name):
164
+ """Unload network and gc.
165
+ """
166
+ if isinstance(name, str):
167
+ net = getattr(self, 'net' + name)
168
+ del net
169
+ gc.collect()
170
+ devices.torch_gc()
171
+ return None
172
+
173
+ def __patch_instance_norm_state_dict(self, state_dict, module, keys, i=0):
174
+ """Fix InstanceNorm checkpoints incompatibility (prior to 0.4)"""
175
+ key = keys[i]
176
+ if i + 1 == len(keys): # at the end, pointing to a parameter/buffer
177
+ if module.__class__.__name__.startswith('InstanceNorm') and \
178
+ (key == 'running_mean' or key == 'running_var'):
179
+ if getattr(module, key) is None:
180
+ state_dict.pop('.'.join(keys))
181
+ if module.__class__.__name__.startswith('InstanceNorm') and \
182
+ (key == 'num_batches_tracked'):
183
+ state_dict.pop('.'.join(keys))
184
+ else:
185
+ self.__patch_instance_norm_state_dict(state_dict, getattr(module, key), keys, i + 1)
186
+
187
+ def load_networks(self, epoch):
188
+ """Load all the networks from the disk.
189
+
190
+ Parameters:
191
+ epoch (int) -- current epoch; used in the file name '%s_net_%s.pth' % (epoch, name)
192
+ """
193
+ for name in self.model_names:
194
+ if isinstance(name, str):
195
+ load_filename = '%s_net_%s.pth' % (epoch, name)
196
+ load_path = os.path.join(self.save_dir, load_filename)
197
+ net = getattr(self, 'net' + name)
198
+ if isinstance(net, torch.nn.DataParallel):
199
+ net = net.module
200
+ # print('Loading depth boost model from %s' % load_path)
201
+ # if you are using PyTorch newer than 0.4 (e.g., built from
202
+ # GitHub source), you can remove str() on self.device
203
+ state_dict = torch.load(load_path, map_location=str(self.device))
204
+ if hasattr(state_dict, '_metadata'):
205
+ del state_dict._metadata
206
+
207
+ # patch InstanceNorm checkpoints prior to 0.4
208
+ for key in list(state_dict.keys()): # need to copy keys here because we mutate in loop
209
+ self.__patch_instance_norm_state_dict(state_dict, net, key.split('.'))
210
+ net.load_state_dict(state_dict)
211
+
212
+ def print_networks(self, verbose):
213
+ """Print the total number of parameters in the network and (if verbose) network architecture
214
+
215
+ Parameters:
216
+ verbose (bool) -- if verbose: print the network architecture
217
+ """
218
+ print('---------- Networks initialized -------------')
219
+ for name in self.model_names:
220
+ if isinstance(name, str):
221
+ net = getattr(self, 'net' + name)
222
+ num_params = 0
223
+ for param in net.parameters():
224
+ num_params += param.numel()
225
+ if verbose:
226
+ print(net)
227
+ print('[Network %s] Total number of parameters : %.3f M' % (name, num_params / 1e6))
228
+ print('-----------------------------------------------')
229
+
230
+ def set_requires_grad(self, nets, requires_grad=False):
231
+ """Set requies_grad=Fasle for all the networks to avoid unnecessary computations
232
+ Parameters:
233
+ nets (network list) -- a list of networks
234
+ requires_grad (bool) -- whether the networks require gradients or not
235
+ """
236
+ if not isinstance(nets, list):
237
+ nets = [nets]
238
+ for net in nets:
239
+ if net is not None:
240
+ for param in net.parameters():
241
+ param.requires_grad = requires_grad
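# Illustrative sketch (not part of this commit): set_requires_grad implements the usual
# GAN bookkeeping -- freeze the discriminator while the generator is optimised, then
# unfreeze it for its own update. A standalone restatement of the same idea:
import torch.nn as nn

def freeze(nets, requires_grad=False):          # same logic as set_requires_grad above
    nets = nets if isinstance(nets, list) else [nets]
    for net in nets:
        if net is not None:
            for p in net.parameters():
                p.requires_grad = requires_grad

netD = nn.Linear(4, 1)                          # stand-in for a discriminator
freeze(netD, False)                             # G step: no gradients accumulate in D
print(all(not p.requires_grad for p in netD.parameters()))   # True
freeze(netD, True)                              # D step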
extensions/microsoftexcel-controlnet/annotator/leres/pix2pix/models/base_model_hg.py ADDED
@@ -0,0 +1,58 @@
1
+ import os
2
+ import torch
3
+
4
+ class BaseModelHG():
5
+ def name(self):
6
+ return 'BaseModel'
7
+
8
+ def initialize(self, opt):
9
+ self.opt = opt
10
+ self.gpu_ids = opt.gpu_ids
11
+ self.isTrain = opt.isTrain
12
+ self.Tensor = torch.cuda.FloatTensor if self.gpu_ids else torch.Tensor
13
+ self.save_dir = os.path.join(opt.checkpoints_dir, opt.name)
14
+
15
+ def set_input(self, input):
16
+ self.input = input
17
+
18
+ def forward(self):
19
+ pass
20
+
21
+ # used in test time, no backprop
22
+ def test(self):
23
+ pass
24
+
25
+ def get_image_paths(self):
26
+ pass
27
+
28
+ def optimize_parameters(self):
29
+ pass
30
+
31
+ def get_current_visuals(self):
32
+ return self.input
33
+
34
+ def get_current_errors(self):
35
+ return {}
36
+
37
+ def save(self, label):
38
+ pass
39
+
40
+ # helper saving function that can be used by subclasses
41
+ def save_network(self, network, network_label, epoch_label, gpu_ids):
42
+ save_filename = '_%s_net_%s.pth' % (epoch_label, network_label)
43
+ save_path = os.path.join(self.save_dir, save_filename)
44
+ torch.save(network.cpu().state_dict(), save_path)
45
+ if len(gpu_ids) and torch.cuda.is_available():
46
+ network.cuda(gpu_ids[0])
47
+
48
+ # helper loading function that can be used by subclasses
49
+ def load_network(self, network, network_label, epoch_label):
50
+ save_filename = '%s_net_%s.pth' % (epoch_label, network_label)
51
+ save_path = os.path.join(self.save_dir, save_filename)
52
+ print(save_path)
53
+ model = torch.load(save_path)
54
+ return model
55
+ # network.load_state_dict(torch.load(save_path))
56
+
57
+ def update_learning_rate(self):
58
+ pass
extensions/microsoftexcel-controlnet/annotator/leres/pix2pix/models/networks.py ADDED
@@ -0,0 +1,623 @@
1
+ import torch
2
+ import torch.nn as nn
3
+ from torch.nn import init
4
+ import functools
5
+ from torch.optim import lr_scheduler
6
+
7
+
8
+ ###############################################################################
9
+ # Helper Functions
10
+ ###############################################################################
11
+
12
+
13
+ class Identity(nn.Module):
14
+ def forward(self, x):
15
+ return x
16
+
17
+
18
+ def get_norm_layer(norm_type='instance'):
19
+ """Return a normalization layer
20
+
21
+ Parameters:
22
+ norm_type (str) -- the name of the normalization layer: batch | instance | none
23
+
24
+ For BatchNorm, we use learnable affine parameters and track running statistics (mean/stddev).
25
+ For InstanceNorm, we do not use learnable affine parameters. We do not track running statistics.
26
+ """
27
+ if norm_type == 'batch':
28
+ norm_layer = functools.partial(nn.BatchNorm2d, affine=True, track_running_stats=True)
29
+ elif norm_type == 'instance':
30
+ norm_layer = functools.partial(nn.InstanceNorm2d, affine=False, track_running_stats=False)
31
+ elif norm_type == 'none':
32
+ def norm_layer(x): return Identity()
33
+ else:
34
+ raise NotImplementedError('normalization layer [%s] is not found' % norm_type)
35
+ return norm_layer
36
+
37
+
38
+ def get_scheduler(optimizer, opt):
39
+ """Return a learning rate scheduler
40
+
41
+ Parameters:
42
+ optimizer -- the optimizer of the network
43
+ opt (option class) -- stores all the experiment flags; needs to be a subclass of BaseOptions.
44
+ opt.lr_policy is the name of learning rate policy: linear | step | plateau | cosine
45
+
46
+ For 'linear', we keep the same learning rate for the first <opt.n_epochs> epochs
47
+ and linearly decay the rate to zero over the next <opt.n_epochs_decay> epochs.
48
+ For other schedulers (step, plateau, and cosine), we use the default PyTorch schedulers.
49
+ See https://pytorch.org/docs/stable/optim.html for more details.
50
+ """
51
+ if opt.lr_policy == 'linear':
52
+ def lambda_rule(epoch):
53
+ lr_l = 1.0 - max(0, epoch + opt.epoch_count - opt.n_epochs) / float(opt.n_epochs_decay + 1)
54
+ return lr_l
55
+ scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda_rule)
56
+ elif opt.lr_policy == 'step':
57
+ scheduler = lr_scheduler.StepLR(optimizer, step_size=opt.lr_decay_iters, gamma=0.1)
58
+ elif opt.lr_policy == 'plateau':
59
+ scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.2, threshold=0.01, patience=5)
60
+ elif opt.lr_policy == 'cosine':
61
+ scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=opt.n_epochs, eta_min=0)
62
+ else:
63
+ return NotImplementedError('learning rate policy [%s] is not implemented', opt.lr_policy)
64
+ return scheduler
65
+
66
+
67
+ def init_weights(net, init_type='normal', init_gain=0.02):
68
+ """Initialize network weights.
69
+
70
+ Parameters:
71
+ net (network) -- network to be initialized
72
+ init_type (str) -- the name of an initialization method: normal | xavier | kaiming | orthogonal
73
+ init_gain (float) -- scaling factor for normal, xavier and orthogonal.
74
+
75
+ We use 'normal' in the original pix2pix and CycleGAN paper. But xavier and kaiming might
76
+ work better for some applications. Feel free to try yourself.
77
+ """
78
+ def init_func(m): # define the initialization function
79
+ classname = m.__class__.__name__
80
+ if hasattr(m, 'weight') and (classname.find('Conv') != -1 or classname.find('Linear') != -1):
81
+ if init_type == 'normal':
82
+ init.normal_(m.weight.data, 0.0, init_gain)
83
+ elif init_type == 'xavier':
84
+ init.xavier_normal_(m.weight.data, gain=init_gain)
85
+ elif init_type == 'kaiming':
86
+ init.kaiming_normal_(m.weight.data, a=0, mode='fan_in')
87
+ elif init_type == 'orthogonal':
88
+ init.orthogonal_(m.weight.data, gain=init_gain)
89
+ else:
90
+ raise NotImplementedError('initialization method [%s] is not implemented' % init_type)
91
+ if hasattr(m, 'bias') and m.bias is not None:
92
+ init.constant_(m.bias.data, 0.0)
93
+ elif classname.find('BatchNorm2d') != -1: # BatchNorm Layer's weight is not a matrix; only normal distribution applies.
94
+ init.normal_(m.weight.data, 1.0, init_gain)
95
+ init.constant_(m.bias.data, 0.0)
96
+
97
+ # print('initialize network with %s' % init_type)
98
+ net.apply(init_func) # apply the initialization function <init_func>
99
+
100
+
101
+ def init_net(net, init_type='normal', init_gain=0.02, gpu_ids=[]):
102
+ """Initialize a network: 1. register CPU/GPU device (with multi-GPU support); 2. initialize the network weights
103
+ Parameters:
104
+ net (network) -- the network to be initialized
105
+ init_type (str) -- the name of an initialization method: normal | xavier | kaiming | orthogonal
106
+ gain (float) -- scaling factor for normal, xavier and orthogonal.
107
+ gpu_ids (int list) -- which GPUs the network runs on: e.g., 0,1,2
108
+
109
+ Return an initialized network.
110
+ """
111
+ if len(gpu_ids) > 0:
112
+ assert(torch.cuda.is_available())
113
+ net.to(gpu_ids[0])
114
+ net = torch.nn.DataParallel(net, gpu_ids) # multi-GPUs
115
+ init_weights(net, init_type, init_gain=init_gain)
116
+ return net
117
+
118
+
119
+ def define_G(input_nc, output_nc, ngf, netG, norm='batch', use_dropout=False, init_type='normal', init_gain=0.02, gpu_ids=[]):
120
+ """Create a generator
121
+
122
+ Parameters:
123
+ input_nc (int) -- the number of channels in input images
124
+ output_nc (int) -- the number of channels in output images
125
+ ngf (int) -- the number of filters in the last conv layer
126
+ netG (str) -- the architecture's name: resnet_9blocks | resnet_6blocks | unet_256 | unet_128
127
+ norm (str) -- the name of normalization layers used in the network: batch | instance | none
128
+ use_dropout (bool) -- if use dropout layers.
129
+ init_type (str) -- the name of our initialization method.
130
+ init_gain (float) -- scaling factor for normal, xavier and orthogonal.
131
+ gpu_ids (int list) -- which GPUs the network runs on: e.g., 0,1,2
132
+
133
+ Returns a generator
134
+
135
+ Our current implementation provides two types of generators:
136
+ U-Net: [unet_128] (for 128x128 input images) and [unet_256] (for 256x256 input images)
137
+ The original U-Net paper: https://arxiv.org/abs/1505.04597
138
+
139
+ Resnet-based generator: [resnet_6blocks] (with 6 Resnet blocks) and [resnet_9blocks] (with 9 Resnet blocks)
140
+ Resnet-based generator consists of several Resnet blocks between a few downsampling/upsampling operations.
141
+ We adapt Torch code from Justin Johnson's neural style transfer project (https://github.com/jcjohnson/fast-neural-style).
142
+
143
+
144
+ The generator has been initialized by <init_net>. It uses ReLU for non-linearity.
145
+ """
146
+ net = None
147
+ norm_layer = get_norm_layer(norm_type=norm)
148
+
149
+ if netG == 'resnet_9blocks':
150
+ net = ResnetGenerator(input_nc, output_nc, ngf, norm_layer=norm_layer, use_dropout=use_dropout, n_blocks=9)
151
+ elif netG == 'resnet_6blocks':
152
+ net = ResnetGenerator(input_nc, output_nc, ngf, norm_layer=norm_layer, use_dropout=use_dropout, n_blocks=6)
153
+ elif netG == 'resnet_12blocks':
154
+ net = ResnetGenerator(input_nc, output_nc, ngf, norm_layer=norm_layer, use_dropout=use_dropout, n_blocks=12)
155
+ elif netG == 'unet_128':
156
+ net = UnetGenerator(input_nc, output_nc, 7, ngf, norm_layer=norm_layer, use_dropout=use_dropout)
157
+ elif netG == 'unet_256':
158
+ net = UnetGenerator(input_nc, output_nc, 8, ngf, norm_layer=norm_layer, use_dropout=use_dropout)
159
+ elif netG == 'unet_672':
160
+ net = UnetGenerator(input_nc, output_nc, 5, ngf, norm_layer=norm_layer, use_dropout=use_dropout)
161
+ elif netG == 'unet_960':
162
+ net = UnetGenerator(input_nc, output_nc, 6, ngf, norm_layer=norm_layer, use_dropout=use_dropout)
163
+ elif netG == 'unet_1024':
164
+ net = UnetGenerator(input_nc, output_nc, 10, ngf, norm_layer=norm_layer, use_dropout=use_dropout)
165
+ else:
166
+ raise NotImplementedError('Generator model name [%s] is not recognized' % netG)
167
+ return init_net(net, init_type, init_gain, gpu_ids)
168
+
169
+
170
+ def define_D(input_nc, ndf, netD, n_layers_D=3, norm='batch', init_type='normal', init_gain=0.02, gpu_ids=[]):
171
+ """Create a discriminator
172
+
173
+ Parameters:
174
+ input_nc (int) -- the number of channels in input images
175
+ ndf (int) -- the number of filters in the first conv layer
176
+ netD (str) -- the architecture's name: basic | n_layers | pixel
177
+ n_layers_D (int) -- the number of conv layers in the discriminator; effective when netD=='n_layers'
178
+ norm (str) -- the type of normalization layers used in the network.
179
+ init_type (str) -- the name of the initialization method.
180
+ init_gain (float) -- scaling factor for normal, xavier and orthogonal.
181
+ gpu_ids (int list) -- which GPUs the network runs on: e.g., 0,1,2
182
+
183
+ Returns a discriminator
184
+
185
+ Our current implementation provides three types of discriminators:
186
+ [basic]: 'PatchGAN' classifier described in the original pix2pix paper.
187
+ It can classify whether 70×70 overlapping patches are real or fake.
188
+ Such a patch-level discriminator architecture has fewer parameters
189
+ than a full-image discriminator and can work on arbitrarily-sized images
190
+ in a fully convolutional fashion.
191
+
192
+ [n_layers]: With this mode, you can specify the number of conv layers in the discriminator
193
+ with the parameter <n_layers_D> (default=3 as used in [basic] (PatchGAN).)
194
+
195
+ [pixel]: 1x1 PixelGAN discriminator can classify whether a pixel is real or not.
196
+ It encourages greater color diversity but has no effect on spatial statistics.
197
+
198
+ The discriminator has been initialized by <init_net>. It uses Leaky ReLU for non-linearity.
199
+ """
200
+ net = None
201
+ norm_layer = get_norm_layer(norm_type=norm)
202
+
203
+ if netD == 'basic': # default PatchGAN classifier
204
+ net = NLayerDiscriminator(input_nc, ndf, n_layers=3, norm_layer=norm_layer)
205
+ elif netD == 'n_layers': # more options
206
+ net = NLayerDiscriminator(input_nc, ndf, n_layers_D, norm_layer=norm_layer)
207
+ elif netD == 'pixel': # classify if each pixel is real or fake
208
+ net = PixelDiscriminator(input_nc, ndf, norm_layer=norm_layer)
209
+ else:
210
+ raise NotImplementedError('Discriminator model name [%s] is not recognized' % netD)
211
+ return init_net(net, init_type, init_gain, gpu_ids)
212
+
213
+
214
+ ##############################################################################
215
+ # Classes
216
+ ##############################################################################
217
+ class GANLoss(nn.Module):
218
+ """Define different GAN objectives.
219
+
220
+ The GANLoss class abstracts away the need to create the target label tensor
221
+ that has the same size as the input.
222
+ """
223
+
224
+ def __init__(self, gan_mode, target_real_label=1.0, target_fake_label=0.0):
225
+ """ Initialize the GANLoss class.
226
+
227
+ Parameters:
228
+ gan_mode (str) - - the type of GAN objective. It currently supports vanilla, lsgan, and wgangp.
229
+ target_real_label (bool) - - label for a real image
230
+ target_fake_label (bool) - - label of a fake image
231
+
232
+ Note: Do not use sigmoid as the last layer of Discriminator.
233
+ LSGAN needs no sigmoid. vanilla GANs will handle it with BCEWithLogitsLoss.
234
+ """
235
+ super(GANLoss, self).__init__()
236
+ self.register_buffer('real_label', torch.tensor(target_real_label))
237
+ self.register_buffer('fake_label', torch.tensor(target_fake_label))
238
+ self.gan_mode = gan_mode
239
+ if gan_mode == 'lsgan':
240
+ self.loss = nn.MSELoss()
241
+ elif gan_mode == 'vanilla':
242
+ self.loss = nn.BCEWithLogitsLoss()
243
+ elif gan_mode in ['wgangp']:
244
+ self.loss = None
245
+ else:
246
+ raise NotImplementedError('gan mode %s not implemented' % gan_mode)
247
+
248
+ def get_target_tensor(self, prediction, target_is_real):
249
+ """Create label tensors with the same size as the input.
250
+
251
+ Parameters:
252
+ prediction (tensor) - - typically the prediction from a discriminator
253
+ target_is_real (bool) - - if the ground truth label is for real images or fake images
254
+
255
+ Returns:
256
+ A label tensor filled with ground truth label, and with the size of the input
257
+ """
258
+
259
+ if target_is_real:
260
+ target_tensor = self.real_label
261
+ else:
262
+ target_tensor = self.fake_label
263
+ return target_tensor.expand_as(prediction)
264
+
265
+ def __call__(self, prediction, target_is_real):
266
+ """Calculate loss given Discriminator's output and grount truth labels.
267
+
268
+ Parameters:
269
+ prediction (tensor) - - typically the prediction output from a discriminator
270
+ target_is_real (bool) - - if the ground truth label is for real images or fake images
271
+
272
+ Returns:
273
+ the calculated loss.
274
+ """
275
+ if self.gan_mode in ['lsgan', 'vanilla']:
276
+ target_tensor = self.get_target_tensor(prediction, target_is_real)
277
+ loss = self.loss(prediction, target_tensor)
278
+ elif self.gan_mode == 'wgangp':
279
+ if target_is_real:
280
+ loss = -prediction.mean()
281
+ else:
282
+ loss = prediction.mean()
283
+ return loss
284
+
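# Illustrative sketch (not part of this commit): GANLoss expands the scalar real/fake
# label to the prediction's shape, so patch-level discriminator maps are scored directly.
criterion = GANLoss('lsgan')
pred = torch.randn(4, 1, 30, 30)      # e.g. a PatchGAN prediction map
loss_real = criterion(pred, True)     # MSE against an all-ones target
loss_fake = criterion(pred, False)    # MSE against an all-zeros target
print(loss_real.item(), loss_fake.item())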
285
+
286
+ def cal_gradient_penalty(netD, real_data, fake_data, device, type='mixed', constant=1.0, lambda_gp=10.0):
287
+ """Calculate the gradient penalty loss, used in WGAN-GP paper https://arxiv.org/abs/1704.00028
288
+
289
+ Arguments:
290
+ netD (network) -- discriminator network
291
+ real_data (tensor array) -- real images
292
+ fake_data (tensor array) -- generated images from the generator
293
+ device (str) -- GPU / CPU: from torch.device('cuda:{}'.format(self.gpu_ids[0])) if self.gpu_ids else torch.device('cpu')
294
+ type (str) -- if we mix real and fake data or not [real | fake | mixed].
295
+ constant (float) -- the constant used in formula ( ||gradient||_2 - constant)^2
296
+ lambda_gp (float) -- weight for this loss
297
+
298
+ Returns the gradient penalty loss
299
+ """
300
+ if lambda_gp > 0.0:
301
+ if type == 'real': # either use real images, fake images, or a linear interpolation of two.
302
+ interpolatesv = real_data
303
+ elif type == 'fake':
304
+ interpolatesv = fake_data
305
+ elif type == 'mixed':
306
+ alpha = torch.rand(real_data.shape[0], 1, device=device)
307
+ alpha = alpha.expand(real_data.shape[0], real_data.nelement() // real_data.shape[0]).contiguous().view(*real_data.shape)
308
+ interpolatesv = alpha * real_data + ((1 - alpha) * fake_data)
309
+ else:
310
+ raise NotImplementedError('{} not implemented'.format(type))
311
+ interpolatesv.requires_grad_(True)
312
+ disc_interpolates = netD(interpolatesv)
313
+ gradients = torch.autograd.grad(outputs=disc_interpolates, inputs=interpolatesv,
314
+ grad_outputs=torch.ones(disc_interpolates.size()).to(device),
315
+ create_graph=True, retain_graph=True, only_inputs=True)
316
+ gradients = gradients[0].view(real_data.size(0), -1) # flatten the data
317
+ gradient_penalty = (((gradients + 1e-16).norm(2, dim=1) - constant) ** 2).mean() * lambda_gp # added eps
318
+ return gradient_penalty, gradients
319
+ else:
320
+ return 0.0, None
321
+
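# Illustrative sketch (not part of this commit): typical WGAN-GP usage; the penalty is
# added to the discriminator loss before backward(). The tiny stand-in discriminator
# and the random tensors below are placeholders only.
_D = nn.Sequential(nn.Conv2d(3, 1, kernel_size=4, stride=2, padding=1))
real = torch.rand(2, 3, 64, 64)
fake = torch.rand(2, 3, 64, 64)
gp, _ = cal_gradient_penalty(_D, real, fake, device='cpu', type='mixed', lambda_gp=10.0)
gp.backward()                         # gradients of the penalty reach _D's parameters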
322
+
323
+ class ResnetGenerator(nn.Module):
324
+ """Resnet-based generator that consists of Resnet blocks between a few downsampling/upsampling operations.
325
+
326
+ We adapt Torch code and idea from Justin Johnson's neural style transfer project(https://github.com/jcjohnson/fast-neural-style)
327
+ """
328
+
329
+ def __init__(self, input_nc, output_nc, ngf=64, norm_layer=nn.BatchNorm2d, use_dropout=False, n_blocks=6, padding_type='reflect'):
330
+ """Construct a Resnet-based generator
331
+
332
+ Parameters:
333
+ input_nc (int) -- the number of channels in input images
334
+ output_nc (int) -- the number of channels in output images
335
+ ngf (int) -- the number of filters in the last conv layer
336
+ norm_layer -- normalization layer
337
+ use_dropout (bool) -- if use dropout layers
338
+ n_blocks (int) -- the number of ResNet blocks
339
+ padding_type (str) -- the name of padding layer in conv layers: reflect | replicate | zero
340
+ """
341
+ assert(n_blocks >= 0)
342
+ super(ResnetGenerator, self).__init__()
343
+ if type(norm_layer) == functools.partial:
344
+ use_bias = norm_layer.func == nn.InstanceNorm2d
345
+ else:
346
+ use_bias = norm_layer == nn.InstanceNorm2d
347
+
348
+ model = [nn.ReflectionPad2d(3),
349
+ nn.Conv2d(input_nc, ngf, kernel_size=7, padding=0, bias=use_bias),
350
+ norm_layer(ngf),
351
+ nn.ReLU(True)]
352
+
353
+ n_downsampling = 2
354
+ for i in range(n_downsampling): # add downsampling layers
355
+ mult = 2 ** i
356
+ model += [nn.Conv2d(ngf * mult, ngf * mult * 2, kernel_size=3, stride=2, padding=1, bias=use_bias),
357
+ norm_layer(ngf * mult * 2),
358
+ nn.ReLU(True)]
359
+
360
+ mult = 2 ** n_downsampling
361
+ for i in range(n_blocks): # add ResNet blocks
362
+
363
+ model += [ResnetBlock(ngf * mult, padding_type=padding_type, norm_layer=norm_layer, use_dropout=use_dropout, use_bias=use_bias)]
364
+
365
+ for i in range(n_downsampling): # add upsampling layers
366
+ mult = 2 ** (n_downsampling - i)
367
+ model += [nn.ConvTranspose2d(ngf * mult, int(ngf * mult / 2),
368
+ kernel_size=3, stride=2,
369
+ padding=1, output_padding=1,
370
+ bias=use_bias),
371
+ norm_layer(int(ngf * mult / 2)),
372
+ nn.ReLU(True)]
373
+ model += [nn.ReflectionPad2d(3)]
374
+ model += [nn.Conv2d(ngf, output_nc, kernel_size=7, padding=0)]
375
+ model += [nn.Tanh()]
376
+
377
+ self.model = nn.Sequential(*model)
378
+
379
+ def forward(self, input):
380
+ """Standard forward"""
381
+ return self.model(input)
382
+
383
+
384
+ class ResnetBlock(nn.Module):
385
+ """Define a Resnet block"""
386
+
387
+ def __init__(self, dim, padding_type, norm_layer, use_dropout, use_bias):
388
+ """Initialize the Resnet block
389
+
390
+ A resnet block is a conv block with skip connections
391
+ We construct a conv block with build_conv_block function,
392
+ and implement skip connections in <forward> function.
393
+ Original Resnet paper: https://arxiv.org/pdf/1512.03385.pdf
394
+ """
395
+ super(ResnetBlock, self).__init__()
396
+ self.conv_block = self.build_conv_block(dim, padding_type, norm_layer, use_dropout, use_bias)
397
+
398
+ def build_conv_block(self, dim, padding_type, norm_layer, use_dropout, use_bias):
399
+ """Construct a convolutional block.
400
+
401
+ Parameters:
402
+ dim (int) -- the number of channels in the conv layer.
403
+ padding_type (str) -- the name of padding layer: reflect | replicate | zero
404
+ norm_layer -- normalization layer
405
+ use_dropout (bool) -- if use dropout layers.
406
+ use_bias (bool) -- if the conv layer uses bias or not
407
+
408
+ Returns a conv block (with a conv layer, a normalization layer, and a non-linearity layer (ReLU))
409
+ """
410
+ conv_block = []
411
+ p = 0
412
+ if padding_type == 'reflect':
413
+ conv_block += [nn.ReflectionPad2d(1)]
414
+ elif padding_type == 'replicate':
415
+ conv_block += [nn.ReplicationPad2d(1)]
416
+ elif padding_type == 'zero':
417
+ p = 1
418
+ else:
419
+ raise NotImplementedError('padding [%s] is not implemented' % padding_type)
420
+
421
+ conv_block += [nn.Conv2d(dim, dim, kernel_size=3, padding=p, bias=use_bias), norm_layer(dim), nn.ReLU(True)]
422
+ if use_dropout:
423
+ conv_block += [nn.Dropout(0.5)]
424
+
425
+ p = 0
426
+ if padding_type == 'reflect':
427
+ conv_block += [nn.ReflectionPad2d(1)]
428
+ elif padding_type == 'replicate':
429
+ conv_block += [nn.ReplicationPad2d(1)]
430
+ elif padding_type == 'zero':
431
+ p = 1
432
+ else:
433
+ raise NotImplementedError('padding [%s] is not implemented' % padding_type)
434
+ conv_block += [nn.Conv2d(dim, dim, kernel_size=3, padding=p, bias=use_bias), norm_layer(dim)]
435
+
436
+ return nn.Sequential(*conv_block)
437
+
438
+ def forward(self, x):
439
+ """Forward function (with skip connections)"""
440
+ out = x + self.conv_block(x) # add skip connections
441
+ return out
442
+
443
+
444
+ class UnetGenerator(nn.Module):
445
+ """Create a Unet-based generator"""
446
+
447
+ def __init__(self, input_nc, output_nc, num_downs, ngf=64, norm_layer=nn.BatchNorm2d, use_dropout=False):
448
+ """Construct a Unet generator
449
+ Parameters:
450
+ input_nc (int) -- the number of channels in input images
451
+ output_nc (int) -- the number of channels in output images
452
+ num_downs (int) -- the number of downsamplings in UNet. For example, if |num_downs| == 7,
453
+ an image of size 128x128 will become 1x1 at the bottleneck
454
+ ngf (int) -- the number of filters in the last conv layer
455
+ norm_layer -- normalization layer
456
+
457
+ We construct the U-Net from the innermost layer to the outermost layer.
458
+ It is a recursive process.
459
+ """
460
+ super(UnetGenerator, self).__init__()
461
+ # construct unet structure
462
+ unet_block = UnetSkipConnectionBlock(ngf * 8, ngf * 8, input_nc=None, submodule=None, norm_layer=norm_layer, innermost=True) # add the innermost layer
463
+ for i in range(num_downs - 5): # add intermediate layers with ngf * 8 filters
464
+ unet_block = UnetSkipConnectionBlock(ngf * 8, ngf * 8, input_nc=None, submodule=unet_block, norm_layer=norm_layer, use_dropout=use_dropout)
465
+ # gradually reduce the number of filters from ngf * 8 to ngf
466
+ unet_block = UnetSkipConnectionBlock(ngf * 4, ngf * 8, input_nc=None, submodule=unet_block, norm_layer=norm_layer)
467
+ unet_block = UnetSkipConnectionBlock(ngf * 2, ngf * 4, input_nc=None, submodule=unet_block, norm_layer=norm_layer)
468
+ unet_block = UnetSkipConnectionBlock(ngf, ngf * 2, input_nc=None, submodule=unet_block, norm_layer=norm_layer)
469
+ self.model = UnetSkipConnectionBlock(output_nc, ngf, input_nc=input_nc, submodule=unet_block, outermost=True, norm_layer=norm_layer) # add the outermost layer
470
+
471
+ def forward(self, input):
472
+ """Standard forward"""
473
+ return self.model(input)
474
+
475
+
476
+ class UnetSkipConnectionBlock(nn.Module):
477
+ """Defines the Unet submodule with skip connection.
478
+ X -------------------identity----------------------
479
+ |-- downsampling -- |submodule| -- upsampling --|
480
+ """
481
+
482
+ def __init__(self, outer_nc, inner_nc, input_nc=None,
483
+ submodule=None, outermost=False, innermost=False, norm_layer=nn.BatchNorm2d, use_dropout=False):
484
+ """Construct a Unet submodule with skip connections.
485
+
486
+ Parameters:
487
+ outer_nc (int) -- the number of filters in the outer conv layer
488
+ inner_nc (int) -- the number of filters in the inner conv layer
489
+ input_nc (int) -- the number of channels in input images/features
490
+ submodule (UnetSkipConnectionBlock) -- previously defined submodules
491
+ outermost (bool) -- if this module is the outermost module
492
+ innermost (bool) -- if this module is the innermost module
493
+ norm_layer -- normalization layer
494
+ use_dropout (bool) -- if use dropout layers.
495
+ """
496
+ super(UnetSkipConnectionBlock, self).__init__()
497
+ self.outermost = outermost
498
+ if type(norm_layer) == functools.partial:
499
+ use_bias = norm_layer.func == nn.InstanceNorm2d
500
+ else:
501
+ use_bias = norm_layer == nn.InstanceNorm2d
502
+ if input_nc is None:
503
+ input_nc = outer_nc
504
+ downconv = nn.Conv2d(input_nc, inner_nc, kernel_size=4,
505
+ stride=2, padding=1, bias=use_bias)
506
+ downrelu = nn.LeakyReLU(0.2, True)
507
+ downnorm = norm_layer(inner_nc)
508
+ uprelu = nn.ReLU(True)
509
+ upnorm = norm_layer(outer_nc)
510
+
511
+ if outermost:
512
+ upconv = nn.ConvTranspose2d(inner_nc * 2, outer_nc,
513
+ kernel_size=4, stride=2,
514
+ padding=1)
515
+ down = [downconv]
516
+ up = [uprelu, upconv, nn.Tanh()]
517
+ model = down + [submodule] + up
518
+ elif innermost:
519
+ upconv = nn.ConvTranspose2d(inner_nc, outer_nc,
520
+ kernel_size=4, stride=2,
521
+ padding=1, bias=use_bias)
522
+ down = [downrelu, downconv]
523
+ up = [uprelu, upconv, upnorm]
524
+ model = down + up
525
+ else:
526
+ upconv = nn.ConvTranspose2d(inner_nc * 2, outer_nc,
527
+ kernel_size=4, stride=2,
528
+ padding=1, bias=use_bias)
529
+ down = [downrelu, downconv, downnorm]
530
+ up = [uprelu, upconv, upnorm]
531
+
532
+ if use_dropout:
533
+ model = down + [submodule] + up + [nn.Dropout(0.5)]
534
+ else:
535
+ model = down + [submodule] + up
536
+
537
+ self.model = nn.Sequential(*model)
538
+
539
+ def forward(self, x):
540
+ if self.outermost:
541
+ return self.model(x)
542
+ else: # add skip connections
543
+ return torch.cat([x, self.model(x)], 1)
544
+
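# Illustrative sketch (not part of this commit): every UnetSkipConnectionBlock halves
# the resolution on the way down and doubles it on the way up, so the input side must
# be divisible by 2 ** num_downs; 'unet_256' (num_downs=8) takes a 256px image to 1x1
# at the bottleneck, and 'unet_1024' (num_downs=10) does the same for 1024px.
net = UnetGenerator(input_nc=3, output_nc=3, num_downs=8, ngf=64)
x = torch.randn(1, 3, 256, 256)
print(net(x).shape)                   # torch.Size([1, 3, 256, 256]) -- resolution preserved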
545
+
546
+ class NLayerDiscriminator(nn.Module):
547
+ """Defines a PatchGAN discriminator"""
548
+
549
+ def __init__(self, input_nc, ndf=64, n_layers=3, norm_layer=nn.BatchNorm2d):
550
+ """Construct a PatchGAN discriminator
551
+
552
+ Parameters:
553
+ input_nc (int) -- the number of channels in input images
554
+ ndf (int) -- the number of filters in the last conv layer
555
+ n_layers (int) -- the number of conv layers in the discriminator
556
+ norm_layer -- normalization layer
557
+ """
558
+ super(NLayerDiscriminator, self).__init__()
559
+ if type(norm_layer) == functools.partial: # no need to use bias as BatchNorm2d has affine parameters
560
+ use_bias = norm_layer.func == nn.InstanceNorm2d
561
+ else:
562
+ use_bias = norm_layer == nn.InstanceNorm2d
563
+
564
+ kw = 4
565
+ padw = 1
566
+ sequence = [nn.Conv2d(input_nc, ndf, kernel_size=kw, stride=2, padding=padw), nn.LeakyReLU(0.2, True)]
567
+ nf_mult = 1
568
+ nf_mult_prev = 1
569
+ for n in range(1, n_layers): # gradually increase the number of filters
570
+ nf_mult_prev = nf_mult
571
+ nf_mult = min(2 ** n, 8)
572
+ sequence += [
573
+ nn.Conv2d(ndf * nf_mult_prev, ndf * nf_mult, kernel_size=kw, stride=2, padding=padw, bias=use_bias),
574
+ norm_layer(ndf * nf_mult),
575
+ nn.LeakyReLU(0.2, True)
576
+ ]
577
+
578
+ nf_mult_prev = nf_mult
579
+ nf_mult = min(2 ** n_layers, 8)
580
+ sequence += [
581
+ nn.Conv2d(ndf * nf_mult_prev, ndf * nf_mult, kernel_size=kw, stride=1, padding=padw, bias=use_bias),
582
+ norm_layer(ndf * nf_mult),
583
+ nn.LeakyReLU(0.2, True)
584
+ ]
585
+
586
+ sequence += [nn.Conv2d(ndf * nf_mult, 1, kernel_size=kw, stride=1, padding=padw)] # output 1 channel prediction map
587
+ self.model = nn.Sequential(*sequence)
588
+
589
+ def forward(self, input):
590
+ """Standard forward."""
591
+ return self.model(input)
592
+
593
+
594
+ class PixelDiscriminator(nn.Module):
595
+ """Defines a 1x1 PatchGAN discriminator (pixelGAN)"""
596
+
597
+ def __init__(self, input_nc, ndf=64, norm_layer=nn.BatchNorm2d):
598
+ """Construct a 1x1 PatchGAN discriminator
599
+
600
+ Parameters:
601
+ input_nc (int) -- the number of channels in input images
602
+ ndf (int) -- the number of filters in the last conv layer
603
+ norm_layer -- normalization layer
604
+ """
605
+ super(PixelDiscriminator, self).__init__()
606
+ if type(norm_layer) == functools.partial: # no need to use bias as BatchNorm2d has affine parameters
607
+ use_bias = norm_layer.func == nn.InstanceNorm2d
608
+ else:
609
+ use_bias = norm_layer == nn.InstanceNorm2d
610
+
611
+ self.net = [
612
+ nn.Conv2d(input_nc, ndf, kernel_size=1, stride=1, padding=0),
613
+ nn.LeakyReLU(0.2, True),
614
+ nn.Conv2d(ndf, ndf * 2, kernel_size=1, stride=1, padding=0, bias=use_bias),
615
+ norm_layer(ndf * 2),
616
+ nn.LeakyReLU(0.2, True),
617
+ nn.Conv2d(ndf * 2, 1, kernel_size=1, stride=1, padding=0, bias=use_bias)]
618
+
619
+ self.net = nn.Sequential(*self.net)
620
+
621
+ def forward(self, input):
622
+ """Standard forward."""
623
+ return self.net(input)
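# Illustrative sketch (not part of this commit): the factories above as the depth-merge
# model configures them (see pix2pix4depth_model.py below): a 2-in / 1-out 'unet_1024'
# generator with norm='none', and a 'basic' PatchGAN discriminator over the 3-channel
# (input + output) pair. Shapes are assumptions for illustration.
if __name__ == '__main__':
    netG = define_G(input_nc=2, output_nc=1, ngf=64, netG='unet_1024', norm='none', gpu_ids=[])
    netD = define_D(input_nc=3, ndf=64, netD='basic', norm='batch', gpu_ids=[])
    x = torch.randn(1, 2, 1024, 1024)       # two depth estimates stacked channel-wise
    print(netG(x).shape)                    # torch.Size([1, 1, 1024, 1024])
    pair = torch.randn(1, 3, 1024, 1024)    # conditioning input + (fake or real) depth
    print(netD(pair).shape)                 # one logit per overlapping patch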
extensions/microsoftexcel-controlnet/annotator/leres/pix2pix/models/pix2pix4depth_model.py ADDED
@@ -0,0 +1,155 @@
1
+ import torch
2
+ from .base_model import BaseModel
3
+ from . import networks
4
+
5
+
6
+ class Pix2Pix4DepthModel(BaseModel):
7
+ """ This class implements the pix2pix model, for learning a mapping from input images to output images given paired data.
8
+
9
+ The model training requires a '--dataset_mode depthmerge' dataset (see the defaults set below).
10
+ By default, it uses a '--netG unet_1024' U-Net generator,
11
+ a '--netD basic' discriminator (PatchGAN),
12
+ and a '--gan_mode' vanilla GAN loss (the cross-entropy objective used in the original GAN paper).
13
+
14
+ pix2pix paper: https://arxiv.org/pdf/1611.07004.pdf
15
+ """
16
+ @staticmethod
17
+ def modify_commandline_options(parser, is_train=True):
18
+ """Add new dataset-specific options, and rewrite default values for existing options.
19
+
20
+ Parameters:
21
+ parser -- original option parser
22
+ is_train (bool) -- whether training phase or test phase. You can use this flag to add training-specific or test-specific options.
23
+
24
+ Returns:
25
+ the modified parser.
26
+
27
+ For pix2pix, we do not use image buffer
28
+ The training objective is: GAN Loss + lambda_L1 * ||G(A)-B||_1
29
+ By default, we use vanilla GAN loss, UNet with batchnorm, and aligned datasets.
30
+ """
31
+ # changing the default values to match the pix2pix paper (https://phillipi.github.io/pix2pix/)
32
+ parser.set_defaults(input_nc=2,output_nc=1,norm='none', netG='unet_1024', dataset_mode='depthmerge')
33
+ if is_train:
34
+ parser.set_defaults(pool_size=0, gan_mode='vanilla',)
35
+ parser.add_argument('--lambda_L1', type=float, default=1000, help='weight for L1 loss')
36
+ return parser
37
+
38
+ def __init__(self, opt):
39
+ """Initialize the pix2pix class.
40
+
41
+ Parameters:
42
+ opt (Option class)-- stores all the experiment flags; needs to be a subclass of BaseOptions
43
+ """
44
+ BaseModel.__init__(self, opt)
45
+ # specify the training losses you want to print out. The training/test scripts will call <BaseModel.get_current_losses>
46
+
47
+ self.loss_names = ['G_GAN', 'G_L1', 'D_real', 'D_fake']
48
+ # self.loss_names = ['G_L1']
49
+
50
+ # specify the images you want to save/display. The training/test scripts will call <BaseModel.get_current_visuals>
51
+ if self.isTrain:
52
+ self.visual_names = ['outer','inner', 'fake_B', 'real_B']
53
+ else:
54
+ self.visual_names = ['fake_B']
55
+
56
+ # specify the models you want to save to the disk. The training/test scripts will call <BaseModel.save_networks> and <BaseModel.load_networks>
57
+ if self.isTrain:
58
+ self.model_names = ['G','D']
59
+ else: # during test time, only load G
60
+ self.model_names = ['G']
61
+
62
+ # define networks (both generator and discriminator)
63
+ self.netG = networks.define_G(opt.input_nc, opt.output_nc, 64, 'unet_1024', 'none',
64
+ False, 'normal', 0.02, self.gpu_ids)
65
+
66
+ if self.isTrain: # define a discriminator; conditional GANs need to take both input and output images; Therefore, #channels for D is input_nc + output_nc
67
+ self.netD = networks.define_D(opt.input_nc + opt.output_nc, opt.ndf, opt.netD,
68
+ opt.n_layers_D, opt.norm, opt.init_type, opt.init_gain, self.gpu_ids)
69
+
70
+ if self.isTrain:
71
+ # define loss functions
72
+ self.criterionGAN = networks.GANLoss(opt.gan_mode).to(self.device)
73
+ self.criterionL1 = torch.nn.L1Loss()
74
+ # initialize optimizers; schedulers will be automatically created by function <BaseModel.setup>.
75
+ self.optimizer_G = torch.optim.Adam(self.netG.parameters(), lr=1e-4, betas=(opt.beta1, 0.999))
76
+ self.optimizer_D = torch.optim.Adam(self.netD.parameters(), lr=2e-06, betas=(opt.beta1, 0.999))
77
+ self.optimizers.append(self.optimizer_G)
78
+ self.optimizers.append(self.optimizer_D)
79
+
80
+ def set_input_train(self, input):
81
+ self.outer = input['data_outer'].to(self.device)
82
+ self.outer = torch.nn.functional.interpolate(self.outer,(1024,1024),mode='bilinear',align_corners=False)
83
+
84
+ self.inner = input['data_inner'].to(self.device)
85
+ self.inner = torch.nn.functional.interpolate(self.inner,(1024,1024),mode='bilinear',align_corners=False)
86
+
87
+ self.image_paths = input['image_path']
88
+
89
+ if self.isTrain:
90
+ self.gtfake = input['data_gtfake'].to(self.device)
91
+ self.gtfake = torch.nn.functional.interpolate(self.gtfake, (1024, 1024), mode='bilinear', align_corners=False)
92
+ self.real_B = self.gtfake
93
+
94
+ self.real_A = torch.cat((self.outer, self.inner), 1)
95
+
96
+ def set_input(self, outer, inner):
97
+ inner = torch.from_numpy(inner).unsqueeze(0).unsqueeze(0)
98
+ outer = torch.from_numpy(outer).unsqueeze(0).unsqueeze(0)
99
+
100
+ inner = (inner - torch.min(inner))/(torch.max(inner)-torch.min(inner))
101
+ outer = (outer - torch.min(outer))/(torch.max(outer)-torch.min(outer))
102
+
103
+ inner = self.normalize(inner)
104
+ outer = self.normalize(outer)
105
+
106
+ self.real_A = torch.cat((outer, inner), 1).to(self.device)
107
+
108
+
109
+ def normalize(self, input):
110
+ input = input * 2
111
+ input = input - 1
112
+ return input
113
+
114
+ def forward(self):
115
+ """Run forward pass; called by both functions <optimize_parameters> and <test>."""
116
+ self.fake_B = self.netG(self.real_A) # G(A)
117
+
118
+ def backward_D(self):
119
+ """Calculate GAN loss for the discriminator"""
120
+ # Fake; stop backprop to the generator by detaching fake_B
121
+ fake_AB = torch.cat((self.real_A, self.fake_B), 1) # we use conditional GANs; we need to feed both input and output to the discriminator
122
+ pred_fake = self.netD(fake_AB.detach())
123
+ self.loss_D_fake = self.criterionGAN(pred_fake, False)
124
+ # Real
125
+ real_AB = torch.cat((self.real_A, self.real_B), 1)
126
+ pred_real = self.netD(real_AB)
127
+ self.loss_D_real = self.criterionGAN(pred_real, True)
128
+ # combine loss and calculate gradients
129
+ self.loss_D = (self.loss_D_fake + self.loss_D_real) * 0.5
130
+ self.loss_D.backward()
131
+
132
+ def backward_G(self):
133
+ """Calculate GAN and L1 loss for the generator"""
134
+ # First, G(A) should fake the discriminator
135
+ fake_AB = torch.cat((self.real_A, self.fake_B), 1)
136
+ pred_fake = self.netD(fake_AB)
137
+ self.loss_G_GAN = self.criterionGAN(pred_fake, True)
138
+ # Second, G(A) = B
139
+ self.loss_G_L1 = self.criterionL1(self.fake_B, self.real_B) * self.opt.lambda_L1
140
+ # combine loss and calculate gradients
141
+ self.loss_G = self.loss_G_L1 + self.loss_G_GAN
142
+ self.loss_G.backward()
143
+
144
+ def optimize_parameters(self):
145
+ self.forward() # compute fake images: G(A)
146
+ # update D
147
+ self.set_requires_grad(self.netD, True) # enable backprop for D
148
+ self.optimizer_D.zero_grad() # set D's gradients to zero
149
+ self.backward_D() # calculate gradients for D
150
+ self.optimizer_D.step() # update D's weights
151
+ # update G
152
+ self.set_requires_grad(self.netD, False) # D requires no gradients when optimizing G
153
+ self.optimizer_G.zero_grad() # set G's gradients to zero
154
+ self.backward_G() # calculate gradients for G
155
+ self.optimizer_G.step() # update G's weights
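A rough inference sketch for this merge model, assuming `opt` comes from `TestOptions().parse()` (defined further below), that `low_res_depth` and `high_res_depth` are equally sized 2-D float numpy arrays, and that the `BaseModel` helpers (`load_networks`, `eval`, `test`) behave as in the upstream pix2pix code; the checkpoint folder below is an assumption:

# assumes Pix2Pix4DepthModel and opt are already in scope
model = Pix2Pix4DepthModel(opt)                        # opt.isTrain is False here
model.save_dir = './pix2pix/checkpoints/mergemodel'    # assumed checkpoint location
model.load_networks('latest')                          # expects latest_net_G.pth
model.eval()

# outer = whole-image depth estimate, inner = patch/high-frequency estimate;
# set_input rescales both to [-1, 1] internally.
model.set_input(low_res_depth, high_res_depth)
model.test()                                           # forward() under torch.no_grad()
merged = model.fake_B[0, 0].detach().cpu().numpy()     # network output in [-1, 1]
merged = (merged + 1.0) / 2.0                          # back to [0, 1] for further use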
extensions/microsoftexcel-controlnet/annotator/leres/pix2pix/options/__init__.py ADDED
@@ -0,0 +1 @@
1
+ """This package options includes option modules: training options, test options, and basic options (used in both training and test)."""
extensions/microsoftexcel-controlnet/annotator/leres/pix2pix/options/base_options.py ADDED
@@ -0,0 +1,156 @@
1
+ import argparse
2
+ import os
3
+ from ...pix2pix.util import util
4
+ # import torch
5
+ from ...pix2pix import models
6
+ # import pix2pix.data
7
+ import numpy as np
8
+
9
+ class BaseOptions():
10
+ """This class defines options used during both training and test time.
11
+
12
+ It also implements several helper functions such as parsing, printing, and saving the options.
13
+ It also gathers additional options defined in <modify_commandline_options> functions in both dataset class and model class.
14
+ """
15
+
16
+ def __init__(self):
17
+ """Reset the class; indicates the class hasn't been initailized"""
18
+ self.initialized = False
19
+
20
+ def initialize(self, parser):
21
+ """Define the common options that are used in both training and test."""
22
+ # basic parameters
23
+ parser.add_argument('--dataroot', help='path to images (should have subfolders trainA, trainB, valA, valB, etc)')
24
+ parser.add_argument('--name', type=str, default='void', help='mahdi_unet_new, scaled_unet')
25
+ parser.add_argument('--gpu_ids', type=str, default='0', help='gpu ids: e.g. 0 0,1,2, 0,2. use -1 for CPU')
26
+ parser.add_argument('--checkpoints_dir', type=str, default='./pix2pix/checkpoints', help='models are saved here')
27
+ # model parameters
28
+ parser.add_argument('--model', type=str, default='cycle_gan', help='chooses which model to use. [cycle_gan | pix2pix | test | colorization]')
29
+ parser.add_argument('--input_nc', type=int, default=2, help='# of input image channels: 3 for RGB and 1 for grayscale')
30
+ parser.add_argument('--output_nc', type=int, default=1, help='# of output image channels: 3 for RGB and 1 for grayscale')
31
+ parser.add_argument('--ngf', type=int, default=64, help='# of gen filters in the last conv layer')
32
+ parser.add_argument('--ndf', type=int, default=64, help='# of discrim filters in the first conv layer')
33
+ parser.add_argument('--netD', type=str, default='basic', help='specify discriminator architecture [basic | n_layers | pixel]. The basic model is a 70x70 PatchGAN. n_layers allows you to specify the layers in the discriminator')
34
+ parser.add_argument('--netG', type=str, default='resnet_9blocks', help='specify generator architecture [resnet_9blocks | resnet_6blocks | unet_256 | unet_128]')
35
+ parser.add_argument('--n_layers_D', type=int, default=3, help='only used if netD==n_layers')
36
+ parser.add_argument('--norm', type=str, default='instance', help='instance normalization or batch normalization [instance | batch | none]')
37
+ parser.add_argument('--init_type', type=str, default='normal', help='network initialization [normal | xavier | kaiming | orthogonal]')
38
+ parser.add_argument('--init_gain', type=float, default=0.02, help='scaling factor for normal, xavier and orthogonal.')
39
+ parser.add_argument('--no_dropout', action='store_true', help='no dropout for the generator')
40
+ # dataset parameters
41
+ parser.add_argument('--dataset_mode', type=str, default='unaligned', help='chooses how datasets are loaded. [unaligned | aligned | single | colorization]')
42
+ parser.add_argument('--direction', type=str, default='AtoB', help='AtoB or BtoA')
43
+ parser.add_argument('--serial_batches', action='store_true', help='if true, takes images in order to make batches, otherwise takes them randomly')
44
+ parser.add_argument('--num_threads', default=4, type=int, help='# threads for loading data')
45
+ parser.add_argument('--batch_size', type=int, default=1, help='input batch size')
46
+ parser.add_argument('--load_size', type=int, default=672, help='scale images to this size')
47
+ parser.add_argument('--crop_size', type=int, default=672, help='then crop to this size')
48
+ parser.add_argument('--max_dataset_size', type=int, default=10000, help='Maximum number of samples allowed per dataset. If the dataset directory contains more than max_dataset_size, only a subset is loaded.')
49
+ parser.add_argument('--preprocess', type=str, default='resize_and_crop', help='scaling and cropping of images at load time [resize_and_crop | crop | scale_width | scale_width_and_crop | none]')
50
+ parser.add_argument('--no_flip', action='store_true', help='if specified, do not flip the images for data augmentation')
51
+ parser.add_argument('--display_winsize', type=int, default=256, help='display window size for both visdom and HTML')
52
+ # additional parameters
53
+ parser.add_argument('--epoch', type=str, default='latest', help='which epoch to load? set to latest to use latest cached model')
54
+ parser.add_argument('--load_iter', type=int, default='0', help='which iteration to load? if load_iter > 0, the code will load models by iter_[load_iter]; otherwise, the code will load models by [epoch]')
55
+ parser.add_argument('--verbose', action='store_true', help='if specified, print more debugging information')
56
+ parser.add_argument('--suffix', default='', type=str, help='customized suffix: opt.name = opt.name + suffix: e.g., {model}_{netG}_size{load_size}')
57
+
58
+ parser.add_argument('--data_dir', type=str, required=False,
59
+ help='input files directory; images can be .png, .jpg, or .tiff')
60
+ parser.add_argument('--output_dir', type=str, required=False,
61
+ help='result dir. result depth will be png. videos are MJPG as avi')
62
+ parser.add_argument('--savecrops', type=int, required=False)
63
+ parser.add_argument('--savewholeest', type=int, required=False)
64
+ parser.add_argument('--output_resolution', type=int, required=False,
65
+ help='0 for no restriction 1 for resize to input size')
66
+ parser.add_argument('--net_receptive_field_size', type=int, required=False)
67
+ parser.add_argument('--pix2pixsize', type=int, required=False)
68
+ parser.add_argument('--generatevideo', type=int, required=False)
69
+ parser.add_argument('--depthNet', type=int, required=False, help='0: midas, 1: structuredRL')
70
+ parser.add_argument('--R0', action='store_true')
71
+ parser.add_argument('--R20', action='store_true')
72
+ parser.add_argument('--Final', action='store_true')
73
+ parser.add_argument('--colorize_results', action='store_true')
74
+ parser.add_argument('--max_res', type=float, default=np.inf)
75
+
76
+ self.initialized = True
77
+ return parser
78
+
79
+ def gather_options(self):
80
+ """Initialize our parser with basic options(only once).
81
+ Add additional model-specific and dataset-specific options.
82
+ These options are defined in the <modify_commandline_options> function
83
+ in model and dataset classes.
84
+ """
85
+ if not self.initialized: # check if it has been initialized
86
+ parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
87
+ parser = self.initialize(parser)
88
+
89
+ # get the basic options
90
+ opt, _ = parser.parse_known_args()
91
+
92
+ # modify model-related parser options
93
+ model_name = opt.model
94
+ model_option_setter = models.get_option_setter(model_name)
95
+ parser = model_option_setter(parser, self.isTrain)
96
+ opt, _ = parser.parse_known_args() # parse again with new defaults
97
+
98
+ # modify dataset-related parser options
99
+ # dataset_name = opt.dataset_mode
100
+ # dataset_option_setter = pix2pix.data.get_option_setter(dataset_name)
101
+ # parser = dataset_option_setter(parser, self.isTrain)
102
+
103
+ # save and return the parser
104
+ self.parser = parser
105
+ #return parser.parse_args() #EVIL
106
+ return opt
107
+
108
+ def print_options(self, opt):
109
+ """Print and save options
110
+
111
+ It will print both current options and default values (if different).
112
+ It will save options into a text file / [checkpoints_dir] / opt.txt
113
+ """
114
+ message = ''
115
+ message += '----------------- Options ---------------\n'
116
+ for k, v in sorted(vars(opt).items()):
117
+ comment = ''
118
+ default = self.parser.get_default(k)
119
+ if v != default:
120
+ comment = '\t[default: %s]' % str(default)
121
+ message += '{:>25}: {:<30}{}\n'.format(str(k), str(v), comment)
122
+ message += '----------------- End -------------------'
123
+ print(message)
124
+
125
+ # save to the disk
126
+ expr_dir = os.path.join(opt.checkpoints_dir, opt.name)
127
+ util.mkdirs(expr_dir)
128
+ file_name = os.path.join(expr_dir, '{}_opt.txt'.format(opt.phase))
129
+ with open(file_name, 'wt') as opt_file:
130
+ opt_file.write(message)
131
+ opt_file.write('\n')
132
+
133
+ def parse(self):
134
+ """Parse our options, create checkpoints directory suffix, and set up gpu device."""
135
+ opt = self.gather_options()
136
+ opt.isTrain = self.isTrain # train or test
137
+
138
+ # process opt.suffix
139
+ if opt.suffix:
140
+ suffix = ('_' + opt.suffix.format(**vars(opt))) if opt.suffix != '' else ''
141
+ opt.name = opt.name + suffix
142
+
143
+ #self.print_options(opt)
144
+
145
+ # set gpu ids
146
+ str_ids = opt.gpu_ids.split(',')
147
+ opt.gpu_ids = []
148
+ for str_id in str_ids:
149
+ id = int(str_id)
150
+ if id >= 0:
151
+ opt.gpu_ids.append(id)
152
+ #if len(opt.gpu_ids) > 0:
153
+ # torch.cuda.set_device(opt.gpu_ids[0])
154
+
155
+ self.opt = opt
156
+ return self.opt
extensions/microsoftexcel-controlnet/annotator/leres/pix2pix/options/test_options.py ADDED
@@ -0,0 +1,22 @@
1
+ from .base_options import BaseOptions
2
+
3
+
4
+ class TestOptions(BaseOptions):
5
+ """This class includes test options.
6
+
7
+ It also includes shared options defined in BaseOptions.
8
+ """
9
+
10
+ def initialize(self, parser):
11
+ parser = BaseOptions.initialize(self, parser) # define shared options
12
+ parser.add_argument('--aspect_ratio', type=float, default=1.0, help='aspect ratio of result images')
13
+ parser.add_argument('--phase', type=str, default='test', help='train, val, test, etc')
14
+ # Dropout and BatchNorm have different behavior during training and test.
15
+ parser.add_argument('--eval', action='store_true', help='use eval mode during test time.')
16
+ parser.add_argument('--num_test', type=int, default=50, help='how many test images to run')
17
+ # rewrite default values
18
+ parser.set_defaults(model='pix2pix4depth')
19
+ # To avoid cropping, the load_size should be the same as crop_size
20
+ parser.set_defaults(load_size=parser.get_default('crop_size'))
21
+ self.isTrain = False
22
+ return parser
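Because `parse()` goes through `parse_known_args`, it tolerates unrelated command-line flags while still applying the pix2pix4depth defaults pulled in via `modify_commandline_options`. A minimal sketch of building the option object the model code above expects (the CPU override is only an example):

# assumes TestOptions is already importable/in scope
opt = TestOptions().parse()       # isTrain=False, model='pix2pix4depth'

opt.gpu_ids = []                  # e.g. force CPU inference
print(opt.model, opt.netG, opt.input_nc, opt.output_nc, opt.isTrain)
# pix2pix4depth unet_1024 2 1 False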
extensions/microsoftexcel-controlnet/annotator/leres/pix2pix/util/__init__.py ADDED
@@ -0,0 +1 @@
1
+ """This package includes a miscellaneous collection of useful helper functions."""
extensions/microsoftexcel-controlnet/annotator/leres/pix2pix/util/get_data.py ADDED
@@ -0,0 +1,110 @@
1
+ from __future__ import print_function
2
+ import os
3
+ import tarfile
4
+ import requests
5
+ from warnings import warn
6
+ from zipfile import ZipFile
7
+ from bs4 import BeautifulSoup
8
+ from os.path import abspath, isdir, join, basename
9
+
10
+
11
+ class GetData(object):
12
+ """A Python script for downloading CycleGAN or pix2pix datasets.
13
+
14
+ Parameters:
15
+ technique (str) -- One of: 'cyclegan' or 'pix2pix'.
16
+ verbose (bool) -- If True, print additional information.
17
+
18
+ Examples:
19
+ >>> from util.get_data import GetData
20
+ >>> gd = GetData(technique='cyclegan')
21
+ >>> new_data_path = gd.get(save_path='./datasets') # options will be displayed.
22
+
23
+ Alternatively, you can use bash scripts: 'scripts/download_pix2pix_model.sh'
24
+ and 'scripts/download_cyclegan_model.sh'.
25
+ """
26
+
27
+ def __init__(self, technique='cyclegan', verbose=True):
28
+ url_dict = {
29
+ 'pix2pix': 'http://efrosgans.eecs.berkeley.edu/pix2pix/datasets/',
30
+ 'cyclegan': 'https://people.eecs.berkeley.edu/~taesung_park/CycleGAN/datasets'
31
+ }
32
+ self.url = url_dict.get(technique.lower())
33
+ self._verbose = verbose
34
+
35
+ def _print(self, text):
36
+ if self._verbose:
37
+ print(text)
38
+
39
+ @staticmethod
40
+ def _get_options(r):
41
+ soup = BeautifulSoup(r.text, 'lxml')
42
+ options = [h.text for h in soup.find_all('a', href=True)
43
+ if h.text.endswith(('.zip', 'tar.gz'))]
44
+ return options
45
+
46
+ def _present_options(self):
47
+ r = requests.get(self.url)
48
+ options = self._get_options(r)
49
+ print('Options:\n')
50
+ for i, o in enumerate(options):
51
+ print("{0}: {1}".format(i, o))
52
+ choice = input("\nPlease enter the number of the "
53
+ "dataset above you wish to download:")
54
+ return options[int(choice)]
55
+
56
+ def _download_data(self, dataset_url, save_path):
57
+ if not isdir(save_path):
58
+ os.makedirs(save_path)
59
+
60
+ base = basename(dataset_url)
61
+ temp_save_path = join(save_path, base)
62
+
63
+ with open(temp_save_path, "wb") as f:
64
+ r = requests.get(dataset_url)
65
+ f.write(r.content)
66
+
67
+ if base.endswith('.tar.gz'):
68
+ obj = tarfile.open(temp_save_path)
69
+ elif base.endswith('.zip'):
70
+ obj = ZipFile(temp_save_path, 'r')
71
+ else:
72
+ raise ValueError("Unknown File Type: {0}.".format(base))
73
+
74
+ self._print("Unpacking Data...")
75
+ obj.extractall(save_path)
76
+ obj.close()
77
+ os.remove(temp_save_path)
78
+
79
+ def get(self, save_path, dataset=None):
80
+ """
81
+
82
+ Download a dataset.
83
+
84
+ Parameters:
85
+ save_path (str) -- A directory to save the data to.
86
+ dataset (str) -- (optional). A specific dataset to download.
87
+ Note: this must include the file extension.
88
+ If None, options will be presented for you
89
+ to choose from.
90
+
91
+ Returns:
92
+ save_path_full (str) -- the absolute path to the downloaded data.
93
+
94
+ """
95
+ if dataset is None:
96
+ selected_dataset = self._present_options()
97
+ else:
98
+ selected_dataset = dataset
99
+
100
+ save_path_full = join(save_path, selected_dataset.split('.')[0])
101
+
102
+ if isdir(save_path_full):
103
+ warn("\n'{0}' already exists. Voiding Download.".format(
104
+ save_path_full))
105
+ else:
106
+ self._print('Downloading Data...')
107
+ url = "{0}/{1}".format(self.url, selected_dataset)
108
+ self._download_data(url, save_path=save_path)
109
+
110
+ return abspath(save_path_full)
extensions/microsoftexcel-controlnet/annotator/leres/pix2pix/util/guidedfilter.py ADDED
@@ -0,0 +1,47 @@
1
+ import numpy as np
2
+
3
+ class GuidedFilter():
4
+ def __init__(self, source, reference, r=64, eps= 0.05**2):
5
+ self.source = source
6
+ self.reference = reference
7
+ self.r = r
8
+ self.eps = eps
9
+
10
+ self.smooth = self.guidedfilter(self.source,self.reference,self.r,self.eps)
11
+
12
+ def boxfilter(self,img, r):
13
+ (rows, cols) = img.shape
14
+ imDst = np.zeros_like(img)
15
+
16
+ imCum = np.cumsum(img, 0)
17
+ imDst[0 : r+1, :] = imCum[r : 2*r+1, :]
18
+ imDst[r+1 : rows-r, :] = imCum[2*r+1 : rows, :] - imCum[0 : rows-2*r-1, :]
19
+ imDst[rows-r: rows, :] = np.tile(imCum[rows-1, :], [r, 1]) - imCum[rows-2*r-1 : rows-r-1, :]
20
+
21
+ imCum = np.cumsum(imDst, 1)
22
+ imDst[:, 0 : r+1] = imCum[:, r : 2*r+1]
23
+ imDst[:, r+1 : cols-r] = imCum[:, 2*r+1 : cols] - imCum[:, 0 : cols-2*r-1]
24
+ imDst[:, cols-r: cols] = np.tile(imCum[:, cols-1], [r, 1]).T - imCum[:, cols-2*r-1 : cols-r-1]
25
+
26
+ return imDst
27
+
28
+ def guidedfilter(self,I, p, r, eps):
29
+ (rows, cols) = I.shape
30
+ N = self.boxfilter(np.ones([rows, cols]), r)
31
+
32
+ meanI = self.boxfilter(I, r) / N
33
+ meanP = self.boxfilter(p, r) / N
34
+ meanIp = self.boxfilter(I * p, r) / N
35
+ covIp = meanIp - meanI * meanP
36
+
37
+ meanII = self.boxfilter(I * I, r) / N
38
+ varI = meanII - meanI * meanI
39
+
40
+ a = covIp / (varI + eps)
41
+ b = meanP - a * meanI
42
+
43
+ meanA = self.boxfilter(a, r) / N
44
+ meanB = self.boxfilter(b, r) / N
45
+
46
+ q = meanA * I + meanB
47
+ return q
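A small usage sketch, assuming both inputs are 2-D float arrays of the same shape and large enough for the chosen radius (`boxfilter` needs at least 2r+1 rows and columns). In `guidedfilter(I, p, r, eps)` the `source` argument plays the role of the guidance image I and `reference` the signal p being filtered:

import numpy as np

# assumes GuidedFilter is already in scope
rng = np.random.default_rng(0)
guide = rng.random((256, 256))                           # guidance image
noisy = guide + 0.1 * rng.standard_normal((256, 256))    # signal to be smoothed

gf = GuidedFilter(source=guide, reference=noisy, r=32, eps=0.05 ** 2)
smoothed = gf.smooth                                     # edge-preserving result, same shape
print(smoothed.shape, float(np.abs(smoothed - guide).mean()))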
extensions/microsoftexcel-controlnet/annotator/leres/pix2pix/util/html.py ADDED
@@ -0,0 +1,86 @@
1
+ import dominate
2
+ from dominate.tags import meta, h3, table, tr, td, p, a, img, br
3
+ import os
4
+
5
+
6
+ class HTML:
7
+ """This HTML class allows us to save images and write texts into a single HTML file.
8
+
9
+ It consists of functions such as <add_header> (add a text header to the HTML file),
10
+ <add_images> (add a row of images to the HTML file), and <save> (save the HTML to the disk).
11
+ It is based on Python library 'dominate', a Python library for creating and manipulating HTML documents using a DOM API.
12
+ """
13
+
14
+ def __init__(self, web_dir, title, refresh=0):
15
+ """Initialize the HTML classes
16
+
17
+ Parameters:
18
+ web_dir (str) -- a directory that stores the webpage. HTML file will be created at <web_dir>/index.html; images will be saved at <web_dir>/images/
19
+ title (str) -- the webpage name
20
+ refresh (int) -- how often the website refreshes itself; if 0, no refreshing
21
+ """
22
+ self.title = title
23
+ self.web_dir = web_dir
24
+ self.img_dir = os.path.join(self.web_dir, 'images')
25
+ if not os.path.exists(self.web_dir):
26
+ os.makedirs(self.web_dir)
27
+ if not os.path.exists(self.img_dir):
28
+ os.makedirs(self.img_dir)
29
+
30
+ self.doc = dominate.document(title=title)
31
+ if refresh > 0:
32
+ with self.doc.head:
33
+ meta(http_equiv="refresh", content=str(refresh))
34
+
35
+ def get_image_dir(self):
36
+ """Return the directory that stores images"""
37
+ return self.img_dir
38
+
39
+ def add_header(self, text):
40
+ """Insert a header to the HTML file
41
+
42
+ Parameters:
43
+ text (str) -- the header text
44
+ """
45
+ with self.doc:
46
+ h3(text)
47
+
48
+ def add_images(self, ims, txts, links, width=400):
49
+ """add images to the HTML file
50
+
51
+ Parameters:
52
+ ims (str list) -- a list of image paths
53
+ txts (str list) -- a list of image names shown on the website
54
+ links (str list) -- a list of hyperref links; when you click an image, it will redirect you to a new page
55
+ """
56
+ self.t = table(border=1, style="table-layout: fixed;") # Insert a table
57
+ self.doc.add(self.t)
58
+ with self.t:
59
+ with tr():
60
+ for im, txt, link in zip(ims, txts, links):
61
+ with td(style="word-wrap: break-word;", halign="center", valign="top"):
62
+ with p():
63
+ with a(href=os.path.join('images', link)):
64
+ img(style="width:%dpx" % width, src=os.path.join('images', im))
65
+ br()
66
+ p(txt)
67
+
68
+ def save(self):
69
+ """save the current content to the HMTL file"""
70
+ html_file = '%s/index.html' % self.web_dir
71
+ f = open(html_file, 'wt')
72
+ f.write(self.doc.render())
73
+ f.close()
74
+
75
+
76
+ if __name__ == '__main__': # we show an example usage here.
77
+ html = HTML('web/', 'test_html')
78
+ html.add_header('hello world')
79
+
80
+ ims, txts, links = [], [], []
81
+ for n in range(4):
82
+ ims.append('image_%d.png' % n)
83
+ txts.append('text_%d' % n)
84
+ links.append('image_%d.png' % n)
85
+ html.add_images(ims, txts, links)
86
+ html.save()
extensions/microsoftexcel-controlnet/annotator/leres/pix2pix/util/image_pool.py ADDED
@@ -0,0 +1,54 @@
1
+ import random
2
+ import torch
3
+
4
+
5
+ class ImagePool():
6
+ """This class implements an image buffer that stores previously generated images.
7
+
8
+ This buffer enables us to update discriminators using a history of generated images
9
+ rather than the ones produced by the latest generators.
10
+ """
11
+
12
+ def __init__(self, pool_size):
13
+ """Initialize the ImagePool class
14
+
15
+ Parameters:
16
+ pool_size (int) -- the size of image buffer, if pool_size=0, no buffer will be created
17
+ """
18
+ self.pool_size = pool_size
19
+ if self.pool_size > 0: # create an empty pool
20
+ self.num_imgs = 0
21
+ self.images = []
22
+
23
+ def query(self, images):
24
+ """Return an image from the pool.
25
+
26
+ Parameters:
27
+ images: the latest generated images from the generator
28
+
29
+ Returns images from the buffer.
30
+
31
+ By 50/100, the buffer will return input images.
32
+ By 50/100, the buffer will return images previously stored in the buffer,
33
+ and insert the current images to the buffer.
34
+ """
35
+ if self.pool_size == 0: # if the buffer size is 0, do nothing
36
+ return images
37
+ return_images = []
38
+ for image in images:
39
+ image = torch.unsqueeze(image.data, 0)
40
+ if self.num_imgs < self.pool_size: # if the buffer is not full; keep inserting current images to the buffer
41
+ self.num_imgs = self.num_imgs + 1
42
+ self.images.append(image)
43
+ return_images.append(image)
44
+ else:
45
+ p = random.uniform(0, 1)
46
+ if p > 0.5: # by 50% chance, the buffer will return a previously stored image, and insert the current image into the buffer
47
+ random_id = random.randint(0, self.pool_size - 1) # randint is inclusive
48
+ tmp = self.images[random_id].clone()
49
+ self.images[random_id] = image
50
+ return_images.append(tmp)
51
+ else: # by another 50% chance, the buffer will return the current image
52
+ return_images.append(image)
53
+ return_images = torch.cat(return_images, 0) # collect all the images and return
54
+ return return_images
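A minimal sketch of the pool in isolation, with random tensors standing in for generator outputs. Note that the depth-merge model above sets `pool_size=0` in its training defaults, so `query` is effectively a pass-through there:

import torch

# assumes ImagePool is already in scope
pool = ImagePool(pool_size=50)
for step in range(3):
    fake_batch = torch.randn(4, 3, 64, 64)   # stand-in for G's latest fakes
    mixed = pool.query(fake_batch)           # same shape; may contain older fakes
    print(step, mixed.shape, len(pool.images))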
extensions/microsoftexcel-controlnet/annotator/leres/pix2pix/util/util.py ADDED
@@ -0,0 +1,105 @@
1
+ """This module contains simple helper functions """
2
+ from __future__ import print_function
3
+ import torch
4
+ import numpy as np
5
+ from PIL import Image
6
+ import os
7
+
8
+
9
+ def tensor2im(input_image, imtype=np.uint16):
10
+ """"Converts a Tensor array into a numpy image array.
11
+
12
+ Parameters:
13
+ input_image (tensor) -- the input image tensor array
14
+ imtype (type) -- the desired type of the converted numpy array
15
+ """
16
+ if not isinstance(input_image, np.ndarray):
17
+ if isinstance(input_image, torch.Tensor): # get the data from a variable
18
+ image_tensor = input_image.data
19
+ else:
20
+ return input_image
21
+ image_numpy = torch.squeeze(image_tensor).cpu().numpy() # convert it into a numpy array
22
+ image_numpy = (image_numpy + 1) / 2.0 * (2**16-1) #
23
+ else: # if it is a numpy array, do nothing
24
+ image_numpy = input_image
25
+ return image_numpy.astype(imtype)
26
+
27
+
28
+ def diagnose_network(net, name='network'):
29
+ """Calculate and print the mean of average absolute(gradients)
30
+
31
+ Parameters:
32
+ net (torch network) -- Torch network
33
+ name (str) -- the name of the network
34
+ """
35
+ mean = 0.0
36
+ count = 0
37
+ for param in net.parameters():
38
+ if param.grad is not None:
39
+ mean += torch.mean(torch.abs(param.grad.data))
40
+ count += 1
41
+ if count > 0:
42
+ mean = mean / count
43
+ print(name)
44
+ print(mean)
45
+
46
+
47
+ def save_image(image_numpy, image_path, aspect_ratio=1.0):
48
+ """Save a numpy image to the disk
49
+
50
+ Parameters:
51
+ image_numpy (numpy array) -- input numpy array
52
+ image_path (str) -- the path of the image
53
+ """
54
+ image_pil = Image.fromarray(image_numpy)
55
+
56
+ image_pil = image_pil.convert('I;16')
57
+
58
+ # image_pil = Image.fromarray(image_numpy)
59
+ # h, w, _ = image_numpy.shape
60
+ #
61
+ # if aspect_ratio > 1.0:
62
+ # image_pil = image_pil.resize((h, int(w * aspect_ratio)), Image.BICUBIC)
63
+ # if aspect_ratio < 1.0:
64
+ # image_pil = image_pil.resize((int(h / aspect_ratio), w), Image.BICUBIC)
65
+
66
+ image_pil.save(image_path)
67
+
68
+
69
+ def print_numpy(x, val=True, shp=False):
70
+ """Print the mean, min, max, median, std, and size of a numpy array
71
+
72
+ Parameters:
73
+ val (bool) -- if print the values of the numpy array
74
+ shp (bool) -- if print the shape of the numpy array
75
+ """
76
+ x = x.astype(np.float64)
77
+ if shp:
78
+ print('shape,', x.shape)
79
+ if val:
80
+ x = x.flatten()
81
+ print('mean = %3.3f, min = %3.3f, max = %3.3f, median = %3.3f, std=%3.3f' % (
82
+ np.mean(x), np.min(x), np.max(x), np.median(x), np.std(x)))
83
+
84
+
85
+ def mkdirs(paths):
86
+ """create empty directories if they don't exist
87
+
88
+ Parameters:
89
+ paths (str list) -- a list of directory paths
90
+ """
91
+ if isinstance(paths, list) and not isinstance(paths, str):
92
+ for path in paths:
93
+ mkdir(path)
94
+ else:
95
+ mkdir(paths)
96
+
97
+
98
+ def mkdir(path):
99
+ """create a single empty directory if it didn't exist
100
+
101
+ Parameters:
102
+ path (str) -- a single directory path
103
+ """
104
+ if not os.path.exists(path):
105
+ os.makedirs(path)
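A short sketch of the 16-bit round trip these helpers implement, assuming a network output in [-1, 1]; the scratch path is illustrative:

import torch

# assumes tensor2im, save_image and mkdir from this module are in scope
fake_depth = torch.rand(1, 1, 128, 128) * 2.0 - 1.0   # pretend generator output
depth_u16 = tensor2im(fake_depth)                      # uint16 array scaled to [0, 65535]
mkdir('./tmp_out')                                     # illustrative output directory
save_image(depth_u16, './tmp_out/depth.png')           # stored as a 16-bit ('I;16') PNG
print(depth_u16.dtype, depth_u16.shape)                # uint16 (128, 128)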
extensions/microsoftexcel-controlnet/annotator/leres/pix2pix/util/visualizer.py ADDED
@@ -0,0 +1,166 @@
1
+ import numpy as np
2
+ import os
3
+ import sys
4
+ import ntpath
5
+ import time
6
+ from . import util, html
7
+ from subprocess import Popen, PIPE
8
+ import torch
9
+
10
+
11
+ if sys.version_info[0] == 2:
12
+ VisdomExceptionBase = Exception
13
+ else:
14
+ VisdomExceptionBase = ConnectionError
15
+
16
+
17
+ def save_images(webpage, visuals, image_path, aspect_ratio=1.0, width=256):
18
+ """Save images to the disk.
19
+
20
+ Parameters:
21
+ webpage (the HTML class) -- the HTML webpage class that stores these images (see html.py for more details)
22
+ visuals (OrderedDict) -- an ordered dictionary that stores (name, images (either tensor or numpy) ) pairs
23
+ image_path (str) -- the string is used to create image paths
24
+ aspect_ratio (float) -- the aspect ratio of saved images
25
+ width (int) -- the images will be resized to width x width
26
+
27
+ This function will save images stored in 'visuals' to the HTML file specified by 'webpage'.
28
+ """
29
+ image_dir = webpage.get_image_dir()
30
+ short_path = ntpath.basename(image_path[0])
31
+ name = os.path.splitext(short_path)[0]
32
+
33
+ webpage.add_header(name)
34
+ ims, txts, links = [], [], []
35
+
36
+ for label, im_data in visuals.items():
37
+ im = util.tensor2im(im_data)
38
+ image_name = '%s_%s.png' % (name, label)
39
+ save_path = os.path.join(image_dir, image_name)
40
+ util.save_image(im, save_path, aspect_ratio=aspect_ratio)
41
+ ims.append(image_name)
42
+ txts.append(label)
43
+ links.append(image_name)
44
+ webpage.add_images(ims, txts, links, width=width)
45
+
46
+
47
+ class Visualizer():
48
+ """This class includes several functions that can display/save images and print/save logging information.
49
+
50
+ It uses a Python library 'visdom' for display, and a Python library 'dominate' (wrapped in 'HTML') for creating HTML files with images.
51
+ """
52
+
53
+ def __init__(self, opt):
54
+ """Initialize the Visualizer class
55
+
56
+ Parameters:
57
+ opt -- stores all the experiment flags; needs to be a subclass of BaseOptions
58
+ Step 1: Cache the training/test options
59
+ Step 2: connect to a visdom server
60
+ Step 3: create an HTML object for saving HTML files
61
+ Step 4: create a logging file to store training losses
62
+ """
63
+ self.opt = opt # cache the option
64
+ self.display_id = opt.display_id
65
+ self.use_html = opt.isTrain and not opt.no_html
66
+ self.win_size = opt.display_winsize
67
+ self.name = opt.name
68
+ self.port = opt.display_port
69
+ self.saved = False
70
+
71
+ if self.use_html: # create an HTML object at <checkpoints_dir>/web/; images will be saved under <checkpoints_dir>/web/images/
72
+ self.web_dir = os.path.join(opt.checkpoints_dir, opt.name, 'web')
73
+ self.img_dir = os.path.join(self.web_dir, 'images')
74
+ print('create web directory %s...' % self.web_dir)
75
+ util.mkdirs([self.web_dir, self.img_dir])
76
+ # create a logging file to store training losses
77
+ self.log_name = os.path.join(opt.checkpoints_dir, opt.name, 'loss_log.txt')
78
+ with open(self.log_name, "a") as log_file:
79
+ now = time.strftime("%c")
80
+ log_file.write('================ Training Loss (%s) ================\n' % now)
81
+
82
+ def reset(self):
83
+ """Reset the self.saved status"""
84
+ self.saved = False
85
+
86
+ def create_visdom_connections(self):
87
+ """If the program could not connect to Visdom server, this function will start a new server at port < self.port > """
88
+ cmd = sys.executable + ' -m visdom.server -p %d &>/dev/null &' % self.port
89
+ print('\n\nCould not connect to Visdom server. \n Trying to start a server....')
90
+ print('Command: %s' % cmd)
91
+ Popen(cmd, shell=True, stdout=PIPE, stderr=PIPE)
92
+
93
+ def display_current_results(self, visuals, epoch, save_result):
94
+ """Display current results on visdom; save current results to an HTML file.
95
+
96
+ Parameters:
97
+ visuals (OrderedDict) - - dictionary of images to display or save
98
+ epoch (int) - - the current epoch
99
+ save_result (bool) - - if save the current results to an HTML file
100
+ """
101
+ if self.use_html and (save_result or not self.saved): # save images to an HTML file if they haven't been saved.
102
+ self.saved = True
103
+ # save images to the disk
104
+ for label, image in visuals.items():
105
+ image_numpy = util.tensor2im(image)
106
+ img_path = os.path.join(self.img_dir, 'epoch%.3d_%s.png' % (epoch, label))
107
+ util.save_image(image_numpy, img_path)
108
+
109
+ # update website
110
+ webpage = html.HTML(self.web_dir, 'Experiment name = %s' % self.name, refresh=1)
111
+ for n in range(epoch, 0, -1):
112
+ webpage.add_header('epoch [%d]' % n)
113
+ ims, txts, links = [], [], []
114
+
115
+ for label, image_numpy in visuals.items():
116
+ # image_numpy = util.tensor2im(image)
117
+ img_path = 'epoch%.3d_%s.png' % (n, label)
118
+ ims.append(img_path)
119
+ txts.append(label)
120
+ links.append(img_path)
121
+ webpage.add_images(ims, txts, links, width=self.win_size)
122
+ webpage.save()
123
+
124
+ # def plot_current_losses(self, epoch, counter_ratio, losses):
125
+ # """display the current losses on visdom display: dictionary of error labels and values
126
+ #
127
+ # Parameters:
128
+ # epoch (int) -- current epoch
129
+ # counter_ratio (float) -- progress (percentage) in the current epoch, between 0 to 1
130
+ # losses (OrderedDict) -- training losses stored in the format of (name, float) pairs
131
+ # """
132
+ # if not hasattr(self, 'plot_data'):
133
+ # self.plot_data = {'X': [], 'Y': [], 'legend': list(losses.keys())}
134
+ # self.plot_data['X'].append(epoch + counter_ratio)
135
+ # self.plot_data['Y'].append([losses[k] for k in self.plot_data['legend']])
136
+ # try:
137
+ # self.vis.line(
138
+ # X=np.stack([np.array(self.plot_data['X'])] * len(self.plot_data['legend']), 1),
139
+ # Y=np.array(self.plot_data['Y']),
140
+ # opts={
141
+ # 'title': self.name + ' loss over time',
142
+ # 'legend': self.plot_data['legend'],
143
+ # 'xlabel': 'epoch',
144
+ # 'ylabel': 'loss'},
145
+ # win=self.display_id)
146
+ # except VisdomExceptionBase:
147
+ # self.create_visdom_connections()
148
+
149
+ # losses: same format as |losses| of plot_current_losses
150
+ def print_current_losses(self, epoch, iters, losses, t_comp, t_data):
151
+ """print current losses on console; also save the losses to the disk
152
+
153
+ Parameters:
154
+ epoch (int) -- current epoch
155
+ iters (int) -- current training iteration during this epoch (reset to 0 at the end of every epoch)
156
+ losses (OrderedDict) -- training losses stored in the format of (name, float) pairs
157
+ t_comp (float) -- computational time per data point (normalized by batch_size)
158
+ t_data (float) -- data loading time per data point (normalized by batch_size)
159
+ """
160
+ message = '(epoch: %d, iters: %d, time: %.3f, data: %.3f) ' % (epoch, iters, t_comp, t_data)
161
+ for k, v in losses.items():
162
+ message += '%s: %.3f ' % (k, v)
163
+
164
+ print(message) # print the message
165
+ with open(self.log_name, "a") as log_file:
166
+ log_file.write('%s\n' % message) # save the message
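A small sketch of the `save_images` helper on its own, using the `HTML` class from util/html.py with a fake visuals dict; paths and tensor values are illustrative, and both `save_images` and `HTML` are assumed to be imported from this package:

from collections import OrderedDict
import torch

webpage = HTML('web_demo/', 'depth results')                       # writes web_demo/index.html
visuals = OrderedDict(fake_B=torch.rand(1, 1, 64, 64) * 2 - 1)     # name -> tensor in [-1, 1]
save_images(webpage, visuals, image_path=['sample_000.png'], width=256)
webpage.save()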
extensions/microsoftexcel-controlnet/annotator/lineart/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2022 Caroline Chan
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
extensions/microsoftexcel-controlnet/annotator/lineart/__init__.py ADDED
@@ -0,0 +1,133 @@
1
+ import os
2
+ import cv2
3
+ import torch
4
+ import numpy as np
5
+
6
+ import torch.nn as nn
7
+ from einops import rearrange
8
+ from modules import devices
9
+ from annotator.annotator_path import models_path
10
+
11
+
12
+ norm_layer = nn.InstanceNorm2d
13
+
14
+
15
+ class ResidualBlock(nn.Module):
16
+ def __init__(self, in_features):
17
+ super(ResidualBlock, self).__init__()
18
+
19
+ conv_block = [ nn.ReflectionPad2d(1),
20
+ nn.Conv2d(in_features, in_features, 3),
21
+ norm_layer(in_features),
22
+ nn.ReLU(inplace=True),
23
+ nn.ReflectionPad2d(1),
24
+ nn.Conv2d(in_features, in_features, 3),
25
+ norm_layer(in_features)
26
+ ]
27
+
28
+ self.conv_block = nn.Sequential(*conv_block)
29
+
30
+ def forward(self, x):
31
+ return x + self.conv_block(x)
32
+
33
+
34
+ class Generator(nn.Module):
35
+ def __init__(self, input_nc, output_nc, n_residual_blocks=9, sigmoid=True):
36
+ super(Generator, self).__init__()
37
+
38
+ # Initial convolution block
39
+ model0 = [ nn.ReflectionPad2d(3),
40
+ nn.Conv2d(input_nc, 64, 7),
41
+ norm_layer(64),
42
+ nn.ReLU(inplace=True) ]
43
+ self.model0 = nn.Sequential(*model0)
44
+
45
+ # Downsampling
46
+ model1 = []
47
+ in_features = 64
48
+ out_features = in_features*2
49
+ for _ in range(2):
50
+ model1 += [ nn.Conv2d(in_features, out_features, 3, stride=2, padding=1),
51
+ norm_layer(out_features),
52
+ nn.ReLU(inplace=True) ]
53
+ in_features = out_features
54
+ out_features = in_features*2
55
+ self.model1 = nn.Sequential(*model1)
56
+
57
+ model2 = []
58
+ # Residual blocks
59
+ for _ in range(n_residual_blocks):
60
+ model2 += [ResidualBlock(in_features)]
61
+ self.model2 = nn.Sequential(*model2)
62
+
63
+ # Upsampling
64
+ model3 = []
65
+ out_features = in_features//2
66
+ for _ in range(2):
67
+ model3 += [ nn.ConvTranspose2d(in_features, out_features, 3, stride=2, padding=1, output_padding=1),
68
+ norm_layer(out_features),
69
+ nn.ReLU(inplace=True) ]
70
+ in_features = out_features
71
+ out_features = in_features//2
72
+ self.model3 = nn.Sequential(*model3)
73
+
74
+ # Output layer
75
+ model4 = [ nn.ReflectionPad2d(3),
76
+ nn.Conv2d(64, output_nc, 7)]
77
+ if sigmoid:
78
+ model4 += [nn.Sigmoid()]
79
+
80
+ self.model4 = nn.Sequential(*model4)
81
+
82
+ def forward(self, x, cond=None):
83
+ out = self.model0(x)
84
+ out = self.model1(out)
85
+ out = self.model2(out)
86
+ out = self.model3(out)
87
+ out = self.model4(out)
88
+
89
+ return out
90
+
91
+
92
+ class LineartDetector:
93
+ model_dir = os.path.join(models_path, "lineart")
94
+ model_default = 'sk_model.pth'
95
+ model_coarse = 'sk_model2.pth'
96
+
97
+ def __init__(self, model_name):
98
+ self.model = None
99
+ self.model_name = model_name
100
+ self.device = devices.get_device_for("controlnet")
101
+
102
+ def load_model(self, name):
103
+ remote_model_path = "https://huggingface.co/lllyasviel/Annotators/resolve/main/" + name
104
+ model_path = os.path.join(self.model_dir, name)
105
+ if not os.path.exists(model_path):
106
+ from basicsr.utils.download_util import load_file_from_url
107
+ load_file_from_url(remote_model_path, model_dir=self.model_dir)
108
+ model = Generator(3, 1, 3)
109
+ model.load_state_dict(torch.load(model_path, map_location=torch.device('cpu')))
110
+ model.eval()
111
+ self.model = model.to(self.device)
112
+
113
+ def unload_model(self):
114
+ if self.model is not None:
115
+ self.model.cpu()
116
+
117
+ def __call__(self, input_image):
118
+ if self.model is None:
119
+ self.load_model(self.model_name)
120
+ self.model.to(self.device)
121
+
122
+ assert input_image.ndim == 3
123
+ image = input_image
124
+ with torch.no_grad():
125
+ image = torch.from_numpy(image).float().to(self.device)
126
+ image = image / 255.0
127
+ image = rearrange(image, 'h w c -> 1 c h w')
128
+ line = self.model(image)[0][0]
129
+
130
+ line = line.cpu().numpy()
131
+ line = (line * 255.0).clip(0, 255).astype(np.uint8)
132
+
133
+ return line
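A hedged usage sketch; it assumes the code runs inside the WebUI process (so `modules.devices` and the annotator model directory resolve) and that `input.png` is any RGB image on disk:

import cv2

# assumes LineartDetector is already in scope
detector = LineartDetector(LineartDetector.model_default)         # or LineartDetector.model_coarse
rgb = cv2.cvtColor(cv2.imread('input.png'), cv2.COLOR_BGR2RGB)    # HxWx3 uint8
line_map = detector(rgb)              # HxW uint8 line map at the input resolution
detector.unload_model()               # move the weights back to CPU when finished
cv2.imwrite('lineart.png', line_map)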
extensions/microsoftexcel-controlnet/annotator/lineart_anime/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2022 Caroline Chan
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
extensions/microsoftexcel-controlnet/annotator/lineart_anime/__init__.py ADDED
@@ -0,0 +1,161 @@
1
+ import numpy as np
2
+ import torch
3
+ import torch.nn as nn
4
+ import functools
5
+
6
+ import os
7
+ import cv2
8
+ from einops import rearrange
9
+ from modules import devices
10
+ from annotator.annotator_path import models_path
11
+
12
+
13
+ class UnetGenerator(nn.Module):
14
+ """Create a Unet-based generator"""
15
+
16
+ def __init__(self, input_nc, output_nc, num_downs, ngf=64, norm_layer=nn.BatchNorm2d, use_dropout=False):
17
+ """Construct a Unet generator
18
+ Parameters:
19
+ input_nc (int) -- the number of channels in input images
20
+ output_nc (int) -- the number of channels in output images
21
+ num_downs (int) -- the number of downsamplings in UNet. For example, if |num_downs| == 7,
22
+ an image of size 128x128 will become of size 1x1 at the bottleneck
23
+ ngf (int) -- the number of filters in the last conv layer
24
+ norm_layer -- normalization layer
25
+ We construct the U-Net from the innermost layer to the outermost layer.
26
+ It is a recursive process.
27
+ """
28
+ super(UnetGenerator, self).__init__()
29
+ # construct unet structure
30
+ unet_block = UnetSkipConnectionBlock(ngf * 8, ngf * 8, input_nc=None, submodule=None, norm_layer=norm_layer, innermost=True) # add the innermost layer
31
+ for _ in range(num_downs - 5): # add intermediate layers with ngf * 8 filters
32
+ unet_block = UnetSkipConnectionBlock(ngf * 8, ngf * 8, input_nc=None, submodule=unet_block, norm_layer=norm_layer, use_dropout=use_dropout)
33
+ # gradually reduce the number of filters from ngf * 8 to ngf
34
+ unet_block = UnetSkipConnectionBlock(ngf * 4, ngf * 8, input_nc=None, submodule=unet_block, norm_layer=norm_layer)
35
+ unet_block = UnetSkipConnectionBlock(ngf * 2, ngf * 4, input_nc=None, submodule=unet_block, norm_layer=norm_layer)
36
+ unet_block = UnetSkipConnectionBlock(ngf, ngf * 2, input_nc=None, submodule=unet_block, norm_layer=norm_layer)
37
+ self.model = UnetSkipConnectionBlock(output_nc, ngf, input_nc=input_nc, submodule=unet_block, outermost=True, norm_layer=norm_layer) # add the outermost layer
38
+
39
+ def forward(self, input):
40
+ """Standard forward"""
41
+ return self.model(input)
42
+
43
+
44
+ class UnetSkipConnectionBlock(nn.Module):
45
+ """Defines the Unet submodule with skip connection.
46
+ X -------------------identity----------------------
47
+ |-- downsampling -- |submodule| -- upsampling --|
48
+ """
49
+
50
+ def __init__(self, outer_nc, inner_nc, input_nc=None,
51
+ submodule=None, outermost=False, innermost=False, norm_layer=nn.BatchNorm2d, use_dropout=False):
52
+ """Construct a Unet submodule with skip connections.
53
+ Parameters:
54
+ outer_nc (int) -- the number of filters in the outer conv layer
55
+ inner_nc (int) -- the number of filters in the inner conv layer
56
+ input_nc (int) -- the number of channels in input images/features
57
+ submodule (UnetSkipConnectionBlock) -- previously defined submodules
58
+ outermost (bool) -- if this module is the outermost module
59
+ innermost (bool) -- if this module is the innermost module
60
+ norm_layer -- normalization layer
61
+ use_dropout (bool) -- if use dropout layers.
62
+ """
63
+ super(UnetSkipConnectionBlock, self).__init__()
64
+ self.outermost = outermost
65
+ if type(norm_layer) == functools.partial:
66
+ use_bias = norm_layer.func == nn.InstanceNorm2d
67
+ else:
68
+ use_bias = norm_layer == nn.InstanceNorm2d
69
+ if input_nc is None:
70
+ input_nc = outer_nc
71
+ downconv = nn.Conv2d(input_nc, inner_nc, kernel_size=4,
72
+ stride=2, padding=1, bias=use_bias)
73
+ downrelu = nn.LeakyReLU(0.2, True)
74
+ downnorm = norm_layer(inner_nc)
75
+ uprelu = nn.ReLU(True)
76
+ upnorm = norm_layer(outer_nc)
77
+
78
+ if outermost:
79
+ upconv = nn.ConvTranspose2d(inner_nc * 2, outer_nc,
80
+ kernel_size=4, stride=2,
81
+ padding=1)
82
+ down = [downconv]
83
+ up = [uprelu, upconv, nn.Tanh()]
84
+ model = down + [submodule] + up
85
+ elif innermost:
86
+ upconv = nn.ConvTranspose2d(inner_nc, outer_nc,
87
+ kernel_size=4, stride=2,
88
+ padding=1, bias=use_bias)
89
+ down = [downrelu, downconv]
90
+ up = [uprelu, upconv, upnorm]
91
+ model = down + up
92
+ else:
93
+ upconv = nn.ConvTranspose2d(inner_nc * 2, outer_nc,
94
+ kernel_size=4, stride=2,
95
+ padding=1, bias=use_bias)
96
+ down = [downrelu, downconv, downnorm]
97
+ up = [uprelu, upconv, upnorm]
98
+
99
+ if use_dropout:
100
+ model = down + [submodule] + up + [nn.Dropout(0.5)]
101
+ else:
102
+ model = down + [submodule] + up
103
+
104
+ self.model = nn.Sequential(*model)
105
+
106
+ def forward(self, x):
107
+ if self.outermost:
108
+ return self.model(x)
109
+ else: # add skip connections
110
+ return torch.cat([x, self.model(x)], 1)
111
+
112
+
113
+ class LineartAnimeDetector:
114
+ model_dir = os.path.join(models_path, "lineart_anime")
115
+
116
+ def __init__(self):
117
+ self.model = None
118
+ self.device = devices.get_device_for("controlnet")
119
+
120
+ def load_model(self):
121
+ remote_model_path = "https://huggingface.co/lllyasviel/Annotators/resolve/main/netG.pth"
122
+ modelpath = os.path.join(self.model_dir, "netG.pth")
123
+ if not os.path.exists(modelpath):
124
+ from basicsr.utils.download_util import load_file_from_url
125
+ load_file_from_url(remote_model_path, model_dir=self.model_dir)
126
+ norm_layer = functools.partial(nn.InstanceNorm2d, affine=False, track_running_stats=False)
127
+ net = UnetGenerator(3, 1, 8, 64, norm_layer=norm_layer, use_dropout=False)
128
+ ckpt = torch.load(modelpath)
129
+ for key in list(ckpt.keys()):
130
+ if 'module.' in key:
131
+ ckpt[key.replace('module.', '')] = ckpt[key]
132
+ del ckpt[key]
133
+ net.load_state_dict(ckpt)
134
+ net.eval()
135
+ self.model = net.to(self.device)
136
+
137
+ def unload_model(self):
138
+ if self.model is not None:
139
+ self.model.cpu()
140
+
141
+ def __call__(self, input_image):
142
+ if self.model is None:
143
+ self.load_model()
144
+ self.model.to(self.device)
145
+
146
+ H, W, C = input_image.shape
147
+ Hn = 256 * int(np.ceil(float(H) / 256.0))
148
+ Wn = 256 * int(np.ceil(float(W) / 256.0))
149
+ img = cv2.resize(input_image, (Wn, Hn), interpolation=cv2.INTER_CUBIC)
150
+ with torch.no_grad():
151
+ image_feed = torch.from_numpy(img).float().to(self.device)
152
+ image_feed = image_feed / 127.5 - 1.0
153
+ image_feed = rearrange(image_feed, 'h w c -> 1 c h w')
154
+
155
+ line = self.model(image_feed)[0, 0] * 127.5 + 127.5
156
+ line = line.cpu().numpy()
157
+
158
+ line = cv2.resize(line, (W, H), interpolation=cv2.INTER_CUBIC)
159
+ line = line.clip(0, 255).astype(np.uint8)
160
+ return line
161
+
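The anime variant differs mainly in pre/post-processing: the input is resized up to the next multiple of 256 before the U-Net and the result is resized back, so arbitrary resolutions are accepted. A usage sketch under the same WebUI-environment assumption as above, with a random array standing in for a real image:

import numpy as np

# assumes LineartAnimeDetector is already in scope
detector = LineartAnimeDetector()
rgb = (np.random.rand(600, 904, 3) * 255).astype(np.uint8)   # any HxWx3 uint8 image
line_map = detector(rgb)                                     # returns a 600x904 uint8 map
detector.unload_model()
print(line_map.shape, line_map.dtype)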
extensions/microsoftexcel-controlnet/annotator/manga_line/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2021 Miaomiao Li
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
extensions/microsoftexcel-controlnet/annotator/manga_line/__init__.py ADDED
@@ -0,0 +1,248 @@
1
+ import os
2
+ import torch
3
+ import torch.nn as nn
4
+ from PIL import Image
5
+ import fnmatch
6
+ import cv2
7
+
8
+ import sys
9
+
10
+ import numpy as np
11
+ from einops import rearrange
12
+ from modules import devices
13
+ from annotator.annotator_path import models_path
14
+
15
+
16
+ class _bn_relu_conv(nn.Module):
17
+ def __init__(self, in_filters, nb_filters, fw, fh, subsample=1):
18
+ super(_bn_relu_conv, self).__init__()
19
+ self.model = nn.Sequential(
20
+ nn.BatchNorm2d(in_filters, eps=1e-3),
21
+ nn.LeakyReLU(0.2),
22
+ nn.Conv2d(in_filters, nb_filters, (fw, fh), stride=subsample, padding=(fw//2, fh//2), padding_mode='zeros')
23
+ )
24
+
25
+ def forward(self, x):
26
+ return self.model(x)
27
+
28
+ # the following are for debugs
29
+ print("****", np.max(x.cpu().numpy()), np.min(x.cpu().numpy()), np.mean(x.cpu().numpy()), np.std(x.cpu().numpy()), x.shape)
30
+ for i,layer in enumerate(self.model):
31
+ if i != 2:
32
+ x = layer(x)
33
+ else:
34
+ x = layer(x)
35
+ #x = nn.functional.pad(x, (1, 1, 1, 1), mode='constant', value=0)
36
+ print("____", np.max(x.cpu().numpy()), np.min(x.cpu().numpy()), np.mean(x.cpu().numpy()), np.std(x.cpu().numpy()), x.shape)
37
+ print(x[0])
38
+ return x
39
+
40
+ class _u_bn_relu_conv(nn.Module):
41
+ def __init__(self, in_filters, nb_filters, fw, fh, subsample=1):
42
+ super(_u_bn_relu_conv, self).__init__()
43
+ self.model = nn.Sequential(
44
+ nn.BatchNorm2d(in_filters, eps=1e-3),
45
+ nn.LeakyReLU(0.2),
46
+ nn.Conv2d(in_filters, nb_filters, (fw, fh), stride=subsample, padding=(fw//2, fh//2)),
47
+ nn.Upsample(scale_factor=2, mode='nearest')
48
+ )
49
+
50
+ def forward(self, x):
51
+ return self.model(x)
52
+
53
+
54
+
55
+ class _shortcut(nn.Module):
56
+ def __init__(self, in_filters, nb_filters, subsample=1):
57
+ super(_shortcut, self).__init__()
58
+ self.process = False
59
+ self.model = None
60
+ if in_filters != nb_filters or subsample != 1:
61
+ self.process = True
62
+ self.model = nn.Sequential(
63
+ nn.Conv2d(in_filters, nb_filters, (1, 1), stride=subsample)
64
+ )
65
+
66
+ def forward(self, x, y):
67
+ #print(x.size(), y.size(), self.process)
68
+ if self.process:
69
+ y0 = self.model(x)
70
+ #print("merge+", torch.max(y0+y), torch.min(y0+y),torch.mean(y0+y), torch.std(y0+y), y0.shape)
71
+ return y0 + y
72
+ else:
73
+ #print("merge", torch.max(x+y), torch.min(x+y),torch.mean(x+y), torch.std(x+y), y.shape)
74
+ return x + y
75
+
76
+ class _u_shortcut(nn.Module):
77
+ def __init__(self, in_filters, nb_filters, subsample):
78
+ super(_u_shortcut, self).__init__()
79
+ self.process = False
80
+ self.model = None
81
+ if in_filters != nb_filters:
82
+ self.process = True
83
+ self.model = nn.Sequential(
84
+ nn.Conv2d(in_filters, nb_filters, (1, 1), stride=subsample, padding_mode='zeros'),
85
+ nn.Upsample(scale_factor=2, mode='nearest')
86
+ )
87
+
88
+ def forward(self, x, y):
89
+ if self.process:
90
+ return self.model(x) + y
91
+ else:
92
+ return x + y
93
+
94
+
95
+ class basic_block(nn.Module):
96
+ def __init__(self, in_filters, nb_filters, init_subsample=1):
97
+ super(basic_block, self).__init__()
98
+ self.conv1 = _bn_relu_conv(in_filters, nb_filters, 3, 3, subsample=init_subsample)
99
+ self.residual = _bn_relu_conv(nb_filters, nb_filters, 3, 3)
100
+ self.shortcut = _shortcut(in_filters, nb_filters, subsample=init_subsample)
101
+
102
+ def forward(self, x):
103
+ x1 = self.conv1(x)
104
+ x2 = self.residual(x1)
105
+ return self.shortcut(x, x2)
106
+
107
+ class _u_basic_block(nn.Module):
108
+ def __init__(self, in_filters, nb_filters, init_subsample=1):
109
+ super(_u_basic_block, self).__init__()
110
+ self.conv1 = _u_bn_relu_conv(in_filters, nb_filters, 3, 3, subsample=init_subsample)
111
+ self.residual = _bn_relu_conv(nb_filters, nb_filters, 3, 3)
112
+ self.shortcut = _u_shortcut(in_filters, nb_filters, subsample=init_subsample)
113
+
114
+ def forward(self, x):
115
+ y = self.residual(self.conv1(x))
116
+ return self.shortcut(x, y)
117
+
118
+
119
+ class _residual_block(nn.Module):
120
+ def __init__(self, in_filters, nb_filters, repetitions, is_first_layer=False):
121
+ super(_residual_block, self).__init__()
122
+ layers = []
123
+ for i in range(repetitions):
124
+ init_subsample = 1
125
+ if i == repetitions - 1 and not is_first_layer:
126
+ init_subsample = 2
127
+ if i == 0:
128
+ l = basic_block(in_filters=in_filters, nb_filters=nb_filters, init_subsample=init_subsample)
129
+ else:
130
+ l = basic_block(in_filters=nb_filters, nb_filters=nb_filters, init_subsample=init_subsample)
131
+ layers.append(l)
132
+
133
+ self.model = nn.Sequential(*layers)
134
+
135
+ def forward(self, x):
136
+ return self.model(x)
137
+
138
+
139
+ class _upsampling_residual_block(nn.Module):
140
+ def __init__(self, in_filters, nb_filters, repetitions):
141
+ super(_upsampling_residual_block, self).__init__()
142
+ layers = []
143
+ for i in range(repetitions):
144
+ l = None
145
+ if i == 0:
146
+ l = _u_basic_block(in_filters=in_filters, nb_filters=nb_filters)#(input)
147
+ else:
148
+ l = basic_block(in_filters=nb_filters, nb_filters=nb_filters)#(input)
149
+ layers.append(l)
150
+
151
+ self.model = nn.Sequential(*layers)
152
+
153
+ def forward(self, x):
154
+ return self.model(x)
155
+
156
+
157
+ class res_skip(nn.Module):
158
+
159
+ def __init__(self):
160
+ super(res_skip, self).__init__()
161
+ self.block0 = _residual_block(in_filters=1, nb_filters=24, repetitions=2, is_first_layer=True)#(input)
162
+ self.block1 = _residual_block(in_filters=24, nb_filters=48, repetitions=3)#(block0)
163
+ self.block2 = _residual_block(in_filters=48, nb_filters=96, repetitions=5)#(block1)
164
+ self.block3 = _residual_block(in_filters=96, nb_filters=192, repetitions=7)#(block2)
165
+ self.block4 = _residual_block(in_filters=192, nb_filters=384, repetitions=12)#(block3)
166
+
167
+ self.block5 = _upsampling_residual_block(in_filters=384, nb_filters=192, repetitions=7)#(block4)
168
+ self.res1 = _shortcut(in_filters=192, nb_filters=192)#(block3, block5, subsample=(1,1))
169
+
170
+ self.block6 = _upsampling_residual_block(in_filters=192, nb_filters=96, repetitions=5)#(res1)
171
+ self.res2 = _shortcut(in_filters=96, nb_filters=96)#(block2, block6, subsample=(1,1))
172
+
173
+ self.block7 = _upsampling_residual_block(in_filters=96, nb_filters=48, repetitions=3)#(res2)
174
+ self.res3 = _shortcut(in_filters=48, nb_filters=48)#(block1, block7, subsample=(1,1))
175
+
176
+ self.block8 = _upsampling_residual_block(in_filters=48, nb_filters=24, repetitions=2)#(res3)
177
+ self.res4 = _shortcut(in_filters=24, nb_filters=24)#(block0,block8, subsample=(1,1))
178
+
179
+ self.block9 = _residual_block(in_filters=24, nb_filters=16, repetitions=2, is_first_layer=True)#(res4)
180
+ self.conv15 = _bn_relu_conv(in_filters=16, nb_filters=1, fh=1, fw=1, subsample=1)#(block7)
181
+
182
+ def forward(self, x):
183
+ x0 = self.block0(x)
184
+ x1 = self.block1(x0)
185
+ x2 = self.block2(x1)
186
+ x3 = self.block3(x2)
187
+ x4 = self.block4(x3)
188
+
189
+ x5 = self.block5(x4)
190
+ res1 = self.res1(x3, x5)
191
+
192
+ x6 = self.block6(res1)
193
+ res2 = self.res2(x2, x6)
194
+
195
+ x7 = self.block7(res2)
196
+ res3 = self.res3(x1, x7)
197
+
198
+ x8 = self.block8(res3)
199
+ res4 = self.res4(x0, x8)
200
+
201
+ x9 = self.block9(res4)
202
+ y = self.conv15(x9)
203
+
204
+ return y
205
+
206
+
207
+ class MangaLineExtration:
208
+ model_dir = os.path.join(models_path, "manga_line")
209
+
210
+ def __init__(self):
211
+ self.model = None
212
+ self.device = devices.get_device_for("controlnet")
213
+
214
+ def load_model(self):
215
+ remote_model_path = "https://huggingface.co/lllyasviel/Annotators/resolve/main/erika.pth"
216
+ modelpath = os.path.join(self.model_dir, "erika.pth")
217
+ if not os.path.exists(modelpath):
218
+ from basicsr.utils.download_util import load_file_from_url
219
+ load_file_from_url(remote_model_path, model_dir=self.model_dir)
220
+ #norm_layer = functools.partial(nn.InstanceNorm2d, affine=False, track_running_stats=False)
221
+ net = res_skip()
222
+ ckpt = torch.load(modelpath)
223
+ for key in list(ckpt.keys()):
224
+ if 'module.' in key:
225
+ ckpt[key.replace('module.', '')] = ckpt[key]
226
+ del ckpt[key]
227
+ net.load_state_dict(ckpt)
228
+ net.eval()
229
+ self.model = net.to(self.device)
230
+
231
+ def unload_model(self):
232
+ if self.model is not None:
233
+ self.model.cpu()
234
+
235
+ def __call__(self, input_image):
236
+ if self.model is None:
237
+ self.load_model()
238
+ self.model.to(self.device)
239
+ img = cv2.cvtColor(input_image, cv2.COLOR_RGB2GRAY)
240
+ img = np.ascontiguousarray(img)  # ensure a contiguous array; the extra .copy() calls were redundant
241
+ with torch.no_grad():
242
+ image_feed = torch.from_numpy(img).float().to(self.device)
243
+ image_feed = rearrange(image_feed, 'h w -> 1 1 h w')
244
+ line = self.model(image_feed)
245
+ line = 255 - line.cpu().numpy()[0, 0]
246
+ return line.clip(0, 255).astype(np.uint8)
247
+
248
+
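A minimal usage sketch for the annotator defined above (not part of the diff). It assumes the extension's annotator.manga_line package is importable from the webui working directory, that erika.pth can be fetched on first use, and that input.png is a hypothetical test image.

import cv2
from annotator.manga_line import MangaLineExtration  # class name as spelled in this file

annotator = MangaLineExtration()
rgb = cv2.cvtColor(cv2.imread("input.png"), cv2.COLOR_BGR2RGB)  # HxWx3 uint8, RGB order
line_map = annotator(rgb)                                       # HxW uint8 line-art map
cv2.imwrite("line_map.png", line_map)
annotator.unload_model()                                        # move the network back to CPU when done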
extensions/microsoftexcel-controlnet/annotator/mediapipe_face/__init__.py ADDED
@@ -0,0 +1,5 @@
1
+ from .mediapipe_face_common import generate_annotation
2
+
3
+
4
+ def apply_mediapipe_face(image, max_faces: int = 1, min_confidence: float = 0.5):
5
+ return generate_annotation(image, max_faces, min_confidence)
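A hedged usage sketch for the wrapper above; it assumes mediapipe is installed, the annotator.mediapipe_face package is importable, and portrait.png is a hypothetical input image.

import cv2
from annotator.mediapipe_face import apply_mediapipe_face

rgb = cv2.cvtColor(cv2.imread("portrait.png"), cv2.COLOR_BGR2RGB)        # RGB HxWx3
annotation = apply_mediapipe_face(rgb, max_faces=2, min_confidence=0.5)  # all-zero image if no face is found
cv2.imwrite("face_annotation.png", cv2.cvtColor(annotation, cv2.COLOR_RGB2BGR))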
extensions/microsoftexcel-controlnet/annotator/mediapipe_face/mediapipe_face_common.py ADDED
@@ -0,0 +1,155 @@
1
+ from typing import Mapping
2
+
3
+ import mediapipe as mp
4
+ import numpy
5
+
6
+
7
+ mp_drawing = mp.solutions.drawing_utils
8
+ mp_drawing_styles = mp.solutions.drawing_styles
9
+ mp_face_detection = mp.solutions.face_detection # Only for counting faces.
10
+ mp_face_mesh = mp.solutions.face_mesh
11
+ mp_face_connections = mp.solutions.face_mesh_connections.FACEMESH_TESSELATION
12
+ mp_hand_connections = mp.solutions.hands_connections.HAND_CONNECTIONS
13
+ mp_body_connections = mp.solutions.pose_connections.POSE_CONNECTIONS
14
+
15
+ DrawingSpec = mp.solutions.drawing_styles.DrawingSpec
16
+ PoseLandmark = mp.solutions.drawing_styles.PoseLandmark
17
+
18
+ min_face_size_pixels: int = 64
19
+ f_thick = 2
20
+ f_rad = 1
21
+ right_iris_draw = DrawingSpec(color=(10, 200, 250), thickness=f_thick, circle_radius=f_rad)
22
+ right_eye_draw = DrawingSpec(color=(10, 200, 180), thickness=f_thick, circle_radius=f_rad)
23
+ right_eyebrow_draw = DrawingSpec(color=(10, 220, 180), thickness=f_thick, circle_radius=f_rad)
24
+ left_iris_draw = DrawingSpec(color=(250, 200, 10), thickness=f_thick, circle_radius=f_rad)
25
+ left_eye_draw = DrawingSpec(color=(180, 200, 10), thickness=f_thick, circle_radius=f_rad)
26
+ left_eyebrow_draw = DrawingSpec(color=(180, 220, 10), thickness=f_thick, circle_radius=f_rad)
27
+ mouth_draw = DrawingSpec(color=(10, 180, 10), thickness=f_thick, circle_radius=f_rad)
28
+ head_draw = DrawingSpec(color=(10, 200, 10), thickness=f_thick, circle_radius=f_rad)
29
+
30
+ # mp_face_mesh.FACEMESH_CONTOURS has all the items we care about.
31
+ face_connection_spec = {}
32
+ for edge in mp_face_mesh.FACEMESH_FACE_OVAL:
33
+ face_connection_spec[edge] = head_draw
34
+ for edge in mp_face_mesh.FACEMESH_LEFT_EYE:
35
+ face_connection_spec[edge] = left_eye_draw
36
+ for edge in mp_face_mesh.FACEMESH_LEFT_EYEBROW:
37
+ face_connection_spec[edge] = left_eyebrow_draw
38
+ # for edge in mp_face_mesh.FACEMESH_LEFT_IRIS:
39
+ # face_connection_spec[edge] = left_iris_draw
40
+ for edge in mp_face_mesh.FACEMESH_RIGHT_EYE:
41
+ face_connection_spec[edge] = right_eye_draw
42
+ for edge in mp_face_mesh.FACEMESH_RIGHT_EYEBROW:
43
+ face_connection_spec[edge] = right_eyebrow_draw
44
+ # for edge in mp_face_mesh.FACEMESH_RIGHT_IRIS:
45
+ # face_connection_spec[edge] = right_iris_draw
46
+ for edge in mp_face_mesh.FACEMESH_LIPS:
47
+ face_connection_spec[edge] = mouth_draw
48
+ iris_landmark_spec = {468: right_iris_draw, 473: left_iris_draw}
49
+
50
+
51
+ def draw_pupils(image, landmark_list, drawing_spec, halfwidth: int = 2):
52
+ """We have a custom function to draw the pupils because the mp.draw_landmarks method requires a parameter for all
53
+ landmarks. Until our PR is merged into mediapipe, we need this separate method."""
54
+ if len(image.shape) != 3:
55
+ raise ValueError("Input image must be H,W,C.")
56
+ image_rows, image_cols, image_channels = image.shape
57
+ if image_channels != 3: # BGR channels
58
+ raise ValueError('Input image must contain three channel bgr data.')
59
+ for idx, landmark in enumerate(landmark_list.landmark):
60
+ if (
61
+ (landmark.HasField('visibility') and landmark.visibility < 0.9) or
62
+ (landmark.HasField('presence') and landmark.presence < 0.5)
63
+ ):
64
+ continue
65
+ if landmark.x >= 1.0 or landmark.x < 0 or landmark.y >= 1.0 or landmark.y < 0:
66
+ continue
67
+ image_x = int(image_cols*landmark.x)
68
+ image_y = int(image_rows*landmark.y)
69
+ draw_color = None
70
+ if isinstance(drawing_spec, Mapping):
71
+ if drawing_spec.get(idx) is None:
72
+ continue
73
+ else:
74
+ draw_color = drawing_spec[idx].color
75
+ elif isinstance(drawing_spec, DrawingSpec):
76
+ draw_color = drawing_spec.color
77
+ image[image_y-halfwidth:image_y+halfwidth, image_x-halfwidth:image_x+halfwidth, :] = draw_color
78
+
79
+
80
+ def reverse_channels(image):
81
+ """Given a numpy array in RGB form, convert to BGR. Will also convert from BGR to RGB."""
82
+ # im[:,:,::-1] is a neat hack to convert BGR to RGB by reversing the indexing order.
83
+ # im[:, :, [2, 1, 0]] would also work but makes a copy of the data.
84
+ return image[:, :, ::-1]
85
+
86
+
87
+ def generate_annotation(
88
+ img_rgb,
89
+ max_faces: int,
90
+ min_confidence: float
91
+ ):
92
+ """
93
+ Find up to 'max_faces' inside the provided input image.
94
+ If min_face_size_pixels is provided and nonzero it will be used to filter faces that occupy less than this many
95
+ pixels in the image.
96
+ """
97
+ with mp_face_mesh.FaceMesh(
98
+ static_image_mode=True,
99
+ max_num_faces=max_faces,
100
+ refine_landmarks=True,
101
+ min_detection_confidence=min_confidence,
102
+ ) as facemesh:
103
+ img_height, img_width, img_channels = img_rgb.shape
104
+ assert(img_channels == 3)
105
+
106
+ results = facemesh.process(img_rgb).multi_face_landmarks
107
+
108
+ if results is None:
109
+ print("No faces detected in controlnet image for Mediapipe face annotator.")
110
+ return numpy.zeros_like(img_rgb)
111
+
112
+ # Filter faces that are too small
113
+ filtered_landmarks = []
114
+ for lm in results:
115
+ landmarks = lm.landmark
116
+ face_rect = [
117
+ landmarks[0].x,
118
+ landmarks[0].y,
119
+ landmarks[0].x,
120
+ landmarks[0].y,
121
+ ] # Left, up, right, down.
122
+ for i in range(len(landmarks)):
123
+ face_rect[0] = min(face_rect[0], landmarks[i].x)
124
+ face_rect[1] = min(face_rect[1], landmarks[i].y)
125
+ face_rect[2] = max(face_rect[2], landmarks[i].x)
126
+ face_rect[3] = max(face_rect[3], landmarks[i].y)
127
+ if min_face_size_pixels > 0:
128
+ face_width = abs(face_rect[2] - face_rect[0])
129
+ face_height = abs(face_rect[3] - face_rect[1])
130
+ face_width_pixels = face_width * img_width
131
+ face_height_pixels = face_height * img_height
132
+ face_size = min(face_width_pixels, face_height_pixels)
133
+ if face_size >= min_face_size_pixels:
134
+ filtered_landmarks.append(lm)
135
+ else:
136
+ filtered_landmarks.append(lm)
137
+
138
+ # Annotations are drawn in BGR for some reason, but we don't need to flip a zero-filled image at the start.
139
+ empty = numpy.zeros_like(img_rgb)
140
+
141
+ # Draw detected faces:
142
+ for face_landmarks in filtered_landmarks:
143
+ mp_drawing.draw_landmarks(
144
+ empty,
145
+ face_landmarks,
146
+ connections=face_connection_spec.keys(),
147
+ landmark_drawing_spec=None,
148
+ connection_drawing_spec=face_connection_spec
149
+ )
150
+ draw_pupils(empty, face_landmarks, iris_landmark_spec, 2)
151
+
152
+ # Flip BGR back to RGB.
153
+ empty = reverse_channels(empty).copy()
154
+
155
+ return empty
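A worked example of the size filter in generate_annotation (the numbers are illustrative): landmark coordinates are normalized to [0, 1], so the face extent is converted back to pixels before comparing against min_face_size_pixels.

img_width, img_height = 512, 768                         # hypothetical image size
face_width_pixels = (0.42 - 0.30) * img_width            # ~61 px wide face
face_height_pixels = (0.40 - 0.25) * img_height          # ~115 px tall face
face_size = min(face_width_pixels, face_height_pixels)
print(face_size >= 64)                                   # False: this face is dropped (min_face_size_pixels = 64)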
extensions/microsoftexcel-controlnet/annotator/midas/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2019 Intel ISL (Intel Intelligent Systems Lab)
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
extensions/microsoftexcel-controlnet/annotator/midas/__init__.py ADDED
@@ -0,0 +1,49 @@
1
+ import cv2
2
+ import numpy as np
3
+ import torch
4
+
5
+ from einops import rearrange
6
+ from .api import MiDaSInference
7
+ from modules import devices
8
+
9
+ model = None
10
+
11
+ def unload_midas_model():
12
+ global model
13
+ if model is not None:
14
+ model = model.cpu()
15
+
16
+ def apply_midas(input_image, a=np.pi * 2.0, bg_th=0.1):
17
+ global model
18
+ if model is None:
19
+ model = MiDaSInference(model_type="dpt_hybrid")
20
+ if devices.get_device_for("controlnet").type != 'mps':
21
+ model = model.to(devices.get_device_for("controlnet"))
22
+
23
+ assert input_image.ndim == 3
24
+ image_depth = input_image
25
+ with torch.no_grad():
26
+ image_depth = torch.from_numpy(image_depth).float()
27
+ if devices.get_device_for("controlnet").type != 'mps':
28
+ image_depth = image_depth.to(devices.get_device_for("controlnet"))
29
+ image_depth = image_depth / 127.5 - 1.0
30
+ image_depth = rearrange(image_depth, 'h w c -> 1 c h w')
31
+ depth = model(image_depth)[0]
32
+
33
+ depth_pt = depth.clone()
34
+ depth_pt -= torch.min(depth_pt)
35
+ depth_pt /= torch.max(depth_pt)
36
+ depth_pt = depth_pt.cpu().numpy()
37
+ depth_image = (depth_pt * 255.0).clip(0, 255).astype(np.uint8)
38
+
39
+ depth_np = depth.cpu().numpy()
40
+ x = cv2.Sobel(depth_np, cv2.CV_32F, 1, 0, ksize=3)
41
+ y = cv2.Sobel(depth_np, cv2.CV_32F, 0, 1, ksize=3)
42
+ z = np.ones_like(x) * a
43
+ x[depth_pt < bg_th] = 0
44
+ y[depth_pt < bg_th] = 0
45
+ normal = np.stack([x, y, z], axis=2)
46
+ normal /= np.sum(normal ** 2.0, axis=2, keepdims=True) ** 0.5
47
+ normal_image = (normal * 127.5 + 127.5).clip(0, 255).astype(np.uint8)[:, :, ::-1]
48
+
49
+ return depth_image, normal_image
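A self-contained sketch of the depth-to-normal step used in apply_midas above, run on a synthetic depth map so no model weights are needed (the values are illustrative only).

import cv2
import numpy as np

a, bg_th = np.pi * 2.0, 0.1
depth = np.linspace(0.0, 1.0, 64 * 64, dtype=np.float32).reshape(64, 64)  # stand-in for normalized depth

dx = cv2.Sobel(depth, cv2.CV_32F, 1, 0, ksize=3)    # horizontal depth gradient
dy = cv2.Sobel(depth, cv2.CV_32F, 0, 1, ksize=3)    # vertical depth gradient
dz = np.ones_like(dx) * a                           # constant z term; larger a flattens the normals
dx[depth < bg_th] = 0                               # zero out gradients in the background region
dy[depth < bg_th] = 0
normal = np.stack([dx, dy, dz], axis=2)
normal /= np.sum(normal ** 2.0, axis=2, keepdims=True) ** 0.5                       # unit-length normals
normal_image = (normal * 127.5 + 127.5).clip(0, 255).astype(np.uint8)[:, :, ::-1]   # BGR visualization
print(normal_image.shape)                           # (64, 64, 3)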
extensions/microsoftexcel-controlnet/annotator/midas/api.py ADDED
@@ -0,0 +1,181 @@
1
+ # based on https://github.com/isl-org/MiDaS
2
+
3
+ import cv2
4
+ import torch
5
+ import torch.nn as nn
6
+ import os
7
+ from annotator.annotator_path import models_path
8
+
9
+ from torchvision.transforms import Compose
10
+
11
+ from .midas.dpt_depth import DPTDepthModel
12
+ from .midas.midas_net import MidasNet
13
+ from .midas.midas_net_custom import MidasNet_small
14
+ from .midas.transforms import Resize, NormalizeImage, PrepareForNet
15
+
16
+ base_model_path = os.path.join(models_path, "midas")
17
+ old_modeldir = os.path.dirname(os.path.realpath(__file__))
18
+ remote_model_path = "https://huggingface.co/lllyasviel/ControlNet/resolve/main/annotator/ckpts/dpt_hybrid-midas-501f0c75.pt"
19
+
20
+ ISL_PATHS = {
21
+ "dpt_large": os.path.join(base_model_path, "dpt_large-midas-2f21e586.pt"),
22
+ "dpt_hybrid": os.path.join(base_model_path, "dpt_hybrid-midas-501f0c75.pt"),
23
+ "midas_v21": "",
24
+ "midas_v21_small": "",
25
+ }
26
+
27
+ OLD_ISL_PATHS = {
28
+ "dpt_large": os.path.join(old_modeldir, "dpt_large-midas-2f21e586.pt"),
29
+ "dpt_hybrid": os.path.join(old_modeldir, "dpt_hybrid-midas-501f0c75.pt"),
30
+ "midas_v21": "",
31
+ "midas_v21_small": "",
32
+ }
33
+
34
+
35
+ def disabled_train(self, mode=True):
36
+ """Overwrite model.train with this function to make sure train/eval mode
37
+ does not change anymore."""
38
+ return self
39
+
40
+
41
+ def load_midas_transform(model_type):
42
+ # https://github.com/isl-org/MiDaS/blob/master/run.py
43
+ # load transform only
44
+ if model_type == "dpt_large": # DPT-Large
45
+ net_w, net_h = 384, 384
46
+ resize_mode = "minimal"
47
+ normalization = NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
48
+
49
+ elif model_type == "dpt_hybrid": # DPT-Hybrid
50
+ net_w, net_h = 384, 384
51
+ resize_mode = "minimal"
52
+ normalization = NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
53
+
54
+ elif model_type == "midas_v21":
55
+ net_w, net_h = 384, 384
56
+ resize_mode = "upper_bound"
57
+ normalization = NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
58
+
59
+ elif model_type == "midas_v21_small":
60
+ net_w, net_h = 256, 256
61
+ resize_mode = "upper_bound"
62
+ normalization = NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
63
+
64
+ else:
65
+ assert False, f"model_type '{model_type}' not implemented, use: --model_type large"
66
+
67
+ transform = Compose(
68
+ [
69
+ Resize(
70
+ net_w,
71
+ net_h,
72
+ resize_target=None,
73
+ keep_aspect_ratio=True,
74
+ ensure_multiple_of=32,
75
+ resize_method=resize_mode,
76
+ image_interpolation_method=cv2.INTER_CUBIC,
77
+ ),
78
+ normalization,
79
+ PrepareForNet(),
80
+ ]
81
+ )
82
+
83
+ return transform
84
+
85
+
86
+ def load_model(model_type):
87
+ # https://github.com/isl-org/MiDaS/blob/master/run.py
88
+ # load network
89
+ model_path = ISL_PATHS[model_type]
90
+ old_model_path = OLD_ISL_PATHS[model_type]
91
+ if model_type == "dpt_large": # DPT-Large
92
+ model = DPTDepthModel(
93
+ path=model_path,
94
+ backbone="vitl16_384",
95
+ non_negative=True,
96
+ )
97
+ net_w, net_h = 384, 384
98
+ resize_mode = "minimal"
99
+ normalization = NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
100
+
101
+ elif model_type == "dpt_hybrid": # DPT-Hybrid
102
+ if os.path.exists(old_model_path):
103
+ model_path = old_model_path
104
+ elif not os.path.exists(model_path):
105
+ from basicsr.utils.download_util import load_file_from_url
106
+ load_file_from_url(remote_model_path, model_dir=base_model_path)
107
+
108
+ model = DPTDepthModel(
109
+ path=model_path,
110
+ backbone="vitb_rn50_384",
111
+ non_negative=True,
112
+ )
113
+ net_w, net_h = 384, 384
114
+ resize_mode = "minimal"
115
+ normalization = NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
116
+
117
+ elif model_type == "midas_v21":
118
+ model = MidasNet(model_path, non_negative=True)
119
+ net_w, net_h = 384, 384
120
+ resize_mode = "upper_bound"
121
+ normalization = NormalizeImage(
122
+ mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
123
+ )
124
+
125
+ elif model_type == "midas_v21_small":
126
+ model = MidasNet_small(model_path, features=64, backbone="efficientnet_lite3", exportable=True,
127
+ non_negative=True, blocks={'expand': True})
128
+ net_w, net_h = 256, 256
129
+ resize_mode = "upper_bound"
130
+ normalization = NormalizeImage(
131
+ mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
132
+ )
133
+
134
+ else:
135
+ print(f"model_type '{model_type}' not implemented, use: --model_type large")
136
+ assert False
137
+
138
+ transform = Compose(
139
+ [
140
+ Resize(
141
+ net_w,
142
+ net_h,
143
+ resize_target=None,
144
+ keep_aspect_ratio=True,
145
+ ensure_multiple_of=32,
146
+ resize_method=resize_mode,
147
+ image_interpolation_method=cv2.INTER_CUBIC,
148
+ ),
149
+ normalization,
150
+ PrepareForNet(),
151
+ ]
152
+ )
153
+
154
+ return model.eval(), transform
155
+
156
+
157
+ class MiDaSInference(nn.Module):
158
+ MODEL_TYPES_TORCH_HUB = [
159
+ "DPT_Large",
160
+ "DPT_Hybrid",
161
+ "MiDaS_small"
162
+ ]
163
+ MODEL_TYPES_ISL = [
164
+ "dpt_large",
165
+ "dpt_hybrid",
166
+ "midas_v21",
167
+ "midas_v21_small",
168
+ ]
169
+
170
+ def __init__(self, model_type):
171
+ super().__init__()
172
+ assert (model_type in self.MODEL_TYPES_ISL)
173
+ model, _ = load_model(model_type)
174
+ self.model = model
175
+ self.model.train = disabled_train
176
+
177
+ def forward(self, x):
178
+ with torch.no_grad():
179
+ prediction = self.model(x)
180
+ return prediction
181
+
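A hedged usage sketch for MiDaSInference; it assumes the dpt_hybrid checkpoint already sits under models_path/midas (or can be downloaded by load_model) and feeds random noise just to show the expected tensor shapes.

import torch
from annotator.midas.api import MiDaSInference

midas = MiDaSInference(model_type="dpt_hybrid")
x = torch.randn(1, 3, 384, 384)   # NCHW batch, already normalized to [-1, 1]
depth = midas(x)                  # forward() runs under torch.no_grad() internally
print(depth.shape)                # torch.Size([1, 384, 384]), relative inverse depth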
extensions/microsoftexcel-controlnet/annotator/midas/midas/__init__.py ADDED
File without changes
extensions/microsoftexcel-controlnet/annotator/midas/midas/base_model.py ADDED
@@ -0,0 +1,16 @@
1
+ import torch
2
+
3
+
4
+ class BaseModel(torch.nn.Module):
5
+ def load(self, path):
6
+ """Load model from file.
7
+
8
+ Args:
9
+ path (str): file path
10
+ """
11
+ parameters = torch.load(path, map_location=torch.device('cpu'))
12
+
13
+ if "optimizer" in parameters:
14
+ parameters = parameters["model"]
15
+
16
+ self.load_state_dict(parameters)
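A small sketch of the checkpoint convention BaseModel.load handles above: plain state dicts load directly, while training checkpoints that also carry an "optimizer" entry keep the weights under "model". The tiny module below is only for illustration.

import torch
import torch.nn as nn

net = nn.Linear(4, 2)
torch.save({"model": net.state_dict(), "optimizer": {}}, "ckpt.pth")  # training-style checkpoint

parameters = torch.load("ckpt.pth", map_location=torch.device("cpu"))
if "optimizer" in parameters:      # same branch BaseModel.load takes
    parameters = parameters["model"]
net.load_state_dict(parameters)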
extensions/microsoftexcel-controlnet/annotator/midas/midas/blocks.py ADDED
@@ -0,0 +1,342 @@
1
+ import torch
2
+ import torch.nn as nn
3
+
4
+ from .vit import (
5
+ _make_pretrained_vitb_rn50_384,
6
+ _make_pretrained_vitl16_384,
7
+ _make_pretrained_vitb16_384,
8
+ forward_vit,
9
+ )
10
+
11
+ def _make_encoder(backbone, features, use_pretrained, groups=1, expand=False, exportable=True, hooks=None, use_vit_only=False, use_readout="ignore",):
12
+ if backbone == "vitl16_384":
13
+ pretrained = _make_pretrained_vitl16_384(
14
+ use_pretrained, hooks=hooks, use_readout=use_readout
15
+ )
16
+ scratch = _make_scratch(
17
+ [256, 512, 1024, 1024], features, groups=groups, expand=expand
18
+ ) # ViT-L/16 - 85.0% Top1 (backbone)
19
+ elif backbone == "vitb_rn50_384":
20
+ pretrained = _make_pretrained_vitb_rn50_384(
21
+ use_pretrained,
22
+ hooks=hooks,
23
+ use_vit_only=use_vit_only,
24
+ use_readout=use_readout,
25
+ )
26
+ scratch = _make_scratch(
27
+ [256, 512, 768, 768], features, groups=groups, expand=expand
28
+ ) # R50+ViT-B/16 hybrid (backbone)
29
+ elif backbone == "vitb16_384":
30
+ pretrained = _make_pretrained_vitb16_384(
31
+ use_pretrained, hooks=hooks, use_readout=use_readout
32
+ )
33
+ scratch = _make_scratch(
34
+ [96, 192, 384, 768], features, groups=groups, expand=expand
35
+ ) # ViT-B/16 - 84.6% Top1 (backbone)
36
+ elif backbone == "resnext101_wsl":
37
+ pretrained = _make_pretrained_resnext101_wsl(use_pretrained)
38
+ scratch = _make_scratch([256, 512, 1024, 2048], features, groups=groups, expand=expand) # resnext101_wsl
39
+ elif backbone == "efficientnet_lite3":
40
+ pretrained = _make_pretrained_efficientnet_lite3(use_pretrained, exportable=exportable)
41
+ scratch = _make_scratch([32, 48, 136, 384], features, groups=groups, expand=expand) # efficientnet_lite3
42
+ else:
43
+ print(f"Backbone '{backbone}' not implemented")
44
+ assert False
45
+
46
+ return pretrained, scratch
47
+
48
+
49
+ def _make_scratch(in_shape, out_shape, groups=1, expand=False):
50
+ scratch = nn.Module()
51
+
52
+ out_shape1 = out_shape
53
+ out_shape2 = out_shape
54
+ out_shape3 = out_shape
55
+ out_shape4 = out_shape
56
+ if expand==True:
57
+ out_shape1 = out_shape
58
+ out_shape2 = out_shape*2
59
+ out_shape3 = out_shape*4
60
+ out_shape4 = out_shape*8
61
+
62
+ scratch.layer1_rn = nn.Conv2d(
63
+ in_shape[0], out_shape1, kernel_size=3, stride=1, padding=1, bias=False, groups=groups
64
+ )
65
+ scratch.layer2_rn = nn.Conv2d(
66
+ in_shape[1], out_shape2, kernel_size=3, stride=1, padding=1, bias=False, groups=groups
67
+ )
68
+ scratch.layer3_rn = nn.Conv2d(
69
+ in_shape[2], out_shape3, kernel_size=3, stride=1, padding=1, bias=False, groups=groups
70
+ )
71
+ scratch.layer4_rn = nn.Conv2d(
72
+ in_shape[3], out_shape4, kernel_size=3, stride=1, padding=1, bias=False, groups=groups
73
+ )
74
+
75
+ return scratch
76
+
77
+
78
+ def _make_pretrained_efficientnet_lite3(use_pretrained, exportable=False):
79
+ efficientnet = torch.hub.load(
80
+ "rwightman/gen-efficientnet-pytorch",
81
+ "tf_efficientnet_lite3",
82
+ pretrained=use_pretrained,
83
+ exportable=exportable
84
+ )
85
+ return _make_efficientnet_backbone(efficientnet)
86
+
87
+
88
+ def _make_efficientnet_backbone(effnet):
89
+ pretrained = nn.Module()
90
+
91
+ pretrained.layer1 = nn.Sequential(
92
+ effnet.conv_stem, effnet.bn1, effnet.act1, *effnet.blocks[0:2]
93
+ )
94
+ pretrained.layer2 = nn.Sequential(*effnet.blocks[2:3])
95
+ pretrained.layer3 = nn.Sequential(*effnet.blocks[3:5])
96
+ pretrained.layer4 = nn.Sequential(*effnet.blocks[5:9])
97
+
98
+ return pretrained
99
+
100
+
101
+ def _make_resnet_backbone(resnet):
102
+ pretrained = nn.Module()
103
+ pretrained.layer1 = nn.Sequential(
104
+ resnet.conv1, resnet.bn1, resnet.relu, resnet.maxpool, resnet.layer1
105
+ )
106
+
107
+ pretrained.layer2 = resnet.layer2
108
+ pretrained.layer3 = resnet.layer3
109
+ pretrained.layer4 = resnet.layer4
110
+
111
+ return pretrained
112
+
113
+
114
+ def _make_pretrained_resnext101_wsl(use_pretrained):
115
+ resnet = torch.hub.load("facebookresearch/WSL-Images", "resnext101_32x8d_wsl")
116
+ return _make_resnet_backbone(resnet)
117
+
118
+
119
+
120
+ class Interpolate(nn.Module):
121
+ """Interpolation module.
122
+ """
123
+
124
+ def __init__(self, scale_factor, mode, align_corners=False):
125
+ """Init.
126
+
127
+ Args:
128
+ scale_factor (float): scaling
129
+ mode (str): interpolation mode
130
+ """
131
+ super(Interpolate, self).__init__()
132
+
133
+ self.interp = nn.functional.interpolate
134
+ self.scale_factor = scale_factor
135
+ self.mode = mode
136
+ self.align_corners = align_corners
137
+
138
+ def forward(self, x):
139
+ """Forward pass.
140
+
141
+ Args:
142
+ x (tensor): input
143
+
144
+ Returns:
145
+ tensor: interpolated data
146
+ """
147
+
148
+ x = self.interp(
149
+ x, scale_factor=self.scale_factor, mode=self.mode, align_corners=self.align_corners
150
+ )
151
+
152
+ return x
153
+
154
+
155
+ class ResidualConvUnit(nn.Module):
156
+ """Residual convolution module.
157
+ """
158
+
159
+ def __init__(self, features):
160
+ """Init.
161
+
162
+ Args:
163
+ features (int): number of features
164
+ """
165
+ super().__init__()
166
+
167
+ self.conv1 = nn.Conv2d(
168
+ features, features, kernel_size=3, stride=1, padding=1, bias=True
169
+ )
170
+
171
+ self.conv2 = nn.Conv2d(
172
+ features, features, kernel_size=3, stride=1, padding=1, bias=True
173
+ )
174
+
175
+ self.relu = nn.ReLU(inplace=True)
176
+
177
+ def forward(self, x):
178
+ """Forward pass.
179
+
180
+ Args:
181
+ x (tensor): input
182
+
183
+ Returns:
184
+ tensor: output
185
+ """
186
+ out = self.relu(x)
187
+ out = self.conv1(out)
188
+ out = self.relu(out)
189
+ out = self.conv2(out)
190
+
191
+ return out + x
192
+
193
+
194
+ class FeatureFusionBlock(nn.Module):
195
+ """Feature fusion block.
196
+ """
197
+
198
+ def __init__(self, features):
199
+ """Init.
200
+
201
+ Args:
202
+ features (int): number of features
203
+ """
204
+ super(FeatureFusionBlock, self).__init__()
205
+
206
+ self.resConfUnit1 = ResidualConvUnit(features)
207
+ self.resConfUnit2 = ResidualConvUnit(features)
208
+
209
+ def forward(self, *xs):
210
+ """Forward pass.
211
+
212
+ Returns:
213
+ tensor: output
214
+ """
215
+ output = xs[0]
216
+
217
+ if len(xs) == 2:
218
+ output += self.resConfUnit1(xs[1])
219
+
220
+ output = self.resConfUnit2(output)
221
+
222
+ output = nn.functional.interpolate(
223
+ output, scale_factor=2, mode="bilinear", align_corners=True
224
+ )
225
+
226
+ return output
227
+
228
+
229
+
230
+
231
+ class ResidualConvUnit_custom(nn.Module):
232
+ """Residual convolution module.
233
+ """
234
+
235
+ def __init__(self, features, activation, bn):
236
+ """Init.
237
+
238
+ Args:
239
+ features (int): number of features
240
+ """
241
+ super().__init__()
242
+
243
+ self.bn = bn
244
+
245
+ self.groups=1
246
+
247
+ self.conv1 = nn.Conv2d(
248
+ features, features, kernel_size=3, stride=1, padding=1, bias=True, groups=self.groups
249
+ )
250
+
251
+ self.conv2 = nn.Conv2d(
252
+ features, features, kernel_size=3, stride=1, padding=1, bias=True, groups=self.groups
253
+ )
254
+
255
+ if self.bn==True:
256
+ self.bn1 = nn.BatchNorm2d(features)
257
+ self.bn2 = nn.BatchNorm2d(features)
258
+
259
+ self.activation = activation
260
+
261
+ self.skip_add = nn.quantized.FloatFunctional()
262
+
263
+ def forward(self, x):
264
+ """Forward pass.
265
+
266
+ Args:
267
+ x (tensor): input
268
+
269
+ Returns:
270
+ tensor: output
271
+ """
272
+
273
+ out = self.activation(x)
274
+ out = self.conv1(out)
275
+ if self.bn==True:
276
+ out = self.bn1(out)
277
+
278
+ out = self.activation(out)
279
+ out = self.conv2(out)
280
+ if self.bn==True:
281
+ out = self.bn2(out)
282
+
283
+ if self.groups > 1:
284
+ out = self.conv_merge(out)
285
+
286
+ return self.skip_add.add(out, x)
287
+
288
+ # return out + x
289
+
290
+
291
+ class FeatureFusionBlock_custom(nn.Module):
292
+ """Feature fusion block.
293
+ """
294
+
295
+ def __init__(self, features, activation, deconv=False, bn=False, expand=False, align_corners=True):
296
+ """Init.
297
+
298
+ Args:
299
+ features (int): number of features
300
+ """
301
+ super(FeatureFusionBlock_custom, self).__init__()
302
+
303
+ self.deconv = deconv
304
+ self.align_corners = align_corners
305
+
306
+ self.groups=1
307
+
308
+ self.expand = expand
309
+ out_features = features
310
+ if self.expand==True:
311
+ out_features = features//2
312
+
313
+ self.out_conv = nn.Conv2d(features, out_features, kernel_size=1, stride=1, padding=0, bias=True, groups=1)
314
+
315
+ self.resConfUnit1 = ResidualConvUnit_custom(features, activation, bn)
316
+ self.resConfUnit2 = ResidualConvUnit_custom(features, activation, bn)
317
+
318
+ self.skip_add = nn.quantized.FloatFunctional()
319
+
320
+ def forward(self, *xs):
321
+ """Forward pass.
322
+
323
+ Returns:
324
+ tensor: output
325
+ """
326
+ output = xs[0]
327
+
328
+ if len(xs) == 2:
329
+ res = self.resConfUnit1(xs[1])
330
+ output = self.skip_add.add(output, res)
331
+ # output += res
332
+
333
+ output = self.resConfUnit2(output)
334
+
335
+ output = nn.functional.interpolate(
336
+ output, scale_factor=2, mode="bilinear", align_corners=self.align_corners
337
+ )
338
+
339
+ output = self.out_conv(output)
340
+
341
+ return output
342
+
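An illustration of the expand option in _make_scratch (importing it outside the webui tree is an assumption): with out_shape=64 and expand=True the four reassembly convolutions emit 64, 128, 256 and 512 channels.

from annotator.midas.midas.blocks import _make_scratch

scratch = _make_scratch([32, 48, 136, 384], 64, expand=True)  # efficientnet_lite3-style input widths
print(scratch.layer1_rn.out_channels,   # 64
      scratch.layer2_rn.out_channels,   # 128
      scratch.layer3_rn.out_channels,   # 256
      scratch.layer4_rn.out_channels)   # 512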
extensions/microsoftexcel-controlnet/annotator/midas/midas/dpt_depth.py ADDED
@@ -0,0 +1,109 @@
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+
5
+ from .base_model import BaseModel
6
+ from .blocks import (
7
+ FeatureFusionBlock,
8
+ FeatureFusionBlock_custom,
9
+ Interpolate,
10
+ _make_encoder,
11
+ forward_vit,
12
+ )
13
+
14
+
15
+ def _make_fusion_block(features, use_bn):
16
+ return FeatureFusionBlock_custom(
17
+ features,
18
+ nn.ReLU(False),
19
+ deconv=False,
20
+ bn=use_bn,
21
+ expand=False,
22
+ align_corners=True,
23
+ )
24
+
25
+
26
+ class DPT(BaseModel):
27
+ def __init__(
28
+ self,
29
+ head,
30
+ features=256,
31
+ backbone="vitb_rn50_384",
32
+ readout="project",
33
+ channels_last=False,
34
+ use_bn=False,
35
+ ):
36
+
37
+ super(DPT, self).__init__()
38
+
39
+ self.channels_last = channels_last
40
+
41
+ hooks = {
42
+ "vitb_rn50_384": [0, 1, 8, 11],
43
+ "vitb16_384": [2, 5, 8, 11],
44
+ "vitl16_384": [5, 11, 17, 23],
45
+ }
46
+
47
+ # Instantiate backbone and reassemble blocks
48
+ self.pretrained, self.scratch = _make_encoder(
49
+ backbone,
50
+ features,
51
+ False, # Set to true if you want to train from scratch, uses ImageNet weights
52
+ groups=1,
53
+ expand=False,
54
+ exportable=False,
55
+ hooks=hooks[backbone],
56
+ use_readout=readout,
57
+ )
58
+
59
+ self.scratch.refinenet1 = _make_fusion_block(features, use_bn)
60
+ self.scratch.refinenet2 = _make_fusion_block(features, use_bn)
61
+ self.scratch.refinenet3 = _make_fusion_block(features, use_bn)
62
+ self.scratch.refinenet4 = _make_fusion_block(features, use_bn)
63
+
64
+ self.scratch.output_conv = head
65
+
66
+
67
+ def forward(self, x):
68
+ if self.channels_last == True:
69
+ x.contiguous(memory_format=torch.channels_last)
70
+
71
+ layer_1, layer_2, layer_3, layer_4 = forward_vit(self.pretrained, x)
72
+
73
+ layer_1_rn = self.scratch.layer1_rn(layer_1)
74
+ layer_2_rn = self.scratch.layer2_rn(layer_2)
75
+ layer_3_rn = self.scratch.layer3_rn(layer_3)
76
+ layer_4_rn = self.scratch.layer4_rn(layer_4)
77
+
78
+ path_4 = self.scratch.refinenet4(layer_4_rn)
79
+ path_3 = self.scratch.refinenet3(path_4, layer_3_rn)
80
+ path_2 = self.scratch.refinenet2(path_3, layer_2_rn)
81
+ path_1 = self.scratch.refinenet1(path_2, layer_1_rn)
82
+
83
+ out = self.scratch.output_conv(path_1)
84
+
85
+ return out
86
+
87
+
88
+ class DPTDepthModel(DPT):
89
+ def __init__(self, path=None, non_negative=True, **kwargs):
90
+ features = kwargs["features"] if "features" in kwargs else 256
91
+
92
+ head = nn.Sequential(
93
+ nn.Conv2d(features, features // 2, kernel_size=3, stride=1, padding=1),
94
+ Interpolate(scale_factor=2, mode="bilinear", align_corners=True),
95
+ nn.Conv2d(features // 2, 32, kernel_size=3, stride=1, padding=1),
96
+ nn.ReLU(True),
97
+ nn.Conv2d(32, 1, kernel_size=1, stride=1, padding=0),
98
+ nn.ReLU(True) if non_negative else nn.Identity(),
99
+ nn.Identity(),
100
+ )
101
+
102
+ super().__init__(head, **kwargs)
103
+
104
+ if path is not None:
105
+ self.load(path)
106
+
107
+ def forward(self, x):
108
+ return super().forward(x).squeeze(dim=1)
109
+
extensions/microsoftexcel-controlnet/annotator/midas/midas/midas_net.py ADDED
@@ -0,0 +1,76 @@
1
+ """MidasNet: Network for monocular depth estimation trained by mixing several datasets.
2
+ This file contains code that is adapted from
3
+ https://github.com/thomasjpfan/pytorch_refinenet/blob/master/pytorch_refinenet/refinenet/refinenet_4cascade.py
4
+ """
5
+ import torch
6
+ import torch.nn as nn
7
+
8
+ from .base_model import BaseModel
9
+ from .blocks import FeatureFusionBlock, Interpolate, _make_encoder
10
+
11
+
12
+ class MidasNet(BaseModel):
13
+ """Network for monocular depth estimation.
14
+ """
15
+
16
+ def __init__(self, path=None, features=256, non_negative=True):
17
+ """Init.
18
+
19
+ Args:
20
+ path (str, optional): Path to saved model. Defaults to None.
21
+ features (int, optional): Number of features. Defaults to 256.
22
+ backbone: the encoder backbone is fixed to resnext101_wsl (not an argument of this class)
23
+ """
24
+ print("Loading weights: ", path)
25
+
26
+ super(MidasNet, self).__init__()
27
+
28
+ use_pretrained = False if path is None else True
29
+
30
+ self.pretrained, self.scratch = _make_encoder(backbone="resnext101_wsl", features=features, use_pretrained=use_pretrained)
31
+
32
+ self.scratch.refinenet4 = FeatureFusionBlock(features)
33
+ self.scratch.refinenet3 = FeatureFusionBlock(features)
34
+ self.scratch.refinenet2 = FeatureFusionBlock(features)
35
+ self.scratch.refinenet1 = FeatureFusionBlock(features)
36
+
37
+ self.scratch.output_conv = nn.Sequential(
38
+ nn.Conv2d(features, 128, kernel_size=3, stride=1, padding=1),
39
+ Interpolate(scale_factor=2, mode="bilinear"),
40
+ nn.Conv2d(128, 32, kernel_size=3, stride=1, padding=1),
41
+ nn.ReLU(True),
42
+ nn.Conv2d(32, 1, kernel_size=1, stride=1, padding=0),
43
+ nn.ReLU(True) if non_negative else nn.Identity(),
44
+ )
45
+
46
+ if path:
47
+ self.load(path)
48
+
49
+ def forward(self, x):
50
+ """Forward pass.
51
+
52
+ Args:
53
+ x (tensor): input data (image)
54
+
55
+ Returns:
56
+ tensor: depth
57
+ """
58
+
59
+ layer_1 = self.pretrained.layer1(x)
60
+ layer_2 = self.pretrained.layer2(layer_1)
61
+ layer_3 = self.pretrained.layer3(layer_2)
62
+ layer_4 = self.pretrained.layer4(layer_3)
63
+
64
+ layer_1_rn = self.scratch.layer1_rn(layer_1)
65
+ layer_2_rn = self.scratch.layer2_rn(layer_2)
66
+ layer_3_rn = self.scratch.layer3_rn(layer_3)
67
+ layer_4_rn = self.scratch.layer4_rn(layer_4)
68
+
69
+ path_4 = self.scratch.refinenet4(layer_4_rn)
70
+ path_3 = self.scratch.refinenet3(path_4, layer_3_rn)
71
+ path_2 = self.scratch.refinenet2(path_3, layer_2_rn)
72
+ path_1 = self.scratch.refinenet1(path_2, layer_1_rn)
73
+
74
+ out = self.scratch.output_conv(path_1)
75
+
76
+ return torch.squeeze(out, dim=1)
extensions/microsoftexcel-controlnet/annotator/midas/midas/midas_net_custom.py ADDED
@@ -0,0 +1,128 @@
1
+ """MidasNet: Network for monocular depth estimation trained by mixing several datasets.
2
+ This file contains code that is adapted from
3
+ https://github.com/thomasjpfan/pytorch_refinenet/blob/master/pytorch_refinenet/refinenet/refinenet_4cascade.py
4
+ """
5
+ import torch
6
+ import torch.nn as nn
7
+
8
+ from .base_model import BaseModel
9
+ from .blocks import FeatureFusionBlock, FeatureFusionBlock_custom, Interpolate, _make_encoder
10
+
11
+
12
+ class MidasNet_small(BaseModel):
13
+ """Network for monocular depth estimation.
14
+ """
15
+
16
+ def __init__(self, path=None, features=64, backbone="efficientnet_lite3", non_negative=True, exportable=True, channels_last=False, align_corners=True,
17
+ blocks={'expand': True}):
18
+ """Init.
19
+
20
+ Args:
21
+ path (str, optional): Path to saved model. Defaults to None.
22
+ features (int, optional): Number of features. Defaults to 64.
23
+ backbone (str, optional): Backbone network for encoder. Defaults to efficientnet_lite3.
24
+ """
25
+ print("Loading weights: ", path)
26
+
27
+ super(MidasNet_small, self).__init__()
28
+
29
+ use_pretrained = False if path else True
30
+
31
+ self.channels_last = channels_last
32
+ self.blocks = blocks
33
+ self.backbone = backbone
34
+
35
+ self.groups = 1
36
+
37
+ features1=features
38
+ features2=features
39
+ features3=features
40
+ features4=features
41
+ self.expand = False
42
+ if "expand" in self.blocks and self.blocks['expand'] == True:
43
+ self.expand = True
44
+ features1=features
45
+ features2=features*2
46
+ features3=features*4
47
+ features4=features*8
48
+
49
+ self.pretrained, self.scratch = _make_encoder(self.backbone, features, use_pretrained, groups=self.groups, expand=self.expand, exportable=exportable)
50
+
51
+ self.scratch.activation = nn.ReLU(False)
52
+
53
+ self.scratch.refinenet4 = FeatureFusionBlock_custom(features4, self.scratch.activation, deconv=False, bn=False, expand=self.expand, align_corners=align_corners)
54
+ self.scratch.refinenet3 = FeatureFusionBlock_custom(features3, self.scratch.activation, deconv=False, bn=False, expand=self.expand, align_corners=align_corners)
55
+ self.scratch.refinenet2 = FeatureFusionBlock_custom(features2, self.scratch.activation, deconv=False, bn=False, expand=self.expand, align_corners=align_corners)
56
+ self.scratch.refinenet1 = FeatureFusionBlock_custom(features1, self.scratch.activation, deconv=False, bn=False, align_corners=align_corners)
57
+
58
+
59
+ self.scratch.output_conv = nn.Sequential(
60
+ nn.Conv2d(features, features//2, kernel_size=3, stride=1, padding=1, groups=self.groups),
61
+ Interpolate(scale_factor=2, mode="bilinear"),
62
+ nn.Conv2d(features//2, 32, kernel_size=3, stride=1, padding=1),
63
+ self.scratch.activation,
64
+ nn.Conv2d(32, 1, kernel_size=1, stride=1, padding=0),
65
+ nn.ReLU(True) if non_negative else nn.Identity(),
66
+ nn.Identity(),
67
+ )
68
+
69
+ if path:
70
+ self.load(path)
71
+
72
+
73
+ def forward(self, x):
74
+ """Forward pass.
75
+
76
+ Args:
77
+ x (tensor): input data (image)
78
+
79
+ Returns:
80
+ tensor: depth
81
+ """
82
+ if self.channels_last==True:
83
+ print("self.channels_last = ", self.channels_last)
84
+ x.contiguous(memory_format=torch.channels_last)
85
+
86
+
87
+ layer_1 = self.pretrained.layer1(x)
88
+ layer_2 = self.pretrained.layer2(layer_1)
89
+ layer_3 = self.pretrained.layer3(layer_2)
90
+ layer_4 = self.pretrained.layer4(layer_3)
91
+
92
+ layer_1_rn = self.scratch.layer1_rn(layer_1)
93
+ layer_2_rn = self.scratch.layer2_rn(layer_2)
94
+ layer_3_rn = self.scratch.layer3_rn(layer_3)
95
+ layer_4_rn = self.scratch.layer4_rn(layer_4)
96
+
97
+
98
+ path_4 = self.scratch.refinenet4(layer_4_rn)
99
+ path_3 = self.scratch.refinenet3(path_4, layer_3_rn)
100
+ path_2 = self.scratch.refinenet2(path_3, layer_2_rn)
101
+ path_1 = self.scratch.refinenet1(path_2, layer_1_rn)
102
+
103
+ out = self.scratch.output_conv(path_1)
104
+
105
+ return torch.squeeze(out, dim=1)
106
+
107
+
108
+
109
+ def fuse_model(m):
110
+ prev_previous_type = nn.Identity()
111
+ prev_previous_name = ''
112
+ previous_type = nn.Identity()
113
+ previous_name = ''
114
+ for name, module in m.named_modules():
115
+ if prev_previous_type == nn.Conv2d and previous_type == nn.BatchNorm2d and type(module) == nn.ReLU:
116
+ # print("FUSED ", prev_previous_name, previous_name, name)
117
+ torch.quantization.fuse_modules(m, [prev_previous_name, previous_name, name], inplace=True)
118
+ elif prev_previous_type == nn.Conv2d and previous_type == nn.BatchNorm2d:
119
+ # print("FUSED ", prev_previous_name, previous_name)
120
+ torch.quantization.fuse_modules(m, [prev_previous_name, previous_name], inplace=True)
121
+ # elif previous_type == nn.Conv2d and type(module) == nn.ReLU:
122
+ # print("FUSED ", previous_name, name)
123
+ # torch.quantization.fuse_modules(m, [previous_name, name], inplace=True)
124
+
125
+ prev_previous_type = previous_type
126
+ prev_previous_name = previous_name
127
+ previous_type = type(module)
128
+ previous_name = name
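A hedged sketch of what fuse_model does; the toy Sequential below is an assumption, not part of MiDaS. Consecutive Conv2d -> BatchNorm2d -> ReLU triples are fused in place (eval mode is required), the usual preparation step before quantization.

import torch.nn as nn
from annotator.midas.midas.midas_net_custom import fuse_model  # module path per this diff

m = nn.Sequential(nn.Conv2d(3, 8, 3, padding=1), nn.BatchNorm2d(8), nn.ReLU()).eval()
fuse_model(m)   # the Conv/BN/ReLU triple collapses into one fused module
print(m)        # the BatchNorm2d and ReLU slots are now nn.Identity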
extensions/microsoftexcel-controlnet/annotator/midas/midas/transforms.py ADDED
@@ -0,0 +1,234 @@
1
+ import numpy as np
2
+ import cv2
3
+ import math
4
+
5
+
6
+ def apply_min_size(sample, size, image_interpolation_method=cv2.INTER_AREA):
7
+ """Resize the sample to ensure the given size. Keeps aspect ratio.
8
+
9
+ Args:
10
+ sample (dict): sample
11
+ size (tuple): image size
12
+
13
+ Returns:
14
+ tuple: new size
15
+ """
16
+ shape = list(sample["disparity"].shape)
17
+
18
+ if shape[0] >= size[0] and shape[1] >= size[1]:
19
+ return sample
20
+
21
+ scale = [0, 0]
22
+ scale[0] = size[0] / shape[0]
23
+ scale[1] = size[1] / shape[1]
24
+
25
+ scale = max(scale)
26
+
27
+ shape[0] = math.ceil(scale * shape[0])
28
+ shape[1] = math.ceil(scale * shape[1])
29
+
30
+ # resize
31
+ sample["image"] = cv2.resize(
32
+ sample["image"], tuple(shape[::-1]), interpolation=image_interpolation_method
33
+ )
34
+
35
+ sample["disparity"] = cv2.resize(
36
+ sample["disparity"], tuple(shape[::-1]), interpolation=cv2.INTER_NEAREST
37
+ )
38
+ sample["mask"] = cv2.resize(
39
+ sample["mask"].astype(np.float32),
40
+ tuple(shape[::-1]),
41
+ interpolation=cv2.INTER_NEAREST,
42
+ )
43
+ sample["mask"] = sample["mask"].astype(bool)
44
+
45
+ return tuple(shape)
46
+
47
+
48
+ class Resize(object):
49
+ """Resize sample to given size (width, height).
50
+ """
51
+
52
+ def __init__(
53
+ self,
54
+ width,
55
+ height,
56
+ resize_target=True,
57
+ keep_aspect_ratio=False,
58
+ ensure_multiple_of=1,
59
+ resize_method="lower_bound",
60
+ image_interpolation_method=cv2.INTER_AREA,
61
+ ):
62
+ """Init.
63
+
64
+ Args:
65
+ width (int): desired output width
66
+ height (int): desired output height
67
+ resize_target (bool, optional):
68
+ True: Resize the full sample (image, mask, target).
69
+ False: Resize image only.
70
+ Defaults to True.
71
+ keep_aspect_ratio (bool, optional):
72
+ True: Keep the aspect ratio of the input sample.
73
+ Output sample might not have the given width and height, and
74
+ resize behaviour depends on the parameter 'resize_method'.
75
+ Defaults to False.
76
+ ensure_multiple_of (int, optional):
77
+ Output width and height are constrained to be a multiple of this parameter.
78
+ Defaults to 1.
79
+ resize_method (str, optional):
80
+ "lower_bound": Output will be at least as large as the given size.
81
+ "upper_bound": Output will be at max as large as the given size. (Output size might be smaller than given size.)
82
+ "minimal": Scale as little as possible. (Output size might be smaller than given size.)
83
+ Defaults to "lower_bound".
84
+ """
85
+ self.__width = width
86
+ self.__height = height
87
+
88
+ self.__resize_target = resize_target
89
+ self.__keep_aspect_ratio = keep_aspect_ratio
90
+ self.__multiple_of = ensure_multiple_of
91
+ self.__resize_method = resize_method
92
+ self.__image_interpolation_method = image_interpolation_method
93
+
94
+ def constrain_to_multiple_of(self, x, min_val=0, max_val=None):
95
+ y = (np.round(x / self.__multiple_of) * self.__multiple_of).astype(int)
96
+
97
+ if max_val is not None and y > max_val:
98
+ y = (np.floor(x / self.__multiple_of) * self.__multiple_of).astype(int)
99
+
100
+ if y < min_val:
101
+ y = (np.ceil(x / self.__multiple_of) * self.__multiple_of).astype(int)
102
+
103
+ return y
104
+
105
+ def get_size(self, width, height):
106
+ # determine new height and width
107
+ scale_height = self.__height / height
108
+ scale_width = self.__width / width
109
+
110
+ if self.__keep_aspect_ratio:
111
+ if self.__resize_method == "lower_bound":
112
+ # scale such that output size is lower bound
113
+ if scale_width > scale_height:
114
+ # fit width
115
+ scale_height = scale_width
116
+ else:
117
+ # fit height
118
+ scale_width = scale_height
119
+ elif self.__resize_method == "upper_bound":
120
+ # scale such that output size is upper bound
121
+ if scale_width < scale_height:
122
+ # fit width
123
+ scale_height = scale_width
124
+ else:
125
+ # fit height
126
+ scale_width = scale_height
127
+ elif self.__resize_method == "minimal":
128
+ # scale as little as possible
129
+ if abs(1 - scale_width) < abs(1 - scale_height):
130
+ # fit width
131
+ scale_height = scale_width
132
+ else:
133
+ # fit height
134
+ scale_width = scale_height
135
+ else:
136
+ raise ValueError(
137
+ f"resize_method {self.__resize_method} not implemented"
138
+ )
139
+
140
+ if self.__resize_method == "lower_bound":
141
+ new_height = self.constrain_to_multiple_of(
142
+ scale_height * height, min_val=self.__height
143
+ )
144
+ new_width = self.constrain_to_multiple_of(
145
+ scale_width * width, min_val=self.__width
146
+ )
147
+ elif self.__resize_method == "upper_bound":
148
+ new_height = self.constrain_to_multiple_of(
149
+ scale_height * height, max_val=self.__height
150
+ )
151
+ new_width = self.constrain_to_multiple_of(
152
+ scale_width * width, max_val=self.__width
153
+ )
154
+ elif self.__resize_method == "minimal":
155
+ new_height = self.constrain_to_multiple_of(scale_height * height)
156
+ new_width = self.constrain_to_multiple_of(scale_width * width)
157
+ else:
158
+ raise ValueError(f"resize_method {self.__resize_method} not implemented")
159
+
160
+ return (new_width, new_height)
161
+
162
+ def __call__(self, sample):
163
+ width, height = self.get_size(
164
+ sample["image"].shape[1], sample["image"].shape[0]
165
+ )
166
+
167
+ # resize sample
168
+ sample["image"] = cv2.resize(
169
+ sample["image"],
170
+ (width, height),
171
+ interpolation=self.__image_interpolation_method,
172
+ )
173
+
174
+ if self.__resize_target:
175
+ if "disparity" in sample:
176
+ sample["disparity"] = cv2.resize(
177
+ sample["disparity"],
178
+ (width, height),
179
+ interpolation=cv2.INTER_NEAREST,
180
+ )
181
+
182
+ if "depth" in sample:
183
+ sample["depth"] = cv2.resize(
184
+ sample["depth"], (width, height), interpolation=cv2.INTER_NEAREST
185
+ )
186
+
187
+ sample["mask"] = cv2.resize(
188
+ sample["mask"].astype(np.float32),
189
+ (width, height),
190
+ interpolation=cv2.INTER_NEAREST,
191
+ )
192
+ sample["mask"] = sample["mask"].astype(bool)
193
+
194
+ return sample
195
+
196
+
197
+ class NormalizeImage(object):
198
+ """Normalize image by given mean and std.
199
+ """
200
+
201
+ def __init__(self, mean, std):
202
+ self.__mean = mean
203
+ self.__std = std
204
+
205
+ def __call__(self, sample):
206
+ sample["image"] = (sample["image"] - self.__mean) / self.__std
207
+
208
+ return sample
209
+
210
+
211
+ class PrepareForNet(object):
212
+ """Prepare sample for usage as network input.
213
+ """
214
+
215
+ def __init__(self):
216
+ pass
217
+
218
+ def __call__(self, sample):
219
+ image = np.transpose(sample["image"], (2, 0, 1))
220
+ sample["image"] = np.ascontiguousarray(image).astype(np.float32)
221
+
222
+ if "mask" in sample:
223
+ sample["mask"] = sample["mask"].astype(np.float32)
224
+ sample["mask"] = np.ascontiguousarray(sample["mask"])
225
+
226
+ if "disparity" in sample:
227
+ disparity = sample["disparity"].astype(np.float32)
228
+ sample["disparity"] = np.ascontiguousarray(disparity)
229
+
230
+ if "depth" in sample:
231
+ depth = sample["depth"].astype(np.float32)
232
+ sample["depth"] = np.ascontiguousarray(depth)
233
+
234
+ return sample
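A worked example of the Resize policy defined above (importing it from this module path is an assumption): with keep_aspect_ratio=True, ensure_multiple_of=32 and resize_method="minimal", a 480x640 image is scaled by the factor closest to 1 that fits the target height, giving a 384x512 output.

import numpy as np
from annotator.midas.midas.transforms import Resize

resize = Resize(384, 384, resize_target=False, keep_aspect_ratio=True,
                ensure_multiple_of=32, resize_method="minimal")
sample = {"image": np.zeros((480, 640, 3), dtype=np.float32)}
print(resize(sample)["image"].shape)   # (384, 512, 3)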
extensions/microsoftexcel-controlnet/annotator/midas/midas/vit.py ADDED
@@ -0,0 +1,491 @@
1
+ import torch
2
+ import torch.nn as nn
3
+ import timm
4
+ import types
5
+ import math
6
+ import torch.nn.functional as F
7
+
8
+
9
+ class Slice(nn.Module):
10
+ def __init__(self, start_index=1):
11
+ super(Slice, self).__init__()
12
+ self.start_index = start_index
13
+
14
+ def forward(self, x):
15
+ return x[:, self.start_index :]
16
+
17
+
18
+ class AddReadout(nn.Module):
19
+ def __init__(self, start_index=1):
20
+ super(AddReadout, self).__init__()
21
+ self.start_index = start_index
22
+
23
+ def forward(self, x):
24
+ if self.start_index == 2:
25
+ readout = (x[:, 0] + x[:, 1]) / 2
26
+ else:
27
+ readout = x[:, 0]
28
+ return x[:, self.start_index :] + readout.unsqueeze(1)
29
+
30
+
31
+ class ProjectReadout(nn.Module):
32
+ def __init__(self, in_features, start_index=1):
33
+ super(ProjectReadout, self).__init__()
34
+ self.start_index = start_index
35
+
36
+ self.project = nn.Sequential(nn.Linear(2 * in_features, in_features), nn.GELU())
37
+
38
+ def forward(self, x):
39
+ readout = x[:, 0].unsqueeze(1).expand_as(x[:, self.start_index :])
40
+ features = torch.cat((x[:, self.start_index :], readout), -1)
41
+
42
+ return self.project(features)
43
+
44
+
45
+ class Transpose(nn.Module):
46
+ def __init__(self, dim0, dim1):
47
+ super(Transpose, self).__init__()
48
+ self.dim0 = dim0
49
+ self.dim1 = dim1
50
+
51
+ def forward(self, x):
52
+ x = x.transpose(self.dim0, self.dim1)
53
+ return x
54
+
55
+
56
+ def forward_vit(pretrained, x):
57
+ b, c, h, w = x.shape
58
+
59
+ glob = pretrained.model.forward_flex(x)
60
+
61
+ layer_1 = pretrained.activations["1"]
62
+ layer_2 = pretrained.activations["2"]
63
+ layer_3 = pretrained.activations["3"]
64
+ layer_4 = pretrained.activations["4"]
65
+
66
+ layer_1 = pretrained.act_postprocess1[0:2](layer_1)
67
+ layer_2 = pretrained.act_postprocess2[0:2](layer_2)
68
+ layer_3 = pretrained.act_postprocess3[0:2](layer_3)
69
+ layer_4 = pretrained.act_postprocess4[0:2](layer_4)
70
+
71
+ unflatten = nn.Sequential(
72
+ nn.Unflatten(
73
+ 2,
74
+ torch.Size(
75
+ [
76
+ h // pretrained.model.patch_size[1],
77
+ w // pretrained.model.patch_size[0],
78
+ ]
79
+ ),
80
+ )
81
+ )
82
+
83
+ if layer_1.ndim == 3:
84
+ layer_1 = unflatten(layer_1)
85
+ if layer_2.ndim == 3:
86
+ layer_2 = unflatten(layer_2)
87
+ if layer_3.ndim == 3:
88
+ layer_3 = unflatten(layer_3)
89
+ if layer_4.ndim == 3:
90
+ layer_4 = unflatten(layer_4)
91
+
92
+ layer_1 = pretrained.act_postprocess1[3 : len(pretrained.act_postprocess1)](layer_1)
93
+ layer_2 = pretrained.act_postprocess2[3 : len(pretrained.act_postprocess2)](layer_2)
94
+ layer_3 = pretrained.act_postprocess3[3 : len(pretrained.act_postprocess3)](layer_3)
95
+ layer_4 = pretrained.act_postprocess4[3 : len(pretrained.act_postprocess4)](layer_4)
96
+
97
+ return layer_1, layer_2, layer_3, layer_4
98
+
99
+
100
+ def _resize_pos_embed(self, posemb, gs_h, gs_w):
101
+ posemb_tok, posemb_grid = (
102
+ posemb[:, : self.start_index],
103
+ posemb[0, self.start_index :],
104
+ )
105
+
106
+ gs_old = int(math.sqrt(len(posemb_grid)))
107
+
108
+ posemb_grid = posemb_grid.reshape(1, gs_old, gs_old, -1).permute(0, 3, 1, 2)
109
+ posemb_grid = F.interpolate(posemb_grid, size=(gs_h, gs_w), mode="bilinear")
110
+ posemb_grid = posemb_grid.permute(0, 2, 3, 1).reshape(1, gs_h * gs_w, -1)
111
+
112
+ posemb = torch.cat([posemb_tok, posemb_grid], dim=1)
113
+
114
+ return posemb
115
+
116
+
117
+ def forward_flex(self, x):
118
+ b, c, h, w = x.shape
119
+
120
+ pos_embed = self._resize_pos_embed(
121
+ self.pos_embed, h // self.patch_size[1], w // self.patch_size[0]
122
+ )
123
+
124
+ B = x.shape[0]
125
+
126
+ if hasattr(self.patch_embed, "backbone"):
127
+ x = self.patch_embed.backbone(x)
128
+ if isinstance(x, (list, tuple)):
129
+ x = x[-1] # last feature if backbone outputs list/tuple of features
130
+
131
+ x = self.patch_embed.proj(x).flatten(2).transpose(1, 2)
132
+
133
+ if getattr(self, "dist_token", None) is not None:
134
+ cls_tokens = self.cls_token.expand(
135
+ B, -1, -1
136
+ ) # stole cls_tokens impl from Phil Wang, thanks
137
+ dist_token = self.dist_token.expand(B, -1, -1)
138
+ x = torch.cat((cls_tokens, dist_token, x), dim=1)
139
+ else:
140
+ cls_tokens = self.cls_token.expand(
141
+ B, -1, -1
142
+ ) # stole cls_tokens impl from Phil Wang, thanks
143
+ x = torch.cat((cls_tokens, x), dim=1)
144
+
145
+ x = x + pos_embed
146
+ x = self.pos_drop(x)
147
+
148
+ for blk in self.blocks:
149
+ x = blk(x)
150
+
151
+ x = self.norm(x)
152
+
153
+ return x
154
+
155
+
156
+ activations = {}
157
+
158
+
159
+ def get_activation(name):
160
+ def hook(model, input, output):
161
+ activations[name] = output
162
+
163
+ return hook
164
+
165
+
166
+ def get_readout_oper(vit_features, features, use_readout, start_index=1):
167
+ if use_readout == "ignore":
168
+ readout_oper = [Slice(start_index)] * len(features)
169
+ elif use_readout == "add":
170
+ readout_oper = [AddReadout(start_index)] * len(features)
171
+ elif use_readout == "project":
172
+ readout_oper = [
173
+ ProjectReadout(vit_features, start_index) for out_feat in features
174
+ ]
175
+ else:
176
+ assert (
177
+ False
178
+ ), "wrong operation for readout token, use_readout can be 'ignore', 'add', or 'project'"
179
+
180
+ return readout_oper
181
+
182
+
183
+ def _make_vit_b16_backbone(
184
+ model,
185
+ features=[96, 192, 384, 768],
186
+ size=[384, 384],
187
+ hooks=[2, 5, 8, 11],
188
+ vit_features=768,
189
+ use_readout="ignore",
190
+ start_index=1,
191
+ ):
192
+ pretrained = nn.Module()
193
+
194
+ pretrained.model = model
195
+ pretrained.model.blocks[hooks[0]].register_forward_hook(get_activation("1"))
196
+ pretrained.model.blocks[hooks[1]].register_forward_hook(get_activation("2"))
197
+ pretrained.model.blocks[hooks[2]].register_forward_hook(get_activation("3"))
198
+ pretrained.model.blocks[hooks[3]].register_forward_hook(get_activation("4"))
199
+
200
+ pretrained.activations = activations
201
+
202
+ readout_oper = get_readout_oper(vit_features, features, use_readout, start_index)
203
+
204
+ # 32, 48, 136, 384
205
+ pretrained.act_postprocess1 = nn.Sequential(
206
+ readout_oper[0],
207
+ Transpose(1, 2),
208
+ nn.Unflatten(2, torch.Size([size[0] // 16, size[1] // 16])),
209
+ nn.Conv2d(
210
+ in_channels=vit_features,
211
+ out_channels=features[0],
212
+ kernel_size=1,
213
+ stride=1,
214
+ padding=0,
215
+ ),
216
+ nn.ConvTranspose2d(
217
+ in_channels=features[0],
218
+ out_channels=features[0],
219
+ kernel_size=4,
220
+ stride=4,
221
+ padding=0,
222
+ bias=True,
223
+ dilation=1,
224
+ groups=1,
225
+ ),
226
+ )
227
+
228
+ pretrained.act_postprocess2 = nn.Sequential(
229
+ readout_oper[1],
230
+ Transpose(1, 2),
231
+ nn.Unflatten(2, torch.Size([size[0] // 16, size[1] // 16])),
232
+ nn.Conv2d(
233
+ in_channels=vit_features,
234
+ out_channels=features[1],
235
+ kernel_size=1,
236
+ stride=1,
237
+ padding=0,
238
+ ),
239
+ nn.ConvTranspose2d(
240
+ in_channels=features[1],
241
+ out_channels=features[1],
242
+ kernel_size=2,
243
+ stride=2,
244
+ padding=0,
245
+ bias=True,
246
+ dilation=1,
247
+ groups=1,
248
+ ),
249
+ )
250
+
251
+ pretrained.act_postprocess3 = nn.Sequential(
252
+ readout_oper[2],
253
+ Transpose(1, 2),
254
+ nn.Unflatten(2, torch.Size([size[0] // 16, size[1] // 16])),
255
+ nn.Conv2d(
256
+ in_channels=vit_features,
257
+ out_channels=features[2],
258
+ kernel_size=1,
259
+ stride=1,
260
+ padding=0,
261
+ ),
262
+ )
263
+
264
+ pretrained.act_postprocess4 = nn.Sequential(
265
+ readout_oper[3],
266
+ Transpose(1, 2),
267
+ nn.Unflatten(2, torch.Size([size[0] // 16, size[1] // 16])),
268
+ nn.Conv2d(
269
+ in_channels=vit_features,
270
+ out_channels=features[3],
271
+ kernel_size=1,
272
+ stride=1,
273
+ padding=0,
274
+ ),
275
+ nn.Conv2d(
276
+ in_channels=features[3],
277
+ out_channels=features[3],
278
+ kernel_size=3,
279
+ stride=2,
280
+ padding=1,
281
+ ),
282
+ )
283
+
284
+ pretrained.model.start_index = start_index
285
+ pretrained.model.patch_size = [16, 16]
286
+
287
+ # We inject this function into the VisionTransformer instances so that
288
+ # we can use it with interpolated position embeddings without modifying the library source.
289
+ pretrained.model.forward_flex = types.MethodType(forward_flex, pretrained.model)
290
+ pretrained.model._resize_pos_embed = types.MethodType(
291
+ _resize_pos_embed, pretrained.model
292
+ )
293
+
294
+ return pretrained
295
+
296
+
297
+ def _make_pretrained_vitl16_384(pretrained, use_readout="ignore", hooks=None):
298
+ model = timm.create_model("vit_large_patch16_384", pretrained=pretrained)
299
+
300
+ hooks = [5, 11, 17, 23] if hooks is None else hooks
301
+ return _make_vit_b16_backbone(
302
+ model,
303
+ features=[256, 512, 1024, 1024],
304
+ hooks=hooks,
305
+ vit_features=1024,
306
+ use_readout=use_readout,
307
+ )
308
+
309
+
310
+ def _make_pretrained_vitb16_384(pretrained, use_readout="ignore", hooks=None):
311
+ model = timm.create_model("vit_base_patch16_384", pretrained=pretrained)
312
+
313
+ hooks = [2, 5, 8, 11] if hooks is None else hooks
314
+ return _make_vit_b16_backbone(
315
+ model, features=[96, 192, 384, 768], hooks=hooks, use_readout=use_readout
316
+ )
317
+
318
+
319
+ def _make_pretrained_deitb16_384(pretrained, use_readout="ignore", hooks=None):
320
+ model = timm.create_model("vit_deit_base_patch16_384", pretrained=pretrained)
321
+
322
+ hooks = [2, 5, 8, 11] if hooks is None else hooks
323
+ return _make_vit_b16_backbone(
324
+ model, features=[96, 192, 384, 768], hooks=hooks, use_readout=use_readout
325
+ )
326
+
327
+
328
+ def _make_pretrained_deitb16_distil_384(pretrained, use_readout="ignore", hooks=None):
329
+ model = timm.create_model(
330
+ "vit_deit_base_distilled_patch16_384", pretrained=pretrained
331
+ )
332
+
333
+ hooks = [2, 5, 8, 11] if hooks is None else hooks
334
+ return _make_vit_b16_backbone(
335
+ model,
336
+ features=[96, 192, 384, 768],
337
+ hooks=hooks,
338
+ use_readout=use_readout,
339
+ start_index=2,
340
+ )
341
+
342
+
343
+ def _make_vit_b_rn50_backbone(
344
+ model,
345
+ features=[256, 512, 768, 768],
346
+ size=[384, 384],
347
+ hooks=[0, 1, 8, 11],
348
+ vit_features=768,
349
+ use_vit_only=False,
350
+ use_readout="ignore",
351
+ start_index=1,
352
+ ):
353
+ pretrained = nn.Module()
354
+
355
+ pretrained.model = model
356
+
357
+ if use_vit_only:
358
+ pretrained.model.blocks[hooks[0]].register_forward_hook(get_activation("1"))
359
+ pretrained.model.blocks[hooks[1]].register_forward_hook(get_activation("2"))
360
+ else:
361
+ pretrained.model.patch_embed.backbone.stages[0].register_forward_hook(
362
+ get_activation("1")
363
+ )
364
+ pretrained.model.patch_embed.backbone.stages[1].register_forward_hook(
365
+ get_activation("2")
366
+ )
367
+
368
+ pretrained.model.blocks[hooks[2]].register_forward_hook(get_activation("3"))
369
+ pretrained.model.blocks[hooks[3]].register_forward_hook(get_activation("4"))
370
+
371
+ pretrained.activations = activations
372
+
373
+ readout_oper = get_readout_oper(vit_features, features, use_readout, start_index)
374
+
375
+ if use_vit_only:
376
+ pretrained.act_postprocess1 = nn.Sequential(
377
+ readout_oper[0],
378
+ Transpose(1, 2),
379
+ nn.Unflatten(2, torch.Size([size[0] // 16, size[1] // 16])),
380
+ nn.Conv2d(
381
+ in_channels=vit_features,
382
+ out_channels=features[0],
383
+ kernel_size=1,
384
+ stride=1,
385
+ padding=0,
386
+ ),
387
+ nn.ConvTranspose2d(
388
+ in_channels=features[0],
389
+ out_channels=features[0],
390
+ kernel_size=4,
391
+ stride=4,
392
+ padding=0,
393
+ bias=True,
394
+ dilation=1,
395
+ groups=1,
396
+ ),
397
+ )
398
+
399
+ pretrained.act_postprocess2 = nn.Sequential(
400
+ readout_oper[1],
401
+ Transpose(1, 2),
402
+ nn.Unflatten(2, torch.Size([size[0] // 16, size[1] // 16])),
403
+ nn.Conv2d(
404
+ in_channels=vit_features,
405
+ out_channels=features[1],
406
+ kernel_size=1,
407
+ stride=1,
408
+ padding=0,
409
+ ),
410
+ nn.ConvTranspose2d(
411
+ in_channels=features[1],
412
+ out_channels=features[1],
413
+ kernel_size=2,
414
+ stride=2,
415
+ padding=0,
416
+ bias=True,
417
+ dilation=1,
418
+ groups=1,
419
+ ),
420
+ )
421
+ else:
422
+ pretrained.act_postprocess1 = nn.Sequential(
423
+ nn.Identity(), nn.Identity(), nn.Identity()
424
+ )
425
+ pretrained.act_postprocess2 = nn.Sequential(
426
+ nn.Identity(), nn.Identity(), nn.Identity()
427
+ )
428
+
429
+ pretrained.act_postprocess3 = nn.Sequential(
430
+ readout_oper[2],
431
+ Transpose(1, 2),
432
+ nn.Unflatten(2, torch.Size([size[0] // 16, size[1] // 16])),
433
+ nn.Conv2d(
434
+ in_channels=vit_features,
435
+ out_channels=features[2],
436
+ kernel_size=1,
437
+ stride=1,
438
+ padding=0,
439
+ ),
440
+ )
441
+
442
+ pretrained.act_postprocess4 = nn.Sequential(
443
+ readout_oper[3],
444
+ Transpose(1, 2),
445
+ nn.Unflatten(2, torch.Size([size[0] // 16, size[1] // 16])),
446
+ nn.Conv2d(
447
+ in_channels=vit_features,
448
+ out_channels=features[3],
449
+ kernel_size=1,
450
+ stride=1,
451
+ padding=0,
452
+ ),
453
+ nn.Conv2d(
454
+ in_channels=features[3],
455
+ out_channels=features[3],
456
+ kernel_size=3,
457
+ stride=2,
458
+ padding=1,
459
+ ),
460
+ )
461
+
462
+ pretrained.model.start_index = start_index
463
+ pretrained.model.patch_size = [16, 16]
464
+
465
+ # We inject this function into the VisionTransformer instances so that
466
+ # we can use it with interpolated position embeddings without modifying the library source.
467
+ pretrained.model.forward_flex = types.MethodType(forward_flex, pretrained.model)
468
+
469
+ # We inject this function into the VisionTransformer instances so that
470
+ # we can use it with interpolated position embeddings without modifying the library source.
471
+ pretrained.model._resize_pos_embed = types.MethodType(
472
+ _resize_pos_embed, pretrained.model
473
+ )
474
+
475
+ return pretrained
476
+
477
+
478
+ def _make_pretrained_vitb_rn50_384(
479
+ pretrained, use_readout="ignore", hooks=None, use_vit_only=False
480
+ ):
481
+ model = timm.create_model("vit_base_resnet50_384", pretrained=pretrained)
482
+
483
+ hooks = [0, 1, 8, 11] if hooks is None else hooks
484
+ return _make_vit_b_rn50_backbone(
485
+ model,
486
+ features=[256, 512, 768, 768],
487
+ size=[384, 384],
488
+ hooks=hooks,
489
+ use_vit_only=use_vit_only,
490
+ use_readout=use_readout,
491
+ )
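Taken together, the helpers above wire forward hooks and the act_postprocess heads onto a timm ViT, and forward_vit then returns the four hooked feature maps. A minimal sketch, assuming timm and torch are installed and this module is imported as written (pretrained=True downloads weights through timm; the input size must match the size=[384, 384] baked into the Unflatten modules):

    import torch

    backbone = _make_pretrained_vitb_rn50_384(True, use_readout="project")
    x = torch.randn(1, 3, 384, 384)  # 384x384 RGB batch
    with torch.no_grad():
        layer_1, layer_2, layer_3, layer_4 = forward_vit(backbone, x)
    # layer_1..layer_4 are progressively coarser feature maps
    # (strides 4, 8, 16 and 32 for a 384x384 input).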
extensions/microsoftexcel-controlnet/annotator/midas/utils.py ADDED
@@ -0,0 +1,189 @@
1
+ """Utils for monoDepth."""
2
+ import sys
3
+ import re
4
+ import numpy as np
5
+ import cv2
6
+ import torch
7
+
8
+
9
+ def read_pfm(path):
10
+ """Read pfm file.
11
+
12
+ Args:
13
+ path (str): path to file
14
+
15
+ Returns:
16
+ tuple: (data, scale)
17
+ """
18
+ with open(path, "rb") as file:
19
+
20
+ color = None
21
+ width = None
22
+ height = None
23
+ scale = None
24
+ endian = None
25
+
26
+ header = file.readline().rstrip()
27
+ if header.decode("ascii") == "PF":
28
+ color = True
29
+ elif header.decode("ascii") == "Pf":
30
+ color = False
31
+ else:
32
+ raise Exception("Not a PFM file: " + path)
33
+
34
+ dim_match = re.match(r"^(\d+)\s(\d+)\s$", file.readline().decode("ascii"))
35
+ if dim_match:
36
+ width, height = list(map(int, dim_match.groups()))
37
+ else:
38
+ raise Exception("Malformed PFM header.")
39
+
40
+ scale = float(file.readline().decode("ascii").rstrip())
41
+ if scale < 0:
42
+ # little-endian
43
+ endian = "<"
44
+ scale = -scale
45
+ else:
46
+ # big-endian
47
+ endian = ">"
48
+
49
+ data = np.fromfile(file, endian + "f")
50
+ shape = (height, width, 3) if color else (height, width)
51
+
52
+ data = np.reshape(data, shape)
53
+ data = np.flipud(data)
54
+
55
+ return data, scale
56
+
57
+
58
+ def write_pfm(path, image, scale=1):
59
+ """Write pfm file.
60
+
61
+ Args:
62
+ path (str): path to file
63
+ image (array): data
64
+ scale (int, optional): Scale. Defaults to 1.
65
+ """
66
+
67
+ with open(path, "wb") as file:
68
+ color = None
69
+
70
+ if image.dtype.name != "float32":
71
+ raise Exception("Image dtype must be float32.")
72
+
73
+ image = np.flipud(image)
74
+
75
+ if len(image.shape) == 3 and image.shape[2] == 3: # color image
76
+ color = True
77
+ elif (
78
+ len(image.shape) == 2 or len(image.shape) == 3 and image.shape[2] == 1
79
+ ): # greyscale
80
+ color = False
81
+ else:
82
+ raise Exception("Image must have H x W x 3, H x W x 1 or H x W dimensions.")
83
+
84
+ file.write("PF\n" if color else "Pf\n".encode())
85
+ file.write("%d %d\n".encode() % (image.shape[1], image.shape[0]))
86
+
87
+ endian = image.dtype.byteorder
88
+
89
+ if endian == "<" or endian == "=" and sys.byteorder == "little":
90
+ scale = -scale
91
+
92
+ file.write("%f\n".encode() % scale)
93
+
94
+ image.tofile(file)
95
+
96
+
97
+ def read_image(path):
98
+ """Read image and output RGB image (0-1).
99
+
100
+ Args:
101
+ path (str): path to file
102
+
103
+ Returns:
104
+ array: RGB image (0-1)
105
+ """
106
+ img = cv2.imread(path)
107
+
108
+ if img.ndim == 2:
109
+ img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
110
+
111
+ img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) / 255.0
112
+
113
+ return img
114
+
115
+
116
+ def resize_image(img):
117
+ """Resize image and make it fit for network.
118
+
119
+ Args:
120
+ img (array): image
121
+
122
+ Returns:
123
+ tensor: data ready for network
124
+ """
125
+ height_orig = img.shape[0]
126
+ width_orig = img.shape[1]
127
+
128
+ if width_orig > height_orig:
129
+ scale = width_orig / 384
130
+ else:
131
+ scale = height_orig / 384
132
+
133
+ height = (np.ceil(height_orig / scale / 32) * 32).astype(int)
134
+ width = (np.ceil(width_orig / scale / 32) * 32).astype(int)
135
+
136
+ img_resized = cv2.resize(img, (width, height), interpolation=cv2.INTER_AREA)
137
+
138
+ img_resized = (
139
+ torch.from_numpy(np.transpose(img_resized, (2, 0, 1))).contiguous().float()
140
+ )
141
+ img_resized = img_resized.unsqueeze(0)
142
+
143
+ return img_resized
144
+
145
+
146
+ def resize_depth(depth, width, height):
147
+ """Resize depth map and bring to CPU (numpy).
148
+
149
+ Args:
150
+ depth (tensor): depth
151
+ width (int): image width
152
+ height (int): image height
153
+
154
+ Returns:
155
+ array: processed depth
156
+ """
157
+ depth = torch.squeeze(depth[0, :, :, :]).to("cpu")
158
+
159
+ depth_resized = cv2.resize(
160
+ depth.numpy(), (width, height), interpolation=cv2.INTER_CUBIC
161
+ )
162
+
163
+ return depth_resized
164
+
165
+ def write_depth(path, depth, bits=1):
166
+ """Write depth map to pfm and png file.
167
+
168
+ Args:
169
+ path (str): filepath without extension
170
+ depth (array): depth
171
+ """
172
+ write_pfm(path + ".pfm", depth.astype(np.float32))
173
+
174
+ depth_min = depth.min()
175
+ depth_max = depth.max()
176
+
177
+ max_val = (2**(8*bits))-1
178
+
179
+ if depth_max - depth_min > np.finfo("float").eps:
180
+ out = max_val * (depth - depth_min) / (depth_max - depth_min)
181
+ else:
182
+ out = np.zeros(depth.shape, dtype=depth.dtype)
183
+
184
+ if bits == 1:
185
+ cv2.imwrite(path + ".png", out.astype("uint8"))
186
+ elif bits == 2:
187
+ cv2.imwrite(path + ".png", out.astype("uint16"))
188
+
189
+ return
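As a quick check of the I/O helpers above, the round trip below writes a synthetic depth map and reads the PFM back (file names are illustrative only):

    import numpy as np

    depth = np.random.rand(240, 320).astype(np.float32)   # synthetic depth map
    write_depth("depth_example", depth, bits=2)            # depth_example.pfm + 16-bit depth_example.png
    data, scale = read_pfm("depth_example.pfm")
    assert data.shape == depth.shape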
extensions/microsoftexcel-controlnet/annotator/mlsd/LICENSE ADDED
@@ -0,0 +1,201 @@
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "{}"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright 2021-present NAVER Corp.
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
extensions/microsoftexcel-controlnet/annotator/mlsd/__init__.py ADDED
@@ -0,0 +1,49 @@
1
+ import cv2
2
+ import numpy as np
3
+ import torch
4
+ import os
5
+
6
+ from einops import rearrange
7
+ from .models.mbv2_mlsd_tiny import MobileV2_MLSD_Tiny
8
+ from .models.mbv2_mlsd_large import MobileV2_MLSD_Large
9
+ from .utils import pred_lines
10
+ from modules import devices
11
+ from annotator.annotator_path import models_path
12
+
13
+ mlsdmodel = None
14
+ remote_model_path = "https://huggingface.co/lllyasviel/ControlNet/resolve/main/annotator/ckpts/mlsd_large_512_fp32.pth"
15
+ old_modeldir = os.path.dirname(os.path.realpath(__file__))
16
+ modeldir = os.path.join(models_path, "mlsd")
17
+
18
+ def unload_mlsd_model():
19
+ global mlsdmodel
20
+ if mlsdmodel is not None:
21
+ mlsdmodel = mlsdmodel.cpu()
22
+
23
+ def apply_mlsd(input_image, thr_v, thr_d):
24
+ global modelpath, mlsdmodel
25
+ if mlsdmodel is None:
26
+ modelpath = os.path.join(modeldir, "mlsd_large_512_fp32.pth")
27
+ old_modelpath = os.path.join(old_modeldir, "mlsd_large_512_fp32.pth")
28
+ if os.path.exists(old_modelpath):
29
+ modelpath = old_modelpath
30
+ elif not os.path.exists(modelpath):
31
+ from basicsr.utils.download_util import load_file_from_url
32
+ load_file_from_url(remote_model_path, model_dir=modeldir)
33
+ mlsdmodel = MobileV2_MLSD_Large()
34
+ mlsdmodel.load_state_dict(torch.load(modelpath), strict=True)
35
+ mlsdmodel = mlsdmodel.to(devices.get_device_for("controlnet")).eval()
36
+
37
+ model = mlsdmodel
38
+ assert input_image.ndim == 3
39
+ img = input_image
40
+ img_output = np.zeros_like(img)
41
+ try:
42
+ with torch.no_grad():
43
+ lines = pred_lines(img, model, [img.shape[0], img.shape[1]], thr_v, thr_d)
44
+ for line in lines:
45
+ x_start, y_start, x_end, y_end = [int(val) for val in line]
46
+ cv2.line(img_output, (x_start, y_start), (x_end, y_end), [255, 255, 255], 1)
47
+ except Exception:
48
+ pass
49
+ return img_output[:, :, 0]
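apply_mlsd is the entry point the extension calls: it lazily downloads and loads mlsd_large_512_fp32.pth, runs pred_lines, and rasterizes the detected segments into a single-channel map. A sketch of a direct call, assuming it runs inside the webui process (the "from modules import devices" import above only resolves there):

    import numpy as np

    image = np.zeros((512, 512, 3), dtype=np.uint8)      # any HxWx3 uint8 image
    line_map = apply_mlsd(image, thr_v=0.1, thr_d=0.1)   # score and distance thresholds for pred_lines
    print(line_map.shape)                                # (512, 512)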
extensions/microsoftexcel-controlnet/annotator/mlsd/models/mbv2_mlsd_large.py ADDED
@@ -0,0 +1,292 @@
1
+ import os
2
+ import sys
3
+ import torch
4
+ import torch.nn as nn
5
+ import torch.utils.model_zoo as model_zoo
6
+ from torch.nn import functional as F
7
+
8
+
9
+ class BlockTypeA(nn.Module):
10
+ def __init__(self, in_c1, in_c2, out_c1, out_c2, upscale = True):
11
+ super(BlockTypeA, self).__init__()
12
+ self.conv1 = nn.Sequential(
13
+ nn.Conv2d(in_c2, out_c2, kernel_size=1),
14
+ nn.BatchNorm2d(out_c2),
15
+ nn.ReLU(inplace=True)
16
+ )
17
+ self.conv2 = nn.Sequential(
18
+ nn.Conv2d(in_c1, out_c1, kernel_size=1),
19
+ nn.BatchNorm2d(out_c1),
20
+ nn.ReLU(inplace=True)
21
+ )
22
+ self.upscale = upscale
23
+
24
+ def forward(self, a, b):
25
+ b = self.conv1(b)
26
+ a = self.conv2(a)
27
+ if self.upscale:
28
+ b = F.interpolate(b, scale_factor=2.0, mode='bilinear', align_corners=True)
29
+ return torch.cat((a, b), dim=1)
30
+
31
+
32
+ class BlockTypeB(nn.Module):
33
+ def __init__(self, in_c, out_c):
34
+ super(BlockTypeB, self).__init__()
35
+ self.conv1 = nn.Sequential(
36
+ nn.Conv2d(in_c, in_c, kernel_size=3, padding=1),
37
+ nn.BatchNorm2d(in_c),
38
+ nn.ReLU()
39
+ )
40
+ self.conv2 = nn.Sequential(
41
+ nn.Conv2d(in_c, out_c, kernel_size=3, padding=1),
42
+ nn.BatchNorm2d(out_c),
43
+ nn.ReLU()
44
+ )
45
+
46
+ def forward(self, x):
47
+ x = self.conv1(x) + x
48
+ x = self.conv2(x)
49
+ return x
50
+
51
+ class BlockTypeC(nn.Module):
52
+ def __init__(self, in_c, out_c):
53
+ super(BlockTypeC, self).__init__()
54
+ self.conv1 = nn.Sequential(
55
+ nn.Conv2d(in_c, in_c, kernel_size=3, padding=5, dilation=5),
56
+ nn.BatchNorm2d(in_c),
57
+ nn.ReLU()
58
+ )
59
+ self.conv2 = nn.Sequential(
60
+ nn.Conv2d(in_c, in_c, kernel_size=3, padding=1),
61
+ nn.BatchNorm2d(in_c),
62
+ nn.ReLU()
63
+ )
64
+ self.conv3 = nn.Conv2d(in_c, out_c, kernel_size=1)
65
+
66
+ def forward(self, x):
67
+ x = self.conv1(x)
68
+ x = self.conv2(x)
69
+ x = self.conv3(x)
70
+ return x
71
+
72
+ def _make_divisible(v, divisor, min_value=None):
73
+ """
74
+ This function is taken from the original tf repo.
75
+ It ensures that all layers have a channel number that is divisible by 8
76
+ It can be seen here:
77
+ https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
78
+ :param v:
79
+ :param divisor:
80
+ :param min_value:
81
+ :return:
82
+ """
83
+ if min_value is None:
84
+ min_value = divisor
85
+ new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
86
+ # Make sure that round down does not go down by more than 10%.
87
+ if new_v < 0.9 * v:
88
+ new_v += divisor
89
+ return new_v
90
+
91
+
92
+ class ConvBNReLU(nn.Sequential):
93
+ def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1):
94
+ self.channel_pad = out_planes - in_planes
95
+ self.stride = stride
96
+ #padding = (kernel_size - 1) // 2
97
+
98
+ # TFLite uses slightly different padding than PyTorch
99
+ if stride == 2:
100
+ padding = 0
101
+ else:
102
+ padding = (kernel_size - 1) // 2
103
+
104
+ super(ConvBNReLU, self).__init__(
105
+ nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding, groups=groups, bias=False),
106
+ nn.BatchNorm2d(out_planes),
107
+ nn.ReLU6(inplace=True)
108
+ )
109
+ self.max_pool = nn.MaxPool2d(kernel_size=stride, stride=stride)
110
+
111
+
112
+ def forward(self, x):
113
+ # TFLite uses different padding
114
+ if self.stride == 2:
115
+ x = F.pad(x, (0, 1, 0, 1), "constant", 0)
116
+ #print(x.shape)
117
+
118
+ for module in self:
119
+ if not isinstance(module, nn.MaxPool2d):
120
+ x = module(x)
121
+ return x
122
+
123
+
124
+ class InvertedResidual(nn.Module):
125
+ def __init__(self, inp, oup, stride, expand_ratio):
126
+ super(InvertedResidual, self).__init__()
127
+ self.stride = stride
128
+ assert stride in [1, 2]
129
+
130
+ hidden_dim = int(round(inp * expand_ratio))
131
+ self.use_res_connect = self.stride == 1 and inp == oup
132
+
133
+ layers = []
134
+ if expand_ratio != 1:
135
+ # pw
136
+ layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1))
137
+ layers.extend([
138
+ # dw
139
+ ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim),
140
+ # pw-linear
141
+ nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
142
+ nn.BatchNorm2d(oup),
143
+ ])
144
+ self.conv = nn.Sequential(*layers)
145
+
146
+ def forward(self, x):
147
+ if self.use_res_connect:
148
+ return x + self.conv(x)
149
+ else:
150
+ return self.conv(x)
151
+
152
+
153
+ class MobileNetV2(nn.Module):
154
+ def __init__(self, pretrained=True):
155
+ """
156
+ MobileNet V2 main class
157
+ Args:
158
+ num_classes (int): Number of classes
159
+ width_mult (float): Width multiplier - adjusts number of channels in each layer by this amount
160
+ inverted_residual_setting: Network structure
161
+ round_nearest (int): Round the number of channels in each layer to be a multiple of this number
162
+ Set to 1 to turn off rounding
163
+ block: Module specifying inverted residual building block for mobilenet
164
+ """
165
+ super(MobileNetV2, self).__init__()
166
+
167
+ block = InvertedResidual
168
+ input_channel = 32
169
+ last_channel = 1280
170
+ width_mult = 1.0
171
+ round_nearest = 8
172
+
173
+ inverted_residual_setting = [
174
+ # t, c, n, s
175
+ [1, 16, 1, 1],
176
+ [6, 24, 2, 2],
177
+ [6, 32, 3, 2],
178
+ [6, 64, 4, 2],
179
+ [6, 96, 3, 1],
180
+ #[6, 160, 3, 2],
181
+ #[6, 320, 1, 1],
182
+ ]
183
+
184
+ # only check the first element, assuming user knows t,c,n,s are required
185
+ if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4:
186
+ raise ValueError("inverted_residual_setting should be non-empty "
187
+ "or a 4-element list, got {}".format(inverted_residual_setting))
188
+
189
+ # building first layer
190
+ input_channel = _make_divisible(input_channel * width_mult, round_nearest)
191
+ self.last_channel = _make_divisible(last_channel * max(1.0, width_mult), round_nearest)
192
+ features = [ConvBNReLU(4, input_channel, stride=2)]
193
+ # building inverted residual blocks
194
+ for t, c, n, s in inverted_residual_setting:
195
+ output_channel = _make_divisible(c * width_mult, round_nearest)
196
+ for i in range(n):
197
+ stride = s if i == 0 else 1
198
+ features.append(block(input_channel, output_channel, stride, expand_ratio=t))
199
+ input_channel = output_channel
200
+
201
+ self.features = nn.Sequential(*features)
202
+ self.fpn_selected = [1, 3, 6, 10, 13]
203
+ # weight initialization
204
+ for m in self.modules():
205
+ if isinstance(m, nn.Conv2d):
206
+ nn.init.kaiming_normal_(m.weight, mode='fan_out')
207
+ if m.bias is not None:
208
+ nn.init.zeros_(m.bias)
209
+ elif isinstance(m, nn.BatchNorm2d):
210
+ nn.init.ones_(m.weight)
211
+ nn.init.zeros_(m.bias)
212
+ elif isinstance(m, nn.Linear):
213
+ nn.init.normal_(m.weight, 0, 0.01)
214
+ nn.init.zeros_(m.bias)
215
+ if pretrained:
216
+ self._load_pretrained_model()
217
+
218
+ def _forward_impl(self, x):
219
+ # This exists since TorchScript doesn't support inheritance, so the superclass method
220
+ # (this one) needs to have a name other than `forward` that can be accessed in a subclass
221
+ fpn_features = []
222
+ for i, f in enumerate(self.features):
223
+ if i > self.fpn_selected[-1]:
224
+ break
225
+ x = f(x)
226
+ if i in self.fpn_selected:
227
+ fpn_features.append(x)
228
+
229
+ c1, c2, c3, c4, c5 = fpn_features
230
+ return c1, c2, c3, c4, c5
231
+
232
+
233
+ def forward(self, x):
234
+ return self._forward_impl(x)
235
+
236
+ def _load_pretrained_model(self):
237
+ pretrain_dict = model_zoo.load_url('https://download.pytorch.org/models/mobilenet_v2-b0353104.pth')
238
+ model_dict = {}
239
+ state_dict = self.state_dict()
240
+ for k, v in pretrain_dict.items():
241
+ if k in state_dict:
242
+ model_dict[k] = v
243
+ state_dict.update(model_dict)
244
+ self.load_state_dict(state_dict)
245
+
246
+
247
+ class MobileV2_MLSD_Large(nn.Module):
248
+ def __init__(self):
249
+ super(MobileV2_MLSD_Large, self).__init__()
250
+
251
+ self.backbone = MobileNetV2(pretrained=False)
252
+ ## A, B
253
+ self.block15 = BlockTypeA(in_c1= 64, in_c2= 96,
254
+ out_c1= 64, out_c2=64,
255
+ upscale=False)
256
+ self.block16 = BlockTypeB(128, 64)
257
+
258
+ ## A, B
259
+ self.block17 = BlockTypeA(in_c1 = 32, in_c2 = 64,
260
+ out_c1= 64, out_c2= 64)
261
+ self.block18 = BlockTypeB(128, 64)
262
+
263
+ ## A, B
264
+ self.block19 = BlockTypeA(in_c1=24, in_c2=64,
265
+ out_c1=64, out_c2=64)
266
+ self.block20 = BlockTypeB(128, 64)
267
+
268
+ ## A, B, C
269
+ self.block21 = BlockTypeA(in_c1=16, in_c2=64,
270
+ out_c1=64, out_c2=64)
271
+ self.block22 = BlockTypeB(128, 64)
272
+
273
+ self.block23 = BlockTypeC(64, 16)
274
+
275
+ def forward(self, x):
276
+ c1, c2, c3, c4, c5 = self.backbone(x)
277
+
278
+ x = self.block15(c4, c5)
279
+ x = self.block16(x)
280
+
281
+ x = self.block17(c3, x)
282
+ x = self.block18(x)
283
+
284
+ x = self.block19(c2, x)
285
+ x = self.block20(x)
286
+
287
+ x = self.block21(c1, x)
288
+ x = self.block22(x)
289
+ x = self.block23(x)
290
+ x = x[:, 7:, :, :]
291
+
292
+ return x
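A shape check for the head defined above; the fourth input channel is the constant ones plane that annotator/mlsd/utils.py concatenates onto the RGB image before inference (the backbone is built with pretrained=False, so nothing is downloaded):

    import torch

    net = MobileV2_MLSD_Large().eval()
    dummy = torch.randn(1, 4, 512, 512)   # RGB + ones plane, as built in pred_lines()
    with torch.no_grad():
        tp_map = net(dummy)
    print(tp_map.shape)   # torch.Size([1, 9, 256, 256]); channel 0 is the center map,
                          # channels 1-4 are the displacement maps consumed by pred_lines()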
extensions/microsoftexcel-controlnet/annotator/mlsd/models/mbv2_mlsd_tiny.py ADDED
@@ -0,0 +1,275 @@
1
+ import os
2
+ import sys
3
+ import torch
4
+ import torch.nn as nn
5
+ import torch.utils.model_zoo as model_zoo
6
+ from torch.nn import functional as F
7
+
8
+
9
+ class BlockTypeA(nn.Module):
10
+ def __init__(self, in_c1, in_c2, out_c1, out_c2, upscale = True):
11
+ super(BlockTypeA, self).__init__()
12
+ self.conv1 = nn.Sequential(
13
+ nn.Conv2d(in_c2, out_c2, kernel_size=1),
14
+ nn.BatchNorm2d(out_c2),
15
+ nn.ReLU(inplace=True)
16
+ )
17
+ self.conv2 = nn.Sequential(
18
+ nn.Conv2d(in_c1, out_c1, kernel_size=1),
19
+ nn.BatchNorm2d(out_c1),
20
+ nn.ReLU(inplace=True)
21
+ )
22
+ self.upscale = upscale
23
+
24
+ def forward(self, a, b):
25
+ b = self.conv1(b)
26
+ a = self.conv2(a)
27
+ b = F.interpolate(b, scale_factor=2.0, mode='bilinear', align_corners=True)
28
+ return torch.cat((a, b), dim=1)
29
+
30
+
31
+ class BlockTypeB(nn.Module):
32
+ def __init__(self, in_c, out_c):
33
+ super(BlockTypeB, self).__init__()
34
+ self.conv1 = nn.Sequential(
35
+ nn.Conv2d(in_c, in_c, kernel_size=3, padding=1),
36
+ nn.BatchNorm2d(in_c),
37
+ nn.ReLU()
38
+ )
39
+ self.conv2 = nn.Sequential(
40
+ nn.Conv2d(in_c, out_c, kernel_size=3, padding=1),
41
+ nn.BatchNorm2d(out_c),
42
+ nn.ReLU()
43
+ )
44
+
45
+ def forward(self, x):
46
+ x = self.conv1(x) + x
47
+ x = self.conv2(x)
48
+ return x
49
+
50
+ class BlockTypeC(nn.Module):
51
+ def __init__(self, in_c, out_c):
52
+ super(BlockTypeC, self).__init__()
53
+ self.conv1 = nn.Sequential(
54
+ nn.Conv2d(in_c, in_c, kernel_size=3, padding=5, dilation=5),
55
+ nn.BatchNorm2d(in_c),
56
+ nn.ReLU()
57
+ )
58
+ self.conv2 = nn.Sequential(
59
+ nn.Conv2d(in_c, in_c, kernel_size=3, padding=1),
60
+ nn.BatchNorm2d(in_c),
61
+ nn.ReLU()
62
+ )
63
+ self.conv3 = nn.Conv2d(in_c, out_c, kernel_size=1)
64
+
65
+ def forward(self, x):
66
+ x = self.conv1(x)
67
+ x = self.conv2(x)
68
+ x = self.conv3(x)
69
+ return x
70
+
71
+ def _make_divisible(v, divisor, min_value=None):
72
+ """
73
+ This function is taken from the original tf repo.
74
+ It ensures that all layers have a channel number that is divisible by 8
75
+ It can be seen here:
76
+ https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
77
+ :param v:
78
+ :param divisor:
79
+ :param min_value:
80
+ :return:
81
+ """
82
+ if min_value is None:
83
+ min_value = divisor
84
+ new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
85
+ # Make sure that round down does not go down by more than 10%.
86
+ if new_v < 0.9 * v:
87
+ new_v += divisor
88
+ return new_v
89
+
90
+
91
+ class ConvBNReLU(nn.Sequential):
92
+ def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1):
93
+ self.channel_pad = out_planes - in_planes
94
+ self.stride = stride
95
+ #padding = (kernel_size - 1) // 2
96
+
97
+ # TFLite uses slightly different padding than PyTorch
98
+ if stride == 2:
99
+ padding = 0
100
+ else:
101
+ padding = (kernel_size - 1) // 2
102
+
103
+ super(ConvBNReLU, self).__init__(
104
+ nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding, groups=groups, bias=False),
105
+ nn.BatchNorm2d(out_planes),
106
+ nn.ReLU6(inplace=True)
107
+ )
108
+ self.max_pool = nn.MaxPool2d(kernel_size=stride, stride=stride)
109
+
110
+
111
+ def forward(self, x):
112
+ # TFLite uses different padding
113
+ if self.stride == 2:
114
+ x = F.pad(x, (0, 1, 0, 1), "constant", 0)
115
+ #print(x.shape)
116
+
117
+ for module in self:
118
+ if not isinstance(module, nn.MaxPool2d):
119
+ x = module(x)
120
+ return x
121
+
122
+
123
+ class InvertedResidual(nn.Module):
124
+ def __init__(self, inp, oup, stride, expand_ratio):
125
+ super(InvertedResidual, self).__init__()
126
+ self.stride = stride
127
+ assert stride in [1, 2]
128
+
129
+ hidden_dim = int(round(inp * expand_ratio))
130
+ self.use_res_connect = self.stride == 1 and inp == oup
131
+
132
+ layers = []
133
+ if expand_ratio != 1:
134
+ # pw
135
+ layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1))
136
+ layers.extend([
137
+ # dw
138
+ ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim),
139
+ # pw-linear
140
+ nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
141
+ nn.BatchNorm2d(oup),
142
+ ])
143
+ self.conv = nn.Sequential(*layers)
144
+
145
+ def forward(self, x):
146
+ if self.use_res_connect:
147
+ return x + self.conv(x)
148
+ else:
149
+ return self.conv(x)
150
+
151
+
152
+ class MobileNetV2(nn.Module):
153
+ def __init__(self, pretrained=True):
154
+ """
155
+ MobileNet V2 main class
156
+ Args:
157
+ num_classes (int): Number of classes
158
+ width_mult (float): Width multiplier - adjusts number of channels in each layer by this amount
159
+ inverted_residual_setting: Network structure
160
+ round_nearest (int): Round the number of channels in each layer to be a multiple of this number
161
+ Set to 1 to turn off rounding
162
+ block: Module specifying inverted residual building block for mobilenet
163
+ """
164
+ super(MobileNetV2, self).__init__()
165
+
166
+ block = InvertedResidual
167
+ input_channel = 32
168
+ last_channel = 1280
169
+ width_mult = 1.0
170
+ round_nearest = 8
171
+
172
+ inverted_residual_setting = [
173
+ # t, c, n, s
174
+ [1, 16, 1, 1],
175
+ [6, 24, 2, 2],
176
+ [6, 32, 3, 2],
177
+ [6, 64, 4, 2],
178
+ #[6, 96, 3, 1],
179
+ #[6, 160, 3, 2],
180
+ #[6, 320, 1, 1],
181
+ ]
182
+
183
+ # only check the first element, assuming user knows t,c,n,s are required
184
+ if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4:
185
+ raise ValueError("inverted_residual_setting should be non-empty "
186
+ "or a 4-element list, got {}".format(inverted_residual_setting))
187
+
188
+ # building first layer
189
+ input_channel = _make_divisible(input_channel * width_mult, round_nearest)
190
+ self.last_channel = _make_divisible(last_channel * max(1.0, width_mult), round_nearest)
191
+ features = [ConvBNReLU(4, input_channel, stride=2)]
192
+ # building inverted residual blocks
193
+ for t, c, n, s in inverted_residual_setting:
194
+ output_channel = _make_divisible(c * width_mult, round_nearest)
195
+ for i in range(n):
196
+ stride = s if i == 0 else 1
197
+ features.append(block(input_channel, output_channel, stride, expand_ratio=t))
198
+ input_channel = output_channel
199
+ self.features = nn.Sequential(*features)
200
+
201
+ self.fpn_selected = [3, 6, 10]
202
+ # weight initialization
203
+ for m in self.modules():
204
+ if isinstance(m, nn.Conv2d):
205
+ nn.init.kaiming_normal_(m.weight, mode='fan_out')
206
+ if m.bias is not None:
207
+ nn.init.zeros_(m.bias)
208
+ elif isinstance(m, nn.BatchNorm2d):
209
+ nn.init.ones_(m.weight)
210
+ nn.init.zeros_(m.bias)
211
+ elif isinstance(m, nn.Linear):
212
+ nn.init.normal_(m.weight, 0, 0.01)
213
+ nn.init.zeros_(m.bias)
214
+
215
+ #if pretrained:
216
+ # self._load_pretrained_model()
217
+
218
+ def _forward_impl(self, x):
219
+ # This exists since TorchScript doesn't support inheritance, so the superclass method
220
+ # (this one) needs to have a name other than `forward` that can be accessed in a subclass
221
+ fpn_features = []
222
+ for i, f in enumerate(self.features):
223
+ if i > self.fpn_selected[-1]:
224
+ break
225
+ x = f(x)
226
+ if i in self.fpn_selected:
227
+ fpn_features.append(x)
228
+
229
+ c2, c3, c4 = fpn_features
230
+ return c2, c3, c4
231
+
232
+
233
+ def forward(self, x):
234
+ return self._forward_impl(x)
235
+
236
+ def _load_pretrained_model(self):
237
+ pretrain_dict = model_zoo.load_url('https://download.pytorch.org/models/mobilenet_v2-b0353104.pth')
238
+ model_dict = {}
239
+ state_dict = self.state_dict()
240
+ for k, v in pretrain_dict.items():
241
+ if k in state_dict:
242
+ model_dict[k] = v
243
+ state_dict.update(model_dict)
244
+ self.load_state_dict(state_dict)
245
+
246
+
247
+ class MobileV2_MLSD_Tiny(nn.Module):
248
+ def __init__(self):
249
+ super(MobileV2_MLSD_Tiny, self).__init__()
250
+
251
+ self.backbone = MobileNetV2(pretrained=True)
252
+
253
+ self.block12 = BlockTypeA(in_c1= 32, in_c2= 64,
254
+ out_c1= 64, out_c2=64)
255
+ self.block13 = BlockTypeB(128, 64)
256
+
257
+ self.block14 = BlockTypeA(in_c1 = 24, in_c2 = 64,
258
+ out_c1= 32, out_c2= 32)
259
+ self.block15 = BlockTypeB(64, 64)
260
+
261
+ self.block16 = BlockTypeC(64, 16)
262
+
263
+ def forward(self, x):
264
+ c2, c3, c4 = self.backbone(x)
265
+
266
+ x = self.block12(c3, c4)
267
+ x = self.block13(x)
268
+ x = self.block14(c2, x)
269
+ x = self.block15(x)
270
+ x = self.block16(x)
271
+ x = x[:, 7:, :, :]
272
+ #print(x.shape)
273
+ x = F.interpolate(x, scale_factor=2.0, mode='bilinear', align_corners=True)
274
+
275
+ return x
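The tiny variant decodes from three backbone stages and ends with a 2x bilinear upsample, so it emits the same 9-channel map at the same resolution as the large head (the pretrained flag is effectively a no-op here since the _load_pretrained_model call is commented out above):

    import torch

    tiny = MobileV2_MLSD_Tiny().eval()
    with torch.no_grad():
        out = tiny(torch.randn(1, 4, 512, 512))
    print(out.shape)   # torch.Size([1, 9, 256, 256])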
extensions/microsoftexcel-controlnet/annotator/mlsd/utils.py ADDED
@@ -0,0 +1,581 @@
1
+ '''
2
+ modified by lihaoweicv
3
+ pytorch version
4
+ '''
5
+
6
+ '''
7
+ M-LSD
8
+ Copyright 2021-present NAVER Corp.
9
+ Apache License v2.0
10
+ '''
11
+
12
+ import os
13
+ import numpy as np
14
+ import cv2
15
+ import torch
16
+ from torch.nn import functional as F
17
+ from modules import devices
18
+
19
+
20
+ def deccode_output_score_and_ptss(tpMap, topk_n = 200, ksize = 5):
21
+ '''
22
+ tpMap:
23
+ center: tpMap[1, 0, :, :]
24
+ displacement: tpMap[1, 1:5, :, :]
25
+ '''
26
+ b, c, h, w = tpMap.shape
27
+ assert b==1, 'only support bsize==1'
28
+ displacement = tpMap[:, 1:5, :, :][0]
29
+ center = tpMap[:, 0, :, :]
30
+ heat = torch.sigmoid(center)
31
+ hmax = F.max_pool2d( heat, (ksize, ksize), stride=1, padding=(ksize-1)//2)
32
+ keep = (hmax == heat).float()
33
+ heat = heat * keep
34
+ heat = heat.reshape(-1, )
35
+
36
+ scores, indices = torch.topk(heat, topk_n, dim=-1, largest=True)
37
+ yy = torch.floor_divide(indices, w).unsqueeze(-1)
38
+ xx = torch.fmod(indices, w).unsqueeze(-1)
39
+ ptss = torch.cat((yy, xx),dim=-1)
40
+
41
+ ptss = ptss.detach().cpu().numpy()
42
+ scores = scores.detach().cpu().numpy()
43
+ displacement = displacement.detach().cpu().numpy()
44
+ displacement = displacement.transpose((1,2,0))
45
+ return ptss, scores, displacement
46
+
47
+
48
+ def pred_lines(image, model,
49
+ input_shape=[512, 512],
50
+ score_thr=0.10,
51
+ dist_thr=20.0):
52
+ h, w, _ = image.shape
53
+ h_ratio, w_ratio = [h / input_shape[0], w / input_shape[1]]
54
+
55
+ resized_image = np.concatenate([cv2.resize(image, (input_shape[1], input_shape[0]), interpolation=cv2.INTER_AREA),
56
+ np.ones([input_shape[0], input_shape[1], 1])], axis=-1)
57
+
58
+ resized_image = resized_image.transpose((2,0,1))
59
+ batch_image = np.expand_dims(resized_image, axis=0).astype('float32')
60
+ batch_image = (batch_image / 127.5) - 1.0
61
+
62
+ batch_image = torch.from_numpy(batch_image).float().to(devices.get_device_for("controlnet"))
63
+ outputs = model(batch_image)
64
+ pts, pts_score, vmap = deccode_output_score_and_ptss(outputs, 200, 3)
65
+ start = vmap[:, :, :2]
66
+ end = vmap[:, :, 2:]
67
+ dist_map = np.sqrt(np.sum((start - end) ** 2, axis=-1))
68
+
69
+ segments_list = []
70
+ for center, score in zip(pts, pts_score):
71
+ y, x = center
72
+ distance = dist_map[y, x]
73
+ if score > score_thr and distance > dist_thr:
74
+ disp_x_start, disp_y_start, disp_x_end, disp_y_end = vmap[y, x, :]
75
+ x_start = x + disp_x_start
76
+ y_start = y + disp_y_start
77
+ x_end = x + disp_x_end
78
+ y_end = y + disp_y_end
79
+ segments_list.append([x_start, y_start, x_end, y_end])
80
+
81
+ lines = 2 * np.array(segments_list) # 256 > 512
82
+ lines[:, 0] = lines[:, 0] * w_ratio
83
+ lines[:, 1] = lines[:, 1] * h_ratio
84
+ lines[:, 2] = lines[:, 2] * w_ratio
85
+ lines[:, 3] = lines[:, 3] * h_ratio
86
+
87
+ return lines
88
+
89
+
90
+ def pred_squares(image,
91
+ model,
92
+ input_shape=[512, 512],
93
+ params={'score': 0.06,
94
+ 'outside_ratio': 0.28,
95
+ 'inside_ratio': 0.45,
96
+ 'w_overlap': 0.0,
97
+ 'w_degree': 1.95,
98
+ 'w_length': 0.0,
99
+ 'w_area': 1.86,
100
+ 'w_center': 0.14}):
101
+ '''
102
+ shape = [height, width]
103
+ '''
104
+ h, w, _ = image.shape
105
+ original_shape = [h, w]
106
+
107
+ resized_image = np.concatenate([cv2.resize(image, (input_shape[0], input_shape[1]), interpolation=cv2.INTER_AREA),
108
+ np.ones([input_shape[0], input_shape[1], 1])], axis=-1)
109
+ resized_image = resized_image.transpose((2, 0, 1))
110
+ batch_image = np.expand_dims(resized_image, axis=0).astype('float32')
111
+ batch_image = (batch_image / 127.5) - 1.0
112
+
113
+ batch_image = torch.from_numpy(batch_image).float().to(devices.get_device_for("controlnet"))
114
+ outputs = model(batch_image)
115
+
116
+ pts, pts_score, vmap = deccode_output_score_and_ptss(outputs, 200, 3)
117
+ start = vmap[:, :, :2] # (x, y)
118
+ end = vmap[:, :, 2:] # (x, y)
119
+ dist_map = np.sqrt(np.sum((start - end) ** 2, axis=-1))
120
+
121
+ junc_list = []
122
+ segments_list = []
123
+ for junc, score in zip(pts, pts_score):
124
+ y, x = junc
125
+ distance = dist_map[y, x]
126
+ if score > params['score'] and distance > 20.0:
127
+ junc_list.append([x, y])
128
+ disp_x_start, disp_y_start, disp_x_end, disp_y_end = vmap[y, x, :]
129
+ d_arrow = 1.0
130
+ x_start = x + d_arrow * disp_x_start
131
+ y_start = y + d_arrow * disp_y_start
132
+ x_end = x + d_arrow * disp_x_end
133
+ y_end = y + d_arrow * disp_y_end
134
+ segments_list.append([x_start, y_start, x_end, y_end])
135
+
136
+ segments = np.array(segments_list)
137
+
138
+ ####### post processing for squares
139
+ # 1. get unique lines
140
+ point = np.array([[0, 0]])
141
+ point = point[0]
142
+ start = segments[:, :2]
143
+ end = segments[:, 2:]
144
+ diff = start - end
145
+ a = diff[:, 1]
146
+ b = -diff[:, 0]
147
+ c = a * start[:, 0] + b * start[:, 1]
148
+
149
+ d = np.abs(a * point[0] + b * point[1] - c) / np.sqrt(a ** 2 + b ** 2 + 1e-10)
150
+ theta = np.arctan2(diff[:, 0], diff[:, 1]) * 180 / np.pi
151
+ theta[theta < 0.0] += 180
152
+ hough = np.concatenate([d[:, None], theta[:, None]], axis=-1)
153
+
154
+ d_quant = 1
155
+ theta_quant = 2
156
+ hough[:, 0] //= d_quant
157
+ hough[:, 1] //= theta_quant
158
+ _, indices, counts = np.unique(hough, axis=0, return_index=True, return_counts=True)
159
+
160
+ acc_map = np.zeros([512 // d_quant + 1, 360 // theta_quant + 1], dtype='float32')
161
+ idx_map = np.zeros([512 // d_quant + 1, 360 // theta_quant + 1], dtype='int32') - 1
162
+ yx_indices = hough[indices, :].astype('int32')
163
+ acc_map[yx_indices[:, 0], yx_indices[:, 1]] = counts
164
+ idx_map[yx_indices[:, 0], yx_indices[:, 1]] = indices
165
+
166
+ acc_map_np = acc_map
167
+ # acc_map = acc_map[None, :, :, None]
168
+ #
169
+ # ### fast suppression using tensorflow op
170
+ # acc_map = tf.constant(acc_map, dtype=tf.float32)
171
+ # max_acc_map = tf.keras.layers.MaxPool2D(pool_size=(5, 5), strides=1, padding='same')(acc_map)
172
+ # acc_map = acc_map * tf.cast(tf.math.equal(acc_map, max_acc_map), tf.float32)
173
+ # flatten_acc_map = tf.reshape(acc_map, [1, -1])
174
+ # topk_values, topk_indices = tf.math.top_k(flatten_acc_map, k=len(pts))
175
+ # _, h, w, _ = acc_map.shape
176
+ # y = tf.expand_dims(topk_indices // w, axis=-1)
177
+ # x = tf.expand_dims(topk_indices % w, axis=-1)
178
+ # yx = tf.concat([y, x], axis=-1)
179
+
+ ### fast suppression using pytorch op
+ acc_map = torch.from_numpy(acc_map_np).unsqueeze(0).unsqueeze(0)
+ _,_, h, w = acc_map.shape
+ max_acc_map = F.max_pool2d(acc_map,kernel_size=5, stride=1, padding=2)
+ acc_map = acc_map * ( (acc_map == max_acc_map).float() )
+ flatten_acc_map = acc_map.reshape([-1, ])
+
+ scores, indices = torch.topk(flatten_acc_map, len(pts), dim=-1, largest=True)
+ yy = torch.div(indices, w, rounding_mode='floor').unsqueeze(-1)
+ xx = torch.fmod(indices, w).unsqueeze(-1)
+ yx = torch.cat((yy, xx), dim=-1)
+
+ yx = yx.detach().cpu().numpy()
+
+ topk_values = scores.detach().cpu().numpy()
+ indices = idx_map[yx[:, 0], yx[:, 1]]
+ basis = 5 // 2
+
+ merged_segments = []
+ for yx_pt, max_indice, value in zip(yx, indices, topk_values):
+ y, x = yx_pt
+ if max_indice == -1 or value == 0:
+ continue
+ segment_list = []
+ for y_offset in range(-basis, basis + 1):
+ for x_offset in range(-basis, basis + 1):
+ indice = idx_map[y + y_offset, x + x_offset]
+ cnt = int(acc_map_np[y + y_offset, x + x_offset])
+ if indice != -1:
+ segment_list.append(segments[indice])
+ if cnt > 1:
+ check_cnt = 1
+ current_hough = hough[indice]
+ for new_indice, new_hough in enumerate(hough):
+ if (current_hough == new_hough).all() and indice != new_indice:
+ segment_list.append(segments[new_indice])
+ check_cnt += 1
+ if check_cnt == cnt:
+ break
+ group_segments = np.array(segment_list).reshape([-1, 2])
+ sorted_group_segments = np.sort(group_segments, axis=0)
+ x_min, y_min = sorted_group_segments[0, :]
+ x_max, y_max = sorted_group_segments[-1, :]
+
+ deg = theta[max_indice]
+ if deg >= 90:
+ merged_segments.append([x_min, y_max, x_max, y_min])
+ else:
+ merged_segments.append([x_min, y_min, x_max, y_max])
+
+ # 2. get intersections
+ new_segments = np.array(merged_segments) # (x1, y1, x2, y2)
+ start = new_segments[:, :2] # (x1, y1)
+ end = new_segments[:, 2:] # (x2, y2)
+ new_centers = (start + end) / 2.0
+ diff = start - end
+ dist_segments = np.sqrt(np.sum(diff ** 2, axis=-1))
+
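+ # pairwise intersections: each merged line is written as a*x + b*y = c and every pair is solved with Cramer's rule; the 1e-10 added to det avoids division by zero for (near-)parallel pairs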
+ # ax + by = c
+ a = diff[:, 1]
+ b = -diff[:, 0]
+ c = a * start[:, 0] + b * start[:, 1]
+ pre_det = a[:, None] * b[None, :]
+ det = pre_det - np.transpose(pre_det)
+
+ pre_inter_y = a[:, None] * c[None, :]
+ inter_y = (pre_inter_y - np.transpose(pre_inter_y)) / (det + 1e-10)
+ pre_inter_x = c[:, None] * b[None, :]
+ inter_x = (pre_inter_x - np.transpose(pre_inter_x)) / (det + 1e-10)
+ inter_pts = np.concatenate([inter_x[:, :, None], inter_y[:, :, None]], axis=-1).astype('int32')
+
+ # 3. get corner information
+ # 3.1 get distance
+ '''
+ dist_segments:
+ | dist(0), dist(1), dist(2), ...|
+ dist_inter_to_segment1:
+ | dist(inter,0), dist(inter,0), dist(inter,0), ... |
+ | dist(inter,1), dist(inter,1), dist(inter,1), ... |
+ ...
+ dist_inter_to_segment2:
+ | dist(inter,0), dist(inter,1), dist(inter,2), ... |
+ | dist(inter,0), dist(inter,1), dist(inter,2), ... |
+ ...
+ '''
+
+ dist_inter_to_segment1_start = np.sqrt(
+ np.sum(((inter_pts - start[:, None, :]) ** 2), axis=-1, keepdims=True)) # [n_batch, n_batch, 1]
+ dist_inter_to_segment1_end = np.sqrt(
+ np.sum(((inter_pts - end[:, None, :]) ** 2), axis=-1, keepdims=True)) # [n_batch, n_batch, 1]
+ dist_inter_to_segment2_start = np.sqrt(
+ np.sum(((inter_pts - start[None, :, :]) ** 2), axis=-1, keepdims=True)) # [n_batch, n_batch, 1]
+ dist_inter_to_segment2_end = np.sqrt(
+ np.sum(((inter_pts - end[None, :, :]) ** 2), axis=-1, keepdims=True)) # [n_batch, n_batch, 1]
+
+ # sort ascending
+ dist_inter_to_segment1 = np.sort(
+ np.concatenate([dist_inter_to_segment1_start, dist_inter_to_segment1_end], axis=-1),
+ axis=-1) # [n_batch, n_batch, 2]
+ dist_inter_to_segment2 = np.sort(
+ np.concatenate([dist_inter_to_segment2_start, dist_inter_to_segment2_end], axis=-1),
+ axis=-1) # [n_batch, n_batch, 2]
+
+ # 3.2 get degree
+ inter_to_start = new_centers[:, None, :] - inter_pts
+ deg_inter_to_start = np.arctan2(inter_to_start[:, :, 1], inter_to_start[:, :, 0]) * 180 / np.pi
+ deg_inter_to_start[deg_inter_to_start < 0.0] += 360
+ inter_to_end = new_centers[None, :, :] - inter_pts
+ deg_inter_to_end = np.arctan2(inter_to_end[:, :, 1], inter_to_end[:, :, 0]) * 180 / np.pi
+ deg_inter_to_end[deg_inter_to_end < 0.0] += 360
+
+ '''
+ B -- G
+ | |
+ C -- R
+ B : blue / G: green / C: cyan / R: red
+
+ 0 -- 1
+ | |
+ 3 -- 2
+ '''
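+ # classify each valid intersection into one of the four corner types in the diagram above, based on the directions from the intersection toward the two segments' centers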
+ # rename variables
+ deg1_map, deg2_map = deg_inter_to_start, deg_inter_to_end
+ # sort deg ascending
+ deg_sort = np.sort(np.concatenate([deg1_map[:, :, None], deg2_map[:, :, None]], axis=-1), axis=-1)
+
+ deg_diff_map = np.abs(deg1_map - deg2_map)
+ # we only consider the smallest degree of intersect
+ deg_diff_map[deg_diff_map > 180] = 360 - deg_diff_map[deg_diff_map > 180]
+
+ # define available degree range
+ deg_range = [60, 120]
+
+ corner_dict = {corner_info: [] for corner_info in range(4)}
+ inter_points = []
+ for i in range(inter_pts.shape[0]):
+ for j in range(i + 1, inter_pts.shape[1]):
+ # i, j > line index, always i < j
+ x, y = inter_pts[i, j, :]
+ deg1, deg2 = deg_sort[i, j, :]
+ deg_diff = deg_diff_map[i, j]
+
+ check_degree = deg_diff > deg_range[0] and deg_diff < deg_range[1]
+
+ outside_ratio = params['outside_ratio'] # over ratio >>> drop it!
+ inside_ratio = params['inside_ratio'] # over ratio >>> drop it!
+ check_distance = ((dist_inter_to_segment1[i, j, 1] >= dist_segments[i] and \
+ dist_inter_to_segment1[i, j, 0] <= dist_segments[i] * outside_ratio) or \
+ (dist_inter_to_segment1[i, j, 1] <= dist_segments[i] and \
+ dist_inter_to_segment1[i, j, 0] <= dist_segments[i] * inside_ratio)) and \
+ ((dist_inter_to_segment2[i, j, 1] >= dist_segments[j] and \
+ dist_inter_to_segment2[i, j, 0] <= dist_segments[j] * outside_ratio) or \
+ (dist_inter_to_segment2[i, j, 1] <= dist_segments[j] and \
+ dist_inter_to_segment2[i, j, 0] <= dist_segments[j] * inside_ratio))
+
+ if check_degree and check_distance:
+ corner_info = None
+
+ if (deg1 >= 0 and deg1 <= 45 and deg2 >= 45 and deg2 <= 120) or \
+ (deg2 >= 315 and deg1 >= 45 and deg1 <= 120):
+ corner_info, color_info = 0, 'blue'
+ elif (deg1 >= 45 and deg1 <= 125 and deg2 >= 125 and deg2 <= 225):
+ corner_info, color_info = 1, 'green'
+ elif (deg1 >= 125 and deg1 <= 225 and deg2 >= 225 and deg2 <= 315):
+ corner_info, color_info = 2, 'black'
+ elif (deg1 >= 0 and deg1 <= 45 and deg2 >= 225 and deg2 <= 315) or \
+ (deg2 >= 315 and deg1 >= 225 and deg1 <= 315):
+ corner_info, color_info = 3, 'cyan'
+ else:
+ corner_info, color_info = 4, 'red' # we don't use it
+ continue
+
+ corner_dict[corner_info].append([x, y, i, j])
+ inter_points.append([x, y])
+
+ square_list = []
+ connect_list = []
+ segments_list = []
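+ # chain corners 0 -> 1 -> 2 -> 3 that share a line index; a chain that also closes back to corner 0 becomes a square candidate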
+ for corner0 in corner_dict[0]:
+ for corner1 in corner_dict[1]:
+ connect01 = False
+ for corner0_line in corner0[2:]:
+ if corner0_line in corner1[2:]:
+ connect01 = True
+ break
+ if connect01:
+ for corner2 in corner_dict[2]:
+ connect12 = False
+ for corner1_line in corner1[2:]:
+ if corner1_line in corner2[2:]:
+ connect12 = True
+ break
+ if connect12:
+ for corner3 in corner_dict[3]:
+ connect23 = False
+ for corner2_line in corner2[2:]:
+ if corner2_line in corner3[2:]:
+ connect23 = True
+ break
+ if connect23:
+ for corner3_line in corner3[2:]:
+ if corner3_line in corner0[2:]:
+ # SQUARE!!!
+ '''
+ 0 -- 1
+ | |
+ 3 -- 2
+ square_list:
+ order: 0 > 1 > 2 > 3
+ | x0, y0, x1, y1, x2, y2, x3, y3 |
+ | x0, y0, x1, y1, x2, y2, x3, y3 |
+ ...
+ connect_list:
+ order: 01 > 12 > 23 > 30
+ | line_idx01, line_idx12, line_idx23, line_idx30 |
+ | line_idx01, line_idx12, line_idx23, line_idx30 |
+ ...
+ segments_list:
+ order: 0 > 1 > 2 > 3
+ | line_idx0_i, line_idx0_j, line_idx1_i, line_idx1_j, line_idx2_i, line_idx2_j, line_idx3_i, line_idx3_j |
+ | line_idx0_i, line_idx0_j, line_idx1_i, line_idx1_j, line_idx2_i, line_idx2_j, line_idx3_i, line_idx3_j |
+ ...
+ '''
+ square_list.append(corner0[:2] + corner1[:2] + corner2[:2] + corner3[:2])
+ connect_list.append([corner0_line, corner1_line, corner2_line, corner3_line])
+ segments_list.append(corner0[2:] + corner1[2:] + corner2[2:] + corner3[2:])
+
+ def check_outside_inside(segments_info, connect_idx):
+ # return 'outside or inside', min distance, cover_param, peri_param
+ if connect_idx == segments_info[0]:
+ check_dist_mat = dist_inter_to_segment1
+ else:
+ check_dist_mat = dist_inter_to_segment2
+
+ i, j = segments_info
+ min_dist, max_dist = check_dist_mat[i, j, :]
+ connect_dist = dist_segments[connect_idx]
+ if max_dist > connect_dist:
+ return 'outside', min_dist, 0, 1
+ else:
+ return 'inside', min_dist, -1, -1
+
+ top_square = None
+
+ try:
+ map_size = input_shape[0] / 2
+ squares = np.array(square_list).reshape([-1, 4, 2])
+ score_array = []
+ connect_array = np.array(connect_list)
+ segments_array = np.array(segments_list).reshape([-1, 4, 2])
+
+ # get degree of corners:
+ squares_rollup = np.roll(squares, 1, axis=1)
+ squares_rolldown = np.roll(squares, -1, axis=1)
+ vec1 = squares_rollup - squares
+ normalized_vec1 = vec1 / (np.linalg.norm(vec1, axis=-1, keepdims=True) + 1e-10)
+ vec2 = squares_rolldown - squares
+ normalized_vec2 = vec2 / (np.linalg.norm(vec2, axis=-1, keepdims=True) + 1e-10)
+ inner_products = np.sum(normalized_vec1 * normalized_vec2, axis=-1) # [n_squares, 4]
+ squares_degree = np.arccos(inner_products) * 180 / np.pi # [n_squares, 4]
+
+ # get square score
+ overlap_scores = []
+ degree_scores = []
+ length_scores = []
+
+ for connects, segments, square, degree in zip(connect_array, segments_array, squares, squares_degree):
+ '''
+ 0 -- 1
+ | |
+ 3 -- 2
+
+ # segments: [4, 2]
+ # connects: [4]
+ '''
+
+ ###################################### OVERLAP SCORES
+ cover = 0
+ perimeter = 0
+ # check 0 > 1 > 2 > 3
+ square_length = []
+
+ for start_idx in range(4):
+ end_idx = (start_idx + 1) % 4
+
+ connect_idx = connects[start_idx] # segment idx of segment01
+ start_segments = segments[start_idx]
+ end_segments = segments[end_idx]
+
+ start_point = square[start_idx]
+ end_point = square[end_idx]
+
+ # check whether outside or inside
+ start_position, start_min, start_cover_param, start_peri_param = check_outside_inside(start_segments,
+ connect_idx)
+ end_position, end_min, end_cover_param, end_peri_param = check_outside_inside(end_segments, connect_idx)
+
+ cover += dist_segments[connect_idx] + start_cover_param * start_min + end_cover_param * end_min
+ perimeter += dist_segments[connect_idx] + start_peri_param * start_min + end_peri_param * end_min
+
+ square_length.append(
+ dist_segments[connect_idx] + start_peri_param * start_min + end_peri_param * end_min)
+
+ overlap_scores.append(cover / perimeter)
+ ######################################
+ ###################################### DEGREE SCORES
+ '''
+ deg0 vs deg2
+ deg1 vs deg3
+ '''
+ deg0, deg1, deg2, deg3 = degree
+ deg_ratio1 = deg0 / deg2
+ if deg_ratio1 > 1.0:
+ deg_ratio1 = 1 / deg_ratio1
+ deg_ratio2 = deg1 / deg3
+ if deg_ratio2 > 1.0:
+ deg_ratio2 = 1 / deg_ratio2
+ degree_scores.append((deg_ratio1 + deg_ratio2) / 2)
+ ######################################
+ ###################################### LENGTH SCORES
+ '''
+ len0 vs len2
+ len1 vs len3
+ '''
+ len0, len1, len2, len3 = square_length
+ len_ratio1 = len0 / len2 if len2 > len0 else len2 / len0
+ len_ratio2 = len1 / len3 if len3 > len1 else len3 / len1
+ length_scores.append((len_ratio1 + len_ratio2) / 2)
+
+ ######################################
+
+ overlap_scores = np.array(overlap_scores)
+ overlap_scores /= np.max(overlap_scores)
+
+ degree_scores = np.array(degree_scores)
+ # degree_scores /= np.max(degree_scores)
+
+ length_scores = np.array(length_scores)
+
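+ # area score: polygon area from the shoelace formula (the 'correction' term closes the polygon), normalized by the map_size x map_size reference area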
+ ###################################### AREA SCORES
+ area_scores = np.reshape(squares, [-1, 4, 2])
+ area_x = area_scores[:, :, 0]
+ area_y = area_scores[:, :, 1]
+ correction = area_x[:, -1] * area_y[:, 0] - area_y[:, -1] * area_x[:, 0]
+ area_scores = np.sum(area_x[:, :-1] * area_y[:, 1:], axis=-1) - np.sum(area_y[:, :-1] * area_x[:, 1:], axis=-1)
+ area_scores = 0.5 * np.abs(area_scores + correction)
+ area_scores /= (map_size * map_size) # np.max(area_scores)
+ ######################################
+
+ ###################################### CENTER SCORES
+ centers = np.array([[256 // 2, 256 // 2]], dtype='float32') # [1, 2]
+ # squares: [n, 4, 2]
+ square_centers = np.mean(squares, axis=1) # [n, 2]
+ center2center = np.sqrt(np.sum((centers - square_centers) ** 2))
+ center_scores = center2center / (map_size / np.sqrt(2.0))
+
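+ # final ranking: weighted sum of the sub-scores using the params['w_*'] weights (the score_w list below only documents the order and is not used); squares are then sorted by descending score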
+ '''
+ score_w = [overlap, degree, area, center, length]
+ '''
+ score_w = [0.0, 1.0, 10.0, 0.5, 1.0]
+ score_array = params['w_overlap'] * overlap_scores \
+ + params['w_degree'] * degree_scores \
+ + params['w_area'] * area_scores \
+ - params['w_center'] * center_scores \
+ + params['w_length'] * length_scores
+
+ best_square = []
+
+ sorted_idx = np.argsort(score_array)[::-1]
+ score_array = score_array[sorted_idx]
+ squares = squares[sorted_idx]
+
+ except Exception as e:
+ pass
+
+ '''return list
+ merged_lines, squares, scores
+ '''
+
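+ # rescale segments, squares, and intersection points back to the original image resolution (the factor of 2 compensates for the downscaled output maps); empty results fall back to empty lists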
+ try:
+ new_segments[:, 0] = new_segments[:, 0] * 2 / input_shape[1] * original_shape[1]
+ new_segments[:, 1] = new_segments[:, 1] * 2 / input_shape[0] * original_shape[0]
+ new_segments[:, 2] = new_segments[:, 2] * 2 / input_shape[1] * original_shape[1]
+ new_segments[:, 3] = new_segments[:, 3] * 2 / input_shape[0] * original_shape[0]
+ except:
+ new_segments = []
+
+ try:
+ squares[:, :, 0] = squares[:, :, 0] * 2 / input_shape[1] * original_shape[1]
+ squares[:, :, 1] = squares[:, :, 1] * 2 / input_shape[0] * original_shape[0]
+ except:
+ squares = []
+ score_array = []
+
+ try:
+ inter_points = np.array(inter_points)
+ inter_points[:, 0] = inter_points[:, 0] * 2 / input_shape[1] * original_shape[1]
+ inter_points[:, 1] = inter_points[:, 1] * 2 / input_shape[0] * original_shape[0]
+ except:
+ inter_points = []
+
+ return new_segments, squares, score_array, inter_points
extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/__init__.py ADDED
@@ -0,0 +1,15 @@
+ # Copyright (c) OpenMMLab. All rights reserved.
+ # flake8: noqa
+ from .arraymisc import *
+ from .fileio import *
+ from .image import *
+ from .utils import *
+ from .version import *
+ from .video import *
+ from .visualization import *
+
+ # The following modules are not imported to this level, so mmcv may be used
+ # without PyTorch.
+ # - runner
+ # - parallel
+ # - op
extensions/microsoftexcel-controlnet/annotator/mmpkg/mmcv/arraymisc/__init__.py ADDED
@@ -0,0 +1,4 @@
+ # Copyright (c) OpenMMLab. All rights reserved.
+ from .quantization import dequantize, quantize
+
+ __all__ = ['quantize', 'dequantize']