lev1 committed on
Commit
f7ac35e
1 Parent(s): fe72737

auto annotators

This view is limited to 50 files because the commit contains too many changes.
Files changed (50)
  1. annotator/canny/__init__.py +6 -0
  2. annotator/ckpts/body_pose_model.pth +3 -0
  3. annotator/ckpts/ckpts.txt +1 -0
  4. annotator/ckpts/hand_pose_model.pth +3 -0
  5. annotator/openpose/__init__.py +44 -0
  6. annotator/openpose/body.py +219 -0
  7. annotator/openpose/hand.py +86 -0
  8. annotator/openpose/model.py +219 -0
  9. annotator/openpose/util.py +164 -0
  10. annotator/uniformer/__init__.py +23 -0
  11. annotator/uniformer/configs/_base_/datasets/ade20k.py +54 -0
  12. annotator/uniformer/configs/_base_/datasets/chase_db1.py +59 -0
  13. annotator/uniformer/configs/_base_/datasets/cityscapes.py +54 -0
  14. annotator/uniformer/configs/_base_/datasets/cityscapes_769x769.py +35 -0
  15. annotator/uniformer/configs/_base_/datasets/drive.py +59 -0
  16. annotator/uniformer/configs/_base_/datasets/hrf.py +59 -0
  17. annotator/uniformer/configs/_base_/datasets/pascal_context.py +60 -0
  18. annotator/uniformer/configs/_base_/datasets/pascal_context_59.py +60 -0
  19. annotator/uniformer/configs/_base_/datasets/pascal_voc12.py +57 -0
  20. annotator/uniformer/configs/_base_/datasets/pascal_voc12_aug.py +9 -0
  21. annotator/uniformer/configs/_base_/datasets/stare.py +59 -0
  22. annotator/uniformer/configs/_base_/default_runtime.py +14 -0
  23. annotator/uniformer/configs/_base_/models/ann_r50-d8.py +46 -0
  24. annotator/uniformer/configs/_base_/models/apcnet_r50-d8.py +44 -0
  25. annotator/uniformer/configs/_base_/models/ccnet_r50-d8.py +44 -0
  26. annotator/uniformer/configs/_base_/models/cgnet.py +35 -0
  27. annotator/uniformer/configs/_base_/models/danet_r50-d8.py +44 -0
  28. annotator/uniformer/configs/_base_/models/deeplabv3_r50-d8.py +44 -0
  29. annotator/uniformer/configs/_base_/models/deeplabv3_unet_s5-d16.py +50 -0
  30. annotator/uniformer/configs/_base_/models/deeplabv3plus_r50-d8.py +46 -0
  31. annotator/uniformer/configs/_base_/models/dmnet_r50-d8.py +44 -0
  32. annotator/uniformer/configs/_base_/models/dnl_r50-d8.py +46 -0
  33. annotator/uniformer/configs/_base_/models/emanet_r50-d8.py +47 -0
  34. annotator/uniformer/configs/_base_/models/encnet_r50-d8.py +48 -0
  35. annotator/uniformer/configs/_base_/models/fast_scnn.py +57 -0
  36. annotator/uniformer/configs/_base_/models/fcn_hr18.py +52 -0
  37. annotator/uniformer/configs/_base_/models/fcn_r50-d8.py +45 -0
  38. annotator/uniformer/configs/_base_/models/fcn_unet_s5-d16.py +51 -0
  39. annotator/uniformer/configs/_base_/models/fpn_r50.py +36 -0
  40. annotator/uniformer/configs/_base_/models/fpn_uniformer.py +35 -0
  41. annotator/uniformer/configs/_base_/models/gcnet_r50-d8.py +46 -0
  42. annotator/uniformer/configs/_base_/models/lraspp_m-v3-d8.py +25 -0
  43. annotator/uniformer/configs/_base_/models/nonlocal_r50-d8.py +46 -0
  44. annotator/uniformer/configs/_base_/models/ocrnet_hr18.py +68 -0
  45. annotator/uniformer/configs/_base_/models/ocrnet_r50-d8.py +47 -0
  46. annotator/uniformer/configs/_base_/models/pointrend_r50.py +56 -0
  47. annotator/uniformer/configs/_base_/models/psanet_r50-d8.py +49 -0
  48. annotator/uniformer/configs/_base_/models/pspnet_r50-d8.py +44 -0
  49. annotator/uniformer/configs/_base_/models/pspnet_unet_s5-d16.py +50 -0
  50. annotator/uniformer/configs/_base_/models/upernet_r50.py +44 -0
annotator/canny/__init__.py ADDED
@@ -0,0 +1,6 @@
import cv2


class CannyDetector:
    def __call__(self, img, low_threshold, high_threshold):
        return cv2.Canny(img, low_threshold, high_threshold)
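A minimal usage sketch for CannyDetector (not part of the commit): the class wraps cv2.Canny directly, so it accepts any 8-bit image array plus the two hysteresis thresholds. The test image and threshold values below are illustrative assumptions.

import cv2
import numpy as np

detector = CannyDetector()
img = np.zeros((256, 256, 3), dtype=np.uint8)         # hypothetical test image
cv2.circle(img, (128, 128), 64, (255, 255, 255), -1)  # one white disk, so there are edges to find
edge_map = detector(img, low_threshold=100, high_threshold=200)  # HxW uint8 edge mask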
annotator/ckpts/body_pose_model.pth ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:25a948c16078b0f08e236bda51a385d855ef4c153598947c28c0d47ed94bb746
size 209267595
annotator/ckpts/ckpts.txt ADDED
@@ -0,0 +1 @@
Weights here.
annotator/ckpts/hand_pose_model.pth ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b76b00d1750901abd07b9f9d8c98cc3385b8fe834a26d4b4f0aad439e75fc600
size 147341049
annotator/openpose/__init__.py ADDED
@@ -0,0 +1,44 @@
import os
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

import torch
import numpy as np
from . import util
from .body import Body
from .hand import Hand
from annotator.util import annotator_ckpts_path


body_model_path = "https://huggingface.co/lllyasviel/ControlNet/resolve/main/annotator/ckpts/body_pose_model.pth"
hand_model_path = "https://huggingface.co/lllyasviel/ControlNet/resolve/main/annotator/ckpts/hand_pose_model.pth"


class OpenposeDetector:
    def __init__(self):
        body_modelpath = os.path.join(annotator_ckpts_path, "body_pose_model.pth")
        hand_modelpath = os.path.join(annotator_ckpts_path, "hand_pose_model.pth")

        # fetch whichever checkpoint is missing (load_file_from_url skips files that already exist)
        if not os.path.exists(body_modelpath) or not os.path.exists(hand_modelpath):
            from basicsr.utils.download_util import load_file_from_url
            load_file_from_url(body_model_path, model_dir=annotator_ckpts_path)
            load_file_from_url(hand_model_path, model_dir=annotator_ckpts_path)

        self.body_estimation = Body(body_modelpath)
        self.hand_estimation = Hand(hand_modelpath)

    def __call__(self, oriImg, hand=False):
        oriImg = oriImg[:, :, ::-1].copy()  # RGB -> BGR, as the pose models expect
        with torch.no_grad():
            candidate, subset = self.body_estimation(oriImg)
            canvas = np.zeros_like(oriImg)
            canvas = util.draw_bodypose(canvas, candidate, subset)
            if hand:
                hands_list = util.handDetect(candidate, subset, oriImg)
                all_hand_peaks = []
                for x, y, w, is_left in hands_list:
                    peaks = self.hand_estimation(oriImg[y:y+w, x:x+w, :])
                    # shift detected peaks back into full-image coordinates,
                    # keeping (0, 0) as the "not found" marker
                    peaks[:, 0] = np.where(peaks[:, 0] == 0, peaks[:, 0], peaks[:, 0] + x)
                    peaks[:, 1] = np.where(peaks[:, 1] == 0, peaks[:, 1], peaks[:, 1] + y)
                    all_hand_peaks.append(peaks)
                canvas = util.draw_handpose(canvas, all_hand_peaks)
            return canvas, dict(candidate=candidate.tolist(), subset=subset.tolist())
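A usage sketch for OpenposeDetector (not part of the commit), assuming an HxWx3 uint8 RGB input (the detector flips it to BGR internally); the file names are hypothetical.

import cv2

detector = OpenposeDetector()                   # downloads both checkpoints on first use
img = cv2.imread("person.jpg")[:, :, ::-1]      # hypothetical input, BGR -> RGB
pose_map, pose_data = detector(img, hand=True)  # rendered skeleton + raw keypoint lists
cv2.imwrite("pose.png", pose_map)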
annotator/openpose/body.py ADDED
@@ -0,0 +1,219 @@
import cv2
import numpy as np
import math
import time
from scipy.ndimage import gaussian_filter
import matplotlib.pyplot as plt
import matplotlib
import torch
from torchvision import transforms

from . import util
from .model import bodypose_model


class Body(object):
    def __init__(self, model_path):
        self.model = bodypose_model()
        if torch.cuda.is_available():
            self.model = self.model.cuda()
            print('cuda')
        model_dict = util.transfer(self.model, torch.load(model_path))
        self.model.load_state_dict(model_dict)
        self.model.eval()

    def __call__(self, oriImg):
        # scale_search = [0.5, 1.0, 1.5, 2.0]
        scale_search = [0.5]
        boxsize = 368
        stride = 8
        padValue = 128
        thre1 = 0.1
        thre2 = 0.05
        multiplier = [x * boxsize / oriImg.shape[0] for x in scale_search]
        heatmap_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 19))
        paf_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 38))

        for m in range(len(multiplier)):
            scale = multiplier[m]
            imageToTest = cv2.resize(oriImg, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
            imageToTest_padded, pad = util.padRightDownCorner(imageToTest, stride, padValue)
            im = np.transpose(np.float32(imageToTest_padded[:, :, :, np.newaxis]), (3, 2, 0, 1)) / 256 - 0.5
            im = np.ascontiguousarray(im)

            data = torch.from_numpy(im).float()
            if torch.cuda.is_available():
                data = data.cuda()
            # data = data.permute([2, 0, 1]).unsqueeze(0).float()
            with torch.no_grad():
                Mconv7_stage6_L1, Mconv7_stage6_L2 = self.model(data)
            Mconv7_stage6_L1 = Mconv7_stage6_L1.cpu().numpy()
            Mconv7_stage6_L2 = Mconv7_stage6_L2.cpu().numpy()

            # extract outputs, resize, and remove padding
            # heatmap = np.transpose(np.squeeze(net.blobs[output_blobs.keys()[1]].data), (1, 2, 0))  # output 1 is heatmaps
            heatmap = np.transpose(np.squeeze(Mconv7_stage6_L2), (1, 2, 0))  # output 1 is heatmaps
            heatmap = cv2.resize(heatmap, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC)
            heatmap = heatmap[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :]
            heatmap = cv2.resize(heatmap, (oriImg.shape[1], oriImg.shape[0]), interpolation=cv2.INTER_CUBIC)

            # paf = np.transpose(np.squeeze(net.blobs[output_blobs.keys()[0]].data), (1, 2, 0))  # output 0 is PAFs
            paf = np.transpose(np.squeeze(Mconv7_stage6_L1), (1, 2, 0))  # output 0 is PAFs
            paf = cv2.resize(paf, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC)
            paf = paf[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :]
            paf = cv2.resize(paf, (oriImg.shape[1], oriImg.shape[0]), interpolation=cv2.INTER_CUBIC)

            # accumulate the average response over all tested scales
            heatmap_avg = heatmap_avg + heatmap / len(multiplier)
            paf_avg = paf_avg + paf / len(multiplier)

        all_peaks = []
        peak_counter = 0

        for part in range(18):
            map_ori = heatmap_avg[:, :, part]
            one_heatmap = gaussian_filter(map_ori, sigma=3)

            # a peak is a pixel that dominates its four neighbours and exceeds thre1
            map_left = np.zeros(one_heatmap.shape)
            map_left[1:, :] = one_heatmap[:-1, :]
            map_right = np.zeros(one_heatmap.shape)
            map_right[:-1, :] = one_heatmap[1:, :]
            map_up = np.zeros(one_heatmap.shape)
            map_up[:, 1:] = one_heatmap[:, :-1]
            map_down = np.zeros(one_heatmap.shape)
            map_down[:, :-1] = one_heatmap[:, 1:]

            peaks_binary = np.logical_and.reduce(
                (one_heatmap >= map_left, one_heatmap >= map_right, one_heatmap >= map_up, one_heatmap >= map_down, one_heatmap > thre1))
            peaks = list(zip(np.nonzero(peaks_binary)[1], np.nonzero(peaks_binary)[0]))  # note reverse
            peaks_with_score = [x + (map_ori[x[1], x[0]],) for x in peaks]
            peak_id = range(peak_counter, peak_counter + len(peaks))
            peaks_with_score_and_id = [peaks_with_score[i] + (peak_id[i],) for i in range(len(peak_id))]

            all_peaks.append(peaks_with_score_and_id)
            peak_counter += len(peaks)

        # find connections in the specified sequence; center 29 is in position 15
        limbSeq = [[2, 3], [2, 6], [3, 4], [4, 5], [6, 7], [7, 8], [2, 9], [9, 10],
                   [10, 11], [2, 12], [12, 13], [13, 14], [2, 1], [1, 15], [15, 17],
                   [1, 16], [16, 18], [3, 17], [6, 18]]
        # the middle joints' heatmap correspondence
        mapIdx = [[31, 32], [39, 40], [33, 34], [35, 36], [41, 42], [43, 44], [19, 20], [21, 22],
                  [23, 24], [25, 26], [27, 28], [29, 30], [47, 48], [49, 50], [53, 54], [51, 52],
                  [55, 56], [37, 38], [45, 46]]

        connection_all = []
        special_k = []
        mid_num = 10

        for k in range(len(mapIdx)):
            score_mid = paf_avg[:, :, [x - 19 for x in mapIdx[k]]]
            candA = all_peaks[limbSeq[k][0] - 1]
            candB = all_peaks[limbSeq[k][1] - 1]
            nA = len(candA)
            nB = len(candB)
            indexA, indexB = limbSeq[k]
            if (nA != 0 and nB != 0):
                connection_candidate = []
                for i in range(nA):
                    for j in range(nB):
                        vec = np.subtract(candB[j][:2], candA[i][:2])
                        norm = math.sqrt(vec[0] * vec[0] + vec[1] * vec[1])
                        norm = max(0.001, norm)
                        vec = np.divide(vec, norm)

                        startend = list(zip(np.linspace(candA[i][0], candB[j][0], num=mid_num),
                                            np.linspace(candA[i][1], candB[j][1], num=mid_num)))

                        vec_x = np.array([score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 0]
                                          for I in range(len(startend))])
                        vec_y = np.array([score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 1]
                                          for I in range(len(startend))])

                        score_midpts = np.multiply(vec_x, vec[0]) + np.multiply(vec_y, vec[1])
                        score_with_dist_prior = sum(score_midpts) / len(score_midpts) + min(
                            0.5 * oriImg.shape[0] / norm - 1, 0)
                        criterion1 = len(np.nonzero(score_midpts > thre2)[0]) > 0.8 * len(score_midpts)
                        criterion2 = score_with_dist_prior > 0
                        if criterion1 and criterion2:
                            connection_candidate.append(
                                [i, j, score_with_dist_prior, score_with_dist_prior + candA[i][2] + candB[j][2]])

                connection_candidate = sorted(connection_candidate, key=lambda x: x[2], reverse=True)
                connection = np.zeros((0, 5))
                for c in range(len(connection_candidate)):
                    i, j, s = connection_candidate[c][0:3]
                    if (i not in connection[:, 3] and j not in connection[:, 4]):
                        connection = np.vstack([connection, [candA[i][3], candB[j][3], s, i, j]])
                        if (len(connection) >= min(nA, nB)):
                            break

                connection_all.append(connection)
            else:
                special_k.append(k)
                connection_all.append([])

        # last number in each row is the total parts number of that person
        # the second last number in each row is the score of the overall configuration
        subset = -1 * np.ones((0, 20))
        candidate = np.array([item for sublist in all_peaks for item in sublist])

        for k in range(len(mapIdx)):
            if k not in special_k:
                partAs = connection_all[k][:, 0]
                partBs = connection_all[k][:, 1]
                indexA, indexB = np.array(limbSeq[k]) - 1

                for i in range(len(connection_all[k])):  # = 1:size(temp,1)
                    found = 0
                    subset_idx = [-1, -1]
                    for j in range(len(subset)):  # 1:size(subset,1):
                        if subset[j][indexA] == partAs[i] or subset[j][indexB] == partBs[i]:
                            subset_idx[found] = j
                            found += 1

                    if found == 1:
                        j = subset_idx[0]
                        if subset[j][indexB] != partBs[i]:
                            subset[j][indexB] = partBs[i]
                            subset[j][-1] += 1
                            subset[j][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]
                    elif found == 2:  # if found 2 and disjoint, merge them
                        j1, j2 = subset_idx
                        membership = ((subset[j1] >= 0).astype(int) + (subset[j2] >= 0).astype(int))[:-2]
                        if len(np.nonzero(membership == 2)[0]) == 0:  # merge
                            subset[j1][:-2] += (subset[j2][:-2] + 1)
                            subset[j1][-2:] += subset[j2][-2:]
                            subset[j1][-2] += connection_all[k][i][2]
                            subset = np.delete(subset, j2, 0)
                        else:  # same handling as found == 1
                            subset[j1][indexB] = partBs[i]
                            subset[j1][-1] += 1
                            subset[j1][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]

                    # if no partA was found in the subset, create a new subset
                    elif not found and k < 17:
                        row = -1 * np.ones(20)
                        row[indexA] = partAs[i]
                        row[indexB] = partBs[i]
                        row[-1] = 2
                        row[-2] = sum(candidate[connection_all[k][i, :2].astype(int), 2]) + connection_all[k][i][2]
                        subset = np.vstack([subset, row])
        # delete rows of subset that have too few parts
        deleteIdx = []
        for i in range(len(subset)):
            if subset[i][-1] < 4 or subset[i][-2] / subset[i][-1] < 0.4:
                deleteIdx.append(i)
        subset = np.delete(subset, deleteIdx, axis=0)

        # subset: n*20 array, 0-17 is the index in candidate, 18 is the total score, 19 is the total parts
        # candidate: x, y, score, id
        return candidate, subset


if __name__ == "__main__":
    body_estimation = Body('../model/body_pose_model.pth')

    test_image = '../images/ski.jpg'
    oriImg = cv2.imread(test_image)  # B,G,R order
    candidate, subset = body_estimation(oriImg)
    canvas = util.draw_bodypose(oriImg, candidate, subset)
    plt.imshow(canvas[:, :, [2, 1, 0]])
    plt.show()
annotator/openpose/hand.py ADDED
@@ -0,0 +1,86 @@
import cv2
import json
import numpy as np
import math
import time
from scipy.ndimage import gaussian_filter
import matplotlib.pyplot as plt
import matplotlib
import torch
from skimage.measure import label

from .model import handpose_model
from . import util


class Hand(object):
    def __init__(self, model_path):
        self.model = handpose_model()
        if torch.cuda.is_available():
            self.model = self.model.cuda()
            print('cuda')
        model_dict = util.transfer(self.model, torch.load(model_path))
        self.model.load_state_dict(model_dict)
        self.model.eval()

    def __call__(self, oriImg):
        scale_search = [0.5, 1.0, 1.5, 2.0]
        # scale_search = [0.5]
        boxsize = 368
        stride = 8
        padValue = 128
        thre = 0.05
        multiplier = [x * boxsize / oriImg.shape[0] for x in scale_search]
        heatmap_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 22))
        # paf_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 38))

        for m in range(len(multiplier)):
            scale = multiplier[m]
            imageToTest = cv2.resize(oriImg, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
            imageToTest_padded, pad = util.padRightDownCorner(imageToTest, stride, padValue)
            im = np.transpose(np.float32(imageToTest_padded[:, :, :, np.newaxis]), (3, 2, 0, 1)) / 256 - 0.5
            im = np.ascontiguousarray(im)

            data = torch.from_numpy(im).float()
            if torch.cuda.is_available():
                data = data.cuda()
            # data = data.permute([2, 0, 1]).unsqueeze(0).float()
            with torch.no_grad():
                output = self.model(data).cpu().numpy()
            # output = self.model(data).numpy()

            # extract outputs, resize, and remove padding
            heatmap = np.transpose(np.squeeze(output), (1, 2, 0))  # output 1 is heatmaps
            heatmap = cv2.resize(heatmap, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC)
            heatmap = heatmap[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :]
            heatmap = cv2.resize(heatmap, (oriImg.shape[1], oriImg.shape[0]), interpolation=cv2.INTER_CUBIC)

            heatmap_avg += heatmap / len(multiplier)

        all_peaks = []
        for part in range(21):
            map_ori = heatmap_avg[:, :, part]
            one_heatmap = gaussian_filter(map_ori, sigma=3)
            binary = np.ascontiguousarray(one_heatmap > thre, dtype=np.uint8)
            # all values are below the threshold
            if np.sum(binary) == 0:
                all_peaks.append([0, 0])
                continue
            # keep only the connected component with the strongest total response
            label_img, label_numbers = label(binary, return_num=True, connectivity=binary.ndim)
            max_index = np.argmax([np.sum(map_ori[label_img == i]) for i in range(1, label_numbers + 1)]) + 1
            label_img[label_img != max_index] = 0
            map_ori[label_img == 0] = 0

            y, x = util.npmax(map_ori)
            all_peaks.append([x, y])
        return np.array(all_peaks)


if __name__ == "__main__":
    hand_estimation = Hand('../model/hand_pose_model.pth')

    # test_image = '../images/hand.jpg'
    test_image = '../images/hand.jpg'
    oriImg = cv2.imread(test_image)  # B,G,R order
    peaks = hand_estimation(oriImg)
    canvas = util.draw_handpose(oriImg, peaks, True)
    cv2.imshow('', canvas)
    cv2.waitKey(0)
annotator/openpose/model.py ADDED
@@ -0,0 +1,219 @@
from collections import OrderedDict

import torch
import torch.nn as nn


def make_layers(block, no_relu_layers):
    layers = []
    for layer_name, v in block.items():
        if 'pool' in layer_name:
            # v = [kernel_size, stride, padding]
            layer = nn.MaxPool2d(kernel_size=v[0], stride=v[1],
                                 padding=v[2])
            layers.append((layer_name, layer))
        else:
            # v = [in_channels, out_channels, kernel_size, stride, padding]
            conv2d = nn.Conv2d(in_channels=v[0], out_channels=v[1],
                               kernel_size=v[2], stride=v[3],
                               padding=v[4])
            layers.append((layer_name, conv2d))
            if layer_name not in no_relu_layers:
                layers.append(('relu_' + layer_name, nn.ReLU(inplace=True)))

    return nn.Sequential(OrderedDict(layers))


class bodypose_model(nn.Module):
    def __init__(self):
        super(bodypose_model, self).__init__()

        # these layers have no trailing relu
        no_relu_layers = ['conv5_5_CPM_L1', 'conv5_5_CPM_L2', 'Mconv7_stage2_L1',
                          'Mconv7_stage2_L2', 'Mconv7_stage3_L1', 'Mconv7_stage3_L2',
                          'Mconv7_stage4_L1', 'Mconv7_stage4_L2', 'Mconv7_stage5_L1',
                          'Mconv7_stage5_L2', 'Mconv7_stage6_L1', 'Mconv7_stage6_L2']
        blocks = {}
        block0 = OrderedDict([
            ('conv1_1', [3, 64, 3, 1, 1]),
            ('conv1_2', [64, 64, 3, 1, 1]),
            ('pool1_stage1', [2, 2, 0]),
            ('conv2_1', [64, 128, 3, 1, 1]),
            ('conv2_2', [128, 128, 3, 1, 1]),
            ('pool2_stage1', [2, 2, 0]),
            ('conv3_1', [128, 256, 3, 1, 1]),
            ('conv3_2', [256, 256, 3, 1, 1]),
            ('conv3_3', [256, 256, 3, 1, 1]),
            ('conv3_4', [256, 256, 3, 1, 1]),
            ('pool3_stage1', [2, 2, 0]),
            ('conv4_1', [256, 512, 3, 1, 1]),
            ('conv4_2', [512, 512, 3, 1, 1]),
            ('conv4_3_CPM', [512, 256, 3, 1, 1]),
            ('conv4_4_CPM', [256, 128, 3, 1, 1])
        ])

        # Stage 1
        block1_1 = OrderedDict([
            ('conv5_1_CPM_L1', [128, 128, 3, 1, 1]),
            ('conv5_2_CPM_L1', [128, 128, 3, 1, 1]),
            ('conv5_3_CPM_L1', [128, 128, 3, 1, 1]),
            ('conv5_4_CPM_L1', [128, 512, 1, 1, 0]),
            ('conv5_5_CPM_L1', [512, 38, 1, 1, 0])
        ])

        block1_2 = OrderedDict([
            ('conv5_1_CPM_L2', [128, 128, 3, 1, 1]),
            ('conv5_2_CPM_L2', [128, 128, 3, 1, 1]),
            ('conv5_3_CPM_L2', [128, 128, 3, 1, 1]),
            ('conv5_4_CPM_L2', [128, 512, 1, 1, 0]),
            ('conv5_5_CPM_L2', [512, 19, 1, 1, 0])
        ])
        blocks['block1_1'] = block1_1
        blocks['block1_2'] = block1_2

        self.model0 = make_layers(block0, no_relu_layers)

        # Stages 2 - 6
        for i in range(2, 7):
            blocks['block%d_1' % i] = OrderedDict([
                ('Mconv1_stage%d_L1' % i, [185, 128, 7, 1, 3]),
                ('Mconv2_stage%d_L1' % i, [128, 128, 7, 1, 3]),
                ('Mconv3_stage%d_L1' % i, [128, 128, 7, 1, 3]),
                ('Mconv4_stage%d_L1' % i, [128, 128, 7, 1, 3]),
                ('Mconv5_stage%d_L1' % i, [128, 128, 7, 1, 3]),
                ('Mconv6_stage%d_L1' % i, [128, 128, 1, 1, 0]),
                ('Mconv7_stage%d_L1' % i, [128, 38, 1, 1, 0])
            ])

            blocks['block%d_2' % i] = OrderedDict([
                ('Mconv1_stage%d_L2' % i, [185, 128, 7, 1, 3]),
                ('Mconv2_stage%d_L2' % i, [128, 128, 7, 1, 3]),
                ('Mconv3_stage%d_L2' % i, [128, 128, 7, 1, 3]),
                ('Mconv4_stage%d_L2' % i, [128, 128, 7, 1, 3]),
                ('Mconv5_stage%d_L2' % i, [128, 128, 7, 1, 3]),
                ('Mconv6_stage%d_L2' % i, [128, 128, 1, 1, 0]),
                ('Mconv7_stage%d_L2' % i, [128, 19, 1, 1, 0])
            ])

        for k in blocks.keys():
            blocks[k] = make_layers(blocks[k], no_relu_layers)

        self.model1_1 = blocks['block1_1']
        self.model2_1 = blocks['block2_1']
        self.model3_1 = blocks['block3_1']
        self.model4_1 = blocks['block4_1']
        self.model5_1 = blocks['block5_1']
        self.model6_1 = blocks['block6_1']

        self.model1_2 = blocks['block1_2']
        self.model2_2 = blocks['block2_2']
        self.model3_2 = blocks['block3_2']
        self.model4_2 = blocks['block4_2']
        self.model5_2 = blocks['block5_2']
        self.model6_2 = blocks['block6_2']

    def forward(self, x):
        out1 = self.model0(x)

        # each refinement stage sees the previous stage's two branch outputs
        # concatenated with the shared backbone features out1
        out1_1 = self.model1_1(out1)
        out1_2 = self.model1_2(out1)
        out2 = torch.cat([out1_1, out1_2, out1], 1)

        out2_1 = self.model2_1(out2)
        out2_2 = self.model2_2(out2)
        out3 = torch.cat([out2_1, out2_2, out1], 1)

        out3_1 = self.model3_1(out3)
        out3_2 = self.model3_2(out3)
        out4 = torch.cat([out3_1, out3_2, out1], 1)

        out4_1 = self.model4_1(out4)
        out4_2 = self.model4_2(out4)
        out5 = torch.cat([out4_1, out4_2, out1], 1)

        out5_1 = self.model5_1(out5)
        out5_2 = self.model5_2(out5)
        out6 = torch.cat([out5_1, out5_2, out1], 1)

        out6_1 = self.model6_1(out6)
        out6_2 = self.model6_2(out6)

        return out6_1, out6_2


class handpose_model(nn.Module):
    def __init__(self):
        super(handpose_model, self).__init__()

        # these layers have no trailing relu
        no_relu_layers = ['conv6_2_CPM', 'Mconv7_stage2', 'Mconv7_stage3',
                          'Mconv7_stage4', 'Mconv7_stage5', 'Mconv7_stage6']
        # stage 1
        block1_0 = OrderedDict([
            ('conv1_1', [3, 64, 3, 1, 1]),
            ('conv1_2', [64, 64, 3, 1, 1]),
            ('pool1_stage1', [2, 2, 0]),
            ('conv2_1', [64, 128, 3, 1, 1]),
            ('conv2_2', [128, 128, 3, 1, 1]),
            ('pool2_stage1', [2, 2, 0]),
            ('conv3_1', [128, 256, 3, 1, 1]),
            ('conv3_2', [256, 256, 3, 1, 1]),
            ('conv3_3', [256, 256, 3, 1, 1]),
            ('conv3_4', [256, 256, 3, 1, 1]),
            ('pool3_stage1', [2, 2, 0]),
            ('conv4_1', [256, 512, 3, 1, 1]),
            ('conv4_2', [512, 512, 3, 1, 1]),
            ('conv4_3', [512, 512, 3, 1, 1]),
            ('conv4_4', [512, 512, 3, 1, 1]),
            ('conv5_1', [512, 512, 3, 1, 1]),
            ('conv5_2', [512, 512, 3, 1, 1]),
            ('conv5_3_CPM', [512, 128, 3, 1, 1])
        ])

        block1_1 = OrderedDict([
            ('conv6_1_CPM', [128, 512, 1, 1, 0]),
            ('conv6_2_CPM', [512, 22, 1, 1, 0])
        ])

        blocks = {}
        blocks['block1_0'] = block1_0
        blocks['block1_1'] = block1_1

        # stages 2 - 6
        for i in range(2, 7):
            blocks['block%d' % i] = OrderedDict([
                ('Mconv1_stage%d' % i, [150, 128, 7, 1, 3]),
                ('Mconv2_stage%d' % i, [128, 128, 7, 1, 3]),
                ('Mconv3_stage%d' % i, [128, 128, 7, 1, 3]),
                ('Mconv4_stage%d' % i, [128, 128, 7, 1, 3]),
                ('Mconv5_stage%d' % i, [128, 128, 7, 1, 3]),
                ('Mconv6_stage%d' % i, [128, 128, 1, 1, 0]),
                ('Mconv7_stage%d' % i, [128, 22, 1, 1, 0])
            ])

        for k in blocks.keys():
            blocks[k] = make_layers(blocks[k], no_relu_layers)

        self.model1_0 = blocks['block1_0']
        self.model1_1 = blocks['block1_1']
        self.model2 = blocks['block2']
        self.model3 = blocks['block3']
        self.model4 = blocks['block4']
        self.model5 = blocks['block5']
        self.model6 = blocks['block6']

    def forward(self, x):
        out1_0 = self.model1_0(x)
        out1_1 = self.model1_1(out1_0)
        concat_stage2 = torch.cat([out1_1, out1_0], 1)
        out_stage2 = self.model2(concat_stage2)
        concat_stage3 = torch.cat([out_stage2, out1_0], 1)
        out_stage3 = self.model3(concat_stage3)
        concat_stage4 = torch.cat([out_stage3, out1_0], 1)
        out_stage4 = self.model4(concat_stage4)
        concat_stage5 = torch.cat([out_stage4, out1_0], 1)
        out_stage5 = self.model5(concat_stage5)
        concat_stage6 = torch.cat([out_stage5, out1_0], 1)
        out_stage6 = self.model6(concat_stage6)
        return out_stage6
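A quick shape check for the two networks (not part of the commit), assuming a random batch at the canonical 368x368 resolution; both models are fully convolutional with total stride 8, so other sizes divisible by 8 work too.

import torch

body = bodypose_model().eval()
hand = handpose_model().eval()
x = torch.randn(1, 3, 368, 368)  # dummy image batch
with torch.no_grad():
    pafs, heatmaps = body(x)     # stage-6 outputs of the L1 (PAF) and L2 (heatmap) branches
    hand_maps = hand(x)
print(pafs.shape)       # torch.Size([1, 38, 46, 46])  -- 19 limbs x 2 PAF channels
print(heatmaps.shape)   # torch.Size([1, 19, 46, 46])  -- 18 body keypoints + background
print(hand_maps.shape)  # torch.Size([1, 22, 46, 46])  -- 21 hand keypoints + background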
annotator/openpose/util.py ADDED
@@ -0,0 +1,164 @@
import math
import numpy as np
import matplotlib
import cv2


def padRightDownCorner(img, stride, padValue):
    h = img.shape[0]
    w = img.shape[1]

    pad = 4 * [None]
    pad[0] = 0  # up
    pad[1] = 0  # left
    pad[2] = 0 if (h % stride == 0) else stride - (h % stride)  # down
    pad[3] = 0 if (w % stride == 0) else stride - (w % stride)  # right

    img_padded = img
    pad_up = np.tile(img_padded[0:1, :, :] * 0 + padValue, (pad[0], 1, 1))
    img_padded = np.concatenate((pad_up, img_padded), axis=0)
    pad_left = np.tile(img_padded[:, 0:1, :] * 0 + padValue, (1, pad[1], 1))
    img_padded = np.concatenate((pad_left, img_padded), axis=1)
    pad_down = np.tile(img_padded[-2:-1, :, :] * 0 + padValue, (pad[2], 1, 1))
    img_padded = np.concatenate((img_padded, pad_down), axis=0)
    pad_right = np.tile(img_padded[:, -2:-1, :] * 0 + padValue, (1, pad[3], 1))
    img_padded = np.concatenate((img_padded, pad_right), axis=1)

    return img_padded, pad


# remap Caffe-style weight names so they match the PyTorch model's layer names
def transfer(model, model_weights):
    transfered_model_weights = {}
    for weights_name in model.state_dict().keys():
        transfered_model_weights[weights_name] = model_weights['.'.join(weights_name.split('.')[1:])]
    return transfered_model_weights


# draw the body keypoints and limbs
def draw_bodypose(canvas, candidate, subset):
    stickwidth = 4
    limbSeq = [[2, 3], [2, 6], [3, 4], [4, 5], [6, 7], [7, 8], [2, 9], [9, 10],
               [10, 11], [2, 12], [12, 13], [13, 14], [2, 1], [1, 15], [15, 17],
               [1, 16], [16, 18], [3, 17], [6, 18]]

    colors = [[255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0], [85, 255, 0], [0, 255, 0],
              [0, 255, 85], [0, 255, 170], [0, 255, 255], [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255],
              [170, 0, 255], [255, 0, 255], [255, 0, 170], [255, 0, 85]]
    for i in range(18):
        for n in range(len(subset)):
            index = int(subset[n][i])
            if index == -1:
                continue
            x, y = candidate[index][0:2]
            cv2.circle(canvas, (int(x), int(y)), 4, colors[i], thickness=-1)
    for i in range(17):
        for n in range(len(subset)):
            index = subset[n][np.array(limbSeq[i]) - 1]
            if -1 in index:
                continue
            cur_canvas = canvas.copy()
            Y = candidate[index.astype(int), 0]
            X = candidate[index.astype(int), 1]
            mX = np.mean(X)
            mY = np.mean(Y)
            length = ((X[0] - X[1]) ** 2 + (Y[0] - Y[1]) ** 2) ** 0.5
            angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1]))
            polygon = cv2.ellipse2Poly((int(mY), int(mX)), (int(length / 2), stickwidth), int(angle), 0, 360, 1)
            cv2.fillConvexPoly(cur_canvas, polygon, colors[i])
            canvas = cv2.addWeighted(canvas, 0.4, cur_canvas, 0.6, 0)
    # plt.imsave("preview.jpg", canvas[:, :, [2, 1, 0]])
    # plt.imshow(canvas[:, :, [2, 1, 0]])
    return canvas


# drawing with opencv alone does not look good, so edge colors come from matplotlib
def draw_handpose(canvas, all_hand_peaks, show_number=False):
    edges = [[0, 1], [1, 2], [2, 3], [3, 4], [0, 5], [5, 6], [6, 7], [7, 8], [0, 9], [9, 10],
             [10, 11], [11, 12], [0, 13], [13, 14], [14, 15], [15, 16], [0, 17], [17, 18], [18, 19], [19, 20]]

    for peaks in all_hand_peaks:
        for ie, e in enumerate(edges):
            # draw an edge only if both endpoints were detected ((0, 0) marks "not found")
            if np.sum(np.all(peaks[e], axis=1) == 0) == 0:
                x1, y1 = peaks[e[0]]
                x2, y2 = peaks[e[1]]
                cv2.line(canvas, (x1, y1), (x2, y2), matplotlib.colors.hsv_to_rgb([ie / float(len(edges)), 1.0, 1.0]) * 255, thickness=2)

        for i, keypoint in enumerate(peaks):
            x, y = keypoint
            cv2.circle(canvas, (x, y), 4, (0, 0, 255), thickness=-1)
            if show_number:
                cv2.putText(canvas, str(i), (x, y), cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0, 0, 0), lineType=cv2.LINE_AA)
    return canvas


# detect hands from body pose keypoints
# please refer to https://github.com/CMU-Perceptual-Computing-Lab/openpose/blob/master/src/openpose/hand/handDetector.cpp
def handDetect(candidate, subset, oriImg):
    # right hand: wrist 4, elbow 3, shoulder 2
    # left hand: wrist 7, elbow 6, shoulder 5
    ratioWristElbow = 0.33
    detect_result = []
    image_height, image_width = oriImg.shape[0:2]
    for person in subset.astype(int):
        # skip an arm if any of its three joints (shoulder, elbow, wrist) was not detected
        has_left = np.sum(person[[5, 6, 7]] == -1) == 0
        has_right = np.sum(person[[2, 3, 4]] == -1) == 0
        if not (has_left or has_right):
            continue
        hands = []
        # left hand
        if has_left:
            left_shoulder_index, left_elbow_index, left_wrist_index = person[[5, 6, 7]]
            x1, y1 = candidate[left_shoulder_index][:2]
            x2, y2 = candidate[left_elbow_index][:2]
            x3, y3 = candidate[left_wrist_index][:2]
            hands.append([x1, y1, x2, y2, x3, y3, True])
        # right hand
        if has_right:
            right_shoulder_index, right_elbow_index, right_wrist_index = person[[2, 3, 4]]
            x1, y1 = candidate[right_shoulder_index][:2]
            x2, y2 = candidate[right_elbow_index][:2]
            x3, y3 = candidate[right_wrist_index][:2]
            hands.append([x1, y1, x2, y2, x3, y3, False])

        for x1, y1, x2, y2, x3, y3, is_left in hands:
            # pos_hand = pos_wrist + ratio * (pos_wrist - pos_elbow) = (1 + ratio) * pos_wrist - ratio * pos_elbow
            # handRectangle.x = posePtr[wrist*3] + ratioWristElbow * (posePtr[wrist*3] - posePtr[elbow*3]);
            # handRectangle.y = posePtr[wrist*3+1] + ratioWristElbow * (posePtr[wrist*3+1] - posePtr[elbow*3+1]);
            # const auto distanceWristElbow = getDistance(poseKeypoints, person, wrist, elbow);
            # const auto distanceElbowShoulder = getDistance(poseKeypoints, person, elbow, shoulder);
            # handRectangle.width = 1.5f * fastMax(distanceWristElbow, 0.9f * distanceElbowShoulder);
            x = x3 + ratioWristElbow * (x3 - x2)
            y = y3 + ratioWristElbow * (y3 - y2)
            distanceWristElbow = math.sqrt((x3 - x2) ** 2 + (y3 - y2) ** 2)
            distanceElbowShoulder = math.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2)
            width = 1.5 * max(distanceWristElbow, 0.9 * distanceElbowShoulder)
            # x-y refers to the center --> offset to the top-left corner
            # handRectangle.x -= handRectangle.width / 2.f;
            # handRectangle.y -= handRectangle.height / 2.f;
            x -= width / 2
            y -= width / 2  # width = height
            # clip the box so it does not overflow the image
            if x < 0: x = 0
            if y < 0: y = 0
            width1 = width
            width2 = width
            if x + width > image_width: width1 = image_width - x
            if y + width > image_height: width2 = image_height - y
            width = min(width1, width2)
            # discard hand boxes narrower than 20 pixels
            if width >= 20:
                detect_result.append([int(x), int(y), int(width), is_left])

    '''
    return value: [[x, y, w, True if left hand else False]].
    width = height since the network requires a square input.
    x, y is the coordinate of the top-left corner.
    '''
    return detect_result


# get the indices of the maximum of a 2d array
def npmax(array):
    arrayindex = array.argmax(1)
    arrayvalue = array.max(1)
    i = arrayvalue.argmax()
    j = arrayindex[i]
    return i, j
annotator/uniformer/__init__.py ADDED
@@ -0,0 +1,23 @@
import os

from annotator.uniformer.mmseg.apis import init_segmentor, inference_segmentor, show_result_pyplot
from annotator.uniformer.mmseg.core.evaluation import get_palette
from annotator.util import annotator_ckpts_path


checkpoint_file = "https://huggingface.co/lllyasviel/ControlNet/resolve/main/annotator/ckpts/upernet_global_small.pth"


class UniformerDetector:
    def __init__(self):
        modelpath = os.path.join(annotator_ckpts_path, "upernet_global_small.pth")
        if not os.path.exists(modelpath):
            from basicsr.utils.download_util import load_file_from_url
            load_file_from_url(checkpoint_file, model_dir=annotator_ckpts_path)
        config_file = os.path.join(os.path.dirname(annotator_ckpts_path), "uniformer", "exp", "upernet_global_small", "config.py")
        self.model = init_segmentor(config_file, modelpath).cuda()

    def __call__(self, img):
        result = inference_segmentor(self.model, img)
        res_img = show_result_pyplot(self.model, img, result, get_palette('ade'), opacity=1)
        return res_img
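A usage sketch for UniformerDetector (not part of the commit); note that __init__ calls .cuda() unconditionally, so a GPU is required. The input path is a hypothetical example.

import cv2

segmentor = UniformerDetector()  # downloads upernet_global_small.pth on first use
img = cv2.imread("room.jpg")     # hypothetical HxWx3 input
seg_map = segmentor(img)         # ADE20K-palette segmentation rendered at opacity 1
cv2.imwrite("seg.png", seg_map)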
annotator/uniformer/configs/_base_/datasets/ade20k.py ADDED
@@ -0,0 +1,54 @@
# dataset settings
dataset_type = 'ADE20KDataset'
data_root = 'data/ade/ADEChallengeData2016'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
crop_size = (512, 512)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', reduce_zero_label=True),
    dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(2048, 512),
        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=4,
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/training',
        ann_dir='annotations/training',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/validation',
        ann_dir='annotations/validation',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/validation',
        ann_dir='annotations/validation',
        pipeline=test_pipeline))
annotator/uniformer/configs/_base_/datasets/chase_db1.py ADDED
@@ -0,0 +1,59 @@
# dataset settings
dataset_type = 'ChaseDB1Dataset'
data_root = 'data/CHASE_DB1'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
img_scale = (960, 999)
crop_size = (128, 128)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg'])
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=img_scale,
        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img'])
        ])
]

data = dict(
    samples_per_gpu=4,
    workers_per_gpu=4,
    train=dict(
        type='RepeatDataset',
        times=40000,
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            img_dir='images/training',
            ann_dir='annotations/training',
            pipeline=train_pipeline)),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/validation',
        ann_dir='annotations/validation',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/validation',
        ann_dir='annotations/validation',
        pipeline=test_pipeline))
annotator/uniformer/configs/_base_/datasets/cityscapes.py ADDED
@@ -0,0 +1,54 @@
# dataset settings
dataset_type = 'CityscapesDataset'
data_root = 'data/cityscapes/'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
crop_size = (512, 1024)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(2048, 1024),
        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='leftImg8bit/train',
        ann_dir='gtFine/train',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='leftImg8bit/val',
        ann_dir='gtFine/val',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='leftImg8bit/val',
        ann_dir='gtFine/val',
        pipeline=test_pipeline))
annotator/uniformer/configs/_base_/datasets/cityscapes_769x769.py ADDED
@@ -0,0 +1,35 @@
_base_ = './cityscapes.py'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
crop_size = (769, 769)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='Resize', img_scale=(2049, 1025), ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(2049, 1025),
        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    train=dict(pipeline=train_pipeline),
    val=dict(pipeline=test_pipeline),
    test=dict(pipeline=test_pipeline))
annotator/uniformer/configs/_base_/datasets/drive.py ADDED
@@ -0,0 +1,59 @@
# dataset settings
dataset_type = 'DRIVEDataset'
data_root = 'data/DRIVE'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
img_scale = (584, 565)
crop_size = (64, 64)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg'])
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=img_scale,
        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img'])
        ])
]

data = dict(
    samples_per_gpu=4,
    workers_per_gpu=4,
    train=dict(
        type='RepeatDataset',
        times=40000,
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            img_dir='images/training',
            ann_dir='annotations/training',
            pipeline=train_pipeline)),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/validation',
        ann_dir='annotations/validation',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/validation',
        ann_dir='annotations/validation',
        pipeline=test_pipeline))
annotator/uniformer/configs/_base_/datasets/hrf.py ADDED
@@ -0,0 +1,59 @@
# dataset settings
dataset_type = 'HRFDataset'
data_root = 'data/HRF'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
img_scale = (2336, 3504)
crop_size = (256, 256)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg'])
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=img_scale,
        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img'])
        ])
]

data = dict(
    samples_per_gpu=4,
    workers_per_gpu=4,
    train=dict(
        type='RepeatDataset',
        times=40000,
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            img_dir='images/training',
            ann_dir='annotations/training',
            pipeline=train_pipeline)),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/validation',
        ann_dir='annotations/validation',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/validation',
        ann_dir='annotations/validation',
        pipeline=test_pipeline))
annotator/uniformer/configs/_base_/datasets/pascal_context.py ADDED
@@ -0,0 +1,60 @@
# dataset settings
dataset_type = 'PascalContextDataset'
data_root = 'data/VOCdevkit/VOC2010/'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)

img_scale = (520, 520)
crop_size = (480, 480)

train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=img_scale,
        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=4,
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='JPEGImages',
        ann_dir='SegmentationClassContext',
        split='ImageSets/SegmentationContext/train.txt',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='JPEGImages',
        ann_dir='SegmentationClassContext',
        split='ImageSets/SegmentationContext/val.txt',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='JPEGImages',
        ann_dir='SegmentationClassContext',
        split='ImageSets/SegmentationContext/val.txt',
        pipeline=test_pipeline))
annotator/uniformer/configs/_base_/datasets/pascal_context_59.py ADDED
@@ -0,0 +1,60 @@
# dataset settings
dataset_type = 'PascalContextDataset59'
data_root = 'data/VOCdevkit/VOC2010/'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)

img_scale = (520, 520)
crop_size = (480, 480)

train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', reduce_zero_label=True),
    dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=img_scale,
        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=4,
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='JPEGImages',
        ann_dir='SegmentationClassContext',
        split='ImageSets/SegmentationContext/train.txt',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='JPEGImages',
        ann_dir='SegmentationClassContext',
        split='ImageSets/SegmentationContext/val.txt',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='JPEGImages',
        ann_dir='SegmentationClassContext',
        split='ImageSets/SegmentationContext/val.txt',
        pipeline=test_pipeline))
annotator/uniformer/configs/_base_/datasets/pascal_voc12.py ADDED
@@ -0,0 +1,57 @@
# dataset settings
dataset_type = 'PascalVOCDataset'
data_root = 'data/VOCdevkit/VOC2012'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
crop_size = (512, 512)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(2048, 512),
        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=4,
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='JPEGImages',
        ann_dir='SegmentationClass',
        split='ImageSets/Segmentation/train.txt',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='JPEGImages',
        ann_dir='SegmentationClass',
        split='ImageSets/Segmentation/val.txt',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='JPEGImages',
        ann_dir='SegmentationClass',
        split='ImageSets/Segmentation/val.txt',
        pipeline=test_pipeline))
annotator/uniformer/configs/_base_/datasets/pascal_voc12_aug.py ADDED
@@ -0,0 +1,9 @@
_base_ = './pascal_voc12.py'
# dataset settings
data = dict(
    train=dict(
        ann_dir=['SegmentationClass', 'SegmentationClassAug'],
        split=[
            'ImageSets/Segmentation/train.txt',
            'ImageSets/Segmentation/aug.txt'
        ]))
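These _base_ dataset files are meant to be composed through mmcv-style config inheritance: a child file lists its parents in _base_ and then overrides individual keys. A sketch of the merge (not part of the commit), assuming the mmcv copy vendored under annotator/uniformer; a plain mmcv install behaves the same:

from annotator.uniformer.mmcv import Config  # assumed vendored import path

cfg = Config.fromfile('annotator/uniformer/configs/_base_/datasets/pascal_voc12_aug.py')
# pascal_voc12_aug.py pulls in everything from pascal_voc12.py via _base_
# and overrides only data.train.ann_dir and data.train.split:
print(cfg.data.train.ann_dir)  # ['SegmentationClass', 'SegmentationClassAug']
print(cfg.data.train.type)     # 'PascalVOCDataset' (inherited unchanged)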
annotator/uniformer/configs/_base_/datasets/stare.py ADDED
@@ -0,0 +1,59 @@
# dataset settings
dataset_type = 'STAREDataset'
data_root = 'data/STARE'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
img_scale = (605, 700)
crop_size = (128, 128)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg'])
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=img_scale,
        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img'])
        ])
]

data = dict(
    samples_per_gpu=4,
    workers_per_gpu=4,
    train=dict(
        type='RepeatDataset',
        times=40000,
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            img_dir='images/training',
            ann_dir='annotations/training',
            pipeline=train_pipeline)),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/validation',
        ann_dir='annotations/validation',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/validation',
        ann_dir='annotations/validation',
        pipeline=test_pipeline))
annotator/uniformer/configs/_base_/default_runtime.py ADDED
@@ -0,0 +1,14 @@
# yapf:disable
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook', by_epoch=False),
        # dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]
cudnn_benchmark = True
annotator/uniformer/configs/_base_/models/ann_r50-d8.py ADDED
@@ -0,0 +1,46 @@
+ # model settings
+ norm_cfg = dict(type='SyncBN', requires_grad=True)
+ model = dict(
+     type='EncoderDecoder',
+     pretrained='open-mmlab://resnet50_v1c',
+     backbone=dict(
+         type='ResNetV1c',
+         depth=50,
+         num_stages=4,
+         out_indices=(0, 1, 2, 3),
+         dilations=(1, 1, 2, 4),
+         strides=(1, 2, 1, 1),
+         norm_cfg=norm_cfg,
+         norm_eval=False,
+         style='pytorch',
+         contract_dilation=True),
+     decode_head=dict(
+         type='ANNHead',
+         in_channels=[1024, 2048],
+         in_index=[2, 3],
+         channels=512,
+         project_channels=256,
+         query_scales=(1, ),
+         key_pool_scales=(1, 3, 6, 8),
+         dropout_ratio=0.1,
+         num_classes=19,
+         norm_cfg=norm_cfg,
+         align_corners=False,
+         loss_decode=dict(
+             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+     auxiliary_head=dict(
+         type='FCNHead',
+         in_channels=1024,
+         in_index=2,
+         channels=256,
+         num_convs=1,
+         concat_input=False,
+         dropout_ratio=0.1,
+         num_classes=19,
+         norm_cfg=norm_cfg,
+         align_corners=False,
+         loss_decode=dict(
+             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
+     # model training and testing settings
+     train_cfg=dict(),
+     test_cfg=dict(mode='whole'))
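`norm_cfg = dict(type='SyncBN', ...)` recurs in every model config below; it swaps plain BatchNorm for a cross-GPU synchronized variant, which matters at the small per-GPU batch sizes used by these datasets. A hedged sketch of the equivalent conversion in plain PyTorch (the torchvision model is only a stand-in; synchronization takes effect only under distributed training):

```python
import torch
import torchvision

# Replace every BatchNorm layer with SyncBatchNorm so batch statistics
# are aggregated across all GPUs in the process group during training.
model = torchvision.models.resnet50()
model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
```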
annotator/uniformer/configs/_base_/models/apcnet_r50-d8.py ADDED
@@ -0,0 +1,44 @@
+ # model settings
+ norm_cfg = dict(type='SyncBN', requires_grad=True)
+ model = dict(
+     type='EncoderDecoder',
+     pretrained='open-mmlab://resnet50_v1c',
+     backbone=dict(
+         type='ResNetV1c',
+         depth=50,
+         num_stages=4,
+         out_indices=(0, 1, 2, 3),
+         dilations=(1, 1, 2, 4),
+         strides=(1, 2, 1, 1),
+         norm_cfg=norm_cfg,
+         norm_eval=False,
+         style='pytorch',
+         contract_dilation=True),
+     decode_head=dict(
+         type='APCHead',
+         in_channels=2048,
+         in_index=3,
+         channels=512,
+         pool_scales=(1, 2, 3, 6),
+         dropout_ratio=0.1,
+         num_classes=19,
+         norm_cfg=dict(type='SyncBN', requires_grad=True),
+         align_corners=False,
+         loss_decode=dict(
+             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+     auxiliary_head=dict(
+         type='FCNHead',
+         in_channels=1024,
+         in_index=2,
+         channels=256,
+         num_convs=1,
+         concat_input=False,
+         dropout_ratio=0.1,
+         num_classes=19,
+         norm_cfg=norm_cfg,
+         align_corners=False,
+         loss_decode=dict(
+             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
+     # model training and testing settings
+     train_cfg=dict(),
+     test_cfg=dict(mode='whole'))
annotator/uniformer/configs/_base_/models/ccnet_r50-d8.py ADDED
@@ -0,0 +1,44 @@
+ # model settings
+ norm_cfg = dict(type='SyncBN', requires_grad=True)
+ model = dict(
+     type='EncoderDecoder',
+     pretrained='open-mmlab://resnet50_v1c',
+     backbone=dict(
+         type='ResNetV1c',
+         depth=50,
+         num_stages=4,
+         out_indices=(0, 1, 2, 3),
+         dilations=(1, 1, 2, 4),
+         strides=(1, 2, 1, 1),
+         norm_cfg=norm_cfg,
+         norm_eval=False,
+         style='pytorch',
+         contract_dilation=True),
+     decode_head=dict(
+         type='CCHead',
+         in_channels=2048,
+         in_index=3,
+         channels=512,
+         recurrence=2,
+         dropout_ratio=0.1,
+         num_classes=19,
+         norm_cfg=norm_cfg,
+         align_corners=False,
+         loss_decode=dict(
+             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+     auxiliary_head=dict(
+         type='FCNHead',
+         in_channels=1024,
+         in_index=2,
+         channels=256,
+         num_convs=1,
+         concat_input=False,
+         dropout_ratio=0.1,
+         num_classes=19,
+         norm_cfg=norm_cfg,
+         align_corners=False,
+         loss_decode=dict(
+             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
+     # model training and testing settings
+     train_cfg=dict(),
+     test_cfg=dict(mode='whole'))
annotator/uniformer/configs/_base_/models/cgnet.py ADDED
@@ -0,0 +1,35 @@
+ # model settings
+ norm_cfg = dict(type='SyncBN', eps=1e-03, requires_grad=True)
+ model = dict(
+     type='EncoderDecoder',
+     backbone=dict(
+         type='CGNet',
+         norm_cfg=norm_cfg,
+         in_channels=3,
+         num_channels=(32, 64, 128),
+         num_blocks=(3, 21),
+         dilations=(2, 4),
+         reductions=(8, 16)),
+     decode_head=dict(
+         type='FCNHead',
+         in_channels=256,
+         in_index=2,
+         channels=256,
+         num_convs=0,
+         concat_input=False,
+         dropout_ratio=0,
+         num_classes=19,
+         norm_cfg=norm_cfg,
+         loss_decode=dict(
+             type='CrossEntropyLoss',
+             use_sigmoid=False,
+             loss_weight=1.0,
+             class_weight=[
+                 2.5959933, 6.7415504, 3.5354059, 9.8663225, 9.690899, 9.369352,
+                 10.289121, 9.953208, 4.3097677, 9.490387, 7.674431, 9.396905,
+                 10.347791, 6.3927646, 10.226669, 10.241062, 10.280587,
+                 10.396974, 10.055647
+             ])),
+     # model training and testing settings
+     train_cfg=dict(sampler=None),
+     test_cfg=dict(mode='whole'))
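The `class_weight` list above has 19 entries, one per class, with rarer classes given larger weights so the loss does not collapse onto the dominant ones. Such a vector maps directly onto the `weight` argument of PyTorch's cross-entropy loss; a small sketch with the values truncated to three classes for brevity:

```python
import torch
import torch.nn as nn

# Per-class weights scale each class's contribution to the loss;
# 255 is the conventional ignore label for unlabeled pixels.
class_weight = torch.tensor([2.5959933, 6.7415504, 3.5354059])
criterion = nn.CrossEntropyLoss(weight=class_weight, ignore_index=255)
```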
annotator/uniformer/configs/_base_/models/danet_r50-d8.py ADDED
@@ -0,0 +1,44 @@
+ # model settings
+ norm_cfg = dict(type='SyncBN', requires_grad=True)
+ model = dict(
+     type='EncoderDecoder',
+     pretrained='open-mmlab://resnet50_v1c',
+     backbone=dict(
+         type='ResNetV1c',
+         depth=50,
+         num_stages=4,
+         out_indices=(0, 1, 2, 3),
+         dilations=(1, 1, 2, 4),
+         strides=(1, 2, 1, 1),
+         norm_cfg=norm_cfg,
+         norm_eval=False,
+         style='pytorch',
+         contract_dilation=True),
+     decode_head=dict(
+         type='DAHead',
+         in_channels=2048,
+         in_index=3,
+         channels=512,
+         pam_channels=64,
+         dropout_ratio=0.1,
+         num_classes=19,
+         norm_cfg=norm_cfg,
+         align_corners=False,
+         loss_decode=dict(
+             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+     auxiliary_head=dict(
+         type='FCNHead',
+         in_channels=1024,
+         in_index=2,
+         channels=256,
+         num_convs=1,
+         concat_input=False,
+         dropout_ratio=0.1,
+         num_classes=19,
+         norm_cfg=norm_cfg,
+         align_corners=False,
+         loss_decode=dict(
+             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
+     # model training and testing settings
+     train_cfg=dict(),
+     test_cfg=dict(mode='whole'))
annotator/uniformer/configs/_base_/models/deeplabv3_r50-d8.py ADDED
@@ -0,0 +1,44 @@
+ # model settings
+ norm_cfg = dict(type='SyncBN', requires_grad=True)
+ model = dict(
+     type='EncoderDecoder',
+     pretrained='open-mmlab://resnet50_v1c',
+     backbone=dict(
+         type='ResNetV1c',
+         depth=50,
+         num_stages=4,
+         out_indices=(0, 1, 2, 3),
+         dilations=(1, 1, 2, 4),
+         strides=(1, 2, 1, 1),
+         norm_cfg=norm_cfg,
+         norm_eval=False,
+         style='pytorch',
+         contract_dilation=True),
+     decode_head=dict(
+         type='ASPPHead',
+         in_channels=2048,
+         in_index=3,
+         channels=512,
+         dilations=(1, 12, 24, 36),
+         dropout_ratio=0.1,
+         num_classes=19,
+         norm_cfg=norm_cfg,
+         align_corners=False,
+         loss_decode=dict(
+             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+     auxiliary_head=dict(
+         type='FCNHead',
+         in_channels=1024,
+         in_index=2,
+         channels=256,
+         num_convs=1,
+         concat_input=False,
+         dropout_ratio=0.1,
+         num_classes=19,
+         norm_cfg=norm_cfg,
+         align_corners=False,
+         loss_decode=dict(
+             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
+     # model training and testing settings
+     train_cfg=dict(),
+     test_cfg=dict(mode='whole'))
annotator/uniformer/configs/_base_/models/deeplabv3_unet_s5-d16.py ADDED
@@ -0,0 +1,50 @@
+ # model settings
+ norm_cfg = dict(type='SyncBN', requires_grad=True)
+ model = dict(
+     type='EncoderDecoder',
+     pretrained=None,
+     backbone=dict(
+         type='UNet',
+         in_channels=3,
+         base_channels=64,
+         num_stages=5,
+         strides=(1, 1, 1, 1, 1),
+         enc_num_convs=(2, 2, 2, 2, 2),
+         dec_num_convs=(2, 2, 2, 2),
+         downsamples=(True, True, True, True),
+         enc_dilations=(1, 1, 1, 1, 1),
+         dec_dilations=(1, 1, 1, 1),
+         with_cp=False,
+         conv_cfg=None,
+         norm_cfg=norm_cfg,
+         act_cfg=dict(type='ReLU'),
+         upsample_cfg=dict(type='InterpConv'),
+         norm_eval=False),
+     decode_head=dict(
+         type='ASPPHead',
+         in_channels=64,
+         in_index=4,
+         channels=16,
+         dilations=(1, 12, 24, 36),
+         dropout_ratio=0.1,
+         num_classes=2,
+         norm_cfg=norm_cfg,
+         align_corners=False,
+         loss_decode=dict(
+             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+     auxiliary_head=dict(
+         type='FCNHead',
+         in_channels=128,
+         in_index=3,
+         channels=64,
+         num_convs=1,
+         concat_input=False,
+         dropout_ratio=0.1,
+         num_classes=2,
+         norm_cfg=norm_cfg,
+         align_corners=False,
+         loss_decode=dict(
+             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
+     # model training and testing settings
+     train_cfg=dict(),
+     test_cfg=dict(mode='slide', crop_size=256, stride=170))
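Unlike the ResNet configs, the UNet variants test with `mode='slide'`: the image is covered by overlapping 256-pixel crops at stride 170, and logits are averaged where windows overlap. A minimal sketch of that loop, assuming `model(img)` returns per-pixel logits at input resolution (mmseg's real implementation also handles resizing and normalized output):

```python
import torch

def slide_inference(model, img: torch.Tensor, crop=256, stride=170,
                    num_classes=2):
    """Average overlapping crop predictions over a full (N, C, H, W) image."""
    _, _, h, w = img.shape
    logits = img.new_zeros((img.shape[0], num_classes, h, w))
    count = img.new_zeros((1, 1, h, w))
    h_grids = max(h - crop + stride - 1, 0) // stride + 1
    w_grids = max(w - crop + stride - 1, 0) // stride + 1
    for i in range(h_grids):
        for j in range(w_grids):
            # Clamp each window so the bottom/right borders are covered.
            y1 = min(i * stride, max(h - crop, 0))
            x1 = min(j * stride, max(w - crop, 0))
            y2, x2 = min(y1 + crop, h), min(x1 + crop, w)
            logits[:, :, y1:y2, x1:x2] += model(img[:, :, y1:y2, x1:x2])
            count[:, :, y1:y2, x1:x2] += 1
    return logits / count  # every pixel is covered, so count is never zero
```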
annotator/uniformer/configs/_base_/models/deeplabv3plus_r50-d8.py ADDED
@@ -0,0 +1,46 @@
+ # model settings
+ norm_cfg = dict(type='SyncBN', requires_grad=True)
+ model = dict(
+     type='EncoderDecoder',
+     pretrained='open-mmlab://resnet50_v1c',
+     backbone=dict(
+         type='ResNetV1c',
+         depth=50,
+         num_stages=4,
+         out_indices=(0, 1, 2, 3),
+         dilations=(1, 1, 2, 4),
+         strides=(1, 2, 1, 1),
+         norm_cfg=norm_cfg,
+         norm_eval=False,
+         style='pytorch',
+         contract_dilation=True),
+     decode_head=dict(
+         type='DepthwiseSeparableASPPHead',
+         in_channels=2048,
+         in_index=3,
+         channels=512,
+         dilations=(1, 12, 24, 36),
+         c1_in_channels=256,
+         c1_channels=48,
+         dropout_ratio=0.1,
+         num_classes=19,
+         norm_cfg=norm_cfg,
+         align_corners=False,
+         loss_decode=dict(
+             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+     auxiliary_head=dict(
+         type='FCNHead',
+         in_channels=1024,
+         in_index=2,
+         channels=256,
+         num_convs=1,
+         concat_input=False,
+         dropout_ratio=0.1,
+         num_classes=19,
+         norm_cfg=norm_cfg,
+         align_corners=False,
+         loss_decode=dict(
+             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
+     # model training and testing settings
+     train_cfg=dict(),
+     test_cfg=dict(mode='whole'))
annotator/uniformer/configs/_base_/models/dmnet_r50-d8.py ADDED
@@ -0,0 +1,44 @@
+ # model settings
+ norm_cfg = dict(type='SyncBN', requires_grad=True)
+ model = dict(
+     type='EncoderDecoder',
+     pretrained='open-mmlab://resnet50_v1c',
+     backbone=dict(
+         type='ResNetV1c',
+         depth=50,
+         num_stages=4,
+         out_indices=(0, 1, 2, 3),
+         dilations=(1, 1, 2, 4),
+         strides=(1, 2, 1, 1),
+         norm_cfg=norm_cfg,
+         norm_eval=False,
+         style='pytorch',
+         contract_dilation=True),
+     decode_head=dict(
+         type='DMHead',
+         in_channels=2048,
+         in_index=3,
+         channels=512,
+         filter_sizes=(1, 3, 5, 7),
+         dropout_ratio=0.1,
+         num_classes=19,
+         norm_cfg=dict(type='SyncBN', requires_grad=True),
+         align_corners=False,
+         loss_decode=dict(
+             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+     auxiliary_head=dict(
+         type='FCNHead',
+         in_channels=1024,
+         in_index=2,
+         channels=256,
+         num_convs=1,
+         concat_input=False,
+         dropout_ratio=0.1,
+         num_classes=19,
+         norm_cfg=norm_cfg,
+         align_corners=False,
+         loss_decode=dict(
+             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
+     # model training and testing settings
+     train_cfg=dict(),
+     test_cfg=dict(mode='whole'))
annotator/uniformer/configs/_base_/models/dnl_r50-d8.py ADDED
@@ -0,0 +1,46 @@
+ # model settings
+ norm_cfg = dict(type='SyncBN', requires_grad=True)
+ model = dict(
+     type='EncoderDecoder',
+     pretrained='open-mmlab://resnet50_v1c',
+     backbone=dict(
+         type='ResNetV1c',
+         depth=50,
+         num_stages=4,
+         out_indices=(0, 1, 2, 3),
+         dilations=(1, 1, 2, 4),
+         strides=(1, 2, 1, 1),
+         norm_cfg=norm_cfg,
+         norm_eval=False,
+         style='pytorch',
+         contract_dilation=True),
+     decode_head=dict(
+         type='DNLHead',
+         in_channels=2048,
+         in_index=3,
+         channels=512,
+         dropout_ratio=0.1,
+         reduction=2,
+         use_scale=True,
+         mode='embedded_gaussian',
+         num_classes=19,
+         norm_cfg=norm_cfg,
+         align_corners=False,
+         loss_decode=dict(
+             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+     auxiliary_head=dict(
+         type='FCNHead',
+         in_channels=1024,
+         in_index=2,
+         channels=256,
+         num_convs=1,
+         concat_input=False,
+         dropout_ratio=0.1,
+         num_classes=19,
+         norm_cfg=norm_cfg,
+         align_corners=False,
+         loss_decode=dict(
+             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
+     # model training and testing settings
+     train_cfg=dict(),
+     test_cfg=dict(mode='whole'))
annotator/uniformer/configs/_base_/models/emanet_r50-d8.py ADDED
@@ -0,0 +1,47 @@
+ # model settings
+ norm_cfg = dict(type='SyncBN', requires_grad=True)
+ model = dict(
+     type='EncoderDecoder',
+     pretrained='open-mmlab://resnet50_v1c',
+     backbone=dict(
+         type='ResNetV1c',
+         depth=50,
+         num_stages=4,
+         out_indices=(0, 1, 2, 3),
+         dilations=(1, 1, 2, 4),
+         strides=(1, 2, 1, 1),
+         norm_cfg=norm_cfg,
+         norm_eval=False,
+         style='pytorch',
+         contract_dilation=True),
+     decode_head=dict(
+         type='EMAHead',
+         in_channels=2048,
+         in_index=3,
+         channels=256,
+         ema_channels=512,
+         num_bases=64,
+         num_stages=3,
+         momentum=0.1,
+         dropout_ratio=0.1,
+         num_classes=19,
+         norm_cfg=norm_cfg,
+         align_corners=False,
+         loss_decode=dict(
+             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+     auxiliary_head=dict(
+         type='FCNHead',
+         in_channels=1024,
+         in_index=2,
+         channels=256,
+         num_convs=1,
+         concat_input=False,
+         dropout_ratio=0.1,
+         num_classes=19,
+         norm_cfg=norm_cfg,
+         align_corners=False,
+         loss_decode=dict(
+             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
+     # model training and testing settings
+     train_cfg=dict(),
+     test_cfg=dict(mode='whole'))
annotator/uniformer/configs/_base_/models/encnet_r50-d8.py ADDED
@@ -0,0 +1,48 @@
+ # model settings
+ norm_cfg = dict(type='SyncBN', requires_grad=True)
+ model = dict(
+     type='EncoderDecoder',
+     pretrained='open-mmlab://resnet50_v1c',
+     backbone=dict(
+         type='ResNetV1c',
+         depth=50,
+         num_stages=4,
+         out_indices=(0, 1, 2, 3),
+         dilations=(1, 1, 2, 4),
+         strides=(1, 2, 1, 1),
+         norm_cfg=norm_cfg,
+         norm_eval=False,
+         style='pytorch',
+         contract_dilation=True),
+     decode_head=dict(
+         type='EncHead',
+         in_channels=[512, 1024, 2048],
+         in_index=(1, 2, 3),
+         channels=512,
+         num_codes=32,
+         use_se_loss=True,
+         add_lateral=False,
+         dropout_ratio=0.1,
+         num_classes=19,
+         norm_cfg=norm_cfg,
+         align_corners=False,
+         loss_decode=dict(
+             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
+         loss_se_decode=dict(
+             type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.2)),
+     auxiliary_head=dict(
+         type='FCNHead',
+         in_channels=1024,
+         in_index=2,
+         channels=256,
+         num_convs=1,
+         concat_input=False,
+         dropout_ratio=0.1,
+         num_classes=19,
+         norm_cfg=norm_cfg,
+         align_corners=False,
+         loss_decode=dict(
+             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
+     # model training and testing settings
+     train_cfg=dict(),
+     test_cfg=dict(mode='whole'))
annotator/uniformer/configs/_base_/models/fast_scnn.py ADDED
@@ -0,0 +1,57 @@
+ # model settings
+ norm_cfg = dict(type='SyncBN', requires_grad=True, momentum=0.01)
+ model = dict(
+     type='EncoderDecoder',
+     backbone=dict(
+         type='FastSCNN',
+         downsample_dw_channels=(32, 48),
+         global_in_channels=64,
+         global_block_channels=(64, 96, 128),
+         global_block_strides=(2, 2, 1),
+         global_out_channels=128,
+         higher_in_channels=64,
+         lower_in_channels=128,
+         fusion_out_channels=128,
+         out_indices=(0, 1, 2),
+         norm_cfg=norm_cfg,
+         align_corners=False),
+     decode_head=dict(
+         type='DepthwiseSeparableFCNHead',
+         in_channels=128,
+         channels=128,
+         concat_input=False,
+         num_classes=19,
+         in_index=-1,
+         norm_cfg=norm_cfg,
+         align_corners=False,
+         loss_decode=dict(
+             type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4)),
+     auxiliary_head=[
+         dict(
+             type='FCNHead',
+             in_channels=128,
+             channels=32,
+             num_convs=1,
+             num_classes=19,
+             in_index=-2,
+             norm_cfg=norm_cfg,
+             concat_input=False,
+             align_corners=False,
+             loss_decode=dict(
+                 type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4)),
+         dict(
+             type='FCNHead',
+             in_channels=64,
+             channels=32,
+             num_convs=1,
+             num_classes=19,
+             in_index=-3,
+             norm_cfg=norm_cfg,
+             concat_input=False,
+             align_corners=False,
+             loss_decode=dict(
+                 type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4)),
+     ],
+     # model training and testing settings
+     train_cfg=dict(),
+     test_cfg=dict(mode='whole'))
annotator/uniformer/configs/_base_/models/fcn_hr18.py ADDED
@@ -0,0 +1,52 @@
+ # model settings
+ norm_cfg = dict(type='SyncBN', requires_grad=True)
+ model = dict(
+     type='EncoderDecoder',
+     pretrained='open-mmlab://msra/hrnetv2_w18',
+     backbone=dict(
+         type='HRNet',
+         norm_cfg=norm_cfg,
+         norm_eval=False,
+         extra=dict(
+             stage1=dict(
+                 num_modules=1,
+                 num_branches=1,
+                 block='BOTTLENECK',
+                 num_blocks=(4, ),
+                 num_channels=(64, )),
+             stage2=dict(
+                 num_modules=1,
+                 num_branches=2,
+                 block='BASIC',
+                 num_blocks=(4, 4),
+                 num_channels=(18, 36)),
+             stage3=dict(
+                 num_modules=4,
+                 num_branches=3,
+                 block='BASIC',
+                 num_blocks=(4, 4, 4),
+                 num_channels=(18, 36, 72)),
+             stage4=dict(
+                 num_modules=3,
+                 num_branches=4,
+                 block='BASIC',
+                 num_blocks=(4, 4, 4, 4),
+                 num_channels=(18, 36, 72, 144)))),
+     decode_head=dict(
+         type='FCNHead',
+         in_channels=[18, 36, 72, 144],
+         in_index=(0, 1, 2, 3),
+         channels=sum([18, 36, 72, 144]),
+         input_transform='resize_concat',
+         kernel_size=1,
+         num_convs=1,
+         concat_input=False,
+         dropout_ratio=-1,
+         num_classes=19,
+         norm_cfg=norm_cfg,
+         align_corners=False,
+         loss_decode=dict(
+             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+     # model training and testing settings
+     train_cfg=dict(),
+     test_cfg=dict(mode='whole'))
annotator/uniformer/configs/_base_/models/fcn_r50-d8.py ADDED
@@ -0,0 +1,45 @@
+ # model settings
+ norm_cfg = dict(type='SyncBN', requires_grad=True)
+ model = dict(
+     type='EncoderDecoder',
+     pretrained='open-mmlab://resnet50_v1c',
+     backbone=dict(
+         type='ResNetV1c',
+         depth=50,
+         num_stages=4,
+         out_indices=(0, 1, 2, 3),
+         dilations=(1, 1, 2, 4),
+         strides=(1, 2, 1, 1),
+         norm_cfg=norm_cfg,
+         norm_eval=False,
+         style='pytorch',
+         contract_dilation=True),
+     decode_head=dict(
+         type='FCNHead',
+         in_channels=2048,
+         in_index=3,
+         channels=512,
+         num_convs=2,
+         concat_input=True,
+         dropout_ratio=0.1,
+         num_classes=19,
+         norm_cfg=norm_cfg,
+         align_corners=False,
+         loss_decode=dict(
+             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+     auxiliary_head=dict(
+         type='FCNHead',
+         in_channels=1024,
+         in_index=2,
+         channels=256,
+         num_convs=1,
+         concat_input=False,
+         dropout_ratio=0.1,
+         num_classes=19,
+         norm_cfg=norm_cfg,
+         align_corners=False,
+         loss_decode=dict(
+             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
+     # model training and testing settings
+     train_cfg=dict(),
+     test_cfg=dict(mode='whole'))
annotator/uniformer/configs/_base_/models/fcn_unet_s5-d16.py ADDED
@@ -0,0 +1,51 @@
+ # model settings
+ norm_cfg = dict(type='SyncBN', requires_grad=True)
+ model = dict(
+     type='EncoderDecoder',
+     pretrained=None,
+     backbone=dict(
+         type='UNet',
+         in_channels=3,
+         base_channels=64,
+         num_stages=5,
+         strides=(1, 1, 1, 1, 1),
+         enc_num_convs=(2, 2, 2, 2, 2),
+         dec_num_convs=(2, 2, 2, 2),
+         downsamples=(True, True, True, True),
+         enc_dilations=(1, 1, 1, 1, 1),
+         dec_dilations=(1, 1, 1, 1),
+         with_cp=False,
+         conv_cfg=None,
+         norm_cfg=norm_cfg,
+         act_cfg=dict(type='ReLU'),
+         upsample_cfg=dict(type='InterpConv'),
+         norm_eval=False),
+     decode_head=dict(
+         type='FCNHead',
+         in_channels=64,
+         in_index=4,
+         channels=64,
+         num_convs=1,
+         concat_input=False,
+         dropout_ratio=0.1,
+         num_classes=2,
+         norm_cfg=norm_cfg,
+         align_corners=False,
+         loss_decode=dict(
+             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+     auxiliary_head=dict(
+         type='FCNHead',
+         in_channels=128,
+         in_index=3,
+         channels=64,
+         num_convs=1,
+         concat_input=False,
+         dropout_ratio=0.1,
+         num_classes=2,
+         norm_cfg=norm_cfg,
+         align_corners=False,
+         loss_decode=dict(
+             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
+     # model training and testing settings
+     train_cfg=dict(),
+     test_cfg=dict(mode='slide', crop_size=256, stride=170))
annotator/uniformer/configs/_base_/models/fpn_r50.py ADDED
@@ -0,0 +1,36 @@
+ # model settings
+ norm_cfg = dict(type='SyncBN', requires_grad=True)
+ model = dict(
+     type='EncoderDecoder',
+     pretrained='open-mmlab://resnet50_v1c',
+     backbone=dict(
+         type='ResNetV1c',
+         depth=50,
+         num_stages=4,
+         out_indices=(0, 1, 2, 3),
+         dilations=(1, 1, 1, 1),
+         strides=(1, 2, 2, 2),
+         norm_cfg=norm_cfg,
+         norm_eval=False,
+         style='pytorch',
+         contract_dilation=True),
+     neck=dict(
+         type='FPN',
+         in_channels=[256, 512, 1024, 2048],
+         out_channels=256,
+         num_outs=4),
+     decode_head=dict(
+         type='FPNHead',
+         in_channels=[256, 256, 256, 256],
+         in_index=[0, 1, 2, 3],
+         feature_strides=[4, 8, 16, 32],
+         channels=128,
+         dropout_ratio=0.1,
+         num_classes=19,
+         norm_cfg=norm_cfg,
+         align_corners=False,
+         loss_decode=dict(
+             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+     # model training and testing settings
+     train_cfg=dict(),
+     test_cfg=dict(mode='whole'))
annotator/uniformer/configs/_base_/models/fpn_uniformer.py ADDED
@@ -0,0 +1,35 @@
+ # model settings
+ norm_cfg = dict(type='SyncBN', requires_grad=True)
+ model = dict(
+     type='EncoderDecoder',
+     backbone=dict(
+         type='UniFormer',
+         embed_dim=[64, 128, 320, 512],
+         layers=[3, 4, 8, 3],
+         head_dim=64,
+         mlp_ratio=4.,
+         qkv_bias=True,
+         drop_rate=0.,
+         attn_drop_rate=0.,
+         drop_path_rate=0.1),
+     neck=dict(
+         type='FPN',
+         in_channels=[64, 128, 320, 512],
+         out_channels=256,
+         num_outs=4),
+     decode_head=dict(
+         type='FPNHead',
+         in_channels=[256, 256, 256, 256],
+         in_index=[0, 1, 2, 3],
+         feature_strides=[4, 8, 16, 32],
+         channels=128,
+         dropout_ratio=0.1,
+         num_classes=150,
+         norm_cfg=norm_cfg,
+         align_corners=False,
+         loss_decode=dict(
+             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+     # model training and testing settings
+     train_cfg=dict(),
+     test_cfg=dict(mode='whole')
+ )
annotator/uniformer/configs/_base_/models/gcnet_r50-d8.py ADDED
@@ -0,0 +1,46 @@
+ # model settings
+ norm_cfg = dict(type='SyncBN', requires_grad=True)
+ model = dict(
+     type='EncoderDecoder',
+     pretrained='open-mmlab://resnet50_v1c',
+     backbone=dict(
+         type='ResNetV1c',
+         depth=50,
+         num_stages=4,
+         out_indices=(0, 1, 2, 3),
+         dilations=(1, 1, 2, 4),
+         strides=(1, 2, 1, 1),
+         norm_cfg=norm_cfg,
+         norm_eval=False,
+         style='pytorch',
+         contract_dilation=True),
+     decode_head=dict(
+         type='GCHead',
+         in_channels=2048,
+         in_index=3,
+         channels=512,
+         ratio=1 / 4.,
+         pooling_type='att',
+         fusion_types=('channel_add', ),
+         dropout_ratio=0.1,
+         num_classes=19,
+         norm_cfg=norm_cfg,
+         align_corners=False,
+         loss_decode=dict(
+             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+     auxiliary_head=dict(
+         type='FCNHead',
+         in_channels=1024,
+         in_index=2,
+         channels=256,
+         num_convs=1,
+         concat_input=False,
+         dropout_ratio=0.1,
+         num_classes=19,
+         norm_cfg=norm_cfg,
+         align_corners=False,
+         loss_decode=dict(
+             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
+     # model training and testing settings
+     train_cfg=dict(),
+     test_cfg=dict(mode='whole'))
annotator/uniformer/configs/_base_/models/lraspp_m-v3-d8.py ADDED
@@ -0,0 +1,25 @@
+ # model settings
+ norm_cfg = dict(type='SyncBN', eps=0.001, requires_grad=True)
+ model = dict(
+     type='EncoderDecoder',
+     backbone=dict(
+         type='MobileNetV3',
+         arch='large',
+         out_indices=(1, 3, 16),
+         norm_cfg=norm_cfg),
+     decode_head=dict(
+         type='LRASPPHead',
+         in_channels=(16, 24, 960),
+         in_index=(0, 1, 2),
+         channels=128,
+         input_transform='multiple_select',
+         dropout_ratio=0.1,
+         num_classes=19,
+         norm_cfg=norm_cfg,
+         act_cfg=dict(type='ReLU'),
+         align_corners=False,
+         loss_decode=dict(
+             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+     # model training and testing settings
+     train_cfg=dict(),
+     test_cfg=dict(mode='whole'))
annotator/uniformer/configs/_base_/models/nonlocal_r50-d8.py ADDED
@@ -0,0 +1,46 @@
+ # model settings
+ norm_cfg = dict(type='SyncBN', requires_grad=True)
+ model = dict(
+     type='EncoderDecoder',
+     pretrained='open-mmlab://resnet50_v1c',
+     backbone=dict(
+         type='ResNetV1c',
+         depth=50,
+         num_stages=4,
+         out_indices=(0, 1, 2, 3),
+         dilations=(1, 1, 2, 4),
+         strides=(1, 2, 1, 1),
+         norm_cfg=norm_cfg,
+         norm_eval=False,
+         style='pytorch',
+         contract_dilation=True),
+     decode_head=dict(
+         type='NLHead',
+         in_channels=2048,
+         in_index=3,
+         channels=512,
+         dropout_ratio=0.1,
+         reduction=2,
+         use_scale=True,
+         mode='embedded_gaussian',
+         num_classes=19,
+         norm_cfg=norm_cfg,
+         align_corners=False,
+         loss_decode=dict(
+             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+     auxiliary_head=dict(
+         type='FCNHead',
+         in_channels=1024,
+         in_index=2,
+         channels=256,
+         num_convs=1,
+         concat_input=False,
+         dropout_ratio=0.1,
+         num_classes=19,
+         norm_cfg=norm_cfg,
+         align_corners=False,
+         loss_decode=dict(
+             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
+     # model training and testing settings
+     train_cfg=dict(),
+     test_cfg=dict(mode='whole'))
annotator/uniformer/configs/_base_/models/ocrnet_hr18.py ADDED
@@ -0,0 +1,68 @@
+ # model settings
+ norm_cfg = dict(type='SyncBN', requires_grad=True)
+ model = dict(
+     type='CascadeEncoderDecoder',
+     num_stages=2,
+     pretrained='open-mmlab://msra/hrnetv2_w18',
+     backbone=dict(
+         type='HRNet',
+         norm_cfg=norm_cfg,
+         norm_eval=False,
+         extra=dict(
+             stage1=dict(
+                 num_modules=1,
+                 num_branches=1,
+                 block='BOTTLENECK',
+                 num_blocks=(4, ),
+                 num_channels=(64, )),
+             stage2=dict(
+                 num_modules=1,
+                 num_branches=2,
+                 block='BASIC',
+                 num_blocks=(4, 4),
+                 num_channels=(18, 36)),
+             stage3=dict(
+                 num_modules=4,
+                 num_branches=3,
+                 block='BASIC',
+                 num_blocks=(4, 4, 4),
+                 num_channels=(18, 36, 72)),
+             stage4=dict(
+                 num_modules=3,
+                 num_branches=4,
+                 block='BASIC',
+                 num_blocks=(4, 4, 4, 4),
+                 num_channels=(18, 36, 72, 144)))),
+     decode_head=[
+         dict(
+             type='FCNHead',
+             in_channels=[18, 36, 72, 144],
+             channels=sum([18, 36, 72, 144]),
+             in_index=(0, 1, 2, 3),
+             input_transform='resize_concat',
+             kernel_size=1,
+             num_convs=1,
+             concat_input=False,
+             dropout_ratio=-1,
+             num_classes=19,
+             norm_cfg=norm_cfg,
+             align_corners=False,
+             loss_decode=dict(
+                 type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
+         dict(
+             type='OCRHead',
+             in_channels=[18, 36, 72, 144],
+             in_index=(0, 1, 2, 3),
+             input_transform='resize_concat',
+             channels=512,
+             ocr_channels=256,
+             dropout_ratio=-1,
+             num_classes=19,
+             norm_cfg=norm_cfg,
+             align_corners=False,
+             loss_decode=dict(
+                 type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+     ],
+     # model training and testing settings
+     train_cfg=dict(),
+     test_cfg=dict(mode='whole'))
annotator/uniformer/configs/_base_/models/ocrnet_r50-d8.py ADDED
@@ -0,0 +1,47 @@
+ # model settings
+ norm_cfg = dict(type='SyncBN', requires_grad=True)
+ model = dict(
+     type='CascadeEncoderDecoder',
+     num_stages=2,
+     pretrained='open-mmlab://resnet50_v1c',
+     backbone=dict(
+         type='ResNetV1c',
+         depth=50,
+         num_stages=4,
+         out_indices=(0, 1, 2, 3),
+         dilations=(1, 1, 2, 4),
+         strides=(1, 2, 1, 1),
+         norm_cfg=norm_cfg,
+         norm_eval=False,
+         style='pytorch',
+         contract_dilation=True),
+     decode_head=[
+         dict(
+             type='FCNHead',
+             in_channels=1024,
+             in_index=2,
+             channels=256,
+             num_convs=1,
+             concat_input=False,
+             dropout_ratio=0.1,
+             num_classes=19,
+             norm_cfg=norm_cfg,
+             align_corners=False,
+             loss_decode=dict(
+                 type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
+         dict(
+             type='OCRHead',
+             in_channels=2048,
+             in_index=3,
+             channels=512,
+             ocr_channels=256,
+             dropout_ratio=0.1,
+             num_classes=19,
+             norm_cfg=norm_cfg,
+             align_corners=False,
+             loss_decode=dict(
+                 type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0))
+     ],
+     # model training and testing settings
+     train_cfg=dict(),
+     test_cfg=dict(mode='whole'))
annotator/uniformer/configs/_base_/models/pointrend_r50.py ADDED
@@ -0,0 +1,56 @@
+ # model settings
+ norm_cfg = dict(type='SyncBN', requires_grad=True)
+ model = dict(
+     type='CascadeEncoderDecoder',
+     num_stages=2,
+     pretrained='open-mmlab://resnet50_v1c',
+     backbone=dict(
+         type='ResNetV1c',
+         depth=50,
+         num_stages=4,
+         out_indices=(0, 1, 2, 3),
+         dilations=(1, 1, 1, 1),
+         strides=(1, 2, 2, 2),
+         norm_cfg=norm_cfg,
+         norm_eval=False,
+         style='pytorch',
+         contract_dilation=True),
+     neck=dict(
+         type='FPN',
+         in_channels=[256, 512, 1024, 2048],
+         out_channels=256,
+         num_outs=4),
+     decode_head=[
+         dict(
+             type='FPNHead',
+             in_channels=[256, 256, 256, 256],
+             in_index=[0, 1, 2, 3],
+             feature_strides=[4, 8, 16, 32],
+             channels=128,
+             dropout_ratio=-1,
+             num_classes=19,
+             norm_cfg=norm_cfg,
+             align_corners=False,
+             loss_decode=dict(
+                 type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+         dict(
+             type='PointHead',
+             in_channels=[256],
+             in_index=[0],
+             channels=256,
+             num_fcs=3,
+             coarse_pred_each_layer=True,
+             dropout_ratio=-1,
+             num_classes=19,
+             align_corners=False,
+             loss_decode=dict(
+                 type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0))
+     ],
+     # model training and testing settings
+     train_cfg=dict(
+         num_points=2048, oversample_ratio=3, importance_sample_ratio=0.75),
+     test_cfg=dict(
+         mode='whole',
+         subdivision_steps=2,
+         subdivision_num_points=8196,
+         scale_factor=2))
annotator/uniformer/configs/_base_/models/psanet_r50-d8.py ADDED
@@ -0,0 +1,49 @@
+ # model settings
+ norm_cfg = dict(type='SyncBN', requires_grad=True)
+ model = dict(
+     type='EncoderDecoder',
+     pretrained='open-mmlab://resnet50_v1c',
+     backbone=dict(
+         type='ResNetV1c',
+         depth=50,
+         num_stages=4,
+         out_indices=(0, 1, 2, 3),
+         dilations=(1, 1, 2, 4),
+         strides=(1, 2, 1, 1),
+         norm_cfg=norm_cfg,
+         norm_eval=False,
+         style='pytorch',
+         contract_dilation=True),
+     decode_head=dict(
+         type='PSAHead',
+         in_channels=2048,
+         in_index=3,
+         channels=512,
+         mask_size=(97, 97),
+         psa_type='bi-direction',
+         compact=False,
+         shrink_factor=2,
+         normalization_factor=1.0,
+         psa_softmax=True,
+         dropout_ratio=0.1,
+         num_classes=19,
+         norm_cfg=norm_cfg,
+         align_corners=False,
+         loss_decode=dict(
+             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+     auxiliary_head=dict(
+         type='FCNHead',
+         in_channels=1024,
+         in_index=2,
+         channels=256,
+         num_convs=1,
+         concat_input=False,
+         dropout_ratio=0.1,
+         num_classes=19,
+         norm_cfg=norm_cfg,
+         align_corners=False,
+         loss_decode=dict(
+             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
+     # model training and testing settings
+     train_cfg=dict(),
+     test_cfg=dict(mode='whole'))
annotator/uniformer/configs/_base_/models/pspnet_r50-d8.py ADDED
@@ -0,0 +1,44 @@
+ # model settings
+ norm_cfg = dict(type='SyncBN', requires_grad=True)
+ model = dict(
+     type='EncoderDecoder',
+     pretrained='open-mmlab://resnet50_v1c',
+     backbone=dict(
+         type='ResNetV1c',
+         depth=50,
+         num_stages=4,
+         out_indices=(0, 1, 2, 3),
+         dilations=(1, 1, 2, 4),
+         strides=(1, 2, 1, 1),
+         norm_cfg=norm_cfg,
+         norm_eval=False,
+         style='pytorch',
+         contract_dilation=True),
+     decode_head=dict(
+         type='PSPHead',
+         in_channels=2048,
+         in_index=3,
+         channels=512,
+         pool_scales=(1, 2, 3, 6),
+         dropout_ratio=0.1,
+         num_classes=19,
+         norm_cfg=norm_cfg,
+         align_corners=False,
+         loss_decode=dict(
+             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+     auxiliary_head=dict(
+         type='FCNHead',
+         in_channels=1024,
+         in_index=2,
+         channels=256,
+         num_convs=1,
+         concat_input=False,
+         dropout_ratio=0.1,
+         num_classes=19,
+         norm_cfg=norm_cfg,
+         align_corners=False,
+         loss_decode=dict(
+             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
+     # model training and testing settings
+     train_cfg=dict(),
+     test_cfg=dict(mode='whole'))
annotator/uniformer/configs/_base_/models/pspnet_unet_s5-d16.py ADDED
@@ -0,0 +1,50 @@
+ # model settings
+ norm_cfg = dict(type='SyncBN', requires_grad=True)
+ model = dict(
+     type='EncoderDecoder',
+     pretrained=None,
+     backbone=dict(
+         type='UNet',
+         in_channels=3,
+         base_channels=64,
+         num_stages=5,
+         strides=(1, 1, 1, 1, 1),
+         enc_num_convs=(2, 2, 2, 2, 2),
+         dec_num_convs=(2, 2, 2, 2),
+         downsamples=(True, True, True, True),
+         enc_dilations=(1, 1, 1, 1, 1),
+         dec_dilations=(1, 1, 1, 1),
+         with_cp=False,
+         conv_cfg=None,
+         norm_cfg=norm_cfg,
+         act_cfg=dict(type='ReLU'),
+         upsample_cfg=dict(type='InterpConv'),
+         norm_eval=False),
+     decode_head=dict(
+         type='PSPHead',
+         in_channels=64,
+         in_index=4,
+         channels=16,
+         pool_scales=(1, 2, 3, 6),
+         dropout_ratio=0.1,
+         num_classes=2,
+         norm_cfg=norm_cfg,
+         align_corners=False,
+         loss_decode=dict(
+             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+     auxiliary_head=dict(
+         type='FCNHead',
+         in_channels=128,
+         in_index=3,
+         channels=64,
+         num_convs=1,
+         concat_input=False,
+         dropout_ratio=0.1,
+         num_classes=2,
+         norm_cfg=norm_cfg,
+         align_corners=False,
+         loss_decode=dict(
+             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
+     # model training and testing settings
+     train_cfg=dict(),
+     test_cfg=dict(mode='slide', crop_size=256, stride=170))
annotator/uniformer/configs/_base_/models/upernet_r50.py ADDED
@@ -0,0 +1,44 @@
+ # model settings
+ norm_cfg = dict(type='SyncBN', requires_grad=True)
+ model = dict(
+     type='EncoderDecoder',
+     pretrained='open-mmlab://resnet50_v1c',
+     backbone=dict(
+         type='ResNetV1c',
+         depth=50,
+         num_stages=4,
+         out_indices=(0, 1, 2, 3),
+         dilations=(1, 1, 1, 1),
+         strides=(1, 2, 2, 2),
+         norm_cfg=norm_cfg,
+         norm_eval=False,
+         style='pytorch',
+         contract_dilation=True),
+     decode_head=dict(
+         type='UPerHead',
+         in_channels=[256, 512, 1024, 2048],
+         in_index=[0, 1, 2, 3],
+         pool_scales=(1, 2, 3, 6),
+         channels=512,
+         dropout_ratio=0.1,
+         num_classes=19,
+         norm_cfg=norm_cfg,
+         align_corners=False,
+         loss_decode=dict(
+             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+     auxiliary_head=dict(
+         type='FCNHead',
+         in_channels=1024,
+         in_index=2,
+         channels=256,
+         num_convs=1,
+         concat_input=False,
+         dropout_ratio=0.1,
+         num_classes=19,
+         norm_cfg=norm_cfg,
+         align_corners=False,
+         loss_decode=dict(
+             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
+     # model training and testing settings
+     train_cfg=dict(),
+     test_cfg=dict(mode='whole'))
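Each file above is only the `model` half of a full config; a downstream config typically inherits one model file, one dataset file, and the runtime file. A hedged sketch of instantiating a segmentor directly from one of them, assuming mmcv and mmseg are importable as this annotator package expects:

```python
from mmcv import Config
from mmseg.models import build_segmentor

cfg = Config.fromfile(
    'annotator/uniformer/configs/_base_/models/upernet_r50.py')
# build_segmentor assembles backbone, decode_head and auxiliary_head from
# the model registry; pretrained weights are loaded separately afterwards.
model = build_segmentor(cfg.model)
model.eval()
```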