zhengrongzhang committed on
Commit
21794d5
1 Parent(s): 2a1a580

init model

README.md ADDED
@@ -0,0 +1,93 @@
---
license: apache-2.0
datasets:
- detection-datasets/coco
language:
- en
metrics:
- accuracy
tags:
- RyzenAI
- pose estimation
---

# MoveNet

MoveNet is an ultra-fast and accurate model that detects 17 keypoints of a body. It was released in [movenet.pytorch](https://github.com/fire717/movenet.pytorch/blob/master/README.md?plain=1).

We developed a modified version that is supported by [AMD Ryzen AI](https://ryzenai.docs.amd.com/).

## How to use

### Installation

Follow the [Ryzen AI Installation](https://ryzenai.docs.amd.com/en/latest/inst.html) guide to prepare the environment for Ryzen AI.
Then run the following command to install the prerequisites for this model:
```bash
pip install -r requirements.txt
```
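
As a quick sanity check of the environment (a minimal sketch, assuming the Ryzen AI build of onnxruntime is installed), the Vitis AI execution provider should show up among the available providers:
```python
import onnxruntime as rt

# 'VitisAIExecutionProvider' should be listed once Ryzen AI is set up correctly
print(rt.get_available_providers())
```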

### Data Preparation (optional: for accuracy evaluation)

1. Download the COCO 2017 dataset from https://cocodataset.org/ (you need train2017.zip, val2017.zip, and the annotations) and unzip it to `./data/` like this:

```
├── data
    ├── annotations (person_keypoints_train2017.json, person_keypoints_val2017.json, ...)
    ├── train2017 (xx.jpg, xx.jpg, ...)
    └── val2017 (xx.jpg, xx.jpg, ...)
```

2. Convert the dataset to our data format:
- Modify the paths in the `#### PARAM ####` block at the bottom of make_coco_data_17keypoints.py if needed
- Run the script to pre-process the dataset (the resulting format is validated by the sketch after this block):
```
python make_coco_data_17keypoints.py
```
```
Our data format: JSON file
Keypoints order: ['nose', 'left_eye', 'right_eye', 'left_ear', 'right_ear',
'left_shoulder', 'right_shoulder', 'left_elbow', 'right_elbow', 'left_wrist',
'right_wrist', 'left_hip', 'right_hip', 'left_knee', 'right_knee', 'left_ankle',
'right_ankle']

One item:
[{"img_name": "0.jpg",
  "keypoints": [x0,y0,z0,x1,y1,z1,...],
  # z: 0 for no label, 1 for labeled but invisible, 2 for labeled and visible
  "center": [x,y],
  "bbox": [x0,y0,x1,y1],
  "other_centers": [[x0,y0],[x1,y1],...],
  "other_keypoints": [[[x0,y0],[x1,y1],...],[[x0,y0],[x1,y1],...],...], # length = num_keypoints
 },
 ...
]
```
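
To sanity-check the converted labels, a minimal sketch (assuming the default `#### PARAM ####` values, which write `./data/croped/val2017.json`):
```python
import json

# Path produced by make_coco_data_17keypoints.py; adjust if you changed PARAM
with open("./data/croped/val2017.json", "r") as f:
    labels = json.load(f)

print("total items:", len(labels))
item = labels[0]
print("image:", item["img_name"])
# keypoints are stored flat as [x, y, v, x, y, v, ...] with 17 triplets,
# coordinates normalized to the cropped image
kps = item["keypoints"]
for i in range(len(kps) // 3):
    x, y, v = kps[i*3], kps[i*3+1], kps[i*3+2]
    print(f"kp{i}: x={x:.3f} y={y:.3f} visibility={v}")
```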

### Test & Evaluation

- Modify the DATASET_PATH in eval_onnx.py if needed
- Test the accuracy of the quantized model:
```bash
python eval_onnx.py --ipu --provider_config Path\To\vaip_config.json
```
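
To run the quantized model on a single image outside the evaluation loop, a minimal sketch (run from the repo root so eval_onnx.py and center_weight_origin.npy are found; the input name `blob.1` and the output tensor names come from eval_onnx.py, while `your_image.jpg` is a placeholder; pass the Vitis AI provider and options instead of CPU for IPU execution):
```python
import cv2
import numpy as np
import onnxruntime as rt

from eval_onnx import IMG_SIZE, movenetDecode  # decode helper from this repo

# Plain CPU inference; for IPU use VitisAIExecutionProvider + provider_options
session = rt.InferenceSession("movenet_int8.onnx", providers=["CPUExecutionProvider"])

img = cv2.imread("your_image.jpg")  # placeholder path
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img = cv2.resize(img, (IMG_SIZE, IMG_SIZE)).astype(np.float32)
img = np.transpose(img, (2, 0, 1))[np.newaxis, ...]  # NCHW batch of 1
img = img / 127.5 - 1.0  # same normalization as TensorDataset in eval_onnx.py

outputs = session.run(['1548', '1607', '1665', '1723'], {'blob.1': img})
kps = movenetDecode(outputs, None, mode='output', img_size=IMG_SIZE)
print(kps.reshape(-1, 2))  # 17 (x, y) pairs in [0, 1]; -1 marks low-confidence points
```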

### Performance

| Metric | Accuracy on IPU |
| :----: | :----: |
| Accuracy | 79.745% |

## Citation

1. [model card](https://storage.googleapis.com/movenet/MoveNet.SinglePose%20Model%20Card.pdf)
2. [movenet.pytorch](https://github.com/fire717/movenet.pytorch/blob/master/README.md?plain=1)
center_weight_origin.npy ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:88a437d41816cc6526e64f0014fb977d4d3f216bbec7daaea59492a1f3f1494a
size 9296
eval_onnx.py ADDED
@@ -0,0 +1,533 @@
import onnxruntime as rt
import numpy as np
import json
import torch
import cv2
import os
from torch.utils.data.dataset import Dataset
import random
import math
import argparse

# Constants and paths defining model, image, and dataset specifics
MODEL_DIR = './movenet_int8.onnx' # Path to the MoveNet model
IMG_SIZE = 192 # Image size used for processing
FEATURE_MAP_SIZE = 48 # Feature map size used in the model
CENTER_WEIGHT_ORIGIN_PATH = './center_weight_origin.npy' # Path to center weight origin file
DATASET_PATH = 'your_dataset_path' # Base path for the dataset
EVAL_LABEL_PATH = os.path.join(DATASET_PATH, "val2017.json") # Path to validation labels JSON file
EVAL_IMG_PATH = os.path.join(DATASET_PATH, 'imgs') # Path to validation images


def getDist(pre, labels):
    """
    Calculate the squared Euclidean distance between predicted and labeled keypoints.

    Args:
        pre: Predicted keypoints [batchsize, 34]
        labels: Labeled keypoints [batchsize, 34]

    Returns:
        res: Squared distance per keypoint [batchsize, 17]
    """
    pre = pre.reshape([-1, 17, 2])
    labels = labels.reshape([-1, 17, 2])
    res = np.power(pre[:,:,0]-labels[:,:,0],2)+np.power(pre[:,:,1]-labels[:,:,1],2)
    return res


def getAccRight(dist, th = 5/IMG_SIZE):
    """
    Compute accuracy for each keypoint based on a threshold.

    Args:
        dist: Distance between keypoints [batchsize, 17]
        th: Threshold for accuracy computation

    Returns:
        res: Accuracy per keypoint [17,] representing the count of correct predictions
    """
    res = np.zeros(dist.shape[1], dtype=np.int64)
    for i in range(dist.shape[1]):
        res[i] = sum(dist[:,i]<th)
    return res

def myAcc(output, target):
    '''
    Compute accuracy across keypoints.

    Args:
        output: Predicted keypoints
        target: Labeled keypoints

    Returns:
        cate_acc: Categorical accuracy [17,] representing the count of correct predictions per keypoint
    '''
    # Calculate distance between predicted and labeled keypoints
    dist = getDist(output, target)
    # Calculate accuracy for each keypoint
    cate_acc = getAccRight(dist)
    return cate_acc

# Predefined numpy arrays and weights for calculations
_range_weight_x = np.array([[x for x in range(FEATURE_MAP_SIZE)] for _ in range(FEATURE_MAP_SIZE)])
_range_weight_y = _range_weight_x.T
_center_weight = np.load(CENTER_WEIGHT_ORIGIN_PATH).reshape(FEATURE_MAP_SIZE,FEATURE_MAP_SIZE)

def maxPoint(heatmap, center=True):
    """
    Find the coordinates of maximum values in a heatmap.

    Args:
        heatmap: Input heatmap data
        center: Flag to indicate whether to consider center-weighted points

    Returns:
        x, y: Coordinates of maximum values in the heatmap
    """
    if len(heatmap.shape) == 3:
        batch_size,h,w = heatmap.shape
        c = 1
    elif len(heatmap.shape) == 4:
        # n,c,h,w
        batch_size,c,h,w = heatmap.shape
    if center:
        heatmap = heatmap*_center_weight
    heatmap = heatmap.reshape((batch_size,c, -1)) # batch_size, c, FEATURE_MAP_SIZE*FEATURE_MAP_SIZE
    max_id = np.argmax(heatmap,2) # batch_size, c, 1
    y = max_id//w
    x = max_id%w
    return x,y

def movenetDecode(data, kps_mask=None,mode='output', num_joints = 17,
                    img_size=192, hm_th=0.1):
    '''
    Decode MoveNet output data to predicted keypoints.

    Args:
        data: MoveNet output data
        kps_mask: Keypoints mask
        mode: Mode of decoding ('output' or 'label')
        num_joints: Number of joints/keypoints
        img_size: Image size
        hm_th: Threshold for heatmap processing

    Returns:
        res: Decoded keypoints
    '''
    ## data: heatmaps [n, 17, 48, 48], centers [n, 1, 48, 48], regs [n, 34, 48, 48], offsets [n, 34, 48, 48]
    ## kps_mask: [n, 17]
    if mode == 'output':
        batch_size = data[0].shape[0]
        heatmaps = data[0]
        heatmaps[heatmaps < hm_th] = 0
        centers = data[1]
        regs = data[2]
        offsets = data[3]
        cx,cy = maxPoint(centers)
        dim0 = np.arange(batch_size,dtype=np.int32).reshape(batch_size,1)
        dim1 = np.zeros((batch_size,1),dtype=np.int32)
        res = []
        for n in range(num_joints):
            reg_x_origin = (regs[dim0,dim1+n*2,cy,cx]+0.5).astype(np.int32)
            reg_y_origin = (regs[dim0,dim1+n*2+1,cy,cx]+0.5).astype(np.int32)
            reg_x = reg_x_origin+cx
            reg_y = reg_y_origin+cy
            ### for post process
            reg_x = np.reshape(reg_x, (reg_x.shape[0],1,1))
            reg_y = np.reshape(reg_y, (reg_y.shape[0],1,1))
            reg_x = reg_x.repeat(FEATURE_MAP_SIZE,1).repeat(FEATURE_MAP_SIZE,2)
            reg_y = reg_y.repeat(FEATURE_MAP_SIZE,1).repeat(FEATURE_MAP_SIZE,2)
            range_weight_x = np.reshape(_range_weight_x,(1,FEATURE_MAP_SIZE,FEATURE_MAP_SIZE)).repeat(reg_x.shape[0],0)
            range_weight_y = np.reshape(_range_weight_y,(1,FEATURE_MAP_SIZE,FEATURE_MAP_SIZE)).repeat(reg_x.shape[0],0)
            tmp_reg_x = (range_weight_x-reg_x)**2
            tmp_reg_y = (range_weight_y-reg_y)**2
            tmp_reg = (tmp_reg_x+tmp_reg_y)**0.5+1.8 # origin 1.8
            tmp_reg = heatmaps[:,n,...]/tmp_reg
            tmp_reg = tmp_reg[:,np.newaxis,:,:]
            reg_x,reg_y = maxPoint(tmp_reg, center=False)
            reg_x[reg_x>47] = 47
            reg_x[reg_x<0] = 0
            reg_y[reg_y>47] = 47
            reg_y[reg_y<0] = 0
            score = heatmaps[dim0,dim1+n,reg_y,reg_x]
            offset_x = offsets[dim0,dim1+n*2,reg_y,reg_x] #*img_size//4
            offset_y = offsets[dim0,dim1+n*2+1,reg_y,reg_x] #*img_size//4
            res_x = (reg_x+offset_x)/(img_size//4)
            res_y = (reg_y+offset_y)/(img_size//4)
            res_x[score<hm_th] = -1
            res_y[score<hm_th] = -1
            res.extend([res_x, res_y])
        res = np.concatenate(res,axis=1) # bs*34
    elif mode == 'label':
        kps_mask = kps_mask.detach().cpu().numpy()
        data = data.detach().cpu().numpy()
        batch_size = data.shape[0]
        heatmaps = data[:,:17,:,:]
        centers = data[:,17:18,:,:]
        regs = data[:,18:52,:,:]
        offsets = data[:,52:,:,:]
        cx,cy = maxPoint(centers)
        dim0 = np.arange(batch_size,dtype=np.int32).reshape(batch_size,1)
        dim1 = np.zeros((batch_size,1),dtype=np.int32)
        res = []
        for n in range(num_joints):
            reg_x_origin = (regs[dim0,dim1+n*2,cy,cx]+0.5).astype(np.int32)
            reg_y_origin = (regs[dim0,dim1+n*2+1,cy,cx]+0.5).astype(np.int32)
            reg_x = reg_x_origin+cx
            reg_y = reg_y_origin+cy
            reg_x[reg_x>47] = 47
            reg_x[reg_x<0] = 0
            reg_y[reg_y>47] = 47
            reg_y[reg_y<0] = 0
            offset_x = offsets[dim0,dim1+n*2,reg_y,reg_x] #*img_size//4
            offset_y = offsets[dim0,dim1+n*2+1,reg_y,reg_x] #*img_size//4
            res_x = (reg_x+offset_x)/(img_size//4)
            res_y = (reg_y+offset_y)/(img_size//4)
            res_x[kps_mask[:,n]==0] = -1
            res_y[kps_mask[:,n]==0] = -1
            res.extend([res_x, res_y])
        res = np.concatenate(res,axis=1) # bs*34
    return res

def label2heatmap(keypoints, other_keypoints, img_size):
    '''
    Convert labeled keypoints to heatmaps for keypoints.

    Args:
        keypoints: Target person's keypoints
        other_keypoints: Other people's keypoints
        img_size: Size of the image

    Returns:
        heatmaps: Heatmaps for keypoints
        sigma: Value used for heatmap generation
    '''
    # keypoints: target person
    # other_keypoints: other people's keypoints to be added to the heatmap
    heatmaps = []
    keypoints_range = np.reshape(keypoints,(-1,3))
    keypoints_range = keypoints_range[keypoints_range[:,2]>0]
    min_x = np.min(keypoints_range[:,0])
    min_y = np.min(keypoints_range[:,1])
    max_x = np.max(keypoints_range[:,0])
    max_y = np.max(keypoints_range[:,1])
    area = (max_y-min_y)*(max_x-min_x)
    sigma = 3
    if area < 0.16:
        sigma = 3
    elif area < 0.3:
        sigma = 5
    else:
        sigma = 7
    for i in range(0,len(keypoints),3):
        if keypoints[i+2]==0:
            heatmaps.append(np.zeros((img_size//4, img_size//4)))
            continue
        x = int(keypoints[i]*img_size//4)
        y = int(keypoints[i+1]*img_size//4)
        if x==img_size//4:x=(img_size//4-1)
        if y==img_size//4:y=(img_size//4-1)
        if x>img_size//4 or x<0:x=-1
        if y>img_size//4 or y<0:y=-1
        heatmap = generate_heatmap(x, y, other_keypoints[i//3], (img_size//4, img_size//4),sigma)
        heatmaps.append(heatmap)
    heatmaps = np.array(heatmaps, dtype=np.float32)
    return heatmaps,sigma

def generate_heatmap(x, y, other_keypoints, size, sigma):
    '''
    Generate a heatmap for a specific keypoint.

    Args:
        x, y: Absolute position of the keypoint
        other_keypoints: Positions of other keypoints
        size: Size of the heatmap
        sigma: Value used for heatmap generation

    Returns:
        heatmap: Generated heatmap for the keypoint
    '''
    # x, y: absolute position on the feature map
    # other_keypoints: normalized positions
    sigma+=6
    heatmap = np.zeros(size)
    if x<0 or y<0 or x>=size[0] or y>=size[1]:
        return heatmap
    tops = [[x,y]]
    if len(other_keypoints)>0:
        # add other people's keypoints
        for i in range(len(other_keypoints)):
            x = int(other_keypoints[i][0]*size[0])
            y = int(other_keypoints[i][1]*size[1])
            if x==size[0]:x=(size[0]-1)
            if y==size[1]:y=(size[1]-1)
            if x>size[0] or x<0 or y>size[1] or y<0: continue
            tops.append([x,y])
    for top in tops:
        x,y = top
        x0 = max(0,x-sigma//2)
        x1 = min(size[0],x+sigma//2)
        y0 = max(0,y-sigma//2)
        y1 = min(size[1],y+sigma//2)
        for map_y in range(y0, y1):
            for map_x in range(x0, x1):
                d2 = ((map_x - x) ** 2 + (map_y - y) ** 2)**0.5
                if d2<=sigma//2:
                    heatmap[map_y, map_x] += math.exp(-d2/(sigma//2)*3)
                    if heatmap[map_y, map_x] > 1:
                        heatmap[map_y, map_x] = 1
    # heatmap[heatmap<0.1] = 0
    return heatmap

def label2center(cx, cy, other_centers, img_size, sigma):
    '''
    Convert labeled keypoints to a center heatmap.

    Args:
        cx, cy: Center coordinates
        other_centers: Other people's centers
        img_size: Size of the image
        sigma: Value used for heatmap generation

    Returns:
        heatmaps: Heatmap representing the center
    '''
    heatmaps = []
    heatmap = generate_heatmap(cx, cy, other_centers, (img_size//4, img_size//4),sigma+2)
    heatmaps.append(heatmap)
    heatmaps = np.array(heatmaps, dtype=np.float32)
    return heatmaps

def label2reg(keypoints, cx, cy, img_size):
    '''
    Convert labeled keypoints to regression maps.

    Args:
        keypoints: Labeled keypoints
        cx, cy: Center coordinates
        img_size: Size of the image

    Returns:
        heatmaps: Regression maps for keypoints
    '''
    heatmaps = np.zeros((len(keypoints)//3*2, img_size//4, img_size//4), dtype=np.float32)
    for i in range(len(keypoints)//3):
        if keypoints[i*3+2]==0:
            continue
        x = keypoints[i*3]*img_size//4
        y = keypoints[i*3+1]*img_size//4
        if x==img_size//4:x=(img_size//4-1)
        if y==img_size//4:y=(img_size//4-1)
        if x>img_size//4 or x<0 or y>img_size//4 or y<0:
            continue
        reg_x = x-cx
        reg_y = y-cy
        for j in range(cy-2,cy+3):
            if j<0 or j>img_size//4-1:
                continue
            for k in range(cx-2,cx+3):
                if k<0 or k>img_size//4-1:
                    continue
                if cx<img_size//4/2-1:
                    heatmaps[i*2][j][k] = reg_x-(cx-k) #/(img_size//4)
                else:
                    heatmaps[i*2][j][k] = reg_x+(cx-k) #/(img_size//4)
                if cy<img_size//4/2-1:
                    heatmaps[i*2+1][j][k] = reg_y-(cy-j) #/(img_size//4)
                else:
                    heatmaps[i*2+1][j][k] = reg_y+(cy-j)
    return heatmaps

def label2offset(keypoints, cx, cy, regs, img_size):
    '''
    Convert labeled keypoints to offset maps.

    Args:
        keypoints: Labeled keypoints
        cx, cy: Center coordinates
        regs: Regression maps
        img_size: Size of the image

    Returns:
        heatmaps: Offset maps for keypoints
    '''
    heatmaps = np.zeros((len(keypoints)//3*2, img_size//4, img_size//4), dtype=np.float32)
    for i in range(len(keypoints)//3):
        if keypoints[i*3+2]==0:
            continue
        large_x = int(keypoints[i*3]*img_size)
        large_y = int(keypoints[i*3+1]*img_size)
        small_x = int(regs[i*2,cy,cx]+cx)
        small_y = int(regs[i*2+1,cy,cx]+cy)
        offset_x = large_x/4-small_x
        offset_y = large_y/4-small_y
        if small_x==img_size//4:small_x=(img_size//4-1)
        if small_y==img_size//4:small_y=(img_size//4-1)
        if small_x>img_size//4 or small_x<0 or small_y>img_size//4 or small_y<0:
            continue
        heatmaps[i*2][small_y][small_x] = offset_x #/(img_size//4)
        heatmaps[i*2+1][small_y][small_x] = offset_y #/(img_size//4)
    return heatmaps

class TensorDataset(Dataset):
    '''
    Custom Dataset class for handling data loading and preprocessing
    '''

    def __init__(self, data_labels, img_dir, img_size, data_aug=None):
        self.data_labels = data_labels
        self.img_dir = img_dir
        self.data_aug = data_aug
        self.img_size = img_size
        self.interp_methods = [cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA,
                               cv2.INTER_NEAREST, cv2.INTER_LANCZOS4]

    def __getitem__(self, index):
        item = self.data_labels[index]
        """
        item = {
            "img_name":save_name,
            "keypoints":save_keypoints,
            "center":save_center,
            "other_centers":other_centers,
            "other_keypoints":other_keypoints,
        }
        """
        # [name,h,w,keypoints...]
        img_path = os.path.join(self.img_dir, item["img_name"])
        img = cv2.imread(img_path, cv2.IMREAD_COLOR)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, (self.img_size, self.img_size),
                         interpolation=random.choice(self.interp_methods))
        #### Data Augmentation
        if self.data_aug is not None:
            img, item = self.data_aug(img, item)
        img = img.astype(np.float32)
        img = np.transpose(img,axes=[2,0,1])
        keypoints = item["keypoints"]
        center = item['center']
        other_centers = item["other_centers"]
        other_keypoints = item["other_keypoints"]
        kps_mask = np.ones(len(keypoints)//3)
        for i in range(len(keypoints)//3):
            if keypoints[i*3+2]==0:
                kps_mask[i] = 0
        heatmaps,sigma = label2heatmap(keypoints, other_keypoints, self.img_size) # (17, 48, 48)
        cx = min(max(0,int(center[0]*self.img_size//4)),self.img_size//4-1)
        cy = min(max(0,int(center[1]*self.img_size//4)),self.img_size//4-1)
        centers = label2center(cx, cy, other_centers, self.img_size, sigma) # (1, 48, 48)
        regs = label2reg(keypoints, cx, cy, self.img_size) # (34, 48, 48)
        offsets = label2offset(keypoints, cx, cy, regs, self.img_size) # (34, 48, 48)
        labels = np.concatenate([heatmaps,centers,regs,offsets],axis=0)
        img = img / 127.5 - 1.0
        return img, labels, kps_mask, img_path

    def __len__(self):
        return len(self.data_labels)

def getDataLoader(mode, input_data):
    '''
    Get a data loader based on mode (e.g., evaluation).

    Args:
        mode: Mode of data loader (e.g., 'eval')
        input_data: Input data

    Returns:
        data_loader: DataLoader for the specified mode
    '''
    if mode=="eval":
        val_loader = torch.utils.data.DataLoader(
                            TensorDataset(input_data[0],
                                          EVAL_IMG_PATH,
                                          IMG_SIZE,
                                          ),
                            batch_size=1,
                            shuffle=False,
                            num_workers=0,
                            pin_memory=False)
        return val_loader

class Data():
    '''
    Class for managing data and obtaining the evaluation data loader.
    '''
    def __init__(self):
        pass

    def getEvalDataloader(self):
        with open(EVAL_LABEL_PATH, 'r') as f:
            data_label_list = json.loads(f.readlines()[0])
        print("[INFO] Total images: ", len(data_label_list))
        input_data = [data_label_list]
        data_loader = getDataLoader("eval",
                                    input_data)
        return data_loader

# Command-line arguments for the ONNX inference session
def make_parser():
    '''
    Parse arguments for MoveNet ONNX runtime inference.

    Returns:
        args: Parsed command-line arguments
    '''
    parser = argparse.ArgumentParser("movenet onnxruntime inference")
    parser.add_argument(
        "--ipu",
        action="store_true",
        help="Use IPU for inference.",
    )
    parser.add_argument(
        "--provider_config",
        type=str,
        default="vaip_config.json",
        help="Path of the config file for setting provider_options.",
    )
    return parser.parse_args()

if __name__ == '__main__':

    args = make_parser()

    if args.ipu:
        providers = ["VitisAIExecutionProvider"]
        provider_options = [{"config_file": args.provider_config}]
    else:
        providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']
        provider_options = None
    # Get evaluation data loader using the Data class
    data = Data()
    data_loader = data.getEvalDataloader()
    # Load MoveNet model using ONNX runtime
    model = rt.InferenceSession(MODEL_DIR, providers=providers, provider_options=provider_options)

    correct = 0
    total = 0
    # Loop through the data loader for evaluation
    for batch_idx, (imgs, labels, kps_mask, img_names) in enumerate(data_loader):
        if batch_idx%100 == 0:
            print('Finish ',batch_idx)
        imgs = imgs.detach().cpu().numpy()
        output = model.run(['1548','1607','1665','1723'],{'blob.1':imgs})
        pre = movenetDecode(output, kps_mask,mode='output',img_size=IMG_SIZE)
        gt = movenetDecode(labels, kps_mask,mode='label',img_size=IMG_SIZE)
        acc = myAcc(pre, gt)
        correct += sum(acc)
        total += len(acc)
    # Compute and print accuracy based on evaluated data
    acc = correct/total
    print('[Info] acc: {:.3f}% \n'.format(100. * acc))
make_coco_data_17keypoints.py ADDED
@@ -0,0 +1,277 @@
"""
@Fire
https://github.com/fire717
"""
import os
import json
import pickle
import cv2
import numpy as np


"""
The segmentation format depends on whether the instance is a single object
(iscrowd=0, which uses the polygons format) or a group of objects
(iscrowd=1, which uses the RLE format).

iscrowd=1 annotates a group of objects (e.g. a crowd of people).

Annotation format: x,y,v,x,y,v,...
where v: 0 = not labeled; 1 = labeled but not visible (occluded); 2 = labeled and visible

Keypoint order: 'keypoints': ['nose', 'left_eye', 'right_eye', 'left_ear', 'right_ear',
'left_shoulder', 'right_shoulder', 'left_elbow', 'right_elbow', 'left_wrist',
'right_wrist', 'left_hip', 'right_hip', 'left_knee', 'right_knee', 'left_ankle',
'right_ankle']
"""


def main(img_dir, labels_path, output_name, output_img_dir):

    if not os.path.exists(output_img_dir):
        os.makedirs(output_img_dir)

    with open(labels_path, 'r') as f:
        data = json.load(f)

    # print("total: ", len(data)) # 5
    # print(data.keys()) # ['info', 'licenses', 'images', 'annotations', 'categories']
    # print(len(data['annotations']), len(data['images'])) # 88153 40504
    # print(data['categories'])
    """
    [{'supercategory': 'person', 'name': 'person',
    'skeleton': [[16, 14], [14, 12], [17, 15], [15, 13], [12, 13], [6, 12], [7, 13],
    [6, 7], [6, 8], [7, 9], [8, 10], [9, 11], [2, 3], [1, 2], [1, 3], [2, 4], [3, 5],
    [4, 6], [5, 7]],
    'keypoints': ['nose', 'left_eye', 'right_eye', 'left_ear', 'right_ear',
    'left_shoulder', 'right_shoulder', 'left_elbow', 'right_elbow', 'left_wrist',
    'right_wrist', 'left_hip', 'right_hip', 'left_knee', 'right_knee', 'left_ankle',
    'right_ankle'], 'id': 1}]
    """
    # print(data['images'][:3]) # each item has file_name and id

    img_id_to_name = {}
    img_name_to_id = {}
    for item in data['images']:
        idx = item['id']
        name = item['file_name']
        img_id_to_name[idx] = name
        img_name_to_id[name] = idx
    print(len(img_id_to_name))

    anno_by_imgname = {}
    for annotation in data['annotations']:
        name = img_id_to_name[annotation['image_id']]
        if name in anno_by_imgname:
            anno_by_imgname[name] += [annotation]
        else:
            anno_by_imgname[name] = [annotation]
    print(len(anno_by_imgname))

    new_label = []
    for k,v in anno_by_imgname.items():
        # filter out images with more than 3 people
        if len(v)>3:
            continue

        img = cv2.imread(os.path.join(img_dir, k))
        if img is None:
            print(os.path.join(img_dir, k))
            continue
        h,w = img.shape[:2]
        for idx,item in enumerate(v):
            if item['iscrowd'] != 0:
                continue

            bbox = [int(x) for x in item['bbox']] # x,y,w,h
            # cv2.rectangle(img, (bbox[0], bbox[1]), (bbox[0]+bbox[2], bbox[1]+bbox[3]), (0,255,0), 2)

            keypoints = item['keypoints']

            # for i in range(len(keypoints)//3):
            #     x = keypoints[i*3]
            #     y = keypoints[i*3+1]
            #     z = keypoints[i*3+2] # 0: not labeled; 1: labeled but occluded; 2: labeled and visible
            #     if z==1:
            #         color = (255,0,0)
            #     elif z==2:
            #         color = (0,0,255)
            #     else:
            #         continue
            #     cv2.circle(img, (x, y), 4, color, 3)

            # merge bbox and keypoints to get max bbox
            keypoints = np.array(keypoints).reshape((17,3))

            keypoints_v = keypoints[keypoints[:,2]>0]
            if len(keypoints_v)<8: # filter out items without enough labeled keypoints
                continue
            min_key_x = np.min(keypoints_v[:,0])
            max_key_x = np.max(keypoints_v[:,0])
            min_key_y = np.min(keypoints_v[:,1])
            max_key_y = np.max(keypoints_v[:,1])

            x0 = min(bbox[0], min_key_x)
            x1 = max(bbox[0]+bbox[2], max_key_x)
            y0 = min(bbox[1], min_key_y)
            y1 = max(bbox[1]+bbox[3], max_key_y)
            # cv2.rectangle(img, (x0, y0), (x1, y1), (0,255,255), 2)

            # expand to square then expand
            cx = (x0+x1)/2
            cy = (y0+y1)/2

            half_size = ((x1-x0)+(y1-y0))/2 * EXPAND_RATIO
            new_x0 = int(cx - half_size)
            new_x1 = int(cx + half_size)
            new_y0 = int(cy - half_size)
            new_y1 = int(cy + half_size)

            # pad where the crop exceeds the image edge
            pad_top = 0
            pad_left = 0
            pad_right = 0
            pad_bottom = 0
            if new_x0 < 0:
                pad_left = -new_x0+1
            if new_y0 < 0:
                pad_top = -new_y0+1
            if new_x1 > w:
                pad_right = new_x1-w+1
            if new_y1 > h:
                pad_bottom = new_y1-h+1

            pad_img = np.zeros((h+pad_top+pad_bottom, w+pad_left+pad_right, 3))
            pad_img[pad_top:pad_top+h,pad_left:pad_left+w] = img
            new_x0 += pad_left
            new_y0 += pad_top
            new_x1 += pad_left
            new_y1 += pad_top
            # cv2.rectangle(pad_img, (new_x0, new_y0), (new_x1, new_y1), (0,255,0), 2)

            # final save data
            save_name = k[:-4]+"_"+str(idx)+".jpg"
            new_w = new_x1-new_x0
            new_h = new_y1-new_y0
            save_img = pad_img[new_y0:new_y1,new_x0:new_x1]
            save_bbox = [(bbox[0]+pad_left-new_x0)/new_w,
                         (bbox[1]+pad_top-new_y0)/new_h,
                         (bbox[0]+bbox[2]+pad_left-new_x0)/new_w,
                         (bbox[1]+bbox[3]+pad_top-new_y0)/new_h
                         ]
            save_center = [(save_bbox[0]+save_bbox[2])/2,(save_bbox[1]+save_bbox[3])/2]

            save_keypoints = []
            for kid in range(len(keypoints)):
                save_keypoints.extend([(int(keypoints[kid][0])+pad_left-new_x0)/new_w,
                                       (int(keypoints[kid][1])+pad_top-new_y0)/new_h,
                                       int(keypoints[kid][2])
                                       ])
            other_centers = []
            other_keypoints = [[] for _ in range(17)]
            for idx2,item2 in enumerate(v):
                if item2['iscrowd'] != 0 or idx2==idx:
                    continue
                bbox2 = [int(x) for x in item2['bbox']] # x,y,w,h

                save_bbox2 = [(bbox2[0]+pad_left-new_x0)/new_w,
                              (bbox2[1]+pad_top-new_y0)/new_h,
                              (bbox2[0]+bbox2[2]+pad_left-new_x0)/new_w,
                              (bbox2[1]+bbox2[3]+pad_top-new_y0)/new_h
                              ]
                save_center2 = [(save_bbox2[0]+save_bbox2[2])/2,
                                (save_bbox2[1]+save_bbox2[3])/2]
                if save_center2[0]>0 and save_center2[0]<1 and save_center2[1]>0 and save_center2[1]<1:
                    other_centers.append(save_center2)

                keypoints2 = item2['keypoints']
                keypoints2 = np.array(keypoints2).reshape((17,3))
                for kid2 in range(17):
                    if keypoints2[kid2][2]==0:
                        continue
                    kx = (keypoints2[kid2][0]+pad_left-new_x0)/new_w
                    ky = (keypoints2[kid2][1]+pad_top-new_y0)/new_h
                    if kx>0 and kx<1 and ky>0 and ky<1:
                        other_keypoints[kid2].append([kx,ky])

            save_item = {
                "img_name":save_name,
                "keypoints":save_keypoints,
                "center":save_center,
                "bbox":save_bbox,
                "other_centers":other_centers,
                "other_keypoints":other_keypoints,
            }
            new_label.append(save_item)

            ### visualization for inspection; enabled via SHOW_POINTS_ON_IMG
            if SHOW_POINTS_ON_IMG:
                cv2.circle(save_img, (int(save_center[0]*new_w), int(save_center[1]*new_h)), 4, (0,255,0), 3)
                for show_kid in range(len(save_keypoints)//3):
                    if save_keypoints[show_kid*3+2]==1:
                        color = (255,0,0)
                    elif save_keypoints[show_kid*3+2]==2:
                        color = (0,0,255)
                    else:
                        continue
                    cv2.circle(save_img, (int(save_keypoints[show_kid*3]*new_w),
                                          int(save_keypoints[show_kid*3+1]*new_h)), 3, color, 2)
                cv2.rectangle(save_img, (int(save_bbox[0]*new_w), int(save_bbox[1]*new_h)),
                              (int(save_bbox[2]*new_w), int(save_bbox[3]*new_h)), (0,255,0), 2)
                for show_c in other_centers:
                    cv2.circle(save_img, (int(show_c[0]*new_w), int(show_c[1]*new_h)), 4, (0,255,255), 3)
                for show_ks in other_keypoints:
                    for show_k in show_ks:
                        cv2.circle(save_img, (int(show_k[0]*new_w), int(show_k[1]*new_h)), 3, (255,255,0), 2)

            cv2.imwrite(os.path.join(output_img_dir, save_name), save_img)

    with open(output_name,'w') as f:
        json.dump(new_label, f, ensure_ascii=False)
    print('Total write ', len(new_label))


if __name__ == '__main__':

    #### PARAM ####

    SHOW_POINTS_ON_IMG = False
    # whether to draw points on the output images for debugging

    EXPAND_RATIO = 1.
    # ratio by which the person bbox is expanded towards the image edge

    output_img_dir = "./data/croped/imgs"
    img_dir = "./data/val2017"
    labels_path = "./data/annotations/person_keypoints_val2017.json"
    output_name = "./data/croped/val2017.json"
    main(img_dir, labels_path, output_name, output_img_dir)
    img_dir = "./data/train2017"
    labels_path = "./data/annotations/person_keypoints_train2017.json"
    output_name = "./data/croped/train2017.json"
    main(img_dir, labels_path, output_name, output_img_dir)
movenet_int8.onnx ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9e3243a492e8886d29d9dcdd2831da3c69eb18bc76185f2db565cfe8ddcd58d8
size 7681846
requirements.txt ADDED
@@ -0,0 +1,4 @@
torch
numpy
opencv-python
# onnxruntime