sinkers committed
Commit a1199fc · verified · 1 parent: 45f59a6

Delete thirdparty/utils/image_utils.py

Files changed (1)
  1. thirdparty/utils/image_utils.py +0 -414
thirdparty/utils/image_utils.py DELETED
@@ -1,414 +0,0 @@
- """
- This file contains functions that are used to perform data augmentation.
- """
- import torch
- import numpy as np
- import scipy.misc
- import cv2
- import math
- import joblib
- from trimesh.visual import color
-
- import jpeg4py as jpeg
- from skimage.transform import rotate, resize
- import matplotlib.pyplot as plt
- import matplotlib.patches as mpatches
- import matplotlib.gridspec as gridspec
- from IPython.display import clear_output  # used by show_imgs(live=True)
-
- from ..core import constants
- from .vibe_image_utils import gen_trans_from_patch_cv
-
-
- def get_transform(center, scale, res, rot=0):
-     """Generate transformation matrix."""
-     h = 200 * scale
-     t = np.zeros((3, 3))
-     t[0, 0] = float(res[1]) / h
-     t[1, 1] = float(res[0]) / h
-     t[0, 2] = res[1] * (-float(center[0]) / h + .5)
-     t[1, 2] = res[0] * (-float(center[1]) / h + .5)
-     t[2, 2] = 1
-     if not rot == 0:
-         rot = -rot  # to match direction of rotation from cropping
-         rot_mat = np.zeros((3, 3))
-         rot_rad = rot * np.pi / 180
-         sn, cs = np.sin(rot_rad), np.cos(rot_rad)
-         rot_mat[0, :2] = [cs, -sn]
-         rot_mat[1, :2] = [sn, cs]
-         rot_mat[2, 2] = 1
-         # need to rotate around center
-         t_mat = np.eye(3)
-         t_mat[0, 2] = -res[1] / 2
-         t_mat[1, 2] = -res[0] / 2
-         t_inv = t_mat.copy()
-         t_inv[:2, 2] *= -1
-         t = np.dot(t_inv, np.dot(rot_mat, np.dot(t_mat, t)))
-     return t
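- # e.g. (hypothetical values): the bbox center maps to the middle of the patch:
- #   t = get_transform(center=np.array([320., 240.]), scale=1.2, res=(224, 224))
- #   t @ np.array([320., 240., 1.])  # -> [112., 112., 1.]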
-
-
- def read_img(img_fn):
-     # return pil_img.fromarray(
-     #     cv2.cvtColor(cv2.imread(img_fn), cv2.COLOR_BGR2RGB))
-     # with open(img_fn, 'rb') as f:
-     #     img = pil_img.open(f).convert('RGB')
-     # return img
-     if img_fn.endswith('jpeg') or img_fn.endswith('jpg'):
-         try:
-             with open(img_fn, 'rb') as f:
-                 img = np.array(jpeg.JPEG(f).decode())
-         except jpeg.JPEGRuntimeError:
-             # logger.warning('{} produced a JPEGRuntimeError', img_fn)
-             img = cv2.cvtColor(cv2.imread(img_fn), cv2.COLOR_BGR2RGB)
-     else:
-         # elif img_fn.endswith('png') or img_fn.endswith('JPG') or img_fn.endswith(''):
-         img = cv2.cvtColor(cv2.imread(img_fn), cv2.COLOR_BGR2RGB)
-     return img
-
-
- def get_random_crop_coords(height, width, crop_height, crop_width, h_start, w_start):
-     y1 = int((height - crop_height) * h_start)
-     y2 = y1 + crop_height
-     x1 = int((width - crop_width) * w_start)
-     x2 = x1 + crop_width
-     return x1, y1, x2, y2
-
-
- def random_crop(center, scale, crop_scale_factor, axis='all'):
-     '''
-     center: bbox center [x, y]
-     scale: bbox height / 200
-     crop_scale_factor: amount of cropping to be applied
-     axis: axis along which cropping will be applied
-         "x": center the y axis and get random crops in x
-         "y": center the x axis and get random crops in y
-         "all": randomly crop from all locations
-     '''
-     orig_size = int(scale * 200.)
-     ul = (center - (orig_size / 2.)).astype(int)
-
-     crop_size = int(orig_size * crop_scale_factor)
-
-     if axis == 'all':
-         h_start = np.random.rand()
-         w_start = np.random.rand()
-     elif axis == 'x':
-         h_start = np.random.rand()
-         w_start = 0.5
-     elif axis == 'y':
-         h_start = 0.5
-         w_start = np.random.rand()
-     else:
-         raise ValueError(f'axis {axis} is undefined!')
-
-     x1, y1, x2, y2 = get_random_crop_coords(
-         height=orig_size,
-         width=orig_size,
-         crop_height=crop_size,
-         crop_width=crop_size,
-         h_start=h_start,
-         w_start=w_start,
-     )
-     scale = (y2 - y1) / 200.
-     # note: h_start drives (y1, y2) but feeds the x coordinate below (and
-     # w_start the y coordinate), so the two swaps cancel out and axis='x'
-     # does randomize x as documented
-     center = ul + np.array([(y1 + y2) / 2, (x1 + x2) / 2])
-     return center, scale
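- # e.g. (hypothetical): keep 80% of a 240px-high bbox, jittering only in x:
- #   new_center, new_scale = random_crop(np.array([320., 240.]), 1.2, 0.8, axis='x')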
-
-
- def transform(pt, center, scale, res, invert=0, rot=0):
-     """Transform pixel location to different reference."""
-     t = get_transform(center, scale, res, rot=rot)
-     if invert:
-         t = np.linalg.inv(t)
-     new_pt = np.array([pt[0] - 1, pt[1] - 1, 1.]).T
-     new_pt = np.dot(t, new_pt)
-     return new_pt[:2].astype(int) + 1
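- # e.g. (hypothetical): invert=1 maps a patch pixel back into image coordinates:
- #   pt_img = transform([112, 112], np.array([320., 240.]), 1.2, (224, 224), invert=1)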
-
-
- def convert_crop_coords_to_orig_img_cliff(bbox, keypoints, crop_size):
-     cx, cy, h = bbox[:, 0], bbox[:, 1], bbox[:, 2]
-
-     # unnormalize to crop coords
-     keypoints[:, :, :2] = 0.5 * crop_size * (keypoints[:, :, :2] + 1.0)
-
-     # rescale to orig img crop
-     keypoints[:, :, :2] *= h[..., None, None] / crop_size
-
-     # transform into original image coords
-     keypoints[:, :, 0] = (cx - h / 2)[..., None] + keypoints[:, :, 0]
-     keypoints[:, :, 1] = (cy - h / 2)[..., None] + keypoints[:, :, 1]
-     return keypoints[0]
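- # e.g. (hypothetical shapes): bbox is (N, 3) rows of [cx, cy, h]; keypoints is
- # (N, K, 3) with x/y normalized to [-1, 1] inside a crop_size x crop_size patch:
- #   kp_orig = convert_crop_coords_to_orig_img_cliff(bbox, keypoints, crop_size=224)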
-
-
- def crop(img, center, scale, res, rot=0):
-     """Crop image according to the supplied bounding box."""
-     # Upper left point
-     ul = np.array(transform([1, 1], center, scale, res, invert=1)) - 1
-     # Bottom right point
-     br = np.array(transform([res[0] + 1, res[1] + 1], center, scale, res, invert=1)) - 1
-
-     # Padding so that when rotated proper amount of context is included
-     pad = int(np.linalg.norm(br - ul) / 2 - float(br[1] - ul[1]) / 2)
-     if not rot == 0:
-         ul -= pad
-         br += pad
-
-     new_shape = [br[1] - ul[1], br[0] - ul[0]]
-     if len(img.shape) > 2:
-         new_shape += [img.shape[2]]
-     new_img = np.zeros(new_shape)
-
-     # Range to fill new array
-     new_x = max(0, -ul[0]), min(br[0], len(img[0])) - ul[0]
-     new_y = max(0, -ul[1]), min(br[1], len(img)) - ul[1]
-     # Range to sample from original image
-     old_x = max(0, ul[0]), min(len(img[0]), br[0])
-     old_y = max(0, ul[1]), min(len(img), br[1])
-     new_img[new_y[0]:new_y[1], new_x[0]:new_x[1]] = img[old_y[0]:old_y[1], old_x[0]:old_x[1]]
-
-     if not rot == 0:
-         # Rotate, then remove padding
-         new_img = rotate(new_img, rot)
-         new_img = new_img[pad:-pad, pad:-pad]
-
-     new_img = resize(new_img, res)
-     return new_img
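- # e.g. (hypothetical): a 224x224 crop around a box of height 200 * 1.2 px:
- #   patch = crop(img, center=np.array([320, 240]), scale=1.2, res=(224, 224))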
-
-
- def calculate_focal_length(img_h, img_w):
-     # approximate the focal length with the image diagonal (in pixels)
-     return float((img_w**2 + img_h**2)**0.5)
-
-
- def calculate_bbox_info(bb_center, bb_scale, orig_shape):
-     img_h, img_w = orig_shape[0], orig_shape[1]
-     cx, cy = bb_center[0], bb_center[1]
-     b = bb_scale * 200
-     focal_length = calculate_focal_length(img_h, img_w)
-
-     bbox_info = np.array([cx - img_w / 2., cy - img_h / 2., b])
-
-     # the constants below are used for normalization and were calculated from H36M data
-     bbox_info[:2] = bbox_info[:2] / focal_length * 2.8
-     bbox_info[2] = (bbox_info[2] - 0.24 * focal_length) / (0.06 * focal_length)
-
-     return bbox_info.astype(np.float32)
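- # e.g. for a 1920x1080 frame the focal length is sqrt(1920**2 + 1080**2) ~ 2203,
- # so a box centered in the frame (cx=960, cy=540) yields bbox_info[:2] == 0.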
-
-
- def crop_cv2(img, center, scale, res, rot=0):
-     c_x, c_y = center
-     c_x, c_y = int(round(c_x)), int(round(c_y))
-     patch_width, patch_height = int(round(res[0])), int(round(res[1]))
-     bb_width = bb_height = int(round(scale * 200.))
-
-     trans = gen_trans_from_patch_cv(
-         c_x, c_y, bb_width, bb_height,
-         patch_width, patch_height,
-         scale=1.0, rot=rot, inv=False,
-     )
-
-     crop_img = cv2.warpAffine(
-         img, trans, (int(patch_width), int(patch_height)),
-         flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_CONSTANT,
-     )
-
-     return crop_img
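- # e.g. (hypothetical): equivalent crop to crop() above, done in one warpAffine:
- #   patch = crop_cv2(img, center=(320., 240.), scale=1.2, res=(224, 224))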
-
-
- def uncrop(img, center, scale, orig_shape, rot=0, is_rgb=True):
-     """'Undo' the image cropping/resizing.
-     This function is used when evaluating mask/part segmentation.
-     """
-     res = img.shape[:2]
-     # Upper left point
-     ul = np.array(transform([1, 1], center, scale, res, invert=1)) - 1
-     # Bottom right point
-     br = np.array(transform([res[0] + 1, res[1] + 1], center, scale, res, invert=1)) - 1
-     # size of cropped image
-     crop_shape = [br[1] - ul[1], br[0] - ul[0]]
-     new_img = np.zeros(orig_shape, dtype=np.uint8)
-     # Range to fill new array
-     new_x = max(0, -ul[0]), min(br[0], orig_shape[1]) - ul[0]
-     new_y = max(0, -ul[1]), min(br[1], orig_shape[0]) - ul[1]
-     # Range to sample from original image
-     old_x = max(0, ul[0]), min(orig_shape[1], br[0])
-     old_y = max(0, ul[1]), min(orig_shape[0], br[1])
-     # note: scipy.misc.imresize was removed in SciPy 1.3, so this requires an older SciPy
-     img = scipy.misc.imresize(img, crop_shape, interp='nearest')
-     new_img[old_y[0]:old_y[1], old_x[0]:old_x[1]] = img[new_y[0]:new_y[1], new_x[0]:new_x[1]]
-     return new_img
-
-
- def rot_aa(aa, rot):
-     """Rotate axis-angle parameters."""
-     # in-plane rotation of the augmentation, as a matrix
-     R = np.array([[np.cos(np.deg2rad(-rot)), -np.sin(np.deg2rad(-rot)), 0],
-                   [np.sin(np.deg2rad(-rot)), np.cos(np.deg2rad(-rot)), 0],
-                   [0, 0, 1]])
-     # find the rotation of the body in camera frame
-     per_rdg, _ = cv2.Rodrigues(aa)
-     # apply the global rotation to the global orientation
-     resrot, _ = cv2.Rodrigues(np.dot(R, per_rdg))
-     aa = (resrot.T)[0]
-     return aa
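- # e.g. (hypothetical): keep the global orientation consistent with a 30-degree
- # in-plane image rotation:
- #   pose[:3] = rot_aa(pose[:3], rot=30)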
-
-
- def flip_img(img):
-     """Flip rgb images or masks.
-     Channels come last, e.g. (256, 256, 3).
-     """
-     img = np.fliplr(img)
-     return img
-
-
- def flip_kp(kp):
-     """Flip keypoints."""
-     if len(kp) == 24:
-         flipped_parts = constants.J24_FLIP_PERM
-     elif len(kp) == 49:
-         flipped_parts = constants.J49_FLIP_PERM
-     else:
-         raise ValueError(f'unexpected number of keypoints: {len(kp)}')
-     kp = kp[flipped_parts]
-     kp[:, 0] = -kp[:, 0]
-     return kp
-
-
- def flip_pose(pose):
-     """Flip pose.
-     The flipping is based on SMPL parameters.
-     """
-     flipped_parts = constants.SMPL_POSE_FLIP_PERM
-     pose = pose[flipped_parts]
-     # we also negate the second and the third dimension of the axis-angle
-     pose[1::3] = -pose[1::3]
-     pose[2::3] = -pose[2::3]
-     return pose
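- # e.g. (hypothetical): mirror image, keypoints, and SMPL pose together during
- # a horizontal-flip augmentation:
- #   img, kp, pose = flip_img(img), flip_kp(kp), flip_pose(pose)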
-
-
- def rescale_cv2(img, rescale_fac):
-     width = int(img.shape[1] * rescale_fac)
-     height = int(img.shape[0] * rescale_fac)
-
-     dsize = (width, height)
-     img = cv2.resize(img, dsize, interpolation=cv2.INTER_LINEAR)
-     return img
-
-
- def generate_part_labels(vertices, faces, cam_t, K, R, dist_coeffs, body_part_texture, part_bins, neural_renderer):
-     batch_size = vertices.shape[0]
-
-     body_parts, depth, mask = neural_renderer(
-         vertices,
-         faces.expand(batch_size, -1, -1),
-         textures=body_part_texture.expand(batch_size, -1, -1, -1, -1, -1),
-         K=K.expand(batch_size, -1, -1),
-         R=R.expand(batch_size, -1, -1),
-         dist_coeffs=dist_coeffs,
-         t=cam_t.unsqueeze(1),
-     )
-
-     render_rgb = body_parts.clone()
-
-     body_parts = body_parts.permute(0, 2, 3, 1)
-     body_parts *= 255.  # scale to [0, 255] so part labels are far apart before binning
-     body_parts, _ = body_parts.max(-1)  # reduce to single channel
-
-     body_parts = torch.bucketize(body_parts.detach(), part_bins, right=True)  # np.digitize(body_parts, bins, right=True)
-
-     # add 1 to make the background label 0
-     body_parts = body_parts.long() + 1
-     body_parts = body_parts * mask.detach()
-
-     return body_parts.long(), render_rgb
-
-
- def get_body_part_texture(faces, n_vertices=6890, non_parametric=False):
-     smpl_segmentation = joblib.load('data/smpl_partSegmentation_mapping.pkl')
-
-     smpl_vert_idx = smpl_segmentation['smpl_index']
-     nparts = 24.
-
-     if non_parametric:
-         # reduce the number of body parts to 14 by mapping some joints to others;
-         # map_smpl_to_common is not defined in this file and must be imported
-         nparts = 14.
-         joint_mapping = map_smpl_to_common()
-
-         for jm in joint_mapping:
-             for j in jm[0]:
-                 smpl_vert_idx[smpl_vert_idx == j] = jm[1]
-
-     vertex_colors = np.ones((n_vertices, 4))
-     vertex_colors[:, :3] = smpl_vert_idx[..., None]
-
-     vertex_colors = color.to_rgba(vertex_colors)
-     face_colors = vertex_colors[faces].min(axis=1)
-
-     texture = np.zeros((1, faces.shape[0], 1, 1, 1, 3), dtype=np.float32)
-     texture[0, :, 0, 0, 0, :] = face_colors[:, :3] / nparts
-     texture = torch.from_numpy(texture).float()
-     return texture
-
-
- def get_default_camera(focal_length, img_size):
-     K = torch.eye(3)
-     K[0, 0] = focal_length
-     K[1, 1] = focal_length
-     K[2, 2] = 1
-     K[0, 2] = img_size / 2.
-     K[1, 2] = img_size / 2.
-     K = K[None, :, :]
-     R = torch.eye(3)[None, :, :]
-     dist_coeffs = torch.FloatTensor([[0., 0., 0., 0., 0.]])
-     return K, R, dist_coeffs
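- # e.g. (hypothetical): intrinsics/extrinsics for a 224px square render, using
- # the diagonal focal-length heuristic above:
- #   K, R, dist_coeffs = get_default_camera(calculate_focal_length(224, 224), 224)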
-
-
- def overlay_text(image, txt_str, str_id=1):
-     font = cv2.FONT_HERSHEY_SIMPLEX
-     font_scale = image.shape[0] * 0.0016
-     thickness = int(image.shape[0] * 0.005)
-     bbox_offset = int(image.shape[0] * 0.01)
-     text_offset_x, text_offset_y = int(image.shape[1] * 0.02), int(image.shape[0] * 0.06 * str_id)
-
-     (text_width, text_height) = cv2.getTextSize(txt_str, font, fontScale=font_scale, thickness=thickness)[0]
-     box_coords = ((text_offset_x, text_offset_y + bbox_offset),
-                   (text_offset_x + text_width + bbox_offset, text_offset_y - text_height - bbox_offset))
-
-     cv2.rectangle(image, box_coords[0], box_coords[1], (255, 255, 255), cv2.FILLED)
-     cv2.putText(image, txt_str, (text_offset_x, text_offset_y), font, font_scale, (0, 0, 255), thickness)
-     return image
-
-
- def show_imgs(imgs, num_rows=1, size=15, live=False, legend=False, cmap=None, label=None,
-               save_img=False, filename=None):
-     if live:
-         clear_output(wait=True)
-     num_imgs_per_row = math.ceil(len(imgs) / num_rows)
-     fig, axs = plt.subplots(num_rows, num_imgs_per_row, squeeze=False,
-                             figsize=(size, size), constrained_layout=True)
-     img_idx = 0
-     for row in range(num_rows):
-         for i in range(num_imgs_per_row):
-             axs[row, i].imshow(imgs[img_idx])
-             axs[row, i].axis('off')
-             if img_idx < len(imgs) - 1:
-                 img_idx += 1
-     if legend:
-         patches = [mpatches.Patch(color=cmap[i], label=label[i]) for i in cmap]
-         plt.legend(handles=patches, loc=4, borderaxespad=1, fontsize=8)
-     if save_img:
-         plt.savefig(filename, dpi=500, bbox_inches='tight')
-     else:
-         plt.show()
-
-
- def concat_images_np(imga, imgb):
-     """
-     Combines two color image ndarrays side-by-side.
-     """
-     assert imga.dtype == imgb.dtype, 'input images must share a dtype'
-     ha, wa = imga.shape[:2]
-     hb, wb = imgb.shape[:2]
-     max_height = np.max([ha, hb])
-     total_width = wa + wb
-     new_img = np.zeros(shape=(max_height, total_width, 3)).astype(imga.dtype)
-     new_img[:ha, :wa] = imga
-     new_img[:hb, wa:wa + wb] = imgb
-     return new_img
-
-
- def concat_n_images_np(image_np_list):
-     """
-     Combines N color images from a list of image ndarrays.
-     """
-     output = None
-     for i, img_np in enumerate(image_np_list):
-         output = img_np if i == 0 else concat_images_np(output, img_np)
-     return output
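- # e.g. (hypothetical): tile three frames of a sequence side by side:
- #   strip = concat_n_images_np([frame_a, frame_b, frame_c])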