Yuliang committed
Commit: 7641d7c
Parent: f191c89

use mask_rcnn as detector

Files changed (3):
  1. app.py +0 -2
  2. lib/dataset/TestDataset.py +2 -9
  3. lib/pymaf/utils/imutils.py +16 -16
app.py CHANGED
```diff
@@ -18,8 +18,6 @@ if os.getenv('SYSTEM') == 'spaces':
         'pip install https://download.is.tue.mpg.de/icon/HF/kaolin-0.11.0-cp38-cp38-linux_x86_64.whl'.split())
     subprocess.run(
         'pip install https://download.is.tue.mpg.de/icon/HF/pytorch3d-0.7.0-cp38-cp38-linux_x86_64.whl'.split())
-    subprocess.run(
-        'pip install git+https://github.com/Project-Splinter/human_det.git'.split())
     subprocess.run(
         'pip install git+https://github.com/YuliangXiu/neural_voxelization_layer.git'.split())
 
```
lib/dataset/TestDataset.py CHANGED
```diff
@@ -30,7 +30,6 @@ import os.path as osp
 import torch
 import numpy as np
 import random
-import human_det
 from termcolor import colored
 from PIL import ImageFile
 from huggingface_hub import cached_download
@@ -52,12 +51,6 @@ class TestDataset():
 
         self.device = device
 
-        if self.has_det:
-            self.det = human_det.Detection()
-        else:
-            self.det = None
-
-
         self.subject_list = [self.image_path]
 
         # smpl related
@@ -155,7 +148,7 @@ class TestDataset():
 
         if self.seg_dir is None:
             img_icon, img_hps, img_ori, img_mask, uncrop_param = process_image(
-                img_path, self.det, self.hps_type, 512, self.device)
+                img_path, self.hps_type, 512, self.device)
 
             data_dict = {
                 'name': img_name,
@@ -167,7 +160,7 @@
 
         else:
             img_icon, img_hps, img_ori, img_mask, uncrop_param, segmentations = process_image(
-                img_path, self.det, self.hps_type, 512, self.device,
+                img_path, self.hps_type, 512, self.device,
                 seg_path=os.path.join(self.seg_dir, f'{img_name}.json'))
             data_dict = {
                 'name': img_name,
```
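
With the detector argument gone, TestDataset no longer owns a detection model: callers of process_image now pass only the image path, the HPS backbone name, the resolution, and the device. A minimal usage sketch of the new signature (the image path and the 'pymaf' value are illustrative assumptions, not taken from this commit):

```python
import torch

from lib.pymaf.utils.imutils import process_image

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# No `det` argument anymore: process_image builds its own detector
# internally instead of receiving a human_det.Detection instance.
img_icon, img_hps, img_ori, img_mask, uncrop_param = process_image(
    "examples/demo.png", "pymaf", 512, device)
```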
lib/pymaf/utils/imutils.py CHANGED
```diff
@@ -7,6 +7,7 @@ import torch
 import numpy as np
 from PIL import Image
 from rembg.bg import remove
+from torchvision.models import detection
 
 from lib.pymaf.core import constants
 from lib.pymaf.utils.streamer import aug_matrix
@@ -83,7 +84,7 @@ def get_transformer(input_res):
     return [image_to_tensor, mask_to_tensor, image_to_pymaf_tensor, image_to_pixie_tensor, image_to_hybrik_tensor]
 
 
-def process_image(img_file, det, hps_type, input_res=512, device=None, seg_path=None):
+def process_image(img_file, hps_type, input_res=512, device=None, seg_path=None):
     """Read image, do preprocessing and possibly crop it according to the bounding box.
     If there are bounding box annotations, use them to crop the image.
     If no bounding box is specified but openpose detections are available, use them to get the bounding box.
@@ -101,21 +102,20 @@ def process_image(img_file, det, hps_type, input_res=512, device=None, seg_path=None):
     img_for_crop = cv2.warpAffine(img_ori, M[0:2, :],
                                   (input_res*2, input_res*2), flags=cv2.INTER_CUBIC)
 
-    if det is not None:
-
-        # detection for bbox
-        bbox = get_bbox(img_for_crop, det)
-
-        width = bbox[2] - bbox[0]
-        height = bbox[3] - bbox[1]
-        center = np.array([(bbox[0] + bbox[2]) / 2.0,
-                           (bbox[1] + bbox[3]) / 2.0])
-
-    else:
-        # Assume that the person is centerered in the image
-        height = img_for_crop.shape[0]
-        width = img_for_crop.shape[1]
-        center = np.array([width // 2, height // 2])
+    # detection for bbox
+    detector = detection.maskrcnn_resnet50_fpn(pretrained=True)
+    detector.eval()
+    predictions = detector(
+        [torch.from_numpy(img_for_crop).permute(2, 0, 1) / 255.])[0]
+    human_ids = torch.logical_and(
+        predictions["labels"] == 1,
+        predictions["scores"] == predictions["scores"].max()).nonzero().squeeze(1)
+    bbox = predictions["boxes"][human_ids, :].flatten().detach().cpu().numpy()
+
+    width = bbox[2] - bbox[0]
+    height = bbox[3] - bbox[1]
+    center = np.array([(bbox[0] + bbox[2]) / 2.0,
+                       (bbox[1] + bbox[3]) / 2.0])
 
     scale = max(height, width) / 180
```
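
For reference, here is the new detection step pulled out into a standalone helper. This is a sketch, not the committed code: it keeps the highest-scoring person box (COCO category 1), whereas the committed logic intersects `labels == 1` with `scores == scores.max()`, which leaves `bbox` empty, and the width/height computation failing, whenever the single top-scoring detection is not a person.

```python
import numpy as np
import torch
from torchvision.models import detection


def detect_person_bbox(img: np.ndarray) -> np.ndarray:
    """Return [x_min, y_min, x_max, y_max] of the most confident person.

    Sketch only: assumes `img` is an H x W x 3 uint8 RGB array, like the
    warped crop produced by the cv2.warpAffine call above.
    """
    detector = detection.maskrcnn_resnet50_fpn(pretrained=True)
    detector.eval()
    with torch.no_grad():  # inference-only guard; the commit omits it
        pred = detector([torch.from_numpy(img).permute(2, 0, 1) / 255.])[0]
    person = pred["labels"] == 1  # COCO label 1 is "person"
    if not person.any():
        raise ValueError("no person detected in image")
    # Take the best person box even when a non-person detection scores higher.
    best = pred["scores"].masked_fill(~person, -1.0).argmax()
    return pred["boxes"][best].detach().cpu().numpy()
```

Two design points worth noting in the committed version: the Mask R-CNN is constructed inside process_image, so the pretrained weights are reloaded on every image, and the `device` argument is never used for detection. Caching a single detector at module scope, roughly where `self.det` used to live in TestDataset, would avoid both costs.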