Nadine Rueegg committed on
Commit
d847241
•
1 Parent(s): 3051026

add faster rcnn visualization and avoid reloading model parameters

Browse files
.gitignore CHANGED
@@ -2,6 +2,7 @@ gradio_demo_old
2
  gradio_demo/*.png
3
  gradio_demo/*.glb
4
  gradio_cached_examples/
 
5
  results/gradio_examples/*.png
6
  results/gradio_examples/*.jpg
7
  results/gradio_examples/*.glb
 
2
  gradio_demo/*.png
3
  gradio_demo/*.glb
4
  gradio_cached_examples/
5
+ datasets/test_image_crops_old
6
  results/gradio_examples/*.png
7
  results/gradio_examples/*.jpg
8
  results/gradio_examples/*.glb
README.md CHANGED
@@ -5,7 +5,7 @@ colorFrom: pink
5
  colorTo: green
6
  sdk: gradio
7
  sdk_version: 3.0.2
8
- app_file: ./gradio_demo/barc_demo_v3.py
9
  pinned: false
10
  python_version: 3.7.6
11
  ---
 
5
  colorTo: green
6
  sdk: gradio
7
  sdk_version: 3.0.2
8
+ app_file: ./gradio_demo/barc_demo_v6.py
9
  pinned: false
10
  python_version: 3.7.6
11
  ---
datasets/test_image_crops/201030094143-stock-rhodesian-ridgeback-super-tease.jpg DELETED
Binary file (102 kB)
 
datasets/test_image_crops/Akita-standing-outdoors-in-the-summer-400x267.jpg DELETED
Binary file (22.9 kB)
 
datasets/test_image_crops/Picture10.png ADDED
datasets/test_image_crops/Picture11.png ADDED
datasets/test_image_crops/Picture14.png ADDED
datasets/test_image_crops/Picture15.png ADDED
datasets/test_image_crops/Picture2.jpg ADDED
datasets/test_image_crops/Picture22.png ADDED
datasets/test_image_crops/Picture25.jpg ADDED
datasets/test_image_crops/Picture26.png ADDED
datasets/test_image_crops/Picture5.png ADDED
datasets/test_image_crops/Picture7.png ADDED
datasets/test_image_crops/image_n02089078-black-and-tan_coonhound_n02089078_3810.png DELETED
Binary file (129 kB)
 
datasets/test_image_crops/z_dog_lying_2.jpg ADDED
datasets/test_image_crops/z_dog_sitting.jpg ADDED
datasets/test_image_crops/z_dog_training.jpg ADDED
datasets/test_image_crops/z_ibizan_standing.jpg ADDED
gradio_demo/barc_demo_v6.py ADDED
@@ -0,0 +1,291 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # python gradio_demo/barc_demo_v6.py
2
+
3
+ import os
4
# Expose only the first GPU to this process; ordering by PCI bus id keeps the
# device index consistent with the CUDA_VISIBLE_DEVICES value below.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Install the torch / torchvision builds this demo runs with.
# (Alternative that was tried: torch==1.11.0+cu113 torchvision==0.12.0+cu113
#  from https://download.pytorch.org/whl/cu113/torch_stable.html)
# NOTE: os.system() reports a failing command through its return value, it does
# not raise, so the exit status has to be checked explicitly.
try:
    _pip_status = os.system("pip install --upgrade torch==1.6.0+cu101 torchvision==0.7.0+cu101 -f https://download.pytorch.org/whl/cu101/torch_stable.html")
except Exception as e:
    print(e)
else:
    if _pip_status != 0:
        print("pip install of torch/torchvision failed with exit status {}".format(_pip_status))
11
+
12
+ import numpy as np
13
+ import os
14
+ import glob
15
+ import torch
16
+ from torch.utils.data import DataLoader
17
+ import torchvision
18
+ from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
19
+ import torchvision.transforms as T
20
+ import cv2
21
+ from matplotlib import pyplot as plt
22
+ from PIL import Image
23
+ import random
24
+ import gradio as gr
25
+
26
+ import sys
27
+ sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../', 'src'))
28
+ from stacked_hourglass.datasets.imgcropslist import ImgCrops
29
+ from combined_model.train_main_image_to_3d_withbreedrel import do_visual_epoch
30
+ from combined_model.model_shape_v7 import ModelImageTo3d_withshape_withproj
31
+ from configs.barc_cfg_defaults import get_cfg_global_updated
32
+
33
+ random.seed(0)
34
+
35
+ print(
36
+ "torch: ", torch.__version__,
37
+ "\ntorchvision: ", torchvision.__version__,
38
+ )
39
+
40
+
41
+
42
def get_prediction(model, img_path_or_img, confidence=0.5):
    """
    Run a detection model on one image and keep the confident detections.

    see https://haochen23.github.io/2020/04/object-detection-faster-rcnn.html#.YsMCm4TP3-g

    parameters:
      - model - detection model; it is called as ``model([img_tensor])`` and
        its first output must be a dict with 'labels', 'boxes' and 'scores'
      - img_path_or_img - path of the input image (str), or an already loaded
        image that torchvision's ``ToTensor`` accepts
      - confidence - threshold value for prediction score
    returns:
      - (pred_boxes, pred_class, pred_score): boxes as
        ``[(x1, y1), (x2, y2)]`` integer corner pairs and class ids, both cut
        off after the last detection whose score exceeds ``confidence``;
        ``pred_score`` is the complete, untruncated score list
      - (None, None, None) if no detection exceeds ``confidence``
    """
    if isinstance(img_path_or_img, str):
        img = Image.open(img_path_or_img).convert('RGB')
    else:
        img = img_path_or_img
    img = T.ToTensor()(img)
    pred = model([img])[0]
    # detach()/cpu() make the numpy conversion independent of autograd state
    # and of the device the model runs on
    pred_class = list(pred['labels'].detach().cpu().numpy())
    pred_boxes = [[(int(b[0]), int(b[1])), (int(b[2]), int(b[3]))]
                  for b in pred['boxes'].detach().cpu().numpy()]
    pred_score = list(pred['scores'].detach().cpu().numpy())
    # positions of all detections above the threshold; enumerate() gives the
    # true position even when several detections share the same score
    above = [idx for idx, score in enumerate(pred_score) if score > confidence]
    if not above:
        print('no bounding box with a score that is high enough found! -> work on full image')
        return None, None, None
    keep = above[-1] + 1
    return pred_boxes[:keep], pred_class[:keep], pred_score
75
+
76
+
77
def detect_object(model, img_path_or_img, confidence=0.5, rect_th=2, text_size=0.5, text_th=1):
    """
    Find the first confident dog detection and draw its bounding box.

    see https://haochen23.github.io/2020/04/object-detection-faster-rcnn.html#.YsMCm4TP3-g

    parameters:
      - model - detection model, forwarded to get_prediction
      - img_path_or_img - path of the input image, or the image itself
      - confidence - threshold value for prediction score
      - rect_th - thickness of bounding box
      - text_size - size of the class label text (currently unused, the label
        is not drawn)
      - text_th - thickness of the text (currently unused)
    method:
      - prediction is obtained from get_prediction method
      - the first prediction whose class is 'dog' is selected and its
        bounding box is drawn with opencv
    returns:
      - img - RGB image with the box drawn on it; if an image (not a path) was
        passed in, that same object is drawn on in place
      - bbox - ``[(x1, y1), (x2, y2)]`` of the selected detection, or None if
        no dog was found
    """
    boxes, pred_cls, _ = get_prediction(model, img_path_or_img, confidence)
    if isinstance(img_path_or_img, str):
        img = cv2.cvtColor(cv2.imread(img_path_or_img), cv2.COLOR_BGR2RGB)
    else:
        img = img_path_or_img
    # label id 18 is 'dog' in the COCO category list used by torchvision's
    # pretrained detection models
    dog_label = 18
    bbox = None
    if boxes is not None:
        for box, cls in zip(boxes, pred_cls):
            if cls == dog_label:
                cv2.rectangle(img, box[0], box[1], color=(0, 255, 0), thickness=rect_th)
                bbox = box
                # only the first (earliest listed) dog detection is used
                break
    return img, bbox
110
+
111
+
112
+ # -------------------------------------------------------------------------------------------------------------------- #
113
# Pretrained Faster R-CNN, created once at import time and switched to
# evaluation mode so that it is not rebuilt for every request.
model_bbox = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
model_bbox.eval()


def run_bbox_inference(input_image):
    """
    Detect a dog in the input image with the module-level Faster R-CNN.

    parameters:
      - input_image - image (or image path) handed on to detect_object; an
        image passed here gets the bounding box drawn onto it
    returns:
      - img - image with the detected bounding box drawn on it
      - bbox - bounding box of the detection, or None if nothing was found
    side effects:
      - writes the visualization to <ROOT_OUT_PATH>/gradio_examples/test2.png
    """
    # load configs
    cfg = get_cfg_global_updated()
    out_path = os.path.join(cfg.paths.ROOT_OUT_PATH, 'gradio_examples', 'test2.png')
    img, bbox = detect_object(model=model_bbox, img_path_or_img=input_image, confidence=0.5)
    # imsave writes the array directly; no pyplot figure is needed (creating
    # one per call would accumulate open figures that are never closed)
    plt.imsave(out_path, img)
    return img, bbox
124
+ # -------------------------------------------------------------------------------------------------------------------- #
125
+
126
+
127
+ # -------------------------------------------------------------------------------------------------------------------- #
128
+ # load configs
129
cfg = get_cfg_global_updated()
# Select the hardware device to use for inference.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print('---> device: ' + device)
# disable gradient calculations (this is a global switch for the process).
torch.set_grad_enabled(False)
# prepare complete model (built once at import time and reused by every request)
complete_model = ModelImageTo3d_withshape_withproj(
    num_stage_comb=cfg.params.NUM_STAGE_COMB,
    num_stage_heads=cfg.params.NUM_STAGE_HEADS,
    num_stage_heads_pose=cfg.params.NUM_STAGE_HEADS_POSE,
    trans_sep=cfg.params.TRANS_SEP,
    arch=cfg.params.ARCH,
    n_joints=cfg.params.N_JOINTS,
    n_classes=cfg.params.N_CLASSES,
    n_keyp=cfg.params.N_KEYP,
    n_bones=cfg.params.N_BONES,
    n_betas=cfg.params.N_BETAS,
    n_betas_limbs=cfg.params.N_BETAS_LIMBS,
    n_breeds=cfg.params.N_BREEDS,
    n_z=cfg.params.N_Z,
    image_size=cfg.params.IMG_SIZE,
    silh_no_tail=cfg.params.SILH_NO_TAIL,
    thr_keyp_sc=cfg.params.KP_THRESHOLD,
    add_z_to_3d_input=cfg.params.ADD_Z_TO_3D_INPUT,
    n_segbps=cfg.params.N_SEGBPS,
    add_segbps_to_3d_input=cfg.params.ADD_SEGBPS_TO_3D_INPUT,
    add_partseg=cfg.params.ADD_PARTSEG,
    n_partseg=cfg.params.N_PARTSEG,
    fix_flength=cfg.params.FIX_FLENGTH,
    structure_z_to_betas=cfg.params.STRUCTURE_Z_TO_B,
    structure_pose_net=cfg.params.STRUCTURE_POSE_NET,
    nf_version=cfg.params.NF_VERSION)
# load trained model
path_model_file_complete = os.path.join(cfg.paths.ROOT_CHECKPOINT_PATH, 'barc_complete', 'model_best.pth.tar')
print(path_model_file_complete)
# explicit check instead of assert, which would be skipped under `python -O`
if not os.path.isfile(path_model_file_complete):
    raise FileNotFoundError('BARC checkpoint not found: {}'.format(path_model_file_complete))
print('Loading model weights from file: {}'.format(path_model_file_complete))
checkpoint_complete = torch.load(path_model_file_complete, map_location=device)
state_dict_complete = checkpoint_complete['state_dict']
# strict=False: keys that do not match between checkpoint and model are ignored
complete_model.load_state_dict(state_dict_complete, strict=False)
complete_model = complete_model.to(device)
# create path for output files
save_imgs_path = os.path.join(cfg.paths.ROOT_OUT_PATH, 'gradio_examples')
os.makedirs(save_imgs_path, exist_ok=True)
159
+
160
def run_barc_inference(input_image, bbox=None):
    """
    Run the BARC model on a single image and export the posed mesh as .glb.

    parameters:
      - input_image - the image to reconstruct, passed to ImgCrops as a
        one-element image list
      - bbox - optional bounding box for the image; None means no bounding
        box list is given to ImgCrops
    returns:
      - path of the exported .glb file inside save_imgs_path
    """
    bbox_list = None if bbox is None else [bbox]
    # single-image dataset and loader
    val_dataset = ImgCrops(image_list=[input_image], bbox_list=bbox_list, dataset_mode='complete')
    test_name_list = val_dataset.test_name_list
    val_loader = DataLoader(
        val_dataset,
        batch_size=1,
        shuffle=False,
        num_workers=0,
        pin_memory=True,
        drop_last=False,
    )
    # visual evaluation; nothing is written to disk here (save_imgs_path=None),
    # the results are returned instead
    all_results = do_visual_epoch(
        val_loader, complete_model, device,
        ImgCrops.DATA_INFO,
        weight_dict=None,
        acc_joints=ImgCrops.ACC_JOINTS,
        save_imgs_path=None,
        metrics='all',
        test_name_list=test_name_list,
        render_all=cfg.params.RENDER_ALL,
        pck_thresh=cfg.params.PCK_THRESH,
        return_results=True,
    )
    posed_mesh = all_results[0]['mesh_posed']
    # homogeneous transform: negate x and y, shift z by +1
    view_transform = [
        [-1, 0, 0, 0],
        [0, -1, 0, 0],
        [0, 0, 1, 1],
        [0, 0, 0, 1],
    ]
    posed_mesh.apply_transform(view_transform)
    # output file is named after the first (and only) test name, suffixed '_z'
    result_gltf = os.path.join(save_imgs_path, test_name_list[0] + '_z') + '.glb'
    posed_mesh.export(file_obj=result_gltf)
    return result_gltf
192
+ # -------------------------------------------------------------------------------------------------------------------- #
193
+
194
+
195
def run_complete_inference(img_path_or_img, crop_choice):
    """
    Full demo pipeline: optional bounding box detection, then BARC inference.

    parameters:
      - img_path_or_img - input image, either as a file path (str) or as an
        image array
      - crop_choice - "input image is cropped" uses the image as it is; any
        other value runs Faster R-CNN first to obtain a bounding box
    returns:
      - [path to .glb, path to .glb, visualization image]; the .glb path is
        returned twice (3D viewer and file download), the visualization is the
        intermediate image padded left and right with white
    """
    # depending on crop_choice: run faster r-cnn or take the input image directly
    if crop_choice == "input image is cropped":
        if isinstance(img_path_or_img, str):
            img = cv2.imread(img_path_or_img)
            output_interm_image = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        else:
            output_interm_image = img_path_or_img
        output_interm_bbox = None
    else:
        if isinstance(img_path_or_img, str):
            # a path has no .copy(); the detector loads its own image from it
            detector_input = img_path_or_img
        else:
            # the detector draws onto its input, so hand it a copy and keep
            # the original clean for the BARC model
            detector_input = img_path_or_img.copy()
        output_interm_image, output_interm_bbox = run_bbox_inference(detector_input)
    # run barc inference
    result_gltf = run_barc_inference(img_path_or_img, output_interm_bbox)
    # add white border to image for nicer alignment
    white = 255*np.ones_like(output_interm_image)
    output_interm_image_vis = np.concatenate((white, output_interm_image, white), axis=1)
    return [result_gltf, result_gltf, output_interm_image_vis]
211
+
212
+
213
+
214
+
215
+ ########################################################################################################################
216
+
217
+ # see: https://huggingface.co/spaces/radames/PIFu-Clothed-Human-Digitization/blob/main/PIFu/spaces.py
218
+
219
+ description = '''
220
+ # BARC
221
+
222
+ #### Project Page
223
+ * https://barc.is.tue.mpg.de/
224
+
225
+ #### Description
226
+ This is a demo for BARC (*B*reed *A*ugmented *R*egression using *C*lassification).
227
+ You can either submit a cropped image or choose the option to run a pretrained Faster R-CNN in order to obtain a bounding box.
228
+ Please have a look at the examples below.
229
+ <details>
230
+
231
+ <summary>More</summary>
232
+
233
+ #### Citation
234
+
235
+ ```
236
+ @inproceedings{BARC:2022,
237
+ title = {{BARC}: Learning to Regress {3D} Dog Shape from Images by Exploiting Breed Information},
238
+ author = {Rueegg, Nadine and Zuffi, Silvia and Schindler, Konrad and Black, Michael J.},
239
+ booktitle = {Proceedings IEEE Conf. on Computer Vision and Pattern Recognition (CVPR)},
240
+ year = {2022}
241
+ }
242
+ ```
243
+
244
+ #### Image Sources (Examples)
245
+ * Stanford extra image dataset
246
+ * Images from google search engine
247
+ * https://www.dogtrainingnation.com/wp-content/uploads/2015/02/keep-dog-training-sessions-short.jpg
248
+ * https://thumbs.dreamstime.com/b/hund-und-seine-neue-hundeh%C3%BCtte-36757551.jpg
249
+ * https://www.mydearwhippet.com/wp-content/uploads/2021/04/whippet-temperament-2.jpg
250
+ * https://media.istockphoto.com/photos/ibizan-hound-at-the-shore-in-winter-picture-id1092705644?k=20&m=1092705644&s=612x612&w=0&h=ppwg92s9jI8GWnk22SOR_DWWNP8b2IUmLXSQmVey5Ss=
251
+
252
+
253
+ </details>
254
+ '''
255
+
256
+
257
+
258
+
259
+
260
+
261
# Collect the example images (jpg and png) shipped with the demo and present
# them in a shuffled order.
_crops_dir = os.path.join(os.path.dirname(__file__), '../', 'datasets', 'test_image_crops')
example_images = sorted(
    glob.glob(os.path.join(_crops_dir, '*.jpg'))
    + glob.glob(os.path.join(_crops_dir, '*.png'))
)
random.shuffle(example_images)
# Files whose name starts with 'z_' are run through Faster R-CNN first; all
# other examples are treated as already cropped.
examples = [
    [path, "use Faster R-CNN to get a bounding box"]
    if os.path.basename(path).startswith('z_')
    else [path, "input image is cropped"]
    for path in example_images
]

demo = gr.Interface(
    fn=run_complete_inference,
    description=description,
    inputs=[
        gr.Image(label="Input Image"),
        gr.Radio(
            ["input image is cropped", "use Faster R-CNN to get a bounding box"],
            value="use Faster R-CNN to get a bounding box",
            label="Crop Choice",
        ),
    ],
    outputs=[
        gr.Model3D(clear_color=[0.0, 0.0, 0.0, 0.0], label="3D Model"),
        gr.File(label="Download 3D Model"),
        gr.Image(label="Bounding Box (Faster R-CNN prediction)"),
    ],
    examples=examples,
    thumbnail="barc_thumbnail.png",
    allow_flagging="never",
    cache_examples=False,
    examples_per_page=14,
)

demo.launch()