sfmig committed on
Commit a065329 • 1 Parent(s): 6e8a870

refactoring. reverted background image fix.
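For context, the reverted "background image fix" concerns which image the pose-annotated crops are pasted back onto in `predict_pipeline`. The sketch below is not part of the commit; the helper name `choose_background` is hypothetical, and `md_results` stands for the YOLOv5/MegaDetector results object returned by `predict_md` in the diff that follows.

```python
# Hypothetical helper (not part of this commit): illustrates the two background
# choices that appear in the diff below.
from PIL import Image

def choose_background(img_input, md_results, use_md_render=True):
    if use_md_render:
        # Behaviour restored by this commit: paste crops onto MegaDetector's
        # rendered output (already resized and annotated with bounding boxes).
        return Image.fromarray(md_results.imgs[0])
    # Behaviour removed by this commit: paste crops onto the original input
    # image, whose size may not match the resized detector output.
    return img_input
```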

Files changed (1)
  1. app.py +143 -156
app.py CHANGED
@@ -9,12 +9,11 @@ import torch
 import torchvision
 from dlclive import DLCLive, Processor

- from PIL import Image
- from PIL import ImageFont
- from PIL import ImageDraw

 import numpy as np
 import math
 # import json
 import os
 import yaml
@@ -22,8 +21,7 @@ import yaml
 # import pdb

 #########################################
-
-
 FONTS = {'amiko': "font/Amiko-Regular.ttf",
          'nature': "font/LoveNature.otf",
          'painter':"font/PainterDecorator.otf",
@@ -33,83 +31,32 @@ FONTS = {'amiko': "font/Amiko-Regular.ttf",
 Megadet_Models = {'md_v5a': "megadet_model/md_v5a.0.0.pt",
                   'md_v5b': "megadet_model/md_v5b.0.0.pt"}

- #############################################
- # %%
- # Create user interface and launch: all inputs
- gr_image_input = gr.inputs.Image(type="pil", label="Input Image")
-
-
- gr_dlc_model_input = gr.inputs.Dropdown(choices=['full_cat','full_dog', 'primate_face', 'full_human', 'full_macaque'], # choices
-                                         default='full_cat', # default option
-                                         type='value', # Type of value to be returned by component. "value" returns the string of the choice selected, "index" returns the index of the choice selected.
-                                         label='Select DeepLabCut model')
- gr_dlc_only_checkbox = gr.inputs.Checkbox(False,
-                                           label='Run DLClive only, directly on input image?')
- gr_str_labels_checkbox = gr.inputs.Checkbox(True,
-                                             label='Show bodypart labels?')
- gr_slider_conf_bboxes = gr.inputs.Slider(0,1,.05,0.8,
-                                          label='Set confidence threshold for animal detections')
- gr_slider_conf_keypoints = gr.inputs.Slider(0,1,.05,0,
-                                             label='Set confidence threshold for keypoints')
-
- gr_keypt_color = gr.ColorPicker(label="choose color for keypoint label")
-
- gr_pose_font_input = gr.inputs.Dropdown(choices=['amiko', 'nature', 'painter', 'animals', 'zen'],
-                                         default='amiko',
-                                         type='value',
-                                         label='Select keypoint label font')
- gr_slider_font_size = gr.inputs.Slider(5,30,1,8,
-                                        label='Set font size')
-
- gr_slider_pose_size = gr.inputs.Slider(0.5,5,0.2,2,
-                                        label='Set pose size')
- gr_mega_model_input = gr.inputs.Dropdown(choices=['md_v5a','md_v5b'],
-                                          default='md_v5a', # default option
-                                          type='value', # Type of value to be returned by component. "value" returns the string of the choice selected, "index" returns the index of the choice selected.
-                                          label='Select MegaDetector model')
- inputs = [gr_image_input,
-           gr_dlc_model_input,
-           gr_dlc_only_checkbox,
-           gr_str_labels_checkbox,
-           gr_slider_conf_bboxes,
-           gr_slider_conf_keypoints,
-           gr_pose_font_input,
-           gr_slider_font_size,
-           gr_keypt_color,
-           gr_slider_pose_size,
-           gr_mega_model_input,
-           ]
-
- #image = gr.inputs.Image(type="pil", label="Input Image")
- #chosen_model = gr.inputs.Dropdown(choices = models, value = "model_weights/md_v5a.0.0.pt",type = "value", label="Model Weight")
- #size = 640
-
 #########################################
- # %%
 def draw_keypoints_on_image(image,
                             keypoints,
                             map_label_id_to_str,
                             flag_show_str_labels,
                             use_normalized_coordinates=True,
-                            gr_pose_font_input='amiko',
-                            gr_slider_font_size=8,
-                            gr_keypt_color="#ff0000",
-                            gr_slider_pose_size='2'
-                            ):
     """Draws keypoints on an image.
     Modified from:
     https://www.programcreek.com/python/?code=fjchange%2Fobject_centric_VAD%2Fobject_centric_VAD-master%2Fobject_detection%2Futils%2Fvisualization_utils.py
     Args:
       image: a PIL.Image object.
      keypoints: a numpy array with shape [num_keypoints, 2].
      color: color to draw the keypoints with. Default is red.
      radius: keypoint radius. Default value is 2.
      use_normalized_coordinates: if True (default), treat keypoint values as
        relative to the image. Otherwise treat them as absolute.
    """
    # get a drawing context
-    draw = ImageDraw.Draw(image)
-    # font = ImageFont.truetype("sans-serif.ttf", 16)

    im_width, im_height = image.size
    keypoints_x = [k[0] for k in keypoints]
@@ -121,41 +68,46 @@ def draw_keypoints_on_image(image,
    keypoints_y = tuple([im_height * y for y in keypoints_y])

    # draw ellipses around keypoints and add string labels
-    font = ImageFont.truetype(FONTS[gr_pose_font_input], gr_slider_font_size) # font = ImageFont.truetype(<font-file>, <font-size>)
    for i, (keypoint_x, keypoint_y) in enumerate(zip(keypoints_x, keypoints_y)):
-        draw.ellipse([(keypoint_x - gr_slider_pose_size, keypoint_y - gr_slider_pose_size),
-                      (keypoint_x + gr_slider_pose_size, keypoint_y + gr_slider_pose_size)],
-                     outline=gr_keypt_color, fill=gr_keypt_color)

        # add string labels around keypoints
        if flag_show_str_labels:
-            # draw.text((x, y),"Sample Text",(r,g,b))
-            draw.text((keypoint_x + gr_slider_pose_size, keypoint_y + gr_slider_pose_size),#(0.5*im_width, 0.5*im_height), #-------
-                      map_label_id_to_str[i],#"Sample Text",
-                      (gr_keypt_color), # rgb
                      font=font)

 ############################################
- # %%
 # Predict detections with MegaDetector v5a model
- def predict_md(im, size=640, gr_mega_model_input='md_v5a'):
    # resize image
-    g = (size / max(im.size)) # gain
-    im = im.resize((int(x * g) for x in im.size), Image.ANTIALIAS) # resize
-    MD_model = torch.hub.load('ultralytics/yolov5', 'custom', Megadet_Models[gr_mega_model_input])
    ## detect objects
    results = MD_model(im) # inference # vars(results).keys()= dict_keys(['imgs', 'pred', 'names', 'files', 'times', 'xyxy', 'xywh', 'xyxyn', 'xywhn', 'n', 't', 's'])
    results.render() # updates results.imgs with boxes and labels

-    return results #Image.fromarray(results.imgs[0]) ---return animals only?

- # %%
 def crop_animal_detections(yolo_results,
                            likelihood_th):
-    ## crop if animal and return list of crops
-    list_labels_as_str = yolo_results.names #['animal', 'person', 'vehicle']
-    list_np_animal_crops = []

    # for every image
    for img, det_array in zip(yolo_results.imgs,
                              yolo_results.xyxy):
@@ -170,9 +122,10 @@ def crop_animal_detections(yolo_results,
        xmax_rd = int(math.ceil(det_array[j,2]))
        ymax_rd = int(math.ceil(det_array[j,3]))

-        pred_llk = det_array[j,4] #-----TODO: filter based on likelihood?
        pred_label = det_array[j,5]

        if (pred_label == list_labels_as_str.index('animal')) and \
           (pred_llk >= likelihood_th):
            area = (xmin_rd, ymin_rd, xmax_rd, ymax_rd)
@@ -183,23 +136,9 @@ def crop_animal_detections(yolo_results,
            # add to list
            list_np_animal_crops.append(crop_np)

-            # for detections_dict in img_data["detections"]:
-            #     index = img_data["detections"].index(detections_dict)
-            #     if detections_dict["conf"] > 0.8:
-            #         x1, y1,w_box, h_box = detections_dict["bbox"]
-            #         ymin,xmin,ymax, xmax = y1, x1, y1 + h_box, x1 + w_box
-
-            #         imageWidth=img.size[0]
-            #         imageHeight= img.size[1]
-            #         area = (xmin * imageWidth, ymin * imageHeight, xmax * imageWidth,
-            #                 ymax * imageHeight)
-            #         crop = img.crop(area)
-            #         crop_np = np.asarray(crop)
-            #
-            #     if detections_dict["category"] == "1":
    return list_np_animal_crops

- # %%
 def predict_dlc(list_np_crops,
                 kpts_likelihood_th,
                 DLCmodel,
@@ -210,7 +149,7 @@ def predict_dlc(list_np_crops,
    dlc_live.init_inference(list_np_crops[0])

    list_kpts_per_crop = []
-    np_aux = np.empty((1,3)) # can I avoid hardcoding?
    for crop in list_np_crops:
        # scale crop here?
        keypts_xyp = dlc_live.get_pose(crop) # third column is llk!
@@ -222,22 +161,23 @@ def predict_dlc(list_np_crops,
    return list_kpts_per_crop


- # %%
 def predict_pipeline(img_input,
                      model_input_str,
                      flag_dlc_only,
                      flag_show_str_labels,
                      bbox_likelihood_th,
                      kpts_likelihood_th,
-                     gr_pose_font_input=gr_pose_font_input,
-                     gr_slider_font_size=gr_slider_font_size,
-                     gr_keypt_color=gr_keypt_color,
-                     gr_slider_pose_size=gr_slider_pose_size,
-                     gr_mega_model_input=gr_mega_model_input,
                      ):

    ############################################################
    ## Get DLC model and labels as strings
    if model_input_str == 'full_cat':
        path_to_DLCmodel = "model/DLC_Cat_resnet_50_iteration-0_shuffle-0"
        pose_cfg_path = os.path.join(path_to_DLCmodel,'pose_cfg.yaml')
@@ -255,15 +195,18 @@ def predict_pipeline(img_input,
        pose_cfg_path = os.path.join(path_to_DLCmodel,'pose_cfg.yaml')


-    # read pose cfg as dict
    with open(pose_cfg_path, "r") as stream:
        pose_cfg_dict = yaml.safe_load(stream)
-    map_label_id_to_str = dict([(k,v) for k,v in zip([el[0] for el in pose_cfg_dict['all_joints']], # pose_cfg_dict['all_joints'] is a list of one-element lists,
                                                      pose_cfg_dict['all_joints_names'])])

    ############################################################
    # ### Run Megadetector
-    md_results = predict_md(img_input, gr_mega_model_input=gr_mega_model_input) #Image.fromarray(results.imgs[0])

    ################################################################
    # Obtain animal crops for bboxes with confidence above th
@@ -277,7 +220,7 @@ def predict_pipeline(img_input,
    # if required: ignore MD crops and run DLC on full image [mostly for testing]
    if flag_dlc_only:
        # compute kpts on input img
-        list_kpts_per_crop = predict_dlc([np.asarray(img_input)],#list_crops,--------
                                         kpts_likelihood_th,
                                         path_to_DLCmodel,
                                         dlc_proc)
@@ -287,12 +230,11 @@ def predict_pipeline(img_input,
                                    map_label_id_to_str,
                                    flag_show_str_labels,
                                    use_normalized_coordinates=False,
-                                   gr_pose_font_input=gr_pose_font_input,
-                                   gr_slider_font_size=gr_slider_font_size,
-                                   gr_keypt_color=gr_keypt_color,
-                                   gr_slider_pose_size=gr_slider_pose_size,
-                                   )
-        return img_input, #list_kpts_per_crop

    else:
        # Compute kpts for each crop
@@ -302,7 +244,14 @@ def predict_pipeline(img_input,
                                          dlc_proc)

        # Produce final image
-        img_background = img_input # img_input or Image.fromarray(md_results.imgs[0])?
        for ic, (np_crop, kpts_crop) in enumerate(zip(list_crops,
                                                      list_kpts_per_crop)):

@@ -312,66 +261,104 @@ def predict_pipeline(img_input,
                                        kpts_crop, # a numpy array with shape [num_keypoints, 2].
                                        map_label_id_to_str,
                                        flag_show_str_labels,
-                                       use_normalized_coordinates=False, # if True, then I should use md_results.xyxyn
-                                       gr_pose_font_input=gr_pose_font_input,
-                                       gr_slider_font_size=gr_slider_font_size,
-                                       gr_keypt_color=gr_keypt_color,
-                                       gr_slider_pose_size=gr_slider_pose_size,
-                                       )

            ## Paste crop in original image
            # https://pillow.readthedocs.io/en/stable/reference/Image.html#PIL.Image.Image.paste
            img_background.paste(img_crop,
-                                 box = tuple([int(math.floor(t)) for t in md_results.xyxy[0][ic,:2]]))

-        return img_background, #list_kpts_per_crop, #Image.fromarray(list_crops[0]) #Image.fromarray(md_results.imgs[0]) #list_annotated_crops #

-
- ##########################################################
 # %%
- # Get MegaDetector model
- # TODO: Allow user selectable model?
- # models = ["model_weights/md_v5a.0.0.pt","model_weights/md_v5b.0.0.pt"]
- #MD_model = torch.hub.load('ultralytics/yolov5', 'custom', "megadet_model/md_v5b.0.0.pt")



 ####################################################
 # %%
- # Create user interface and launch: all outputs
- gr_gallery_output = gr.Gallery(type="pil", label="Output Gallery")
- gr_json_output = gr.JSON(label='megadetector json')
- gr_pose_output = gr.Dataframe(headers=['bodypart', 'x', 'y', 'confidence'],
-                               #row_count = number of bodyparts,
-                               label='pose coordinates')
-
- outputs = [gr_gallery_output, #gr_json_output,
-            #gr_pose_output,
-            ]

 gr_title = "MegaDetector v5 + DeepLabCut-Live!"
- gr_description = "Contributed by Sofia Minano, Neslihan Wittek, Nirel Kadzo, VicShaoChih Chiang -- DLC AI Residents 2022\
 This App detects and estimate the pose of animals in camera trap images using <a href='https://github.com/microsoft/CameraTraps'>MegaDetector v5a</a> + <a href='https://github.com/DeepLabCut/DeepLabCut-live'>DeepLabCut-live</a>. \
- It additionally builds upon on work from <a href='https://huggingface.co/spaces/hlydecker/MegaDetector_v5'>hlydecker/MegaDetector_v5</a> \
 <a href='https://huggingface.co/spaces/sofmi/MegaDetector_DLClive'>sofmi/MegaDetector_DLClive</a> \
- <a href='https://huggingface.co/spaces/Neslihan/megadetector_dlcmodels'>Neslihan/megadetector_dlcmodels</a>\
- We experimented with two Megadetector models. See <a href='https://github.com/microsoft/CameraTraps/blob/main/megadetector.md'>here</a> for detailed information about models."
 # article = "<p style='text-align: center'>This app makes predictions using a YOLOv5x6 model that was trained to detect animals, humans, and vehicles in camera trap images; find out more about the project on <a href='https://github.com/microsoft/CameraTraps'>GitHub</a>. This app was built by Henry Lydecker but really depends on code and models developed by <a href='http://ecologize.org/'>Ecologize</a> and <a href='http://aka.ms/aiforearth'>Microsoft AI for Earth</a>. Find out more about the YOLO model from the original creator, <a href='https://pjreddie.com/darknet/yolo/'>Joseph Redmon</a>. YOLOv5 is a family of compound-scaled object detection models trained on the COCO dataset and developed by Ultralytics, and includes simple functionality for Test Time Augmentation (TTA), model ensembling, hyperparameter evolution, and export to ONNX, CoreML and TFLite. <a href='https://github.com/ultralytics/yolov5'>Source code</a> | <a href='https://pytorch.org/hub/ultralytics_yolov5'>PyTorch Hub</a></p>"
- # examples = [['data/Macropod.jpg'], ['data/koala2.jpg'],['data/cat.jpg'],['data/BrushtailPossum.jpg']]

- # %%
 examples = [['example/monkey_full.jpg', 'full_macaque', False, True, 0.5, 0.3, 'amiko', 5, 'blue', 3, 'md_v5a'],
-             ['example/dog.jpeg', 'full_dog', False, True, 0.5, 0.05, 'amiko', 5, 'yellow', 3, 'md_v5a'],
-             ['example/cat.jpg', 'full_cat', False, True, 0.5, 0.05, 'amiko', 5, 'purple', 3, 'md_v5a']]

 demo = gr.Interface(predict_pipeline,
-                    inputs=inputs,
-                    outputs=outputs,
-                    title=gr_title,
-                    description=gr_description,
-                    examples = examples,
-                    theme="huggingface",
-                    #live=True
-                    )

 demo.launch(enable_queue=True, share=True)
app.py (resulting file; added lines marked with +, "..." marks unchanged lines not shown)

 import torchvision
 from dlclive import DLCLive, Processor

+ from PIL import Image, ImageColor, ImageFont, ImageDraw

 import numpy as np
 import math
+
 # import json
 import os
 import yaml
...
 # import pdb

 #########################################
+ # Input params
 FONTS = {'amiko': "font/Amiko-Regular.ttf",
          'nature': "font/LoveNature.otf",
          'painter':"font/PainterDecorator.otf",
...
 Megadet_Models = {'md_v5a': "megadet_model/md_v5a.0.0.pt",
                   'md_v5b': "megadet_model/md_v5b.0.0.pt"}

 #########################################
+ # Draw keypoints on image
 def draw_keypoints_on_image(image,
                             keypoints,
                             map_label_id_to_str,
                             flag_show_str_labels,
                             use_normalized_coordinates=True,
+                            font_style='amiko',
+                            font_size=8,
+                            keypt_color="#ff0000",
+                            marker_size='2'):
    """Draws keypoints on an image.
    Modified from:
    https://www.programcreek.com/python/?code=fjchange%2Fobject_centric_VAD%2Fobject_centric_VAD-master%2Fobject_detection%2Futils%2Fvisualization_utils.py
    Args:
      image: a PIL.Image object.
      keypoints: a numpy array with shape [num_keypoints, 2].
+      map_label_id_to_str: dict with keys=label number and values= label string
+      flag_show_str_labels: boolean to select whether or not to show string labels
      color: color to draw the keypoints with. Default is red.
      radius: keypoint radius. Default value is 2.
      use_normalized_coordinates: if True (default), treat keypoint values as
        relative to the image. Otherwise treat them as absolute.
    """
    # get a drawing context
+    draw = ImageDraw.Draw(image)

    im_width, im_height = image.size
    keypoints_x = [k[0] for k in keypoints]
...
    keypoints_y = tuple([im_height * y for y in keypoints_y])

    # draw ellipses around keypoints and add string labels
    for i, (keypoint_x, keypoint_y) in enumerate(zip(keypoints_x, keypoints_y)):
+        draw.ellipse([(keypoint_x - marker_size, keypoint_y - marker_size),
+                      (keypoint_x + marker_size, keypoint_y + marker_size)],
+                     outline=keypt_color,
+                     fill=keypt_color)

        # add string labels around keypoints
        if flag_show_str_labels:
+            font = ImageFont.truetype(FONTS[font_style],
+                                      font_size)
+            draw.text((keypoint_x + marker_size, keypoint_y + marker_size),#(0.5*im_width, 0.5*im_height), #-------
+                      map_label_id_to_str[i],
+                      ImageColor.getcolor(keypt_color, "RGB"), # rgb
                      font=font)

 ############################################
 # Predict detections with MegaDetector v5a model
+ def predict_md(im,
+                mega_model_input,
+                size=640):
+
    # resize image
+    g = (size / max(im.size)) # multipl factor to make max size of the image equal to input size
+    im = im.resize((int(x * g) for x in im.size),
+                   Image.ANTIALIAS) # resize
+    MD_model = torch.hub.load('ultralytics/yolov5', 'custom', Megadet_Models[mega_model_input])
+
    ## detect objects
    results = MD_model(im) # inference # vars(results).keys()= dict_keys(['imgs', 'pred', 'names', 'files', 'times', 'xyxy', 'xywh', 'xyxyn', 'xywhn', 'n', 't', 's'])
    results.render() # updates results.imgs with boxes and labels

+    return results

+ ##########################################
 def crop_animal_detections(yolo_results,
                            likelihood_th):

+    ## Extract animal crops
+    list_labels_as_str = yolo_results.names # ['animal', 'person', 'vehicle']
+    list_np_animal_crops = []
    # for every image
    for img, det_array in zip(yolo_results.imgs,
                              yolo_results.xyxy):
...
        xmax_rd = int(math.ceil(det_array[j,2]))
        ymax_rd = int(math.ceil(det_array[j,3]))

+        pred_llk = det_array[j,4]
        pred_label = det_array[j,5]

+        # keep animal crops above threshold
        if (pred_label == list_labels_as_str.index('animal')) and \
           (pred_llk >= likelihood_th):
            area = (xmin_rd, ymin_rd, xmax_rd, ymax_rd)
...
            # add to list
            list_np_animal_crops.append(crop_np)

    return list_np_animal_crops

+ ##########################################
 def predict_dlc(list_np_crops,
                 kpts_likelihood_th,
                 DLCmodel,
...
    dlc_live.init_inference(list_np_crops[0])

    list_kpts_per_crop = []
+    np_aux = np.empty((1,3)) # can I avoid hardcoding here?
    for crop in list_np_crops:
        # scale crop here?
        keypts_xyp = dlc_live.get_pose(crop) # third column is llk!
...
    return list_kpts_per_crop

+ #####################################################
 def predict_pipeline(img_input,
+                     mega_model_input,
                      model_input_str,
                      flag_dlc_only,
                      flag_show_str_labels,
                      bbox_likelihood_th,
                      kpts_likelihood_th,
+                     font_style,
+                     font_size,
+                     keypt_color,
+                     marker_size,
                      ):

    ############################################################
    ## Get DLC model and labels as strings
+    # TODO: make a dict as for megadetector
    if model_input_str == 'full_cat':
        path_to_DLCmodel = "model/DLC_Cat_resnet_50_iteration-0_shuffle-0"
        pose_cfg_path = os.path.join(path_to_DLCmodel,'pose_cfg.yaml')
...
        pose_cfg_path = os.path.join(path_to_DLCmodel,'pose_cfg.yaml')


+    # extract map label ids to strings
+    # pose_cfg_dict['all_joints'] is a list of one-element lists,
    with open(pose_cfg_path, "r") as stream:
        pose_cfg_dict = yaml.safe_load(stream)
+    map_label_id_to_str = dict([(k,v) for k,v in zip([el[0] for el in pose_cfg_dict['all_joints']],
                                                      pose_cfg_dict['all_joints_names'])])

    ############################################################
    # ### Run Megadetector
+    md_results = predict_md(img_input,
+                            mega_model_input,
+                            size=640) #Image.fromarray(results.imgs[0])

    ################################################################
    # Obtain animal crops for bboxes with confidence above th
...
    # if required: ignore MD crops and run DLC on full image [mostly for testing]
    if flag_dlc_only:
        # compute kpts on input img
+        list_kpts_per_crop = predict_dlc([np.asarray(img_input)],
                                         kpts_likelihood_th,
                                         path_to_DLCmodel,
                                         dlc_proc)
...
                                    map_label_id_to_str,
                                    flag_show_str_labels,
                                    use_normalized_coordinates=False,
+                                   font_style=font_style,
+                                   font_size=font_size,
+                                   keypt_color=keypt_color,
+                                   marker_size=marker_size)
+        return img_input

    else:
        # Compute kpts for each crop
...
                                          dlc_proc)

        # Produce final image
+        img_background = Image.fromarray(md_results.imgs[0]) # img_input or Image.fromarray(md_results.imgs[0])?
+        # Image.fromarray(md_results.imgs[0]) --> (640, 479)
+        # img_input.size ---> (259, 194)
+        # pdb.set_trace()
+
+        # resize image to match megadetector output
+        # g = (640 / max(img_background.size)) # gain
+        # img_background = img_background.resize((int(x * g) for x in img_background.size), Image.ANTIALIAS) # resize
        for ic, (np_crop, kpts_crop) in enumerate(zip(list_crops,
                                                      list_kpts_per_crop)):

...
                                        kpts_crop, # a numpy array with shape [num_keypoints, 2].
                                        map_label_id_to_str,
                                        flag_show_str_labels,
+                                       use_normalized_coordinates=False, # if True, then I should use md_results.xyxyn for list_kpts_crop
+                                       font_style=font_style,
+                                       font_size=font_size,
+                                       keypt_color=keypt_color,
+                                       marker_size=marker_size)

            ## Paste crop in original image
            # https://pillow.readthedocs.io/en/stable/reference/Image.html#PIL.Image.Image.paste
            img_background.paste(img_crop,
+                                 box = tuple([int(t) for t in md_results.xyxy[0][ic,:2]]))

+        return img_background

+ #############################################
 # %%
+ # User interface: inputs
+
+ # Input image
+ gr_image_input = gr.inputs.Image(type="pil", label="Input Image")


+ # Models
+ gr_dlc_model_input = gr.inputs.Dropdown(choices=['full_cat','full_dog', 'primate_face', 'full_human', 'full_macaque'], # choices
+                                         default='full_cat', # default option
+                                         type='value', # Type of value to be returned by component. "value" returns the string of the choice selected, "index" returns the index of the choice selected.
+                                         label='Select DeepLabCut model')
+ gr_mega_model_input = gr.inputs.Dropdown(choices=['md_v5a','md_v5b'],
+                                          default='md_v5a', # default option
+                                          type='value', # Type of value to be returned by component. "value" returns the string of the choice selected, "index" returns the index of the choice selected.
+                                          label='Select MegaDetector model')
+ # Other inputs
+ gr_dlc_only_checkbox = gr.inputs.Checkbox(False,
+                                           label='Run DLClive only, directly on input image?')
+ gr_str_labels_checkbox = gr.inputs.Checkbox(True,
+                                             label='Show bodypart labels?')
+
+ gr_slider_conf_bboxes = gr.inputs.Slider(0,1,.05,0.8,
+                                          label='Set confidence threshold for animal detections')
+ gr_slider_conf_keypoints = gr.inputs.Slider(0,1,.05,0,
+                                             label='Set confidence threshold for keypoints')

+ # Data viz
+ gr_keypt_color = gr.ColorPicker(label="choose color for keypoint label")
+
+ gr_labels_font_style = gr.inputs.Dropdown(choices=['amiko', 'nature', 'painter', 'animals', 'zen'],
+                                           default='amiko',
+                                           type='value',
+                                           label='Select keypoint label font')
+ gr_slider_font_size = gr.inputs.Slider(5,30,1,8,
+                                        label='Set font size')
+ gr_slider_marker_size = gr.inputs.Slider(0.5,5,0.2,2,
+                                          label='Set marker size')
+
+ # list of inputs
+ inputs = [gr_image_input,
+           gr_mega_model_input,
+           gr_dlc_model_input,
+           gr_dlc_only_checkbox,
+           gr_str_labels_checkbox,
+           gr_slider_conf_bboxes,
+           gr_slider_conf_keypoints,
+           gr_labels_font_style,
+           gr_slider_font_size,
+           gr_keypt_color,
+           gr_slider_marker_size,
+           ]
 ####################################################
 # %%
+ # User interface: outputs
+ gr_image_output = gr.outputs.Image(type="pil", label="Output Image")
+ outputs = [gr_image_output]

+ ##############################################
+ # User interace: description
 gr_title = "MegaDetector v5 + DeepLabCut-Live!"
+ gr_description = "Contributed by Sofia Minano, Neslihan Wittek, Nirel Kadzo, VicShaoChih Chiang -- DLC AI Residents 2022.\
 This App detects and estimate the pose of animals in camera trap images using <a href='https://github.com/microsoft/CameraTraps'>MegaDetector v5a</a> + <a href='https://github.com/DeepLabCut/DeepLabCut-live'>DeepLabCut-live</a>. \
+ We host models from the <a href='http://www.mackenziemathislab.org/dlc-modelzoo'>DeepLabCut ModelZoo Project</a>\, and two <a href='https://github.com/microsoft/CameraTraps/blob/main/megadetector.md'>MegaDetector Models</a>. Please carefully check their licensing information if you use this project. The App additionally builds upon on work from <a href='https://huggingface.co/spaces/hlydecker/MegaDetector_v5'>hlydecker/MegaDetector_v5</a> \
 <a href='https://huggingface.co/spaces/sofmi/MegaDetector_DLClive'>sofmi/MegaDetector_DLClive</a> \
+ <a href='https://huggingface.co/spaces/Neslihan/megadetector_dlcmodels'>Neslihan/megadetector_dlcmodels</a>\."
+
 # article = "<p style='text-align: center'>This app makes predictions using a YOLOv5x6 model that was trained to detect animals, humans, and vehicles in camera trap images; find out more about the project on <a href='https://github.com/microsoft/CameraTraps'>GitHub</a>. This app was built by Henry Lydecker but really depends on code and models developed by <a href='http://ecologize.org/'>Ecologize</a> and <a href='http://aka.ms/aiforearth'>Microsoft AI for Earth</a>. Find out more about the YOLO model from the original creator, <a href='https://pjreddie.com/darknet/yolo/'>Joseph Redmon</a>. YOLOv5 is a family of compound-scaled object detection models trained on the COCO dataset and developed by Ultralytics, and includes simple functionality for Test Time Augmentation (TTA), model ensembling, hyperparameter evolution, and export to ONNX, CoreML and TFLite. <a href='https://github.com/ultralytics/yolov5'>Source code</a> | <a href='https://pytorch.org/hub/ultralytics_yolov5'>PyTorch Hub</a></p>"

 examples = [['example/monkey_full.jpg', 'full_macaque', False, True, 0.5, 0.3, 'amiko', 5, 'blue', 3, 'md_v5a'],
+             ['example/dog.jpeg', 'full_dog', False, True, 0.5, 0.05, 'amiko', 5, 'yellow', 3, 'md_v5a'],
+             ['example/cat.jpg', 'full_cat', False, True, 0.5, 0.05, 'amiko', 5, 'purple', 3, 'md_v5a']]
+
+ ################################################
+ # %% Define and launch gradio interface
 demo = gr.Interface(predict_pipeline,
+                    inputs=inputs,
+                    outputs=outputs,
+                    title=gr_title,
+                    description=gr_description,
+                    examples=examples,
+                    theme="huggingface",
+                    #live=True
+                    )

 demo.launch(enable_queue=True, share=True)
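For reference, a minimal usage sketch (not part of this commit) of calling the refactored `predict_pipeline` directly, following the new argument order shown in the resulting file above. The image path is taken from the examples list; the output filename is hypothetical.

```python
# Minimal sketch (not part of this commit): invoking the refactored pipeline
# directly with the new predict_pipeline signature shown above.
from PIL import Image

img = Image.open("example/cat.jpg")      # path taken from the examples list above
out = predict_pipeline(img,
                       'md_v5a',         # mega_model_input
                       'full_cat',       # model_input_str
                       False,            # flag_dlc_only
                       True,             # flag_show_str_labels
                       0.8,              # bbox_likelihood_th
                       0.0,              # kpts_likelihood_th
                       'amiko',          # font_style
                       8,                # font_size
                       '#ff0000',        # keypt_color
                       2)                # marker_size
out.save("cat_annotated.png")            # hypothetical output filename
```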