# Built from https://huggingface.co/spaces/hlydecker/MegaDetector_v5 
# Built from https://huggingface.co/spaces/sofmi/MegaDetector_DLClive/blob/main/app.py
# Built from https://huggingface.co/spaces/Neslihan/megadetector_dlcmodels/blob/main/app.py 


import gradio as gr

import torch
import torchvision
from dlclive import DLCLive, Processor

from PIL import Image
from PIL import ImageFont
from PIL import ImageDraw 

import numpy as np
import math
# import json
import os
import yaml

# import pdb

#########################################


FONTS = {'amiko': "font/Amiko-Regular.ttf",
        'nature': "font/LoveNature.otf", 
        'painter':"font/PainterDecorator.otf",
        'animals': "font/UncialAnimals.ttf", 
        'zen': "font/ZEN.TTF"}

Megadet_Models = {'md_v5a': "megadet_model/md_v5a.0.0.pt",
                  'md_v5b': "megadet_model/md_v5b.0.0.pt"}
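# note: the selected detector weights are loaded through torch.hub each time predict_md() runs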
                  
#############################################
# %% 
# Create user interface and launch: all inputs
gr_image_input = gr.inputs.Image(type="pil", label="Input Image")


gr_dlc_model_input = gr.inputs.Dropdown(choices=['full_cat','full_dog', 'monkey_face', 'full_human', 'full_monkey'], # choices
                                        default='full_cat', # default option
                                        type='value', # Type of value to be returned by component. "value" returns the string of the choice selected, "index" returns the index of the choice selected.
                                        label='Select DeepLabCut model')
gr_dlc_only_checkbox = gr.inputs.Checkbox(False,
                                         label='Run DLClive only, directly on input image?')                                        

gr_slider_conf_bboxes = gr.inputs.Slider(0,1,.05,0.8,
                                        label='Set confidence threshold for animal detections')
gr_slider_conf_keypoints = gr.inputs.Slider(0,1,.05,0,
                                            label='Set confidence threshold for keypoints')
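# (gr.inputs.Slider positional args above: minimum, maximum, step, default value)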
                                            
gr_keypt_color = gr.ColorPicker(label="choose color for keypoint label") 

gr_pose_font_input = gr.inputs.Dropdown(choices=['amiko', 'nature', 'painter', 'animals', 'zen'],
                                        default='amiko',
                                        type='value', 
                                        label='Select keypoint label font')
gr_slider_font_size = gr.inputs.Slider(5,30,1,8,
                                        label='Set font size')

gr_slider_pose_size = gr.inputs.Slider(0.5,5,0.2,2,
                                        label='Set pose size')                                          
gr_mega_model_input = gr.inputs.Dropdown(choices=['md_v5a','md_v5b'],
                                         default='md_v5a', # default option
                                         type='value', # Type of value to be returned by component. "value" returns the string of the choice selected, "index" returns the index of the choice selected.
                                         label='Select MegaDetector model')
inputs = [gr_image_input,
                     gr_dlc_model_input,
                     gr_dlc_only_checkbox,
                     gr_slider_conf_bboxes,
                     gr_slider_conf_keypoints,
                     gr_pose_font_input,
                     gr_slider_font_size, 
                     gr_keypt_color,
                     gr_slider_pose_size,
                     gr_mega_model_input,
                     ]                                                                                
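# note: the order of `inputs` must match the positional arguments of predict_pipeline below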

#image = gr.inputs.Image(type="pil", label="Input Image")
#chosen_model = gr.inputs.Dropdown(choices = models, value = "model_weights/md_v5a.0.0.pt",type = "value", label="Model Weight")
#size = 640

#########################################
# %% 
def draw_keypoints_on_image(image,
                            keypoints,
                            map_label_id_to_str,
                            use_normalized_coordinates=True,
                            gr_pose_font_input='amiko',
                            gr_slider_font_size=8,
                            gr_keypt_color="#ff0000",
                            gr_slider_pose_size=2
                            ):
    """Draws keypoints and their bodypart labels on a PIL image (in place).
    Modified from:
        https://www.programcreek.com/python/?code=fjchange%2Fobject_centric_VAD%2Fobject_centric_VAD-master%2Fobject_detection%2Futils%2Fvisualization_utils.py
    Args:
    image: a PIL.Image object.
    keypoints: a numpy array with shape [num_keypoints, 2].
    map_label_id_to_str: dict mapping each keypoint index to its bodypart name.
    use_normalized_coordinates: if True (default), treat keypoint values as
        relative to the image. Otherwise treat them as absolute pixel coordinates.
    gr_pose_font_input, gr_slider_font_size, gr_keypt_color, gr_slider_pose_size:
        font, font size, colour and marker radius used for drawing.
    """
    # get a drawing context
    draw = ImageDraw.Draw(image)
    # font = ImageFont.truetype("sans-serif.ttf", 16)    

    im_width, im_height = image.size
    keypoints_x = [k[0] for k in keypoints]
    keypoints_y = [k[1] for k in keypoints]

    # adjust keypoints coords if required
    if use_normalized_coordinates:
        keypoints_x = tuple([im_width * x for x in keypoints_x])
        keypoints_y = tuple([im_height * y for y in keypoints_y])

    # draw ellipses around keypoints and add string labels
    font = ImageFont.truetype(FONTS[gr_pose_font_input], gr_slider_font_size) # font = ImageFont.truetype(<font-file>, <font-size>)
    for i, (keypoint_x, keypoint_y) in enumerate(zip(keypoints_x, keypoints_y)):
        draw.ellipse([(keypoint_x - gr_slider_pose_size, keypoint_y - gr_slider_pose_size),
                      (keypoint_x + gr_slider_pose_size, keypoint_y + gr_slider_pose_size)],
                       outline=gr_keypt_color, fill=gr_keypt_color)

        # add string labels around keypoints
        # draw.text((x, y),"Sample Text",(r,g,b))
        draw.text((keypoint_x + gr_slider_pose_size, keypoint_y + gr_slider_pose_size),#(0.5*im_width, 0.5*im_height), #-------
                  map_label_id_to_str[i],#"Sample Text",
                  (gr_keypt_color), # rgb
                  font=font)
                    
############################################
# %% 
# Predict detections with the selected MegaDetector v5 model
def predict_md(im, size=640, gr_mega_model_input='md_v5a'):
    # resize image so that its largest side equals `size`, keeping the aspect ratio
    g = (size / max(im.size))  # gain
    im = im.resize(tuple(int(x * g) for x in im.size), Image.ANTIALIAS)  # resize
    MD_model = torch.hub.load('ultralytics/yolov5', 'custom', Megadet_Models[gr_mega_model_input])
    ## detect objects
    results = MD_model(im)  # inference # vars(results).keys()= dict_keys(['imgs', 'pred', 'names', 'files', 'times', 'xyxy', 'xywh', 'xyxyn', 'xywhn', 'n', 't', 's'])
    results.render()  # updates results.imgs with boxes and labels

    return results #Image.fromarray(results.imgs[0]) ---return animals only?
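
# minimal usage sketch for the detector on its own (using the example image shipped with this Space):
#   im = Image.open('example/monkey_full.jpg')
#   results = predict_md(im, size=640, gr_mega_model_input='md_v5a')
#   results.xyxy[0]  # one row per detection: [xmin, ymin, xmax, ymax, confidence, class_id]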

# %% 
def crop_animal_detections(yolo_results, 
                           likelihood_th):
    ## crop if animal and return list of crops
    list_labels_as_str = yolo_results.names #['animal', 'person', 'vehicle']
    list_np_animal_crops = []

    # for every image
    for img, det_array in zip(yolo_results.imgs,
                              yolo_results.xyxy):

        # for every detection
        for j in range(det_array.shape[0]):
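            # each row of det_array follows yolov5's xyxy format:
            # [xmin, ymin, xmax, ymax, confidence, class_id], in pixel coordinates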

            # compute coords around bbox rounded to the nearest integer (for pasting later)
            xmin_rd = int(math.floor(det_array[j,0])) # int() should suffice?
            ymin_rd = int(math.floor(det_array[j,1]))

            xmax_rd = int(math.ceil(det_array[j,2]))
            ymax_rd = int(math.ceil(det_array[j,3]))

            pred_llk = det_array[j,4] #-----TODO: filter based on likelihood?
            pred_label = det_array[j,5]

            if (pred_label == list_labels_as_str.index('animal')) and \
                (pred_llk >= likelihood_th):
                area = (xmin_rd, ymin_rd, xmax_rd, ymax_rd)

                crop = Image.fromarray(img).crop(area)
                crop_np = np.asarray(crop)

                # add to list
                list_np_animal_crops.append(crop_np)

    # for detections_dict in img_data["detections"]:
    #     index = img_data["detections"].index(detections_dict)
    #     if detections_dict["conf"] > 0.8: 
    #         x1, y1,w_box, h_box = detections_dict["bbox"]
    #         ymin,xmin,ymax, xmax = y1, x1, y1 + h_box, x1 + w_box
            
    #         imageWidth=img.size[0]
    #         imageHeight= img.size[1]
    #         area = (xmin * imageWidth, ymin * imageHeight, xmax * imageWidth,
    #                 ymax * imageHeight)
    #         crop = img.crop(area)
    #         crop_np = np.asarray(crop)
    # 
    # if detections_dict["category"] == "1":
    return list_np_animal_crops
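
# note: crops are taken from yolo_results.imgs, i.e. the resized image after render(),
# so they may include the drawn bounding-box edges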

# %%
def predict_dlc(list_np_crops,
                kpts_likelihood_th,
                DLCmodel,
                dlc_proc):

    # run dlc thru list of crops
    dlc_live = DLCLive(DLCmodel, processor=dlc_proc)
    dlc_live.init_inference(list_np_crops[0])
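    # DLCLive needs one init_inference() call before get_pose(); note this raises an
    # IndexError if list_np_crops is empty (i.e. no animal detections above threshold)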

    list_kpts_per_crop = []
    for crop in list_np_crops:
        # scale crop here?
        keypts_xyp = dlc_live.get_pose(crop) # one row per keypoint: [x, y, likelihood]
        # set keypoints below the likelihood threshold to nan
        keypts_xyp[keypts_xyp[:,-1] < kpts_likelihood_th,:] = np.nan
        # add kpts of this crop to list 
        list_kpts_per_crop.append(keypts_xyp)

    return list_kpts_per_crop
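
# each element of the returned list is a [num_keypoints, 3] array ([x, y, likelihood] per
# bodypart), with keypoints below the likelihood threshold set to nan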


# %% 
def predict_pipeline(img_input,
                     model_input_str,
                     flag_dlc_only,
                     bbox_likelihood_th,
                     kpts_likelihood_th,
                     gr_pose_font_input=gr_pose_font_input,
                     gr_slider_font_size=gr_slider_font_size,
                     gr_keypt_color=gr_keypt_color,
                     gr_slider_pose_size=gr_slider_pose_size,
                     gr_mega_model_input=gr_mega_model_input,
                     ):

    ############################################################
    ## Get DLC model and labels as strings   
    # map the dropdown selection to the exported DLC model directory
    dlc_models_dict = {'full_cat': "model/DLC_Cat_resnet_50_iteration-0_shuffle-0",
                       'full_dog': "model/DLC_Dog_resnet_50_iteration-0_shuffle-0",
                       'monkey_face': "model/DLC_FacialLandmarks_resnet_50_iteration-1_shuffle-1",
                       'full_human': "model/DLC_human_dancing_resnet_101_iteration-0_shuffle-1",
                       'full_monkey': "model/DLC_monkey_resnet_50_iteration-0_shuffle-1"}
    path_to_DLCmodel = dlc_models_dict[model_input_str]
    pose_cfg_path = os.path.join(path_to_DLCmodel, 'pose_cfg.yaml')

    # read pose cfg as dict
    with open(pose_cfg_path, "r") as stream:
        pose_cfg_dict = yaml.safe_load(stream) 
    map_label_id_to_str = dict([(k,v) for k,v in zip([el[0] for el in pose_cfg_dict['all_joints']], # pose_cfg_dict['all_joints'] is a list of one-element lists,
                                                     pose_cfg_dict['all_joints_names'])])
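    # map_label_id_to_str ends up as e.g. {0: 'nose', 1: 'left_eye', ...}
    # (bodypart names here are purely illustrative; the real ones come from pose_cfg.yaml)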

    ############################################################                                               
    # ### Run Megadetector
    md_results = predict_md(img_input, gr_mega_model_input=gr_mega_model_input) #Image.fromarray(results.imgs[0])

    ################################################################
    # Obtain animal crops for bboxes with confidence above th
    list_crops = crop_animal_detections(md_results,
                                        bbox_likelihood_th)

    ##############################################################
    # Run DLC
    dlc_proc = Processor()
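    # the default dlclive Processor simply passes poses through; it can be subclassed
    # to post-process keypoints or to interface with hardware in closed-loop setups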

    # if required: ignore MD crops and run DLC on full image [mostly for testing]
    if flag_dlc_only:
        # compute kpts on input img
        list_kpts_per_crop = predict_dlc([np.asarray(img_input)],#list_crops,--------
                                         kpts_likelihood_th,
                                         path_to_DLCmodel,
                                         dlc_proc)
        # draw kpts on input img
        draw_keypoints_on_image(img_input,
                                list_kpts_per_crop[0], # a numpy array with shape [num_keypoints, 2].
                                map_label_id_to_str,
                                use_normalized_coordinates=False,
                                gr_pose_font_input=gr_pose_font_input,
                                gr_slider_font_size=gr_slider_font_size,
                                gr_keypt_color=gr_keypt_color,
                                gr_slider_pose_size=gr_slider_pose_size,
                                )
        return img_input, #list_kpts_per_crop

    else:
        # Compute kpts for each crop
        list_kpts_per_crop = predict_dlc(list_crops,
                                         kpts_likelihood_th,
                                         path_to_DLCmodel,
                                         dlc_proc)

        # Produce final image
        img_background = Image.fromarray(md_results.imgs[0]) # img_input?
        for ic, (np_crop, kpts_crop) in enumerate(zip(list_crops,
                                                      list_kpts_per_crop)):

            ## Draw keypts on crop
            img_crop = Image.fromarray(np_crop)
            draw_keypoints_on_image(img_crop,
                                    kpts_crop, # a numpy array with shape [num_keypoints, 2].
                                    map_label_id_to_str,
                                    use_normalized_coordinates=False,  # if True, then I should use md_results.xyxyn
                                    gr_pose_font_input=gr_pose_font_input,
                                    gr_slider_font_size=gr_slider_font_size,
                                    gr_keypt_color=gr_keypt_color,
                                    gr_slider_pose_size=gr_slider_pose_size,
                                    )

            ## Paste crop in original image
            # https://pillow.readthedocs.io/en/stable/reference/Image.html#PIL.Image.Image.paste
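            # the crop is pasted at the top-left corner of the ic-th MegaDetector bbox;
            # md_results.xyxy[0] holds *all* detections, so this pairing is only exact when
            # every detection is an animal above the bbox confidence threshold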
            img_background.paste(img_crop,
                                box = tuple([int(math.floor(t)) for t in md_results.xyxy[0][ic,:2]]))
                                                                
        return img_background, #list_kpts_per_crop, #Image.fromarray(list_crops[0]) #Image.fromarray(md_results.imgs[0]) #list_annotated_crops #


##########################################################
# %% 
# Get MegaDetector model
# TODO: Allow user selectable model?
# models = ["model_weights/md_v5a.0.0.pt","model_weights/md_v5b.0.0.pt"]
#MD_model = torch.hub.load('ultralytics/yolov5', 'custom', "megadet_model/md_v5b.0.0.pt")



####################################################
# %% 
# Create user interface and launch: all outputs
gr_gallery_output = gr.Gallery(type="pil", label="Output Gallery")
gr_json_output = gr.JSON(label='megadetector json')
gr_pose_output = gr.Dataframe(headers=['bodypart', 'x', 'y', 'confidence'],
                              #row_count = number of bodyparts,
                              label='pose coordinates')

outputs = [gr_gallery_output, #gr_json_output,
           #gr_pose_output,
           ]

gr_title = "MegaDetector v5 + DeepLabCut-Live!"
gr_description = "Contributed by Sofia Minano, Neslihan Wittek, Nirel Kadzo, VicShaoChih Chiang -- DLC AI Residents 2022\
                  This App detects and estimate the pose of animals in camera trap images using <a href='https://github.com/microsoft/CameraTraps'>MegaDetector v5a</a> + <a href='https://github.com/DeepLabCut/DeepLabCut-live'>DeepLabCut-live</a>. \
                  It additionally builds upon on work from <a href='https://huggingface.co/spaces/hlydecker/MegaDetector_v5'>hlydecker/MegaDetector_v5</a> \
                  <a href='https://huggingface.co/spaces/sofmi/MegaDetector_DLClive'>sofmi/MegaDetector_DLClive</a> \
                  <a href='https://huggingface.co/spaces/Neslihan/megadetector_dlcmodels'>Neslihan/megadetector_dlcmodels</a>\
                  We experimented with two Megadetector models. See <a href='https://github.com/microsoft/CameraTraps/blob/main/megadetector.md'>here</a> for detailed information about models." 
# article = "<p style='text-align: center'>This app makes predictions using a YOLOv5x6 model that was trained to detect animals, humans, and vehicles in camera trap images; find out more about the project on <a href='https://github.com/microsoft/CameraTraps'>GitHub</a>. This app was built by Henry Lydecker but really depends on code and models developed by <a href='http://ecologize.org/'>Ecologize</a> and <a href='http://aka.ms/aiforearth'>Microsoft AI for Earth</a>. Find out more about the YOLO model from the original creator, <a href='https://pjreddie.com/darknet/yolo/'>Joseph Redmon</a>. YOLOv5 is a family of compound-scaled object detection models trained on the COCO dataset and developed by Ultralytics, and includes simple functionality for Test Time Augmentation (TTA), model ensembling, hyperparameter evolution, and export to ONNX, CoreML and TFLite. <a href='https://github.com/ultralytics/yolov5'>Source code</a> | <a href='https://pytorch.org/hub/ultralytics_yolov5'>PyTorch Hub</a></p>"
# examples = [['data/Macropod.jpg'], ['data/koala2.jpg'],['data/cat.jpg'],['data/BrushtailPossum.jpg']]

# %% 
# example values follow the order of `inputs`; the DLC model entry must be one of the dropdown choices
examples = [['example/monkey_full.jpg', 'full_monkey', False, 0.5, 0.5, None, None, None, None, None]]
demo = gr.Interface(predict_pipeline, 
             inputs=inputs,
             outputs=outputs, 
             title=gr_title, 
             description=gr_description,
             examples = examples,
             theme="huggingface",
             #live=True
             )

demo.launch(enable_queue=True, share=True)