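"""MegaDetector v5 + DeepLabCut-live demo.

Detects animals in camera-trap images with MegaDetector v5a, crops each
detection above a confidence threshold, estimates the animal's pose on each
crop with a DeepLabCut-live model, and pastes the annotated crops back onto
the input image.
"""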
import gradio as gr
import torch
import torchvision
from dlclive import DLCLive, Processor
from PIL import Image, ImageDraw, ImageFont
import numpy as np
import math
import os
import yaml
#########################################
def draw_keypoints_on_image(image,
                            keypoints,
                            map_label_id_to_str,
                            color='red',
                            radius=2,
                            use_normalized_coordinates=True):
    """Draws keypoints and their string labels on an image, in place.

    Modified from:
    https://www.programcreek.com/python/?code=fjchange%2Fobject_centric_VAD%2Fobject_centric_VAD-master%2Fobject_detection%2Futils%2Fvisualization_utils.py

    Args:
      image: a PIL.Image object.
      keypoints: a numpy array with shape [num_keypoints, 2].
      map_label_id_to_str: dict mapping each keypoint index to its string label.
      color: color to draw the keypoints with. Default is red.
      radius: keypoint radius. Default value is 2.
      use_normalized_coordinates: if True (default), treat keypoint values as
        relative to the image. Otherwise treat them as absolute.
    """
    # get a drawing context
    draw = ImageDraw.Draw(image)
    im_width, im_height = image.size
    keypoints_x = [k[0] for k in keypoints]
    keypoints_y = [k[1] for k in keypoints]

    # map normalized coordinates to pixel coordinates if required
    if use_normalized_coordinates:
        keypoints_x = tuple([im_width * x for x in keypoints_x])
        keypoints_y = tuple([im_height * y for y in keypoints_y])

    # draw a filled circle per keypoint, with its string label next to it
    font = ImageFont.truetype("Amiko-Regular.ttf", 8)
    for i, (keypoint_x, keypoint_y) in enumerate(zip(keypoints_x, keypoints_y)):
        # skip keypoints set to nan (i.e. below the likelihood threshold)
        if math.isnan(keypoint_x) or math.isnan(keypoint_y):
            continue
        draw.ellipse([(keypoint_x - radius, keypoint_y - radius),
                      (keypoint_x + radius, keypoint_y + radius)],
                     outline=color, fill=color)
        draw.text((keypoint_x + radius, keypoint_y + radius),
                  map_label_id_to_str[i],
                  fill=(255, 0, 0),  # RGB
                  font=font)
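# A minimal usage sketch for the function above; the image path, keypoint
# array and label map are hypothetical, for illustration only:
#
#   img = Image.open("example.jpg")
#   kpts = np.array([[0.5, 0.4], [0.6, 0.7]])  # normalized (x, y) per keypoint
#   draw_keypoints_on_image(img, kpts, {0: 'nose', 1: 'tail_base'})
#   img.save("example_annotated.jpg")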
############################################
# Predict detections with MegaDetector v5a model
def predict_md(im, size=640):
    # resize image so that its longest side equals `size`
    g = size / max(im.size)  # gain
    im = im.resize(tuple(int(x * g) for x in im.size),
                   Image.Resampling.LANCZOS)  # ANTIALIAS was removed in Pillow 10

    # inference; results exposes imgs, pred, names, xyxy, xywh, xyxyn, xywhn, ...
    results = MD_model(im)
    results.render()  # updates results.imgs with boxes and labels drawn in
    return results
def crop_animal_detections(yolo_results,
                           likelihood_th):
    ## crop every 'animal' detection above the likelihood threshold; return
    ## the crops and their top-left corners (needed for pasting them back later)
    list_labels_as_str = yolo_results.names  # ['animal', 'person', 'vehicle']
    list_np_animal_crops = []
    list_crop_offsets = []
    # for every image
    for img, det_array in zip(yolo_results.imgs,
                              yolo_results.xyxy):
        # for every detection
        for j in range(det_array.shape[0]):
            # bbox coords, rounded outwards to the nearest integer
            xmin_rd = int(math.floor(det_array[j, 0]))
            ymin_rd = int(math.floor(det_array[j, 1]))
            xmax_rd = int(math.ceil(det_array[j, 2]))
            ymax_rd = int(math.ceil(det_array[j, 3]))
            pred_llk = det_array[j, 4]    # detection confidence
            pred_label = det_array[j, 5]  # class index
            if (pred_label == list_labels_as_str.index('animal')) and \
               (pred_llk >= likelihood_th):
                area = (xmin_rd, ymin_rd, xmax_rd, ymax_rd)
                crop = Image.fromarray(img).crop(area)
                # add to lists
                list_np_animal_crops.append(np.asarray(crop))
                list_crop_offsets.append((xmin_rd, ymin_rd))
    return list_np_animal_crops, list_crop_offsets
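# Sketch of the detection + cropping flow above, assuming MD_model has been
# loaded (see bottom of file); the image path is hypothetical:
#
#   im = Image.open("camera_trap.jpg")
#   md_results = predict_md(im)
#   crops, offsets = crop_animal_detections(md_results, likelihood_th=0.8)
#   # crops[i]: RGB numpy array of one animal; offsets[i]: its (xmin, ymin)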
def predict_dlc(list_np_crops,
                kpts_likelihood_th,
                DLCmodel,
                dlc_proc):
    # initialise DLC-live with the first crop, then run it over every crop
    dlc_live = DLCLive(DLCmodel, processor=dlc_proc)
    dlc_live.init_inference(list_np_crops[0])

    list_kpts_per_crop = []
    for crop in list_np_crops:
        keypts_xyp = dlc_live.get_pose(crop)  # columns: x, y, likelihood
        # set keypoints below the likelihood threshold to nan
        keypts_xyp[keypts_xyp[:, -1] < kpts_likelihood_th, :] = np.nan
        # add keypoints of this crop to the list
        list_kpts_per_crop.append(keypts_xyp)
    return list_kpts_per_crop
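# Usage sketch for predict_dlc, assuming `crops` as returned above and a
# model directory downloaded locally (threshold value for illustration only):
#
#   dlc_proc = Processor()
#   kpts_per_crop = predict_dlc(crops, 0.05,
#                               "DLC_models/DLC_Cat_resnet_50_iteration-0_shuffle-0",
#                               dlc_proc)
#   # each entry is an array of shape [num_keypoints, 3]: (x, y, likelihood)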
def predict_pipeline(img_input,
                     model_input_str,
                     flag_dlc_only,
                     bbox_likelihood_th,
                     kpts_likelihood_th):
    ############################################################
    # Select DLC model and read its keypoint labels
    if model_input_str == 'full_cat':
        path_to_DLCmodel = "DLC_models/DLC_Cat_resnet_50_iteration-0_shuffle-0"
    elif model_input_str == 'full_dog':
        path_to_DLCmodel = "DLC_models/DLC_Dog_resnet_50_iteration-0_shuffle-0"
    pose_cfg_path = os.path.join(path_to_DLCmodel, 'pose_cfg.yaml')

    # read pose cfg as dict; 'all_joints' is a list of one-element lists
    with open(pose_cfg_path, "r") as stream:
        pose_cfg_dict = yaml.safe_load(stream)
    map_label_id_to_str = dict(zip([el[0] for el in pose_cfg_dict['all_joints']],
                                   pose_cfg_dict['all_joints_names']))

    ############################################################
    # Run MegaDetector
    md_results = predict_md(img_input)

    ############################################################
    # Obtain animal crops for bboxes with confidence above threshold
    list_crops, list_crop_offsets = crop_animal_detections(md_results,
                                                           bbox_likelihood_th)

    ############################################################
    # Run DLC
    dlc_proc = Processor()

    # if required: ignore MD crops and run DLC on full image [mostly for testing]
    if flag_dlc_only:
        # compute keypoints on the input image
        list_kpts_per_crop = predict_dlc([np.asarray(img_input)],
                                         kpts_likelihood_th,
                                         path_to_DLCmodel,
                                         dlc_proc)
        # draw keypoints on the input image
        draw_keypoints_on_image(img_input,
                                list_kpts_per_crop[0],  # array of shape [num_keypoints, 3]
                                map_label_id_to_str,
                                color='red',
                                radius=2,
                                use_normalized_coordinates=False)
        return img_input
    else:
        # compute keypoints for each crop
        list_kpts_per_crop = predict_dlc(list_crops,
                                         kpts_likelihood_th,
                                         path_to_DLCmodel,
                                         dlc_proc)
        # Produce final image: resize the input with the same gain used in
        # predict_md, so that crop offsets match the background coordinates
        size = 640  # must match the `size` default of predict_md
        g = size / max(img_input.size)  # gain
        img_background = img_input.resize(tuple(int(x * g) for x in img_input.size),
                                          Image.Resampling.LANCZOS)
        for ic, (np_crop, kpts_crop) in enumerate(zip(list_crops,
                                                      list_kpts_per_crop)):
            ## draw keypoints on the crop (pixel coords, not normalized)
            img_crop = Image.fromarray(np_crop)
            draw_keypoints_on_image(img_crop,
                                    kpts_crop,
                                    map_label_id_to_str,
                                    color='red',
                                    radius=2,
                                    use_normalized_coordinates=False)
            ## paste the annotated crop back onto the background image
            # https://pillow.readthedocs.io/en/stable/reference/Image.html#PIL.Image.Image.paste
            img_background.paste(img_crop,
                                 box=list_crop_offsets[ic])
        return img_background
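# The full pipeline can also be called outside Gradio; a minimal sketch with
# a hypothetical test image:
#
#   out = predict_pipeline(Image.open("test.jpg"),
#                          'full_cat',
#                          flag_dlc_only=False,
#                          bbox_likelihood_th=0.8,
#                          kpts_likelihood_th=0.05)
#   out.save("test_annotated.jpg")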
##########################################################
# Get MegaDetector model
# TODO: Allow user selectable model?
# models = ["model_weights/md_v5a.0.0.pt","model_weights/md_v5b.0.0.pt"]
MD_model = torch.hub.load('ultralytics/yolov5', 'custom', "model_weights/md_v5a.0.0.pt")
####################################################
# Create user interface and launch
gr_image_input = gr.inputs.Image(type="pil", label="Input Image")
gr_image_output = gr.outputs.Image(type="pil", label="Output Image")
gr_dlc_model_input = gr.inputs.Dropdown(choices=['full_cat', 'full_dog'],
                                        default='full_cat',
                                        type='value',  # 'value' returns the selected string, 'index' its index
                                        label='Select DLC model')
gr_dlc_only_checkbox = gr.inputs.Checkbox(False,
                                          label='Run DLClive only, directly on input image?')
gr_slider_conf_bboxes = gr.inputs.Slider(0, 1, .05, 0.8,
                                         label='Set confidence threshold for animal detections')
gr_slider_conf_keypoints = gr.inputs.Slider(0, 1, .05, 0,
                                            label='Set confidence threshold for keypoints')
gr_title = "MegaDetector v5 + DLClive"
gr_description = "Detect and estimate the pose of animals in camera trap images, using MegaDetector v5a + DeepLabCut-live. \
                  Builds on work from <a href='https://huggingface.co/spaces/hlydecker/MegaDetector_v5'>hlydecker/MegaDetector_v5</a>"
gr.Interface(predict_pipeline,
             inputs=[gr_image_input,
                     gr_dlc_model_input,
                     gr_dlc_only_checkbox,
                     gr_slider_conf_bboxes,
                     gr_slider_conf_keypoints],
             outputs=gr_image_output,
             title=gr_title,
             description=gr_description,
             theme="huggingface").launch(enable_queue=True)