vchiang001 committed on
Commit 07db84b • 1 Parent(s): d7c24ee

Update app.py

Files changed (1)
  1. app.py +289 -67
app.py CHANGED
@@ -1,84 +1,306 @@
  # Copied megadetector section from https://huggingface.co/spaces/hlydecker/MegaDetector_v5
+ # Copied from https://huggingface.co/spaces/sofmi/MegaDetector_DLClive/blob/main/app.py
  # Copied from https://huggingface.co/spaces/Neslihan/megadetector_dlcmodels/blob/main/app.py
  
- print("before import")
+
  import gradio as gr
- import json
- import os
- import matplotlib.pyplot as plt
- import numpy as np
- from PIL import Image
+
+ import torch
+ import torchvision
  from dlclive import DLCLive, Processor
- from numpy import savetxt
- import PIL
- print("after import")
-
- # A method that allows using dlc live, but with different models, saves poses, and plots the poses onto image
- def dlclive_pose(model, crop_np, crop, index,dlc_proc):
-     dlc_live = DLCLive(model, processor=dlc_proc)
-     dlc_live.init_inference(crop_np)
-     keypts = dlc_live.get_pose(crop_np)
-     xpose = []
-     ypose = []
-     for key in keypts[:,2]:
-         # if key > 0.05: # which value do we need here?
-         i = np.where(keypts[:,2]==key)
-         xpose.append(keypts[i,0])
-         ypose.append(keypts[i,1])
-     plt.imshow(crop)
-     plt.scatter(xpose[:], ypose[:], 40, color='cyan')
-
-     canvas = plt.gca().figure.canvas
-     canvas.draw()
-     image = PIL.Image.frombytes('RGB', canvas.get_width_height(), canvas.tostring_rgb())
-
-     plt.clf()
-     return image
-
- def classify_image(img, file):
-
-     primate_face_model = 'model_weights/DLC_FacialLandmarks_resnet_50_iteration-1_shuffle-1'
-     human_model = 'model_weights/DLC_human_dancing_resnet_101_iteration-0_shuffle-1'
-
-     with open(file.name, 'r') as f:
-         detection_results = json.load(f)
-
-     dlc_proc = Processor()
-
-     # Assuming there is only 1 detection on the output
-     img_data = detection_results["images"][0]
-
-     output_images = []
-
-     for detections_dict in img_data["detections"]:
-         index = img_data["detections"].index(detections_dict)
-         if detections_dict["conf"] > 0.8:
-             x1, y1,w_box, h_box = detections_dict["bbox"]
-             ymin,xmin,ymax, xmax = y1, x1, y1 + h_box, x1 + w_box
-
-             imageWidth=img.size[0]
-             imageHeight= img.size[1]
-             area = (xmin * imageWidth, ymin * imageHeight, xmax * imageWidth,
-                     ymax * imageHeight)
-             crop = img.crop(area)
-             crop_np = np.asarray(crop)
-
-             if detections_dict["category"] == "1":
-                 selected_model = primate_face_model
-             elif detections_dict["category"] == "2":
-                 selected_model = human_model
-
-             # Until we know how to dynamically add output element to gradio, just return the first image
-             output_images.append(dlclive_pose(selected_model, crop_np, crop, index, dlc_proc))
-
-     return output_images[0], output_images[1] # lol
-
- input_image = gr.inputs.Image(type="pil", label="Input Image")
- input_file = gr.inputs.File(label="output.json")
-
- # Fake it till we make it, we know our example has 2 outputs
- outputs = [gr.outputs.Image(type="pil", label="Output Image"), gr.outputs.Image(type="pil", label="Output Image")]
-
- gr.Interface(fn=classify_image, inputs=[input_image, input_file], outputs=outputs, theme="huggingface").launch()
+
+ from PIL import Image
+ from PIL import ImageFont
+ from PIL import ImageDraw
+
+ import numpy as np
+ import math
+ # import json
+ import os
+ import yaml
+
+ # import pdb
+
+ #########################################
+
+ def draw_keypoints_on_image(image,
+                             keypoints,
+                             map_label_id_to_str,
+                             color='red',
+                             radius=2,
+                             use_normalized_coordinates=True,
+                             ):
+     """Draws keypoints on an image.
+     Modified from:
+     https://www.programcreek.com/python/?code=fjchange%2Fobject_centric_VAD%2Fobject_centric_VAD-master%2Fobject_detection%2Futils%2Fvisualization_utils.py
+     Args:
+         image: a PIL.Image object.
+         keypoints: a numpy array with shape [num_keypoints, 2].
+         color: color to draw the keypoints with. Default is red.
+         radius: keypoint radius. Default value is 2.
+         use_normalized_coordinates: if True (default), treat keypoint values as
+             relative to the image. Otherwise treat them as absolute.
+     """
+     # get a drawing context
+     draw = ImageDraw.Draw(image)
+     # font = ImageFont.truetype("sans-serif.ttf", 16)
+
+     im_width, im_height = image.size
+     keypoints_x = [k[0] for k in keypoints]
+     keypoints_y = [k[1] for k in keypoints]
+
+     # adjust keypoints coords if required
+     if use_normalized_coordinates:
+         keypoints_x = tuple([im_width * x for x in keypoints_x])
+         keypoints_y = tuple([im_height * y for y in keypoints_y])
+
+     # draw ellipses around keypoints and add string labels
+     font = ImageFont.truetype("Amiko-Regular.ttf", 8) # font = ImageFont.truetype(<font-file>, <font-size>)
+     for i, (keypoint_x, keypoint_y) in enumerate(zip(keypoints_x, keypoints_y)):
+         draw.ellipse([(keypoint_x - radius, keypoint_y - radius),
+                       (keypoint_x + radius, keypoint_y + radius)],
+                      outline=color, fill=color)
+
+         # add string labels around keypoints
+         # draw.text((x, y),"Sample Text",(r,g,b))
+         draw.text((keypoint_x + radius, keypoint_y + radius), #(0.5*im_width, 0.5*im_height), #-------
+                   map_label_id_to_str[i], #"Sample Text",
+                   (255,0,0), # rgb
+                   font=font)
+
+ ############################################
+
+ # Predict detections with MegaDetector v5a model
+ def predict_md(im, size=640):
+     # resize image
+     g = (size / max(im.size)) # gain
+     im = im.resize((int(x * g) for x in im.size), Image.ANTIALIAS) # resize
+
+     ## detect objects
+     results = MD_model(im) # inference # vars(results).keys()= dict_keys(['imgs', 'pred', 'names', 'files', 'times', 'xyxy', 'xywh', 'xyxyn', 'xywhn', 'n', 't', 's'])
+     results.render() # updates results.imgs with boxes and labels
+
+     return results #Image.fromarray(results.imgs[0]) ---return animals only?
+
+ def crop_animal_detections(yolo_results,
+                            likelihood_th):
+     ## crop if animal and return list of crops
+     list_labels_as_str = yolo_results.names #['animal', 'person', 'vehicle']
+     list_np_animal_crops = []
+
+     # for every image
+     for img, det_array in zip(yolo_results.imgs,
+                               yolo_results.xyxy):
+
+         # for every detection
+         for j in range(det_array.shape[0]):
+
+             # compute coords around bbox rounded to the nearest integer (for pasting later)
+             xmin_rd = int(math.floor(det_array[j,0])) # int() should suffice?
+             ymin_rd = int(math.floor(det_array[j,1]))
+
+             xmax_rd = int(math.ceil(det_array[j,2]))
+             ymax_rd = int(math.ceil(det_array[j,3]))
+
+             pred_llk = det_array[j,4] #-----TODO: filter based on likelihood?
+             pred_label = det_array[j,5]
+
+             if (pred_label == list_labels_as_str.index('animal')) and \
+                (pred_llk >= likelihood_th):
+                 area = (xmin_rd, ymin_rd, xmax_rd, ymax_rd)
+
+                 crop = Image.fromarray(img).crop(area)
+                 crop_np = np.asarray(crop)
+
+                 # add to list
+                 list_np_animal_crops.append(crop_np)
+
+     # for detections_dict in img_data["detections"]:
+     #     index = img_data["detections"].index(detections_dict)
+     #     if detections_dict["conf"] > 0.8:
+     #         x1, y1,w_box, h_box = detections_dict["bbox"]
+     #         ymin,xmin,ymax, xmax = y1, x1, y1 + h_box, x1 + w_box
+
+     #         imageWidth=img.size[0]
+     #         imageHeight= img.size[1]
+     #         area = (xmin * imageWidth, ymin * imageHeight, xmax * imageWidth,
+     #                 ymax * imageHeight)
+     #         crop = img.crop(area)
+     #         crop_np = np.asarray(crop)
+     #
+     #         if detections_dict["category"] == "1":
+     return list_np_animal_crops
+
+ def predict_dlc(list_np_crops,
+                 kpts_likelihood_th,
+                 DLCmodel,
+                 dlc_proc):
+
+     # run dlc thru list of crops
+     dlc_live = DLCLive(DLCmodel, processor=dlc_proc)
+     dlc_live.init_inference(list_np_crops[0])
+
+     list_kpts_per_crop = []
+     np_aux = np.empty((1,3)) # can I avoid hardcoding?
+     for crop in list_np_crops:
+         # scale crop here?
+         keypts_xyp = dlc_live.get_pose(crop) # third column is llk!
+         # set kpts below threhsold to nan
+         keypts_xyp[keypts_xyp[:,-1] < kpts_likelihood_th,:] = np_aux.fill(np.nan)
+         # add kpts of this crop to list
+         list_kpts_per_crop.append(keypts_xyp)
+
+     return list_kpts_per_crop
+
+
+ def predict_pipeline(img_input,
+                      model_input_str,
+                      flag_dlc_only,
+                      bbox_likelihood_th,
+                      kpts_likelihood_th):
+
+     ############################################################
+     ## Get DLC model and labels as strings
+     if model_input_str == 'full_cat':
+         path_to_DLCmodel = "model/DLC_Cat_resnet_50_iteration-0_shuffle-0"
+         pose_cfg_path = os.path.join(path_to_DLCmodel,'pose_cfg.yaml')
+     elif model_input_str == 'full_dog':
+         path_to_DLCmodel = "model/DLC_Dog_resnet_50_iteration-0_shuffle-0"
+         pose_cfg_path = os.path.join(path_to_DLCmodel,'pose_cfg.yaml')
+     elif model_input_str == 'full_monkey':
+         path_to_DLCmodel = "DLC_monkey_resnet_50_iteration-0_shuffle-1"
+         pose_cfg_path = os.path.join(path_to_DLCmodel,'pose_cfg.yaml')
+     elif model_input_str == 'full_human':
+         path_to_DLCmodel = "DLC_human_dancing_resnet_101_iteration-0_shuffle-1"
+         pose_cfg_path = os.path.join(path_to_DLCmodel,'pose_cfg.yaml')
+     elif model_input_str == 'monkey_face':
+         path_to_DLCmodel = "model/DLC_FacialLandmarks_resnet_50_iteration-1_shuffle-1"
+         pose_cfg_path = os.path.join(path_to_DLCmodel,'pose_cfg.yaml')
+
+     # read pose cfg as dict
+     with open(pose_cfg_path, "r") as stream:
+         pose_cfg_dict = yaml.safe_load(stream)
+     map_label_id_to_str = dict([(k,v) for k,v in zip([el[0] for el in pose_cfg_dict['all_joints']], # pose_cfg_dict['all_joints'] is a list of one-element lists,
+                                                      pose_cfg_dict['all_joints_names'])])
+
+     ############################################################
+     # ### Run Megadetector
+     md_results = predict_md(img_input) #Image.fromarray(results.imgs[0])
+
+     ################################################################
+     # Obtain animal crops for bboxes with confidence above th
+     list_crops = crop_animal_detections(md_results,
+                                         bbox_likelihood_th)
+
+     ##############################################################
+     # Run DLC
+     dlc_proc = Processor()
+
+     # if required: ignore MD crops and run DLC on full image [mostly for testing]
+     if flag_dlc_only:
+         # compute kpts on input img
+         list_kpts_per_crop = predict_dlc([np.asarray(img_input)], #list_crops,--------
+                                          kpts_likelihood_th,
+                                          path_to_DLCmodel,
+                                          dlc_proc)
+         # draw kpts on input img
+         draw_keypoints_on_image(img_input,
+                                 list_kpts_per_crop[0], # a numpy array with shape [num_keypoints, 2].
+                                 map_label_id_to_str,
+                                 color='red',
+                                 radius=2,
+                                 use_normalized_coordinates=False)
+         return img_input
+
+     else:
+         # Compute kpts for each crop
+         list_kpts_per_crop = predict_dlc(list_crops,
+                                          kpts_likelihood_th,
+                                          path_to_DLCmodel,
+                                          dlc_proc)
+
+         # Produce final image
+         img_background = Image.fromarray(md_results.imgs[0]) # img_input?
+         for ic, (np_crop, kpts_crop) in enumerate(zip(list_crops,
+                                                       list_kpts_per_crop)):
+
+             ## Draw keypts on crop
+             img_crop = Image.fromarray(np_crop)
+             draw_keypoints_on_image(img_crop,
+                                     kpts_crop, # a numpy array with shape [num_keypoints, 2].
+                                     map_label_id_to_str,
+                                     color='red',
+                                     radius=2,
+                                     use_normalized_coordinates=False, # if True, then I should use md_results.xyxyn
+                                     )
+
+             ## Paste crop in original image
+             # https://pillow.readthedocs.io/en/stable/reference/Image.html#PIL.Image.Image.paste
+             img_background.paste(img_crop,
+                                  box = tuple([int(math.floor(t)) for t in md_results.xyxy[0][ic,:2]]))
+
+         return img_background #Image.fromarray(list_crops[0]) #Image.fromarray(md_results.imgs[0]) #list_annotated_crops #
+
+
+ ##########################################################
+ # Get MegaDetector model
+ # TODO: Allow user selectable model?
+ # models = ["model_weights/md_v5a.0.0.pt","model_weights/md_v5b.0.0.pt"]
+ MD_model = torch.hub.load('ultralytics/yolov5', 'custom', "megadet_model/md_v5a.0.0.pt")
+
+ ####################################################
+ # Create user interface and launch
+ gr_image_input = gr.inputs.Image(type="pil", label="Input Image")
+ gr_image_output = gr.outputs.Image(type="pil", label="Output Image")
+ gr_dlc_model_input = gr.inputs.Dropdown(choices=['full_cat','full_dog'], # choices
+                                         default='full_cat', # default option
+                                         type='value', # Type of value to be returned by component. "value" returns the string of the choice selected, "index" returns the index of the choice selected.
+                                         label='Select DLC model')
+ gr_dlc_only_checkbox = gr.inputs.Checkbox(False,
+                                           label='Run DLClive only, directly on input image?')
+
+ gr_slider_conf_bboxes = gr.inputs.Slider(0,1,.05,0.8,
+                                          label='Set confidence threshold for animal detections')
+ gr_slider_conf_keypoints = gr.inputs.Slider(0,1,.05,0,
+                                             label='Set confidence threshold for keypoints')
+ #image = gr.inputs.Image(type="pil", label="Input Image")
+ #chosen_model = gr.inputs.Dropdown(choices = models, value = "model_weights/md_v5a.0.0.pt",type = "value", label="Model Weight")
+ #size = 640
+
+ gr_title = "MegaDetector v5 + DLClive"
+ gr_description = "Detect and estimate the pose of animals in camera trap images, using MegaDetector v5a + DeepLabCut-live. \
+                   Builds up on work from <a href='https://huggingface.co/spaces/hlydecker/MegaDetector_v5'>hlydecker/MegaDetector_v5</a>"
+ # article = "<p style='text-align: center'>This app makes predictions using a YOLOv5x6 model that was trained to detect animals, humans, and vehicles in camera trap images; find out more about the project on <a href='https://github.com/microsoft/CameraTraps'>GitHub</a>. This app was built by Henry Lydecker but really depends on code and models developed by <a href='http://ecologize.org/'>Ecologize</a> and <a href='http://aka.ms/aiforearth'>Microsoft AI for Earth</a>. Find out more about the YOLO model from the original creator, <a href='https://pjreddie.com/darknet/yolo/'>Joseph Redmon</a>. YOLOv5 is a family of compound-scaled object detection models trained on the COCO dataset and developed by Ultralytics, and includes simple functionality for Test Time Augmentation (TTA), model ensembling, hyperparameter evolution, and export to ONNX, CoreML and TFLite. <a href='https://github.com/ultralytics/yolov5'>Source code</a> | <a href='https://pytorch.org/hub/ultralytics_yolov5'>PyTorch Hub</a></p>"
+ # examples = [['data/Macropod.jpg'], ['data/koala2.jpg'],['data/cat.jpg'],['data/BrushtailPossum.jpg']]
+
+ gr.Interface(predict_pipeline,
+              inputs=[gr_image_input,
+                      gr_dlc_model_input,
+                      gr_dlc_only_checkbox,
+                      gr_slider_conf_bboxes,
+                      gr_slider_conf_keypoints],
+              outputs=gr_image_output,
+              title=gr_title,
+              description=gr_description,
+              theme="huggingface").launch(enable_queue=True)
+
+
+ # def dlclive_pose(model, crop_np, crop, fname, index,dlc_proc):
+ #     dlc_live = DLCLive(model, processor=dlc_proc)
+ #     dlc_live.init_inference(crop_np)
+ #     keypts = dlc_live.get_pose(crop_np)
+ #     savetxt(str(fname)+ '_' + str(index) + '.csv' , keypts, delimiter=',')
+ #     xpose = []
+ #     ypose = []
+ #     for key in keypts[:,2]:
+ #         # if key > 0.05: # which value do we need here?
+ #         i = np.where(keypts[:,2]==key)
+ #         xpose.append(keypts[i,0])
+ #         ypose.append(keypts[i,1])
+ #     plt.imshow(crop)
+ #     plt.scatter(xpose[:], ypose[:], 40, color='cyan')
+ #     plt.savefig(str(fname)+ '_' + str(index) + '.png')
+ #     plt.show()
+ #     plt.clf()
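
Editor's note: the sketch below is not part of the commit; it is a minimal, hypothetical smoke test of the new predict_pipeline() outside the Gradio UI. It assumes the functions and the MD_model global from the updated app.py are already in scope (for example, pasted into a Python session after torch.hub has loaded megadet_model/md_v5a.0.0.pt), that the DLC model folders referenced above exist locally, and that example.jpg is a placeholder test image.

    from PIL import Image

    img = Image.open("example.jpg")  # placeholder RGB test image
    annotated = predict_pipeline(img,
                                 model_input_str='full_cat',   # one of the dropdown choices
                                 flag_dlc_only=False,          # crop with MegaDetector, then run DLClive on each crop
                                 bbox_likelihood_th=0.8,       # same default as the bbox confidence slider
                                 kpts_likelihood_th=0.0)       # same default as the keypoint confidence slider
    annotated.save("example_annotated.png")  # PIL image with boxes and keypoints drawn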