Spaces:
Build error
Build error
File size: 18,392 Bytes
c20ef4d 86af013 07db84b 86af013 07db84b d581ff8 86af013 07db84b d581ff8 07db84b d581ff8 07db84b d581ff8 2b6769d 66e5fd8 8d64a48 dfb8f5e 8d64a48 dfb8f5e 8d64a48 dfb8f5e 8d64a48 dfb8f5e 8d64a48 c20ef4d 8d64a48 dda9d4e 66e5fd8 dfb8f5e 8d64a48 66e5fd8 8d64a48 07db84b 6aea884 8d64a48 07db84b 2b6769d 07db84b 8d64a48 07db84b 8d64a48 07db84b 8d64a48 07db84b 66e5fd8 07db84b 8d64a48 07db84b 66e5fd8 07db84b 66e5fd8 07db84b 8d64a48 07db84b d581ff8 07db84b 8d64a48 07db84b 8d64a48 07db84b 2b6769d 8d64a48 66e5fd8 2b6769d 07db84b 1abe3ea 07db84b 1abe3ea 07db84b 6940bf0 1abe3ea 6940bf0 1abe3ea 6940bf0 1abe3ea 6940bf0 8d64a48 6940bf0 07db84b 66e5fd8 07db84b 2b6769d 8d64a48 2b6769d d81c118 07db84b 47a9bd7 8d64a48 07db84b d81c118 07db84b 8d64a48 07db84b 66e5fd8 d581ff8 746600b 8d64a48 746600b 008d61a c0c8b3a 93d6740 746600b 5b257fd d81c118 746600b fe1f878 c20ef4d f47ec75 07db84b d581ff8 8d64a48 db8ca4c 16fb395 746600b 07db84b a0cb551 16fb395 2b6769d d581ff8 8d64a48 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 |
# Built from https://huggingface.co/spaces/hlydecker/MegaDetector_v5
# Built from https://huggingface.co/spaces/sofmi/MegaDetector_DLClive/blob/main/app.py
# Built from https://huggingface.co/spaces/Neslihan/megadetector_dlcmodels/blob/main/app.py
import gradio as gr
import torch
import torchvision
from dlclive import DLCLive, Processor
from PIL import Image
from PIL import ImageFont
from PIL import ImageDraw
import numpy as np
import math
# import json
import os
import yaml
# import pdb
#########################################
# Font files selectable for the keypoint labels, keyed by the name offered in
# the font dropdown.
FONTS = dict(
    amiko="font/Amiko-Regular.ttf",
    nature="font/LoveNature.otf",
    painter="font/PainterDecorator.otf",
    animals="font/UncialAnimals.ttf",
    zen="font/ZEN.TTF",
)

# MegaDetector weight files, keyed by the name offered in the detector
# dropdown.
Megadet_Models = dict(
    md_v5a="megadet_model/md_v5a.0.0.pt",
    md_v5b="megadet_model/md_v5b.0.0.pt",
)
#############################################
# %%
# Create user interface and launch: all inputs
# Input widgets. They use the top-level component classes (gr.Image,
# gr.Dropdown, ...) instead of the legacy ``gr.inputs`` namespace, matching the
# gr.ColorPicker / gr.Gallery style already used in this file. On these
# classes the initial value is passed as ``value=`` (formerly ``default=``),
# and slider arguments are given by keyword because the positional order
# differs from the legacy ``gr.inputs.Slider(min, max, step, default)``.
gr_image_input = gr.Image(type="pil", label="Input Image")

gr_dlc_model_input = gr.Dropdown(
    choices=['full_cat', 'full_dog', 'monkey_face', 'full_human', 'full_monkey'],
    value='full_cat',  # initially selected option
    type='value',  # return the selected string rather than its index
    label='Select DeepLabCut model',
)

gr_dlc_only_checkbox = gr.Checkbox(
    value=False,
    label='Run DLClive only, directly on input image?',
)

gr_slider_conf_bboxes = gr.Slider(
    minimum=0, maximum=1, step=.05, value=0.8,
    label='Set confidence threshold for animal detections',
)

gr_slider_conf_keypoints = gr.Slider(
    minimum=0, maximum=1, step=.05, value=0,
    label='Set confidence threshold for keypoints',
)

# An explicit initial colour (the same red that draw_keypoints_on_image uses
# as its default) so a run without touching the picker still gets a colour.
gr_keypt_color = gr.ColorPicker(value="#ff0000",
                                label="choose color for keypoint label")

gr_pose_font_input = gr.Dropdown(
    choices=['amiko', 'nature', 'painter', 'animals', 'zen'],
    value='amiko',
    type='value',
    label='Select keypoint label font',
)

gr_slider_font_size = gr.Slider(
    minimum=5, maximum=30, step=1, value=8,
    label='Set font size',
)

gr_slider_pose_size = gr.Slider(
    minimum=0.5, maximum=5, step=0.2, value=2,
    label='Set pose size',
)

gr_mega_model_input = gr.Dropdown(
    choices=['md_v5a', 'md_v5b'],
    value='md_v5a',  # initially selected option
    type='value',  # return the selected string rather than its index
    label='Select MegaDetector model',
)

# Order matters: it is the positional order in which the values are handed to
# the prediction function registered with gr.Interface.
inputs = [gr_image_input,
          gr_dlc_model_input,
          gr_dlc_only_checkbox,
          gr_slider_conf_bboxes,
          gr_slider_conf_keypoints,
          gr_pose_font_input,
          gr_slider_font_size,
          gr_keypt_color,
          gr_slider_pose_size,
          gr_mega_model_input,
          ]
#image = gr.inputs.Image(type="pil", label="Input Image")
#chosen_model = gr.inputs.Dropdown(choices = models, value = "model_weights/md_v5a.0.0.pt",type = "value", label="Model Weight")
#size = 640
#########################################
# %%
def draw_keypoints_on_image(image,
                            keypoints,
                            map_label_id_to_str,
                            use_normalized_coordinates=True,
                            gr_pose_font_input='amiko',
                            gr_slider_font_size=8,
                            gr_keypt_color="#ff0000",
                            gr_slider_pose_size='2'
                            ):
    """Draw keypoint markers and their text labels on a PIL image, in place.

    Modified from:
    https://www.programcreek.com/python/?code=fjchange%2Fobject_centric_VAD%2Fobject_centric_VAD-master%2Fobject_detection%2Futils%2Fvisualization_utils.py

    Args:
        image: a PIL.Image object; it is drawn on directly (nothing is
            returned).
        keypoints: sequence of keypoints. Only the first two entries (x, y)
            of each keypoint are used; a keypoint whose x or y is NaN is
            skipped.
        map_label_id_to_str: mapping from a keypoint's position in
            ``keypoints`` to the label text drawn next to it.
        use_normalized_coordinates: if True (default), treat keypoint values
            as relative to the image size. Otherwise treat them as absolute
            pixel coordinates.
        gr_pose_font_input: key into ``FONTS`` selecting the label font file.
        gr_slider_font_size: label font size; converted to ``int`` before
            being passed to ``ImageFont.truetype``.
        gr_keypt_color: colour used for the marker outline, marker fill and
            label text.
        gr_slider_pose_size: half-width of each marker's bounding box, and
            the x/y offset of the label from the keypoint. May be a number
            or a numeric string (the default is the string ``'2'``).
    """
    # get a drawing context
    draw = ImageDraw.Draw(image)
    im_width, im_height = image.size

    # The default is a string, so coerce before doing arithmetic with it.
    marker_half_size = float(gr_slider_pose_size)
    # int() because a UI slider may deliver the size as a float.
    font = ImageFont.truetype(FONTS[gr_pose_font_input], int(gr_slider_font_size))

    for i, keypoint in enumerate(keypoints):
        keypoint_x = float(keypoint[0])
        keypoint_y = float(keypoint[1])
        # Keypoints rejected upstream are marked with NaN; there is nothing
        # meaningful to draw for them. The index ``i`` still advances so the
        # remaining labels stay aligned with their keypoints.
        if math.isnan(keypoint_x) or math.isnan(keypoint_y):
            continue

        # adjust keypoint coords if required
        if use_normalized_coordinates:
            keypoint_x *= im_width
            keypoint_y *= im_height

        # draw a filled ellipse around the keypoint
        draw.ellipse([(keypoint_x - marker_half_size, keypoint_y - marker_half_size),
                      (keypoint_x + marker_half_size, keypoint_y + marker_half_size)],
                     outline=gr_keypt_color, fill=gr_keypt_color)

        # add the string label at the lower-right corner of the marker
        draw.text((keypoint_x + marker_half_size, keypoint_y + marker_half_size),
                  map_label_id_to_str[i],
                  (gr_keypt_color),
                  font=font)
############################################
# %%
# Predict detections with MegaDetector v5a model
# Loaded MegaDetector models, keyed by weights path, so that the network is
# loaded once per weights file instead of on every request.
_MD_MODEL_CACHE = {}


def predict_md(im, size=640, gr_mega_model_input='md_v5a'):
    """Run a MegaDetector model on a resized copy of ``im``.

    Args:
        im: a PIL.Image object. It is not modified; a resized copy is used.
        size: length in pixels that the longest image side is scaled to.
        gr_mega_model_input: key into ``Megadet_Models`` selecting the
            weights file to load.

    Returns:
        The results object returned by calling the loaded model on the
        resized image, after ``render()`` has been called on it.
    """
    # resize image so that its longest side equals `size`
    gain = size / max(im.size)
    # A real tuple (not a generator), and never a zero-length side for
    # extreme aspect ratios.
    new_size = tuple(max(1, int(side * gain)) for side in im.size)
    # Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same
    # filter under its current name.
    im = im.resize(new_size, Image.LANCZOS)

    weights_path = Megadet_Models[gr_mega_model_input]
    MD_model = _MD_MODEL_CACHE.get(weights_path)
    if MD_model is None:
        MD_model = torch.hub.load('ultralytics/yolov5', 'custom', weights_path)
        _MD_MODEL_CACHE[weights_path] = MD_model

    ## detect objects
    results = MD_model(im)  # inference
    results.render()  # draws boxes and labels onto the images held by `results`
    return results
# %%
def _animal_class_id(names):
    """Return the class id of 'animal' from a list or an {id: name} dict."""
    if isinstance(names, dict):
        for class_id, label in names.items():
            if label == 'animal':
                return class_id
        raise ValueError("'animal' is not in the detector's class names")
    return list(names).index('animal')


def _crop_zero_padded(img_np, x0, y0, x1, y1):
    """Return the box [x0, x1) x [y0, y1) of img_np, zero-filled outside the image."""
    height, width = img_np.shape[:2]
    out_shape = (max(0, y1 - y0), max(0, x1 - x0)) + img_np.shape[2:]
    out = np.zeros(out_shape, dtype=img_np.dtype)
    # Part of the requested box that actually overlaps the image.
    sx0, sy0 = max(x0, 0), max(y0, 0)
    sx1, sy1 = min(x1, width), min(y1, height)
    if sx1 > sx0 and sy1 > sy0:
        out[sy0 - y0:sy1 - y0, sx0 - x0:sx1 - x0] = img_np[sy0:sy1, sx0:sx1]
    return out


def crop_animal_detections(yolo_results,
                           likelihood_th):
    """Crop every sufficiently confident 'animal' detection out of its image.

    Args:
        yolo_results: detection results exposing ``names`` (class names, as a
            list or an ``{id: name}`` dict), ``xyxy`` (per image, rows of
            ``xmin, ymin, xmax, ymax, confidence, class``) and the images as
            ``ims`` or, failing that, ``imgs``.
        likelihood_th: minimum confidence for a detection to be kept.

    Returns:
        A list of numpy arrays, one crop per kept detection, in detection
        order. The crop box is the bounding box with its top-left corner
        rounded down and its bottom-right corner rounded up to integers
        (so the crop can be pasted back later). Any part of a box outside
        the image is zero-filled.

    Raises:
        ValueError: if 'animal' is not among the class names.
    """
    # Class names may be a list (['animal', 'person', 'vehicle']) or a dict
    # keyed by class id; look the id up once rather than per detection.
    animal_id = _animal_class_id(yolo_results.names)

    # The image list is exposed as `ims` on some results objects and `imgs`
    # on others; accept either.
    images = getattr(yolo_results, 'ims', None)
    if images is None:
        images = yolo_results.imgs

    list_np_animal_crops = []
    # for every image
    for img, det_array in zip(images, yolo_results.xyxy):
        img_np = np.asarray(img)
        # for every detection
        for det in det_array:
            xmin, ymin, xmax, ymax, pred_llk, pred_label = (float(v) for v in det[:6])
            if pred_label != animal_id or pred_llk < likelihood_th:
                continue
            list_np_animal_crops.append(
                _crop_zero_padded(img_np,
                                  int(math.floor(xmin)),
                                  int(math.floor(ymin)),
                                  int(math.ceil(xmax)),
                                  int(math.ceil(ymax))))
    return list_np_animal_crops
# %%
def predict_dlc(list_np_crops,
                kpts_likelihood_th,
                DLCmodel,
                dlc_proc):
    """Run DeepLabCut-live on a list of image crops.

    Args:
        list_np_crops: list of crops as numpy arrays. May be empty.
        kpts_likelihood_th: keypoints whose likelihood (last column of the
            pose array) is below this value are set to NaN.
        DLCmodel: model argument passed through to ``DLCLive``.
        dlc_proc: processor object passed to ``DLCLive`` as ``processor``.

    Returns:
        A list with one pose array per crop, in the same order as
        ``list_np_crops``; an empty list if there are no crops. Rows below
        the likelihood threshold are entirely NaN.
    """
    # Nothing to do, and the first crop is needed to initialise inference,
    # so bail out before touching the model.
    if len(list_np_crops) == 0:
        return []

    # run dlc thru list of crops
    dlc_live = DLCLive(DLCmodel, processor=dlc_proc)
    dlc_live.init_inference(list_np_crops[0])

    list_kpts_per_crop = []
    for crop in list_np_crops:
        keypts_xyp = dlc_live.get_pose(crop)  # last column is the likelihood
        # set kpts below threshold to nan (whole row: x, y and likelihood)
        keypts_xyp[keypts_xyp[:, -1] < kpts_likelihood_th, :] = np.nan
        # add kpts of this crop to list
        list_kpts_per_crop.append(keypts_xyp)
    return list_kpts_per_crop
# %%
# Directory of the exported DeepLabCut model for each model dropdown choice.
_DLC_MODEL_DIRS = {
    'full_cat': "model/DLC_Cat_resnet_50_iteration-0_shuffle-0",
    'full_dog': "model/DLC_Dog_resnet_50_iteration-0_shuffle-0",
    'monkey_face': "model/DLC_FacialLandmarks_resnet_50_iteration-1_shuffle-1",
    'full_human': "model/DLC_human_dancing_resnet_101_iteration-0_shuffle-1",
    'full_monkey': "model/DLC_monkey_resnet_50_iteration-0_shuffle-1",
}


def _kept_animal_bbox_origins(md_results, bbox_likelihood_th):
    """Return the integer top-left corner of every first-image detection kept as a crop.

    Applies the same filter as crop_animal_detections (class 'animal' and
    confidence >= threshold), in the same order, so the i-th origin belongs
    to the i-th crop.
    """
    names = md_results.names
    if isinstance(names, dict):
        animal_id = {label: class_id for class_id, label in names.items()}['animal']
    else:
        animal_id = list(names).index('animal')

    origins = []
    for det in md_results.xyxy[0]:
        xmin, ymin, _, _, conf, label = (float(v) for v in det[:6])
        if label == animal_id and conf >= bbox_likelihood_th:
            origins.append((int(math.floor(xmin)), int(math.floor(ymin))))
    return origins


def predict_pipeline(img_input,
                     model_input_str,
                     flag_dlc_only,
                     bbox_likelihood_th,
                     kpts_likelihood_th,
                     gr_pose_font_input='amiko',
                     gr_slider_font_size=8,
                     gr_keypt_color="#ff0000",
                     gr_slider_pose_size=2,
                     gr_mega_model_input='md_v5a',
                     ):
    """Detect animals with MegaDetector and draw DeepLabCut keypoints on them.

    Args:
        img_input: input PIL image. In DLC-only mode it is drawn on in place.
        model_input_str: DeepLabCut model choice; one of 'full_cat',
            'full_dog', 'monkey_face', 'full_human', 'full_monkey'.
        flag_dlc_only: if true, skip MegaDetector and run DeepLabCut directly
            on the whole input image.
        bbox_likelihood_th: minimum confidence for an animal detection to be
            cropped and analysed.
        kpts_likelihood_th: minimum likelihood for a keypoint to be kept.
        gr_pose_font_input: label font name (key of ``FONTS``).
        gr_slider_font_size: label font size.
        gr_keypt_color: colour of keypoint markers and labels.
        gr_slider_pose_size: keypoint marker size.
        gr_mega_model_input: MegaDetector choice (key of ``Megadet_Models``).
        For the five styling/model options above, ``None`` means "use the
        default".

    Returns:
        A one-element tuple holding the annotated PIL image: the input image
        in DLC-only mode, otherwise the MegaDetector-rendered image with the
        annotated crops pasted back in.

    Raises:
        ValueError: if ``model_input_str`` is not a known model choice or no
            image was provided.
    """
    ############################################################
    ## Validate inputs
    try:
        path_to_DLCmodel = _DLC_MODEL_DIRS[model_input_str]
    except KeyError:
        raise ValueError(
            "Unknown DeepLabCut model %r; expected one of %s"
            % (model_input_str, sorted(_DLC_MODEL_DIRS))) from None
    if img_input is None:
        raise ValueError("No input image was provided")

    # A caller (e.g. an examples row) may pass None for an option.
    if gr_pose_font_input is None:
        gr_pose_font_input = 'amiko'
    if gr_slider_font_size is None:
        gr_slider_font_size = 8
    if gr_keypt_color is None:
        gr_keypt_color = "#ff0000"
    if gr_slider_pose_size is None:
        gr_slider_pose_size = 2
    if gr_mega_model_input is None:
        gr_mega_model_input = 'md_v5a'

    ############################################################
    ## Get DLC keypoint labels as strings
    pose_cfg_path = os.path.join(path_to_DLCmodel, 'pose_cfg.yaml')
    # read pose cfg as dict
    with open(pose_cfg_path, "r") as stream:
        pose_cfg_dict = yaml.safe_load(stream)
    # pose_cfg_dict['all_joints'] is a list of one-element lists
    joint_ids = [el[0] for el in pose_cfg_dict['all_joints']]
    map_label_id_to_str = dict(zip(joint_ids, pose_cfg_dict['all_joints_names']))

    # Drawing options shared by both modes. Keypoints are absolute pixel
    # coordinates within the image DLC was run on, hence not normalized.
    draw_kwargs = dict(use_normalized_coordinates=False,
                       gr_pose_font_input=gr_pose_font_input,
                       gr_slider_font_size=gr_slider_font_size,
                       gr_keypt_color=gr_keypt_color,
                       gr_slider_pose_size=gr_slider_pose_size)

    dlc_proc = Processor()

    ##############################################################
    # If required: ignore MegaDetector and run DLC on the full image
    # [mostly for testing]. MegaDetector is not run at all in this mode.
    if flag_dlc_only:
        # compute kpts on input img
        list_kpts_per_crop = predict_dlc([np.asarray(img_input)],
                                         kpts_likelihood_th,
                                         path_to_DLCmodel,
                                         dlc_proc)
        # draw kpts on input img
        draw_keypoints_on_image(img_input,
                                list_kpts_per_crop[0],
                                map_label_id_to_str,
                                **draw_kwargs)
        return img_input,

    ############################################################
    # Run MegaDetector and obtain animal crops for bboxes above threshold
    md_results = predict_md(img_input, gr_mega_model_input=gr_mega_model_input)
    list_crops = crop_animal_detections(md_results, bbox_likelihood_th)

    # Background is the MegaDetector-rendered image; it is exposed as `ims`
    # on some results objects and `imgs` on others.
    rendered_images = getattr(md_results, 'ims', None)
    if rendered_images is None:
        rendered_images = md_results.imgs
    img_background = Image.fromarray(rendered_images[0])

    # No animal above threshold: nothing for DLC to do.
    if len(list_crops) == 0:
        return img_background,

    # Compute kpts for each crop
    list_kpts_per_crop = predict_dlc(list_crops,
                                     kpts_likelihood_th,
                                     path_to_DLCmodel,
                                     dlc_proc)

    # Each crop must go back to the box it was cut from. The crop list only
    # holds the detections that passed the filter, so its indices do not
    # match the rows of md_results.xyxy[0]; recover the matching origins.
    crop_origins = _kept_animal_bbox_origins(md_results, bbox_likelihood_th)

    # Produce final image
    for np_crop, kpts_crop, origin in zip(list_crops,
                                          list_kpts_per_crop,
                                          crop_origins):
        ## Draw keypts on crop
        img_crop = Image.fromarray(np_crop)
        draw_keypoints_on_image(img_crop,
                                kpts_crop,
                                map_label_id_to_str,
                                **draw_kwargs)
        ## Paste crop in original image
        # https://pillow.readthedocs.io/en/stable/reference/Image.html#PIL.Image.Image.paste
        img_background.paste(img_crop, box=origin)

    return img_background,
##########################################################
# %%
# Get MegaDetector model
# TODO: Allow user selectable model?
# models = ["model_weights/md_v5a.0.0.pt","model_weights/md_v5b.0.0.pt"]
#MD_model = torch.hub.load('ultralytics/yolov5', 'custom', "megadet_model/md_v5b.0.0.pt")
####################################################
# %%
# Create user interface and launch: all outputs
# Output widgets. Only the gallery is wired into `outputs`; the JSON and
# dataframe components are created but currently not used as outputs.
gr_gallery_output = gr.Gallery(type="pil", label="Output Gallery")
gr_json_output = gr.JSON(label='megadetector json')
gr_pose_output = gr.Dataframe(headers=['bodypart', 'x', 'y', 'confidence'],
                              #row_count = number of bodyparts,
                              label='pose coordinates')
outputs = [gr_gallery_output, #gr_json_output,
           #gr_pose_output,
           ]

gr_title = "MegaDetector v5 + DeepLabCut-Live!"
# Built from adjacent string literals rather than backslash continuations, so
# each piece ends with an explicit space and consecutive sentences no longer
# run together ("...2022This App...", "...</a>We experimented...").
gr_description = (
    "Contributed by Sofia Minano, Neslihan Wittek, Nirel Kadzo, VicShaoChih Chiang -- DLC AI Residents 2022 "
    "This App detects and estimate the pose of animals in camera trap images using <a href='https://github.com/microsoft/CameraTraps'>MegaDetector v5a</a> + <a href='https://github.com/DeepLabCut/DeepLabCut-live'>DeepLabCut-live</a>. "
    "It additionally builds upon on work from <a href='https://huggingface.co/spaces/hlydecker/MegaDetector_v5'>hlydecker/MegaDetector_v5</a> "
    "<a href='https://huggingface.co/spaces/sofmi/MegaDetector_DLClive'>sofmi/MegaDetector_DLClive</a> "
    "<a href='https://huggingface.co/spaces/Neslihan/megadetector_dlcmodels'>Neslihan/megadetector_dlcmodels</a> "
    "We experimented with two Megadetector models. See <a href='https://github.com/microsoft/CameraTraps/blob/main/megadetector.md'>here</a> for detailed information about models."
)
# article = "<p style='text-align: center'>This app makes predictions using a YOLOv5x6 model that was trained to detect animals, humans, and vehicles in camera trap images; find out more about the project on <a href='https://github.com/microsoft/CameraTraps'>GitHub</a>. This app was built by Henry Lydecker but really depends on code and models developed by <a href='http://ecologize.org/'>Ecologize</a> and <a href='http://aka.ms/aiforearth'>Microsoft AI for Earth</a>. Find out more about the YOLO model from the original creator, <a href='https://pjreddie.com/darknet/yolo/'>Joseph Redmon</a>. YOLOv5 is a family of compound-scaled object detection models trained on the COCO dataset and developed by Ultralytics, and includes simple functionality for Test Time Augmentation (TTA), model ensembling, hyperparameter evolution, and export to ONNX, CoreML and TFLite. <a href='https://github.com/ultralytics/yolov5'>Source code</a> | <a href='https://pytorch.org/hub/ultralytics_yolov5'>PyTorch Hub</a></p>"
# examples = [['data/Macropod.jpg'], ['data/koala2.jpg'],['data/cat.jpg'],['data/BrushtailPossum.jpg']]
# %%
# One example row, with one value per input component in the order of
# `inputs`: image, DLC model, DLC-only flag, bbox threshold, keypoint
# threshold, font, font size, keypoint colour, pose size, MegaDetector model.
# The DLC model must be one of the dropdown choices (not a model directory),
# and every option gets a concrete value instead of None.
examples = [['example/monkey_full.jpg', 'full_monkey', False, 0.5, 0.5,
             'amiko', 8, '#ff0000', 2, 'md_v5a']]

demo = gr.Interface(predict_pipeline,
                    inputs=inputs,
                    outputs=outputs,
                    title=gr_title,
                    description=gr_description,
                    examples=examples,
                    theme="huggingface",
                    #live=True
                    )

demo.launch(enable_queue=True, share=True)
|