Spaces:

DeepLabCut
/

MegaDetector_DeepLabCut

Build error

App Files Files Community

MegaDetector_DeepLabCut / app.py

Neslihan

Update app.py

db8ca4c almost 2 years ago

raw history blame

No virus

18.4 kB

	# Built from https://huggingface.co/spaces/hlydecker/MegaDetector_v5
	# Built from https://huggingface.co/spaces/sofmi/MegaDetector_DLClive/blob/main/app.py
	# Built from https://huggingface.co/spaces/Neslihan/megadetector_dlcmodels/blob/main/app.py


	import gradio as gr

	import torch
	import torchvision
	from dlclive import DLCLive, Processor

	from PIL import Image
	from PIL import ImageFont
	from PIL import ImageDraw

	import numpy as np
	import math
	# import json
	import os
	import yaml

	# import pdb

	#########################################


	# Font files available for keypoint labels, keyed by the name shown in the UI.
	FONTS = dict(
	    amiko="font/Amiko-Regular.ttf",
	    nature="font/LoveNature.otf",
	    painter="font/PainterDecorator.otf",
	    animals="font/UncialAnimals.ttf",
	    zen="font/ZEN.TTF",
	)

	# MegaDetector weight files, keyed by the model name shown in the UI.
	Megadet_Models = {
	    model_name: "megadet_model/" + model_name + ".0.0.pt"
	    for model_name in ('md_v5a', 'md_v5b')
	}

	#############################################
	# %%
	# Create user interface and launch: all inputs
	# Input components are built with the top-level gradio classes (gr.Image,
	# gr.Dropdown, ...) rather than the deprecated `gr.inputs.*` aliases, matching
	# the gr.ColorPicker / gr.Gallery components used elsewhere in this file.
	# All arguments are passed by keyword so they do not depend on positional order.
	gr_image_input = gr.Image(type="pil", label="Input Image")

	gr_dlc_model_input = gr.Dropdown(
	    choices=['full_cat', 'full_dog', 'monkey_face', 'full_human', 'full_monkey'],
	    value='full_cat',  # initially selected option
	    type='value',  # pass the selected string (not its index) to the function
	    label='Select DeepLabCut model',
	)
	gr_dlc_only_checkbox = gr.Checkbox(
	    value=False,
	    label='Run DLClive only, directly on input image?',
	)

	gr_slider_conf_bboxes = gr.Slider(
	    minimum=0, maximum=1, step=.05, value=0.8,
	    label='Set confidence threshold for animal detections',
	)
	gr_slider_conf_keypoints = gr.Slider(
	    minimum=0, maximum=1, step=.05, value=0,
	    label='Set confidence threshold for keypoints',
	)

	gr_keypt_color = gr.ColorPicker(label="choose color for keypoint label")

	gr_pose_font_input = gr.Dropdown(
	    choices=['amiko', 'nature', 'painter', 'animals', 'zen'],
	    value='amiko',
	    type='value',
	    label='Select keypoint label font',
	)
	gr_slider_font_size = gr.Slider(
	    minimum=5, maximum=30, step=1, value=8,
	    label='Set font size',
	)

	gr_slider_pose_size = gr.Slider(
	    minimum=0.5, maximum=5, step=0.2, value=2,
	    label='Set pose size',
	)
	gr_mega_model_input = gr.Dropdown(
	    choices=['md_v5a', 'md_v5b'],
	    value='md_v5a',  # initially selected option
	    type='value',  # pass the selected string (not its index) to the function
	    label='Select MegaDetector model',
	)

	# Order matters: it must match the positional parameters of predict_pipeline.
	inputs = [
	    gr_image_input,
	    gr_dlc_model_input,
	    gr_dlc_only_checkbox,
	    gr_slider_conf_bboxes,
	    gr_slider_conf_keypoints,
	    gr_pose_font_input,
	    gr_slider_font_size,
	    gr_keypt_color,
	    gr_slider_pose_size,
	    gr_mega_model_input,
	]

	#image = gr.inputs.Image(type="pil", label="Input Image")
	#chosen_model = gr.inputs.Dropdown(choices = models, value = "model_weights/md_v5a.0.0.pt",type = "value", label="Model Weight")
	#size = 640

	#########################################
	# %%
	def draw_keypoints_on_image(image,
	                            keypoints,
	                            map_label_id_to_str,
	                            use_normalized_coordinates=True,
	                            gr_pose_font_input='amiko',
	                            gr_slider_font_size=8,
	                            gr_keypt_color="#ff0000",
	                            gr_slider_pose_size='2'
	                            ):
	    """Draw keypoint markers and their text labels on an image, in place.

	    Modified from:
	    https://www.programcreek.com/python/?code=fjchange%2Fobject_centric_VAD%2Fobject_centric_VAD-master%2Fobject_detection%2Futils%2Fvisualization_utils.py

	    Args:
	        image: a PIL.Image object; it is modified in place.
	        keypoints: a sequence of rows whose first two entries are the x and y
	            coordinates of a keypoint (any further columns are ignored).
	        map_label_id_to_str: lookup from the keypoint's row index to the label
	            text drawn next to it.
	        use_normalized_coordinates: if True (default), treat keypoint values as
	            relative to the image size. Otherwise treat them as absolute pixels.
	        gr_pose_font_input: key into FONTS selecting the label font file.
	        gr_slider_font_size: label font size.
	        gr_keypt_color: colour used for both the marker and the label.
	        gr_slider_pose_size: marker radius in pixels; a number or numeric string.

	    Returns:
	        None.
	    """
	    # get a drawing context
	    draw = ImageDraw.Draw(image)
	    im_width, im_height = image.size

	    # The default is the string '2', so coerce before doing arithmetic with it.
	    radius = float(gr_slider_pose_size)
	    font = ImageFont.truetype(FONTS[gr_pose_font_input], gr_slider_font_size)

	    for i, keypoint in enumerate(keypoints):
	        keypoint_x, keypoint_y = float(keypoint[0]), float(keypoint[1])

	        # Keypoints rejected by the likelihood threshold arrive as NaN: there is
	        # nothing meaningful to draw for them, so skip them.
	        if not (math.isfinite(keypoint_x) and math.isfinite(keypoint_y)):
	            continue

	        # adjust keypoint coords if required
	        if use_normalized_coordinates:
	            keypoint_x = im_width * keypoint_x
	            keypoint_y = im_height * keypoint_y

	        # filled circle centred on the keypoint
	        draw.ellipse([(keypoint_x - radius, keypoint_y - radius),
	                      (keypoint_x + radius, keypoint_y + radius)],
	                     outline=gr_keypt_color, fill=gr_keypt_color)

	        # label placed just below/right of the marker
	        draw.text((keypoint_x + radius, keypoint_y + radius),
	                  map_label_id_to_str[i],
	                  gr_keypt_color,
	                  font=font)

	############################################
	# %%
	# Predict detections with MegaDetector v5a model
	# MegaDetector models already loaded through torch.hub, keyed by model name,
	# so the weights are not reloaded on every prediction.
	_MD_MODEL_CACHE = {}


	def predict_md(im, size=640, gr_mega_model_input='md_v5a'):
	    """Run MegaDetector on an image and return the rendered YOLOv5 results.

	    Args:
	        im: a PIL.Image object.
	        size: length in pixels that the longest image side is resized to.
	        gr_mega_model_input: key into Megadet_Models selecting the weights file.

	    Returns:
	        The YOLOv5 results object for the resized image, after `render()` has
	        drawn the boxes and labels onto its stored image.
	    """
	    # resize image so that its longest side equals `size`
	    gain = size / max(im.size)
	    new_size = tuple(max(1, int(side * gain)) for side in im.size)
	    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
	    im = im.resize(new_size, Image.LANCZOS)

	    # load the detector once per model name and reuse it afterwards
	    MD_model = _MD_MODEL_CACHE.get(gr_mega_model_input)
	    if MD_model is None:
	        MD_model = torch.hub.load('ultralytics/yolov5', 'custom',
	                                  Megadet_Models[gr_mega_model_input])
	        _MD_MODEL_CACHE[gr_mega_model_input] = MD_model

	    # detect objects
	    results = MD_model(im)
	    results.render()  # draws boxes and labels onto the stored image

	    return results

	# %%
	def crop_animal_detections(yolo_results,
	                           likelihood_th):
	    """Crop every sufficiently confident 'animal' detection from the results.

	    Args:
	        yolo_results: detector results exposing `names` (class names, as a
	            list or an {id: name} dict), `xyxy` (one detection array per image
	            with columns xmin, ymin, xmax, ymax, confidence, class id) and the
	            image arrays as `ims` or, on older YOLOv5 releases, `imgs`.
	        likelihood_th: minimum confidence for a detection to be kept.

	    Returns:
	        A list of numpy arrays, one crop per kept detection, ordered image by
	        image and, within an image, in detection order.

	    Raises:
	        ValueError: if the detector has no class named 'animal'.
	    """
	    # Resolve the class id once, instead of once per detection.
	    names = yolo_results.names
	    if isinstance(names, dict):
	        animal_ids = [class_id for class_id, label in names.items()
	                      if label == 'animal']
	    else:
	        animal_ids = [class_id for class_id, label in enumerate(names)
	                      if label == 'animal']
	    if not animal_ids:
	        raise ValueError("'animal' is not among the detector's class names")
	    animal_id = animal_ids[0]

	    # Prefer `ims`, fall back to the legacy `imgs` attribute.
	    images = getattr(yolo_results, 'ims', None)
	    if images is None:
	        images = yolo_results.imgs

	    list_np_animal_crops = []

	    # for every image
	    for img, det_array in zip(images, yolo_results.xyxy):
	        pil_img = None  # converted lazily, at most once per image

	        # for every detection
	        for det in det_array:
	            pred_llk = det[4]
	            pred_label = det[5]
	            if not (pred_label == animal_id and pred_llk >= likelihood_th):
	                continue

	            # bbox coords rounded outwards to whole pixels (for pasting later)
	            area = (int(math.floor(det[0])),
	                    int(math.floor(det[1])),
	                    int(math.ceil(det[2])),
	                    int(math.ceil(det[3])))

	            if pil_img is None:
	                pil_img = Image.fromarray(img)
	            list_np_animal_crops.append(np.asarray(pil_img.crop(area)))

	    return list_np_animal_crops

	# %%
	def predict_dlc(list_np_crops,
	                kpts_likelihood_th,
	                DLCmodel,
	                dlc_proc):
	    """Estimate keypoints on each crop with DLCLive.

	    Args:
	        list_np_crops: list of image crops as numpy arrays.
	        kpts_likelihood_th: keypoints whose likelihood (last column of the
	            pose array) is below this value are replaced by NaN.
	        DLCmodel: model argument forwarded to DLCLive.
	        dlc_proc: processor object forwarded to DLCLive.

	    Returns:
	        A list with one pose array per crop, in the same order as the crops.
	        An empty list is returned when there are no crops.
	    """
	    # Nothing to do (and no first crop to initialise inference with).
	    if len(list_np_crops) == 0:
	        return []

	    # run dlc thru list of crops
	    dlc_live = DLCLive(DLCmodel, processor=dlc_proc)
	    dlc_live.init_inference(list_np_crops[0])

	    list_kpts_per_crop = []
	    for crop in list_np_crops:
	        keypts_xyp = dlc_live.get_pose(crop)  # last column is the likelihood
	        # blank out whole rows (x, y and likelihood) below the threshold
	        below_th = keypts_xyp[:, -1] < kpts_likelihood_th
	        keypts_xyp[below_th, :] = np.nan
	        list_kpts_per_crop.append(keypts_xyp)

	    return list_kpts_per_crop


	# %%
	# Exported DeepLabCut model directory for each model choice offered in the UI.
	_DLC_MODEL_DIRS = {
	    'full_cat': "model/DLC_Cat_resnet_50_iteration-0_shuffle-0",
	    'full_dog': "model/DLC_Dog_resnet_50_iteration-0_shuffle-0",
	    'monkey_face': "model/DLC_FacialLandmarks_resnet_50_iteration-1_shuffle-1",
	    'full_human': "model/DLC_human_dancing_resnet_101_iteration-0_shuffle-1",
	    'full_monkey': "model/DLC_monkey_resnet_50_iteration-0_shuffle-1",
	}


	def _read_keypoint_labels(path_to_DLCmodel):
	    """Return {joint id: joint name} read from the model's pose_cfg.yaml."""
	    pose_cfg_path = os.path.join(path_to_DLCmodel, 'pose_cfg.yaml')
	    with open(pose_cfg_path, "r") as stream:
	        pose_cfg_dict = yaml.safe_load(stream)
	    # pose_cfg_dict['all_joints'] is a list of one-element lists
	    joint_ids = [el[0] for el in pose_cfg_dict['all_joints']]
	    return dict(zip(joint_ids, pose_cfg_dict['all_joints_names']))


	def _animal_paste_origins(md_results, bbox_likelihood_th):
	    """Return the top-left (x, y) pixel of each kept animal detection.

	    Applies the same class and confidence filter, in the same order, as
	    crop_animal_detections does for the first image, so the i-th origin
	    belongs to the i-th crop.
	    """
	    names = md_results.names
	    if isinstance(names, dict):
	        animal_ids = [k for k, label in names.items() if label == 'animal']
	    else:
	        animal_ids = [k for k, label in enumerate(names) if label == 'animal']

	    origins = []
	    for det in md_results.xyxy[0]:
	        if animal_ids and det[5] == animal_ids[0] and det[4] >= bbox_likelihood_th:
	            origins.append((int(math.floor(det[0])), int(math.floor(det[1]))))
	    return origins


	def predict_pipeline(img_input,
	                     model_input_str,
	                     flag_dlc_only,
	                     bbox_likelihood_th,
	                     kpts_likelihood_th,
	                     gr_pose_font_input='amiko',
	                     gr_slider_font_size=8,
	                     gr_keypt_color="#ff0000",
	                     gr_slider_pose_size=2,
	                     gr_mega_model_input='md_v5a',
	                     ):
	    """Detect animals with MegaDetector and draw DeepLabCut keypoints on them.

	    Args:
	        img_input: input PIL.Image.
	        model_input_str: DeepLabCut model choice, one of the keys of
	            _DLC_MODEL_DIRS.
	        flag_dlc_only: if True, skip MegaDetector and run DeepLabCut directly
	            on the whole input image (the input image is drawn on in place).
	        bbox_likelihood_th: minimum confidence for an animal detection.
	        kpts_likelihood_th: minimum likelihood for a keypoint.
	        gr_pose_font_input: key into FONTS for the keypoint label font.
	        gr_slider_font_size: keypoint label font size.
	        gr_keypt_color: colour of keypoint markers and labels.
	        gr_slider_pose_size: keypoint marker radius.
	        gr_mega_model_input: key into Megadet_Models.

	    The last five defaults are plain values (the same ones the UI components
	    start with) rather than the gradio component objects themselves, which
	    could never be used as a font name, size or colour.

	    Returns:
	        A one-element tuple holding the annotated PIL.Image.

	    Raises:
	        ValueError: if model_input_str is not a known DeepLabCut model.
	    """
	    ############################################################
	    ## Get DLC model and labels as strings
	    try:
	        path_to_DLCmodel = _DLC_MODEL_DIRS[model_input_str]
	    except KeyError:
	        raise ValueError(
	            "Unknown DeepLabCut model %r; expected one of %s"
	            % (model_input_str, sorted(_DLC_MODEL_DIRS))) from None
	    map_label_id_to_str = _read_keypoint_labels(path_to_DLCmodel)

	    dlc_proc = Processor()
	    draw_options = dict(use_normalized_coordinates=False,  # keypoints are in pixels
	                        gr_pose_font_input=gr_pose_font_input,
	                        gr_slider_font_size=gr_slider_font_size,
	                        gr_keypt_color=gr_keypt_color,
	                        gr_slider_pose_size=gr_slider_pose_size)

	    ############################################################
	    # DLC only: run on the full input image [mostly for testing]; the
	    # detector is not needed, so it is not run at all.
	    if flag_dlc_only:
	        list_kpts_per_crop = predict_dlc([np.asarray(img_input)],
	                                         kpts_likelihood_th,
	                                         path_to_DLCmodel,
	                                         dlc_proc)
	        draw_keypoints_on_image(img_input,
	                                list_kpts_per_crop[0],
	                                map_label_id_to_str,
	                                **draw_options)
	        return (img_input,)

	    ############################################################
	    # Run Megadetector and crop animal bboxes with confidence above th
	    md_results = predict_md(img_input, gr_mega_model_input=gr_mega_model_input)
	    list_crops = crop_animal_detections(md_results, bbox_likelihood_th)

	    # Rendered detector image: `ims`, or `imgs` on older YOLOv5 releases.
	    rendered = getattr(md_results, 'ims', None)
	    if rendered is None:
	        rendered = md_results.imgs
	    img_background = Image.fromarray(rendered[0])

	    # No animal above the threshold: return the detector rendering as is.
	    if not list_crops:
	        return (img_background,)

	    ############################################################
	    # Run DLC on each crop
	    list_kpts_per_crop = predict_dlc(list_crops,
	                                     kpts_likelihood_th,
	                                     path_to_DLCmodel,
	                                     dlc_proc)

	    # Paste each annotated crop back at the origin of its own detection. The
	    # crop index cannot be used to index md_results.xyxy directly because
	    # filtered-out detections shift the positions.
	    origins = _animal_paste_origins(md_results, bbox_likelihood_th)
	    for np_crop, kpts_crop, origin in zip(list_crops, list_kpts_per_crop, origins):
	        img_crop = Image.fromarray(np_crop)
	        draw_keypoints_on_image(img_crop,
	                                kpts_crop,
	                                map_label_id_to_str,
	                                **draw_options)
	        # https://pillow.readthedocs.io/en/stable/reference/Image.html#PIL.Image.Image.paste
	        img_background.paste(img_crop, box=origin)

	    return (img_background,)


	##########################################################
	# %%
	# Get MegaDetector model
	# TODO: Allow user selectable model?
	# models = ["model_weights/md_v5a.0.0.pt","model_weights/md_v5b.0.0.pt"]
	#MD_model = torch.hub.load('ultralytics/yolov5', 'custom', "megadet_model/md_v5b.0.0.pt")



	####################################################
	# %%
	# Create user interface and launch: all outputs
	# Gallery that shows the annotated image(s) returned by the pipeline.
	gr_gallery_output = gr.Gallery(label="Output Gallery", type="pil")

	# The two components below are created but not wired into `outputs`.
	gr_json_output = gr.JSON(label='megadetector json')
	gr_pose_output = gr.Dataframe(
	    label='pose coordinates',
	    headers=['bodypart', 'x', 'y', 'confidence'],
	    # row_count = number of bodyparts,
	)

	# Only the gallery is returned; gr_json_output and gr_pose_output are disabled.
	outputs = [gr_gallery_output]

	gr_title = "MegaDetector v5 + DeepLabCut-Live!"

	# Built from adjacent string literals instead of backslash continuations, so
	# the source indentation is not embedded in the displayed text and the
	# sentences are explicitly separated.
	gr_description = (
	    "Contributed by Sofia Minano, Neslihan Wittek, Nirel Kadzo, VicShaoChih Chiang -- DLC AI Residents 2022. "
	    "This App detects and estimate the pose of animals in camera trap images using <a href='https://github.com/microsoft/CameraTraps'>MegaDetector v5a</a> + <a href='https://github.com/DeepLabCut/DeepLabCut-live'>DeepLabCut-live</a>. "
	    "It additionally builds upon on work from <a href='https://huggingface.co/spaces/hlydecker/MegaDetector_v5'>hlydecker/MegaDetector_v5</a> "
	    "<a href='https://huggingface.co/spaces/sofmi/MegaDetector_DLClive'>sofmi/MegaDetector_DLClive</a> "
	    "<a href='https://huggingface.co/spaces/Neslihan/megadetector_dlcmodels'>Neslihan/megadetector_dlcmodels</a>. "
	    "We experimented with two Megadetector models. See <a href='https://github.com/microsoft/CameraTraps/blob/main/megadetector.md'>here</a> for detailed information about models."
	)
	# article = "<p style='text-align: center'>This app makes predictions using a YOLOv5x6 model that was trained to detect animals, humans, and vehicles in camera trap images; find out more about the project on <a href='https://github.com/microsoft/CameraTraps'>GitHub</a>. This app was built by Henry Lydecker but really depends on code and models developed by <a href='http://ecologize.org/'>Ecologize</a> and <a href='http://aka.ms/aiforearth'>Microsoft AI for Earth</a>. Find out more about the YOLO model from the original creator, <a href='https://pjreddie.com/darknet/yolo/'>Joseph Redmon</a>. YOLOv5 is a family of compound-scaled object detection models trained on the COCO dataset and developed by Ultralytics, and includes simple functionality for Test Time Augmentation (TTA), model ensembling, hyperparameter evolution, and export to ONNX, CoreML and TFLite. <a href='https://github.com/ultralytics/yolov5'>Source code</a> \| <a href='https://pytorch.org/hub/ultralytics_yolov5'>PyTorch Hub</a></p>"
	# examples = [['data/Macropod.jpg'], ['data/koala2.jpg'],['data/cat.jpg'],['data/BrushtailPossum.jpg']]

	# %%
	# One example row; values are listed in the same order as `inputs`:
	# image, DLC model choice, DLC-only flag, bbox threshold, keypoint threshold,
	# label font, font size, keypoint colour, pose size, MegaDetector model.
	# The model is given as the dropdown choice 'full_monkey' (not its directory),
	# and every remaining input gets a concrete value instead of None.
	examples = [['example/monkey_full.jpg', 'full_monkey', False, 0.5, 0.5,
	             'amiko', 8, '#ff0000', 2, 'md_v5a']]

	# Build the interface around predict_pipeline and start serving it.
	demo = gr.Interface(predict_pipeline,
	                    inputs=inputs,
	                    outputs=outputs,
	                    title=gr_title,
	                    description=gr_description,
	                    examples=examples,
	                    theme="huggingface",
	                    #live=True
	                    )

	demo.launch(enable_queue=True, share=True)