Spaces:

DeepLabCut
/

MegaDetector_DeepLabCut

Build error

App Files Files Community

MegaDetector_DeepLabCut / app.py

sabrinabenas

add save results

39504e9 about 2 years ago

raw

history blame

No virus

19.4 kB

	# Built from https://huggingface.co/spaces/hlydecker/MegaDetector_v5
	# Built from https://huggingface.co/spaces/sofmi/MegaDetector_DLClive/blob/main/app.py
	# Built from https://huggingface.co/spaces/Neslihan/megadetector_dlcmodels/blob/main/app.py


	from tkinter import W
	import gradio as gr

	import torch
	import torchvision
	from dlclive import DLCLive, Processor
	import matplotlib
	from PIL import Image, ImageColor, ImageFont, ImageDraw
	# check git lfs pull!!
	import numpy as np
	import math

	# import json
	import os
	import yaml
	from model.models import DownloadModel
	from save_results import save_results
	import pdb

	#########################################
	# Input params
	# Font files selectable for keypoint labels, keyed by the name shown in the UI.
	FONTS = dict(
	    amiko="font/Amiko-Regular.ttf",
	    nature="font/LoveNature.otf",
	    painter="font/PainterDecorator.otf",
	    animals="font/UncialAnimals.ttf",
	    zen="font/ZEN.TTF",
	)

	# MegaDetector weight files, keyed by the name shown in the UI.
	Megadet_Models = dict(
	    md_v5a="megadet_model/md_v5a.0.0.pt",
	    md_v5b="megadet_model/md_v5b.0.0.pt",
	)

	# DeepLabCut model directories, keyed by the name shown in the UI.
	DLC_models = dict(
	    full_cat="model/DLC_Cat_resnet_50_iteration-0_shuffle-0",
	    full_dog="model/DLC_Dog_resnet_50_iteration-0_shuffle-0",
	    primate_face="model/DLC_FacialLandmarks_resnet_50_iteration-1_shuffle-1",
	    full_human="model/DLC_human_dancing_resnet_101_iteration-0_shuffle-1",
	    full_macaque="model/DLC_monkey_resnet_50_iteration-0_shuffle-1",
	)
	# Dropdown choices: the DLC model names in declaration order.
	DLC_models_list = list(DLC_models)
	#########################################
	# Draw keypoints on image
	def draw_keypoints_on_image(image,
	                            keypoints,
	                            map_label_id_to_str,
	                            flag_show_str_labels,
	                            use_normalized_coordinates=True,
	                            font_style='amiko',
	                            font_size=8,
	                            keypt_color="#ff0000",
	                            marker_size=2,
	                            ):
	    """Draw keypoints (and optionally their labels) on a PIL image, in place.

	    Modified from:
	    https://www.programcreek.com/python/?code=fjchange%2Fobject_centric_VAD%2Fobject_centric_VAD-master%2Fobject_detection%2Futils%2Fvisualization_utils.py

	    Each keypoint is drawn as an ellipse whose fill color is taken from the
	    'hsv' colormap (indexed by keypoint position in the list) and whose
	    outline is taken from the 'Greys' colormap (indexed by its likelihood).

	    Args:
	        image: a PIL.Image object; it is modified directly.
	        keypoints: rows indexed as (x, y, likelihood), e.g. a numpy array
	            with shape [num_keypoints, 3].
	        map_label_id_to_str: mapping from keypoint index to label string.
	        flag_show_str_labels: whether to write the label next to each keypoint.
	        use_normalized_coordinates: if True (default), treat x and y as
	            fractions of the image width/height. Otherwise treat them as
	            absolute pixel coordinates.
	        font_style: key into FONTS selecting the label font.
	        font_size: label font size.
	        keypt_color: color of the label text (any string accepted by
	            ImageColor.getcolor). Marker colors come from the colormaps.
	        marker_size: half-width of the marker, in pixels.

	    Returns:
	        None. Keypoints whose x or y is NaN (e.g. masked out by a likelihood
	        threshold) are skipped.
	    """
	    # get a drawing context
	    draw = ImageDraw.Draw(image)
	    im_width, im_height = image.size

	    # matplotlib.cm.get_cmap was removed in recent matplotlib releases;
	    # prefer the colormap registry when it exists.
	    if hasattr(matplotlib, 'colormaps'):
	        fill_cmap = matplotlib.colormaps['hsv']
	        outline_cmap = matplotlib.colormaps['Greys']
	    else:
	        fill_cmap = matplotlib.cm.get_cmap('hsv')
	        outline_cmap = matplotlib.cm.get_cmap('Greys')

	    # font and text color do not depend on the keypoint: resolve them once
	    if flag_show_str_labels:
	        font = ImageFont.truetype(FONTS[font_style], font_size)
	        text_color = ImageColor.getcolor(keypt_color, "RGB")

	    for i, keypoint in enumerate(keypoints):
	        keypoint_x, keypoint_y, likelihood = keypoint[0], keypoint[1], keypoint[2]

	        # nothing sensible can be drawn at an undefined position
	        if math.isnan(keypoint_x) or math.isnan(keypoint_y):
	            continue

	        # adjust keypoint coords if required
	        if use_normalized_coordinates:
	            keypoint_x = im_width * keypoint_x
	            keypoint_y = im_height * keypoint_y

	        # colormaps return RGBA floats in [0, 1]; PIL wants ints in [0, 255]
	        fill = tuple(round(channel * 255) for channel in fill_cmap(i * 10)[:3])
	        outline = tuple(round(channel * 255) for channel in outline_cmap(likelihood)[:3])

	        # draw ellipse around keypoint
	        draw.ellipse([(keypoint_x - marker_size, keypoint_y - marker_size),
	                      (keypoint_x + marker_size, keypoint_y + marker_size)],
	                     fill=fill, outline=outline, width=2)

	        # add string label next to the keypoint
	        if flag_show_str_labels:
	            draw.text((keypoint_x + marker_size, keypoint_y + marker_size),
	                      map_label_id_to_str[i],
	                      text_color,
	                      font=font)

	############################################
	# Predict detections with MegaDetector v5a model
	def predict_md(im,
	               mega_model_input,
	               size=640):
	    """Run a MegaDetector model on an image resized to at most `size` pixels.

	    Args:
	        im: input PIL image.
	        mega_model_input: key into Megadet_Models selecting the weights file.
	        size: target length, in pixels, of the longest image side.

	    Returns:
	        The object returned by calling the loaded YOLOv5 model on the
	        resized image.
	    """
	    # multiplying factor that makes the longest side equal to `size`
	    scale = size / max(im.size)
	    new_size = tuple(int(side * scale) for side in im.size)
	    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter
	    im = im.resize(new_size, Image.LANCZOS)

	    MD_model = torch.hub.load('ultralytics/yolov5', 'custom', Megadet_Models[mega_model_input])

	    ## detect objects
	    results = MD_model(im)  # inference

	    return results

	##########################################
	def crop_animal_detections(img_in,
	                           yolo_results,
	                           likelihood_th):
	    """Cut out every 'animal' detection whose confidence reaches the threshold.

	    The input image is first resized to the size of the frame the detector
	    worked on (yolo_results.ims[0]), so the box coordinates apply directly.

	    Args:
	        img_in: PIL image the detections were computed from.
	        yolo_results: detector output exposing `names`, `ims` and `xyxy`.
	        likelihood_th: minimum confidence for a detection to be kept.

	    Returns:
	        List of numpy arrays, one crop per kept detection, in detection order.
	    """
	    class_names = list(yolo_results.names.values())  # e.g. ['animal', 'person', 'vehicle']

	    # bring the image to the detector's frame size (shape is rows, cols, ...)
	    frame_shape = yolo_results.ims[0].shape
	    img_in = img_in.resize((frame_shape[1], frame_shape[0]))

	    animal_crops = []
	    for detections in yolo_results.xyxy:
	        for row in detections:
	            # box grown outwards to whole pixels (for pasting later)
	            box = (int(math.floor(row[0])),
	                   int(math.floor(row[1])),
	                   int(math.ceil(row[2])),
	                   int(math.ceil(row[3])))
	            confidence = row[4]
	            label_id = row[5]

	            # only animals at or above the threshold are cropped
	            if not ((label_id == class_names.index('animal')) and
	                    (confidence >= likelihood_th)):
	                continue

	            animal_crops.append(np.asarray(img_in.crop(box)))

	    return animal_crops

	def draw_rectangle_text(img, results, font_style='amiko', font_size=8, keypt_color="white",):
	    """Draw a red bounding box with an 'animal <confidence>' caption on an image.

	    Args:
	        img: PIL image to draw on; it is modified in place.
	        results: one detection, indexed as (x_min, y_min, x_max, y_max,
	            confidence, ...).
	        font_style: key into FONTS selecting the caption font.
	        font_size: caption font size.
	        keypt_color: accepted for signature compatibility; not used here.

	    Returns:
	        The ImageDraw object that was used to draw on `img`.
	    """
	    x_min, y_min, x_max, y_max, confidence = results[:5]

	    canvas = ImageDraw.Draw(img)
	    canvas.rectangle([(x_min, y_min), (x_max, y_max)], outline="red", width=5)  # bb for animal

	    caption = 'animal ' + str(round(confidence, 2))
	    font = ImageFont.truetype(FONTS[font_style], font_size)

	    # font.getsize() was removed in Pillow 10; the bottom edge reported by
	    # getbbox() is the same value getsize() used to return as the height.
	    text_height = font.getbbox(caption)[3]

	    # caption sits just above the top-left corner of the box, on a red patch
	    caption_y = y_min - text_height - 2
	    left, top, right, bottom = canvas.textbbox((x_min, caption_y), caption, font=font)
	    canvas.rectangle((left, top - 5, right + 5, bottom + 5), fill="red")
	    canvas.text((x_min + 3, caption_y), caption, font=font, fill="black")

	    return canvas

	##########################################
	def predict_dlc(list_np_crops,
	                kpts_likelihood_th,
	                DLCmodel,
	                dlc_proc):
	    """Estimate keypoints with DLCLive on every crop in a list.

	    Args:
	        list_np_crops: list of image crops as numpy arrays.
	        kpts_likelihood_th: keypoints whose likelihood (last column) is
	            below this value are replaced by NaN.
	        DLCmodel: model path passed to DLCLive.
	        dlc_proc: processor object passed to DLCLive.

	    Returns:
	        List with one keypoint array per crop, rows being (x, y, likelihood).
	        Empty when there are no crops.
	    """
	    # nothing to run on; also avoids indexing the first crop below
	    if len(list_np_crops) == 0:
	        return []

	    # run dlc thru list of crops
	    dlc_live = DLCLive(DLCmodel, processor=dlc_proc)
	    dlc_live.init_inference(list_np_crops[0])

	    list_kpts_per_crop = []
	    for crop in list_np_crops:
	        keypts_xyp = dlc_live.get_pose(crop)  # third column is llk!
	        # set whole rows of kpts below threshold to nan
	        keypts_xyp[keypts_xyp[:, -1] < kpts_likelihood_th, :] = np.nan
	        # add kpts of this crop to list
	        list_kpts_per_crop.append(keypts_xyp)

	    return list_kpts_per_crop


	#####################################################
	def _load_label_map(pose_cfg_path):
	    """Map each keypoint id listed in a DLC pose config to its bodypart name."""
	    with open(pose_cfg_path, "r") as stream:
	        pose_cfg_dict = yaml.safe_load(stream)
	    # pose_cfg_dict['all_joints'] is a list of one-element lists
	    return {joint[0]: name
	            for joint, name in zip(pose_cfg_dict['all_joints'],
	                                   pose_cfg_dict['all_joints_names'])}


	def _kept_animal_detections(md_results, bbox_likelihood_th):
	    """Return the detections crop_animal_detections keeps, as lists, in order."""
	    animal_id = list(md_results.names.values()).index('animal')
	    kept = []
	    for det in md_results.xyxy[0]:
	        # same test, on the same tensor values, as crop_animal_detections
	        if (det[5] == animal_id) and (det[4] >= bbox_likelihood_th):
	            kept.append(det.tolist())
	    return kept


	def predict_pipeline(img_input,
	                     mega_model_input,
	                     dlc_model_input_str,
	                     flag_dlc_only,
	                     flag_show_str_labels,
	                     bbox_likelihood_th,
	                     kpts_likelihood_th,
	                     font_style,
	                     font_size,
	                     keypt_color,
	                     marker_size,
	                     ):
	    """Detect animals with MegaDetector and draw DeepLabCut keypoints on them.

	    Args:
	        img_input: input PIL image.
	        mega_model_input: key into Megadet_Models.
	        dlc_model_input_str: DLC model name passed to DownloadModel.
	        flag_dlc_only: if True, skip MegaDetector and run DLC on the whole
	            input image.
	        flag_show_str_labels: whether to write bodypart labels.
	        bbox_likelihood_th: confidence threshold for animal detections.
	        kpts_likelihood_th: likelihood threshold for keypoints.
	        font_style, font_size, keypt_color, marker_size: drawing options
	            forwarded to the drawing helpers.

	    Returns:
	        Tuple (image, download_file). In DLC-only mode the image is
	        `img_input` drawn on in place and download_file is None; otherwise
	        the image is the annotated detector-sized frame and download_file is
	        whatever save_results returns.
	    """
	    ############################################################
	    ## Get DLC model and labels as strings
	    # TODO: make a dict as for megadetector
	    # NOTE(review): DownloadModel is defined in model.models (not visible
	    # here); presumably it places the model and its pose_cfg.yaml under
	    # 'model/' - confirm.
	    path_to_DLCmodel = DownloadModel(dlc_model_input_str, 'model/')
	    map_label_id_to_str = _load_label_map('model/pose_cfg.yaml')

	    dlc_proc = Processor()

	    ##############################################################
	    # if required: skip MD and run DLC on full image [mostly for testing]
	    if flag_dlc_only:
	        list_kpts_per_crop = predict_dlc([np.asarray(img_input)],
	                                         kpts_likelihood_th,
	                                         path_to_DLCmodel,
	                                         dlc_proc)
	        draw_keypoints_on_image(img_input,
	                                list_kpts_per_crop[0],
	                                map_label_id_to_str,
	                                flag_show_str_labels,
	                                use_normalized_coordinates=False,
	                                font_style=font_style,
	                                font_size=font_size,
	                                keypt_color=keypt_color,
	                                marker_size=marker_size)
	        # the interface declares two outputs; there is no results file here
	        return img_input, None

	    ############################################################
	    # Run Megadetector
	    md_results = predict_md(img_input,
	                            mega_model_input,
	                            size=640)

	    ################################################################
	    # Obtain animal crops for bboxes with confidence above th
	    list_crops = crop_animal_detections(img_input,
	                                        md_results,
	                                        bbox_likelihood_th)
	    # Detections the crops came from. Crops are a filtered subset, so a crop
	    # index must not be used to index md_results.xyxy[0] directly.
	    kept_detections = _kept_animal_detections(md_results, bbox_likelihood_th)

	    ##############################################################
	    # Run DLC on each crop (nothing to run when no animal was kept)
	    if list_crops:
	        list_kpts_per_crop = predict_dlc(list_crops,
	                                         kpts_likelihood_th,
	                                         path_to_DLCmodel,
	                                         dlc_proc)
	    else:
	        list_kpts_per_crop = []

	    img_background = img_input.resize((md_results.ims[0].shape[1], md_results.ims[0].shape[0]))
	    print('I have ' + str(len(list_crops)) + ' bounding box')

	    for np_crop, kpts_crop, bb_per_animal in zip(list_crops,
	                                                 list_kpts_per_crop,
	                                                 kept_detections):
	        ## Draw keypts on crop
	        img_crop = Image.fromarray(np_crop)
	        draw_keypoints_on_image(img_crop,
	                                kpts_crop,
	                                map_label_id_to_str,
	                                flag_show_str_labels,
	                                use_normalized_coordinates=False,  # if True, then I should use md_results.xyxyn for list_kpts_crop
	                                font_style=font_style,
	                                font_size=font_size,
	                                keypt_color=keypt_color,
	                                marker_size=marker_size)

	        ## Paste crop back at the top-left corner it was cut from
	        ## https://pillow.readthedocs.io/en/stable/reference/Image.html#PIL.Image.Image.paste
	        paste_corner = (int(math.floor(bb_per_animal[0])),
	                        int(math.floor(bb_per_animal[1])))
	        img_background.paste(img_crop, box=paste_corner)

	        pred = bb_per_animal[4]
	        if bbox_likelihood_th < pred:
	            draw_rectangle_text(img_background, bb_per_animal, font_style=font_style, font_size=font_size, keypt_color=keypt_color)

	        print(pred)

	    download_file = save_results(md_results, list_kpts_per_crop, map_label_id_to_str, bbox_likelihood_th)

	    return img_background, download_file

	#############################################
	# User interface: inputs
	# Input image
	# Input image
	gr_image_input = gr.inputs.Image(label="Input Image", type="pil")

	# Models (type='value' makes a dropdown return the selected string, not its index)
	gr_dlc_model_input = gr.inputs.Dropdown(
	    choices=list(DLC_models_list),
	    type='value',
	    default='full_cat',
	    label='Select DeepLabCut model',
	)
	gr_mega_model_input = gr.inputs.Dropdown(
	    choices=list(Megadet_Models.keys()),
	    type='value',
	    default='md_v5a',
	    label='Select MegaDetector model',
	)

	# Other inputs
	gr_dlc_only_checkbox = gr.inputs.Checkbox(
	    default=False,
	    label='Run DLClive only, directly on input image?',
	)
	gr_str_labels_checkbox = gr.inputs.Checkbox(
	    default=True,
	    label='Show bodypart labels?',
	)

	gr_slider_conf_bboxes = gr.inputs.Slider(
	    minimum=0, maximum=1, step=.02, default=0.8,
	    label='Set confidence threshold for animal detections',
	)
	gr_slider_conf_keypoints = gr.inputs.Slider(
	    minimum=0, maximum=1, step=.05, default=0,
	    label='Set confidence threshold for keypoints',
	)

	# Data viz
	gr_keypt_color = gr.ColorPicker(label="choose color for keypoint label")

	gr_labels_font_style = gr.inputs.Dropdown(
	    choices=['amiko', 'nature', 'painter', 'animals', 'zen'],
	    type='value',
	    default='amiko',
	    label='Select keypoint label font',
	)
	gr_slider_font_size = gr.inputs.Slider(
	    minimum=5, maximum=30, step=1, default=8,
	    label='Set font size',
	)
	gr_slider_marker_size = gr.inputs.Slider(
	    minimum=1, maximum=20, step=1, default=5,
	    label='Set marker size',
	)

	# list of inputs, in the positional order of predict_pipeline's parameters
	inputs = [
	    gr_image_input,
	    gr_mega_model_input,
	    gr_dlc_model_input,
	    gr_dlc_only_checkbox,
	    gr_str_labels_checkbox,
	    gr_slider_conf_bboxes,
	    gr_slider_conf_keypoints,
	    gr_labels_font_style,
	    gr_slider_font_size,
	    gr_keypt_color,
	    gr_slider_marker_size,
	]
	####################################################
	# %%
	# User interface: outputs (annotated image, then the downloadable results file)
	gr_image_output = gr.outputs.Image(label="Output Image", type="pil")
	out_smpl_npy_download = gr.File(label="Download JSON file")
	outputs = [gr_image_output, out_smpl_npy_download]

	##############################################
	# User interace: description
	gr_title = "MegaDetector v5 + DeepLabCut-Live!"
	# HTML description shown under the title. Built from adjacent string literals
	# so that neither invalid escapes ('\,' / '\.') nor source indentation end up
	# in the rendered text.
	gr_description = (
	    "Contributed by Sofia Minano, Neslihan Wittek, Nirel Kadzo, VicShaoChih Chiang, Sabrina Benas -- DLC AI Residents 2022.. "
	    "This App detects and estimate the pose of animals in camera trap images using <a href='https://github.com/microsoft/CameraTraps'>MegaDetector v5a</a> + <a href='https://github.com/DeepLabCut/DeepLabCut-live'>DeepLabCut-live</a>. "
	    "We host models from the <a href='http://www.mackenziemathislab.org/dlc-modelzoo'>DeepLabCut ModelZoo Project</a>, and two <a href='https://github.com/microsoft/CameraTraps/blob/main/megadetector.md'>MegaDetector Models</a>. Please carefully check their licensing information if you use this project. The App additionally builds upon on work from <a href='https://huggingface.co/spaces/hlydecker/MegaDetector_v5'>hlydecker/MegaDetector_v5</a> "
	    "<a href='https://huggingface.co/spaces/sofmi/MegaDetector_DLClive'>sofmi/MegaDetector_DLClive</a> "
	    "<a href='https://huggingface.co/spaces/Neslihan/megadetector_dlcmodels'>Neslihan/megadetector_dlcmodels</a>."
	)

	# article = "<p style='text-align: center'>This app makes predictions using a YOLOv5x6 model that was trained to detect animals, humans, and vehicles in camera trap images; find out more about the project on <a href='https://github.com/microsoft/CameraTraps'>GitHub</a>. This app was built by Henry Lydecker but really depends on code and models developed by <a href='http://ecologize.org/'>Ecologize</a> and <a href='http://aka.ms/aiforearth'>Microsoft AI for Earth</a>. Find out more about the YOLO model from the original creator, <a href='https://pjreddie.com/darknet/yolo/'>Joseph Redmon</a>. YOLOv5 is a family of compound-scaled object detection models trained on the COCO dataset and developed by Ultralytics, and includes simple functionality for Test Time Augmentation (TTA), model ensembling, hyperparameter evolution, and export to ONNX, CoreML and TFLite. <a href='https://github.com/ultralytics/yolov5'>Source code</a> | <a href='https://pytorch.org/hub/ultralytics_yolov5'>PyTorch Hub</a></p>"

	# One row per example, in the same order as the interface inputs.
	examples = [
	    ['example/monkey_full.jpg', 'md_v5a', 'full_macaque', False, True, 0.5, 0.3, 'amiko', 9, 'blue', 3],
	    ['example/dog.jpeg', 'md_v5a', 'full_dog', False, True, 0.5, 0.00, 'amiko', 9, 'yellow', 3],
	    ['example/cat.jpg', 'md_v5a', 'full_cat', False, True, 0.5, 0.05, 'amiko', 9, 'purple', 3],
	]

	################################################
	# %% Define and launch gradio interface
	# Wire the pipeline function to the input/output components declared above.
	demo = gr.Interface(
	    fn=predict_pipeline,
	    inputs=inputs,
	    outputs=outputs,
	    title=gr_title,
	    description=gr_description,
	    examples=examples,
	    theme="huggingface",
	    # live=True
	)

	# Start the app with request queueing and a public share link.
	demo.launch(share=True, enable_queue=True)