Commit a065329
sfmig committed
Parent(s): 6e8a870

refactoring. reverted background image fix.
app.py
CHANGED
@@ -9,12 +9,11 @@ import torch
 import torchvision
 from dlclive import DLCLive, Processor
 
-from PIL import Image
-from PIL import ImageFont
-from PIL import ImageDraw
+from PIL import Image, ImageColor, ImageFont, ImageDraw
 
 import numpy as np
 import math
+
 # import json
 import os
 import yaml
@@ -22,8 +21,7 @@ import yaml
 # import pdb
 
 #########################################
-
-
+# Input params
 FONTS = {'amiko': "font/Amiko-Regular.ttf",
          'nature': "font/LoveNature.otf",
          'painter':"font/PainterDecorator.otf",
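These font files feed ImageFont.truetype later in the diff. A minimal smoke test of that lookup, assuming the font/ directory ships with the Space:

from PIL import ImageFont

# hypothetical check: every entry in FONTS should load at the default label size
FONTS = {'amiko': "font/Amiko-Regular.ttf",
         'nature': "font/LoveNature.otf",
         'painter': "font/PainterDecorator.otf"}
for style, path in FONTS.items():
    font = ImageFont.truetype(path, 8)  # raises OSError if the file is missing
    print(style, font.getname())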
@@ -33,83 +31,32 @@ FONTS = {'amiko': "font/Amiko-Regular.ttf",
 Megadet_Models = {'md_v5a': "megadet_model/md_v5a.0.0.pt",
                   'md_v5b': "megadet_model/md_v5b.0.0.pt"}
 
-#############################################
-# %%
-# Create user interface and launch: all inputs
-gr_image_input = gr.inputs.Image(type="pil", label="Input Image")
-
-
-gr_dlc_model_input = gr.inputs.Dropdown(choices=['full_cat','full_dog', 'primate_face', 'full_human', 'full_macaque'], # choices
-                                        default='full_cat', # default option
-                                        type='value', # Type of value to be returned by component. "value" returns the string of the choice selected, "index" returns the index of the choice selected.
-                                        label='Select DeepLabCut model')
-gr_dlc_only_checkbox = gr.inputs.Checkbox(False,
-                                          label='Run DLClive only, directly on input image?')
-gr_str_labels_checkbox = gr.inputs.Checkbox(True,
-                                            label='Show bodypart labels?')
-gr_slider_conf_bboxes = gr.inputs.Slider(0,1,.05,0.8,
-                                         label='Set confidence threshold for animal detections')
-gr_slider_conf_keypoints = gr.inputs.Slider(0,1,.05,0,
-                                            label='Set confidence threshold for keypoints')
-
-gr_keypt_color = gr.ColorPicker(label="choose color for keypoint label")
-
-gr_pose_font_input = gr.inputs.Dropdown(choices=['amiko', 'nature', 'painter', 'animals', 'zen'],
-                                        default='amiko',
-                                        type='value',
-                                        label='Select keypoint label font')
-gr_slider_font_size = gr.inputs.Slider(5,30,1,8,
-                                       label='Set font size')
-
-gr_slider_pose_size = gr.inputs.Slider(0.5,5,0.2,2,
-                                       label='Set pose size')
-gr_mega_model_input = gr.inputs.Dropdown(choices=['md_v5a','md_v5b'],
-                                         default='md_v5a', # default option
-                                         type='value', # Type of value to be returned by component. "value" returns the string of the choice selected, "index" returns the index of the choice selected.
-                                         label='Select MegaDetector model')
-inputs = [gr_image_input,
-          gr_dlc_model_input,
-          gr_dlc_only_checkbox,
-          gr_str_labels_checkbox,
-          gr_slider_conf_bboxes,
-          gr_slider_conf_keypoints,
-          gr_pose_font_input,
-          gr_slider_font_size,
-          gr_keypt_color,
-          gr_slider_pose_size,
-          gr_mega_model_input,
-          ]
-
-#image = gr.inputs.Image(type="pil", label="Input Image")
-#chosen_model = gr.inputs.Dropdown(choices = models, value = "model_weights/md_v5a.0.0.pt",type = "value", label="Model Weight")
-#size = 640
-
 #########################################
-#
+# Draw keypoints on image
 def draw_keypoints_on_image(image,
                             keypoints,
                             map_label_id_to_str,
                             flag_show_str_labels,
                             use_normalized_coordinates=True,
-
-
-
-
-                            ):
+                            font_style='amiko',
+                            font_size=8,
+                            keypt_color="#ff0000",
+                            marker_size='2'):
     """Draws keypoints on an image.
     Modified from:
     https://www.programcreek.com/python/?code=fjchange%2Fobject_centric_VAD%2Fobject_centric_VAD-master%2Fobject_detection%2Futils%2Fvisualization_utils.py
     Args:
       image: a PIL.Image object.
      keypoints: a numpy array with shape [num_keypoints, 2].
+      map_label_id_to_str: dict with keys=label number and values= label string
+      flag_show_str_labels: boolean to select whether or not to show string labels
      color: color to draw the keypoints with. Default is red.
      radius: keypoint radius. Default value is 2.
      use_normalized_coordinates: if True (default), treat keypoint values as
        relative to the image. Otherwise treat them as absolute.
    """
    # get a drawing context
-    draw = ImageDraw.Draw(image)
-    # font = ImageFont.truetype("sans-serif.ttf", 16)
+    draw = ImageDraw.Draw(image)
 
    im_width, im_height = image.size
    keypoints_x = [k[0] for k in keypoints]
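This refactor replaces reads of the old Gradio globals (gr_pose_font_input, gr_slider_font_size) with explicit font_style/font_size/keypt_color/marker_size parameters. A hedged usage sketch of the new signature; note the default marker_size='2' is a string, so callers should pass a number, as predict_pipeline does:

import numpy as np
from PIL import Image

# hypothetical call with the new signature; a blank canvas stands in for a crop
img = Image.new("RGB", (200, 200), "white")
kpts = np.array([[50, 60], [120, 140]])          # [num_keypoints, 2], absolute pixels
draw_keypoints_on_image(img, kpts,
                        map_label_id_to_str={0: 'nose', 1: 'tail'},
                        flag_show_str_labels=True,
                        use_normalized_coordinates=False,
                        font_style='amiko',
                        font_size=8,
                        keypt_color="#ff0000",
                        marker_size=2)           # pass a number, not the string default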
@@ -121,41 +68,46 @@ def draw_keypoints_on_image(image,
     keypoints_y = tuple([im_height * y for y in keypoints_y])
 
     # draw ellipses around keypoints and add string labels
-    font = ImageFont.truetype(FONTS[gr_pose_font_input], gr_slider_font_size) # font = ImageFont.truetype(<font-file>, <font-size>)
     for i, (keypoint_x, keypoint_y) in enumerate(zip(keypoints_x, keypoints_y)):
-        draw.ellipse([(keypoint_x -
-                      (keypoint_x +
-                      outline=
+        draw.ellipse([(keypoint_x - marker_size, keypoint_y - marker_size),
+                      (keypoint_x + marker_size, keypoint_y + marker_size)],
+                     outline=keypt_color,
+                     fill=keypt_color)
 
        # add string labels around keypoints
        if flag_show_str_labels:
-
-
-
-
+            font = ImageFont.truetype(FONTS[font_style],
+                                      font_size)
+            draw.text((keypoint_x + marker_size, keypoint_y + marker_size),#(0.5*im_width, 0.5*im_height), #-------
+                      map_label_id_to_str[i],
+                      ImageColor.getcolor(keypt_color, "RGB"), # rgb
                       font=font)
 
 ############################################
-# %%
 # Predict detections with MegaDetector v5a model
-def predict_md(im,
+def predict_md(im,
+               mega_model_input,
+               size=640):
+
     # resize image
-    g = (size / max(im.size)) #
-    im = im.resize((int(x * g) for x in im.size),
-
+    g = (size / max(im.size)) # multipl factor to make max size of the image equal to input size
+    im = im.resize((int(x * g) for x in im.size),
+                   Image.ANTIALIAS) # resize
+    MD_model = torch.hub.load('ultralytics/yolov5', 'custom', Megadet_Models[mega_model_input])
+
     ## detect objects
     results = MD_model(im) # inference # vars(results).keys()= dict_keys(['imgs', 'pred', 'names', 'files', 'times', 'xyxy', 'xywh', 'xyxyn', 'xywhn', 'n', 't', 's'])
     results.render() # updates results.imgs with boxes and labels
 
-    return results
+    return results
 
-
+##########################################
 def crop_animal_detections(yolo_results,
                            likelihood_th):
-    ## crop if animal and return list of crops
-    list_labels_as_str = yolo_results.names #['animal', 'person', 'vehicle']
-    list_np_animal_crops = []
 
+    ## Extract animal crops
+    list_labels_as_str = yolo_results.names # ['animal', 'person', 'vehicle']
+    list_np_animal_crops = []
    # for every image
    for img, det_array in zip(yolo_results.imgs,
                              yolo_results.xyxy):
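The new predict_md loads the selected checkpoint with torch.hub on every call. A standalone sketch of that load-and-detect path, with the checkpoint path assumed from Megadet_Models:

import torch
from PIL import Image

# assumes the MegaDetector checkpoint path from Megadet_Models;
# torch.hub fetches the ultralytics/yolov5 repo on first use
model = torch.hub.load('ultralytics/yolov5', 'custom', 'megadet_model/md_v5a.0.0.pt')
im = Image.open('example/cat.jpg')
results = model(im)     # yolov5 Detections object
results.render()        # draws boxes into results.imgs
print(results.xyxy[0])  # per-detection rows: [xmin, ymin, xmax, ymax, conf, class]

Since the load happens inside predict_md, the model is re-created on every request; hoisting it out of the handler, or caching one instance per model name, would likely cut per-image latency.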
@@ -170,9 +122,10 @@ def crop_animal_detections(yolo_results,
             xmax_rd = int(math.ceil(det_array[j,2]))
             ymax_rd = int(math.ceil(det_array[j,3]))
 
-            pred_llk = det_array[j,4]
+            pred_llk = det_array[j,4]
             pred_label = det_array[j,5]
 
+            # keep animal crops above threshold
             if (pred_label == list_labels_as_str.index('animal')) and \
                (pred_llk >= likelihood_th):
                 area = (xmin_rd, ymin_rd, xmax_rd, ymax_rd)
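Each row of det_array follows the YOLOv5 xyxy layout. A sketch of one crop under that assumption (the floor-rounding for the mins mirrors the ceil shown above for the maxes):

import math
import numpy as np
from PIL import Image

# hypothetical single detection: [xmin, ymin, xmax, ymax, conf, class]
det = np.array([12.3, 4.7, 200.2, 180.9, 0.91, 0.0])
xmin_rd, ymin_rd = int(math.floor(det[0])), int(math.floor(det[1]))
xmax_rd, ymax_rd = int(math.ceil(det[2])), int(math.ceil(det[3]))
if det[4] >= 0.8 and int(det[5]) == 0:   # class 0 = 'animal' in MegaDetector's names list
    crop = Image.open('example/cat.jpg').crop((xmin_rd, ymin_rd, xmax_rd, ymax_rd))
    crop_np = np.asarray(crop)           # numpy crop, as the function accumulates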
@@ -183,23 +136,9 @@ def crop_animal_detections(yolo_results,
                 # add to list
                 list_np_animal_crops.append(crop_np)
 
-                # for detections_dict in img_data["detections"]:
-                #     index = img_data["detections"].index(detections_dict)
-                #     if detections_dict["conf"] > 0.8:
-                #         x1, y1,w_box, h_box = detections_dict["bbox"]
-                #         ymin,xmin,ymax, xmax = y1, x1, y1 + h_box, x1 + w_box
-
-                #         imageWidth=img.size[0]
-                #         imageHeight= img.size[1]
-                #         area = (xmin * imageWidth, ymin * imageHeight, xmax * imageWidth,
-                #                 ymax * imageHeight)
-                #         crop = img.crop(area)
-                #         crop_np = np.asarray(crop)
-                #
-                #         if detections_dict["category"] == "1":
     return list_np_animal_crops
 
-
+##########################################
 def predict_dlc(list_np_crops,
                 kpts_likelihood_th,
                 DLCmodel,
@@ -210,7 +149,7 @@ def predict_dlc(list_np_crops,
     dlc_live.init_inference(list_np_crops[0])
 
     list_kpts_per_crop = []
-    np_aux = np.empty((1,3)) # can I avoid hardcoding?
+    np_aux = np.empty((1,3)) # can I avoid hardcoding here?
     for crop in list_np_crops:
        # scale crop here?
        keypts_xyp = dlc_live.get_pose(crop) # third column is llk!
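predict_dlc wraps the DLCLive API; a minimal sketch of that flow, with the model path assumed from the 'full_cat' branch below and a dummy frame standing in for a crop:

import numpy as np
from dlclive import DLCLive, Processor

dlc_proc = Processor()
dlc_live = DLCLive("model/DLC_Cat_resnet_50_iteration-0_shuffle-0", processor=dlc_proc)
frame = np.zeros((200, 200, 3), dtype=np.uint8)
dlc_live.init_inference(frame)            # must run once before get_pose
keypts_xyp = dlc_live.get_pose(frame)     # shape [num_keypoints, 3]: x, y, likelihood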
@@ -222,22 +161,23 @@ def predict_dlc(list_np_crops,
     return list_kpts_per_crop
 
 
-
+#####################################################
 def predict_pipeline(img_input,
+                     mega_model_input,
                      model_input_str,
                      flag_dlc_only,
                      flag_show_str_labels,
                      bbox_likelihood_th,
                      kpts_likelihood_th,
-
-
-
-
-                     gr_mega_model_input=gr_mega_model_input,
+                     font_style,
+                     font_size,
+                     keypt_color,
+                     marker_size,
                      ):
 
     ############################################################
     ## Get DLC model and labels as strings
+    # TODO: make a dict as for megadetector
     if model_input_str == 'full_cat':
         path_to_DLCmodel = "model/DLC_Cat_resnet_50_iteration-0_shuffle-0"
         pose_cfg_path = os.path.join(path_to_DLCmodel,'pose_cfg.yaml')
@@ -255,15 +195,18 @@ def predict_pipeline(img_input,
         pose_cfg_path = os.path.join(path_to_DLCmodel,'pose_cfg.yaml')
 
 
-    #
+    # extract map label ids to strings
+    # pose_cfg_dict['all_joints'] is a list of one-element lists,
     with open(pose_cfg_path, "r") as stream:
         pose_cfg_dict = yaml.safe_load(stream)
-    map_label_id_to_str = dict([(k,v) for k,v in zip([el[0] for el in pose_cfg_dict['all_joints']],
+    map_label_id_to_str = dict([(k,v) for k,v in zip([el[0] for el in pose_cfg_dict['all_joints']],
                                                      pose_cfg_dict['all_joints_names'])])
 
     ############################################################
     # ### Run Megadetector
-    md_results = predict_md(img_input,
+    md_results = predict_md(img_input,
+                            mega_model_input,
+                            size=640) #Image.fromarray(results.imgs[0])
 
     ################################################################
     # Obtain animal crops for bboxes with confidence above th
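The map_label_id_to_str line unpacks pose_cfg.yaml's one-element joint lists; an equivalent, slightly more direct dict comprehension:

import yaml

# pose_cfg.yaml stores joint ids as one-element lists; zip them with the name list,
# e.g. all_joints=[[0], [1]] and all_joints_names=['nose', 'tail'] -> {0: 'nose', 1: 'tail'}
with open("model/DLC_Cat_resnet_50_iteration-0_shuffle-0/pose_cfg.yaml") as stream:
    pose_cfg_dict = yaml.safe_load(stream)
map_label_id_to_str = {el[0]: name
                       for el, name in zip(pose_cfg_dict['all_joints'],
                                           pose_cfg_dict['all_joints_names'])}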
@@ -277,7 +220,7 @@ def predict_pipeline(img_input,
     # if required: ignore MD crops and run DLC on full image [mostly for testing]
     if flag_dlc_only:
         # compute kpts on input img
-        list_kpts_per_crop = predict_dlc([np.asarray(img_input)]
+        list_kpts_per_crop = predict_dlc([np.asarray(img_input)],
                                          kpts_likelihood_th,
                                          path_to_DLCmodel,
                                          dlc_proc)
@@ -287,12 +230,11 @@ def predict_pipeline(img_input,
                                     map_label_id_to_str,
                                     flag_show_str_labels,
                                     use_normalized_coordinates=False,
-
-
-
-
-
-        return img_input, #list_kpts_per_crop
+                                    font_style=font_style,
+                                    font_size=font_size,
+                                    keypt_color=keypt_color,
+                                    marker_size=marker_size)
+        return img_input
 
     else:
         # Compute kpts for each crop
@@ -302,7 +244,14 @@ def predict_pipeline(img_input,
                                          dlc_proc)
 
         # Produce final image
-        img_background =
+        img_background = Image.fromarray(md_results.imgs[0]) # img_input or Image.fromarray(md_results.imgs[0])?
+        # Image.fromarray(md_results.imgs[0]) --> (640, 479)
+        # img_input.size ---> (259, 194)
+        # pdb.set_trace()
+
+        # resize image to match megadetector output
+        # g = (640 / max(img_background.size)) # gain
+        # img_background = img_background.resize((int(x * g) for x in img_background.size), Image.ANTIALIAS) # resize
         for ic, (np_crop, kpts_crop) in enumerate(zip(list_crops,
                                                       list_kpts_per_crop)):
 
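Per the commit message, the background-image fix is reverted: the canvas appears to be the rendered MegaDetector output again (already resized so its longest side is 640 by predict_md), which keeps the md_results.xyxy coordinates consistent with the canvas. The paste in the next hunk then only needs each bbox's top-left corner; a sketch reusing names from this diff:

from PIL import Image

# paste a drawn crop back at its bbox top-left corner;
# md_results and img_crop are the variables from this pipeline
img_background = Image.fromarray(md_results.imgs[0])
xmin, ymin = (int(t) for t in md_results.xyxy[0][0, :2])
img_background.paste(img_crop, box=(xmin, ymin))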
@@ -312,66 +261,104 @@ def predict_pipeline(img_input,
                                      kpts_crop, # a numpy array with shape [num_keypoints, 2].
                                      map_label_id_to_str,
                                      flag_show_str_labels,
-                                     use_normalized_coordinates=False, # if True, then I should use md_results.xyxyn
-
-
-
-
-                                     )
+                                     use_normalized_coordinates=False, # if True, then I should use md_results.xyxyn for list_kpts_crop
+                                     font_style=font_style,
+                                     font_size=font_size,
+                                     keypt_color=keypt_color,
+                                     marker_size=marker_size)
 
             ## Paste crop in original image
             # https://pillow.readthedocs.io/en/stable/reference/Image.html#PIL.Image.Image.paste
             img_background.paste(img_crop,
-
+                                 box = tuple([int(t) for t in md_results.xyxy[0][ic,:2]]))
 
-    return img_background
+        return img_background
 
-
-##########################################################
+#############################################
 # %%
-#
-
-#
-
+# User interface: inputs
+
+# Input image
+gr_image_input = gr.inputs.Image(type="pil", label="Input Image")
 
 
+# Models
+gr_dlc_model_input = gr.inputs.Dropdown(choices=['full_cat','full_dog', 'primate_face', 'full_human', 'full_macaque'], # choices
+                                        default='full_cat', # default option
+                                        type='value', # Type of value to be returned by component. "value" returns the string of the choice selected, "index" returns the index of the choice selected.
+                                        label='Select DeepLabCut model')
+gr_mega_model_input = gr.inputs.Dropdown(choices=['md_v5a','md_v5b'],
+                                         default='md_v5a', # default option
+                                         type='value', # Type of value to be returned by component. "value" returns the string of the choice selected, "index" returns the index of the choice selected.
+                                         label='Select MegaDetector model')
+# Other inputs
+gr_dlc_only_checkbox = gr.inputs.Checkbox(False,
+                                          label='Run DLClive only, directly on input image?')
+gr_str_labels_checkbox = gr.inputs.Checkbox(True,
+                                            label='Show bodypart labels?')
+
+gr_slider_conf_bboxes = gr.inputs.Slider(0,1,.05,0.8,
+                                         label='Set confidence threshold for animal detections')
+gr_slider_conf_keypoints = gr.inputs.Slider(0,1,.05,0,
+                                            label='Set confidence threshold for keypoints')
 
+# Data viz
+gr_keypt_color = gr.ColorPicker(label="choose color for keypoint label")
+
+gr_labels_font_style = gr.inputs.Dropdown(choices=['amiko', 'nature', 'painter', 'animals', 'zen'],
+                                          default='amiko',
+                                          type='value',
+                                          label='Select keypoint label font')
+gr_slider_font_size = gr.inputs.Slider(5,30,1,8,
+                                       label='Set font size')
+gr_slider_marker_size = gr.inputs.Slider(0.5,5,0.2,2,
+                                         label='Set marker size')
+
+# list of inputs
+inputs = [gr_image_input,
+          gr_mega_model_input,
+          gr_dlc_model_input,
+          gr_dlc_only_checkbox,
+          gr_str_labels_checkbox,
+          gr_slider_conf_bboxes,
+          gr_slider_conf_keypoints,
+          gr_labels_font_style,
+          gr_slider_font_size,
+          gr_keypt_color,
+          gr_slider_marker_size,
+          ]
 ####################################################
 # %%
-#
-
-
-gr_pose_output = gr.Dataframe(headers=['bodypart', 'x', 'y', 'confidence'],
-                              #row_count = number of bodyparts,
-                              label='pose coordinates')
-
-outputs = [gr_gallery_output, #gr_json_output,
-           #gr_pose_output,
-           ]
+# User interface: outputs
+gr_image_output = gr.outputs.Image(type="pil", label="Output Image")
+outputs = [gr_image_output]
 
+##############################################
+# User interace: description
 gr_title = "MegaDetector v5 + DeepLabCut-Live!"
-gr_description = "Contributed by Sofia Minano, Neslihan Wittek, Nirel Kadzo, VicShaoChih Chiang -- DLC AI Residents 2022
+gr_description = "Contributed by Sofia Minano, Neslihan Wittek, Nirel Kadzo, VicShaoChih Chiang -- DLC AI Residents 2022.\
     This App detects and estimate the pose of animals in camera trap images using <a href='https://github.com/microsoft/CameraTraps'>MegaDetector v5a</a> + <a href='https://github.com/DeepLabCut/DeepLabCut-live'>DeepLabCut-live</a>. \
-
+    We host models from the <a href='http://www.mackenziemathislab.org/dlc-modelzoo'>DeepLabCut ModelZoo Project</a>\, and two <a href='https://github.com/microsoft/CameraTraps/blob/main/megadetector.md'>MegaDetector Models</a>. Please carefully check their licensing information if you use this project. The App additionally builds upon on work from <a href='https://huggingface.co/spaces/hlydecker/MegaDetector_v5'>hlydecker/MegaDetector_v5</a> \
     <a href='https://huggingface.co/spaces/sofmi/MegaDetector_DLClive'>sofmi/MegaDetector_DLClive</a> \
-    <a href='https://huggingface.co/spaces/Neslihan/megadetector_dlcmodels'>Neslihan/megadetector_dlcmodels</a
-
+    <a href='https://huggingface.co/spaces/Neslihan/megadetector_dlcmodels'>Neslihan/megadetector_dlcmodels</a>\."
+
 # article = "<p style='text-align: center'>This app makes predictions using a YOLOv5x6 model that was trained to detect animals, humans, and vehicles in camera trap images; find out more about the project on <a href='https://github.com/microsoft/CameraTraps'>GitHub</a>. This app was built by Henry Lydecker but really depends on code and models developed by <a href='http://ecologize.org/'>Ecologize</a> and <a href='http://aka.ms/aiforearth'>Microsoft AI for Earth</a>. Find out more about the YOLO model from the original creator, <a href='https://pjreddie.com/darknet/yolo/'>Joseph Redmon</a>. YOLOv5 is a family of compound-scaled object detection models trained on the COCO dataset and developed by Ultralytics, and includes simple functionality for Test Time Augmentation (TTA), model ensembling, hyperparameter evolution, and export to ONNX, CoreML and TFLite. <a href='https://github.com/ultralytics/yolov5'>Source code</a> | <a href='https://pytorch.org/hub/ultralytics_yolov5'>PyTorch Hub</a></p>"
-# examples = [['data/Macropod.jpg'], ['data/koala2.jpg'],['data/cat.jpg'],['data/BrushtailPossum.jpg']]
 
-# %%
 examples = [['example/monkey_full.jpg', 'full_macaque', False, True, 0.5, 0.3, 'amiko', 5, 'blue', 3, 'md_v5a'],
-            ['example/dog.jpeg', 'full_dog', False, True, 0.5, 0.05, 'amiko', 5, 'yellow', 3, 'md_v5a'],
-            ['example/cat.jpg', 'full_cat', False, True, 0.5, 0.05, 'amiko', 5, 'purple', 3, 'md_v5a']]
+            ['example/dog.jpeg', 'full_dog', False, True, 0.5, 0.05, 'amiko', 5, 'yellow', 3, 'md_v5a'],
+            ['example/cat.jpg', 'full_cat', False, True, 0.5, 0.05, 'amiko', 5, 'purple', 3, 'md_v5a']]
+
+################################################
+# %% Define and launch gradio interface
 demo = gr.Interface(predict_pipeline,
-
-
-
-
-
-
-
-
+                    inputs=inputs,
+                    outputs=outputs,
+                    title=gr_title,
+                    description=gr_description,
+                    examples=examples,
+                    theme="huggingface",
+                    #live=True
+                    )
 
 demo.launch(enable_queue=True, share=True)
 
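gr.Interface wires the components positionally, so the order of `inputs` must match predict_pipeline's argument list, and each `examples` row must follow the same order. The examples rows above still appear to follow the old ordering (MegaDetector model last), while gr_mega_model_input now sits second in `inputs`; that mapping is worth verifying. A minimal sketch of the same wiring with a hypothetical two-argument function:

import gradio as gr

# hypothetical pipeline: inputs[0] -> image, inputs[1] -> threshold
def pipeline(image, threshold):
    return image

demo = gr.Interface(pipeline,
                    inputs=[gr.inputs.Image(type="pil", label="Input Image"),
                            gr.inputs.Slider(0, 1, .05, 0.8, label="Threshold")],
                    outputs=gr.outputs.Image(type="pil"),
                    examples=[['example/cat.jpg', 0.8]])  # one value per input, same order
demo.launch(enable_queue=True)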