vchiang001 committed on
Commit 07db84b • 1 Parent(s): d7c24ee

Update app.py

Files changed (1)
  1. app.py +289 -67
app.py CHANGED
@@ -1,84 +1,306 @@
  # Copied megadetector section from https://huggingface.co/spaces/hlydecker/MegaDetector_v5
+ # Copied from https://huggingface.co/spaces/sofmi/MegaDetector_DLClive/blob/main/app.py
  # Copied from https://huggingface.co/spaces/Neslihan/megadetector_dlcmodels/blob/main/app.py
  
- print("before import")
+
  import gradio as gr
- import json
- import os
- import matplotlib.pyplot as plt
- import numpy as np
- from PIL import Image
+
+ import torch
+ import torchvision
  from dlclive import DLCLive, Processor
- from numpy import savetxt
- import PIL
- print("after import")
-
- # A method that allows using dlc live, but with different models, saves poses, and plots the poses onto image
- def dlclive_pose(model, crop_np, crop, index,dlc_proc):
-     dlc_live = DLCLive(model, processor=dlc_proc)
-     dlc_live.init_inference(crop_np)
-     keypts = dlc_live.get_pose(crop_np)
-     xpose = []
-     ypose = []
-     for key in keypts[:,2]:
-         # if key > 0.05: # which value do we need here?
-         i = np.where(keypts[:,2]==key)
-         xpose.append(keypts[i,0])
-         ypose.append(keypts[i,1])
-     plt.imshow(crop)
-     plt.scatter(xpose[:], ypose[:], 40, color='cyan')
-
-     canvas = plt.gca().figure.canvas
-     canvas.draw()
-     image = PIL.Image.frombytes('RGB', canvas.get_width_height(), canvas.tostring_rgb())
-
-     plt.clf()
-     return image
-
- def classify_image(img, file):
-
-     primate_face_model = 'model_weights/DLC_FacialLandmarks_resnet_50_iteration-1_shuffle-1'
-     human_model = 'model_weights/DLC_human_dancing_resnet_101_iteration-0_shuffle-1'
-
-     with open(file.name, 'r') as f:
-         detection_results = json.load(f)
-
-     dlc_proc = Processor()
-
-     # Assuming there is only 1 detection on the output
-     img_data = detection_results["images"][0]
-
-     output_images = []
-
-     for detections_dict in img_data["detections"]:
-         index = img_data["detections"].index(detections_dict)
-         if detections_dict["conf"] > 0.8:
-             x1, y1,w_box, h_box = detections_dict["bbox"]
-             ymin,xmin,ymax, xmax = y1, x1, y1 + h_box, x1 + w_box
-
-             imageWidth=img.size[0]
-             imageHeight= img.size[1]
-             area = (xmin * imageWidth, ymin * imageHeight, xmax * imageWidth,
-                     ymax * imageHeight)
-             crop = img.crop(area)
-             crop_np = np.asarray(crop)
-
-             if detections_dict["category"] == "1":
-                 selected_model = primate_face_model
-             elif detections_dict["category"] == "2":
-                 selected_model = human_model
-
-             # Until we know how to dynamically add output element to gradio, just return the first image
-             output_images.append(dlclive_pose(selected_model, crop_np, crop, index, dlc_proc))
-
-     return output_images[0], output_images[1] # lol
-
- input_image = gr.inputs.Image(type="pil", label="Input Image")
- input_file = gr.inputs.File(label="output.json")
-
- # Fake it till we make it, we know our example has 2 outputs
- outputs = [gr.outputs.Image(type="pil", label="Output Image"), gr.outputs.Image(type="pil", label="Output Image")]
-
- gr.Interface(fn=classify_image, inputs=[input_image, input_file], outputs=outputs, theme="huggingface").launch()
+
+ from PIL import Image
+ from PIL import ImageFont
+ from PIL import ImageDraw
+
+ import numpy as np
+ import math
+ # import json
+ import os
+ import yaml
+
+ # import pdb
+
+ #########################################
+
+ def draw_keypoints_on_image(image,
+                             keypoints,
+                             map_label_id_to_str,
+                             color='red',
+                             radius=2,
+                             use_normalized_coordinates=True,
+                             ):
+     """Draws keypoints on an image.
+     Modified from:
+     https://www.programcreek.com/python/?code=fjchange%2Fobject_centric_VAD%2Fobject_centric_VAD-master%2Fobject_detection%2Futils%2Fvisualization_utils.py
+     Args:
+         image: a PIL.Image object.
+         keypoints: a numpy array with shape [num_keypoints, 2].
+         color: color to draw the keypoints with. Default is red.
+         radius: keypoint radius. Default value is 2.
+         use_normalized_coordinates: if True (default), treat keypoint values as
+             relative to the image. Otherwise treat them as absolute.
+     """
+     # get a drawing context
+     draw = ImageDraw.Draw(image)
+     # font = ImageFont.truetype("sans-serif.ttf", 16)
+
+     im_width, im_height = image.size
+     keypoints_x = [k[0] for k in keypoints]
+     keypoints_y = [k[1] for k in keypoints]
+
+     # adjust keypoints coords if required
+     if use_normalized_coordinates:
+         keypoints_x = tuple([im_width * x for x in keypoints_x])
+         keypoints_y = tuple([im_height * y for y in keypoints_y])
+
+     # draw ellipses around keypoints and add string labels
+     font = ImageFont.truetype("Amiko-Regular.ttf", 8) # font = ImageFont.truetype(<font-file>, <font-size>)
+     for i, (keypoint_x, keypoint_y) in enumerate(zip(keypoints_x, keypoints_y)):
+         draw.ellipse([(keypoint_x - radius, keypoint_y - radius),
+                       (keypoint_x + radius, keypoint_y + radius)],
+                      outline=color, fill=color)
+
+         # add string labels around keypoints
+         # draw.text((x, y),"Sample Text",(r,g,b))
+         draw.text((keypoint_x + radius, keypoint_y + radius), #(0.5*im_width, 0.5*im_height), #-------
+                   map_label_id_to_str[i], #"Sample Text",
+                   (255,0,0), # rgb
+                   font=font)
+
+ ############################################
+
+ # Predict detections with MegaDetector v5a model
+ def predict_md(im, size=640):
+     # resize image
+     g = (size / max(im.size)) # gain
+     im = im.resize((int(x * g) for x in im.size), Image.ANTIALIAS) # resize
+
+     ## detect objects
+     results = MD_model(im) # inference # vars(results).keys()= dict_keys(['imgs', 'pred', 'names', 'files', 'times', 'xyxy', 'xywh', 'xyxyn', 'xywhn', 'n', 't', 's'])
+     results.render() # updates results.imgs with boxes and labels
+
+     return results #Image.fromarray(results.imgs[0]) ---return animals only?
+
+ def crop_animal_detections(yolo_results,
+                            likelihood_th):
+     ## crop if animal and return list of crops
+     list_labels_as_str = yolo_results.names #['animal', 'person', 'vehicle']
+     list_np_animal_crops = []
+
+     # for every image
+     for img, det_array in zip(yolo_results.imgs,
+                               yolo_results.xyxy):
+
+         # for every detection
+         for j in range(det_array.shape[0]):
+
+             # compute coords around bbox rounded to the nearest integer (for pasting later)
+             xmin_rd = int(math.floor(det_array[j,0])) # int() should suffice?
+             ymin_rd = int(math.floor(det_array[j,1]))
+
+             xmax_rd = int(math.ceil(det_array[j,2]))
+             ymax_rd = int(math.ceil(det_array[j,3]))
+
+             pred_llk = det_array[j,4] #-----TODO: filter based on likelihood?
+             pred_label = det_array[j,5]
+
+             if (pred_label == list_labels_as_str.index('animal')) and \
+                (pred_llk >= likelihood_th):
+                 area = (xmin_rd, ymin_rd, xmax_rd, ymax_rd)
+
+                 crop = Image.fromarray(img).crop(area)
+                 crop_np = np.asarray(crop)
+
+                 # add to list
+                 list_np_animal_crops.append(crop_np)
+
+     # for detections_dict in img_data["detections"]:
+     #     index = img_data["detections"].index(detections_dict)
+     #     if detections_dict["conf"] > 0.8:
+     #         x1, y1,w_box, h_box = detections_dict["bbox"]
+     #         ymin,xmin,ymax, xmax = y1, x1, y1 + h_box, x1 + w_box
+
+     #         imageWidth=img.size[0]
+     #         imageHeight= img.size[1]
+     #         area = (xmin * imageWidth, ymin * imageHeight, xmax * imageWidth,
+     #                 ymax * imageHeight)
+     #         crop = img.crop(area)
+     #         crop_np = np.asarray(crop)
+     #
+     #         if detections_dict["category"] == "1":
+     return list_np_animal_crops
+
+ def predict_dlc(list_np_crops,
+                 kpts_likelihood_th,
+                 DLCmodel,
+                 dlc_proc):
+
+     # run dlc thru list of crops
+     dlc_live = DLCLive(DLCmodel, processor=dlc_proc)
+     dlc_live.init_inference(list_np_crops[0])
+
+     list_kpts_per_crop = []
+     np_aux = np.empty((1,3)) # can I avoid hardcoding?
+     for crop in list_np_crops:
+         # scale crop here?
+         keypts_xyp = dlc_live.get_pose(crop) # third column is llk!
+         # set kpts below threhsold to nan
+         keypts_xyp[keypts_xyp[:,-1] < kpts_likelihood_th,:] = np_aux.fill(np.nan)
+         # add kpts of this crop to list
+         list_kpts_per_crop.append(keypts_xyp)
+
+     return list_kpts_per_crop
+
+
+ def predict_pipeline(img_input,
+                      model_input_str,
+                      flag_dlc_only,
+                      bbox_likelihood_th,
+                      kpts_likelihood_th):
+
+     ############################################################
+     ## Get DLC model and labels as strings
+     if model_input_str == 'full_cat':
+         path_to_DLCmodel = "model/DLC_Cat_resnet_50_iteration-0_shuffle-0"
+         pose_cfg_path = os.path.join(path_to_DLCmodel,'pose_cfg.yaml')
+     elif model_input_str == 'full_dog':
+         path_to_DLCmodel = "model/DLC_Dog_resnet_50_iteration-0_shuffle-0"
+         pose_cfg_path = os.path.join(path_to_DLCmodel,'pose_cfg.yaml')
+     elif model_input_str == 'full_monkey':
+         path_to_DLCmodel = "DLC_monkey_resnet_50_iteration-0_shuffle-1"
+         pose_cfg_path = os.path.join(path_to_DLCmodel,'pose_cfg.yaml')
+     elif model_input_str == 'full_human':
+         path_to_DLCmodel = "DLC_human_dancing_resnet_101_iteration-0_shuffle-1"
+         pose_cfg_path = os.path.join(path_to_DLCmodel,'pose_cfg.yaml')
+     elif model_input_str == 'monkey_face':
+         path_to_DLCmodel = "model/DLC_FacialLandmarks_resnet_50_iteration-1_shuffle-1"
+         pose_cfg_path = os.path.join(path_to_DLCmodel,'pose_cfg.yaml')
+
+     # read pose cfg as dict
+     with open(pose_cfg_path, "r") as stream:
+         pose_cfg_dict = yaml.safe_load(stream)
+     map_label_id_to_str = dict([(k,v) for k,v in zip([el[0] for el in pose_cfg_dict['all_joints']], # pose_cfg_dict['all_joints'] is a list of one-element lists,
+                                                      pose_cfg_dict['all_joints_names'])])
+
+     ############################################################
+     # ### Run Megadetector
+     md_results = predict_md(img_input) #Image.fromarray(results.imgs[0])
+
+     ################################################################
+     # Obtain animal crops for bboxes with confidence above th
+     list_crops = crop_animal_detections(md_results,
+                                         bbox_likelihood_th)
+
+     ##############################################################
+     # Run DLC
+     dlc_proc = Processor()
+
+     # if required: ignore MD crops and run DLC on full image [mostly for testing]
+     if flag_dlc_only:
+         # compute kpts on input img
+         list_kpts_per_crop = predict_dlc([np.asarray(img_input)], #list_crops,--------
+                                          kpts_likelihood_th,
+                                          path_to_DLCmodel,
+                                          dlc_proc)
+         # draw kpts on input img
+         draw_keypoints_on_image(img_input,
+                                 list_kpts_per_crop[0], # a numpy array with shape [num_keypoints, 2].
+                                 map_label_id_to_str,
+                                 color='red',
+                                 radius=2,
+                                 use_normalized_coordinates=False)
+         return img_input
+
+     else:
+         # Compute kpts for each crop
+         list_kpts_per_crop = predict_dlc(list_crops,
+                                          kpts_likelihood_th,
+                                          path_to_DLCmodel,
+                                          dlc_proc)
+
+         # Produce final image
+         img_background = Image.fromarray(md_results.imgs[0]) # img_input?
+         for ic, (np_crop, kpts_crop) in enumerate(zip(list_crops,
+                                                       list_kpts_per_crop)):
+
+             ## Draw keypts on crop
+             img_crop = Image.fromarray(np_crop)
+             draw_keypoints_on_image(img_crop,
+                                     kpts_crop, # a numpy array with shape [num_keypoints, 2].
+                                     map_label_id_to_str,
+                                     color='red',
+                                     radius=2,
+                                     use_normalized_coordinates=False, # if True, then I should use md_results.xyxyn
+                                     )
+
+             ## Paste crop in original image
+             # https://pillow.readthedocs.io/en/stable/reference/Image.html#PIL.Image.Image.paste
+             img_background.paste(img_crop,
+                                  box = tuple([int(math.floor(t)) for t in md_results.xyxy[0][ic,:2]]))
+
+         return img_background #Image.fromarray(list_crops[0]) #Image.fromarray(md_results.imgs[0]) #list_annotated_crops #
+
+
+ ##########################################################
+ # Get MegaDetector model
+ # TODO: Allow user selectable model?
+ # models = ["model_weights/md_v5a.0.0.pt","model_weights/md_v5b.0.0.pt"]
+ MD_model = torch.hub.load('ultralytics/yolov5', 'custom', "megadet_model/md_v5a.0.0.pt")
+
+ ####################################################
+ # Create user interface and launch
+ gr_image_input = gr.inputs.Image(type="pil", label="Input Image")
+ gr_image_output = gr.outputs.Image(type="pil", label="Output Image")
+ gr_dlc_model_input = gr.inputs.Dropdown(choices=['full_cat','full_dog'], # choices
+                                         default='full_cat', # default option
+                                         type='value', # Type of value to be returned by component. "value" returns the string of the choice selected, "index" returns the index of the choice selected.
+                                         label='Select DLC model')
+ gr_dlc_only_checkbox = gr.inputs.Checkbox(False,
+                                           label='Run DLClive only, directly on input image?')
+
+ gr_slider_conf_bboxes = gr.inputs.Slider(0,1,.05,0.8,
+                                          label='Set confidence threshold for animal detections')
+ gr_slider_conf_keypoints = gr.inputs.Slider(0,1,.05,0,
+                                             label='Set confidence threshold for keypoints')
+ #image = gr.inputs.Image(type="pil", label="Input Image")
+ #chosen_model = gr.inputs.Dropdown(choices = models, value = "model_weights/md_v5a.0.0.pt",type = "value", label="Model Weight")
+ #size = 640
+
+ gr_title = "MegaDetector v5 + DLClive"
+ gr_description = "Detect and estimate the pose of animals in camera trap images, using MegaDetector v5a + DeepLabCut-live. \
+                   Builds up on work from <a href='https://huggingface.co/spaces/hlydecker/MegaDetector_v5'>hlydecker/MegaDetector_v5</a>"
+ # article = "<p style='text-align: center'>This app makes predictions using a YOLOv5x6 model that was trained to detect animals, humans, and vehicles in camera trap images; find out more about the project on <a href='https://github.com/microsoft/CameraTraps'>GitHub</a>. This app was built by Henry Lydecker but really depends on code and models developed by <a href='http://ecologize.org/'>Ecologize</a> and <a href='http://aka.ms/aiforearth'>Microsoft AI for Earth</a>. Find out more about the YOLO model from the original creator, <a href='https://pjreddie.com/darknet/yolo/'>Joseph Redmon</a>. YOLOv5 is a family of compound-scaled object detection models trained on the COCO dataset and developed by Ultralytics, and includes simple functionality for Test Time Augmentation (TTA), model ensembling, hyperparameter evolution, and export to ONNX, CoreML and TFLite. <a href='https://github.com/ultralytics/yolov5'>Source code</a> | <a href='https://pytorch.org/hub/ultralytics_yolov5'>PyTorch Hub</a></p>"
+ # examples = [['data/Macropod.jpg'], ['data/koala2.jpg'],['data/cat.jpg'],['data/BrushtailPossum.jpg']]
+
+ gr.Interface(predict_pipeline,
+              inputs=[gr_image_input,
+                      gr_dlc_model_input,
+                      gr_dlc_only_checkbox,
+                      gr_slider_conf_bboxes,
+                      gr_slider_conf_keypoints],
+              outputs=gr_image_output,
+              title=gr_title,
+              description=gr_description,
+              theme="huggingface").launch(enable_queue=True)
+
+
+ # def dlclive_pose(model, crop_np, crop, fname, index,dlc_proc):
+ #     dlc_live = DLCLive(model, processor=dlc_proc)
+ #     dlc_live.init_inference(crop_np)
+ #     keypts = dlc_live.get_pose(crop_np)
+ #     savetxt(str(fname)+ '_' + str(index) + '.csv' , keypts, delimiter=',')
+ #     xpose = []
+ #     ypose = []
+ #     for key in keypts[:,2]:
+ #         # if key > 0.05: # which value do we need here?
+ #         i = np.where(keypts[:,2]==key)
+ #         xpose.append(keypts[i,0])
+ #         ypose.append(keypts[i,1])
+ #     plt.imshow(crop)
+ #     plt.scatter(xpose[:], ypose[:], 40, color='cyan')
+ #     plt.savefig(str(fname)+ '_' + str(index) + '.png')
+ #     plt.show()
+ #     plt.clf()
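
Editor's note: the sketch below is not part of the commit; it is a minimal, hypothetical smoke test of the new predict_pipeline() outside the Gradio UI. It assumes the functions and the MD_model global from the updated app.py are already in scope (for example, pasted into a Python session after torch.hub has loaded megadet_model/md_v5a.0.0.pt), that the DLC model folders referenced above exist locally, and that example.jpg is a placeholder test image.

    from PIL import Image

    img = Image.open("example.jpg")  # placeholder RGB test image
    annotated = predict_pipeline(img,
                                 model_input_str='full_cat',   # one of the dropdown choices
                                 flag_dlc_only=False,          # crop with MegaDetector, then run DLClive on each crop
                                 bbox_likelihood_th=0.8,       # same default as the bbox confidence slider
                                 kpts_likelihood_th=0.0)       # same default as the keypoint confidence slider
    annotated.save("example_annotated.png")  # PIL image with boxes and keypoints drawn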