import numpy as np
import pycocotools.mask as mask_util
from detectron2.structures import BoxMode

# Optional pose fields, in the order they are written into each result dict.
_POSE_FIELDS = ("pdim", "ptrans", "prot")


def _encode_masks_as_rle(masks):
    """Encode every mask in ``masks`` as a COCO RLE dict with a ``str`` "counts".

    RLE is used because the raw masks are large and the caller keeps the
    results for a whole dataset in memory.
    """
    rles = [
        mask_util.encode(np.array(mask[:, :, None], order="F", dtype="uint8"))[0]
        for mask in masks
    ]
    for rle in rles:
        # "counts" is a byte-stream produced by mask_util. Python 3's json
        # writer only emits strings, so it must be decoded first; utf-8 is
        # also what pycocotools/_mask.pyx uses.
        rle["counts"] = rle["counts"].decode("utf-8")
    return rles


# MotionNet: based on instances_to_coco_json and the related code in densepose
def prediction_to_json(instances, img_id: str) -> list:
    """Convert the predictions for one image into a list of result dicts.

    Every result carries "image_id", "category_id", "bbox" (converted from
    XYXY_ABS to XYWH_ABS), "score", "segmentation" (RLE) and the motion fields
    "mtype", "morigin", "maxis", "mstate" and "mstatemax".

    Optional fields:
      * If ``instances`` has "pdim", the pose fields "pdim", "ptrans" and
        "prot" are added -- each one only when it is actually present, so a
        partially populated pose no longer fails with UnboundLocalError.
      * Otherwise, if ``instances`` has "mextrinsic", it is added.

    Args:
        instances (Instances): the output of the model. ``.numpy()`` is called
            on the box tensor, so the tensors are expected to be on the CPU.
        img_id (str): the image id in COCO

    Returns:
        list[dict]: one dict per instance, in instance order.
    """
    boxes = BoxMode.convert(
        instances.pred_boxes.tensor.numpy(), BoxMode.XYXY_ABS, BoxMode.XYWH_ABS
    ).tolist()
    scores = instances.scores.tolist()
    classes = instances.pred_classes.tolist()

    # Decide once, outside the per-instance loop, which optional fields exist.
    # Pose output takes precedence over "mextrinsic", as in the original
    # if/elif chain. (The original code tagged the pose fields with "2.0.3".)
    pose = {}
    extrinsic = None
    if instances.has("pdim"):
        pose = {
            name: getattr(instances, name).tolist()
            for name in _POSE_FIELDS
            if instances.has(name)
        }
    elif instances.has("mextrinsic"):
        extrinsic = instances.mextrinsic.tolist()

    # Motion predictions that every MotionNet output is required to provide.
    mtype = instances.mtype.tolist()
    morigin = instances.morigin.tolist()
    maxis = instances.maxis.tolist()
    mstate = instances.mstate.tolist()
    mstatemax = instances.mstatemax.tolist()

    rles = _encode_masks_as_rle(instances.pred_masks)

    results = []
    for k in range(len(instances)):
        result = {
            "image_id": img_id,
            "category_id": classes[k],
            "bbox": boxes[k],
            "score": scores[k],
            "segmentation": rles[k],
        }
        # Keys are inserted in the same order as the original dict literals so
        # that serialized output stays unchanged for existing inputs.
        for name, values in pose.items():
            result[name] = values[k]
        result["mtype"] = mtype[k]
        result["morigin"] = morigin[k]
        result["maxis"] = maxis[k]
        if extrinsic is not None:
            result["mextrinsic"] = extrinsic[k]
        result["mstate"] = mstate[k]
        result["mstatemax"] = mstatemax[k]
        results.append(result)
    return results