opdmulti-demo / utilities.py
atwang's picture
semi-working demo for one part
5ceacf4
raw
history blame
3.65 kB
import numpy as np
import pycocotools.mask as mask_util
from detectron2.structures import BoxMode
# MotionNet: based on instances_to_coco_json and the related code in densepose
def prediction_to_json(instances, img_id: str):
    """Convert the model output for one image into JSON-serializable dicts.

    Args:
        instances (Instances): the output of the model. The fields
            ``pred_boxes``, ``scores``, ``pred_classes``, ``pred_masks``,
            ``mtype``, ``morigin``, ``maxis``, ``mstate`` and ``mstatemax``
            are read unconditionally; ``pdim``, ``ptrans``, ``prot`` and
            ``mextrinsic`` are optional and looked up via ``instances.has``.
        img_id (str): the image id in COCO

    Returns:
        list[dict]: one dict per instance, in densepose evaluation format.
        Every dict carries ``image_id``, ``category_id``, ``bbox`` (XYWH),
        ``score``, ``segmentation`` (RLE) and the motion fields. When
        ``pdim`` is present the dict also carries ``pdim`` plus whichever of
        ``ptrans`` / ``prot`` exist, and ``mextrinsic`` is not emitted; when
        ``pdim`` is absent, ``mextrinsic`` is emitted if present.
    """

    def column(name):
        # Per-instance values of one prediction field as plain Python lists.
        return getattr(instances, name).tolist()

    boxes = instances.pred_boxes.tensor.numpy()
    boxes = BoxMode.convert(boxes, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)

    # MotionNet has masks in the annotation.
    # Use RLE to encode the masks, because they are too large and take memory
    # since this evaluator stores outputs of the entire dataset.
    rles = [
        mask_util.encode(np.array(mask[:, :, None], order="F", dtype="uint8"))[0]
        for mask in instances.pred_masks
    ]
    for rle in rles:
        # "counts" is an array encoded by mask_util as a byte-stream. Python3's
        # json writer which always produces strings cannot serialize a
        # bytestream unless you decode it. Thankfully, utf-8 works out (which
        # is also what the pycocotools/_mask.pyx does).
        rle["counts"] = rle["counts"].decode("utf-8")

    # Ordered (key, per-instance values) pairs. The optional-field checks are
    # evaluated once here instead of once per instance, and the order mirrors
    # the key order the result dicts have always had.
    columns = [
        ("category_id", column("pred_classes")),
        ("bbox", boxes.tolist()),
        ("score", column("scores")),
        ("segmentation", rles),
    ]

    has_pdim = instances.has("pdim")
    if has_pdim:
        # Only emit the pose fields that actually exist; previously a missing
        # "ptrans" or "prot" alongside "pdim" crashed with an unbound local.
        columns.extend(
            (name, column(name))
            for name in ("pdim", "ptrans", "prot")
            if instances.has(name)
        )

    columns.extend((name, column(name)) for name in ("mtype", "morigin", "maxis"))

    # "mextrinsic" is only reported when "pdim" is absent (kept as-is for
    # backward compatibility of the output layout).
    if not has_pdim and instances.has("mextrinsic"):
        columns.append(("mextrinsic", column("mextrinsic")))

    columns.extend((name, column(name)) for name in ("mstate", "mstatemax"))

    results = []
    for k in range(len(instances)):
        result = {"image_id": img_id}
        for key, values in columns:
            result[key] = values[k]
        results.append(result)
    return results