File size: 3,651 Bytes
5ceacf4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import numpy as np
import pycocotools.mask as mask_util
from detectron2.structures import BoxMode


# MotionNet: based on instances_to_coco_json and the related code in densepose
def prediction_to_json(instances, img_id: str):
    """
    Convert the predictions for one image into a list of JSON-serializable dicts.

    Every result carries "image_id", "category_id", "bbox" (converted from
    XYXY_ABS to XYWH_ABS), "score", "segmentation" (RLE-encoded mask) and the
    motion fields "mtype", "morigin", "maxis", "mstate" and "mstatemax".
    Optional fields are added depending on what ``instances`` holds:

    * if it has "pdim": "pdim", "ptrans" and "prot" are added
      (and "mextrinsic" is not emitted, even if present);
    * otherwise, if it has "mextrinsic": "mextrinsic" is added.

    Args:
        instances (Instances): the output of the model
        img_id (str): the image id in COCO

    Returns:
        list[dict]: one dict per predicted instance, in instance order
    """
    boxes = BoxMode.convert(
        instances.pred_boxes.tensor.numpy(), BoxMode.XYXY_ABS, BoxMode.XYWH_ABS
    ).tolist()

    # MotionNet has masks in the annotation.
    # Use RLE to encode the masks, because they are too large and take memory
    # since this evaluator stores outputs of the entire dataset.
    rles = []
    for mask in instances.pred_masks:
        rle = mask_util.encode(np.array(mask[:, :, None], order="F", dtype="uint8"))[0]
        # "counts" is an array encoded by mask_util as a byte-stream. Python3's
        # json writer which always produces strings cannot serialize a bytestream
        # unless you decode it. Thankfully, utf-8 works out (which is also what
        # the pycocotools/_mask.pyx does).
        rle["counts"] = rle["counts"].decode("utf-8")
        rles.append(rle)

    # The pose fields take precedence over "mextrinsic": when "pdim" is
    # present, "mextrinsic" is deliberately left out of the results.
    has_pose = instances.has("pdim")
    has_extrinsic = not has_pose and instances.has("mextrinsic")

    # Per-instance columns, listed in the order the keys appear in each result.
    columns = {
        "category_id": instances.pred_classes.tolist(),
        "bbox": boxes,
        "score": instances.scores.tolist(),
        "segmentation": rles,
    }
    if has_pose:
        # "ptrans" and "prot" are read directly here so that a missing
        # companion field fails at this access (presumably with an error
        # raised by Instances -- confirm) instead of surfacing later as an
        # unbound local variable inside the result loop.
        columns["pdim"] = instances.pdim.tolist()
        columns["ptrans"] = instances.ptrans.tolist()
        columns["prot"] = instances.prot.tolist()
    columns["mtype"] = instances.mtype.tolist()
    columns["morigin"] = instances.morigin.tolist()
    columns["maxis"] = instances.maxis.tolist()
    if has_extrinsic:
        columns["mextrinsic"] = instances.mextrinsic.tolist()
    columns["mstate"] = instances.mstate.tolist()
    columns["mstatemax"] = instances.mstatemax.tolist()

    results = []
    for k in range(len(instances)):
        result = {"image_id": img_id}
        for name, values in columns.items():
            result[name] = values[k]
        results.append(result)
    return results