I am closer to understanding this model
Browse files- images/9999999_00783_d_0000358.jpg +0 -0
- understand.py +63 -2
images/9999999_00783_d_0000358.jpg
ADDED
understand.py
CHANGED
@@ -9,7 +9,7 @@ from transformers import DetrFeatureExtractor, DetrForSegmentation, MaskFormerIm
|
|
9 |
# from transformers.models.detr.feature_extraction_detr import rgb_to_id
|
10 |
from transformers.image_transforms import rgb_to_id
|
11 |
|
12 |
-
TEST_IMAGE = Image.open(r"images/
|
13 |
MODEL_NAME_DETR = "facebook/detr-resnet-50-panoptic"
|
14 |
MODEL_NAME_MASKFORMER = "facebook/maskformer-swin-large-coco"
|
15 |
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
@@ -58,7 +58,10 @@ outputs = model(**inputs) # <class 'transformers.models.maskformer.modeling_mask
|
|
58 |
|
59 |
results = processor.post_process_panoptic_segmentation(outputs, target_sizes=[image.size[::-1]])[0]
|
60 |
# <class 'dict'>
|
61 |
-
#
|
|
|
|
|
|
|
62 |
|
63 |
|
64 |
# From Tutorial (Box 79)
|
@@ -69,3 +72,61 @@ results = processor.post_process_panoptic_segmentation(outputs, target_sizes=[im
|
|
69 |
# visual_mask = (mask * 255).astype(np.uint8)
|
70 |
# return Image.fromarray(visual_mask)
|
71 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
# from transformers.models.detr.feature_extraction_detr import rgb_to_id
|
10 |
from transformers.image_transforms import rgb_to_id
|
11 |
|
12 |
+
TEST_IMAGE = Image.open(r"images/9999999_00783_d_0000358.jpg")
|
13 |
MODEL_NAME_DETR = "facebook/detr-resnet-50-panoptic"
|
14 |
MODEL_NAME_MASKFORMER = "facebook/maskformer-swin-large-coco"
|
15 |
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
|
|
58 |
|
59 |
results = processor.post_process_panoptic_segmentation(outputs, target_sizes=[image.size[::-1]])[0]
|
60 |
# <class 'dict'>
|
61 |
+
# Keys: dict_keys(['segmentation', 'segments_info'])
|
62 |
+
# type(results["segments_info"]) --> list
|
63 |
+
# type(results["segmentation"]) --> <class 'torch.Tensor'>
|
64 |
+
|
65 |
|
66 |
|
67 |
# From Tutorial (Box 79)
|
|
|
72 |
# visual_mask = (mask * 255).astype(np.uint8)
|
73 |
# return Image.fromarray(visual_mask)
|
74 |
|
75 |
+
# How to get ID
|
76 |
+
|
77 |
+
"""
|
78 |
+
>>> model.config.id2label
|
79 |
+
{0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus', 6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light', 10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter',
|
80 |
+
13: 'bench', 14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', 20: 'elephant', 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag',
|
81 |
+
27: 'tie', 28: 'suitcase', 29: 'frisbee', 30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite', 34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket',
|
82 |
+
39: 'bottle', 40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl', 46: 'banana', 47: 'apple', 48: 'sandwich', 49: 'orange', 50: 'broccoli', 51: 'carrot', 52: 'hot dog', 53: 'pizza',
|
83 |
+
54: 'donut', 55: 'cake', 56: 'chair', 57: 'couch', 58: 'potted plant', 59: 'bed', 60: 'dining table', 61: 'toilet', 62: 'tv', 63: 'laptop', 64: 'mouse', 65: 'remote', 66: 'keyboard', 67: 'cell phone',
|
84 |
+
68: 'microwave', 69: 'oven', 70: 'toaster', 71: 'sink', 72: 'refrigerator', 73: 'book', 74: 'clock', 75: 'vase', 76: 'scissors', 77: 'teddy bear', 78: 'hair drier', 79: 'toothbrush', 80: 'banner', 81: 'blanket',
|
85 |
+
82: 'bridge', 83: 'cardboard', 84: 'counter', 85: 'curtain', 86: 'door-stuff', 87: 'floor-wood', 88: 'flower', 89: 'fruit', 90: 'gravel', 91: 'house', 92: 'light', 93: 'mirror-stuff', 94: 'net', 95: 'pillow',
|
86 |
+
96: 'platform', 97: 'playingfield', 98: 'railroad', 99: 'river', 100: 'road', 101: 'roof', 102: 'sand', 103: 'sea', 104: 'shelf', 105: 'snow', 106: 'stairs', 107: 'tent', 108: 'towel', 109: 'wall-brick',
|
87 |
+
110: 'wall-stone', 111: 'wall-tile', 112: 'wall-wood', 113: 'water-other', 114: 'window-blind', 115: 'window-other', 116: 'tree-merged', 117: 'fence-merged', 118: 'ceiling-merged', 119: 'sky-other-merged',
|
88 |
+
120: 'cabinet-merged', 121: 'table-merged', 122: 'floor-other-merged', 123: 'pavement-merged', 124: 'mountain-merged', 125: 'grass-merged', 126: 'dirt-merged', 127: 'paper-merged', 128: 'food-other-merged',
|
89 |
+
129: 'building-other-merged', 130: 'rock-merged', 131: 'wall-other-merged', 132: 'rug-merged'}
|
90 |
+
>>> model.config.id2label[123]
|
91 |
+
'pavement-merged'
|
92 |
+
>>> results["segments_info"][1]
|
93 |
+
{'id': 2, 'label_id': 123, 'was_fused': False, 'score': 0.995813}
|
94 |
+
"""
|
95 |
+
# NOTE: the labels above did not match the COCO-Stuff label list here ... https://github.com/nightrome/cocostuff/blob/master/labels.md
|
96 |
+
# This notebook was the closest to helping: https://github.com/NielsRogge/Transformers-Tutorials/blob/master/MaskFormer/Inference/Inference_with_MaskFormer_for_semantic_%2B_panoptic_segmentation.ipynb
|
97 |
+
|
98 |
+
"""
|
99 |
+
>>> Image.fromarray((mask * 255).cpu().numpy().astype(np.uint8))
|
100 |
+
<PIL.Image.Image image mode=L size=2000x1500 at 0x7F07773691C0>
|
101 |
+
>>> temp = Image.fromarray((mask * 255).cpu().numpy().astype(np.uint8))
|
102 |
+
"""
|
103 |
+
|
104 |
+
"""
|
105 |
+
>>> mask = (results["segmentation"].cpu().numpy == 4)
|
106 |
+
>>> mask = (results["segmentation"].cpu().numpy() == 4)
|
107 |
+
>>> mask
|
108 |
+
array([[False, False, False, ..., False, False, False],
|
109 |
+
[False, False, False, ..., False, False, False],
|
110 |
+
[False, False, False, ..., False, False, False],
|
111 |
+
...,
|
112 |
+
[False, False, False, ..., False, False, False],
|
113 |
+
[False, False, False, ..., False, False, False],
|
114 |
+
[False, False, False, ..., False, False, False]])
|
115 |
+
>>> visual_mask = (mask * 255).astype(np.uint8)
|
116 |
+
>>> visual_mask = Image.fromarray(visual_mask)
|
117 |
+
>>> plt.imshow(visual_mask)
|
118 |
+
<matplotlib.image.AxesImage object at 0x7f0761e78040>
|
119 |
+
>>> plt.show()
|
120 |
+
"""
|
121 |
+
|
122 |
+
"""
|
123 |
+
>>> mask = (results["segmentation"].cpu().numpy() == 1)
|
124 |
+
>>> visual_mask = (mask*255).astype(np.uint8)
|
125 |
+
>>> visual_mask = Image.fromarray(visual_mask)
|
126 |
+
>>> plt.imshow(visual_mask)
|
127 |
+
<matplotlib.image.AxesImage object at 0x7f0760298550>
|
128 |
+
>>> plt.show()
|
129 |
+
>>> results["segments_info"][0]
|
130 |
+
{'id': 1, 'label_id': 25, 'was_fused': False, 'score': 0.998022}
|
131 |
+
>>>
|
132 |
+
"""
|