Commit 1914862
shamita committed
Parent(s): 7e93dbb

model stack, reqs, app.py
Files changed:
- __pycache__/coco.cpython-36.pyc +0 -0
- app.py +56 -0
- coco.py +522 -0
- data/pretrained_weights.h5 +3 -0
- data/test0.jpg +0 -0
- mrcnn/__init__.py +1 -0
- mrcnn/__pycache__/__init__.cpython-310.pyc +0 -0
- mrcnn/__pycache__/__init__.cpython-36.pyc +0 -0
- mrcnn/__pycache__/cocoeval.cpython-310.pyc +0 -0
- mrcnn/__pycache__/cocoeval.cpython-36.pyc +0 -0
- mrcnn/__pycache__/config.cpython-310.pyc +0 -0
- mrcnn/__pycache__/config.cpython-36.pyc +0 -0
- mrcnn/__pycache__/dataset.cpython-310.pyc +0 -0
- mrcnn/__pycache__/dataset.cpython-36.pyc +0 -0
- mrcnn/__pycache__/evaluate.cpython-310.pyc +0 -0
- mrcnn/__pycache__/evaluate.cpython-36.pyc +0 -0
- mrcnn/__pycache__/model.cpython-310.pyc +0 -0
- mrcnn/__pycache__/model.cpython-36.pyc +0 -0
- mrcnn/__pycache__/utils.cpython-310.pyc +0 -0
- mrcnn/__pycache__/utils.cpython-36.pyc +0 -0
- mrcnn/__pycache__/visualize.cpython-36.pyc +0 -0
- mrcnn/cocoeval.py +535 -0
- mrcnn/config.py +193 -0
- mrcnn/dataset.py +156 -0
- mrcnn/evaluate.py +94 -0
- mrcnn/model.py +0 -0
- mrcnn/parallel_model.py +173 -0
- mrcnn/utils.py +839 -0
- mrcnn/visualize.py +452 -0
- requirements.txt +106 -0
- setup.py +68 -0
- test0.jpg +0 -0
__pycache__/coco.cpython-36.pyc
ADDED
Binary file (12.7 kB)
app.py
ADDED
@@ -0,0 +1,56 @@
import gradio as gr

import os
import sys
import numpy as np
import skimage.io
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
from pycocotools import mask as maskUtils
import coco
from mrcnn.evaluate import build_coco_results, evaluate_coco
from mrcnn.dataset import MappingChallengeDataset
from mrcnn import visualize
from mrcnn.config import Config
from mrcnn import model as modellib, utils
import warnings
warnings.filterwarnings("ignore")


PRETRAINED_MODEL_PATH = os.path.join("data/", "pretrained_weights.h5")
MODEL_DIR = os.path.join("logs")


class InferenceConfig(coco.CocoConfig):
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1
    NUM_CLASSES = 1 + 1  # 1 Background + 1 Building
    IMAGE_MAX_DIM = 320
    IMAGE_MIN_DIM = 320
    NAME = "crowdai-mapping-challenge"

config = InferenceConfig()


model = modellib.MaskRCNN(mode="inference", model_dir=MODEL_DIR, config=config)
model_path = PRETRAINED_MODEL_PATH
model.load_weights(model_path, by_name=True)

class_names = ['BG', 'building']  # In our case, we have 1 class for the background, and 1 class for building


def classify_image(img):
    random_image = skimage.io.imread(img)
    predictions = model.detect([random_image] * config.BATCH_SIZE, verbose=1)  # We are replicating the same image to fill up the batch_size
    p = predictions[0]
    image = visualize.display_instances(random_image, p['rois'], p['masks'], p['class_ids'], class_names, p['scores'])
    return image


image = gr.inputs.Image(shape=(320, 320))
out_image = gr.outputs.Image(shape=(320, 320))
examples = ['test0.jpg']

intf = gr.Interface(fn=classify_image, inputs=image, outputs=out_image, examples=examples)
intf.launch(inline=False)
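
For reference, config.BATCH_SIZE in classify_image comes from the base Config class, which (as the comment in coco.py's InferenceConfig further down also notes) derives the batch size from GPU_COUNT * IMAGES_PER_GPU; that is why the single uploaded image is replicated to fill the batch. A minimal sketch of that relationship, with TinyConfig as an illustrative stand-in rather than the real Config class:

# Stand-in for mrcnn.config.Config; only the batch-size arithmetic is shown.
class TinyConfig:
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1

    def __init__(self):
        # Assumed to mirror how the real Config derives BATCH_SIZE.
        self.BATCH_SIZE = self.IMAGES_PER_GPU * self.GPU_COUNT


cfg = TinyConfig()
batch = ["img.jpg"] * cfg.BATCH_SIZE  # replicate one input to fill the batch, as app.py does
print(len(batch))  # 1 with the settings above
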
coco.py
ADDED
@@ -0,0 +1,522 @@
"""
Mask R-CNN
Configurations and data loading code for MS COCO.

Copyright (c) 2017 Matterport, Inc.
Licensed under the MIT License (see LICENSE for details)
Written by Waleed Abdulla

------------------------------------------------------------

Usage: import the module (see Jupyter notebooks for examples), or run from
       the command line as such:

    # Train a new model starting from pre-trained COCO weights
    python3 coco.py train --dataset=/path/to/coco/ --model=coco

    # Train a new model starting from ImageNet weights
    python3 coco.py train --dataset=/path/to/coco/ --model=imagenet

    # Continue training a model that you had trained earlier
    python3 coco.py train --dataset=/path/to/coco/ --model=/path/to/weights.h5

    # Continue training the last model you trained
    python3 coco.py train --dataset=/path/to/coco/ --model=last

    # Run COCO evaluation on the last model you trained
    python3 coco.py evaluate --dataset=/path/to/coco/ --model=last
"""

import os
import time
import numpy as np

# Download and install the Python COCO tools from https://github.com/waleedka/coco
#
#   pip install git+https://github.com/waleedka/coco.git#subdirectory=PythonAPI
#
# That's a fork from the original https://github.com/pdollar/coco with a bug
# fix for Python 3.
# I submitted a pull request https://github.com/cocodataset/cocoapi/pull/50
# If the PR is merged then use the original repo.
# Note: Edit PythonAPI/Makefile and replace "python" with "python3".
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
from pycocotools import mask as maskUtils

import zipfile
import urllib.request
import shutil

from mrcnn.config import Config
import mrcnn.utils as utils
import mrcnn.model as modellib

# Root directory of the project
ROOT_DIR = os.getcwd()

# Path to trained weights file
COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5")

# Directory to save logs and model checkpoints, if not provided
# through the command line argument --logs
DEFAULT_LOGS_DIR = os.path.join(ROOT_DIR, "logs")
DEFAULT_DATASET_YEAR = "2014"

############################################################
#  Configurations
############################################################


class CocoConfig(Config):
    """Configuration for training on MS COCO.
    Derives from the base Config class and overrides values specific
    to the COCO dataset.
    """
    # Give the configuration a recognizable name
    NAME = "coco"

    # We use a GPU with 12GB memory, which can fit two images.
    # Adjust down if you use a smaller GPU.
    IMAGES_PER_GPU = 2

    # Uncomment to train on 8 GPUs (default is 1)
    # GPU_COUNT = 8

    # Number of classes (including background)
    NUM_CLASSES = 1 + 80  # COCO has 80 classes


############################################################
#  Dataset
############################################################

class CocoDataset(utils.Dataset):
    def load_coco(self, dataset_dir, subset, year=DEFAULT_DATASET_YEAR, class_ids=None,
                  class_map=None, return_coco=False, auto_download=False):
        """Load a subset of the COCO dataset.
        dataset_dir: The root directory of the COCO dataset.
        subset: What to load (train, val, minival, valminusminival)
        year: What dataset year to load (2014, 2017) as a string, not an integer
        class_ids: If provided, only loads images that have the given classes.
        class_map: TODO: Not implemented yet. Supports mapping classes from
            different datasets to the same class ID.
        return_coco: If True, returns the COCO object.
        auto_download: Automatically download and unzip MS-COCO images and annotations
        """

        if auto_download is True:
            self.auto_download(dataset_dir, subset, year)

        coco = COCO("{}/annotations/instances_{}{}.json".format(dataset_dir, subset, year))
        if subset == "minival" or subset == "valminusminival":
            subset = "val"
        image_dir = "{}/{}{}".format(dataset_dir, subset, year)

        # Load all classes or a subset?
        if not class_ids:
            # All classes
            class_ids = sorted(coco.getCatIds())

        # All images or a subset?
        if class_ids:
            image_ids = []
            for id in class_ids:
                image_ids.extend(list(coco.getImgIds(catIds=[id])))
            # Remove duplicates
            image_ids = list(set(image_ids))
        else:
            # All images
            image_ids = list(coco.imgs.keys())

        # Add classes
        for i in class_ids:
            self.add_class("coco", i, coco.loadCats(i)[0]["name"])

        # Add images
        for i in image_ids:
            self.add_image(
                "coco", image_id=i,
                path=os.path.join(image_dir, coco.imgs[i]['file_name']),
                width=coco.imgs[i]["width"],
                height=coco.imgs[i]["height"],
                annotations=coco.loadAnns(coco.getAnnIds(
                    imgIds=[i], catIds=class_ids, iscrowd=None)))
        if return_coco:
            return coco

    def auto_download(self, dataDir, dataType, dataYear):
        """Download the COCO dataset/annotations if requested.
        dataDir: The root directory of the COCO dataset.
        dataType: What to load (train, val, minival, valminusminival)
        dataYear: What dataset year to load (2014, 2017) as a string, not an integer
        Note:
            For 2014, use "train", "val", "minival", or "valminusminival"
            For 2017, only "train" and "val" annotations are available
        """

        # Setup paths and file names
        if dataType == "minival" or dataType == "valminusminival":
            imgDir = "{}/{}{}".format(dataDir, "val", dataYear)
            imgZipFile = "{}/{}{}.zip".format(dataDir, "val", dataYear)
            imgURL = "http://images.cocodataset.org/zips/{}{}.zip".format("val", dataYear)
        else:
            imgDir = "{}/{}{}".format(dataDir, dataType, dataYear)
            imgZipFile = "{}/{}{}.zip".format(dataDir, dataType, dataYear)
            imgURL = "http://images.cocodataset.org/zips/{}{}.zip".format(dataType, dataYear)
        # print("Image paths:"); print(imgDir); print(imgZipFile); print(imgURL)

        # Create main folder if it doesn't exist yet
        if not os.path.exists(dataDir):
            os.makedirs(dataDir)

        # Download images if not available locally
        if not os.path.exists(imgDir):
            os.makedirs(imgDir)
            print("Downloading images to " + imgZipFile + " ...")
            with urllib.request.urlopen(imgURL) as resp, open(imgZipFile, 'wb') as out:
                shutil.copyfileobj(resp, out)
            print("... done downloading.")
            print("Unzipping " + imgZipFile)
            with zipfile.ZipFile(imgZipFile, "r") as zip_ref:
                zip_ref.extractall(dataDir)
            print("... done unzipping")
        print("Will use images in " + imgDir)

        # Setup annotations data paths
        annDir = "{}/annotations".format(dataDir)
        if dataType == "minival":
            annZipFile = "{}/instances_minival2014.json.zip".format(dataDir)
            annFile = "{}/instances_minival2014.json".format(annDir)
            annURL = "https://dl.dropboxusercontent.com/s/o43o90bna78omob/instances_minival2014.json.zip?dl=0"
            unZipDir = annDir
        elif dataType == "valminusminival":
            annZipFile = "{}/instances_valminusminival2014.json.zip".format(dataDir)
            annFile = "{}/instances_valminusminival2014.json".format(annDir)
            annURL = "https://dl.dropboxusercontent.com/s/s3tw5zcg7395368/instances_valminusminival2014.json.zip?dl=0"
            unZipDir = annDir
        else:
            annZipFile = "{}/annotations_trainval{}.zip".format(dataDir, dataYear)
            annFile = "{}/instances_{}{}.json".format(annDir, dataType, dataYear)
            annURL = "http://images.cocodataset.org/annotations/annotations_trainval{}.zip".format(dataYear)
            unZipDir = dataDir
        # print("Annotations paths:"); print(annDir); print(annFile); print(annZipFile); print(annURL)

        # Download annotations if not available locally
        if not os.path.exists(annDir):
            os.makedirs(annDir)
        if not os.path.exists(annFile):
            if not os.path.exists(annZipFile):
                print("Downloading zipped annotations to " + annZipFile + " ...")
                with urllib.request.urlopen(annURL) as resp, open(annZipFile, 'wb') as out:
                    shutil.copyfileobj(resp, out)
                print("... done downloading.")
            print("Unzipping " + annZipFile)
            with zipfile.ZipFile(annZipFile, "r") as zip_ref:
                zip_ref.extractall(unZipDir)
            print("... done unzipping")
        print("Will use annotations in " + annFile)

    def load_mask(self, image_id):
        """Load instance masks for the given image.

        Different datasets use different ways to store masks. This
        function converts the different mask format to one format
        in the form of a bitmap [height, width, instances].

        Returns:
        masks: A bool array of shape [height, width, instance count] with
            one mask per instance.
        class_ids: a 1D array of class IDs of the instance masks.
        """
        # If not a COCO image, delegate to parent class.
        image_info = self.image_info[image_id]
        if image_info["source"] != "coco":
            return super(CocoDataset, self).load_mask(image_id)

        instance_masks = []
        class_ids = []
        annotations = self.image_info[image_id]["annotations"]
        # Build mask of shape [height, width, instance_count] and list
        # of class IDs that correspond to each channel of the mask.
        for annotation in annotations:
            class_id = self.map_source_class_id(
                "coco.{}".format(annotation['category_id']))
            if class_id:
                m = self.annToMask(annotation, image_info["height"],
                                   image_info["width"])
                # Some objects are so small that they're less than 1 pixel area
                # and end up rounded out. Skip those objects.
                if m.max() < 1:
                    continue
                # Is it a crowd? If so, use a negative class ID.
                if annotation['iscrowd']:
                    # Use negative class ID for crowds
                    class_id *= -1
                    # For crowd masks, annToMask() sometimes returns a mask
                    # smaller than the given dimensions. If so, resize it.
                    if m.shape[0] != image_info["height"] or m.shape[1] != image_info["width"]:
                        m = np.ones([image_info["height"], image_info["width"]], dtype=bool)
                instance_masks.append(m)
                class_ids.append(class_id)

        # Pack instance masks into an array
        if class_ids:
            mask = np.stack(instance_masks, axis=2)
            class_ids = np.array(class_ids, dtype=np.int32)
            return mask, class_ids
        else:
            # Call super class to return an empty mask
            return super(CocoDataset, self).load_mask(image_id)

    def image_reference(self, image_id):
        """Return a link to the image in the COCO Website."""
        info = self.image_info[image_id]
        if info["source"] == "coco":
            return "http://cocodataset.org/#explore?id={}".format(info["id"])
        else:
            super(CocoDataset, self).image_reference(image_id)

    # The following two functions are from pycocotools with a few changes.

    def annToRLE(self, ann, height, width):
        """
        Convert annotation which can be polygons, uncompressed RLE to RLE.
        :return: binary mask (numpy 2D array)
        """
        segm = ann['segmentation']
        if isinstance(segm, list):
            # polygon -- a single object might consist of multiple parts
            # we merge all parts into one mask rle code
            rles = maskUtils.frPyObjects(segm, height, width)
            rle = maskUtils.merge(rles)
        elif isinstance(segm['counts'], list):
            # uncompressed RLE
            rle = maskUtils.frPyObjects(segm, height, width)
        else:
            # rle
            rle = ann['segmentation']
        return rle

    def annToMask(self, ann, height, width):
        """
        Convert annotation which can be polygons, uncompressed RLE, or RLE to binary mask.
        :return: binary mask (numpy 2D array)
        """
        rle = self.annToRLE(ann, height, width)
        m = maskUtils.decode(rle)
        return m


############################################################
#  COCO Evaluation
############################################################

def build_coco_results(dataset, image_ids, rois, class_ids, scores, masks):
    """Arrange results to match COCO specs in http://cocodataset.org/#format
    """
    # If no results, return an empty list
    if rois is None:
        return []

    results = []
    for image_id in image_ids:
        # Loop through detections
        for i in range(rois.shape[0]):
            class_id = class_ids[i]
            score = scores[i]
            bbox = np.around(rois[i], 1)
            mask = masks[:, :, i]

            result = {
                "image_id": image_id,
                "category_id": dataset.get_source_class_id(class_id, "coco"),
                "bbox": [bbox[1], bbox[0], bbox[3] - bbox[1], bbox[2] - bbox[0]],
                "score": score,
                "segmentation": maskUtils.encode(np.asfortranarray(mask))
            }
            results.append(result)
    return results


def evaluate_coco(model, dataset, coco, eval_type="bbox", limit=0, image_ids=None):
    """Runs official COCO evaluation.
    dataset: A Dataset object with validation data
    eval_type: "bbox" or "segm" for bounding box or segmentation evaluation
    limit: if not 0, it's the number of images to use for evaluation
    """
    # Pick COCO images from the dataset
    image_ids = image_ids or dataset.image_ids

    # Limit to a subset
    if limit:
        image_ids = image_ids[:limit]

    # Get corresponding COCO image IDs.
    coco_image_ids = [dataset.image_info[id]["id"] for id in image_ids]

    t_prediction = 0
    t_start = time.time()

    results = []
    for i, image_id in enumerate(image_ids):
        # Load image
        image = dataset.load_image(image_id)

        # Run detection
        t = time.time()
        r = model.detect([image], verbose=0)[0]
        t_prediction += (time.time() - t)

        # Convert results to COCO format
        image_results = build_coco_results(dataset, coco_image_ids[i:i + 1],
                                           r["rois"], r["class_ids"],
                                           r["scores"], r["masks"])
        results.extend(image_results)

    # Load results. This modifies results with additional attributes.
    coco_results = coco.loadRes(results)

    # Evaluate
    cocoEval = COCOeval(coco, coco_results, eval_type)
    cocoEval.params.imgIds = coco_image_ids
    cocoEval.evaluate()
    cocoEval.accumulate()
    cocoEval.summarize()

    print("Prediction time: {}. Average {}/image".format(
        t_prediction, t_prediction / len(image_ids)))
    print("Total time: ", time.time() - t_start)


############################################################
#  Training
############################################################


if __name__ == '__main__':
    import argparse

    # Parse command line arguments
    parser = argparse.ArgumentParser(
        description='Train Mask R-CNN on MS COCO.')
    parser.add_argument("command",
                        metavar="<command>",
                        help="'train' or 'evaluate' on MS COCO")
    parser.add_argument('--dataset', required=True,
                        metavar="/path/to/coco/",
                        help='Directory of the MS-COCO dataset')
    parser.add_argument('--year', required=False,
                        default=DEFAULT_DATASET_YEAR,
                        metavar="<year>",
                        help='Year of the MS-COCO dataset (2014 or 2017) (default=2014)')
    parser.add_argument('--model', required=True,
                        metavar="/path/to/weights.h5",
                        help="Path to weights .h5 file or 'coco'")
    parser.add_argument('--logs', required=False,
                        default=DEFAULT_LOGS_DIR,
                        metavar="/path/to/logs/",
                        help='Logs and checkpoints directory (default=logs/)')
    parser.add_argument('--limit', required=False,
                        default=500,
                        metavar="<image count>",
                        help='Images to use for evaluation (default=500)')
    parser.add_argument('--download', required=False,
                        default=False,
                        metavar="<True|False>",
                        help='Automatically download and unzip MS-COCO files (default=False)',
                        type=bool)
    args = parser.parse_args()
    print("Command: ", args.command)
    print("Model: ", args.model)
    print("Dataset: ", args.dataset)
    print("Year: ", args.year)
    print("Logs: ", args.logs)
    print("Auto Download: ", args.download)

    # Configurations
    if args.command == "train":
        config = CocoConfig()
    else:
        class InferenceConfig(CocoConfig):
            # Set batch size to 1 since we'll be running inference on
            # one image at a time. Batch size = GPU_COUNT * IMAGES_PER_GPU
            GPU_COUNT = 1
            IMAGES_PER_GPU = 1
            DETECTION_MIN_CONFIDENCE = 0
        config = InferenceConfig()
    config.display()

    # Create model
    if args.command == "train":
        model = modellib.MaskRCNN(mode="training", config=config,
                                  model_dir=args.logs)
    else:
        model = modellib.MaskRCNN(mode="inference", config=config,
                                  model_dir=args.logs)

    # Select weights file to load
    if args.model.lower() == "coco":
        model_path = COCO_MODEL_PATH
    elif args.model.lower() == "last":
        # Find last trained weights
        model_path = model.find_last()[1]
    elif args.model.lower() == "imagenet":
        # Start from ImageNet trained weights
        model_path = model.get_imagenet_weights()
    else:
        model_path = args.model

    # Load weights
    print("Loading weights ", model_path)
    model.load_weights(model_path, by_name=True)

    # Train or evaluate
    if args.command == "train":
        # Training dataset. Use the training set and 35K from the
        # validation set, as in the Mask RCNN paper.
        dataset_train = CocoDataset()
        dataset_train.load_coco(args.dataset, "train", year=args.year, auto_download=args.download)
        dataset_train.load_coco(args.dataset, "valminusminival", year=args.year, auto_download=args.download)
        dataset_train.prepare()

        # Validation dataset
        dataset_val = CocoDataset()
        dataset_val.load_coco(args.dataset, "minival", year=args.year, auto_download=args.download)
        dataset_val.prepare()

        # *** This training schedule is an example. Update to your needs ***

        # Training - Stage 1
        print("Training network heads")
        model.train(dataset_train, dataset_val,
                    learning_rate=config.LEARNING_RATE,
                    epochs=40,
                    layers='heads')

        # Training - Stage 2
        # Finetune layers from ResNet stage 4 and up
        print("Fine tune Resnet stage 4 and up")
        model.train(dataset_train, dataset_val,
                    learning_rate=config.LEARNING_RATE,
                    epochs=120,
                    layers='4+')

        # Training - Stage 3
        # Fine tune all layers
        print("Fine tune all layers")
        model.train(dataset_train, dataset_val,
                    learning_rate=config.LEARNING_RATE / 10,
                    epochs=160,
                    layers='all')

    elif args.command == "evaluate":
        # Validation dataset
        dataset_val = CocoDataset()
        coco = dataset_val.load_coco(args.dataset, "minival", year=args.year, return_coco=True, auto_download=args.download)
        dataset_val.prepare()
        print("Running COCO evaluation on {} images.".format(args.limit))
        evaluate_coco(model, dataset_val, coco, "bbox", limit=int(args.limit))
    else:
        print("'{}' is not recognized. "
              "Use 'train' or 'evaluate'".format(args.command))
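
Beyond the command-line entry point above, the same pieces can be driven programmatically; a minimal sketch, where the dataset root and weights file are placeholders rather than paths shipped in this commit:

# Placeholders: adjust the dataset root and weights path to your setup.
from coco import CocoConfig, CocoDataset, evaluate_coco
import mrcnn.model as modellib


class QuickInferenceConfig(CocoConfig):
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1


config = QuickInferenceConfig()
model = modellib.MaskRCNN(mode="inference", config=config, model_dir="logs")
model.load_weights("mask_rcnn_coco.h5", by_name=True)  # assumed local weights file

dataset_val = CocoDataset()
coco_gt = dataset_val.load_coco("/path/to/coco", "minival", year="2014", return_coco=True)
dataset_val.prepare()
evaluate_coco(model, dataset_val, coco_gt, "bbox", limit=10)  # quick 10-image check
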
data/pretrained_weights.h5
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:af08cd7b6f2b8e51bcfb685a8d21c784f58705e1c3e02c1a047b726faa25fd98
size 255856928
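
The entry above is a Git LFS pointer, not the weights themselves; once the real file has been fetched, its size can be sanity-checked against the pointer with a few lines of Python (a minimal sketch; the path follows this commit's layout):

import os

EXPECTED_SIZE = 255856928  # size recorded in the LFS pointer above

path = os.path.join("data", "pretrained_weights.h5")
print("size matches pointer:", os.path.getsize(path) == EXPECTED_SIZE)
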
data/test0.jpg
ADDED
mrcnn/__init__.py
ADDED
@@ -0,0 +1 @@
mrcnn/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (173 Bytes)

mrcnn/__pycache__/__init__.cpython-36.pyc
ADDED
Binary file (163 Bytes)

mrcnn/__pycache__/cocoeval.cpython-310.pyc
ADDED
Binary file (16.9 kB)

mrcnn/__pycache__/cocoeval.cpython-36.pyc
ADDED
Binary file (17.8 kB)

mrcnn/__pycache__/config.cpython-310.pyc
ADDED
Binary file (2.56 kB)

mrcnn/__pycache__/config.cpython-36.pyc
ADDED
Binary file (2.57 kB)

mrcnn/__pycache__/dataset.cpython-310.pyc
ADDED
Binary file (4.53 kB)

mrcnn/__pycache__/dataset.cpython-36.pyc
ADDED
Binary file (4.75 kB)

mrcnn/__pycache__/evaluate.cpython-310.pyc
ADDED
Binary file (2.72 kB)

mrcnn/__pycache__/evaluate.cpython-36.pyc
ADDED
Binary file (2.61 kB)

mrcnn/__pycache__/model.cpython-310.pyc
ADDED
Binary file (74.6 kB)

mrcnn/__pycache__/model.cpython-36.pyc
ADDED
Binary file (74.9 kB)

mrcnn/__pycache__/utils.cpython-310.pyc
ADDED
Binary file (25.2 kB)

mrcnn/__pycache__/utils.cpython-36.pyc
ADDED
Binary file (25.2 kB)

mrcnn/__pycache__/visualize.cpython-36.pyc
ADDED
Binary file (12.6 kB)
mrcnn/cocoeval.py
ADDED
@@ -0,0 +1,535 @@
__author__ = 'tsungyi'

import numpy as np
import datetime
import time
from collections import defaultdict
from pycocotools import mask as maskUtils
import copy

"""
This script has been taken (and modified) from :
https://github.com/crowdAI/coco/blob/master/PythonAPI/pycocotools/cocoeval.py
"""


class COCOeval:
    # Interface for evaluating detection on the Microsoft COCO dataset.
    #
    # The usage for CocoEval is as follows:
    #  cocoGt=..., cocoDt=...       # load dataset and results
    #  E = CocoEval(cocoGt,cocoDt); # initialize CocoEval object
    #  E.params.recThrs = ...;      # set parameters as desired
    #  E.evaluate();                # run per image evaluation
    #  E.accumulate();              # accumulate per image results
    #  E.summarize();               # display summary metrics of results
    # For example usage see evalDemo.m and http://mscoco.org/.
    #
    # The evaluation parameters are as follows (defaults in brackets):
    #  imgIds     - [all] N img ids to use for evaluation
    #  catIds     - [all] K cat ids to use for evaluation
    #  iouThrs    - [.5:.05:.95] T=10 IoU thresholds for evaluation
    #  recThrs    - [0:.01:1] R=101 recall thresholds for evaluation
    #  areaRng    - [...] A=4 object area ranges for evaluation
    #  maxDets    - [1 10 100] M=3 thresholds on max detections per image
    #  iouType    - ['segm'] set iouType to 'segm', 'bbox' or 'keypoints'
    #  iouType replaced the now DEPRECATED useSegm parameter.
    #  useCats    - [1] if true use category labels for evaluation
    # Note: if useCats=0 category labels are ignored as in proposal scoring.
    # Note: multiple areaRngs [Ax2] and maxDets [Mx1] can be specified.
    #
    # evaluate(): evaluates detections on every image and every category and
    # concats the results into the "evalImgs" with fields:
    #  dtIds      - [1xD] id for each of the D detections (dt)
    #  gtIds      - [1xG] id for each of the G ground truths (gt)
    #  dtMatches  - [TxD] matching gt id at each IoU or 0
    #  gtMatches  - [TxG] matching dt id at each IoU or 0
    #  dtScores   - [1xD] confidence of each dt
    #  gtIgnore   - [1xG] ignore flag for each gt
    #  dtIgnore   - [TxD] ignore flag for each dt at each IoU
    #
    # accumulate(): accumulates the per-image, per-category evaluation
    # results in "evalImgs" into the dictionary "eval" with fields:
    #  params     - parameters used for evaluation
    #  date       - date evaluation was performed
    #  counts     - [T,R,K,A,M] parameter dimensions (see above)
    #  precision  - [TxRxKxAxM] precision for every evaluation setting
    #  recall     - [TxKxAxM] max recall for every evaluation setting
    # Note: precision and recall==-1 for settings with no gt objects.
    #
    # See also coco, mask, pycocoDemo, pycocoEvalDemo
    #
    # Microsoft COCO Toolbox.      version 2.0
    # Data, paper, and tutorials available at:  http://mscoco.org/
    # Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
    # Licensed under the Simplified BSD License [see coco/license.txt]
    def __init__(self, cocoGt=None, cocoDt=None, iouType='segm'):
        '''
        Initialize CocoEval using coco APIs for gt and dt
        :param cocoGt: coco object with ground truth annotations
        :param cocoDt: coco object with detection results
        :return: None
        '''
        if not iouType:
            print('iouType not specified. use default iouType segm')
        self.cocoGt = cocoGt                # ground truth COCO API
        self.cocoDt = cocoDt                # detections COCO API
        self.params = {}                    # evaluation parameters
        self.evalImgs = defaultdict(list)   # per-image per-category evaluation results [KxAxI] elements
        self.eval = {}                      # accumulated evaluation results
        self._gts = defaultdict(list)       # gt for evaluation
        self._dts = defaultdict(list)       # dt for evaluation
        self.params = Params(iouType=iouType)  # parameters
        self._paramsEval = {}               # parameters for evaluation
        self.stats = []                     # result summarization
        self.ious = {}                      # ious between all gts and dts
        if not cocoGt is None:
            self.params.imgIds = sorted(cocoGt.getImgIds())
            self.params.catIds = sorted(cocoGt.getCatIds())

    def _prepare(self):
        '''
        Prepare ._gts and ._dts for evaluation based on params
        :return: None
        '''
        def _toMask(anns, coco):
            # modify ann['segmentation'] by reference
            for ann in anns:
                rle = coco.annToRLE(ann)
                ann['segmentation'] = rle
        p = self.params
        if p.useCats:
            gts = self.cocoGt.loadAnns(self.cocoGt.getAnnIds(imgIds=p.imgIds, catIds=p.catIds))
            dts = self.cocoDt.loadAnns(self.cocoDt.getAnnIds(imgIds=p.imgIds, catIds=p.catIds))
        else:
            gts = self.cocoGt.loadAnns(self.cocoGt.getAnnIds(imgIds=p.imgIds))
            dts = self.cocoDt.loadAnns(self.cocoDt.getAnnIds(imgIds=p.imgIds))

        # convert ground truth to mask if iouType == 'segm'
        if p.iouType == 'segm':
            _toMask(gts, self.cocoGt)
            _toMask(dts, self.cocoDt)
        # set ignore flag
        for gt in gts:
            gt['ignore'] = gt['ignore'] if 'ignore' in gt else 0
            gt['ignore'] = 'iscrowd' in gt and gt['iscrowd']
            if p.iouType == 'keypoints':
                gt['ignore'] = (gt['num_keypoints'] == 0) or gt['ignore']
        self._gts = defaultdict(list)       # gt for evaluation
        self._dts = defaultdict(list)       # dt for evaluation
        for gt in gts:
            self._gts[gt['image_id'], gt['category_id']].append(gt)
        for dt in dts:
            self._dts[dt['image_id'], dt['category_id']].append(dt)
        self.evalImgs = defaultdict(list)   # per-image per-category evaluation results
        self.eval = {}                      # accumulated evaluation results

    def evaluate(self):
        '''
        Run per image evaluation on given images and store results (a list of dict) in self.evalImgs
        :return: None
        '''
        tic = time.time()
        print('Running per image evaluation...')
        p = self.params
        # add backward compatibility if useSegm is specified in params
        if not p.useSegm is None:
            p.iouType = 'segm' if p.useSegm == 1 else 'bbox'
            print('useSegm (deprecated) is not None. Running {} evaluation'.format(p.iouType))
        print('Evaluate annotation type *{}*'.format(p.iouType))
        p.imgIds = list(np.unique(p.imgIds))
        if p.useCats:
            p.catIds = list(np.unique(p.catIds))
        p.maxDets = sorted(p.maxDets)
        self.params = p

        self._prepare()
        # loop through images, area range, max detection number
        catIds = p.catIds if p.useCats else [-1]

        if p.iouType == 'segm' or p.iouType == 'bbox':
            computeIoU = self.computeIoU
        elif p.iouType == 'keypoints':
            computeIoU = self.computeOks
        self.ious = {(imgId, catId): computeIoU(imgId, catId)
                     for imgId in p.imgIds
                     for catId in catIds}

        evaluateImg = self.evaluateImg
        maxDet = p.maxDets[-1]
        self.evalImgs = [evaluateImg(imgId, catId, areaRng, maxDet)
                         for catId in catIds
                         for areaRng in p.areaRng
                         for imgId in p.imgIds
                         ]
        self._paramsEval = copy.deepcopy(self.params)
        toc = time.time()
        print('DONE (t={:0.2f}s).'.format(toc-tic))

    def computeIoU(self, imgId, catId):
        p = self.params
        if p.useCats:
            gt = self._gts[imgId, catId]
            dt = self._dts[imgId, catId]
        else:
            gt = [_ for cId in p.catIds for _ in self._gts[imgId, cId]]
            dt = [_ for cId in p.catIds for _ in self._dts[imgId, cId]]
        if len(gt) == 0 and len(dt) == 0:
            return []
        inds = np.argsort([-d['score'] for d in dt], kind='mergesort')
        dt = [dt[i] for i in inds]
        if len(dt) > p.maxDets[-1]:
            dt = dt[0:p.maxDets[-1]]

        if p.iouType == 'segm':
            g = [g['segmentation'] for g in gt]
            d = [d['segmentation'] for d in dt]
        elif p.iouType == 'bbox':
            g = [g['bbox'] for g in gt]
            d = [d['bbox'] for d in dt]
        else:
            raise Exception('unknown iouType for iou computation')

        # compute iou between each dt and gt region
        iscrowd = [int(o['iscrowd']) for o in gt]
        ious = maskUtils.iou(d, g, iscrowd)
        return ious

    def computeOks(self, imgId, catId):
        p = self.params
        # dimension here should be Nxm
        gts = self._gts[imgId, catId]
        dts = self._dts[imgId, catId]
        inds = np.argsort([-d['score'] for d in dts], kind='mergesort')
        dts = [dts[i] for i in inds]
        if len(dts) > p.maxDets[-1]:
            dts = dts[0:p.maxDets[-1]]
        # if len(gts) == 0 and len(dts) == 0:
        if len(gts) == 0 or len(dts) == 0:
            return []
        ious = np.zeros((len(dts), len(gts)))
        sigmas = np.array([.26, .25, .25, .35, .35, .79, .79, .72, .72, .62, .62, 1.07, 1.07, .87, .87, .89, .89])/10.0
        vars = (sigmas * 2)**2
        k = len(sigmas)
        # compute oks between each detection and ground truth object
        for j, gt in enumerate(gts):
            # create bounds for ignore regions(double the gt bbox)
            g = np.array(gt['keypoints'])
            xg = g[0::3]; yg = g[1::3]; vg = g[2::3]
            k1 = np.count_nonzero(vg > 0)
            bb = gt['bbox']
            x0 = bb[0] - bb[2]; x1 = bb[0] + bb[2] * 2
            y0 = bb[1] - bb[3]; y1 = bb[1] + bb[3] * 2
            for i, dt in enumerate(dts):
                d = np.array(dt['keypoints'])
                xd = d[0::3]; yd = d[1::3]
                if k1 > 0:
                    # measure the per-keypoint distance if keypoints visible
                    dx = xd - xg
                    dy = yd - yg
                else:
                    # measure minimum distance to keypoints in (x0,y0) & (x1,y1)
                    z = np.zeros((k))
                    dx = np.max((z, x0-xd), axis=0)+np.max((z, xd-x1), axis=0)
                    dy = np.max((z, y0-yd), axis=0)+np.max((z, yd-y1), axis=0)
                e = (dx**2 + dy**2) / vars / (gt['area']+np.spacing(1)) / 2
                if k1 > 0:
                    e = e[vg > 0]
                ious[i, j] = np.sum(np.exp(-e)) / e.shape[0]
        return ious

    def evaluateImg(self, imgId, catId, aRng, maxDet):
        '''
        perform evaluation for single category and image
        :return: dict (single image results)
        '''
        p = self.params
        if p.useCats:
            gt = self._gts[imgId, catId]
            dt = self._dts[imgId, catId]
        else:
            gt = [_ for cId in p.catIds for _ in self._gts[imgId, cId]]
            dt = [_ for cId in p.catIds for _ in self._dts[imgId, cId]]
        if len(gt) == 0 and len(dt) == 0:
            return None

        for g in gt:
            if g['ignore'] or (g['area'] < aRng[0] or g['area'] > aRng[1]):
                g['_ignore'] = 1
            else:
                g['_ignore'] = 0

        # sort dt highest score first, sort gt ignore last
        gtind = np.argsort([g['_ignore'] for g in gt], kind='mergesort')
        gt = [gt[i] for i in gtind]
        dtind = np.argsort([-d['score'] for d in dt], kind='mergesort')
        dt = [dt[i] for i in dtind[0:maxDet]]
        iscrowd = [int(o['iscrowd']) for o in gt]
        # load computed ious
        ious = self.ious[imgId, catId][:, gtind] if len(self.ious[imgId, catId]) > 0 else self.ious[imgId, catId]

        T = len(p.iouThrs)
        G = len(gt)
        D = len(dt)
        gtm = np.zeros((T, G))
        dtm = np.zeros((T, D))
        gtIg = np.array([g['_ignore'] for g in gt])
        dtIg = np.zeros((T, D))
        if not len(ious) == 0:
            for tind, t in enumerate(p.iouThrs):
                for dind, d in enumerate(dt):
                    # information about best match so far (m=-1 -> unmatched)
                    iou = min([t, 1-1e-10])
                    m = -1
                    for gind, g in enumerate(gt):
                        # if this gt already matched, and not a crowd, continue
                        if gtm[tind, gind] > 0 and not iscrowd[gind]:
                            continue
                        # if dt matched to reg gt, and on ignore gt, stop
                        if m > -1 and gtIg[m] == 0 and gtIg[gind] == 1:
                            break
                        # continue to next gt unless better match made
                        if ious[dind, gind] < iou:
                            continue
                        # if match successful and best so far, store appropriately
                        iou = ious[dind, gind]
                        m = gind
                    # if match made store id of match for both dt and gt
                    if m == -1:
                        continue
                    dtIg[tind, dind] = gtIg[m]
                    dtm[tind, dind] = gt[m]['id']
                    gtm[tind, m] = d['id']
        # set unmatched detections outside of area range to ignore
        a = np.array([d['area'] < aRng[0] or d['area'] > aRng[1] for d in dt]).reshape((1, len(dt)))
        dtIg = np.logical_or(dtIg, np.logical_and(dtm == 0, np.repeat(a, T, 0)))
        # store results for given image and category
        return {
                'image_id':     imgId,
                'category_id':  catId,
                'aRng':         aRng,
                'maxDet':       maxDet,
                'dtIds':        [d['id'] for d in dt],
                'gtIds':        [g['id'] for g in gt],
                'dtMatches':    dtm,
                'gtMatches':    gtm,
                'dtScores':     [d['score'] for d in dt],
                'gtIgnore':     gtIg,
                'dtIgnore':     dtIg,
            }

    def accumulate(self, p=None):
        '''
        Accumulate per image evaluation results and store the result in self.eval
        :param p: input params for evaluation
        :return: None
        '''
        print('Accumulating evaluation results...')
        tic = time.time()
        if not self.evalImgs:
            print('Please run evaluate() first')
        # allows input customized parameters
        if p is None:
            p = self.params
        p.catIds = p.catIds if p.useCats == 1 else [-1]
        T = len(p.iouThrs)
        R = len(p.recThrs)
        K = len(p.catIds) if p.useCats else 1
        A = len(p.areaRng)
        M = len(p.maxDets)
        precision = -np.ones((T, R, K, A, M))  # -1 for the precision of absent categories
        recall = -np.ones((T, K, A, M))

        # create dictionary for future indexing
        _pe = self._paramsEval
        catIds = _pe.catIds if _pe.useCats else [-1]
        setK = set(catIds)
        setA = set(map(tuple, _pe.areaRng))
        setM = set(_pe.maxDets)
        setI = set(_pe.imgIds)
        # get inds to evaluate
        k_list = [n for n, k in enumerate(p.catIds) if k in setK]
        m_list = [m for n, m in enumerate(p.maxDets) if m in setM]
        a_list = [n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng)) if a in setA]
        i_list = [n for n, i in enumerate(p.imgIds) if i in setI]
        I0 = len(_pe.imgIds)
        A0 = len(_pe.areaRng)
        # retrieve E at each category, area range, and max number of detections
        for k, k0 in enumerate(k_list):
            Nk = k0*A0*I0
            for a, a0 in enumerate(a_list):
                Na = a0*I0
                for m, maxDet in enumerate(m_list):
                    E = [self.evalImgs[Nk + Na + i] for i in i_list]
                    E = [e for e in E if not e is None]
                    if len(E) == 0:
                        continue
                    dtScores = np.concatenate([e['dtScores'][0:maxDet] for e in E])

                    # different sorting method generates slightly different results.
                    # mergesort is used to be consistent as Matlab implementation.
                    inds = np.argsort(-dtScores, kind='mergesort')

                    dtm = np.concatenate([e['dtMatches'][:, 0:maxDet] for e in E], axis=1)[:, inds]
                    dtIg = np.concatenate([e['dtIgnore'][:, 0:maxDet] for e in E], axis=1)[:, inds]
                    gtIg = np.concatenate([e['gtIgnore'] for e in E])
                    npig = np.count_nonzero(gtIg == 0)
                    if npig == 0:
                        continue
                    tps = np.logical_and(dtm, np.logical_not(dtIg))
                    fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg))

                    tp_sum = np.cumsum(tps, axis=1).astype(dtype=np.float)
                    fp_sum = np.cumsum(fps, axis=1).astype(dtype=np.float)
                    for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)):
                        tp = np.array(tp)
                        fp = np.array(fp)
                        nd = len(tp)
                        rc = tp / npig
                        pr = tp / (fp+tp+np.spacing(1))
                        q = np.zeros((R,))

                        if nd:
                            recall[t, k, a, m] = rc[-1]
                        else:
                            recall[t, k, a, m] = 0

                        # numpy is slow without cython optimization for accessing elements
                        # use python array gets significant speed improvement
                        pr = pr.tolist(); q = q.tolist()

                        for i in range(nd-1, 0, -1):
                            if pr[i] > pr[i-1]:
                                pr[i-1] = pr[i]

                        inds = np.searchsorted(rc, p.recThrs, side='left')
                        try:
                            for ri, pi in enumerate(inds):
                                q[ri] = pr[pi]
                        except:
                            pass
                        precision[t, :, k, a, m] = np.array(q)
        self.eval = {
            'params': p,
            'counts': [T, R, K, A, M],
            'date': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
            'precision': precision,
            'recall':   recall,
        }
        toc = time.time()
        print('DONE (t={:0.2f}s).'.format(toc-tic))

    def _summarize(self, ap=1, iouThr=None, areaRng='all', maxDets=100):
        p = self.params
        iStr = ' {:<18} {} @[ IoU={:<9} | area={:>6s} | maxDets={:>3d} ] = {:0.3f}'
        titleStr = 'Average Precision' if ap == 1 else 'Average Recall'
        typeStr = '(AP)' if ap == 1 else '(AR)'
        iouStr = '{:0.2f}:{:0.2f}'.format(p.iouThrs[0], p.iouThrs[-1]) \
            if iouThr is None else '{:0.2f}'.format(iouThr)

        aind = [i for i, aRng in enumerate(p.areaRngLbl) if aRng == areaRng]
        mind = [i for i, mDet in enumerate(p.maxDets) if mDet == maxDets]
        if ap == 1:
            # dimension of precision: [TxRxKxAxM]
            s = self.eval['precision']
            # IoU
            if iouThr is not None:
                t = np.where(iouThr == p.iouThrs)[0]
                s = s[t]
            s = s[:, :, :, aind, mind]
        else:
            # dimension of recall: [TxKxAxM]
            s = self.eval['recall']
            if iouThr is not None:
                t = np.where(iouThr == p.iouThrs)[0]
                s = s[t]
            s = s[:, :, aind, mind]
        if len(s[s > -1]) == 0:
            mean_s = -1
        else:
            mean_s = np.mean(s[s > -1])
        print(iStr.format(titleStr, typeStr, iouStr, areaRng, maxDets, mean_s))
        return mean_s

    def summarize(self):
        '''
        Compute and display summary metrics for evaluation results.
        Note this function can *only* be applied on the default parameter setting
        '''
        def _summarizeDets():
            stats = np.zeros((12,))
            stats[0] = self._summarize(1)
            stats[1] = self._summarize(1, iouThr=.5, maxDets=self.params.maxDets[2])
            stats[2] = self._summarize(1, iouThr=.75, maxDets=self.params.maxDets[2])
            stats[3] = self._summarize(1, areaRng='small', maxDets=self.params.maxDets[2])
            stats[4] = self._summarize(1, areaRng='medium', maxDets=self.params.maxDets[2])
            stats[5] = self._summarize(1, areaRng='large', maxDets=self.params.maxDets[2])
            stats[6] = self._summarize(0, maxDets=self.params.maxDets[0])
            stats[7] = self._summarize(0, maxDets=self.params.maxDets[1])
            stats[8] = self._summarize(0, maxDets=self.params.maxDets[2])
            stats[9] = self._summarize(0, areaRng='small', maxDets=self.params.maxDets[2])
            stats[10] = self._summarize(0, areaRng='medium', maxDets=self.params.maxDets[2])
            stats[11] = self._summarize(0, areaRng='large', maxDets=self.params.maxDets[2])
            return stats

        def _summarizeKps():
            stats = np.zeros((10,))
            stats[0] = self._summarize(1, maxDets=20)
            stats[1] = self._summarize(1, maxDets=20, iouThr=.5)
            stats[2] = self._summarize(1, maxDets=20, iouThr=.75)
            stats[3] = self._summarize(1, maxDets=20, areaRng='medium')
            stats[4] = self._summarize(1, maxDets=20, areaRng='large')
            stats[5] = self._summarize(0, maxDets=20)
            stats[6] = self._summarize(0, maxDets=20, iouThr=.5)
            stats[7] = self._summarize(0, maxDets=20, iouThr=.75)
            stats[8] = self._summarize(0, maxDets=20, areaRng='medium')
            stats[9] = self._summarize(0, maxDets=20, areaRng='large')
            return stats

        if not self.eval:
            raise Exception('Please run accumulate() first')
        iouType = self.params.iouType
        if iouType == 'segm' or iouType == 'bbox':
            summarize = _summarizeDets
        elif iouType == 'keypoints':
            summarize = _summarizeKps
        self.stats = summarize()

    def __str__(self):
        self.summarize()


class Params:
    '''
    Params for coco evaluation api
    '''
    def setDetParams(self):
        self.imgIds = []
        self.catIds = [100]  # For the Category ID of Building
        # np.arange causes trouble. the data point on arange is slightly larger than the true value
        self.iouThrs = np.linspace(.5, 0.95, np.round((0.95 - .5) / .05) + 1, endpoint=True)
        self.recThrs = np.linspace(.0, 1.00, np.round((1.00 - .0) / .01) + 1, endpoint=True)
        self.maxDets = [1, 10, 100]
        self.areaRng = [[0 ** 2, 1e5 ** 2], [0 ** 2, 32 ** 2], [32 ** 2, 96 ** 2], [96 ** 2, 1e5 ** 2]]
        self.areaRngLbl = ['all', 'small', 'medium', 'large']
        self.useCats = 1

    def setKpParams(self):
        self.imgIds = []
        self.catIds = []
        # np.arange causes trouble. the data point on arange is slightly larger than the true value
        self.iouThrs = [0.5]
        self.recThrs = np.linspace(.0, 1.00, np.round((1.00 - .0) / .01) + 1, endpoint=True)
        self.maxDets = [20]  # At max 20 objects detected per image
        self.areaRng = [[0 ** 2, 1e5 ** 2], [32 ** 2, 96 ** 2], [96 ** 2, 1e5 ** 2]]
        self.areaRngLbl = ['all']  # Consider all area ranges for evaluation
        self.useCats = 1

    def __init__(self, iouType='segm'):
        if iouType == 'segm' or iouType == 'bbox':
            self.setDetParams()
        elif iouType == 'keypoints':
            self.setKpParams()
        else:
            raise Exception('iouType not supported')
        self.iouType = iouType
        # useSegm is deprecated
        self.useSegm = None
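
The usage sketched in the class header above maps directly onto a short script; a minimal example, assuming a ground-truth annotation file and a COCO-format detection results file already exist locally (both file names are placeholders, not part of this commit):

from pycocotools.coco import COCO
from mrcnn.cocoeval import COCOeval  # the modified evaluator added in this commit

cocoGt = COCO("annotations/instances_val.json")  # placeholder ground-truth file
cocoDt = cocoGt.loadRes("detections.json")       # placeholder detections file

E = COCOeval(cocoGt, cocoDt, iouType="segm")
E.params.imgIds = sorted(cocoGt.getImgIds())  # evaluate on all ground-truth images
E.evaluate()    # per-image, per-category matching
E.accumulate()  # build the precision/recall arrays
E.summarize()   # print the AP/AR summary table
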
mrcnn/config.py
ADDED
@@ -0,0 +1,193 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""
Mask R-CNN
Base Configurations class.

Copyright (c) 2017 Matterport, Inc.
Licensed under the MIT License (see LICENSE for details)
Written by Waleed Abdulla
"""

import math
import numpy as np


# Base Configuration Class
# Don't use this class directly. Instead, sub-class it and override
# the configurations you need to change.

class Config(object):
    """Base configuration class. For custom configurations, create a
    sub-class that inherits from this one and override properties
    that need to be changed.
    """
    # Name the configurations. For example, 'COCO', 'Experiment 3', ...etc.
    # Useful if your code needs to do things differently depending on which
    # experiment is running.
    NAME = None  # Override in sub-classes

    # NUMBER OF GPUs to use. For CPU training, use 1
    GPU_COUNT = 1

    # Number of images to train with on each GPU. A 12GB GPU can typically
    # handle 2 images of 1024x1024px.
    # Adjust based on your GPU memory and image sizes. Use the highest
    # number that your GPU can handle for best performance.
    IMAGES_PER_GPU = 2

    # Number of training steps per epoch
    # This doesn't need to match the size of the training set. Tensorboard
    # updates are saved at the end of each epoch, so setting this to a
    # smaller number means getting more frequent TensorBoard updates.
    # Validation stats are also calculated at each epoch end and they
    # might take a while, so don't set this too small to avoid spending
    # a lot of time on validation stats.
    STEPS_PER_EPOCH = 1000

    # Number of validation steps to run at the end of every training epoch.
    # A bigger number improves accuracy of validation stats, but slows
    # down the training.
    VALIDATION_STEPS = 50

    # Backbone network architecture
    # Supported values are: resnet50, resnet101
    BACKBONE = "resnet101"

    # The strides of each layer of the FPN Pyramid. These values
    # are based on a Resnet101 backbone.
    BACKBONE_STRIDES = [4, 8, 16, 32, 64]

    # Number of classification classes (including background)
    NUM_CLASSES = 1  # Override in sub-classes

    # Length of square anchor side in pixels
    RPN_ANCHOR_SCALES = (32, 64, 128, 256, 512)

    # Ratios of anchors at each cell (width/height)
    # A value of 1 represents a square anchor, and 0.5 is a wide anchor
    RPN_ANCHOR_RATIOS = [0.5, 1, 2]

    # Anchor stride
    # If 1 then anchors are created for each cell in the backbone feature map.
    # If 2, then anchors are created for every other cell, and so on.
    RPN_ANCHOR_STRIDE = 1

    # Non-max suppression threshold to filter RPN proposals.
    # You can increase this during training to generate more proposals.
    RPN_NMS_THRESHOLD = 0.7

    # How many anchors per image to use for RPN training
    RPN_TRAIN_ANCHORS_PER_IMAGE = 256

    # ROIs kept after non-maximum suppression (training and inference)
    POST_NMS_ROIS_TRAINING = 2000
    POST_NMS_ROIS_INFERENCE = 1000

    # If enabled, resizes instance masks to a smaller size to reduce
    # memory load. Recommended when using high-resolution images.
    USE_MINI_MASK = True
    MINI_MASK_SHAPE = (56, 56)  # (height, width) of the mini-mask

    # Input image resizing
    # Generally, use the "square" resizing mode for training and inferencing
    # and it should work well in most cases. In this mode, images are scaled
    # up such that the small side is = IMAGE_MIN_DIM, but ensuring that the
    # scaling doesn't make the long side > IMAGE_MAX_DIM. Then the image is
    # padded with zeros to make it a square so multiple images can be put
    # in one batch.
    # Available resizing modes:
    # none:   No resizing or padding. Return the image unchanged.
    # square: Resize and pad with zeros to get a square image
    #         of size [max_dim, max_dim].
    # pad64:  Pads width and height with zeros to make them multiples of 64.
    #         If IMAGE_MIN_DIM is not None, then scale the small side to
    #         that size before padding. IMAGE_MAX_DIM is ignored in this mode.
    #         The multiple of 64 is needed to ensure smooth scaling of feature
    #         maps up and down the 6 levels of the FPN pyramid (2**6=64).
    IMAGE_RESIZE_MODE = "square"
    IMAGE_MIN_DIM = 800
    IMAGE_MAX_DIM = 1024

    # Image mean (RGB)
    MEAN_PIXEL = np.array([123.7, 116.8, 103.9])

    # Number of ROIs per image to feed to classifier/mask heads
    # The Mask RCNN paper uses 512 but often the RPN doesn't generate
    # enough positive proposals to fill this and keep a positive:negative
    # ratio of 1:3. You can increase the number of proposals by adjusting
    # the RPN NMS threshold.
    TRAIN_ROIS_PER_IMAGE = 200

    # Percent of positive ROIs used to train classifier/mask heads
    ROI_POSITIVE_RATIO = 0.33

    # Pooled ROIs
    POOL_SIZE = 7
    MASK_POOL_SIZE = 14

    # Shape of output mask
    # To change this you also need to change the neural network mask branch
    MASK_SHAPE = [28, 28]

    # Maximum number of ground truth instances to use in one image
    MAX_GT_INSTANCES = 100

    # Bounding box refinement standard deviation for RPN and final detections.
    RPN_BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2])
    BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2])

    # Max number of final detections
    DETECTION_MAX_INSTANCES = 100

    # Minimum probability value to accept a detected instance
    # ROIs below this threshold are skipped
    DETECTION_MIN_CONFIDENCE = 0.7

    # Non-maximum suppression threshold for detection
    DETECTION_NMS_THRESHOLD = 0.3

    # Learning rate and momentum
    # The Mask RCNN paper uses lr=0.02, but on TensorFlow it causes
    # weights to explode. Likely due to differences in optimizer
    # implementation.
    LEARNING_RATE = 0.001
    LEARNING_MOMENTUM = 0.9

    # Weight decay regularization
    WEIGHT_DECAY = 0.0001

    # Use RPN ROIs or externally generated ROIs for training
    # Keep this True for most situations. Set to False if you want to train
    # the head branches on ROI generated by code rather than the ROIs from
    # the RPN. For example, to debug the classifier head without having to
    # train the RPN.
    USE_RPN_ROIS = True

    # Train or freeze batch normalization layers
    #     None: Train BN layers. This is the normal mode
    #     False: Freeze BN layers. Good when using a small batch size
    #     True: (don't use). Set layer in training mode even when inferencing
    TRAIN_BN = False  # Defaulting to False since batch size is often small

    # Gradient norm clipping
    GRADIENT_CLIP_NORM = 5.0

    def __init__(self):
        """Set values of computed attributes."""
        # Effective batch size
        self.BATCH_SIZE = self.IMAGES_PER_GPU * self.GPU_COUNT

        # Input image size
        self.IMAGE_SHAPE = np.array(
            [self.IMAGE_MAX_DIM, self.IMAGE_MAX_DIM, 3])

        # Image meta data length
        # See compose_image_meta() for details
        self.IMAGE_META_SIZE = 1 + 3 + 3 + 4 + 1 + self.NUM_CLASSES

    def display(self):
        """Display Configuration values."""
        print("\nConfigurations:")
        for a in dir(self):
            if not a.startswith("__") and not callable(getattr(self, a)):
                print("{:30} {}".format(a, getattr(self, a)))
        print("\n")
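
As the Config docstring above says, per-experiment settings are created by subclassing Config and overriding only the attributes that change; __init__ then derives BATCH_SIZE, IMAGE_SHAPE and IMAGE_META_SIZE from them. A minimal sketch follows; the class name and every overridden value are hypothetical examples, not settings taken from this repository:

from mrcnn.config import Config

class BuildingTrainingConfig(Config):
    """Hypothetical training configuration for a single 'building' class."""
    NAME = "building-training"
    GPU_COUNT = 1
    IMAGES_PER_GPU = 4         # effective BATCH_SIZE becomes 4
    NUM_CLASSES = 1 + 1        # background + building
    IMAGE_MIN_DIM = 320
    IMAGE_MAX_DIM = 320

config = BuildingTrainingConfig()
config.display()               # prints every resolved configuration value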
mrcnn/dataset.py
ADDED
@@ -0,0 +1,156 @@
from mrcnn import utils
import numpy as np

from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
from pycocotools import mask as maskUtils

import os

class MappingChallengeDataset(utils.Dataset):
    def load_dataset(self, dataset_dir, load_small=False, return_coco=True):
        """ Loads the dataset released for the crowdAI Mapping Challenge (https://www.crowdai.org/challenges/mapping-challenge)
            Params:
                - dataset_dir : root directory of the dataset (can point to the train/val folder)
                - load_small : Boolean value which signals if the annotations for all the images need to be loaded into the memory,
                               or if only a small subset of the same should be loaded into memory
        """
        self.load_small = load_small
        if self.load_small:
            annotation_path = os.path.join(dataset_dir, "annotation-small.json")
        else:
            annotation_path = os.path.join(dataset_dir, "annotation.json")

        image_dir = os.path.join(dataset_dir, "images")
        print("Annotation Path ", annotation_path)
        print("Image Dir ", image_dir)
        assert os.path.exists(annotation_path) and os.path.exists(image_dir)

        self.coco = COCO(annotation_path)
        self.image_dir = image_dir
        print(len(self.coco.imgs))

        # Load all classes (Only Building in this version)
        classIds = self.coco.getCatIds()

        # Load all images
        image_ids = list(self.coco.imgs.keys())

        # Register classes
        for _class_id in classIds:
            self.add_class("crowdai-mapping-challenge", _class_id, self.coco.loadCats(_class_id)[0]["name"])

        # Register images, keeping only those whose files actually exist on disk
        img_exist = []
        for _img_id in image_ids:
            path = os.path.join(image_dir, self.coco.imgs[_img_id]['file_name'])
            if os.path.exists(path):
                img_exist.append(_img_id)

        coco_updated = {}
        for i in img_exist:
            coco_updated[i] = self.coco.imgs[i]

        self.coco.imgs = coco_updated
        print(len(self.coco.imgs))

        for _img_id in img_exist:
            assert(os.path.exists(os.path.join(image_dir, self.coco.imgs[_img_id]['file_name'])))

            self.add_image(
                "crowdai-mapping-challenge", image_id=_img_id,
                path=os.path.join(image_dir, self.coco.imgs[_img_id]['file_name']),
                width=self.coco.imgs[_img_id]["width"],
                height=self.coco.imgs[_img_id]["height"],
                annotations=self.coco.loadAnns(self.coco.getAnnIds(
                    imgIds=[_img_id],
                    catIds=classIds,
                    iscrowd=None)))

        if return_coco:
            return self.coco

    def load_mask(self, image_id):
        """ Loads the instance masks for a given image.
            This function converts masks from the COCO format to
            a bitmap of shape [height, width, instances].
            Params:
                - image_id : reference id for a given image

            Returns:
                masks : A bool array of shape [height, width, instances] with
                    one mask per instance
                class_ids : a 1D array of classIds of the corresponding instance masks
                    (In this version of the challenge it will be of shape [instances] and always be filled with the class-id of the "Building" class.)
        """

        image_info = self.image_info[image_id]
        assert image_info["source"] == "crowdai-mapping-challenge"

        instance_masks = []
        class_ids = []
        annotations = self.image_info[image_id]["annotations"]
        # Build mask of shape [height, width, instance_count] and list
        # of class IDs that correspond to each channel of the mask.
        for annotation in annotations:
            class_id = self.map_source_class_id(
                "crowdai-mapping-challenge.{}".format(annotation['category_id']))
            if class_id:
                m = self.annToMask(annotation, image_info["height"],
                                   image_info["width"])
                # Some objects are so small that they're less than 1 pixel area
                # and end up rounded out. Skip those objects.
                if m.max() < 1:
                    continue

                # Ignore the notion of "is_crowd" as specified in the COCO format,
                # as we do not have the said annotation in the current version of the dataset

                instance_masks.append(m)
                class_ids.append(class_id)
        # Pack instance masks into an array
        if class_ids:
            mask = np.stack(instance_masks, axis=2)
            class_ids = np.array(class_ids, dtype=np.int32)
            return mask, class_ids
        else:
            # Call super class to return an empty mask
            return super(MappingChallengeDataset, self).load_mask(image_id)

    def image_reference(self, image_id):
        """Return a reference for a particular image.

            Ideally this function is supposed to return a URL,
            but in this case we simply return the image_id.
        """
        return "crowdai-mapping-challenge::{}".format(image_id)

    # The following two functions are from pycocotools with a few changes.

    def annToRLE(self, ann, height, width):
        """
        Convert annotation which can be polygons, uncompressed RLE to RLE.
        :return: binary mask (numpy 2D array)
        """
        segm = ann['segmentation']
        if isinstance(segm, list):
            # polygon -- a single object might consist of multiple parts
            # we merge all parts into one mask rle code
            rles = maskUtils.frPyObjects(segm, height, width)
            rle = maskUtils.merge(rles)
        elif isinstance(segm['counts'], list):
            # uncompressed RLE
            rle = maskUtils.frPyObjects(segm, height, width)
        else:
            # rle
            rle = ann['segmentation']
        return rle

    def annToMask(self, ann, height, width):
        """
        Convert annotation which can be polygons, uncompressed RLE, or RLE to binary mask.
        :return: binary mask (numpy 2D array)
        """
        rle = self.annToRLE(ann, height, width)
        m = maskUtils.decode(rle)
        return m
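
A minimal usage sketch for MappingChallengeDataset, assuming the folder layout described in the load_dataset docstring (an "annotation.json" or "annotation-small.json" next to an "images/" directory); the "data/val" path below is a placeholder:

from mrcnn.dataset import MappingChallengeDataset

dataset = MappingChallengeDataset()
coco = dataset.load_dataset(dataset_dir="data/val", load_small=True)
dataset.prepare()  # from the utils.Dataset base class; builds class and image indices

image = dataset.load_image(dataset.image_ids[0])
masks, class_ids = dataset.load_mask(dataset.image_ids[0])
print(image.shape, masks.shape, class_ids)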
mrcnn/evaluate.py
ADDED
@@ -0,0 +1,94 @@
from pycocotools.coco import COCO
from mrcnn.cocoeval import COCOeval
from pycocotools import mask as maskUtils
import time
import numpy as np

############################################################
#  COCO Evaluation
############################################################

def build_coco_results(dataset, image_ids, rois, class_ids, scores, masks):
    """Arrange results to match COCO specs in http://cocodataset.org/#format
    """
    # If no results, return an empty list
    if rois is None:
        return []

    results = []
    for image_id in image_ids:
        # Loop through detections
        for i in range(rois.shape[0]):
            class_id = class_ids[i]
            score = scores[i]
            bbox = np.around(rois[i], 1)
            mask = masks[:, :, i]

            result = {
                "image_id": image_id,
                "category_id": dataset.get_source_class_id(class_id, "crowdai-mapping-challenge"),
                "bbox": [bbox[1], bbox[0], bbox[3] - bbox[1], bbox[2] - bbox[0]],
                "score": score,
                "segmentation": maskUtils.encode(np.asfortranarray(mask)).encode('utf-8')
            }
            results.append(result)
    return results


def evaluate_coco(model, dataset, coco, eval_type="bbox", limit=0, image_ids=None):
    """Runs official COCO evaluation.
    dataset: A Dataset object with validation data
    eval_type: "bbox" or "segm" for bounding box or segmentation evaluation
    limit: if not 0, it's the number of images to use for evaluation
    """
    # Pick COCO images from the dataset
    image_ids = image_ids or dataset.image_ids

    # Limit to a subset
    if limit:
        image_ids = image_ids[:limit]

    # Get corresponding COCO image IDs.
    coco_image_ids = [dataset.image_info[id]["id"] for id in image_ids]

    t_prediction = 0
    t_start = time.time()

    results = []

    for i, image_id in enumerate(image_ids):
        # Load image
        image = dataset.load_image(image_id)

        # Run detection
        t = time.time()
        print("=" * 100)
        print("Image shape ", image.shape)
        r = model.detect([image])
        r = r[0]
        t_prediction += (time.time() - t)
        print("Prediction time : ", (time.time() - t))
        # Convert results to COCO format
        image_results = build_coco_results(dataset, coco_image_ids[i:i + 1],
                                           r["rois"], r["class_ids"],
                                           r["scores"], r["masks"])
        print("Number of detections : ", len(r["rois"]))
        print("Classes Predicted : ", r["class_ids"])
        print("Scores : ", r["scores"])
        results.extend(image_results)

    # Load results. This modifies results with additional attributes.
    coco_results = coco.loadRes(results)

    # Evaluate
    cocoEval = COCOeval(coco, coco_results, eval_type)
    cocoEval.params.imgIds = coco_image_ids
    cocoEval.evaluate()
    cocoEval.accumulate()
    ap = cocoEval._summarize(ap=1, iouThr=0.5, areaRng="all", maxDets=100)
    ar = cocoEval._summarize(ap=0, areaRng="all", maxDets=100)
    print("Precision : ", ap, " Recall : ", ar)

    print("Prediction time: {}. Average {}/image".format(
        t_prediction, t_prediction / len(image_ids)))
    print("Total time: ", time.time() - t_start)
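
A hedged sketch of how evaluate_coco might be driven, assuming an inference-mode Mask R-CNN model and a prepared MappingChallengeDataset (with its COCO object) are built elsewhere; the helper name is hypothetical:

from mrcnn.evaluate import evaluate_coco

def run_validation(model, dataset, coco, n_images=50):
    """Hypothetical helper: COCO-style evaluation on the first n_images images."""
    # eval_type="segm" scores the predicted masks; "bbox" would score boxes only.
    evaluate_coco(model, dataset, coco, eval_type="segm", limit=n_images)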
mrcnn/model.py
ADDED
The diff for this file is too large to render.
mrcnn/parallel_model.py
ADDED
@@ -0,0 +1,173 @@
"""
Mask R-CNN
Multi-GPU Support for Keras.

Copyright (c) 2017 Matterport, Inc.
Licensed under the MIT License (see LICENSE for details)
Written by Waleed Abdulla

Ideas and small code snippets from these sources:
https://github.com/fchollet/keras/issues/2436
https://medium.com/@kuza55/transparent-multi-gpu-training-on-tensorflow-with-keras-8b0016fd9012
https://github.com/avolkov1/keras_experiments/blob/master/keras_exp/multigpu/
https://github.com/fchollet/keras/blob/master/keras/utils/training_utils.py
"""

import tensorflow as tf
import keras.backend as K
import keras.layers as KL
import keras.models as KM


class ParallelModel(KM.Model):
    """Subclasses the standard Keras Model and adds multi-GPU support.
    It works by creating a copy of the model on each GPU. Then it slices
    the inputs and sends a slice to each copy of the model, and then
    merges the outputs together and applies the loss on the combined
    outputs.
    """

    def __init__(self, keras_model, gpu_count):
        """Class constructor.
        keras_model: The Keras model to parallelize
        gpu_count: Number of GPUs. Must be > 1
        """
        self.inner_model = keras_model
        self.gpu_count = gpu_count
        merged_outputs = self.make_parallel()
        super(ParallelModel, self).__init__(inputs=self.inner_model.inputs,
                                            outputs=merged_outputs)

    def __getattribute__(self, attrname):
        """Redirect loading and saving methods to the inner model. That's where
        the weights are stored."""
        if 'load' in attrname or 'save' in attrname:
            return getattr(self.inner_model, attrname)
        return super(ParallelModel, self).__getattribute__(attrname)

    def summary(self, *args, **kwargs):
        """Override summary() to display summaries of both the wrapper
        and the inner model."""
        super(ParallelModel, self).summary(*args, **kwargs)
        self.inner_model.summary(*args, **kwargs)

    def make_parallel(self):
        """Creates a new wrapper model that consists of multiple replicas of
        the original model placed on different GPUs.
        """
        # Slice inputs. Slice inputs on the CPU to avoid sending a copy
        # of the full inputs to all GPUs. Saves on bandwidth and memory.
        input_slices = {name: tf.split(x, self.gpu_count)
                        for name, x in zip(self.inner_model.input_names,
                                           self.inner_model.inputs)}

        output_names = self.inner_model.output_names
        outputs_all = []
        for i in range(len(self.inner_model.outputs)):
            outputs_all.append([])

        # Run the model call() on each GPU to place the ops there
        for i in range(self.gpu_count):
            with tf.device('/gpu:%d' % i):
                with tf.name_scope('tower_%d' % i):
                    # Run a slice of inputs through this replica
                    zipped_inputs = zip(self.inner_model.input_names,
                                        self.inner_model.inputs)
                    inputs = [
                        KL.Lambda(lambda s: input_slices[name][i],
                                  output_shape=lambda s: (None,) + s[1:])(tensor)
                        for name, tensor in zipped_inputs]
                    # Create the model replica and get the outputs
                    outputs = self.inner_model(inputs)
                    if not isinstance(outputs, list):
                        outputs = [outputs]
                    # Save the outputs for merging back together later
                    for l, o in enumerate(outputs):
                        outputs_all[l].append(o)

        # Merge outputs on CPU
        with tf.device('/cpu:0'):
            merged = []
            for outputs, name in zip(outputs_all, output_names):
                # If outputs are numbers without dimensions, add a batch dim.
                def add_dim(tensor):
                    """Add a dimension to tensors that don't have any."""
                    if K.int_shape(tensor) == ():
                        return KL.Lambda(lambda t: K.reshape(t, [1, 1]))(tensor)
                    return tensor
                outputs = list(map(add_dim, outputs))

                # Concatenate
                merged.append(KL.Concatenate(axis=0, name=name)(outputs))
        return merged


if __name__ == "__main__":
    # Testing code below. It creates a simple model to train on MNIST and
    # tries to run it on 2 GPUs. It saves the graph so it can be viewed
    # in TensorBoard. Run it as:
    #
    # python3 parallel_model.py

    import os
    import numpy as np
    import keras.optimizers
    from keras.datasets import mnist
    from keras.preprocessing.image import ImageDataGenerator

    GPU_COUNT = 2

    # Root directory of the project
    ROOT_DIR = os.path.abspath("../")

    # Directory to save logs and trained model
    MODEL_DIR = os.path.join(ROOT_DIR, "logs")

    def build_model(x_train, num_classes):
        # Reset default graph. Keras leaves old ops in the graph,
        # which are ignored for execution but clutter graph
        # visualization in TensorBoard.
        tf.reset_default_graph()

        inputs = KL.Input(shape=x_train.shape[1:], name="input_image")
        x = KL.Conv2D(32, (3, 3), activation='relu', padding="same",
                      name="conv1")(inputs)
        x = KL.Conv2D(64, (3, 3), activation='relu', padding="same",
                      name="conv2")(x)
        x = KL.MaxPooling2D(pool_size=(2, 2), name="pool1")(x)
        x = KL.Flatten(name="flat1")(x)
        x = KL.Dense(128, activation='relu', name="dense1")(x)
        x = KL.Dense(num_classes, activation='softmax', name="dense2")(x)

        return KM.Model(inputs, x, "digit_classifier_model")

    # Load MNIST Data
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    x_train = np.expand_dims(x_train, -1).astype('float32') / 255
    x_test = np.expand_dims(x_test, -1).astype('float32') / 255

    print('x_train shape:', x_train.shape)
    print('x_test shape:', x_test.shape)

    # Build data generator and model
    datagen = ImageDataGenerator()
    model = build_model(x_train, 10)

    # Add multi-GPU support.
    model = ParallelModel(model, GPU_COUNT)

    optimizer = keras.optimizers.SGD(lr=0.01, momentum=0.9, clipnorm=5.0)

    model.compile(loss='sparse_categorical_crossentropy',
                  optimizer=optimizer, metrics=['accuracy'])

    model.summary()

    # Train
    model.fit_generator(
        datagen.flow(x_train, y_train, batch_size=64),
        steps_per_epoch=50, epochs=10, verbose=1,
        validation_data=(x_test, y_test),
        callbacks=[keras.callbacks.TensorBoard(log_dir=MODEL_DIR,
                                               write_graph=True)]
    )
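
The ParallelModel docstring above requires gpu_count > 1, so callers usually decide themselves whether to wrap the model. A small hypothetical helper showing that gating (not part of the file):

from mrcnn.parallel_model import ParallelModel

def maybe_parallelize(keras_model, gpu_count):
    """Wrap a Keras model with ParallelModel only when several GPUs are requested."""
    if gpu_count > 1:
        return ParallelModel(keras_model, gpu_count)
    return keras_model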
mrcnn/utils.py
ADDED
@@ -0,0 +1,839 @@
1 |
+
"""
|
2 |
+
Mask R-CNN
|
3 |
+
Common utility functions and classes.
|
4 |
+
|
5 |
+
Copyright (c) 2017 Matterport, Inc.
|
6 |
+
Licensed under the MIT License (see LICENSE for details)
|
7 |
+
Written by Waleed Abdulla
|
8 |
+
"""
|
9 |
+
|
10 |
+
import sys
|
11 |
+
import os
|
12 |
+
import math
|
13 |
+
import random
|
14 |
+
import numpy as np
|
15 |
+
import tensorflow as tf
|
16 |
+
import scipy
|
17 |
+
import skimage.color
|
18 |
+
import skimage.io
|
19 |
+
import skimage.transform
|
20 |
+
import urllib.request
|
21 |
+
import shutil
|
22 |
+
import warnings
|
23 |
+
|
24 |
+
# URL from which to download the latest COCO trained weights
|
25 |
+
COCO_MODEL_URL = "https://github.com/matterport/Mask_RCNN/releases/download/v2.0/mask_rcnn_coco.h5"
|
26 |
+
|
27 |
+
|
28 |
+
############################################################
|
29 |
+
# Bounding Boxes
|
30 |
+
############################################################
|
31 |
+
|
32 |
+
def extract_bboxes(mask):
|
33 |
+
"""Compute bounding boxes from masks.
|
34 |
+
mask: [height, width, num_instances]. Mask pixels are either 1 or 0.
|
35 |
+
|
36 |
+
Returns: bbox array [num_instances, (y1, x1, y2, x2)].
|
37 |
+
"""
|
38 |
+
boxes = np.zeros([mask.shape[-1], 4], dtype=np.int32)
|
39 |
+
for i in range(mask.shape[-1]):
|
40 |
+
m = mask[:, :, i]
|
41 |
+
# Bounding box.
|
42 |
+
horizontal_indicies = np.where(np.any(m, axis=0))[0]
|
43 |
+
vertical_indicies = np.where(np.any(m, axis=1))[0]
|
44 |
+
if horizontal_indicies.shape[0]:
|
45 |
+
x1, x2 = horizontal_indicies[[0, -1]]
|
46 |
+
y1, y2 = vertical_indicies[[0, -1]]
|
47 |
+
# x2 and y2 should not be part of the box. Increment by 1.
|
48 |
+
x2 += 1
|
49 |
+
y2 += 1
|
50 |
+
else:
|
51 |
+
# No mask for this instance. Might happen due to
|
52 |
+
# resizing or cropping. Set bbox to zeros
|
53 |
+
x1, x2, y1, y2 = 0, 0, 0, 0
|
54 |
+
boxes[i] = np.array([y1, x1, y2, x2])
|
55 |
+
return boxes.astype(np.int32)
|
56 |
+
|
57 |
+
|
58 |
+
def compute_iou(box, boxes, box_area, boxes_area):
|
59 |
+
"""Calculates IoU of the given box with the array of the given boxes.
|
60 |
+
box: 1D vector [y1, x1, y2, x2]
|
61 |
+
boxes: [boxes_count, (y1, x1, y2, x2)]
|
62 |
+
box_area: float. the area of 'box'
|
63 |
+
boxes_area: array of length boxes_count.
|
64 |
+
|
65 |
+
Note: the areas are passed in rather than calculated here for
|
66 |
+
efficency. Calculate once in the caller to avoid duplicate work.
|
67 |
+
"""
|
68 |
+
# Calculate intersection areas
|
69 |
+
y1 = np.maximum(box[0], boxes[:, 0])
|
70 |
+
y2 = np.minimum(box[2], boxes[:, 2])
|
71 |
+
x1 = np.maximum(box[1], boxes[:, 1])
|
72 |
+
x2 = np.minimum(box[3], boxes[:, 3])
|
73 |
+
intersection = np.maximum(x2 - x1, 0) * np.maximum(y2 - y1, 0)
|
74 |
+
union = box_area + boxes_area[:] - intersection[:]
|
75 |
+
iou = intersection / union
|
76 |
+
return iou
|
77 |
+
|
78 |
+
|
79 |
+
def compute_overlaps(boxes1, boxes2):
|
80 |
+
"""Computes IoU overlaps between two sets of boxes.
|
81 |
+
boxes1, boxes2: [N, (y1, x1, y2, x2)].
|
82 |
+
|
83 |
+
For better performance, pass the largest set first and the smaller second.
|
84 |
+
"""
|
85 |
+
# Areas of anchors and GT boxes
|
86 |
+
area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1])
|
87 |
+
area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1])
|
88 |
+
|
89 |
+
# Compute overlaps to generate matrix [boxes1 count, boxes2 count]
|
90 |
+
# Each cell contains the IoU value.
|
91 |
+
overlaps = np.zeros((boxes1.shape[0], boxes2.shape[0]))
|
92 |
+
for i in range(overlaps.shape[1]):
|
93 |
+
box2 = boxes2[i]
|
94 |
+
overlaps[:, i] = compute_iou(box2, boxes1, area2[i], area1)
|
95 |
+
return overlaps
|
96 |
+
|
97 |
+
|
98 |
+
def compute_overlaps_masks(masks1, masks2):
|
99 |
+
'''Computes IoU overlaps between two sets of masks.
|
100 |
+
masks1, masks2: [Height, Width, instances]
|
101 |
+
'''
|
102 |
+
# flatten masks
|
103 |
+
masks1 = np.reshape(masks1 > .5, (-1, masks1.shape[-1])).astype(np.float32)
|
104 |
+
masks2 = np.reshape(masks2 > .5, (-1, masks2.shape[-1])).astype(np.float32)
|
105 |
+
area1 = np.sum(masks1, axis=0)
|
106 |
+
area2 = np.sum(masks2, axis=0)
|
107 |
+
|
108 |
+
# intersections and union
|
109 |
+
intersections = np.dot(masks1.T, masks2)
|
110 |
+
union = area1[:, None] + area2[None, :] - intersections
|
111 |
+
overlaps = intersections / union
|
112 |
+
|
113 |
+
return overlaps
|
114 |
+
|
115 |
+
|
116 |
+
def non_max_suppression(boxes, scores, threshold):
|
117 |
+
"""Performs non-maximum supression and returns indicies of kept boxes.
|
118 |
+
boxes: [N, (y1, x1, y2, x2)]. Notice that (y2, x2) lays outside the box.
|
119 |
+
scores: 1-D array of box scores.
|
120 |
+
threshold: Float. IoU threshold to use for filtering.
|
121 |
+
"""
|
122 |
+
assert boxes.shape[0] > 0
|
123 |
+
if boxes.dtype.kind != "f":
|
124 |
+
boxes = boxes.astype(np.float32)
|
125 |
+
|
126 |
+
# Compute box areas
|
127 |
+
y1 = boxes[:, 0]
|
128 |
+
x1 = boxes[:, 1]
|
129 |
+
y2 = boxes[:, 2]
|
130 |
+
x2 = boxes[:, 3]
|
131 |
+
area = (y2 - y1) * (x2 - x1)
|
132 |
+
|
133 |
+
# Get indicies of boxes sorted by scores (highest first)
|
134 |
+
ixs = scores.argsort()[::-1]
|
135 |
+
|
136 |
+
pick = []
|
137 |
+
while len(ixs) > 0:
|
138 |
+
# Pick top box and add its index to the list
|
139 |
+
i = ixs[0]
|
140 |
+
pick.append(i)
|
141 |
+
# Compute IoU of the picked box with the rest
|
142 |
+
iou = compute_iou(boxes[i], boxes[ixs[1:]], area[i], area[ixs[1:]])
|
143 |
+
# Identify boxes with IoU over the threshold. This
|
144 |
+
# returns indicies into ixs[1:], so add 1 to get
|
145 |
+
# indicies into ixs.
|
146 |
+
remove_ixs = np.where(iou > threshold)[0] + 1
|
147 |
+
# Remove indicies of the picked and overlapped boxes.
|
148 |
+
ixs = np.delete(ixs, remove_ixs)
|
149 |
+
ixs = np.delete(ixs, 0)
|
150 |
+
return np.array(pick, dtype=np.int32)
|
151 |
+
|
152 |
+
|
153 |
+
def apply_box_deltas(boxes, deltas):
|
154 |
+
"""Applies the given deltas to the given boxes.
|
155 |
+
boxes: [N, (y1, x1, y2, x2)]. Note that (y2, x2) is outside the box.
|
156 |
+
deltas: [N, (dy, dx, log(dh), log(dw))]
|
157 |
+
"""
|
158 |
+
boxes = boxes.astype(np.float32)
|
159 |
+
# Convert to y, x, h, w
|
160 |
+
height = boxes[:, 2] - boxes[:, 0]
|
161 |
+
width = boxes[:, 3] - boxes[:, 1]
|
162 |
+
center_y = boxes[:, 0] + 0.5 * height
|
163 |
+
center_x = boxes[:, 1] + 0.5 * width
|
164 |
+
# Apply deltas
|
165 |
+
center_y += deltas[:, 0] * height
|
166 |
+
center_x += deltas[:, 1] * width
|
167 |
+
height *= np.exp(deltas[:, 2])
|
168 |
+
width *= np.exp(deltas[:, 3])
|
169 |
+
# Convert back to y1, x1, y2, x2
|
170 |
+
y1 = center_y - 0.5 * height
|
171 |
+
x1 = center_x - 0.5 * width
|
172 |
+
y2 = y1 + height
|
173 |
+
x2 = x1 + width
|
174 |
+
return np.stack([y1, x1, y2, x2], axis=1)
|
175 |
+
|
176 |
+
|
177 |
+
def box_refinement_graph(box, gt_box):
|
178 |
+
"""Compute refinement needed to transform box to gt_box.
|
179 |
+
box and gt_box are [N, (y1, x1, y2, x2)]
|
180 |
+
"""
|
181 |
+
box = tf.cast(box, tf.float32)
|
182 |
+
gt_box = tf.cast(gt_box, tf.float32)
|
183 |
+
|
184 |
+
height = box[:, 2] - box[:, 0]
|
185 |
+
width = box[:, 3] - box[:, 1]
|
186 |
+
center_y = box[:, 0] + 0.5 * height
|
187 |
+
center_x = box[:, 1] + 0.5 * width
|
188 |
+
|
189 |
+
gt_height = gt_box[:, 2] - gt_box[:, 0]
|
190 |
+
gt_width = gt_box[:, 3] - gt_box[:, 1]
|
191 |
+
gt_center_y = gt_box[:, 0] + 0.5 * gt_height
|
192 |
+
gt_center_x = gt_box[:, 1] + 0.5 * gt_width
|
193 |
+
|
194 |
+
dy = (gt_center_y - center_y) / height
|
195 |
+
dx = (gt_center_x - center_x) / width
|
196 |
+
dh = tf.log(gt_height / height)
|
197 |
+
dw = tf.log(gt_width / width)
|
198 |
+
|
199 |
+
result = tf.stack([dy, dx, dh, dw], axis=1)
|
200 |
+
return result
|
201 |
+
|
202 |
+
|
203 |
+
def box_refinement(box, gt_box):
|
204 |
+
"""Compute refinement needed to transform box to gt_box.
|
205 |
+
box and gt_box are [N, (y1, x1, y2, x2)]. (y2, x2) is
|
206 |
+
assumed to be outside the box.
|
207 |
+
"""
|
208 |
+
box = box.astype(np.float32)
|
209 |
+
gt_box = gt_box.astype(np.float32)
|
210 |
+
|
211 |
+
height = box[:, 2] - box[:, 0]
|
212 |
+
width = box[:, 3] - box[:, 1]
|
213 |
+
center_y = box[:, 0] + 0.5 * height
|
214 |
+
center_x = box[:, 1] + 0.5 * width
|
215 |
+
|
216 |
+
gt_height = gt_box[:, 2] - gt_box[:, 0]
|
217 |
+
gt_width = gt_box[:, 3] - gt_box[:, 1]
|
218 |
+
gt_center_y = gt_box[:, 0] + 0.5 * gt_height
|
219 |
+
gt_center_x = gt_box[:, 1] + 0.5 * gt_width
|
220 |
+
|
221 |
+
dy = (gt_center_y - center_y) / height
|
222 |
+
dx = (gt_center_x - center_x) / width
|
223 |
+
dh = np.log(gt_height / height)
|
224 |
+
dw = np.log(gt_width / width)
|
225 |
+
|
226 |
+
return np.stack([dy, dx, dh, dw], axis=1)
|
227 |
+
|
228 |
+
|
229 |
+
############################################################
|
230 |
+
# Dataset
|
231 |
+
############################################################
|
232 |
+
|
233 |
+
class Dataset(object):
|
234 |
+
"""The base class for dataset classes.
|
235 |
+
To use it, create a new class that adds functions specific to the dataset
|
236 |
+
you want to use. For example:
|
237 |
+
|
238 |
+
class CatsAndDogsDataset(Dataset):
|
239 |
+
def load_cats_and_dogs(self):
|
240 |
+
...
|
241 |
+
def load_mask(self, image_id):
|
242 |
+
...
|
243 |
+
def image_reference(self, image_id):
|
244 |
+
...
|
245 |
+
|
246 |
+
See COCODataset and ShapesDataset as examples.
|
247 |
+
"""
|
248 |
+
|
249 |
+
def __init__(self, class_map=None):
|
250 |
+
self._image_ids = []
|
251 |
+
self.image_info = []
|
252 |
+
# Background is always the first class
|
253 |
+
self.class_info = [{"source": "", "id": 0, "name": "BG"}]
|
254 |
+
self.source_class_ids = {}
|
255 |
+
|
256 |
+
def add_class(self, source, class_id, class_name):
|
257 |
+
assert "." not in source, "Source name cannot contain a dot"
|
258 |
+
# Does the class exist already?
|
259 |
+
for info in self.class_info:
|
260 |
+
if info['source'] == source and info["id"] == class_id:
|
261 |
+
# source.class_id combination already available, skip
|
262 |
+
return
|
263 |
+
# Add the class
|
264 |
+
self.class_info.append({
|
265 |
+
"source": source,
|
266 |
+
"id": class_id,
|
267 |
+
"name": class_name,
|
268 |
+
})
|
269 |
+
|
270 |
+
def add_image(self, source, image_id, path, **kwargs):
|
271 |
+
image_info = {
|
272 |
+
"id": image_id,
|
273 |
+
"source": source,
|
274 |
+
"path": path,
|
275 |
+
}
|
276 |
+
image_info.update(kwargs)
|
277 |
+
self.image_info.append(image_info)
|
278 |
+
|
279 |
+
def image_reference(self, image_id):
|
280 |
+
"""Return a link to the image in its source Website or details about
|
281 |
+
the image that help looking it up or debugging it.
|
282 |
+
|
283 |
+
Override for your dataset, but pass to this function
|
284 |
+
if you encounter images not in your dataset.
|
285 |
+
"""
|
286 |
+
return ""
|
287 |
+
|
288 |
+
def prepare(self, class_map=None):
|
289 |
+
"""Prepares the Dataset class for use.
|
290 |
+
|
291 |
+
TODO: class map is not supported yet. When done, it should handle mapping
|
292 |
+
classes from different datasets to the same class ID.
|
293 |
+
"""
|
294 |
+
|
295 |
+
def clean_name(name):
|
296 |
+
"""Returns a shorter version of object names for cleaner display."""
|
297 |
+
return ",".join(name.split(",")[:1])
|
298 |
+
|
299 |
+
# Build (or rebuild) everything else from the info dicts.
|
300 |
+
self.num_classes = len(self.class_info)
|
301 |
+
self.class_ids = np.arange(self.num_classes)
|
302 |
+
self.class_names = [clean_name(c["name"]) for c in self.class_info]
|
303 |
+
self.num_images = len(self.image_info)
|
304 |
+
self._image_ids = np.arange(self.num_images)
|
305 |
+
|
306 |
+
self.class_from_source_map = {"{}.{}".format(info['source'], info['id']): id
|
307 |
+
for info, id in zip(self.class_info, self.class_ids)}
|
308 |
+
|
309 |
+
# Map sources to class_ids they support
|
310 |
+
self.sources = list(set([i['source'] for i in self.class_info]))
|
311 |
+
self.source_class_ids = {}
|
312 |
+
# Loop over datasets
|
313 |
+
for source in self.sources:
|
314 |
+
self.source_class_ids[source] = []
|
315 |
+
# Find classes that belong to this dataset
|
316 |
+
for i, info in enumerate(self.class_info):
|
317 |
+
# Include BG class in all datasets
|
318 |
+
if i == 0 or source == info['source']:
|
319 |
+
self.source_class_ids[source].append(i)
|
320 |
+
|
321 |
+
def map_source_class_id(self, source_class_id):
|
322 |
+
"""Takes a source class ID and returns the int class ID assigned to it.
|
323 |
+
|
324 |
+
For example:
|
325 |
+
dataset.map_source_class_id("coco.12") -> 23
|
326 |
+
"""
|
327 |
+
return self.class_from_source_map[source_class_id]
|
328 |
+
|
329 |
+
def get_source_class_id(self, class_id, source):
|
330 |
+
"""Map an internal class ID to the corresponding class ID in the source dataset."""
|
331 |
+
info = self.class_info[class_id]
|
332 |
+
assert info['source'] == source
|
333 |
+
return info['id']
|
334 |
+
|
335 |
+
def append_data(self, class_info, image_info):
|
336 |
+
self.external_to_class_id = {}
|
337 |
+
for i, c in enumerate(self.class_info):
|
338 |
+
for ds, id in c["map"]:
|
339 |
+
self.external_to_class_id[ds + str(id)] = i
|
340 |
+
|
341 |
+
# Map external image IDs to internal ones.
|
342 |
+
self.external_to_image_id = {}
|
343 |
+
for i, info in enumerate(self.image_info):
|
344 |
+
self.external_to_image_id[info["ds"] + str(info["id"])] = i
|
345 |
+
|
346 |
+
@property
|
347 |
+
def image_ids(self):
|
348 |
+
return self._image_ids
|
349 |
+
|
350 |
+
def source_image_link(self, image_id):
|
351 |
+
"""Returns the path or URL to the image.
|
352 |
+
Override this to return a URL to the image if it's availble online for easy
|
353 |
+
debugging.
|
354 |
+
"""
|
355 |
+
return self.image_info[image_id]["path"]
|
356 |
+
|
357 |
+
def load_image(self, image_id):
|
358 |
+
"""Load the specified image and return a [H,W,3] Numpy array.
|
359 |
+
"""
|
360 |
+
# Load image
|
361 |
+
image = skimage.io.imread(self.image_info[image_id]['path'])
|
362 |
+
# If grayscale. Convert to RGB for consistency.
|
363 |
+
if image.ndim != 3:
|
364 |
+
image = skimage.color.gray2rgb(image)
|
365 |
+
# If has an alpha channel, remove it for consistency
|
366 |
+
if image.shape[-1] == 4:
|
367 |
+
image = image[..., :3]
|
368 |
+
return image
|
369 |
+
|
370 |
+
def load_mask(self, image_id):
|
371 |
+
"""Load instance masks for the given image.
|
372 |
+
|
373 |
+
Different datasets use different ways to store masks. Override this
|
374 |
+
method to load instance masks and return them in the form of am
|
375 |
+
array of binary masks of shape [height, width, instances].
|
376 |
+
|
377 |
+
Returns:
|
378 |
+
masks: A bool array of shape [height, width, instance count] with
|
379 |
+
a binary mask per instance.
|
380 |
+
class_ids: a 1D array of class IDs of the instance masks.
|
381 |
+
"""
|
382 |
+
# Override this function to load a mask from your dataset.
|
383 |
+
# Otherwise, it returns an empty mask.
|
384 |
+
mask = np.empty([0, 0, 0])
|
385 |
+
class_ids = np.empty([0], np.int32)
|
386 |
+
return mask, class_ids
|
387 |
+
|
388 |
+
|
389 |
+
def resize_image(image, min_dim=None, max_dim=None, mode="square"):
|
390 |
+
"""Resizes an image keeping the aspect ratio unchanged.
|
391 |
+
|
392 |
+
min_dim: if provided, resizes the image such that it's smaller
|
393 |
+
dimension == min_dim
|
394 |
+
max_dim: if provided, ensures that the image longest side doesn't
|
395 |
+
exceed this value.
|
396 |
+
mode: Resizing mode.
|
397 |
+
none: No resizing. Return the image unchanged.
|
398 |
+
square: Resize and pad with zeros to get a square image
|
399 |
+
of size [max_dim, max_dim].
|
400 |
+
pad64: Pads width and height with zeros to make them multiples of 64.
|
401 |
+
If min_dim is provided, it scales the small side to >= min_dim
|
402 |
+
before padding. max_dim is ignored in this mode.
|
403 |
+
The multiple of 64 is needed to ensure smooth scaling of feature
|
404 |
+
maps up and down the 6 levels of the FPN pyramid (2**6=64).
|
405 |
+
|
406 |
+
Returns:
|
407 |
+
image: the resized image
|
408 |
+
window: (y1, x1, y2, x2). If max_dim is provided, padding might
|
409 |
+
be inserted in the returned image. If so, this window is the
|
410 |
+
coordinates of the image part of the full image (excluding
|
411 |
+
the padding). The x2, y2 pixels are not included.
|
412 |
+
scale: The scale factor used to resize the image
|
413 |
+
padding: Padding added to the image [(top, bottom), (left, right), (0, 0)]
|
414 |
+
"""
|
415 |
+
# Keep track of image dtype and return results in the same dtype
|
416 |
+
image_dtype = image.dtype
|
417 |
+
# Default window (y1, x1, y2, x2) and default scale == 1.
|
418 |
+
h, w = image.shape[:2]
|
419 |
+
window = (0, 0, h, w)
|
420 |
+
scale = 1
|
421 |
+
padding = [(0, 0), (0, 0), (0, 0)]
|
422 |
+
|
423 |
+
if mode == "none":
|
424 |
+
return image, window, scale, padding
|
425 |
+
|
426 |
+
# Scale?
|
427 |
+
if min_dim:
|
428 |
+
# Scale up but not down
|
429 |
+
scale = max(1, min_dim / min(h, w))
|
430 |
+
# Does it exceed max dim?
|
431 |
+
if max_dim and mode == "square":
|
432 |
+
image_max = max(h, w)
|
433 |
+
if round(image_max * scale) > max_dim:
|
434 |
+
scale = max_dim / image_max
|
435 |
+
|
436 |
+
# Resize image using bilinear interpolation
|
437 |
+
if scale != 1:
|
438 |
+
image = skimage.transform.resize(
|
439 |
+
image, (round(h * scale), round(w * scale)),
|
440 |
+
order=1, mode="constant", preserve_range=True)
|
441 |
+
# Need padding?
|
442 |
+
if mode == "square":
|
443 |
+
# Get new height and width
|
444 |
+
h, w = image.shape[:2]
|
445 |
+
top_pad = (max_dim - h) // 2
|
446 |
+
bottom_pad = max_dim - h - top_pad
|
447 |
+
left_pad = (max_dim - w) // 2
|
448 |
+
right_pad = max_dim - w - left_pad
|
449 |
+
padding = [(top_pad, bottom_pad), (left_pad, right_pad), (0, 0)]
|
450 |
+
image = np.pad(image, padding, mode='constant', constant_values=0)
|
451 |
+
window = (top_pad, left_pad, h + top_pad, w + left_pad)
|
452 |
+
elif mode == "pad64":
|
453 |
+
h, w = image.shape[:2]
|
454 |
+
# Both sides must be divisible by 64
|
455 |
+
assert min_dim % 64 == 0, "Minimum dimension must be a multiple of 64"
|
456 |
+
# Height
|
457 |
+
if h % 64 > 0:
|
458 |
+
max_h = h - (h % 64) + 64
|
459 |
+
top_pad = (max_h - h) // 2
|
460 |
+
bottom_pad = max_h - h - top_pad
|
461 |
+
else:
|
462 |
+
top_pad = bottom_pad = 0
|
463 |
+
# Width
|
464 |
+
if w % 64 > 0:
|
465 |
+
max_w = w - (w % 64) + 64
|
466 |
+
left_pad = (max_w - w) // 2
|
467 |
+
right_pad = max_w - w - left_pad
|
468 |
+
else:
|
469 |
+
left_pad = right_pad = 0
|
470 |
+
padding = [(top_pad, bottom_pad), (left_pad, right_pad), (0, 0)]
|
471 |
+
image = np.pad(image, padding, mode='constant', constant_values=0)
|
472 |
+
window = (top_pad, left_pad, h + top_pad, w + left_pad)
|
473 |
+
else:
|
474 |
+
raise Exception("Mode {} not supported".format(mode))
|
475 |
+
return image.astype(image_dtype), window, scale, padding
|
476 |
+
|
477 |
+
|
478 |
+
def resize_mask(mask, scale, padding):
|
479 |
+
"""Resizes a mask using the given scale and padding.
|
480 |
+
Typically, you get the scale and padding from resize_image() to
|
481 |
+
ensure both, the image and the mask, are resized consistently.
|
482 |
+
|
483 |
+
scale: mask scaling factor
|
484 |
+
padding: Padding to add to the mask in the form
|
485 |
+
[(top, bottom), (left, right), (0, 0)]
|
486 |
+
"""
|
487 |
+
# Suppress warning from scipy 0.13.0, the output shape of zoom() is
|
488 |
+
# calculated with round() instead of int()
|
489 |
+
with warnings.catch_warnings():
|
490 |
+
warnings.simplefilter("ignore")
|
491 |
+
mask = scipy.ndimage.zoom(mask, zoom=[scale, scale, 1], order=0)
|
492 |
+
mask = np.pad(mask, padding, mode='constant', constant_values=0)
|
493 |
+
return mask
|
494 |
+
|
495 |
+
|
496 |
+
def minimize_mask(bbox, mask, mini_shape):
|
497 |
+
"""Resize masks to a smaller version to reduce memory load.
|
498 |
+
Mini-masks can be resized back to image scale using expand_masks()
|
499 |
+
|
500 |
+
See inspect_data.ipynb notebook for more details.
|
501 |
+
"""
|
502 |
+
mini_mask = np.zeros(mini_shape + (mask.shape[-1],), dtype=bool)
|
503 |
+
for i in range(mask.shape[-1]):
|
504 |
+
# Pick slice and cast to bool in case load_mask() returned wrong dtype
|
505 |
+
m = mask[:, :, i].astype(bool)
|
506 |
+
y1, x1, y2, x2 = bbox[i][:4]
|
507 |
+
m = m[y1:y2, x1:x2]
|
508 |
+
if m.size == 0:
|
509 |
+
raise Exception("Invalid bounding box with area of zero")
|
510 |
+
# Resize with bilinear interpolation
|
511 |
+
m = skimage.transform.resize(m, mini_shape, order=1, mode="constant")
|
512 |
+
mini_mask[:, :, i] = np.around(m).astype(np.bool)
|
513 |
+
return mini_mask
|
514 |
+
|
515 |
+
|
516 |
+
def expand_mask(bbox, mini_mask, image_shape):
|
517 |
+
"""Resizes mini masks back to image size. Reverses the change
|
518 |
+
of minimize_mask().
|
519 |
+
|
520 |
+
See inspect_data.ipynb notebook for more details.
|
521 |
+
"""
|
522 |
+
mask = np.zeros(image_shape[:2] + (mini_mask.shape[-1],), dtype=bool)
|
523 |
+
for i in range(mask.shape[-1]):
|
524 |
+
m = mini_mask[:, :, i]
|
525 |
+
y1, x1, y2, x2 = bbox[i][:4]
|
526 |
+
h = y2 - y1
|
527 |
+
w = x2 - x1
|
528 |
+
# Resize with bilinear interpolation
|
529 |
+
m = skimage.transform.resize(m, (h, w), order=1, mode="constant")
|
530 |
+
mask[y1:y2, x1:x2, i] = np.around(m).astype(np.bool)
|
531 |
+
return mask
|
532 |
+
|
533 |
+
|
534 |
+
# TODO: Build and use this function to reduce code duplication
|
535 |
+
def mold_mask(mask, config):
|
536 |
+
pass
|
537 |
+
|
538 |
+
|
539 |
+
def unmold_mask(mask, bbox, image_shape):
|
540 |
+
"""Converts a mask generated by the neural network to a format similar
|
541 |
+
to its original shape.
|
542 |
+
mask: [height, width] of type float. A small, typically 28x28 mask.
|
543 |
+
bbox: [y1, x1, y2, x2]. The box to fit the mask in.
|
544 |
+
|
545 |
+
Returns a binary mask with the same size as the original image.
|
546 |
+
"""
|
547 |
+
threshold = 0.5
|
548 |
+
y1, x1, y2, x2 = bbox
|
549 |
+
mask = skimage.transform.resize(mask, (y2 - y1, x2 - x1), order=1, mode="constant")
|
550 |
+
mask = np.where(mask >= threshold, 1, 0).astype(np.bool)
|
551 |
+
|
552 |
+
# Put the mask in the right location.
|
553 |
+
full_mask = np.zeros(image_shape[:2], dtype=np.bool)
|
554 |
+
full_mask[y1:y2, x1:x2] = mask
|
555 |
+
return full_mask
|
556 |
+
|
557 |
+
|
558 |
+
############################################################
|
559 |
+
# Anchors
|
560 |
+
############################################################
|
561 |
+
|
562 |
+
def generate_anchors(scales, ratios, shape, feature_stride, anchor_stride):
|
563 |
+
"""
|
564 |
+
scales: 1D array of anchor sizes in pixels. Example: [32, 64, 128]
|
565 |
+
ratios: 1D array of anchor ratios of width/height. Example: [0.5, 1, 2]
|
566 |
+
shape: [height, width] spatial shape of the feature map over which
|
567 |
+
to generate anchors.
|
568 |
+
feature_stride: Stride of the feature map relative to the image in pixels.
|
569 |
+
anchor_stride: Stride of anchors on the feature map. For example, if the
|
570 |
+
value is 2 then generate anchors for every other feature map pixel.
|
571 |
+
"""
|
572 |
+
# Get all combinations of scales and ratios
|
573 |
+
scales, ratios = np.meshgrid(np.array(scales), np.array(ratios))
|
574 |
+
scales = scales.flatten()
|
575 |
+
ratios = ratios.flatten()
|
576 |
+
|
577 |
+
# Enumerate heights and widths from scales and ratios
|
578 |
+
heights = scales / np.sqrt(ratios)
|
579 |
+
widths = scales * np.sqrt(ratios)
|
580 |
+
|
581 |
+
# Enumerate shifts in feature space
|
582 |
+
shifts_y = np.arange(0, shape[0], anchor_stride) * feature_stride
|
583 |
+
shifts_x = np.arange(0, shape[1], anchor_stride) * feature_stride
|
584 |
+
shifts_x, shifts_y = np.meshgrid(shifts_x, shifts_y)
|
585 |
+
|
586 |
+
# Enumerate combinations of shifts, widths, and heights
|
587 |
+
box_widths, box_centers_x = np.meshgrid(widths, shifts_x)
|
588 |
+
box_heights, box_centers_y = np.meshgrid(heights, shifts_y)
|
589 |
+
|
590 |
+
# Reshape to get a list of (y, x) and a list of (h, w)
|
591 |
+
box_centers = np.stack(
|
592 |
+
[box_centers_y, box_centers_x], axis=2).reshape([-1, 2])
|
593 |
+
box_sizes = np.stack([box_heights, box_widths], axis=2).reshape([-1, 2])
|
594 |
+
|
595 |
+
# Convert to corner coordinates (y1, x1, y2, x2)
|
596 |
+
boxes = np.concatenate([box_centers - 0.5 * box_sizes,
|
597 |
+
box_centers + 0.5 * box_sizes], axis=1)
|
598 |
+
return boxes
|
599 |
+
|
600 |
+
|
601 |
+
def generate_pyramid_anchors(scales, ratios, feature_shapes, feature_strides,
|
602 |
+
anchor_stride):
|
603 |
+
"""Generate anchors at different levels of a feature pyramid. Each scale
|
604 |
+
is associated with a level of the pyramid, but each ratio is used in
|
605 |
+
all levels of the pyramid.
|
606 |
+
|
607 |
+
Returns:
|
608 |
+
anchors: [N, (y1, x1, y2, x2)]. All generated anchors in one array. Sorted
|
609 |
+
with the same order of the given scales. So, anchors of scale[0] come
|
610 |
+
first, then anchors of scale[1], and so on.
|
611 |
+
"""
|
612 |
+
# Anchors
|
613 |
+
# [anchor_count, (y1, x1, y2, x2)]
|
614 |
+
anchors = []
|
615 |
+
for i in range(len(scales)):
|
616 |
+
anchors.append(generate_anchors(scales[i], ratios, feature_shapes[i],
|
617 |
+
feature_strides[i], anchor_stride))
|
618 |
+
return np.concatenate(anchors, axis=0)
|
############################################################
# Miscellaneous
############################################################

def trim_zeros(x):
    """It's common to have tensors larger than the available data and
    pad with zeros. This function removes rows that are all zeros.

    x: [rows, columns].
    """
    assert len(x.shape) == 2
    return x[~np.all(x == 0, axis=1)]


def compute_matches(gt_boxes, gt_class_ids, gt_masks,
                    pred_boxes, pred_class_ids, pred_scores, pred_masks,
                    iou_threshold=0.5, score_threshold=0.0):
    """Finds matches between prediction and ground truth instances.

    Returns:
        gt_match: 1-D array. For each GT box it has the index of the matched
            predicted box.
        pred_match: 1-D array. For each predicted box, it has the index of
            the matched ground truth box.
        overlaps: [pred_boxes, gt_boxes] IoU overlaps.
    """
    # Trim zero padding
    # TODO: cleaner to do zero unpadding upstream
    gt_boxes = trim_zeros(gt_boxes)
    gt_masks = gt_masks[..., :gt_boxes.shape[0]]
    pred_boxes = trim_zeros(pred_boxes)
    pred_scores = pred_scores[:pred_boxes.shape[0]]
    # Sort predictions by score from high to low
    indices = np.argsort(pred_scores)[::-1]
    pred_boxes = pred_boxes[indices]
    pred_class_ids = pred_class_ids[indices]
    pred_scores = pred_scores[indices]
    pred_masks = pred_masks[..., indices]

    # Compute IoU overlaps [pred_masks, gt_masks]
    overlaps = compute_overlaps_masks(pred_masks, gt_masks)

    # Loop through predictions and find matching ground truth boxes
    match_count = 0
    pred_match = -1 * np.ones([pred_boxes.shape[0]])
    gt_match = -1 * np.ones([gt_boxes.shape[0]])
    for i in range(len(pred_boxes)):
        # Find best matching ground truth box
        # 1. Sort matches by score
        sorted_ixs = np.argsort(overlaps[i])[::-1]
        # 2. Remove low scores
        low_score_idx = np.where(overlaps[i, sorted_ixs] < score_threshold)[0]
        if low_score_idx.size > 0:
            sorted_ixs = sorted_ixs[:low_score_idx[0]]
        # 3. Find the match
        for j in sorted_ixs:
            # If ground truth box is already matched, go to next one
            if gt_match[j] > 0:
                continue
            # If we reach IoU smaller than the threshold, end the loop
            iou = overlaps[i, j]
            if iou < iou_threshold:
                break
            # Do we have a match?
            if pred_class_ids[i] == gt_class_ids[j]:
                match_count += 1
                gt_match[j] = i
                pred_match[i] = j
                break

    return gt_match, pred_match, overlaps


def compute_ap(gt_boxes, gt_class_ids, gt_masks,
               pred_boxes, pred_class_ids, pred_scores, pred_masks,
               iou_threshold=0.5):
    """Compute Average Precision at a set IoU threshold (default 0.5).

    Returns:
    mAP: Mean Average Precision
    precisions: List of precisions at different class score thresholds.
    recalls: List of recall values at different class score thresholds.
    overlaps: [pred_boxes, gt_boxes] IoU overlaps.
    """
    # Get matches and overlaps
    gt_match, pred_match, overlaps = compute_matches(
        gt_boxes, gt_class_ids, gt_masks,
        pred_boxes, pred_class_ids, pred_scores, pred_masks,
        iou_threshold)

    # Compute precision and recall at each prediction box step
    precisions = np.cumsum(pred_match > -1) / (np.arange(len(pred_match)) + 1)
    recalls = np.cumsum(pred_match > -1).astype(np.float32) / len(gt_match)

    # Pad with start and end values to simplify the math
    precisions = np.concatenate([[0], precisions, [0]])
    recalls = np.concatenate([[0], recalls, [1]])

    # Ensure precision values decrease but don't increase. This way, the
    # precision value at each recall threshold is the maximum it can be
    # for all following recall thresholds, as specified by the VOC paper.
    for i in range(len(precisions) - 2, -1, -1):
        precisions[i] = np.maximum(precisions[i], precisions[i + 1])

    # Compute mean AP over recall range
    indices = np.where(recalls[:-1] != recalls[1:])[0] + 1
    mAP = np.sum((recalls[indices] - recalls[indices - 1]) *
                 precisions[indices])

    return mAP, precisions, recalls, overlaps
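Also not part of the commit: a toy call of the evaluation helpers above with synthetic data in the shapes the functions expect, just to show how the pieces fit together.

import numpy as np
from mrcnn import utils

H, W = 320, 320
gt_boxes = np.array([[50, 50, 120, 120]])          # [N, (y1, x1, y2, x2)]
gt_class_ids = np.array([1])
gt_masks = np.zeros([H, W, 1], dtype=bool)
gt_masks[50:120, 50:120, 0] = True

pred_boxes = np.array([[52, 48, 118, 122]])
pred_class_ids = np.array([1])
pred_scores = np.array([0.98])
pred_masks = gt_masks.copy()                       # pretend a near-perfect predicted mask

mAP, precisions, recalls, overlaps = utils.compute_ap(
    gt_boxes, gt_class_ids, gt_masks,
    pred_boxes, pred_class_ids, pred_scores, pred_masks)
print(mAP)                                         # 1.0 for this toy example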
def compute_recall(pred_boxes, gt_boxes, iou):
    """Compute the recall at the given IoU threshold. It's an indication
    of how many GT boxes were found by the given prediction boxes.

    pred_boxes: [N, (y1, x1, y2, x2)] in image coordinates
    gt_boxes: [N, (y1, x1, y2, x2)] in image coordinates
    """
    # Measure overlaps
    overlaps = compute_overlaps(pred_boxes, gt_boxes)
    iou_max = np.max(overlaps, axis=1)
    iou_argmax = np.argmax(overlaps, axis=1)
    positive_ids = np.where(iou_max >= iou)[0]
    matched_gt_boxes = iou_argmax[positive_ids]

    recall = len(set(matched_gt_boxes)) / gt_boxes.shape[0]
    return recall, positive_ids


# ## Batch Slicing
# Some custom layers support a batch size of 1 only, and require a lot of work
# to support batches greater than 1. This function slices an input tensor
# across the batch dimension and feeds batches of size 1. Effectively,
# an easy way to support batches > 1 quickly with little code modification.
# In the long run, it's more efficient to modify the code to support large
# batches and get rid of this function. Consider this a temporary solution.
def batch_slice(inputs, graph_fn, batch_size, names=None):
    """Splits inputs into slices and feeds each slice to a copy of the given
    computation graph and then combines the results. It allows you to run a
    graph on a batch of inputs even if the graph is written to support one
    instance only.

    inputs: list of tensors. All must have the same first dimension length
    graph_fn: A function that returns a TF tensor that's part of a graph.
    batch_size: number of slices to divide the data into.
    names: If provided, assigns names to the resulting tensors.
    """
    if not isinstance(inputs, list):
        inputs = [inputs]

    outputs = []
    for i in range(batch_size):
        inputs_slice = [x[i] for x in inputs]
        output_slice = graph_fn(*inputs_slice)
        if not isinstance(output_slice, (tuple, list)):
            output_slice = [output_slice]
        outputs.append(output_slice)
    # Change outputs from a list of slices where each is
    # a list of outputs to a list of outputs and each has
    # a list of slices
    outputs = list(zip(*outputs))

    if names is None:
        names = [None] * len(outputs)

    result = [tf.stack(o, axis=0, name=n)
              for o, n in zip(outputs, names)]
    if len(result) == 1:
        result = result[0]

    return result


def download_trained_weights(coco_model_path, verbose=1):
    """Download COCO trained weights from Releases.

    coco_model_path: local path of COCO trained weights
    """
    if verbose > 0:
        print("Downloading pretrained model to " + coco_model_path + " ...")
    with urllib.request.urlopen(COCO_MODEL_URL) as resp, open(coco_model_path, 'wb') as out:
        shutil.copyfileobj(resp, out)
    if verbose > 0:
        print("... done downloading pretrained model!")


def norm_boxes(boxes, shape):
    """Converts boxes from pixel coordinates to normalized coordinates.
    boxes: [N, (y1, x1, y2, x2)] in pixel coordinates
    shape: [..., (height, width)] in pixels

    Note: In pixel coordinates (y2, x2) is outside the box. But in normalized
    coordinates it's inside the box.

    Returns:
        [N, (y1, x1, y2, x2)] in normalized coordinates
    """
    h, w = shape
    scale = np.array([h - 1, w - 1, h - 1, w - 1])
    shift = np.array([0, 0, 1, 1])
    return np.divide((boxes - shift), scale).astype(np.float32)


def denorm_boxes(boxes, shape):
    """Converts boxes from normalized coordinates to pixel coordinates.
    boxes: [N, (y1, x1, y2, x2)] in normalized coordinates
    shape: [..., (height, width)] in pixels

    Note: In pixel coordinates (y2, x2) is outside the box. But in normalized
    coordinates it's inside the box.

    Returns:
        [N, (y1, x1, y2, x2)] in pixel coordinates
    """
    h, w = shape
    scale = np.array([h - 1, w - 1, h - 1, w - 1])
    shift = np.array([0, 0, 1, 1])
    return np.around(np.multiply(boxes, scale) + shift).astype(np.int32)
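A quick round-trip check of the coordinate helpers above (illustrative only, not part of the diff):

import numpy as np
from mrcnn import utils

boxes_px = np.array([[0, 0, 320, 320], [10, 20, 110, 220]])
boxes_norm = utils.norm_boxes(boxes_px, shape=(320, 320))
print(boxes_norm)                                  # the full-image box becomes [0, 0, 1, 1]
print(utils.denorm_boxes(boxes_norm, (320, 320)))  # recovers the original pixel boxes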
mrcnn/visualize.py
ADDED
@@ -0,0 +1,452 @@
"""
Mask R-CNN
Display and Visualization Functions.

Copyright (c) 2017 Matterport, Inc.
Licensed under the MIT License (see LICENSE for details)
Written by Waleed Abdulla
"""

import os
import sys
import logging
import random
import itertools
import colorsys

import numpy as np
from skimage.measure import find_contours
import matplotlib.pyplot as plt
from matplotlib import patches, lines
from matplotlib.patches import Polygon
import IPython.display

# Root directory of the project
ROOT_DIR = os.path.abspath("../")

# Import Mask RCNN
sys.path.append(ROOT_DIR)  # To find local version of the library
from mrcnn import utils


############################################################
# Visualization
############################################################

def display_images(images, titles=None, cols=4, cmap=None, norm=None,
                   interpolation=None):
    """Display the given set of images, optionally with titles.
    images: list or array of image tensors in HWC format.
    titles: optional. A list of titles to display with each image.
    cols: number of images per row
    cmap: Optional. Color map to use. For example, "Blues".
    norm: Optional. A Normalize instance to map values to colors.
    interpolation: Optional. Image interpolation to use for display.
    """
    # titles = titles if titles is not None else [""] * len(images)
    # rows = len(images) // cols + 1
    # plt.figure(figsize=(14, 14 * rows // cols))
    # i = 1
    # for image, title in zip(images, titles):
    #     plt.subplot(rows, cols, i)
    #     plt.title(title, fontsize=9)
    #     plt.axis('off')
    #     plt.imshow(image.astype(np.uint8), cmap=cmap,
    #                norm=norm, interpolation=interpolation)
    #     i += 1
    # plt.show()
    pass


def random_colors(N, bright=True):
    """
    Generate random colors.
    To get visually distinct colors, generate them in HSV space then
    convert to RGB.
    """
    brightness = 1.0 if bright else 0.7
    hsv = [(i / N, 1, brightness) for i in range(N)]
    colors = list(map(lambda c: colorsys.hsv_to_rgb(*c), hsv))
    random.shuffle(colors)
    return colors


def apply_mask(image, mask, color, alpha=0.5):
    """Apply the given mask to the image.
    """
    for c in range(3):
        image[:, :, c] = np.where(mask == 1,
                                  image[:, :, c] *
                                  (1 - alpha) + alpha * color[c] * 255,
                                  image[:, :, c])
    return image


def display_instances(image, boxes, masks, class_ids, class_names,
                      scores=None, title="",
                      figsize=(16, 16), ax=None):
    """
    boxes: [num_instance, (y1, x1, y2, x2, class_id)] in image coordinates.
    masks: [height, width, num_instances]
    class_ids: [num_instances]
    class_names: list of class names of the dataset
    scores: (optional) confidence scores for each box
    figsize: (optional) the size of the image.
    """
    # Number of instances
    N = boxes.shape[0]
    if not N:
        print("\n*** No instances to display *** \n")
    else:
        assert boxes.shape[0] == masks.shape[-1] == class_ids.shape[0]

    # if not ax:
    #     _, ax = plt.subplots(1, figsize=figsize)

    # Generate random colors
    colors = random_colors(N)

    # Show area outside image boundaries.
    height, width = image.shape[:2]
    # ax.set_ylim(height + 10, -10)
    # ax.set_xlim(-10, width + 10)
    # ax.axis('off')
    # ax.set_title(title)

    masked_image = image.astype(np.uint32).copy()
    for i in range(N):
        color = colors[i]

        # Bounding box
        if not np.any(boxes[i]):
            # Skip this instance. Has no bbox. Likely lost in image cropping.
            continue
        y1, x1, y2, x2 = boxes[i]
        p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2,
                              alpha=0.7, linestyle="dashed",
                              edgecolor=color, facecolor='none')
        # ax.add_patch(p)

        # Label
        class_id = class_ids[i]
        score = scores[i] if scores is not None else None
        label = class_names[class_id]
        x = random.randint(x1, (x1 + x2) // 2)
        caption = "{} {:.3f}".format(label, score) if score else label
        # ax.text(x1, y1 + 8, caption,
        #         color='w', size=11, backgroundcolor="none")

        # Mask
        mask = masks[:, :, i]
        masked_image = apply_mask(masked_image, mask, color)

        # Mask Polygon
        # Pad to ensure proper polygons for masks that touch image edges.
        padded_mask = np.zeros(
            (mask.shape[0] + 2, mask.shape[1] + 2), dtype=np.uint8)
        padded_mask[1:-1, 1:-1] = mask
        contours = find_contours(padded_mask, 0.5)
        for verts in contours:
            # Subtract the padding and flip (y, x) to (x, y)
            verts = np.fliplr(verts) - 1
            p = Polygon(verts, facecolor="none", edgecolor=color)
            # ax.add_patch(p)
    # ax.imshow(masked_image.astype(np.uint8))
    # plt.show()
    return masked_image.astype(np.uint8)


def draw_rois(image, rois, refined_rois, mask, class_ids, class_names, limit=10):
    """
    anchors: [n, (y1, x1, y2, x2)] list of anchors in image coordinates.
    proposals: [n, 4] the same anchors but refined to fit objects better.
    """
    masked_image = image.copy()

    # Pick random anchors in case there are too many.
    ids = np.arange(rois.shape[0], dtype=np.int32)
    ids = np.random.choice(
        ids, limit, replace=False) if ids.shape[0] > limit else ids

    fig, ax = plt.subplots(1, figsize=(12, 12))
    if rois.shape[0] > limit:
        plt.title("Showing {} random ROIs out of {}".format(
            len(ids), rois.shape[0]))
    else:
        plt.title("{} ROIs".format(len(ids)))

    # Show area outside image boundaries.
    ax.set_ylim(image.shape[0] + 20, -20)
    ax.set_xlim(-50, image.shape[1] + 20)
    ax.axis('off')

    for i, id in enumerate(ids):
        color = np.random.rand(3)
        class_id = class_ids[id]
        # ROI
        y1, x1, y2, x2 = rois[id]
        p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2,
                              edgecolor=color if class_id else "gray",
                              facecolor='none', linestyle="dashed")
        ax.add_patch(p)
        # Refined ROI
        if class_id:
            ry1, rx1, ry2, rx2 = refined_rois[id]
            p = patches.Rectangle((rx1, ry1), rx2 - rx1, ry2 - ry1, linewidth=2,
                                  edgecolor=color, facecolor='none')
            ax.add_patch(p)
            # Connect the top-left corners of the anchor and proposal for easy visualization
            ax.add_line(lines.Line2D([x1, rx1], [y1, ry1], color=color))

            # Label
            label = class_names[class_id]
            ax.text(rx1, ry1 + 8, "{}".format(label),
                    color='w', size=11, backgroundcolor="none")

            # Mask
            m = utils.unmold_mask(mask[id], rois[id]
                                  [:4].astype(np.int32), image.shape)
            masked_image = apply_mask(masked_image, m, color)

    # ax.imshow(masked_image)

    # Print stats
    print("Positive ROIs: ", class_ids[class_ids > 0].shape[0])
    print("Negative ROIs: ", class_ids[class_ids == 0].shape[0])
    print("Positive Ratio: {:.2f}".format(
        class_ids[class_ids > 0].shape[0] / class_ids.shape[0]))


# TODO: Replace with matplotlib equivalent?
def draw_box(image, box, color):
    """Draw 3-pixel width bounding boxes on the given image array.
    color: list of 3 int values for RGB.
    """
    y1, x1, y2, x2 = box
    image[y1:y1 + 2, x1:x2] = color
    image[y2:y2 + 2, x1:x2] = color
    image[y1:y2, x1:x1 + 2] = color
    image[y1:y2, x2:x2 + 2] = color
    return image


def display_top_masks(image, mask, class_ids, class_names, limit=4):
    """Display the given image and the top few class masks."""
    to_display = []
    titles = []
    to_display.append(image)
    titles.append("H x W={}x{}".format(image.shape[0], image.shape[1]))
    # Pick top prominent classes in this image
    unique_class_ids = np.unique(class_ids)
    mask_area = [np.sum(mask[:, :, np.where(class_ids == i)[0]])
                 for i in unique_class_ids]
    top_ids = [v[0] for v in sorted(zip(unique_class_ids, mask_area),
                                    key=lambda r: r[1], reverse=True) if v[1] > 0]
    # Generate images and titles
    for i in range(limit):
        class_id = top_ids[i] if i < len(top_ids) else -1
        # Pull masks of instances belonging to the same class.
        m = mask[:, :, np.where(class_ids == class_id)[0]]
        m = np.sum(m * np.arange(1, m.shape[-1] + 1), -1)
        to_display.append(m)
        titles.append(class_names[class_id] if class_id != -1 else "-")
    display_images(to_display, titles=titles, cols=limit + 1, cmap="Blues_r")


def plot_precision_recall(AP, precisions, recalls):
    """Draw the precision-recall curve.

    AP: Average precision at IoU >= 0.5
    precisions: list of precision values
    recalls: list of recall values
    """
    # Plot the Precision-Recall curve
    _, ax = plt.subplots(1)
    ax.set_title("Precision-Recall Curve. AP@50 = {:.3f}".format(AP))
    ax.set_ylim(0, 1.1)
    ax.set_xlim(0, 1.1)
    _ = ax.plot(recalls, precisions)


def plot_overlaps(gt_class_ids, pred_class_ids, pred_scores,
                  overlaps, class_names, threshold=0.5):
    """Draw a grid showing how ground truth objects are classified.
    gt_class_ids: [N] int. Ground truth class IDs
    pred_class_id: [N] int. Predicted class IDs
    pred_scores: [N] float. The probability scores of predicted classes
    overlaps: [pred_boxes, gt_boxes] IoU overlaps of predictions and GT boxes.
    class_names: list of all class names in the dataset
    threshold: Float. The prediction probability required to predict a class
    """
    gt_class_ids = gt_class_ids[gt_class_ids != 0]
    pred_class_ids = pred_class_ids[pred_class_ids != 0]

    plt.figure(figsize=(12, 10))
    plt.imshow(overlaps, interpolation='nearest', cmap=plt.cm.Blues)
    plt.yticks(np.arange(len(pred_class_ids)),
               ["{} ({:.2f})".format(class_names[int(id)], pred_scores[i])
                for i, id in enumerate(pred_class_ids)])
    plt.xticks(np.arange(len(gt_class_ids)),
               [class_names[int(id)] for id in gt_class_ids], rotation=90)

    thresh = overlaps.max() / 2.
    for i, j in itertools.product(range(overlaps.shape[0]),
                                  range(overlaps.shape[1])):
        text = ""
        if overlaps[i, j] > threshold:
            text = "match" if gt_class_ids[j] == pred_class_ids[i] else "wrong"
        color = ("white" if overlaps[i, j] > thresh
                 else "black" if overlaps[i, j] > 0
                 else "grey")
        plt.text(j, i, "{:.3f}\n{}".format(overlaps[i, j], text),
                 horizontalalignment="center", verticalalignment="center",
                 fontsize=9, color=color)

    plt.tight_layout()
    plt.xlabel("Ground Truth")
    plt.ylabel("Predictions")


def draw_boxes(image, boxes=None, refined_boxes=None,
               masks=None, captions=None, visibilities=None,
               title="", ax=None):
    """Draw bounding boxes and segmentation masks with different
    customizations.

    boxes: [N, (y1, x1, y2, x2, class_id)] in image coordinates.
    refined_boxes: Like boxes, but draw with solid lines to show
        that they're the result of refining 'boxes'.
    masks: [N, height, width]
    captions: List of N titles to display on each box
    visibilities: (optional) List of values of 0, 1, or 2. Determine how
        prominent each bounding box should be.
    title: An optional title to show over the image
    ax: (optional) Matplotlib axis to draw on.
    """
    # Number of boxes
    assert boxes is not None or refined_boxes is not None
    N = boxes.shape[0] if boxes is not None else refined_boxes.shape[0]

    # Matplotlib Axis
    if not ax:
        _, ax = plt.subplots(1, figsize=(12, 12))

    # Generate random colors
    colors = random_colors(N)

    # Show area outside image boundaries.
    margin = image.shape[0] // 10
    ax.set_ylim(image.shape[0] + margin, -margin)
    ax.set_xlim(-margin, image.shape[1] + margin)
    ax.axis('off')

    ax.set_title(title)

    masked_image = image.astype(np.uint32).copy()
    for i in range(N):
        # Box visibility
        visibility = visibilities[i] if visibilities is not None else 1
        if visibility == 0:
            color = "gray"
            style = "dotted"
            alpha = 0.5
        elif visibility == 1:
            color = colors[i]
            style = "dotted"
            alpha = 1
        elif visibility == 2:
            color = colors[i]
            style = "solid"
            alpha = 1

        # Boxes
        if boxes is not None:
            if not np.any(boxes[i]):
                # Skip this instance. Has no bbox. Likely lost in cropping.
                continue
            y1, x1, y2, x2 = boxes[i]
            p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2,
                                  alpha=alpha, linestyle=style,
                                  edgecolor=color, facecolor='none')
            ax.add_patch(p)

        # Refined boxes
        if refined_boxes is not None and visibility > 0:
            ry1, rx1, ry2, rx2 = refined_boxes[i].astype(np.int32)
            p = patches.Rectangle((rx1, ry1), rx2 - rx1, ry2 - ry1, linewidth=2,
                                  edgecolor=color, facecolor='none')
            ax.add_patch(p)
            # Connect the top-left corners of the anchor and proposal
            if boxes is not None:
                ax.add_line(lines.Line2D([x1, rx1], [y1, ry1], color=color))

        # Captions
        if captions is not None:
            caption = captions[i]
            # If there are refined boxes, display captions on them
            if refined_boxes is not None:
                y1, x1, y2, x2 = ry1, rx1, ry2, rx2
            x = random.randint(x1, (x1 + x2) // 2)
            ax.text(x1, y1, caption, size=11, verticalalignment='top',
                    color='w', backgroundcolor="none",
                    bbox={'facecolor': color, 'alpha': 0.5,
                          'pad': 2, 'edgecolor': 'none'})

        # Masks
        if masks is not None:
            mask = masks[:, :, i]
            masked_image = apply_mask(masked_image, mask, color)
            # Mask Polygon
            # Pad to ensure proper polygons for masks that touch image edges.
            padded_mask = np.zeros(
                (mask.shape[0] + 2, mask.shape[1] + 2), dtype=np.uint8)
            padded_mask[1:-1, 1:-1] = mask
            contours = find_contours(padded_mask, 0.5)
            for verts in contours:
                # Subtract the padding and flip (y, x) to (x, y)
                verts = np.fliplr(verts) - 1
                p = Polygon(verts, facecolor="none", edgecolor=color)
                ax.add_patch(p)
    ax.imshow(masked_image.astype(np.uint8))


def display_table(table):
    """Display values in a table format.
    table: an iterable of rows, and each row is an iterable of values.
    """
    html = ""
    for row in table:
        row_html = ""
        for col in row:
            row_html += "<td>{:40}</td>".format(str(col))
        html += "<tr>" + row_html + "</tr>"
    html = "<table>" + html + "</table>"
    # IPython.display.display(IPython.display.HTML(html))


def display_weight_stats(model):
    """Scans all the weights in the model and returns a list of tuples
    that contain stats about each weight.
    """
    layers = model.get_trainable_layers()
    table = [["WEIGHT NAME", "SHAPE", "MIN", "MAX", "STD"]]
    for l in layers:
        weight_values = l.get_weights()  # list of Numpy arrays
        weight_tensors = l.weights  # list of TF tensors
        for i, w in enumerate(weight_values):
            weight_name = weight_tensors[i].name
            # Detect problematic layers. Exclude biases of conv layers.
            alert = ""
            if w.min() == w.max() and not (l.__class__.__name__ == "Conv2D" and i == 1):
                alert += "<span style='color:red'>*** dead?</span>"
            if np.abs(w.min()) > 1000 or np.abs(w.max()) > 1000:
                alert += "<span style='color:red'>*** Overflow?</span>"
            # Add row
            table.append([
                weight_name + alert,
                str(w.shape),
                "{:+9.4f}".format(w.min()),
                "{:+10.4f}".format(w.max()),
                "{:+9.4f}".format(w.std()),
            ])
    # display_table(table)
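Not part of the diff: display_instances above has its matplotlib calls commented out so it can run headless and simply return the composited uint8 array, which is what the Gradio app displays. A synthetic-data sketch:

import numpy as np
from mrcnn import visualize

image = np.zeros([320, 320, 3], dtype=np.uint8)
boxes = np.array([[40, 40, 160, 200]])             # [N, (y1, x1, y2, x2)]
masks = np.zeros([320, 320, 1], dtype=np.uint8)
masks[40:160, 40:200, 0] = 1
class_ids = np.array([1])
class_names = ['BG', 'building']
scores = np.array([0.9])

rendered = visualize.display_instances(image, boxes, masks, class_ids,
                                        class_names, scores)
print(rendered.shape, rendered.dtype)              # (320, 320, 3) uint8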
requirements.txt
ADDED
@@ -0,0 +1,106 @@
alabaster==0.7.12
argon2-cffi @ file:///home/conda/feedstock_root/build_artifacts/argon2-cffi_1633990451307/work
async-generator==1.10
attrs @ file:///home/conda/feedstock_root/build_artifacts/attrs_1659291887007/work
Babel==2.11.0
backcall @ file:///home/conda/feedstock_root/build_artifacts/backcall_1592338393461/work
backports.functools-lru-cache @ file:///home/conda/feedstock_root/build_artifacts/backports.functools_lru_cache_1618230623929/work
bleach==1.5.0
certifi==2022.9.24
cffi @ file:///home/conda/feedstock_root/build_artifacts/cffi_1631636256886/work
charset-normalizer==2.0.12
cycler==0.11.0
Cython==0.29.32
dataclasses==0.8
decorator==4.4.2
defusedxml @ file:///home/conda/feedstock_root/build_artifacts/defusedxml_1615232257335/work
docutils==0.18.1
entrypoints @ file:///home/conda/feedstock_root/build_artifacts/entrypoints_1643888246732/work
h5py==2.10.0
html5lib==0.9999999
idna==3.4
imageio==2.15.0
imagesize==1.4.1
imgaug==0.4.0
importlib-metadata==4.8.3
ipykernel @ file:///home/conda/feedstock_root/build_artifacts/ipykernel_1620912934572/work/dist/ipykernel-5.5.5-py3-none-any.whl
ipyparallel==8.2.1
ipython @ file:///home/conda/feedstock_root/build_artifacts/ipython_1609697613279/work
ipython-genutils==0.2.0
ipywidgets==7.7.2
jedi @ file:///home/conda/feedstock_root/build_artifacts/jedi_1605054537831/work
Jinja2 @ file:///home/conda/feedstock_root/build_artifacts/jinja2_1636510082894/work
jsonschema==3.0.2
jupyter-client @ file:///home/conda/feedstock_root/build_artifacts/jupyter_client_1642858610849/work
jupyter-core @ file:///home/conda/feedstock_root/build_artifacts/jupyter_core_1631852698933/work
jupyterlab-pygments @ file:///home/conda/feedstock_root/build_artifacts/jupyterlab_pygments_1601375948261/work
jupyterlab-widgets==1.1.1
Keras==2.0.8
kiwisolver==1.3.1
Markdown==3.3.7
MarkupSafe @ file:///home/conda/feedstock_root/build_artifacts/markupsafe_1621455668064/work
matplotlib==3.3.4
mistune @ file:///home/conda/feedstock_root/build_artifacts/mistune_1624941293729/work
nb-conda @ file:///home/conda/feedstock_root/build_artifacts/nb_conda_1611345535156/work
nb-conda-kernels @ file:///home/conda/feedstock_root/build_artifacts/nb_conda_kernels_1606762461711/work
nbclient @ file:///home/conda/feedstock_root/build_artifacts/nbclient_1637327213451/work
nbconvert @ file:///home/conda/feedstock_root/build_artifacts/nbconvert_1605401832871/work
nbformat @ file:///home/conda/feedstock_root/build_artifacts/nbformat_1617383142101/work
nest-asyncio @ file:///home/conda/feedstock_root/build_artifacts/nest-asyncio_1664684991461/work
networkx==2.5.1
nose==1.3.7
notebook @ file:///home/conda/feedstock_root/build_artifacts/notebook_1616419146127/work
numpy==1.19.5
opencv-python==4.6.0.66
packaging @ file:///home/conda/feedstock_root/build_artifacts/packaging_1637239678211/work
pandocfilters @ file:///home/conda/feedstock_root/build_artifacts/pandocfilters_1631603243851/work
parso @ file:///home/conda/feedstock_root/build_artifacts/parso_1595548966091/work
pexpect @ file:///home/conda/feedstock_root/build_artifacts/pexpect_1667297516076/work
pickleshare @ file:///home/conda/feedstock_root/build_artifacts/pickleshare_1602536217715/work
Pillow==8.4.0
prometheus-client @ file:///home/conda/feedstock_root/build_artifacts/prometheus_client_1665692535292/work
prompt-toolkit @ file:///home/conda/feedstock_root/build_artifacts/prompt-toolkit_1669057097528/work
protobuf==3.19.6
psutil==5.9.4
ptyprocess @ file:///home/conda/feedstock_root/build_artifacts/ptyprocess_1609419310487/work/dist/ptyprocess-0.7.0-py2.py3-none-any.whl
pycocotools==2.0.6
pycparser @ file:///home/conda/feedstock_root/build_artifacts/pycparser_1636257122734/work
Pygments @ file:///home/conda/feedstock_root/build_artifacts/pygments_1660666458521/work
pyparsing @ file:///home/conda/feedstock_root/build_artifacts/pyparsing_1652235407899/work
pyrsistent @ file:///home/conda/feedstock_root/build_artifacts/pyrsistent_1610146795286/work
python-dateutil @ file:///home/conda/feedstock_root/build_artifacts/python-dateutil_1626286286081/work
pytz==2022.6
PyWavelets==1.1.1
PyYAML==6.0
pyzmq @ file:///home/conda/feedstock_root/build_artifacts/pyzmq_1631793305981/work
qtconsole==5.2.2
QtPy==2.0.1
requests==2.27.1
scikit-image==0.17.2
scipy==1.5.4
Send2Trash @ file:///home/conda/feedstock_root/build_artifacts/send2trash_1628511208346/work
Shapely==1.8.5.post1
six @ file:///home/conda/feedstock_root/build_artifacts/six_1620240208055/work
snowballstemmer==2.2.0
Sphinx==5.3.0
sphinxcontrib-applehelp==1.0.2
sphinxcontrib-devhelp==1.0.2
sphinxcontrib-htmlhelp==2.0.0
sphinxcontrib-jsmath==1.0.1
sphinxcontrib-qthelp==1.0.3
sphinxcontrib-serializinghtml==1.1.5
tensorflow==1.3.0
tensorflow-tensorboard==0.1.8
terminado @ file:///home/conda/feedstock_root/build_artifacts/terminado_1631128154882/work
testpath @ file:///home/conda/feedstock_root/build_artifacts/testpath_1645693042223/work
tifffile==2020.9.3
tornado @ file:///home/conda/feedstock_root/build_artifacts/tornado_1610094701020/work
tqdm==4.19.9
traitlets @ file:///home/conda/feedstock_root/build_artifacts/traitlets_1631041982274/work
typing_extensions==4.1.1
urllib3==1.26.13
wcwidth @ file:///home/conda/feedstock_root/build_artifacts/wcwidth_1600965781394/work
webencodings==0.5.1
Werkzeug==2.0.3
widgetsnbextension==3.6.1
zipp==3.6.0
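Illustrative only (not in the repo): the pins above hold the stack to the old TensorFlow 1.x / Keras 2.0.8 / h5py 2.x combination this Mask R-CNN port expects, which can be sanity-checked at runtime:

import tensorflow as tf
import keras
import h5py

assert tf.__version__.startswith("1."), tf.__version__     # requirements pin tensorflow==1.3.0
assert keras.__version__ == "2.0.8", keras.__version__
assert h5py.__version__.startswith("2."), h5py.__version__  # pinned to h5py==2.10.0 above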
setup.py
ADDED
@@ -0,0 +1,68 @@
"""
The build/compilations setup

>> pip install -r requirements.txt
>> python setup.py install
"""
import pip
import logging
import pkg_resources
try:
    from setuptools import setup
except ImportError:
    from distutils.core import setup


def _parse_requirements(file_path):
    pip_ver = pkg_resources.get_distribution('pip').version
    pip_version = list(map(int, pip_ver.split('.')[:2]))
    if pip_version >= [6, 0]:
        raw = pip.req.parse_requirements(file_path,
                                         session=pip.download.PipSession())
    else:
        raw = pip.req.parse_requirements(file_path)
    return [str(i.req) for i in raw]


# parse_requirements() returns generator of pip.req.InstallRequirement objects
try:
    install_reqs = _parse_requirements("requirements.txt")
except Exception:
    logging.warning('Failed to load the requirements file, so using default ones.')
    install_reqs = []

setup(
    name='mask-rcnn',
    version='2.1',
    url='https://github.com/matterport/Mask_RCNN',
    author='Matterport',
    author_email='waleed.abdulla@gmail.com',
    license='MIT',
    description='Mask R-CNN for object detection and instance segmentation',
    packages=["mrcnn"],
    install_requires=install_reqs,
    include_package_data=True,
    python_requires='>=3.4',
    long_description="""This is an implementation of Mask R-CNN on Python 3, Keras, and TensorFlow.
The model generates bounding boxes and segmentation masks for each instance of an object in the image.
It's based on Feature Pyramid Network (FPN) and a ResNet101 backbone.""",
    classifiers=[
        "Development Status :: 5 - Production/Stable",
        "Environment :: Console",
        "Intended Audience :: Developers",
        "Intended Audience :: Information Technology",
        "Intended Audience :: Education",
        "Intended Audience :: Science/Research",
        "License :: OSI Approved :: MIT License",
        "Natural Language :: English",
        "Operating System :: OS Independent",
        "Topic :: Scientific/Engineering :: Artificial Intelligence",
        "Topic :: Scientific/Engineering :: Image Recognition",
        "Topic :: Scientific/Engineering :: Visualization",
        "Topic :: Scientific/Engineering :: Image Segmentation",
        'Programming Language :: Python :: 3.4',
        'Programming Language :: Python :: 3.5',
        'Programming Language :: Python :: 3.6',
    ],
    keywords="image instance segmentation object detection mask rcnn r-cnn tensorflow keras",
)
test0.jpg
ADDED