shamita commited on
Commit
1914862
1 Parent(s): 7e93dbb

model stack, reqs, app.py

Browse files
__pycache__/coco.cpython-36.pyc ADDED
Binary file (12.7 kB). View file
 
app.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+
3
+ import os
4
+ import sys
5
+ import numpy as np
6
+ import skimage.io
7
+ from pycocotools.coco import COCO
8
+ from pycocotools.cocoeval import COCOeval
9
+ from pycocotools import mask as maskUtils
10
+ import coco
11
+ from mrcnn.evaluate import build_coco_results, evaluate_coco
12
+ from mrcnn.dataset import MappingChallengeDataset
13
+ from mrcnn import visualize
14
+ from mrcnn.config import Config
15
+ from mrcnn import model as modellib, utils
16
+ import warnings
17
+ warnings.filterwarnings("ignore")
18
+
19
+
20
+
21
+ PRETRAINED_MODEL_PATH = os.path.join("data/","pretrained_weights.h5")
22
+ MODEL_DIR = os.path.join("logs")
23
+
24
+
25
+
26
+ class InferenceConfig(coco.CocoConfig):
27
+ GPU_COUNT = 1
28
+ IMAGES_PER_GPU = 1
29
+ NUM_CLASSES = 1 + 1 # 1 Background + 1 Building
30
+ IMAGE_MAX_DIM=320
31
+ IMAGE_MIN_DIM=320
32
+ NAME = "crowdai-mapping-challenge"
33
+ config = InferenceConfig()
34
+
35
+
36
+
37
+ model = modellib.MaskRCNN(mode="inference", model_dir=MODEL_DIR, config=config)
38
+ model_path = PRETRAINED_MODEL_PATH
39
+ model.load_weights(model_path, by_name=True)
40
+
41
+ class_names = ['BG', 'building'] # In our case, we have 1 class for the background, and 1 class for building
42
+
43
+
44
+ def classify_image(img):
45
+ random_image = skimage.io.imread(img)
46
+ predictions = model.detect([random_image]*config.BATCH_SIZE, verbose=1) # We are replicating the same image to fill up the batch_size
47
+ p = predictions[0]
48
+ image = visualize.display_instances(random_image, p['rois'], p['masks'], p['class_ids'], class_names, p['scores'])
49
+ return image
50
+
51
+ image = gr.inputs.Image(shape=(320, 320))
52
+ out_image = gr.outputs.Image(shape=(320, 320))
53
+ examples = ['test0.jpg']
54
+
55
+ intf = gr.Interface(fn=classify_image, inputs=image, outputs=out_image, examples=examples)
56
+ intf.launch(inline=False)
coco.py ADDED
@@ -0,0 +1,522 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Mask R-CNN
3
+ Configurations and data loading code for MS COCO.
4
+
5
+ Copyright (c) 2017 Matterport, Inc.
6
+ Licensed under the MIT License (see LICENSE for details)
7
+ Written by Waleed Abdulla
8
+
9
+ ------------------------------------------------------------
10
+
11
+ Usage: import the module (see Jupyter notebooks for examples), or run from
12
+ the command line as such:
13
+
14
+ # Train a new model starting from pre-trained COCO weights
15
+ python3 coco.py train --dataset=/path/to/coco/ --model=coco
16
+
17
+ # Train a new model starting from ImageNet weights
18
+ python3 coco.py train --dataset=/path/to/coco/ --model=imagenet
19
+
20
+ # Continue training a model that you had trained earlier
21
+ python3 coco.py train --dataset=/path/to/coco/ --model=/path/to/weights.h5
22
+
23
+ # Continue training the last model you trained
24
+ python3 coco.py train --dataset=/path/to/coco/ --model=last
25
+
26
+ # Run COCO evaluatoin on the last model you trained
27
+ python3 coco.py evaluate --dataset=/path/to/coco/ --model=last
28
+ """
29
+
30
+ import os
31
+ import time
32
+ import numpy as np
33
+
34
+ # Download and install the Python COCO tools from https://github.com/waleedka/coco
35
+ #
36
+ # pip install git+https://github.com/waleedka/coco.git#subdirectory=PythonAPI
37
+ #
38
+ # That's a fork from the original https://github.com/pdollar/coco with a bug
39
+ # fix for Python 3.
40
+ # I submitted a pull request https://github.com/cocodataset/cocoapi/pull/50
41
+ # If the PR is merged then use the original repo.
42
+ # Note: Edit PythonAPI/Makefile and replace "python" with "python3".
43
+ from pycocotools.coco import COCO
44
+ from pycocotools.cocoeval import COCOeval
45
+ from pycocotools import mask as maskUtils
46
+
47
+ import zipfile
48
+ import urllib.request
49
+ import shutil
50
+
51
+ from mrcnn.config import Config
52
+ import mrcnn.utils as utils
53
+ import mrcnn.model as modellib
54
+
55
+ # Root directory of the project
56
+ ROOT_DIR = os.getcwd()
57
+
58
+ # Path to trained weights file
59
+ COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5")
60
+
61
+ # Directory to save logs and model checkpoints, if not provided
62
+ # through the command line argument --logs
63
+ DEFAULT_LOGS_DIR = os.path.join(ROOT_DIR, "logs")
64
+ DEFAULT_DATASET_YEAR = "2014"
65
+
66
+ ############################################################
67
+ # Configurations
68
+ ############################################################
69
+
70
+
71
+ class CocoConfig(Config):
72
+ """Configuration for training on MS COCO.
73
+ Derives from the base Config class and overrides values specific
74
+ to the COCO dataset.
75
+ """
76
+ # Give the configuration a recognizable name
77
+ NAME = "coco"
78
+
79
+ # We use a GPU with 12GB memory, which can fit two images.
80
+ # Adjust down if you use a smaller GPU.
81
+ IMAGES_PER_GPU = 2
82
+
83
+ # Uncomment to train on 8 GPUs (default is 1)
84
+ # GPU_COUNT = 8
85
+
86
+ # Number of classes (including background)
87
+ NUM_CLASSES = 1 + 80 # COCO has 80 classes
88
+
89
+
90
+ ############################################################
91
+ # Dataset
92
+ ############################################################
93
+
94
+ class CocoDataset(utils.Dataset):
95
+ def load_coco(self, dataset_dir, subset, year=DEFAULT_DATASET_YEAR, class_ids=None,
96
+ class_map=None, return_coco=False, auto_download=False):
97
+ """Load a subset of the COCO dataset.
98
+ dataset_dir: The root directory of the COCO dataset.
99
+ subset: What to load (train, val, minival, valminusminival)
100
+ year: What dataset year to load (2014, 2017) as a string, not an integer
101
+ class_ids: If provided, only loads images that have the given classes.
102
+ class_map: TODO: Not implemented yet. Supports maping classes from
103
+ different datasets to the same class ID.
104
+ return_coco: If True, returns the COCO object.
105
+ auto_download: Automatically download and unzip MS-COCO images and annotations
106
+ """
107
+
108
+ if auto_download is True:
109
+ self.auto_download(dataset_dir, subset, year)
110
+
111
+ coco = COCO("{}/annotations/instances_{}{}.json".format(dataset_dir, subset, year))
112
+ if subset == "minival" or subset == "valminusminival":
113
+ subset = "val"
114
+ image_dir = "{}/{}{}".format(dataset_dir, subset, year)
115
+
116
+ # Load all classes or a subset?
117
+ if not class_ids:
118
+ # All classes
119
+ class_ids = sorted(coco.getCatIds())
120
+
121
+ # All images or a subset?
122
+ if class_ids:
123
+ image_ids = []
124
+ for id in class_ids:
125
+ image_ids.extend(list(coco.getImgIds(catIds=[id])))
126
+ # Remove duplicates
127
+ image_ids = list(set(image_ids))
128
+ else:
129
+ # All images
130
+ image_ids = list(coco.imgs.keys())
131
+
132
+ # Add classes
133
+ for i in class_ids:
134
+ self.add_class("coco", i, coco.loadCats(i)[0]["name"])
135
+
136
+ # Add images
137
+ for i in image_ids:
138
+ self.add_image(
139
+ "coco", image_id=i,
140
+ path=os.path.join(image_dir, coco.imgs[i]['file_name']),
141
+ width=coco.imgs[i]["width"],
142
+ height=coco.imgs[i]["height"],
143
+ annotations=coco.loadAnns(coco.getAnnIds(
144
+ imgIds=[i], catIds=class_ids, iscrowd=None)))
145
+ if return_coco:
146
+ return coco
147
+
148
+ def auto_download(self, dataDir, dataType, dataYear):
149
+ """Download the COCO dataset/annotations if requested.
150
+ dataDir: The root directory of the COCO dataset.
151
+ dataType: What to load (train, val, minival, valminusminival)
152
+ dataYear: What dataset year to load (2014, 2017) as a string, not an integer
153
+ Note:
154
+ For 2014, use "train", "val", "minival", or "valminusminival"
155
+ For 2017, only "train" and "val" annotations are available
156
+ """
157
+
158
+ # Setup paths and file names
159
+ if dataType == "minival" or dataType == "valminusminival":
160
+ imgDir = "{}/{}{}".format(dataDir, "val", dataYear)
161
+ imgZipFile = "{}/{}{}.zip".format(dataDir, "val", dataYear)
162
+ imgURL = "http://images.cocodataset.org/zips/{}{}.zip".format("val", dataYear)
163
+ else:
164
+ imgDir = "{}/{}{}".format(dataDir, dataType, dataYear)
165
+ imgZipFile = "{}/{}{}.zip".format(dataDir, dataType, dataYear)
166
+ imgURL = "http://images.cocodataset.org/zips/{}{}.zip".format(dataType, dataYear)
167
+ # print("Image paths:"); print(imgDir); print(imgZipFile); print(imgURL)
168
+
169
+ # Create main folder if it doesn't exist yet
170
+ if not os.path.exists(dataDir):
171
+ os.makedirs(dataDir)
172
+
173
+ # Download images if not available locally
174
+ if not os.path.exists(imgDir):
175
+ os.makedirs(imgDir)
176
+ print("Downloading images to " + imgZipFile + " ...")
177
+ with urllib.request.urlopen(imgURL) as resp, open(imgZipFile, 'wb') as out:
178
+ shutil.copyfileobj(resp, out)
179
+ print("... done downloading.")
180
+ print("Unzipping " + imgZipFile)
181
+ with zipfile.ZipFile(imgZipFile, "r") as zip_ref:
182
+ zip_ref.extractall(dataDir)
183
+ print("... done unzipping")
184
+ print("Will use images in " + imgDir)
185
+
186
+ # Setup annotations data paths
187
+ annDir = "{}/annotations".format(dataDir)
188
+ if dataType == "minival":
189
+ annZipFile = "{}/instances_minival2014.json.zip".format(dataDir)
190
+ annFile = "{}/instances_minival2014.json".format(annDir)
191
+ annURL = "https://dl.dropboxusercontent.com/s/o43o90bna78omob/instances_minival2014.json.zip?dl=0"
192
+ unZipDir = annDir
193
+ elif dataType == "valminusminival":
194
+ annZipFile = "{}/instances_valminusminival2014.json.zip".format(dataDir)
195
+ annFile = "{}/instances_valminusminival2014.json".format(annDir)
196
+ annURL = "https://dl.dropboxusercontent.com/s/s3tw5zcg7395368/instances_valminusminival2014.json.zip?dl=0"
197
+ unZipDir = annDir
198
+ else:
199
+ annZipFile = "{}/annotations_trainval{}.zip".format(dataDir, dataYear)
200
+ annFile = "{}/instances_{}{}.json".format(annDir, dataType, dataYear)
201
+ annURL = "http://images.cocodataset.org/annotations/annotations_trainval{}.zip".format(dataYear)
202
+ unZipDir = dataDir
203
+ # print("Annotations paths:"); print(annDir); print(annFile); print(annZipFile); print(annURL)
204
+
205
+ # Download annotations if not available locally
206
+ if not os.path.exists(annDir):
207
+ os.makedirs(annDir)
208
+ if not os.path.exists(annFile):
209
+ if not os.path.exists(annZipFile):
210
+ print("Downloading zipped annotations to " + annZipFile + " ...")
211
+ with urllib.request.urlopen(annURL) as resp, open(annZipFile, 'wb') as out:
212
+ shutil.copyfileobj(resp, out)
213
+ print("... done downloading.")
214
+ print("Unzipping " + annZipFile)
215
+ with zipfile.ZipFile(annZipFile, "r") as zip_ref:
216
+ zip_ref.extractall(unZipDir)
217
+ print("... done unzipping")
218
+ print("Will use annotations in " + annFile)
219
+
220
+ def load_mask(self, image_id):
221
+ """Load instance masks for the given image.
222
+
223
+ Different datasets use different ways to store masks. This
224
+ function converts the different mask format to one format
225
+ in the form of a bitmap [height, width, instances].
226
+
227
+ Returns:
228
+ masks: A bool array of shape [height, width, instance count] with
229
+ one mask per instance.
230
+ class_ids: a 1D array of class IDs of the instance masks.
231
+ """
232
+ # If not a COCO image, delegate to parent class.
233
+ image_info = self.image_info[image_id]
234
+ if image_info["source"] != "coco":
235
+ return super(CocoDataset, self).load_mask(image_id)
236
+
237
+ instance_masks = []
238
+ class_ids = []
239
+ annotations = self.image_info[image_id]["annotations"]
240
+ # Build mask of shape [height, width, instance_count] and list
241
+ # of class IDs that correspond to each channel of the mask.
242
+ for annotation in annotations:
243
+ class_id = self.map_source_class_id(
244
+ "coco.{}".format(annotation['category_id']))
245
+ if class_id:
246
+ m = self.annToMask(annotation, image_info["height"],
247
+ image_info["width"])
248
+ # Some objects are so small that they're less than 1 pixel area
249
+ # and end up rounded out. Skip those objects.
250
+ if m.max() < 1:
251
+ continue
252
+ # Is it a crowd? If so, use a negative class ID.
253
+ if annotation['iscrowd']:
254
+ # Use negative class ID for crowds
255
+ class_id *= -1
256
+ # For crowd masks, annToMask() sometimes returns a mask
257
+ # smaller than the given dimensions. If so, resize it.
258
+ if m.shape[0] != image_info["height"] or m.shape[1] != image_info["width"]:
259
+ m = np.ones([image_info["height"], image_info["width"]], dtype=bool)
260
+ instance_masks.append(m)
261
+ class_ids.append(class_id)
262
+
263
+ # Pack instance masks into an array
264
+ if class_ids:
265
+ mask = np.stack(instance_masks, axis=2)
266
+ class_ids = np.array(class_ids, dtype=np.int32)
267
+ return mask, class_ids
268
+ else:
269
+ # Call super class to return an empty mask
270
+ return super(CocoDataset, self).load_mask(image_id)
271
+
272
+ def image_reference(self, image_id):
273
+ """Return a link to the image in the COCO Website."""
274
+ info = self.image_info[image_id]
275
+ if info["source"] == "coco":
276
+ return "http://cocodataset.org/#explore?id={}".format(info["id"])
277
+ else:
278
+ super(CocoDataset, self).image_reference(image_id)
279
+
280
+ # The following two functions are from pycocotools with a few changes.
281
+
282
+ def annToRLE(self, ann, height, width):
283
+ """
284
+ Convert annotation which can be polygons, uncompressed RLE to RLE.
285
+ :return: binary mask (numpy 2D array)
286
+ """
287
+ segm = ann['segmentation']
288
+ if isinstance(segm, list):
289
+ # polygon -- a single object might consist of multiple parts
290
+ # we merge all parts into one mask rle code
291
+ rles = maskUtils.frPyObjects(segm, height, width)
292
+ rle = maskUtils.merge(rles)
293
+ elif isinstance(segm['counts'], list):
294
+ # uncompressed RLE
295
+ rle = maskUtils.frPyObjects(segm, height, width)
296
+ else:
297
+ # rle
298
+ rle = ann['segmentation']
299
+ return rle
300
+
301
+ def annToMask(self, ann, height, width):
302
+ """
303
+ Convert annotation which can be polygons, uncompressed RLE, or RLE to binary mask.
304
+ :return: binary mask (numpy 2D array)
305
+ """
306
+ rle = self.annToRLE(ann, height, width)
307
+ m = maskUtils.decode(rle)
308
+ return m
309
+
310
+
311
+ ############################################################
312
+ # COCO Evaluation
313
+ ############################################################
314
+
315
+ def build_coco_results(dataset, image_ids, rois, class_ids, scores, masks):
316
+ """Arrange resutls to match COCO specs in http://cocodataset.org/#format
317
+ """
318
+ # If no results, return an empty list
319
+ if rois is None:
320
+ return []
321
+
322
+ results = []
323
+ for image_id in image_ids:
324
+ # Loop through detections
325
+ for i in range(rois.shape[0]):
326
+ class_id = class_ids[i]
327
+ score = scores[i]
328
+ bbox = np.around(rois[i], 1)
329
+ mask = masks[:, :, i]
330
+
331
+ result = {
332
+ "image_id": image_id,
333
+ "category_id": dataset.get_source_class_id(class_id, "coco"),
334
+ "bbox": [bbox[1], bbox[0], bbox[3] - bbox[1], bbox[2] - bbox[0]],
335
+ "score": score,
336
+ "segmentation": maskUtils.encode(np.asfortranarray(mask))
337
+ }
338
+ results.append(result)
339
+ return results
340
+
341
+
342
+ def evaluate_coco(model, dataset, coco, eval_type="bbox", limit=0, image_ids=None):
343
+ """Runs official COCO evaluation.
344
+ dataset: A Dataset object with valiadtion data
345
+ eval_type: "bbox" or "segm" for bounding box or segmentation evaluation
346
+ limit: if not 0, it's the number of images to use for evaluation
347
+ """
348
+ # Pick COCO images from the dataset
349
+ image_ids = image_ids or dataset.image_ids
350
+
351
+ # Limit to a subset
352
+ if limit:
353
+ image_ids = image_ids[:limit]
354
+
355
+ # Get corresponding COCO image IDs.
356
+ coco_image_ids = [dataset.image_info[id]["id"] for id in image_ids]
357
+
358
+ t_prediction = 0
359
+ t_start = time.time()
360
+
361
+ results = []
362
+ for i, image_id in enumerate(image_ids):
363
+ # Load image
364
+ image = dataset.load_image(image_id)
365
+
366
+ # Run detection
367
+ t = time.time()
368
+ r = model.detect([image], verbose=0)[0]
369
+ t_prediction += (time.time() - t)
370
+
371
+ # Convert results to COCO format
372
+ image_results = build_coco_results(dataset, coco_image_ids[i:i + 1],
373
+ r["rois"], r["class_ids"],
374
+ r["scores"], r["masks"])
375
+ results.extend(image_results)
376
+
377
+ # Load results. This modifies results with additional attributes.
378
+ coco_results = coco.loadRes(results)
379
+
380
+ # Evaluate
381
+ cocoEval = COCOeval(coco, coco_results, eval_type)
382
+ cocoEval.params.imgIds = coco_image_ids
383
+ cocoEval.evaluate()
384
+ cocoEval.accumulate()
385
+ cocoEval.summarize()
386
+
387
+ print("Prediction time: {}. Average {}/image".format(
388
+ t_prediction, t_prediction / len(image_ids)))
389
+ print("Total time: ", time.time() - t_start)
390
+
391
+
392
+ ############################################################
393
+ # Training
394
+ ############################################################
395
+
396
+
397
+ if __name__ == '__main__':
398
+ import argparse
399
+
400
+ # Parse command line arguments
401
+ parser = argparse.ArgumentParser(
402
+ description='Train Mask R-CNN on MS COCO.')
403
+ parser.add_argument("command",
404
+ metavar="<command>",
405
+ help="'train' or 'evaluate' on MS COCO")
406
+ parser.add_argument('--dataset', required=True,
407
+ metavar="/path/to/coco/",
408
+ help='Directory of the MS-COCO dataset')
409
+ parser.add_argument('--year', required=False,
410
+ default=DEFAULT_DATASET_YEAR,
411
+ metavar="<year>",
412
+ help='Year of the MS-COCO dataset (2014 or 2017) (default=2014)')
413
+ parser.add_argument('--model', required=True,
414
+ metavar="/path/to/weights.h5",
415
+ help="Path to weights .h5 file or 'coco'")
416
+ parser.add_argument('--logs', required=False,
417
+ default=DEFAULT_LOGS_DIR,
418
+ metavar="/path/to/logs/",
419
+ help='Logs and checkpoints directory (default=logs/)')
420
+ parser.add_argument('--limit', required=False,
421
+ default=500,
422
+ metavar="<image count>",
423
+ help='Images to use for evaluation (default=500)')
424
+ parser.add_argument('--download', required=False,
425
+ default=False,
426
+ metavar="<True|False>",
427
+ help='Automatically download and unzip MS-COCO files (default=False)',
428
+ type=bool)
429
+ args = parser.parse_args()
430
+ print("Command: ", args.command)
431
+ print("Model: ", args.model)
432
+ print("Dataset: ", args.dataset)
433
+ print("Year: ", args.year)
434
+ print("Logs: ", args.logs)
435
+ print("Auto Download: ", args.download)
436
+
437
+ # Configurations
438
+ if args.command == "train":
439
+ config = CocoConfig()
440
+ else:
441
+ class InferenceConfig(CocoConfig):
442
+ # Set batch size to 1 since we'll be running inference on
443
+ # one image at a time. Batch size = GPU_COUNT * IMAGES_PER_GPU
444
+ GPU_COUNT = 1
445
+ IMAGES_PER_GPU = 1
446
+ DETECTION_MIN_CONFIDENCE = 0
447
+ config = InferenceConfig()
448
+ config.display()
449
+
450
+ # Create model
451
+ if args.command == "train":
452
+ model = modellib.MaskRCNN(mode="training", config=config,
453
+ model_dir=args.logs)
454
+ else:
455
+ model = modellib.MaskRCNN(mode="inference", config=config,
456
+ model_dir=args.logs)
457
+
458
+ # Select weights file to load
459
+ if args.model.lower() == "coco":
460
+ model_path = COCO_MODEL_PATH
461
+ elif args.model.lower() == "last":
462
+ # Find last trained weights
463
+ model_path = model.find_last()[1]
464
+ elif args.model.lower() == "imagenet":
465
+ # Start from ImageNet trained weights
466
+ model_path = model.get_imagenet_weights()
467
+ else:
468
+ model_path = args.model
469
+
470
+ # Load weights
471
+ print("Loading weights ", model_path)
472
+ model.load_weights(model_path, by_name=True)
473
+
474
+ # Train or evaluate
475
+ if args.command == "train":
476
+ # Training dataset. Use the training set and 35K from the
477
+ # validation set, as as in the Mask RCNN paper.
478
+ dataset_train = CocoDataset()
479
+ dataset_train.load_coco(args.dataset, "train", year=args.year, auto_download=args.download)
480
+ dataset_train.load_coco(args.dataset, "valminusminival", year=args.year, auto_download=args.download)
481
+ dataset_train.prepare()
482
+
483
+ # Validation dataset
484
+ dataset_val = CocoDataset()
485
+ dataset_val.load_coco(args.dataset, "minival", year=args.year, auto_download=args.download)
486
+ dataset_val.prepare()
487
+
488
+ # *** This training schedule is an example. Update to your needs ***
489
+
490
+ # Training - Stage 1
491
+ print("Training network heads")
492
+ model.train(dataset_train, dataset_val,
493
+ learning_rate=config.LEARNING_RATE,
494
+ epochs=40,
495
+ layers='heads')
496
+
497
+ # Training - Stage 2
498
+ # Finetune layers from ResNet stage 4 and up
499
+ print("Fine tune Resnet stage 4 and up")
500
+ model.train(dataset_train, dataset_val,
501
+ learning_rate=config.LEARNING_RATE,
502
+ epochs=120,
503
+ layers='4+')
504
+
505
+ # Training - Stage 3
506
+ # Fine tune all layers
507
+ print("Fine tune all layers")
508
+ model.train(dataset_train, dataset_val,
509
+ learning_rate=config.LEARNING_RATE / 10,
510
+ epochs=160,
511
+ layers='all')
512
+
513
+ elif args.command == "evaluate":
514
+ # Validation dataset
515
+ dataset_val = CocoDataset()
516
+ coco = dataset_val.load_coco(args.dataset, "minival", year=args.year, return_coco=True, auto_download=args.download)
517
+ dataset_val.prepare()
518
+ print("Running COCO evaluation on {} images.".format(args.limit))
519
+ evaluate_coco(model, dataset_val, coco, "bbox", limit=int(args.limit))
520
+ else:
521
+ print("'{}' is not recognized. "
522
+ "Use 'train' or 'evaluate'".format(args.command))
data/pretrained_weights.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af08cd7b6f2b8e51bcfb685a8d21c784f58705e1c3e02c1a047b726faa25fd98
3
+ size 255856928
data/test0.jpg ADDED
mrcnn/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+
mrcnn/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (173 Bytes). View file
 
mrcnn/__pycache__/__init__.cpython-36.pyc ADDED
Binary file (163 Bytes). View file
 
mrcnn/__pycache__/cocoeval.cpython-310.pyc ADDED
Binary file (16.9 kB). View file
 
mrcnn/__pycache__/cocoeval.cpython-36.pyc ADDED
Binary file (17.8 kB). View file
 
mrcnn/__pycache__/config.cpython-310.pyc ADDED
Binary file (2.56 kB). View file
 
mrcnn/__pycache__/config.cpython-36.pyc ADDED
Binary file (2.57 kB). View file
 
mrcnn/__pycache__/dataset.cpython-310.pyc ADDED
Binary file (4.53 kB). View file
 
mrcnn/__pycache__/dataset.cpython-36.pyc ADDED
Binary file (4.75 kB). View file
 
mrcnn/__pycache__/evaluate.cpython-310.pyc ADDED
Binary file (2.72 kB). View file
 
mrcnn/__pycache__/evaluate.cpython-36.pyc ADDED
Binary file (2.61 kB). View file
 
mrcnn/__pycache__/model.cpython-310.pyc ADDED
Binary file (74.6 kB). View file
 
mrcnn/__pycache__/model.cpython-36.pyc ADDED
Binary file (74.9 kB). View file
 
mrcnn/__pycache__/utils.cpython-310.pyc ADDED
Binary file (25.2 kB). View file
 
mrcnn/__pycache__/utils.cpython-36.pyc ADDED
Binary file (25.2 kB). View file
 
mrcnn/__pycache__/visualize.cpython-36.pyc ADDED
Binary file (12.6 kB). View file
 
mrcnn/cocoeval.py ADDED
@@ -0,0 +1,535 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __author__ = 'tsungyi'
2
+
3
+ import numpy as np
4
+ import datetime
5
+ import time
6
+ from collections import defaultdict
7
+ from pycocotools import mask as maskUtils
8
+ import copy
9
+
10
+ """
11
+ This script has been taken (and modified) from :
12
+ https://github.com/crowdAI/coco/blob/master/PythonAPI/pycocotools/cocoeval.py
13
+ """
14
+
15
+
16
+ class COCOeval:
17
+ # Interface for evaluating detection on the Microsoft COCO dataset.
18
+ #
19
+ # The usage for CocoEval is as follows:
20
+ # cocoGt=..., cocoDt=... # load dataset and results
21
+ # E = CocoEval(cocoGt,cocoDt); # initialize CocoEval object
22
+ # E.params.recThrs = ...; # set parameters as desired
23
+ # E.evaluate(); # run per image evaluation
24
+ # E.accumulate(); # accumulate per image results
25
+ # E.summarize(); # display summary metrics of results
26
+ # For example usage see evalDemo.m and http://mscoco.org/.
27
+ #
28
+ # The evaluation parameters are as follows (defaults in brackets):
29
+ # imgIds - [all] N img ids to use for evaluation
30
+ # catIds - [all] K cat ids to use for evaluation
31
+ # iouThrs - [.5:.05:.95] T=10 IoU thresholds for evaluation
32
+ # recThrs - [0:.01:1] R=101 recall thresholds for evaluation
33
+ # areaRng - [...] A=4 object area ranges for evaluation
34
+ # maxDets - [1 10 100] M=3 thresholds on max detections per image
35
+ # iouType - ['segm'] set iouType to 'segm', 'bbox' or 'keypoints'
36
+ # iouType replaced the now DEPRECATED useSegm parameter.
37
+ # useCats - [1] if true use category labels for evaluation
38
+ # Note: if useCats=0 category labels are ignored as in proposal scoring.
39
+ # Note: multiple areaRngs [Ax2] and maxDets [Mx1] can be specified.
40
+ #
41
+ # evaluate(): evaluates detections on every image and every category and
42
+ # concats the results into the "evalImgs" with fields:
43
+ # dtIds - [1xD] id for each of the D detections (dt)
44
+ # gtIds - [1xG] id for each of the G ground truths (gt)
45
+ # dtMatches - [TxD] matching gt id at each IoU or 0
46
+ # gtMatches - [TxG] matching dt id at each IoU or 0
47
+ # dtScores - [1xD] confidence of each dt
48
+ # gtIgnore - [1xG] ignore flag for each gt
49
+ # dtIgnore - [TxD] ignore flag for each dt at each IoU
50
+ #
51
+ # accumulate(): accumulates the per-image, per-category evaluation
52
+ # results in "evalImgs" into the dictionary "eval" with fields:
53
+ # params - parameters used for evaluation
54
+ # date - date evaluation was performed
55
+ # counts - [T,R,K,A,M] parameter dimensions (see above)
56
+ # precision - [TxRxKxAxM] precision for every evaluation setting
57
+ # recall - [TxKxAxM] max recall for every evaluation setting
58
+ # Note: precision and recall==-1 for settings with no gt objects.
59
+ #
60
+ # See also coco, mask, pycocoDemo, pycocoEvalDemo
61
+ #
62
+ # Microsoft COCO Toolbox. version 2.0
63
+ # Data, paper, and tutorials available at: http://mscoco.org/
64
+ # Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
65
+ # Licensed under the Simplified BSD License [see coco/license.txt]
66
+ def __init__(self, cocoGt=None, cocoDt=None, iouType='segm'):
67
+ '''
68
+ Initialize CocoEval using coco APIs for gt and dt
69
+ :param cocoGt: coco object with ground truth annotations
70
+ :param cocoDt: coco object with detection results
71
+ :return: None
72
+ '''
73
+ if not iouType:
74
+ print('iouType not specified. use default iouType segm')
75
+ self.cocoGt = cocoGt # ground truth COCO API
76
+ self.cocoDt = cocoDt # detections COCO API
77
+ self.params = {} # evaluation parameters
78
+ self.evalImgs = defaultdict(list) # per-image per-category evaluation results [KxAxI] elements
79
+ self.eval = {} # accumulated evaluation results
80
+ self._gts = defaultdict(list) # gt for evaluation
81
+ self._dts = defaultdict(list) # dt for evaluation
82
+ self.params = Params(iouType=iouType) # parameters
83
+ self._paramsEval = {} # parameters for evaluation
84
+ self.stats = [] # result summarization
85
+ self.ious = {} # ious between all gts and dts
86
+ if not cocoGt is None:
87
+ self.params.imgIds = sorted(cocoGt.getImgIds())
88
+ self.params.catIds = sorted(cocoGt.getCatIds())
89
+
90
+
91
+ def _prepare(self):
92
+ '''
93
+ Prepare ._gts and ._dts for evaluation based on params
94
+ :return: None
95
+ '''
96
+ def _toMask(anns, coco):
97
+ # modify ann['segmentation'] by reference
98
+ for ann in anns:
99
+ rle = coco.annToRLE(ann)
100
+ ann['segmentation'] = rle
101
+ p = self.params
102
+ if p.useCats:
103
+ gts=self.cocoGt.loadAnns(self.cocoGt.getAnnIds(imgIds=p.imgIds, catIds=p.catIds))
104
+ dts=self.cocoDt.loadAnns(self.cocoDt.getAnnIds(imgIds=p.imgIds, catIds=p.catIds))
105
+ else:
106
+ gts=self.cocoGt.loadAnns(self.cocoGt.getAnnIds(imgIds=p.imgIds))
107
+ dts=self.cocoDt.loadAnns(self.cocoDt.getAnnIds(imgIds=p.imgIds))
108
+
109
+ # convert ground truth to mask if iouType == 'segm'
110
+ if p.iouType == 'segm':
111
+ _toMask(gts, self.cocoGt)
112
+ _toMask(dts, self.cocoDt)
113
+ # set ignore flag
114
+ for gt in gts:
115
+ gt['ignore'] = gt['ignore'] if 'ignore' in gt else 0
116
+ gt['ignore'] = 'iscrowd' in gt and gt['iscrowd']
117
+ if p.iouType == 'keypoints':
118
+ gt['ignore'] = (gt['num_keypoints'] == 0) or gt['ignore']
119
+ self._gts = defaultdict(list) # gt for evaluation
120
+ self._dts = defaultdict(list) # dt for evaluation
121
+ for gt in gts:
122
+ self._gts[gt['image_id'], gt['category_id']].append(gt)
123
+ for dt in dts:
124
+ self._dts[dt['image_id'], dt['category_id']].append(dt)
125
+ self.evalImgs = defaultdict(list) # per-image per-category evaluation results
126
+ self.eval = {} # accumulated evaluation results
127
+
128
+ def evaluate(self):
129
+ '''
130
+ Run per image evaluation on given images and store results (a list of dict) in self.evalImgs
131
+ :return: None
132
+ '''
133
+ tic = time.time()
134
+ print('Running per image evaluation...')
135
+ p = self.params
136
+ # add backward compatibility if useSegm is specified in params
137
+ if not p.useSegm is None:
138
+ p.iouType = 'segm' if p.useSegm == 1 else 'bbox'
139
+ print('useSegm (deprecated) is not None. Running {} evaluation'.format(p.iouType))
140
+ print('Evaluate annotation type *{}*'.format(p.iouType))
141
+ p.imgIds = list(np.unique(p.imgIds))
142
+ if p.useCats:
143
+ p.catIds = list(np.unique(p.catIds))
144
+ p.maxDets = sorted(p.maxDets)
145
+ self.params=p
146
+
147
+ self._prepare()
148
+ # loop through images, area range, max detection number
149
+ catIds = p.catIds if p.useCats else [-1]
150
+
151
+ if p.iouType == 'segm' or p.iouType == 'bbox':
152
+ computeIoU = self.computeIoU
153
+ elif p.iouType == 'keypoints':
154
+ computeIoU = self.computeOks
155
+ self.ious = {(imgId, catId): computeIoU(imgId, catId) \
156
+ for imgId in p.imgIds
157
+ for catId in catIds}
158
+
159
+ evaluateImg = self.evaluateImg
160
+ maxDet = p.maxDets[-1]
161
+ self.evalImgs = [evaluateImg(imgId, catId, areaRng, maxDet)
162
+ for catId in catIds
163
+ for areaRng in p.areaRng
164
+ for imgId in p.imgIds
165
+ ]
166
+ self._paramsEval = copy.deepcopy(self.params)
167
+ toc = time.time()
168
+ print('DONE (t={:0.2f}s).'.format(toc-tic))
169
+
170
+ def computeIoU(self, imgId, catId):
171
+ p = self.params
172
+ if p.useCats:
173
+ gt = self._gts[imgId,catId]
174
+ dt = self._dts[imgId,catId]
175
+ else:
176
+ gt = [_ for cId in p.catIds for _ in self._gts[imgId,cId]]
177
+ dt = [_ for cId in p.catIds for _ in self._dts[imgId,cId]]
178
+ if len(gt) == 0 and len(dt) ==0:
179
+ return []
180
+ inds = np.argsort([-d['score'] for d in dt], kind='mergesort')
181
+ dt = [dt[i] for i in inds]
182
+ if len(dt) > p.maxDets[-1]:
183
+ dt=dt[0:p.maxDets[-1]]
184
+
185
+ if p.iouType == 'segm':
186
+ g = [g['segmentation'] for g in gt]
187
+ d = [d['segmentation'] for d in dt]
188
+ elif p.iouType == 'bbox':
189
+ g = [g['bbox'] for g in gt]
190
+ d = [d['bbox'] for d in dt]
191
+ else:
192
+ raise Exception('unknown iouType for iou computation')
193
+
194
+ # compute iou between each dt and gt region
195
+ iscrowd = [int(o['iscrowd']) for o in gt]
196
+ ious = maskUtils.iou(d,g,iscrowd)
197
+ return ious
198
+
199
+ def computeOks(self, imgId, catId):
200
+ p = self.params
201
+ # dimention here should be Nxm
202
+ gts = self._gts[imgId, catId]
203
+ dts = self._dts[imgId, catId]
204
+ inds = np.argsort([-d['score'] for d in dts], kind='mergesort')
205
+ dts = [dts[i] for i in inds]
206
+ if len(dts) > p.maxDets[-1]:
207
+ dts = dts[0:p.maxDets[-1]]
208
+ # if len(gts) == 0 and len(dts) == 0:
209
+ if len(gts) == 0 or len(dts) == 0:
210
+ return []
211
+ ious = np.zeros((len(dts), len(gts)))
212
+ sigmas = np.array([.26, .25, .25, .35, .35, .79, .79, .72, .72, .62,.62, 1.07, 1.07, .87, .87, .89, .89])/10.0
213
+ vars = (sigmas * 2)**2
214
+ k = len(sigmas)
215
+ # compute oks between each detection and ground truth object
216
+ for j, gt in enumerate(gts):
217
+ # create bounds for ignore regions(double the gt bbox)
218
+ g = np.array(gt['keypoints'])
219
+ xg = g[0::3]; yg = g[1::3]; vg = g[2::3]
220
+ k1 = np.count_nonzero(vg > 0)
221
+ bb = gt['bbox']
222
+ x0 = bb[0] - bb[2]; x1 = bb[0] + bb[2] * 2
223
+ y0 = bb[1] - bb[3]; y1 = bb[1] + bb[3] * 2
224
+ for i, dt in enumerate(dts):
225
+ d = np.array(dt['keypoints'])
226
+ xd = d[0::3]; yd = d[1::3]
227
+ if k1>0:
228
+ # measure the per-keypoint distance if keypoints visible
229
+ dx = xd - xg
230
+ dy = yd - yg
231
+ else:
232
+ # measure minimum distance to keypoints in (x0,y0) & (x1,y1)
233
+ z = np.zeros((k))
234
+ dx = np.max((z, x0-xd),axis=0)+np.max((z, xd-x1),axis=0)
235
+ dy = np.max((z, y0-yd),axis=0)+np.max((z, yd-y1),axis=0)
236
+ e = (dx**2 + dy**2) / vars / (gt['area']+np.spacing(1)) / 2
237
+ if k1 > 0:
238
+ e=e[vg > 0]
239
+ ious[i, j] = np.sum(np.exp(-e)) / e.shape[0]
240
+ return ious
241
+
242
+ def evaluateImg(self, imgId, catId, aRng, maxDet):
243
+ '''
244
+ perform evaluation for single category and image
245
+ :return: dict (single image results)
246
+ '''
247
+ p = self.params
248
+ if p.useCats:
249
+ gt = self._gts[imgId,catId]
250
+ dt = self._dts[imgId,catId]
251
+ else:
252
+ gt = [_ for cId in p.catIds for _ in self._gts[imgId,cId]]
253
+ dt = [_ for cId in p.catIds for _ in self._dts[imgId,cId]]
254
+ if len(gt) == 0 and len(dt) ==0:
255
+ return None
256
+
257
+ for g in gt:
258
+ if g['ignore'] or (g['area']<aRng[0] or g['area']>aRng[1]):
259
+ g['_ignore'] = 1
260
+ else:
261
+ g['_ignore'] = 0
262
+
263
+ # sort dt highest score first, sort gt ignore last
264
+ gtind = np.argsort([g['_ignore'] for g in gt], kind='mergesort')
265
+ gt = [gt[i] for i in gtind]
266
+ dtind = np.argsort([-d['score'] for d in dt], kind='mergesort')
267
+ dt = [dt[i] for i in dtind[0:maxDet]]
268
+ iscrowd = [int(o['iscrowd']) for o in gt]
269
+ # load computed ious
270
+ ious = self.ious[imgId, catId][:, gtind] if len(self.ious[imgId, catId]) > 0 else self.ious[imgId, catId]
271
+
272
+ T = len(p.iouThrs)
273
+ G = len(gt)
274
+ D = len(dt)
275
+ gtm = np.zeros((T,G))
276
+ dtm = np.zeros((T,D))
277
+ gtIg = np.array([g['_ignore'] for g in gt])
278
+ dtIg = np.zeros((T,D))
279
+ if not len(ious)==0:
280
+ for tind, t in enumerate(p.iouThrs):
281
+ for dind, d in enumerate(dt):
282
+ # information about best match so far (m=-1 -> unmatched)
283
+ iou = min([t,1-1e-10])
284
+ m = -1
285
+ for gind, g in enumerate(gt):
286
+ # if this gt already matched, and not a crowd, continue
287
+ if gtm[tind,gind]>0 and not iscrowd[gind]:
288
+ continue
289
+ # if dt matched to reg gt, and on ignore gt, stop
290
+ if m>-1 and gtIg[m]==0 and gtIg[gind]==1:
291
+ break
292
+ # continue to next gt unless better match made
293
+ if ious[dind,gind] < iou:
294
+ continue
295
+ # if match successful and best so far, store appropriately
296
+ iou=ious[dind,gind]
297
+ m=gind
298
+ # if match made store id of match for both dt and gt
299
+ if m ==-1:
300
+ continue
301
+ dtIg[tind,dind] = gtIg[m]
302
+ dtm[tind,dind] = gt[m]['id']
303
+ gtm[tind,m] = d['id']
304
+ # set unmatched detections outside of area range to ignore
305
+ a = np.array([d['area']<aRng[0] or d['area']>aRng[1] for d in dt]).reshape((1, len(dt)))
306
+ dtIg = np.logical_or(dtIg, np.logical_and(dtm==0, np.repeat(a,T,0)))
307
+ # store results for given image and category
308
+ return {
309
+ 'image_id': imgId,
310
+ 'category_id': catId,
311
+ 'aRng': aRng,
312
+ 'maxDet': maxDet,
313
+ 'dtIds': [d['id'] for d in dt],
314
+ 'gtIds': [g['id'] for g in gt],
315
+ 'dtMatches': dtm,
316
+ 'gtMatches': gtm,
317
+ 'dtScores': [d['score'] for d in dt],
318
+ 'gtIgnore': gtIg,
319
+ 'dtIgnore': dtIg,
320
+ }
321
+
322
+ def accumulate(self, p = None):
323
+ '''
324
+ Accumulate per image evaluation results and store the result in self.eval
325
+ :param p: input params for evaluation
326
+ :return: None
327
+ '''
328
+ print('Accumulating evaluation results...')
329
+ tic = time.time()
330
+ if not self.evalImgs:
331
+ print('Please run evaluate() first')
332
+ # allows input customized parameters
333
+ if p is None:
334
+ p = self.params
335
+ p.catIds = p.catIds if p.useCats == 1 else [-1]
336
+ T = len(p.iouThrs)
337
+ R = len(p.recThrs)
338
+ K = len(p.catIds) if p.useCats else 1
339
+ A = len(p.areaRng)
340
+ M = len(p.maxDets)
341
+ precision = -np.ones((T,R,K,A,M)) # -1 for the precision of absent categories
342
+ recall = -np.ones((T,K,A,M))
343
+
344
+ # create dictionary for future indexing
345
+ _pe = self._paramsEval
346
+ catIds = _pe.catIds if _pe.useCats else [-1]
347
+ setK = set(catIds)
348
+ setA = set(map(tuple, _pe.areaRng))
349
+ setM = set(_pe.maxDets)
350
+ setI = set(_pe.imgIds)
351
+ # get inds to evaluate
352
+ k_list = [n for n, k in enumerate(p.catIds) if k in setK]
353
+ m_list = [m for n, m in enumerate(p.maxDets) if m in setM]
354
+ a_list = [n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng)) if a in setA]
355
+ i_list = [n for n, i in enumerate(p.imgIds) if i in setI]
356
+ I0 = len(_pe.imgIds)
357
+ A0 = len(_pe.areaRng)
358
+ # retrieve E at each category, area range, and max number of detections
359
+ for k, k0 in enumerate(k_list):
360
+ Nk = k0*A0*I0
361
+ for a, a0 in enumerate(a_list):
362
+ Na = a0*I0
363
+ for m, maxDet in enumerate(m_list):
364
+ E = [self.evalImgs[Nk + Na + i] for i in i_list]
365
+ E = [e for e in E if not e is None]
366
+ if len(E) == 0:
367
+ continue
368
+ dtScores = np.concatenate([e['dtScores'][0:maxDet] for e in E])
369
+
370
+ # different sorting method generates slightly different results.
371
+ # mergesort is used to be consistent as Matlab implementation.
372
+ inds = np.argsort(-dtScores, kind='mergesort')
373
+
374
+ dtm = np.concatenate([e['dtMatches'][:,0:maxDet] for e in E], axis=1)[:,inds]
375
+ dtIg = np.concatenate([e['dtIgnore'][:,0:maxDet] for e in E], axis=1)[:,inds]
376
+ gtIg = np.concatenate([e['gtIgnore'] for e in E])
377
+ npig = np.count_nonzero(gtIg==0 )
378
+ if npig == 0:
379
+ continue
380
+ tps = np.logical_and( dtm, np.logical_not(dtIg) )
381
+ fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg) )
382
+
383
+ tp_sum = np.cumsum(tps, axis=1).astype(dtype=np.float)
384
+ fp_sum = np.cumsum(fps, axis=1).astype(dtype=np.float)
385
+ for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)):
386
+ tp = np.array(tp)
387
+ fp = np.array(fp)
388
+ nd = len(tp)
389
+ rc = tp / npig
390
+ pr = tp / (fp+tp+np.spacing(1))
391
+ q = np.zeros((R,))
392
+
393
+ if nd:
394
+ recall[t,k,a,m] = rc[-1]
395
+ else:
396
+ recall[t,k,a,m] = 0
397
+
398
+ # numpy is slow without cython optimization for accessing elements
399
+ # use python array gets significant speed improvement
400
+ pr = pr.tolist(); q = q.tolist()
401
+
402
+ for i in range(nd-1, 0, -1):
403
+ if pr[i] > pr[i-1]:
404
+ pr[i-1] = pr[i]
405
+
406
+ inds = np.searchsorted(rc, p.recThrs, side='left')
407
+ try:
408
+ for ri, pi in enumerate(inds):
409
+ q[ri] = pr[pi]
410
+ except:
411
+ pass
412
+ precision[t,:,k,a,m] = np.array(q)
413
+ self.eval = {
414
+ 'params': p,
415
+ 'counts': [T, R, K, A, M],
416
+ 'date': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
417
+ 'precision': precision,
418
+ 'recall': recall,
419
+ }
420
+ toc = time.time()
421
+ print('DONE (t={:0.2f}s).'.format( toc-tic))
422
+
423
+ def _summarize(self, ap=1, iouThr=None, areaRng='all', maxDets=100 ):
424
+ p = self.params
425
+ iStr = ' {:<18} {} @[ IoU={:<9} | area={:>6s} | maxDets={:>3d} ] = {:0.3f}'
426
+ titleStr = 'Average Precision' if ap == 1 else 'Average Recall'
427
+ typeStr = '(AP)' if ap==1 else '(AR)'
428
+ iouStr = '{:0.2f}:{:0.2f}'.format(p.iouThrs[0], p.iouThrs[-1]) \
429
+ if iouThr is None else '{:0.2f}'.format(iouThr)
430
+
431
+ aind = [i for i, aRng in enumerate(p.areaRngLbl) if aRng == areaRng]
432
+ mind = [i for i, mDet in enumerate(p.maxDets) if mDet == maxDets]
433
+ if ap == 1:
434
+ # dimension of precision: [TxRxKxAxM]
435
+ s = self.eval['precision']
436
+ # IoU
437
+ if iouThr is not None:
438
+ t = np.where(iouThr == p.iouThrs)[0]
439
+ s = s[t]
440
+ s = s[:,:,:,aind,mind]
441
+ else:
442
+ # dimension of recall: [TxKxAxM]
443
+ s = self.eval['recall']
444
+ if iouThr is not None:
445
+ t = np.where(iouThr == p.iouThrs)[0]
446
+ s = s[t]
447
+ s = s[:,:,aind,mind]
448
+ if len(s[s>-1])==0:
449
+ mean_s = -1
450
+ else:
451
+ mean_s = np.mean(s[s>-1])
452
+ print(iStr.format(titleStr, typeStr, iouStr, areaRng, maxDets, mean_s))
453
+ return mean_s
454
+
455
+ def summarize(self):
456
+ '''
457
+ Compute and display summary metrics for evaluation results.
458
+ Note this functin can *only* be applied on the default parameter setting
459
+ '''
460
+ def _summarizeDets():
461
+ stats = np.zeros((12,))
462
+ stats[0] = self._summarize(1)
463
+ stats[1] = self._summarize(1, iouThr=.5, maxDets=self.params.maxDets[2])
464
+ stats[2] = self._summarize(1, iouThr=.75, maxDets=self.params.maxDets[2])
465
+ stats[3] = self._summarize(1, areaRng='small', maxDets=self.params.maxDets[2])
466
+ stats[4] = self._summarize(1, areaRng='medium', maxDets=self.params.maxDets[2])
467
+ stats[5] = self._summarize(1, areaRng='large', maxDets=self.params.maxDets[2])
468
+ stats[6] = self._summarize(0, maxDets=self.params.maxDets[0])
469
+ stats[7] = self._summarize(0, maxDets=self.params.maxDets[1])
470
+ stats[8] = self._summarize(0, maxDets=self.params.maxDets[2])
471
+ stats[9] = self._summarize(0, areaRng='small', maxDets=self.params.maxDets[2])
472
+ stats[10] = self._summarize(0, areaRng='medium', maxDets=self.params.maxDets[2])
473
+ stats[11] = self._summarize(0, areaRng='large', maxDets=self.params.maxDets[2])
474
+ return stats
475
+ def _summarizeKps():
476
+ stats = np.zeros((10,))
477
+ stats[0] = self._summarize(1, maxDets=20)
478
+ stats[1] = self._summarize(1, maxDets=20, iouThr=.5)
479
+ stats[2] = self._summarize(1, maxDets=20, iouThr=.75)
480
+ stats[3] = self._summarize(1, maxDets=20, areaRng='medium')
481
+ stats[4] = self._summarize(1, maxDets=20, areaRng='large')
482
+ stats[5] = self._summarize(0, maxDets=20)
483
+ stats[6] = self._summarize(0, maxDets=20, iouThr=.5)
484
+ stats[7] = self._summarize(0, maxDets=20, iouThr=.75)
485
+ stats[8] = self._summarize(0, maxDets=20, areaRng='medium')
486
+ stats[9] = self._summarize(0, maxDets=20, areaRng='large')
487
+ return stats
488
+ if not self.eval:
489
+ raise Exception('Please run accumulate() first')
490
+ iouType = self.params.iouType
491
+ if iouType == 'segm' or iouType == 'bbox':
492
+ summarize = _summarizeDets
493
+ elif iouType == 'keypoints':
494
+ summarize = _summarizeKps
495
+ self.stats = summarize()
496
+
497
+ def __str__(self):
498
+ self.summarize()
499
+
500
+ class Params:
501
+ '''
502
+ Params for coco evaluation api
503
+ '''
504
+ def setDetParams(self):
505
+ self.imgIds = []
506
+ self.catIds = [100] # For the Category ID of Building
507
+ # np.arange causes trouble. the data point on arange is slightly larger than the true value
508
+ self.iouThrs = np.linspace(.5, 0.95, np.round((0.95 - .5) / .05) + 1, endpoint=True)
509
+ self.recThrs = np.linspace(.0, 1.00, np.round((1.00 - .0) / .01) + 1, endpoint=True)
510
+ self.maxDets = [1, 10, 100]
511
+ self.areaRng = [[0 ** 2, 1e5 ** 2], [0 ** 2, 32 ** 2], [32 ** 2, 96 ** 2], [96 ** 2, 1e5 ** 2]]
512
+ self.areaRngLbl = ['all', 'small', 'medium', 'large']
513
+ self.useCats = 1
514
+
515
+ def setKpParams(self):
516
+ self.imgIds = []
517
+ self.catIds = []
518
+ # np.arange causes trouble. the data point on arange is slightly larger than the true value
519
+ self.iouThrs = [0.5]
520
+ self.recThrs = np.linspace(.0, 1.00, np.round((1.00 - .0) / .01) + 1, endpoint=True)
521
+ self.maxDets = [20] # At max 20 objects detected per image
522
+ self.areaRng = [[0 ** 2, 1e5 ** 2], [32 ** 2, 96 ** 2], [96 ** 2, 1e5 ** 2]]
523
+ self.areaRngLbl = ['all'] #Consider all area ranges for evaluation
524
+ self.useCats = 1
525
+
526
+ def __init__(self, iouType='segm'):
527
+ if iouType == 'segm' or iouType == 'bbox':
528
+ self.setDetParams()
529
+ elif iouType == 'keypoints':
530
+ self.setKpParams()
531
+ else:
532
+ raise Exception('iouType not supported')
533
+ self.iouType = iouType
534
+ # useSegm is deprecated
535
+ self.useSegm = None
mrcnn/config.py ADDED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Mask R-CNN
3
+ Base Configurations class.
4
+
5
+ Copyright (c) 2017 Matterport, Inc.
6
+ Licensed under the MIT License (see LICENSE for details)
7
+ Written by Waleed Abdulla
8
+ """
9
+
10
+ import math
11
+ import numpy as np
12
+
13
+
14
+ # Base Configuration Class
15
+ # Don't use this class directly. Instead, sub-class it and override
16
+ # the configurations you need to change.
17
+
18
+ class Config(object):
19
+ """Base configuration class. For custom configurations, create a
20
+ sub-class that inherits from this one and override properties
21
+ that need to be changed.
22
+ """
23
+ # Name the configurations. For example, 'COCO', 'Experiment 3', ...etc.
24
+ # Useful if your code needs to do things differently depending on which
25
+ # experiment is running.
26
+ NAME = None # Override in sub-classes
27
+
28
+ # NUMBER OF GPUs to use. For CPU training, use 1
29
+ GPU_COUNT = 1
30
+
31
+ # Number of images to train with on each GPU. A 12GB GPU can typically
32
+ # handle 2 images of 1024x1024px.
33
+ # Adjust based on your GPU memory and image sizes. Use the highest
34
+ # number that your GPU can handle for best performance.
35
+ IMAGES_PER_GPU = 2
36
+
37
+ # Number of training steps per epoch
38
+ # This doesn't need to match the size of the training set. Tensorboard
39
+ # updates are saved at the end of each epoch, so setting this to a
40
+ # smaller number means getting more frequent TensorBoard updates.
41
+ # Validation stats are also calculated at each epoch end and they
42
+ # might take a while, so don't set this too small to avoid spending
43
+ # a lot of time on validation stats.
44
+ STEPS_PER_EPOCH = 1000
45
+
46
+ # Number of validation steps to run at the end of every training epoch.
47
+ # A bigger number improves accuracy of validation stats, but slows
48
+ # down the training.
49
+ VALIDATION_STEPS = 50
50
+
51
+ # Backbone network architecture
52
+ # Supported values are: resnet50, resnet101
53
+ BACKBONE = "resnet101"
54
+
55
+ # The strides of each layer of the FPN Pyramid. These values
56
+ # are based on a Resnet101 backbone.
57
+ BACKBONE_STRIDES = [4, 8, 16, 32, 64]
58
+
59
+ # Number of classification classes (including background)
60
+ NUM_CLASSES = 1 # Override in sub-classes
61
+
62
+ # Length of square anchor side in pixels
63
+ RPN_ANCHOR_SCALES = (32, 64, 128, 256, 512)
64
+
65
+ # Ratios of anchors at each cell (width/height)
66
+ # A value of 1 represents a square anchor, and 0.5 is a wide anchor
67
+ RPN_ANCHOR_RATIOS = [0.5, 1, 2]
68
+
69
+ # Anchor stride
70
+ # If 1 then anchors are created for each cell in the backbone feature map.
71
+ # If 2, then anchors are created for every other cell, and so on.
72
+ RPN_ANCHOR_STRIDE = 1
73
+
74
+ # Non-max suppression threshold to filter RPN proposals.
75
+ # You can increase this during training to generate more propsals.
76
+ RPN_NMS_THRESHOLD = 0.7
77
+
78
+ # How many anchors per image to use for RPN training
79
+ RPN_TRAIN_ANCHORS_PER_IMAGE = 256
80
+
81
+ # ROIs kept after non-maximum supression (training and inference)
82
+ POST_NMS_ROIS_TRAINING = 2000
83
+ POST_NMS_ROIS_INFERENCE = 1000
84
+
85
+ # If enabled, resizes instance masks to a smaller size to reduce
86
+ # memory load. Recommended when using high-resolution images.
87
+ USE_MINI_MASK = True
88
+ MINI_MASK_SHAPE = (56, 56) # (height, width) of the mini-mask
89
+
90
+ # Input image resizing
91
+ # Generally, use the "square" resizing mode for training and inferencing
92
+ # and it should work well in most cases. In this mode, images are scaled
93
+ # up such that the small side is = IMAGE_MIN_DIM, but ensuring that the
94
+ # scaling doesn't make the long side > IMAGE_MAX_DIM. Then the image is
95
+ # padded with zeros to make it a square so multiple images can be put
96
+ # in one batch.
97
+ # Available resizing modes:
98
+ # none: No resizing or padding. Return the image unchanged.
99
+ # square: Resize and pad with zeros to get a square image
100
+ # of size [max_dim, max_dim].
101
+ # pad64: Pads width and height with zeros to make them multiples of 64.
102
+ # If IMAGE_MIN_DIM is not None, then scale the small side to
103
+ # that size before padding. IMAGE_MAX_DIM is ignored in this mode.
104
+ # The multiple of 64 is needed to ensure smooth scaling of feature
105
+ # maps up and down the 6 levels of the FPN pyramid (2**6=64).
106
+ IMAGE_RESIZE_MODE = "square"
107
+ IMAGE_MIN_DIM = 800
108
+ IMAGE_MAX_DIM = 1024
109
+
110
+ # Image mean (RGB)
111
+ MEAN_PIXEL = np.array([123.7, 116.8, 103.9])
112
+
113
+ # Number of ROIs per image to feed to classifier/mask heads
114
+ # The Mask RCNN paper uses 512 but often the RPN doesn't generate
115
+ # enough positive proposals to fill this and keep a positive:negative
116
+ # ratio of 1:3. You can increase the number of proposals by adjusting
117
+ # the RPN NMS threshold.
118
+ TRAIN_ROIS_PER_IMAGE = 200
119
+
120
+ # Percent of positive ROIs used to train classifier/mask heads
121
+ ROI_POSITIVE_RATIO = 0.33
122
+
123
+ # Pooled ROIs
124
+ POOL_SIZE = 7
125
+ MASK_POOL_SIZE = 14
126
+
127
+ # Shape of output mask
128
+ # To change this you also need to change the neural network mask branch
129
+ MASK_SHAPE = [28, 28]
130
+
131
+ # Maximum number of ground truth instances to use in one image
132
+ MAX_GT_INSTANCES = 100
133
+
134
+ # Bounding box refinement standard deviation for RPN and final detections.
135
+ RPN_BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2])
136
+ BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2])
137
+
138
+ # Max number of final detections
139
+ DETECTION_MAX_INSTANCES = 100
140
+
141
+ # Minimum probability value to accept a detected instance
142
+ # ROIs below this threshold are skipped
143
+ DETECTION_MIN_CONFIDENCE = 0.7
144
+
145
+ # Non-maximum suppression threshold for detection
146
+ DETECTION_NMS_THRESHOLD = 0.3
147
+
148
+ # Learning rate and momentum
149
+ # The Mask RCNN paper uses lr=0.02, but on TensorFlow it causes
150
+ # weights to explode. Likely due to differences in optimzer
151
+ # implementation.
152
+ LEARNING_RATE = 0.001
153
+ LEARNING_MOMENTUM = 0.9
154
+
155
+ # Weight decay regularization
156
+ WEIGHT_DECAY = 0.0001
157
+
158
+ # Use RPN ROIs or externally generated ROIs for training
159
+ # Keep this True for most situations. Set to False if you want to train
160
+ # the head branches on ROI generated by code rather than the ROIs from
161
+ # the RPN. For example, to debug the classifier head without having to
162
+ # train the RPN.
163
+ USE_RPN_ROIS = True
164
+
165
+ # Train or freeze batch normalization layers
166
+ # None: Train BN layers. This is the normal mode
167
+ # False: Freeze BN layers. Good when using a small batch size
168
+ # True: (don't use). Set layer in training mode even when inferencing
169
+ TRAIN_BN = False # Defaulting to False since batch size is often small
170
+
171
+ # Gradient norm clipping
172
+ GRADIENT_CLIP_NORM = 5.0
173
+
174
+ def __init__(self):
175
+ """Set values of computed attributes."""
176
+ # Effective batch size
177
+ self.BATCH_SIZE = self.IMAGES_PER_GPU * self.GPU_COUNT
178
+
179
+ # Input image size
180
+ self.IMAGE_SHAPE = np.array(
181
+ [self.IMAGE_MAX_DIM, self.IMAGE_MAX_DIM, 3])
182
+
183
+ # Image meta data length
184
+ # See compose_image_meta() for details
185
+ self.IMAGE_META_SIZE = 1 + 3 + 3 + 4 + 1 + self.NUM_CLASSES
186
+
187
+ def display(self):
188
+ """Display Configuration values."""
189
+ print("\nConfigurations:")
190
+ for a in dir(self):
191
+ if not a.startswith("__") and not callable(getattr(self, a)):
192
+ print("{:30} {}".format(a, getattr(self, a)))
193
+ print("\n")
mrcnn/dataset.py ADDED
@@ -0,0 +1,156 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from mrcnn import utils
2
+ import numpy as np
3
+
4
+ from pycocotools.coco import COCO
5
+ from pycocotools.cocoeval import COCOeval
6
+ from pycocotools import mask as maskUtils
7
+
8
+ import os
9
+
10
+ class MappingChallengeDataset(utils.Dataset):
11
+ def load_dataset(self, dataset_dir, load_small=False, return_coco=True):
12
+ """ Loads dataset released for the crowdAI Mapping Challenge(https://www.crowdai.org/challenges/mapping-challenge)
13
+ Params:
14
+ - dataset_dir : root directory of the dataset (can point to the train/val folder)
15
+ - load_small : Boolean value which signals if the annotations for all the images need to be loaded into the memory,
16
+ or if only a small subset of the same should be loaded into memory
17
+ """
18
+ self.load_small = load_small
19
+ if self.load_small:
20
+ annotation_path = os.path.join(dataset_dir, "annotation-small.json")
21
+ else:
22
+ annotation_path = os.path.join(dataset_dir, "annotation.json")
23
+
24
+ image_dir = os.path.join(dataset_dir, "images")
25
+ print("Annotation Path ", annotation_path)
26
+ print("Image Dir ", image_dir)
27
+ assert os.path.exists(annotation_path) and os.path.exists(image_dir)
28
+
29
+ self.coco = COCO(annotation_path)
30
+ self.image_dir = image_dir
31
+ print(len(self.coco.imgs))
32
+
33
+ # Load all classes (Only Building in this version)
34
+ classIds = self.coco.getCatIds()
35
+
36
+ # Load all images
37
+ image_ids = list(self.coco.imgs.keys())
38
+
39
+ # register classes
40
+ for _class_id in classIds:
41
+ self.add_class("crowdai-mapping-challenge", _class_id, self.coco.loadCats(_class_id)[0]["name"])
42
+
43
+ # Register Images
44
+ img_exist = []
45
+ for _img_id in image_ids:
46
+ path = os.path.join(image_dir, self.coco.imgs[_img_id]['file_name'])
47
+ if os.path.exists(path):
48
+ img_exist.append(_img_id)
49
+
50
+ coco_updated = {}
51
+ for i in img_exist:
52
+ coco_updated[i] = self.coco.imgs[i]
53
+
54
+ self.coco.imgs = coco_updated
55
+ print(len(self.coco.imgs))
56
+
57
+ for _img_id in img_exist:
58
+ assert(os.path.exists(os.path.join(image_dir, self.coco.imgs[_img_id]['file_name'])))
59
+
60
+ self.add_image(
61
+ "crowdai-mapping-challenge", image_id=_img_id,
62
+ path=os.path.join(image_dir, self.coco.imgs[_img_id]['file_name']),
63
+ width=self.coco.imgs[_img_id]["width"],
64
+ height=self.coco.imgs[_img_id]["height"],
65
+ annotations=self.coco.loadAnns(self.coco.getAnnIds(
66
+ imgIds=[_img_id],
67
+ catIds=classIds,
68
+ iscrowd=None)))
69
+
70
+ if return_coco:
71
+ return self.coco
72
+
73
+ def load_mask(self, image_id):
74
+ """ Loads instance mask for a given image
75
+ This function converts mask from the coco format to a
76
+ a bitmap [height, width, instance]
77
+ Params:
78
+ - image_id : reference id for a given image
79
+
80
+ Returns:
81
+ masks : A bool array of shape [height, width, instances] with
82
+ one mask per instance
83
+ class_ids : a 1D array of classIds of the corresponding instance masks
84
+ (In this version of the challenge it will be of shape [instances] and always be filled with the class-id of the "Building" class.)
85
+ """
86
+
87
+ image_info = self.image_info[image_id]
88
+ assert image_info["source"] == "crowdai-mapping-challenge"
89
+
90
+ instance_masks = []
91
+ class_ids = []
92
+ annotations = self.image_info[image_id]["annotations"]
93
+ # Build mask of shape [height, width, instance_count] and list
94
+ # of class IDs that correspond to each channel of the mask.
95
+ for annotation in annotations:
96
+ class_id = self.map_source_class_id(
97
+ "crowdai-mapping-challenge.{}".format(annotation['category_id']))
98
+ if class_id:
99
+ m = self.annToMask(annotation, image_info["height"],
100
+ image_info["width"])
101
+ # Some objects are so small that they're less than 1 pixel area
102
+ # and end up rounded out. Skip those objects.
103
+ if m.max() < 1:
104
+ continue
105
+
106
+ # Ignore the notion of "is_crowd" as specified in the coco format
107
+ # as we do not have that annotation in the current version of the dataset
108
+
109
+ instance_masks.append(m)
110
+ class_ids.append(class_id)
111
+ # Pack instance masks into an array
112
+ if class_ids:
113
+ mask = np.stack(instance_masks, axis=2)
114
+ class_ids = np.array(class_ids, dtype=np.int32)
115
+ return mask, class_ids
116
+ else:
117
+ # Call super class to return an empty mask
118
+ return super(MappingChallengeDataset, self).load_mask(image_id)
119
+
120
+
121
+ def image_reference(self, image_id):
122
+ """Return a reference for a particular image
123
+
124
+ Ideally this function would return a URL,
125
+ but in this case we simply return the image_id
126
+ """
127
+ return "crowdai-mapping-challenge::{}".format(image_id)
128
+ # The following two functions are from pycocotools with a few changes.
129
+
130
+ def annToRLE(self, ann, height, width):
131
+ """
132
+ Convert annotation which can be polygons or uncompressed RLE to compressed RLE.
133
+ :return: RLE (run-length encoding of the mask)
134
+ """
135
+ segm = ann['segmentation']
136
+ if isinstance(segm, list):
137
+ # polygon -- a single object might consist of multiple parts
138
+ # we merge all parts into one mask rle code
139
+ rles = maskUtils.frPyObjects(segm, height, width)
140
+ rle = maskUtils.merge(rles)
141
+ elif isinstance(segm['counts'], list):
142
+ # uncompressed RLE
143
+ rle = maskUtils.frPyObjects(segm, height, width)
144
+ else:
145
+ # rle
146
+ rle = ann['segmentation']
147
+ return rle
148
+
149
+ def annToMask(self, ann, height, width):
150
+ """
151
+ Convert annotation which can be polygons, uncompressed RLE, or RLE to binary mask.
152
+ :return: binary mask (numpy 2D array)
153
+ """
154
+ rle = self.annToRLE(ann, height, width)
155
+ m = maskUtils.decode(rle)
156
+ return m
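A hedged usage sketch for MappingChallengeDataset; the "data/train" layout (an images/ folder plus annotation.json, or annotation-small.json when load_small=True) is an assumption about how the challenge data is unpacked, not something added by this commit:

# Hypothetical sketch: load the annotations and inspect one sample.
from mrcnn.dataset import MappingChallengeDataset

dataset = MappingChallengeDataset()
coco = dataset.load_dataset("data/train", load_small=True)  # assumed path; reads annotation-small.json
dataset.prepare()                                           # from utils.Dataset: builds class/image id mappings

image = dataset.load_image(dataset.image_ids[0])
masks, class_ids = dataset.load_mask(dataset.image_ids[0])
print(image.shape, masks.shape, class_ids)                  # e.g. (300, 300, 3), (300, 300, N), [1 ... 1]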
mrcnn/evaluate.py ADDED
@@ -0,0 +1,94 @@
1
+ from pycocotools.coco import COCO
2
+ from mrcnn.cocoeval import COCOeval
3
+ from pycocotools import mask as maskUtils
4
+ import time
5
+ import numpy as np
6
+
7
+ ############################################################
8
+ # COCO Evaluation
9
+ ############################################################
10
+
11
+ def build_coco_results(dataset, image_ids, rois, class_ids, scores, masks):
12
+ """Arrange results to match COCO specs in http://cocodataset.org/#format
13
+ """
14
+ # If no results, return an empty list
15
+ if rois is None:
16
+ return []
17
+
18
+ results = []
19
+ for image_id in image_ids:
20
+ # Loop through detections
21
+ for i in range(rois.shape[0]):
22
+ class_id = class_ids[i]
23
+ score = scores[i]
24
+ bbox = np.around(rois[i], 1)
25
+ mask = masks[:, :, i]
26
+
27
+ result = {
28
+ "image_id": image_id,
29
+ "category_id": dataset.get_source_class_id(class_id, "crowdai-mapping-challenge"),
30
+ "bbox": [bbox[1], bbox[0], bbox[3] - bbox[1], bbox[2] - bbox[0]],
31
+ "score": score,
32
+ "segmentation": maskUtils.encode(np.asfortranarray(mask)).encode('utf-8')
33
+ }
34
+ results.append(result)
35
+ return results
36
+
37
+
38
+ def evaluate_coco(model, dataset, coco, eval_type="bbox", limit=0, image_ids=None):
39
+ """Runs official COCO evaluation.
40
+ dataset: A Dataset object with validation data
41
+ eval_type: "bbox" or "segm" for bounding box or segmentation evaluation
42
+ limit: if not 0, it's the number of images to use for evaluation
43
+ """
44
+ # Pick COCO images from the dataset
45
+ image_ids = image_ids or dataset.image_ids
46
+
47
+ # Limit to a subset
48
+ if limit:
49
+ image_ids = image_ids[:limit]
50
+
51
+ # Get corresponding COCO image IDs.
52
+ coco_image_ids = [dataset.image_info[id]["id"] for id in image_ids]
53
+
54
+ t_prediction = 0
55
+ t_start = time.time()
56
+
57
+ results = []
58
+
59
+ for i, image_id in enumerate(image_ids):
60
+ # Load image
61
+ image = dataset.load_image(image_id)
62
+
63
+ # Run detection
64
+ t = time.time()
65
+ print("="*100)
66
+ print("Image shape ", image.shape)
67
+ r = model.detect([image])
68
+ r = r[0]
69
+ t_prediction += (time.time() - t)
70
+ print("Prediction time : ", (time.time() - t))
71
+ # Convert results to COCO format
72
+ image_results = build_coco_results(dataset, coco_image_ids[i:i + 1],
73
+ r["rois"], r["class_ids"],
74
+ r["scores"], r["masks"])
75
+ print("Number of detections : ", len(r["rois"]))
76
+ print("Classes Predicted : ", r["class_ids"])
77
+ print("Scores : ", r["scores"])
78
+ results.extend(image_results)
79
+
80
+ # Load results. This modifies results with additional attributes.
81
+ coco_results = coco.loadRes(results)
82
+
83
+ # Evaluate
84
+ cocoEval = COCOeval(coco, coco_results, eval_type)
85
+ cocoEval.params.imgIds = coco_image_ids
86
+ cocoEval.evaluate()
87
+ cocoEval.accumulate()
88
+ ap = cocoEval._summarize(ap=1, iouThr=0.5, areaRng="all", maxDets=100)
89
+ ar = cocoEval._summarize(ap=0, areaRng="all", maxDets=100)
90
+ print("Precision : ", ap, " Recall : ", ar)
91
+
92
+ print("Prediction time: {}. Average {}/image".format(
93
+ t_prediction, t_prediction / len(image_ids)))
94
+ print("Total time: ", time.time() - t_start)
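A sketch of how evaluate_coco() might be invoked, assuming a validation split under "data/val" and a MaskRCNN model already built in inference mode as in app.py; the path and limit value are illustrative:

# Hypothetical sketch: score a few validation images with the official COCO metrics.
from mrcnn.dataset import MappingChallengeDataset
from mrcnn.evaluate import evaluate_coco

dataset_val = MappingChallengeDataset()
val_coco = dataset_val.load_dataset("data/val", load_small=True, return_coco=True)  # assumed path
dataset_val.prepare()

# `model` is the modellib.MaskRCNN instance loaded with pretrained_weights.h5, as in app.py.
evaluate_coco(model, dataset_val, val_coco, eval_type="segm", limit=5)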
mrcnn/model.py ADDED
The diff for this file is too large to render. See raw diff
 
mrcnn/parallel_model.py ADDED
@@ -0,0 +1,173 @@
1
+ """
2
+ Mask R-CNN
3
+ Multi-GPU Support for Keras.
4
+
5
+ Copyright (c) 2017 Matterport, Inc.
6
+ Licensed under the MIT License (see LICENSE for details)
7
+ Written by Waleed Abdulla
8
+
9
+ Ideas and small code snippets from these sources:
10
+ https://github.com/fchollet/keras/issues/2436
11
+ https://medium.com/@kuza55/transparent-multi-gpu-training-on-tensorflow-with-keras-8b0016fd9012
12
+ https://github.com/avolkov1/keras_experiments/blob/master/keras_exp/multigpu/
13
+ https://github.com/fchollet/keras/blob/master/keras/utils/training_utils.py
14
+ """
15
+
16
+ import tensorflow as tf
17
+ import keras.backend as K
18
+ import keras.layers as KL
19
+ import keras.models as KM
20
+
21
+
22
+ class ParallelModel(KM.Model):
23
+ """Subclasses the standard Keras Model and adds multi-GPU support.
24
+ It works by creating a copy of the model on each GPU. Then it slices
25
+ the inputs and sends a slice to each copy of the model, and then
26
+ merges the outputs together and applies the loss on the combined
27
+ outputs.
28
+ """
29
+
30
+ def __init__(self, keras_model, gpu_count):
31
+ """Class constructor.
32
+ keras_model: The Keras model to parallelize
33
+ gpu_count: Number of GPUs. Must be > 1
34
+ """
35
+ self.inner_model = keras_model
36
+ self.gpu_count = gpu_count
37
+ merged_outputs = self.make_parallel()
38
+ super(ParallelModel, self).__init__(inputs=self.inner_model.inputs,
39
+ outputs=merged_outputs)
40
+
41
+ def __getattribute__(self, attrname):
42
+ """Redirect loading and saving methods to the inner model. That's where
43
+ the weights are stored."""
44
+ if 'load' in attrname or 'save' in attrname:
45
+ return getattr(self.inner_model, attrname)
46
+ return super(ParallelModel, self).__getattribute__(attrname)
47
+
48
+ def summary(self, *args, **kwargs):
49
+ """Override summary() to display summaries of both the wrapper
50
+ and inner models."""
51
+ super(ParallelModel, self).summary(*args, **kwargs)
52
+ self.inner_model.summary(*args, **kwargs)
53
+
54
+ def make_parallel(self):
55
+ """Creates a new wrapper model that consists of multiple replicas of
56
+ the original model placed on different GPUs.
57
+ """
58
+ # Slice inputs. Slice inputs on the CPU to avoid sending a copy
59
+ # of the full inputs to all GPUs. Saves on bandwidth and memory.
60
+ input_slices = {name: tf.split(x, self.gpu_count)
61
+ for name, x in zip(self.inner_model.input_names,
62
+ self.inner_model.inputs)}
63
+
64
+ output_names = self.inner_model.output_names
65
+ outputs_all = []
66
+ for i in range(len(self.inner_model.outputs)):
67
+ outputs_all.append([])
68
+
69
+ # Run the model call() on each GPU to place the ops there
70
+ for i in range(self.gpu_count):
71
+ with tf.device('/gpu:%d' % i):
72
+ with tf.name_scope('tower_%d' % i):
73
+ # Run a slice of inputs through this replica
74
+ zipped_inputs = zip(self.inner_model.input_names,
75
+ self.inner_model.inputs)
76
+ inputs = [
77
+ KL.Lambda(lambda s: input_slices[name][i],
78
+ output_shape=lambda s: (None,) + s[1:])(tensor)
79
+ for name, tensor in zipped_inputs]
80
+ # Create the model replica and get the outputs
81
+ outputs = self.inner_model(inputs)
82
+ if not isinstance(outputs, list):
83
+ outputs = [outputs]
84
+ # Save the outputs for merging back together later
85
+ for l, o in enumerate(outputs):
86
+ outputs_all[l].append(o)
87
+
88
+ # Merge outputs on CPU
89
+ with tf.device('/cpu:0'):
90
+ merged = []
91
+ for outputs, name in zip(outputs_all, output_names):
92
+ # If outputs are numbers without dimensions, add a batch dim.
93
+ def add_dim(tensor):
94
+ """Add a dimension to tensors that don't have any."""
95
+ if K.int_shape(tensor) == ():
96
+ return KL.Lambda(lambda t: K.reshape(t, [1, 1]))(tensor)
97
+ return tensor
98
+ outputs = list(map(add_dim, outputs))
99
+
100
+ # Concatenate
101
+ merged.append(KL.Concatenate(axis=0, name=name)(outputs))
102
+ return merged
103
+
104
+
105
+ if __name__ == "__main__":
106
+ # Testing code below. It creates a simple model to train on MNIST and
107
+ # tries to run it on 2 GPUs. It saves the graph so it can be viewed
108
+ # in TensorBoard. Run it as:
109
+ #
110
+ # python3 parallel_model.py
111
+
112
+ import os
113
+ import numpy as np
114
+ import keras.optimizers
115
+ from keras.datasets import mnist
116
+ from keras.preprocessing.image import ImageDataGenerator
117
+
118
+ GPU_COUNT = 2
119
+
120
+ # Root directory of the project
121
+ ROOT_DIR = os.path.abspath("../")
122
+
123
+ # Directory to save logs and trained model
124
+ MODEL_DIR = os.path.join(ROOT_DIR, "logs")
125
+
126
+ def build_model(x_train, num_classes):
127
+ # Reset default graph. Keras leaves old ops in the graph,
128
+ # which are ignored for execution but clutter graph
129
+ # visualization in TensorBoard.
130
+ tf.reset_default_graph()
131
+
132
+ inputs = KL.Input(shape=x_train.shape[1:], name="input_image")
133
+ x = KL.Conv2D(32, (3, 3), activation='relu', padding="same",
134
+ name="conv1")(inputs)
135
+ x = KL.Conv2D(64, (3, 3), activation='relu', padding="same",
136
+ name="conv2")(x)
137
+ x = KL.MaxPooling2D(pool_size=(2, 2), name="pool1")(x)
138
+ x = KL.Flatten(name="flat1")(x)
139
+ x = KL.Dense(128, activation='relu', name="dense1")(x)
140
+ x = KL.Dense(num_classes, activation='softmax', name="dense2")(x)
141
+
142
+ return KM.Model(inputs, x, "digit_classifier_model")
143
+
144
+ # Load MNIST Data
145
+ (x_train, y_train), (x_test, y_test) = mnist.load_data()
146
+ x_train = np.expand_dims(x_train, -1).astype('float32') / 255
147
+ x_test = np.expand_dims(x_test, -1).astype('float32') / 255
148
+
149
+ print('x_train shape:', x_train.shape)
150
+ print('x_test shape:', x_test.shape)
151
+
152
+ # Build data generator and model
153
+ datagen = ImageDataGenerator()
154
+ model = build_model(x_train, 10)
155
+
156
+ # Add multi-GPU support.
157
+ model = ParallelModel(model, GPU_COUNT)
158
+
159
+ optimizer = keras.optimizers.SGD(lr=0.01, momentum=0.9, clipnorm=5.0)
160
+
161
+ model.compile(loss='sparse_categorical_crossentropy',
162
+ optimizer=optimizer, metrics=['accuracy'])
163
+
164
+ model.summary()
165
+
166
+ # Train
167
+ model.fit_generator(
168
+ datagen.flow(x_train, y_train, batch_size=64),
169
+ steps_per_epoch=50, epochs=10, verbose=1,
170
+ validation_data=(x_test, y_test),
171
+ callbacks=[keras.callbacks.TensorBoard(log_dir=MODEL_DIR,
172
+ write_graph=True)]
173
+ )
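The __main__ block above already exercises ParallelModel on MNIST; in the Mask R-CNN stack the wrapper only comes into play when GPU_COUNT > 1. A sketch under that assumption, with a deliberately tiny stand-in model and an illustrative checkpoint filename:

# Hypothetical sketch: wrap a small Keras model for 2 GPUs (illustrative only).
import keras.layers as KL
import keras.models as KM
from mrcnn.parallel_model import ParallelModel

inputs = KL.Input(shape=(32,), name="in")
outputs = KL.Dense(4, activation="softmax", name="out")(inputs)
keras_model = KM.Model(inputs, outputs)

GPU_COUNT = 2
if GPU_COUNT > 1:
    keras_model = ParallelModel(keras_model, GPU_COUNT)
# load/save attribute access is redirected to the inner single-GPU model,
# so checkpoints written here stay loadable with GPU_COUNT == 1.
keras_model.save_weights("parallel_demo.h5")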
mrcnn/utils.py ADDED
@@ -0,0 +1,839 @@
1
+ """
2
+ Mask R-CNN
3
+ Common utility functions and classes.
4
+
5
+ Copyright (c) 2017 Matterport, Inc.
6
+ Licensed under the MIT License (see LICENSE for details)
7
+ Written by Waleed Abdulla
8
+ """
9
+
10
+ import sys
11
+ import os
12
+ import math
13
+ import random
14
+ import numpy as np
15
+ import tensorflow as tf
16
+ import scipy
17
+ import skimage.color
18
+ import skimage.io
19
+ import skimage.transform
20
+ import urllib.request
21
+ import shutil
22
+ import warnings
23
+
24
+ # URL from which to download the latest COCO trained weights
25
+ COCO_MODEL_URL = "https://github.com/matterport/Mask_RCNN/releases/download/v2.0/mask_rcnn_coco.h5"
26
+
27
+
28
+ ############################################################
29
+ # Bounding Boxes
30
+ ############################################################
31
+
32
+ def extract_bboxes(mask):
33
+ """Compute bounding boxes from masks.
34
+ mask: [height, width, num_instances]. Mask pixels are either 1 or 0.
35
+
36
+ Returns: bbox array [num_instances, (y1, x1, y2, x2)].
37
+ """
38
+ boxes = np.zeros([mask.shape[-1], 4], dtype=np.int32)
39
+ for i in range(mask.shape[-1]):
40
+ m = mask[:, :, i]
41
+ # Bounding box.
42
+ horizontal_indicies = np.where(np.any(m, axis=0))[0]
43
+ vertical_indicies = np.where(np.any(m, axis=1))[0]
44
+ if horizontal_indicies.shape[0]:
45
+ x1, x2 = horizontal_indicies[[0, -1]]
46
+ y1, y2 = vertical_indicies[[0, -1]]
47
+ # x2 and y2 should not be part of the box. Increment by 1.
48
+ x2 += 1
49
+ y2 += 1
50
+ else:
51
+ # No mask for this instance. Might happen due to
52
+ # resizing or cropping. Set bbox to zeros
53
+ x1, x2, y1, y2 = 0, 0, 0, 0
54
+ boxes[i] = np.array([y1, x1, y2, x2])
55
+ return boxes.astype(np.int32)
56
+
57
+
58
+ def compute_iou(box, boxes, box_area, boxes_area):
59
+ """Calculates IoU of the given box with the array of the given boxes.
60
+ box: 1D vector [y1, x1, y2, x2]
61
+ boxes: [boxes_count, (y1, x1, y2, x2)]
62
+ box_area: float. the area of 'box'
63
+ boxes_area: array of length boxes_count.
64
+
65
+ Note: the areas are passed in rather than calculated here for
66
+ efficiency. Calculate once in the caller to avoid duplicate work.
67
+ """
68
+ # Calculate intersection areas
69
+ y1 = np.maximum(box[0], boxes[:, 0])
70
+ y2 = np.minimum(box[2], boxes[:, 2])
71
+ x1 = np.maximum(box[1], boxes[:, 1])
72
+ x2 = np.minimum(box[3], boxes[:, 3])
73
+ intersection = np.maximum(x2 - x1, 0) * np.maximum(y2 - y1, 0)
74
+ union = box_area + boxes_area[:] - intersection[:]
75
+ iou = intersection / union
76
+ return iou
77
+
78
+
79
+ def compute_overlaps(boxes1, boxes2):
80
+ """Computes IoU overlaps between two sets of boxes.
81
+ boxes1, boxes2: [N, (y1, x1, y2, x2)].
82
+
83
+ For better performance, pass the largest set first and the smaller second.
84
+ """
85
+ # Areas of anchors and GT boxes
86
+ area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1])
87
+ area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1])
88
+
89
+ # Compute overlaps to generate matrix [boxes1 count, boxes2 count]
90
+ # Each cell contains the IoU value.
91
+ overlaps = np.zeros((boxes1.shape[0], boxes2.shape[0]))
92
+ for i in range(overlaps.shape[1]):
93
+ box2 = boxes2[i]
94
+ overlaps[:, i] = compute_iou(box2, boxes1, area2[i], area1)
95
+ return overlaps
96
+
97
+
98
+ def compute_overlaps_masks(masks1, masks2):
99
+ '''Computes IoU overlaps between two sets of masks.
100
+ masks1, masks2: [Height, Width, instances]
101
+ '''
102
+ # flatten masks
103
+ masks1 = np.reshape(masks1 > .5, (-1, masks1.shape[-1])).astype(np.float32)
104
+ masks2 = np.reshape(masks2 > .5, (-1, masks2.shape[-1])).astype(np.float32)
105
+ area1 = np.sum(masks1, axis=0)
106
+ area2 = np.sum(masks2, axis=0)
107
+
108
+ # intersections and union
109
+ intersections = np.dot(masks1.T, masks2)
110
+ union = area1[:, None] + area2[None, :] - intersections
111
+ overlaps = intersections / union
112
+
113
+ return overlaps
114
+
115
+
116
+ def non_max_suppression(boxes, scores, threshold):
117
+ """Performs non-maximum suppression and returns indices of kept boxes.
118
+ boxes: [N, (y1, x1, y2, x2)]. Notice that (y2, x2) lays outside the box.
119
+ scores: 1-D array of box scores.
120
+ threshold: Float. IoU threshold to use for filtering.
121
+ """
122
+ assert boxes.shape[0] > 0
123
+ if boxes.dtype.kind != "f":
124
+ boxes = boxes.astype(np.float32)
125
+
126
+ # Compute box areas
127
+ y1 = boxes[:, 0]
128
+ x1 = boxes[:, 1]
129
+ y2 = boxes[:, 2]
130
+ x2 = boxes[:, 3]
131
+ area = (y2 - y1) * (x2 - x1)
132
+
133
+ # Get indices of boxes sorted by scores (highest first)
134
+ ixs = scores.argsort()[::-1]
135
+
136
+ pick = []
137
+ while len(ixs) > 0:
138
+ # Pick top box and add its index to the list
139
+ i = ixs[0]
140
+ pick.append(i)
141
+ # Compute IoU of the picked box with the rest
142
+ iou = compute_iou(boxes[i], boxes[ixs[1:]], area[i], area[ixs[1:]])
143
+ # Identify boxes with IoU over the threshold. This
144
+ # returns indices into ixs[1:], so add 1 to get
145
+ # indices into ixs.
146
+ remove_ixs = np.where(iou > threshold)[0] + 1
147
+ # Remove indices of the picked and overlapped boxes.
148
+ ixs = np.delete(ixs, remove_ixs)
149
+ ixs = np.delete(ixs, 0)
150
+ return np.array(pick, dtype=np.int32)
151
+
152
+
153
+ def apply_box_deltas(boxes, deltas):
154
+ """Applies the given deltas to the given boxes.
155
+ boxes: [N, (y1, x1, y2, x2)]. Note that (y2, x2) is outside the box.
156
+ deltas: [N, (dy, dx, log(dh), log(dw))]
157
+ """
158
+ boxes = boxes.astype(np.float32)
159
+ # Convert to y, x, h, w
160
+ height = boxes[:, 2] - boxes[:, 0]
161
+ width = boxes[:, 3] - boxes[:, 1]
162
+ center_y = boxes[:, 0] + 0.5 * height
163
+ center_x = boxes[:, 1] + 0.5 * width
164
+ # Apply deltas
165
+ center_y += deltas[:, 0] * height
166
+ center_x += deltas[:, 1] * width
167
+ height *= np.exp(deltas[:, 2])
168
+ width *= np.exp(deltas[:, 3])
169
+ # Convert back to y1, x1, y2, x2
170
+ y1 = center_y - 0.5 * height
171
+ x1 = center_x - 0.5 * width
172
+ y2 = y1 + height
173
+ x2 = x1 + width
174
+ return np.stack([y1, x1, y2, x2], axis=1)
175
+
176
+
177
+ def box_refinement_graph(box, gt_box):
178
+ """Compute refinement needed to transform box to gt_box.
179
+ box and gt_box are [N, (y1, x1, y2, x2)]
180
+ """
181
+ box = tf.cast(box, tf.float32)
182
+ gt_box = tf.cast(gt_box, tf.float32)
183
+
184
+ height = box[:, 2] - box[:, 0]
185
+ width = box[:, 3] - box[:, 1]
186
+ center_y = box[:, 0] + 0.5 * height
187
+ center_x = box[:, 1] + 0.5 * width
188
+
189
+ gt_height = gt_box[:, 2] - gt_box[:, 0]
190
+ gt_width = gt_box[:, 3] - gt_box[:, 1]
191
+ gt_center_y = gt_box[:, 0] + 0.5 * gt_height
192
+ gt_center_x = gt_box[:, 1] + 0.5 * gt_width
193
+
194
+ dy = (gt_center_y - center_y) / height
195
+ dx = (gt_center_x - center_x) / width
196
+ dh = tf.log(gt_height / height)
197
+ dw = tf.log(gt_width / width)
198
+
199
+ result = tf.stack([dy, dx, dh, dw], axis=1)
200
+ return result
201
+
202
+
203
+ def box_refinement(box, gt_box):
204
+ """Compute refinement needed to transform box to gt_box.
205
+ box and gt_box are [N, (y1, x1, y2, x2)]. (y2, x2) is
206
+ assumed to be outside the box.
207
+ """
208
+ box = box.astype(np.float32)
209
+ gt_box = gt_box.astype(np.float32)
210
+
211
+ height = box[:, 2] - box[:, 0]
212
+ width = box[:, 3] - box[:, 1]
213
+ center_y = box[:, 0] + 0.5 * height
214
+ center_x = box[:, 1] + 0.5 * width
215
+
216
+ gt_height = gt_box[:, 2] - gt_box[:, 0]
217
+ gt_width = gt_box[:, 3] - gt_box[:, 1]
218
+ gt_center_y = gt_box[:, 0] + 0.5 * gt_height
219
+ gt_center_x = gt_box[:, 1] + 0.5 * gt_width
220
+
221
+ dy = (gt_center_y - center_y) / height
222
+ dx = (gt_center_x - center_x) / width
223
+ dh = np.log(gt_height / height)
224
+ dw = np.log(gt_width / width)
225
+
226
+ return np.stack([dy, dx, dh, dw], axis=1)
227
+
228
+
229
+ ############################################################
230
+ # Dataset
231
+ ############################################################
232
+
233
+ class Dataset(object):
234
+ """The base class for dataset classes.
235
+ To use it, create a new class that adds functions specific to the dataset
236
+ you want to use. For example:
237
+
238
+ class CatsAndDogsDataset(Dataset):
239
+ def load_cats_and_dogs(self):
240
+ ...
241
+ def load_mask(self, image_id):
242
+ ...
243
+ def image_reference(self, image_id):
244
+ ...
245
+
246
+ See COCODataset and ShapesDataset as examples.
247
+ """
248
+
249
+ def __init__(self, class_map=None):
250
+ self._image_ids = []
251
+ self.image_info = []
252
+ # Background is always the first class
253
+ self.class_info = [{"source": "", "id": 0, "name": "BG"}]
254
+ self.source_class_ids = {}
255
+
256
+ def add_class(self, source, class_id, class_name):
257
+ assert "." not in source, "Source name cannot contain a dot"
258
+ # Does the class exist already?
259
+ for info in self.class_info:
260
+ if info['source'] == source and info["id"] == class_id:
261
+ # source.class_id combination already available, skip
262
+ return
263
+ # Add the class
264
+ self.class_info.append({
265
+ "source": source,
266
+ "id": class_id,
267
+ "name": class_name,
268
+ })
269
+
270
+ def add_image(self, source, image_id, path, **kwargs):
271
+ image_info = {
272
+ "id": image_id,
273
+ "source": source,
274
+ "path": path,
275
+ }
276
+ image_info.update(kwargs)
277
+ self.image_info.append(image_info)
278
+
279
+ def image_reference(self, image_id):
280
+ """Return a link to the image in its source Website or details about
281
+ the image that help looking it up or debugging it.
282
+
283
+ Override for your dataset, but pass to this function
284
+ if you encounter images not in your dataset.
285
+ """
286
+ return ""
287
+
288
+ def prepare(self, class_map=None):
289
+ """Prepares the Dataset class for use.
290
+
291
+ TODO: class map is not supported yet. When done, it should handle mapping
292
+ classes from different datasets to the same class ID.
293
+ """
294
+
295
+ def clean_name(name):
296
+ """Returns a shorter version of object names for cleaner display."""
297
+ return ",".join(name.split(",")[:1])
298
+
299
+ # Build (or rebuild) everything else from the info dicts.
300
+ self.num_classes = len(self.class_info)
301
+ self.class_ids = np.arange(self.num_classes)
302
+ self.class_names = [clean_name(c["name"]) for c in self.class_info]
303
+ self.num_images = len(self.image_info)
304
+ self._image_ids = np.arange(self.num_images)
305
+
306
+ self.class_from_source_map = {"{}.{}".format(info['source'], info['id']): id
307
+ for info, id in zip(self.class_info, self.class_ids)}
308
+
309
+ # Map sources to class_ids they support
310
+ self.sources = list(set([i['source'] for i in self.class_info]))
311
+ self.source_class_ids = {}
312
+ # Loop over datasets
313
+ for source in self.sources:
314
+ self.source_class_ids[source] = []
315
+ # Find classes that belong to this dataset
316
+ for i, info in enumerate(self.class_info):
317
+ # Include BG class in all datasets
318
+ if i == 0 or source == info['source']:
319
+ self.source_class_ids[source].append(i)
320
+
321
+ def map_source_class_id(self, source_class_id):
322
+ """Takes a source class ID and returns the int class ID assigned to it.
323
+
324
+ For example:
325
+ dataset.map_source_class_id("coco.12") -> 23
326
+ """
327
+ return self.class_from_source_map[source_class_id]
328
+
329
+ def get_source_class_id(self, class_id, source):
330
+ """Map an internal class ID to the corresponding class ID in the source dataset."""
331
+ info = self.class_info[class_id]
332
+ assert info['source'] == source
333
+ return info['id']
334
+
335
+ def append_data(self, class_info, image_info):
336
+ self.external_to_class_id = {}
337
+ for i, c in enumerate(self.class_info):
338
+ for ds, id in c["map"]:
339
+ self.external_to_class_id[ds + str(id)] = i
340
+
341
+ # Map external image IDs to internal ones.
342
+ self.external_to_image_id = {}
343
+ for i, info in enumerate(self.image_info):
344
+ self.external_to_image_id[info["ds"] + str(info["id"])] = i
345
+
346
+ @property
347
+ def image_ids(self):
348
+ return self._image_ids
349
+
350
+ def source_image_link(self, image_id):
351
+ """Returns the path or URL to the image.
352
+ Override this to return a URL to the image if it's available online for easy
353
+ debugging.
354
+ """
355
+ return self.image_info[image_id]["path"]
356
+
357
+ def load_image(self, image_id):
358
+ """Load the specified image and return a [H,W,3] Numpy array.
359
+ """
360
+ # Load image
361
+ image = skimage.io.imread(self.image_info[image_id]['path'])
362
+ # If grayscale. Convert to RGB for consistency.
363
+ if image.ndim != 3:
364
+ image = skimage.color.gray2rgb(image)
365
+ # If has an alpha channel, remove it for consistency
366
+ if image.shape[-1] == 4:
367
+ image = image[..., :3]
368
+ return image
369
+
370
+ def load_mask(self, image_id):
371
+ """Load instance masks for the given image.
372
+
373
+ Different datasets use different ways to store masks. Override this
374
+ method to load instance masks and return them in the form of am
375
+ array of binary masks of shape [height, width, instances].
376
+
377
+ Returns:
378
+ masks: A bool array of shape [height, width, instance count] with
379
+ a binary mask per instance.
380
+ class_ids: a 1D array of class IDs of the instance masks.
381
+ """
382
+ # Override this function to load a mask from your dataset.
383
+ # Otherwise, it returns an empty mask.
384
+ mask = np.empty([0, 0, 0])
385
+ class_ids = np.empty([0], np.int32)
386
+ return mask, class_ids
387
+
388
+
389
+ def resize_image(image, min_dim=None, max_dim=None, mode="square"):
390
+ """Resizes an image keeping the aspect ratio unchanged.
391
+
392
+ min_dim: if provided, resizes the image such that it's smaller
393
+ dimension == min_dim
394
+ max_dim: if provided, ensures that the image longest side doesn't
395
+ exceed this value.
396
+ mode: Resizing mode.
397
+ none: No resizing. Return the image unchanged.
398
+ square: Resize and pad with zeros to get a square image
399
+ of size [max_dim, max_dim].
400
+ pad64: Pads width and height with zeros to make them multiples of 64.
401
+ If min_dim is provided, it scales the small side to >= min_dim
402
+ before padding. max_dim is ignored in this mode.
403
+ The multiple of 64 is needed to ensure smooth scaling of feature
404
+ maps up and down the 6 levels of the FPN pyramid (2**6=64).
405
+
406
+ Returns:
407
+ image: the resized image
408
+ window: (y1, x1, y2, x2). If max_dim is provided, padding might
409
+ be inserted in the returned image. If so, this window is the
410
+ coordinates of the image part of the full image (excluding
411
+ the padding). The x2, y2 pixels are not included.
412
+ scale: The scale factor used to resize the image
413
+ padding: Padding added to the image [(top, bottom), (left, right), (0, 0)]
414
+ """
415
+ # Keep track of image dtype and return results in the same dtype
416
+ image_dtype = image.dtype
417
+ # Default window (y1, x1, y2, x2) and default scale == 1.
418
+ h, w = image.shape[:2]
419
+ window = (0, 0, h, w)
420
+ scale = 1
421
+ padding = [(0, 0), (0, 0), (0, 0)]
422
+
423
+ if mode == "none":
424
+ return image, window, scale, padding
425
+
426
+ # Scale?
427
+ if min_dim:
428
+ # Scale up but not down
429
+ scale = max(1, min_dim / min(h, w))
430
+ # Does it exceed max dim?
431
+ if max_dim and mode == "square":
432
+ image_max = max(h, w)
433
+ if round(image_max * scale) > max_dim:
434
+ scale = max_dim / image_max
435
+
436
+ # Resize image using bilinear interpolation
437
+ if scale != 1:
438
+ image = skimage.transform.resize(
439
+ image, (round(h * scale), round(w * scale)),
440
+ order=1, mode="constant", preserve_range=True)
441
+ # Need padding?
442
+ if mode == "square":
443
+ # Get new height and width
444
+ h, w = image.shape[:2]
445
+ top_pad = (max_dim - h) // 2
446
+ bottom_pad = max_dim - h - top_pad
447
+ left_pad = (max_dim - w) // 2
448
+ right_pad = max_dim - w - left_pad
449
+ padding = [(top_pad, bottom_pad), (left_pad, right_pad), (0, 0)]
450
+ image = np.pad(image, padding, mode='constant', constant_values=0)
451
+ window = (top_pad, left_pad, h + top_pad, w + left_pad)
452
+ elif mode == "pad64":
453
+ h, w = image.shape[:2]
454
+ # Both sides must be divisible by 64
455
+ assert min_dim % 64 == 0, "Minimum dimension must be a multiple of 64"
456
+ # Height
457
+ if h % 64 > 0:
458
+ max_h = h - (h % 64) + 64
459
+ top_pad = (max_h - h) // 2
460
+ bottom_pad = max_h - h - top_pad
461
+ else:
462
+ top_pad = bottom_pad = 0
463
+ # Width
464
+ if w % 64 > 0:
465
+ max_w = w - (w % 64) + 64
466
+ left_pad = (max_w - w) // 2
467
+ right_pad = max_w - w - left_pad
468
+ else:
469
+ left_pad = right_pad = 0
470
+ padding = [(top_pad, bottom_pad), (left_pad, right_pad), (0, 0)]
471
+ image = np.pad(image, padding, mode='constant', constant_values=0)
472
+ window = (top_pad, left_pad, h + top_pad, w + left_pad)
473
+ else:
474
+ raise Exception("Mode {} not supported".format(mode))
475
+ return image.astype(image_dtype), window, scale, padding
476
+
477
+
478
+ def resize_mask(mask, scale, padding):
479
+ """Resizes a mask using the given scale and padding.
480
+ Typically, you get the scale and padding from resize_image() to
481
+ ensure both, the image and the mask, are resized consistently.
482
+
483
+ scale: mask scaling factor
484
+ padding: Padding to add to the mask in the form
485
+ [(top, bottom), (left, right), (0, 0)]
486
+ """
487
+ # Suppress warning from scipy 0.13.0, the output shape of zoom() is
488
+ # calculated with round() instead of int()
489
+ with warnings.catch_warnings():
490
+ warnings.simplefilter("ignore")
491
+ mask = scipy.ndimage.zoom(mask, zoom=[scale, scale, 1], order=0)
492
+ mask = np.pad(mask, padding, mode='constant', constant_values=0)
493
+ return mask
494
+
495
+
496
+ def minimize_mask(bbox, mask, mini_shape):
497
+ """Resize masks to a smaller version to reduce memory load.
498
+ Mini-masks can be resized back to image scale using expand_masks()
499
+
500
+ See inspect_data.ipynb notebook for more details.
501
+ """
502
+ mini_mask = np.zeros(mini_shape + (mask.shape[-1],), dtype=bool)
503
+ for i in range(mask.shape[-1]):
504
+ # Pick slice and cast to bool in case load_mask() returned wrong dtype
505
+ m = mask[:, :, i].astype(bool)
506
+ y1, x1, y2, x2 = bbox[i][:4]
507
+ m = m[y1:y2, x1:x2]
508
+ if m.size == 0:
509
+ raise Exception("Invalid bounding box with area of zero")
510
+ # Resize with bilinear interpolation
511
+ m = skimage.transform.resize(m, mini_shape, order=1, mode="constant")
512
+ mini_mask[:, :, i] = np.around(m).astype(np.bool)
513
+ return mini_mask
514
+
515
+
516
+ def expand_mask(bbox, mini_mask, image_shape):
517
+ """Resizes mini masks back to image size. Reverses the change
518
+ of minimize_mask().
519
+
520
+ See inspect_data.ipynb notebook for more details.
521
+ """
522
+ mask = np.zeros(image_shape[:2] + (mini_mask.shape[-1],), dtype=bool)
523
+ for i in range(mask.shape[-1]):
524
+ m = mini_mask[:, :, i]
525
+ y1, x1, y2, x2 = bbox[i][:4]
526
+ h = y2 - y1
527
+ w = x2 - x1
528
+ # Resize with bilinear interpolation
529
+ m = skimage.transform.resize(m, (h, w), order=1, mode="constant")
530
+ mask[y1:y2, x1:x2, i] = np.around(m).astype(np.bool)
531
+ return mask
532
+
533
+
534
+ # TODO: Build and use this function to reduce code duplication
535
+ def mold_mask(mask, config):
536
+ pass
537
+
538
+
539
+ def unmold_mask(mask, bbox, image_shape):
540
+ """Converts a mask generated by the neural network to a format similar
541
+ to its original shape.
542
+ mask: [height, width] of type float. A small, typically 28x28 mask.
543
+ bbox: [y1, x1, y2, x2]. The box to fit the mask in.
544
+
545
+ Returns a binary mask with the same size as the original image.
546
+ """
547
+ threshold = 0.5
548
+ y1, x1, y2, x2 = bbox
549
+ mask = skimage.transform.resize(mask, (y2 - y1, x2 - x1), order=1, mode="constant")
550
+ mask = np.where(mask >= threshold, 1, 0).astype(np.bool)
551
+
552
+ # Put the mask in the right location.
553
+ full_mask = np.zeros(image_shape[:2], dtype=np.bool)
554
+ full_mask[y1:y2, x1:x2] = mask
555
+ return full_mask
556
+
557
+
558
+ ############################################################
559
+ # Anchors
560
+ ############################################################
561
+
562
+ def generate_anchors(scales, ratios, shape, feature_stride, anchor_stride):
563
+ """
564
+ scales: 1D array of anchor sizes in pixels. Example: [32, 64, 128]
565
+ ratios: 1D array of anchor ratios of width/height. Example: [0.5, 1, 2]
566
+ shape: [height, width] spatial shape of the feature map over which
567
+ to generate anchors.
568
+ feature_stride: Stride of the feature map relative to the image in pixels.
569
+ anchor_stride: Stride of anchors on the feature map. For example, if the
570
+ value is 2 then generate anchors for every other feature map pixel.
571
+ """
572
+ # Get all combinations of scales and ratios
573
+ scales, ratios = np.meshgrid(np.array(scales), np.array(ratios))
574
+ scales = scales.flatten()
575
+ ratios = ratios.flatten()
576
+
577
+ # Enumerate heights and widths from scales and ratios
578
+ heights = scales / np.sqrt(ratios)
579
+ widths = scales * np.sqrt(ratios)
580
+
581
+ # Enumerate shifts in feature space
582
+ shifts_y = np.arange(0, shape[0], anchor_stride) * feature_stride
583
+ shifts_x = np.arange(0, shape[1], anchor_stride) * feature_stride
584
+ shifts_x, shifts_y = np.meshgrid(shifts_x, shifts_y)
585
+
586
+ # Enumerate combinations of shifts, widths, and heights
587
+ box_widths, box_centers_x = np.meshgrid(widths, shifts_x)
588
+ box_heights, box_centers_y = np.meshgrid(heights, shifts_y)
589
+
590
+ # Reshape to get a list of (y, x) and a list of (h, w)
591
+ box_centers = np.stack(
592
+ [box_centers_y, box_centers_x], axis=2).reshape([-1, 2])
593
+ box_sizes = np.stack([box_heights, box_widths], axis=2).reshape([-1, 2])
594
+
595
+ # Convert to corner coordinates (y1, x1, y2, x2)
596
+ boxes = np.concatenate([box_centers - 0.5 * box_sizes,
597
+ box_centers + 0.5 * box_sizes], axis=1)
598
+ return boxes
599
+
600
+
601
+ def generate_pyramid_anchors(scales, ratios, feature_shapes, feature_strides,
602
+ anchor_stride):
603
+ """Generate anchors at different levels of a feature pyramid. Each scale
604
+ is associated with a level of the pyramid, but each ratio is used in
605
+ all levels of the pyramid.
606
+
607
+ Returns:
608
+ anchors: [N, (y1, x1, y2, x2)]. All generated anchors in one array. Sorted
609
+ with the same order of the given scales. So, anchors of scale[0] come
610
+ first, then anchors of scale[1], and so on.
611
+ """
612
+ # Anchors
613
+ # [anchor_count, (y1, x1, y2, x2)]
614
+ anchors = []
615
+ for i in range(len(scales)):
616
+ anchors.append(generate_anchors(scales[i], ratios, feature_shapes[i],
617
+ feature_strides[i], anchor_stride))
618
+ return np.concatenate(anchors, axis=0)
619
+
620
+
621
+ ############################################################
622
+ # Miscellaneous
623
+ ############################################################
624
+
625
+ def trim_zeros(x):
626
+ """It's common to have tensors larger than the available data and
627
+ pad with zeros. This function removes rows that are all zeros.
628
+
629
+ x: [rows, columns].
630
+ """
631
+ assert len(x.shape) == 2
632
+ return x[~np.all(x == 0, axis=1)]
633
+
634
+
635
+ def compute_matches(gt_boxes, gt_class_ids, gt_masks,
636
+ pred_boxes, pred_class_ids, pred_scores, pred_masks,
637
+ iou_threshold=0.5, score_threshold=0.0):
638
+ """Finds matches between prediction and ground truth instances.
639
+
640
+ Returns:
641
+ gt_match: 1-D array. For each GT box it has the index of the matched
642
+ predicted box.
643
+ pred_match: 1-D array. For each predicted box, it has the index of
644
+ the matched ground truth box.
645
+ overlaps: [pred_boxes, gt_boxes] IoU overlaps.
646
+ """
647
+ # Trim zero padding
648
+ # TODO: cleaner to do zero unpadding upstream
649
+ gt_boxes = trim_zeros(gt_boxes)
650
+ gt_masks = gt_masks[..., :gt_boxes.shape[0]]
651
+ pred_boxes = trim_zeros(pred_boxes)
652
+ pred_scores = pred_scores[:pred_boxes.shape[0]]
653
+ # Sort predictions by score from high to low
654
+ indices = np.argsort(pred_scores)[::-1]
655
+ pred_boxes = pred_boxes[indices]
656
+ pred_class_ids = pred_class_ids[indices]
657
+ pred_scores = pred_scores[indices]
658
+ pred_masks = pred_masks[..., indices]
659
+
660
+ # Compute IoU overlaps [pred_masks, gt_masks]
661
+ overlaps = compute_overlaps_masks(pred_masks, gt_masks)
662
+
663
+ # Loop through predictions and find matching ground truth boxes
664
+ match_count = 0
665
+ pred_match = -1 * np.ones([pred_boxes.shape[0]])
666
+ gt_match = -1 * np.ones([gt_boxes.shape[0]])
667
+ for i in range(len(pred_boxes)):
668
+ # Find best matching ground truth box
669
+ # 1. Sort matches by score
670
+ sorted_ixs = np.argsort(overlaps[i])[::-1]
671
+ # 2. Remove low scores
672
+ low_score_idx = np.where(overlaps[i, sorted_ixs] < score_threshold)[0]
673
+ if low_score_idx.size > 0:
674
+ sorted_ixs = sorted_ixs[:low_score_idx[0]]
675
+ # 3. Find the match
676
+ for j in sorted_ixs:
677
+ # If ground truth box is already matched, go to next one
678
+ if gt_match[j] > -1:
679
+ continue
680
+ # If we reach IoU smaller than the threshold, end the loop
681
+ iou = overlaps[i, j]
682
+ if iou < iou_threshold:
683
+ break
684
+ # Do we have a match?
685
+ if pred_class_ids[i] == gt_class_ids[j]:
686
+ match_count += 1
687
+ gt_match[j] = i
688
+ pred_match[i] = j
689
+ break
690
+
691
+ return gt_match, pred_match, overlaps
692
+
693
+
694
+ def compute_ap(gt_boxes, gt_class_ids, gt_masks,
695
+ pred_boxes, pred_class_ids, pred_scores, pred_masks,
696
+ iou_threshold=0.5):
697
+ """Compute Average Precision at a set IoU threshold (default 0.5).
698
+
699
+ Returns:
700
+ mAP: Mean Average Precision
701
+ precisions: List of precisions at different class score thresholds.
702
+ recalls: List of recall values at different class score thresholds.
703
+ overlaps: [pred_boxes, gt_boxes] IoU overlaps.
704
+ """
705
+ # Get matches and overlaps
706
+ gt_match, pred_match, overlaps = compute_matches(
707
+ gt_boxes, gt_class_ids, gt_masks,
708
+ pred_boxes, pred_class_ids, pred_scores, pred_masks,
709
+ iou_threshold)
710
+
711
+ # Compute precision and recall at each prediction box step
712
+ precisions = np.cumsum(pred_match > -1) / (np.arange(len(pred_match)) + 1)
713
+ recalls = np.cumsum(pred_match > -1).astype(np.float32) / len(gt_match)
714
+
715
+ # Pad with start and end values to simplify the math
716
+ precisions = np.concatenate([[0], precisions, [0]])
717
+ recalls = np.concatenate([[0], recalls, [1]])
718
+
719
+ # Ensure precision values decrease but don't increase. This way, the
720
+ # precision value at each recall threshold is the maximum it can be
721
+ # for all following recall thresholds, as specified by the VOC paper.
722
+ for i in range(len(precisions) - 2, -1, -1):
723
+ precisions[i] = np.maximum(precisions[i], precisions[i + 1])
724
+
725
+ # Compute mean AP over recall range
726
+ indices = np.where(recalls[:-1] != recalls[1:])[0] + 1
727
+ mAP = np.sum((recalls[indices] - recalls[indices - 1]) *
728
+ precisions[indices])
729
+
730
+ return mAP, precisions, recalls, overlaps
731
+
732
+
733
+ def compute_recall(pred_boxes, gt_boxes, iou):
734
+ """Compute the recall at the given IoU threshold. It's an indication
735
+ of how many GT boxes were found by the given prediction boxes.
736
+
737
+ pred_boxes: [N, (y1, x1, y2, x2)] in image coordinates
738
+ gt_boxes: [N, (y1, x1, y2, x2)] in image coordinates
739
+ """
740
+ # Measure overlaps
741
+ overlaps = compute_overlaps(pred_boxes, gt_boxes)
742
+ iou_max = np.max(overlaps, axis=1)
743
+ iou_argmax = np.argmax(overlaps, axis=1)
744
+ positive_ids = np.where(iou_max >= iou)[0]
745
+ matched_gt_boxes = iou_argmax[positive_ids]
746
+
747
+ recall = len(set(matched_gt_boxes)) / gt_boxes.shape[0]
748
+ return recall, positive_ids
749
+
750
+
751
+ # ## Batch Slicing
752
+ # Some custom layers support a batch size of 1 only, and require a lot of work
753
+ # to support batches greater than 1. This function slices an input tensor
754
+ # across the batch dimension and feeds batches of size 1. Effectively,
755
+ # an easy way to support batches > 1 quickly with little code modification.
756
+ # In the long run, it's more efficient to modify the code to support large
757
+ # batches and getting rid of this function. Consider this a temporary solution
758
+ def batch_slice(inputs, graph_fn, batch_size, names=None):
759
+ """Splits inputs into slices and feeds each slice to a copy of the given
760
+ computation graph and then combines the results. It allows you to run a
761
+ graph on a batch of inputs even if the graph is written to support one
762
+ instance only.
763
+
764
+ inputs: list of tensors. All must have the same first dimension length
765
+ graph_fn: A function that returns a TF tensor that's part of a graph.
766
+ batch_size: number of slices to divide the data into.
767
+ names: If provided, assigns names to the resulting tensors.
768
+ """
769
+ if not isinstance(inputs, list):
770
+ inputs = [inputs]
771
+
772
+ outputs = []
773
+ for i in range(batch_size):
774
+ inputs_slice = [x[i] for x in inputs]
775
+ output_slice = graph_fn(*inputs_slice)
776
+ if not isinstance(output_slice, (tuple, list)):
777
+ output_slice = [output_slice]
778
+ outputs.append(output_slice)
779
+ # Change outputs from a list of slices where each is
780
+ # a list of outputs to a list of outputs and each has
781
+ # a list of slices
782
+ outputs = list(zip(*outputs))
783
+
784
+ if names is None:
785
+ names = [None] * len(outputs)
786
+
787
+ result = [tf.stack(o, axis=0, name=n)
788
+ for o, n in zip(outputs, names)]
789
+ if len(result) == 1:
790
+ result = result[0]
791
+
792
+ return result
793
+
794
+
795
+ def download_trained_weights(coco_model_path, verbose=1):
796
+ """Download COCO trained weights from Releases.
797
+
798
+ coco_model_path: local path of COCO trained weights
799
+ """
800
+ if verbose > 0:
801
+ print("Downloading pretrained model to " + coco_model_path + " ...")
802
+ with urllib.request.urlopen(COCO_MODEL_URL) as resp, open(coco_model_path, 'wb') as out:
803
+ shutil.copyfileobj(resp, out)
804
+ if verbose > 0:
805
+ print("... done downloading pretrained model!")
806
+
807
+
808
+ def norm_boxes(boxes, shape):
809
+ """Converts boxes from pixel coordinates to normalized coordinates.
810
+ boxes: [N, (y1, x1, y2, x2)] in pixel coordinates
811
+ shape: [..., (height, width)] in pixels
812
+
813
+ Note: In pixel coordinates (y2, x2) is outside the box. But in normalized
814
+ coordinates it's inside the box.
815
+
816
+ Returns:
817
+ [N, (y1, x1, y2, x2)] in normalized coordinates
818
+ """
819
+ h, w = shape
820
+ scale = np.array([h - 1, w - 1, h - 1, w - 1])
821
+ shift = np.array([0, 0, 1, 1])
822
+ return np.divide((boxes - shift), scale).astype(np.float32)
823
+
824
+
825
+ def denorm_boxes(boxes, shape):
826
+ """Converts boxes from normalized coordinates to pixel coordinates.
827
+ boxes: [N, (y1, x1, y2, x2)] in normalized coordinates
828
+ shape: [..., (height, width)] in pixels
829
+
830
+ Note: In pixel coordinates (y2, x2) is outside the box. But in normalized
831
+ coordinates it's inside the box.
832
+
833
+ Returns:
834
+ [N, (y1, x1, y2, x2)] in pixel coordinates
835
+ """
836
+ h, w = shape
837
+ scale = np.array([h - 1, w - 1, h - 1, w - 1])
838
+ shift = np.array([0, 0, 1, 1])
839
+ return np.around(np.multiply(boxes, scale) + shift).astype(np.int32)
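A small worked example of the box utilities at the end of this file; the numbers are illustrative and assume a 320x320 image as used elsewhere in this commit:

# Hypothetical sketch: round-trip a pixel box through norm_boxes()/denorm_boxes()
# and compute IoU overlaps between two toy boxes.
import numpy as np
from mrcnn import utils

boxes = np.array([[10, 10, 50, 50]])                # (y1, x1, y2, x2) in pixels
norm = utils.norm_boxes(boxes, shape=(320, 320))    # ~[0.031, 0.031, 0.154, 0.154]
back = utils.denorm_boxes(norm, shape=(320, 320))
assert (back == boxes).all()                        # the shift/scale round-trip is exact here

a = np.array([[0, 0, 10, 10]])
b = np.array([[0, 0, 10, 10], [5, 5, 15, 15]])
print(utils.compute_overlaps(a, b))                 # [[1.0, 0.142857...]]: identical box, then 25/175 overlap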
mrcnn/visualize.py ADDED
@@ -0,0 +1,452 @@
1
+ """
2
+ Mask R-CNN
3
+ Display and Visualization Functions.
4
+
5
+ Copyright (c) 2017 Matterport, Inc.
6
+ Licensed under the MIT License (see LICENSE for details)
7
+ Written by Waleed Abdulla
8
+ """
9
+
10
+ import os
11
+ import sys
12
+ import logging
13
+ import random
14
+ import itertools
15
+ import colorsys
16
+
17
+ import numpy as np
18
+ from skimage.measure import find_contours
19
+ import matplotlib.pyplot as plt
20
+ from matplotlib import patches, lines
21
+ from matplotlib.patches import Polygon
22
+ import IPython.display
23
+
24
+ # Root directory of the project
25
+ ROOT_DIR = os.path.abspath("../")
26
+
27
+ # Import Mask RCNN
28
+ sys.path.append(ROOT_DIR) # To find local version of the library
29
+ from mrcnn import utils
30
+
31
+
32
+ ############################################################
33
+ # Visualization
34
+ ############################################################
35
+
36
+ def display_images(images, titles=None, cols=4, cmap=None, norm=None,
37
+ interpolation=None):
38
+ """Display the given set of images, optionally with titles.
39
+ images: list or array of image tensors in HWC format.
40
+ titles: optional. A list of titles to display with each image.
41
+ cols: number of images per row
42
+ cmap: Optional. Color map to use. For example, "Blues".
43
+ norm: Optional. A Normalize instance to map values to colors.
44
+ interpolation: Optional. Image interpolation to use for display.
45
+ """
46
+ # titles = titles if titles is not None else [""] * len(images)
47
+ # rows = len(images) // cols + 1
48
+ # plt.figure(figsize=(14, 14 * rows // cols))
49
+ # i = 1
50
+ # for image, title in zip(images, titles):
51
+ # plt.subplot(rows, cols, i)
52
+ # plt.title(title, fontsize=9)
53
+ # plt.axis('off')
54
+ # plt.imshow(image.astype(np.uint8), cmap=cmap,
55
+ # norm=norm, interpolation=interpolation)
56
+ # i += 1
57
+ # plt.show()
58
+ pass
59
+
60
+
61
+ def random_colors(N, bright=True):
62
+ """
63
+ Generate random colors.
64
+ To get visually distinct colors, generate them in HSV space then
65
+ convert to RGB.
66
+ """
67
+ brightness = 1.0 if bright else 0.7
68
+ hsv = [(i / N, 1, brightness) for i in range(N)]
69
+ colors = list(map(lambda c: colorsys.hsv_to_rgb(*c), hsv))
70
+ random.shuffle(colors)
71
+ return colors
72
+
73
+
74
+ def apply_mask(image, mask, color, alpha=0.5):
75
+ """Apply the given mask to the image.
76
+ """
77
+ for c in range(3):
78
+ image[:, :, c] = np.where(mask == 1,
79
+ image[:, :, c] *
80
+ (1 - alpha) + alpha * color[c] * 255,
81
+ image[:, :, c])
82
+ return image
83
+
84
+
85
+ def display_instances(image, boxes, masks, class_ids, class_names,
86
+ scores=None, title="",
87
+ figsize=(16, 16), ax=None):
88
+ """
89
+ boxes: [num_instance, (y1, x1, y2, x2, class_id)] in image coordinates.
90
+ masks: [height, width, num_instances]
91
+ class_ids: [num_instances]
92
+ class_names: list of class names of the dataset
93
+ scores: (optional) confidence scores for each box
94
+ figsize: (optional) the size of the image.
95
+ """
96
+ # Number of instances
97
+ N = boxes.shape[0]
98
+ if not N:
99
+ print("\n*** No instances to display *** \n")
100
+ else:
101
+ assert boxes.shape[0] == masks.shape[-1] == class_ids.shape[0]
102
+
103
+ # if not ax:
104
+ # _, ax = plt.subplots(1, figsize=figsize)
105
+
106
+ # Generate random colors
107
+ colors = random_colors(N)
108
+
109
+ # Show area outside image boundaries.
110
+ height, width = image.shape[:2]
111
+ # ax.set_ylim(height + 10, -10)
112
+ # ax.set_xlim(-10, width + 10)
113
+ # ax.axis('off')
114
+ # ax.set_title(title)
115
+
116
+ masked_image = image.astype(np.uint32).copy()
117
+ for i in range(N):
118
+ color = colors[i]
119
+
120
+ # Bounding box
121
+ if not np.any(boxes[i]):
122
+ # Skip this instance. Has no bbox. Likely lost in image cropping.
123
+ continue
124
+ y1, x1, y2, x2 = boxes[i]
125
+ p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2,
126
+ alpha=0.7, linestyle="dashed",
127
+ edgecolor=color, facecolor='none')
128
+ #ax.add_patch(p)
129
+
130
+ # Label
131
+ class_id = class_ids[i]
132
+ score = scores[i] if scores is not None else None
133
+ label = class_names[class_id]
134
+ x = random.randint(x1, (x1 + x2) // 2)
135
+ caption = "{} {:.3f}".format(label, score) if score else label
136
+ # ax.text(x1, y1 + 8, caption,
137
+ # color='w', size=11, backgroundcolor="none")
138
+
139
+ # Mask
140
+ mask = masks[:, :, i]
141
+ masked_image = apply_mask(masked_image, mask, color)
142
+
143
+ # Mask Polygon
144
+ # Pad to ensure proper polygons for masks that touch image edges.
145
+ padded_mask = np.zeros(
146
+ (mask.shape[0] + 2, mask.shape[1] + 2), dtype=np.uint8)
147
+ padded_mask[1:-1, 1:-1] = mask
148
+ contours = find_contours(padded_mask, 0.5)
149
+ for verts in contours:
150
+ # Subtract the padding and flip (y, x) to (x, y)
151
+ verts = np.fliplr(verts) - 1
152
+ p = Polygon(verts, facecolor="none", edgecolor=color)
153
+ #ax.add_patch(p)
154
+ #ax.imshow(masked_image.astype(np.uint8))
155
+ #plt.show()
156
+ return masked_image.astype(np.uint8)
157
+
158
+
159
+ def draw_rois(image, rois, refined_rois, mask, class_ids, class_names, limit=10):
160
+ """
161
+ anchors: [n, (y1, x1, y2, x2)] list of anchors in image coordinates.
162
+ proposals: [n, 4] the same anchors but refined to fit objects better.
163
+ """
164
+ masked_image = image.copy()
165
+
166
+ # Pick random anchors in case there are too many.
167
+ ids = np.arange(rois.shape[0], dtype=np.int32)
168
+ ids = np.random.choice(
169
+ ids, limit, replace=False) if ids.shape[0] > limit else ids
170
+
171
+ fig, ax = plt.subplots(1, figsize=(12, 12))
172
+ if rois.shape[0] > limit:
173
+ plt.title("Showing {} random ROIs out of {}".format(
174
+ len(ids), rois.shape[0]))
175
+ else:
176
+ plt.title("{} ROIs".format(len(ids)))
177
+
178
+ # Show area outside image boundaries.
179
+ ax.set_ylim(image.shape[0] + 20, -20)
180
+ ax.set_xlim(-50, image.shape[1] + 20)
181
+ ax.axis('off')
182
+
183
+ for i, id in enumerate(ids):
184
+ color = np.random.rand(3)
185
+ class_id = class_ids[id]
186
+ # ROI
187
+ y1, x1, y2, x2 = rois[id]
188
+ p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2,
189
+ edgecolor=color if class_id else "gray",
190
+ facecolor='none', linestyle="dashed")
191
+ ax.add_patch(p)
192
+ # Refined ROI
193
+ if class_id:
194
+ ry1, rx1, ry2, rx2 = refined_rois[id]
195
+ p = patches.Rectangle((rx1, ry1), rx2 - rx1, ry2 - ry1, linewidth=2,
196
+ edgecolor=color, facecolor='none')
197
+ ax.add_patch(p)
198
+ # Connect the top-left corners of the anchor and proposal for easy visualization
199
+ ax.add_line(lines.Line2D([x1, rx1], [y1, ry1], color=color))
200
+
201
+ # Label
202
+ label = class_names[class_id]
203
+ ax.text(rx1, ry1 + 8, "{}".format(label),
204
+ color='w', size=11, backgroundcolor="none")
205
+
206
+ # Mask
207
+ m = utils.unmold_mask(mask[id], rois[id]
208
+ [:4].astype(np.int32), image.shape)
209
+ masked_image = apply_mask(masked_image, m, color)
210
+
211
+ #ax.imshow(masked_image)
212
+
213
+ # Print stats
214
+ print("Positive ROIs: ", class_ids[class_ids > 0].shape[0])
215
+ print("Negative ROIs: ", class_ids[class_ids == 0].shape[0])
216
+ print("Positive Ratio: {:.2f}".format(
217
+ class_ids[class_ids > 0].shape[0] / class_ids.shape[0]))
218
+
219
+
220
+ # TODO: Replace with matplotlib equivalent?
+ def draw_box(image, box, color):
+ """Draw 2-pixel width bounding boxes on the given image array.
+ color: list of 3 int values for RGB.
+ """
+ y1, x1, y2, x2 = box
+ image[y1:y1 + 2, x1:x2] = color
+ image[y2:y2 + 2, x1:x2] = color
+ image[y1:y2, x1:x1 + 2] = color
+ image[y1:y2, x2:x2 + 2] = color
+ return image
+
+
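Because draw_box writes the border straight into the array with slice assignments, it needs no matplotlib figure (hence the TODO above). A quick usage sketch, assuming the function is imported from this visualization module:

import numpy as np

canvas = np.zeros((64, 64, 3), dtype=np.uint8)
boxed = draw_box(canvas, box=(10, 10, 40, 50), color=[255, 0, 0])  # box is (y1, x1, y2, x2)
print(boxed[10, 10:50])  # the top edge is now red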
+ def display_top_masks(image, mask, class_ids, class_names, limit=4):
+ """Display the given image and the top few class masks."""
+ to_display = []
+ titles = []
+ to_display.append(image)
+ titles.append("H x W={}x{}".format(image.shape[0], image.shape[1]))
+ # Pick top prominent classes in this image
+ unique_class_ids = np.unique(class_ids)
+ mask_area = [np.sum(mask[:, :, np.where(class_ids == i)[0]])
+ for i in unique_class_ids]
+ top_ids = [v[0] for v in sorted(zip(unique_class_ids, mask_area),
+ key=lambda r: r[1], reverse=True) if v[1] > 0]
+ # Generate images and titles
+ for i in range(limit):
+ class_id = top_ids[i] if i < len(top_ids) else -1
+ # Pull masks of instances belonging to the same class.
+ m = mask[:, :, np.where(class_ids == class_id)[0]]
+ m = np.sum(m * np.arange(1, m.shape[-1] + 1), -1)
+ to_display.append(m)
+ titles.append(class_names[class_id] if class_id != -1 else "-")
+ display_images(to_display, titles=titles, cols=limit + 1, cmap="Blues_r")
+
+
+ def plot_precision_recall(AP, precisions, recalls):
+ """Draw the precision-recall curve.
+
+ AP: Average precision at IoU >= 0.5
+ precisions: list of precision values
+ recalls: list of recall values
+ """
+ # Plot the Precision-Recall curve
+ _, ax = plt.subplots(1)
+ ax.set_title("Precision-Recall Curve. AP@50 = {:.3f}".format(AP))
+ ax.set_ylim(0, 1.1)
+ ax.set_xlim(0, 1.1)
+ _ = ax.plot(recalls, precisions)
+
+
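plot_precision_recall only draws the curve; the AP, precisions and recalls values are expected to come from an evaluation step. A hedged sketch of how they might be produced, assuming the bundled mrcnn.utils exposes compute_ap as in the upstream Matterport code (gt_bbox, gt_class_id, gt_mask and the detection dict r are placeholders for one image's ground truth and model output):

from mrcnn import utils

AP, precisions, recalls, overlaps = utils.compute_ap(
    gt_bbox, gt_class_id, gt_mask,
    r['rois'], r['class_ids'], r['scores'], r['masks'],
    iou_threshold=0.5)
plot_precision_recall(AP, precisions, recalls)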
+ def plot_overlaps(gt_class_ids, pred_class_ids, pred_scores,
+ overlaps, class_names, threshold=0.5):
+ """Draw a grid showing how ground truth objects are classified.
+ gt_class_ids: [N] int. Ground truth class IDs
+ pred_class_ids: [N] int. Predicted class IDs
+ pred_scores: [N] float. The probability scores of predicted classes
+ overlaps: [pred_boxes, gt_boxes] IoU overlaps of predictions and GT boxes.
+ class_names: list of all class names in the dataset
+ threshold: Float. The prediction probability required to predict a class
+ """
+ gt_class_ids = gt_class_ids[gt_class_ids != 0]
+ pred_class_ids = pred_class_ids[pred_class_ids != 0]
+
+ plt.figure(figsize=(12, 10))
+ plt.imshow(overlaps, interpolation='nearest', cmap=plt.cm.Blues)
+ plt.yticks(np.arange(len(pred_class_ids)),
+ ["{} ({:.2f})".format(class_names[int(id)], pred_scores[i])
+ for i, id in enumerate(pred_class_ids)])
+ plt.xticks(np.arange(len(gt_class_ids)),
+ [class_names[int(id)] for id in gt_class_ids], rotation=90)
+
+ thresh = overlaps.max() / 2.
+ for i, j in itertools.product(range(overlaps.shape[0]),
+ range(overlaps.shape[1])):
+ text = ""
+ if overlaps[i, j] > threshold:
+ text = "match" if gt_class_ids[j] == pred_class_ids[i] else "wrong"
+ color = ("white" if overlaps[i, j] > thresh
+ else "black" if overlaps[i, j] > 0
+ else "grey")
+ plt.text(j, i, "{:.3f}\n{}".format(overlaps[i, j], text),
+ horizontalalignment="center", verticalalignment="center",
+ fontsize=9, color=color)
+
+ plt.tight_layout()
+ plt.xlabel("Ground Truth")
+ plt.ylabel("Predictions")
+
+
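The overlaps argument is an IoU matrix of shape [n_predictions, n_ground_truth]. The compute_ap call sketched above already returns one; alternatively, the upstream utils module has compute_overlaps for boxes alone (again an assumption about the bundled copy, reusing the same placeholder names as before):

overlaps = utils.compute_overlaps(r['rois'], gt_bbox)  # [n_pred, n_gt] IoU matrix
plot_overlaps(gt_class_id, r['class_ids'], r['scores'],
              overlaps, class_names, threshold=0.5)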
+ def draw_boxes(image, boxes=None, refined_boxes=None,
+ masks=None, captions=None, visibilities=None,
+ title="", ax=None):
+ """Draw bounding boxes and segmentation masks with different
+ customizations.
+
+ boxes: [N, (y1, x1, y2, x2, class_id)] in image coordinates.
+ refined_boxes: Like boxes, but draw with solid lines to show
+ that they're the result of refining 'boxes'.
+ masks: [N, height, width]
+ captions: List of N titles to display on each box
+ visibilities: (optional) List of values of 0, 1, or 2. Determine how
+ prominent each bounding box should be.
+ title: An optional title to show over the image
+ ax: (optional) Matplotlib axis to draw on.
+ """
+ # Number of boxes
+ assert boxes is not None or refined_boxes is not None
+ N = boxes.shape[0] if boxes is not None else refined_boxes.shape[0]
+
+ # Matplotlib Axis
+ if not ax:
+ _, ax = plt.subplots(1, figsize=(12, 12))
+
+ # Generate random colors
+ colors = random_colors(N)
+
+ # Show area outside image boundaries.
+ margin = image.shape[0] // 10
+ ax.set_ylim(image.shape[0] + margin, -margin)
+ ax.set_xlim(-margin, image.shape[1] + margin)
+ ax.axis('off')
+
+ ax.set_title(title)
+
+ masked_image = image.astype(np.uint32).copy()
+ for i in range(N):
+ # Box visibility
+ visibility = visibilities[i] if visibilities is not None else 1
+ if visibility == 0:
+ color = "gray"
+ style = "dotted"
+ alpha = 0.5
+ elif visibility == 1:
+ color = colors[i]
+ style = "dotted"
+ alpha = 1
+ elif visibility == 2:
+ color = colors[i]
+ style = "solid"
+ alpha = 1
+
+ # Boxes
+ if boxes is not None:
+ if not np.any(boxes[i]):
+ # Skip this instance. Has no bbox. Likely lost in cropping.
+ continue
+ y1, x1, y2, x2 = boxes[i]
+ p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2,
+ alpha=alpha, linestyle=style,
+ edgecolor=color, facecolor='none')
+ ax.add_patch(p)
+
+ # Refined boxes
+ if refined_boxes is not None and visibility > 0:
+ ry1, rx1, ry2, rx2 = refined_boxes[i].astype(np.int32)
+ p = patches.Rectangle((rx1, ry1), rx2 - rx1, ry2 - ry1, linewidth=2,
+ edgecolor=color, facecolor='none')
+ ax.add_patch(p)
+ # Connect the top-left corners of the anchor and proposal
+ if boxes is not None:
+ ax.add_line(lines.Line2D([x1, rx1], [y1, ry1], color=color))
+
+ # Captions
+ if captions is not None:
+ caption = captions[i]
+ # If there are refined boxes, display captions on them
+ if refined_boxes is not None:
+ y1, x1, y2, x2 = ry1, rx1, ry2, rx2
+ x = random.randint(x1, (x1 + x2) // 2)
+ ax.text(x1, y1, caption, size=11, verticalalignment='top',
+ color='w', backgroundcolor="none",
+ bbox={'facecolor': color, 'alpha': 0.5,
+ 'pad': 2, 'edgecolor': 'none'})
+
+ # Masks
+ if masks is not None:
+ mask = masks[:, :, i]
+ masked_image = apply_mask(masked_image, mask, color)
+ # Mask Polygon
+ # Pad to ensure proper polygons for masks that touch image edges.
+ padded_mask = np.zeros(
+ (mask.shape[0] + 2, mask.shape[1] + 2), dtype=np.uint8)
+ padded_mask[1:-1, 1:-1] = mask
+ contours = find_contours(padded_mask, 0.5)
+ for verts in contours:
+ # Subtract the padding and flip (y, x) to (x, y)
+ verts = np.fliplr(verts) - 1
+ p = Polygon(verts, facecolor="none", edgecolor=color)
+ ax.add_patch(p)
+ ax.imshow(masked_image.astype(np.uint8))
+
+
+ def display_table(table):
+ """Display values in a table format.
+ table: an iterable of rows, and each row is an iterable of values.
+ """
+ html = ""
+ for row in table:
+ row_html = ""
+ for col in row:
+ row_html += "<td>{:40}</td>".format(str(col))
+ html += "<tr>" + row_html + "</tr>"
+ html = "<table>" + html + "</table>"
+ #IPython.display.display(IPython.display.HTML(html))
+
+
+ def display_weight_stats(model):
+ """Scans all the weights in the model and builds a table of stats
+ about each weight.
+ """
+ layers = model.get_trainable_layers()
+ table = [["WEIGHT NAME", "SHAPE", "MIN", "MAX", "STD"]]
+ for l in layers:
+ weight_values = l.get_weights() # list of Numpy arrays
+ weight_tensors = l.weights # list of TF tensors
+ for i, w in enumerate(weight_values):
+ weight_name = weight_tensors[i].name
+ # Detect problematic layers. Exclude biases of conv layers.
+ alert = ""
+ if w.min() == w.max() and not (l.__class__.__name__ == "Conv2D" and i == 1):
+ alert += "<span style='color:red'>*** dead?</span>"
+ if np.abs(w.min()) > 1000 or np.abs(w.max()) > 1000:
+ alert += "<span style='color:red'>*** Overflow?</span>"
+ # Add row
+ table.append([
+ weight_name + alert,
+ str(w.shape),
+ "{:+9.4f}".format(w.min()),
+ "{:+10.4f}".format(w.max()),
+ "{:+9.4f}".format(w.std()),
+ ])
+ #display_table(table)
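As committed, both helpers above are effectively silent: display_table builds the HTML but its IPython display call is commented out, and display_weight_stats ends with its display_table call disabled. A console-friendly variant is easy to sketch (a new helper for illustration, not part of the commit; it assumes model.get_trainable_layers() behaves as in upstream Mask R-CNN):

def print_weight_stats(model):
    # Same statistics as display_weight_stats, printed as plain text.
    for layer in model.get_trainable_layers():
        for tensor, w in zip(layer.weights, layer.get_weights()):
            print("{:60s} {:18s} min={:+9.4f} max={:+10.4f} std={:+9.4f}".format(
                tensor.name, str(w.shape), w.min(), w.max(), w.std()))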
requirements.txt ADDED
@@ -0,0 +1,106 @@
+ alabaster==0.7.12
+ argon2-cffi @ file:///home/conda/feedstock_root/build_artifacts/argon2-cffi_1633990451307/work
+ async-generator==1.10
+ attrs @ file:///home/conda/feedstock_root/build_artifacts/attrs_1659291887007/work
+ Babel==2.11.0
+ backcall @ file:///home/conda/feedstock_root/build_artifacts/backcall_1592338393461/work
+ backports.functools-lru-cache @ file:///home/conda/feedstock_root/build_artifacts/backports.functools_lru_cache_1618230623929/work
+ bleach==1.5.0
+ certifi==2022.9.24
+ cffi @ file:///home/conda/feedstock_root/build_artifacts/cffi_1631636256886/work
+ charset-normalizer==2.0.12
+ cycler==0.11.0
+ Cython==0.29.32
+ dataclasses==0.8
+ decorator==4.4.2
+ defusedxml @ file:///home/conda/feedstock_root/build_artifacts/defusedxml_1615232257335/work
+ docutils==0.18.1
+ entrypoints @ file:///home/conda/feedstock_root/build_artifacts/entrypoints_1643888246732/work
+ h5py==2.10.0
+ html5lib==0.9999999
+ idna==3.4
+ imageio==2.15.0
+ imagesize==1.4.1
+ imgaug==0.4.0
+ importlib-metadata==4.8.3
+ ipykernel @ file:///home/conda/feedstock_root/build_artifacts/ipykernel_1620912934572/work/dist/ipykernel-5.5.5-py3-none-any.whl
+ ipyparallel==8.2.1
+ ipython @ file:///home/conda/feedstock_root/build_artifacts/ipython_1609697613279/work
+ ipython-genutils==0.2.0
+ ipywidgets==7.7.2
+ jedi @ file:///home/conda/feedstock_root/build_artifacts/jedi_1605054537831/work
+ Jinja2 @ file:///home/conda/feedstock_root/build_artifacts/jinja2_1636510082894/work
+ jsonschema==3.0.2
+ jupyter-client @ file:///home/conda/feedstock_root/build_artifacts/jupyter_client_1642858610849/work
+ jupyter-core @ file:///home/conda/feedstock_root/build_artifacts/jupyter_core_1631852698933/work
+ jupyterlab-pygments @ file:///home/conda/feedstock_root/build_artifacts/jupyterlab_pygments_1601375948261/work
+ jupyterlab-widgets==1.1.1
+ Keras==2.0.8
+ kiwisolver==1.3.1
+ Markdown==3.3.7
+ MarkupSafe @ file:///home/conda/feedstock_root/build_artifacts/markupsafe_1621455668064/work
+ matplotlib==3.3.4
+ mistune @ file:///home/conda/feedstock_root/build_artifacts/mistune_1624941293729/work
+ nb-conda @ file:///home/conda/feedstock_root/build_artifacts/nb_conda_1611345535156/work
+ nb-conda-kernels @ file:///home/conda/feedstock_root/build_artifacts/nb_conda_kernels_1606762461711/work
+ nbclient @ file:///home/conda/feedstock_root/build_artifacts/nbclient_1637327213451/work
+ nbconvert @ file:///home/conda/feedstock_root/build_artifacts/nbconvert_1605401832871/work
+ nbformat @ file:///home/conda/feedstock_root/build_artifacts/nbformat_1617383142101/work
+ nest-asyncio @ file:///home/conda/feedstock_root/build_artifacts/nest-asyncio_1664684991461/work
+ networkx==2.5.1
+ nose==1.3.7
+ notebook @ file:///home/conda/feedstock_root/build_artifacts/notebook_1616419146127/work
+ numpy==1.19.5
+ opencv-python==4.6.0.66
+ packaging @ file:///home/conda/feedstock_root/build_artifacts/packaging_1637239678211/work
+ pandocfilters @ file:///home/conda/feedstock_root/build_artifacts/pandocfilters_1631603243851/work
+ parso @ file:///home/conda/feedstock_root/build_artifacts/parso_1595548966091/work
+ pexpect @ file:///home/conda/feedstock_root/build_artifacts/pexpect_1667297516076/work
+ pickleshare @ file:///home/conda/feedstock_root/build_artifacts/pickleshare_1602536217715/work
+ Pillow==8.4.0
+ prometheus-client @ file:///home/conda/feedstock_root/build_artifacts/prometheus_client_1665692535292/work
+ prompt-toolkit @ file:///home/conda/feedstock_root/build_artifacts/prompt-toolkit_1669057097528/work
+ protobuf==3.19.6
+ psutil==5.9.4
+ ptyprocess @ file:///home/conda/feedstock_root/build_artifacts/ptyprocess_1609419310487/work/dist/ptyprocess-0.7.0-py2.py3-none-any.whl
+ pycocotools==2.0.6
+ pycparser @ file:///home/conda/feedstock_root/build_artifacts/pycparser_1636257122734/work
+ Pygments @ file:///home/conda/feedstock_root/build_artifacts/pygments_1660666458521/work
+ pyparsing @ file:///home/conda/feedstock_root/build_artifacts/pyparsing_1652235407899/work
+ pyrsistent @ file:///home/conda/feedstock_root/build_artifacts/pyrsistent_1610146795286/work
+ python-dateutil @ file:///home/conda/feedstock_root/build_artifacts/python-dateutil_1626286286081/work
+ pytz==2022.6
+ PyWavelets==1.1.1
+ PyYAML==6.0
+ pyzmq @ file:///home/conda/feedstock_root/build_artifacts/pyzmq_1631793305981/work
+ qtconsole==5.2.2
+ QtPy==2.0.1
+ requests==2.27.1
+ scikit-image==0.17.2
+ scipy==1.5.4
+ Send2Trash @ file:///home/conda/feedstock_root/build_artifacts/send2trash_1628511208346/work
+ Shapely==1.8.5.post1
+ six @ file:///home/conda/feedstock_root/build_artifacts/six_1620240208055/work
+ snowballstemmer==2.2.0
+ Sphinx==5.3.0
+ sphinxcontrib-applehelp==1.0.2
+ sphinxcontrib-devhelp==1.0.2
+ sphinxcontrib-htmlhelp==2.0.0
+ sphinxcontrib-jsmath==1.0.1
+ sphinxcontrib-qthelp==1.0.3
+ sphinxcontrib-serializinghtml==1.1.5
+ tensorflow==1.3.0
+ tensorflow-tensorboard==0.1.8
+ terminado @ file:///home/conda/feedstock_root/build_artifacts/terminado_1631128154882/work
+ testpath @ file:///home/conda/feedstock_root/build_artifacts/testpath_1645693042223/work
+ tifffile==2020.9.3
+ tornado @ file:///home/conda/feedstock_root/build_artifacts/tornado_1610094701020/work
+ tqdm==4.19.9
+ traitlets @ file:///home/conda/feedstock_root/build_artifacts/traitlets_1631041982274/work
+ typing_extensions==4.1.1
+ urllib3==1.26.13
+ wcwidth @ file:///home/conda/feedstock_root/build_artifacts/wcwidth_1600965781394/work
+ webencodings==0.5.1
+ Werkzeug==2.0.3
+ widgetsnbextension==3.6.1
+ zipp==3.6.0
setup.py ADDED
@@ -0,0 +1,68 @@
+ """
+ The build/compilation setup
+
+ >> pip install -r requirements.txt
+ >> python setup.py install
+ """
+ import pip
+ import logging
+ import pkg_resources
+ try:
+ from setuptools import setup
+ except ImportError:
+ from distutils.core import setup
+
+
+ def _parse_requirements(file_path):
+ pip_ver = pkg_resources.get_distribution('pip').version
+ pip_version = list(map(int, pip_ver.split('.')[:2]))
+ if pip_version >= [6, 0]:
+ raw = pip.req.parse_requirements(file_path,
+ session=pip.download.PipSession())
+ else:
+ raw = pip.req.parse_requirements(file_path)
+ return [str(i.req) for i in raw]
+
+
+ # parse_requirements() returns a generator of pip.req.InstallRequirement objects
+ try:
+ install_reqs = _parse_requirements("requirements.txt")
+ except Exception:
+ logging.warning('Failed to load requirements file; using defaults.')
+ install_reqs = []
+
+ setup(
+ name='mask-rcnn',
+ version='2.1',
+ url='https://github.com/matterport/Mask_RCNN',
+ author='Matterport',
+ author_email='waleed.abdulla@gmail.com',
+ license='MIT',
+ description='Mask R-CNN for object detection and instance segmentation',
+ packages=["mrcnn"],
+ install_requires=install_reqs,
+ include_package_data=True,
+ python_requires='>=3.4',
+ long_description="""This is an implementation of Mask R-CNN on Python 3, Keras, and TensorFlow.
+ The model generates bounding boxes and segmentation masks for each instance of an object in the image.
+ It's based on Feature Pyramid Network (FPN) and a ResNet101 backbone.""",
+ classifiers=[
+ "Development Status :: 5 - Production/Stable",
+ "Environment :: Console",
+ "Intended Audience :: Developers",
+ "Intended Audience :: Information Technology",
+ "Intended Audience :: Education",
+ "Intended Audience :: Science/Research",
+ "License :: OSI Approved :: MIT License",
+ "Natural Language :: English",
+ "Operating System :: OS Independent",
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
+ "Topic :: Scientific/Engineering :: Image Recognition",
+ "Topic :: Scientific/Engineering :: Visualization",
+ "Topic :: Scientific/Engineering :: Image Segmentation",
+ 'Programming Language :: Python :: 3.4',
+ 'Programming Language :: Python :: 3.5',
+ 'Programming Language :: Python :: 3.6',
+ ],
+ keywords="image instance segmentation object detection mask rcnn r-cnn tensorflow keras",
+ )
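One caveat on the setup script above: _parse_requirements goes through pip's internal pip.req.parse_requirements API, which was removed around pip 10, so on a modern pip the try/except simply falls back to an empty install_reqs. A hedged alternative that avoids pip internals entirely (a sketch, not what this commit uses):

def _parse_requirements(file_path):
    # Read requirements.txt directly instead of relying on pip internals.
    with open(file_path) as f:
        return [line.strip() for line in f
                if line.strip() and not line.startswith("#")]

Note, however, that several pins in requirements.txt point at local conda build artifacts (the "@ file:///home/conda/..." entries), so feeding the file straight into install_requires would fail on another machine regardless of how it is parsed.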
test0.jpg ADDED