""" Mask R-CNN Base Configurations class. Copyright (c) 2017 Matterport, Inc. Licensed under the MIT License (see LICENSE for details) Written by Waleed Abdulla """ import math import numpy as np # Base Configuration Class # Don't use this class directly. Instead, sub-class it and override # the configurations you need to change. class Config(object): """Base configuration class. For custom configurations, create a sub-class that inherits from this one and override properties that need to be changed. """ # Name the configurations. For example, 'COCO', 'Experiment 3', ...etc. # Useful if your code needs to do things differently depending on which # experiment is running. NAME = None # Override in sub-classes # NUMBER OF GPUs to use. For CPU training, use 1 GPU_COUNT = 1 # Number of images to train with on each GPU. A 12GB GPU can typically # handle 2 images of 1024x1024px. # Adjust based on your GPU memory and image sizes. Use the highest # number that your GPU can handle for best performance. IMAGES_PER_GPU = 2 # Number of training steps per epoch # This doesn't need to match the size of the training set. Tensorboard # updates are saved at the end of each epoch, so setting this to a # smaller number means getting more frequent TensorBoard updates. # Validation stats are also calculated at each epoch end and they # might take a while, so don't set this too small to avoid spending # a lot of time on validation stats. STEPS_PER_EPOCH = 1000 # Number of validation steps to run at the end of every training epoch. # A bigger number improves accuracy of validation stats, but slows # down the training. VALIDATION_STEPS = 50 # Backbone network architecture # Supported values are: resnet50, resnet101 BACKBONE = "resnet101" # The strides of each layer of the FPN Pyramid. These values # are based on a Resnet101 backbone. BACKBONE_STRIDES = [4, 8, 16, 32, 64] # Number of classification classes (including background) NUM_CLASSES = 1 # Override in sub-classes # Length of square anchor side in pixels RPN_ANCHOR_SCALES = (32, 64, 128, 256, 512) # Ratios of anchors at each cell (width/height) # A value of 1 represents a square anchor, and 0.5 is a wide anchor RPN_ANCHOR_RATIOS = [0.5, 1, 2] # Anchor stride # If 1 then anchors are created for each cell in the backbone feature map. # If 2, then anchors are created for every other cell, and so on. RPN_ANCHOR_STRIDE = 1 # Non-max suppression threshold to filter RPN proposals. # You can reduce this during training to generate more propsals. RPN_NMS_THRESHOLD = 0.7 # How many anchors per image to use for RPN training RPN_TRAIN_ANCHORS_PER_IMAGE = 256 # ROIs kept after non-maximum supression (training and inference) POST_NMS_ROIS_TRAINING = 2000 POST_NMS_ROIS_INFERENCE = 1000 # If enabled, resizes instance masks to a smaller size to reduce # memory load. Recommended when using high-resolution images. USE_MINI_MASK = True MINI_MASK_SHAPE = (56, 56) # (height, width) of the mini-mask # Input image resing # Images are resized such that the smallest side is >= IMAGE_MIN_DIM and # the longest side is <= IMAGE_MAX_DIM. In case both conditions can't # be satisfied together the IMAGE_MAX_DIM is enforced. IMAGE_MIN_DIM = 800 IMAGE_MAX_DIM = 1024 # If True, pad images with zeros such that they're (max_dim by max_dim) IMAGE_PADDING = True # currently, the False option is not supported # Image mean (RGB) MEAN_PIXEL = np.array([123.7, 116.8, 103.9]) # Number of ROIs per image to feed to classifier/mask heads # The Mask RCNN paper uses 512 but often the RPN doesn't generate # enough positive proposals to fill this and keep a positive:negative # ratio of 1:3. You can increase the number of proposals by adjusting # the RPN NMS threshold. TRAIN_ROIS_PER_IMAGE = 200 # Percent of positive ROIs used to train classifier/mask heads ROI_POSITIVE_RATIO = 0.33 # Pooled ROIs POOL_SIZE = 7 MASK_POOL_SIZE = 14 MASK_SHAPE = [28, 28] # Maximum number of ground truth instances to use in one image MAX_GT_INSTANCES = 100 # Bounding box refinement standard deviation for RPN and final detections. RPN_BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2]) BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2]) # Max number of final detections DETECTION_MAX_INSTANCES = 100 # Minimum probability value to accept a detected instance # ROIs below this threshold are skipped DETECTION_MIN_CONFIDENCE = 0.7 # Non-maximum suppression threshold for detection DETECTION_NMS_THRESHOLD = 0.3 # Learning rate and momentum # The Mask RCNN paper uses lr=0.02, but on TensorFlow it causes # weights to explode. Likely due to differences in optimzer # implementation. LEARNING_RATE = 0.001 LEARNING_MOMENTUM = 0.9 # Weight decay regularization WEIGHT_DECAY = 0.0001 # Use RPN ROIs or externally generated ROIs for training # Keep this True for most situations. Set to False if you want to train # the head branches on ROI generated by code rather than the ROIs from # the RPN. For example, to debug the classifier head without having to # train the RPN. USE_RPN_ROIS = True def __init__(self): """Set values of computed attributes.""" # Effective batch size self.BATCH_SIZE = self.IMAGES_PER_GPU * self.GPU_COUNT # Input image size self.IMAGE_SHAPE = np.array( [self.IMAGE_MAX_DIM, self.IMAGE_MAX_DIM, 3]) # Compute backbone size from input image size self.BACKBONE_SHAPES = np.array( [[int(math.ceil(self.IMAGE_SHAPE[0] / stride)), int(math.ceil(self.IMAGE_SHAPE[1] / stride))] for stride in self.BACKBONE_STRIDES]) def display(self): """Display Configuration values.""" print("\nConfigurations:") for a in dir(self): if not a.startswith("__") and not callable(getattr(self, a)): print("{:30} {}".format(a, getattr(self, a))) print("\n")