#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
import os

from yacs.config import CfgNode as CN

# -----------------------------------------------------------------------------
# Convention about Training / Test specific parameters
# -----------------------------------------------------------------------------
# Whenever an argument can be used either for training or for testing, the
# corresponding name is suffixed with _TRAIN for a training parameter or with
# _TEST for a test-specific parameter.
# For example, the number of images during training will be
# IMAGES_PER_BATCH_TRAIN, while the number of images for testing will be
# IMAGES_PER_BATCH_TEST.
# -----------------------------------------------------------------------------
# Config definition
# -----------------------------------------------------------------------------
_C = CN()

_C.MODEL = CN()
_C.MODEL.RPN_ONLY = False
_C.MODEL.MASK_ON = False
_C.MODEL.SEG_ON = False
_C.MODEL.CHAR_MASK_ON = False
_C.MODEL.DEVICE = "cuda"
_C.MODEL.META_ARCHITECTURE = "GeneralizedRCNN"
_C.MODEL.TRAIN_DETECTION_ONLY = False
_C.MODEL.RESNET34 = False

# If the WEIGHT starts with a catalog://, like ":R-50", the code will look for
# the path in paths_catalog. Otherwise, it will be used as the specified
# absolute path.
_C.MODEL.WEIGHT = ""
_C.SEQUENCE = CN()
_C.SEQUENCE.SEQ_ON = False
_C.SEQUENCE.NUM_CHAR = 38
_C.SEQUENCE.BOS_TOKEN = 0
_C.SEQUENCE.MAX_LENGTH = 32
_C.SEQUENCE.TEACHER_FORCE_RATIO = 1.0
_C.SEQUENCE.TWO_CONV = False
_C.SEQUENCE.MEAN_SCORE = False
_C.SEQUENCE.RESIZE_HEIGHT = 16
_C.SEQUENCE.RESIZE_WIDTH = 64

# -----------------------------------------------------------------------------
# INPUT
# -----------------------------------------------------------------------------
_C.INPUT = CN()
# Size of the smallest side of the image during training
_C.INPUT.MIN_SIZE_TRAIN = (800,)  # (800,)
# Maximum size of the side of the image during training
_C.INPUT.MAX_SIZE_TRAIN = 1333
# Size of the smallest side of the image during testing
_C.INPUT.MIN_SIZE_TEST = 800
# Maximum size of the side of the image during testing
_C.INPUT.MAX_SIZE_TEST = 1333
# Values to be used for image normalization
_C.INPUT.PIXEL_MEAN = [102.9801, 115.9465, 122.7717]
# Values to be used for image normalization
_C.INPUT.PIXEL_STD = [1.0, 1.0, 1.0]
# Convert image to BGR format (for Caffe2 models), in range 0-255
_C.INPUT.TO_BGR255 = True
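# Illustrative note (an assumption about the usual maskrcnn-benchmark-style
# input pipeline, not stated in this file): with TO_BGR255 = True an image is
# converted to BGR in the 0-255 range and then normalized channel-wise as
# (pixel - PIXEL_MEAN) / PIXEL_STD, so with PIXEL_STD = [1.0, 1.0, 1.0] only
# the mean is subtracted.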
_C.INPUT.STRICT_RESIZE = False

# -----------------------------------------------------------------------------
# Dataset
# -----------------------------------------------------------------------------
_C.DATASETS = CN()
# List of the dataset names for training, as present in paths_catalog.py
_C.DATASETS.TRAIN = ()
# List of the dataset names for testing, as present in paths_catalog.py
_C.DATASETS.TEST = ()
_C.DATASETS.RATIOS = []
_C.DATASETS.AUG = False
_C.DATASETS.RANDOM_CROP_PROB = 0.0
_C.DATASETS.IGNORE_DIFFICULT = False
_C.DATASETS.FIX_CROP = False
_C.DATASETS.CROP_SIZE = (512, 512)
_C.DATASETS.MAX_ROTATE_THETA = 30
_C.DATASETS.FIX_ROTATE = False

# -----------------------------------------------------------------------------
# DataLoader
# -----------------------------------------------------------------------------
_C.DATALOADER = CN()
# Number of data loading threads
_C.DATALOADER.NUM_WORKERS = 4
# If > 0, this enforces that each collated batch should have a size divisible
# by SIZE_DIVISIBILITY
_C.DATALOADER.SIZE_DIVISIBILITY = 0
# If True, each batch should contain only images for which the aspect ratio
# is compatible. This groups portrait images together, and landscape images
# are not batched with portrait images.
_C.DATALOADER.ASPECT_RATIO_GROUPING = True
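# Illustrative note (arithmetic example, not from the original file): if
# SIZE_DIVISIBILITY were set to 32, an 800x1201 image would be zero-padded to
# 800x1216 when collated into a batch, since each spatial dimension is rounded
# up to the nearest multiple of 32; the default of 0 disables this padding.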
# ---------------------------------------------------------------------------- #
# Backbone options
# ---------------------------------------------------------------------------- #
_C.MODEL.BACKBONE = CN()
# The backbone conv body to use
# The string must match a function that is imported in modeling.model_builder
# (e.g., 'FPN.add_fpn_ResNet101_conv5_body' to specify a ResNet-101-FPN
# backbone)
_C.MODEL.BACKBONE.CONV_BODY = "R-50-C4"
# Add StopGrad at a specified stage so the bottom layers are frozen
_C.MODEL.BACKBONE.FREEZE_CONV_BODY_AT = 2
_C.MODEL.BACKBONE.OUT_CHANNELS = 256 * 4

# ---------------------------------------------------------------------------- #
# ResNe[X]t options (ResNets = {ResNet, ResNeXt})
# Note that parts of a resnet may be used for both the backbone and the head
# These options apply to both
# ---------------------------------------------------------------------------- #
_C.MODEL.RESNETS = CN()
# Number of groups to use; 1 ==> ResNet; > 1 ==> ResNeXt
_C.MODEL.RESNETS.NUM_GROUPS = 1
# Baseline width of each group
_C.MODEL.RESNETS.WIDTH_PER_GROUP = 64
# Place the stride 2 conv on the 1x1 filter
# Use True only for the original MSRA ResNet; use False for C2 and Torch models
_C.MODEL.RESNETS.STRIDE_IN_1X1 = True
# Residual transformation function
_C.MODEL.RESNETS.TRANS_FUNC = "BottleneckWithFixedBatchNorm"
# ResNet's stem function (conv1 and pool1)
_C.MODEL.RESNETS.STEM_FUNC = "StemWithFixedBatchNorm"
# Apply dilation in stage "res5"
_C.MODEL.RESNETS.RES5_DILATION = 1
_C.MODEL.RESNETS.BACKBONE_OUT_CHANNELS = 256 * 4
_C.MODEL.RESNETS.RES2_OUT_CHANNELS = 256
_C.MODEL.RESNETS.STEM_OUT_CHANNELS = 64
_C.MODEL.RESNETS.STAGE_WITH_DCN = (False, False, False, False)
_C.MODEL.RESNETS.WITH_MODULATED_DCN = False
_C.MODEL.RESNETS.DEFORMABLE_GROUPS = 1
_C.MODEL.RESNETS.LAYERS = (3, 4, 6, 3)

# ---------------------------------------------------------------------------- #
# FPN options
# ---------------------------------------------------------------------------- #
_C.MODEL.FPN = CN()
_C.MODEL.FPN.USE_GN = False
_C.MODEL.FPN.USE_RELU = False

# ---------------------------------------------------------------------------- #
# RPN options
# ---------------------------------------------------------------------------- #
_C.MODEL.RPN = CN()
_C.MODEL.RPN.USE_FPN = False
# Base RPN anchor sizes given in absolute pixels w.r.t. the scaled network input
_C.MODEL.RPN.ANCHOR_SIZES = (32, 64, 128, 256, 512)
# Stride of the feature map that the RPN is attached to.
# For FPN, the number of strides should match the number of scales
_C.MODEL.RPN.ANCHOR_STRIDE = (16,)
# RPN anchor aspect ratios
_C.MODEL.RPN.ASPECT_RATIOS = (0.5, 1.0, 2.0)
# Remove RPN anchors that go outside the image by STRADDLE_THRESH pixels
# Set to -1 or a large value, e.g. 100000, to disable pruning anchors
_C.MODEL.RPN.STRADDLE_THRESH = 0
# Minimum overlap required between an anchor and a ground-truth box for the
# (anchor, gt box) pair to be a positive example (IoU >= FG_IOU_THRESHOLD
# ==> positive RPN example)
_C.MODEL.RPN.FG_IOU_THRESHOLD = 0.7
# Maximum overlap allowed between an anchor and a ground-truth box for the
# (anchor, gt box) pair to be a negative example (IoU < BG_IOU_THRESHOLD
# ==> negative RPN example)
_C.MODEL.RPN.BG_IOU_THRESHOLD = 0.3
# Total number of RPN examples per image
_C.MODEL.RPN.BATCH_SIZE_PER_IMAGE = 256
# Target fraction of foreground (positive) examples per RPN minibatch
_C.MODEL.RPN.POSITIVE_FRACTION = 0.5
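# Illustrative note (worked example of the defaults above, not from the
# original file): an anchor whose best IoU with a ground-truth box is >= 0.7
# is labeled positive, one whose best IoU is < 0.3 is labeled negative, and
# anything in [0.3, 0.7) is ignored; from the labeled anchors, at most 256 per
# image are sampled for the RPN loss, targeting about 128 positives.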
# Number of top scoring RPN proposals to keep before applying NMS
# When FPN is used, this is *per FPN level* (not total)
_C.MODEL.RPN.PRE_NMS_TOP_N_TRAIN = 12000
_C.MODEL.RPN.PRE_NMS_TOP_N_TEST = 6000
# Number of top scoring RPN proposals to keep after applying NMS
_C.MODEL.RPN.POST_NMS_TOP_N_TRAIN = 2000
_C.MODEL.RPN.POST_NMS_TOP_N_TEST = 1000
# NMS threshold used on RPN proposals
_C.MODEL.RPN.NMS_THRESH = 0.7
# Proposal height and width both need to be greater than RPN.MIN_SIZE
# (at the scale used during training or inference)
_C.MODEL.RPN.MIN_SIZE = 0
# Number of top scoring RPN proposals to keep after combining proposals from
# all FPN levels
_C.MODEL.RPN.FPN_POST_NMS_TOP_N_TRAIN = 2000
_C.MODEL.RPN.FPN_POST_NMS_TOP_N_TEST = 2000

_C.MODEL.SEG = CN()
_C.MODEL.SEG.USE_FPN = False
_C.MODEL.SEG.USE_FUSE_FEATURE = False
# Total number of SEG examples per image
_C.MODEL.SEG.BATCH_SIZE_PER_IMAGE = 256
# Target fraction of foreground (positive) examples per SEG minibatch
_C.MODEL.SEG.POSITIVE_FRACTION = 0.5
# Threshold used to binarize the SEG score map when generating proposals
_C.MODEL.SEG.BINARY_THRESH = 0.5
_C.MODEL.SEG.USE_MULTIPLE_THRESH = False
_C.MODEL.SEG.MULTIPLE_THRESH = (0.2, 0.3, 0.5, 0.7)
_C.MODEL.SEG.BOX_THRESH = 0.7
# Proposal height and width both need to be greater than SEG.MIN_SIZE
# (at the scale used during training or inference)
_C.MODEL.SEG.MIN_SIZE = 0
_C.MODEL.SEG.SHRINK_RATIO = 0.5
# Number of top scoring SEG proposals to keep after combining proposals from
# all FPN levels
_C.MODEL.SEG.TOP_N_TRAIN = 1000
_C.MODEL.SEG.TOP_N_TEST = 1000
_C.MODEL.SEG.AUG_PROPOSALS = False
_C.MODEL.SEG.IGNORE_DIFFICULT = True
_C.MODEL.SEG.EXPAND_RATIO = 1.6
_C.MODEL.SEG.BOX_EXPAND_RATIO = 1.5
_C.MODEL.SEG.USE_SEG_POLY = False
_C.MODEL.SEG.USE_PPM = False
# ---------------------------------------------------------------------------- #
# ROI HEADS options
# ---------------------------------------------------------------------------- #
_C.MODEL.ROI_HEADS = CN()
_C.MODEL.ROI_HEADS.USE_FPN = False
# Overlap threshold for an RoI to be considered foreground (if >= FG_IOU_THRESHOLD)
_C.MODEL.ROI_HEADS.FG_IOU_THRESHOLD = 0.5
# Overlap threshold for an RoI to be considered background
# (class = 0 if overlap in [0, BG_IOU_THRESHOLD))
_C.MODEL.ROI_HEADS.BG_IOU_THRESHOLD = 0.5
# Default weights on (dx, dy, dw, dh) for normalizing bbox regression targets
# These are empirically chosen to approximately lead to unit variance targets
_C.MODEL.ROI_HEADS.BBOX_REG_WEIGHTS = (10.0, 10.0, 5.0, 5.0)
# RoI minibatch size *per image* (number of regions of interest [ROIs])
# Total number of RoIs per training minibatch =
#   ROI_HEADS.BATCH_SIZE_PER_IMAGE * SOLVER.IMS_PER_BATCH
# E.g., a common configuration is: 512 * 16 = 8192 (2 images per GPU on 8 GPUs)
_C.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512
# Target fraction of RoI minibatch that is labeled foreground (i.e. class > 0)
_C.MODEL.ROI_HEADS.POSITIVE_FRACTION = 0.25
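# Illustrative note (arithmetic from the defaults above, not in the original
# file): with BATCH_SIZE_PER_IMAGE = 512 and POSITIVE_FRACTION = 0.25, at most
# 0.25 * 512 = 128 RoIs per image are sampled as foreground; the rest of the
# minibatch is filled with background RoIs.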
# Only used during testing
# Minimum score threshold (assuming scores in a [0, 1] range); a value chosen to
# balance obtaining high recall with not having too many low precision
# detections that will slow down inference post-processing steps (like NMS)
# _C.MODEL.ROI_HEADS.SCORE_THRESH = 0.05
_C.MODEL.ROI_HEADS.SCORE_THRESH = 0.0
# Overlap threshold used for non-maximum suppression (suppress boxes with
# IoU >= this threshold)
_C.MODEL.ROI_HEADS.NMS = 0.5
# Maximum number of detections to return per image (100 is based on the limit
# established for the COCO dataset)
_C.MODEL.ROI_HEADS.DETECTIONS_PER_IMG = 100

_C.MODEL.ROI_BOX_HEAD = CN()
_C.MODEL.ROI_BOX_HEAD.FEATURE_EXTRACTOR = "ResNet50Conv5ROIFeatureExtractor"
_C.MODEL.ROI_BOX_HEAD.PREDICTOR = "FastRCNNPredictor"
_C.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION = 14
_C.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO = 0
_C.MODEL.ROI_BOX_HEAD.POOLER_SCALES = (1.0 / 16,)
_C.MODEL.ROI_BOX_HEAD.NUM_CLASSES = 81
# Hidden layer dimension when using an MLP for the RoI box head
_C.MODEL.ROI_BOX_HEAD.MLP_HEAD_DIM = 1024
_C.MODEL.ROI_BOX_HEAD.USE_REGRESSION = True
_C.MODEL.ROI_BOX_HEAD.INFERENCE_USE_BOX = True
_C.MODEL.ROI_BOX_HEAD.USE_MASKED_FEATURE = False
_C.MODEL.ROI_BOX_HEAD.SOFT_MASKED_FEATURE_RATIO = 0.0
_C.MODEL.ROI_BOX_HEAD.MIX_OPTION = ""

_C.MODEL.ROI_MASK_HEAD = CN()
_C.MODEL.ROI_MASK_HEAD.FEATURE_EXTRACTOR = "ResNet50Conv5ROIFeatureExtractor"
_C.MODEL.ROI_MASK_HEAD.PREDICTOR = "MaskRCNNC4Predictor"
_C.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION = 14
_C.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION_H = 32
_C.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION_W = 128
_C.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO = 0
_C.MODEL.ROI_MASK_HEAD.POOLER_SCALES = (1.0 / 16,)
_C.MODEL.ROI_MASK_HEAD.MLP_HEAD_DIM = 1024
_C.MODEL.ROI_MASK_HEAD.CONV_LAYERS = (256, 256, 256, 256)
_C.MODEL.ROI_MASK_HEAD.RESOLUTION = 14
_C.MODEL.ROI_MASK_HEAD.RESOLUTION_H = 32
_C.MODEL.ROI_MASK_HEAD.RESOLUTION_W = 128
_C.MODEL.ROI_MASK_HEAD.SHARE_BOX_FEATURE_EXTRACTOR = True
_C.MODEL.ROI_MASK_HEAD.CHAR_NUM_CLASSES = 38
_C.MODEL.ROI_MASK_HEAD.USE_WEIGHTED_CHAR_MASK = False
_C.MODEL.ROI_MASK_HEAD.MASK_BATCH_SIZE_PER_IM = 64
_C.MODEL.ROI_MASK_HEAD.USE_MASKED_FEATURE = False
_C.MODEL.ROI_MASK_HEAD.SOFT_MASKED_FEATURE_RATIO = 0.0
_C.MODEL.ROI_MASK_HEAD.MIX_OPTION = ""
# ---------------------------------------------------------------------------- #
# Solver
# ---------------------------------------------------------------------------- #
_C.SOLVER = CN()
_C.SOLVER.MAX_ITER = 40000
_C.SOLVER.BASE_LR = 0.001
_C.SOLVER.BIAS_LR_FACTOR = 2
_C.SOLVER.MOMENTUM = 0.9
_C.SOLVER.WEIGHT_DECAY = 0.0005
_C.SOLVER.WEIGHT_DECAY_BIAS = 0
_C.SOLVER.GAMMA = 0.1
_C.SOLVER.STEPS = (30000,)
_C.SOLVER.WARMUP_FACTOR = 1.0 / 3
_C.SOLVER.WARMUP_ITERS = 500
_C.SOLVER.WARMUP_METHOD = "linear"
_C.SOLVER.CHECKPOINT_PERIOD = 5000
# Number of images per batch
# This is global, so if we have 8 GPUs and IMS_PER_BATCH = 16, each GPU will
# see 2 images per batch
_C.SOLVER.IMS_PER_BATCH = 16
_C.SOLVER.RESUME = True
_C.SOLVER.USE_ADAM = False
_C.SOLVER.POW_SCHEDULE = False
_C.SOLVER.DISPLAY_FREQ = 20
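# Illustrative note (assumes the usual warmup multi-step schedule used with
# configs like this one; the scheduler itself is not defined in this file):
# with BASE_LR = 0.001, STEPS = (30000,) and GAMMA = 0.1, the learning rate is
# 0.001 until iteration 30000 and 0.0001 afterwards, while during the first
# WARMUP_ITERS = 500 iterations it is scaled by a factor rising linearly from
# WARMUP_FACTOR = 1/3 to 1.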
# ---------------------------------------------------------------------------- #
# Specific test options
# ---------------------------------------------------------------------------- #
_C.TEST = CN()
_C.TEST.EXPECTED_RESULTS = []
_C.TEST.EXPECTED_RESULTS_SIGMA_TOL = 4
# Number of images per batch
# This is global, so if we have 8 GPUs and IMS_PER_BATCH = 8, each GPU will
# see 1 image per batch
_C.TEST.IMS_PER_BATCH = 8
_C.TEST.VIS = False
# from 0 to 255
_C.TEST.CHAR_THRESH = 128

# ---------------------------------------------------------------------------- #
# Misc options
# ---------------------------------------------------------------------------- #
_C.OUTPUT_DIR = "."
_C.PATHS_CATALOG = os.path.join(os.path.dirname(__file__), "paths_catalog.py")

# ---------------------------------------------------------------------------- #
# Precision options
# ---------------------------------------------------------------------------- #
# Precision of input, allowable: (float32, float16)
_C.DTYPE = "float32"
# Enable verbosity in apex.amp
_C.AMP_VERBOSE = False
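
# Minimal usage sketch (illustrative only; the YAML path and override values
# below are placeholders, not part of this project): downstream code would
# typically clone these defaults and override them via the standard yacs API
# before training or evaluation.
if __name__ == "__main__":
    cfg = _C.clone()
    # cfg.merge_from_file("configs/example.yaml")  # hypothetical config file
    cfg.merge_from_list(["SOLVER.BASE_LR", 0.002, "MODEL.SEG_ON", True])
    cfg.freeze()
    print(cfg.MODEL.META_ARCHITECTURE, cfg.SOLVER.BASE_LR, cfg.MODEL.SEG_ON)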