_base_ = '../yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py' # noqa
# ========================modified parameters======================
# YOLOv5RandomAffine
use_mask2refine = True
max_aspect_ratio = 100
min_area_ratio = 0.01
# Polygon2Mask
downsample_ratio = 4
mask_overlap = True
# LetterResize
# half_pad_param: if set to True, pad_param is stored as float halves,
# i.e. (padding_h / 2, padding_h / 2, padding_w / 2, padding_w / 2) for
# the top/bottom and left/right pads. If set to False, pad_param is
# kept in int format. We recommend setting this to False for object
# detection tasks, and True for instance segmentation tasks.
# Defaults to False.
half_pad_param = True
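# Worked example (a sketch; assumes LetterResize stores pad_param as
# (top, bottom, left, right) and splits the padding evenly): a 480x640
# image letterboxed to 640x640 needs padding_h = 160, so
#   half_pad_param=True  -> pad_param = (80.0, 80.0, 0.0, 0.0)
#   half_pad_param=False -> pad_param kept as the rounded int list.
# The float halves let masks be cropped back to the original image
# without off-by-one rounding, hence True for instance segmentation.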
# Testing takes a long time due to model_test_cfg.
# If you want to speed it up, you can increase score_thr
# or decrease nms_pre and max_per_img.
model_test_cfg = dict(
    multi_label=True,
    nms_pre=30000,
    min_bbox_size=0,
    score_thr=0.001,
    nms=dict(type='nms', iou_threshold=0.6),
    max_per_img=300,
    mask_thr_binary=0.5,
    # fast_test: Whether to use the fast test method. If set to True,
    # the mask is first upsampled to the original image shape with
    # PyTorch, and mask_thr_binary is then used to determine which
    # pixels belong to the object. If set to False, mask_thr_binary is
    # applied first and OpenCV then upsamples the mask to the original
    # image shape; this matches the official implementation and yields
    # a higher mAP. Defaults to False.
    fast_test=True)
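# A faster, lower-recall test_cfg variant (illustrative values only,
# not tuned; a sketch of the speed/accuracy trade-off noted above):
# model_test_cfg = dict(
#     multi_label=True,
#     nms_pre=1000,  # keep fewer candidates before NMS
#     min_bbox_size=0,
#     score_thr=0.01,  # drop low-confidence boxes earlier
#     nms=dict(type='nms', iou_threshold=0.6),
#     max_per_img=100,  # fewer final detections per image
#     mask_thr_binary=0.5,
#     fast_test=True)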
# ===============================Unmodified in most cases====================
model = dict(
    type='YOLODetector',
    bbox_head=dict(
        type='YOLOv5InsHead',
        head_module=dict(
            type='YOLOv5InsHeadModule', mask_channels=32, proto_channels=256),
        mask_overlap=mask_overlap,
        loss_mask=dict(
            type='mmdet.CrossEntropyLoss', use_sigmoid=True, reduction='none'),
        loss_mask_weight=0.05),
    test_cfg=model_test_cfg)
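# How the mask branch fits together (a sketch of the YOLACT-style design
# this head follows): each predicted box carries mask_channels=32
# coefficients, the proto module (proto_channels=256 hidden width) emits
# 32 prototype maps, and an instance mask is the sigmoid of the
# coefficient-weighted sum of prototypes, thresholded by mask_thr_binary
# at test time.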
pre_transform = [
    dict(type='LoadImageFromFile', backend_args=_base_.backend_args),
    dict(
        type='LoadAnnotations',
        with_bbox=True,
        with_mask=True,
        mask2bbox=use_mask2refine)
]
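# With mask2bbox=True, LoadAnnotations re-derives the gt bboxes from the
# loaded masks, so boxes stay consistent with the masks through the
# affine transforms in train_pipeline below.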
train_pipeline = [
    *pre_transform,
    dict(
        type='Mosaic',
        img_scale=_base_.img_scale,
        pad_val=114.0,
        pre_transform=pre_transform),
    dict(
        type='YOLOv5RandomAffine',
        max_rotate_degree=0.0,
        max_shear_degree=0.0,
        scaling_ratio_range=(1 - _base_.affine_scale, 1 + _base_.affine_scale),
        border=(-_base_.img_scale[0] // 2, -_base_.img_scale[1] // 2),
        border_val=(114, 114, 114),
        min_area_ratio=min_area_ratio,
        max_aspect_ratio=max_aspect_ratio,
        use_mask_refine=use_mask2refine),
    # TODO: support mask transform in albu
    # Geometric transformations are not supported in albu now.
    dict(
        type='mmdet.Albu',
        transforms=_base_.albu_train_transforms,
        bbox_params=dict(
            type='BboxParams',
            format='pascal_voc',
            label_fields=['gt_bboxes_labels', 'gt_ignore_flags']),
        keymap={
            'img': 'image',
            'gt_bboxes': 'bboxes'
        }),
    dict(type='YOLOv5HSVRandomAug'),
    dict(type='mmdet.RandomFlip', prob=0.5),
    dict(
        type='Polygon2Mask',
        downsample_ratio=downsample_ratio,
        mask_overlap=mask_overlap),
    dict(
        type='PackDetInputs',
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip',
                   'flip_direction'))
]
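# Sizing note (a sketch, assuming _base_.img_scale is (640, 640) as in
# the base YOLOv5-s config): with downsample_ratio=4, Polygon2Mask
# rasterizes gt masks at 640 / 4 = 160 pixels per side, matching the
# 160x160 prototype maps the head produces at that input size.
# mask_overlap=True packs all instances of an image into a single
# index-valued mask to cut memory use.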
test_pipeline = [
    dict(type='LoadImageFromFile', backend_args=_base_.backend_args),
    dict(type='YOLOv5KeepRatioResize', scale=_base_.img_scale),
    dict(
        type='LetterResize',
        scale=_base_.img_scale,
        allow_scale_up=False,
        half_pad_param=half_pad_param,
        pad_val=dict(img=114)),
    dict(type='LoadAnnotations', with_bbox=True, _scope_='mmdet'),
    dict(
        type='mmdet.PackDetInputs',
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor', 'pad_param'))
]
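# 'scale_factor' and 'pad_param' are packed into the metas so that boxes
# and masks predicted on the letterboxed input can be mapped back to
# 'ori_shape' at test time (see the half_pad_param note above).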
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
test_dataloader = val_dataloader
val_evaluator = dict(metric=['bbox', 'segm'])
test_evaluator = val_evaluator
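# Usage sketch (assumes the standard mmyolo repository layout; the config
# path below is this file's location under configs/):
#   # single-GPU training
#   python tools/train.py \
#       configs/yolov5/ins_seg/yolov5_ins_s-v61_syncbn_fast_8xb16-300e_coco_instance.py
#   # evaluation, reporting both 'bbox' and 'segm' metrics as set above
#   python tools/test.py \
#       configs/yolov5/ins_seg/yolov5_ins_s-v61_syncbn_fast_8xb16-300e_coco_instance.py \
#       path/to/checkpoint.pth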