custom_imports = dict(imports=['mmseg.datasets', 'mmseg.models'], allow_failed_imports=False) sub_model_train = [ 'panoptic_head', 'data_preprocessor' ] sub_model_optim = { 'panoptic_head': {'lr_mult': 1}, } max_epochs = 1000 optimizer = dict( type='AdamW', sub_model=sub_model_optim, lr=0.0005, weight_decay=1e-3 ) param_scheduler = [ # warm up learning rate scheduler dict( type='LinearLR', start_factor=1e-4, by_epoch=True, begin=0, end=1, # update by iter convert_to_iter_based=True), # main learning rate scheduler dict( type='CosineAnnealingLR', T_max=max_epochs, by_epoch=True, begin=1, end=max_epochs, ), ] param_scheduler_callback = dict( type='ParamSchedulerHook' ) evaluator_ = dict( type='CocoPLMetric', metric=['bbox', 'segm'], proposal_nums=[1, 10, 100] ) evaluator = dict( val_evaluator=evaluator_, ) image_size = (1024, 1024) data_preprocessor = dict( type='mmdet.DetDataPreprocessor', mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], bgr_to_rgb=True, pad_size_divisor=32, pad_mask=True, mask_pad_value=0, ) num_things_classes = 1 num_stuff_classes = 0 num_classes = num_things_classes + num_stuff_classes prompt_shape = (30, 5) model_cfg = dict( type='SegSAMAnchorPLer', hyperparameters=dict( optimizer=optimizer, param_scheduler=param_scheduler, evaluator=evaluator, ), need_train_names=sub_model_train, data_preprocessor=data_preprocessor, backbone=dict( type='vit_h', # checkpoint='pretrain/sam/sam_vit_h_4b8939.pth', # type='vit_b', # checkpoint='pretrain/sam/sam_vit_b_01ec64.pth', ), panoptic_head=dict( type='SAMAnchorInstanceHead', neck=dict( type='SAMAggregatorNeck', in_channels=[1280] * 32, # in_channels=[768] * 12, inner_channels=32, selected_channels=range(4, 32, 2), # selected_channels=range(4, 12, 2), out_channels=256, up_sample_scale=4, ), rpn_head=dict( type='mmdet.RPNHead', in_channels=256, feat_channels=256, anchor_generator=dict( type='mmdet.AnchorGenerator', scales=[2, 4, 8, 16, 32, 64], ratios=[0.5, 1.0, 2.0], strides=[8, 16, 32]), bbox_coder=dict( type='mmdet.DeltaXYWHBBoxCoder', target_means=[.0, .0, .0, .0], target_stds=[1.0, 1.0, 1.0, 1.0]), loss_cls=dict( type='mmdet.CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), loss_bbox=dict(type='mmdet.SmoothL1Loss', loss_weight=1.0)), roi_head=dict( type='SAMAnchorPromptRoIHead', bbox_roi_extractor=dict( type='mmdet.SingleRoIExtractor', roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), out_channels=256, featmap_strides=[8, 16, 32]), bbox_head=dict( type='mmdet.Shared2FCBBoxHead', in_channels=256, fc_out_channels=1024, roi_feat_size=7, num_classes=num_classes, bbox_coder=dict( type='mmdet.DeltaXYWHBBoxCoder', target_means=[0., 0., 0., 0.], target_stds=[0.1, 0.1, 0.2, 0.2]), reg_class_agnostic=False, loss_cls=dict( type='mmdet.CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), loss_bbox=dict(type='mmdet.SmoothL1Loss', loss_weight=1.0)), mask_roi_extractor=dict( type='mmdet.SingleRoIExtractor', roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), out_channels=256, featmap_strides=[8, 16, 32]), mask_head=dict( type='SAMPromptMaskHead', per_query_point=prompt_shape[1], with_sincos=True, class_agnostic=True, loss_mask=dict( type='mmdet.CrossEntropyLoss', use_mask=True, loss_weight=1.0))), # model training and testing settings train_cfg=dict( rpn=dict( assigner=dict( type='mmdet.MaxIoUAssigner', pos_iou_thr=0.7, neg_iou_thr=0.3, min_pos_iou=0.3, match_low_quality=True, ignore_iof_thr=-1), sampler=dict( type='mmdet.RandomSampler', num=512, pos_fraction=0.5, neg_pos_ub=-1, add_gt_as_proposals=False), allowed_border=-1, pos_weight=-1, debug=False), rpn_proposal=dict( nms_pre=2000, max_per_img=1000, nms=dict(type='nms', iou_threshold=0.7), min_bbox_size=0), rcnn=dict( assigner=dict( type='mmdet.MaxIoUAssigner', pos_iou_thr=0.5, neg_iou_thr=0.5, min_pos_iou=0.5, match_low_quality=True, ignore_iof_thr=-1), sampler=dict( type='mmdet.RandomSampler', num=256, pos_fraction=0.25, neg_pos_ub=-1, add_gt_as_proposals=True), mask_size=1024, pos_weight=-1, debug=False)), test_cfg=dict( rpn=dict( nms_pre=1000, max_per_img=1000, nms=dict(type='nms', iou_threshold=0.7), min_bbox_size=0), rcnn=dict( score_thr=0.05, nms=dict(type='nms', iou_threshold=0.5), max_per_img=100, mask_thr_binary=0.5) ) ) ) task_name = 'whu_ins' exp_name = 'E20230629_0' logger = dict( type='WandbLogger', project=task_name, group='sam-anchor', name=exp_name ) vis_backends = [dict(type='mmdet.LocalVisBackend')] visualizer = dict( type='mmdet.DetLocalVisualizer', vis_backends=vis_backends, name='visualizer', fig_save_cfg=dict( frameon=False, figsize=(40, 20), # dpi=300, ), line_width=2, alpha=0.8 ) callbacks = [ param_scheduler_callback, dict( type='ModelCheckpoint', dirpath=f'results/{task_name}/{exp_name}/checkpoints', save_last=True, mode='max', monitor='valsegm_map_0', save_top_k=3, filename='epoch_{epoch}-map_{valsegm_map_0:.4f}' ), dict( type='LearningRateMonitor', logging_interval='step' ) ] trainer_cfg = dict( compiled_model=False, accelerator="auto", strategy="auto", # strategy="ddp", # strategy='ddp_find_unused_parameters_true', # precision='32', # precision='16-mixed', devices=8, default_root_dir=f'results/{task_name}/{exp_name}', # default_root_dir='results/tmp', max_epochs=max_epochs, logger=logger, callbacks=callbacks, log_every_n_steps=5, check_val_every_n_epoch=5, benchmark=True, # sync_batchnorm=True, # fast_dev_run=True, # limit_train_batches=1, # limit_val_batches=0, # limit_test_batches=None, # limit_predict_batches=None, # overfit_batches=0.0, # val_check_interval=None, # num_sanity_val_steps=0, # enable_checkpointing=None, # enable_progress_bar=None, # enable_model_summary=None, # accumulate_grad_batches=32, # gradient_clip_val=15, # gradient_clip_algorithm='norm', # deterministic=None, # inference_mode: bool=True, use_distributed_sampler=True, # profiler="simple", # detect_anomaly=False, # barebones=False, # plugins=None, # reload_dataloaders_every_n_epochs=0, ) backend_args = None train_pipeline = [ dict(type='mmdet.LoadImageFromFile'), dict(type='mmdet.LoadAnnotations', with_bbox=True, with_mask=True), dict(type='mmdet.Resize', scale=image_size), dict(type='mmdet.RandomFlip', prob=0.5), dict(type='mmdet.PackDetInputs') ] test_pipeline = [ dict(type='mmdet.LoadImageFromFile', backend_args=backend_args), dict(type='mmdet.Resize', scale=image_size), # If you don't have a gt annotation, delete the pipeline dict(type='mmdet.LoadAnnotations', with_bbox=True, with_mask=True), dict( type='mmdet.PackDetInputs', meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'scale_factor')) ] train_batch_size_per_gpu = 2 train_num_workers = 2 test_batch_size_per_gpu = 2 test_num_workers = 2 persistent_workers = True data_parent = '/mnt/search01/dataset/cky_data/SSDD' dataset_type = 'SSDDInsSegDataset' val_loader = dict( batch_size=test_batch_size_per_gpu, num_workers=test_num_workers, persistent_workers=persistent_workers, pin_memory=True, dataset=dict( type=dataset_type, data_root=data_parent, # ann_file='NWPU_instances_val.json', # data_prefix=dict(img_path='positive image set'), ann_file='annotations/SSDD_instances_val.json', data_prefix=dict(img_path='imgs'), test_mode=True, filter_cfg=dict(filter_empty_gt=True, min_size=32), pipeline=test_pipeline, backend_args=backend_args)) predict_pipeline = [ dict(type='mmdet.Resize', scale=image_size), dict( type='mmdet.PackDetInputs', meta_keys=('ori_shape', 'img_shape', 'scale_factor')) ] datamodule_cfg = dict( type='PLDataModule', train_loader=dict( batch_size=train_batch_size_per_gpu, num_workers=train_num_workers, persistent_workers=persistent_workers, pin_memory=True, dataset=dict( type=dataset_type, data_root=data_parent, # ann_file='NWPU_instances_train.json', # data_prefix=dict(img_path='positive image set'), ann_file='annotations/SSDD_instances_train.json', data_prefix=dict(img_path='imgs'), filter_cfg=dict(filter_empty_gt=True, min_size=32), pipeline=train_pipeline, backend_args=backend_args) ), val_loader=val_loader, # test_loader=val_loader predict_loader=val_loader )