{
  "dataset_type": "ADE20KDataset",
  "data_root": "data/ade/ADEChallengeData2016",
  "crop_size": [512, 512],
  "train_pipeline": [
    {"type": "LoadImageFromFile"},
    {"type": "LoadAnnotations", "reduce_zero_label": true},
    {"type": "RandomResize", "scale": [2048, 512], "ratio_range": [0.5, 2.0], "keep_ratio": true},
    {"type": "RandomCrop", "crop_size": [512, 512], "cat_max_ratio": 0.75},
    {"type": "RandomFlip", "prob": 0.5},
    {"type": "PhotoMetricDistortion"},
    {"type": "PackSegInputs"}
  ],
  "test_pipeline": [
    {"type": "LoadImageFromFile"},
    {"type": "Resize", "scale": [2048, 512], "keep_ratio": true},
    {"type": "LoadAnnotations", "reduce_zero_label": true},
    {"type": "PackSegInputs"}
  ],
  "img_ratios": [0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
  "tta_pipeline": [
    {"type": "LoadImageFromFile", "backend_args": null},
    {
      "type": "TestTimeAug",
      "transforms": [
        [
          {"type": "Resize", "scale_factor": 0.5, "keep_ratio": true},
          {"type": "Resize", "scale_factor": 0.75, "keep_ratio": true},
          {"type": "Resize", "scale_factor": 1.0, "keep_ratio": true},
          {"type": "Resize", "scale_factor": 1.25, "keep_ratio": true},
          {"type": "Resize", "scale_factor": 1.5, "keep_ratio": true},
          {"type": "Resize", "scale_factor": 1.75, "keep_ratio": true}
        ],
        [
          {"type": "RandomFlip", "prob": 0.0, "direction": "horizontal"},
          {"type": "RandomFlip", "prob": 1.0, "direction": "horizontal"}
        ],
        [{"type": "LoadAnnotations"}],
        [{"type": "PackSegInputs"}]
      ]
    }
  ],
  "train_dataloader": {
    "batch_size": 2,
    "num_workers": 2,
    "persistent_workers": true,
    "sampler": {"type": "InfiniteSampler", "shuffle": true},
    "dataset": {
      "type": "ADE20KDataset",
      "data_root": "data/ade/ADEChallengeData2016",
      "data_prefix": {"img_path": "images/training", "seg_map_path": "annotations/training"},
      "pipeline": [
        {"type": "LoadImageFromFile"},
        {"type": "LoadAnnotations", "reduce_zero_label": true},
        {"type": "RandomResize", "scale": [2048, 512], "ratio_range": [0.5, 2.0], "keep_ratio": true},
        {"type": "RandomCrop", "crop_size": [512, 512], "cat_max_ratio": 0.75},
        {"type": "RandomFlip", "prob": 0.5},
        {"type": "PhotoMetricDistortion"},
        {"type": "PackSegInputs"}
      ]
    }
  },
  "val_dataloader": {
    "batch_size": 1,
    "num_workers": 4,
    "persistent_workers": true,
    "sampler": {"type": "DefaultSampler", "shuffle": false},
    "dataset": {
      "type": "ADE20KDataset",
      "data_root": "data/ade/ADEChallengeData2016",
      "data_prefix": {"img_path": "images/validation", "seg_map_path": "annotations/validation"},
      "pipeline": [
        {"type": "LoadImageFromFile"},
        {"type": "Resize", "scale": [2048, 512], "keep_ratio": true},
        {"type": "LoadAnnotations", "reduce_zero_label": true},
        {"type": "PackSegInputs"}
      ]
    }
  },
  "test_dataloader": {
    "batch_size": 1,
    "num_workers": 4,
    "persistent_workers": true,
    "sampler": {"type": "DefaultSampler", "shuffle": false},
    "dataset": {
      "type": "ADE20KDataset",
      "data_root": "data/ade/ADEChallengeData2016",
      "data_prefix": {"img_path": "images/validation", "seg_map_path": "annotations/validation"},
      "pipeline": [
        {"type": "LoadImageFromFile"},
        {"type": "Resize", "scale": [2048, 512], "keep_ratio": true},
        {"type": "LoadAnnotations", "reduce_zero_label": true},
        {"type": "PackSegInputs"}
      ]
    }
  },
  "val_evaluator": {"type": "IoUMetric", "iou_metrics": ["mIoU"]},
  "test_evaluator": {"type": "IoUMetric", "iou_metrics": ["mIoU"]},
  "default_scope": "mmseg",
  "env_cfg": {
    "cudnn_benchmark": true,
    "mp_cfg": {"mp_start_method": "fork", "opencv_num_threads": 0},
    "dist_cfg": {"backend": "nccl"}
  },
  "vis_backends": [{"type": "LocalVisBackend"}],
  "visualizer": {
    "type": "SegLocalVisualizer",
    "vis_backends": [{"type": "LocalVisBackend"}],
    "name": "visualizer"
  },
  "log_processor": {"by_epoch": false},
  "log_level": "INFO",
  "load_from": null,
  "resume": false,
  "tta_model": {"type": "SegTTAModel"},
  "optimizer": {
    "type": "AdamW",
    "lr": 0.0001,
    "weight_decay": 0.0001,
    "betas": [0.9, 0.999]
  },
  "optim_wrapper": {
    "type": "OptimWrapper",
    "optimizer": {
      "type": "AdamW",
      "lr": 0.0001,
      "betas": [0.9, 0.999],
      "weight_decay": 0.0001
    },
    "clip_grad": {"max_norm": 0.01, "norm_type": 2},
    "paramwise_cfg": {"custom_keys": {"backbone": {"lr_mult": 0.1}}}
  },
  "param_scheduler": [
    {"type": "PolyLR", "eta_min": 0, "power": 0.9, "begin": 0, "end": 160000, "by_epoch": false}
  ],
  "train_cfg": {"type": "IterBasedTrainLoop", "max_iters": 160000, "val_interval": 16000},
  "val_cfg": {"type": "ValLoop"},
  "test_cfg": {"type": "TestLoop"},
  "default_hooks": {
    "timer": {"type": "IterTimerHook"},
    "logger": {"type": "LoggerHook", "interval": 50, "log_metric_by_epoch": false},
    "param_scheduler": {"type": "ParamSchedulerHook"},
    "checkpoint": {"type": "CheckpointHook", "by_epoch": false, "interval": 16000},
    "sampler_seed": {"type": "DistSamplerSeedHook"},
    "visualization": {"type": "SegVisualizationHook"}
  },
  "norm_cfg": {"type": "SyncBN", "requires_grad": true},
  "data_preprocessor": {
    "type": "SegDataPreProcessor",
    "size": [512, 512],
    "mean": [123.675, 116.28, 103.53],
    "std": [58.395, 57.12, 57.375],
    "bgr_to_rgb": true,
    "pad_val": 0,
    "seg_pad_val": 255
  },
  "num_classes": 150,
  "model": {
    "type": "EncoderDecoder",
    "data_preprocessor": {
      "type": "SegDataPreProcessor",
      "size": [512, 512],
      "mean": [123.675, 116.28, 103.53],
      "std": [58.395, 57.12, 57.375],
      "bgr_to_rgb": true,
      "pad_val": 0,
      "seg_pad_val": 255
    },
    "backbone": {
      "type": "ResNet",
      "depth": 50,
      "num_stages": 4,
      "out_indices": [0, 1, 2, 3],
      "dilations": [1, 1, 1, 1],
      "strides": [1, 2, 2, 2],
      "norm_cfg": {"type": "SyncBN", "requires_grad": true},
      "norm_eval": true,
      "style": "pytorch",
      "contract_dilation": true,
      "init_cfg": {"type": "Pretrained", "checkpoint": "torchvision://resnet50"}
    },
    "decode_head": {
      "type": "MaskFormerHead",
      "in_channels": [256, 512, 1024, 2048],
      "feat_channels": 256,
      "in_index": [0, 1, 2, 3],
      "num_classes": 150,
      "out_channels": 256,
      "num_queries": 100,
      "pixel_decoder": {
        "type": "mmdet.PixelDecoder",
        "norm_cfg": {"type": "GN", "num_groups": 32},
        "act_cfg": {"type": "ReLU"}
      },
      "enforce_decoder_input_project": false,
      "positional_encoding": {"num_feats": 128, "normalize": true},
      "transformer_decoder": {
        "return_intermediate": true,
        "num_layers": 6,
        "layer_cfg": {
          "self_attn_cfg": {
            "embed_dims": 256,
            "num_heads": 8,
            "attn_drop": 0.1,
            "proj_drop": 0.1,
            "dropout_layer": null,
            "batch_first": true
          },
          "cross_attn_cfg": {
            "embed_dims": 256,
            "num_heads": 8,
            "attn_drop": 0.1,
            "proj_drop": 0.1,
            "dropout_layer": null,
            "batch_first": true
          },
          "ffn_cfg": {
            "embed_dims": 256,
            "feedforward_channels": 2048,
            "num_fcs": 2,
            "act_cfg": {"type": "ReLU", "inplace": true},
            "ffn_drop": 0.1,
            "dropout_layer": null,
            "add_identity": true
          }
        },
        "init_cfg": null
      },
      "loss_cls": {
        "type": "mmdet.CrossEntropyLoss",
        "use_sigmoid": false,
        "loss_weight": 1.0,
        "reduction": "mean",
        "class_weight": [
          1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
          1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
          1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
          1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
          1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
          1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
          1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
          1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
          1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
          1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
          1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
          1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
          1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
          1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
          1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
          0.1
        ]
      },
      "loss_mask": {
        "type": "mmdet.FocalLoss",
        "use_sigmoid": true,
        "gamma": 2.0,
        "alpha": 0.25,
        "reduction": "mean",
        "loss_weight": 20.0
      },
      "loss_dice": {
        "type": "mmdet.DiceLoss",
        "use_sigmoid": true,
        "activate": true,
        "reduction": "mean",
        "naive_dice": true,
        "eps": 1.0,
        "loss_weight": 1.0
      },
      "train_cfg": {
        "assigner": {
          "type": "mmdet.HungarianAssigner",
          "match_costs": [
            {"type": "mmdet.ClassificationCost", "weight": 1.0},
            {"type": "mmdet.FocalLossCost", "weight": 20.0, "binary_input": true},
            {"type": "mmdet.DiceCost", "weight": 1.0, "pred_act": true, "eps": 1.0}
          ]
        },
        "sampler": {"type": "mmdet.MaskPseudoSampler"}
      }
    },
    "train_cfg": {},
    "test_cfg": {"mode": "whole"}
  }
}