{ "model_type": "unet", "model_config": { "encoder": { "type": "resnet18", "pretrained": true }, "decoder": { "type": "unet", "num_classes": 104 } }, "norm_cfg": { "type": "SyncBN", "requires_grad": true }, "model": { "type": "EncoderDecoder", "pretrained": "pretrained/swin_small_patch4_window7_224.pth", "backbone": { "type": "SwinTransformer", "embed_dim": 96, "depths": [2, 2, 18, 2], "num_heads": [3, 6, 12, 24], "window_size": 7, "mlp_ratio": 4.0, "qkv_bias": true, "qk_scale": null, "drop_rate": 0.0, "attn_drop_rate": 0.0, "drop_path_rate": 0.3, "ape": false, "patch_norm": true, "out_indices": [0, 1, 2, 3], "use_checkpoint": false }, "decode_head": { "type": "UPerHead", "in_channels": [96, 192, 384, 768], "in_index": [0, 1, 2, 3], "pool_scales": [1, 2, 3, 6], "channels": 512, "dropout_ratio": 0.1, "num_classes": 104, "norm_cfg": { "type": "SyncBN", "requires_grad": true }, "align_corners": false, "loss_decode": { "type": "CrossEntropyLoss", "use_sigmoid": false, "loss_weight": 1.0 } }, "auxiliary_head": { "type": "FCNHead", "in_channels": 384, "in_index": 2, "channels": 256, "num_convs": 1, "concat_input": false, "dropout_ratio": 0.1, "num_classes": 104, "norm_cfg": { "type": "SyncBN", "requires_grad": true }, "align_corners": false, "loss_decode": { "type": "CrossEntropyLoss", "use_sigmoid": false, "loss_weight": 0.4 } }, "train_cfg": {}, "test_cfg": { "mode": "whole" } }, "dataset_type": "CustomDataset", "data_root": "./data/FoodSeg103/Images/", "img_norm_cfg": { "mean": [123.675, 116.28, 103.53], "std": [58.395, 57.12, 57.375], "to_rgb": true }, "crop_size": [512, 1024], "train_pipeline": [ { "type": "LoadImageFromFile" }, { "type": "LoadAnnotations" }, { "type": "Resize", "img_scale": [2048, 1024], "ratio_range": [0.5, 2.0] }, { "type": "RandomCrop", "crop_size": [512, 1024], "cat_max_ratio": 0.75 }, { "type": "RandomFlip", "prob": 0.5 }, { "type": "PhotoMetricDistortion" }, { "type": "Normalize", "mean": [123.675, 116.28, 103.53], "std": [58.395, 57.12, 
57.375], "to_rgb": true }, { "type": "Pad", "size": [512, 1024], "pad_val": 0, "seg_pad_val": 255 }, { "type": "DefaultFormatBundle" }, { "type": "Collect", "keys": ["img", "gt_semantic_seg"] } ], "test_pipeline": [ { "type": "LoadImageFromFile" }, { "type": "MultiScaleFlipAug", "img_scale": [2048, 1024], "flip": false, "transforms": [ { "type": "Resize", "keep_ratio": true }, { "type": "RandomFlip" }, { "type": "Normalize", "mean": [123.675, 116.28, 103.53], "std": [58.395, 57.12, 57.375], "to_rgb": true }, { "type": "ImageToTensor", "keys": ["img"] }, { "type": "Collect", "keys": ["img"] } ] } ], "data": { "samples_per_gpu": 2, "workers_per_gpu": 2, "train": { "type": "CustomDataset", "data_root": "./data/FoodSeg103/Images/", "img_dir": "img_dir/train", "ann_dir": "ann_dir/train", "pipeline": [ { "type": "LoadImageFromFile" }, { "type": "LoadAnnotations" }, { "type": "Resize", "img_scale": [2048, 1024], "ratio_range": [0.5, 2.0] }, { "type": "RandomCrop", "crop_size": [512, 1024], "cat_max_ratio": 0.75 }, { "type": "RandomFlip", "prob": 0.5 }, { "type": "PhotoMetricDistortion" }, { "type": "Normalize", "mean": [123.675, 116.28, 103.53], "std": [58.395, 57.12, 57.375], "to_rgb": true }, { "type": "Pad", "size": [512, 1024], "pad_val": 0, "seg_pad_val": 255 }, { "type": "DefaultFormatBundle" }, { "type": "Collect", "keys": ["img", "gt_semantic_seg"] } ] }, "val": { "type": "CustomDataset", "data_root": "./data/FoodSeg103/Images/", "img_dir": "img_dir/test", "ann_dir": "ann_dir/test", "pipeline": [ { "type": "LoadImageFromFile" }, { "type": "MultiScaleFlipAug", "img_scale": [2048, 1024],