{
  "architectures": [
    "MobileViTForSemanticSegmentation"
  ],
  "aspp_dropout_prob": 0.1,
  "aspp_out_channels": 256,
  "atrous_rates": [
    6,
    12,
    18
  ],
  "attention_probs_dropout_prob": 0.0,
  "classifier_dropout_prob": 0.1,
  "conv_kernel_size": 3,
  "expand_ratio": 4.0,
  "hidden_act": "silu",
  "hidden_dropout_prob": 0.1,
  "hidden_sizes": [
    144,
    192,
    240
  ],
  "id2label": {
    "0": "background",
    "1": "aeroplane",
    "2": "bicycle",
    "3": "bird",
    "4": "boat",
    "5": "bottle",
    "6": "bus",
    "7": "car",
    "8": "cat",
    "9": "chair",
    "10": "cow",
    "11": "diningtable",
    "12": "dog",
    "13": "horse",
    "14": "motorbike",
    "15": "person",
    "16": "pottedplant",
    "17": "sheep",
    "18": "sofa",
    "19": "train",
    "20": "tvmonitor"
  },
  "image_size": 512,
  "initializer_range": 0.02,
  "label2id": {
    "aeroplane": 1,
    "background": 0,
    "bicycle": 2,
    "bird": 3,
    "boat": 4,
    "bottle": 5,
    "bus": 6,
    "car": 7,
    "cat": 8,
    "chair": 9,
    "cow": 10,
    "diningtable": 11,
    "dog": 12,
    "horse": 13,
    "motorbike": 14,
    "person": 15,
    "pottedplant": 16,
    "sheep": 17,
    "sofa": 18,
    "train": 19,
    "tvmonitor": 20
  },
  "layer_norm_eps": 1e-05,
  "mlp_ratio": 2.0,
  "model_type": "mobilevit",
  "neck_hidden_sizes": [
    16,
    32,
    64,
    96,
    128,
    160,
    640
  ],
  "num_attention_heads": 4,
  "num_channels": 3,
  "output_stride": 16,
  "patch_size": 2,
  "qkv_bias": true,
  "semantic_loss_ignore_index": 255,
  "torch_dtype": "float32",
  "transformers_version": "4.20.0.dev0"
}