{ "_name_or_path": "facebook/detr-resnet-50", "activation_dropout": 0.0, "activation_function": "relu", "architectures": [ "DetrForObjectDetection" ], "attention_dropout": 0.0, "auxiliary_loss": false, "backbone": "resnet50", "backbone_config": null, "backbone_kwargs": { "in_chans": 3, "out_indices": [ 1, 2, 3, 4 ] }, "bbox_cost": 5, "bbox_loss_coefficient": 5, "class_cost": 1, "classifier_dropout": 0.0, "d_model": 256, "decoder_attention_heads": 8, "decoder_ffn_dim": 2048, "decoder_layerdrop": 0.0, "decoder_layers": 6, "dice_loss_coefficient": 1, "dilation": false, "dropout": 0.1, "encoder_attention_heads": 8, "encoder_ffn_dim": 2048, "encoder_layerdrop": 0.0, "encoder_layers": 6, "eos_coefficient": 0.1, "giou_cost": 2, "giou_loss_coefficient": 2, "id2label": { "1": 63, "2": 10, "3": 67, "4": 32, "5": 53, "6": 26, "7": 27, "8": 106, "9": 107, "10": 2, "11": 41, "12": 42, "13": 47, "14": 48, "15": 1, "16": 70, "17": 25, "18": 23, "19": 24, "20": 101, "21": 100, "22": 102, "23": 69, "24": 20, "25": 19, "26": 83, "27": 14, "28": 16, "29": 87, "30": 92, "31": 93, "32": 74, "33": 22, "34": 20, "35": 19, "36": 87, "37": 88, "38": 50, "39": 83, "40": 23, "41": 24, "42": 74, "43": 28, "44": 29, "45": 17, "46": 72, "47": 101, "48": 100, "49": 102, "50": 8, "51": 15, "52": 12, "53": 97, "54": 98, "55": 99, "56": 67, "57": 57, "58": 58, "59": 81, "60": 80, "61": 78, "62": 79, "63": 53, "64": 66, "65": 87, "66": 89, "67": 51, "68": 52, "69": 35, "70": 34, "71": 53, "72": 42, "73": 43, "74": 6, "75": 3, "76": 63, "77": 63, "78": 97, "79": 98, "80": 99, "81": 15, "82": 12, "83": 68, "84": 92, "85": 28, "86": 30, "87": 76, "88": 77, "89": 51, "90": 61, "91": 62, "92": 74, "93": 75, "94": 8, "95": 32, "96": 33, "97": 70, "98": 71, "99": 61, "100": 62, "101": 98, "102": 99, "103": 11, "104": 13, "105": 94, "106": 92, "107": 36, "108": 37, "109": 38, "110": 59, "111": 60, "112": 20, "113": 19, "114": 63, "115": 64, "116": 103, "117": 104, "118": 105, "119": 39, "120": 40, "121": 84, "122": 49, "123": 106, "124": 107, "125": 2, "126": 28, "127": 29, "128": 7, "129": 9, "130": 82, "131": 72, "132": 73, "133": 57, "134": 58, "135": 4, "136": 7, "137": 21, "138": 10, "139": 18, "140": 26, "141": 90, "142": 91, "143": 85, "144": 86, "145": 44, "146": 45, "147": 46, "148": 14, "149": 54, "150": 55, "151": 56, "152": 35, "153": 34, "154": 81, "155": 80, "156": 78, "157": 95, "158": 96, "159": 31 }, "init_std": 0.02, "init_xavier_std": 1.0, "is_encoder_decoder": true, "label2id": { "1": 15, "2": 125, "3": 75, "4": 135, "6": 74, "7": 136, "8": 94, "9": 129, "10": 138, "11": 103, "12": 82, "13": 104, "14": 148, "15": 81, "16": 28, "17": 45, "18": 139, "19": 113, "20": 112, "21": 137, "22": 33, "23": 40, "24": 41, "25": 17, "26": 140, "27": 7, "28": 126, "29": 127, "30": 86, "31": 159, "32": 95, "33": 96, "34": 153, "35": 152, "36": 107, "37": 108, "38": 109, "39": 119, "40": 120, "41": 11, "42": 72, "43": 73, "44": 145, "45": 146, "46": 147, "47": 13, "48": 14, "49": 122, "50": 38, "51": 89, "52": 68, "53": 71, "54": 149, "55": 150, "56": 151, "57": 133, "58": 134, "59": 110, "60": 111, "61": 99, "62": 100, "63": 114, "64": 115, "66": 64, "67": 56, "68": 83, "69": 23, "70": 97, "71": 98, "72": 131, "73": 132, "74": 92, "75": 93, "76": 87, "77": 88, "78": 156, "79": 62, "80": 155, "81": 154, "82": 130, "83": 39, "84": 121, "85": 143, "86": 144, "87": 65, "88": 37, "89": 66, "90": 141, "91": 142, "92": 106, "93": 31, "94": 105, "95": 157, "96": 158, "97": 78, "98": 101, "99": 102, "100": 48, "101": 47, "102": 49, "103": 116, "104": 117, "105": 118, "106": 123, "107": 124 }, "mask_loss_coefficient": 1, "max_position_embeddings": 1024, "model_type": "detr", "num_channels": 3, "num_hidden_layers": 6, "num_queries": 100, "position_embedding_type": "sine", "scale_embedding": false, "torch_dtype": "float32", "transformers_version": "4.41.1", "use_pretrained_backbone": true, "use_timm_backbone": true }