{ "_name_or_path": "facebook/detr-resnet-50", "activation_dropout": 0.0, "activation_function": "relu", "architectures": [ "DetrForObjectDetection" ], "attention_dropout": 0.0, "auxiliary_loss": false, "backbone": "resnet50", "backbone_config": null, "backbone_kwargs": { "in_chans": 3, "out_indices": [ 1, 2, 3, 4 ] }, "bbox_cost": 5, "bbox_loss_coefficient": 5, "class_cost": 1, "classifier_dropout": 0.0, "d_model": 256, "decoder_attention_heads": 8, "decoder_ffn_dim": 2048, "decoder_layerdrop": 0.0, "decoder_layers": 6, "dice_loss_coefficient": 1, "dilation": false, "dropout": 0.1, "encoder_attention_heads": 8, "encoder_ffn_dim": 2048, "encoder_layerdrop": 0.0, "encoder_layers": 6, "eos_coefficient": 0.1, "giou_cost": 2, "giou_loss_coefficient": 2, "id2label": { "1": 32, "2": 25, "3": 23, "4": 24, "5": 47, "6": 48, "7": 35, "8": 34, "9": 74, "10": 97, "11": 98, "12": 99, "13": 61, "14": 62, "15": 76, "16": 77, "17": 87, "18": 1, "19": 106, "20": 107, "21": 2, "22": 20, "23": 19, "24": 3, "25": 8, "26": 39, "27": 40, "28": 63, "29": 63, "30": 103, "31": 104, "32": 105, "33": 92, "34": 28, "35": 30, "36": 7, "37": 9, "38": 23, "39": 24, "40": 53, "41": 67, "42": 54, "43": 55, "44": 56, "45": 97, "46": 98, "47": 99, "48": 20, "49": 19, "50": 18, "51": 95, "52": 96, "53": 61, "54": 62, "55": 51, "56": 66, "57": 31, "58": 63, "59": 64, "60": 57, "61": 58, "62": 35, "63": 34, "64": 63, "65": 4, "66": 83, "67": 17, "68": 72, "69": 73, "70": 28, "71": 29, "72": 50, "73": 32, "74": 33, "75": 36, "76": 37, "77": 38, "78": 90, "79": 91, "80": 59, "81": 60, "82": 15, "83": 12, "84": 14, "85": 57, "86": 58, "87": 68, "88": 72, "89": 51, "90": 26, "91": 84, "92": 22, "93": 20, "94": 19, "95": 87, "96": 88, "97": 83, "98": 7, "99": 81, "100": 80, "101": 78, "102": 106, "103": 107, "104": 2, "105": 101, "106": 100, "107": 102, "108": 74, "109": 70, "110": 69, "111": 21, "112": 82, "113": 11, "114": 13, "115": 98, "116": 99, "117": 10, "118": 85, "119": 86, "120": 70, "121": 71, "122": 44, "123": 45, "124": 46, "125": 49, "126": 6, "127": 14, "128": 16, "129": 74, "130": 75, "131": 8, "132": 28, "133": 29, "134": 53, "135": 42, "136": 43, "137": 81, "138": 80, "139": 78, "140": 79, "141": 92, "142": 93, "143": 26, "144": 27, "145": 41, "146": 42, "147": 15, "148": 12, "149": 87, "150": 89, "151": 52, "152": 67, "153": 94, "154": 92, "155": 53, "156": 10, "157": 101, "158": 100, "159": 102 }, "init_std": 0.02, "init_xavier_std": 1.0, "is_encoder_decoder": true, "label2id": { "1": 18, "2": 104, "3": 24, "4": 65, "6": 126, "7": 98, "8": 131, "9": 37, "10": 156, "11": 113, "12": 148, "13": 114, "14": 127, "15": 147, "16": 128, "17": 67, "18": 50, "19": 94, "20": 93, "21": 111, "22": 92, "23": 38, "24": 39, "25": 2, "26": 143, "27": 144, "28": 132, "29": 133, "30": 35, "31": 57, "32": 73, "33": 74, "34": 63, "35": 62, "36": 75, "37": 76, "38": 77, "39": 26, "40": 27, "41": 145, "42": 146, "43": 136, "44": 122, "45": 123, "46": 124, "47": 5, "48": 6, "49": 125, "50": 72, "51": 89, "52": 151, "53": 155, "54": 42, "55": 43, "56": 44, "57": 85, "58": 86, "59": 80, "60": 81, "61": 53, "62": 54, "63": 64, "64": 59, "66": 56, "67": 152, "68": 87, "69": 110, "70": 120, "71": 121, "72": 88, "73": 69, "74": 129, "75": 130, "76": 15, "77": 16, "78": 139, "79": 140, "80": 138, "81": 137, "82": 112, "83": 97, "84": 91, "85": 118, "86": 119, "87": 149, "88": 96, "89": 150, "90": 78, "91": 79, "92": 154, "93": 142, "94": 153, "95": 51, "96": 52, "97": 45, "98": 115, "99": 116, "100": 158, "101": 157, "102": 159, "103": 30, "104": 31, "105": 32, "106": 102, "107": 103 }, "mask_loss_coefficient": 1, "max_position_embeddings": 1024, "model_type": "detr", "num_channels": 3, "num_hidden_layers": 6, "num_queries": 100, "position_embedding_type": "sine", "scale_embedding": false, "torch_dtype": "float32", "transformers_version": "4.41.1", "use_pretrained_backbone": true, "use_timm_backbone": true }