{ "_name_or_path": "facebook/detr-resnet-50", "activation_dropout": 0.0, "activation_function": "relu", "architectures": [ "DetrForObjectDetection" ], "attention_dropout": 0.0, "auxiliary_loss": false, "backbone": "resnet50", "backbone_config": null, "backbone_kwargs": null, "bbox_cost": 5, "bbox_loss_coefficient": 5, "class_cost": 1, "classifier_dropout": 0.0, "d_model": 256, "decoder_attention_heads": 8, "decoder_ffn_dim": 2048, "decoder_layerdrop": 0.0, "decoder_layers": 6, "dice_loss_coefficient": 1, "dilation": false, "dropout": 0.1, "encoder_attention_heads": 8, "encoder_ffn_dim": 2048, "encoder_layerdrop": 0.0, "encoder_layers": 6, "eos_coefficient": 0.1, "giou_cost": 2, "giou_loss_coefficient": 2, "id2label": { "1": 81, "2": 80, "3": 78, "4": 66, "5": 42, "6": 43, "7": 74, "8": 54, "9": 55, "10": 56, "11": 47, "12": 48, "13": 10, "14": 61, "15": 62, "16": 41, "17": 42, "18": 17, "19": 21, "20": 57, "21": 58, "22": 70, "23": 71, "24": 92, "25": 93, "26": 1, "27": 101, "28": 100, "29": 102, "30": 52, "31": 70, "32": 87, "33": 89, "34": 22, "35": 20, "36": 19, "37": 8, "38": 35, "39": 34, "40": 49, "41": 57, "42": 58, "43": 59, "44": 60, "45": 28, "46": 30, "47": 18, "48": 90, "49": 91, "50": 14, "51": 82, "52": 3, "53": 94, "54": 92, "55": 98, "56": 99, "57": 69, "58": 83, "59": 97, "60": 98, "61": 99, "62": 6, "63": 32, "64": 103, "65": 104, "66": 105, "67": 63, "68": 64, "69": 87, "70": 51, "71": 83, "72": 36, "73": 37, "74": 38, "75": 8, "76": 106, "77": 107, "78": 2, "79": 28, "80": 29, "81": 67, "82": 74, "83": 63, "84": 63, "85": 20, "86": 19, "87": 50, "88": 7, "89": 9, "90": 10, "91": 28, "92": 29, "93": 63, "94": 97, "95": 98, "96": 99, "97": 53, "98": 51, "99": 76, "100": 77, "101": 4, "102": 72, "103": 73, "104": 68, "105": 14, "106": 16, "107": 106, "108": 107, "109": 2, "110": 23, "111": 24, "112": 15, "113": 12, "114": 61, "115": 62, "116": 35, "117": 34, "118": 32, "119": 33, "120": 53, "121": 84, "122": 67, "123": 25, "124": 23, "125": 24, "126": 72, "127": 26, "128": 27, "129": 26, "130": 87, "131": 88, "132": 53, "133": 85, "134": 86, "135": 101, "136": 100, "137": 102, "138": 20, "139": 19, "140": 39, "141": 40, "142": 31, "143": 81, "144": 80, "145": 78, "146": 79, "147": 7, "148": 74, "149": 75, "150": 44, "151": 45, "152": 46, "153": 92, "154": 95, "155": 96, "156": 15, "157": 12, "158": 11, "159": 13 }, "init_std": 0.02, "init_xavier_std": 1.0, "is_encoder_decoder": true, "label2id": { "1": 26, "2": 109, "3": 52, "4": 101, "6": 62, "7": 147, "8": 75, "9": 89, "10": 90, "11": 158, "12": 157, "13": 159, "14": 105, "15": 156, "16": 106, "17": 18, "18": 47, "19": 139, "20": 138, "21": 19, "22": 34, "23": 124, "24": 125, "25": 123, "26": 129, "27": 128, "28": 91, "29": 92, "30": 46, "31": 142, "32": 118, "33": 119, "34": 117, "35": 116, "36": 72, "37": 73, "38": 74, "39": 140, "40": 141, "41": 16, "42": 17, "43": 6, "44": 150, "45": 151, "46": 152, "47": 11, "48": 12, "49": 40, "50": 87, "51": 98, "52": 30, "53": 132, "54": 8, "55": 9, "56": 10, "57": 41, "58": 42, "59": 43, "60": 44, "61": 114, "62": 115, "63": 93, "64": 68, "66": 4, "67": 122, "68": 104, "69": 57, "70": 31, "71": 23, "72": 126, "73": 103, "74": 148, "75": 149, "76": 99, "77": 100, "78": 145, "79": 146, "80": 144, "81": 143, "82": 51, "83": 71, "84": 121, "85": 133, "86": 134, "87": 130, "88": 131, "89": 33, "90": 48, "91": 49, "92": 153, "93": 25, "94": 53, "95": 154, "96": 155, "97": 94, "98": 95, "99": 96, "100": 136, "101": 135, "102": 137, "103": 64, "104": 65, "105": 66, "106": 107, "107": 108 }, "mask_loss_coefficient": 1, "max_position_embeddings": 1024, "model_type": "detr", "num_channels": 3, "num_hidden_layers": 6, "num_queries": 100, "position_embedding_type": "sine", "scale_embedding": false, "torch_dtype": "float32", "transformers_version": "4.40.2", "use_pretrained_backbone": true, "use_timm_backbone": true }