{ "_name_or_path": "facebook/detr-resnet-50", "activation_dropout": 0.0, "activation_function": "relu", "architectures": [ "DetrForObjectDetection" ], "attention_dropout": 0.0, "auxiliary_loss": false, "backbone": "resnet50", "backbone_config": null, "backbone_kwargs": { "in_chans": 3, "out_indices": [ 1, 2, 3, 4 ] }, "bbox_cost": 5, "bbox_loss_coefficient": 5, "class_cost": 1, "classifier_dropout": 0.0, "d_model": 256, "decoder_attention_heads": 8, "decoder_ffn_dim": 2048, "decoder_layerdrop": 0.0, "decoder_layers": 6, "dice_loss_coefficient": 1, "dilation": false, "dropout": 0.1, "encoder_attention_heads": 8, "encoder_ffn_dim": 2048, "encoder_layerdrop": 0.0, "encoder_layers": 6, "eos_coefficient": 0.1, "giou_cost": 2, "giou_loss_coefficient": 2, "id2label": { "1": 50, "2": 106, "3": 107, "4": 2, "5": 15, "6": 12, "7": 97, "8": 98, "9": 99, "10": 106, "11": 107, "12": 2, "13": 84, "14": 8, "15": 32, "16": 33, "17": 47, "18": 48, "19": 61, "20": 62, "21": 83, "22": 57, "23": 58, "24": 18, "25": 83, "26": 36, "27": 37, "28": 38, "29": 87, "30": 74, "31": 63, "32": 63, "33": 53, "34": 59, "35": 60, "36": 72, "37": 35, "38": 34, "39": 7, "40": 98, "41": 99, "42": 81, "43": 80, "44": 78, "45": 44, "46": 45, "47": 46, "48": 39, "49": 40, "50": 26, "51": 27, "52": 87, "53": 89, "54": 11, "55": 13, "56": 28, "57": 29, "58": 51, "59": 54, "60": 55, "61": 56, "62": 3, "63": 8, "64": 74, "65": 21, "66": 95, "67": 96, "68": 22, "69": 20, "70": 19, "71": 41, "72": 42, "73": 10, "74": 26, "75": 101, "76": 100, "77": 102, "78": 92, "79": 93, "80": 70, "81": 71, "82": 70, "83": 51, "84": 61, "85": 62, "86": 67, "87": 52, "88": 49, "89": 7, "90": 9, "91": 74, "92": 75, "93": 17, "94": 14, "95": 16, "96": 32, "97": 14, "98": 82, "99": 31, "100": 69, "101": 4, "102": 28, "103": 30, "104": 85, "105": 86, "106": 20, "107": 19, "108": 10, "109": 53, "110": 101, "111": 100, "112": 102, "113": 23, "114": 24, "115": 90, "116": 91, "117": 103, "118": 104, "119": 105, "120": 66, "121": 1, "122": 94, "123": 92, "124": 57, "125": 58, "126": 35, "127": 34, "128": 42, "129": 43, "130": 15, "131": 12, "132": 63, "133": 6, "134": 28, "135": 29, "136": 72, "137": 73, "138": 81, "139": 80, "140": 78, "141": 79, "142": 67, "143": 68, "144": 87, "145": 88, "146": 20, "147": 19, "148": 97, "149": 98, "150": 99, "151": 53, "152": 63, "153": 64, "154": 25, "155": 23, "156": 24, "157": 92, "158": 76, "159": 77 }, "init_std": 0.02, "init_xavier_std": 1.0, "is_encoder_decoder": true, "label2id": { "1": 121, "2": 12, "3": 62, "4": 101, "6": 133, "7": 89, "8": 63, "9": 90, "10": 108, "11": 54, "12": 131, "13": 55, "14": 97, "15": 130, "16": 95, "17": 93, "18": 24, "19": 147, "20": 146, "21": 65, "22": 68, "23": 155, "24": 156, "25": 154, "26": 74, "27": 51, "28": 134, "29": 135, "30": 103, "31": 99, "32": 96, "33": 16, "34": 127, "35": 126, "36": 26, "37": 27, "38": 28, "39": 48, "40": 49, "41": 71, "42": 128, "43": 129, "44": 45, "45": 46, "46": 47, "47": 17, "48": 18, "49": 88, "50": 1, "51": 83, "52": 87, "53": 151, "54": 59, "55": 60, "56": 61, "57": 124, "58": 125, "59": 34, "60": 35, "61": 84, "62": 85, "63": 152, "64": 153, "66": 120, "67": 142, "68": 143, "69": 100, "70": 82, "71": 81, "72": 136, "73": 137, "74": 91, "75": 92, "76": 158, "77": 159, "78": 140, "79": 141, "80": 139, "81": 138, "82": 98, "83": 25, "84": 13, "85": 104, "86": 105, "87": 144, "88": 145, "89": 53, "90": 115, "91": 116, "92": 157, "93": 79, "94": 122, "95": 66, "96": 67, "97": 148, "98": 149, "99": 150, "100": 111, "101": 110, "102": 112, "103": 117, "104": 118, "105": 119, "106": 10, "107": 11 }, "mask_loss_coefficient": 1, "max_position_embeddings": 1024, "model_type": "detr", "num_channels": 3, "num_hidden_layers": 6, "num_queries": 100, "position_embedding_type": "sine", "scale_embedding": false, "torch_dtype": "float32", "transformers_version": "4.41.1", "use_pretrained_backbone": true, "use_timm_backbone": true }