{ "_commit_hash": null, "activation_dropout": 0.0, "activation_function": "relu", "architectures": [ "DetaForObjectDetection" ], "assign_first_stage": true, "attention_dropout": 0.0, "auxiliary_loss": false, "backbone_config": { "_name_or_path": "", "add_cross_attention": false, "architectures": null, "bad_words_ids": null, "begin_suppress_tokens": null, "bos_token_id": null, "chunk_size_feed_forward": 0, "cross_attention_hidden_size": null, "decoder_start_token_id": null, "depths": [ 3, 4, 6, 3 ], "diversity_penalty": 0.0, "do_sample": false, "downsample_in_first_stage": false, "early_stopping": false, "embedding_size": 64, "encoder_no_repeat_ngram_size": 0, "eos_token_id": null, "exponential_decay_length_penalty": null, "finetuning_task": null, "forced_bos_token_id": null, "forced_eos_token_id": null, "hidden_act": "relu", "hidden_sizes": [ 256, 512, 1024, 2048 ], "id2label": { "0": "LABEL_0", "1": "LABEL_1" }, "is_decoder": false, "is_encoder_decoder": false, "label2id": { "LABEL_0": 0, "LABEL_1": 1 }, "layer_type": "bottleneck", "length_penalty": 1.0, "max_length": 20, "min_length": 0, "model_type": "resnet", "no_repeat_ngram_size": 0, "num_beam_groups": 1, "num_beams": 1, "num_channels": 3, "num_return_sequences": 1, "out_features": [ "stage2", "stage3", "stage4" ], "output_attentions": false, "output_hidden_states": false, "output_scores": false, "pad_token_id": null, "prefix": null, "problem_type": null, "pruned_heads": {}, "remove_invalid_values": false, "repetition_penalty": 1.0, "return_dict": true, "return_dict_in_generate": false, "sep_token_id": null, "stage_names": [ "stem", "stage1", "stage2", "stage3", "stage4" ], "suppress_tokens": null, "task_specific_params": null, "temperature": 1.0, "tf_legacy_loss": false, "tie_encoder_decoder": false, "tie_word_embeddings": true, "tokenizer_class": null, "top_k": 50, "top_p": 1.0, "torch_dtype": null, "torchscript": false, "transformers_version": "4.27.0.dev0", "typical_p": 1.0, "use_bfloat16": false }, "bbox_cost": 5, "bbox_loss_coefficient": 5, "class_cost": 1, "d_model": 256, "decoder_attention_heads": 8, "decoder_ffn_dim": 2048, "decoder_layerdrop": 0.0, "decoder_layers": 6, "decoder_n_points": 4, "dice_loss_coefficient": 1, "dropout": 0.1, "encoder_attention_heads": 8, "encoder_ffn_dim": 2048, "encoder_layerdrop": 0.0, "encoder_layers": 6, "encoder_n_points": 4, "eos_coefficient": 0.1, "focal_alpha": 0.25, "giou_cost": 2, "giou_loss_coefficient": 2, "id2label": { "0": "N/A", "1": "person", "2": "bicycle", "3": "car", "4": "motorcycle", "5": "airplane", "6": "bus", "7": "train", "8": "truck", "9": "boat", "10": "traffic light", "11": "fire hydrant", "12": "N/A", "13": "stop sign", "14": "parking meter", "15": "bench", "16": "bird", "17": "cat", "18": "dog", "19": "horse", "20": "sheep", "21": "cow", "22": "elephant", "23": "bear", "24": "zebra", "25": "giraffe", "26": "N/A", "27": "backpack", "28": "umbrella", "29": "N/A", "30": "N/A", "31": "handbag", "32": "tie", "33": "suitcase", "34": "frisbee", "35": "skis", "36": "snowboard", "37": "sports ball", "38": "kite", "39": "baseball bat", "40": "baseball glove", "41": "skateboard", "42": "surfboard", "43": "tennis racket", "44": "bottle", "45": "N/A", "46": "wine glass", "47": "cup", "48": "fork", "49": "knife", "50": "spoon", "51": "bowl", "52": "banana", "53": "apple", "54": "sandwich", "55": "orange", "56": "broccoli", "57": "carrot", "58": "hot dog", "59": "pizza", "60": "donut", "61": "cake", "62": "chair", "63": "couch", "64": "potted plant", "65": "bed", "66": "N/A", "67": "dining table", "68": "N/A", "69": "N/A", "70": "toilet", "71": "N/A", "72": "tv", "73": "laptop", "74": "mouse", "75": "remote", "76": "keyboard", "77": "cell phone", "78": "microwave", "79": "oven", "80": "toaster", "81": "sink", "82": "refrigerator", "83": "N/A", "84": "book", "85": "clock", "86": "vase", "87": "scissors", "88": "teddy bear", "89": "hair drier", "90": "toothbrush" }, "init_std": 0.02, "init_xavier_std": 1.0, "is_encoder_decoder": true, "label2id": { "N/A": 83, "airplane": 5, "apple": 53, "backpack": 27, "banana": 52, "baseball bat": 39, "baseball glove": 40, "bear": 23, "bed": 65, "bench": 15, "bicycle": 2, "bird": 16, "boat": 9, "book": 84, "bottle": 44, "bowl": 51, "broccoli": 56, "bus": 6, "cake": 61, "car": 3, "carrot": 57, "cat": 17, "cell phone": 77, "chair": 62, "clock": 85, "couch": 63, "cow": 21, "cup": 47, "dining table": 67, "dog": 18, "donut": 60, "elephant": 22, "fire hydrant": 11, "fork": 48, "frisbee": 34, "giraffe": 25, "hair drier": 89, "handbag": 31, "horse": 19, "hot dog": 58, "keyboard": 76, "kite": 38, "knife": 49, "laptop": 73, "microwave": 78, "motorcycle": 4, "mouse": 74, "orange": 55, "oven": 79, "parking meter": 14, "person": 1, "pizza": 59, "potted plant": 64, "refrigerator": 82, "remote": 75, "sandwich": 54, "scissors": 87, "sheep": 20, "sink": 81, "skateboard": 41, "skis": 35, "snowboard": 36, "spoon": 50, "sports ball": 37, "stop sign": 13, "suitcase": 33, "surfboard": 42, "teddy bear": 88, "tennis racket": 43, "tie": 32, "toaster": 80, "toilet": 70, "toothbrush": 90, "traffic light": 10, "train": 7, "truck": 8, "tv": 72, "umbrella": 28, "vase": 86, "wine glass": 46, "zebra": 24 }, "mask_loss_coefficient": 1, "max_position_embeddings": 2048, "model_type": "deta", "num_feature_levels": 5, "num_queries": 900, "position_embedding_type": "sine", "torch_dtype": "float32", "transformers_version": null, "two_stage": true, "two_stage_num_proposals": 300, "with_box_refine": true }