{ "_name_or_path": "dandelin/vilt-b32-mlm", "architectures": [ "ViltForQuestionAnswering" ], "attention_probs_dropout_prob": 0.0, "hidden_act": "gelu", "hidden_dropout_prob": 0.0, "hidden_size": 768, "id2label": { "0": "giraffe", "1": "right", "2": "airplane", "3": "black", "4": "net", "5": "cover", "6": "ski pole", "7": "cup", "8": "red and yellow", "9": "poles", "10": "office", "11": "orange", "12": "2", "13": "white and blue", "14": "bus", "15": "6", "16": "gray", "17": "clothes", "18": "surfing", "19": "motorcycle", "20": "monitor", "21": "chair", "22": "computer", "23": "africa", "24": "dog", "25": "sweat", "26": "resting", "27": "air", "28": "conference room", "29": "green and black", "30": "yellow", "31": "background", "32": "jeep", "33": "0", "34": "shadow", "35": "watching", "36": "sign", "37": "backpack", "38": "4", "39": "walking", "40": "snowsuit", "41": "human", "42": "pink", "43": "big", "44": "crossing", "45": "brushing", "46": "birthday", "47": "spots", "48": "laptop", "49": "red and blue", "50": "website", "51": "white and black", "52": "red and white", "53": "very", "54": "bedroom", "55": "wedding", "56": "zoo", "57": "ice cream", "58": "standing", "59": "many", "60": "park", "61": "log", "62": "hawaii", "63": "purse", "64": "wine", "65": "snow", "66": "game", "67": "dirt bike", "68": "pole", "69": "surfboard", "70": "double", "71": "out", "72": "trees", "73": "sky", "74": "boy", "75": "down", "76": "can't tell", "77": "tiles", "78": "10", "79": "36", "80": "wall", "81": "sun", "82": "style", "83": "nothing", "84": "girl", "85": "vent", "86": "business", "87": "no", "88": "20 feet", "89": "beige", "90": "on", "91": "ceiling", "92": "cat", "93": "little girl", "94": "tall", "95": "shadows", "96": "black and white", "97": "woods", "98": "bear", "99": "shrimp", "100": "yes", "101": "remote", "102": "brown", "103": "leather", "104": "living room", "105": "crown", "106": "white and gray", "107": "red", "108": "smile", "109": "glasses", "110": "donut", "111": "eating", "112": "white", "113": "chopsticks", "114": "plastic", "115": "fashion", "116": "blue and white", "117": "pitcher", "118": "doughnut", "119": "helmet", "120": "skis", "121": "car", "122": "frisbee", "123": "picnic table", "124": "queen", "125": "hat", "126": "they aren't", "127": "8", "128": "blonde", "129": "tree", "130": "very big", "131": "off", "132": "7", "133": "ski poles", "134": "full", "135": "wine tasting", "136": "not sure", "137": "table", "138": "skiing", "139": "toothbrush", "140": "5", "141": "waiting", "142": "large", "143": "skateboard", "144": "motorbike", "145": "brushing teeth", "146": "train", "147": "green", "148": "backwards", "149": "paper", "150": "design", "151": "suv", "152": "desktop", "153": "outside", "154": "3", "155": "wii", "156": "can't see", "157": "blue", "158": "forest", "159": "unknown", "160": "woman", "161": "power", "162": "purple and white", "163": "tired", "164": "catcher", "165": "black white", "166": "1", "167": "king", "168": "at table" }, "image_size": 384, "initializer_range": 0.02, "intermediate_size": 3072, "label2id": { "0": 33, "1": 166, "10": 78, "2": 12, "20 feet": 88, "3": 154, "36": 79, "4": 38, "5": 140, "6": 15, "7": 132, "8": 127, "africa": 23, "air": 27, "airplane": 2, "at table": 168, "background": 31, "backpack": 37, "backwards": 148, "bear": 98, "bedroom": 54, "beige": 89, "big": 43, "birthday": 46, "black": 3, "black and white": 96, "black white": 165, "blonde": 128, "blue": 157, "blue and white": 116, "boy": 74, "brown": 102, "brushing": 45, "brushing teeth": 145, "bus": 14, "business": 86, "can't see": 156, "can't tell": 76, "car": 121, "cat": 92, "catcher": 164, "ceiling": 91, "chair": 21, "chopsticks": 113, "clothes": 17, "computer": 22, "conference room": 28, "cover": 5, "crossing": 44, "crown": 105, "cup": 7, "design": 150, "desktop": 152, "dirt bike": 67, "dog": 24, "donut": 110, "double": 70, "doughnut": 118, "down": 75, "eating": 111, "fashion": 115, "forest": 158, "frisbee": 122, "full": 134, "game": 66, "giraffe": 0, "girl": 84, "glasses": 109, "gray": 16, "green": 147, "green and black": 29, "hat": 125, "hawaii": 62, "helmet": 119, "human": 41, "ice cream": 57, "jeep": 32, "king": 167, "laptop": 48, "large": 142, "leather": 103, "little girl": 93, "living room": 104, "log": 61, "many": 59, "monitor": 20, "motorbike": 144, "motorcycle": 19, "net": 4, "no": 87, "not sure": 136, "nothing": 83, "off": 131, "office": 10, "on": 90, "orange": 11, "out": 71, "outside": 153, "paper": 149, "park": 60, "picnic table": 123, "pink": 42, "pitcher": 117, "plastic": 114, "pole": 68, "poles": 9, "power": 161, "purple and white": 162, "purse": 63, "queen": 124, "red": 107, "red and blue": 49, "red and white": 52, "red and yellow": 8, "remote": 101, "resting": 26, "right": 1, "shadow": 34, "shadows": 95, "shrimp": 99, "sign": 36, "skateboard": 143, "ski pole": 6, "ski poles": 133, "skiing": 138, "skis": 120, "sky": 73, "smile": 108, "snow": 65, "snowsuit": 40, "spots": 47, "standing": 58, "style": 82, "sun": 81, "surfboard": 69, "surfing": 18, "suv": 151, "sweat": 25, "table": 137, "tall": 94, "they aren't": 126, "tiles": 77, "tired": 163, "toothbrush": 139, "train": 146, "tree": 129, "trees": 72, "unknown": 159, "vent": 85, "very": 53, "very big": 130, "waiting": 141, "walking": 39, "wall": 80, "watching": 35, "website": 50, "wedding": 55, "white": 112, "white and black": 51, "white and blue": 13, "white and gray": 106, "wii": 155, "wine": 64, "wine tasting": 135, "woman": 160, "woods": 97, "yellow": 30, "yes": 100, "zoo": 56 }, "layer_norm_eps": 1e-12, "max_image_length": -1, "max_position_embeddings": 40, "modality_type_vocab_size": 2, "model_type": "vilt", "num_attention_heads": 12, "num_channels": 3, "num_hidden_layers": 12, "num_images": -1, "patch_size": 32, "qkv_bias": true, "tie_word_embeddings": false, "torch_dtype": "float32", "transformers_version": "4.38.1", "type_vocab_size": 2, "vocab_size": 30522 }