{ "_name_or_path": "dandelin/vilt-b32-mlm", "architectures": [ "ViltForQuestionAnswering" ], "attention_probs_dropout_prob": 0.0, "hidden_act": "gelu", "hidden_dropout_prob": 0.0, "hidden_size": 768, "id2label": { "0": "down", "1": "shrimp", "2": "3", "3": "stand", "4": "wine tasting", "5": "ball", "6": "at table", "7": "picnic table", "8": "8", "9": "king", "10": "0", "11": "door", "12": "monitor", "13": "not sure", "14": "snowboard", "15": "air", "16": "ice cream", "17": "crossing", "18": "crown", "19": "blue", "20": "black and white", "21": "curtain", "22": "resting", "23": "dirt", "24": "woman", "25": "large", "26": "skiing", "27": "10", "28": "low", "29": "tabby", "30": "5", "31": "in car", "32": "protection", "33": "nothing", "34": "necklace", "35": "brown", "36": "7:45", "37": "bedroom", "38": "dog", "39": "on road", "40": "little girl", "41": "bricks", "42": "6", "43": "red and yellow", "44": "name tag", "45": "screen", "46": "sidewalk", "47": "brick", "48": "human", "49": "wine", "50": "women", "51": "cup", "52": "smile", "53": "talking", "54": "street", "55": "hair", "56": "girl", "57": "natural", "58": "small", "59": "photographer", "60": "gray and black", "61": "beige", "62": "boy", "63": "plain", "64": "2010", "65": "don't know", "66": "lanyard", "67": "double", "68": "lg", "69": "french", "70": "4", "71": "desert", "72": "many", "73": "white and blue", "74": "church", "75": "outside", "76": "walking", "77": "table", "78": "cage", "79": "shelter", "80": "park", "81": "full", "82": "7", "83": "snowboarder", "84": "camera", "85": "tired", "86": "chopsticks", "87": "car", "88": "station", "89": "tv", "90": "talking on phone", "91": "fashion", "92": "birthday", "93": "cloudy", "94": "leather", "95": "exit", "96": "gray", "97": "orange", "98": "9:35", "99": "suv", "100": "white", "101": "yes", "102": "person", "103": "yellow", "104": "forest", "105": "security", "106": "sleeping", "107": "1", "108": "purple", "109": "giraffes", "110": "blue and white", "111": "2", "112": "not there", "113": "shade", "114": "wedding", "115": "bicycle", "116": "hawaii", "117": "tan", "118": "windows", "119": "queen", "120": "window", "121": "plate", "122": "sky", "123": "hat", "124": "calico", "125": "jeep", "126": "clock", "127": "blonde", "128": "snow", "129": "chair", "130": "platform", "131": "on street", "132": "unknown", "133": "laying down", "134": "happy", "135": "big ben", "136": "plastic", "137": "train", "138": "stripes", "139": "net", "140": "beagle", "141": "7:35", "142": "solid", "143": "watching", "144": "canopy", "145": "bus", "146": "cross", "147": "rack", "148": "doughnut", "149": "black", "150": "pink", "151": "woods", "152": "they aren't", "153": "shadow", "154": "2013", "155": "8:35", "156": "right", "157": "can't tell", "158": "man", "159": "backpack", "160": "no", "161": "tent", "162": "white and black", "163": "skateboard", "164": "green", "165": "wall", "166": "roof", "167": "donut", "168": "style", "169": "sun", "170": "bikes", "171": "fence", "172": "trees", "173": "zoo", "174": "shadows", "175": "arrow", "176": "clock tower", "177": "snowboarding", "178": "soccer", "179": "africa", "180": "out", "181": "red and blue", "182": "skateboarding", "183": "red", "184": "neon", "185": "smiling", "186": "bicycles", "187": "lying down", "188": "soccer ball", "189": "skier", "190": "curtains", "191": "giraffe", "192": "tower", "193": "lady", "194": "bike rack", "195": "2000", "196": "clear", "197": "cat", "198": "ground" }, "image_size": 384, "initializer_range": 0.02, "intermediate_size": 3072, "label2id": { "0": 10, "1": 107, "10": 27, "2": 111, "2000": 195, "2010": 64, "2013": 154, "3": 2, "4": 70, "5": 30, "6": 42, "7": 82, "7:35": 141, "7:45": 36, "8": 8, "8:35": 155, "9:35": 98, "africa": 179, "air": 15, "arrow": 175, "at table": 6, "backpack": 159, "ball": 5, "beagle": 140, "bedroom": 37, "beige": 61, "bicycle": 115, "bicycles": 186, "big ben": 135, "bike rack": 194, "bikes": 170, "birthday": 92, "black": 149, "black and white": 20, "blonde": 127, "blue": 19, "blue and white": 110, "boy": 62, "brick": 47, "bricks": 41, "brown": 35, "bus": 145, "cage": 78, "calico": 124, "camera": 84, "can't tell": 157, "canopy": 144, "car": 87, "cat": 197, "chair": 129, "chopsticks": 86, "church": 74, "clear": 196, "clock": 126, "clock tower": 176, "cloudy": 93, "cross": 146, "crossing": 17, "crown": 18, "cup": 51, "curtain": 21, "curtains": 190, "desert": 71, "dirt": 23, "dog": 38, "don't know": 65, "donut": 167, "door": 11, "double": 67, "doughnut": 148, "down": 0, "exit": 95, "fashion": 91, "fence": 171, "forest": 104, "french": 69, "full": 81, "giraffe": 191, "giraffes": 109, "girl": 56, "gray": 96, "gray and black": 60, "green": 164, "ground": 198, "hair": 55, "happy": 134, "hat": 123, "hawaii": 116, "human": 48, "ice cream": 16, "in car": 31, "jeep": 125, "king": 9, "lady": 193, "lanyard": 66, "large": 25, "laying down": 133, "leather": 94, "lg": 68, "little girl": 40, "low": 28, "lying down": 187, "man": 158, "many": 72, "monitor": 12, "name tag": 44, "natural": 57, "necklace": 34, "neon": 184, "net": 139, "no": 160, "not sure": 13, "not there": 112, "nothing": 33, "on road": 39, "on street": 131, "orange": 97, "out": 180, "outside": 75, "park": 80, "person": 102, "photographer": 59, "picnic table": 7, "pink": 150, "plain": 63, "plastic": 136, "plate": 121, "platform": 130, "protection": 32, "purple": 108, "queen": 119, "rack": 147, "red": 183, "red and blue": 181, "red and yellow": 43, "resting": 22, "right": 156, "roof": 166, "screen": 45, "security": 105, "shade": 113, "shadow": 153, "shadows": 174, "shelter": 79, "shrimp": 1, "sidewalk": 46, "skateboard": 163, "skateboarding": 182, "skier": 189, "skiing": 26, "sky": 122, "sleeping": 106, "small": 58, "smile": 52, "smiling": 185, "snow": 128, "snowboard": 14, "snowboarder": 83, "snowboarding": 177, "soccer": 178, "soccer ball": 188, "solid": 142, "stand": 3, "station": 88, "street": 54, "stripes": 138, "style": 168, "sun": 169, "suv": 99, "tabby": 29, "table": 77, "talking": 53, "talking on phone": 90, "tan": 117, "tent": 161, "they aren't": 152, "tired": 85, "tower": 192, "train": 137, "trees": 172, "tv": 89, "unknown": 132, "walking": 76, "wall": 165, "watching": 143, "wedding": 114, "white": 100, "white and black": 162, "white and blue": 73, "window": 120, "windows": 118, "wine": 49, "wine tasting": 4, "woman": 24, "women": 50, "woods": 151, "yellow": 103, "yes": 101, "zoo": 173 }, "layer_norm_eps": 1e-12, "max_image_length": -1, "max_position_embeddings": 40, "modality_type_vocab_size": 2, "model_type": "vilt", "num_attention_heads": 12, "num_channels": 3, "num_hidden_layers": 12, "num_images": -1, "patch_size": 32, "qkv_bias": true, "tie_word_embeddings": false, "torch_dtype": "float32", "transformers_version": "4.39.3", "type_vocab_size": 2, "vocab_size": 30522 }