{ "_name_or_path": "dandelin/vilt-b32-mlm", "architectures": [ "ViltForQuestionAnswering" ], "attention_probs_dropout_prob": 0.0, "hidden_act": "gelu", "hidden_dropout_prob": 0.0, "hidden_size": 768, "id2label": { "0": "tabby", "1": "shade", "2": "tv", "3": "fence", "4": "shelter", "5": "out", "6": "unknown", "7": "smile", "8": "zoo", "9": "beige", "10": "7:45", "11": "they aren't", "12": "security", "13": "not sure", "14": "green", "15": "double", "16": "ice cream", "17": "sky", "18": "trees", "19": "car", "20": "3", "21": "bicycle", "22": "white and blue", "23": "chair", "24": "walking", "25": "queen", "26": "low", "27": "in car", "28": "1", "29": "fashion", "30": "smiling", "31": "gray", "32": "2000", "33": "bus", "34": "pink", "35": "outside", "36": "lying down", "37": "human", "38": "yellow", "39": "on road", "40": "roof", "41": "name tag", "42": "5", "43": "ball", "44": "skier", "45": "station", "46": "cup", "47": "bricks", "48": "cat", "49": "hair", "50": "solid", "51": "neon", "52": "2013", "53": "cloudy", "54": "6", "55": "clear", "56": "leather", "57": "on street", "58": "lady", "59": "tired", "60": "lg", "61": "sun", "62": "big ben", "63": "dirt", "64": "black and white", "65": "9:35", "66": "church", "67": "plain", "68": "hawaii", "69": "french", "70": "soccer", "71": "red and blue", "72": "at table", "73": "skateboarding", "74": "4", "75": "hat", "76": "women", "77": "orange", "78": "don't know", "79": "picnic table", "80": "white", "81": "clock tower", "82": "lanyard", "83": "necklace", "84": "beagle", "85": "wine tasting", "86": "canopy", "87": "laying down", "88": "brick", "89": "sidewalk", "90": "cage", "91": "right", "92": "no", "93": "shrimp", "94": "backpack", "95": "8:35", "96": "7:35", "97": "plate", "98": "net", "99": "platform", "100": "exit", "101": "purple", "102": "many", "103": "bikes", "104": "train", "105": "park", "106": "2010", "107": "style", "108": "snowboard", "109": "man", "110": "giraffe", "111": "woman", "112": "down", "113": "skateboard", "114": "brown", "115": "person", "116": "shadow", "117": "bicycles", "118": "air", "119": "gray and black", "120": "skiing", "121": "dog", "122": "jeep", "123": "tan", "124": "blue", "125": "blonde", "126": "large", "127": "plastic", "128": "boy", "129": "chopsticks", "130": "shadows", "131": "wedding", "132": "donut", "133": "rack", "134": "wall", "135": "screen", "136": "protection", "137": "natural", "138": "bike rack", "139": "suv", "140": "arrow", "141": "street", "142": "calico", "143": "resting", "144": "tent", "145": "curtain", "146": "cross", "147": "table", "148": "little girl", "149": "10", "150": "red", "151": "snowboarding", "152": "birthday", "153": "black", "154": "doughnut", "155": "soccer ball", "156": "stripes", "157": "snow", "158": "photographer", "159": "windows", "160": "talking", "161": "0", "162": "wine", "163": "ground", "164": "blue and white", "165": "monitor", "166": "stand", "167": "africa", "168": "white and black", "169": "tower", "170": "crown", "171": "not there", "172": "happy", "173": "can't tell", "174": "camera", "175": "red and yellow", "176": "desert", "177": "full", "178": "girl", "179": "woods", "180": "watching", "181": "7", "182": "nothing", "183": "king", "184": "forest", "185": "8", "186": "door", "187": "talking on phone", "188": "window", "189": "crossing", "190": "yes", "191": "giraffes", "192": "clock", "193": "2", "194": "small", "195": "sleeping", "196": "curtains", "197": "snowboarder", "198": "bedroom" }, "image_size": 384, "initializer_range": 0.02, "intermediate_size": 3072, "label2id": { "0": 161, "1": 28, "10": 149, "2": 193, "2000": 32, "2010": 106, "2013": 52, "3": 20, "4": 74, "5": 42, "6": 54, "7": 181, "7:35": 96, "7:45": 10, "8": 185, "8:35": 95, "9:35": 65, "africa": 167, "air": 118, "arrow": 140, "at table": 72, "backpack": 94, "ball": 43, "beagle": 84, "bedroom": 198, "beige": 9, "bicycle": 21, "bicycles": 117, "big ben": 62, "bike rack": 138, "bikes": 103, "birthday": 152, "black": 153, "black and white": 64, "blonde": 125, "blue": 124, "blue and white": 164, "boy": 128, "brick": 88, "bricks": 47, "brown": 114, "bus": 33, "cage": 90, "calico": 142, "camera": 174, "can't tell": 173, "canopy": 86, "car": 19, "cat": 48, "chair": 23, "chopsticks": 129, "church": 66, "clear": 55, "clock": 192, "clock tower": 81, "cloudy": 53, "cross": 146, "crossing": 189, "crown": 170, "cup": 46, "curtain": 145, "curtains": 196, "desert": 176, "dirt": 63, "dog": 121, "don't know": 78, "donut": 132, "door": 186, "double": 15, "doughnut": 154, "down": 112, "exit": 100, "fashion": 29, "fence": 3, "forest": 184, "french": 69, "full": 177, "giraffe": 110, "giraffes": 191, "girl": 178, "gray": 31, "gray and black": 119, "green": 14, "ground": 163, "hair": 49, "happy": 172, "hat": 75, "hawaii": 68, "human": 37, "ice cream": 16, "in car": 27, "jeep": 122, "king": 183, "lady": 58, "lanyard": 82, "large": 126, "laying down": 87, "leather": 56, "lg": 60, "little girl": 148, "low": 26, "lying down": 36, "man": 109, "many": 102, "monitor": 165, "name tag": 41, "natural": 137, "necklace": 83, "neon": 51, "net": 98, "no": 92, "not sure": 13, "not there": 171, "nothing": 182, "on road": 39, "on street": 57, "orange": 77, "out": 5, "outside": 35, "park": 105, "person": 115, "photographer": 158, "picnic table": 79, "pink": 34, "plain": 67, "plastic": 127, "plate": 97, "platform": 99, "protection": 136, "purple": 101, "queen": 25, "rack": 133, "red": 150, "red and blue": 71, "red and yellow": 175, "resting": 143, "right": 91, "roof": 40, "screen": 135, "security": 12, "shade": 1, "shadow": 116, "shadows": 130, "shelter": 4, "shrimp": 93, "sidewalk": 89, "skateboard": 113, "skateboarding": 73, "skier": 44, "skiing": 120, "sky": 17, "sleeping": 195, "small": 194, "smile": 7, "smiling": 30, "snow": 157, "snowboard": 108, "snowboarder": 197, "snowboarding": 151, "soccer": 70, "soccer ball": 155, "solid": 50, "stand": 166, "station": 45, "street": 141, "stripes": 156, "style": 107, "sun": 61, "suv": 139, "tabby": 0, "table": 147, "talking": 160, "talking on phone": 187, "tan": 123, "tent": 144, "they aren't": 11, "tired": 59, "tower": 169, "train": 104, "trees": 18, "tv": 2, "unknown": 6, "walking": 24, "wall": 134, "watching": 180, "wedding": 131, "white": 80, "white and black": 168, "white and blue": 22, "window": 188, "windows": 159, "wine": 162, "wine tasting": 85, "woman": 111, "women": 76, "woods": 179, "yellow": 38, "yes": 190, "zoo": 8 }, "layer_norm_eps": 1e-12, "max_image_length": -1, "max_position_embeddings": 40, "modality_type_vocab_size": 2, "model_type": "vilt", "num_attention_heads": 12, "num_channels": 3, "num_hidden_layers": 12, "num_images": -1, "patch_size": 32, "qkv_bias": true, "tie_word_embeddings": false, "torch_dtype": "float32", "transformers_version": "4.34.0", "type_vocab_size": 2, "vocab_size": 30522 }