{ "_name_or_path": "dandelin/vilt-b32-mlm", "architectures": [ "ViltForQuestionAnswering" ], "attention_probs_dropout_prob": 0.0, "hidden_act": "gelu", "hidden_dropout_prob": 0.0, "hidden_size": 768, "id2label": { "0": "shrimp", "1": "woods", "2": "sky", "3": "at table", "4": "style", "5": "desert", "6": "lying down", "7": "sleeping", "8": "shadows", "9": "skateboard", "10": "birthday", "11": "bikes", "12": "air", "13": "4", "14": "train", "15": "stand", "16": "bus", "17": "white and black", "18": "fashion", "19": "man", "20": "women", "21": "trees", "22": "dirt", "23": "screen", "24": "platform", "25": "tired", "26": "plastic", "27": "full", "28": "ice cream", "29": "hat", "30": "arrow", "31": "donut", "32": "6", "33": "not sure", "34": "brick", "35": "crown", "36": "8", "37": "cat", "38": "0", "39": "many", "40": "king", "41": "clock tower", "42": "big ben", "43": "name tag", "44": "solid", "45": "tv", "46": "outside", "47": "nothing", "48": "wine tasting", "49": "car", "50": "soccer", "51": "girl", "52": "pink", "53": "chair", "54": "blue", "55": "giraffe", "56": "bricks", "57": "blue and white", "58": "they aren't", "59": "double", "60": "giraffes", "61": "lanyard", "62": "beagle", "63": "tabby", "64": "7:35", "65": "park", "66": "7", "67": "9:35", "68": "blonde", "69": "photographer", "70": "ball", "71": "curtains", "72": "natural", "73": "cloudy", "74": "door", "75": "on street", "76": "unknown", "77": "woman", "78": "tower", "79": "cup", "80": "yes", "81": "black", "82": "can't tell", "83": "green", "84": "black and white", "85": "3", "86": "2010", "87": "white and blue", "88": "tent", "89": "wall", "90": "wedding", "91": "africa", "92": "7:45", "93": "fence", "94": "station", "95": "crossing", "96": "cage", "97": "skiing", "98": "plate", "99": "snowboarding", "100": "table", "101": "purple", "102": "lady", "103": "lg", "104": "in car", "105": "red and blue", "106": "bicycle", "107": "leather", "108": "clear", "109": "not there", "110": "smile", "111": "small", "112": "1", "113": "clock", "114": "gray and black", "115": "security", "116": "orange", "117": "ground", "118": "brown", "119": "person", "120": "talking on phone", "121": "2013", "122": "bedroom", "123": "shelter", "124": "camera", "125": "down", "126": "monitor", "127": "neon", "128": "queen", "129": "hawaii", "130": "human", "131": "calico", "132": "out", "133": "snowboarder", "134": "zoo", "135": "white", "136": "protection", "137": "happy", "138": "street", "139": "suv", "140": "plain", "141": "cross", "142": "low", "143": "french", "144": "8:35", "145": "chopsticks", "146": "shade", "147": "no", "148": "necklace", "149": "large", "150": "2000", "151": "sun", "152": "tan", "153": "snowboard", "154": "hair", "155": "skateboarding", "156": "shadow", "157": "curtain", "158": "doughnut", "159": "jeep", "160": "red", "161": "gray", "162": "dog", "163": "backpack", "164": "bicycles", "165": "little girl", "166": "exit", "167": "rack", "168": "roof", "169": "net", "170": "snow", "171": "2", "172": "watching", "173": "skier", "174": "canopy", "175": "talking", "176": "boy", "177": "walking", "178": "church", "179": "soccer ball", "180": "windows", "181": "picnic table", "182": "wine", "183": "beige", "184": "bike rack", "185": "forest", "186": "10", "187": "red and yellow", "188": "5", "189": "right", "190": "smiling", "191": "stripes", "192": "resting", "193": "don't know", "194": "window", "195": "on road", "196": "laying down", "197": "sidewalk", "198": "yellow" }, "image_size": 384, "initializer_range": 0.02, "intermediate_size": 3072, "label2id": { "0": 38, "1": 112, "10": 186, "2": 171, "2000": 150, "2010": 86, "2013": 121, "3": 85, "4": 13, "5": 188, "6": 32, "7": 66, "7:35": 64, "7:45": 92, "8": 36, "8:35": 144, "9:35": 67, "africa": 91, "air": 12, "arrow": 30, "at table": 3, "backpack": 163, "ball": 70, "beagle": 62, "bedroom": 122, "beige": 183, "bicycle": 106, "bicycles": 164, "big ben": 42, "bike rack": 184, "bikes": 11, "birthday": 10, "black": 81, "black and white": 84, "blonde": 68, "blue": 54, "blue and white": 57, "boy": 176, "brick": 34, "bricks": 56, "brown": 118, "bus": 16, "cage": 96, "calico": 131, "camera": 124, "can't tell": 82, "canopy": 174, "car": 49, "cat": 37, "chair": 53, "chopsticks": 145, "church": 178, "clear": 108, "clock": 113, "clock tower": 41, "cloudy": 73, "cross": 141, "crossing": 95, "crown": 35, "cup": 79, "curtain": 157, "curtains": 71, "desert": 5, "dirt": 22, "dog": 162, "don't know": 193, "donut": 31, "door": 74, "double": 59, "doughnut": 158, "down": 125, "exit": 166, "fashion": 18, "fence": 93, "forest": 185, "french": 143, "full": 27, "giraffe": 55, "giraffes": 60, "girl": 51, "gray": 161, "gray and black": 114, "green": 83, "ground": 117, "hair": 154, "happy": 137, "hat": 29, "hawaii": 129, "human": 130, "ice cream": 28, "in car": 104, "jeep": 159, "king": 40, "lady": 102, "lanyard": 61, "large": 149, "laying down": 196, "leather": 107, "lg": 103, "little girl": 165, "low": 142, "lying down": 6, "man": 19, "many": 39, "monitor": 126, "name tag": 43, "natural": 72, "necklace": 148, "neon": 127, "net": 169, "no": 147, "not sure": 33, "not there": 109, "nothing": 47, "on road": 195, "on street": 75, "orange": 116, "out": 132, "outside": 46, "park": 65, "person": 119, "photographer": 69, "picnic table": 181, "pink": 52, "plain": 140, "plastic": 26, "plate": 98, "platform": 24, "protection": 136, "purple": 101, "queen": 128, "rack": 167, "red": 160, "red and blue": 105, "red and yellow": 187, "resting": 192, "right": 189, "roof": 168, "screen": 23, "security": 115, "shade": 146, "shadow": 156, "shadows": 8, "shelter": 123, "shrimp": 0, "sidewalk": 197, "skateboard": 9, "skateboarding": 155, "skier": 173, "skiing": 97, "sky": 2, "sleeping": 7, "small": 111, "smile": 110, "smiling": 190, "snow": 170, "snowboard": 153, "snowboarder": 133, "snowboarding": 99, "soccer": 50, "soccer ball": 179, "solid": 44, "stand": 15, "station": 94, "street": 138, "stripes": 191, "style": 4, "sun": 151, "suv": 139, "tabby": 63, "table": 100, "talking": 175, "talking on phone": 120, "tan": 152, "tent": 88, "they aren't": 58, "tired": 25, "tower": 78, "train": 14, "trees": 21, "tv": 45, "unknown": 76, "walking": 177, "wall": 89, "watching": 172, "wedding": 90, "white": 135, "white and black": 17, "white and blue": 87, "window": 194, "windows": 180, "wine": 182, "wine tasting": 48, "woman": 77, "women": 20, "woods": 1, "yellow": 198, "yes": 80, "zoo": 134 }, "layer_norm_eps": 1e-12, "max_image_length": -1, "max_position_embeddings": 40, "modality_type_vocab_size": 2, "model_type": "vilt", "num_attention_heads": 12, "num_channels": 3, "num_hidden_layers": 12, "num_images": -1, "patch_size": 32, "qkv_bias": true, "tie_word_embeddings": false, "torch_dtype": "float32", "transformers_version": "4.33.3", "type_vocab_size": 2, "vocab_size": 30522 }