{ "_name_or_path": "dandelin/vilt-b32-mlm", "architectures": [ "ViltForQuestionAnswering" ], "attention_probs_dropout_prob": 0.0, "hidden_act": "gelu", "hidden_dropout_prob": 0.0, "hidden_size": 768, "id2label": { "0": "skateboard", "1": "cloudy", "2": "stripes", "3": "8", "4": "giraffes", "5": "backpack", "6": "king", "7": "nothing", "8": "pink", "9": "arrow", "10": "snowboard", "11": "don't know", "12": "7:45", "13": "4", "14": "talking", "15": "train", "16": "camera", "17": "style", "18": "unknown", "19": "snowboarding", "20": "10", "21": "beige", "22": "zoo", "23": "no", "24": "orange", "25": "bikes", "26": "photographer", "27": "blue and white", "28": "sun", "29": "desert", "30": "wine", "31": "women", "32": "suv", "33": "white and black", "34": "2", "35": "picnic table", "36": "wine tasting", "37": "shade", "38": "big ben", "39": "brown", "40": "ground", "41": "snow", "42": "beagle", "43": "lying down", "44": "birthday", "45": "can't tell", "46": "monitor", "47": "little girl", "48": "right", "49": "hat", "50": "2010", "51": "girl", "52": "wedding", "53": "tired", "54": "skateboarding", "55": "cage", "56": "platform", "57": "they aren't", "58": "gray and black", "59": "in car", "60": "red and blue", "61": "stand", "62": "car", "63": "tower", "64": "double", "65": "black and white", "66": "air", "67": "5", "68": "9:35", "69": "not there", "70": "bricks", "71": "laying down", "72": "out", "73": "screen", "74": "tan", "75": "cat", "76": "sky", "77": "station", "78": "soccer", "79": "0", "80": "yes", "81": "exit", "82": "cup", "83": "talking on phone", "84": "resting", "85": "windows", "86": "bike rack", "87": "on street", "88": "human", "89": "dog", "90": "2000", "91": "person", "92": "plate", "93": "down", "94": "crossing", "95": "low", "96": "woods", "97": "plain", "98": "park", "99": "2013", "100": "curtain", "101": "giraffe", "102": "snowboarder", "103": "tent", "104": "shelter", "105": "woman", "106": "neon", "107": "blonde", "108": "bedroom", "109": "purple", "110": "natural", "111": "shadows", "112": "door", "113": "red and yellow", "114": "small", "115": "hawaii", "116": "black", "117": "solid", "118": "large", "119": "watching", "120": "net", "121": "leather", "122": "donut", "123": "cross", "124": "soccer ball", "125": "french", "126": "yellow", "127": "fence", "128": "happy", "129": "sidewalk", "130": "doughnut", "131": "1", "132": "white and blue", "133": "shrimp", "134": "dirt", "135": "lady", "136": "queen", "137": "not sure", "138": "bicycle", "139": "trees", "140": "bus", "141": "green", "142": "curtains", "143": "6", "144": "church", "145": "man", "146": "calico", "147": "street", "148": "chopsticks", "149": "ice cream", "150": "3", "151": "walking", "152": "roof", "153": "lanyard", "154": "7:35", "155": "plastic", "156": "at table", "157": "brick", "158": "wall", "159": "fashion", "160": "canopy", "161": "gray", "162": "clock tower", "163": "skier", "164": "lg", "165": "ball", "166": "tv", "167": "full", "168": "bicycles", "169": "sleeping", "170": "rack", "171": "crown", "172": "africa", "173": "security", "174": "boy", "175": "shadow", "176": "tabby", "177": "blue", "178": "window", "179": "hair", "180": "outside", "181": "red", "182": "table", "183": "clear", "184": "8:35", "185": "protection", "186": "necklace", "187": "jeep", "188": "white", "189": "7", "190": "many", "191": "skiing", "192": "forest", "193": "name tag", "194": "chair", "195": "smile", "196": "smiling", "197": "clock", "198": "on road" }, "image_size": 384, "initializer_range": 0.02, "intermediate_size": 3072, "label2id": { "0": 79, "1": 131, "10": 20, "2": 34, "2000": 90, "2010": 50, "2013": 99, "3": 150, "4": 13, "5": 67, "6": 143, "7": 189, "7:35": 154, "7:45": 12, "8": 3, "8:35": 184, "9:35": 68, "africa": 172, "air": 66, "arrow": 9, "at table": 156, "backpack": 5, "ball": 165, "beagle": 42, "bedroom": 108, "beige": 21, "bicycle": 138, "bicycles": 168, "big ben": 38, "bike rack": 86, "bikes": 25, "birthday": 44, "black": 116, "black and white": 65, "blonde": 107, "blue": 177, "blue and white": 27, "boy": 174, "brick": 157, "bricks": 70, "brown": 39, "bus": 140, "cage": 55, "calico": 146, "camera": 16, "can't tell": 45, "canopy": 160, "car": 62, "cat": 75, "chair": 194, "chopsticks": 148, "church": 144, "clear": 183, "clock": 197, "clock tower": 162, "cloudy": 1, "cross": 123, "crossing": 94, "crown": 171, "cup": 82, "curtain": 100, "curtains": 142, "desert": 29, "dirt": 134, "dog": 89, "don't know": 11, "donut": 122, "door": 112, "double": 64, "doughnut": 130, "down": 93, "exit": 81, "fashion": 159, "fence": 127, "forest": 192, "french": 125, "full": 167, "giraffe": 101, "giraffes": 4, "girl": 51, "gray": 161, "gray and black": 58, "green": 141, "ground": 40, "hair": 179, "happy": 128, "hat": 49, "hawaii": 115, "human": 88, "ice cream": 149, "in car": 59, "jeep": 187, "king": 6, "lady": 135, "lanyard": 153, "large": 118, "laying down": 71, "leather": 121, "lg": 164, "little girl": 47, "low": 95, "lying down": 43, "man": 145, "many": 190, "monitor": 46, "name tag": 193, "natural": 110, "necklace": 186, "neon": 106, "net": 120, "no": 23, "not sure": 137, "not there": 69, "nothing": 7, "on road": 198, "on street": 87, "orange": 24, "out": 72, "outside": 180, "park": 98, "person": 91, "photographer": 26, "picnic table": 35, "pink": 8, "plain": 97, "plastic": 155, "plate": 92, "platform": 56, "protection": 185, "purple": 109, "queen": 136, "rack": 170, "red": 181, "red and blue": 60, "red and yellow": 113, "resting": 84, "right": 48, "roof": 152, "screen": 73, "security": 173, "shade": 37, "shadow": 175, "shadows": 111, "shelter": 104, "shrimp": 133, "sidewalk": 129, "skateboard": 0, "skateboarding": 54, "skier": 163, "skiing": 191, "sky": 76, "sleeping": 169, "small": 114, "smile": 195, "smiling": 196, "snow": 41, "snowboard": 10, "snowboarder": 102, "snowboarding": 19, "soccer": 78, "soccer ball": 124, "solid": 117, "stand": 61, "station": 77, "street": 147, "stripes": 2, "style": 17, "sun": 28, "suv": 32, "tabby": 176, "table": 182, "talking": 14, "talking on phone": 83, "tan": 74, "tent": 103, "they aren't": 57, "tired": 53, "tower": 63, "train": 15, "trees": 139, "tv": 166, "unknown": 18, "walking": 151, "wall": 158, "watching": 119, "wedding": 52, "white": 188, "white and black": 33, "white and blue": 132, "window": 178, "windows": 85, "wine": 30, "wine tasting": 36, "woman": 105, "women": 31, "woods": 96, "yellow": 126, "yes": 80, "zoo": 22 }, "layer_norm_eps": 1e-12, "max_image_length": -1, "max_position_embeddings": 40, "modality_type_vocab_size": 2, "model_type": "vilt", "num_attention_heads": 12, "num_channels": 3, "num_hidden_layers": 12, "num_images": -1, "patch_size": 32, "qkv_bias": true, "tie_word_embeddings": false, "torch_dtype": "float32", "transformers_version": "4.38.2", "type_vocab_size": 2, "vocab_size": 30522 }