{ "_name_or_path": "dandelin/vilt-b32-mlm", "architectures": [ "ViltForQuestionAnswering" ], "attention_probs_dropout_prob": 0.0, "hidden_act": "gelu", "hidden_dropout_prob": 0.0, "hidden_size": 768, "id2label": { "0": "doughnut", "1": "style", "2": "8:35", "3": "rack", "4": "bricks", "5": "red and blue", "6": "unknown", "7": "green", "8": "big ben", "9": "snow", "10": "shade", "11": "picnic table", "12": "not sure", "13": "dog", "14": "street", "15": "cat", "16": "fashion", "17": "shadows", "18": "brown", "19": "suv", "20": "beagle", "21": "crown", "22": "10", "23": "full", "24": "woods", "25": "0", "26": "snowboarding", "27": "hat", "28": "nothing", "29": "cup", "30": "train", "31": "window", "32": "6", "33": "out", "34": "blue and white", "35": "snowboard", "36": "plastic", "37": "down", "38": "shelter", "39": "curtain", "40": "beige", "41": "monitor", "42": "tan", "43": "gray and black", "44": "in car", "45": "skateboard", "46": "cage", "47": "name tag", "48": "fence", "49": "8", "50": "orange", "51": "security", "52": "birthday", "53": "camera", "54": "7", "55": "women", "56": "shadow", "57": "sun", "58": "person", "59": "exit", "60": "9:35", "61": "clear", "62": "white and black", "63": "little girl", "64": "double", "65": "queen", "66": "blonde", "67": "snowboarder", "68": "neon", "69": "smile", "70": "canopy", "71": "7:35", "72": "church", "73": "sidewalk", "74": "man", "75": "blue", "76": "jeep", "77": "clock", "78": "zoo", "79": "skiing", "80": "hair", "81": "ice cream", "82": "white and blue", "83": "king", "84": "giraffes", "85": "soccer", "86": "walking", "87": "2", "88": "red", "89": "bikes", "90": "not there", "91": "2010", "92": "bicycles", "93": "no", "94": "tower", "95": "solid", "96": "on road", "97": "white", "98": "tabby", "99": "5", "100": "boy", "101": "black and white", "102": "red and yellow", "103": "yes", "104": "stand", "105": "roof", "106": "french", "107": "4", "108": "clock tower", "109": "bicycle", "110": "bus", "111": "tent", "112": "happy", "113": "soccer ball", "114": "natural", "115": "tv", "116": "calico", "117": "wine tasting", "118": "lg", "119": "3", "120": "giraffe", "121": "protection", "122": "outside", "123": "don't know", "124": "africa", "125": "cross", "126": "lanyard", "127": "on street", "128": "air", "129": "wine", "130": "backpack", "131": "desert", "132": "right", "133": "large", "134": "car", "135": "screen", "136": "laying down", "137": "many", "138": "door", "139": "platform", "140": "windows", "141": "they aren't", "142": "1", "143": "purple", "144": "gray", "145": "arrow", "146": "low", "147": "7:45", "148": "chair", "149": "woman", "150": "table", "151": "can't tell", "152": "brick", "153": "plate", "154": "black", "155": "net", "156": "watching", "157": "shrimp", "158": "crossing", "159": "sleeping", "160": "necklace", "161": "talking", "162": "station", "163": "2013", "164": "trees", "165": "lying down", "166": "lady", "167": "sky", "168": "photographer", "169": "donut", "170": "wall", "171": "curtains", "172": "leather", "173": "ball", "174": "bedroom", "175": "at table", "176": "hawaii", "177": "girl", "178": "cloudy", "179": "smiling", "180": "pink", "181": "skateboarding", "182": "2000", "183": "skier", "184": "resting", "185": "park", "186": "talking on phone", "187": "ground", "188": "human", "189": "tired", "190": "yellow", "191": "forest", "192": "small", "193": "bike rack", "194": "wedding", "195": "dirt", "196": "stripes", "197": "plain", "198": "chopsticks" }, "image_size": 384, "initializer_range": 0.02, "intermediate_size": 3072, "label2id": { "0": 25, "1": 142, "10": 22, "2": 87, "2000": 182, "2010": 91, "2013": 163, "3": 119, "4": 107, "5": 99, "6": 32, "7": 54, "7:35": 71, "7:45": 147, "8": 49, "8:35": 2, "9:35": 60, "africa": 124, "air": 128, "arrow": 145, "at table": 175, "backpack": 130, "ball": 173, "beagle": 20, "bedroom": 174, "beige": 40, "bicycle": 109, "bicycles": 92, "big ben": 8, "bike rack": 193, "bikes": 89, "birthday": 52, "black": 154, "black and white": 101, "blonde": 66, "blue": 75, "blue and white": 34, "boy": 100, "brick": 152, "bricks": 4, "brown": 18, "bus": 110, "cage": 46, "calico": 116, "camera": 53, "can't tell": 151, "canopy": 70, "car": 134, "cat": 15, "chair": 148, "chopsticks": 198, "church": 72, "clear": 61, "clock": 77, "clock tower": 108, "cloudy": 178, "cross": 125, "crossing": 158, "crown": 21, "cup": 29, "curtain": 39, "curtains": 171, "desert": 131, "dirt": 195, "dog": 13, "don't know": 123, "donut": 169, "door": 138, "double": 64, "doughnut": 0, "down": 37, "exit": 59, "fashion": 16, "fence": 48, "forest": 191, "french": 106, "full": 23, "giraffe": 120, "giraffes": 84, "girl": 177, "gray": 144, "gray and black": 43, "green": 7, "ground": 187, "hair": 80, "happy": 112, "hat": 27, "hawaii": 176, "human": 188, "ice cream": 81, "in car": 44, "jeep": 76, "king": 83, "lady": 166, "lanyard": 126, "large": 133, "laying down": 136, "leather": 172, "lg": 118, "little girl": 63, "low": 146, "lying down": 165, "man": 74, "many": 137, "monitor": 41, "name tag": 47, "natural": 114, "necklace": 160, "neon": 68, "net": 155, "no": 93, "not sure": 12, "not there": 90, "nothing": 28, "on road": 96, "on street": 127, "orange": 50, "out": 33, "outside": 122, "park": 185, "person": 58, "photographer": 168, "picnic table": 11, "pink": 180, "plain": 197, "plastic": 36, "plate": 153, "platform": 139, "protection": 121, "purple": 143, "queen": 65, "rack": 3, "red": 88, "red and blue": 5, "red and yellow": 102, "resting": 184, "right": 132, "roof": 105, "screen": 135, "security": 51, "shade": 10, "shadow": 56, "shadows": 17, "shelter": 38, "shrimp": 157, "sidewalk": 73, "skateboard": 45, "skateboarding": 181, "skier": 183, "skiing": 79, "sky": 167, "sleeping": 159, "small": 192, "smile": 69, "smiling": 179, "snow": 9, "snowboard": 35, "snowboarder": 67, "snowboarding": 26, "soccer": 85, "soccer ball": 113, "solid": 95, "stand": 104, "station": 162, "street": 14, "stripes": 196, "style": 1, "sun": 57, "suv": 19, "tabby": 98, "table": 150, "talking": 161, "talking on phone": 186, "tan": 42, "tent": 111, "they aren't": 141, "tired": 189, "tower": 94, "train": 30, "trees": 164, "tv": 115, "unknown": 6, "walking": 86, "wall": 170, "watching": 156, "wedding": 194, "white": 97, "white and black": 62, "white and blue": 82, "window": 31, "windows": 140, "wine": 129, "wine tasting": 117, "woman": 149, "women": 55, "woods": 24, "yellow": 190, "yes": 103, "zoo": 78 }, "layer_norm_eps": 1e-12, "max_image_length": -1, "max_position_embeddings": 40, "modality_type_vocab_size": 2, "model_type": "vilt", "num_attention_heads": 12, "num_channels": 3, "num_hidden_layers": 12, "num_images": -1, "patch_size": 32, "qkv_bias": true, "tie_word_embeddings": false, "torch_dtype": "float32", "transformers_version": "4.34.0", "type_vocab_size": 2, "vocab_size": 30522 }