{ "_name_or_path": "dandelin/vilt-b32-mlm", "architectures": [ "ViltForQuestionAnswering" ], "attention_probs_dropout_prob": 0.0, "hidden_act": "gelu", "hidden_dropout_prob": 0.0, "hidden_size": 768, "id2label": { "0": "donut", "1": "exit", "2": "person", "3": "out", "4": "bus", "5": "don't know", "6": "shade", "7": "neon", "8": "photographer", "9": "they aren't", "10": "2", "11": "sun", "12": "cage", "13": "shelter", "14": "blue", "15": "net", "16": "human", "17": "screen", "18": "curtain", "19": "7:35", "20": "chopsticks", "21": "air", "22": "4", "23": "lanyard", "24": "dog", "25": "red", "26": "boy", "27": "stand", "28": "wine", "29": "3", "30": "french", "31": "2010", "32": "yes", "33": "not sure", "34": "platform", "35": "church", "36": "shrimp", "37": "beige", "38": "shadows", "39": "canopy", "40": "dirt", "41": "rack", "42": "green", "43": "tv", "44": "hat", "45": "at table", "46": "giraffe", "47": "not there", "48": "skateboarding", "49": "park", "50": "resting", "51": "pink", "52": "clock", "53": "can't tell", "54": "smiling", "55": "station", "56": "camera", "57": "watching", "58": "white and blue", "59": "little girl", "60": "table", "61": "sidewalk", "62": "birthday", "63": "blonde", "64": "black", "65": "tent", "66": "door", "67": "7:45", "68": "crown", "69": "happy", "70": "windows", "71": "hawaii", "72": "style", "73": "on street", "74": "security", "75": "bicycle", "76": "queen", "77": "soccer ball", "78": "lg", "79": "women", "80": "black and white", "81": "brick", "82": "snowboarding", "83": "natural", "84": "picnic table", "85": "orange", "86": "chair", "87": "curtains", "88": "nothing", "89": "in car", "90": "snow", "91": "suv", "92": "crossing", "93": "snowboarder", "94": "bike rack", "95": "plastic", "96": "7", "97": "forest", "98": "9:35", "99": "cat", "100": "brown", "101": "africa", "102": "cross", "103": "laying down", "104": "woods", "105": "woman", "106": "protection", "107": "backpack", "108": "talking on phone", "109": "clock tower", "110": "fashion", "111": "clear", "112": "skateboard", "113": "lying down", "114": "giraffes", "115": "big ben", "116": "man", "117": "8", "118": "full", "119": "gray", "120": "gray and black", "121": "low", "122": "name tag", "123": "down", "124": "white and black", "125": "outside", "126": "tan", "127": "talking", "128": "double", "129": "bicycles", "130": "skiing", "131": "no", "132": "cup", "133": "right", "134": "walking", "135": "trees", "136": "2000", "137": "10", "138": "doughnut", "139": "stripes", "140": "0", "141": "sleeping", "142": "lady", "143": "soccer", "144": "solid", "145": "bricks", "146": "skier", "147": "white", "148": "king", "149": "wedding", "150": "street", "151": "shadow", "152": "arrow", "153": "necklace", "154": "girl", "155": "5", "156": "2013", "157": "zoo", "158": "blue and white", "159": "yellow", "160": "hair", "161": "small", "162": "red and yellow", "163": "ice cream", "164": "car", "165": "red and blue", "166": "unknown", "167": "plain", "168": "desert", "169": "sky", "170": "jeep", "171": "wall", "172": "roof", "173": "bikes", "174": "smile", "175": "wine tasting", "176": "cloudy", "177": "purple", "178": "snowboard", "179": "plate", "180": "beagle", "181": "ball", "182": "tabby", "183": "bedroom", "184": "1", "185": "8:35", "186": "train", "187": "monitor", "188": "ground", "189": "large", "190": "on road", "191": "tower", "192": "tired", "193": "window", "194": "calico", "195": "fence", "196": "many", "197": "leather", "198": "6" }, "image_size": 384, "initializer_range": 0.02, "intermediate_size": 3072, "label2id": { "0": 140, "1": 184, "10": 137, "2": 10, "2000": 136, "2010": 31, "2013": 156, "3": 29, "4": 22, "5": 155, "6": 198, "7": 96, "7:35": 19, "7:45": 67, "8": 117, "8:35": 185, "9:35": 98, "africa": 101, "air": 21, "arrow": 152, "at table": 45, "backpack": 107, "ball": 181, "beagle": 180, "bedroom": 183, "beige": 37, "bicycle": 75, "bicycles": 129, "big ben": 115, "bike rack": 94, "bikes": 173, "birthday": 62, "black": 64, "black and white": 80, "blonde": 63, "blue": 14, "blue and white": 158, "boy": 26, "brick": 81, "bricks": 145, "brown": 100, "bus": 4, "cage": 12, "calico": 194, "camera": 56, "can't tell": 53, "canopy": 39, "car": 164, "cat": 99, "chair": 86, "chopsticks": 20, "church": 35, "clear": 111, "clock": 52, "clock tower": 109, "cloudy": 176, "cross": 102, "crossing": 92, "crown": 68, "cup": 132, "curtain": 18, "curtains": 87, "desert": 168, "dirt": 40, "dog": 24, "don't know": 5, "donut": 0, "door": 66, "double": 128, "doughnut": 138, "down": 123, "exit": 1, "fashion": 110, "fence": 195, "forest": 97, "french": 30, "full": 118, "giraffe": 46, "giraffes": 114, "girl": 154, "gray": 119, "gray and black": 120, "green": 42, "ground": 188, "hair": 160, "happy": 69, "hat": 44, "hawaii": 71, "human": 16, "ice cream": 163, "in car": 89, "jeep": 170, "king": 148, "lady": 142, "lanyard": 23, "large": 189, "laying down": 103, "leather": 197, "lg": 78, "little girl": 59, "low": 121, "lying down": 113, "man": 116, "many": 196, "monitor": 187, "name tag": 122, "natural": 83, "necklace": 153, "neon": 7, "net": 15, "no": 131, "not sure": 33, "not there": 47, "nothing": 88, "on road": 190, "on street": 73, "orange": 85, "out": 3, "outside": 125, "park": 49, "person": 2, "photographer": 8, "picnic table": 84, "pink": 51, "plain": 167, "plastic": 95, "plate": 179, "platform": 34, "protection": 106, "purple": 177, "queen": 76, "rack": 41, "red": 25, "red and blue": 165, "red and yellow": 162, "resting": 50, "right": 133, "roof": 172, "screen": 17, "security": 74, "shade": 6, "shadow": 151, "shadows": 38, "shelter": 13, "shrimp": 36, "sidewalk": 61, "skateboard": 112, "skateboarding": 48, "skier": 146, "skiing": 130, "sky": 169, "sleeping": 141, "small": 161, "smile": 174, "smiling": 54, "snow": 90, "snowboard": 178, "snowboarder": 93, "snowboarding": 82, "soccer": 143, "soccer ball": 77, "solid": 144, "stand": 27, "station": 55, "street": 150, "stripes": 139, "style": 72, "sun": 11, "suv": 91, "tabby": 182, "table": 60, "talking": 127, "talking on phone": 108, "tan": 126, "tent": 65, "they aren't": 9, "tired": 192, "tower": 191, "train": 186, "trees": 135, "tv": 43, "unknown": 166, "walking": 134, "wall": 171, "watching": 57, "wedding": 149, "white": 147, "white and black": 124, "white and blue": 58, "window": 193, "windows": 70, "wine": 28, "wine tasting": 175, "woman": 105, "women": 79, "woods": 104, "yellow": 159, "yes": 32, "zoo": 157 }, "layer_norm_eps": 1e-12, "max_image_length": -1, "max_position_embeddings": 40, "modality_type_vocab_size": 2, "model_type": "vilt", "num_attention_heads": 12, "num_channels": 3, "num_hidden_layers": 12, "num_images": -1, "patch_size": 32, "qkv_bias": true, "tie_word_embeddings": false, "torch_dtype": "float32", "transformers_version": "4.33.1", "type_vocab_size": 2, "vocab_size": 30522 }