{ "_name_or_path": "dandelin/vilt-b32-mlm", "architectures": [ "ViltForQuestionAnswering" ], "attention_probs_dropout_prob": 0.0, "hidden_act": "gelu", "hidden_dropout_prob": 0.0, "hidden_size": 768, "id2label": { "0": "french", "1": "plain", "2": "arrow", "3": "green", "4": "skateboard", "5": "down", "6": "protection", "7": "lady", "8": "snowboard", "9": "black", "10": "lanyard", "11": "cross", "12": "style", "13": "man", "14": "don't know", "15": "africa", "16": "4", "17": "hawaii", "18": "boy", "19": "sidewalk", "20": "skier", "21": "walking", "22": "8:35", "23": "king", "24": "door", "25": "bricks", "26": "security", "27": "church", "28": "window", "29": "gray", "30": "skiing", "31": "white", "32": "red and yellow", "33": "tv", "34": "canopy", "35": "cup", "36": "giraffes", "37": "red and blue", "38": "camera", "39": "shrimp", "40": "blue", "41": "suv", "42": "crossing", "43": "in car", "44": "roof", "45": "talking", "46": "queen", "47": "on road", "48": "shelter", "49": "women", "50": "tan", "51": "snow", "52": "smiling", "53": "bike rack", "54": "black and white", "55": "fence", "56": "smile", "57": "5", "58": "sleeping", "59": "table", "60": "exit", "61": "picnic table", "62": "dog", "63": "windows", "64": "right", "65": "purple", "66": "many", "67": "blonde", "68": "woman", "69": "calico", "70": "7", "71": "monitor", "72": "0", "73": "low", "74": "zoo", "75": "tired", "76": "street", "77": "person", "78": "yes", "79": "pink", "80": "double", "81": "clear", "82": "gray and black", "83": "natural", "84": "ground", "85": "lg", "86": "can't tell", "87": "clock tower", "88": "shadow", "89": "red", "90": "blue and white", "91": "soccer", "92": "girl", "93": "bikes", "94": "air", "95": "beagle", "96": "soccer ball", "97": "nothing", "98": "screen", "99": "stand", "100": "not sure", "101": "orange", "102": "snowboarding", "103": "2013", "104": "shadows", "105": "laying down", "106": "wine tasting", "107": "photographer", "108": "happy", "109": "yellow", "110": "bedroom", "111": "solid", "112": "wall", "113": "station", "114": "ball", "115": "hair", "116": "sky", "117": "crown", "118": "2010", "119": "not there", "120": "lying down", "121": "plastic", "122": "large", "123": "white and black", "124": "cloudy", "125": "6", "126": "platform", "127": "they aren't", "128": "curtains", "129": "birthday", "130": "hat", "131": "neon", "132": "snowboarder", "133": "tower", "134": "watching", "135": "full", "136": "backpack", "137": "doughnut", "138": "cage", "139": "sun", "140": "1", "141": "resting", "142": "trees", "143": "donut", "144": "train", "145": "talking on phone", "146": "bicycles", "147": "beige", "148": "9:35", "149": "jeep", "150": "clock", "151": "name tag", "152": "ice cream", "153": "white and blue", "154": "bus", "155": "3", "156": "brown", "157": "outside", "158": "giraffe", "159": "out", "160": "chair", "161": "big ben", "162": "small", "163": "bicycle", "164": "2000", "165": "woods", "166": "unknown", "167": "shade", "168": "net", "169": "desert", "170": "little girl", "171": "fashion", "172": "cat", "173": "2", "174": "tabby", "175": "leather", "176": "7:45", "177": "chopsticks", "178": "on street", "179": "no", "180": "rack", "181": "park", "182": "stripes", "183": "skateboarding", "184": "dirt", "185": "car", "186": "wedding", "187": "necklace", "188": "curtain", "189": "10", "190": "at table", "191": "forest", "192": "plate", "193": "human", "194": "brick", "195": "tent", "196": "8", "197": "7:35", "198": "wine" }, "image_size": 384, "initializer_range": 0.02, "intermediate_size": 3072, "label2id": { "0": 72, "1": 140, "10": 189, "2": 173, "2000": 164, "2010": 118, "2013": 103, "3": 155, "4": 16, "5": 57, "6": 125, "7": 70, "7:35": 197, "7:45": 176, "8": 196, "8:35": 22, "9:35": 148, "africa": 15, "air": 94, "arrow": 2, "at table": 190, "backpack": 136, "ball": 114, "beagle": 95, "bedroom": 110, "beige": 147, "bicycle": 163, "bicycles": 146, "big ben": 161, "bike rack": 53, "bikes": 93, "birthday": 129, "black": 9, "black and white": 54, "blonde": 67, "blue": 40, "blue and white": 90, "boy": 18, "brick": 194, "bricks": 25, "brown": 156, "bus": 154, "cage": 138, "calico": 69, "camera": 38, "can't tell": 86, "canopy": 34, "car": 185, "cat": 172, "chair": 160, "chopsticks": 177, "church": 27, "clear": 81, "clock": 150, "clock tower": 87, "cloudy": 124, "cross": 11, "crossing": 42, "crown": 117, "cup": 35, "curtain": 188, "curtains": 128, "desert": 169, "dirt": 184, "dog": 62, "don't know": 14, "donut": 143, "door": 24, "double": 80, "doughnut": 137, "down": 5, "exit": 60, "fashion": 171, "fence": 55, "forest": 191, "french": 0, "full": 135, "giraffe": 158, "giraffes": 36, "girl": 92, "gray": 29, "gray and black": 82, "green": 3, "ground": 84, "hair": 115, "happy": 108, "hat": 130, "hawaii": 17, "human": 193, "ice cream": 152, "in car": 43, "jeep": 149, "king": 23, "lady": 7, "lanyard": 10, "large": 122, "laying down": 105, "leather": 175, "lg": 85, "little girl": 170, "low": 73, "lying down": 120, "man": 13, "many": 66, "monitor": 71, "name tag": 151, "natural": 83, "necklace": 187, "neon": 131, "net": 168, "no": 179, "not sure": 100, "not there": 119, "nothing": 97, "on road": 47, "on street": 178, "orange": 101, "out": 159, "outside": 157, "park": 181, "person": 77, "photographer": 107, "picnic table": 61, "pink": 79, "plain": 1, "plastic": 121, "plate": 192, "platform": 126, "protection": 6, "purple": 65, "queen": 46, "rack": 180, "red": 89, "red and blue": 37, "red and yellow": 32, "resting": 141, "right": 64, "roof": 44, "screen": 98, "security": 26, "shade": 167, "shadow": 88, "shadows": 104, "shelter": 48, "shrimp": 39, "sidewalk": 19, "skateboard": 4, "skateboarding": 183, "skier": 20, "skiing": 30, "sky": 116, "sleeping": 58, "small": 162, "smile": 56, "smiling": 52, "snow": 51, "snowboard": 8, "snowboarder": 132, "snowboarding": 102, "soccer": 91, "soccer ball": 96, "solid": 111, "stand": 99, "station": 113, "street": 76, "stripes": 182, "style": 12, "sun": 139, "suv": 41, "tabby": 174, "table": 59, "talking": 45, "talking on phone": 145, "tan": 50, "tent": 195, "they aren't": 127, "tired": 75, "tower": 133, "train": 144, "trees": 142, "tv": 33, "unknown": 166, "walking": 21, "wall": 112, "watching": 134, "wedding": 186, "white": 31, "white and black": 123, "white and blue": 153, "window": 28, "windows": 63, "wine": 198, "wine tasting": 106, "woman": 68, "women": 49, "woods": 165, "yellow": 109, "yes": 78, "zoo": 74 }, "layer_norm_eps": 1e-12, "max_image_length": -1, "max_position_embeddings": 40, "modality_type_vocab_size": 2, "model_type": "vilt", "num_attention_heads": 12, "num_channels": 3, "num_hidden_layers": 12, "num_images": -1, "patch_size": 32, "qkv_bias": true, "tie_word_embeddings": false, "torch_dtype": "float32", "transformers_version": "4.40.2", "type_vocab_size": 2, "vocab_size": 30522 }