graphcorevqa_03 / config.json
vamsidulam's picture
Training in progress, step 200
a510fb4
{
"_name_or_path": "dandelin/vilt-b32-mlm",
"architectures": [
"ViltForQuestionAnswering"
],
"attention_probs_dropout_prob": 0.0,
"hidden_act": "gelu",
"hidden_dropout_prob": 0.0,
"hidden_size": 768,
"id2label": {
"0": "bus stop",
"1": "tree branch",
"2": "silver and black",
"3": "french",
"4": "pastries",
"5": "security",
"6": "blue and white",
"7": "open",
"8": "cage",
"9": "plain",
"10": "red and blue",
"11": "soccer ball",
"12": "17",
"13": "20",
"14": "beige",
"15": "people",
"16": "snowboard",
"17": "13",
"18": "suv",
"19": "7:45",
"20": "can't tell",
"21": "gray and white",
"22": "air",
"23": "on grass",
"24": "calico",
"25": "clear",
"26": "clock",
"27": "on road",
"28": "rain",
"29": "happiness",
"30": "15",
"31": "german",
"32": "graffiti",
"33": "not possible",
"34": "orange",
"35": "exit",
"36": "14",
"37": "on bench",
"38": "fashion",
"39": "daisy",
"40": "black",
"41": "brown",
"42": "ball",
"43": "plate",
"44": "2013",
"45": "snowboarder",
"46": "pelican",
"47": "bird",
"48": "parking lot",
"49": "solid",
"50": "train",
"51": "lg",
"52": "yellow and orange",
"53": "in water",
"54": "neither",
"55": "unknown",
"56": "2000",
"57": "leather",
"58": "bear",
"59": "motorbike",
"60": "i don't know",
"61": "airplanes",
"62": "daisies",
"63": "marker",
"64": "gas",
"65": "surfer",
"66": "white and blue",
"67": "bikes",
"68": "cloudy",
"69": "waiting",
"70": "doughnut",
"71": "landing",
"72": "looking out window",
"73": "at table",
"74": "1",
"75": "human",
"76": "resting",
"77": "road",
"78": "happy",
"79": "table",
"80": "cross",
"81": "cleaning",
"82": "7:35",
"83": "harley",
"84": "donut",
"85": "lying down",
"86": "plastic",
"87": "donuts",
"88": "2",
"89": "green",
"90": "stripes",
"91": "ostrich",
"92": "talking",
"93": "me",
"94": "marble",
"95": "nobody",
"96": "10",
"97": "left",
"98": "real",
"99": "bush",
"100": "wild",
"101": "dirt",
"102": "africa",
"103": "18",
"104": "shoes",
"105": "8",
"106": "earring",
"107": "roses",
"108": "snowboarding",
"109": "screen",
"110": "bike",
"111": "in field",
"112": "down",
"113": "not here",
"114": "rack",
"115": "forward",
"116": "purse",
"117": "hardwood",
"118": "curtains",
"119": "shrimp",
"120": "morning",
"121": "lilies",
"122": "talking on phone",
"123": "sad",
"124": "giraffe",
"125": "wood",
"126": "wedding",
"127": "skiing",
"128": "tired",
"129": "corn",
"130": "daytime",
"131": "lifeguard",
"132": "fence",
"133": "plane",
"134": "lots",
"135": "very",
"136": "dusk",
"137": "good",
"138": "savannah",
"139": "lamp",
"140": "laptop",
"141": "computer",
"142": "skateboarding",
"143": "skyscraper",
"144": "couch",
"145": "inside",
"146": "regular",
"147": "white",
"148": "3",
"149": "washing",
"150": "bowl",
"151": "luggage",
"152": "metal",
"153": "protection",
"154": "right",
"155": "walking",
"156": "skier",
"157": "toilet",
"158": "door",
"159": "nowhere",
"160": "natural",
"161": "ears",
"162": "usa",
"163": "front",
"164": "11",
"165": "ear",
"166": "picnic table",
"167": "big ben",
"168": "windows",
"169": "baseball",
"170": "fishing",
"171": "chopsticks",
"172": "hugging",
"173": "net",
"174": "dress",
"175": "display",
"176": "student",
"177": "in car",
"178": "king",
"179": "9:35",
"180": "ceramic",
"181": "person",
"182": "sun",
"183": "boy",
"184": "safari",
"185": "6",
"186": "hair",
"187": "grazing",
"188": "blonde",
"189": "sidewalk",
"190": "bathroom",
"191": "photographer",
"192": "monitor",
"193": "wiimote",
"194": "crown",
"195": "queen",
"196": "motorcycle",
"197": "bag",
"198": "necklace",
"199": "giraffes",
"200": "lady",
"201": "button up",
"202": "no",
"203": "tent",
"204": "not there",
"205": "cat",
"206": "not sure",
"207": "tv",
"208": "conductor",
"209": "remote",
"210": "afternoon",
"211": "street",
"212": "train tracks",
"213": "carnations",
"214": "man",
"215": "cement",
"216": "bicycles",
"217": "smiling",
"218": "beagle",
"219": "not very",
"220": "male",
"221": "purple",
"222": "serious",
"223": "out",
"224": "shelter",
"225": "little girl",
"226": "small",
"227": "noon",
"228": "email",
"229": "owner",
"230": "can't see",
"231": "16",
"232": "branch",
"233": "cannot tell",
"234": "concrete",
"235": "don't know",
"236": "many",
"237": "bicycle",
"238": "electric",
"239": "american",
"240": "tour",
"241": "engine",
"242": "dog",
"243": "rv",
"244": "helmet",
"245": "turkey",
"246": "large",
"247": "paper",
"248": "4",
"249": "tower",
"250": "wine tasting",
"251": "color",
"252": "smile",
"253": "umbrellas",
"254": "trees",
"255": "model",
"256": "pink",
"257": "passengers",
"258": "white and black",
"259": "red and yellow",
"260": "curtain",
"261": "women",
"262": "stand",
"263": "on street",
"264": "orange and yellow",
"265": "tulips",
"266": "gray and black",
"267": "sleeping",
"268": "straight",
"269": "television",
"270": "sky",
"271": "hat",
"272": "car",
"273": "white and green",
"274": "bricks",
"275": "rose",
"276": "they aren't",
"277": "asphalt",
"278": "backpack",
"279": "fire hydrant",
"280": "bus",
"281": "standing",
"282": "red",
"283": "in air",
"284": "closed",
"285": "canopy",
"286": "shadow",
"287": "crossing",
"288": "white and brown",
"289": "brick",
"290": "girl",
"291": "wine",
"292": "style",
"293": "female",
"294": "8:35",
"295": "birthday",
"296": "bored",
"297": "skateboard",
"298": "forest",
"299": "double",
"300": "crane",
"301": "above",
"302": "tracks",
"303": "motor",
"304": "husky",
"305": "snow",
"306": "humans",
"307": "suitcase",
"308": "platform",
"309": "blue",
"310": "airplane",
"311": "chicago",
"312": "center",
"313": "avocado",
"314": "under",
"315": "woods",
"316": "soccer",
"317": "field",
"318": "bench",
"319": "cubs",
"320": "zoo",
"321": "shadows",
"322": "5",
"323": "full",
"324": "sunny",
"325": "porcelain",
"326": "tan",
"327": "taking off",
"328": "grass",
"329": "wall",
"330": "toilet paper",
"331": "laying down",
"332": "light",
"333": "woman",
"334": "camera",
"335": "desert",
"336": "bike rack",
"337": "window",
"338": "outside",
"339": "name tag",
"340": "cranes",
"341": "2010",
"342": "cap",
"343": "silver",
"344": "cream",
"345": "ground",
"346": "glass",
"347": "yellow",
"348": "tabby",
"349": "roof",
"350": "sofa",
"351": "arrow",
"352": "ice cream",
"353": "chair",
"354": "pelicans",
"355": "hydrant",
"356": "neon",
"357": "track",
"358": "young",
"359": "station",
"360": "twin",
"361": "unsure",
"362": "7",
"363": "jeep",
"364": "nothing",
"365": "park",
"366": "angry",
"367": "watching",
"368": "bedroom",
"369": "hawaii",
"370": "parked",
"371": "over",
"372": "hotel room",
"373": "ducati",
"374": "square",
"375": "tree",
"376": "yes",
"377": "shade",
"378": "church",
"379": "cup",
"380": "0",
"381": "single",
"382": "lanyard",
"383": "clock tower",
"384": "wii",
"385": "gray",
"386": "shower",
"387": "boredom",
"388": "black and white",
"389": "low",
"390": "raining",
"391": "19"
},
"image_size": 384,
"initializer_range": 0.02,
"intermediate_size": 3072,
"label2id": {
"0": 380,
"1": 74,
"10": 96,
"11": 164,
"13": 17,
"14": 36,
"15": 30,
"16": 231,
"17": 12,
"18": 103,
"19": 391,
"2": 88,
"20": 13,
"2000": 56,
"2010": 341,
"2013": 44,
"3": 148,
"4": 248,
"5": 322,
"6": 185,
"7": 362,
"7:35": 82,
"7:45": 19,
"8": 105,
"8:35": 294,
"9:35": 179,
"above": 301,
"africa": 102,
"afternoon": 210,
"air": 22,
"airplane": 310,
"airplanes": 61,
"american": 239,
"angry": 366,
"arrow": 351,
"asphalt": 277,
"at table": 73,
"avocado": 313,
"backpack": 278,
"bag": 197,
"ball": 42,
"baseball": 169,
"bathroom": 190,
"beagle": 218,
"bear": 58,
"bedroom": 368,
"beige": 14,
"bench": 318,
"bicycle": 237,
"bicycles": 216,
"big ben": 167,
"bike": 110,
"bike rack": 336,
"bikes": 67,
"bird": 47,
"birthday": 295,
"black": 40,
"black and white": 388,
"blonde": 188,
"blue": 309,
"blue and white": 6,
"bored": 296,
"boredom": 387,
"bowl": 150,
"boy": 183,
"branch": 232,
"brick": 289,
"bricks": 274,
"brown": 41,
"bus": 280,
"bus stop": 0,
"bush": 99,
"button up": 201,
"cage": 8,
"calico": 24,
"camera": 334,
"can't see": 230,
"can't tell": 20,
"cannot tell": 233,
"canopy": 285,
"cap": 342,
"car": 272,
"carnations": 213,
"cat": 205,
"cement": 215,
"center": 312,
"ceramic": 180,
"chair": 353,
"chicago": 311,
"chopsticks": 171,
"church": 378,
"cleaning": 81,
"clear": 25,
"clock": 26,
"clock tower": 383,
"closed": 284,
"cloudy": 68,
"color": 251,
"computer": 141,
"concrete": 234,
"conductor": 208,
"corn": 129,
"couch": 144,
"crane": 300,
"cranes": 340,
"cream": 344,
"cross": 80,
"crossing": 287,
"crown": 194,
"cubs": 319,
"cup": 379,
"curtain": 260,
"curtains": 118,
"daisies": 62,
"daisy": 39,
"daytime": 130,
"desert": 335,
"dirt": 101,
"display": 175,
"dog": 242,
"don't know": 235,
"donut": 84,
"donuts": 87,
"door": 158,
"double": 299,
"doughnut": 70,
"down": 112,
"dress": 174,
"ducati": 373,
"dusk": 136,
"ear": 165,
"earring": 106,
"ears": 161,
"electric": 238,
"email": 228,
"engine": 241,
"exit": 35,
"fashion": 38,
"female": 293,
"fence": 132,
"field": 317,
"fire hydrant": 279,
"fishing": 170,
"forest": 298,
"forward": 115,
"french": 3,
"front": 163,
"full": 323,
"gas": 64,
"german": 31,
"giraffe": 124,
"giraffes": 199,
"girl": 290,
"glass": 346,
"good": 137,
"graffiti": 32,
"grass": 328,
"gray": 385,
"gray and black": 266,
"gray and white": 21,
"grazing": 187,
"green": 89,
"ground": 345,
"hair": 186,
"happiness": 29,
"happy": 78,
"hardwood": 117,
"harley": 83,
"hat": 271,
"hawaii": 369,
"helmet": 244,
"hotel room": 372,
"hugging": 172,
"human": 75,
"humans": 306,
"husky": 304,
"hydrant": 355,
"i don't know": 60,
"ice cream": 352,
"in air": 283,
"in car": 177,
"in field": 111,
"in water": 53,
"inside": 145,
"jeep": 363,
"king": 178,
"lady": 200,
"lamp": 139,
"landing": 71,
"lanyard": 382,
"laptop": 140,
"large": 246,
"laying down": 331,
"leather": 57,
"left": 97,
"lg": 51,
"lifeguard": 131,
"light": 332,
"lilies": 121,
"little girl": 225,
"looking out window": 72,
"lots": 134,
"low": 389,
"luggage": 151,
"lying down": 85,
"male": 220,
"man": 214,
"many": 236,
"marble": 94,
"marker": 63,
"me": 93,
"metal": 152,
"model": 255,
"monitor": 192,
"morning": 120,
"motor": 303,
"motorbike": 59,
"motorcycle": 196,
"name tag": 339,
"natural": 160,
"necklace": 198,
"neither": 54,
"neon": 356,
"net": 173,
"no": 202,
"nobody": 95,
"noon": 227,
"not here": 113,
"not possible": 33,
"not sure": 206,
"not there": 204,
"not very": 219,
"nothing": 364,
"nowhere": 159,
"on bench": 37,
"on grass": 23,
"on road": 27,
"on street": 263,
"open": 7,
"orange": 34,
"orange and yellow": 264,
"ostrich": 91,
"out": 223,
"outside": 338,
"over": 371,
"owner": 229,
"paper": 247,
"park": 365,
"parked": 370,
"parking lot": 48,
"passengers": 257,
"pastries": 4,
"pelican": 46,
"pelicans": 354,
"people": 15,
"person": 181,
"photographer": 191,
"picnic table": 166,
"pink": 256,
"plain": 9,
"plane": 133,
"plastic": 86,
"plate": 43,
"platform": 308,
"porcelain": 325,
"protection": 153,
"purple": 221,
"purse": 116,
"queen": 195,
"rack": 114,
"rain": 28,
"raining": 390,
"real": 98,
"red": 282,
"red and blue": 10,
"red and yellow": 259,
"regular": 146,
"remote": 209,
"resting": 76,
"right": 154,
"road": 77,
"roof": 349,
"rose": 275,
"roses": 107,
"rv": 243,
"sad": 123,
"safari": 184,
"savannah": 138,
"screen": 109,
"security": 5,
"serious": 222,
"shade": 377,
"shadow": 286,
"shadows": 321,
"shelter": 224,
"shoes": 104,
"shower": 386,
"shrimp": 119,
"sidewalk": 189,
"silver": 343,
"silver and black": 2,
"single": 381,
"skateboard": 297,
"skateboarding": 142,
"skier": 156,
"skiing": 127,
"sky": 270,
"skyscraper": 143,
"sleeping": 267,
"small": 226,
"smile": 252,
"smiling": 217,
"snow": 305,
"snowboard": 16,
"snowboarder": 45,
"snowboarding": 108,
"soccer": 316,
"soccer ball": 11,
"sofa": 350,
"solid": 49,
"square": 374,
"stand": 262,
"standing": 281,
"station": 359,
"straight": 268,
"street": 211,
"stripes": 90,
"student": 176,
"style": 292,
"suitcase": 307,
"sun": 182,
"sunny": 324,
"surfer": 65,
"suv": 18,
"tabby": 348,
"table": 79,
"taking off": 327,
"talking": 92,
"talking on phone": 122,
"tan": 326,
"television": 269,
"tent": 203,
"they aren't": 276,
"tired": 128,
"toilet": 157,
"toilet paper": 330,
"tour": 240,
"tower": 249,
"track": 357,
"tracks": 302,
"train": 50,
"train tracks": 212,
"tree": 375,
"tree branch": 1,
"trees": 254,
"tulips": 265,
"turkey": 245,
"tv": 207,
"twin": 360,
"umbrellas": 253,
"under": 314,
"unknown": 55,
"unsure": 361,
"usa": 162,
"very": 135,
"waiting": 69,
"walking": 155,
"wall": 329,
"washing": 149,
"watching": 367,
"wedding": 126,
"white": 147,
"white and black": 258,
"white and blue": 66,
"white and brown": 288,
"white and green": 273,
"wii": 384,
"wiimote": 193,
"wild": 100,
"window": 337,
"windows": 168,
"wine": 291,
"wine tasting": 250,
"woman": 333,
"women": 261,
"wood": 125,
"woods": 315,
"yellow": 347,
"yellow and orange": 52,
"yes": 376,
"young": 358,
"zoo": 320
},
"layer_norm_eps": 1e-12,
"max_image_length": -1,
"max_position_embeddings": 40,
"modality_type_vocab_size": 2,
"model_type": "vilt",
"num_attention_heads": 12,
"num_channels": 3,
"num_hidden_layers": 12,
"num_images": -1,
"patch_size": 32,
"qkv_bias": true,
"tie_word_embeddings": false,
"torch_dtype": "float32",
"transformers_version": "4.29.2",
"type_vocab_size": 2,
"vocab_size": 30522
}