|
{ |
|
"_name_or_path": "dandelin/vilt-b32-mlm", |
|
"architectures": [ |
|
"ViltForQuestionAnswering" |
|
], |
|
"attention_probs_dropout_prob": 0.0, |
|
"hidden_act": "gelu", |
|
"hidden_dropout_prob": 0.0, |
|
"hidden_size": 768, |
|
"id2label": { |
|
"0": "room", |
|
"1": "united states", |
|
"2": "airport", |
|
"3": "australia", |
|
"4": "usa", |
|
"5": "windows", |
|
"6": "home", |
|
"7": "fridge", |
|
"8": "in car", |
|
"9": "park", |
|
"10": "england", |
|
"11": "on sidewalk", |
|
"12": "nothing", |
|
"13": "mouse", |
|
"14": "sun", |
|
"15": "smile", |
|
"16": "refrigerator", |
|
"17": "british", |
|
"18": "freezer", |
|
"19": "hotel", |
|
"20": "car", |
|
"21": "indoors", |
|
"22": "woods", |
|
"23": "buildings", |
|
"24": "uk", |
|
"25": "nowhere", |
|
"26": "cows", |
|
"27": "dog", |
|
"28": "bus", |
|
"29": "america", |
|
"30": "living room", |
|
"31": "ground", |
|
"32": "us", |
|
"33": "inside", |
|
"34": "yes", |
|
"35": "hallway" |
|
}, |
|
"image_size": 384, |
|
"initializer_range": 0.02, |
|
"intermediate_size": 3072, |
|
"label2id": { |
|
"airport": 2, |
|
"america": 29, |
|
"australia": 3, |
|
"british": 17, |
|
"buildings": 23, |
|
"bus": 28, |
|
"car": 20, |
|
"cows": 26, |
|
"dog": 27, |
|
"england": 10, |
|
"freezer": 18, |
|
"fridge": 7, |
|
"ground": 31, |
|
"hallway": 35, |
|
"home": 6, |
|
"hotel": 19, |
|
"in car": 8, |
|
"indoors": 21, |
|
"inside": 33, |
|
"living room": 30, |
|
"mouse": 13, |
|
"nothing": 12, |
|
"nowhere": 25, |
|
"on sidewalk": 11, |
|
"park": 9, |
|
"refrigerator": 16, |
|
"room": 0, |
|
"smile": 15, |
|
"sun": 14, |
|
"uk": 24, |
|
"united states": 1, |
|
"us": 32, |
|
"usa": 4, |
|
"windows": 5, |
|
"woods": 22, |
|
"yes": 34 |
|
}, |
|
"layer_norm_eps": 1e-12, |
|
"max_image_length": -1, |
|
"max_position_embeddings": 40, |
|
"modality_type_vocab_size": 2, |
|
"model_type": "vilt", |
|
"num_attention_heads": 12, |
|
"num_channels": 3, |
|
"num_hidden_layers": 12, |
|
"num_images": -1, |
|
"patch_size": 32, |
|
"qkv_bias": true, |
|
"tie_word_embeddings": false, |
|
"torch_dtype": "float32", |
|
"transformers_version": "4.40.2", |
|
"type_vocab_size": 2, |
|
"vocab_size": 30522 |
|
} |
|
|