{ "architectures": [ "LxmertModel" ], "attention_probs_dropout_prob": 0.1, "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "hidden_size": 768, "initializer_range": 0.02, "intermediate_size": 3072, "l_layers": 9, "layer_norm_eps": 1e-12, "max_position_embeddings": 512, "model_type": "lxmert", "num_attention_heads": 12, "num_attr_labels": 400, "num_object_labels": 1600, "num_qa_labels": 3129, "r_layers": 5, "task_mask_lm": true, "task_matched": true, "task_obj_predict": true, "task_qa": true, "type_vocab_size": 2, "visual_attr_loss": true, "visual_feat_dim": 2048, "visual_feat_loss": true, "visual_loss_normalizer": 6.67, "visual_obj_loss": true, "visual_pos_dim": 4, "vocab_size": 30522, "x_layers": 5 }