Feature Extraction
clip
vision
uform-vl-multilingual-v2 / torch_config.json
kimihailv's picture
Upload torch_config.json
bd4c271
raw
history blame
632 Bytes
{
"text_encoder": {
"model_type": "bert",
"dim": 384,
"context_dim": 768,
"vocab_size": 250037,
"padding_idx": 1,
"num_layers": 12,
"num_heads": 12,
"embedding_dim": 256,
"multimodal_layers_ids": [8, 9, 10, 11],
"head_one_neuron": false,
"pooling": "mean",
"max_position_embeddings": 50,
"dropout_prob": 0.1
},
"image_encoder": {
"dim": 768,
"patch_size": 16,
"image_size": 224,
"num_layers": 12,
"num_heads": 12,
"embedding_dim": 256,
"pooling": "cls"
}
}