Transformers
PyTorch
English
bridgetower
Inference Endpoints
anahita-b committed on
Commit
bd10b90
1 Parent(s): 8023ab2

Adding bridgetower model checkpoint and roberta tokenizer config files

Browse files
Files changed (5) hide show
  1. config.json +53 -0
  2. preprocessor_config.json +52 -0
  3. pytorch_model.bin +3 -0
  4. tokenizer.json +0 -0
  5. vocab.json +0 -0
config.json ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+
3
+ "cache_dir":"/tmp",
4
+ "downstream_fusion":false,
5
+ "downstream_fusion_layers":1,
6
+ "downstream_fusion_method":"elmo",
7
+ "drop_rate":0.1,
8
+ "freeze_RoBERTa":false,
9
+ "freeze_ViT":false,
10
+ "freeze_layer_count_roberta":false,
11
+ "freeze_layer_count_vit":false,
12
+ "head_hidden_scale":2,
13
+ "hidden_size":768,
14
+ "image_size":288,
15
+ "input_text_embed_size":768,
16
+ "link_tower_shared":false,
17
+ "link_tower_type":"add",
18
+ "log_dir":"log_dir",
19
+ "loss_names":{"contras": 0,
20
+ "irtr": 0,
21
+ "itm": 0,
22
+ "mlm": 0,
23
+ "mpp": 0,
24
+ "nlvr2": 0,
25
+ "snli": 0,
26
+ "vcr": 0,
27
+ "vcr_qar": 0,
28
+ "vqa": 1},
29
+ "max_text_len":50,
30
+ "mlp_ratio":4,
31
+ "model_type":"bridgetower",
32
+ "num_heads":12,
33
+ "num_layers":6,
34
+ "num_nodes":1,
35
+ "only_load_cross_modal_from_meter":false,
36
+ "patch_size":16,
37
+ "resolution_before":224,
38
+ "stop_gradient":false,
39
+ "task_head_layers":2,
40
+ "test_only":false,
41
+ "tokenizer":"roberta-base",
42
+ "unfreeze_RoBERTa_attention":false,
43
+ "unfreeze_RoBERTa_embeddings":false,
44
+ "unfreeze_RoBERTa_encoder":false,
45
+ "unfreeze_RoBERTa_layernorm":false,
46
+ "unfreeze_ViT_attention":false,
47
+ "unfreeze_ViT_layernorm":false,
48
+ "vit":"ViT-B-16-weights.pt",
49
+ "vit_layernorm_init_from_vit":false,
50
+ "vit_layernorm_shared":true,
51
+ "vit_remove_last":false,
52
+ "vocab_size":50265
53
+ }
preprocessor_config.json ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+
3
+ "cache_dir":"/tmp",
4
+ "downstream_fusion":false,
5
+ "downstream_fusion_layers":1,
6
+ "downstream_fusion_method":"elmo",
7
+ "drop_rate":0.1,
8
+ "freeze_RoBERTa":false,
9
+ "freeze_ViT":false,
10
+ "freeze_layer_count_roberta":false,
11
+ "freeze_layer_count_vit":false,
12
+ "head_hidden_scale":2,
13
+ "hidden_size":768,
14
+ "input_text_embed_size":768,
15
+ "link_tower_shared":false,
16
+ "link_tower_type":"add",
17
+ "log_dir":"log_dir",
18
+ "loss_names":{"contras": 0,
19
+ "irtr": 0,
20
+ "itm": 0,
21
+ "mlm": 0,
22
+ "mpp": 0,
23
+ "nlvr2": 0,
24
+ "snli": 0,
25
+ "vcr": 0,
26
+ "vcr_qar": 0,
27
+ "vqa": 1},
28
+ "max_text_len":50,
29
+ "mlp_ratio":4,
30
+ "model_type":"bridgetower",
31
+ "num_heads":12,
32
+ "num_layers":6,
33
+ "num_nodes":1,
34
+ "only_load_cross_modal_from_meter":false,
35
+ "patch_size":16,
36
+ "resolution_before":224,
37
+ "stop_gradient":false,
38
+ "task_head_layers":2,
39
+ "test_only":false,
40
+ "tokenizer":"roberta-base",
41
+ "unfreeze_RoBERTa_attention":false,
42
+ "unfreeze_RoBERTa_embeddings":false,
43
+ "unfreeze_RoBERTa_encoder":false,
44
+ "unfreeze_RoBERTa_layernorm":false,
45
+ "unfreeze_ViT_attention":false,
46
+ "unfreeze_ViT_layernorm":false,
47
+ "vit":"ViT-B-16-weights.pt",
48
+ "vit_layernorm_init_from_vit":false,
49
+ "vit_layernorm_shared":true,
50
+ "vit_remove_last":false,
51
+ "vocab_size":50265
52
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53cc79c8597b3810b0769968b7dbca0e6c26c4cf0f0896764573cd2dfd3efbaf
3
+ size 1564038428
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
vocab.json ADDED
The diff for this file is too large to render. See raw diff