Transformers
PyTorch
English
bridgetower
Inference Endpoints
shaoyent committed on
Commit
a679400
1 Parent(s): dcb6033

Initial update

Browse files
Files changed (4) hide show
  1. config.json +53 -0
  2. preprocessor_config.json +52 -0
  3. tokenizer.json +0 -0
  4. vocab.json +0 -0
config.json ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+
3
+ "cache_dir":"/tmp",
4
+ "downstream_fusion":false,
5
+ "downstream_fusion_layers":1,
6
+ "downstream_fusion_method":"elmo",
7
+ "drop_rate":0.1,
8
+ "freeze_RoBERTa":false,
9
+ "freeze_ViT":false,
10
+ "freeze_layer_count_roberta":false,
11
+ "freeze_layer_count_vit":false,
12
+ "head_hidden_scale":2,
13
+ "hidden_size":768,
14
+ "image_size":288,
15
+ "input_text_embed_size":768,
16
+ "link_tower_shared":false,
17
+ "link_tower_type":"add",
18
+ "log_dir":"log_dir",
19
+ "loss_names":{"contras": 0,
20
+ "irtr": 0,
21
+ "itm": 0,
22
+ "mlm": 0,
23
+ "mpp": 0,
24
+ "nlvr2": 0,
25
+ "snli": 0,
26
+ "vcr": 0,
27
+ "vcr_qar": 0,
28
+ "vqa": 1},
29
+ "max_text_len":50,
30
+ "mlp_ratio":4,
31
+ "model_type":"bridgetower",
32
+ "num_heads":12,
33
+ "num_layers":6,
34
+ "num_nodes":1,
35
+ "only_load_cross_modal_from_meter":false,
36
+ "patch_size":16,
37
+ "resolution_before":224,
38
+ "stop_gradient":false,
39
+ "task_head_layers":2,
40
+ "test_only":false,
41
+ "tokenizer":"roberta-base",
42
+ "unfreeze_RoBERTa_attention":false,
43
+ "unfreeze_RoBERTa_embeddings":false,
44
+ "unfreeze_RoBERTa_encoder":false,
45
+ "unfreeze_RoBERTa_layernorm":false,
46
+ "unfreeze_ViT_attention":false,
47
+ "unfreeze_ViT_layernorm":false,
48
+ "vit":"ViT-B/16",
49
+ "vit_layernorm_init_from_vit":false,
50
+ "vit_layernorm_shared":true,
51
+ "vit_remove_last":false,
52
+ "vocab_size":50265
53
+ }
preprocessor_config.json ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+
3
+ "cache_dir":"/tmp",
4
+ "downstream_fusion":false,
5
+ "downstream_fusion_layers":1,
6
+ "downstream_fusion_method":"elmo",
7
+ "drop_rate":0.1,
8
+ "freeze_RoBERTa":false,
9
+ "freeze_ViT":false,
10
+ "freeze_layer_count_roberta":false,
11
+ "freeze_layer_count_vit":false,
12
+ "head_hidden_scale":2,
13
+ "hidden_size":768,
14
+ "input_text_embed_size":768,
15
+ "link_tower_shared":false,
16
+ "link_tower_type":"add",
17
+ "log_dir":"log_dir",
18
+ "loss_names":{"contras": 0,
19
+ "irtr": 0,
20
+ "itm": 0,
21
+ "mlm": 0,
22
+ "mpp": 0,
23
+ "nlvr2": 0,
24
+ "snli": 0,
25
+ "vcr": 0,
26
+ "vcr_qar": 0,
27
+ "vqa": 1},
28
+ "max_text_len":50,
29
+ "mlp_ratio":4,
30
+ "model_type":"bridgetower",
31
+ "num_heads":12,
32
+ "num_layers":6,
33
+ "num_nodes":1,
34
+ "only_load_cross_modal_from_meter":false,
35
+ "patch_size":16,
36
+ "resolution_before":224,
37
+ "stop_gradient":false,
38
+ "task_head_layers":2,
39
+ "test_only":false,
40
+ "tokenizer":"roberta-base",
41
+ "unfreeze_RoBERTa_attention":false,
42
+ "unfreeze_RoBERTa_embeddings":false,
43
+ "unfreeze_RoBERTa_encoder":false,
44
+ "unfreeze_RoBERTa_layernorm":false,
45
+ "unfreeze_ViT_attention":false,
46
+ "unfreeze_ViT_layernorm":false,
47
+ "vit":"ViT-B-16-weights.pt",
48
+ "vit_layernorm_init_from_vit":false,
49
+ "vit_layernorm_shared":true,
50
+ "vit_remove_last":false,
51
+ "vocab_size":50265
52
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
vocab.json ADDED
The diff for this file is too large to render. See raw diff