Xenova HF staff committed on
Commit
a69ef9d
1 Parent(s): c32d8e0

Update config.json

Browse files
Files changed (1) hide show
  1. config.json +16 -163
config.json CHANGED
@@ -1,167 +1,20 @@
1
  {
2
- "_commit_hash": "57c216476eefef5ab752ec549e440a49ae4ae5f3",
3
- "_name_or_path": "openai/clip-vit-base-patch16",
4
- "architectures": [
5
- "CLIPModel"
6
- ],
7
- "initializer_factor": 1.0,
8
- "logit_scale_init_value": 2.6592,
9
  "model_type": "clip",
10
- "projection_dim": 512,
11
- "text_config": {
12
- "_name_or_path": "",
13
- "add_cross_attention": false,
14
- "architectures": null,
15
- "attention_dropout": 0.0,
16
- "bad_words_ids": null,
17
- "begin_suppress_tokens": null,
18
- "bos_token_id": 0,
19
- "chunk_size_feed_forward": 0,
20
- "cross_attention_hidden_size": null,
21
- "decoder_start_token_id": null,
22
- "diversity_penalty": 0.0,
23
- "do_sample": false,
24
- "dropout": 0.0,
25
- "early_stopping": false,
26
- "encoder_no_repeat_ngram_size": 0,
27
- "eos_token_id": 2,
28
- "exponential_decay_length_penalty": null,
29
- "finetuning_task": null,
30
- "forced_bos_token_id": null,
31
- "forced_eos_token_id": null,
32
- "hidden_act": "quick_gelu",
33
- "hidden_size": 512,
34
- "id2label": {
35
- "0": "LABEL_0",
36
- "1": "LABEL_1"
37
- },
38
- "initializer_factor": 1.0,
39
- "initializer_range": 0.02,
40
- "intermediate_size": 2048,
41
- "is_decoder": false,
42
- "is_encoder_decoder": false,
43
- "label2id": {
44
- "LABEL_0": 0,
45
- "LABEL_1": 1
46
- },
47
- "layer_norm_eps": 1e-05,
48
- "length_penalty": 1.0,
49
- "max_length": 20,
50
- "max_position_embeddings": 77,
51
- "min_length": 0,
52
- "model_type": "clip_text_model",
53
- "no_repeat_ngram_size": 0,
54
- "num_attention_heads": 8,
55
- "num_beam_groups": 1,
56
- "num_beams": 1,
57
- "num_hidden_layers": 12,
58
- "num_return_sequences": 1,
59
- "output_attentions": false,
60
- "output_hidden_states": false,
61
- "output_scores": false,
62
- "pad_token_id": 1,
63
- "prefix": null,
64
- "problem_type": null,
65
- "projection_dim": 512,
66
- "pruned_heads": {},
67
- "remove_invalid_values": false,
68
- "repetition_penalty": 1.0,
69
- "return_dict": true,
70
- "return_dict_in_generate": false,
71
- "sep_token_id": null,
72
- "suppress_tokens": null,
73
- "task_specific_params": null,
74
- "temperature": 1.0,
75
- "tf_legacy_loss": false,
76
- "tie_encoder_decoder": false,
77
- "tie_word_embeddings": true,
78
- "tokenizer_class": null,
79
- "top_k": 50,
80
- "top_p": 1.0,
81
- "torch_dtype": null,
82
- "torchscript": false,
83
- "transformers_version": "4.29.2",
84
- "typical_p": 1.0,
85
- "use_bfloat16": false,
86
- "vocab_size": 49408
87
  },
88
- "transformers_version": null,
89
- "vision_config": {
90
- "_name_or_path": "",
91
- "add_cross_attention": false,
92
- "architectures": null,
93
- "attention_dropout": 0.0,
94
- "bad_words_ids": null,
95
- "begin_suppress_tokens": null,
96
- "bos_token_id": null,
97
- "chunk_size_feed_forward": 0,
98
- "cross_attention_hidden_size": null,
99
- "decoder_start_token_id": null,
100
- "diversity_penalty": 0.0,
101
- "do_sample": false,
102
- "dropout": 0.0,
103
- "early_stopping": false,
104
- "encoder_no_repeat_ngram_size": 0,
105
- "eos_token_id": null,
106
- "exponential_decay_length_penalty": null,
107
- "finetuning_task": null,
108
- "forced_bos_token_id": null,
109
- "forced_eos_token_id": null,
110
- "hidden_act": "quick_gelu",
111
- "hidden_size": 768,
112
- "id2label": {
113
- "0": "LABEL_0",
114
- "1": "LABEL_1"
115
- },
116
- "image_size": 224,
117
- "initializer_factor": 1.0,
118
- "initializer_range": 0.02,
119
- "intermediate_size": 3072,
120
- "is_decoder": false,
121
- "is_encoder_decoder": false,
122
- "label2id": {
123
- "LABEL_0": 0,
124
- "LABEL_1": 1
125
- },
126
- "layer_norm_eps": 1e-05,
127
- "length_penalty": 1.0,
128
- "max_length": 20,
129
- "min_length": 0,
130
- "model_type": "clip_vision_model",
131
- "no_repeat_ngram_size": 0,
132
- "num_attention_heads": 12,
133
- "num_beam_groups": 1,
134
- "num_beams": 1,
135
- "num_channels": 3,
136
- "num_hidden_layers": 12,
137
- "num_return_sequences": 1,
138
- "output_attentions": false,
139
- "output_hidden_states": false,
140
- "output_scores": false,
141
- "pad_token_id": null,
142
- "patch_size": 16,
143
- "prefix": null,
144
- "problem_type": null,
145
- "projection_dim": 512,
146
- "pruned_heads": {},
147
- "remove_invalid_values": false,
148
- "repetition_penalty": 1.0,
149
- "return_dict": true,
150
- "return_dict_in_generate": false,
151
- "sep_token_id": null,
152
- "suppress_tokens": null,
153
- "task_specific_params": null,
154
- "temperature": 1.0,
155
- "tf_legacy_loss": false,
156
- "tie_encoder_decoder": false,
157
- "tie_word_embeddings": true,
158
- "tokenizer_class": null,
159
- "top_k": 50,
160
- "top_p": 1.0,
161
- "torch_dtype": null,
162
- "torchscript": false,
163
- "transformers_version": "4.29.2",
164
- "typical_p": 1.0,
165
- "use_bfloat16": false
166
  }
167
- }
 
1
  {
 
 
 
 
 
 
 
2
  "model_type": "clip",
3
+ "img_encoder":{
4
+ "backbone":"deit3_base_patch16_224_in21ft1k",
5
+ "dim":768,
6
+ "pooling":"cls",
7
+ "output_dim":256,
8
+ "backbone_type":"vit"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  },
10
+ "text_encoder":{
11
+ "backbone":"sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
12
+ "backbone_type":"bert",
13
+ "unimodal_n_layers":8,
14
+ "dim":384,
15
+ "pooling":"mean",
16
+ "context_dim":768,
17
+ "output_dim":256,
18
+ "head_one_neuron":true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  }
20
+ }