williamberman
committed on
Commit
•
6e95467
1
Parent(s):
babb4f6
Add projection dim to text and vision model configs for CLIPVisionModelWithProjection and CLIPTextModelWithProjection support
Browse files
The default `projection_dim` is 512, which will throw an error when loading weights for
```py
from transformers import CLIPVisionModelWithProjection
CLIPVisionModelWithProjection.from_pretrained('laion/CLIP-ViT-H-14-laion2B-s32B-b79K')
```
or
```py
from transformers import CLIPTextModelWithProjection
CLIPTextModelWithProjection.from_pretrained('laion/CLIP-ViT-H-14-laion2B-s32B-b79K')
```
Loading CLIPModel will not throw an error because it uses the `projection_dim` at the top level of the config.
```py
from transformers import CLIPModel
CLIPModel.from_pretrained('laion/CLIP-ViT-H-14-laion2B-s32B-b79K')
```
- config.json +6 -2
config.json
CHANGED
@@ -59,6 +59,7 @@
|
|
59 |
"pad_token_id": 1,
|
60 |
"prefix": null,
|
61 |
"problem_type": null,
|
|
|
62 |
"pruned_heads": {},
|
63 |
"remove_invalid_values": false,
|
64 |
"repetition_penalty": 1.0,
|
@@ -85,7 +86,8 @@
|
|
85 |
"hidden_size": 1024,
|
86 |
"intermediate_size": 4096,
|
87 |
"num_attention_heads": 16,
|
88 |
-
"num_hidden_layers": 24
|
|
|
89 |
},
|
90 |
"torch_dtype": "float32",
|
91 |
"transformers_version": null,
|
@@ -144,6 +146,7 @@
|
|
144 |
"patch_size": 14,
|
145 |
"prefix": null,
|
146 |
"problem_type": null,
|
|
|
147 |
"pruned_heads": {},
|
148 |
"remove_invalid_values": false,
|
149 |
"repetition_penalty": 1.0,
|
@@ -170,6 +173,7 @@
|
|
170 |
"intermediate_size": 5120,
|
171 |
"num_attention_heads": 16,
|
172 |
"num_hidden_layers": 32,
|
173 |
-
"patch_size": 14
|
|
|
174 |
}
|
175 |
}
|
|
|
59 |
"pad_token_id": 1,
|
60 |
"prefix": null,
|
61 |
"problem_type": null,
|
62 |
+
"projection_dim": 1024,
|
63 |
"pruned_heads": {},
|
64 |
"remove_invalid_values": false,
|
65 |
"repetition_penalty": 1.0,
|
|
|
86 |
"hidden_size": 1024,
|
87 |
"intermediate_size": 4096,
|
88 |
"num_attention_heads": 16,
|
89 |
+
"num_hidden_layers": 24,
|
90 |
+
"projection_dim": 1024
|
91 |
},
|
92 |
"torch_dtype": "float32",
|
93 |
"transformers_version": null,
|
|
|
146 |
"patch_size": 14,
|
147 |
"prefix": null,
|
148 |
"problem_type": null,
|
149 |
+
"projection_dim": 1024,
|
150 |
"pruned_heads": {},
|
151 |
"remove_invalid_values": false,
|
152 |
"repetition_penalty": 1.0,
|
|
|
173 |
"intermediate_size": 5120,
|
174 |
"num_attention_heads": 16,
|
175 |
"num_hidden_layers": 32,
|
176 |
+
"patch_size": 14,
|
177 |
+
"projection_dim": 1024
|
178 |
}
|
179 |
}
|