williamberman committed
Commit 6e95467
1 Parent(s): babb4f6

Add projection dim to text and vision model configs for CLIPVisionModelWithProjection and CLIPTextModelWithProjection support

The text and vision sub-configs do not set `projection_dim`, so it falls back to the default of 512, which throws a weight shape mismatch error when loading

```py
from transformers import CLIPVisionModelWithProjection
CLIPVisionModelWithProjection.from_pretrained('laion/CLIP-ViT-H-14-laion2B-s32B-b79K')
```

or

```py
from transformers import CLIPTextModelWithProjection
CLIPTextModelWithProjection.from_pretrained('laion/CLIP-ViT-H-14-laion2B-s32B-b79K')
```
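Until this change lands, a possible workaround (a sketch, relying on `from_pretrained`'s standard config-override kwargs) is to pass `projection_dim` explicitly so the sub-config matches the checkpoint's 1024-dimensional projection weights:

```py
from transformers import CLIPVisionModelWithProjection

# Workaround sketch: override the sub-config's default projection_dim (512)
# with the checkpoint's actual projection width (1024) at load time.
model = CLIPVisionModelWithProjection.from_pretrained(
    'laion/CLIP-ViT-H-14-laion2B-s32B-b79K',
    projection_dim=1024,
)
```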

Loading `CLIPModel` does not throw an error because it uses the `projection_dim` at the top level of the config.

```py
from transformers import CLIPModel
CLIPModel.from_pretrained('laion/CLIP-ViT-H-14-laion2B-s32B-b79K')
```
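The mismatch is visible in the loaded config: the top level records `projection_dim = 1024`, while the nested text and vision configs fall back to the class default. A quick check (a sketch; attribute names follow the `transformers` CLIP config classes):

```py
from transformers import CLIPConfig

config = CLIPConfig.from_pretrained('laion/CLIP-ViT-H-14-laion2B-s32B-b79K')
print(config.projection_dim)               # 1024, used by CLIPModel
print(config.text_config.projection_dim)   # 512 before this commit (class default)
print(config.vision_config.projection_dim) # 512 before this commit (class default)
```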

Files changed (1):

  1. config.json (+6, -2)

```diff
--- a/config.json
+++ b/config.json
@@ -59,6 +59,7 @@
   "pad_token_id": 1,
   "prefix": null,
   "problem_type": null,
+  "projection_dim": 1024,
   "pruned_heads": {},
   "remove_invalid_values": false,
   "repetition_penalty": 1.0,
@@ -85,7 +86,8 @@
   "hidden_size": 1024,
   "intermediate_size": 4096,
   "num_attention_heads": 16,
-  "num_hidden_layers": 24
+  "num_hidden_layers": 24,
+  "projection_dim": 1024
   },
   "torch_dtype": "float32",
   "transformers_version": null,
@@ -144,6 +146,7 @@
   "patch_size": 14,
   "prefix": null,
   "problem_type": null,
+  "projection_dim": 1024,
   "pruned_heads": {},
   "remove_invalid_values": false,
   "repetition_penalty": 1.0,
@@ -170,6 +173,7 @@
   "intermediate_size": 5120,
   "num_attention_heads": 16,
   "num_hidden_layers": 32,
-  "patch_size": 14
+  "patch_size": 14,
+  "projection_dim": 1024
   }
   }
```
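With `projection_dim: 1024` in both sub-configs, the projection-head models should load cleanly. A minimal sanity-check sketch (`text_projection` and `visual_projection` are the projection layers these model classes define):

```py
from transformers import CLIPTextModelWithProjection, CLIPVisionModelWithProjection

name = 'laion/CLIP-ViT-H-14-laion2B-s32B-b79K'
text_model = CLIPTextModelWithProjection.from_pretrained(name)
vision_model = CLIPVisionModelWithProjection.from_pretrained(name)

# Both projection heads now match the 1024-dim checkpoint weights.
assert text_model.text_projection.out_features == 1024
assert vision_model.visual_projection.out_features == 1024
```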