amir7d0 committed on
Commit 9f3e48b
1 Parent(s): 941ea1b

Upload model

Files changed (2):
  1. config.json +468 -28
  2. pytorch_model.bin +2 -2
config.json CHANGED
@@ -8,14 +8,18 @@
   "model_type": "clip",
   "projection_dim": 512,
   "text_config": {
-    "_name_or_path": "",
+    "_name_or_path": "amir7d0/clip-text-fa",
     "add_cross_attention": false,
-    "architectures": null,
+    "architectures": [
+      "RobertaModel"
+    ],
     "attention_dropout": 0.0,
+    "attention_probs_dropout_prob": 0.1,
     "bad_words_ids": null,
     "begin_suppress_tokens": null,
     "bos_token_id": 0,
     "chunk_size_feed_forward": 0,
+    "classifier_dropout": null,
     "cross_attention_hidden_size": null,
     "decoder_start_token_id": null,
     "diversity_penalty": 0.0,
@@ -28,7 +32,9 @@
     "finetuning_task": null,
     "forced_bos_token_id": null,
     "forced_eos_token_id": null,
-    "hidden_act": "quick_gelu",
+    "gradient_checkpointing": false,
+    "hidden_act": "gelu",
+    "hidden_dropout_prob": 0.1,
     "hidden_size": 768,
     "id2label": {
       "0": "LABEL_0",
@@ -36,30 +42,30 @@
     },
     "initializer_factor": 1.0,
     "initializer_range": 0.02,
-    "intermediate_size": 2048,
+    "intermediate_size": 3072,
     "is_decoder": false,
     "is_encoder_decoder": false,
     "label2id": {
       "LABEL_0": 0,
       "LABEL_1": 1
     },
-    "layer_norm_eps": 1e-05,
+    "layer_norm_eps": 1e-12,
     "length_penalty": 1.0,
     "max_length": 20,
-    "max_position_embeddings": 0,
+    "max_position_embeddings": 514,
     "min_length": 0,
     "model_type": "clip_text_model",
     "no_repeat_ngram_size": 0,
-    "num_attention_heads": 8,
+    "num_attention_heads": 12,
     "num_beam_groups": 1,
     "num_beams": 1,
-    "num_hidden_layers": 0,
+    "num_hidden_layers": 12,
     "num_return_sequences": 1,
     "output_attentions": false,
     "output_hidden_states": false,
     "output_scores": false,
     "pad_token_id": 1,
-    "patch_size": 1,
+    "position_embedding_type": "absolute",
     "prefix": null,
     "problem_type": null,
     "pruned_heads": {},
@@ -77,26 +83,105 @@
     "tokenizer_class": null,
     "top_k": 50,
     "top_p": 1.0,
-    "torch_dtype": null,
+    "torch_dtype": "float32",
     "torchscript": false,
     "transformers_version": "4.24.0",
+    "type_vocab_size": 1,
     "typical_p": 1.0,
     "use_bfloat16": false,
-    "vocab_size": 0
+    "use_cache": true,
+    "vocab_size": 42000
   },
   "text_config_dict": {
+    "_name_or_path": "amir7d0/clip-text-fa",
+    "add_cross_attention": false,
+    "architectures": [
+      "RobertaModel"
+    ],
+    "attention_probs_dropout_prob": 0.1,
+    "bad_words_ids": null,
+    "begin_suppress_tokens": null,
+    "bos_token_id": 0,
+    "chunk_size_feed_forward": 0,
+    "classifier_dropout": null,
+    "cross_attention_hidden_size": null,
+    "decoder_start_token_id": null,
+    "diversity_penalty": 0.0,
+    "do_sample": false,
+    "early_stopping": false,
+    "encoder_no_repeat_ngram_size": 0,
+    "eos_token_id": 2,
+    "exponential_decay_length_penalty": null,
+    "finetuning_task": null,
+    "forced_bos_token_id": null,
+    "forced_eos_token_id": null,
+    "gradient_checkpointing": false,
+    "hidden_act": "gelu",
+    "hidden_dropout_prob": 0.1,
     "hidden_size": 768,
-    "max_position_embeddings": 0,
-    "num_hidden_layers": 0,
-    "patch_size": 1,
-    "vocab_size": 0
+    "id2label": {
+      "0": "LABEL_0",
+      "1": "LABEL_1"
+    },
+    "initializer_range": 0.02,
+    "intermediate_size": 3072,
+    "is_decoder": false,
+    "is_encoder_decoder": false,
+    "label2id": {
+      "LABEL_0": 0,
+      "LABEL_1": 1
+    },
+    "layer_norm_eps": 1e-12,
+    "length_penalty": 1.0,
+    "max_length": 20,
+    "max_position_embeddings": 514,
+    "min_length": 0,
+    "model_type": "roberta",
+    "no_repeat_ngram_size": 0,
+    "num_attention_heads": 12,
+    "num_beam_groups": 1,
+    "num_beams": 1,
+    "num_hidden_layers": 12,
+    "num_return_sequences": 1,
+    "output_attentions": false,
+    "output_hidden_states": false,
+    "output_scores": false,
+    "pad_token_id": 1,
+    "position_embedding_type": "absolute",
+    "prefix": null,
+    "problem_type": null,
+    "pruned_heads": {},
+    "remove_invalid_values": false,
+    "repetition_penalty": 1.0,
+    "return_dict": true,
+    "return_dict_in_generate": false,
+    "sep_token_id": null,
+    "suppress_tokens": null,
+    "task_specific_params": null,
+    "temperature": 1.0,
+    "tf_legacy_loss": false,
+    "tie_encoder_decoder": false,
+    "tie_word_embeddings": true,
+    "tokenizer_class": null,
+    "top_k": 50,
+    "top_p": 1.0,
+    "torch_dtype": "float32",
+    "torchscript": false,
+    "transformers_version": "4.24.0",
+    "type_vocab_size": 1,
+    "typical_p": 1.0,
+    "use_bfloat16": false,
+    "use_cache": true,
+    "vocab_size": 42000
   },
   "torch_dtype": "float32",
   "transformers_version": null,
   "vision_config": {
-    "_name_or_path": "",
+    "_name_or_path": "amir7d0/clip-vit-fa",
     "add_cross_attention": false,
-    "architectures": null,
+    "architectures": [
+      "CLIPVisionModel"
+    ],
     "attention_dropout": 0.0,
     "bad_words_ids": null,
     "begin_suppress_tokens": null,
@@ -133,7 +218,6 @@
     "layer_norm_eps": 1e-05,
     "length_penalty": 1.0,
     "max_length": 20,
-    "max_position_embeddings": 0,
     "min_length": 0,
     "model_type": "clip_vision_model",
     "no_repeat_ngram_size": 0,
@@ -141,15 +225,16 @@
     "num_beam_groups": 1,
     "num_beams": 1,
     "num_channels": 3,
-    "num_hidden_layers": 0,
+    "num_hidden_layers": 12,
     "num_return_sequences": 1,
     "output_attentions": false,
     "output_hidden_states": false,
     "output_scores": false,
     "pad_token_id": null,
-    "patch_size": 1,
+    "patch_size": 32,
     "prefix": null,
     "problem_type": null,
+    "projection_dim": 512,
     "pruned_heads": {},
     "remove_invalid_values": false,
     "repetition_penalty": 1.0,
@@ -159,25 +244,380 @@
     "suppress_tokens": null,
     "task_specific_params": null,
     "temperature": 1.0,
+    "text_config": {
+      "_name_or_path": "",
+      "add_cross_attention": false,
+      "architectures": null,
+      "attention_dropout": 0.0,
+      "bad_words_ids": null,
+      "bos_token_id": 0,
+      "chunk_size_feed_forward": 0,
+      "decoder_start_token_id": null,
+      "diversity_penalty": 0.0,
+      "do_sample": false,
+      "dropout": 0.0,
+      "early_stopping": false,
+      "encoder_no_repeat_ngram_size": 0,
+      "eos_token_id": 2,
+      "finetuning_task": null,
+      "forced_bos_token_id": null,
+      "forced_eos_token_id": null,
+      "gradient_checkpointing": false,
+      "hidden_act": "quick_gelu",
+      "hidden_size": 512,
+      "id2label": {
+        "0": "LABEL_0",
+        "1": "LABEL_1"
+      },
+      "initializer_factor": 1.0,
+      "initializer_range": 0.02,
+      "intermediate_size": 2048,
+      "is_decoder": false,
+      "is_encoder_decoder": false,
+      "label2id": {
+        "LABEL_0": 0,
+        "LABEL_1": 1
+      },
+      "layer_norm_eps": 1e-05,
+      "length_penalty": 1.0,
+      "max_length": 20,
+      "max_position_embeddings": 77,
+      "min_length": 0,
+      "model_type": "clip_text_model",
+      "no_repeat_ngram_size": 0,
+      "num_attention_heads": 8,
+      "num_beam_groups": 1,
+      "num_beams": 1,
+      "num_hidden_layers": 12,
+      "num_return_sequences": 1,
+      "output_attentions": false,
+      "output_hidden_states": false,
+      "output_scores": false,
+      "pad_token_id": 1,
+      "prefix": null,
+      "problem_type": null,
+      "pruned_heads": {},
+      "remove_invalid_values": false,
+      "repetition_penalty": 1.0,
+      "return_dict": true,
+      "return_dict_in_generate": false,
+      "sep_token_id": null,
+      "task_specific_params": null,
+      "temperature": 1.0,
+      "tie_encoder_decoder": false,
+      "tie_word_embeddings": true,
+      "tokenizer_class": null,
+      "top_k": 50,
+      "top_p": 1.0,
+      "torchscript": false,
+      "transformers_version": "4.7.0.dev0",
+      "use_bfloat16": false,
+      "vocab_size": 49408
+    },
+    "text_config_dict": null,
     "tf_legacy_loss": false,
     "tie_encoder_decoder": false,
     "tie_word_embeddings": true,
     "tokenizer_class": null,
     "top_k": 50,
     "top_p": 1.0,
-    "torch_dtype": null,
+    "torch_dtype": "float32",
     "torchscript": false,
     "transformers_version": "4.24.0",
     "typical_p": 1.0,
     "use_bfloat16": false,
-    "vocab_size": 0
+    "vision_config": {
+      "_name_or_path": "",
+      "add_cross_attention": false,
+      "architectures": null,
+      "attention_dropout": 0.0,
+      "bad_words_ids": null,
+      "bos_token_id": null,
+      "chunk_size_feed_forward": 0,
+      "decoder_start_token_id": null,
+      "diversity_penalty": 0.0,
+      "do_sample": false,
+      "dropout": 0.0,
+      "early_stopping": false,
+      "encoder_no_repeat_ngram_size": 0,
+      "eos_token_id": null,
+      "finetuning_task": null,
+      "forced_bos_token_id": null,
+      "forced_eos_token_id": null,
+      "gradient_checkpointing": false,
+      "hidden_act": "quick_gelu",
+      "hidden_size": 768,
+      "id2label": {
+        "0": "LABEL_0",
+        "1": "LABEL_1"
+      },
+      "image_size": 224,
+      "initializer_factor": 1.0,
+      "initializer_range": 0.02,
+      "intermediate_size": 3072,
+      "is_decoder": false,
+      "is_encoder_decoder": false,
+      "label2id": {
+        "LABEL_0": 0,
+        "LABEL_1": 1
+      },
+      "layer_norm_eps": 1e-05,
+      "length_penalty": 1.0,
+      "max_length": 20,
+      "min_length": 0,
+      "model_type": "clip_vision_model",
+      "no_repeat_ngram_size": 0,
+      "num_attention_heads": 12,
+      "num_beam_groups": 1,
+      "num_beams": 1,
+      "num_hidden_layers": 12,
+      "num_return_sequences": 1,
+      "output_attentions": false,
+      "output_hidden_states": false,
+      "output_scores": false,
+      "pad_token_id": null,
+      "patch_size": 32,
+      "prefix": null,
+      "problem_type": null,
+      "pruned_heads": {},
+      "remove_invalid_values": false,
+      "repetition_penalty": 1.0,
+      "return_dict": true,
+      "return_dict_in_generate": false,
+      "sep_token_id": null,
+      "task_specific_params": null,
+      "temperature": 1.0,
+      "tie_encoder_decoder": false,
+      "tie_word_embeddings": true,
+      "tokenizer_class": null,
+      "top_k": 50,
+      "top_p": 1.0,
+      "torchscript": false,
+      "transformers_version": "4.7.0.dev0",
+      "use_bfloat16": false
+    },
+    "vision_config_dict": null
   },
   "vision_config_dict": {
-    "hidden_size": 1024,
-    "intermediate_size": 4096,
-    "num_attention_heads": 16,
-    "num_hidden_layers": 24,
-    "patch_size": 14,
-    "projection_dim": 768
+    "_name_or_path": "amir7d0/clip-vit-fa",
+    "add_cross_attention": false,
+    "architectures": [
+      "CLIPVisionModel"
+    ],
+    "attention_dropout": 0.0,
+    "bad_words_ids": null,
+    "begin_suppress_tokens": null,
+    "bos_token_id": null,
+    "chunk_size_feed_forward": 0,
+    "cross_attention_hidden_size": null,
+    "decoder_start_token_id": null,
+    "diversity_penalty": 0.0,
+    "do_sample": false,
+    "dropout": 0.0,
+    "early_stopping": false,
+    "encoder_no_repeat_ngram_size": 0,
+    "eos_token_id": null,
+    "exponential_decay_length_penalty": null,
+    "finetuning_task": null,
+    "forced_bos_token_id": null,
+    "forced_eos_token_id": null,
+    "hidden_act": "quick_gelu",
+    "hidden_size": 768,
+    "id2label": {
+      "0": "LABEL_0",
+      "1": "LABEL_1"
+    },
+    "image_size": 224,
+    "initializer_factor": 1.0,
+    "initializer_range": 0.02,
+    "intermediate_size": 3072,
+    "is_decoder": false,
+    "is_encoder_decoder": false,
+    "label2id": {
+      "LABEL_0": 0,
+      "LABEL_1": 1
+    },
+    "layer_norm_eps": 1e-05,
+    "length_penalty": 1.0,
+    "max_length": 20,
+    "min_length": 0,
+    "model_type": "clip_vision_model",
+    "no_repeat_ngram_size": 0,
+    "num_attention_heads": 12,
+    "num_beam_groups": 1,
+    "num_beams": 1,
+    "num_channels": 3,
+    "num_hidden_layers": 12,
+    "num_return_sequences": 1,
+    "output_attentions": false,
+    "output_hidden_states": false,
+    "output_scores": false,
+    "pad_token_id": null,
+    "patch_size": 32,
+    "prefix": null,
+    "problem_type": null,
+    "projection_dim": 512,
+    "pruned_heads": {},
+    "remove_invalid_values": false,
+    "repetition_penalty": 1.0,
+    "return_dict": true,
+    "return_dict_in_generate": false,
+    "sep_token_id": null,
+    "suppress_tokens": null,
+    "task_specific_params": null,
+    "temperature": 1.0,
+    "text_config": {
+      "_name_or_path": "",
+      "add_cross_attention": false,
+      "architectures": null,
+      "attention_dropout": 0.0,
+      "bad_words_ids": null,
+      "bos_token_id": 0,
+      "chunk_size_feed_forward": 0,
+      "decoder_start_token_id": null,
+      "diversity_penalty": 0.0,
+      "do_sample": false,
+      "dropout": 0.0,
+      "early_stopping": false,
+      "encoder_no_repeat_ngram_size": 0,
+      "eos_token_id": 2,
+      "finetuning_task": null,
+      "forced_bos_token_id": null,
+      "forced_eos_token_id": null,
+      "gradient_checkpointing": false,
+      "hidden_act": "quick_gelu",
+      "hidden_size": 512,
+      "id2label": {
+        "0": "LABEL_0",
+        "1": "LABEL_1"
+      },
+      "initializer_factor": 1.0,
+      "initializer_range": 0.02,
+      "intermediate_size": 2048,
+      "is_decoder": false,
+      "is_encoder_decoder": false,
+      "label2id": {
+        "LABEL_0": 0,
+        "LABEL_1": 1
+      },
+      "layer_norm_eps": 1e-05,
+      "length_penalty": 1.0,
+      "max_length": 20,
+      "max_position_embeddings": 77,
+      "min_length": 0,
+      "model_type": "clip_text_model",
+      "no_repeat_ngram_size": 0,
+      "num_attention_heads": 8,
+      "num_beam_groups": 1,
+      "num_beams": 1,
+      "num_hidden_layers": 12,
+      "num_return_sequences": 1,
+      "output_attentions": false,
+      "output_hidden_states": false,
+      "output_scores": false,
+      "pad_token_id": 1,
+      "prefix": null,
+      "problem_type": null,
+      "pruned_heads": {},
+      "remove_invalid_values": false,
+      "repetition_penalty": 1.0,
+      "return_dict": true,
+      "return_dict_in_generate": false,
+      "sep_token_id": null,
+      "task_specific_params": null,
+      "temperature": 1.0,
+      "tie_encoder_decoder": false,
+      "tie_word_embeddings": true,
+      "tokenizer_class": null,
+      "top_k": 50,
+      "top_p": 1.0,
+      "torchscript": false,
+      "transformers_version": "4.7.0.dev0",
+      "use_bfloat16": false,
+      "vocab_size": 49408
+    },
+    "text_config_dict": null,
+    "tf_legacy_loss": false,
+    "tie_encoder_decoder": false,
+    "tie_word_embeddings": true,
+    "tokenizer_class": null,
+    "top_k": 50,
+    "top_p": 1.0,
+    "torch_dtype": "float32",
+    "torchscript": false,
+    "transformers_version": "4.24.0",
+    "typical_p": 1.0,
+    "use_bfloat16": false,
+    "vision_config": {
+      "_name_or_path": "",
+      "add_cross_attention": false,
+      "architectures": null,
+      "attention_dropout": 0.0,
+      "bad_words_ids": null,
+      "bos_token_id": null,
+      "chunk_size_feed_forward": 0,
+      "decoder_start_token_id": null,
+      "diversity_penalty": 0.0,
+      "do_sample": false,
+      "dropout": 0.0,
+      "early_stopping": false,
+      "encoder_no_repeat_ngram_size": 0,
+      "eos_token_id": null,
+      "finetuning_task": null,
+      "forced_bos_token_id": null,
+      "forced_eos_token_id": null,
+      "gradient_checkpointing": false,
+      "hidden_act": "quick_gelu",
+      "hidden_size": 768,
+      "id2label": {
+        "0": "LABEL_0",
+        "1": "LABEL_1"
+      },
+      "image_size": 224,
+      "initializer_factor": 1.0,
+      "initializer_range": 0.02,
+      "intermediate_size": 3072,
+      "is_decoder": false,
+      "is_encoder_decoder": false,
+      "label2id": {
+        "LABEL_0": 0,
+        "LABEL_1": 1
+      },
+      "layer_norm_eps": 1e-05,
+      "length_penalty": 1.0,
+      "max_length": 20,
+      "min_length": 0,
+      "model_type": "clip_vision_model",
+      "no_repeat_ngram_size": 0,
+      "num_attention_heads": 12,
+      "num_beam_groups": 1,
+      "num_beams": 1,
+      "num_hidden_layers": 12,
+      "num_return_sequences": 1,
+      "output_attentions": false,
+      "output_hidden_states": false,
+      "output_scores": false,
+      "pad_token_id": null,
+      "patch_size": 32,
+      "prefix": null,
+      "problem_type": null,
+      "pruned_heads": {},
+      "remove_invalid_values": false,
+      "repetition_penalty": 1.0,
+      "return_dict": true,
+      "return_dict_in_generate": false,
+      "sep_token_id": null,
+      "task_specific_params": null,
+      "temperature": 1.0,
+      "tie_encoder_decoder": false,
+      "tie_word_embeddings": true,
+      "tokenizer_class": null,
+      "top_k": 50,
+      "top_p": 1.0,
+      "torchscript": false,
+      "transformers_version": "4.7.0.dev0",
+      "use_bfloat16": false
+    },
+    "vision_config_dict": null
   }
 }
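The updated config.json fills in the previously empty text and vision sub-configs: the text encoder now carries RoBERTa-style hyperparameters (12 layers, 12 heads, vocab_size 42000, max_position_embeddings 514), and the vision encoder is a 12-layer transformer with patch_size 32 and projection_dim 512. A minimal sketch for checking those values locally, assuming the config.json from this commit has been downloaded to the working directory (the path and the use of plain json are assumptions, not part of the commit):

import json

# Load the config.json uploaded in this commit (local path assumed).
with open("config.json", encoding="utf-8") as f:
    cfg = json.load(f)

# These values come from the "+" side of the diff above.
assert cfg["model_type"] == "clip"
assert cfg["text_config"]["vocab_size"] == 42000
assert cfg["text_config"]["num_hidden_layers"] == 12
assert cfg["text_config"]["max_position_embeddings"] == 514
assert cfg["vision_config"]["patch_size"] == 32
assert cfg["vision_config"]["num_hidden_layers"] == 12
assert cfg["projection_dim"] == 512
print("config.json matches commit 9f3e48b")

With a recent transformers release the same file should also load via CLIPConfig.from_json_file("config.json"), though the RoBERTa-specific keys in text_config (e.g. position_embedding_type, type_vocab_size) go beyond the stock CLIPTextConfig fields and are kept only as extra attributes.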
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:334d43676e94c5ad5cfea0f171255a8e827dedbf33186ecff93155bed24127d4
-size 823159263
+oid sha256:72e2aa2c90242720595c6e78515ca21e9e704c96d60f51215157ccc1172ea5f0
+size 826305627
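The pytorch_model.bin entry is a Git LFS pointer, so only its SHA-256 object id and byte size change in this commit; the new weights themselves live in LFS storage. A minimal sketch, assuming the updated binary has been downloaded next to the script, for confirming it matches the new pointer:

import hashlib
import os

# Check a downloaded pytorch_model.bin against the LFS pointer above (local path assumed).
path = "pytorch_model.bin"
expected_oid = "72e2aa2c90242720595c6e78515ca21e9e704c96d60f51215157ccc1172ea5f0"
expected_size = 826305627

sha256 = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # read in 1 MiB chunks
        sha256.update(chunk)

assert os.path.getsize(path) == expected_size, "size mismatch"
assert sha256.hexdigest() == expected_oid, "sha256 mismatch"
print("pytorch_model.bin matches the LFS pointer in commit 9f3e48b")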