add pt model, tokenizer, feat extractor

Files changed (8) hide show

config.json CHANGED Viewed

@@ -1,10 +1,12 @@
 {
   "architectures": [
-    "HybridCLIP"
   ],
   "freeze_backbones": true,
   "initializer_factor": 1.0,
-  "model_type": "hybrid-clip",
   "projection_dim": 512,
   "seed": 42,
   "text_config": {
@@ -17,6 +19,8 @@
     "bad_words_ids": null,
     "bos_token_id": null,
     "chunk_size_feed_forward": 0,
     "decoder_start_token_id": null,
     "diversity_penalty": 0.0,
     "do_sample": false,
@@ -76,12 +80,13 @@
     "top_p": 1.0,
     "torch_dtype": null,
     "torchscript": false,
-    "transformers_version": "4.9.0.dev0",
     "type_vocab_size": 2,
     "use_bfloat16": false,
     "use_cache": true,
     "vocab_size": 32102
   },
   "transformers_version": null,
   "vision_config": {
     "_name_or_path": "",
@@ -91,6 +96,7 @@
     "bad_words_ids": null,
     "bos_token_id": null,
     "chunk_size_feed_forward": 0,
     "decoder_start_token_id": null,
     "diversity_penalty": 0.0,
     "do_sample": false,
@@ -151,7 +157,7 @@
     "top_p": 1.0,
     "torch_dtype": null,
     "torchscript": false,
-    "transformers_version": "4.9.0.dev0",
     "use_bfloat16": false
   }
 }

 {
+  "_name_or_path": "clip-italian",
   "architectures": [
+    "VisionTextDualEncoderModel"
   ],
   "freeze_backbones": true,
   "initializer_factor": 1.0,
+  "logit_scale_init_value": 1,
+  "model_type": "vision-text-dual-encoder",
   "projection_dim": 512,
   "seed": 42,
   "text_config": {
     "bad_words_ids": null,
     "bos_token_id": null,
     "chunk_size_feed_forward": 0,
+    "classifier_dropout": null,
+    "cross_attention_hidden_size": null,
     "decoder_start_token_id": null,
     "diversity_penalty": 0.0,
     "do_sample": false,
     "top_p": 1.0,
     "torch_dtype": null,
     "torchscript": false,
+    "transformers_version": "4.13.0.dev0",
     "type_vocab_size": 2,
     "use_bfloat16": false,
     "use_cache": true,
     "vocab_size": 32102
   },
+  "torch_dtype": "float32",
   "transformers_version": null,
   "vision_config": {
     "_name_or_path": "",
     "bad_words_ids": null,
     "bos_token_id": null,
     "chunk_size_feed_forward": 0,
+    "cross_attention_hidden_size": null,
     "decoder_start_token_id": null,
     "diversity_penalty": 0.0,
     "do_sample": false,
     "top_p": 1.0,
     "torch_dtype": null,
     "torchscript": false,
+    "transformers_version": "4.13.0.dev0",
     "use_bfloat16": false
   }
 }

flax_model.msgpack CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:58b92aa4ee1096e2059c8ab8e88334def7e9cdc142e82a564427be2db150b430
-size 795766586

 version https://git-lfs.github.com/spec/v1
+oid sha256:1d33679dc0f9fdfbcdca2c747fd8298590dd52a86668201ce85d4bd1a5685e1f
+size 795766616

preprocessor_config.json ADDED Viewed

+{
+  "crop_size": 224,
+  "do_center_crop": true,
+  "do_normalize": true,
+  "do_resize": true,
+  "feature_extractor_type": "CLIPFeatureExtractor",
+  "image_mean": [
+    0.48145466,
+    0.4578275,
+    0.40821073
+  ],
+  "image_std": [
+    0.26862954,
+    0.26130258,
+    0.27577711
+  ],
+  "resample": 3,
+  "size": 224
+}

pytorch_model.bin ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:a41ee21fc1b9415547cb3aa93273a302f6a4bb92a1c1a7ec1c0f99ee1b3d8bce
+size 795915483

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "max_len": 512, "special_tokens_map_file": null, "name_or_path": "dbmdz/bert-base-italian-xxl-uncased", "do_basic_tokenize": true, "never_split": null, "tokenizer_class": "BertTokenizer"}

vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff