Add v2023 model and tokenizer (BBPE)

Files changed (4)

config.json CHANGED Viewed

@@ -15,7 +15,7 @@
   "max_position_embeddings": 512,
   "model_type": "electra",
   "type_vocab_size": 2,
-  "vocab_size": 54343,
-  "pad_token_id": 0,
-  "tokenizer_class": "BertTokenizer"
+  "vocab_size": 30000,
+  "pad_token_id": 3,
+  "tokenizer_class": "PreTrainedTokenizerFast"
 }

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3460bb4187cdda4fbc6c8d7dfb97779e43660a86091dc9d1f01810eb5264fe53
-size 511193709
+oid sha256:fb28c533dac6884c7eb6e6f8bc194e09a4cecf29f04b724fc85456055aa702d7
+size 436413813

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

vocab.txt DELETED Viewed

The diff for this file is too large to render. See raw diff