june42 committed on
Commit
5deca41
1 Parent(s): 4cf9eb1

Add v2023 model and tokenizer (BBPE)

Browse files
Files changed (4) hide show
  1. config.json +3 -3
  2. pytorch_model.bin +2 -2
  3. tokenizer.json +0 -0
  4. vocab.txt +0 -0
config.json CHANGED
@@ -15,7 +15,7 @@
15
  "max_position_embeddings": 512,
16
  "model_type": "electra",
17
  "type_vocab_size": 2,
18
- "vocab_size": 54343,
19
- "pad_token_id": 0,
20
- "tokenizer_class": "BertTokenizer"
21
  }
15
  "max_position_embeddings": 512,
16
  "model_type": "electra",
17
  "type_vocab_size": 2,
18
+ "vocab_size": 30000,
19
+ "pad_token_id": 3,
20
+ "tokenizer_class": "PreTrainedTokenizerFast"
21
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3460bb4187cdda4fbc6c8d7dfb97779e43660a86091dc9d1f01810eb5264fe53
3
- size 511193709
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb28c533dac6884c7eb6e6f8bc194e09a4cecf29f04b724fc85456055aa702d7
3
+ size 436413813
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
vocab.txt DELETED
The diff for this file is too large to render. See raw diff