ArthurZ HF staff committed on
Commit
4507c51
1 Parent(s): 2e48a32

Upload tokenizer

Browse files
added_tokens.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "[book]": 11,
3
+ "[c4]": 12,
4
+ "[code]": 10,
5
+ "[convo]": 7,
6
+ "[eod]": 4,
7
+ "[eot]": 15,
8
+ "[fc]": 8,
9
+ "[ffc]": 9,
10
+ "[forum]": 14,
11
+ "[news]": 13,
12
+ "[translate]": 6,
13
+ "[wiki]": 5
14
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "[eod]",
4
+ "[wiki]",
5
+ "[translate]",
6
+ "[convo]",
7
+ "[fc]",
8
+ "[ffc]",
9
+ "[code]",
10
+ "[book]",
11
+ "[c4]",
12
+ "[news]",
13
+ "[forum]",
14
+ "[eot]"
15
+ ],
16
+ "bos_token": "<s>",
17
+ "eos_token": "</s>",
18
+ "pad_token": "<pad>",
19
+ "unk_token": "<unk>"
20
+ }
spiece.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3909a67b780650b35cf529ac782ad2b6b26e6d1f849d3fbb6a872905f452458
3
+ size 4548313
tokenizer_config.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "clean_up_tokenization_spaces": true,
4
+ "eos_token": "</s>",
5
+ "model_max_length": 1000000000000000019884624838656,
6
+ "pad_token": "<pad>",
7
+ "sep_token": "<::::>",
8
+ "sp_model_kwargs": {},
9
+ "tokenizer_class": "BertGenerationTokenizer",
10
+ "unk_token": "<unk>"
11
+ }