Upload tokenizer
Browse files
- added_tokens.json +3 -1
- tokenizer_config.json +16 -0
added_tokens.json
CHANGED
@@ -1607,5 +1607,7 @@
|
|
1607 |
"<|yi|>": 50335,
|
1608 |
"<|yo|>": 50325,
|
1609 |
"<|yue|>": 50358,
|
1610 |
-
"<|zh|>": 50260
|
|
|
|
|
1611 |
}
|
|
|
1607 |
"<|yi|>": 50335,
|
1608 |
"<|yo|>": 50325,
|
1609 |
"<|yue|>": 50358,
|
1610 |
+
"<|zh|>": 50260,
|
1611 |
+
"[END_PAD]": 51867,
|
1612 |
+
"[PAD]": 51866
|
1613 |
}
|
tokenizer_config.json
CHANGED
@@ -12872,6 +12872,22 @@
|
|
12872 |
"rstrip": false,
|
12873 |
"single_word": false,
|
12874 |
"special": false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12875 |
}
|
12876 |
},
|
12877 |
"additional_special_tokens": [
|
|
|
12872 |
"rstrip": false,
|
12873 |
"single_word": false,
|
12874 |
"special": false
|
12875 |
+
},
|
12876 |
+
"51866": {
|
12877 |
+
"content": "[PAD]",
|
12878 |
+
"lstrip": false,
|
12879 |
+
"normalized": true,
|
12880 |
+
"rstrip": false,
|
12881 |
+
"single_word": false,
|
12882 |
+
"special": false
|
12883 |
+
},
|
12884 |
+
"51867": {
|
12885 |
+
"content": "[END_PAD]",
|
12886 |
+
"lstrip": false,
|
12887 |
+
"normalized": true,
|
12888 |
+
"rstrip": false,
|
12889 |
+
"single_word": false,
|
12890 |
+
"special": false
|
12891 |
}
|
12892 |
},
|
12893 |
"additional_special_tokens": [
|