Update tokenizer.json
Browse files
- tokenizer.json +3 -2
tokenizer.json
CHANGED
|
@@ -26,7 +26,7 @@
|
|
| 26 |
{
|
| 27 |
"type": "Replace",
|
| 28 |
"pattern": {
|
| 29 |
-
"Regex": "[^a-zA-Z0-9\\s\\-
|
| 30 |
},
|
| 31 |
"content": ""
|
| 32 |
}
|
|
@@ -163,7 +163,8 @@
|
|
| 163 |
"̀": 117,
|
| 164 |
"́": 118,
|
| 165 |
"̂": 119,
|
| 166 |
-
"̌": 120
|
|
|
|
| 167 |
},
|
| 168 |
"unk_token": "�"
|
| 169 |
}
|
|
|
|
| 26 |
{
|
| 27 |
"type": "Replace",
|
| 28 |
"pattern": {
|
| 29 |
+
"Regex": "[^a-zA-Z0-9\\s\\-!\"$%()*+,.\\/:;?@_ÀÁÂÉÈÊËÌÍÎÏÒÓÔÙÚÛŸŃŊŋƆɔƐɛʉǎǐǒǔḿẅ’ʼ£̀́̂̌]"
|
| 30 |
},
|
| 31 |
"content": ""
|
| 32 |
}
|
|
|
|
| 163 |
"̀": 117,
|
| 164 |
"́": 118,
|
| 165 |
"̂": 119,
|
| 166 |
+
"̌": 120,
|
| 167 |
+
"�": 121
|
| 168 |
},
|
| 169 |
"unk_token": "�"
|
| 170 |
}
|