Upload tokenizer.json
Browse files- tokenizer.json +28 -0
tokenizer.json
CHANGED
@@ -98,6 +98,34 @@
|
|
98 |
},
|
99 |
"content":""
|
100 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
101 |
{
|
102 |
"type":"BPEDecoder",
|
103 |
"suffix":"</w>"
|
|
|
98 |
},
|
99 |
"content":""
|
100 |
},
|
101 |
+
{
|
102 |
+
"type":"Replace",
|
103 |
+
"pattern":{
|
104 |
+
"String":"__start__"
|
105 |
+
},
|
106 |
+
"content":"__start__</w>"
|
107 |
+
},
|
108 |
+
{
|
109 |
+
"type":"Replace",
|
110 |
+
"pattern":{
|
111 |
+
"String":"__end__"
|
112 |
+
},
|
113 |
+
"content":"__end__</w>"
|
114 |
+
},
|
115 |
+
{
|
116 |
+
"type":"Replace",
|
117 |
+
"pattern":{
|
118 |
+
"String":"__unk__"
|
119 |
+
},
|
120 |
+
"content":"__unk__</w>"
|
121 |
+
},
|
122 |
+
{
|
123 |
+
"type":"Replace",
|
124 |
+
"pattern":{
|
125 |
+
"String":"__null__"
|
126 |
+
},
|
127 |
+
"content":"__null__</w>"
|
128 |
+
},
|
129 |
{
|
130 |
"type":"BPEDecoder",
|
131 |
"suffix":"</w>"
|