add tokenizer
- tokenizer.json +10 -25
- vocab.txt +5 -5
tokenizer.json
CHANGED

@@ -85,12 +85,6 @@
           "id": "A",
           "type_id": 0
         }
-      },
-      {
-        "SpecialToken": {
-          "id": "[SEP]",
-          "type_id": 0
-        }
       }
     ],
     "pair": [
@@ -116,15 +110,6 @@
         "tokens": [
           "[CLS]"
         ]
-      },
-      "[SEP]": {
-        "id": "[SEP]",
-        "ids": [
-          3
-        ],
-        "tokens": [
-          "[SEP]"
-        ]
       }
     }
   },
@@ -198,17 +183,17 @@
       "r": 56,
       "s": 57,
       "##\"": 58,
-      "##
+      "##c": 59,
       "##C": 60,
-      "##O": 61,
-      "##S": 62,
-      "##E": 63,
-      "##P": 64,
-      "##]": 65,
-      "##B": 66,
-      "##F": 67,
-      "##N": 68,
-      "##
+      "##S": 61,
+      "##E": 62,
+      "##P": 63,
+      "##]": 64,
+      "##N": 65,
+      "##F": 66,
+      "##B": 67,
+      "##[": 68,
+      "##O": 69
     }
   }
 }
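The first two hunks drop the trailing `[SEP]` from the TemplateProcessing post-processor, so single-sequence encodings now only get a leading `[CLS]`. A minimal sketch (not part of this commit) of how to observe that with the Hugging Face `tokenizers` library; the relative file path is an assumption:

```python
from tokenizers import Tokenizer

# Load the updated tokenizer.json from this repo (path is an assumption).
tok = Tokenizer.from_file("tokenizer.json")

# With the old single-sequence template ("[CLS] $A [SEP]"), encodings ended
# with "[SEP]"; after this change only "[CLS]" is prepended.
enc = tok.encode("rs")
print(enc.tokens)  # starts with '[CLS]' and no longer ends with '[SEP]'
```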
vocab.txt
CHANGED

@@ -57,14 +57,14 @@ o
 r
 s
 ##"
-##
+##c
 ##C
-##O
 ##S
 ##E
 ##P
 ##]
-##B
-##F
 ##N
-##
+##F
+##B
+##[
+##O
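Since vocab.txt and the `"vocab"` map in tokenizer.json describe the same vocabulary (line N of vocab.txt holds the token with id N-1, e.g. `##C` at line 61 has id 60), a quick consistency check can catch drift after an edit like this one. A hedged sketch; the file paths and the WordPiece-style `model.vocab` layout are assumptions:

```python
import json

# Load the token -> id assignment from tokenizer.json; for a WordPiece-style
# model the vocabulary lives under model.vocab (assumed here).
with open("tokenizer.json", encoding="utf-8") as f:
    vocab = json.load(f)["model"]["vocab"]

# vocab.txt lists one token per line, in id order (line N holds id N-1).
with open("vocab.txt", encoding="utf-8") as f:
    lines = [line.rstrip("\n") for line in f]

for token, idx in vocab.items():
    assert lines[idx] == token, f"id {idx}: vocab.txt has {lines[idx]!r}, tokenizer.json has {token!r}"
print(f"vocab.txt and tokenizer.json agree on {len(vocab)} tokens")
```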