johnpaulbin
commited on
Commit
•
4da58cd
1
Parent(s):
37ae70c
Upload processor
Browse files- added_tokens.json +3 -0
- merges.txt +768 -0
- special_tokens_map.json +788 -0
- tokenizer.json +2993 -0
- tokenizer_config.json +990 -0
- vocab.json +1 -0
added_tokens.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"<|tp|>": 1130
|
3 |
+
}
|
merges.txt
ADDED
@@ -0,0 +1,768 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#version: 0.2
|
2 |
+
Ġ l
|
3 |
+
n a
|
4 |
+
Ġl i
|
5 |
+
Ġ t
|
6 |
+
Ġ p
|
7 |
+
Ġ s
|
8 |
+
Ġ m
|
9 |
+
a n
|
10 |
+
Ġ e
|
11 |
+
Ġ k
|
12 |
+
l i
|
13 |
+
o na
|
14 |
+
e n
|
15 |
+
w a
|
16 |
+
a l
|
17 |
+
n i
|
18 |
+
s i
|
19 |
+
Ġ ni
|
20 |
+
al a
|
21 |
+
Ġl a
|
22 |
+
Ġt a
|
23 |
+
Ġp i
|
24 |
+
o n
|
25 |
+
j an
|
26 |
+
Ġm i
|
27 |
+
k i
|
28 |
+
m a
|
29 |
+
Ġs i
|
30 |
+
Ġta wa
|
31 |
+
t e
|
32 |
+
m i
|
33 |
+
Ġl on
|
34 |
+
Ġ w
|
35 |
+
Ġ ala
|
36 |
+
Ġt o
|
37 |
+
i l
|
38 |
+
a ma
|
39 |
+
Ġp ona
|
40 |
+
Ġm u
|
41 |
+
Ġ o
|
42 |
+
Ġ jan
|
43 |
+
Ġp a
|
44 |
+
k e
|
45 |
+
k a
|
46 |
+
Ġsi na
|
47 |
+
Ġl u
|
48 |
+
li n
|
49 |
+
Ġk ama
|
50 |
+
s o
|
51 |
+
Ġ ona
|
52 |
+
p o
|
53 |
+
Ġto ki
|
54 |
+
il e
|
55 |
+
Ġ na
|
56 |
+
en po
|
57 |
+
Ġs u
|
58 |
+
Ġw ile
|
59 |
+
Ġs e
|
60 |
+
Ġ i
|
61 |
+
Ġmu te
|
62 |
+
p a
|
63 |
+
si na
|
64 |
+
Ġ a
|
65 |
+
Ġk en
|
66 |
+
l o
|
67 |
+
p e
|
68 |
+
ki n
|
69 |
+
m e
|
70 |
+
Ġ an
|
71 |
+
j o
|
72 |
+
m o
|
73 |
+
Ġs ona
|
74 |
+
Ġt an
|
75 |
+
Ġlu kin
|
76 |
+
s a
|
77 |
+
Ġpi lin
|
78 |
+
k en
|
79 |
+
al e
|
80 |
+
t a
|
81 |
+
e lo
|
82 |
+
Ġi ke
|
83 |
+
Ġto mo
|
84 |
+
k u
|
85 |
+
Ġ ale
|
86 |
+
il o
|
87 |
+
w i
|
88 |
+
Ġm o
|
89 |
+
p u
|
90 |
+
Ġk u
|
91 |
+
e li
|
92 |
+
Ġse me
|
93 |
+
si n
|
94 |
+
Ġli li
|
95 |
+
Ġt enpo
|
96 |
+
Ġk i
|
97 |
+
Ġsu li
|
98 |
+
Ġ wa
|
99 |
+
Ġk e
|
100 |
+
pe ken
|
101 |
+
Ġpa li
|
102 |
+
Ġke peken
|
103 |
+
j a
|
104 |
+
t enpo
|
105 |
+
Ġm a
|
106 |
+
Ġpa na
|
107 |
+
p i
|
108 |
+
Ġ jo
|
109 |
+
w en
|
110 |
+
w eli
|
111 |
+
Ġt u
|
112 |
+
Ġta so
|
113 |
+
u n
|
114 |
+
Ġan te
|
115 |
+
Ġmo ku
|
116 |
+
e ka
|
117 |
+
l en
|
118 |
+
Ġp o
|
119 |
+
Ġw eka
|
120 |
+
k ala
|
121 |
+
Ġw an
|
122 |
+
Ġlu ka
|
123 |
+
Ġna sin
|
124 |
+
Ġsi n
|
125 |
+
Ġo lin
|
126 |
+
k o
|
127 |
+
n pa
|
128 |
+
Ġmu si
|
129 |
+
Ġwa wa
|
130 |
+
Ġs ama
|
131 |
+
Ġk ala
|
132 |
+
n o
|
133 |
+
Ġt elo
|
134 |
+
l u
|
135 |
+
Ġa wen
|
136 |
+
Ġni mi
|
137 |
+
j e
|
138 |
+
Ġna npa
|
139 |
+
Ġ K
|
140 |
+
te len
|
141 |
+
Ġ T
|
142 |
+
Ġla wa
|
143 |
+
Ġ ilo
|
144 |
+
Ġs o
|
145 |
+
Ġli pu
|
146 |
+
Ġpa kala
|
147 |
+
t u
|
148 |
+
Ġpi ni
|
149 |
+
Ġsi telen
|
150 |
+
u ta
|
151 |
+
Ġsu no
|
152 |
+
Ġso weli
|
153 |
+
Ġse wi
|
154 |
+
Ġna sa
|
155 |
+
ĠT u
|
156 |
+
Ġe n
|
157 |
+
ta so
|
158 |
+
t o
|
159 |
+
j elo
|
160 |
+
Ġki n
|
161 |
+
Ġpo ka
|
162 |
+
lu pu
|
163 |
+
tu li
|
164 |
+
Ġi jo
|
165 |
+
un tuli
|
166 |
+
ĠK untuli
|
167 |
+
l e
|
168 |
+
Ġla pe
|
169 |
+
Ġku te
|
170 |
+
Ġk a
|
171 |
+
so weli
|
172 |
+
Ġan u
|
173 |
+
Ġl en
|
174 |
+
Ġkala ma
|
175 |
+
Ġm on
|
176 |
+
Ġsi ke
|
177 |
+
Ġmo li
|
178 |
+
Ġka si
|
179 |
+
p en
|
180 |
+
Ġala sa
|
181 |
+
Ġsu wi
|
182 |
+
Ġan pa
|
183 |
+
Ġku le
|
184 |
+
ke si
|
185 |
+
n sa
|
186 |
+
Ġo pen
|
187 |
+
Ġ ja
|
188 |
+
Ġk on
|
189 |
+
Ġku lupu
|
190 |
+
Ġ S
|
191 |
+
Ġ u
|
192 |
+
Ġi nsa
|
193 |
+
o je
|
194 |
+
Ġ "
|
195 |
+
Ġl oje
|
196 |
+
Ġse li
|
197 |
+
Ġki li
|
198 |
+
t ala
|
199 |
+
l a
|
200 |
+
Ġa li
|
201 |
+
s un
|
202 |
+
na sin
|
203 |
+
pi n
|
204 |
+
Ġu tala
|
205 |
+
an i
|
206 |
+
Ġpo ki
|
207 |
+
e na
|
208 |
+
s uta
|
209 |
+
Ġsi jelo
|
210 |
+
Ġ n
|
211 |
+
Ġja ki
|
212 |
+
Ġ A
|
213 |
+
Ġ P
|
214 |
+
Ġl e
|
215 |
+
Ġp an
|
216 |
+
Ġm ani
|
217 |
+
Ġk o
|
218 |
+
me ja
|
219 |
+
Ġ uta
|
220 |
+
Ġp u
|
221 |
+
Ġpi meja
|
222 |
+
to ki
|
223 |
+
Ġmon suta
|
224 |
+
Ġn ena
|
225 |
+
Ġli n
|
226 |
+
ni mi
|
227 |
+
Ġmi je
|
228 |
+
Ġmu n
|
229 |
+
Ġlu pa
|
230 |
+
Ġ L
|
231 |
+
Ġm ama
|
232 |
+
ma ko
|
233 |
+
Ġna mako
|
234 |
+
Ġsin pin
|
235 |
+
t an
|
236 |
+
Ġm eli
|
237 |
+
Ġsu pa
|
238 |
+
Ġwa so
|
239 |
+
to mo
|
240 |
+
si ke
|
241 |
+
Ġa kesi
|
242 |
+
no ka
|
243 |
+
l on
|
244 |
+
ka si
|
245 |
+
Ġlin ja
|
246 |
+
m u
|
247 |
+
Ġ noka
|
248 |
+
Ġwa lo
|
249 |
+
a kesi
|
250 |
+
Ġ M
|
251 |
+
Ġ un
|
252 |
+
ku lupu
|
253 |
+
Ġki wen
|
254 |
+
pi si
|
255 |
+
ĠA n
|
256 |
+
i jo
|
257 |
+
s e
|
258 |
+
s an
|
259 |
+
w e
|
260 |
+
Ġ jelo
|
261 |
+
Ġe sun
|
262 |
+
Ġla so
|
263 |
+
Ġpa ke
|
264 |
+
Ġki pisi
|
265 |
+
p an
|
266 |
+
li pu
|
267 |
+
te lo
|
268 |
+
Ġmon si
|
269 |
+
Ġle te
|
270 |
+
Ġun pa
|
271 |
+
al u
|
272 |
+
ke wi
|
273 |
+
kala ma
|
274 |
+
sike ke
|
275 |
+
ĠAn kewi
|
276 |
+
j u
|
277 |
+
si la
|
278 |
+
Ġpali sa
|
279 |
+
k alu
|
280 |
+
Ġ E
|
281 |
+
si telen
|
282 |
+
te san
|
283 |
+
Ġse lo
|
284 |
+
ta kalu
|
285 |
+
je tesan
|
286 |
+
jetesan takalu
|
287 |
+
wa so
|
288 |
+
Ġmi sikeke
|
289 |
+
pi pi
|
290 |
+
Ġpo we
|
291 |
+
ko sila
|
292 |
+
sun o
|
293 |
+
Ġ I
|
294 |
+
on si
|
295 |
+
pa li
|
296 |
+
me li
|
297 |
+
mu si
|
298 |
+
" .
|
299 |
+
k on
|
300 |
+
o lin
|
301 |
+
s u
|
302 |
+
â Ģ
|
303 |
+
Ġ N
|
304 |
+
Ġt onsi
|
305 |
+
wa wa
|
306 |
+
Ġpi pi
|
307 |
+
ma ma
|
308 |
+
Ġo ko
|
309 |
+
mo ku
|
310 |
+
Ġle ko
|
311 |
+
Ġko kosila
|
312 |
+
s ona
|
313 |
+
se me
|
314 |
+
ju na
|
315 |
+
e so
|
316 |
+
Ġm eso
|
317 |
+
lin ja
|
318 |
+
Ġma juna
|
319 |
+
lu kin
|
320 |
+
ĠS on
|
321 |
+
ĠI n
|
322 |
+
s elo
|
323 |
+
w ile
|
324 |
+
Ġl an
|
325 |
+
si ma
|
326 |
+
ki li
|
327 |
+
pi lin
|
328 |
+
lu ka
|
329 |
+
ĠK a
|
330 |
+
ĠT e
|
331 |
+
ĠL i
|
332 |
+
ĠM e
|
333 |
+
ĠIn li
|
334 |
+
Ġlan pan
|
335 |
+
. .
|
336 |
+
i ke
|
337 |
+
s eli
|
338 |
+
t on
|
339 |
+
w an
|
340 |
+
si jelo
|
341 |
+
ki jetesantakalu
|
342 |
+
Ġa pe
|
343 |
+
pe si
|
344 |
+
ku le
|
345 |
+
pi ku
|
346 |
+
Ġso ko
|
347 |
+
Ġja sima
|
348 |
+
ĠP i
|
349 |
+
ĠSon ja
|
350 |
+
Ġape ja
|
351 |
+
Ġ J
|
352 |
+
na npa
|
353 |
+
sin pin
|
354 |
+
Ġki jetesantakalu
|
355 |
+
lu pa
|
356 |
+
la wa
|
357 |
+
a li
|
358 |
+
n ton
|
359 |
+
Ġ âĢ
|
360 |
+
Ġt e
|
361 |
+
ki wen
|
362 |
+
pa kala
|
363 |
+
wi nton
|
364 |
+
lu wi
|
365 |
+
Ġlin luwi
|
366 |
+
ĠL o
|
367 |
+
ĠE winton
|
368 |
+
âĢ Ŀ
|
369 |
+
ĠN i
|
370 |
+
ĠâĢ ľ
|
371 |
+
l an
|
372 |
+
n tu
|
373 |
+
o ko
|
374 |
+
Ġ W
|
375 |
+
Ġm elo
|
376 |
+
Ġe piku
|
377 |
+
mi je
|
378 |
+
pi ni
|
379 |
+
ĠK i
|
380 |
+
ĠT o
|
381 |
+
Ġka pesi
|
382 |
+
ĠS a
|
383 |
+
ĠS u
|
384 |
+
ĠS an
|
385 |
+
ĠS en
|
386 |
+
ĠP a
|
387 |
+
ĠP e
|
388 |
+
ĠL a
|
389 |
+
pali sa
|
390 |
+
ĠTe len
|
391 |
+
ĠLi sa
|
392 |
+
Ġmelo me
|
393 |
+
a wen
|
394 |
+
m on
|
395 |
+
m un
|
396 |
+
p ona
|
397 |
+
Ġ O
|
398 |
+
na ta
|
399 |
+
li ja
|
400 |
+
po ki
|
401 |
+
sa wi
|
402 |
+
Ġku ntu
|
403 |
+
ja ki
|
404 |
+
Ġpu wa
|
405 |
+
ĠM i
|
406 |
+
se wi
|
407 |
+
su pa
|
408 |
+
ĠKa nata
|
409 |
+
ĠMe wi
|
410 |
+
.. .
|
411 |
+
ĠJ u
|
412 |
+
ĠSen i
|
413 |
+
mon suta
|
414 |
+
e piku
|
415 |
+
o pen
|
416 |
+
an te
|
417 |
+
si ja
|
418 |
+
Ġmi jo
|
419 |
+
mi sikeke
|
420 |
+
ama na
|
421 |
+
po ka
|
422 |
+
sa li
|
423 |
+
ĠK u
|
424 |
+
ĠK en
|
425 |
+
ĠT a
|
426 |
+
ĠS i
|
427 |
+
ĠS amana
|
428 |
+
Ġu sawi
|
429 |
+
la pe
|
430 |
+
la pi
|
431 |
+
ĠP o
|
432 |
+
mu te
|
433 |
+
pan po
|
434 |
+
ĠE lo
|
435 |
+
ĠKa pesi
|
436 |
+
ĠKi n
|
437 |
+
ĠSu panpo
|
438 |
+
ĠSan ta
|
439 |
+
ĠLa sina
|
440 |
+
ĠMewi ka
|
441 |
+
Ġmijo mi
|
442 |
+
e ko
|
443 |
+
e sun
|
444 |
+
i mi
|
445 |
+
i nsa
|
446 |
+
j uta
|
447 |
+
k ama
|
448 |
+
m ala
|
449 |
+
n ena
|
450 |
+
u tala
|
451 |
+
y u
|
452 |
+
¼ ģ
|
453 |
+
ï ¼ģ
|
454 |
+
Ġ U
|
455 |
+
na sa
|
456 |
+
Ġs an
|
457 |
+
Ġe te
|
458 |
+
Ġk an
|
459 |
+
wa la
|
460 |
+
si pin
|
461 |
+
ala ta
|
462 |
+
ala sija
|
463 |
+
on ken
|
464 |
+
Ġpa sila
|
465 |
+
ke ke
|
466 |
+
pa na
|
467 |
+
pe lan
|
468 |
+
Ġki ki
|
469 |
+
ko kosila
|
470 |
+
ĠK an
|
471 |
+
ĠK eli
|
472 |
+
ĠT imi
|
473 |
+
Ġka mala
|
474 |
+
ĠS o
|
475 |
+
ĠA si
|
476 |
+
ĠP u
|
477 |
+
ĠP an
|
478 |
+
ĠP ona
|
479 |
+
ĠP alata
|
480 |
+
ĠL e
|
481 |
+
ĠM a
|
482 |
+
ĠM alasija
|
483 |
+
Ġun u
|
484 |
+
ĠE ko
|
485 |
+
ĠE pelan
|
486 |
+
ĠI ta
|
487 |
+
ĠN e
|
488 |
+
ĠN u
|
489 |
+
ĠSon ko
|
490 |
+
ĠLi n
|
491 |
+
ĠMe keke
|
492 |
+
ĠPi ta
|
493 |
+
ĠJ onken
|
494 |
+
ĠLo la
|
495 |
+
âĢĿ .
|
496 |
+
ĠW a
|
497 |
+
ĠTo si
|
498 |
+
ĠTo ki
|
499 |
+
ĠSu san
|
500 |
+
ĠPe pu
|
501 |
+
ĠMi sali
|
502 |
+
ĠSi ko
|
503 |
+
lapi su
|
504 |
+
ĠElo pa
|
505 |
+
ĠKin kili
|
506 |
+
Ġkamala wala
|
507 |
+
ĠEpelan to
|
508 |
+
! "
|
509 |
+
" ,
|
510 |
+
" ?
|
511 |
+
. "
|
512 |
+
? !
|
513 |
+
E ko
|
514 |
+
a sin
|
515 |
+
e si
|
516 |
+
e ta
|
517 |
+
e lon
|
518 |
+
e sija
|
519 |
+
i je
|
520 |
+
i sipin
|
521 |
+
j al
|
522 |
+
j on
|
523 |
+
j ama
|
524 |
+
k an
|
525 |
+
l ani
|
526 |
+
m an
|
527 |
+
m ani
|
528 |
+
n u
|
529 |
+
o mi
|
530 |
+
o se
|
531 |
+
p on
|
532 |
+
p il
|
533 |
+
p ani
|
534 |
+
p alu
|
535 |
+
s on
|
536 |
+
s ama
|
537 |
+
t onsi
|
538 |
+
u pi
|
539 |
+
w ani
|
540 |
+
Ġ yu
|
541 |
+
na wi
|
542 |
+
na ja
|
543 |
+
Ġp eta
|
544 |
+
an u
|
545 |
+
an ka
|
546 |
+
an pa
|
547 |
+
an tan
|
548 |
+
li a
|
549 |
+
li sa
|
550 |
+
li ku
|
551 |
+
li lan
|
552 |
+
wa si
|
553 |
+
wa wi
|
554 |
+
ni ja
|
555 |
+
si ko
|
556 |
+
ala sa
|
557 |
+
Ġta ki
|
558 |
+
on yu
|
559 |
+
Ġmi sa
|
560 |
+
ki ni
|
561 |
+
ki jo
|
562 |
+
ki ko
|
563 |
+
ki mu
|
564 |
+
ki pisi
|
565 |
+
ki san
|
566 |
+
ma juna
|
567 |
+
te lan
|
568 |
+
mi le
|
569 |
+
il a
|
570 |
+
Ġmu lapisu
|
571 |
+
Ġo we
|
572 |
+
Ġo juta
|
573 |
+
ke peken
|
574 |
+
ke naja
|
575 |
+
ka so
|
576 |
+
ka ton
|
577 |
+
ka wan
|
578 |
+
so ko
|
579 |
+
po lo
|
580 |
+
po we
|
581 |
+
Ġi sipin
|
582 |
+
pa jal
|
583 |
+
pa kawan
|
584 |
+
lo je
|
585 |
+
pe ta
|
586 |
+
pe ko
|
587 |
+
me so
|
588 |
+
me su
|
589 |
+
jo le
|
590 |
+
jo ju
|
591 |
+
mo si
|
592 |
+
mo mo
|
593 |
+
mo to
|
594 |
+
sa lin
|
595 |
+
ta wa
|
596 |
+
ta wan
|
597 |
+
ta sali
|
598 |
+
ku kan
|
599 |
+
wi to
|
600 |
+
pu si
|
601 |
+
pu wani
|
602 |
+
Ġwa le
|
603 |
+
Ġke se
|
604 |
+
ja sima
|
605 |
+
pi li
|
606 |
+
pi meja
|
607 |
+
Ġtu li
|
608 |
+
un eko
|
609 |
+
ko wa
|
610 |
+
no kijo
|
611 |
+
je sa
|
612 |
+
je kenaja
|
613 |
+
ĠK ena
|
614 |
+
ĠK ijo
|
615 |
+
ĠK elon
|
616 |
+
ĠK omi
|
617 |
+
ĠK ila
|
618 |
+
ĠT en
|
619 |
+
ĠT onyu
|
620 |
+
Ġso to
|
621 |
+
le te
|
622 |
+
le ko
|
623 |
+
Ġkon we
|
624 |
+
ĠS uneko
|
625 |
+
Ġu mesu
|
626 |
+
la so
|
627 |
+
la sin
|
628 |
+
ĠA li
|
629 |
+
ĠA wi
|
630 |
+
ĠA lapi
|
631 |
+
ĠA man
|
632 |
+
ĠA wawi
|
633 |
+
ĠA polo
|
634 |
+
ĠA tawan
|
635 |
+
ĠA lasin
|
636 |
+
ĠP uta
|
637 |
+
ĠL uta
|
638 |
+
ĠL upi
|
639 |
+
ĠL antan
|
640 |
+
mu lapisu
|
641 |
+
ĠM ena
|
642 |
+
ĠM ose
|
643 |
+
ĠM anka
|
644 |
+
ĠAn se
|
645 |
+
ĠAn son
|
646 |
+
ĠAn kowa
|
647 |
+
ijo sa
|
648 |
+
alu to
|
649 |
+
ju si
|
650 |
+
ju wan
|
651 |
+
ju kini
|
652 |
+
ĠE ki
|
653 |
+
ĠE lisa
|
654 |
+
ĠI lan
|
655 |
+
su li
|
656 |
+
su ka
|
657 |
+
su wi
|
658 |
+
su pan
|
659 |
+
ĠN asin
|
660 |
+
ĠN aluto
|
661 |
+
ĠIn ton
|
662 |
+
ĠKa pil
|
663 |
+
ĠKa jesa
|
664 |
+
ĠTe po
|
665 |
+
ĠTe ja
|
666 |
+
ĠTe wen
|
667 |
+
ĠTe pani
|
668 |
+
ĠLi lija
|
669 |
+
ĠLi lilan
|
670 |
+
ĠMe lani
|
671 |
+
ĠMe siko
|
672 |
+
ĠMe kiko
|
673 |
+
seli ja
|
674 |
+
ĠPi wi
|
675 |
+
ĠPi sin
|
676 |
+
ĠPi ju
|
677 |
+
ĠPi nija
|
678 |
+
ĠPi kaso
|
679 |
+
ĠPi nokijo
|
680 |
+
ĠJ an
|
681 |
+
Ġte je
|
682 |
+
ĠLo pin
|
683 |
+
ĠLo wasi
|
684 |
+
ĠLo katon
|
685 |
+
ĠLo supan
|
686 |
+
ĠNi ki
|
687 |
+
ĠNi mu
|
688 |
+
ĠNi jon
|
689 |
+
ĠNi pon
|
690 |
+
ĠNi mile
|
691 |
+
ĠW eko
|
692 |
+
ĠW ije
|
693 |
+
ĠW ijosa
|
694 |
+
ĠKi ta
|
695 |
+
ĠKi liku
|
696 |
+
ĠTo to
|
697 |
+
ĠSa ku
|
698 |
+
ĠSa salin
|
699 |
+
ĠSa jusi
|
700 |
+
ĠSen tu
|
701 |
+
ĠPa si
|
702 |
+
ĠPa kala
|
703 |
+
ĠPa kisan
|
704 |
+
ĠPa puwani
|
705 |
+
ĠPe we
|
706 |
+
ĠPe kimu
|
707 |
+
ĠPe joju
|
708 |
+
ĠLa jo
|
709 |
+
ĠLa wito
|
710 |
+
ĠO sin
|
711 |
+
ĠO we
|
712 |
+
ĠO juta
|
713 |
+
ĠO selija
|
714 |
+
ĠMi moku
|
715 |
+
ĠMi jama
|
716 |
+
ĠJu li
|
717 |
+
ĠJu ke
|
718 |
+
ĠJu tu
|
719 |
+
ĠJu lija
|
720 |
+
ĠKu pa
|
721 |
+
ĠKu lija
|
722 |
+
ĠKu kukan
|
723 |
+
ĠTa wi
|
724 |
+
ĠTa pajal
|
725 |
+
ĠTa juwan
|
726 |
+
ĠPo ta
|
727 |
+
ĠPo moto
|
728 |
+
ĠPo suka
|
729 |
+
ĠU tu
|
730 |
+
ĠU sawi
|
731 |
+
ĠKan se
|
732 |
+
ĠKan pusi
|
733 |
+
ĠSo momo
|
734 |
+
ĠPu tu
|
735 |
+
ĠPu nawi
|
736 |
+
ĠPan to
|
737 |
+
ĠPan tasali
|
738 |
+
ĠLe wi
|
739 |
+
ĠLe mosi
|
740 |
+
ĠMa ku
|
741 |
+
ĠMa su
|
742 |
+
ĠIta lija
|
743 |
+
ĠIta lia
|
744 |
+
ĠNe palu
|
745 |
+
ĠNe telan
|
746 |
+
ĠNu ken
|
747 |
+
ĠNu wan
|
748 |
+
ĠLin ta
|
749 |
+
ĠLin ja
|
750 |
+
ĠWa jole
|
751 |
+
ĠWa pili
|
752 |
+
esi po
|
753 |
+
Ġyu peko
|
754 |
+
Ġwale ja
|
755 |
+
ĠKena jekenaja
|
756 |
+
ĠKijo to
|
757 |
+
ĠKomi nu
|
758 |
+
ĠAnse tan
|
759 |
+
ĠElisa pe
|
760 |
+
ĠNasin esipo
|
761 |
+
ĠInton esija
|
762 |
+
ĠKapil u
|
763 |
+
ĠKiliku ntu
|
764 |
+
ĠPapuwani jukini
|
765 |
+
ĠTapajal o
|
766 |
+
ĠPomoto lo
|
767 |
+
ĠPantasali pakawan
|
768 |
+
Ġyupeko si
|
special_tokens_map.json
ADDED
@@ -0,0 +1,788 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"additional_special_tokens": [
|
3 |
+
{
|
4 |
+
"content": "<|endoftext|>",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false
|
9 |
+
},
|
10 |
+
{
|
11 |
+
"content": "<|startoftranscript|>",
|
12 |
+
"lstrip": false,
|
13 |
+
"normalized": false,
|
14 |
+
"rstrip": false,
|
15 |
+
"single_word": false
|
16 |
+
},
|
17 |
+
{
|
18 |
+
"content": "<|en|>",
|
19 |
+
"lstrip": false,
|
20 |
+
"normalized": false,
|
21 |
+
"rstrip": false,
|
22 |
+
"single_word": false
|
23 |
+
},
|
24 |
+
{
|
25 |
+
"content": "<|zh|>",
|
26 |
+
"lstrip": false,
|
27 |
+
"normalized": false,
|
28 |
+
"rstrip": false,
|
29 |
+
"single_word": false
|
30 |
+
},
|
31 |
+
{
|
32 |
+
"content": "<|de|>",
|
33 |
+
"lstrip": false,
|
34 |
+
"normalized": false,
|
35 |
+
"rstrip": false,
|
36 |
+
"single_word": false
|
37 |
+
},
|
38 |
+
{
|
39 |
+
"content": "<|es|>",
|
40 |
+
"lstrip": false,
|
41 |
+
"normalized": false,
|
42 |
+
"rstrip": false,
|
43 |
+
"single_word": false
|
44 |
+
},
|
45 |
+
{
|
46 |
+
"content": "<|ru|>",
|
47 |
+
"lstrip": false,
|
48 |
+
"normalized": false,
|
49 |
+
"rstrip": false,
|
50 |
+
"single_word": false
|
51 |
+
},
|
52 |
+
{
|
53 |
+
"content": "<|ko|>",
|
54 |
+
"lstrip": false,
|
55 |
+
"normalized": false,
|
56 |
+
"rstrip": false,
|
57 |
+
"single_word": false
|
58 |
+
},
|
59 |
+
{
|
60 |
+
"content": "<|fr|>",
|
61 |
+
"lstrip": false,
|
62 |
+
"normalized": false,
|
63 |
+
"rstrip": false,
|
64 |
+
"single_word": false
|
65 |
+
},
|
66 |
+
{
|
67 |
+
"content": "<|ja|>",
|
68 |
+
"lstrip": false,
|
69 |
+
"normalized": false,
|
70 |
+
"rstrip": false,
|
71 |
+
"single_word": false
|
72 |
+
},
|
73 |
+
{
|
74 |
+
"content": "<|pt|>",
|
75 |
+
"lstrip": false,
|
76 |
+
"normalized": false,
|
77 |
+
"rstrip": false,
|
78 |
+
"single_word": false
|
79 |
+
},
|
80 |
+
{
|
81 |
+
"content": "<|tr|>",
|
82 |
+
"lstrip": false,
|
83 |
+
"normalized": false,
|
84 |
+
"rstrip": false,
|
85 |
+
"single_word": false
|
86 |
+
},
|
87 |
+
{
|
88 |
+
"content": "<|pl|>",
|
89 |
+
"lstrip": false,
|
90 |
+
"normalized": false,
|
91 |
+
"rstrip": false,
|
92 |
+
"single_word": false
|
93 |
+
},
|
94 |
+
{
|
95 |
+
"content": "<|ca|>",
|
96 |
+
"lstrip": false,
|
97 |
+
"normalized": false,
|
98 |
+
"rstrip": false,
|
99 |
+
"single_word": false
|
100 |
+
},
|
101 |
+
{
|
102 |
+
"content": "<|nl|>",
|
103 |
+
"lstrip": false,
|
104 |
+
"normalized": false,
|
105 |
+
"rstrip": false,
|
106 |
+
"single_word": false
|
107 |
+
},
|
108 |
+
{
|
109 |
+
"content": "<|ar|>",
|
110 |
+
"lstrip": false,
|
111 |
+
"normalized": false,
|
112 |
+
"rstrip": false,
|
113 |
+
"single_word": false
|
114 |
+
},
|
115 |
+
{
|
116 |
+
"content": "<|sv|>",
|
117 |
+
"lstrip": false,
|
118 |
+
"normalized": false,
|
119 |
+
"rstrip": false,
|
120 |
+
"single_word": false
|
121 |
+
},
|
122 |
+
{
|
123 |
+
"content": "<|it|>",
|
124 |
+
"lstrip": false,
|
125 |
+
"normalized": false,
|
126 |
+
"rstrip": false,
|
127 |
+
"single_word": false
|
128 |
+
},
|
129 |
+
{
|
130 |
+
"content": "<|id|>",
|
131 |
+
"lstrip": false,
|
132 |
+
"normalized": false,
|
133 |
+
"rstrip": false,
|
134 |
+
"single_word": false
|
135 |
+
},
|
136 |
+
{
|
137 |
+
"content": "<|hi|>",
|
138 |
+
"lstrip": false,
|
139 |
+
"normalized": false,
|
140 |
+
"rstrip": false,
|
141 |
+
"single_word": false
|
142 |
+
},
|
143 |
+
{
|
144 |
+
"content": "<|fi|>",
|
145 |
+
"lstrip": false,
|
146 |
+
"normalized": false,
|
147 |
+
"rstrip": false,
|
148 |
+
"single_word": false
|
149 |
+
},
|
150 |
+
{
|
151 |
+
"content": "<|vi|>",
|
152 |
+
"lstrip": false,
|
153 |
+
"normalized": false,
|
154 |
+
"rstrip": false,
|
155 |
+
"single_word": false
|
156 |
+
},
|
157 |
+
{
|
158 |
+
"content": "<|he|>",
|
159 |
+
"lstrip": false,
|
160 |
+
"normalized": false,
|
161 |
+
"rstrip": false,
|
162 |
+
"single_word": false
|
163 |
+
},
|
164 |
+
{
|
165 |
+
"content": "<|uk|>",
|
166 |
+
"lstrip": false,
|
167 |
+
"normalized": false,
|
168 |
+
"rstrip": false,
|
169 |
+
"single_word": false
|
170 |
+
},
|
171 |
+
{
|
172 |
+
"content": "<|el|>",
|
173 |
+
"lstrip": false,
|
174 |
+
"normalized": false,
|
175 |
+
"rstrip": false,
|
176 |
+
"single_word": false
|
177 |
+
},
|
178 |
+
{
|
179 |
+
"content": "<|ms|>",
|
180 |
+
"lstrip": false,
|
181 |
+
"normalized": false,
|
182 |
+
"rstrip": false,
|
183 |
+
"single_word": false
|
184 |
+
},
|
185 |
+
{
|
186 |
+
"content": "<|cs|>",
|
187 |
+
"lstrip": false,
|
188 |
+
"normalized": false,
|
189 |
+
"rstrip": false,
|
190 |
+
"single_word": false
|
191 |
+
},
|
192 |
+
{
|
193 |
+
"content": "<|ro|>",
|
194 |
+
"lstrip": false,
|
195 |
+
"normalized": false,
|
196 |
+
"rstrip": false,
|
197 |
+
"single_word": false
|
198 |
+
},
|
199 |
+
{
|
200 |
+
"content": "<|da|>",
|
201 |
+
"lstrip": false,
|
202 |
+
"normalized": false,
|
203 |
+
"rstrip": false,
|
204 |
+
"single_word": false
|
205 |
+
},
|
206 |
+
{
|
207 |
+
"content": "<|hu|>",
|
208 |
+
"lstrip": false,
|
209 |
+
"normalized": false,
|
210 |
+
"rstrip": false,
|
211 |
+
"single_word": false
|
212 |
+
},
|
213 |
+
{
|
214 |
+
"content": "<|ta|>",
|
215 |
+
"lstrip": false,
|
216 |
+
"normalized": false,
|
217 |
+
"rstrip": false,
|
218 |
+
"single_word": false
|
219 |
+
},
|
220 |
+
{
|
221 |
+
"content": "<|no|>",
|
222 |
+
"lstrip": false,
|
223 |
+
"normalized": false,
|
224 |
+
"rstrip": false,
|
225 |
+
"single_word": false
|
226 |
+
},
|
227 |
+
{
|
228 |
+
"content": "<|th|>",
|
229 |
+
"lstrip": false,
|
230 |
+
"normalized": false,
|
231 |
+
"rstrip": false,
|
232 |
+
"single_word": false
|
233 |
+
},
|
234 |
+
{
|
235 |
+
"content": "<|ur|>",
|
236 |
+
"lstrip": false,
|
237 |
+
"normalized": false,
|
238 |
+
"rstrip": false,
|
239 |
+
"single_word": false
|
240 |
+
},
|
241 |
+
{
|
242 |
+
"content": "<|hr|>",
|
243 |
+
"lstrip": false,
|
244 |
+
"normalized": false,
|
245 |
+
"rstrip": false,
|
246 |
+
"single_word": false
|
247 |
+
},
|
248 |
+
{
|
249 |
+
"content": "<|bg|>",
|
250 |
+
"lstrip": false,
|
251 |
+
"normalized": false,
|
252 |
+
"rstrip": false,
|
253 |
+
"single_word": false
|
254 |
+
},
|
255 |
+
{
|
256 |
+
"content": "<|lt|>",
|
257 |
+
"lstrip": false,
|
258 |
+
"normalized": false,
|
259 |
+
"rstrip": false,
|
260 |
+
"single_word": false
|
261 |
+
},
|
262 |
+
{
|
263 |
+
"content": "<|la|>",
|
264 |
+
"lstrip": false,
|
265 |
+
"normalized": false,
|
266 |
+
"rstrip": false,
|
267 |
+
"single_word": false
|
268 |
+
},
|
269 |
+
{
|
270 |
+
"content": "<|mi|>",
|
271 |
+
"lstrip": false,
|
272 |
+
"normalized": false,
|
273 |
+
"rstrip": false,
|
274 |
+
"single_word": false
|
275 |
+
},
|
276 |
+
{
|
277 |
+
"content": "<|ml|>",
|
278 |
+
"lstrip": false,
|
279 |
+
"normalized": false,
|
280 |
+
"rstrip": false,
|
281 |
+
"single_word": false
|
282 |
+
},
|
283 |
+
{
|
284 |
+
"content": "<|cy|>",
|
285 |
+
"lstrip": false,
|
286 |
+
"normalized": false,
|
287 |
+
"rstrip": false,
|
288 |
+
"single_word": false
|
289 |
+
},
|
290 |
+
{
|
291 |
+
"content": "<|sk|>",
|
292 |
+
"lstrip": false,
|
293 |
+
"normalized": false,
|
294 |
+
"rstrip": false,
|
295 |
+
"single_word": false
|
296 |
+
},
|
297 |
+
{
|
298 |
+
"content": "<|te|>",
|
299 |
+
"lstrip": false,
|
300 |
+
"normalized": false,
|
301 |
+
"rstrip": false,
|
302 |
+
"single_word": false
|
303 |
+
},
|
304 |
+
{
|
305 |
+
"content": "<|fa|>",
|
306 |
+
"lstrip": false,
|
307 |
+
"normalized": false,
|
308 |
+
"rstrip": false,
|
309 |
+
"single_word": false
|
310 |
+
},
|
311 |
+
{
|
312 |
+
"content": "<|lv|>",
|
313 |
+
"lstrip": false,
|
314 |
+
"normalized": false,
|
315 |
+
"rstrip": false,
|
316 |
+
"single_word": false
|
317 |
+
},
|
318 |
+
{
|
319 |
+
"content": "<|bn|>",
|
320 |
+
"lstrip": false,
|
321 |
+
"normalized": false,
|
322 |
+
"rstrip": false,
|
323 |
+
"single_word": false
|
324 |
+
},
|
325 |
+
{
|
326 |
+
"content": "<|sr|>",
|
327 |
+
"lstrip": false,
|
328 |
+
"normalized": false,
|
329 |
+
"rstrip": false,
|
330 |
+
"single_word": false
|
331 |
+
},
|
332 |
+
{
|
333 |
+
"content": "<|az|>",
|
334 |
+
"lstrip": false,
|
335 |
+
"normalized": false,
|
336 |
+
"rstrip": false,
|
337 |
+
"single_word": false
|
338 |
+
},
|
339 |
+
{
|
340 |
+
"content": "<|sl|>",
|
341 |
+
"lstrip": false,
|
342 |
+
"normalized": false,
|
343 |
+
"rstrip": false,
|
344 |
+
"single_word": false
|
345 |
+
},
|
346 |
+
{
|
347 |
+
"content": "<|kn|>",
|
348 |
+
"lstrip": false,
|
349 |
+
"normalized": false,
|
350 |
+
"rstrip": false,
|
351 |
+
"single_word": false
|
352 |
+
},
|
353 |
+
{
|
354 |
+
"content": "<|et|>",
|
355 |
+
"lstrip": false,
|
356 |
+
"normalized": false,
|
357 |
+
"rstrip": false,
|
358 |
+
"single_word": false
|
359 |
+
},
|
360 |
+
{
|
361 |
+
"content": "<|mk|>",
|
362 |
+
"lstrip": false,
|
363 |
+
"normalized": false,
|
364 |
+
"rstrip": false,
|
365 |
+
"single_word": false
|
366 |
+
},
|
367 |
+
{
|
368 |
+
"content": "<|br|>",
|
369 |
+
"lstrip": false,
|
370 |
+
"normalized": false,
|
371 |
+
"rstrip": false,
|
372 |
+
"single_word": false
|
373 |
+
},
|
374 |
+
{
|
375 |
+
"content": "<|eu|>",
|
376 |
+
"lstrip": false,
|
377 |
+
"normalized": false,
|
378 |
+
"rstrip": false,
|
379 |
+
"single_word": false
|
380 |
+
},
|
381 |
+
{
|
382 |
+
"content": "<|is|>",
|
383 |
+
"lstrip": false,
|
384 |
+
"normalized": false,
|
385 |
+
"rstrip": false,
|
386 |
+
"single_word": false
|
387 |
+
},
|
388 |
+
{
|
389 |
+
"content": "<|hy|>",
|
390 |
+
"lstrip": false,
|
391 |
+
"normalized": false,
|
392 |
+
"rstrip": false,
|
393 |
+
"single_word": false
|
394 |
+
},
|
395 |
+
{
|
396 |
+
"content": "<|ne|>",
|
397 |
+
"lstrip": false,
|
398 |
+
"normalized": false,
|
399 |
+
"rstrip": false,
|
400 |
+
"single_word": false
|
401 |
+
},
|
402 |
+
{
|
403 |
+
"content": "<|mn|>",
|
404 |
+
"lstrip": false,
|
405 |
+
"normalized": false,
|
406 |
+
"rstrip": false,
|
407 |
+
"single_word": false
|
408 |
+
},
|
409 |
+
{
|
410 |
+
"content": "<|bs|>",
|
411 |
+
"lstrip": false,
|
412 |
+
"normalized": false,
|
413 |
+
"rstrip": false,
|
414 |
+
"single_word": false
|
415 |
+
},
|
416 |
+
{
|
417 |
+
"content": "<|kk|>",
|
418 |
+
"lstrip": false,
|
419 |
+
"normalized": false,
|
420 |
+
"rstrip": false,
|
421 |
+
"single_word": false
|
422 |
+
},
|
423 |
+
{
|
424 |
+
"content": "<|sq|>",
|
425 |
+
"lstrip": false,
|
426 |
+
"normalized": false,
|
427 |
+
"rstrip": false,
|
428 |
+
"single_word": false
|
429 |
+
},
|
430 |
+
{
|
431 |
+
"content": "<|sw|>",
|
432 |
+
"lstrip": false,
|
433 |
+
"normalized": false,
|
434 |
+
"rstrip": false,
|
435 |
+
"single_word": false
|
436 |
+
},
|
437 |
+
{
|
438 |
+
"content": "<|gl|>",
|
439 |
+
"lstrip": false,
|
440 |
+
"normalized": false,
|
441 |
+
"rstrip": false,
|
442 |
+
"single_word": false
|
443 |
+
},
|
444 |
+
{
|
445 |
+
"content": "<|mr|>",
|
446 |
+
"lstrip": false,
|
447 |
+
"normalized": false,
|
448 |
+
"rstrip": false,
|
449 |
+
"single_word": false
|
450 |
+
},
|
451 |
+
{
|
452 |
+
"content": "<|pa|>",
|
453 |
+
"lstrip": false,
|
454 |
+
"normalized": false,
|
455 |
+
"rstrip": false,
|
456 |
+
"single_word": false
|
457 |
+
},
|
458 |
+
{
|
459 |
+
"content": "<|si|>",
|
460 |
+
"lstrip": false,
|
461 |
+
"normalized": false,
|
462 |
+
"rstrip": false,
|
463 |
+
"single_word": false
|
464 |
+
},
|
465 |
+
{
|
466 |
+
"content": "<|km|>",
|
467 |
+
"lstrip": false,
|
468 |
+
"normalized": false,
|
469 |
+
"rstrip": false,
|
470 |
+
"single_word": false
|
471 |
+
},
|
472 |
+
{
|
473 |
+
"content": "<|sn|>",
|
474 |
+
"lstrip": false,
|
475 |
+
"normalized": false,
|
476 |
+
"rstrip": false,
|
477 |
+
"single_word": false
|
478 |
+
},
|
479 |
+
{
|
480 |
+
"content": "<|yo|>",
|
481 |
+
"lstrip": false,
|
482 |
+
"normalized": false,
|
483 |
+
"rstrip": false,
|
484 |
+
"single_word": false
|
485 |
+
},
|
486 |
+
{
|
487 |
+
"content": "<|so|>",
|
488 |
+
"lstrip": false,
|
489 |
+
"normalized": false,
|
490 |
+
"rstrip": false,
|
491 |
+
"single_word": false
|
492 |
+
},
|
493 |
+
{
|
494 |
+
"content": "<|af|>",
|
495 |
+
"lstrip": false,
|
496 |
+
"normalized": false,
|
497 |
+
"rstrip": false,
|
498 |
+
"single_word": false
|
499 |
+
},
|
500 |
+
{
|
501 |
+
"content": "<|oc|>",
|
502 |
+
"lstrip": false,
|
503 |
+
"normalized": false,
|
504 |
+
"rstrip": false,
|
505 |
+
"single_word": false
|
506 |
+
},
|
507 |
+
{
|
508 |
+
"content": "<|ka|>",
|
509 |
+
"lstrip": false,
|
510 |
+
"normalized": false,
|
511 |
+
"rstrip": false,
|
512 |
+
"single_word": false
|
513 |
+
},
|
514 |
+
{
|
515 |
+
"content": "<|be|>",
|
516 |
+
"lstrip": false,
|
517 |
+
"normalized": false,
|
518 |
+
"rstrip": false,
|
519 |
+
"single_word": false
|
520 |
+
},
|
521 |
+
{
|
522 |
+
"content": "<|tg|>",
|
523 |
+
"lstrip": false,
|
524 |
+
"normalized": false,
|
525 |
+
"rstrip": false,
|
526 |
+
"single_word": false
|
527 |
+
},
|
528 |
+
{
|
529 |
+
"content": "<|sd|>",
|
530 |
+
"lstrip": false,
|
531 |
+
"normalized": false,
|
532 |
+
"rstrip": false,
|
533 |
+
"single_word": false
|
534 |
+
},
|
535 |
+
{
|
536 |
+
"content": "<|gu|>",
|
537 |
+
"lstrip": false,
|
538 |
+
"normalized": false,
|
539 |
+
"rstrip": false,
|
540 |
+
"single_word": false
|
541 |
+
},
|
542 |
+
{
|
543 |
+
"content": "<|am|>",
|
544 |
+
"lstrip": false,
|
545 |
+
"normalized": false,
|
546 |
+
"rstrip": false,
|
547 |
+
"single_word": false
|
548 |
+
},
|
549 |
+
{
|
550 |
+
"content": "<|yi|>",
|
551 |
+
"lstrip": false,
|
552 |
+
"normalized": false,
|
553 |
+
"rstrip": false,
|
554 |
+
"single_word": false
|
555 |
+
},
|
556 |
+
{
|
557 |
+
"content": "<|lo|>",
|
558 |
+
"lstrip": false,
|
559 |
+
"normalized": false,
|
560 |
+
"rstrip": false,
|
561 |
+
"single_word": false
|
562 |
+
},
|
563 |
+
{
|
564 |
+
"content": "<|uz|>",
|
565 |
+
"lstrip": false,
|
566 |
+
"normalized": false,
|
567 |
+
"rstrip": false,
|
568 |
+
"single_word": false
|
569 |
+
},
|
570 |
+
{
|
571 |
+
"content": "<|fo|>",
|
572 |
+
"lstrip": false,
|
573 |
+
"normalized": false,
|
574 |
+
"rstrip": false,
|
575 |
+
"single_word": false
|
576 |
+
},
|
577 |
+
{
|
578 |
+
"content": "<|ht|>",
|
579 |
+
"lstrip": false,
|
580 |
+
"normalized": false,
|
581 |
+
"rstrip": false,
|
582 |
+
"single_word": false
|
583 |
+
},
|
584 |
+
{
|
585 |
+
"content": "<|ps|>",
|
586 |
+
"lstrip": false,
|
587 |
+
"normalized": false,
|
588 |
+
"rstrip": false,
|
589 |
+
"single_word": false
|
590 |
+
},
|
591 |
+
{
|
592 |
+
"content": "<|tk|>",
|
593 |
+
"lstrip": false,
|
594 |
+
"normalized": false,
|
595 |
+
"rstrip": false,
|
596 |
+
"single_word": false
|
597 |
+
},
|
598 |
+
{
|
599 |
+
"content": "<|nn|>",
|
600 |
+
"lstrip": false,
|
601 |
+
"normalized": false,
|
602 |
+
"rstrip": false,
|
603 |
+
"single_word": false
|
604 |
+
},
|
605 |
+
{
|
606 |
+
"content": "<|mt|>",
|
607 |
+
"lstrip": false,
|
608 |
+
"normalized": false,
|
609 |
+
"rstrip": false,
|
610 |
+
"single_word": false
|
611 |
+
},
|
612 |
+
{
|
613 |
+
"content": "<|sa|>",
|
614 |
+
"lstrip": false,
|
615 |
+
"normalized": false,
|
616 |
+
"rstrip": false,
|
617 |
+
"single_word": false
|
618 |
+
},
|
619 |
+
{
|
620 |
+
"content": "<|lb|>",
|
621 |
+
"lstrip": false,
|
622 |
+
"normalized": false,
|
623 |
+
"rstrip": false,
|
624 |
+
"single_word": false
|
625 |
+
},
|
626 |
+
{
|
627 |
+
"content": "<|my|>",
|
628 |
+
"lstrip": false,
|
629 |
+
"normalized": false,
|
630 |
+
"rstrip": false,
|
631 |
+
"single_word": false
|
632 |
+
},
|
633 |
+
{
|
634 |
+
"content": "<|bo|>",
|
635 |
+
"lstrip": false,
|
636 |
+
"normalized": false,
|
637 |
+
"rstrip": false,
|
638 |
+
"single_word": false
|
639 |
+
},
|
640 |
+
{
|
641 |
+
"content": "<|tl|>",
|
642 |
+
"lstrip": false,
|
643 |
+
"normalized": false,
|
644 |
+
"rstrip": false,
|
645 |
+
"single_word": false
|
646 |
+
},
|
647 |
+
{
|
648 |
+
"content": "<|mg|>",
|
649 |
+
"lstrip": false,
|
650 |
+
"normalized": false,
|
651 |
+
"rstrip": false,
|
652 |
+
"single_word": false
|
653 |
+
},
|
654 |
+
{
|
655 |
+
"content": "<|as|>",
|
656 |
+
"lstrip": false,
|
657 |
+
"normalized": false,
|
658 |
+
"rstrip": false,
|
659 |
+
"single_word": false
|
660 |
+
},
|
661 |
+
{
|
662 |
+
"content": "<|tt|>",
|
663 |
+
"lstrip": false,
|
664 |
+
"normalized": false,
|
665 |
+
"rstrip": false,
|
666 |
+
"single_word": false
|
667 |
+
},
|
668 |
+
{
|
669 |
+
"content": "<|haw|>",
|
670 |
+
"lstrip": false,
|
671 |
+
"normalized": false,
|
672 |
+
"rstrip": false,
|
673 |
+
"single_word": false
|
674 |
+
},
|
675 |
+
{
|
676 |
+
"content": "<|ln|>",
|
677 |
+
"lstrip": false,
|
678 |
+
"normalized": false,
|
679 |
+
"rstrip": false,
|
680 |
+
"single_word": false
|
681 |
+
},
|
682 |
+
{
|
683 |
+
"content": "<|ha|>",
|
684 |
+
"lstrip": false,
|
685 |
+
"normalized": false,
|
686 |
+
"rstrip": false,
|
687 |
+
"single_word": false
|
688 |
+
},
|
689 |
+
{
|
690 |
+
"content": "<|ba|>",
|
691 |
+
"lstrip": false,
|
692 |
+
"normalized": false,
|
693 |
+
"rstrip": false,
|
694 |
+
"single_word": false
|
695 |
+
},
|
696 |
+
{
|
697 |
+
"content": "<|jw|>",
|
698 |
+
"lstrip": false,
|
699 |
+
"normalized": false,
|
700 |
+
"rstrip": false,
|
701 |
+
"single_word": false
|
702 |
+
},
|
703 |
+
{
|
704 |
+
"content": "<|su|>",
|
705 |
+
"lstrip": false,
|
706 |
+
"normalized": false,
|
707 |
+
"rstrip": false,
|
708 |
+
"single_word": false
|
709 |
+
},
|
710 |
+
{
|
711 |
+
"content": "<|translate|>",
|
712 |
+
"lstrip": false,
|
713 |
+
"normalized": false,
|
714 |
+
"rstrip": false,
|
715 |
+
"single_word": false
|
716 |
+
},
|
717 |
+
{
|
718 |
+
"content": "<|transcribe|>",
|
719 |
+
"lstrip": false,
|
720 |
+
"normalized": false,
|
721 |
+
"rstrip": false,
|
722 |
+
"single_word": false
|
723 |
+
},
|
724 |
+
{
|
725 |
+
"content": "<|startoflm|>",
|
726 |
+
"lstrip": false,
|
727 |
+
"normalized": false,
|
728 |
+
"rstrip": false,
|
729 |
+
"single_word": false
|
730 |
+
},
|
731 |
+
{
|
732 |
+
"content": "<|startofprev|>",
|
733 |
+
"lstrip": false,
|
734 |
+
"normalized": false,
|
735 |
+
"rstrip": false,
|
736 |
+
"single_word": false
|
737 |
+
},
|
738 |
+
{
|
739 |
+
"content": "<|nocaptions|>",
|
740 |
+
"lstrip": false,
|
741 |
+
"normalized": false,
|
742 |
+
"rstrip": false,
|
743 |
+
"single_word": false
|
744 |
+
},
|
745 |
+
{
|
746 |
+
"content": "<|notimestamps|>",
|
747 |
+
"lstrip": false,
|
748 |
+
"normalized": false,
|
749 |
+
"rstrip": false,
|
750 |
+
"single_word": false
|
751 |
+
},
|
752 |
+
{
|
753 |
+
"content": "<|tp|>",
|
754 |
+
"lstrip": false,
|
755 |
+
"normalized": false,
|
756 |
+
"rstrip": false,
|
757 |
+
"single_word": false
|
758 |
+
}
|
759 |
+
],
|
760 |
+
"bos_token": {
|
761 |
+
"content": "<|endoftext|>",
|
762 |
+
"lstrip": false,
|
763 |
+
"normalized": false,
|
764 |
+
"rstrip": false,
|
765 |
+
"single_word": false
|
766 |
+
},
|
767 |
+
"eos_token": {
|
768 |
+
"content": "<|endoftext|>",
|
769 |
+
"lstrip": false,
|
770 |
+
"normalized": false,
|
771 |
+
"rstrip": false,
|
772 |
+
"single_word": false
|
773 |
+
},
|
774 |
+
"pad_token": {
|
775 |
+
"content": "<|endoftext|>",
|
776 |
+
"lstrip": false,
|
777 |
+
"normalized": false,
|
778 |
+
"rstrip": false,
|
779 |
+
"single_word": false
|
780 |
+
},
|
781 |
+
"unk_token": {
|
782 |
+
"content": "<|endoftext|>",
|
783 |
+
"lstrip": false,
|
784 |
+
"normalized": false,
|
785 |
+
"rstrip": false,
|
786 |
+
"single_word": false
|
787 |
+
}
|
788 |
+
}
|
tokenizer.json
ADDED
@@ -0,0 +1,2993 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"version": "1.0",
|
3 |
+
"truncation": null,
|
4 |
+
"padding": null,
|
5 |
+
"added_tokens": [
|
6 |
+
{
|
7 |
+
"id": 0,
|
8 |
+
"content": "<|endoftext|>",
|
9 |
+
"single_word": false,
|
10 |
+
"lstrip": false,
|
11 |
+
"rstrip": false,
|
12 |
+
"normalized": false,
|
13 |
+
"special": true
|
14 |
+
},
|
15 |
+
{
|
16 |
+
"id": 1,
|
17 |
+
"content": "<|startoftranscript|>",
|
18 |
+
"single_word": false,
|
19 |
+
"lstrip": false,
|
20 |
+
"rstrip": false,
|
21 |
+
"normalized": false,
|
22 |
+
"special": true
|
23 |
+
},
|
24 |
+
{
|
25 |
+
"id": 2,
|
26 |
+
"content": "<|en|>",
|
27 |
+
"single_word": false,
|
28 |
+
"lstrip": false,
|
29 |
+
"rstrip": false,
|
30 |
+
"normalized": false,
|
31 |
+
"special": true
|
32 |
+
},
|
33 |
+
{
|
34 |
+
"id": 3,
|
35 |
+
"content": "<|zh|>",
|
36 |
+
"single_word": false,
|
37 |
+
"lstrip": false,
|
38 |
+
"rstrip": false,
|
39 |
+
"normalized": false,
|
40 |
+
"special": true
|
41 |
+
},
|
42 |
+
{
|
43 |
+
"id": 4,
|
44 |
+
"content": "<|de|>",
|
45 |
+
"single_word": false,
|
46 |
+
"lstrip": false,
|
47 |
+
"rstrip": false,
|
48 |
+
"normalized": false,
|
49 |
+
"special": true
|
50 |
+
},
|
51 |
+
{
|
52 |
+
"id": 5,
|
53 |
+
"content": "<|es|>",
|
54 |
+
"single_word": false,
|
55 |
+
"lstrip": false,
|
56 |
+
"rstrip": false,
|
57 |
+
"normalized": false,
|
58 |
+
"special": true
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"id": 6,
|
62 |
+
"content": "<|ru|>",
|
63 |
+
"single_word": false,
|
64 |
+
"lstrip": false,
|
65 |
+
"rstrip": false,
|
66 |
+
"normalized": false,
|
67 |
+
"special": true
|
68 |
+
},
|
69 |
+
{
|
70 |
+
"id": 7,
|
71 |
+
"content": "<|ko|>",
|
72 |
+
"single_word": false,
|
73 |
+
"lstrip": false,
|
74 |
+
"rstrip": false,
|
75 |
+
"normalized": false,
|
76 |
+
"special": true
|
77 |
+
},
|
78 |
+
{
|
79 |
+
"id": 8,
|
80 |
+
"content": "<|fr|>",
|
81 |
+
"single_word": false,
|
82 |
+
"lstrip": false,
|
83 |
+
"rstrip": false,
|
84 |
+
"normalized": false,
|
85 |
+
"special": true
|
86 |
+
},
|
87 |
+
{
|
88 |
+
"id": 9,
|
89 |
+
"content": "<|ja|>",
|
90 |
+
"single_word": false,
|
91 |
+
"lstrip": false,
|
92 |
+
"rstrip": false,
|
93 |
+
"normalized": false,
|
94 |
+
"special": true
|
95 |
+
},
|
96 |
+
{
|
97 |
+
"id": 10,
|
98 |
+
"content": "<|pt|>",
|
99 |
+
"single_word": false,
|
100 |
+
"lstrip": false,
|
101 |
+
"rstrip": false,
|
102 |
+
"normalized": false,
|
103 |
+
"special": true
|
104 |
+
},
|
105 |
+
{
|
106 |
+
"id": 11,
|
107 |
+
"content": "<|tr|>",
|
108 |
+
"single_word": false,
|
109 |
+
"lstrip": false,
|
110 |
+
"rstrip": false,
|
111 |
+
"normalized": false,
|
112 |
+
"special": true
|
113 |
+
},
|
114 |
+
{
|
115 |
+
"id": 12,
|
116 |
+
"content": "<|pl|>",
|
117 |
+
"single_word": false,
|
118 |
+
"lstrip": false,
|
119 |
+
"rstrip": false,
|
120 |
+
"normalized": false,
|
121 |
+
"special": true
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"id": 13,
|
125 |
+
"content": "<|ca|>",
|
126 |
+
"single_word": false,
|
127 |
+
"lstrip": false,
|
128 |
+
"rstrip": false,
|
129 |
+
"normalized": false,
|
130 |
+
"special": true
|
131 |
+
},
|
132 |
+
{
|
133 |
+
"id": 14,
|
134 |
+
"content": "<|nl|>",
|
135 |
+
"single_word": false,
|
136 |
+
"lstrip": false,
|
137 |
+
"rstrip": false,
|
138 |
+
"normalized": false,
|
139 |
+
"special": true
|
140 |
+
},
|
141 |
+
{
|
142 |
+
"id": 15,
|
143 |
+
"content": "<|ar|>",
|
144 |
+
"single_word": false,
|
145 |
+
"lstrip": false,
|
146 |
+
"rstrip": false,
|
147 |
+
"normalized": false,
|
148 |
+
"special": true
|
149 |
+
},
|
150 |
+
{
|
151 |
+
"id": 16,
|
152 |
+
"content": "<|sv|>",
|
153 |
+
"single_word": false,
|
154 |
+
"lstrip": false,
|
155 |
+
"rstrip": false,
|
156 |
+
"normalized": false,
|
157 |
+
"special": true
|
158 |
+
},
|
159 |
+
{
|
160 |
+
"id": 17,
|
161 |
+
"content": "<|it|>",
|
162 |
+
"single_word": false,
|
163 |
+
"lstrip": false,
|
164 |
+
"rstrip": false,
|
165 |
+
"normalized": false,
|
166 |
+
"special": true
|
167 |
+
},
|
168 |
+
{
|
169 |
+
"id": 18,
|
170 |
+
"content": "<|id|>",
|
171 |
+
"single_word": false,
|
172 |
+
"lstrip": false,
|
173 |
+
"rstrip": false,
|
174 |
+
"normalized": false,
|
175 |
+
"special": true
|
176 |
+
},
|
177 |
+
{
|
178 |
+
"id": 19,
|
179 |
+
"content": "<|hi|>",
|
180 |
+
"single_word": false,
|
181 |
+
"lstrip": false,
|
182 |
+
"rstrip": false,
|
183 |
+
"normalized": false,
|
184 |
+
"special": true
|
185 |
+
},
|
186 |
+
{
|
187 |
+
"id": 20,
|
188 |
+
"content": "<|fi|>",
|
189 |
+
"single_word": false,
|
190 |
+
"lstrip": false,
|
191 |
+
"rstrip": false,
|
192 |
+
"normalized": false,
|
193 |
+
"special": true
|
194 |
+
},
|
195 |
+
{
|
196 |
+
"id": 21,
|
197 |
+
"content": "<|vi|>",
|
198 |
+
"single_word": false,
|
199 |
+
"lstrip": false,
|
200 |
+
"rstrip": false,
|
201 |
+
"normalized": false,
|
202 |
+
"special": true
|
203 |
+
},
|
204 |
+
{
|
205 |
+
"id": 22,
|
206 |
+
"content": "<|he|>",
|
207 |
+
"single_word": false,
|
208 |
+
"lstrip": false,
|
209 |
+
"rstrip": false,
|
210 |
+
"normalized": false,
|
211 |
+
"special": true
|
212 |
+
},
|
213 |
+
{
|
214 |
+
"id": 23,
|
215 |
+
"content": "<|uk|>",
|
216 |
+
"single_word": false,
|
217 |
+
"lstrip": false,
|
218 |
+
"rstrip": false,
|
219 |
+
"normalized": false,
|
220 |
+
"special": true
|
221 |
+
},
|
222 |
+
{
|
223 |
+
"id": 24,
|
224 |
+
"content": "<|el|>",
|
225 |
+
"single_word": false,
|
226 |
+
"lstrip": false,
|
227 |
+
"rstrip": false,
|
228 |
+
"normalized": false,
|
229 |
+
"special": true
|
230 |
+
},
|
231 |
+
{
|
232 |
+
"id": 25,
|
233 |
+
"content": "<|ms|>",
|
234 |
+
"single_word": false,
|
235 |
+
"lstrip": false,
|
236 |
+
"rstrip": false,
|
237 |
+
"normalized": false,
|
238 |
+
"special": true
|
239 |
+
},
|
240 |
+
{
|
241 |
+
"id": 26,
|
242 |
+
"content": "<|cs|>",
|
243 |
+
"single_word": false,
|
244 |
+
"lstrip": false,
|
245 |
+
"rstrip": false,
|
246 |
+
"normalized": false,
|
247 |
+
"special": true
|
248 |
+
},
|
249 |
+
{
|
250 |
+
"id": 27,
|
251 |
+
"content": "<|ro|>",
|
252 |
+
"single_word": false,
|
253 |
+
"lstrip": false,
|
254 |
+
"rstrip": false,
|
255 |
+
"normalized": false,
|
256 |
+
"special": true
|
257 |
+
},
|
258 |
+
{
|
259 |
+
"id": 28,
|
260 |
+
"content": "<|da|>",
|
261 |
+
"single_word": false,
|
262 |
+
"lstrip": false,
|
263 |
+
"rstrip": false,
|
264 |
+
"normalized": false,
|
265 |
+
"special": true
|
266 |
+
},
|
267 |
+
{
|
268 |
+
"id": 29,
|
269 |
+
"content": "<|hu|>",
|
270 |
+
"single_word": false,
|
271 |
+
"lstrip": false,
|
272 |
+
"rstrip": false,
|
273 |
+
"normalized": false,
|
274 |
+
"special": true
|
275 |
+
},
|
276 |
+
{
|
277 |
+
"id": 30,
|
278 |
+
"content": "<|ta|>",
|
279 |
+
"single_word": false,
|
280 |
+
"lstrip": false,
|
281 |
+
"rstrip": false,
|
282 |
+
"normalized": false,
|
283 |
+
"special": true
|
284 |
+
},
|
285 |
+
{
|
286 |
+
"id": 31,
|
287 |
+
"content": "<|no|>",
|
288 |
+
"single_word": false,
|
289 |
+
"lstrip": false,
|
290 |
+
"rstrip": false,
|
291 |
+
"normalized": false,
|
292 |
+
"special": true
|
293 |
+
},
|
294 |
+
{
|
295 |
+
"id": 32,
|
296 |
+
"content": "<|th|>",
|
297 |
+
"single_word": false,
|
298 |
+
"lstrip": false,
|
299 |
+
"rstrip": false,
|
300 |
+
"normalized": false,
|
301 |
+
"special": true
|
302 |
+
},
|
303 |
+
{
|
304 |
+
"id": 33,
|
305 |
+
"content": "<|ur|>",
|
306 |
+
"single_word": false,
|
307 |
+
"lstrip": false,
|
308 |
+
"rstrip": false,
|
309 |
+
"normalized": false,
|
310 |
+
"special": true
|
311 |
+
},
|
312 |
+
{
|
313 |
+
"id": 34,
|
314 |
+
"content": "<|hr|>",
|
315 |
+
"single_word": false,
|
316 |
+
"lstrip": false,
|
317 |
+
"rstrip": false,
|
318 |
+
"normalized": false,
|
319 |
+
"special": true
|
320 |
+
},
|
321 |
+
{
|
322 |
+
"id": 35,
|
323 |
+
"content": "<|bg|>",
|
324 |
+
"single_word": false,
|
325 |
+
"lstrip": false,
|
326 |
+
"rstrip": false,
|
327 |
+
"normalized": false,
|
328 |
+
"special": true
|
329 |
+
},
|
330 |
+
{
|
331 |
+
"id": 36,
|
332 |
+
"content": "<|lt|>",
|
333 |
+
"single_word": false,
|
334 |
+
"lstrip": false,
|
335 |
+
"rstrip": false,
|
336 |
+
"normalized": false,
|
337 |
+
"special": true
|
338 |
+
},
|
339 |
+
{
|
340 |
+
"id": 37,
|
341 |
+
"content": "<|la|>",
|
342 |
+
"single_word": false,
|
343 |
+
"lstrip": false,
|
344 |
+
"rstrip": false,
|
345 |
+
"normalized": false,
|
346 |
+
"special": true
|
347 |
+
},
|
348 |
+
{
|
349 |
+
"id": 38,
|
350 |
+
"content": "<|mi|>",
|
351 |
+
"single_word": false,
|
352 |
+
"lstrip": false,
|
353 |
+
"rstrip": false,
|
354 |
+
"normalized": false,
|
355 |
+
"special": true
|
356 |
+
},
|
357 |
+
{
|
358 |
+
"id": 39,
|
359 |
+
"content": "<|ml|>",
|
360 |
+
"single_word": false,
|
361 |
+
"lstrip": false,
|
362 |
+
"rstrip": false,
|
363 |
+
"normalized": false,
|
364 |
+
"special": true
|
365 |
+
},
|
366 |
+
{
|
367 |
+
"id": 40,
|
368 |
+
"content": "<|cy|>",
|
369 |
+
"single_word": false,
|
370 |
+
"lstrip": false,
|
371 |
+
"rstrip": false,
|
372 |
+
"normalized": false,
|
373 |
+
"special": true
|
374 |
+
},
|
375 |
+
{
|
376 |
+
"id": 41,
|
377 |
+
"content": "<|sk|>",
|
378 |
+
"single_word": false,
|
379 |
+
"lstrip": false,
|
380 |
+
"rstrip": false,
|
381 |
+
"normalized": false,
|
382 |
+
"special": true
|
383 |
+
},
|
384 |
+
{
|
385 |
+
"id": 42,
|
386 |
+
"content": "<|te|>",
|
387 |
+
"single_word": false,
|
388 |
+
"lstrip": false,
|
389 |
+
"rstrip": false,
|
390 |
+
"normalized": false,
|
391 |
+
"special": true
|
392 |
+
},
|
393 |
+
{
|
394 |
+
"id": 43,
|
395 |
+
"content": "<|fa|>",
|
396 |
+
"single_word": false,
|
397 |
+
"lstrip": false,
|
398 |
+
"rstrip": false,
|
399 |
+
"normalized": false,
|
400 |
+
"special": true
|
401 |
+
},
|
402 |
+
{
|
403 |
+
"id": 44,
|
404 |
+
"content": "<|lv|>",
|
405 |
+
"single_word": false,
|
406 |
+
"lstrip": false,
|
407 |
+
"rstrip": false,
|
408 |
+
"normalized": false,
|
409 |
+
"special": true
|
410 |
+
},
|
411 |
+
{
|
412 |
+
"id": 45,
|
413 |
+
"content": "<|bn|>",
|
414 |
+
"single_word": false,
|
415 |
+
"lstrip": false,
|
416 |
+
"rstrip": false,
|
417 |
+
"normalized": false,
|
418 |
+
"special": true
|
419 |
+
},
|
420 |
+
{
|
421 |
+
"id": 46,
|
422 |
+
"content": "<|sr|>",
|
423 |
+
"single_word": false,
|
424 |
+
"lstrip": false,
|
425 |
+
"rstrip": false,
|
426 |
+
"normalized": false,
|
427 |
+
"special": true
|
428 |
+
},
|
429 |
+
{
|
430 |
+
"id": 47,
|
431 |
+
"content": "<|az|>",
|
432 |
+
"single_word": false,
|
433 |
+
"lstrip": false,
|
434 |
+
"rstrip": false,
|
435 |
+
"normalized": false,
|
436 |
+
"special": true
|
437 |
+
},
|
438 |
+
{
|
439 |
+
"id": 48,
|
440 |
+
"content": "<|sl|>",
|
441 |
+
"single_word": false,
|
442 |
+
"lstrip": false,
|
443 |
+
"rstrip": false,
|
444 |
+
"normalized": false,
|
445 |
+
"special": true
|
446 |
+
},
|
447 |
+
{
|
448 |
+
"id": 49,
|
449 |
+
"content": "<|kn|>",
|
450 |
+
"single_word": false,
|
451 |
+
"lstrip": false,
|
452 |
+
"rstrip": false,
|
453 |
+
"normalized": false,
|
454 |
+
"special": true
|
455 |
+
},
|
456 |
+
{
|
457 |
+
"id": 50,
|
458 |
+
"content": "<|et|>",
|
459 |
+
"single_word": false,
|
460 |
+
"lstrip": false,
|
461 |
+
"rstrip": false,
|
462 |
+
"normalized": false,
|
463 |
+
"special": true
|
464 |
+
},
|
465 |
+
{
|
466 |
+
"id": 51,
|
467 |
+
"content": "<|mk|>",
|
468 |
+
"single_word": false,
|
469 |
+
"lstrip": false,
|
470 |
+
"rstrip": false,
|
471 |
+
"normalized": false,
|
472 |
+
"special": true
|
473 |
+
},
|
474 |
+
{
|
475 |
+
"id": 52,
|
476 |
+
"content": "<|br|>",
|
477 |
+
"single_word": false,
|
478 |
+
"lstrip": false,
|
479 |
+
"rstrip": false,
|
480 |
+
"normalized": false,
|
481 |
+
"special": true
|
482 |
+
},
|
483 |
+
{
|
484 |
+
"id": 53,
|
485 |
+
"content": "<|eu|>",
|
486 |
+
"single_word": false,
|
487 |
+
"lstrip": false,
|
488 |
+
"rstrip": false,
|
489 |
+
"normalized": false,
|
490 |
+
"special": true
|
491 |
+
},
|
492 |
+
{
|
493 |
+
"id": 54,
|
494 |
+
"content": "<|is|>",
|
495 |
+
"single_word": false,
|
496 |
+
"lstrip": false,
|
497 |
+
"rstrip": false,
|
498 |
+
"normalized": false,
|
499 |
+
"special": true
|
500 |
+
},
|
501 |
+
{
|
502 |
+
"id": 55,
|
503 |
+
"content": "<|hy|>",
|
504 |
+
"single_word": false,
|
505 |
+
"lstrip": false,
|
506 |
+
"rstrip": false,
|
507 |
+
"normalized": false,
|
508 |
+
"special": true
|
509 |
+
},
|
510 |
+
{
|
511 |
+
"id": 56,
|
512 |
+
"content": "<|ne|>",
|
513 |
+
"single_word": false,
|
514 |
+
"lstrip": false,
|
515 |
+
"rstrip": false,
|
516 |
+
"normalized": false,
|
517 |
+
"special": true
|
518 |
+
},
|
519 |
+
{
|
520 |
+
"id": 57,
|
521 |
+
"content": "<|mn|>",
|
522 |
+
"single_word": false,
|
523 |
+
"lstrip": false,
|
524 |
+
"rstrip": false,
|
525 |
+
"normalized": false,
|
526 |
+
"special": true
|
527 |
+
},
|
528 |
+
{
|
529 |
+
"id": 58,
|
530 |
+
"content": "<|bs|>",
|
531 |
+
"single_word": false,
|
532 |
+
"lstrip": false,
|
533 |
+
"rstrip": false,
|
534 |
+
"normalized": false,
|
535 |
+
"special": true
|
536 |
+
},
|
537 |
+
{
|
538 |
+
"id": 59,
|
539 |
+
"content": "<|kk|>",
|
540 |
+
"single_word": false,
|
541 |
+
"lstrip": false,
|
542 |
+
"rstrip": false,
|
543 |
+
"normalized": false,
|
544 |
+
"special": true
|
545 |
+
},
|
546 |
+
{
|
547 |
+
"id": 60,
|
548 |
+
"content": "<|sq|>",
|
549 |
+
"single_word": false,
|
550 |
+
"lstrip": false,
|
551 |
+
"rstrip": false,
|
552 |
+
"normalized": false,
|
553 |
+
"special": true
|
554 |
+
},
|
555 |
+
{
|
556 |
+
"id": 61,
|
557 |
+
"content": "<|sw|>",
|
558 |
+
"single_word": false,
|
559 |
+
"lstrip": false,
|
560 |
+
"rstrip": false,
|
561 |
+
"normalized": false,
|
562 |
+
"special": true
|
563 |
+
},
|
564 |
+
{
|
565 |
+
"id": 62,
|
566 |
+
"content": "<|gl|>",
|
567 |
+
"single_word": false,
|
568 |
+
"lstrip": false,
|
569 |
+
"rstrip": false,
|
570 |
+
"normalized": false,
|
571 |
+
"special": true
|
572 |
+
},
|
573 |
+
{
|
574 |
+
"id": 63,
|
575 |
+
"content": "<|mr|>",
|
576 |
+
"single_word": false,
|
577 |
+
"lstrip": false,
|
578 |
+
"rstrip": false,
|
579 |
+
"normalized": false,
|
580 |
+
"special": true
|
581 |
+
},
|
582 |
+
{
|
583 |
+
"id": 64,
|
584 |
+
"content": "<|pa|>",
|
585 |
+
"single_word": false,
|
586 |
+
"lstrip": false,
|
587 |
+
"rstrip": false,
|
588 |
+
"normalized": false,
|
589 |
+
"special": true
|
590 |
+
},
|
591 |
+
{
|
592 |
+
"id": 65,
|
593 |
+
"content": "<|si|>",
|
594 |
+
"single_word": false,
|
595 |
+
"lstrip": false,
|
596 |
+
"rstrip": false,
|
597 |
+
"normalized": false,
|
598 |
+
"special": true
|
599 |
+
},
|
600 |
+
{
|
601 |
+
"id": 66,
|
602 |
+
"content": "<|km|>",
|
603 |
+
"single_word": false,
|
604 |
+
"lstrip": false,
|
605 |
+
"rstrip": false,
|
606 |
+
"normalized": false,
|
607 |
+
"special": true
|
608 |
+
},
|
609 |
+
{
|
610 |
+
"id": 67,
|
611 |
+
"content": "<|sn|>",
|
612 |
+
"single_word": false,
|
613 |
+
"lstrip": false,
|
614 |
+
"rstrip": false,
|
615 |
+
"normalized": false,
|
616 |
+
"special": true
|
617 |
+
},
|
618 |
+
{
|
619 |
+
"id": 68,
|
620 |
+
"content": "<|yo|>",
|
621 |
+
"single_word": false,
|
622 |
+
"lstrip": false,
|
623 |
+
"rstrip": false,
|
624 |
+
"normalized": false,
|
625 |
+
"special": true
|
626 |
+
},
|
627 |
+
{
|
628 |
+
"id": 69,
|
629 |
+
"content": "<|so|>",
|
630 |
+
"single_word": false,
|
631 |
+
"lstrip": false,
|
632 |
+
"rstrip": false,
|
633 |
+
"normalized": false,
|
634 |
+
"special": true
|
635 |
+
},
|
636 |
+
{
|
637 |
+
"id": 70,
|
638 |
+
"content": "<|af|>",
|
639 |
+
"single_word": false,
|
640 |
+
"lstrip": false,
|
641 |
+
"rstrip": false,
|
642 |
+
"normalized": false,
|
643 |
+
"special": true
|
644 |
+
},
|
645 |
+
{
|
646 |
+
"id": 71,
|
647 |
+
"content": "<|oc|>",
|
648 |
+
"single_word": false,
|
649 |
+
"lstrip": false,
|
650 |
+
"rstrip": false,
|
651 |
+
"normalized": false,
|
652 |
+
"special": true
|
653 |
+
},
|
654 |
+
{
|
655 |
+
"id": 72,
|
656 |
+
"content": "<|ka|>",
|
657 |
+
"single_word": false,
|
658 |
+
"lstrip": false,
|
659 |
+
"rstrip": false,
|
660 |
+
"normalized": false,
|
661 |
+
"special": true
|
662 |
+
},
|
663 |
+
{
|
664 |
+
"id": 73,
|
665 |
+
"content": "<|be|>",
|
666 |
+
"single_word": false,
|
667 |
+
"lstrip": false,
|
668 |
+
"rstrip": false,
|
669 |
+
"normalized": false,
|
670 |
+
"special": true
|
671 |
+
},
|
672 |
+
{
|
673 |
+
"id": 74,
|
674 |
+
"content": "<|tg|>",
|
675 |
+
"single_word": false,
|
676 |
+
"lstrip": false,
|
677 |
+
"rstrip": false,
|
678 |
+
"normalized": false,
|
679 |
+
"special": true
|
680 |
+
},
|
681 |
+
{
|
682 |
+
"id": 75,
|
683 |
+
"content": "<|sd|>",
|
684 |
+
"single_word": false,
|
685 |
+
"lstrip": false,
|
686 |
+
"rstrip": false,
|
687 |
+
"normalized": false,
|
688 |
+
"special": true
|
689 |
+
},
|
690 |
+
{
|
691 |
+
"id": 76,
|
692 |
+
"content": "<|gu|>",
|
693 |
+
"single_word": false,
|
694 |
+
"lstrip": false,
|
695 |
+
"rstrip": false,
|
696 |
+
"normalized": false,
|
697 |
+
"special": true
|
698 |
+
},
|
699 |
+
{
|
700 |
+
"id": 77,
|
701 |
+
"content": "<|am|>",
|
702 |
+
"single_word": false,
|
703 |
+
"lstrip": false,
|
704 |
+
"rstrip": false,
|
705 |
+
"normalized": false,
|
706 |
+
"special": true
|
707 |
+
},
|
708 |
+
{
|
709 |
+
"id": 78,
|
710 |
+
"content": "<|yi|>",
|
711 |
+
"single_word": false,
|
712 |
+
"lstrip": false,
|
713 |
+
"rstrip": false,
|
714 |
+
"normalized": false,
|
715 |
+
"special": true
|
716 |
+
},
|
717 |
+
{
|
718 |
+
"id": 79,
|
719 |
+
"content": "<|lo|>",
|
720 |
+
"single_word": false,
|
721 |
+
"lstrip": false,
|
722 |
+
"rstrip": false,
|
723 |
+
"normalized": false,
|
724 |
+
"special": true
|
725 |
+
},
|
726 |
+
{
|
727 |
+
"id": 80,
|
728 |
+
"content": "<|uz|>",
|
729 |
+
"single_word": false,
|
730 |
+
"lstrip": false,
|
731 |
+
"rstrip": false,
|
732 |
+
"normalized": false,
|
733 |
+
"special": true
|
734 |
+
},
|
735 |
+
{
|
736 |
+
"id": 81,
|
737 |
+
"content": "<|fo|>",
|
738 |
+
"single_word": false,
|
739 |
+
"lstrip": false,
|
740 |
+
"rstrip": false,
|
741 |
+
"normalized": false,
|
742 |
+
"special": true
|
743 |
+
},
|
744 |
+
{
|
745 |
+
"id": 82,
|
746 |
+
"content": "<|ht|>",
|
747 |
+
"single_word": false,
|
748 |
+
"lstrip": false,
|
749 |
+
"rstrip": false,
|
750 |
+
"normalized": false,
|
751 |
+
"special": true
|
752 |
+
},
|
753 |
+
{
|
754 |
+
"id": 83,
|
755 |
+
"content": "<|ps|>",
|
756 |
+
"single_word": false,
|
757 |
+
"lstrip": false,
|
758 |
+
"rstrip": false,
|
759 |
+
"normalized": false,
|
760 |
+
"special": true
|
761 |
+
},
|
762 |
+
{
|
763 |
+
"id": 84,
|
764 |
+
"content": "<|tk|>",
|
765 |
+
"single_word": false,
|
766 |
+
"lstrip": false,
|
767 |
+
"rstrip": false,
|
768 |
+
"normalized": false,
|
769 |
+
"special": true
|
770 |
+
},
|
771 |
+
{
|
772 |
+
"id": 85,
|
773 |
+
"content": "<|nn|>",
|
774 |
+
"single_word": false,
|
775 |
+
"lstrip": false,
|
776 |
+
"rstrip": false,
|
777 |
+
"normalized": false,
|
778 |
+
"special": true
|
779 |
+
},
|
780 |
+
{
|
781 |
+
"id": 86,
|
782 |
+
"content": "<|mt|>",
|
783 |
+
"single_word": false,
|
784 |
+
"lstrip": false,
|
785 |
+
"rstrip": false,
|
786 |
+
"normalized": false,
|
787 |
+
"special": true
|
788 |
+
},
|
789 |
+
{
|
790 |
+
"id": 87,
|
791 |
+
"content": "<|sa|>",
|
792 |
+
"single_word": false,
|
793 |
+
"lstrip": false,
|
794 |
+
"rstrip": false,
|
795 |
+
"normalized": false,
|
796 |
+
"special": true
|
797 |
+
},
|
798 |
+
{
|
799 |
+
"id": 88,
|
800 |
+
"content": "<|lb|>",
|
801 |
+
"single_word": false,
|
802 |
+
"lstrip": false,
|
803 |
+
"rstrip": false,
|
804 |
+
"normalized": false,
|
805 |
+
"special": true
|
806 |
+
},
|
807 |
+
{
|
808 |
+
"id": 89,
|
809 |
+
"content": "<|my|>",
|
810 |
+
"single_word": false,
|
811 |
+
"lstrip": false,
|
812 |
+
"rstrip": false,
|
813 |
+
"normalized": false,
|
814 |
+
"special": true
|
815 |
+
},
|
816 |
+
{
|
817 |
+
"id": 90,
|
818 |
+
"content": "<|bo|>",
|
819 |
+
"single_word": false,
|
820 |
+
"lstrip": false,
|
821 |
+
"rstrip": false,
|
822 |
+
"normalized": false,
|
823 |
+
"special": true
|
824 |
+
},
|
825 |
+
{
|
826 |
+
"id": 91,
|
827 |
+
"content": "<|tl|>",
|
828 |
+
"single_word": false,
|
829 |
+
"lstrip": false,
|
830 |
+
"rstrip": false,
|
831 |
+
"normalized": false,
|
832 |
+
"special": true
|
833 |
+
},
|
834 |
+
{
|
835 |
+
"id": 92,
|
836 |
+
"content": "<|mg|>",
|
837 |
+
"single_word": false,
|
838 |
+
"lstrip": false,
|
839 |
+
"rstrip": false,
|
840 |
+
"normalized": false,
|
841 |
+
"special": true
|
842 |
+
},
|
843 |
+
{
|
844 |
+
"id": 93,
|
845 |
+
"content": "<|as|>",
|
846 |
+
"single_word": false,
|
847 |
+
"lstrip": false,
|
848 |
+
"rstrip": false,
|
849 |
+
"normalized": false,
|
850 |
+
"special": true
|
851 |
+
},
|
852 |
+
{
|
853 |
+
"id": 94,
|
854 |
+
"content": "<|tt|>",
|
855 |
+
"single_word": false,
|
856 |
+
"lstrip": false,
|
857 |
+
"rstrip": false,
|
858 |
+
"normalized": false,
|
859 |
+
"special": true
|
860 |
+
},
|
861 |
+
{
|
862 |
+
"id": 95,
|
863 |
+
"content": "<|haw|>",
|
864 |
+
"single_word": false,
|
865 |
+
"lstrip": false,
|
866 |
+
"rstrip": false,
|
867 |
+
"normalized": false,
|
868 |
+
"special": true
|
869 |
+
},
|
870 |
+
{
|
871 |
+
"id": 96,
|
872 |
+
"content": "<|ln|>",
|
873 |
+
"single_word": false,
|
874 |
+
"lstrip": false,
|
875 |
+
"rstrip": false,
|
876 |
+
"normalized": false,
|
877 |
+
"special": true
|
878 |
+
},
|
879 |
+
{
|
880 |
+
"id": 97,
|
881 |
+
"content": "<|ha|>",
|
882 |
+
"single_word": false,
|
883 |
+
"lstrip": false,
|
884 |
+
"rstrip": false,
|
885 |
+
"normalized": false,
|
886 |
+
"special": true
|
887 |
+
},
|
888 |
+
{
|
889 |
+
"id": 98,
|
890 |
+
"content": "<|ba|>",
|
891 |
+
"single_word": false,
|
892 |
+
"lstrip": false,
|
893 |
+
"rstrip": false,
|
894 |
+
"normalized": false,
|
895 |
+
"special": true
|
896 |
+
},
|
897 |
+
{
|
898 |
+
"id": 99,
|
899 |
+
"content": "<|jw|>",
|
900 |
+
"single_word": false,
|
901 |
+
"lstrip": false,
|
902 |
+
"rstrip": false,
|
903 |
+
"normalized": false,
|
904 |
+
"special": true
|
905 |
+
},
|
906 |
+
{
|
907 |
+
"id": 100,
|
908 |
+
"content": "<|su|>",
|
909 |
+
"single_word": false,
|
910 |
+
"lstrip": false,
|
911 |
+
"rstrip": false,
|
912 |
+
"normalized": false,
|
913 |
+
"special": true
|
914 |
+
},
|
915 |
+
{
|
916 |
+
"id": 101,
|
917 |
+
"content": "<|translate|>",
|
918 |
+
"single_word": false,
|
919 |
+
"lstrip": false,
|
920 |
+
"rstrip": false,
|
921 |
+
"normalized": false,
|
922 |
+
"special": true
|
923 |
+
},
|
924 |
+
{
|
925 |
+
"id": 102,
|
926 |
+
"content": "<|transcribe|>",
|
927 |
+
"single_word": false,
|
928 |
+
"lstrip": false,
|
929 |
+
"rstrip": false,
|
930 |
+
"normalized": false,
|
931 |
+
"special": true
|
932 |
+
},
|
933 |
+
{
|
934 |
+
"id": 103,
|
935 |
+
"content": "<|startoflm|>",
|
936 |
+
"single_word": false,
|
937 |
+
"lstrip": false,
|
938 |
+
"rstrip": false,
|
939 |
+
"normalized": false,
|
940 |
+
"special": true
|
941 |
+
},
|
942 |
+
{
|
943 |
+
"id": 104,
|
944 |
+
"content": "<|startofprev|>",
|
945 |
+
"single_word": false,
|
946 |
+
"lstrip": false,
|
947 |
+
"rstrip": false,
|
948 |
+
"normalized": false,
|
949 |
+
"special": true
|
950 |
+
},
|
951 |
+
{
|
952 |
+
"id": 105,
|
953 |
+
"content": "<|nocaptions|>",
|
954 |
+
"single_word": false,
|
955 |
+
"lstrip": false,
|
956 |
+
"rstrip": false,
|
957 |
+
"normalized": false,
|
958 |
+
"special": true
|
959 |
+
},
|
960 |
+
{
|
961 |
+
"id": 106,
|
962 |
+
"content": "<|notimestamps|>",
|
963 |
+
"single_word": false,
|
964 |
+
"lstrip": false,
|
965 |
+
"rstrip": false,
|
966 |
+
"normalized": false,
|
967 |
+
"special": true
|
968 |
+
},
|
969 |
+
{
|
970 |
+
"id": 1130,
|
971 |
+
"content": "<|tp|>",
|
972 |
+
"single_word": false,
|
973 |
+
"lstrip": false,
|
974 |
+
"rstrip": false,
|
975 |
+
"normalized": false,
|
976 |
+
"special": true
|
977 |
+
}
|
978 |
+
],
|
979 |
+
"normalizer": null,
|
980 |
+
"pre_tokenizer": {
|
981 |
+
"type": "ByteLevel",
|
982 |
+
"add_prefix_space": false,
|
983 |
+
"trim_offsets": true,
|
984 |
+
"use_regex": true
|
985 |
+
},
|
986 |
+
"post_processor": {
|
987 |
+
"type": "TemplateProcessing",
|
988 |
+
"single": [
|
989 |
+
{
|
990 |
+
"SpecialToken": {
|
991 |
+
"id": "<|startoftranscript|>",
|
992 |
+
"type_id": 0
|
993 |
+
}
|
994 |
+
},
|
995 |
+
{
|
996 |
+
"SpecialToken": {
|
997 |
+
"id": "<|notimestamps|>",
|
998 |
+
"type_id": 0
|
999 |
+
}
|
1000 |
+
},
|
1001 |
+
{
|
1002 |
+
"Sequence": {
|
1003 |
+
"id": "A",
|
1004 |
+
"type_id": 0
|
1005 |
+
}
|
1006 |
+
},
|
1007 |
+
{
|
1008 |
+
"SpecialToken": {
|
1009 |
+
"id": "<|endoftext|>",
|
1010 |
+
"type_id": 0
|
1011 |
+
}
|
1012 |
+
}
|
1013 |
+
],
|
1014 |
+
"pair": [
|
1015 |
+
{
|
1016 |
+
"SpecialToken": {
|
1017 |
+
"id": "<|startoftranscript|>",
|
1018 |
+
"type_id": 0
|
1019 |
+
}
|
1020 |
+
},
|
1021 |
+
{
|
1022 |
+
"SpecialToken": {
|
1023 |
+
"id": "<|notimestamps|>",
|
1024 |
+
"type_id": 0
|
1025 |
+
}
|
1026 |
+
},
|
1027 |
+
{
|
1028 |
+
"Sequence": {
|
1029 |
+
"id": "A",
|
1030 |
+
"type_id": 0
|
1031 |
+
}
|
1032 |
+
},
|
1033 |
+
{
|
1034 |
+
"Sequence": {
|
1035 |
+
"id": "B",
|
1036 |
+
"type_id": 1
|
1037 |
+
}
|
1038 |
+
},
|
1039 |
+
{
|
1040 |
+
"SpecialToken": {
|
1041 |
+
"id": "<|endoftext|>",
|
1042 |
+
"type_id": 1
|
1043 |
+
}
|
1044 |
+
}
|
1045 |
+
],
|
1046 |
+
"special_tokens": {
|
1047 |
+
"<|endoftext|>": {
|
1048 |
+
"id": "<|endoftext|>",
|
1049 |
+
"ids": [
|
1050 |
+
0
|
1051 |
+
],
|
1052 |
+
"tokens": [
|
1053 |
+
"<|endoftext|>"
|
1054 |
+
]
|
1055 |
+
},
|
1056 |
+
"<|notimestamps|>": {
|
1057 |
+
"id": "<|notimestamps|>",
|
1058 |
+
"ids": [
|
1059 |
+
106
|
1060 |
+
],
|
1061 |
+
"tokens": [
|
1062 |
+
"<|notimestamps|>"
|
1063 |
+
]
|
1064 |
+
},
|
1065 |
+
"<|startoftranscript|>": {
|
1066 |
+
"id": "<|startoftranscript|>",
|
1067 |
+
"ids": [
|
1068 |
+
1
|
1069 |
+
],
|
1070 |
+
"tokens": [
|
1071 |
+
"<|startoftranscript|>"
|
1072 |
+
]
|
1073 |
+
}
|
1074 |
+
}
|
1075 |
+
},
|
1076 |
+
"decoder": {
|
1077 |
+
"type": "ByteLevel",
|
1078 |
+
"add_prefix_space": true,
|
1079 |
+
"trim_offsets": true,
|
1080 |
+
"use_regex": true
|
1081 |
+
},
|
1082 |
+
"model": {
|
1083 |
+
"type": "BPE",
|
1084 |
+
"dropout": null,
|
1085 |
+
"unk_token": null,
|
1086 |
+
"continuing_subword_prefix": "",
|
1087 |
+
"end_of_word_suffix": "",
|
1088 |
+
"fuse_unk": false,
|
1089 |
+
"byte_fallback": false,
|
1090 |
+
"ignore_merges": false,
|
1091 |
+
"vocab": {
|
1092 |
+
"<|endoftext|>": 0,
|
1093 |
+
"<|startoftranscript|>": 1,
|
1094 |
+
"<|en|>": 2,
|
1095 |
+
"<|zh|>": 3,
|
1096 |
+
"<|de|>": 4,
|
1097 |
+
"<|es|>": 5,
|
1098 |
+
"<|ru|>": 6,
|
1099 |
+
"<|ko|>": 7,
|
1100 |
+
"<|fr|>": 8,
|
1101 |
+
"<|ja|>": 9,
|
1102 |
+
"<|pt|>": 10,
|
1103 |
+
"<|tr|>": 11,
|
1104 |
+
"<|pl|>": 12,
|
1105 |
+
"<|ca|>": 13,
|
1106 |
+
"<|nl|>": 14,
|
1107 |
+
"<|ar|>": 15,
|
1108 |
+
"<|sv|>": 16,
|
1109 |
+
"<|it|>": 17,
|
1110 |
+
"<|id|>": 18,
|
1111 |
+
"<|hi|>": 19,
|
1112 |
+
"<|fi|>": 20,
|
1113 |
+
"<|vi|>": 21,
|
1114 |
+
"<|he|>": 22,
|
1115 |
+
"<|uk|>": 23,
|
1116 |
+
"<|el|>": 24,
|
1117 |
+
"<|ms|>": 25,
|
1118 |
+
"<|cs|>": 26,
|
1119 |
+
"<|ro|>": 27,
|
1120 |
+
"<|da|>": 28,
|
1121 |
+
"<|hu|>": 29,
|
1122 |
+
"<|ta|>": 30,
|
1123 |
+
"<|no|>": 31,
|
1124 |
+
"<|th|>": 32,
|
1125 |
+
"<|ur|>": 33,
|
1126 |
+
"<|hr|>": 34,
|
1127 |
+
"<|bg|>": 35,
|
1128 |
+
"<|lt|>": 36,
|
1129 |
+
"<|la|>": 37,
|
1130 |
+
"<|mi|>": 38,
|
1131 |
+
"<|ml|>": 39,
|
1132 |
+
"<|cy|>": 40,
|
1133 |
+
"<|sk|>": 41,
|
1134 |
+
"<|te|>": 42,
|
1135 |
+
"<|fa|>": 43,
|
1136 |
+
"<|lv|>": 44,
|
1137 |
+
"<|bn|>": 45,
|
1138 |
+
"<|sr|>": 46,
|
1139 |
+
"<|az|>": 47,
|
1140 |
+
"<|sl|>": 48,
|
1141 |
+
"<|kn|>": 49,
|
1142 |
+
"<|et|>": 50,
|
1143 |
+
"<|mk|>": 51,
|
1144 |
+
"<|br|>": 52,
|
1145 |
+
"<|eu|>": 53,
|
1146 |
+
"<|is|>": 54,
|
1147 |
+
"<|hy|>": 55,
|
1148 |
+
"<|ne|>": 56,
|
1149 |
+
"<|mn|>": 57,
|
1150 |
+
"<|bs|>": 58,
|
1151 |
+
"<|kk|>": 59,
|
1152 |
+
"<|sq|>": 60,
|
1153 |
+
"<|sw|>": 61,
|
1154 |
+
"<|gl|>": 62,
|
1155 |
+
"<|mr|>": 63,
|
1156 |
+
"<|pa|>": 64,
|
1157 |
+
"<|si|>": 65,
|
1158 |
+
"<|km|>": 66,
|
1159 |
+
"<|sn|>": 67,
|
1160 |
+
"<|yo|>": 68,
|
1161 |
+
"<|so|>": 69,
|
1162 |
+
"<|af|>": 70,
|
1163 |
+
"<|oc|>": 71,
|
1164 |
+
"<|ka|>": 72,
|
1165 |
+
"<|be|>": 73,
|
1166 |
+
"<|tg|>": 74,
|
1167 |
+
"<|sd|>": 75,
|
1168 |
+
"<|gu|>": 76,
|
1169 |
+
"<|am|>": 77,
|
1170 |
+
"<|yi|>": 78,
|
1171 |
+
"<|lo|>": 79,
|
1172 |
+
"<|uz|>": 80,
|
1173 |
+
"<|fo|>": 81,
|
1174 |
+
"<|ht|>": 82,
|
1175 |
+
"<|ps|>": 83,
|
1176 |
+
"<|tk|>": 84,
|
1177 |
+
"<|nn|>": 85,
|
1178 |
+
"<|mt|>": 86,
|
1179 |
+
"<|sa|>": 87,
|
1180 |
+
"<|lb|>": 88,
|
1181 |
+
"<|my|>": 89,
|
1182 |
+
"<|bo|>": 90,
|
1183 |
+
"<|tl|>": 91,
|
1184 |
+
"<|mg|>": 92,
|
1185 |
+
"<|as|>": 93,
|
1186 |
+
"<|tt|>": 94,
|
1187 |
+
"<|haw|>": 95,
|
1188 |
+
"<|ln|>": 96,
|
1189 |
+
"<|ha|>": 97,
|
1190 |
+
"<|ba|>": 98,
|
1191 |
+
"<|jw|>": 99,
|
1192 |
+
"<|su|>": 100,
|
1193 |
+
"<|translate|>": 101,
|
1194 |
+
"<|transcribe|>": 102,
|
1195 |
+
"<|startoflm|>": 103,
|
1196 |
+
"<|startofprev|>": 104,
|
1197 |
+
"<|nocaptions|>": 105,
|
1198 |
+
"<|notimestamps|>": 106,
|
1199 |
+
"!": 107,
|
1200 |
+
"\"": 108,
|
1201 |
+
"#": 109,
|
1202 |
+
"$": 110,
|
1203 |
+
"%": 111,
|
1204 |
+
"&": 112,
|
1205 |
+
"'": 113,
|
1206 |
+
"(": 114,
|
1207 |
+
")": 115,
|
1208 |
+
"*": 116,
|
1209 |
+
"+": 117,
|
1210 |
+
",": 118,
|
1211 |
+
"-": 119,
|
1212 |
+
".": 120,
|
1213 |
+
"/": 121,
|
1214 |
+
"0": 122,
|
1215 |
+
"1": 123,
|
1216 |
+
"2": 124,
|
1217 |
+
"3": 125,
|
1218 |
+
"4": 126,
|
1219 |
+
"5": 127,
|
1220 |
+
"6": 128,
|
1221 |
+
"7": 129,
|
1222 |
+
"8": 130,
|
1223 |
+
"9": 131,
|
1224 |
+
":": 132,
|
1225 |
+
";": 133,
|
1226 |
+
"<": 134,
|
1227 |
+
"=": 135,
|
1228 |
+
">": 136,
|
1229 |
+
"?": 137,
|
1230 |
+
"@": 138,
|
1231 |
+
"A": 139,
|
1232 |
+
"B": 140,
|
1233 |
+
"C": 141,
|
1234 |
+
"D": 142,
|
1235 |
+
"E": 143,
|
1236 |
+
"F": 144,
|
1237 |
+
"G": 145,
|
1238 |
+
"H": 146,
|
1239 |
+
"I": 147,
|
1240 |
+
"J": 148,
|
1241 |
+
"K": 149,
|
1242 |
+
"L": 150,
|
1243 |
+
"M": 151,
|
1244 |
+
"N": 152,
|
1245 |
+
"O": 153,
|
1246 |
+
"P": 154,
|
1247 |
+
"Q": 155,
|
1248 |
+
"R": 156,
|
1249 |
+
"S": 157,
|
1250 |
+
"T": 158,
|
1251 |
+
"U": 159,
|
1252 |
+
"V": 160,
|
1253 |
+
"W": 161,
|
1254 |
+
"X": 162,
|
1255 |
+
"Y": 163,
|
1256 |
+
"Z": 164,
|
1257 |
+
"[": 165,
|
1258 |
+
"\\": 166,
|
1259 |
+
"]": 167,
|
1260 |
+
"^": 168,
|
1261 |
+
"_": 169,
|
1262 |
+
"`": 170,
|
1263 |
+
"a": 171,
|
1264 |
+
"b": 172,
|
1265 |
+
"c": 173,
|
1266 |
+
"d": 174,
|
1267 |
+
"e": 175,
|
1268 |
+
"f": 176,
|
1269 |
+
"g": 177,
|
1270 |
+
"h": 178,
|
1271 |
+
"i": 179,
|
1272 |
+
"j": 180,
|
1273 |
+
"k": 181,
|
1274 |
+
"l": 182,
|
1275 |
+
"m": 183,
|
1276 |
+
"n": 184,
|
1277 |
+
"o": 185,
|
1278 |
+
"p": 186,
|
1279 |
+
"q": 187,
|
1280 |
+
"r": 188,
|
1281 |
+
"s": 189,
|
1282 |
+
"t": 190,
|
1283 |
+
"u": 191,
|
1284 |
+
"v": 192,
|
1285 |
+
"w": 193,
|
1286 |
+
"x": 194,
|
1287 |
+
"y": 195,
|
1288 |
+
"z": 196,
|
1289 |
+
"{": 197,
|
1290 |
+
"|": 198,
|
1291 |
+
"}": 199,
|
1292 |
+
"~": 200,
|
1293 |
+
"¡": 201,
|
1294 |
+
"¢": 202,
|
1295 |
+
"£": 203,
|
1296 |
+
"¤": 204,
|
1297 |
+
"¥": 205,
|
1298 |
+
"¦": 206,
|
1299 |
+
"§": 207,
|
1300 |
+
"¨": 208,
|
1301 |
+
"©": 209,
|
1302 |
+
"ª": 210,
|
1303 |
+
"«": 211,
|
1304 |
+
"¬": 212,
|
1305 |
+
"®": 213,
|
1306 |
+
"¯": 214,
|
1307 |
+
"°": 215,
|
1308 |
+
"±": 216,
|
1309 |
+
"²": 217,
|
1310 |
+
"³": 218,
|
1311 |
+
"´": 219,
|
1312 |
+
"µ": 220,
|
1313 |
+
"¶": 221,
|
1314 |
+
"·": 222,
|
1315 |
+
"¸": 223,
|
1316 |
+
"¹": 224,
|
1317 |
+
"º": 225,
|
1318 |
+
"»": 226,
|
1319 |
+
"¼": 227,
|
1320 |
+
"½": 228,
|
1321 |
+
"¾": 229,
|
1322 |
+
"¿": 230,
|
1323 |
+
"À": 231,
|
1324 |
+
"Á": 232,
|
1325 |
+
"Â": 233,
|
1326 |
+
"Ã": 234,
|
1327 |
+
"Ä": 235,
|
1328 |
+
"Å": 236,
|
1329 |
+
"Æ": 237,
|
1330 |
+
"Ç": 238,
|
1331 |
+
"È": 239,
|
1332 |
+
"É": 240,
|
1333 |
+
"Ê": 241,
|
1334 |
+
"Ë": 242,
|
1335 |
+
"Ì": 243,
|
1336 |
+
"Í": 244,
|
1337 |
+
"Î": 245,
|
1338 |
+
"Ï": 246,
|
1339 |
+
"Ð": 247,
|
1340 |
+
"Ñ": 248,
|
1341 |
+
"Ò": 249,
|
1342 |
+
"Ó": 250,
|
1343 |
+
"Ô": 251,
|
1344 |
+
"Õ": 252,
|
1345 |
+
"Ö": 253,
|
1346 |
+
"×": 254,
|
1347 |
+
"Ø": 255,
|
1348 |
+
"Ù": 256,
|
1349 |
+
"Ú": 257,
|
1350 |
+
"Û": 258,
|
1351 |
+
"Ü": 259,
|
1352 |
+
"Ý": 260,
|
1353 |
+
"Þ": 261,
|
1354 |
+
"ß": 262,
|
1355 |
+
"à": 263,
|
1356 |
+
"á": 264,
|
1357 |
+
"â": 265,
|
1358 |
+
"ã": 266,
|
1359 |
+
"ä": 267,
|
1360 |
+
"å": 268,
|
1361 |
+
"æ": 269,
|
1362 |
+
"ç": 270,
|
1363 |
+
"è": 271,
|
1364 |
+
"é": 272,
|
1365 |
+
"ê": 273,
|
1366 |
+
"ë": 274,
|
1367 |
+
"ì": 275,
|
1368 |
+
"í": 276,
|
1369 |
+
"î": 277,
|
1370 |
+
"ï": 278,
|
1371 |
+
"ð": 279,
|
1372 |
+
"ñ": 280,
|
1373 |
+
"ò": 281,
|
1374 |
+
"ó": 282,
|
1375 |
+
"ô": 283,
|
1376 |
+
"õ": 284,
|
1377 |
+
"ö": 285,
|
1378 |
+
"÷": 286,
|
1379 |
+
"ø": 287,
|
1380 |
+
"ù": 288,
|
1381 |
+
"ú": 289,
|
1382 |
+
"û": 290,
|
1383 |
+
"ü": 291,
|
1384 |
+
"ý": 292,
|
1385 |
+
"þ": 293,
|
1386 |
+
"ÿ": 294,
|
1387 |
+
"Ā": 295,
|
1388 |
+
"ā": 296,
|
1389 |
+
"Ă": 297,
|
1390 |
+
"ă": 298,
|
1391 |
+
"Ą": 299,
|
1392 |
+
"ą": 300,
|
1393 |
+
"Ć": 301,
|
1394 |
+
"ć": 302,
|
1395 |
+
"Ĉ": 303,
|
1396 |
+
"ĉ": 304,
|
1397 |
+
"Ċ": 305,
|
1398 |
+
"ċ": 306,
|
1399 |
+
"Č": 307,
|
1400 |
+
"č": 308,
|
1401 |
+
"Ď": 309,
|
1402 |
+
"ď": 310,
|
1403 |
+
"Đ": 311,
|
1404 |
+
"đ": 312,
|
1405 |
+
"Ē": 313,
|
1406 |
+
"ē": 314,
|
1407 |
+
"Ĕ": 315,
|
1408 |
+
"ĕ": 316,
|
1409 |
+
"Ė": 317,
|
1410 |
+
"ė": 318,
|
1411 |
+
"Ę": 319,
|
1412 |
+
"ę": 320,
|
1413 |
+
"Ě": 321,
|
1414 |
+
"ě": 322,
|
1415 |
+
"Ĝ": 323,
|
1416 |
+
"ĝ": 324,
|
1417 |
+
"Ğ": 325,
|
1418 |
+
"ğ": 326,
|
1419 |
+
"Ġ": 327,
|
1420 |
+
"ġ": 328,
|
1421 |
+
"Ģ": 329,
|
1422 |
+
"ģ": 330,
|
1423 |
+
"Ĥ": 331,
|
1424 |
+
"ĥ": 332,
|
1425 |
+
"Ħ": 333,
|
1426 |
+
"ħ": 334,
|
1427 |
+
"Ĩ": 335,
|
1428 |
+
"ĩ": 336,
|
1429 |
+
"Ī": 337,
|
1430 |
+
"ī": 338,
|
1431 |
+
"Ĭ": 339,
|
1432 |
+
"ĭ": 340,
|
1433 |
+
"Į": 341,
|
1434 |
+
"į": 342,
|
1435 |
+
"İ": 343,
|
1436 |
+
"ı": 344,
|
1437 |
+
"IJ": 345,
|
1438 |
+
"ij": 346,
|
1439 |
+
"Ĵ": 347,
|
1440 |
+
"ĵ": 348,
|
1441 |
+
"Ķ": 349,
|
1442 |
+
"ķ": 350,
|
1443 |
+
"ĸ": 351,
|
1444 |
+
"Ĺ": 352,
|
1445 |
+
"ĺ": 353,
|
1446 |
+
"Ļ": 354,
|
1447 |
+
"ļ": 355,
|
1448 |
+
"Ľ": 356,
|
1449 |
+
"ľ": 357,
|
1450 |
+
"Ŀ": 358,
|
1451 |
+
"ŀ": 359,
|
1452 |
+
"Ł": 360,
|
1453 |
+
"ł": 361,
|
1454 |
+
"Ń": 362,
|
1455 |
+
"Ġl": 363,
|
1456 |
+
"na": 364,
|
1457 |
+
"Ġli": 365,
|
1458 |
+
"Ġt": 366,
|
1459 |
+
"Ġp": 367,
|
1460 |
+
"Ġs": 368,
|
1461 |
+
"Ġm": 369,
|
1462 |
+
"an": 370,
|
1463 |
+
"Ġe": 371,
|
1464 |
+
"Ġk": 372,
|
1465 |
+
"li": 373,
|
1466 |
+
"ona": 374,
|
1467 |
+
"en": 375,
|
1468 |
+
"wa": 376,
|
1469 |
+
"al": 377,
|
1470 |
+
"ni": 378,
|
1471 |
+
"si": 379,
|
1472 |
+
"Ġni": 380,
|
1473 |
+
"ala": 381,
|
1474 |
+
"Ġla": 382,
|
1475 |
+
"Ġta": 383,
|
1476 |
+
"Ġpi": 384,
|
1477 |
+
"on": 385,
|
1478 |
+
"jan": 386,
|
1479 |
+
"Ġmi": 387,
|
1480 |
+
"ki": 388,
|
1481 |
+
"ma": 389,
|
1482 |
+
"Ġsi": 390,
|
1483 |
+
"Ġtawa": 391,
|
1484 |
+
"te": 392,
|
1485 |
+
"mi": 393,
|
1486 |
+
"Ġlon": 394,
|
1487 |
+
"Ġw": 395,
|
1488 |
+
"Ġala": 396,
|
1489 |
+
"Ġto": 397,
|
1490 |
+
"il": 398,
|
1491 |
+
"ama": 399,
|
1492 |
+
"Ġpona": 400,
|
1493 |
+
"Ġmu": 401,
|
1494 |
+
"Ġo": 402,
|
1495 |
+
"Ġjan": 403,
|
1496 |
+
"Ġpa": 404,
|
1497 |
+
"ke": 405,
|
1498 |
+
"ka": 406,
|
1499 |
+
"Ġsina": 407,
|
1500 |
+
"Ġlu": 408,
|
1501 |
+
"lin": 409,
|
1502 |
+
"Ġkama": 410,
|
1503 |
+
"so": 411,
|
1504 |
+
"Ġona": 412,
|
1505 |
+
"po": 413,
|
1506 |
+
"Ġtoki": 414,
|
1507 |
+
"ile": 415,
|
1508 |
+
"Ġna": 416,
|
1509 |
+
"enpo": 417,
|
1510 |
+
"Ġsu": 418,
|
1511 |
+
"Ġwile": 419,
|
1512 |
+
"Ġse": 420,
|
1513 |
+
"Ġi": 421,
|
1514 |
+
"Ġmute": 422,
|
1515 |
+
"pa": 423,
|
1516 |
+
"sina": 424,
|
1517 |
+
"Ġa": 425,
|
1518 |
+
"Ġken": 426,
|
1519 |
+
"lo": 427,
|
1520 |
+
"pe": 428,
|
1521 |
+
"kin": 429,
|
1522 |
+
"me": 430,
|
1523 |
+
"Ġan": 431,
|
1524 |
+
"jo": 432,
|
1525 |
+
"mo": 433,
|
1526 |
+
"Ġsona": 434,
|
1527 |
+
"Ġtan": 435,
|
1528 |
+
"Ġlukin": 436,
|
1529 |
+
"sa": 437,
|
1530 |
+
"Ġpilin": 438,
|
1531 |
+
"ken": 439,
|
1532 |
+
"ale": 440,
|
1533 |
+
"ta": 441,
|
1534 |
+
"elo": 442,
|
1535 |
+
"Ġike": 443,
|
1536 |
+
"Ġtomo": 444,
|
1537 |
+
"ku": 445,
|
1538 |
+
"Ġale": 446,
|
1539 |
+
"ilo": 447,
|
1540 |
+
"wi": 448,
|
1541 |
+
"Ġmo": 449,
|
1542 |
+
"pu": 450,
|
1543 |
+
"Ġku": 451,
|
1544 |
+
"eli": 452,
|
1545 |
+
"Ġseme": 453,
|
1546 |
+
"sin": 454,
|
1547 |
+
"Ġlili": 455,
|
1548 |
+
"Ġtenpo": 456,
|
1549 |
+
"Ġki": 457,
|
1550 |
+
"Ġsuli": 458,
|
1551 |
+
"Ġwa": 459,
|
1552 |
+
"Ġke": 460,
|
1553 |
+
"peken": 461,
|
1554 |
+
"Ġpali": 462,
|
1555 |
+
"Ġkepeken": 463,
|
1556 |
+
"ja": 464,
|
1557 |
+
"tenpo": 465,
|
1558 |
+
"Ġma": 466,
|
1559 |
+
"Ġpana": 467,
|
1560 |
+
"pi": 468,
|
1561 |
+
"Ġjo": 469,
|
1562 |
+
"wen": 470,
|
1563 |
+
"weli": 471,
|
1564 |
+
"Ġtu": 472,
|
1565 |
+
"Ġtaso": 473,
|
1566 |
+
"un": 474,
|
1567 |
+
"Ġante": 475,
|
1568 |
+
"Ġmoku": 476,
|
1569 |
+
"eka": 477,
|
1570 |
+
"len": 478,
|
1571 |
+
"Ġpo": 479,
|
1572 |
+
"Ġweka": 480,
|
1573 |
+
"kala": 481,
|
1574 |
+
"Ġwan": 482,
|
1575 |
+
"Ġluka": 483,
|
1576 |
+
"Ġnasin": 484,
|
1577 |
+
"Ġsin": 485,
|
1578 |
+
"Ġolin": 486,
|
1579 |
+
"ko": 487,
|
1580 |
+
"npa": 488,
|
1581 |
+
"Ġmusi": 489,
|
1582 |
+
"Ġwawa": 490,
|
1583 |
+
"Ġsama": 491,
|
1584 |
+
"Ġkala": 492,
|
1585 |
+
"no": 493,
|
1586 |
+
"Ġtelo": 494,
|
1587 |
+
"lu": 495,
|
1588 |
+
"Ġawen": 496,
|
1589 |
+
"Ġnimi": 497,
|
1590 |
+
"je": 498,
|
1591 |
+
"Ġnanpa": 499,
|
1592 |
+
"ĠK": 500,
|
1593 |
+
"telen": 501,
|
1594 |
+
"ĠT": 502,
|
1595 |
+
"Ġlawa": 503,
|
1596 |
+
"Ġilo": 504,
|
1597 |
+
"Ġso": 505,
|
1598 |
+
"Ġlipu": 506,
|
1599 |
+
"Ġpakala": 507,
|
1600 |
+
"tu": 508,
|
1601 |
+
"Ġpini": 509,
|
1602 |
+
"Ġsitelen": 510,
|
1603 |
+
"uta": 511,
|
1604 |
+
"Ġsuno": 512,
|
1605 |
+
"Ġsoweli": 513,
|
1606 |
+
"Ġsewi": 514,
|
1607 |
+
"Ġnasa": 515,
|
1608 |
+
"ĠTu": 516,
|
1609 |
+
"Ġen": 517,
|
1610 |
+
"taso": 518,
|
1611 |
+
"to": 519,
|
1612 |
+
"jelo": 520,
|
1613 |
+
"Ġkin": 521,
|
1614 |
+
"Ġpoka": 522,
|
1615 |
+
"lupu": 523,
|
1616 |
+
"tuli": 524,
|
1617 |
+
"Ġijo": 525,
|
1618 |
+
"untuli": 526,
|
1619 |
+
"ĠKuntuli": 527,
|
1620 |
+
"le": 528,
|
1621 |
+
"Ġlape": 529,
|
1622 |
+
"Ġkute": 530,
|
1623 |
+
"Ġka": 531,
|
1624 |
+
"soweli": 532,
|
1625 |
+
"Ġanu": 533,
|
1626 |
+
"Ġlen": 534,
|
1627 |
+
"Ġkalama": 535,
|
1628 |
+
"Ġmon": 536,
|
1629 |
+
"Ġsike": 537,
|
1630 |
+
"Ġmoli": 538,
|
1631 |
+
"Ġkasi": 539,
|
1632 |
+
"pen": 540,
|
1633 |
+
"Ġalasa": 541,
|
1634 |
+
"Ġsuwi": 542,
|
1635 |
+
"Ġanpa": 543,
|
1636 |
+
"Ġkule": 544,
|
1637 |
+
"kesi": 545,
|
1638 |
+
"nsa": 546,
|
1639 |
+
"Ġopen": 547,
|
1640 |
+
"Ġja": 548,
|
1641 |
+
"Ġkon": 549,
|
1642 |
+
"Ġkulupu": 550,
|
1643 |
+
"ĠS": 551,
|
1644 |
+
"Ġu": 552,
|
1645 |
+
"Ġinsa": 553,
|
1646 |
+
"oje": 554,
|
1647 |
+
"Ġ\"": 555,
|
1648 |
+
"Ġloje": 556,
|
1649 |
+
"Ġseli": 557,
|
1650 |
+
"Ġkili": 558,
|
1651 |
+
"tala": 559,
|
1652 |
+
"la": 560,
|
1653 |
+
"Ġali": 561,
|
1654 |
+
"sun": 562,
|
1655 |
+
"nasin": 563,
|
1656 |
+
"pin": 564,
|
1657 |
+
"Ġutala": 565,
|
1658 |
+
"ani": 566,
|
1659 |
+
"Ġpoki": 567,
|
1660 |
+
"ena": 568,
|
1661 |
+
"suta": 569,
|
1662 |
+
"Ġsijelo": 570,
|
1663 |
+
"Ġn": 571,
|
1664 |
+
"Ġjaki": 572,
|
1665 |
+
"ĠA": 573,
|
1666 |
+
"ĠP": 574,
|
1667 |
+
"Ġle": 575,
|
1668 |
+
"Ġpan": 576,
|
1669 |
+
"Ġmani": 577,
|
1670 |
+
"Ġko": 578,
|
1671 |
+
"meja": 579,
|
1672 |
+
"Ġuta": 580,
|
1673 |
+
"Ġpu": 581,
|
1674 |
+
"Ġpimeja": 582,
|
1675 |
+
"toki": 583,
|
1676 |
+
"Ġmonsuta": 584,
|
1677 |
+
"Ġnena": 585,
|
1678 |
+
"Ġlin": 586,
|
1679 |
+
"nimi": 587,
|
1680 |
+
"Ġmije": 588,
|
1681 |
+
"Ġmun": 589,
|
1682 |
+
"Ġlupa": 590,
|
1683 |
+
"ĠL": 591,
|
1684 |
+
"Ġmama": 592,
|
1685 |
+
"mako": 593,
|
1686 |
+
"Ġnamako": 594,
|
1687 |
+
"Ġsinpin": 595,
|
1688 |
+
"tan": 596,
|
1689 |
+
"Ġmeli": 597,
|
1690 |
+
"Ġsupa": 598,
|
1691 |
+
"Ġwaso": 599,
|
1692 |
+
"tomo": 600,
|
1693 |
+
"sike": 601,
|
1694 |
+
"Ġakesi": 602,
|
1695 |
+
"noka": 603,
|
1696 |
+
"lon": 604,
|
1697 |
+
"kasi": 605,
|
1698 |
+
"Ġlinja": 606,
|
1699 |
+
"mu": 607,
|
1700 |
+
"Ġnoka": 608,
|
1701 |
+
"Ġwalo": 609,
|
1702 |
+
"akesi": 610,
|
1703 |
+
"ĠM": 611,
|
1704 |
+
"Ġun": 612,
|
1705 |
+
"kulupu": 613,
|
1706 |
+
"Ġkiwen": 614,
|
1707 |
+
"pisi": 615,
|
1708 |
+
"ĠAn": 616,
|
1709 |
+
"ijo": 617,
|
1710 |
+
"se": 618,
|
1711 |
+
"san": 619,
|
1712 |
+
"we": 620,
|
1713 |
+
"Ġjelo": 621,
|
1714 |
+
"Ġesun": 622,
|
1715 |
+
"Ġlaso": 623,
|
1716 |
+
"Ġpake": 624,
|
1717 |
+
"Ġkipisi": 625,
|
1718 |
+
"pan": 626,
|
1719 |
+
"lipu": 627,
|
1720 |
+
"telo": 628,
|
1721 |
+
"Ġmonsi": 629,
|
1722 |
+
"Ġlete": 630,
|
1723 |
+
"Ġunpa": 631,
|
1724 |
+
"alu": 632,
|
1725 |
+
"kewi": 633,
|
1726 |
+
"kalama": 634,
|
1727 |
+
"sikeke": 635,
|
1728 |
+
"ĠAnkewi": 636,
|
1729 |
+
"ju": 637,
|
1730 |
+
"sila": 638,
|
1731 |
+
"Ġpalisa": 639,
|
1732 |
+
"kalu": 640,
|
1733 |
+
"ĠE": 641,
|
1734 |
+
"sitelen": 642,
|
1735 |
+
"tesan": 643,
|
1736 |
+
"Ġselo": 644,
|
1737 |
+
"takalu": 645,
|
1738 |
+
"jetesan": 646,
|
1739 |
+
"jetesantakalu": 647,
|
1740 |
+
"waso": 648,
|
1741 |
+
"Ġmisikeke": 649,
|
1742 |
+
"pipi": 650,
|
1743 |
+
"Ġpowe": 651,
|
1744 |
+
"kosila": 652,
|
1745 |
+
"suno": 653,
|
1746 |
+
"ĠI": 654,
|
1747 |
+
"onsi": 655,
|
1748 |
+
"pali": 656,
|
1749 |
+
"meli": 657,
|
1750 |
+
"musi": 658,
|
1751 |
+
"\".": 659,
|
1752 |
+
"kon": 660,
|
1753 |
+
"olin": 661,
|
1754 |
+
"su": 662,
|
1755 |
+
"âĢ": 663,
|
1756 |
+
"ĠN": 664,
|
1757 |
+
"Ġtonsi": 665,
|
1758 |
+
"wawa": 666,
|
1759 |
+
"Ġpipi": 667,
|
1760 |
+
"mama": 668,
|
1761 |
+
"Ġoko": 669,
|
1762 |
+
"moku": 670,
|
1763 |
+
"Ġleko": 671,
|
1764 |
+
"Ġkokosila": 672,
|
1765 |
+
"sona": 673,
|
1766 |
+
"seme": 674,
|
1767 |
+
"juna": 675,
|
1768 |
+
"eso": 676,
|
1769 |
+
"Ġmeso": 677,
|
1770 |
+
"linja": 678,
|
1771 |
+
"Ġmajuna": 679,
|
1772 |
+
"lukin": 680,
|
1773 |
+
"ĠSon": 681,
|
1774 |
+
"ĠIn": 682,
|
1775 |
+
"selo": 683,
|
1776 |
+
"wile": 684,
|
1777 |
+
"Ġlan": 685,
|
1778 |
+
"sima": 686,
|
1779 |
+
"kili": 687,
|
1780 |
+
"pilin": 688,
|
1781 |
+
"luka": 689,
|
1782 |
+
"ĠKa": 690,
|
1783 |
+
"ĠTe": 691,
|
1784 |
+
"ĠLi": 692,
|
1785 |
+
"ĠMe": 693,
|
1786 |
+
"ĠInli": 694,
|
1787 |
+
"Ġlanpan": 695,
|
1788 |
+
"..": 696,
|
1789 |
+
"ike": 697,
|
1790 |
+
"seli": 698,
|
1791 |
+
"ton": 699,
|
1792 |
+
"wan": 700,
|
1793 |
+
"sijelo": 701,
|
1794 |
+
"kijetesantakalu": 702,
|
1795 |
+
"Ġape": 703,
|
1796 |
+
"pesi": 704,
|
1797 |
+
"kule": 705,
|
1798 |
+
"piku": 706,
|
1799 |
+
"Ġsoko": 707,
|
1800 |
+
"Ġjasima": 708,
|
1801 |
+
"ĠPi": 709,
|
1802 |
+
"ĠSonja": 710,
|
1803 |
+
"Ġapeja": 711,
|
1804 |
+
"ĠJ": 712,
|
1805 |
+
"nanpa": 713,
|
1806 |
+
"sinpin": 714,
|
1807 |
+
"Ġkijetesantakalu": 715,
|
1808 |
+
"lupa": 716,
|
1809 |
+
"lawa": 717,
|
1810 |
+
"ali": 718,
|
1811 |
+
"nton": 719,
|
1812 |
+
"ĠâĢ": 720,
|
1813 |
+
"Ġte": 721,
|
1814 |
+
"kiwen": 722,
|
1815 |
+
"pakala": 723,
|
1816 |
+
"winton": 724,
|
1817 |
+
"luwi": 725,
|
1818 |
+
"Ġlinluwi": 726,
|
1819 |
+
"ĠLo": 727,
|
1820 |
+
"ĠEwinton": 728,
|
1821 |
+
"âĢĿ": 729,
|
1822 |
+
"ĠNi": 730,
|
1823 |
+
"ĠâĢľ": 731,
|
1824 |
+
"lan": 732,
|
1825 |
+
"ntu": 733,
|
1826 |
+
"oko": 734,
|
1827 |
+
"ĠW": 735,
|
1828 |
+
"Ġmelo": 736,
|
1829 |
+
"Ġepiku": 737,
|
1830 |
+
"mije": 738,
|
1831 |
+
"pini": 739,
|
1832 |
+
"ĠKi": 740,
|
1833 |
+
"ĠTo": 741,
|
1834 |
+
"Ġkapesi": 742,
|
1835 |
+
"ĠSa": 743,
|
1836 |
+
"ĠSu": 744,
|
1837 |
+
"ĠSan": 745,
|
1838 |
+
"ĠSen": 746,
|
1839 |
+
"ĠPa": 747,
|
1840 |
+
"ĠPe": 748,
|
1841 |
+
"ĠLa": 749,
|
1842 |
+
"palisa": 750,
|
1843 |
+
"ĠTelen": 751,
|
1844 |
+
"ĠLisa": 752,
|
1845 |
+
"Ġmelome": 753,
|
1846 |
+
"awen": 754,
|
1847 |
+
"mon": 755,
|
1848 |
+
"mun": 756,
|
1849 |
+
"pona": 757,
|
1850 |
+
"ĠO": 758,
|
1851 |
+
"nata": 759,
|
1852 |
+
"lija": 760,
|
1853 |
+
"poki": 761,
|
1854 |
+
"sawi": 762,
|
1855 |
+
"Ġkuntu": 763,
|
1856 |
+
"jaki": 764,
|
1857 |
+
"Ġpuwa": 765,
|
1858 |
+
"ĠMi": 766,
|
1859 |
+
"sewi": 767,
|
1860 |
+
"supa": 768,
|
1861 |
+
"ĠKanata": 769,
|
1862 |
+
"ĠMewi": 770,
|
1863 |
+
"...": 771,
|
1864 |
+
"ĠJu": 772,
|
1865 |
+
"ĠSeni": 773,
|
1866 |
+
"monsuta": 774,
|
1867 |
+
"epiku": 775,
|
1868 |
+
"open": 776,
|
1869 |
+
"ante": 777,
|
1870 |
+
"sija": 778,
|
1871 |
+
"Ġmijo": 779,
|
1872 |
+
"misikeke": 780,
|
1873 |
+
"amana": 781,
|
1874 |
+
"poka": 782,
|
1875 |
+
"sali": 783,
|
1876 |
+
"ĠKu": 784,
|
1877 |
+
"ĠKen": 785,
|
1878 |
+
"ĠTa": 786,
|
1879 |
+
"ĠSi": 787,
|
1880 |
+
"ĠSamana": 788,
|
1881 |
+
"Ġusawi": 789,
|
1882 |
+
"lape": 790,
|
1883 |
+
"lapi": 791,
|
1884 |
+
"ĠPo": 792,
|
1885 |
+
"mute": 793,
|
1886 |
+
"panpo": 794,
|
1887 |
+
"ĠElo": 795,
|
1888 |
+
"ĠKapesi": 796,
|
1889 |
+
"ĠKin": 797,
|
1890 |
+
"ĠSupanpo": 798,
|
1891 |
+
"ĠSanta": 799,
|
1892 |
+
"ĠLasina": 800,
|
1893 |
+
"ĠMewika": 801,
|
1894 |
+
"Ġmijomi": 802,
|
1895 |
+
"eko": 803,
|
1896 |
+
"esun": 804,
|
1897 |
+
"imi": 805,
|
1898 |
+
"insa": 806,
|
1899 |
+
"juta": 807,
|
1900 |
+
"kama": 808,
|
1901 |
+
"mala": 809,
|
1902 |
+
"nena": 810,
|
1903 |
+
"utala": 811,
|
1904 |
+
"yu": 812,
|
1905 |
+
"¼ģ": 813,
|
1906 |
+
"ï¼ģ": 814,
|
1907 |
+
"ĠU": 815,
|
1908 |
+
"nasa": 816,
|
1909 |
+
"Ġsan": 817,
|
1910 |
+
"Ġete": 818,
|
1911 |
+
"Ġkan": 819,
|
1912 |
+
"wala": 820,
|
1913 |
+
"sipin": 821,
|
1914 |
+
"alata": 822,
|
1915 |
+
"alasija": 823,
|
1916 |
+
"onken": 824,
|
1917 |
+
"Ġpasila": 825,
|
1918 |
+
"keke": 826,
|
1919 |
+
"pana": 827,
|
1920 |
+
"pelan": 828,
|
1921 |
+
"Ġkiki": 829,
|
1922 |
+
"kokosila": 830,
|
1923 |
+
"ĠKan": 831,
|
1924 |
+
"ĠKeli": 832,
|
1925 |
+
"ĠTimi": 833,
|
1926 |
+
"Ġkamala": 834,
|
1927 |
+
"ĠSo": 835,
|
1928 |
+
"ĠAsi": 836,
|
1929 |
+
"ĠPu": 837,
|
1930 |
+
"ĠPan": 838,
|
1931 |
+
"ĠPona": 839,
|
1932 |
+
"ĠPalata": 840,
|
1933 |
+
"ĠLe": 841,
|
1934 |
+
"ĠMa": 842,
|
1935 |
+
"ĠMalasija": 843,
|
1936 |
+
"Ġunu": 844,
|
1937 |
+
"ĠEko": 845,
|
1938 |
+
"ĠEpelan": 846,
|
1939 |
+
"ĠIta": 847,
|
1940 |
+
"ĠNe": 848,
|
1941 |
+
"ĠNu": 849,
|
1942 |
+
"ĠSonko": 850,
|
1943 |
+
"ĠLin": 851,
|
1944 |
+
"ĠMekeke": 852,
|
1945 |
+
"ĠPita": 853,
|
1946 |
+
"ĠJonken": 854,
|
1947 |
+
"ĠLola": 855,
|
1948 |
+
"âĢĿ.": 856,
|
1949 |
+
"ĠWa": 857,
|
1950 |
+
"ĠTosi": 858,
|
1951 |
+
"ĠToki": 859,
|
1952 |
+
"ĠSusan": 860,
|
1953 |
+
"ĠPepu": 861,
|
1954 |
+
"ĠMisali": 862,
|
1955 |
+
"ĠSiko": 863,
|
1956 |
+
"lapisu": 864,
|
1957 |
+
"ĠElopa": 865,
|
1958 |
+
"ĠKinkili": 866,
|
1959 |
+
"Ġkamalawala": 867,
|
1960 |
+
"ĠEpelanto": 868,
|
1961 |
+
"!\"": 869,
|
1962 |
+
"\",": 870,
|
1963 |
+
"\"?": 871,
|
1964 |
+
".\"": 872,
|
1965 |
+
"?!": 873,
|
1966 |
+
"Eko": 874,
|
1967 |
+
"asin": 875,
|
1968 |
+
"esi": 876,
|
1969 |
+
"eta": 877,
|
1970 |
+
"elon": 878,
|
1971 |
+
"esija": 879,
|
1972 |
+
"ije": 880,
|
1973 |
+
"isipin": 881,
|
1974 |
+
"jal": 882,
|
1975 |
+
"jon": 883,
|
1976 |
+
"jama": 884,
|
1977 |
+
"kan": 885,
|
1978 |
+
"lani": 886,
|
1979 |
+
"man": 887,
|
1980 |
+
"mani": 888,
|
1981 |
+
"nu": 889,
|
1982 |
+
"omi": 890,
|
1983 |
+
"ose": 891,
|
1984 |
+
"pon": 892,
|
1985 |
+
"pil": 893,
|
1986 |
+
"pani": 894,
|
1987 |
+
"palu": 895,
|
1988 |
+
"son": 896,
|
1989 |
+
"sama": 897,
|
1990 |
+
"tonsi": 898,
|
1991 |
+
"upi": 899,
|
1992 |
+
"wani": 900,
|
1993 |
+
"Ġyu": 901,
|
1994 |
+
"nawi": 902,
|
1995 |
+
"naja": 903,
|
1996 |
+
"Ġpeta": 904,
|
1997 |
+
"anu": 905,
|
1998 |
+
"anka": 906,
|
1999 |
+
"anpa": 907,
|
2000 |
+
"antan": 908,
|
2001 |
+
"lia": 909,
|
2002 |
+
"lisa": 910,
|
2003 |
+
"liku": 911,
|
2004 |
+
"lilan": 912,
|
2005 |
+
"wasi": 913,
|
2006 |
+
"wawi": 914,
|
2007 |
+
"nija": 915,
|
2008 |
+
"siko": 916,
|
2009 |
+
"alasa": 917,
|
2010 |
+
"Ġtaki": 918,
|
2011 |
+
"onyu": 919,
|
2012 |
+
"Ġmisa": 920,
|
2013 |
+
"kini": 921,
|
2014 |
+
"kijo": 922,
|
2015 |
+
"kiko": 923,
|
2016 |
+
"kimu": 924,
|
2017 |
+
"kipisi": 925,
|
2018 |
+
"kisan": 926,
|
2019 |
+
"majuna": 927,
|
2020 |
+
"telan": 928,
|
2021 |
+
"mile": 929,
|
2022 |
+
"ila": 930,
|
2023 |
+
"Ġmulapisu": 931,
|
2024 |
+
"Ġowe": 932,
|
2025 |
+
"Ġojuta": 933,
|
2026 |
+
"kepeken": 934,
|
2027 |
+
"kenaja": 935,
|
2028 |
+
"kaso": 936,
|
2029 |
+
"katon": 937,
|
2030 |
+
"kawan": 938,
|
2031 |
+
"soko": 939,
|
2032 |
+
"polo": 940,
|
2033 |
+
"powe": 941,
|
2034 |
+
"Ġisipin": 942,
|
2035 |
+
"pajal": 943,
|
2036 |
+
"pakawan": 944,
|
2037 |
+
"loje": 945,
|
2038 |
+
"peta": 946,
|
2039 |
+
"peko": 947,
|
2040 |
+
"meso": 948,
|
2041 |
+
"mesu": 949,
|
2042 |
+
"jole": 950,
|
2043 |
+
"joju": 951,
|
2044 |
+
"mosi": 952,
|
2045 |
+
"momo": 953,
|
2046 |
+
"moto": 954,
|
2047 |
+
"salin": 955,
|
2048 |
+
"tawa": 956,
|
2049 |
+
"tawan": 957,
|
2050 |
+
"tasali": 958,
|
2051 |
+
"kukan": 959,
|
2052 |
+
"wito": 960,
|
2053 |
+
"pusi": 961,
|
2054 |
+
"puwani": 962,
|
2055 |
+
"Ġwale": 963,
|
2056 |
+
"Ġkese": 964,
|
2057 |
+
"jasima": 965,
|
2058 |
+
"pili": 966,
|
2059 |
+
"pimeja": 967,
|
2060 |
+
"Ġtuli": 968,
|
2061 |
+
"uneko": 969,
|
2062 |
+
"kowa": 970,
|
2063 |
+
"nokijo": 971,
|
2064 |
+
"jesa": 972,
|
2065 |
+
"jekenaja": 973,
|
2066 |
+
"ĠKena": 974,
|
2067 |
+
"ĠKijo": 975,
|
2068 |
+
"ĠKelon": 976,
|
2069 |
+
"ĠKomi": 977,
|
2070 |
+
"ĠKila": 978,
|
2071 |
+
"ĠTen": 979,
|
2072 |
+
"ĠTonyu": 980,
|
2073 |
+
"Ġsoto": 981,
|
2074 |
+
"lete": 982,
|
2075 |
+
"leko": 983,
|
2076 |
+
"Ġkonwe": 984,
|
2077 |
+
"ĠSuneko": 985,
|
2078 |
+
"Ġumesu": 986,
|
2079 |
+
"laso": 987,
|
2080 |
+
"lasin": 988,
|
2081 |
+
"ĠAli": 989,
|
2082 |
+
"ĠAwi": 990,
|
2083 |
+
"ĠAlapi": 991,
|
2084 |
+
"ĠAman": 992,
|
2085 |
+
"ĠAwawi": 993,
|
2086 |
+
"ĠApolo": 994,
|
2087 |
+
"ĠAtawan": 995,
|
2088 |
+
"ĠAlasin": 996,
|
2089 |
+
"ĠPuta": 997,
|
2090 |
+
"ĠLuta": 998,
|
2091 |
+
"ĠLupi": 999,
|
2092 |
+
"ĠLantan": 1000,
|
2093 |
+
"mulapisu": 1001,
|
2094 |
+
"ĠMena": 1002,
|
2095 |
+
"ĠMose": 1003,
|
2096 |
+
"ĠManka": 1004,
|
2097 |
+
"ĠAnse": 1005,
|
2098 |
+
"ĠAnson": 1006,
|
2099 |
+
"ĠAnkowa": 1007,
|
2100 |
+
"ijosa": 1008,
|
2101 |
+
"aluto": 1009,
|
2102 |
+
"jusi": 1010,
|
2103 |
+
"juwan": 1011,
|
2104 |
+
"jukini": 1012,
|
2105 |
+
"ĠEki": 1013,
|
2106 |
+
"ĠElisa": 1014,
|
2107 |
+
"ĠIlan": 1015,
|
2108 |
+
"suli": 1016,
|
2109 |
+
"suka": 1017,
|
2110 |
+
"suwi": 1018,
|
2111 |
+
"supan": 1019,
|
2112 |
+
"ĠNasin": 1020,
|
2113 |
+
"ĠNaluto": 1021,
|
2114 |
+
"ĠInton": 1022,
|
2115 |
+
"ĠKapil": 1023,
|
2116 |
+
"ĠKajesa": 1024,
|
2117 |
+
"ĠTepo": 1025,
|
2118 |
+
"ĠTeja": 1026,
|
2119 |
+
"ĠTewen": 1027,
|
2120 |
+
"ĠTepani": 1028,
|
2121 |
+
"ĠLilija": 1029,
|
2122 |
+
"ĠLililan": 1030,
|
2123 |
+
"ĠMelani": 1031,
|
2124 |
+
"ĠMesiko": 1032,
|
2125 |
+
"ĠMekiko": 1033,
|
2126 |
+
"selija": 1034,
|
2127 |
+
"ĠPiwi": 1035,
|
2128 |
+
"ĠPisin": 1036,
|
2129 |
+
"ĠPiju": 1037,
|
2130 |
+
"ĠPinija": 1038,
|
2131 |
+
"ĠPikaso": 1039,
|
2132 |
+
"ĠPinokijo": 1040,
|
2133 |
+
"ĠJan": 1041,
|
2134 |
+
"Ġteje": 1042,
|
2135 |
+
"ĠLopin": 1043,
|
2136 |
+
"ĠLowasi": 1044,
|
2137 |
+
"ĠLokaton": 1045,
|
2138 |
+
"ĠLosupan": 1046,
|
2139 |
+
"ĠNiki": 1047,
|
2140 |
+
"ĠNimu": 1048,
|
2141 |
+
"ĠNijon": 1049,
|
2142 |
+
"ĠNipon": 1050,
|
2143 |
+
"ĠNimile": 1051,
|
2144 |
+
"ĠWeko": 1052,
|
2145 |
+
"ĠWije": 1053,
|
2146 |
+
"ĠWijosa": 1054,
|
2147 |
+
"ĠKita": 1055,
|
2148 |
+
"ĠKiliku": 1056,
|
2149 |
+
"ĠToto": 1057,
|
2150 |
+
"ĠSaku": 1058,
|
2151 |
+
"ĠSasalin": 1059,
|
2152 |
+
"ĠSajusi": 1060,
|
2153 |
+
"ĠSentu": 1061,
|
2154 |
+
"ĠPasi": 1062,
|
2155 |
+
"ĠPakala": 1063,
|
2156 |
+
"ĠPakisan": 1064,
|
2157 |
+
"ĠPapuwani": 1065,
|
2158 |
+
"ĠPewe": 1066,
|
2159 |
+
"ĠPekimu": 1067,
|
2160 |
+
"ĠPejoju": 1068,
|
2161 |
+
"ĠLajo": 1069,
|
2162 |
+
"ĠLawito": 1070,
|
2163 |
+
"ĠOsin": 1071,
|
2164 |
+
"ĠOwe": 1072,
|
2165 |
+
"ĠOjuta": 1073,
|
2166 |
+
"ĠOselija": 1074,
|
2167 |
+
"ĠMimoku": 1075,
|
2168 |
+
"ĠMijama": 1076,
|
2169 |
+
"ĠJuli": 1077,
|
2170 |
+
"ĠJuke": 1078,
|
2171 |
+
"ĠJutu": 1079,
|
2172 |
+
"ĠJulija": 1080,
|
2173 |
+
"ĠKupa": 1081,
|
2174 |
+
"ĠKulija": 1082,
|
2175 |
+
"ĠKukukan": 1083,
|
2176 |
+
"ĠTawi": 1084,
|
2177 |
+
"ĠTapajal": 1085,
|
2178 |
+
"ĠTajuwan": 1086,
|
2179 |
+
"ĠPota": 1087,
|
2180 |
+
"ĠPomoto": 1088,
|
2181 |
+
"ĠPosuka": 1089,
|
2182 |
+
"ĠUtu": 1090,
|
2183 |
+
"ĠUsawi": 1091,
|
2184 |
+
"ĠKanse": 1092,
|
2185 |
+
"ĠKanpusi": 1093,
|
2186 |
+
"ĠSomomo": 1094,
|
2187 |
+
"ĠPutu": 1095,
|
2188 |
+
"ĠPunawi": 1096,
|
2189 |
+
"ĠPanto": 1097,
|
2190 |
+
"ĠPantasali": 1098,
|
2191 |
+
"ĠLewi": 1099,
|
2192 |
+
"ĠLemosi": 1100,
|
2193 |
+
"ĠMaku": 1101,
|
2194 |
+
"ĠMasu": 1102,
|
2195 |
+
"ĠItalija": 1103,
|
2196 |
+
"ĠItalia": 1104,
|
2197 |
+
"ĠNepalu": 1105,
|
2198 |
+
"ĠNetelan": 1106,
|
2199 |
+
"ĠNuken": 1107,
|
2200 |
+
"ĠNuwan": 1108,
|
2201 |
+
"ĠLinta": 1109,
|
2202 |
+
"ĠLinja": 1110,
|
2203 |
+
"ĠWajole": 1111,
|
2204 |
+
"ĠWapili": 1112,
|
2205 |
+
"esipo": 1113,
|
2206 |
+
"Ġyupeko": 1114,
|
2207 |
+
"Ġwaleja": 1115,
|
2208 |
+
"ĠKenajekenaja": 1116,
|
2209 |
+
"ĠKijoto": 1117,
|
2210 |
+
"ĠKominu": 1118,
|
2211 |
+
"ĠAnsetan": 1119,
|
2212 |
+
"ĠElisape": 1120,
|
2213 |
+
"ĠNasinesipo": 1121,
|
2214 |
+
"ĠIntonesija": 1122,
|
2215 |
+
"ĠKapilu": 1123,
|
2216 |
+
"ĠKilikuntu": 1124,
|
2217 |
+
"ĠPapuwanijukini": 1125,
|
2218 |
+
"ĠTapajalo": 1126,
|
2219 |
+
"ĠPomotolo": 1127,
|
2220 |
+
"ĠPantasalipakawan": 1128,
|
2221 |
+
"Ġyupekosi": 1129
|
2222 |
+
},
|
2223 |
+
"merges": [
|
2224 |
+
"Ġ l",
|
2225 |
+
"n a",
|
2226 |
+
"Ġl i",
|
2227 |
+
"Ġ t",
|
2228 |
+
"Ġ p",
|
2229 |
+
"Ġ s",
|
2230 |
+
"Ġ m",
|
2231 |
+
"a n",
|
2232 |
+
"Ġ e",
|
2233 |
+
"Ġ k",
|
2234 |
+
"l i",
|
2235 |
+
"o na",
|
2236 |
+
"e n",
|
2237 |
+
"w a",
|
2238 |
+
"a l",
|
2239 |
+
"n i",
|
2240 |
+
"s i",
|
2241 |
+
"Ġ ni",
|
2242 |
+
"al a",
|
2243 |
+
"Ġl a",
|
2244 |
+
"Ġt a",
|
2245 |
+
"Ġp i",
|
2246 |
+
"o n",
|
2247 |
+
"j an",
|
2248 |
+
"Ġm i",
|
2249 |
+
"k i",
|
2250 |
+
"m a",
|
2251 |
+
"Ġs i",
|
2252 |
+
"Ġta wa",
|
2253 |
+
"t e",
|
2254 |
+
"m i",
|
2255 |
+
"Ġl on",
|
2256 |
+
"Ġ w",
|
2257 |
+
"Ġ ala",
|
2258 |
+
"Ġt o",
|
2259 |
+
"i l",
|
2260 |
+
"a ma",
|
2261 |
+
"Ġp ona",
|
2262 |
+
"Ġm u",
|
2263 |
+
"Ġ o",
|
2264 |
+
"Ġ jan",
|
2265 |
+
"Ġp a",
|
2266 |
+
"k e",
|
2267 |
+
"k a",
|
2268 |
+
"Ġsi na",
|
2269 |
+
"Ġl u",
|
2270 |
+
"li n",
|
2271 |
+
"Ġk ama",
|
2272 |
+
"s o",
|
2273 |
+
"Ġ ona",
|
2274 |
+
"p o",
|
2275 |
+
"Ġto ki",
|
2276 |
+
"il e",
|
2277 |
+
"Ġ na",
|
2278 |
+
"en po",
|
2279 |
+
"Ġs u",
|
2280 |
+
"Ġw ile",
|
2281 |
+
"Ġs e",
|
2282 |
+
"Ġ i",
|
2283 |
+
"Ġmu te",
|
2284 |
+
"p a",
|
2285 |
+
"si na",
|
2286 |
+
"Ġ a",
|
2287 |
+
"Ġk en",
|
2288 |
+
"l o",
|
2289 |
+
"p e",
|
2290 |
+
"ki n",
|
2291 |
+
"m e",
|
2292 |
+
"Ġ an",
|
2293 |
+
"j o",
|
2294 |
+
"m o",
|
2295 |
+
"Ġs ona",
|
2296 |
+
"Ġt an",
|
2297 |
+
"Ġlu kin",
|
2298 |
+
"s a",
|
2299 |
+
"Ġpi lin",
|
2300 |
+
"k en",
|
2301 |
+
"al e",
|
2302 |
+
"t a",
|
2303 |
+
"e lo",
|
2304 |
+
"Ġi ke",
|
2305 |
+
"Ġto mo",
|
2306 |
+
"k u",
|
2307 |
+
"Ġ ale",
|
2308 |
+
"il o",
|
2309 |
+
"w i",
|
2310 |
+
"Ġm o",
|
2311 |
+
"p u",
|
2312 |
+
"Ġk u",
|
2313 |
+
"e li",
|
2314 |
+
"Ġse me",
|
2315 |
+
"si n",
|
2316 |
+
"Ġli li",
|
2317 |
+
"Ġt enpo",
|
2318 |
+
"Ġk i",
|
2319 |
+
"Ġsu li",
|
2320 |
+
"Ġ wa",
|
2321 |
+
"Ġk e",
|
2322 |
+
"pe ken",
|
2323 |
+
"Ġpa li",
|
2324 |
+
"Ġke peken",
|
2325 |
+
"j a",
|
2326 |
+
"t enpo",
|
2327 |
+
"Ġm a",
|
2328 |
+
"Ġpa na",
|
2329 |
+
"p i",
|
2330 |
+
"Ġ jo",
|
2331 |
+
"w en",
|
2332 |
+
"w eli",
|
2333 |
+
"Ġt u",
|
2334 |
+
"Ġta so",
|
2335 |
+
"u n",
|
2336 |
+
"Ġan te",
|
2337 |
+
"Ġmo ku",
|
2338 |
+
"e ka",
|
2339 |
+
"l en",
|
2340 |
+
"Ġp o",
|
2341 |
+
"Ġw eka",
|
2342 |
+
"k ala",
|
2343 |
+
"Ġw an",
|
2344 |
+
"Ġlu ka",
|
2345 |
+
"Ġna sin",
|
2346 |
+
"Ġsi n",
|
2347 |
+
"Ġo lin",
|
2348 |
+
"k o",
|
2349 |
+
"n pa",
|
2350 |
+
"Ġmu si",
|
2351 |
+
"Ġwa wa",
|
2352 |
+
"Ġs ama",
|
2353 |
+
"Ġk ala",
|
2354 |
+
"n o",
|
2355 |
+
"Ġt elo",
|
2356 |
+
"l u",
|
2357 |
+
"Ġa wen",
|
2358 |
+
"Ġni mi",
|
2359 |
+
"j e",
|
2360 |
+
"Ġna npa",
|
2361 |
+
"Ġ K",
|
2362 |
+
"te len",
|
2363 |
+
"Ġ T",
|
2364 |
+
"Ġla wa",
|
2365 |
+
"Ġ ilo",
|
2366 |
+
"Ġs o",
|
2367 |
+
"Ġli pu",
|
2368 |
+
"Ġpa kala",
|
2369 |
+
"t u",
|
2370 |
+
"Ġpi ni",
|
2371 |
+
"Ġsi telen",
|
2372 |
+
"u ta",
|
2373 |
+
"Ġsu no",
|
2374 |
+
"Ġso weli",
|
2375 |
+
"Ġse wi",
|
2376 |
+
"Ġna sa",
|
2377 |
+
"ĠT u",
|
2378 |
+
"Ġe n",
|
2379 |
+
"ta so",
|
2380 |
+
"t o",
|
2381 |
+
"j elo",
|
2382 |
+
"Ġki n",
|
2383 |
+
"Ġpo ka",
|
2384 |
+
"lu pu",
|
2385 |
+
"tu li",
|
2386 |
+
"Ġi jo",
|
2387 |
+
"un tuli",
|
2388 |
+
"ĠK untuli",
|
2389 |
+
"l e",
|
2390 |
+
"Ġla pe",
|
2391 |
+
"Ġku te",
|
2392 |
+
"Ġk a",
|
2393 |
+
"so weli",
|
2394 |
+
"Ġan u",
|
2395 |
+
"Ġl en",
|
2396 |
+
"Ġkala ma",
|
2397 |
+
"Ġm on",
|
2398 |
+
"Ġsi ke",
|
2399 |
+
"Ġmo li",
|
2400 |
+
"Ġka si",
|
2401 |
+
"p en",
|
2402 |
+
"Ġala sa",
|
2403 |
+
"Ġsu wi",
|
2404 |
+
"Ġan pa",
|
2405 |
+
"Ġku le",
|
2406 |
+
"ke si",
|
2407 |
+
"n sa",
|
2408 |
+
"Ġo pen",
|
2409 |
+
"Ġ ja",
|
2410 |
+
"Ġk on",
|
2411 |
+
"Ġku lupu",
|
2412 |
+
"Ġ S",
|
2413 |
+
"Ġ u",
|
2414 |
+
"Ġi nsa",
|
2415 |
+
"o je",
|
2416 |
+
"Ġ \"",
|
2417 |
+
"Ġl oje",
|
2418 |
+
"Ġse li",
|
2419 |
+
"Ġki li",
|
2420 |
+
"t ala",
|
2421 |
+
"l a",
|
2422 |
+
"Ġa li",
|
2423 |
+
"s un",
|
2424 |
+
"na sin",
|
2425 |
+
"pi n",
|
2426 |
+
"Ġu tala",
|
2427 |
+
"an i",
|
2428 |
+
"Ġpo ki",
|
2429 |
+
"e na",
|
2430 |
+
"s uta",
|
2431 |
+
"Ġsi jelo",
|
2432 |
+
"Ġ n",
|
2433 |
+
"Ġja ki",
|
2434 |
+
"Ġ A",
|
2435 |
+
"Ġ P",
|
2436 |
+
"Ġl e",
|
2437 |
+
"Ġp an",
|
2438 |
+
"Ġm ani",
|
2439 |
+
"Ġk o",
|
2440 |
+
"me ja",
|
2441 |
+
"Ġ uta",
|
2442 |
+
"Ġp u",
|
2443 |
+
"Ġpi meja",
|
2444 |
+
"to ki",
|
2445 |
+
"Ġmon suta",
|
2446 |
+
"Ġn ena",
|
2447 |
+
"Ġli n",
|
2448 |
+
"ni mi",
|
2449 |
+
"Ġmi je",
|
2450 |
+
"Ġmu n",
|
2451 |
+
"Ġlu pa",
|
2452 |
+
"Ġ L",
|
2453 |
+
"Ġm ama",
|
2454 |
+
"ma ko",
|
2455 |
+
"Ġna mako",
|
2456 |
+
"Ġsin pin",
|
2457 |
+
"t an",
|
2458 |
+
"Ġm eli",
|
2459 |
+
"Ġsu pa",
|
2460 |
+
"Ġwa so",
|
2461 |
+
"to mo",
|
2462 |
+
"si ke",
|
2463 |
+
"Ġa kesi",
|
2464 |
+
"no ka",
|
2465 |
+
"l on",
|
2466 |
+
"ka si",
|
2467 |
+
"Ġlin ja",
|
2468 |
+
"m u",
|
2469 |
+
"Ġ noka",
|
2470 |
+
"Ġwa lo",
|
2471 |
+
"a kesi",
|
2472 |
+
"Ġ M",
|
2473 |
+
"Ġ un",
|
2474 |
+
"ku lupu",
|
2475 |
+
"Ġki wen",
|
2476 |
+
"pi si",
|
2477 |
+
"ĠA n",
|
2478 |
+
"i jo",
|
2479 |
+
"s e",
|
2480 |
+
"s an",
|
2481 |
+
"w e",
|
2482 |
+
"Ġ jelo",
|
2483 |
+
"Ġe sun",
|
2484 |
+
"Ġla so",
|
2485 |
+
"Ġpa ke",
|
2486 |
+
"Ġki pisi",
|
2487 |
+
"p an",
|
2488 |
+
"li pu",
|
2489 |
+
"te lo",
|
2490 |
+
"Ġmon si",
|
2491 |
+
"Ġle te",
|
2492 |
+
"Ġun pa",
|
2493 |
+
"al u",
|
2494 |
+
"ke wi",
|
2495 |
+
"kala ma",
|
2496 |
+
"sike ke",
|
2497 |
+
"ĠAn kewi",
|
2498 |
+
"j u",
|
2499 |
+
"si la",
|
2500 |
+
"Ġpali sa",
|
2501 |
+
"k alu",
|
2502 |
+
"Ġ E",
|
2503 |
+
"si telen",
|
2504 |
+
"te san",
|
2505 |
+
"Ġse lo",
|
2506 |
+
"ta kalu",
|
2507 |
+
"je tesan",
|
2508 |
+
"jetesan takalu",
|
2509 |
+
"wa so",
|
2510 |
+
"Ġmi sikeke",
|
2511 |
+
"pi pi",
|
2512 |
+
"Ġpo we",
|
2513 |
+
"ko sila",
|
2514 |
+
"sun o",
|
2515 |
+
"Ġ I",
|
2516 |
+
"on si",
|
2517 |
+
"pa li",
|
2518 |
+
"me li",
|
2519 |
+
"mu si",
|
2520 |
+
"\" .",
|
2521 |
+
"k on",
|
2522 |
+
"o lin",
|
2523 |
+
"s u",
|
2524 |
+
"â Ģ",
|
2525 |
+
"Ġ N",
|
2526 |
+
"Ġt onsi",
|
2527 |
+
"wa wa",
|
2528 |
+
"Ġpi pi",
|
2529 |
+
"ma ma",
|
2530 |
+
"Ġo ko",
|
2531 |
+
"mo ku",
|
2532 |
+
"Ġle ko",
|
2533 |
+
"Ġko kosila",
|
2534 |
+
"s ona",
|
2535 |
+
"se me",
|
2536 |
+
"ju na",
|
2537 |
+
"e so",
|
2538 |
+
"Ġm eso",
|
2539 |
+
"lin ja",
|
2540 |
+
"Ġma juna",
|
2541 |
+
"lu kin",
|
2542 |
+
"ĠS on",
|
2543 |
+
"ĠI n",
|
2544 |
+
"s elo",
|
2545 |
+
"w ile",
|
2546 |
+
"Ġl an",
|
2547 |
+
"si ma",
|
2548 |
+
"ki li",
|
2549 |
+
"pi lin",
|
2550 |
+
"lu ka",
|
2551 |
+
"ĠK a",
|
2552 |
+
"ĠT e",
|
2553 |
+
"ĠL i",
|
2554 |
+
"ĠM e",
|
2555 |
+
"ĠIn li",
|
2556 |
+
"Ġlan pan",
|
2557 |
+
". .",
|
2558 |
+
"i ke",
|
2559 |
+
"s eli",
|
2560 |
+
"t on",
|
2561 |
+
"w an",
|
2562 |
+
"si jelo",
|
2563 |
+
"ki jetesantakalu",
|
2564 |
+
"Ġa pe",
|
2565 |
+
"pe si",
|
2566 |
+
"ku le",
|
2567 |
+
"pi ku",
|
2568 |
+
"Ġso ko",
|
2569 |
+
"Ġja sima",
|
2570 |
+
"ĠP i",
|
2571 |
+
"ĠSon ja",
|
2572 |
+
"Ġape ja",
|
2573 |
+
"Ġ J",
|
2574 |
+
"na npa",
|
2575 |
+
"sin pin",
|
2576 |
+
"Ġki jetesantakalu",
|
2577 |
+
"lu pa",
|
2578 |
+
"la wa",
|
2579 |
+
"a li",
|
2580 |
+
"n ton",
|
2581 |
+
"Ġ âĢ",
|
2582 |
+
"Ġt e",
|
2583 |
+
"ki wen",
|
2584 |
+
"pa kala",
|
2585 |
+
"wi nton",
|
2586 |
+
"lu wi",
|
2587 |
+
"Ġlin luwi",
|
2588 |
+
"ĠL o",
|
2589 |
+
"ĠE winton",
|
2590 |
+
"âĢ Ŀ",
|
2591 |
+
"ĠN i",
|
2592 |
+
"ĠâĢ ľ",
|
2593 |
+
"l an",
|
2594 |
+
"n tu",
|
2595 |
+
"o ko",
|
2596 |
+
"Ġ W",
|
2597 |
+
"Ġm elo",
|
2598 |
+
"Ġe piku",
|
2599 |
+
"mi je",
|
2600 |
+
"pi ni",
|
2601 |
+
"ĠK i",
|
2602 |
+
"ĠT o",
|
2603 |
+
"Ġka pesi",
|
2604 |
+
"ĠS a",
|
2605 |
+
"ĠS u",
|
2606 |
+
"ĠS an",
|
2607 |
+
"ĠS en",
|
2608 |
+
"ĠP a",
|
2609 |
+
"ĠP e",
|
2610 |
+
"ĠL a",
|
2611 |
+
"pali sa",
|
2612 |
+
"ĠTe len",
|
2613 |
+
"ĠLi sa",
|
2614 |
+
"Ġmelo me",
|
2615 |
+
"a wen",
|
2616 |
+
"m on",
|
2617 |
+
"m un",
|
2618 |
+
"p ona",
|
2619 |
+
"Ġ O",
|
2620 |
+
"na ta",
|
2621 |
+
"li ja",
|
2622 |
+
"po ki",
|
2623 |
+
"sa wi",
|
2624 |
+
"Ġku ntu",
|
2625 |
+
"ja ki",
|
2626 |
+
"Ġpu wa",
|
2627 |
+
"ĠM i",
|
2628 |
+
"se wi",
|
2629 |
+
"su pa",
|
2630 |
+
"ĠKa nata",
|
2631 |
+
"ĠMe wi",
|
2632 |
+
".. .",
|
2633 |
+
"ĠJ u",
|
2634 |
+
"ĠSen i",
|
2635 |
+
"mon suta",
|
2636 |
+
"e piku",
|
2637 |
+
"o pen",
|
2638 |
+
"an te",
|
2639 |
+
"si ja",
|
2640 |
+
"Ġmi jo",
|
2641 |
+
"mi sikeke",
|
2642 |
+
"ama na",
|
2643 |
+
"po ka",
|
2644 |
+
"sa li",
|
2645 |
+
"ĠK u",
|
2646 |
+
"ĠK en",
|
2647 |
+
"ĠT a",
|
2648 |
+
"ĠS i",
|
2649 |
+
"ĠS amana",
|
2650 |
+
"Ġu sawi",
|
2651 |
+
"la pe",
|
2652 |
+
"la pi",
|
2653 |
+
"ĠP o",
|
2654 |
+
"mu te",
|
2655 |
+
"pan po",
|
2656 |
+
"ĠE lo",
|
2657 |
+
"ĠKa pesi",
|
2658 |
+
"ĠKi n",
|
2659 |
+
"ĠSu panpo",
|
2660 |
+
"ĠSan ta",
|
2661 |
+
"ĠLa sina",
|
2662 |
+
"ĠMewi ka",
|
2663 |
+
"Ġmijo mi",
|
2664 |
+
"e ko",
|
2665 |
+
"e sun",
|
2666 |
+
"i mi",
|
2667 |
+
"i nsa",
|
2668 |
+
"j uta",
|
2669 |
+
"k ama",
|
2670 |
+
"m ala",
|
2671 |
+
"n ena",
|
2672 |
+
"u tala",
|
2673 |
+
"y u",
|
2674 |
+
"¼ ģ",
|
2675 |
+
"ï ¼ģ",
|
2676 |
+
"Ġ U",
|
2677 |
+
"na sa",
|
2678 |
+
"Ġs an",
|
2679 |
+
"Ġe te",
|
2680 |
+
"Ġk an",
|
2681 |
+
"wa la",
|
2682 |
+
"si pin",
|
2683 |
+
"ala ta",
|
2684 |
+
"ala sija",
|
2685 |
+
"on ken",
|
2686 |
+
"Ġpa sila",
|
2687 |
+
"ke ke",
|
2688 |
+
"pa na",
|
2689 |
+
"pe lan",
|
2690 |
+
"Ġki ki",
|
2691 |
+
"ko kosila",
|
2692 |
+
"ĠK an",
|
2693 |
+
"ĠK eli",
|
2694 |
+
"ĠT imi",
|
2695 |
+
"Ġka mala",
|
2696 |
+
"ĠS o",
|
2697 |
+
"ĠA si",
|
2698 |
+
"ĠP u",
|
2699 |
+
"ĠP an",
|
2700 |
+
"ĠP ona",
|
2701 |
+
"ĠP alata",
|
2702 |
+
"ĠL e",
|
2703 |
+
"ĠM a",
|
2704 |
+
"ĠM alasija",
|
2705 |
+
"Ġun u",
|
2706 |
+
"ĠE ko",
|
2707 |
+
"ĠE pelan",
|
2708 |
+
"ĠI ta",
|
2709 |
+
"ĠN e",
|
2710 |
+
"ĠN u",
|
2711 |
+
"ĠSon ko",
|
2712 |
+
"ĠLi n",
|
2713 |
+
"ĠMe keke",
|
2714 |
+
"ĠPi ta",
|
2715 |
+
"ĠJ onken",
|
2716 |
+
"ĠLo la",
|
2717 |
+
"âĢĿ .",
|
2718 |
+
"ĠW a",
|
2719 |
+
"ĠTo si",
|
2720 |
+
"ĠTo ki",
|
2721 |
+
"ĠSu san",
|
2722 |
+
"ĠPe pu",
|
2723 |
+
"ĠMi sali",
|
2724 |
+
"ĠSi ko",
|
2725 |
+
"lapi su",
|
2726 |
+
"ĠElo pa",
|
2727 |
+
"ĠKin kili",
|
2728 |
+
"Ġkamala wala",
|
2729 |
+
"ĠEpelan to",
|
2730 |
+
"! \"",
|
2731 |
+
"\" ,",
|
2732 |
+
"\" ?",
|
2733 |
+
". \"",
|
2734 |
+
"? !",
|
2735 |
+
"E ko",
|
2736 |
+
"a sin",
|
2737 |
+
"e si",
|
2738 |
+
"e ta",
|
2739 |
+
"e lon",
|
2740 |
+
"e sija",
|
2741 |
+
"i je",
|
2742 |
+
"i sipin",
|
2743 |
+
"j al",
|
2744 |
+
"j on",
|
2745 |
+
"j ama",
|
2746 |
+
"k an",
|
2747 |
+
"l ani",
|
2748 |
+
"m an",
|
2749 |
+
"m ani",
|
2750 |
+
"n u",
|
2751 |
+
"o mi",
|
2752 |
+
"o se",
|
2753 |
+
"p on",
|
2754 |
+
"p il",
|
2755 |
+
"p ani",
|
2756 |
+
"p alu",
|
2757 |
+
"s on",
|
2758 |
+
"s ama",
|
2759 |
+
"t onsi",
|
2760 |
+
"u pi",
|
2761 |
+
"w ani",
|
2762 |
+
"Ġ yu",
|
2763 |
+
"na wi",
|
2764 |
+
"na ja",
|
2765 |
+
"Ġp eta",
|
2766 |
+
"an u",
|
2767 |
+
"an ka",
|
2768 |
+
"an pa",
|
2769 |
+
"an tan",
|
2770 |
+
"li a",
|
2771 |
+
"li sa",
|
2772 |
+
"li ku",
|
2773 |
+
"li lan",
|
2774 |
+
"wa si",
|
2775 |
+
"wa wi",
|
2776 |
+
"ni ja",
|
2777 |
+
"si ko",
|
2778 |
+
"ala sa",
|
2779 |
+
"Ġta ki",
|
2780 |
+
"on yu",
|
2781 |
+
"Ġmi sa",
|
2782 |
+
"ki ni",
|
2783 |
+
"ki jo",
|
2784 |
+
"ki ko",
|
2785 |
+
"ki mu",
|
2786 |
+
"ki pisi",
|
2787 |
+
"ki san",
|
2788 |
+
"ma juna",
|
2789 |
+
"te lan",
|
2790 |
+
"mi le",
|
2791 |
+
"il a",
|
2792 |
+
"Ġmu lapisu",
|
2793 |
+
"Ġo we",
|
2794 |
+
"Ġo juta",
|
2795 |
+
"ke peken",
|
2796 |
+
"ke naja",
|
2797 |
+
"ka so",
|
2798 |
+
"ka ton",
|
2799 |
+
"ka wan",
|
2800 |
+
"so ko",
|
2801 |
+
"po lo",
|
2802 |
+
"po we",
|
2803 |
+
"Ġi sipin",
|
2804 |
+
"pa jal",
|
2805 |
+
"pa kawan",
|
2806 |
+
"lo je",
|
2807 |
+
"pe ta",
|
2808 |
+
"pe ko",
|
2809 |
+
"me so",
|
2810 |
+
"me su",
|
2811 |
+
"jo le",
|
2812 |
+
"jo ju",
|
2813 |
+
"mo si",
|
2814 |
+
"mo mo",
|
2815 |
+
"mo to",
|
2816 |
+
"sa lin",
|
2817 |
+
"ta wa",
|
2818 |
+
"ta wan",
|
2819 |
+
"ta sali",
|
2820 |
+
"ku kan",
|
2821 |
+
"wi to",
|
2822 |
+
"pu si",
|
2823 |
+
"pu wani",
|
2824 |
+
"Ġwa le",
|
2825 |
+
"Ġke se",
|
2826 |
+
"ja sima",
|
2827 |
+
"pi li",
|
2828 |
+
"pi meja",
|
2829 |
+
"Ġtu li",
|
2830 |
+
"un eko",
|
2831 |
+
"ko wa",
|
2832 |
+
"no kijo",
|
2833 |
+
"je sa",
|
2834 |
+
"je kenaja",
|
2835 |
+
"ĠK ena",
|
2836 |
+
"ĠK ijo",
|
2837 |
+
"ĠK elon",
|
2838 |
+
"ĠK omi",
|
2839 |
+
"ĠK ila",
|
2840 |
+
"ĠT en",
|
2841 |
+
"ĠT onyu",
|
2842 |
+
"Ġso to",
|
2843 |
+
"le te",
|
2844 |
+
"le ko",
|
2845 |
+
"Ġkon we",
|
2846 |
+
"ĠS uneko",
|
2847 |
+
"Ġu mesu",
|
2848 |
+
"la so",
|
2849 |
+
"la sin",
|
2850 |
+
"ĠA li",
|
2851 |
+
"ĠA wi",
|
2852 |
+
"ĠA lapi",
|
2853 |
+
"ĠA man",
|
2854 |
+
"ĠA wawi",
|
2855 |
+
"ĠA polo",
|
2856 |
+
"ĠA tawan",
|
2857 |
+
"ĠA lasin",
|
2858 |
+
"ĠP uta",
|
2859 |
+
"ĠL uta",
|
2860 |
+
"ĠL upi",
|
2861 |
+
"ĠL antan",
|
2862 |
+
"mu lapisu",
|
2863 |
+
"ĠM ena",
|
2864 |
+
"ĠM ose",
|
2865 |
+
"ĠM anka",
|
2866 |
+
"ĠAn se",
|
2867 |
+
"ĠAn son",
|
2868 |
+
"ĠAn kowa",
|
2869 |
+
"ijo sa",
|
2870 |
+
"alu to",
|
2871 |
+
"ju si",
|
2872 |
+
"ju wan",
|
2873 |
+
"ju kini",
|
2874 |
+
"ĠE ki",
|
2875 |
+
"ĠE lisa",
|
2876 |
+
"ĠI lan",
|
2877 |
+
"su li",
|
2878 |
+
"su ka",
|
2879 |
+
"su wi",
|
2880 |
+
"su pan",
|
2881 |
+
"ĠN asin",
|
2882 |
+
"ĠN aluto",
|
2883 |
+
"ĠIn ton",
|
2884 |
+
"ĠKa pil",
|
2885 |
+
"ĠKa jesa",
|
2886 |
+
"ĠTe po",
|
2887 |
+
"ĠTe ja",
|
2888 |
+
"ĠTe wen",
|
2889 |
+
"ĠTe pani",
|
2890 |
+
"ĠLi lija",
|
2891 |
+
"ĠLi lilan",
|
2892 |
+
"ĠMe lani",
|
2893 |
+
"ĠMe siko",
|
2894 |
+
"ĠMe kiko",
|
2895 |
+
"seli ja",
|
2896 |
+
"ĠPi wi",
|
2897 |
+
"ĠPi sin",
|
2898 |
+
"ĠPi ju",
|
2899 |
+
"ĠPi nija",
|
2900 |
+
"ĠPi kaso",
|
2901 |
+
"ĠPi nokijo",
|
2902 |
+
"ĠJ an",
|
2903 |
+
"Ġte je",
|
2904 |
+
"ĠLo pin",
|
2905 |
+
"ĠLo wasi",
|
2906 |
+
"ĠLo katon",
|
2907 |
+
"ĠLo supan",
|
2908 |
+
"ĠNi ki",
|
2909 |
+
"ĠNi mu",
|
2910 |
+
"ĠNi jon",
|
2911 |
+
"ĠNi pon",
|
2912 |
+
"ĠNi mile",
|
2913 |
+
"ĠW eko",
|
2914 |
+
"ĠW ije",
|
2915 |
+
"ĠW ijosa",
|
2916 |
+
"ĠKi ta",
|
2917 |
+
"ĠKi liku",
|
2918 |
+
"ĠTo to",
|
2919 |
+
"ĠSa ku",
|
2920 |
+
"ĠSa salin",
|
2921 |
+
"ĠSa jusi",
|
2922 |
+
"ĠSen tu",
|
2923 |
+
"ĠPa si",
|
2924 |
+
"ĠPa kala",
|
2925 |
+
"ĠPa kisan",
|
2926 |
+
"ĠPa puwani",
|
2927 |
+
"ĠPe we",
|
2928 |
+
"ĠPe kimu",
|
2929 |
+
"ĠPe joju",
|
2930 |
+
"ĠLa jo",
|
2931 |
+
"ĠLa wito",
|
2932 |
+
"ĠO sin",
|
2933 |
+
"ĠO we",
|
2934 |
+
"ĠO juta",
|
2935 |
+
"ĠO selija",
|
2936 |
+
"ĠMi moku",
|
2937 |
+
"ĠMi jama",
|
2938 |
+
"ĠJu li",
|
2939 |
+
"ĠJu ke",
|
2940 |
+
"ĠJu tu",
|
2941 |
+
"ĠJu lija",
|
2942 |
+
"ĠKu pa",
|
2943 |
+
"ĠKu lija",
|
2944 |
+
"ĠKu kukan",
|
2945 |
+
"ĠTa wi",
|
2946 |
+
"ĠTa pajal",
|
2947 |
+
"ĠTa juwan",
|
2948 |
+
"ĠPo ta",
|
2949 |
+
"ĠPo moto",
|
2950 |
+
"ĠPo suka",
|
2951 |
+
"ĠU tu",
|
2952 |
+
"ĠU sawi",
|
2953 |
+
"ĠKan se",
|
2954 |
+
"ĠKan pusi",
|
2955 |
+
"ĠSo momo",
|
2956 |
+
"ĠPu tu",
|
2957 |
+
"ĠPu nawi",
|
2958 |
+
"ĠPan to",
|
2959 |
+
"ĠPan tasali",
|
2960 |
+
"ĠLe wi",
|
2961 |
+
"ĠLe mosi",
|
2962 |
+
"ĠMa ku",
|
2963 |
+
"ĠMa su",
|
2964 |
+
"ĠIta lija",
|
2965 |
+
"ĠIta lia",
|
2966 |
+
"ĠNe palu",
|
2967 |
+
"ĠNe telan",
|
2968 |
+
"ĠNu ken",
|
2969 |
+
"ĠNu wan",
|
2970 |
+
"ĠLin ta",
|
2971 |
+
"ĠLin ja",
|
2972 |
+
"ĠWa jole",
|
2973 |
+
"ĠWa pili",
|
2974 |
+
"esi po",
|
2975 |
+
"Ġyu peko",
|
2976 |
+
"Ġwale ja",
|
2977 |
+
"ĠKena jekenaja",
|
2978 |
+
"ĠKijo to",
|
2979 |
+
"ĠKomi nu",
|
2980 |
+
"ĠAnse tan",
|
2981 |
+
"ĠElisa pe",
|
2982 |
+
"ĠNasin esipo",
|
2983 |
+
"ĠInton esija",
|
2984 |
+
"ĠKapil u",
|
2985 |
+
"ĠKiliku ntu",
|
2986 |
+
"ĠPapuwani jukini",
|
2987 |
+
"ĠTapajal o",
|
2988 |
+
"ĠPomoto lo",
|
2989 |
+
"ĠPantasali pakawan",
|
2990 |
+
"Ġyupeko si"
|
2991 |
+
]
|
2992 |
+
}
|
2993 |
+
}
|
tokenizer_config.json
ADDED
@@ -0,0 +1,990 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"add_bos_token": false,
|
3 |
+
"add_prefix_space": false,
|
4 |
+
"added_tokens_decoder": {
|
5 |
+
"0": {
|
6 |
+
"content": "<|endoftext|>",
|
7 |
+
"lstrip": false,
|
8 |
+
"normalized": false,
|
9 |
+
"rstrip": false,
|
10 |
+
"single_word": false,
|
11 |
+
"special": true
|
12 |
+
},
|
13 |
+
"1": {
|
14 |
+
"content": "<|startoftranscript|>",
|
15 |
+
"lstrip": false,
|
16 |
+
"normalized": false,
|
17 |
+
"rstrip": false,
|
18 |
+
"single_word": false,
|
19 |
+
"special": true
|
20 |
+
},
|
21 |
+
"2": {
|
22 |
+
"content": "<|en|>",
|
23 |
+
"lstrip": false,
|
24 |
+
"normalized": false,
|
25 |
+
"rstrip": false,
|
26 |
+
"single_word": false,
|
27 |
+
"special": true
|
28 |
+
},
|
29 |
+
"3": {
|
30 |
+
"content": "<|zh|>",
|
31 |
+
"lstrip": false,
|
32 |
+
"normalized": false,
|
33 |
+
"rstrip": false,
|
34 |
+
"single_word": false,
|
35 |
+
"special": true
|
36 |
+
},
|
37 |
+
"4": {
|
38 |
+
"content": "<|de|>",
|
39 |
+
"lstrip": false,
|
40 |
+
"normalized": false,
|
41 |
+
"rstrip": false,
|
42 |
+
"single_word": false,
|
43 |
+
"special": true
|
44 |
+
},
|
45 |
+
"5": {
|
46 |
+
"content": "<|es|>",
|
47 |
+
"lstrip": false,
|
48 |
+
"normalized": false,
|
49 |
+
"rstrip": false,
|
50 |
+
"single_word": false,
|
51 |
+
"special": true
|
52 |
+
},
|
53 |
+
"6": {
|
54 |
+
"content": "<|ru|>",
|
55 |
+
"lstrip": false,
|
56 |
+
"normalized": false,
|
57 |
+
"rstrip": false,
|
58 |
+
"single_word": false,
|
59 |
+
"special": true
|
60 |
+
},
|
61 |
+
"7": {
|
62 |
+
"content": "<|ko|>",
|
63 |
+
"lstrip": false,
|
64 |
+
"normalized": false,
|
65 |
+
"rstrip": false,
|
66 |
+
"single_word": false,
|
67 |
+
"special": true
|
68 |
+
},
|
69 |
+
"8": {
|
70 |
+
"content": "<|fr|>",
|
71 |
+
"lstrip": false,
|
72 |
+
"normalized": false,
|
73 |
+
"rstrip": false,
|
74 |
+
"single_word": false,
|
75 |
+
"special": true
|
76 |
+
},
|
77 |
+
"9": {
|
78 |
+
"content": "<|ja|>",
|
79 |
+
"lstrip": false,
|
80 |
+
"normalized": false,
|
81 |
+
"rstrip": false,
|
82 |
+
"single_word": false,
|
83 |
+
"special": true
|
84 |
+
},
|
85 |
+
"10": {
|
86 |
+
"content": "<|pt|>",
|
87 |
+
"lstrip": false,
|
88 |
+
"normalized": false,
|
89 |
+
"rstrip": false,
|
90 |
+
"single_word": false,
|
91 |
+
"special": true
|
92 |
+
},
|
93 |
+
"11": {
|
94 |
+
"content": "<|tr|>",
|
95 |
+
"lstrip": false,
|
96 |
+
"normalized": false,
|
97 |
+
"rstrip": false,
|
98 |
+
"single_word": false,
|
99 |
+
"special": true
|
100 |
+
},
|
101 |
+
"12": {
|
102 |
+
"content": "<|pl|>",
|
103 |
+
"lstrip": false,
|
104 |
+
"normalized": false,
|
105 |
+
"rstrip": false,
|
106 |
+
"single_word": false,
|
107 |
+
"special": true
|
108 |
+
},
|
109 |
+
"13": {
|
110 |
+
"content": "<|ca|>",
|
111 |
+
"lstrip": false,
|
112 |
+
"normalized": false,
|
113 |
+
"rstrip": false,
|
114 |
+
"single_word": false,
|
115 |
+
"special": true
|
116 |
+
},
|
117 |
+
"14": {
|
118 |
+
"content": "<|nl|>",
|
119 |
+
"lstrip": false,
|
120 |
+
"normalized": false,
|
121 |
+
"rstrip": false,
|
122 |
+
"single_word": false,
|
123 |
+
"special": true
|
124 |
+
},
|
125 |
+
"15": {
|
126 |
+
"content": "<|ar|>",
|
127 |
+
"lstrip": false,
|
128 |
+
"normalized": false,
|
129 |
+
"rstrip": false,
|
130 |
+
"single_word": false,
|
131 |
+
"special": true
|
132 |
+
},
|
133 |
+
"16": {
|
134 |
+
"content": "<|sv|>",
|
135 |
+
"lstrip": false,
|
136 |
+
"normalized": false,
|
137 |
+
"rstrip": false,
|
138 |
+
"single_word": false,
|
139 |
+
"special": true
|
140 |
+
},
|
141 |
+
"17": {
|
142 |
+
"content": "<|it|>",
|
143 |
+
"lstrip": false,
|
144 |
+
"normalized": false,
|
145 |
+
"rstrip": false,
|
146 |
+
"single_word": false,
|
147 |
+
"special": true
|
148 |
+
},
|
149 |
+
"18": {
|
150 |
+
"content": "<|id|>",
|
151 |
+
"lstrip": false,
|
152 |
+
"normalized": false,
|
153 |
+
"rstrip": false,
|
154 |
+
"single_word": false,
|
155 |
+
"special": true
|
156 |
+
},
|
157 |
+
"19": {
|
158 |
+
"content": "<|hi|>",
|
159 |
+
"lstrip": false,
|
160 |
+
"normalized": false,
|
161 |
+
"rstrip": false,
|
162 |
+
"single_word": false,
|
163 |
+
"special": true
|
164 |
+
},
|
165 |
+
"20": {
|
166 |
+
"content": "<|fi|>",
|
167 |
+
"lstrip": false,
|
168 |
+
"normalized": false,
|
169 |
+
"rstrip": false,
|
170 |
+
"single_word": false,
|
171 |
+
"special": true
|
172 |
+
},
|
173 |
+
"21": {
|
174 |
+
"content": "<|vi|>",
|
175 |
+
"lstrip": false,
|
176 |
+
"normalized": false,
|
177 |
+
"rstrip": false,
|
178 |
+
"single_word": false,
|
179 |
+
"special": true
|
180 |
+
},
|
181 |
+
"22": {
|
182 |
+
"content": "<|he|>",
|
183 |
+
"lstrip": false,
|
184 |
+
"normalized": false,
|
185 |
+
"rstrip": false,
|
186 |
+
"single_word": false,
|
187 |
+
"special": true
|
188 |
+
},
|
189 |
+
"23": {
|
190 |
+
"content": "<|uk|>",
|
191 |
+
"lstrip": false,
|
192 |
+
"normalized": false,
|
193 |
+
"rstrip": false,
|
194 |
+
"single_word": false,
|
195 |
+
"special": true
|
196 |
+
},
|
197 |
+
"24": {
|
198 |
+
"content": "<|el|>",
|
199 |
+
"lstrip": false,
|
200 |
+
"normalized": false,
|
201 |
+
"rstrip": false,
|
202 |
+
"single_word": false,
|
203 |
+
"special": true
|
204 |
+
},
|
205 |
+
"25": {
|
206 |
+
"content": "<|ms|>",
|
207 |
+
"lstrip": false,
|
208 |
+
"normalized": false,
|
209 |
+
"rstrip": false,
|
210 |
+
"single_word": false,
|
211 |
+
"special": true
|
212 |
+
},
|
213 |
+
"26": {
|
214 |
+
"content": "<|cs|>",
|
215 |
+
"lstrip": false,
|
216 |
+
"normalized": false,
|
217 |
+
"rstrip": false,
|
218 |
+
"single_word": false,
|
219 |
+
"special": true
|
220 |
+
},
|
221 |
+
"27": {
|
222 |
+
"content": "<|ro|>",
|
223 |
+
"lstrip": false,
|
224 |
+
"normalized": false,
|
225 |
+
"rstrip": false,
|
226 |
+
"single_word": false,
|
227 |
+
"special": true
|
228 |
+
},
|
229 |
+
"28": {
|
230 |
+
"content": "<|da|>",
|
231 |
+
"lstrip": false,
|
232 |
+
"normalized": false,
|
233 |
+
"rstrip": false,
|
234 |
+
"single_word": false,
|
235 |
+
"special": true
|
236 |
+
},
|
237 |
+
"29": {
|
238 |
+
"content": "<|hu|>",
|
239 |
+
"lstrip": false,
|
240 |
+
"normalized": false,
|
241 |
+
"rstrip": false,
|
242 |
+
"single_word": false,
|
243 |
+
"special": true
|
244 |
+
},
|
245 |
+
"30": {
|
246 |
+
"content": "<|ta|>",
|
247 |
+
"lstrip": false,
|
248 |
+
"normalized": false,
|
249 |
+
"rstrip": false,
|
250 |
+
"single_word": false,
|
251 |
+
"special": true
|
252 |
+
},
|
253 |
+
"31": {
|
254 |
+
"content": "<|no|>",
|
255 |
+
"lstrip": false,
|
256 |
+
"normalized": false,
|
257 |
+
"rstrip": false,
|
258 |
+
"single_word": false,
|
259 |
+
"special": true
|
260 |
+
},
|
261 |
+
"32": {
|
262 |
+
"content": "<|th|>",
|
263 |
+
"lstrip": false,
|
264 |
+
"normalized": false,
|
265 |
+
"rstrip": false,
|
266 |
+
"single_word": false,
|
267 |
+
"special": true
|
268 |
+
},
|
269 |
+
"33": {
|
270 |
+
"content": "<|ur|>",
|
271 |
+
"lstrip": false,
|
272 |
+
"normalized": false,
|
273 |
+
"rstrip": false,
|
274 |
+
"single_word": false,
|
275 |
+
"special": true
|
276 |
+
},
|
277 |
+
"34": {
|
278 |
+
"content": "<|hr|>",
|
279 |
+
"lstrip": false,
|
280 |
+
"normalized": false,
|
281 |
+
"rstrip": false,
|
282 |
+
"single_word": false,
|
283 |
+
"special": true
|
284 |
+
},
|
285 |
+
"35": {
|
286 |
+
"content": "<|bg|>",
|
287 |
+
"lstrip": false,
|
288 |
+
"normalized": false,
|
289 |
+
"rstrip": false,
|
290 |
+
"single_word": false,
|
291 |
+
"special": true
|
292 |
+
},
|
293 |
+
"36": {
|
294 |
+
"content": "<|lt|>",
|
295 |
+
"lstrip": false,
|
296 |
+
"normalized": false,
|
297 |
+
"rstrip": false,
|
298 |
+
"single_word": false,
|
299 |
+
"special": true
|
300 |
+
},
|
301 |
+
"37": {
|
302 |
+
"content": "<|la|>",
|
303 |
+
"lstrip": false,
|
304 |
+
"normalized": false,
|
305 |
+
"rstrip": false,
|
306 |
+
"single_word": false,
|
307 |
+
"special": true
|
308 |
+
},
|
309 |
+
"38": {
|
310 |
+
"content": "<|mi|>",
|
311 |
+
"lstrip": false,
|
312 |
+
"normalized": false,
|
313 |
+
"rstrip": false,
|
314 |
+
"single_word": false,
|
315 |
+
"special": true
|
316 |
+
},
|
317 |
+
"39": {
|
318 |
+
"content": "<|ml|>",
|
319 |
+
"lstrip": false,
|
320 |
+
"normalized": false,
|
321 |
+
"rstrip": false,
|
322 |
+
"single_word": false,
|
323 |
+
"special": true
|
324 |
+
},
|
325 |
+
"40": {
|
326 |
+
"content": "<|cy|>",
|
327 |
+
"lstrip": false,
|
328 |
+
"normalized": false,
|
329 |
+
"rstrip": false,
|
330 |
+
"single_word": false,
|
331 |
+
"special": true
|
332 |
+
},
|
333 |
+
"41": {
|
334 |
+
"content": "<|sk|>",
|
335 |
+
"lstrip": false,
|
336 |
+
"normalized": false,
|
337 |
+
"rstrip": false,
|
338 |
+
"single_word": false,
|
339 |
+
"special": true
|
340 |
+
},
|
341 |
+
"42": {
|
342 |
+
"content": "<|te|>",
|
343 |
+
"lstrip": false,
|
344 |
+
"normalized": false,
|
345 |
+
"rstrip": false,
|
346 |
+
"single_word": false,
|
347 |
+
"special": true
|
348 |
+
},
|
349 |
+
"43": {
|
350 |
+
"content": "<|fa|>",
|
351 |
+
"lstrip": false,
|
352 |
+
"normalized": false,
|
353 |
+
"rstrip": false,
|
354 |
+
"single_word": false,
|
355 |
+
"special": true
|
356 |
+
},
|
357 |
+
"44": {
|
358 |
+
"content": "<|lv|>",
|
359 |
+
"lstrip": false,
|
360 |
+
"normalized": false,
|
361 |
+
"rstrip": false,
|
362 |
+
"single_word": false,
|
363 |
+
"special": true
|
364 |
+
},
|
365 |
+
"45": {
|
366 |
+
"content": "<|bn|>",
|
367 |
+
"lstrip": false,
|
368 |
+
"normalized": false,
|
369 |
+
"rstrip": false,
|
370 |
+
"single_word": false,
|
371 |
+
"special": true
|
372 |
+
},
|
373 |
+
"46": {
|
374 |
+
"content": "<|sr|>",
|
375 |
+
"lstrip": false,
|
376 |
+
"normalized": false,
|
377 |
+
"rstrip": false,
|
378 |
+
"single_word": false,
|
379 |
+
"special": true
|
380 |
+
},
|
381 |
+
"47": {
|
382 |
+
"content": "<|az|>",
|
383 |
+
"lstrip": false,
|
384 |
+
"normalized": false,
|
385 |
+
"rstrip": false,
|
386 |
+
"single_word": false,
|
387 |
+
"special": true
|
388 |
+
},
|
389 |
+
"48": {
|
390 |
+
"content": "<|sl|>",
|
391 |
+
"lstrip": false,
|
392 |
+
"normalized": false,
|
393 |
+
"rstrip": false,
|
394 |
+
"single_word": false,
|
395 |
+
"special": true
|
396 |
+
},
|
397 |
+
"49": {
|
398 |
+
"content": "<|kn|>",
|
399 |
+
"lstrip": false,
|
400 |
+
"normalized": false,
|
401 |
+
"rstrip": false,
|
402 |
+
"single_word": false,
|
403 |
+
"special": true
|
404 |
+
},
|
405 |
+
"50": {
|
406 |
+
"content": "<|et|>",
|
407 |
+
"lstrip": false,
|
408 |
+
"normalized": false,
|
409 |
+
"rstrip": false,
|
410 |
+
"single_word": false,
|
411 |
+
"special": true
|
412 |
+
},
|
413 |
+
"51": {
|
414 |
+
"content": "<|mk|>",
|
415 |
+
"lstrip": false,
|
416 |
+
"normalized": false,
|
417 |
+
"rstrip": false,
|
418 |
+
"single_word": false,
|
419 |
+
"special": true
|
420 |
+
},
|
421 |
+
"52": {
|
422 |
+
"content": "<|br|>",
|
423 |
+
"lstrip": false,
|
424 |
+
"normalized": false,
|
425 |
+
"rstrip": false,
|
426 |
+
"single_word": false,
|
427 |
+
"special": true
|
428 |
+
},
|
429 |
+
"53": {
|
430 |
+
"content": "<|eu|>",
|
431 |
+
"lstrip": false,
|
432 |
+
"normalized": false,
|
433 |
+
"rstrip": false,
|
434 |
+
"single_word": false,
|
435 |
+
"special": true
|
436 |
+
},
|
437 |
+
"54": {
|
438 |
+
"content": "<|is|>",
|
439 |
+
"lstrip": false,
|
440 |
+
"normalized": false,
|
441 |
+
"rstrip": false,
|
442 |
+
"single_word": false,
|
443 |
+
"special": true
|
444 |
+
},
|
445 |
+
"55": {
|
446 |
+
"content": "<|hy|>",
|
447 |
+
"lstrip": false,
|
448 |
+
"normalized": false,
|
449 |
+
"rstrip": false,
|
450 |
+
"single_word": false,
|
451 |
+
"special": true
|
452 |
+
},
|
453 |
+
"56": {
|
454 |
+
"content": "<|ne|>",
|
455 |
+
"lstrip": false,
|
456 |
+
"normalized": false,
|
457 |
+
"rstrip": false,
|
458 |
+
"single_word": false,
|
459 |
+
"special": true
|
460 |
+
},
|
461 |
+
"57": {
|
462 |
+
"content": "<|mn|>",
|
463 |
+
"lstrip": false,
|
464 |
+
"normalized": false,
|
465 |
+
"rstrip": false,
|
466 |
+
"single_word": false,
|
467 |
+
"special": true
|
468 |
+
},
|
469 |
+
"58": {
|
470 |
+
"content": "<|bs|>",
|
471 |
+
"lstrip": false,
|
472 |
+
"normalized": false,
|
473 |
+
"rstrip": false,
|
474 |
+
"single_word": false,
|
475 |
+
"special": true
|
476 |
+
},
|
477 |
+
"59": {
|
478 |
+
"content": "<|kk|>",
|
479 |
+
"lstrip": false,
|
480 |
+
"normalized": false,
|
481 |
+
"rstrip": false,
|
482 |
+
"single_word": false,
|
483 |
+
"special": true
|
484 |
+
},
|
485 |
+
"60": {
|
486 |
+
"content": "<|sq|>",
|
487 |
+
"lstrip": false,
|
488 |
+
"normalized": false,
|
489 |
+
"rstrip": false,
|
490 |
+
"single_word": false,
|
491 |
+
"special": true
|
492 |
+
},
|
493 |
+
"61": {
|
494 |
+
"content": "<|sw|>",
|
495 |
+
"lstrip": false,
|
496 |
+
"normalized": false,
|
497 |
+
"rstrip": false,
|
498 |
+
"single_word": false,
|
499 |
+
"special": true
|
500 |
+
},
|
501 |
+
"62": {
|
502 |
+
"content": "<|gl|>",
|
503 |
+
"lstrip": false,
|
504 |
+
"normalized": false,
|
505 |
+
"rstrip": false,
|
506 |
+
"single_word": false,
|
507 |
+
"special": true
|
508 |
+
},
|
509 |
+
"63": {
|
510 |
+
"content": "<|mr|>",
|
511 |
+
"lstrip": false,
|
512 |
+
"normalized": false,
|
513 |
+
"rstrip": false,
|
514 |
+
"single_word": false,
|
515 |
+
"special": true
|
516 |
+
},
|
517 |
+
"64": {
|
518 |
+
"content": "<|pa|>",
|
519 |
+
"lstrip": false,
|
520 |
+
"normalized": false,
|
521 |
+
"rstrip": false,
|
522 |
+
"single_word": false,
|
523 |
+
"special": true
|
524 |
+
},
|
525 |
+
"65": {
|
526 |
+
"content": "<|si|>",
|
527 |
+
"lstrip": false,
|
528 |
+
"normalized": false,
|
529 |
+
"rstrip": false,
|
530 |
+
"single_word": false,
|
531 |
+
"special": true
|
532 |
+
},
|
533 |
+
"66": {
|
534 |
+
"content": "<|km|>",
|
535 |
+
"lstrip": false,
|
536 |
+
"normalized": false,
|
537 |
+
"rstrip": false,
|
538 |
+
"single_word": false,
|
539 |
+
"special": true
|
540 |
+
},
|
541 |
+
"67": {
|
542 |
+
"content": "<|sn|>",
|
543 |
+
"lstrip": false,
|
544 |
+
"normalized": false,
|
545 |
+
"rstrip": false,
|
546 |
+
"single_word": false,
|
547 |
+
"special": true
|
548 |
+
},
|
549 |
+
"68": {
|
550 |
+
"content": "<|yo|>",
|
551 |
+
"lstrip": false,
|
552 |
+
"normalized": false,
|
553 |
+
"rstrip": false,
|
554 |
+
"single_word": false,
|
555 |
+
"special": true
|
556 |
+
},
|
557 |
+
"69": {
|
558 |
+
"content": "<|so|>",
|
559 |
+
"lstrip": false,
|
560 |
+
"normalized": false,
|
561 |
+
"rstrip": false,
|
562 |
+
"single_word": false,
|
563 |
+
"special": true
|
564 |
+
},
|
565 |
+
"70": {
|
566 |
+
"content": "<|af|>",
|
567 |
+
"lstrip": false,
|
568 |
+
"normalized": false,
|
569 |
+
"rstrip": false,
|
570 |
+
"single_word": false,
|
571 |
+
"special": true
|
572 |
+
},
|
573 |
+
"71": {
|
574 |
+
"content": "<|oc|>",
|
575 |
+
"lstrip": false,
|
576 |
+
"normalized": false,
|
577 |
+
"rstrip": false,
|
578 |
+
"single_word": false,
|
579 |
+
"special": true
|
580 |
+
},
|
581 |
+
"72": {
|
582 |
+
"content": "<|ka|>",
|
583 |
+
"lstrip": false,
|
584 |
+
"normalized": false,
|
585 |
+
"rstrip": false,
|
586 |
+
"single_word": false,
|
587 |
+
"special": true
|
588 |
+
},
|
589 |
+
"73": {
|
590 |
+
"content": "<|be|>",
|
591 |
+
"lstrip": false,
|
592 |
+
"normalized": false,
|
593 |
+
"rstrip": false,
|
594 |
+
"single_word": false,
|
595 |
+
"special": true
|
596 |
+
},
|
597 |
+
"74": {
|
598 |
+
"content": "<|tg|>",
|
599 |
+
"lstrip": false,
|
600 |
+
"normalized": false,
|
601 |
+
"rstrip": false,
|
602 |
+
"single_word": false,
|
603 |
+
"special": true
|
604 |
+
},
|
605 |
+
"75": {
|
606 |
+
"content": "<|sd|>",
|
607 |
+
"lstrip": false,
|
608 |
+
"normalized": false,
|
609 |
+
"rstrip": false,
|
610 |
+
"single_word": false,
|
611 |
+
"special": true
|
612 |
+
},
|
613 |
+
"76": {
|
614 |
+
"content": "<|gu|>",
|
615 |
+
"lstrip": false,
|
616 |
+
"normalized": false,
|
617 |
+
"rstrip": false,
|
618 |
+
"single_word": false,
|
619 |
+
"special": true
|
620 |
+
},
|
621 |
+
"77": {
|
622 |
+
"content": "<|am|>",
|
623 |
+
"lstrip": false,
|
624 |
+
"normalized": false,
|
625 |
+
"rstrip": false,
|
626 |
+
"single_word": false,
|
627 |
+
"special": true
|
628 |
+
},
|
629 |
+
"78": {
|
630 |
+
"content": "<|yi|>",
|
631 |
+
"lstrip": false,
|
632 |
+
"normalized": false,
|
633 |
+
"rstrip": false,
|
634 |
+
"single_word": false,
|
635 |
+
"special": true
|
636 |
+
},
|
637 |
+
"79": {
|
638 |
+
"content": "<|lo|>",
|
639 |
+
"lstrip": false,
|
640 |
+
"normalized": false,
|
641 |
+
"rstrip": false,
|
642 |
+
"single_word": false,
|
643 |
+
"special": true
|
644 |
+
},
|
645 |
+
"80": {
|
646 |
+
"content": "<|uz|>",
|
647 |
+
"lstrip": false,
|
648 |
+
"normalized": false,
|
649 |
+
"rstrip": false,
|
650 |
+
"single_word": false,
|
651 |
+
"special": true
|
652 |
+
},
|
653 |
+
"81": {
|
654 |
+
"content": "<|fo|>",
|
655 |
+
"lstrip": false,
|
656 |
+
"normalized": false,
|
657 |
+
"rstrip": false,
|
658 |
+
"single_word": false,
|
659 |
+
"special": true
|
660 |
+
},
|
661 |
+
"82": {
|
662 |
+
"content": "<|ht|>",
|
663 |
+
"lstrip": false,
|
664 |
+
"normalized": false,
|
665 |
+
"rstrip": false,
|
666 |
+
"single_word": false,
|
667 |
+
"special": true
|
668 |
+
},
|
669 |
+
"83": {
|
670 |
+
"content": "<|ps|>",
|
671 |
+
"lstrip": false,
|
672 |
+
"normalized": false,
|
673 |
+
"rstrip": false,
|
674 |
+
"single_word": false,
|
675 |
+
"special": true
|
676 |
+
},
|
677 |
+
"84": {
|
678 |
+
"content": "<|tk|>",
|
679 |
+
"lstrip": false,
|
680 |
+
"normalized": false,
|
681 |
+
"rstrip": false,
|
682 |
+
"single_word": false,
|
683 |
+
"special": true
|
684 |
+
},
|
685 |
+
"85": {
|
686 |
+
"content": "<|nn|>",
|
687 |
+
"lstrip": false,
|
688 |
+
"normalized": false,
|
689 |
+
"rstrip": false,
|
690 |
+
"single_word": false,
|
691 |
+
"special": true
|
692 |
+
},
|
693 |
+
"86": {
|
694 |
+
"content": "<|mt|>",
|
695 |
+
"lstrip": false,
|
696 |
+
"normalized": false,
|
697 |
+
"rstrip": false,
|
698 |
+
"single_word": false,
|
699 |
+
"special": true
|
700 |
+
},
|
701 |
+
"87": {
|
702 |
+
"content": "<|sa|>",
|
703 |
+
"lstrip": false,
|
704 |
+
"normalized": false,
|
705 |
+
"rstrip": false,
|
706 |
+
"single_word": false,
|
707 |
+
"special": true
|
708 |
+
},
|
709 |
+
"88": {
|
710 |
+
"content": "<|lb|>",
|
711 |
+
"lstrip": false,
|
712 |
+
"normalized": false,
|
713 |
+
"rstrip": false,
|
714 |
+
"single_word": false,
|
715 |
+
"special": true
|
716 |
+
},
|
717 |
+
"89": {
|
718 |
+
"content": "<|my|>",
|
719 |
+
"lstrip": false,
|
720 |
+
"normalized": false,
|
721 |
+
"rstrip": false,
|
722 |
+
"single_word": false,
|
723 |
+
"special": true
|
724 |
+
},
|
725 |
+
"90": {
|
726 |
+
"content": "<|bo|>",
|
727 |
+
"lstrip": false,
|
728 |
+
"normalized": false,
|
729 |
+
"rstrip": false,
|
730 |
+
"single_word": false,
|
731 |
+
"special": true
|
732 |
+
},
|
733 |
+
"91": {
|
734 |
+
"content": "<|tl|>",
|
735 |
+
"lstrip": false,
|
736 |
+
"normalized": false,
|
737 |
+
"rstrip": false,
|
738 |
+
"single_word": false,
|
739 |
+
"special": true
|
740 |
+
},
|
741 |
+
"92": {
|
742 |
+
"content": "<|mg|>",
|
743 |
+
"lstrip": false,
|
744 |
+
"normalized": false,
|
745 |
+
"rstrip": false,
|
746 |
+
"single_word": false,
|
747 |
+
"special": true
|
748 |
+
},
|
749 |
+
"93": {
|
750 |
+
"content": "<|as|>",
|
751 |
+
"lstrip": false,
|
752 |
+
"normalized": false,
|
753 |
+
"rstrip": false,
|
754 |
+
"single_word": false,
|
755 |
+
"special": true
|
756 |
+
},
|
757 |
+
"94": {
|
758 |
+
"content": "<|tt|>",
|
759 |
+
"lstrip": false,
|
760 |
+
"normalized": false,
|
761 |
+
"rstrip": false,
|
762 |
+
"single_word": false,
|
763 |
+
"special": true
|
764 |
+
},
|
765 |
+
"95": {
|
766 |
+
"content": "<|haw|>",
|
767 |
+
"lstrip": false,
|
768 |
+
"normalized": false,
|
769 |
+
"rstrip": false,
|
770 |
+
"single_word": false,
|
771 |
+
"special": true
|
772 |
+
},
|
773 |
+
"96": {
|
774 |
+
"content": "<|ln|>",
|
775 |
+
"lstrip": false,
|
776 |
+
"normalized": false,
|
777 |
+
"rstrip": false,
|
778 |
+
"single_word": false,
|
779 |
+
"special": true
|
780 |
+
},
|
781 |
+
"97": {
|
782 |
+
"content": "<|ha|>",
|
783 |
+
"lstrip": false,
|
784 |
+
"normalized": false,
|
785 |
+
"rstrip": false,
|
786 |
+
"single_word": false,
|
787 |
+
"special": true
|
788 |
+
},
|
789 |
+
"98": {
|
790 |
+
"content": "<|ba|>",
|
791 |
+
"lstrip": false,
|
792 |
+
"normalized": false,
|
793 |
+
"rstrip": false,
|
794 |
+
"single_word": false,
|
795 |
+
"special": true
|
796 |
+
},
|
797 |
+
"99": {
|
798 |
+
"content": "<|jw|>",
|
799 |
+
"lstrip": false,
|
800 |
+
"normalized": false,
|
801 |
+
"rstrip": false,
|
802 |
+
"single_word": false,
|
803 |
+
"special": true
|
804 |
+
},
|
805 |
+
"100": {
|
806 |
+
"content": "<|su|>",
|
807 |
+
"lstrip": false,
|
808 |
+
"normalized": false,
|
809 |
+
"rstrip": false,
|
810 |
+
"single_word": false,
|
811 |
+
"special": true
|
812 |
+
},
|
813 |
+
"101": {
|
814 |
+
"content": "<|translate|>",
|
815 |
+
"lstrip": false,
|
816 |
+
"normalized": false,
|
817 |
+
"rstrip": false,
|
818 |
+
"single_word": false,
|
819 |
+
"special": true
|
820 |
+
},
|
821 |
+
"102": {
|
822 |
+
"content": "<|transcribe|>",
|
823 |
+
"lstrip": false,
|
824 |
+
"normalized": false,
|
825 |
+
"rstrip": false,
|
826 |
+
"single_word": false,
|
827 |
+
"special": true
|
828 |
+
},
|
829 |
+
"103": {
|
830 |
+
"content": "<|startoflm|>",
|
831 |
+
"lstrip": false,
|
832 |
+
"normalized": false,
|
833 |
+
"rstrip": false,
|
834 |
+
"single_word": false,
|
835 |
+
"special": true
|
836 |
+
},
|
837 |
+
"104": {
|
838 |
+
"content": "<|startofprev|>",
|
839 |
+
"lstrip": false,
|
840 |
+
"normalized": false,
|
841 |
+
"rstrip": false,
|
842 |
+
"single_word": false,
|
843 |
+
"special": true
|
844 |
+
},
|
845 |
+
"105": {
|
846 |
+
"content": "<|nocaptions|>",
|
847 |
+
"lstrip": false,
|
848 |
+
"normalized": false,
|
849 |
+
"rstrip": false,
|
850 |
+
"single_word": false,
|
851 |
+
"special": true
|
852 |
+
},
|
853 |
+
"106": {
|
854 |
+
"content": "<|notimestamps|>",
|
855 |
+
"lstrip": false,
|
856 |
+
"normalized": false,
|
857 |
+
"rstrip": false,
|
858 |
+
"single_word": false,
|
859 |
+
"special": true
|
860 |
+
},
|
861 |
+
"1130": {
|
862 |
+
"content": "<|tp|>",
|
863 |
+
"lstrip": false,
|
864 |
+
"normalized": false,
|
865 |
+
"rstrip": false,
|
866 |
+
"single_word": false,
|
867 |
+
"special": true
|
868 |
+
}
|
869 |
+
},
|
870 |
+
"additional_special_tokens": [
|
871 |
+
"<|endoftext|>",
|
872 |
+
"<|startoftranscript|>",
|
873 |
+
"<|en|>",
|
874 |
+
"<|zh|>",
|
875 |
+
"<|de|>",
|
876 |
+
"<|es|>",
|
877 |
+
"<|ru|>",
|
878 |
+
"<|ko|>",
|
879 |
+
"<|fr|>",
|
880 |
+
"<|ja|>",
|
881 |
+
"<|pt|>",
|
882 |
+
"<|tr|>",
|
883 |
+
"<|pl|>",
|
884 |
+
"<|ca|>",
|
885 |
+
"<|nl|>",
|
886 |
+
"<|ar|>",
|
887 |
+
"<|sv|>",
|
888 |
+
"<|it|>",
|
889 |
+
"<|id|>",
|
890 |
+
"<|hi|>",
|
891 |
+
"<|fi|>",
|
892 |
+
"<|vi|>",
|
893 |
+
"<|he|>",
|
894 |
+
"<|uk|>",
|
895 |
+
"<|el|>",
|
896 |
+
"<|ms|>",
|
897 |
+
"<|cs|>",
|
898 |
+
"<|ro|>",
|
899 |
+
"<|da|>",
|
900 |
+
"<|hu|>",
|
901 |
+
"<|ta|>",
|
902 |
+
"<|no|>",
|
903 |
+
"<|th|>",
|
904 |
+
"<|ur|>",
|
905 |
+
"<|hr|>",
|
906 |
+
"<|bg|>",
|
907 |
+
"<|lt|>",
|
908 |
+
"<|la|>",
|
909 |
+
"<|mi|>",
|
910 |
+
"<|ml|>",
|
911 |
+
"<|cy|>",
|
912 |
+
"<|sk|>",
|
913 |
+
"<|te|>",
|
914 |
+
"<|fa|>",
|
915 |
+
"<|lv|>",
|
916 |
+
"<|bn|>",
|
917 |
+
"<|sr|>",
|
918 |
+
"<|az|>",
|
919 |
+
"<|sl|>",
|
920 |
+
"<|kn|>",
|
921 |
+
"<|et|>",
|
922 |
+
"<|mk|>",
|
923 |
+
"<|br|>",
|
924 |
+
"<|eu|>",
|
925 |
+
"<|is|>",
|
926 |
+
"<|hy|>",
|
927 |
+
"<|ne|>",
|
928 |
+
"<|mn|>",
|
929 |
+
"<|bs|>",
|
930 |
+
"<|kk|>",
|
931 |
+
"<|sq|>",
|
932 |
+
"<|sw|>",
|
933 |
+
"<|gl|>",
|
934 |
+
"<|mr|>",
|
935 |
+
"<|pa|>",
|
936 |
+
"<|si|>",
|
937 |
+
"<|km|>",
|
938 |
+
"<|sn|>",
|
939 |
+
"<|yo|>",
|
940 |
+
"<|so|>",
|
941 |
+
"<|af|>",
|
942 |
+
"<|oc|>",
|
943 |
+
"<|ka|>",
|
944 |
+
"<|be|>",
|
945 |
+
"<|tg|>",
|
946 |
+
"<|sd|>",
|
947 |
+
"<|gu|>",
|
948 |
+
"<|am|>",
|
949 |
+
"<|yi|>",
|
950 |
+
"<|lo|>",
|
951 |
+
"<|uz|>",
|
952 |
+
"<|fo|>",
|
953 |
+
"<|ht|>",
|
954 |
+
"<|ps|>",
|
955 |
+
"<|tk|>",
|
956 |
+
"<|nn|>",
|
957 |
+
"<|mt|>",
|
958 |
+
"<|sa|>",
|
959 |
+
"<|lb|>",
|
960 |
+
"<|my|>",
|
961 |
+
"<|bo|>",
|
962 |
+
"<|tl|>",
|
963 |
+
"<|mg|>",
|
964 |
+
"<|as|>",
|
965 |
+
"<|tt|>",
|
966 |
+
"<|haw|>",
|
967 |
+
"<|ln|>",
|
968 |
+
"<|ha|>",
|
969 |
+
"<|ba|>",
|
970 |
+
"<|jw|>",
|
971 |
+
"<|su|>",
|
972 |
+
"<|translate|>",
|
973 |
+
"<|transcribe|>",
|
974 |
+
"<|startoflm|>",
|
975 |
+
"<|startofprev|>",
|
976 |
+
"<|nocaptions|>",
|
977 |
+
"<|notimestamps|>",
|
978 |
+
"<|tp|>"
|
979 |
+
],
|
980 |
+
"bos_token": "<|endoftext|>",
|
981 |
+
"clean_up_tokenization_spaces": true,
|
982 |
+
"eos_token": "<|endoftext|>",
|
983 |
+
"errors": "replace",
|
984 |
+
"model_max_length": 1024,
|
985 |
+
"pad_token": "<|endoftext|>",
|
986 |
+
"processor_class": "WhisperProcessor",
|
987 |
+
"return_attention_mask": false,
|
988 |
+
"tokenizer_class": "WhisperTokenizer",
|
989 |
+
"unk_token": "<|endoftext|>"
|
990 |
+
}
|
vocab.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"<|endoftext|>":0,"<|startoftranscript|>":1,"<|en|>":2,"<|zh|>":3,"<|de|>":4,"<|es|>":5,"<|ru|>":6,"<|ko|>":7,"<|fr|>":8,"<|ja|>":9,"<|pt|>":10,"<|tr|>":11,"<|pl|>":12,"<|ca|>":13,"<|nl|>":14,"<|ar|>":15,"<|sv|>":16,"<|it|>":17,"<|id|>":18,"<|hi|>":19,"<|fi|>":20,"<|vi|>":21,"<|he|>":22,"<|uk|>":23,"<|el|>":24,"<|ms|>":25,"<|cs|>":26,"<|ro|>":27,"<|da|>":28,"<|hu|>":29,"<|ta|>":30,"<|no|>":31,"<|th|>":32,"<|ur|>":33,"<|hr|>":34,"<|bg|>":35,"<|lt|>":36,"<|la|>":37,"<|mi|>":38,"<|ml|>":39,"<|cy|>":40,"<|sk|>":41,"<|te|>":42,"<|fa|>":43,"<|lv|>":44,"<|bn|>":45,"<|sr|>":46,"<|az|>":47,"<|sl|>":48,"<|kn|>":49,"<|et|>":50,"<|mk|>":51,"<|br|>":52,"<|eu|>":53,"<|is|>":54,"<|hy|>":55,"<|ne|>":56,"<|mn|>":57,"<|bs|>":58,"<|kk|>":59,"<|sq|>":60,"<|sw|>":61,"<|gl|>":62,"<|mr|>":63,"<|pa|>":64,"<|si|>":65,"<|km|>":66,"<|sn|>":67,"<|yo|>":68,"<|so|>":69,"<|af|>":70,"<|oc|>":71,"<|ka|>":72,"<|be|>":73,"<|tg|>":74,"<|sd|>":75,"<|gu|>":76,"<|am|>":77,"<|yi|>":78,"<|lo|>":79,"<|uz|>":80,"<|fo|>":81,"<|ht|>":82,"<|ps|>":83,"<|tk|>":84,"<|nn|>":85,"<|mt|>":86,"<|sa|>":87,"<|lb|>":88,"<|my|>":89,"<|bo|>":90,"<|tl|>":91,"<|mg|>":92,"<|as|>":93,"<|tt|>":94,"<|haw|>":95,"<|ln|>":96,"<|ha|>":97,"<|ba|>":98,"<|jw|>":99,"<|su|>":100,"<|translate|>":101,"<|transcribe|>":102,"<|startoflm|>":103,"<|startofprev|>":104,"<|nocaptions|>":105,"<|notimestamps|>":106,"!":107,"\"":108,"#":109,"$":110,"%":111,"&":112,"'":113,"(":114,")":115,"*":116,"+":117,",":118,"-":119,".":120,"/":121,"0":122,"1":123,"2":124,"3":125,"4":126,"5":127,"6":128,"7":129,"8":130,"9":131,":":132,";":133,"<":134,"=":135,">":136,"?":137,"@":138,"A":139,"B":140,"C":141,"D":142,"E":143,"F":144,"G":145,"H":146,"I":147,"J":148,"K":149,"L":150,"M":151,"N":152,"O":153,"P":154,"Q":155,"R":156,"S":157,"T":158,"U":159,"V":160,"W":161,"X":162,"Y":163,"Z":164,"[":165,"\\":166,"]":167,"^":168,"_":169,"`":170,"a":171,"b":172,"c":173,"d":174,"e":175,"f":176,"g":177,"h":178,"i":179,"j":180,"k":181,"l":182,"m":183,"n":184,"o":185,"p":186,"q":
187,"r":188,"s":189,"t":190,"u":191,"v":192,"w":193,"x":194,"y":195,"z":196,"{":197,"|":198,"}":199,"~":200,"¡":201,"¢":202,"£":203,"¤":204,"¥":205,"¦":206,"§":207,"¨":208,"©":209,"ª":210,"«":211,"¬":212,"®":213,"¯":214,"°":215,"±":216,"²":217,"³":218,"´":219,"µ":220,"¶":221,"·":222,"¸":223,"¹":224,"º":225,"»":226,"¼":227,"½":228,"¾":229,"¿":230,"À":231,"Á":232,"Â":233,"Ã":234,"Ä":235,"Å":236,"Æ":237,"Ç":238,"È":239,"É":240,"Ê":241,"Ë":242,"Ì":243,"Í":244,"Î":245,"Ï":246,"Ð":247,"Ñ":248,"Ò":249,"Ó":250,"Ô":251,"Õ":252,"Ö":253,"×":254,"Ø":255,"Ù":256,"Ú":257,"Û":258,"Ü":259,"Ý":260,"Þ":261,"ß":262,"à":263,"á":264,"â":265,"ã":266,"ä":267,"å":268,"æ":269,"ç":270,"è":271,"é":272,"ê":273,"ë":274,"ì":275,"í":276,"î":277,"ï":278,"ð":279,"ñ":280,"ò":281,"ó":282,"ô":283,"õ":284,"ö":285,"÷":286,"ø":287,"ù":288,"ú":289,"û":290,"ü":291,"ý":292,"þ":293,"ÿ":294,"Ā":295,"ā":296,"Ă":297,"ă":298,"Ą":299,"ą":300,"Ć":301,"ć":302,"Ĉ":303,"ĉ":304,"Ċ":305,"ċ":306,"Č":307,"č":308,"Ď":309,"ď":310,"Đ":311,"đ":312,"Ē":313,"ē":314,"Ĕ":315,"ĕ":316,"Ė":317,"ė":318,"Ę":319,"ę":320,"Ě":321,"ě":322,"Ĝ":323,"ĝ":324,"Ğ":325,"ğ":326,"Ġ":327,"ġ":328,"Ģ":329,"ģ":330,"Ĥ":331,"ĥ":332,"Ħ":333,"ħ":334,"Ĩ":335,"ĩ":336,"Ī":337,"ī":338,"Ĭ":339,"ĭ":340,"Į":341,"į":342,"İ":343,"ı":344,"IJ":345,"ij":346,"Ĵ":347,"ĵ":348,"Ķ":349,"ķ":350,"ĸ":351,"Ĺ":352,"ĺ":353,"Ļ":354,"ļ":355,"Ľ":356,"ľ":357,"Ŀ":358,"ŀ":359,"Ł":360,"ł":361,"Ń":362,"Ġl":363,"na":364,"Ġli":365,"Ġt":366,"Ġp":367,"Ġs":368,"Ġm":369,"an":370,"Ġe":371,"Ġk":372,"li":373,"ona":374,"en":375,"wa":376,"al":377,"ni":378,"si":379,"Ġni":380,"ala":381,"Ġla":382,"Ġta":383,"Ġpi":384,"on":385,"jan":386,"Ġmi":387,"ki":388,"ma":389,"Ġsi":390,"Ġtawa":391,"te":392,"mi":393,"Ġlon":394,"Ġw":395,"Ġala":396,"Ġto":397,"il":398,"ama":399,"Ġpona":400,"Ġmu":401,"Ġo":402,"Ġjan":403,"Ġpa":404,"ke":405,"ka":406,"Ġsina":407,"Ġlu":408,"lin":409,"Ġkama":410,"so":411,"Ġona":412,"po":413,"Ġtoki":414,"ile":415,"Ġna":416,"enpo":417,"Ġsu":418,"Ġwile":419,"Ġse":420,"Ġi":421,"Ġmute":422,"pa
":423,"sina":424,"Ġa":425,"Ġken":426,"lo":427,"pe":428,"kin":429,"me":430,"Ġan":431,"jo":432,"mo":433,"Ġsona":434,"Ġtan":435,"Ġlukin":436,"sa":437,"Ġpilin":438,"ken":439,"ale":440,"ta":441,"elo":442,"Ġike":443,"Ġtomo":444,"ku":445,"Ġale":446,"ilo":447,"wi":448,"Ġmo":449,"pu":450,"Ġku":451,"eli":452,"Ġseme":453,"sin":454,"Ġlili":455,"Ġtenpo":456,"Ġki":457,"Ġsuli":458,"Ġwa":459,"Ġke":460,"peken":461,"Ġpali":462,"Ġkepeken":463,"ja":464,"tenpo":465,"Ġma":466,"Ġpana":467,"pi":468,"Ġjo":469,"wen":470,"weli":471,"Ġtu":472,"Ġtaso":473,"un":474,"Ġante":475,"Ġmoku":476,"eka":477,"len":478,"Ġpo":479,"Ġweka":480,"kala":481,"Ġwan":482,"Ġluka":483,"Ġnasin":484,"Ġsin":485,"Ġolin":486,"ko":487,"npa":488,"Ġmusi":489,"Ġwawa":490,"Ġsama":491,"Ġkala":492,"no":493,"Ġtelo":494,"lu":495,"Ġawen":496,"Ġnimi":497,"je":498,"Ġnanpa":499,"ĠK":500,"telen":501,"ĠT":502,"Ġlawa":503,"Ġilo":504,"Ġso":505,"Ġlipu":506,"Ġpakala":507,"tu":508,"Ġpini":509,"Ġsitelen":510,"uta":511,"Ġsuno":512,"Ġsoweli":513,"Ġsewi":514,"Ġnasa":515,"ĠTu":516,"Ġen":517,"taso":518,"to":519,"jelo":520,"Ġkin":521,"Ġpoka":522,"lupu":523,"tuli":524,"Ġijo":525,"untuli":526,"ĠKuntuli":527,"le":528,"Ġlape":529,"Ġkute":530,"Ġka":531,"soweli":532,"Ġanu":533,"Ġlen":534,"Ġkalama":535,"Ġmon":536,"Ġsike":537,"Ġmoli":538,"Ġkasi":539,"pen":540,"Ġalasa":541,"Ġsuwi":542,"Ġanpa":543,"Ġkule":544,"kesi":545,"nsa":546,"Ġopen":547,"Ġja":548,"Ġkon":549,"Ġkulupu":550,"ĠS":551,"Ġu":552,"Ġinsa":553,"oje":554,"Ġ\"":555,"Ġloje":556,"Ġseli":557,"Ġkili":558,"tala":559,"la":560,"Ġali":561,"sun":562,"nasin":563,"pin":564,"Ġutala":565,"ani":566,"Ġpoki":567,"ena":568,"suta":569,"Ġsijelo":570,"Ġn":571,"Ġjaki":572,"ĠA":573,"ĠP":574,"Ġle":575,"Ġpan":576,"Ġmani":577,"Ġko":578,"meja":579,"Ġuta":580,"Ġpu":581,"Ġpimeja":582,"toki":583,"Ġmonsuta":584,"Ġnena":585,"Ġlin":586,"nimi":587,"Ġmije":588,"Ġmun":589,"Ġlupa":590,"ĠL":591,"Ġmama":592,"mako":593,"Ġnamako":594,"Ġsinpin":595,"tan":596,"Ġmeli":597,"Ġsupa":598,"Ġwaso":599,"tomo":600,"sike":601,"Ġakesi":602,"noka":603
,"lon":604,"kasi":605,"Ġlinja":606,"mu":607,"Ġnoka":608,"Ġwalo":609,"akesi":610,"ĠM":611,"Ġun":612,"kulupu":613,"Ġkiwen":614,"pisi":615,"ĠAn":616,"ijo":617,"se":618,"san":619,"we":620,"Ġjelo":621,"Ġesun":622,"Ġlaso":623,"Ġpake":624,"Ġkipisi":625,"pan":626,"lipu":627,"telo":628,"Ġmonsi":629,"Ġlete":630,"Ġunpa":631,"alu":632,"kewi":633,"kalama":634,"sikeke":635,"ĠAnkewi":636,"ju":637,"sila":638,"Ġpalisa":639,"kalu":640,"ĠE":641,"sitelen":642,"tesan":643,"Ġselo":644,"takalu":645,"jetesan":646,"jetesantakalu":647,"waso":648,"Ġmisikeke":649,"pipi":650,"Ġpowe":651,"kosila":652,"suno":653,"ĠI":654,"onsi":655,"pali":656,"meli":657,"musi":658,"\".":659,"kon":660,"olin":661,"su":662,"âĢ":663,"ĠN":664,"Ġtonsi":665,"wawa":666,"Ġpipi":667,"mama":668,"Ġoko":669,"moku":670,"Ġleko":671,"Ġkokosila":672,"sona":673,"seme":674,"juna":675,"eso":676,"Ġmeso":677,"linja":678,"Ġmajuna":679,"lukin":680,"ĠSon":681,"ĠIn":682,"selo":683,"wile":684,"Ġlan":685,"sima":686,"kili":687,"pilin":688,"luka":689,"ĠKa":690,"ĠTe":691,"ĠLi":692,"ĠMe":693,"ĠInli":694,"Ġlanpan":695,"..":696,"ike":697,"seli":698,"ton":699,"wan":700,"sijelo":701,"kijetesantakalu":702,"Ġape":703,"pesi":704,"kule":705,"piku":706,"Ġsoko":707,"Ġjasima":708,"ĠPi":709,"ĠSonja":710,"Ġapeja":711,"ĠJ":712,"nanpa":713,"sinpin":714,"Ġkijetesantakalu":715,"lupa":716,"lawa":717,"ali":718,"nton":719,"ĠâĢ":720,"Ġte":721,"kiwen":722,"pakala":723,"winton":724,"luwi":725,"Ġlinluwi":726,"ĠLo":727,"ĠEwinton":728,"âĢĿ":729,"ĠNi":730,"ĠâĢľ":731,"lan":732,"ntu":733,"oko":734,"ĠW":735,"Ġmelo":736,"Ġepiku":737,"mije":738,"pini":739,"ĠKi":740,"ĠTo":741,"Ġkapesi":742,"ĠSa":743,"ĠSu":744,"ĠSan":745,"ĠSen":746,"ĠPa":747,"ĠPe":748,"ĠLa":749,"palisa":750,"ĠTelen":751,"ĠLisa":752,"Ġmelome":753,"awen":754,"mon":755,"mun":756,"pona":757,"ĠO":758,"nata":759,"lija":760,"poki":761,"sawi":762,"Ġkuntu":763,"jaki":764,"Ġpuwa":765,"ĠMi":766,"sewi":767,"supa":768,"ĠKanata":769,"ĠMewi":770,"...":771,"ĠJu":772,"ĠSeni":773,"monsuta":774,"epiku":775,"open":776,"ante":777,"
sija":778,"Ġmijo":779,"misikeke":780,"amana":781,"poka":782,"sali":783,"ĠKu":784,"ĠKen":785,"ĠTa":786,"ĠSi":787,"ĠSamana":788,"Ġusawi":789,"lape":790,"lapi":791,"ĠPo":792,"mute":793,"panpo":794,"ĠElo":795,"ĠKapesi":796,"ĠKin":797,"ĠSupanpo":798,"ĠSanta":799,"ĠLasina":800,"ĠMewika":801,"Ġmijomi":802,"eko":803,"esun":804,"imi":805,"insa":806,"juta":807,"kama":808,"mala":809,"nena":810,"utala":811,"yu":812,"¼ģ":813,"ï¼ģ":814,"ĠU":815,"nasa":816,"Ġsan":817,"Ġete":818,"Ġkan":819,"wala":820,"sipin":821,"alata":822,"alasija":823,"onken":824,"Ġpasila":825,"keke":826,"pana":827,"pelan":828,"Ġkiki":829,"kokosila":830,"ĠKan":831,"ĠKeli":832,"ĠTimi":833,"Ġkamala":834,"ĠSo":835,"ĠAsi":836,"ĠPu":837,"ĠPan":838,"ĠPona":839,"ĠPalata":840,"ĠLe":841,"ĠMa":842,"ĠMalasija":843,"Ġunu":844,"ĠEko":845,"ĠEpelan":846,"ĠIta":847,"ĠNe":848,"ĠNu":849,"ĠSonko":850,"ĠLin":851,"ĠMekeke":852,"ĠPita":853,"ĠJonken":854,"ĠLola":855,"âĢĿ.":856,"ĠWa":857,"ĠTosi":858,"ĠToki":859,"ĠSusan":860,"ĠPepu":861,"ĠMisali":862,"ĠSiko":863,"lapisu":864,"ĠElopa":865,"ĠKinkili":866,"Ġkamalawala":867,"ĠEpelanto":868,"!\"":869,"\",":870,"\"?":871,".\"":872,"?!":873,"Eko":874,"asin":875,"esi":876,"eta":877,"elon":878,"esija":879,"ije":880,"isipin":881,"jal":882,"jon":883,"jama":884,"kan":885,"lani":886,"man":887,"mani":888,"nu":889,"omi":890,"ose":891,"pon":892,"pil":893,"pani":894,"palu":895,"son":896,"sama":897,"tonsi":898,"upi":899,"wani":900,"Ġyu":901,"nawi":902,"naja":903,"Ġpeta":904,"anu":905,"anka":906,"anpa":907,"antan":908,"lia":909,"lisa":910,"liku":911,"lilan":912,"wasi":913,"wawi":914,"nija":915,"siko":916,"alasa":917,"Ġtaki":918,"onyu":919,"Ġmisa":920,"kini":921,"kijo":922,"kiko":923,"kimu":924,"kipisi":925,"kisan":926,"majuna":927,"telan":928,"mile":929,"ila":930,"��mulapisu":931,"Ġowe":932,"Ġojuta":933,"kepeken":934,"kenaja":935,"kaso":936,"katon":937,"kawan":938,"soko":939,"polo":940,"powe":941,"Ġisipin":942,"pajal":943,"pakawan":944,"loje":945,"peta":946,"peko":947,"meso":948,"mesu":949,"jole":950,"joj
u":951,"mosi":952,"momo":953,"moto":954,"salin":955,"tawa":956,"tawan":957,"tasali":958,"kukan":959,"wito":960,"pusi":961,"puwani":962,"Ġwale":963,"Ġkese":964,"jasima":965,"pili":966,"pimeja":967,"Ġtuli":968,"uneko":969,"kowa":970,"nokijo":971,"jesa":972,"jekenaja":973,"ĠKena":974,"ĠKijo":975,"ĠKelon":976,"ĠKomi":977,"ĠKila":978,"ĠTen":979,"ĠTonyu":980,"Ġsoto":981,"lete":982,"leko":983,"Ġkonwe":984,"ĠSuneko":985,"Ġumesu":986,"laso":987,"lasin":988,"ĠAli":989,"ĠAwi":990,"ĠAlapi":991,"ĠAman":992,"ĠAwawi":993,"ĠApolo":994,"ĠAtawan":995,"ĠAlasin":996,"ĠPuta":997,"ĠLuta":998,"ĠLupi":999,"ĠLantan":1000,"mulapisu":1001,"ĠMena":1002,"ĠMose":1003,"ĠManka":1004,"ĠAnse":1005,"ĠAnson":1006,"ĠAnkowa":1007,"ijosa":1008,"aluto":1009,"jusi":1010,"juwan":1011,"jukini":1012,"ĠEki":1013,"ĠElisa":1014,"ĠIlan":1015,"suli":1016,"suka":1017,"suwi":1018,"supan":1019,"ĠNasin":1020,"ĠNaluto":1021,"ĠInton":1022,"ĠKapil":1023,"ĠKajesa":1024,"ĠTepo":1025,"ĠTeja":1026,"ĠTewen":1027,"ĠTepani":1028,"ĠLilija":1029,"ĠLililan":1030,"ĠMelani":1031,"ĠMesiko":1032,"ĠMekiko":1033,"selija":1034,"ĠPiwi":1035,"ĠPisin":1036,"ĠPiju":1037,"ĠPinija":1038,"ĠPikaso":1039,"ĠPinokijo":1040,"ĠJan":1041,"Ġteje":1042,"ĠLopin":1043,"ĠLowasi":1044,"ĠLokaton":1045,"ĠLosupan":1046,"ĠNiki":1047,"ĠNimu":1048,"ĠNijon":1049,"ĠNipon":1050,"ĠNimile":1051,"ĠWeko":1052,"ĠWije":1053,"ĠWijosa":1054,"ĠKita":1055,"ĠKiliku":1056,"ĠToto":1057,"ĠSaku":1058,"ĠSasalin":1059,"ĠSajusi":1060,"ĠSentu":1061,"ĠPasi":1062,"ĠPakala":1063,"ĠPakisan":1064,"ĠPapuwani":1065,"ĠPewe":1066,"ĠPekimu":1067,"ĠPejoju":1068,"ĠLajo":1069,"ĠLawito":1070,"ĠOsin":1071,"ĠOwe":1072,"ĠOjuta":1073,"ĠOselija":1074,"ĠMimoku":1075,"ĠMijama":1076,"ĠJuli":1077,"ĠJuke":1078,"ĠJutu":1079,"ĠJulija":1080,"ĠKupa":1081,"ĠKulija":1082,"ĠKukukan":1083,"ĠTawi":1084,"ĠTapajal":1085,"ĠTajuwan":1086,"ĠPota":1087,"ĠPomoto":1088,"ĠPosuka":1089,"ĠUtu":1090,"ĠUsawi":1091,"ĠKanse":1092,"ĠKanpusi":1093,"ĠSomomo":1094,"ĠPutu":1095,"ĠPunawi":1096,"ĠPanto":1097,"ĠPantasali":1098,"ĠLewi":1099
,"ĠLemosi":1100,"ĠMaku":1101,"ĠMasu":1102,"ĠItalija":1103,"ĠItalia":1104,"ĠNepalu":1105,"ĠNetelan":1106,"ĠNuken":1107,"ĠNuwan":1108,"ĠLinta":1109,"ĠLinja":1110,"ĠWajole":1111,"ĠWapili":1112,"esipo":1113,"Ġyupeko":1114,"Ġwaleja":1115,"ĠKenajekenaja":1116,"ĠKijoto":1117,"ĠKominu":1118,"ĠAnsetan":1119,"ĠElisape":1120,"ĠNasinesipo":1121,"ĠIntonesija":1122,"ĠKapilu":1123,"ĠKilikuntu":1124,"ĠPapuwanijukini":1125,"ĠTapajalo":1126,"ĠPomotolo":1127,"ĠPantasalipakawan":1128,"Ġyupekosi":1129}
|