Zaid committed on
Commit
960c431
1 Parent(s): d35de3e

Upload tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer.json +196 -66
  2. vocab.json +1 -1
tokenizer.json CHANGED
@@ -191,6 +191,123 @@
191
  "rstrip": false,
192
  "normalized": false,
193
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
194
  }
195
  ],
196
  "normalizer": null,
@@ -229,72 +346,85 @@
229
  "<|meter_13|>": 18,
230
  "<|meter_14|>": 19,
231
  "<|meter_15|>": 20,
232
- " ": 21,
233
- "0": 22,
234
- "1": 23,
235
- "2": 24,
236
- "3": 25,
237
- "4": 26,
238
- "5": 27,
239
- "6": 28,
240
- "7": 29,
241
- "8": 30,
242
- "9": 31,
243
- "<": 32,
244
- ">": 33,
245
- "_": 34,
246
- "b": 35,
247
- "e": 36,
248
- "m": 37,
249
- "p": 38,
250
- "r": 39,
251
- "s": 40,
252
- "t": 41,
253
- "v": 42,
254
- "|": 43,
255
- "~": 44,
256
- "ء": 45,
257
- "أ": 46,
258
- "ؤ": 47,
259
- "ئ": 48,
260
- "ا": 49,
261
- "ب": 50,
262
- "ة": 51,
263
- "ت": 52,
264
- "ث": 53,
265
- "ج": 54,
266
- "ح": 55,
267
- "خ": 56,
268
- "د": 57,
269
- "ذ": 58,
270
- "ر": 59,
271
- "ز": 60,
272
- "س": 61,
273
- "ش": 62,
274
- "ص": 63,
275
- "ض": 64,
276
- "ط": 65,
277
- "ظ": 66,
278
- "ع": 67,
279
- "غ": 68,
280
- "ف": 69,
281
- "ق": 70,
282
- "ك": 71,
283
- "ل": 72,
284
- "م": 73,
285
- "ن": 74,
286
- "ه": 75,
287
- "و": 76,
288
- "ى": 77,
289
- "ي": 78,
290
- "ً": 79,
291
- "ٌ": 80,
292
- "ٍ": 81,
293
- "َ": 82,
294
- "ُ": 83,
295
- "ِ": 84,
296
- "ّ": 85,
297
- "ْ": 86
 
 
 
 
 
 
 
 
 
 
 
 
 
298
  },
299
  "merges": []
300
  }
 
191
  "rstrip": false,
192
  "normalized": false,
193
  "special": true
194
+ },
195
+ {
196
+ "id": 21,
197
+ "content": "<|res_0|>",
198
+ "single_word": false,
199
+ "lstrip": false,
200
+ "rstrip": false,
201
+ "normalized": false,
202
+ "special": true
203
+ },
204
+ {
205
+ "id": 22,
206
+ "content": "<|res_1|>",
207
+ "single_word": false,
208
+ "lstrip": false,
209
+ "rstrip": false,
210
+ "normalized": false,
211
+ "special": true
212
+ },
213
+ {
214
+ "id": 23,
215
+ "content": "<|res_2|>",
216
+ "single_word": false,
217
+ "lstrip": false,
218
+ "rstrip": false,
219
+ "normalized": false,
220
+ "special": true
221
+ },
222
+ {
223
+ "id": 24,
224
+ "content": "<|res_3|>",
225
+ "single_word": false,
226
+ "lstrip": false,
227
+ "rstrip": false,
228
+ "normalized": false,
229
+ "special": true
230
+ },
231
+ {
232
+ "id": 25,
233
+ "content": "<|res_4|>",
234
+ "single_word": false,
235
+ "lstrip": false,
236
+ "rstrip": false,
237
+ "normalized": false,
238
+ "special": true
239
+ },
240
+ {
241
+ "id": 26,
242
+ "content": "<|res_5|>",
243
+ "single_word": false,
244
+ "lstrip": false,
245
+ "rstrip": false,
246
+ "normalized": false,
247
+ "special": true
248
+ },
249
+ {
250
+ "id": 27,
251
+ "content": "<|res_6|>",
252
+ "single_word": false,
253
+ "lstrip": false,
254
+ "rstrip": false,
255
+ "normalized": false,
256
+ "special": true
257
+ },
258
+ {
259
+ "id": 28,
260
+ "content": "<|res_7|>",
261
+ "single_word": false,
262
+ "lstrip": false,
263
+ "rstrip": false,
264
+ "normalized": false,
265
+ "special": true
266
+ },
267
+ {
268
+ "id": 29,
269
+ "content": "<|res_8|>",
270
+ "single_word": false,
271
+ "lstrip": false,
272
+ "rstrip": false,
273
+ "normalized": false,
274
+ "special": true
275
+ },
276
+ {
277
+ "id": 30,
278
+ "content": "<|res_9|>",
279
+ "single_word": false,
280
+ "lstrip": false,
281
+ "rstrip": false,
282
+ "normalized": false,
283
+ "special": true
284
+ },
285
+ {
286
+ "id": 31,
287
+ "content": "<|res_10|>",
288
+ "single_word": false,
289
+ "lstrip": false,
290
+ "rstrip": false,
291
+ "normalized": false,
292
+ "special": true
293
+ },
294
+ {
295
+ "id": 32,
296
+ "content": "<|res_11|>",
297
+ "single_word": false,
298
+ "lstrip": false,
299
+ "rstrip": false,
300
+ "normalized": false,
301
+ "special": true
302
+ },
303
+ {
304
+ "id": 33,
305
+ "content": "<|res_12|>",
306
+ "single_word": false,
307
+ "lstrip": false,
308
+ "rstrip": false,
309
+ "normalized": false,
310
+ "special": true
311
  }
312
  ],
313
  "normalizer": null,
 
346
  "<|meter_13|>": 18,
347
  "<|meter_14|>": 19,
348
  "<|meter_15|>": 20,
349
+ "<|res_0|>": 21,
350
+ "<|res_1|>": 22,
351
+ "<|res_2|>": 23,
352
+ "<|res_3|>": 24,
353
+ "<|res_4|>": 25,
354
+ "<|res_5|>": 26,
355
+ "<|res_6|>": 27,
356
+ "<|res_7|>": 28,
357
+ "<|res_8|>": 29,
358
+ "<|res_9|>": 30,
359
+ "<|res_10|>": 31,
360
+ "<|res_11|>": 32,
361
+ "<|res_12|>": 33,
362
+ " ": 34,
363
+ "0": 35,
364
+ "1": 36,
365
+ "2": 37,
366
+ "3": 38,
367
+ "4": 39,
368
+ "5": 40,
369
+ "6": 41,
370
+ "7": 42,
371
+ "8": 43,
372
+ "9": 44,
373
+ "<": 45,
374
+ ">": 46,
375
+ "_": 47,
376
+ "b": 48,
377
+ "e": 49,
378
+ "m": 50,
379
+ "p": 51,
380
+ "r": 52,
381
+ "s": 53,
382
+ "t": 54,
383
+ "v": 55,
384
+ "|": 56,
385
+ "~": 57,
386
+ "ء": 58,
387
+ "أ": 59,
388
+ "ؤ": 60,
389
+ "ئ": 61,
390
+ "ا": 62,
391
+ "ب": 63,
392
+ "ة": 64,
393
+ "ت": 65,
394
+ "ث": 66,
395
+ "ج": 67,
396
+ "ح": 68,
397
+ "خ": 69,
398
+ "د": 70,
399
+ "ذ": 71,
400
+ "ر": 72,
401
+ "ز": 73,
402
+ "س": 74,
403
+ "ش": 75,
404
+ "ص": 76,
405
+ "ض": 77,
406
+ "ط": 78,
407
+ "ظ": 79,
408
+ "ع": 80,
409
+ "غ": 81,
410
+ "ف": 82,
411
+ "ق": 83,
412
+ "ك": 84,
413
+ "ل": 85,
414
+ "م": 86,
415
+ "ن": 87,
416
+ "ه": 88,
417
+ "و": 89,
418
+ "ى": 90,
419
+ "ي": 91,
420
+ "ً": 92,
421
+ "ٌ": 93,
422
+ "ٍ": 94,
423
+ "َ": 95,
424
+ "ُ": 96,
425
+ "ِ": 97,
426
+ "ّ": 98,
427
+ "ْ": 99
428
  },
429
  "merges": []
430
  }
vocab.json CHANGED
@@ -1 +1 @@
1
- {"<|endoftext|>":0,"<|vsep|>":1,"<|bsep|>":2,"<|pad|>":3,"<|psep|>":4,"<|meter_0|>":5,"<|meter_1|>":6,"<|meter_2|>":7,"<|meter_3|>":8,"<|meter_4|>":9,"<|meter_5|>":10,"<|meter_6|>":11,"<|meter_7|>":12,"<|meter_8|>":13,"<|meter_9|>":14,"<|meter_10|>":15,"<|meter_11|>":16,"<|meter_12|>":17,"<|meter_13|>":18,"<|meter_14|>":19,"<|meter_15|>":20," ":21,"0":22,"1":23,"2":24,"3":25,"4":26,"5":27,"6":28,"7":29,"8":30,"9":31,"<":32,">":33,"_":34,"b":35,"e":36,"m":37,"p":38,"r":39,"s":40,"t":41,"v":42,"|":43,"~":44,"ء":45,"أ":46,"ؤ":47,"ئ":48,"ا":49,"ب":50,"ة":51,"ت":52,"ث":53,"ج":54,"ح":55,"خ":56,"د":57,"ذ":58,"ر":59,"ز":60,"س":61,"ش":62,"ص":63,"ض":64,"ط":65,"ظ":66,"ع":67,"غ":68,"ف":69,"ق":70,"ك":71,"ل":72,"م":73,"ن":74,"ه":75,"و":76,"ى":77,"ي":78,"ً":79,"ٌ":80,"ٍ":81,"َ":82,"ُ":83,"ِ":84,"ّ":85,"ْ":86}
 
1
+ {"<|endoftext|>":0,"<|vsep|>":1,"<|bsep|>":2,"<|pad|>":3,"<|psep|>":4,"<|meter_0|>":5,"<|meter_1|>":6,"<|meter_2|>":7,"<|meter_3|>":8,"<|meter_4|>":9,"<|meter_5|>":10,"<|meter_6|>":11,"<|meter_7|>":12,"<|meter_8|>":13,"<|meter_9|>":14,"<|meter_10|>":15,"<|meter_11|>":16,"<|meter_12|>":17,"<|meter_13|>":18,"<|meter_14|>":19,"<|meter_15|>":20,"<|res_0|>":21,"<|res_1|>":22,"<|res_2|>":23,"<|res_3|>":24,"<|res_4|>":25,"<|res_5|>":26,"<|res_6|>":27,"<|res_7|>":28,"<|res_8|>":29,"<|res_9|>":30,"<|res_10|>":31,"<|res_11|>":32,"<|res_12|>":33," ":34,"0":35,"1":36,"2":37,"3":38,"4":39,"5":40,"6":41,"7":42,"8":43,"9":44,"<":45,">":46,"_":47,"b":48,"e":49,"m":50,"p":51,"r":52,"s":53,"t":54,"v":55,"|":56,"~":57,"ء":58,"أ":59,"ؤ":60,"ئ":61,"ا":62,"ب":63,"ة":64,"ت":65,"ث":66,"ج":67,"ح":68,"خ":69,"د":70,"ذ":71,"ر":72,"ز":73,"س":74,"ش":75,"ص":76,"ض":77,"ط":78,"ظ":79,"ع":80,"غ":81,"ف":82,"ق":83,"ك":84,"ل":85,"م":86,"ن":87,"ه":88,"و":89,"ى":90,"ي":91,"ً":92,"ٌ":93,"ٍ":94,"َ":95,"ُ":96,"ِ":97,"ّ":98,"ْ":99}