michelleyunun commited on
Commit
38d7771
1 Parent(s): 705eb2e

Upload tokenizer

Browse files
Files changed (1) hide show
  1. tokenizer.json +150 -2
tokenizer.json CHANGED
@@ -386,7 +386,81 @@
386
  "Ġdaayim": 324,
387
  "Ġyuxw": 325,
388
  "Ġaloohl": 326,
389
- "Ġbax": 327
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
390
  },
391
  "merges": [
392
  "s t",
@@ -669,7 +743,81 @@
669
  "Ġd aayim",
670
  "Ġyu xw",
671
  "Ġa loohl",
672
- "Ġb ax"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
673
  ]
674
  }
675
  }
 
386
  "Ġdaayim": 324,
387
  "Ġyuxw": 325,
388
  "Ġaloohl": 326,
389
+ "Ġbax": 327,
390
+ "Ġbaasx": 328,
391
+ "Ġligit": 329,
392
+ "Ġjok": 330,
393
+ "Ġsg": 331,
394
+ "Ġsi": 332,
395
+ "ĠSpain": 333,
396
+ "nakwhl": 334,
397
+ "Ġhehl": 335,
398
+ "Ġhediit": 336,
399
+ "diithl": 337,
400
+ "witxwit": 338,
401
+ "Ġjaphl": 339,
402
+ "nithl": 340,
403
+ "ytxwhl": 341,
404
+ "Ġxhlii": 342,
405
+ "Ġdaayimaahl": 343,
406
+ "Ġyuxwdiithl": 344,
407
+ "Ġbaasxi": 345,
408
+ "Nakwhl": 346,
409
+ "gwi": 347,
410
+ "ukwhl": 348,
411
+ "yukwhl": 349,
412
+ "ĠAk": 350,
413
+ "ĠAgwi": 351,
414
+ "ĠAgwiyukwhl": 352,
415
+ "BM": 353,
416
+ "De": 354,
417
+ "Gi": 355,
418
+ "IBM": 356,
419
+ "aw": 357,
420
+ "ail": 358,
421
+ "ce": 359,
422
+ "ff": 360,
423
+ "gee": 361,
424
+ "it": 362,
425
+ "iwaa": 363,
426
+ "ice": 364,
427
+ "jit": 365,
428
+ "ljit": 366,
429
+ "mar": 367,
430
+ "mail": 368,
431
+ "nmar": 369,
432
+ "oxs": 370,
433
+ "off": 371,
434
+ "si": 372,
435
+ "wan": 373,
436
+ "way": 374,
437
+ "yo": 375,
438
+ "ĠDe": 376,
439
+ "ĠGi": 377,
440
+ "ĠIBM": 378,
441
+ "Ġmail": 379,
442
+ "Ġoff": 380,
443
+ "niiwan": 381,
444
+ "niiyo": 382,
445
+ "xsiwaa": 383,
446
+ "Ġsaw": 384,
447
+ "nix": 385,
448
+ "Ġwok": 386,
449
+ "atdiit": 387,
450
+ "̲.\"": 388,
451
+ "oosun": 389,
452
+ "ĠAp": 390,
453
+ "Ġamxsiwaa": 391,
454
+ "Ġaks": 392,
455
+ "geenix": 393,
456
+ "nmark": 394,
457
+ "oxsxw": 395,
458
+ "wayi": 396,
459
+ "ĠDenmark": 397,
460
+ "ĠGigeenix": 398,
461
+ "Ġoffice": 399,
462
+ "Ġsawatdiit": 400,
463
+ "ytxw": 401
464
  },
465
  "merges": [
466
  "s t",
 
743
  "Ġd aayim",
744
  "Ġyu xw",
745
  "Ġa loohl",
746
+ "Ġb ax",
747
+ "Ġb aasx",
748
+ "Ġligi t",
749
+ "Ġj ok",
750
+ "Ġs g",
751
+ "Ġs i",
752
+ "ĠS pain",
753
+ "na kwhl",
754
+ "Ġhe hl",
755
+ "Ġhe diit",
756
+ "diit hl",
757
+ "wit xwit",
758
+ "Ġja phl",
759
+ "nit hl",
760
+ "yt xwhl",
761
+ "Ġxhl ii",
762
+ "Ġdaayim aahl",
763
+ "Ġyuxw diithl",
764
+ "Ġbaasx i",
765
+ "N akwhl",
766
+ "g wi",
767
+ "u kwhl",
768
+ "y ukwhl",
769
+ "ĠA k",
770
+ "ĠA gwi",
771
+ "ĠAgwi yukwhl",
772
+ "B M",
773
+ "D e",
774
+ "G i",
775
+ "I BM",
776
+ "a w",
777
+ "a il",
778
+ "c e",
779
+ "f f",
780
+ "g ee",
781
+ "i t",
782
+ "i waa",
783
+ "i ce",
784
+ "j it",
785
+ "l jit",
786
+ "m ar",
787
+ "m ail",
788
+ "n mar",
789
+ "o xs",
790
+ "o ff",
791
+ "s i",
792
+ "w an",
793
+ "w ay",
794
+ "y o",
795
+ "Ġ De",
796
+ "Ġ Gi",
797
+ "Ġ IBM",
798
+ "Ġ mail",
799
+ "Ġ off",
800
+ "nii wan",
801
+ "nii yo",
802
+ "xs iwaa",
803
+ "Ġs aw",
804
+ "ni x",
805
+ "Ġw ok",
806
+ "at diit",
807
+ "̲. \"",
808
+ "oos un",
809
+ "ĠA p",
810
+ "Ġam xsiwaa",
811
+ "Ġak s",
812
+ "gee nix",
813
+ "nmar k",
814
+ "oxs xw",
815
+ "way i",
816
+ "ĠDe nmark",
817
+ "ĠGi geenix",
818
+ "Ġoff ice",
819
+ "Ġsaw atdiit",
820
+ "yt xw"
821
  ]
822
  }
823
  }