add tokenizer
Browse files- vocab.json +1 -1
vocab.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"
|
|
|
1 |
+
{"č": 0, "Ɗ": 1, "á": 2, "Č": 3, "ŀ": 4, "ĸ": 5, "Ľ": 6, "Ø": 7, "Ý": 8, "ï": 9, "Ǝ": 10, "Ğ": 11, "ĩ": 12, "ß": 13, "Ň": 14, "ħ": 15, "Ă": 16, "ë": 17, "æ": 18, "Ō": 19, "Ę": 20, "ö": 21, "ă": 22, "ĭ": 23, "ŗ": 24, "Ď": 25, "Ű": 26, "ġ": 27, "í": 28, "ő": 29, "Û": 30, "ļ": 31, "Ł": 32, "ŵ": 33, "ŏ": 34, "ź": 35, "ƈ": 36, "ŝ": 37, "ä": 38, "ð": 39, "Ñ": 40, "Ƅ": 41, "ć": 42, "î": 43, "Ü": 44, "Ŕ": 45, "ĵ": 46, "ƍ": 47, "Ż": 48, "Ũ": 49, "ŷ": 50, "ċ": 51, "Ħ": 52, "ƅ": 53, "ű": 54, "â": 55, "ĥ": 56, "Ņ": 57, "Ī": 58, "š": 59, "Ą": 60, "Ŵ": 61, "Œ": 62, "Ĺ": 63, "ō": 64, "ē": 65, "Ĝ": 66, "Ö": 67, "đ": 68, "ñ": 69, "ţ": 70, "ř": 71, "Ŋ": 72, "Ï": 73, "ĕ": 74, "Í": 75, "Ş": 76, "İ": 77, "ŭ": 78, "ď": 79, "ā": 80, "ü": 81, "Ċ": 82, "é": 83, "Ɖ": 84, "Ɓ": 85, "ń": 86, "ŋ": 87, "Ŗ": 88, "ų": 89, "Ë": 90, "ç": 91, "Ɔ": 92, "Ƃ": 93, "ó": 94, "Ŷ": 95, "Ì": 96, "ij": 97, "ľ": 98, "ņ": 99, "ė": 100, "ž": 101, "ſ": 102, "Ţ": 103, "Ù": 104, "ý": 105, "É": 106, "Ó": 107, "ø": 108, "Ē": 109, "Ð": 110, "ģ": 111, "Ŝ": 112, "û": 113, "œ": 114, "ʼn": 115, "Ć": 116, "Ê": 117, "Ń": 118, "Ū": 119, "ÿ": 120, "Ų": 121, "ı": 122, "ś": 123, "ô": 124, "ì": 125, "Ĉ": 126, "Ģ": 127, "ě": 128, "ã": 129, "è": 130, "IJ": 131, "ş": 132, "ŧ": 133, "Î": 134, "ą": 135, "ī": 136, "Ĕ": 137, "ú": 138, "Ÿ": 139, "ż": 140, "ƌ": 141, "ū": 142, "Ő": 143, "Ə": 144, "ĺ": 145, "ƀ": 146, "ů": 147, "Ŀ": 148, "Ĥ": 149, "Į": 150, "Ĩ": 151, "ę": 152, "į": 153, "Ř": 154, "ù": 155, "ŕ": 156, "Ŏ": 157, "Ú": 158, "å": 159, "Đ": 160, "ĝ": 161, "Ś": 162, "Ĭ": 163, "Ķ": 164, "ť": 165, "ê": 166, "Ô": 167, "÷": 168, "ũ": 169, "ò": 170, "Ò": 171, "Ť": 172, "þ": 173, "Ů": 174, "È": 175, "Ŭ": 176, "ķ": 177, "Ƌ": 178, "ĉ": 179, "Ļ": 180, "Ž": 181, "ğ": 182, "à": 183, "Ƈ": 184, "Ġ": 185, "õ": 186, "Þ": 187, "Š": 188, "×": 189, "Ě": 190, "Ā": 191, "ł": 192, "ƃ": 193, "Ĵ": 194, "Ė": 195, "ň": 196, "Ŧ": 197, "Ź": 198, "Õ": 199, "[UNK]": 200, "[PAD]": 201}
|