Trifon committed on
Commit
710f924
·
1 Parent(s): 4615038

add tokenizer

Browse files
Files changed (1) hide show
  1. vocab.json +1 -1
vocab.json CHANGED
@@ -1 +1 @@
1
- {"Ƅ": 0, "ŏ": 1, "ľ": 2, "ç": 3, "Ø": 4, "ų": 5, "ů": 6, "ź": 7, "Ķ": 8, "Ę": 9, "ū": 10, "Ģ": 11, "È": 12, "Ğ": 13, "Ē": 14, "ŀ": 15, "é": 16, "Ũ": 17, "Ý": 18, "ė": 19, "ij": 20, "IJ": 21, "ƀ": 22, "Ą": 23, "ž": 24, "ä": 25, "Ė": 26, "ĸ": 27, "Ŵ": 28, "Ƃ": 29, "Î": 30, "Ţ": 31, "Ņ": 32, "Þ": 33, "ģ": 34, "Ŷ": 35, "Ɔ": 36, "à": 37, "Ł": 38, "Ÿ": 39, "Ľ": 40, "ĝ": 41, "å": 42, "Ĺ": 43, "ŵ": 44, "ō": 45, "Ų": 46, "ß": 47, "Ŕ": 48, "á": 49, "ÿ": 50, "ś": 51, "Œ": 52, "ř": 53, "Ċ": 54, "Ö": 55, "ü": 56, "Ť": 57, "ũ": 58, "ń": 59, "Ƈ": 60, "ĭ": 61, "ħ": 62, "ĩ": 63, "Š": 64, "ġ": 65, "Ù": 66, "ı": 67, "ŕ": 68, "Ò": 69, "Ǝ": 70, "ù": 71, "Ā": 72, "ñ": 73, "ý": 74, "É": 75, "ŷ": 76, "ĉ": 77, "į": 78, "Ļ": 79, "ƍ": 80, "Ƌ": 81, "Ū": 82, "Ú": 83, "Ŀ": 84, "ę": 85, "ŋ": 86, "Ë": 87, "ķ": 88, "ú": 89, "Ɗ": 90, "Í": 91, "Ŗ": 92, "â": 93, "î": 94, "ſ": 95, "Ô": 96, "Ń": 97, "Ć": 98, "ó": 99, "ã": 100, "ţ": 101, "ŧ": 102, "ƈ": 103, "Ì": 104, "ð": 105, "ö": 106, "ğ": 107, "Ñ": 108, "Ĉ": 109, "ŝ": 110, "ƌ": 111, "Õ": 112, "Ġ": 113, "Č": 114, "ļ": 115, "ĺ": 116, "ě": 117, "Ű": 118, "Ɓ": 119, "ĵ": 120, "š": 121, "ē": 122, "Ê": 123, "ć": 124, "×": 125, "Ĕ": 126, "Ŭ": 127, "Ď": 128, "Ě": 129, "í": 130, "ĕ": 131, "Ă": 132, "Ś": 133, "ő": 134, "ê": 135, "ŗ": 136, "Ə": 137, "ë": 138, "ĥ": 139, "Ň": 140, "Į": 141, "û": 142, "÷": 143, "Đ": 144, "Ī": 145, "Ž": 146, "đ": 147, "Û": 148, "Ź": 149, "ą": 150, "Ï": 151, "Ĩ": 152, "è": 153, "č": 154, "ť": 155, "Ó": 156, "Ĵ": 157, "œ": 158, "ø": 159, "ş": 160, "Ż": 161, "Ŏ": 162, "ò": 163, "Ĭ": 164, "ƅ": 165, "þ": 166, "ċ": 167, "Ɖ": 168, "æ": 169, "Ŝ": 170, "ŭ": 171, "Ĝ": 172, "Ĥ": 173, "Ŧ": 174, "ű": 175, "ƃ": 176, "ď": 177, "ʼn": 178, "ż": 179, "Ő": 180, "ă": 181, "Ð": 182, "Ů": 183, "ï": 184, "ô": 185, "Ř": 186, "ī": 187, "ň": 188, "Ō": 189, "Ħ": 190, "Ŋ": 191, "İ": 192, "ņ": 193, "ā": 194, "Ş": 195, "ì": 196, "Ü": 197, "õ": 198, "ł": 199, "[UNK]": 200, "[PAD]": 201}
 
1
+ {"č": 0, "Ɗ": 1, "á": 2, "Č": 3, "ŀ": 4, "ĸ": 5, "Ľ": 6, "Ø": 7, "Ý": 8, "ï": 9, "Ǝ": 10, "Ğ": 11, "ĩ": 12, "ß": 13, "Ň": 14, "ħ": 15, "Ă": 16, "ë": 17, "æ": 18, "Ō": 19, "Ę": 20, "ö": 21, "ă": 22, "ĭ": 23, "ŗ": 24, "Ď": 25, "Ű": 26, "ġ": 27, "í": 28, "ő": 29, "Û": 30, "ļ": 31, "Ł": 32, "ŵ": 33, "ŏ": 34, "ź": 35, "ƈ": 36, "ŝ": 37, "ä": 38, "ð": 39, "Ñ": 40, "Ƅ": 41, "ć": 42, "î": 43, "Ü": 44, "Ŕ": 45, "ĵ": 46, "ƍ": 47, "Ż": 48, "Ũ": 49, "ŷ": 50, "ċ": 51, "Ħ": 52, "ƅ": 53, "ű": 54, "â": 55, "ĥ": 56, "Ņ": 57, "Ī": 58, "š": 59, "Ą": 60, "Ŵ": 61, "Œ": 62, "Ĺ": 63, "ō": 64, "ē": 65, "Ĝ": 66, "Ö": 67, "đ": 68, "ñ": 69, "ţ": 70, "ř": 71, "Ŋ": 72, "Ï": 73, "ĕ": 74, "Í": 75, "Ş": 76, "İ": 77, "ŭ": 78, "ď": 79, "ā": 80, "ü": 81, "Ċ": 82, "é": 83, "Ɖ": 84, "Ɓ": 85, "ń": 86, "ŋ": 87, "Ŗ": 88, "ų": 89, "Ë": 90, "ç": 91, "Ɔ": 92, "Ƃ": 93, "ó": 94, "Ŷ": 95, "Ì": 96, "ij": 97, "ľ": 98, "ņ": 99, "ė": 100, "ž": 101, "ſ": 102, "Ţ": 103, "Ù": 104, "ý": 105, "É": 106, "Ó": 107, "ø": 108, "Ē": 109, "Ð": 110, "ģ": 111, "Ŝ": 112, "û": 113, "œ": 114, "ʼn": 115, "Ć": 116, "Ê": 117, "Ń": 118, "Ū": 119, "ÿ": 120, "Ų": 121, "ı": 122, "ś": 123, "ô": 124, "ì": 125, "Ĉ": 126, "Ģ": 127, "ě": 128, "ã": 129, "è": 130, "IJ": 131, "ş": 132, "ŧ": 133, "Î": 134, "ą": 135, "ī": 136, "Ĕ": 137, "ú": 138, "Ÿ": 139, "ż": 140, "ƌ": 141, "ū": 142, "Ő": 143, "Ə": 144, "ĺ": 145, "ƀ": 146, "ů": 147, "Ŀ": 148, "Ĥ": 149, "Į": 150, "Ĩ": 151, "ę": 152, "į": 153, "Ř": 154, "ù": 155, "ŕ": 156, "Ŏ": 157, "Ú": 158, "å": 159, "Đ": 160, "ĝ": 161, "Ś": 162, "Ĭ": 163, "Ķ": 164, "ť": 165, "ê": 166, "Ô": 167, "÷": 168, "ũ": 169, "ò": 170, "Ò": 171, "Ť": 172, "þ": 173, "Ů": 174, "È": 175, "Ŭ": 176, "ķ": 177, "Ƌ": 178, "ĉ": 179, "Ļ": 180, "Ž": 181, "ğ": 182, "à": 183, "Ƈ": 184, "Ġ": 185, "õ": 186, "Þ": 187, "Š": 188, "×": 189, "Ě": 190, "Ā": 191, "ł": 192, "ƃ": 193, "Ĵ": 194, "Ė": 195, "ň": 196, "Ŧ": 197, "Ź": 198, "Õ": 199, "[UNK]": 200, "[PAD]": 201}