Trifon committed on
Commit
4615038
1 Parent(s): d853a4f

add tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer_config.json +1 -1
  2. vocab.json +1 -1
tokenizer_config.json CHANGED
@@ -1 +1 @@
1
- {"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "replace_word_delimiter_char": " ", "use_auth_token": "<REDACTED>", "tokenizer_class": "Wav2Vec2CTCTokenizer"}
 
1
+ {"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "replace_word_delimiter_char": " ", "tokenizer_class": "Wav2Vec2CTCTokenizer"}
vocab.json CHANGED
@@ -1 +1 @@
1
- {"Ŷ": 0, "Š": 1, "í": 2, "Ì": 3, "Þ": 4, "ś": 5, "á": 6, "ĝ": 7, "Ų": 8, "Ō": 9, "Ɔ": 10, "ń": 11, "Ø": 12, "ſ": 13, "ą": 14, "į": 15, "Ƃ": 16, "ó": 17, "Ə": 18, "ď": 19, "İ": 20, "č": 21, "ã": 22, "Ł": 23, "œ": 24, "ÿ": 25, "Ü": 26, "ş": 27, "Ƈ": 28, "Ū": 29, "Ĥ": 30, "ö": 31, "Ž": 32, "ů": 33, "ĺ": 34, "Ż": 35, "Ò": 36, "Ñ": 37, "Ŵ": 38, "ý": 39, "ŏ": 40, "Ŀ": 41, "Ţ": 42, "É": 43, "Ą": 44, "Ŭ": 45, "Ý": 46, "Ő": 47, "Ô": 48, "ƃ": 49, "Ɖ": 50, "Ĭ": 51, "ĵ": 52, "ä": 53, "ī": 54, "Ó": 55, "å": 56, "Ļ": 57, "Ŧ": 58, "ı": 59, "ʼn": 60, "ĩ": 61, "Ĝ": 62, "ų": 63, "Ũ": 64, "Ö": 65, "ƌ": 66, "Ē": 67, "æ": 68, "Ă": 69, "ķ": 70, "ł": 71, "ğ": 72, "ř": 73, "Ş": 74, "Ľ": 75, "ï": 76, "IJ": 77, "ě": 78, "Ƅ": 79, "Ŋ": 80, "Ű": 81, "Õ": 82, "è": 83, "ę": 84, "Ź": 85, "Č": 86, "î": 87, "ƍ": 88, "Ť": 89, "ú": 90, "à": 91, "ť": 92, "ũ": 93, "ć": 94, "š": 95, "ŧ": 96, "ĥ": 97, "û": 98, "Ê": 99, "È": 100, "ă": 101, "â": 102, "ņ": 103, "ŝ": 104, "ŵ": 105, "ž": 106, "Ŕ": 107, "Ā": 108, "Ĕ": 109, "Ɗ": 110, "Ģ": 111, "é": 112, "Ï": 113, "Ř": 114, "Ś": 115, "ù": 116, "Ĉ": 117, "ç": 118, "ħ": 119, "Ů": 120, "ĭ": 121, "ŗ": 122, "ƀ": 123, "Î": 124, "ƅ": 125, "ê": 126, "ľ": 127, "Í": 128, "đ": 129, "ij": 130, "Ď": 131, "÷": 132, "ő": 133, "õ": 134, "ë": 135, "Ĺ": 136, "ò": 137, "Ń": 138, "ð": 139, "ō": 140, "Ǝ": 141, "ŕ": 142, "Ŏ": 143, "Œ": 144, "ň": 145, "Ƌ": 146, "ĕ": 147, "Đ": 148, "Į": 149, "Ħ": 150, "Ķ": 151, "Ġ": 152, "Ņ": 153, "Ė": 154, "ñ": 155, "Ÿ": 156, "Ŝ": 157, "ĉ": 158, "Ě": 159, "ø": 160, "Ę": 161, "Ĩ": 162, "Ð": 163, "Ĵ": 164, "ŀ": 165, "ŷ": 166, "ô": 167, "ļ": 168, "þ": 169, "ƈ": 170, "ţ": 171, "ß": 172, "ŋ": 173, "Ŗ": 174, "ì": 175, "ė": 176, "ġ": 177, "ź": 178, "ā": 179, "Ɓ": 180, "ģ": 181, "Ć": 182, "ü": 183, "ĸ": 184, "Ň": 185, "ē": 186, "Ù": 187, "Ú": 188, "Ë": 189, "ū": 190, "Ċ": 191, "Û": 192, "ċ": 193, "Ğ": 194, "ű": 195, "ż": 196, "Ī": 197, "×": 198, "ŭ": 199, "[UNK]": 200, "[PAD]": 201}
 
1
+ {"Ƅ": 0, "ŏ": 1, "ľ": 2, "ç": 3, "Ø": 4, "ų": 5, "ů": 6, "ź": 7, "Ķ": 8, "Ę": 9, "ū": 10, "Ģ": 11, "È": 12, "Ğ": 13, "Ē": 14, "ŀ": 15, "é": 16, "Ũ": 17, "Ý": 18, "ė": 19, "ij": 20, "IJ": 21, "ƀ": 22, "Ą": 23, "ž": 24, "ä": 25, "Ė": 26, "ĸ": 27, "Ŵ": 28, "Ƃ": 29, "Î": 30, "Ţ": 31, "Ņ": 32, "Þ": 33, "ģ": 34, "Ŷ": 35, "Ɔ": 36, "à": 37, "Ł": 38, "Ÿ": 39, "Ľ": 40, "ĝ": 41, "å": 42, "Ĺ": 43, "ŵ": 44, "ō": 45, "Ų": 46, "ß": 47, "Ŕ": 48, "á": 49, "ÿ": 50, "ś": 51, "Œ": 52, "ř": 53, "Ċ": 54, "Ö": 55, "ü": 56, "Ť": 57, "ũ": 58, "ń": 59, "Ƈ": 60, "ĭ": 61, "ħ": 62, "ĩ": 63, "Š": 64, "ġ": 65, "Ù": 66, "ı": 67, "ŕ": 68, "Ò": 69, "Ǝ": 70, "ù": 71, "Ā": 72, "ñ": 73, "ý": 74, "É": 75, "ŷ": 76, "ĉ": 77, "į": 78, "Ļ": 79, "ƍ": 80, "Ƌ": 81, "Ū": 82, "Ú": 83, "Ŀ": 84, "ę": 85, "ŋ": 86, "Ë": 87, "ķ": 88, "ú": 89, "Ɗ": 90, "Í": 91, "Ŗ": 92, "â": 93, "î": 94, "ſ": 95, "Ô": 96, "Ń": 97, "Ć": 98, "ó": 99, "ã": 100, "ţ": 101, "ŧ": 102, "ƈ": 103, "Ì": 104, "ð": 105, "ö": 106, "ğ": 107, "Ñ": 108, "Ĉ": 109, "ŝ": 110, "ƌ": 111, "Õ": 112, "Ġ": 113, "Č": 114, "ļ": 115, "ĺ": 116, "ě": 117, "Ű": 118, "Ɓ": 119, "ĵ": 120, "š": 121, "ē": 122, "Ê": 123, "ć": 124, "×": 125, "Ĕ": 126, "Ŭ": 127, "Ď": 128, "Ě": 129, "í": 130, "ĕ": 131, "Ă": 132, "Ś": 133, "ő": 134, "ê": 135, "ŗ": 136, "Ə": 137, "ë": 138, "ĥ": 139, "Ň": 140, "Į": 141, "û": 142, "÷": 143, "Đ": 144, "Ī": 145, "Ž": 146, "đ": 147, "Û": 148, "Ź": 149, "ą": 150, "Ï": 151, "Ĩ": 152, "è": 153, "č": 154, "ť": 155, "Ó": 156, "Ĵ": 157, "œ": 158, "ø": 159, "ş": 160, "Ż": 161, "Ŏ": 162, "ò": 163, "Ĭ": 164, "ƅ": 165, "þ": 166, "ċ": 167, "Ɖ": 168, "æ": 169, "Ŝ": 170, "ŭ": 171, "Ĝ": 172, "Ĥ": 173, "Ŧ": 174, "ű": 175, "ƃ": 176, "ď": 177, "ʼn": 178, "ż": 179, "Ő": 180, "ă": 181, "Ð": 182, "Ů": 183, "ï": 184, "ô": 185, "Ř": 186, "ī": 187, "ň": 188, "Ō": 189, "Ħ": 190, "Ŋ": 191, "İ": 192, "ņ": 193, "ā": 194, "Ş": 195, "ì": 196, "Ü": 197, "õ": 198, "ł": 199, "[UNK]": 200, "[PAD]": 201}