shawn-nyk committed on
Commit
263b962
1 Parent(s): 6379eee

Remove special tokens

Browse files
alphabet.json CHANGED
@@ -1 +1 @@
1
- {"labels": ["\u0d7c", "\u0d38", "\u0d02", "\u0d0b", "\u0d39", "\u0d27", "\u0d22", "\u0d20", "\u0d1f", "\u0d2a", "\u0d2f", "\u0d40", "\u0d15", " ", "\u0d30", "\u0d0f", "\u0d09", "\u0d43", "\u0d7a", "\u0d23", "\u0d3f", "\u200c", "\u0d7d", "\u0d4a", "\u0d32", "\u0d57", "\u0d18", "\u0d4c", "\u0d26", "\u0d2c", "\u0d1b", "\u0d1d", "\u0d0a", "\u0d28", "\u0d4b", "\u0d71", "\u0d07", "\u0d10", "\u0d48", "\u0d03", "\u0d3e", "\u0d1a", "\u0d47", "\u0d25", "\u0d21", "\u0d0e", "\u0d37", "\u0d06", "\u0d05", "\u0d2e", "\u0d12", "\u0d36", "\u0d16", "\u0d2b", "\u0d41", "\u0d17", "\u0d33", "\u0d13", "\u0d42", "\u0d35", "\u0d1e", "\u0d31", "\u0d2d", "\u0d7b", "\u0d7e", "\u0d08", "\u0d4d", "\u0d46", "\u0d19", "\u0d24", "\u0d1c", "\u200d", "\u0d14", "\u0d34", "\u2047", "", "<s>", "</s>"], "is_bpe": false}
1
+ {"labels": ["\u0d7c", "\u0d38", "\u0d02", "\u0d0b", "\u0d39", "\u0d27", "\u0d22", "\u0d20", "\u0d1f", "\u0d2a", "\u0d2f", "\u0d40", "\u0d15", " ", "\u0d30", "\u0d0f", "\u0d09", "\u0d43", "\u0d7a", "\u0d23", "\u0d3f", "\u200c", "\u0d7d", "\u0d4a", "\u0d32", "\u0d57", "\u0d18", "\u0d4c", "\u0d26", "\u0d2c", "\u0d1b", "\u0d1d", "\u0d0a", "\u0d28", "\u0d4b", "\u0d71", "\u0d07", "\u0d10", "\u0d48", "\u0d03", "\u0d3e", "\u0d1a", "\u0d47", "\u0d25", "\u0d21", "\u0d0e", "\u0d37", "\u0d06", "\u0d05", "\u0d2e", "\u0d12", "\u0d36", "\u0d16", "\u0d2b", "\u0d41", "\u0d17", "\u0d33", "\u0d13", "\u0d42", "\u0d35", "\u0d1e", "\u0d31", "\u0d2d", "\u0d7b", "\u0d7e", "\u0d08", "\u0d4d", "\u0d46", "\u0d19", "\u0d24", "\u0d1c", "\u200d", "\u0d14", "\u0d34", "\u2047", ""], "is_bpe": false}
special_tokens_map.json CHANGED
@@ -1 +1 @@
1
- {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]"}
1
+ {"bos_token": null, "eos_token": null, "unk_token": "[UNK]", "pad_token": "[PAD]"}
tokenizer_config.json CHANGED
@@ -1 +1 @@
1
- {"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|"}
1
+ {"unk_token": "[UNK]", "bos_token": null, "eos_token": null, "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|"}
vocab.json CHANGED
@@ -1 +1 @@
1
- {"ർ": 0, "സ": 1, "ം": 2, "ഋ": 3, "ഹ": 4, "ധ": 5, "ഢ": 6, "ഠ": 7, "ട": 8, "പ": 9, "യ": 10, "ീ": 11, "ക": 12, "ര": 14, "ഏ": 15, "ഉ": 16, "ൃ": 17, "ൺ": 18, "ണ": 19, "ി": 20, "‌": 21, "ൽ": 22, "ൊ": 23, "ല": 24, "ൗ": 25, "ഘ": 26, "ൌ": 27, "ദ": 28, "ബ": 29, "ഛ": 30, "ഝ": 31, "ഊ": 32, "ന": 33, "ോ": 34, "൱": 35, "ഇ": 36, "ഐ": 37, "ൈ": 38, "ഃ": 39, "ാ": 40, "ച": 41, "േ": 42, "ഥ": 43, "ഡ": 44, "എ": 45, "ഷ": 46, "ആ": 47, "അ": 48, "മ": 49, "ഒ": 50, "ശ": 51, "ഖ": 52, "ഫ": 53, "ു": 54, "ഗ": 55, "ള": 56, "ഓ": 57, "ൂ": 58, "വ": 59, "ഞ": 60, "റ": 61, "ഭ": 62, "ൻ": 63, "ൾ": 64, "ഈ": 65, "്": 66, "െ": 67, "ങ": 68, "ത": 69, "ജ": 70, "‍": 71, "ഔ": 72, "ഴ": 73, "|": 13, "[UNK]": 74, "[PAD]": 75, "<s>": 76, "</s>": 77}
1
+ {"ർ": 0, "സ": 1, "ം": 2, "ഋ": 3, "ഹ": 4, "ധ": 5, "ഢ": 6, "ഠ": 7, "ട": 8, "പ": 9, "യ": 10, "ീ": 11, "ക": 12, "ര": 14, "ഏ": 15, "ഉ": 16, "ൃ": 17, "ൺ": 18, "ണ": 19, "ി": 20, "‌": 21, "ൽ": 22, "ൊ": 23, "ല": 24, "ൗ": 25, "ഘ": 26, "ൌ": 27, "ദ": 28, "ബ": 29, "ഛ": 30, "ഝ": 31, "ഊ": 32, "ന": 33, "ോ": 34, "൱": 35, "ഇ": 36, "ഐ": 37, "ൈ": 38, "ഃ": 39, "ാ": 40, "ച": 41, "േ": 42, "ഥ": 43, "ഡ": 44, "എ": 45, "ഷ": 46, "ആ": 47, "അ": 48, "മ": 49, "ഒ": 50, "ശ": 51, "ഖ": 52, "ഫ": 53, "ു": 54, "ഗ": 55, "ള": 56, "ഓ": 57, "ൂ": 58, "വ": 59, "ഞ": 60, "റ": 61, "ഭ": 62, "ൻ": 63, "ൾ": 64, "ഈ": 65, "്": 66, "െ": 67, "ങ": 68, "ത": 69, "ജ": 70, "‍": 71, "ഔ": 72, "ഴ": 73, "|": 13, "[UNK]": 74, "[PAD]": 75}