speecht5_tts_sinhala / tokenizer.json
Ransaka's picture
End of training
9b40992
raw
history blame contribute delete
No virus
2.69 kB
{
"version": "1.0",
"truncation": null,
"padding": null,
"added_tokens": [
{
"id": 0,
"content": "<unk>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 67,
"content": "<s>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 68,
"content": "</s>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 69,
"content": "<pad>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 70,
"content": "<mask>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": {
"type": "NFKC"
},
"pre_tokenizer": {
"type": "Metaspace",
"replacement": "▁",
"add_prefix_space": true,
"prepend_scheme": "always"
},
"post_processor": null,
"decoder": {
"type": "Metaspace",
"replacement": "▁",
"add_prefix_space": true,
"prepend_scheme": "always"
},
"model": {
"type": "BPE",
"dropout": null,
"unk_token": "<unk>",
"continuing_subword_prefix": null,
"end_of_word_suffix": null,
"fuse_unk": false,
"byte_fallback": false,
"vocab": {
"<unk>": 0,
"ං": 1,
"අ": 2,
"ආ": 3,
"ඇ": 4,
"ඈ": 5,
"ඉ": 6,
"ඊ": 7,
"උ": 8,
"ඌ": 9,
"එ": 10,
"ඒ": 11,
"ඓ": 12,
"ඔ": 13,
"ඕ": 14,
"ක": 15,
"ඛ": 16,
"ග": 17,
"ඝ": 18,
"ඟ": 19,
"ච": 20,
"ඡ": 21,
"ජ": 22,
"ඤ": 23,
"ට": 24,
"ඨ": 25,
"ඩ": 26,
"ඪ": 27,
"ණ": 28,
"ඬ": 29,
"ත": 30,
"ථ": 31,
"ද": 32,
"ධ": 33,
"න": 34,
"ඳ": 35,
"ප": 36,
"ඵ": 37,
"බ": 38,
"භ": 39,
"ම": 40,
"ඹ": 41,
"ය": 42,
"ර": 43,
"ල": 44,
"ව": 45,
"ශ": 46,
"ෂ": 47,
"ස": 48,
"හ": 49,
"ළ": 50,
"ෆ": 51,
"්": 52,
"ා": 53,
"ැ": 54,
"ෑ": 55,
"ි": 56,
"ී": 57,
"ු": 58,
"ූ": 59,
"ෙ": 60,
"ේ": 61,
"ෛ": 62,
"ො": 63,
"ෝ": 64,
"ෞ": 65,
"▁": 66
},
"merges": []
}
}