|
{ |
|
"version": "1.0", |
|
"truncation": null, |
|
"padding": null, |
|
"added_tokens": [ |
|
{ |
|
"id": 0, |
|
"content": "UNK", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 1, |
|
"content": "PAD", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 2, |
|
"content": "WORD_BOUNDARY", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 3, |
|
"content": "UTT_BOUNDARY", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
} |
|
], |
|
"normalizer": { |
|
"type": "Sequence", |
|
"normalizers": [ |
|
{ |
|
"type": "Strip", |
|
"strip_left": true, |
|
"strip_right": true |
|
} |
|
] |
|
}, |
|
"pre_tokenizer": { |
|
"type": "WhitespaceSplit" |
|
}, |
|
"post_processor": { |
|
"type": "TemplateProcessing", |
|
"single": [ |
|
{ |
|
"SpecialToken": { |
|
"id": "UTT_BOUNDARY", |
|
"type_id": 0 |
|
} |
|
}, |
|
{ |
|
"Sequence": { |
|
"id": "A", |
|
"type_id": 0 |
|
} |
|
} |
|
], |
|
"pair": [ |
|
{ |
|
"SpecialToken": { |
|
"id": "UTT_BOUNDARY", |
|
"type_id": 0 |
|
} |
|
}, |
|
{ |
|
"Sequence": { |
|
"id": "A", |
|
"type_id": 0 |
|
} |
|
}, |
|
{ |
|
"SpecialToken": { |
|
"id": "UTT_BOUNDARY", |
|
"type_id": 0 |
|
} |
|
}, |
|
{ |
|
"Sequence": { |
|
"id": "B", |
|
"type_id": 1 |
|
} |
|
} |
|
], |
|
"special_tokens": { |
|
"UTT_BOUNDARY": { |
|
"id": "UTT_BOUNDARY", |
|
"ids": [ |
|
3 |
|
], |
|
"tokens": [ |
|
"UTT_BOUNDARY" |
|
] |
|
} |
|
} |
|
}, |
|
"decoder": null, |
|
"model": { |
|
"type": "WordLevel", |
|
"vocab": { |
|
"UNK": 0, |
|
"PAD": 1, |
|
"WORD_BOUNDARY": 2, |
|
"UTT_BOUNDARY": 3, |
|
"j": 4, |
|
"ɐ˥": 5, |
|
"t": 6, |
|
"k": 7, |
|
"ɐu˧˥": 8, |
|
"i˨": 9, |
|
"n": 10, |
|
"i˧˩̰": 11, |
|
"y˨": 12, |
|
"s": 13, |
|
"ɐ˨": 14, |
|
"p": 15, |
|
"ts": 16, |
|
"ɐu˥": 17, |
|
"ɪ̞˧˥": 18, |
|
"ŋ": 19, |
|
"ɵ˧": 20, |
|
"a̞˧": 21, |
|
"l": 22, |
|
"ʊ̟˥": 23, |
|
"a̞˧˩̰": 24, |
|
"ɛ˥": 25, |
|
"ei˩˧": 26, |
|
"w": 27, |
|
"a̞˨": 28, |
|
"ɐi˧˥": 29, |
|
"a̞˧˥": 30, |
|
"m̩˧˥": 31, |
|
"m": 32, |
|
"ou˥": 33, |
|
"ei˥": 34, |
|
"i˧": 35, |
|
"ɔ̽˧˥": 36, |
|
"tʰ": 37, |
|
"i˥": 38, |
|
"f": 39, |
|
"aːĭ˧": 40, |
|
"h": 41, |
|
"ɵy˧": 42, |
|
"a̞˥": 43, |
|
"ei˧˩̰": 44, |
|
"ou˨": 45, |
|
"ɔ̽˧": 46, |
|
"ɐi˧˩̰": 47, |
|
"u˧": 48, |
|
"ɔːĭ˥": 49, |
|
"ɐu˨": 50, |
|
"ei˧˥": 51, |
|
"ɐi˨": 52, |
|
"ʊ̟˧˩̰": 53, |
|
"ʊ̟˨": 54, |
|
"a̞˩˧": 55, |
|
"ou˧˥": 56, |
|
"aːĭ˧˥": 57, |
|
"ɔ̽˨": 58, |
|
"ɛ˩˧": 59, |
|
"ɪ̞˨": 60, |
|
"iːŭ˧": 61, |
|
"ɛ˧˩̰": 62, |
|
"m̩˧˩̰": 63, |
|
"ɵ˧˥": 64, |
|
"ei˧": 65, |
|
"ɐu˧˩̰": 66, |
|
"m̩˧": 67, |
|
"ɐ˧˥": 68, |
|
"ɐu˩˧": 69, |
|
"ɐi˥": 70, |
|
"ɔ̽˥": 71, |
|
"ɔ̽˧˩̰": 72, |
|
"ɔːĭ˧": 73, |
|
"ou˩˧": 74, |
|
"m̩˥": 75, |
|
"ɐ˧": 76, |
|
"tsʰ": 77, |
|
"ɛ˧˥": 78, |
|
"i˧˥": 79, |
|
"ɔ̽˩˧": 80, |
|
"kʰ": 81, |
|
"ɐ˧˩̰": 82, |
|
"aːŭ˧˥": 83, |
|
"pʰ": 84, |
|
"aːĭ˧˩̰": 85, |
|
"ɵy˩˧": 86, |
|
"ɛ˧": 87, |
|
"u˧˥": 88, |
|
"ɛ˨": 89, |
|
"ʊ̟˧": 90, |
|
"u˥": 91, |
|
"m̩˩˧": 92, |
|
"aːŭ˧": 93, |
|
"œ̞˩˧": 94, |
|
"i˩˧": 95, |
|
"ɪ̞˧˩̰": 96, |
|
"u˨": 97, |
|
"ɪ̞˥": 98, |
|
"iːŭ˧˩̰": 99, |
|
"œ̞˧˥": 100, |
|
"y˧": 101, |
|
"uːĭ˩˧": 102, |
|
"uːĭ˥": 103, |
|
"ɵy˧˥": 104, |
|
"y˧˩̰": 105, |
|
"ɔːĭ˧˥": 106, |
|
"ɛ": 107, |
|
"ou˧": 108, |
|
"ei˨": 109, |
|
"ɵ˥": 110, |
|
"u˧˩̰": 111, |
|
"y˥": 112, |
|
"œ̞˥": 113, |
|
"œ̞˧˩̰": 114, |
|
"aːĭ˨": 115, |
|
"ɐ˩˧": 116, |
|
"œ̞˧": 117, |
|
"uːĭ˧˥": 118, |
|
"ɐu˧": 119, |
|
"ɐi˩˧": 120, |
|
"ɐi˧": 121, |
|
"ou˧˩̰": 122, |
|
"aːĭ˥": 123, |
|
"aːŭ˥": 124, |
|
"ŋ˩˧": 125, |
|
"y˧˥": 126, |
|
"iːŭ˥": 127, |
|
"ɔːĭ˨": 128, |
|
"ʊ̟˧˥": 129, |
|
"iːŭ˧˥": 130, |
|
"ɵy˥": 131, |
|
"ɔːĭ˧˩̰": 132, |
|
"uːĭ˧": 133, |
|
"ɵy˧˩̰": 134, |
|
"œ̞˨": 135, |
|
"m̩˨": 136, |
|
"aːŭ˧˩̰": 137, |
|
"y˩˧": 138, |
|
"aːŭ˩˧": 139, |
|
"aːĭ˩˧": 140, |
|
"uːĭ˨": 141, |
|
"ɵy˨": 142, |
|
"aːŭ˨": 143, |
|
"ɪ̞˧": 144, |
|
"ɵ˨": 145, |
|
"iːŭ˩˧": 146, |
|
"iːŭ˨": 147, |
|
"ɵ˧˩̰": 148, |
|
"uːĭ˧˩̰": 149, |
|
"u˩˧": 150, |
|
"ŋ˧˩̰": 151 |
|
}, |
|
"unk_token": "UNK" |
|
} |
|
} |