speecht5_tts_jvs_ver1_e20_openjtalk_longer_20230809-031157_tokenizer
speecht5_openjtalk_tokenizer.py
CHANGED
@@ -34,7 +34,6 @@ def _g2p_with_np(text: str, np_lsit: str) -> List[str]:
 
 VOCAB_FILES_NAMES = {
     "vocab_file": "vocab.json",
-    "tokenizer_file": "tokenizer.json",
 }
 
 PRETRAINED_VOCAB_FILES_MAP = {
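
Editor's note on the hunk above: the commit drops the "tokenizer_file" entry, so VOCAB_FILES_NAMES now declares only the slow-tokenizer vocabulary file. Presumably no fast-tokenizer tokenizer.json is ever produced for this OpenJTalk-based tokenizer, so keeping the entry would advertise a file that never exists. The resulting constant, for quick reference:

# Only the JSON vocabulary file is declared after this commit.
VOCAB_FILES_NAMES = {
    "vocab_file": "vocab.json",
}
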
@@ -119,24 +118,7 @@ class SpeechT5OpenjtalkTokenizer(SpeechT5Tokenizer):
         with open(vocab_path, "w", encoding="utf-8") as f:
             json.dump(self.label2id, f, ensure_ascii=False, indent=2)
 
-
-        #     f"special_tokens_map{filename_prefix}"
-        # )
-        # with open(special_tokens_path, "w", encoding="utf-8") as f:
-        #     json.dump(
-        #         {
-        #             "bos_token": self.bos_token,
-        #             "eos_token": self.eos_token,
-        #             "unk_token": self.unk_token,
-        #             "pad_token": self.pad_token,
-        #             "mask_token": self.mask_token,
-        #         },
-        #         f,
-        #         ensure_ascii=False,
-        #         indent=2,
-        #     )
-
-        return str(vocab_path), None  # str(special_tokens_path)
+        return (str(vocab_path),)
 
     def _tokenize(self, text: str) -> List[str]:
         return _g2p_with_np(text, self.non_phenome_characters)
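
Editor's note on the second hunk: the dead, commented-out special_tokens_map writer is deleted, and the return value changes from the two-element `str(vocab_path), None` to the one-element tuple `(str(vocab_path),)`. That matches the Tuple[str] contract of Hugging Face's PreTrainedTokenizer.save_vocabulary: save_pretrained collects the paths this method returns into its list of written files, and a stray None in that tuple can trip up that bookkeeping. Below is a minimal sketch of the method as it would sit inside SpeechT5OpenjtalkTokenizer after this commit; the filename_prefix handling and vocab_path construction are assumptions, since the diff does not show the lines above the hunk.

import json
from pathlib import Path
from typing import Optional, Tuple

def save_vocabulary(
    self, save_directory: str, filename_prefix: Optional[str] = None
) -> Tuple[str]:
    # Assumed path construction; the diff only shows the write and the return.
    prefix = f"{filename_prefix}-" if filename_prefix else ""
    vocab_path = Path(save_directory) / f"{prefix}vocab.json"
    # Write the label-to-id mapping as vocab.json (shown in the diff).
    with open(vocab_path, "w", encoding="utf-8") as f:
        json.dump(self.label2id, f, ensure_ascii=False, indent=2)
    # One-element tuple of saved file paths, per the
    # PreTrainedTokenizer.save_vocabulary contract.
    return (str(vocab_path),)
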