File size: 6,830 Bytes
9dd3461 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 |
import os
from os.path import exists
import pkg_resources
import six
from tqdm.auto import tqdm
if six.PY2:
from urllib import urlretrieve
else:
from urllib.request import urlretrieve
import tarfile
try:
from .version import __version__ # NOQA
except ImportError:
raise ImportError("BUG: version.py doesn't exist. Please file a bug report.")
from .htsengine import HTSEngine
from .openjtalk import OpenJTalk
from .utils import merge_njd_marine_features
# Dictionary directory
# defaults to the package directory where the dictionary will be automatically downloaded
OPEN_JTALK_DICT_DIR = os.environ.get(
"OPEN_JTALK_DICT_DIR",
pkg_resources.resource_filename(__name__, "open_jtalk_dic_utf_8-1.11"),
).encode("utf-8")
_dict_download_url = "https://github.com/r9y9/open_jtalk/releases/download/v1.11.1"
_DICT_URL = f"{_dict_download_url}/open_jtalk_dic_utf_8-1.11.tar.gz"
# Default mei_normal.voice for HMM-based TTS
DEFAULT_HTS_VOICE = pkg_resources.resource_filename(
__name__, "htsvoice/mei_normal.htsvoice"
).encode("utf-8")
# Global instance of OpenJTalk
_global_jtalk = None
# Global instance of HTSEngine
# mei_normal.voice is used as default
_global_htsengine = None
# Global instance of Marine
_global_marine = None
# https://github.com/tqdm/tqdm#hooks-and-callbacks
class _TqdmUpTo(tqdm): # type: ignore
def update_to(self, b=1, bsize=1, tsize=None):
if tsize is not None:
self.total = tsize
return self.update(b * bsize - self.n)
def _extract_dic():
global OPEN_JTALK_DICT_DIR
filename = pkg_resources.resource_filename(__name__, "dic.tar.gz")
print('Downloading: "{}"'.format(_DICT_URL))
with _TqdmUpTo(
unit="B",
unit_scale=True,
unit_divisor=1024,
miniters=1,
desc="dic.tar.gz",
) as t: # all optional kwargs
urlretrieve(_DICT_URL, filename, reporthook=t.update_to)
t.total = t.n
print("Extracting tar file {}".format(filename))
with tarfile.open(filename, mode="r|gz") as f:
f.extractall(path=pkg_resources.resource_filename(__name__, ""))
OPEN_JTALK_DICT_DIR = pkg_resources.resource_filename(
__name__, "open_jtalk_dic_utf_8-1.11"
).encode("utf-8")
os.remove(filename)
def _lazy_init():
if not exists(OPEN_JTALK_DICT_DIR):
_extract_dic()
def g2p(*args, **kwargs):
"""Grapheme-to-phoeneme (G2P) conversion
This is just a convenient wrapper around `run_frontend`.
Args:
text (str): Unicode Japanese text.
kana (bool): If True, returns the pronunciation in katakana, otherwise in phone.
Default is False.
join (bool): If True, concatenate phones or katakana's into a single string.
Default is True.
Returns:
str or list: G2P result in 1) str if join is True 2) list if join is False.
"""
global _global_jtalk
if _global_jtalk is None:
_lazy_init()
_global_jtalk = OpenJTalk(dn_mecab=OPEN_JTALK_DICT_DIR)
return _global_jtalk.g2p(*args, **kwargs)
def estimate_accent(njd_features):
"""Accent estimation using marine
This function requires marine (https://github.com/6gsn/marine)
Args:
njd_result (list): features generated by OpenJTalk.
Returns:
list: features for NJDNode with estimation results by marine.
"""
global _global_marine
if _global_marine is None:
try:
from marine.predict import Predictor
except BaseException:
raise ImportError(
"Please install marine by `pip install pyopenjtalk[marine]`"
)
_global_marine = Predictor()
from marine.utils.openjtalk_util import convert_njd_feature_to_marine_feature
marine_feature = convert_njd_feature_to_marine_feature(njd_features)
marine_results = _global_marine.predict(
[marine_feature], require_open_jtalk_format=True
)
njd_features = merge_njd_marine_features(njd_features, marine_results)
return njd_features
def extract_fullcontext(text, run_marine=False):
"""Extract full-context labels from text
Args:
text (str): Input text
run_marine (bool): Whether to estimate accent using marine.
Default is False. If you want to activate this option, you need to install marine
by `pip install pyopenjtalk[marine]`
Returns:
list: List of full-context labels
"""
njd_features = run_frontend(text)
if run_marine:
njd_features = estimate_accent(njd_features)
return make_label(njd_features)
def synthesize(labels, speed=1.0, half_tone=0.0):
"""Run OpenJTalk's speech synthesis backend
Args:
labels (list): Full-context labels
speed (float): speech speed rate. Default is 1.0.
half_tone (float): additional half-tone. Default is 0.
Returns:
np.ndarray: speech waveform (dtype: np.float64)
int: sampling frequency (defualt: 48000)
"""
if isinstance(labels, tuple) and len(labels) == 2:
labels = labels[1]
global _global_htsengine
if _global_htsengine is None:
_global_htsengine = HTSEngine(DEFAULT_HTS_VOICE)
sr = _global_htsengine.get_sampling_frequency()
_global_htsengine.set_speed(speed)
_global_htsengine.add_half_tone(half_tone)
return _global_htsengine.synthesize(labels), sr
def tts(text, speed=1.0, half_tone=0.0, run_marine=False):
"""Text-to-speech
Args:
text (str): Input text
speed (float): speech speed rate. Default is 1.0.
half_tone (float): additional half-tone. Default is 0.
run_marine (bool): Whether to estimate accent using marine.
Default is False. If you want activate this option, you need to install marine
by `pip install pyopenjtalk[marine]`
Returns:
np.ndarray: speech waveform (dtype: np.float64)
int: sampling frequency (defualt: 48000)
"""
return synthesize(
extract_fullcontext(text, run_marine=run_marine), speed, half_tone
)
def run_frontend(text):
"""Run OpenJTalk's text processing frontend
Args:
text (str): Unicode Japanese text.
Returns:
list: features for NJDNode.
"""
global _global_jtalk
if _global_jtalk is None:
_lazy_init()
_global_jtalk = OpenJTalk(dn_mecab=OPEN_JTALK_DICT_DIR)
return _global_jtalk.run_frontend(text)
def make_label(njd_features):
"""Make full-context label using features
Args:
njd_features (list): features for NJDNode.
Returns:
list: full-context labels.
"""
global _global_jtalk
if _global_jtalk is None:
_lazy_init()
_global_jtalk = OpenJTalk(dn_mecab=OPEN_JTALK_DICT_DIR)
return _global_jtalk.make_label(njd_features)
|