|
from abc import abstractmethod |
|
from enum import Enum |
|
from pathlib import Path |
|
from typing import List, Sequence |
|
|
|
import numpy |
|
|
|
|
|
class BasePhoneme(object):
    """
    Abstract base class for the concrete phoneme classes.

    Subclasses are expected to define the class attributes below and to
    implement ``convert``.

    Attributes
    ----------
    phoneme_list : Sequence[str]
        List of phonemes
    num_phoneme : int
        Number of elements in the phoneme list
    space_phoneme : str
        Phoneme that stands for a pause (Japanese comma)
    """

    phoneme_list: Sequence[str]
    num_phoneme: int
    space_phoneme: str

    def __init__(
        self,
        phoneme: str,
        start: float,
        end: float,
    ):
        """
        Parameters
        ----------
        phoneme : str
            Phoneme label
        start : float
            Start time; stored rounded to 2 decimals
        end : float
            End time; stored rounded to 2 decimals
        """
        self.phoneme = phoneme
        self.start = numpy.round(start, decimals=2)
        self.end = numpy.round(end, decimals=2)

    def __repr__(self):
        # The name "Phoneme" is fixed regardless of the concrete subclass.
        return f"Phoneme(phoneme='{self.phoneme}', start={self.start}, end={self.end})"

    def __eq__(self, o: object):
        # Equal only to another BasePhoneme with the same label and times.
        return isinstance(o, BasePhoneme) and (
            self.phoneme == o.phoneme and self.start == o.start and self.end == o.end
        )

    def verify(self):
        """
        Check that the phoneme is one of those defined in ``phoneme_list``.

        Raises
        ------
        AssertionError
            If ``phoneme`` is not contained in ``phoneme_list``
        """
        # Raised explicitly instead of via ``assert`` so that the check is
        # still performed when Python runs with ``-O``.
        if self.phoneme not in self.phoneme_list:
            raise AssertionError(f"{self.phoneme} is not defined.")

    @property
    def phoneme_id(self):
        """
        Get the phoneme id (the index inside ``phoneme_list``).

        Returns
        -------
        id : int
            Index of ``phoneme`` in ``phoneme_list``
        """
        return self.phoneme_list.index(self.phoneme)

    @property
    def duration(self):
        """
        Get the duration of the phoneme.

        Returns
        -------
        duration : float
            ``end - start``
        """
        return self.end - self.start

    @property
    def onehot(self):
        """
        Get a one-hot encoding of the phoneme.

        Returns
        -------
        onehot : numpy.ndarray
            Boolean array of length ``num_phoneme`` that is True only at
            index ``phoneme_id``
        """
        array = numpy.zeros(self.num_phoneme, dtype=bool)
        array[self.phoneme_id] = True
        return array

    @classmethod
    def parse(cls, s: str):
        """
        Build a phoneme object by parsing a string.

        The string is split on whitespace; the first three fields are read
        as start time, end time and phoneme label.

        Parameters
        ----------
        s : str
            String to parse

        Returns
        -------
        phoneme : BasePhoneme
            Phoneme object built from the parsed fields

        Examples
        --------
        >>> BasePhoneme.parse('1.7425000 1.9125000 o:')
        Phoneme(phoneme='o:', start=1.74, end=1.91)
        """
        words = s.split()
        return cls(
            start=float(words[0]),
            end=float(words[1]),
            phoneme=words[2],
        )

    @classmethod
    @abstractmethod
    def convert(cls, phonemes: List["BasePhoneme"]) -> List["BasePhoneme"]:
        """
        Convert a freshly parsed phoneme list; to be implemented by subclasses.
        """
        raise NotImplementedError

    @classmethod
    def load_lab_list(cls, path: Path):
        """
        Load a lab file.

        Every non-blank line is parsed with ``parse``, the resulting list is
        passed through ``convert`` and each phoneme is then checked with
        ``verify``.

        Parameters
        ----------
        path : Path
            Path of the lab file to read

        Returns
        -------
        phonemes : List[BasePhoneme]
            Phoneme objects built from the file content
        """
        # Skip lines that are empty or contain only whitespace; ``parse``
        # would otherwise fail with an IndexError on them.
        phonemes = [cls.parse(s) for s in path.read_text().split("\n") if s.strip()]
        phonemes = cls.convert(phonemes)

        for phoneme in phonemes:
            phoneme.verify()
        return phonemes

    @classmethod
    def save_lab_list(cls, phonemes: List["BasePhoneme"], path: Path):
        """
        Save a list of phoneme objects in lab file format.

        One line is written per phoneme: start, end and label separated by
        tabs, with both times formatted to 2 decimals. Lines are joined with
        a newline and no trailing newline is written.

        Parameters
        ----------
        phonemes : List[BasePhoneme]
            Phoneme objects to save
        path : Path
            Destination path of the lab file
        """
        text = "\n".join(
            [
                f"{numpy.round(p.start, decimals=2):.2f}\t"
                f"{numpy.round(p.end, decimals=2):.2f}\t"
                f"{p.phoneme}"
                for p in phonemes
            ]
        )
        path.write_text(text)
|
|
|
|
|
class JvsPhoneme(BasePhoneme):
    """
    Phoneme set contained in the JVS (Japanese versatile speech) corpus.

    Attributes
    ----------
    phoneme_list : Sequence[str]
        List of phonemes
    num_phoneme : int
        Number of elements in the phoneme list
    space_phoneme : str
        Phoneme that stands for a pause (Japanese comma)
    """

    phoneme_list = (
        "pau",
        "I",
        "N",
        "U",
        "a",
        "b",
        "by",
        "ch",
        "cl",
        "d",
        "dy",
        "e",
        "f",
        "g",
        "gy",
        "h",
        "hy",
        "i",
        "j",
        "k",
        "ky",
        "m",
        "my",
        "n",
        "ny",
        "o",
        "p",
        "py",
        "r",
        "ry",
        "s",
        "sh",
        "t",
        "ts",
        "u",
        "v",
        "w",
        "y",
        "z",
    )
    num_phoneme = len(phoneme_list)
    space_phoneme = "pau"

    @classmethod
    def convert(cls, phonemes: List["JvsPhoneme"]) -> List["JvsPhoneme"]:
        """
        Replace a leading and a trailing sil (silence) with ``space_phoneme`` (pau).

        Only the first and the last element are inspected; an element is
        replaced when its label contains the substring ``"sil"``. The
        elements are modified in place and the same list is returned.

        Parameters
        ----------
        phonemes : List[JvsPhoneme]
            Phoneme list to convert; may be empty

        Returns
        -------
        phonemes : List[JvsPhoneme]
            The converted phoneme list
        """
        # Nothing to inspect in an empty list; indexing it would raise IndexError.
        if not phonemes:
            return phonemes

        if "sil" in phonemes[0].phoneme:
            phonemes[0].phoneme = cls.space_phoneme
        if "sil" in phonemes[-1].phoneme:
            phonemes[-1].phoneme = cls.space_phoneme
        return phonemes
|
|
|
|
|
class OjtPhoneme(BasePhoneme):
    """
    Phoneme set contained in OpenJTalk.

    Attributes
    ----------
    phoneme_list : Sequence[str]
        List of phonemes
    num_phoneme : int
        Number of elements in the phoneme list
    space_phoneme : str
        Phoneme that stands for a pause (Japanese comma)
    """

    phoneme_list = (
        "pau",
        "A",
        "E",
        "I",
        "N",
        "O",
        "U",
        "a",
        "b",
        "by",
        "ch",
        "cl",
        "d",
        "dy",
        "e",
        "f",
        "g",
        "gw",
        "gy",
        "h",
        "hy",
        "i",
        "j",
        "k",
        "kw",
        "ky",
        "m",
        "my",
        "n",
        "ny",
        "o",
        "p",
        "py",
        "r",
        "ry",
        "s",
        "sh",
        "t",
        "ts",
        "ty",
        "u",
        "v",
        "w",
        "y",
        "z",
    )
    num_phoneme = len(phoneme_list)
    space_phoneme = "pau"

    @classmethod
    def convert(cls, phonemes: List["OjtPhoneme"]) -> List["OjtPhoneme"]:
        """
        Replace a leading and a trailing sil (silence) with ``space_phoneme`` (pau).

        Only the first and the last element are inspected; an element is
        replaced when its label contains the substring ``"sil"``. The
        elements are modified in place and the same list is returned.

        Parameters
        ----------
        phonemes : List[OjtPhoneme]
            Phoneme list to convert; may be empty

        Returns
        -------
        phonemes : List[OjtPhoneme]
            The converted phoneme list
        """
        # Nothing to inspect in an empty list; indexing it would raise IndexError.
        if not phonemes:
            return phonemes

        if "sil" in phonemes[0].phoneme:
            phonemes[0].phoneme = cls.space_phoneme
        if "sil" in phonemes[-1].phoneme:
            phonemes[-1].phoneme = cls.space_phoneme
        return phonemes
|
|
|
|
|
class PhonemeType(str, Enum):
    """
    Identifiers of the available phoneme sets.

    Members are also ``str`` instances, so they compare equal to their
    plain string values.
    """

    # Each member's value is identical to its name.
    jvs = "jvs"
    openjtalk = "openjtalk"
|
|
|
|
|
# Lookup table from a PhonemeType member to the phoneme class that implements it.
phoneme_type_to_class = dict(
    (
        (PhonemeType.jvs, JvsPhoneme),
        (PhonemeType.openjtalk, OjtPhoneme),
    )
)
|
|