litagin's picture
init
2916d61
raw
history blame
5.14 kB
from enum import Enum
from re import findall, fullmatch
from typing import List, Optional
from pydantic import BaseModel, Field, validator
# Inclusive bounds enforced on UserDictWord.priority (via Field ge/le below).
USER_DICT_MIN_PRIORITY = 0
USER_DICT_MAX_PRIORITY = 10
class UserDictWord(BaseModel):
    """
    Information used to compile one user-dictionary entry.

    The Japanese ``title`` strings mirror the dictionary's CSV columns
    (surface form, part of speech, reading, pronunciation, accent, ...).
    Field declaration order matters: the ``mora_count`` validator reads
    ``pronunciation`` and ``accent_type`` from ``values``, so those fields
    must be declared before it.
    """

    surface: str = Field(title="表層形")
    priority: int = Field(
        title="優先度", ge=USER_DICT_MIN_PRIORITY, le=USER_DICT_MAX_PRIORITY
    )
    context_id: int = Field(title="文脈ID", default=1348)
    part_of_speech: str = Field(title="品詞")
    part_of_speech_detail_1: str = Field(title="品詞細分類1")
    part_of_speech_detail_2: str = Field(title="品詞細分類2")
    part_of_speech_detail_3: str = Field(title="品詞細分類3")
    inflectional_type: str = Field(title="活用型")
    inflectional_form: str = Field(title="活用形")
    stem: str = Field(title="原形")
    yomi: str = Field(title="読み")
    pronunciation: str = Field(title="発音")
    accent_type: int = Field(title="アクセント型")
    mora_count: Optional[int] = Field(title="モーラ数", default=None)
    accent_associative_rule: str = Field(title="アクセント結合規則")

    class Config:
        validate_assignment = True

    @validator("surface")
    def convert_to_zenkaku(cls, surface):
        """Convert every printable ASCII character (U+0021..U+007E) in the
        surface form to its full-width (zenkaku) counterpart
        (U+FF01..U+FF5E)."""
        # str.translate accepts a {codepoint: replacement} mapping directly.
        zenkaku_table = {0x21 + offset: chr(0xFF01 + offset) for offset in range(94)}
        return surface.translate(zenkaku_table)

    @validator("pronunciation", pre=True)
    def check_is_katakana(cls, pronunciation):
        """Require the pronunciation to be katakana (plus the long-vowel
        mark) and reject small-kana (sutegana) sequences that cannot be
        pronounced."""
        if not fullmatch(r"[ァ-ヴー]+", pronunciation):
            raise ValueError("発音は有効なカタカナでなくてはいけません。")
        # Small kana; the final entry "ッ" is special-cased below.
        sutegana = ["ァ", "ィ", "ゥ", "ェ", "ォ", "ャ", "ュ", "ョ", "ヮ", "ッ"]
        for index, char in enumerate(pronunciation):
            following = (
                pronunciation[index + 1] if index + 1 < len(pronunciation) else None
            )
            if char in sutegana and following is not None:
                # A small kana followed by "ッ" is legitimate (e.g. キャット),
                # so only reject: a small kana followed by a non-"ッ" small
                # kana, or "ッ" immediately followed by another "ッ".
                if following in sutegana[:-1] or (
                    char == sutegana[-1] and following == sutegana[-1]
                ):
                    raise ValueError("無効な発音です。(捨て仮名の連続)")
            # "ヮ" is only valid directly after ク or グ (くゎ / ぐゎ).
            # NOTE(review): a leading "ヮ" (index 0) is accepted by this
            # check, matching the original behavior.
            if char == "ヮ":
                if index != 0 and pronunciation[index - 1] not in ["ク", "グ"]:
                    raise ValueError(
                        "無効な発音です。(「くゎ」「ぐゎ」以外の「ゎ」の使用)"
                    )
        return pronunciation

    @validator("mora_count", pre=True, always=True)
    def check_mora_count_and_accent_type(cls, mora_count, values):
        """Derive mora_count from the pronunciation when it was not given,
        then check that accent_type lies within [0, mora_count]."""
        if "pronunciation" not in values or "accent_type" not in values:
            # An earlier validator already failed; let the error be reported
            # at the appropriate field instead of here.
            return mora_count
        if mora_count is None:
            # Multi-character morae are matched before the one-kana fallback;
            # alternation order in the pattern makes that precedence explicit.
            mora_rules = "|".join(
                [
                    "[イ][ェ]|[ヴ][ャュョ]|[トド][ゥ]|[テデ][ィャュョ]|[デ][ェ]|[クグ][ヮ]",
                    "[キシチニヒミリギジビピ][ェャュョ]",
                    "[ツフヴ][ァ]|[ウスツフヴズ][ィ]|[ウツフヴ][ェォ]",
                    "[ァ-ヴー]",
                ]
            )
            mora_count = len(findall(f"(?:{mora_rules})", values["pronunciation"]))
        if not 0 <= values["accent_type"] <= mora_count:
            raise ValueError(
                "誤ったアクセント型です({})。 expect: 0 <= accent_type <= {}".format(
                    values["accent_type"], mora_count
                )
            )
        return mora_count
class PartOfSpeechDetail(BaseModel):
    """
    Per-part-of-speech information (dictionary column values shared by all
    words of one word type).
    """
    part_of_speech: str = Field(title="品詞")
    part_of_speech_detail_1: str = Field(title="品詞細分類1")
    part_of_speech_detail_2: str = Field(title="品詞細分類2")
    part_of_speech_detail_3: str = Field(title="品詞細分類3")
    # context_id is the dictionary's left/right context ID
    # https://github.com/VOICEVOX/open_jtalk/blob/427cfd761b78efb6094bea3c5bb8c968f0d711ab/src/mecab-naist-jdic/_left-id.def # noqa
    context_id: int = Field(title="文脈ID")
    cost_candidates: List[int] = Field(title="コストのパーセンタイル")
    accent_associative_rules: List[str] = Field(title="アクセント結合規則の一覧")
class WordTypes(str, Enum):
    """
    Enum used to validate the word_type argument in FastAPI endpoints.
    """
    PROPER_NOUN = "PROPER_NOUN"
    COMMON_NOUN = "COMMON_NOUN"
    VERB = "VERB"
    ADJECTIVE = "ADJECTIVE"
    SUFFIX = "SUFFIX"