MeloTTS

Runtime error

App Files Files Community

MeloTTS / melo /text /fr_phonemizer /punctuation.py

mrfakename

Init

4300fed over 1 year ago

raw

history blame

5.45 kB

	import collections
	import re
	from enum import Enum

	import six

	_DEF_PUNCS = ';:,.!?¡¿—…"«»“”'

	_PUNC_IDX = collections.namedtuple("_punc_index", ["punc", "position"])


	class PuncPosition(Enum):
	    """Location of a punctuation match relative to the text it was found in."""

	    BEGIN = 0  # first match, and the text starts with it
	    END = 1  # last match, and the text ends with it
	    MIDDLE = 2  # any other match (the default)
	    ALONE = 3  # the single match is the entire text


	class Punctuation:
	    """Handle punctuations in text.

	    Just strip punctuations from text or strip and restore them later.

	    Args:
	        puncs (str): The punctuations to be processed. Defaults to `_DEF_PUNCS`.

	    Example:
	        >>> punc = Punctuation()
	        >>> punc.strip("This is. example !")
	        'This is example'

	        >>> text_striped, punc_map = punc.strip_to_restore("This is. example !")
	        >>> ' '.join(text_striped)
	        'This is example'

	        >>> text_restored = punc.restore(text_striped, punc_map)
	        >>> text_restored[0]
	        'This is. example !'
	    """

	    def __init__(self, puncs: str = _DEF_PUNCS):
	        # Goes through the property setter, which validates and compiles the regex.
	        self.puncs = puncs

	    @staticmethod
	    def default_puncs():
	        """Return default set of punctuations."""
	        return _DEF_PUNCS

	    @property
	    def puncs(self):
	        """The punctuation characters, de-duplicated in first-seen order."""
	        return self._puncs

	    @puncs.setter
	    def puncs(self, value):
	        """Validate `value`, de-duplicate it and rebuild `puncs_regular_exp`.

	        Raises:
	            ValueError: If `value` is not a `str`.
	        """
	        # `six.string_types` is `(str,)` on Python 3, and the rf-string below
	        # already requires Python 3, so a plain `str` check is equivalent.
	        if not isinstance(value, str):
	            raise ValueError("[!] Punctuations must be of type str.")
	        # remove duplicates without changing the order
	        self._puncs = "".join(dict.fromkeys(value))
	        # One or more punctuation runs, each with OPTIONAL surrounding whitespace.
	        # The whitespace must be optional (`\s*`), otherwise punctuation glued to
	        # a word ("is.") or sitting at the start/end of the text never matches.
	        self.puncs_regular_exp = re.compile(rf"(\s*[{re.escape(self._puncs)}]+\s*)+")

	    def strip(self, text):
	        """Remove all the punctuations by replacing with `space`.

	        Leading and trailing whitespace of the result is removed as well.

	        Args:
	            text (str): The text to be processed.

	        Returns:
	            str: The text without punctuations.

	        Example::

	            "This is. example !" -> "This is example"
	        """
	        return self.puncs_regular_exp.sub(" ", text).strip()

	    def strip_to_restore(self, text):
	        """Remove punctuations from text to restore them later.

	        Args:
	            text (str): The text to be processed.

	        Returns:
	            Tuple[List[str], List[_PUNC_IDX]]: The text chunks found between
	            punctuation matches, and the punctuation map (matched string plus
	            its `PuncPosition`) to pass to `restore()`.

	        Examples ::

	            "This is. example !" -> (["This is", "example"],
	                                     [(". ", MIDDLE), (" !", END)])

	        """
	        text, puncs = self._strip_to_restore(text)
	        return text, puncs

	    def _strip_to_restore(self, text):
	        """Auxiliary method for Punctuation.strip_to_restore()"""
	        matches = list(self.puncs_regular_exp.finditer(text))
	        if not matches:
	            return [text], []
	        # the text is only punctuations
	        if len(matches) == 1 and matches[0].group() == text:
	            return [], [_PUNC_IDX(text, PuncPosition.ALONE)]
	        # build a punctuation map to be used later to restore punctuations
	        puncs = []
	        for match in matches:
	            position = PuncPosition.MIDDLE
	            if match is matches[0] and text.startswith(match.group()):
	                position = PuncPosition.BEGIN
	            elif match is matches[-1] and text.endswith(match.group()):
	                position = PuncPosition.END
	            puncs.append(_PUNC_IDX(match.group(), position))
	        # convert str text to a List[str], each item is separated by a punctuation
	        splitted_text = []
	        for idx, punc in enumerate(puncs):
	            # cut at the first occurrence only; the remainder is handled by the
	            # following iterations
	            prefix, _, suffix = text.partition(punc.punc)
	            splitted_text.append(prefix)
	            # if the text does not end with a punctuation, add it to the last item
	            if idx == len(puncs) - 1 and suffix:
	                splitted_text.append(suffix)
	            text = suffix
	        return splitted_text, puncs

	    @classmethod
	    def restore(cls, text, puncs):
	        """Restore punctuation in a text.

	        Args:
	            text (List[str]): The text chunks, as returned by `strip_to_restore()`.
	            puncs (List[_PUNC_IDX]): The punctuation map, as returned by
	                `strip_to_restore()`.

	        Returns:
	            List[str]: The chunks with the punctuations merged back in.

	        Examples ::

	            ['This is', 'example'], [('. ', MIDDLE), (' !', END)] -> ['This is. example !']

	        """
	        return cls._restore(text, puncs, 0)

	    @classmethod
	    def _restore(cls, text, puncs, num):  # pylint: disable=too-many-return-statements
	        """Auxiliary method for Punctuation.restore()

	        `num` is only threaded through the recursion; it is never read.
	        """
	        if not puncs:
	            return text

	        # nothing have been phonemized, returns the puncs alone
	        if not text:
	            return ["".join(m.punc for m in puncs)]

	        current = puncs[0]

	        if current.position == PuncPosition.BEGIN:
	            return cls._restore([current.punc + text[0]] + text[1:], puncs[1:], num)

	        if current.position == PuncPosition.END:
	            return [text[0] + current.punc] + cls._restore(text[1:], puncs[1:], num + 1)

	        if current.position == PuncPosition.ALONE:
	            # `_PUNC_IDX` exposes the matched string as `.punc`; it has no `.mark`.
	            return [current.punc] + cls._restore(text, puncs[1:], num + 1)

	        # POSITION == MIDDLE
	        if len(text) == 1:  # pragma: nocover
	            # a corner case where the final part of an intermediate
	            # mark (I) has not been phonemized
	            return cls._restore([text[0] + current.punc], puncs[1:], num)

	        return cls._restore([text[0] + current.punc + text[1]] + text[2:], puncs[1:], num)


	# if __name__ == "__main__":
	# punc = Punctuation()
	# text = "This is. This is, example!"

	# print(punc.strip(text))

	# split_text, puncs = punc.strip_to_restore(text)
	# print(split_text, " ---- ", puncs)

	# restored_text = punc.restore(split_text, puncs)
	# print(restored_text)