import abc
from typing import List, Optional, Tuple

from .punctuation import Punctuation


class BasePhonemizer(abc.ABC):
    """Base phonemizer class

    Phonemization follows the following steps:
        1. Preprocessing:
            - strip leading/trailing whitespace
            - remove punctuation
            - keep track of punctuation marks

        2. Phonemization:
            - convert text to phonemes

        3. Postprocessing:
            - join phonemes
            - restore punctuation marks

    Args:
        language (str):
            Language used by the phonemizer.

        punctuations (List[str]):
            List of punctuation marks to be preserved.

        keep_puncs (bool):
            Whether to preserve punctuation marks or not.

    Raises:
        RuntimeError: If the backend is not available, or if ``language`` is
            rejected by ``_init_language``.
    """

    def __init__(self, language, punctuations=Punctuation.default_puncs(), keep_puncs=False):
        # ensure the backend is installed on the system
        if not self.is_available():
            raise RuntimeError(f"{self.name()} not installed on your system")  # pragma: nocover

        # ensure the backend supports the requested language
        self._language = self._init_language(language)

        # setup punctuation processing
        self._keep_puncs = keep_puncs
        self._punctuator = Punctuation(punctuations)

    def _init_language(self, language):
        """Language initialization

        This method may be overloaded in child classes (see Segments backend)

        Args:
            language: Language code requested by the caller.

        Returns:
            The value stored as the phonemizer's language (here, ``language``
            unchanged).

        Raises:
            RuntimeError: If ``is_supported_language(language)`` is false.
        """
        if not self.is_supported_language(language):
            raise RuntimeError(f'language "{language}" is not supported by the ' f"{self.name()} backend")
        return language

    @property
    def language(self):
        """The language code configured to be used for phonemization"""
        return self._language

    @staticmethod
    @abc.abstractmethod
    def name() -> str:
        """The name of the backend"""
        ...

    @classmethod
    @abc.abstractmethod
    def is_available(cls) -> bool:
        """Returns True if the backend is installed, False otherwise"""
        ...

    @classmethod
    @abc.abstractmethod
    def version(cls):
        """Return the backend version as a tuple (major, minor, patch)"""
        ...

    @staticmethod
    @abc.abstractmethod
    def supported_languages():
        """Return a dict of language codes -> name supported by the backend"""
        ...

    def is_supported_language(self, language) -> bool:
        """Returns True if `language` is supported by the backend"""
        return language in self.supported_languages()

    @abc.abstractmethod
    def _phonemize(self, text, separator):
        """The main phonemization method

        Called by ``phonemize`` once per preprocessed text chunk.

        Args:
            text: One chunk of preprocessed text.
            separator: Separator forwarded unchanged from ``phonemize``.

        Returns:
            The phonemized form of ``text``.
        """

    def _phonemize_preprocess(self, text) -> Tuple[List[str], List]:
        """Preprocess the text before phonemization

        1. strip leading and trailing whitespace
        2. remove punctuation (remembering the marks when ``keep_puncs`` is set)

        Override this if you need a different behaviour

        Args:
            text (str): Raw input text.

        Returns:
            A tuple ``(chunks, punctuations)``. When punctuation is not kept,
            ``chunks`` holds a single stripped string and ``punctuations`` is
            an empty list.
        """
        text = text.strip()
        if self._keep_puncs:
            # a tuple (text, punctuation marks)
            return self._punctuator.strip_to_restore(text)
        return [self._punctuator.strip(text)], []

    def _phonemize_postprocess(self, phonemized, punctuations) -> str:
        """Postprocess the raw phonemized output

        Override this if you need a different behaviour

        Args:
            phonemized (List[str]): Phonemized chunks, in input order.
            punctuations (List): Punctuation marks returned by
                ``_phonemize_preprocess``.

        Returns:
            The first element of the restored output when punctuation is
            kept, otherwise the first phonemized chunk.
        """
        if self._keep_puncs:
            return self._punctuator.restore(phonemized, punctuations)[0]
        return phonemized[0]

    def phonemize(self, text: str, separator="|", language: Optional[str] = None) -> str:  # pylint: disable=unused-argument
        """Returns the `text` phonemized for the given language

        Args:
            text (str):
                Text to be phonemized.

            separator (str):
                string separator used between phonemes. Defaults to '|'.

            language (str, optional):
                Not used by this implementation.

        Returns:
            (str): Phonemized text
        """
        chunks, punctuations = self._phonemize_preprocess(text)
        phonemized = [self._phonemize(chunk, separator) for chunk in chunks]
        return self._phonemize_postprocess(phonemized, punctuations)

    def print_logs(self, level: int = 0) -> None:
        """Print the configured language and backend name.

        Args:
            level (int): Number of tab characters used to indent each line.
        """
        indent = "\t" * level
        print(f"{indent}| > phoneme language: {self.language}")
        print(f"{indent}| > phoneme backend: {self.name()}")