| | from typing import Callable, Dict, List, Union |
| | from dataclasses import asdict, dataclass, field |
| |
|
| |
|
| | import re |
| | from dataclasses import replace |
| | from typing import Dict |
# Pre-compiled pattern matching one or more consecutive whitespace characters;
# collapse_whitespace() replaces every match with a single space.
_whitespace_re = re.compile(r"\s+")
| |
|
| | from dataclasses import dataclass, field |
| | from typing import List |
| |
|
| | |
| | |
| |
|
@dataclass
class CharactersConfig():
    """Container for the character-set settings exchanged with the vocabulary
    and characters classes in this file (their ``init_from_config()`` methods
    read these fields and their ``to_config()`` methods produce this class).

    Every string/dict field defaults to ``None``.
    """

    # Assigned by TTSTokenizer.init_from_config().
    # NOTE(review): annotated as str, but that method stores the class object itself.
    characters_class: str = None

    # Read by BaseVocabulary.init_from_config(); when truthy the vocabulary is
    # built from this mapping.
    vocab_dict: Dict = None

    # Special tokens and character groups.
    pad: str = None
    eos: str = None
    bos: str = None
    blank: str = None
    characters: str = None
    punctuations: str = None
    # Read by IPAPhonemes.init_from_config() and VitsCharacters.init_from_config().
    phonemes: str = None
    # Passed through as the is_unique / is_sorted flags of the characters classes.
    is_unique: bool = True
    is_sorted: bool = True
| |
|
| |
|
@dataclass
class BaseTTSConfig():
    """Shared configuration fields and their default values.

    ``VitsConfig`` derives from this class and overrides several defaults.

    NOTE(review): the consumers of most fields are not part of this file, so
    the group comments below are inferred from the field names only -- confirm
    them against the training / data-loading code.
    """

    # Text / phoneme processing options (presumably).
    use_phonemes: bool = False
    phonemizer: str = None
    phoneme_language: str = None
    compute_input_seq_cache: bool = False
    text_cleaner: str = None
    enable_eos_bos_chars: bool = False
    test_sentences_file: str = ""
    phoneme_cache_path: str = None
    # Character-set settings; read by the init_from_config() helpers in this
    # file. None is the default here.
    characters: CharactersConfig = None
    add_blank: bool = False
    # Batching / loss options (presumably).
    batch_group_size: int = 0
    loss_masking: bool = None
    # Data-loading limits and feature switches (presumably).
    min_audio_len: int = 1
    # NOTE: annotated as int, but the default value is the float infinity.
    max_audio_len: int = float("inf")
    min_text_len: int = 1
    # NOTE: annotated as int, but the default value is the float infinity.
    max_text_len: int = float("inf")
    compute_f0: bool = False
    compute_energy: bool = False
    compute_linear_spec: bool = False
    precompute_num_workers: int = 0
    use_noise_augment: bool = False
    start_by_longest: bool = False
    shuffle: bool = False
    drop_last: bool = False
    # NOTE(review): annotated as str; the expected value is not visible here.
    datasets: str = None
    # Optimizer selection; VitsConfig replaces both defaults.
    optimizer: str = "radam"
    optimizer_params: dict = None
    # Learning-rate scheduler selection; the params default to a fresh empty
    # dict per instance.
    lr_scheduler: str = None
    lr_scheduler_params: dict = field(default_factory=lambda: {})
    # Defaults to a fresh empty list per instance; VitsConfig overrides it.
    test_sentences: List[str] = field(default_factory=lambda: [])
    # Evaluation split sizing (presumably).
    eval_split_max_size: int = None
    eval_split_size: float = 0.01
    # Weighted-sampler switches and their alpha values.
    use_speaker_weighted_sampler: bool = False
    speaker_weighted_sampler_alpha: float = 1.0
    use_language_weighted_sampler: bool = False
    language_weighted_sampler_alpha: float = 1.0
    use_length_weighted_sampler: bool = False
    length_weighted_sampler_alpha: float = 1.0
| |
|
| |
|
@dataclass
class VitsAudioConfig():
    """Audio parameter defaults; a fresh instance is the default value of
    ``VitsConfig.audio``.

    NOTE(review): units are not stated in this file (sample_rate is presumably
    in Hz and the FFT/window/hop sizes in samples) -- confirm with the audio
    processing code.
    """

    fft_size: int = 1024
    sample_rate: int = 22050
    win_length: int = 1024
    hop_length: int = 256
    num_mels: int = 80
    mel_fmin: int = 0
    # None by default; how a missing upper bound is interpreted is decided by
    # the consumer, which is not visible here.
    mel_fmax: int = None
| | |
@dataclass
class VitsArgs():
    """Model argument defaults; a fresh instance is the default value of
    ``VitsConfig.model_args``.

    List-valued fields use ``default_factory`` so every instance receives its
    own list objects.

    NOTE(review): the model code that consumes these values is not part of
    this file; field meanings are only what the names suggest.
    """

    num_chars: int = 100
    out_channels: int = 513
    spec_segment_size: int = 32
    hidden_channels: int = 192
    hidden_channels_ffn_text_encoder: int = 768
    num_heads_text_encoder: int = 2
    num_layers_text_encoder: int = 6
    kernel_size_text_encoder: int = 3
    dropout_p_text_encoder: float = 0.1
    dropout_p_duration_predictor: float = 0.5
    kernel_size_posterior_encoder: int = 5
    dilation_rate_posterior_encoder: int = 1
    num_layers_posterior_encoder: int = 16
    kernel_size_flow: int = 5
    dilation_rate_flow: int = 1
    num_layers_flow: int = 4
    # A string, not an int.
    resblock_type_decoder: str = "1"
    resblock_kernel_sizes_decoder: List[int] = field(default_factory=lambda: [3, 7, 11])
    resblock_dilation_sizes_decoder: List[List[int]] = field(default_factory=lambda: [[1, 3, 5], [1, 3, 5], [1, 3, 5]])
    upsample_rates_decoder: List[int] = field(default_factory=lambda: [8, 8, 2, 2])
    upsample_initial_channel_decoder: int = 512
    upsample_kernel_sizes_decoder: List[int] = field(default_factory=lambda: [16, 16, 4, 4])
    periods_multi_period_discriminator: List[int] = field(default_factory=lambda: [2, 3, 5, 7, 11])
    use_sdp: bool = True
    noise_scale: float = 1.0
    inference_noise_scale: float = 0.667
    # Annotated float, default is the int 1.
    length_scale: float = 1
    noise_scale_dp: float = 1.0
    inference_noise_scale_dp: float = 1.0
    max_inference_len: int = None
    init_discriminator: bool = True
    # NOTE: the spelling "disriminator" is part of the field name (and thus of
    # the constructor keyword); do not correct it without updating callers.
    use_spectral_norm_disriminator: bool = False
    use_speaker_embedding: bool = False
    num_speakers: int = 0
    speakers_file: str = None
    d_vector_file: List[str] = None
    speaker_embedding_channels: int = 256
    use_d_vector_file: bool = False
    # Defaults to 0 here, whereas VitsConfig.d_vector_dim defaults to None.
    d_vector_dim: int = 0
    detach_dp_input: bool = True
    use_language_embedding: bool = False
    embedded_language_dim: int = 4
    num_languages: int = 0
    language_ids_file: str = None
    use_speaker_encoder_as_loss: bool = False
    speaker_encoder_config_path: str = ""
    speaker_encoder_model_path: str = ""
    condition_dp_on_speaker: bool = True
    freeze_encoder: bool = False
    freeze_DP: bool = False
    freeze_PE: bool = False
    freeze_flow_decoder: bool = False
    freeze_waveform_decoder: bool = False
    encoder_sample_rate: int = None
    interpolate_z: bool = True
    reinit_DP: bool = False
    reinit_text_encoder: bool = False
@dataclass
class VitsConfig(BaseTTSConfig):
    """Configuration for the "vits" model, extending ``BaseTTSConfig``.

    Besides adding new fields it re-declares ``optimizer``,
    ``optimizer_params``, ``compute_linear_spec``, ``add_blank`` and
    ``test_sentences`` with different defaults than the base class.
    """

    model: str = "vits"
    # Nested argument / audio groups; each instance gets its own fresh object.
    model_args: VitsArgs = field(default_factory=VitsArgs)
    audio: VitsAudioConfig = field(default_factory=VitsAudioConfig)

    # Optimizer and scheduler settings. Separate "gen" and "disc" values are
    # kept for the learning rate and the scheduler.
    grad_clip: List[float] = field(default_factory=lambda: [1000, 1000])
    lr_gen: float = 0.0002
    lr_disc: float = 0.0002
    lr_scheduler_gen: str = "ExponentialLR"
    lr_scheduler_gen_params: dict = field(default_factory=lambda: {"gamma": 0.999875, "last_epoch": -1})
    lr_scheduler_disc: str = "ExponentialLR"
    lr_scheduler_disc_params: dict = field(default_factory=lambda: {"gamma": 0.999875, "last_epoch": -1})
    scheduler_after_epoch: bool = True
    # Overrides the base-class default "radam".
    optimizer: str = "AdamW"
    # Overrides the base-class default None.
    optimizer_params: dict = field(default_factory=lambda: {"betas": [0.8, 0.99], "eps": 1e-9, "weight_decay": 0.01})

    # Loss weighting factors.
    kl_loss_alpha: float = 1.0
    disc_loss_alpha: float = 1.0
    gen_loss_alpha: float = 1.0
    feat_loss_alpha: float = 1.0
    mel_loss_alpha: float = 45.0
    dur_loss_alpha: float = 1.0
    speaker_encoder_loss_alpha: float = 1.0

    return_wav: bool = True
    # Overrides the base-class default False.
    compute_linear_spec: bool = True

    # Weighted sampler settings; both dicts default to fresh empty dicts.
    use_weighted_sampler: bool = False
    weighted_sampler_attrs: dict = field(default_factory=lambda: {})
    weighted_sampler_multipliers: dict = field(default_factory=lambda: {})

    r: int = 1
    # Overrides the base-class default False.
    add_blank: bool = True

    # Overrides the base-class empty list; each entry is a one-element list
    # holding a sentence.
    test_sentences: List[List] = field(
        default_factory=lambda: [
            ["It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent."],
            ["Be a voice, not an echo."],
            ["I'm sorry Dave. I'm afraid I can't do that."],
            ["This cake is great. It's so delicious and moist."],
            ["Prior to November 22, 1963."],
        ]
    )

    # The fields below share their names with fields of VitsArgs.
    # NOTE(review): whether and where the two copies are synchronised is not
    # visible in this file.
    num_speakers: int = 0
    use_speaker_embedding: bool = False
    speakers_file: str = None
    speaker_embedding_channels: int = 256
    language_ids_file: str = None
    use_language_embedding: bool = False

    use_d_vector_file: bool = False
    d_vector_file: List[str] = None
    # Defaults to None here, whereas VitsArgs.d_vector_dim defaults to 0.
    d_vector_dim: int = None

    def __post_init__(self):
        # Currently a no-op: nothing is validated or derived after construction.
        pass
| | |
| | |
| | |
| |
|
| |
|
| |
|
| |
|
| |
|
def parse_symbols():
    """Return the module's default symbol groups as a name -> value dict.

    The mapping holds the pad/eos/bos tokens and the default characters,
    punctuations and phonemes. The blank token is not included.
    """
    names = ("pad", "eos", "bos", "characters", "punctuations", "phonemes")
    values = (_pad, _eos, _bos, _characters, _punctuations, _phonemes)
    return dict(zip(names, values))
| |
|
| |
|
| | |
# Default special tokens.
_pad = "<PAD>"
_eos = "<EOS>"
_bos = "<BOS>"
_blank = "<BLNK>"

# Default grapheme and punctuation sets (the punctuation set includes the space).
_characters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
_punctuations = "!'(),-.:;? "

# Default phoneme symbols, grouped by category.
_vowels = "iyɨʉɯuɪʏʊeøɘəɵɤoɛœɜɞʌɔæɐaɶɑɒᵻ"
_non_pulmonic_consonants = "ʘɓǀɗǃʄǂɠǁʛ"
_pulmonic_consonants = "pbtdʈɖcɟkɡqɢʔɴŋɲɳnɱmʙrʀⱱɾɽɸβfvθðszʃʒʂʐçʝxɣχʁħʕhɦɬɮʋɹɻjɰlɭʎʟ"
_suprasegmentals = "ˈˌːˑ"
_other_symbols = "ʍwɥʜʢʡɕʑɺɧʲ"
_diacrilics = "ɚ˞ɫ"

# Full phoneme set: the groups above joined in this exact order.
_phonemes = "".join(
    (
        _vowels,
        _non_pulmonic_consonants,
        _pulmonic_consonants,
        _suprasegmentals,
        _other_symbols,
        _diacrilics,
    )
)
| |
|
| |
|
class BaseVocabulary:
    """Base Vocabulary class.

    This class only needs a vocabulary dictionary without specifying the characters.

    Token IDs are assigned by enumerating ``vocab`` in iteration order; the
    values stored in the mapping are not consulted.

    Args:
        vocab (Dict): A dictionary of characters and their corresponding indices.
        pad (str): Padding token, or None when there is none.
        blank (str): Blank token, or None when there is none.
        bos (str): Beginning-of-sequence token, or None when there is none.
        eos (str): End-of-sequence token, or None when there is none.
    """

    def __init__(self, vocab: Dict, pad: str = None, blank: str = None, bos: str = None, eos: str = None):
        # Assigning through the property also builds the lookup tables.
        self.vocab = vocab
        self.pad = pad
        self.blank = blank
        self.bos = bos
        self.eos = eos

    @property
    def pad_id(self) -> int:
        """Return the index of the padding character. If the padding character is not specified, return the length
        of the vocabulary."""
        return self.char_to_id(self.pad) if self.pad else len(self.vocab)

    @property
    def blank_id(self) -> int:
        """Return the index of the blank character. If the blank character is not specified, return the length of
        the vocabulary."""
        return self.char_to_id(self.blank) if self.blank else len(self.vocab)

    @property
    def bos_id(self) -> int:
        """Return the index of the bos character. If the bos character is not specified, return the length of the
        vocabulary."""
        return self.char_to_id(self.bos) if self.bos else len(self.vocab)

    @property
    def eos_id(self) -> int:
        """Return the index of the eos character. If the eos character is not specified, return the length of the
        vocabulary."""
        return self.char_to_id(self.eos) if self.eos else len(self.vocab)

    @property
    def vocab(self):
        """Return the vocabulary dictionary."""
        return self._vocab

    @vocab.setter
    def vocab(self, vocab):
        """Set the vocabulary dictionary and character mapping dictionaries.

        Passing None resets the vocabulary and both lookup tables to None.
        """
        self._vocab, self._char_to_id, self._id_to_char = None, None, None
        if vocab is not None:
            self._vocab = vocab
            self._char_to_id = {char: idx for idx, char in enumerate(self._vocab)}
            self._id_to_char = dict(enumerate(self._vocab))

    @staticmethod
    def init_from_config(config, **kwargs):
        """Initialize from the given config.

        When ``config.characters`` carries a non-empty ``vocab_dict`` the
        vocabulary is built from it together with the pad/blank/bos/eos tokens
        of the same object; otherwise it is built from ``kwargs``.

        Returns:
            Tuple of the new ``BaseVocabulary`` and the unchanged ``config``.
        """
        # Attribute lookups are used instead of the ``in`` operator: a plain
        # dataclass config does not support membership tests and would raise
        # TypeError.
        characters_config = getattr(config, "characters", None)
        vocab_dict = getattr(characters_config, "vocab_dict", None)
        if vocab_dict:
            vocabulary = BaseVocabulary(
                vocab_dict,
                characters_config.pad,
                characters_config.blank,
                characters_config.bos,
                characters_config.eos,
            )
            return vocabulary, config
        return BaseVocabulary(**kwargs), config

    def to_config(self):
        """Export the vocabulary and special tokens as a ``CharactersConfig``
        with ``is_unique`` and ``is_sorted`` both set to False."""
        return CharactersConfig(
            vocab_dict=self._vocab,
            pad=self.pad,
            eos=self.eos,
            bos=self.bos,
            blank=self.blank,
            is_unique=False,
            is_sorted=False,
        )

    @property
    def num_chars(self):
        """Return number of tokens in the vocabulary."""
        return len(self._vocab)

    def char_to_id(self, char: str) -> int:
        """Map a character to an token ID.

        Raises:
            KeyError: If ``char`` is not in the vocabulary.
        """
        try:
            return self._char_to_id[char]
        except KeyError as e:
            raise KeyError(f" [!] {repr(char)} is not in the vocabulary.") from e

    def id_to_char(self, idx: int) -> str:
        """Map an token ID to a character."""
        return self._id_to_char[idx]
| |
|
| |
|
class BaseCharacters:
    """Character set that builds an ordered vocabulary with ID lookup tables.

    The vocabulary is laid out as ``[pad, eos, bos, blank]`` (only the tokens
    that are set and non-empty), followed by the characters, followed by the
    punctuations. It is rebuilt whenever one of those inputs is reassigned
    through its property.

    Args:
        characters (str): Main characters; None is treated as empty.
        punctuations (str): Punctuation characters appended after the main
            characters; None is treated as empty.
        pad (str): Padding token.
        eos (str): End-of-sequence token.
        bos (str): Beginning-of-sequence token.
        blank (str): Blank token.
        is_unique (bool): De-duplicate ``characters`` and assert that the final
            vocabulary has no duplicates.
        is_sorted (bool): Sort ``characters`` before building the vocabulary.
    """

    # Constructor keywords that init_from_config() copies from a config object.
    _INIT_FIELDS = ("characters", "punctuations", "pad", "eos", "bos", "blank", "is_unique", "is_sorted")

    def __init__(
        self,
        characters: str = None,
        punctuations: str = None,
        pad: str = None,
        eos: str = None,
        bos: str = None,
        blank: str = None,
        is_unique: bool = False,
        is_sorted: bool = True,
    ) -> None:
        self._characters = characters
        self._punctuations = punctuations
        self._pad = pad
        self._eos = eos
        self._bos = bos
        self._blank = blank
        self.is_unique = is_unique
        self.is_sorted = is_sorted
        self._create_vocab()

    @property
    def pad_id(self) -> int:
        """ID of the pad token, or the vocabulary length when no pad token is set."""
        return self.char_to_id(self.pad) if self.pad else len(self.vocab)

    @property
    def blank_id(self) -> int:
        """ID of the blank token, or the vocabulary length when no blank token is set."""
        return self.char_to_id(self.blank) if self.blank else len(self.vocab)

    @property
    def eos_id(self) -> int:
        """ID of the eos token, or the vocabulary length when no eos token is set."""
        return self.char_to_id(self.eos) if self.eos else len(self.vocab)

    @property
    def bos_id(self) -> int:
        """ID of the bos token, or the vocabulary length when no bos token is set."""
        return self.char_to_id(self.bos) if self.bos else len(self.vocab)

    @property
    def characters(self):
        return self._characters

    @characters.setter
    def characters(self, characters):
        self._characters = characters
        self._create_vocab()

    @property
    def punctuations(self):
        return self._punctuations

    @punctuations.setter
    def punctuations(self, punctuations):
        self._punctuations = punctuations
        self._create_vocab()

    @property
    def pad(self):
        return self._pad

    @pad.setter
    def pad(self, pad):
        self._pad = pad
        self._create_vocab()

    @property
    def eos(self):
        return self._eos

    @eos.setter
    def eos(self, eos):
        self._eos = eos
        self._create_vocab()

    @property
    def bos(self):
        return self._bos

    @bos.setter
    def bos(self, bos):
        self._bos = bos
        self._create_vocab()

    @property
    def blank(self):
        return self._blank

    @blank.setter
    def blank(self, blank):
        self._blank = blank
        self._create_vocab()

    @property
    def vocab(self):
        return self._vocab

    @vocab.setter
    def vocab(self, vocab):
        """Store the vocabulary and rebuild both lookup tables from it."""
        self._vocab = vocab
        self._char_to_id = {char: idx for idx, char in enumerate(self.vocab)}
        self._id_to_char = dict(enumerate(self.vocab))

    @property
    def num_chars(self):
        """Number of entries in the vocabulary."""
        return len(self._vocab)

    def _create_vocab(self):
        """Rebuild the vocabulary and lookup tables from the current settings.

        Raises:
            AssertionError: If ``is_unique`` is set and the final vocabulary
                contains duplicate entries.
        """
        # None means "nothing configured"; treating it as empty keeps the
        # argument-less constructor (used by init_from_config) working.
        _vocab = self._characters if self._characters is not None else ""
        punctuations = self._punctuations if self._punctuations is not None else ""
        if self.is_unique:
            _vocab = list(set(_vocab))
        if self.is_sorted:
            _vocab = sorted(_vocab)
        _vocab = list(_vocab)
        # Each token is prepended in turn, so the final order is pad, eos, bos, blank.
        for token in (self._blank, self._bos, self._eos, self._pad):
            if token is not None and len(token) > 0:
                _vocab = [token] + _vocab
        self.vocab = _vocab + list(punctuations)
        if self.is_unique:
            duplicates = {x for x in self.vocab if self.vocab.count(x) > 1}
            assert (
                len(self.vocab) == len(self._char_to_id) == len(self._id_to_char)
            ), f" [!] There are duplicate characters in the character set. {duplicates}"

    def char_to_id(self, char: str) -> int:
        """Map a character to its ID.

        Raises:
            KeyError: If ``char`` is not in the vocabulary.
        """
        try:
            return self._char_to_id[char]
        except KeyError as e:
            raise KeyError(f" [!] {repr(char)} is not in the vocabulary.") from e

    def id_to_char(self, idx: int) -> str:
        """Map an ID back to its character."""
        return self._id_to_char[idx]

    def print_log(self, level: int = 0):
        """
        Prints the vocabulary in a nice format.
        """
        indent = "\t" * level
        print(f"{indent}| > Characters: {self._characters}")
        print(f"{indent}| > Punctuations: {self._punctuations}")
        print(f"{indent}| > Pad: {self._pad}")
        print(f"{indent}| > EOS: {self._eos}")
        print(f"{indent}| > BOS: {self._bos}")
        print(f"{indent}| > Blank: {self._blank}")
        print(f"{indent}| > Vocab: {self.vocab}")
        print(f"{indent}| > Num chars: {self.num_chars}")

    @staticmethod
    def _init_kwargs_from_config(characters_config) -> dict:
        """Collect the constructor keywords present on ``characters_config``.

        Works for mappings and for attribute-style objects (e.g. dataclasses).
        Fields the constructor does not accept are ignored.
        """
        if isinstance(characters_config, dict):
            return {
                name: characters_config[name]
                for name in BaseCharacters._INIT_FIELDS
                if name in characters_config
            }
        return {
            name: getattr(characters_config, name)
            for name in BaseCharacters._INIT_FIELDS
            if hasattr(characters_config, name)
        }

    @staticmethod
    def init_from_config(config: "Coqpit"):
        """Init your character class from a config.

        Implement this method for your subclass.

        Returns:
            Tuple ``(characters, config)``. When ``config.characters`` is None
            an empty character set is created and a copy of ``config``
            carrying its settings is returned instead of the original.
        """
        if config.characters is not None:
            # ``**config.characters`` is avoided: a dataclass is not a mapping
            # and it also holds fields the constructor does not accept.
            init_kwargs = BaseCharacters._init_kwargs_from_config(config.characters)
            return BaseCharacters(**init_kwargs), config

        characters = BaseCharacters()
        new_config = replace(config, characters=characters.to_config())
        return characters, new_config

    def to_config(self) -> "CharactersConfig":
        """Export the current settings as a ``CharactersConfig``."""
        return CharactersConfig(
            characters=self._characters,
            punctuations=self._punctuations,
            pad=self._pad,
            eos=self._eos,
            bos=self._bos,
            blank=self._blank,
            is_unique=self.is_unique,
            is_sorted=self.is_sorted,
        )
| |
|
| |
|
class IPAPhonemes(BaseCharacters):
    """Characters class whose defaults are the module's phoneme symbols,
    punctuations and special tokens."""

    def __init__(
        self,
        characters: str = _phonemes,
        punctuations: str = _punctuations,
        pad: str = _pad,
        eos: str = _eos,
        bos: str = _bos,
        blank: str = _blank,
        is_unique: bool = False,
        is_sorted: bool = True,
    ) -> None:
        super().__init__(characters, punctuations, pad, eos, bos, blank, is_unique, is_sorted)

    @staticmethod
    def init_from_config(config: "Coqpit"):
        """Init a IPAPhonemes object from a model config

        If characters are not defined in the config, the default characters are used and a copy of the
        config carrying them is returned (``config`` must then be a dataclass instance).

        When ``config.characters.phonemes`` is set, it is written to ``config.characters.characters``
        before the object is built.

        Returns:
            Tuple of the ``IPAPhonemes`` object and the (possibly new) config.
        """
        # Attribute access is used throughout: membership tests and item access
        # are not supported by plain dataclass configs.
        characters_config = getattr(config, "characters", None)
        if characters_config is None:
            characters = IPAPhonemes()
            new_config = replace(config, characters=characters.to_config())
            return characters, new_config

        phonemes = getattr(characters_config, "phonemes", None)
        if phonemes is not None:
            # Same side effect as before: the config's characters become the phonemes.
            setattr(characters_config, "characters", phonemes)

        field_names = ("characters", "punctuations", "pad", "eos", "bos", "blank", "is_unique", "is_sorted")
        init_kwargs = {
            name: getattr(characters_config, name) for name in field_names if hasattr(characters_config, name)
        }
        return IPAPhonemes(**init_kwargs), config
| |
|
| |
|
class Graphemes(BaseCharacters):
    """Characters class whose defaults are the module's grapheme characters,
    punctuations and special tokens."""

    def __init__(
        self,
        characters: str = _characters,
        punctuations: str = _punctuations,
        pad: str = _pad,
        eos: str = _eos,
        bos: str = _bos,
        blank: str = _blank,
        is_unique: bool = False,
        is_sorted: bool = True,
    ) -> None:
        super().__init__(characters, punctuations, pad, eos, bos, blank, is_unique, is_sorted)

    @staticmethod
    def init_from_config(config: "Coqpit"):
        """Init a Graphemes object from a model config

        If characters are not defined in the config, the default characters are used and a copy of the
        config carrying them is returned (``config`` must then be a dataclass instance).

        Returns:
            Tuple of the ``Graphemes`` object and the (possibly new) config.
        """
        characters_config = config.characters
        if characters_config is None:
            characters = Graphemes()
            new_config = replace(config, characters=characters.to_config())
            return characters, new_config

        # Only the fields the constructor accepts are forwarded. Membership
        # tests and ``**`` unpacking are avoided because a plain dataclass
        # config supports neither; extra fields such as ``phonemes`` are ignored.
        field_names = ("characters", "punctuations", "pad", "eos", "bos", "blank", "is_unique", "is_sorted")
        init_kwargs = {
            name: getattr(characters_config, name) for name in field_names if hasattr(characters_config, name)
        }
        return Graphemes(**init_kwargs), config
| |
|
| |
|
if __name__ == "__main__":
    # Manual check: build both default character sets first, then print each one.
    demo_sets = (Graphemes(), IPAPhonemes())
    for demo_set in demo_sets:
        demo_set.print_log()
| |
|
| |
|
class VitsCharacters(BaseCharacters):
    """Characters class for VITs model for compatibility with pre-trained models

    The vocabulary layout differs from ``BaseCharacters``: it is
    ``[pad] + punctuations + characters + [blank]``, no eos/bos tokens are
    used, and the characters are neither sorted nor de-duplicated.

    Args:
        graphemes (str): Grapheme characters.
        punctuations (str): Punctuation characters.
        pad (str): Padding token.
        ipa_characters (str): Phoneme characters appended to ``graphemes``;
            pass None to use the graphemes alone.
    """

    def __init__(
        self,
        graphemes: str = _characters,
        punctuations: str = _punctuations,
        pad: str = _pad,
        ipa_characters: str = _phonemes,
    ) -> None:
        if ipa_characters is not None:
            graphemes += ipa_characters
        super().__init__(graphemes, punctuations, pad, None, None, "<BLNK>", is_unique=False, is_sorted=True)

    def _create_vocab(self):
        """Build the fixed-order vocabulary and both lookup tables."""
        self._vocab = [self._pad] + list(self._punctuations) + list(self._characters) + [self._blank]
        self._char_to_id = {char: idx for idx, char in enumerate(self.vocab)}
        self._id_to_char = dict(enumerate(self.vocab))

    @staticmethod
    def init_from_config(config):
        """Build a ``VitsCharacters`` object from ``config.characters``.

        When ``config.characters`` is None the default character set is used
        and a copy of ``config`` carrying it is returned (``config`` must then
        be a dataclass instance).

        Returns:
            Tuple of the ``VitsCharacters`` object and the (possibly new) config.
        """
        characters_config = config.characters
        if characters_config is None:
            # None is the default of BaseTTSConfig.characters; fall back to the
            # defaults instead of failing on attribute access.
            characters = VitsCharacters()
            new_config = replace(config, characters=characters.to_config())
            return characters, new_config

        characters = VitsCharacters(
            graphemes=characters_config.characters,
            ipa_characters=characters_config.phonemes,
            punctuations=characters_config.punctuations,
            pad=characters_config.pad,
        )
        return characters, config

    def to_config(self) -> "CharactersConfig":
        """Export the settings as a ``CharactersConfig``.

        ``characters`` holds the already combined graphemes and phonemes, so
        ``phonemes`` is left unset.
        """
        return CharactersConfig(
            characters=self._characters,
            punctuations=self._punctuations,
            pad=self._pad,
            eos=None,
            bos=None,
            blank=self._blank,
            is_unique=False,
            is_sorted=True,
        )
| | |
class TTSTokenizer:
    """Turn text into a sequence of token IDs.

    Args:
        text_cleaner (Callable): Function applied to the raw text before it is
            encoded; None skips cleaning.
        characters (BaseCharacters): Object providing ``char_to_id()``, the
            ``pad`` / ``blank`` tokens and the ``*_id`` properties.
    """

    def __init__(
        self,
        text_cleaner: Callable = None,
        characters: "BaseCharacters" = None,
    ):
        self.text_cleaner = text_cleaner
        self.characters = characters
        # Characters already reported as missing, so each is logged only once.
        self.not_found_characters = []

    @property
    def characters(self):
        return self._characters

    @characters.setter
    def characters(self, new_characters):
        """Store the characters object and cache its pad / blank IDs
        (None when the token, or the characters object itself, is not set)."""
        self._characters = new_characters
        if new_characters is None:
            # Allows constructing the tokenizer before a character set exists.
            self.pad_id = None
            self.blank_id = None
            return
        self.pad_id = new_characters.char_to_id(new_characters.pad) if new_characters.pad else None
        self.blank_id = new_characters.char_to_id(new_characters.blank) if new_characters.blank else None

    def encode(self, text: str) -> List[int]:
        """Encodes a string of text as a sequence of IDs.

        Characters missing from the vocabulary are dropped; the first time a
        given character is missing, the text and a warning are printed.
        """
        token_ids = []
        for char in text:
            try:
                idx = self.characters.char_to_id(char)
            except KeyError:
                if char not in self.not_found_characters:
                    self.not_found_characters.append(char)
                    print(text)
                    print(f" [!] Character {repr(char)} not found in the vocabulary. Discarding it.")
            else:
                token_ids.append(idx)
        return token_ids

    def text_to_ids(self, text: str, language: str = None) -> List[int]:
        """Clean ``text``, encode it and intersperse the blank ID.

        ``language`` is accepted but not used.
        """
        if self.text_cleaner is not None:
            text = self.text_cleaner(text)
        token_ids = self.encode(text)
        return self.intersperse_blank_char(token_ids, True)

    def pad_with_bos_eos(self, char_sequence: List[str]):
        """Pads a sequence with the special BOS and EOS characters."""
        return [self.characters.bos_id] + list(char_sequence) + [self.characters.eos_id]

    def intersperse_blank_char(self, char_sequence: List[str], use_blank_char: bool = False):
        """Intersperses a filler ID between the items of a sequence.

        The filler is the ```blank``` ID when ``use_blank_char`` is True, otherwise the ```pad``` ID.
        The result starts and ends with the filler, so it has ``2 * len(char_sequence) + 1`` items.
        """
        # The pad *ID* is used, not the pad token string, so the result holds IDs only.
        filler_id = self.characters.blank_id if use_blank_char else self.characters.pad_id
        result = [filler_id] * (len(char_sequence) * 2 + 1)
        result[1::2] = char_sequence
        return result

    @staticmethod
    def init_from_config(config: "Coqpit", characters: "BaseCharacters" = None):
        """Create a tokenizer that uses ``multilingual_cleaners`` and ``VitsCharacters``.

        NOTE: the ``characters`` argument is currently ignored; the character
        set is always built from ``config`` by ``VitsCharacters``.

        Returns:
            Tuple of the tokenizer and the config returned by
            ``VitsCharacters.init_from_config``, whose
            ``characters.characters_class`` is set to the ``VitsCharacters`` class.
        """
        text_cleaner = multilingual_cleaners
        CharactersClass = VitsCharacters
        characters, new_config = CharactersClass.init_from_config(config)
        new_config.characters.characters_class = VitsCharacters
        return (TTSTokenizer(text_cleaner, characters), new_config)
| |
|
| |
|
def multilingual_cleaners(text):
    """Pipeline for multilingual text

    Steps, in order: lower-case, replace symbols (with ``lang=None``), remove
    auxiliary symbols, collapse whitespace.
    """
    normalized = replace_symbols(lowercase(text), lang=None)
    return collapse_whitespace(remove_aux_symbols(normalized))
| |
|
def lowercase(text):
    """Return ``text`` converted to lower case via its ``lower()`` method."""
    lowered = text.lower()
    return lowered
| |
|
def collapse_whitespace(text):
    """Replace every whitespace run in ``text`` with a single space and strip
    leading/trailing whitespace from the result."""
    single_spaced = _whitespace_re.sub(" ", text)
    return single_spaced.strip()
| |
|
def replace_symbols(text, lang="en"):
    """Normalize a handful of symbols in ``text``.

    ``;`` and ``:`` become ``,``. ``-`` becomes a space, or is removed when
    ``lang`` is "ca". ``&`` is spelled out for "en", "fr", "pt" and "ca" and
    left untouched for any other ``lang``. Apostrophes are removed.
    """
    hyphen_substitute = "" if lang == "ca" else " "
    text = text.replace(";", ",")
    text = text.replace("-", hyphen_substitute)
    text = text.replace(":", ",")

    # Language code -> spelled-out replacement for the ampersand.
    ampersand_words = (
        ("en", " and "),
        ("fr", " et "),
        ("pt", " e "),
        ("ca", " i "),
    )
    for code, word in ampersand_words:
        if lang == code:
            text = text.replace("&", word)
            break

    return text.replace("'", "")
| |
|
def remove_aux_symbols(text):
    """Return ``text`` without angle brackets, parentheses, square brackets
    and double quotes."""
    aux_symbols = re.compile(r"[\<\>\(\)\[\]\"]+")
    return aux_symbols.sub("", text)