File size: 1,391 Bytes
35f6708 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 |
"""Piper configuration"""
from dataclasses import dataclass
from enum import Enum
from typing import Any, Dict, Mapping, Sequence
class PhonemeType(str, Enum):
ESPEAK = "espeak"
TEXT = "text"
@dataclass
class PiperConfig:
"""Piper configuration"""
num_symbols: int
"""Number of phonemes"""
num_speakers: int
"""Number of speakers"""
sample_rate: int
"""Sample rate of output audio"""
espeak_voice: str
"""Name of espeak-ng voice or alphabet"""
length_scale: float
noise_scale: float
noise_w: float
phoneme_id_map: Mapping[str, Sequence[int]]
"""Phoneme -> [id,]"""
phoneme_type: PhonemeType
"""espeak or text"""
@staticmethod
def from_dict(config: Dict[str, Any]) -> "PiperConfig":
inference = config.get("inference", {})
return PiperConfig(
num_symbols=config["num_symbols"],
num_speakers=config["num_speakers"],
sample_rate=config["audio"]["sample_rate"],
noise_scale=inference.get("noise_scale", 0.667),
length_scale=inference.get("length_scale", 1.0),
noise_w=inference.get("noise_w", 0.8),
#
espeak_voice=config["espeak"]["voice"],
phoneme_id_map=config["phoneme_id_map"],
phoneme_type=PhonemeType(config.get("phoneme_type", PhonemeType.ESPEAK)),
)
|