Style-Bert-VITS2-Editor-Demo

Sleeping

File size: 3,956 Bytes

83d190a

import re

import inflect


# Shared inflect engine used by the helpers below to spell numbers as words.
__INFLECT = inflect.engine()
# A digit, then one or more digits/commas, then a digit (e.g. "1,234").
# Matches are passed to __remove_commas.
__COMMA_NUMBER_PATTERN = re.compile(r"([0-9][0-9\,]+[0-9])")
# Digits, a literal dot, digits (e.g. "3.14"); handled by __expand_decimal_point.
__DECIMAL_NUMBER_PATTERN = re.compile(r"([0-9]+\.[0-9]+)")
# "£" followed by an amount; group 1 is the amount without the sign.
__POUNDS_PATTERN = re.compile(r"£([0-9\,]*[0-9]+)")
# "$" followed by an amount that may contain "." and ","; group 1 is the
# amount without the sign. Handled by __expand_dollars.
__DOLLARS_PATTERN = re.compile(r"\$([0-9\.\,]*[0-9]+)")
# Digits immediately followed by an ordinal suffix (st/nd/rd/th).
__ORDINAL_PATTERN = re.compile(r"[0-9]+(st|nd|rd|th)")
# Any remaining run of ASCII digits; handled by __expand_number.
__NUMBER_PATTERN = re.compile(r"[0-9]+")


def normalize_text(text: str) -> str:
    """Normalize raw input text.

    Runs three passes in order: numbers are expanded, punctuation is mapped
    through ``replace_punctuation``, and finally a single space is inserted
    between any of ``, ; . ? !`` and a word character that directly follows it.

    Args:
        text: The text to normalize.

    Returns:
        The normalized text.
    """
    numbers_expanded = __normalize_numbers(text)
    punctuation_mapped = replace_punctuation(numbers_expanded)
    # Separate punctuation from the word glued to it ("a,b" -> "a, b").
    return re.sub(r"([,;.\?\!])([\w])", r"\1 \2", punctuation_mapped)


# Punctuation (full-width, CJK, typographic, brackets, newline) mapped to the
# small ASCII set kept by the normalizer. Built once at import time rather
# than on every call.
__PUNCTUATION_REPLACE_MAP = {
    "：": ",",
    "；": ",",
    "，": ",",
    "。": ".",
    "！": "!",
    "？": "?",
    "\n": ".",
    "．": ".",
    "…": "...",
    "···": "...",
    "・・・": "...",
    "·": ",",
    "・": ",",
    "、": ",",
    "$": ".",
    "“": "'",
    "”": "'",
    '"': "'",
    "‘": "'",
    "’": "'",
    "（": "'",
    "）": "'",
    "(": "'",
    ")": "'",
    "《": "'",
    "》": "'",
    "【": "'",
    "】": "'",
    "[": "'",
    "]": "'",
    "—": "-",
    "−": "-",
    "～": "-",
    "~": "-",
    "「": "'",
    "」": "'",
}
# Regex alternation tries alternatives left to right, so longer keys are
# placed first: a three-dot key must win over its single-dot prefix no matter
# where entries sit in the dict. sorted() is stable, so equal-length keys keep
# their dict order.
__PUNCTUATION_REPLACE_PATTERN = re.compile(
    "|".join(
        re.escape(key)
        for key in sorted(__PUNCTUATION_REPLACE_MAP, key=len, reverse=True)
    )
)


def replace_punctuation(text: str) -> str:
    """Replace every mapped punctuation sequence in ``text``.

    Each occurrence of a key of the module-level replacement map is
    substituted by its mapped value; all other characters are left untouched.

    Args:
        text: The text to process.

    Returns:
        The text with mapped punctuation replaced.
    """
    return __PUNCTUATION_REPLACE_PATTERN.sub(
        lambda found: __PUNCTUATION_REPLACE_MAP[found.group()], text
    )


def __normalize_numbers(text: str) -> str:
    """Apply the number-rewriting passes to ``text`` in a fixed order.

    The order matters: commas are stripped from numbers first, currency
    amounts are rewritten next, then decimals and ordinals, and only at the
    end is every remaining digit run handed to ``__expand_number``.

    Args:
        text: The text to process.

    Returns:
        The text after all passes have run.
    """
    passes = (
        (__COMMA_NUMBER_PATTERN, __remove_commas),
        (__POUNDS_PATTERN, r"\1 pounds"),
        (__DOLLARS_PATTERN, __expand_dollars),
        (__DECIMAL_NUMBER_PATTERN, __expand_decimal_point),
        (__ORDINAL_PATTERN, __expand_ordinal),
        (__NUMBER_PATTERN, __expand_number),
    )
    for pattern, replacement in passes:
        text = pattern.sub(replacement, text)
    return text


def __expand_dollars(m: re.Match[str]) -> str:
    """Rewrite a matched dollar amount with explicit dollar/cent units.

    Args:
        m: A match whose group 1 is the amount following the "$" sign; it
            may contain digits, "." and ",".

    Returns:
        "<d> dollar(s), <c> cent(s)" when both parts are non-zero, only the
        non-zero part when one of them is zero, "zero dollars" when both are
        zero, or the raw amount followed by " dollars" when it contains more
        than one ".".
    """
    match = m.group(1)
    if match.count(".") > 1:
        return match + " dollars"  # Unexpected format

    # The dollars pattern allows commas in the amount. A stray one (e.g.
    # "$,500") would make int() raise ValueError, so drop them before parsing.
    whole, _, fraction = match.replace(",", "").partition(".")
    dollars = int(whole) if whole else 0
    cents = int(fraction) if fraction else 0

    dollar_unit = "dollar" if dollars == 1 else "dollars"
    cent_unit = "cent" if cents == 1 else "cents"
    if dollars and cents:
        return f"{dollars} {dollar_unit}, {cents} {cent_unit}"
    if dollars:
        return f"{dollars} {dollar_unit}"
    if cents:
        return f"{cents} {cent_unit}"
    return "zero dollars"


def __remove_commas(m: re.Match[str]) -> str:
    """Return group 1 of the match with every comma removed.

    Args:
        m: A match whose group 1 is a number that may contain commas.

    Returns:
        The matched text without commas.
    """
    pieces = m.group(1).split(",")
    return "".join(pieces)


def __expand_ordinal(m: re.Match[str]) -> str:
    """Spell out a matched ordinal using the shared inflect engine.

    Args:
        m: A match whose full text is digits followed by an ordinal suffix.

    Returns:
        Whatever ``number_to_words`` produces for the matched text.
    """
    ordinal_text = m.group(0)
    return __INFLECT.number_to_words(ordinal_text)  # type: ignore


def __expand_number(m: re.Match[str]) -> str:
    """Spell out a matched run of digits.

    Values strictly between 1000 and 3000 get special treatment:
    2000 becomes "two thousand"; 2001-2009 become "two thousand " plus the
    last two digits in words; exact hundreds become "<n> hundred"; anything
    else is passed to inflect with ``group=2`` and ``zero="oh"``, with the
    ", " separators of the result replaced by spaces. All other values are
    passed to inflect with ``andword=""``.

    Args:
        m: A match whose full text is a run of digits.

    Returns:
        The number in words.
    """
    value = int(m.group(0))

    # Outside the open interval (1000, 3000): plain reading.
    if not (1000 < value < 3000):
        return __INFLECT.number_to_words(value, andword="")  # type: ignore

    if value == 2000:
        return "two thousand"
    if 2000 < value < 2010:
        tail_words = __INFLECT.number_to_words(value % 100)  # type: ignore
        return "two thousand " + tail_words
    if value % 100 == 0:
        hundreds_words = __INFLECT.number_to_words(value // 100)  # type: ignore
        return hundreds_words + " hundred"

    grouped_words = __INFLECT.number_to_words(
        value, andword="", zero="oh", group=2  # type: ignore
    )
    return grouped_words.replace(", ", " ")  # type: ignore


def __expand_decimal_point(m: re.Match[str]) -> str:
    """Replace each "." in group 1 of the match with " point ".

    Args:
        m: A match whose group 1 is a decimal number such as "3.14".

    Returns:
        The matched text with the dot spelled out, e.g. "3 point 14".
    """
    digit_runs = m.group(1).split(".")
    return " point ".join(digit_runs)