Spaces:

ntt123
/

vietTTS

Running

File size: 932 Bytes

cd75eda
 
 
 
aa0196c
e89bb30
aa0196c
cd75eda
 
f4d29b7
daea792
 
cd75eda
 
acbc346
 
 
3f2e911
630394d
cd75eda
e89bb30
cd75eda
 
 
 
 
 
 
e89bb30
cd75eda
 
 
e89bb30
cd75eda
 
 
 
e89bb30
926a62f
 
ba54530
fd46c98

from vietTTS.hifigan.mel2wave import mel2wave
from vietTTS.nat.text2mel import text2mel
from vietTTS import nat_normalize_text
import numpy as np
import gradio as gr
import os


def text_to_speech(text):
    """Synthesize speech for ``text`` and return it as 16-bit PCM samples.

    The text is truncated to its first 500 characters, normalized with
    ``nat_normalize_text``, turned into a mel-spectrogram by ``text2mel``
    and finally into a waveform by ``mel2wave``.

    Args:
        text: Input text to speak.

    Returns:
        An ``int16`` array holding the waveform scaled by ``2**15`` and
        clipped to the representable ``int16`` range.
    """
    # Prevent overly long inputs; slicing is a no-op for shorter text.
    text = nat_normalize_text(text[:500])
    mel = text2mel(
        text,
        "lexicon.txt",
        0.2,
        "acoustic_ckpt_latest.pickle",
        "duration_ckpt_latest.pickle",
    )
    wave = mel2wave(mel, "config.json", "hk_hifi.pickle")
    # A sample at +1.0 scales to 32768, one past the int16 maximum. Clip
    # before casting so full-scale peaks saturate instead of wrapping around.
    pcm = np.clip(wave * (2**15), -(2**15), 2**15 - 1)
    return pcm.astype(np.int16)


def speak(text):
    """Gradio callback: synthesize ``text`` and return ``(16_000, samples)``.

    The first tuple element is the fixed value 16_000 and the second is the
    array produced by ``text_to_speech`` (presumably the sample-rate/data pair
    that the Gradio audio output expects -- confirm against the Gradio docs).
    """
    return 16_000, text_to_speech(text)


# Static text passed to the interface as its title and description.
title = "vietTTS"
description = "A vietnamese text-to-speech demo."

# Wire `speak` into a text-in / audio-out interface, then start serving it.
demo = gr.Interface(
    fn=speak,
    inputs="text",
    outputs="audio",
    title=title,
    description=description,
    theme="default",
    allow_screenshot=False,
    allow_flagging="never",
)
demo.launch(debug=False)